Repository: pydantic/monty Branch: main Commit: 60538204fd9c Files: 724 Total size: 4.0 MB Directory structure: gitextract_527ed2df/ ├── .cargo/ │ └── config.toml ├── .claude/ │ ├── settings.json │ └── skills/ │ ├── coverage/ │ │ └── SKILL.md │ ├── fastmod/ │ │ └── SKILL.md │ └── python-playground/ │ └── SKILL.md ├── .codecov.yml ├── .github/ │ ├── actions/ │ │ └── build-pgo-wheel/ │ │ └── action.yml │ └── workflows/ │ ├── ci.yml │ ├── codspeed.yml │ └── init-npm-packages.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── .rustfmt.toml ├── CLAUDE.md ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── RELEASING.md ├── crates/ │ ├── fuzz/ │ │ ├── Cargo.toml │ │ └── fuzz_targets/ │ │ ├── string_input_panic.rs │ │ └── tokens_input_panic.rs │ ├── monty/ │ │ ├── Cargo.toml │ │ ├── benches/ │ │ │ └── main.rs │ │ ├── build.rs │ │ ├── src/ │ │ │ ├── args.rs │ │ │ ├── asyncio.rs │ │ │ ├── builtins/ │ │ │ │ ├── abs.rs │ │ │ │ ├── all.rs │ │ │ │ ├── any.rs │ │ │ │ ├── bin.rs │ │ │ │ ├── chr.rs │ │ │ │ ├── divmod.rs │ │ │ │ ├── enumerate.rs │ │ │ │ ├── filter.rs │ │ │ │ ├── getattr.rs │ │ │ │ ├── hash.rs │ │ │ │ ├── hex.rs │ │ │ │ ├── id.rs │ │ │ │ ├── isinstance.rs │ │ │ │ ├── len.rs │ │ │ │ ├── map.rs │ │ │ │ ├── min_max.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── next.rs │ │ │ │ ├── oct.rs │ │ │ │ ├── ord.rs │ │ │ │ ├── pow.rs │ │ │ │ ├── print.rs │ │ │ │ ├── repr.rs │ │ │ │ ├── reversed.rs │ │ │ │ ├── round.rs │ │ │ │ ├── sorted.rs │ │ │ │ ├── sum.rs │ │ │ │ ├── type_.rs │ │ │ │ └── zip.rs │ │ │ ├── bytecode/ │ │ │ │ ├── builder.rs │ │ │ │ ├── code.rs │ │ │ │ ├── compiler.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── op.rs │ │ │ │ └── vm/ │ │ │ │ ├── async_exec.rs │ │ │ │ ├── attr.rs │ │ │ │ ├── binary.rs │ │ │ │ ├── call.rs │ │ │ │ ├── collections.rs │ │ │ │ ├── compare.rs │ │ │ │ ├── exceptions.rs │ │ │ │ ├── format.rs │ │ │ │ ├── mod.rs │ │ │ │ └── scheduler.rs │ │ │ ├── exception_private.rs │ │ │ ├── exception_public.rs │ │ │ ├── expressions.rs │ │ │ ├── fstring.rs 
│ │ │ ├── function.rs │ │ │ ├── heap.rs │ │ │ ├── heap_data.rs │ │ │ ├── heap_traits.rs │ │ │ ├── intern.rs │ │ │ ├── io.rs │ │ │ ├── lib.rs │ │ │ ├── modules/ │ │ │ │ ├── asyncio.rs │ │ │ │ ├── math.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── os.rs │ │ │ │ ├── pathlib.rs │ │ │ │ ├── re.rs │ │ │ │ ├── sys.rs │ │ │ │ └── typing.rs │ │ │ ├── namespace.rs │ │ │ ├── object.rs │ │ │ ├── os.rs │ │ │ ├── parse.rs │ │ │ ├── prepare.rs │ │ │ ├── repl.rs │ │ │ ├── resource.rs │ │ │ ├── run.rs │ │ │ ├── run_progress.rs │ │ │ ├── signature.rs │ │ │ ├── sorting.rs │ │ │ ├── types/ │ │ │ │ ├── bytes.rs │ │ │ │ ├── dataclass.rs │ │ │ │ ├── dict.rs │ │ │ │ ├── dict_view.rs │ │ │ │ ├── iter.rs │ │ │ │ ├── list.rs │ │ │ │ ├── long_int.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── module.rs │ │ │ │ ├── namedtuple.rs │ │ │ │ ├── path.rs │ │ │ │ ├── property.rs │ │ │ │ ├── py_trait.rs │ │ │ │ ├── range.rs │ │ │ │ ├── re_match.rs │ │ │ │ ├── re_pattern.rs │ │ │ │ ├── set.rs │ │ │ │ ├── slice.rs │ │ │ │ ├── str.rs │ │ │ │ ├── tuple.rs │ │ │ │ └── type.rs │ │ │ └── value.rs │ │ ├── test_cases/ │ │ │ ├── args__dict_get_no_args.py │ │ │ ├── args__dict_get_too_many.py │ │ │ ├── args__dict_items_with_args.py │ │ │ ├── args__dict_keys_with_args.py │ │ │ ├── args__dict_pop_no_args.py │ │ │ ├── args__dict_pop_too_many.py │ │ │ ├── args__dict_values_with_args.py │ │ │ ├── args__id_too_many.py │ │ │ ├── args__len_no_args.py │ │ │ ├── args__len_too_many.py │ │ │ ├── args__len_type_error_int.py │ │ │ ├── args__len_type_error_none.py │ │ │ ├── args__list_append_no_args.py │ │ │ ├── args__list_append_too_many.py │ │ │ ├── args__list_insert_too_few.py │ │ │ ├── args__list_insert_too_many.py │ │ │ ├── args__repr_no_args.py │ │ │ ├── arith__div_zero_float.py │ │ │ ├── arith__div_zero_int.py │ │ │ ├── arith__floordiv_zero_float.py │ │ │ ├── arith__floordiv_zero_int.py │ │ │ ├── arith__pow_zero_neg.py │ │ │ ├── arith__pow_zero_neg_builtin.py │ │ │ ├── assert__expr_fail.py │ │ │ ├── assert__fail.py │ │ │ ├── assert__fail_msg.py 
│ │ │ ├── assert__fn_fail.py │ │ │ ├── assert__ops.py │ │ │ ├── async__asyncio_run.py │ │ │ ├── async__basic.py │ │ │ ├── async__closure.py │ │ │ ├── async__double_await_coroutine.py │ │ │ ├── async__exception.py │ │ │ ├── async__ext_call.py │ │ │ ├── async__gather_all.py │ │ │ ├── async__nested_await.py │ │ │ ├── async__nested_gather_ext.py │ │ │ ├── async__not_awaitable.py │ │ │ ├── async__not_imported.py │ │ │ ├── async__recursion_depth_isolation.py │ │ │ ├── async__return_types.py │ │ │ ├── async__sequential.py │ │ │ ├── async__traceback.py │ │ │ ├── async__with_args.py │ │ │ ├── attr__get_int_error.py │ │ │ ├── attr__get_list_error.py │ │ │ ├── attr__set_frozen_nonfield.py │ │ │ ├── attr__set_int_error.py │ │ │ ├── attr__set_list_error.py │ │ │ ├── bench__kitchen_sink.py │ │ │ ├── bool__ops.py │ │ │ ├── builtin__add_type_error.py │ │ │ ├── builtin__filter.py │ │ │ ├── builtin__filter_not_iterable.py │ │ │ ├── builtin__getattr.py │ │ │ ├── builtin__iter_err_unpack_int.py │ │ │ ├── builtin__iter_funcs.py │ │ │ ├── builtin__iter_next.py │ │ │ ├── builtin__map.py │ │ │ ├── builtin__map_not_iterable.py │ │ │ ├── builtin__math_funcs.py │ │ │ ├── builtin__more_iter_funcs.py │ │ │ ├── builtin__next_stop_iteration.py │ │ │ ├── builtin__print_invalid_kwarg.py │ │ │ ├── builtin__print_kwargs.py │ │ │ ├── builtin__repr.py │ │ │ ├── builtin__string_funcs.py │ │ │ ├── bytes__decode_invalid_utf8.py │ │ │ ├── bytes__endswith_str_error.py │ │ │ ├── bytes__getitem_index_error.py │ │ │ ├── bytes__index_start_gt_end.py │ │ │ ├── bytes__methods.py │ │ │ ├── bytes__negative_count.py │ │ │ ├── bytes__ops.py │ │ │ ├── bytes__startswith_str_error.py │ │ │ ├── call_object.py │ │ │ ├── chain_comparison__all.py │ │ │ ├── closure__param_shadows_outer.py │ │ │ ├── closure__pep448.py │ │ │ ├── closure__undefined_nonlocal.py │ │ │ ├── compare__mixed_types.py │ │ │ ├── comprehension__all.py │ │ │ ├── comprehension__scope.py │ │ │ ├── comprehension__unbound_local.py │ │ │ ├── 
dataclass__basic.py │ │ │ ├── dataclass__call_field_error.py │ │ │ ├── dataclass__frozen_set_error.py │ │ │ ├── dataclass__get_missing_attr_error.py │ │ │ ├── dict__get_unhashable_key.py │ │ │ ├── dict__literal_unhashable_key.py │ │ │ ├── dict__method_pop_missing_error.py │ │ │ ├── dict__methods.py │ │ │ ├── dict__ops.py │ │ │ ├── dict__pop_unhashable_key.py │ │ │ ├── dict__popitem_empty.py │ │ │ ├── dict__subscript_missing_key.py │ │ │ ├── dict__unhashable_dict_key.py │ │ │ ├── dict__unhashable_list_key.py │ │ │ ├── dict__unpack_type_error.py │ │ │ ├── dict__views.py │ │ │ ├── edge__all.py │ │ │ ├── edge__float_int_mod.py │ │ │ ├── edge__int_float_mod.py │ │ │ ├── exc__args.py │ │ │ ├── exc__str.py │ │ │ ├── execute_ok__all.py │ │ │ ├── execute_raise__error_instance_str.py │ │ │ ├── execute_raise__error_no_args.py │ │ │ ├── execute_raise__error_string_arg.py │ │ │ ├── execute_raise__error_string_arg_quotes.py │ │ │ ├── execute_raise__error_type.py │ │ │ ├── execute_raise__raise_instance_via_var.py │ │ │ ├── execute_raise__raise_list.py │ │ │ ├── execute_raise__raise_number.py │ │ │ ├── execute_raise__raise_type_call_via_var.py │ │ │ ├── execute_raise__raise_type_direct.py │ │ │ ├── execute_raise__raise_type_via_var.py │ │ │ ├── ext_call__arg_side_effect_bug.py │ │ │ ├── ext_call__augmented.py │ │ │ ├── ext_call__augmented_refcount_bug.py │ │ │ ├── ext_call__bare_raise_after_resume.py │ │ │ ├── ext_call__basic.py │ │ │ ├── ext_call__boolean.py │ │ │ ├── ext_call__boolean_side_effect_hang.py │ │ │ ├── ext_call__closure_bug.py │ │ │ ├── ext_call__comparison.py │ │ │ ├── ext_call__deep_call_stack.py │ │ │ ├── ext_call__elif.py │ │ │ ├── ext_call__exc.py │ │ │ ├── ext_call__exc_deep_stack.py │ │ │ ├── ext_call__exc_in_function.py │ │ │ ├── ext_call__exc_nested_functions.py │ │ │ ├── ext_call__ext_exc.py │ │ │ ├── ext_call__for.py │ │ │ ├── ext_call__fstring.py │ │ │ ├── ext_call__if.py │ │ │ ├── ext_call__if_condition.py │ │ │ ├── ext_call__in_closure.py │ │ │ ├── 
ext_call__in_function.py │ │ │ ├── ext_call__in_function_simple.py │ │ │ ├── ext_call__literals.py │ │ │ ├── ext_call__multi_in_func.py │ │ │ ├── ext_call__name_lookup.py │ │ │ ├── ext_call__name_lookup_undefined.py │ │ │ ├── ext_call__nested_calls.py │ │ │ ├── ext_call__recursion_bug.py │ │ │ ├── ext_call__return.py │ │ │ ├── ext_call__side_effects.py │ │ │ ├── ext_call__subscript.py │ │ │ ├── ext_call__ternary.py │ │ │ ├── ext_call__try.py │ │ │ ├── ext_call__try_simple.py │ │ │ ├── ext_call__unary.py │ │ │ ├── frozenset__ops.py │ │ │ ├── fstring__all.py │ │ │ ├── fstring__error_eq_align_on_str.py │ │ │ ├── fstring__error_float_f_on_str.py │ │ │ ├── fstring__error_int_d_on_float.py │ │ │ ├── fstring__error_int_d_on_str.py │ │ │ ├── fstring__error_invalid_spec.py │ │ │ ├── fstring__error_invalid_spec_dynamic.py │ │ │ ├── fstring__error_invalid_spec_str.py │ │ │ ├── fstring__error_str_s_on_int.py │ │ │ ├── function__call_duplicate_kwargs.py │ │ │ ├── function__call_unpack.py │ │ │ ├── function__defaults.py │ │ │ ├── function__err_duplicate_arg.py │ │ │ ├── function__err_duplicate_first_arg.py │ │ │ ├── function__err_duplicate_kwarg_cleanup.py │ │ │ ├── function__err_kwonly_as_positional.py │ │ │ ├── function__err_missing_all_posonly.py │ │ │ ├── function__err_missing_heap_cleanup.py │ │ │ ├── function__err_missing_kwonly.py │ │ │ ├── function__err_missing_posonly_with_kwarg.py │ │ │ ├── function__err_missing_with_posonly.py │ │ │ ├── function__err_posonly_as_kwarg.py │ │ │ ├── function__err_posonly_first_as_kwarg.py │ │ │ ├── function__err_too_many_posonly.py │ │ │ ├── function__err_too_many_with_kwonly.py │ │ │ ├── function__err_unexpected_kwarg.py │ │ │ ├── function__err_unexpected_kwarg_cleanup.py │ │ │ ├── function__err_unexpected_kwarg_quote.py │ │ │ ├── function__err_unexpected_kwarg_simple.py │ │ │ ├── function__err_unpack_duplicate_arg.py │ │ │ ├── function__err_unpack_duplicate_heap.py │ │ │ ├── function__err_unpack_int.py │ │ │ ├── 
function__err_unpack_nonstring_key.py │ │ │ ├── function__err_unpack_not_mapping.py │ │ │ ├── function__kwargs_unpacking.py │ │ │ ├── function__ops.py │ │ │ ├── function__return_none.py │ │ │ ├── function__signatures.py │ │ │ ├── function__too_few_args_all.py │ │ │ ├── function__too_few_args_one.py │ │ │ ├── function__too_few_args_two.py │ │ │ ├── function__too_many_args_one.py │ │ │ ├── function__too_many_args_two.py │ │ │ ├── function__too_many_args_zero.py │ │ │ ├── global__error_assigned_before.py │ │ │ ├── global__ops.py │ │ │ ├── hash__dict_unhashable.py │ │ │ ├── hash__list_unhashable.py │ │ │ ├── hash__ops.py │ │ │ ├── id__bytes_literals_distinct.py │ │ │ ├── id__int_copy_distinct.py │ │ │ ├── id__is_number_is_number.py │ │ │ ├── id__non_overlapping_lifetimes_distinct_types.py │ │ │ ├── id__non_overlapping_lifetimes_same_types.py │ │ │ ├── id__ops.py │ │ │ ├── id__str_literals_same.py │ │ │ ├── if__elif_else.py │ │ │ ├── if__raise_elif.py │ │ │ ├── if__raise_else.py │ │ │ ├── if__raise_if.py │ │ │ ├── if__raise_in_elif_condition.py │ │ │ ├── if__raise_in_if_condition.py │ │ │ ├── if_else_expr__all.py │ │ │ ├── import__error_cannot_import.py │ │ │ ├── import__error_module_not_found.py │ │ │ ├── import__local_scope.py │ │ │ ├── import__os.py │ │ │ ├── import__relative_error.py │ │ │ ├── import__relative_no_module_error.py │ │ │ ├── import__runtime_error_when_executed.py │ │ │ ├── import__star_error.py │ │ │ ├── import__sys.py │ │ │ ├── import__sys_monty.py │ │ │ ├── import__type_checking_guard.py │ │ │ ├── import__typing.py │ │ │ ├── import__typing_type_ignore.py │ │ │ ├── int__bigint.py │ │ │ ├── int__bigint_errors.py │ │ │ ├── int__ops.py │ │ │ ├── int__overflow_division.py │ │ │ ├── is_variant__all.py │ │ │ ├── isinstance__arg2_list_error.py │ │ │ ├── isinstance__arg2_type_error.py │ │ │ ├── iter__dict_mutation.py │ │ │ ├── iter__for.py │ │ │ ├── iter__for_loop_unpacking.py │ │ │ ├── iter__generator_expr.py │ │ │ ├── iter__generator_expr_type.py │ │ │ ├── 
iter__not_iterable.py │ │ │ ├── lambda__all.py │ │ │ ├── list__extend_not_iterable.py │ │ │ ├── list__getitem_out_of_bounds.py │ │ │ ├── list__index_not_found.py │ │ │ ├── list__index_start_gt_end.py │ │ │ ├── list__ops.py │ │ │ ├── list__pop_empty.py │ │ │ ├── list__pop_out_of_range.py │ │ │ ├── list__pop_type_error.py │ │ │ ├── list__remove_not_found.py │ │ │ ├── list__setitem_dict_index.py │ │ │ ├── list__setitem_huge_int_index.py │ │ │ ├── list__setitem_index_error.py │ │ │ ├── list__setitem_type_error.py │ │ │ ├── list__unpack_type_error.py │ │ │ ├── longint__index_error.py │ │ │ ├── longint__repeat_error.py │ │ │ ├── loop__break_continue.py │ │ │ ├── loop__break_finally.py │ │ │ ├── loop__break_in_function_error.py │ │ │ ├── loop__break_in_if_error.py │ │ │ ├── loop__break_nested_except_clears.py │ │ │ ├── loop__break_outside_error.py │ │ │ ├── loop__continue_finally.py │ │ │ ├── loop__continue_in_function_error.py │ │ │ ├── loop__continue_in_if_error.py │ │ │ ├── loop__continue_nested_except_clears.py │ │ │ ├── loop__continue_outside_error.py │ │ │ ├── math__acos_domain_error.py │ │ │ ├── math__acosh_domain_error.py │ │ │ ├── math__asin_domain_error.py │ │ │ ├── math__atanh_domain_error.py │ │ │ ├── math__cos_inf_error.py │ │ │ ├── math__cosh_overflow_error.py │ │ │ ├── math__exp_overflow_error.py │ │ │ ├── math__factorial_float_error.py │ │ │ ├── math__factorial_negative_error.py │ │ │ ├── math__floor_inf_error.py │ │ │ ├── math__floor_nan_error.py │ │ │ ├── math__floor_str_error.py │ │ │ ├── math__fmod_inf_error.py │ │ │ ├── math__gamma_neg_int_error.py │ │ │ ├── math__gcd_float_error.py │ │ │ ├── math__isqrt_negative_error.py │ │ │ ├── math__ldexp_overflow_error.py │ │ │ ├── math__log1p_domain_error.py │ │ │ ├── math__log_base1_error.py │ │ │ ├── math__log_zero_error.py │ │ │ ├── math__module.py │ │ │ ├── math__pow_domain_error.py │ │ │ ├── math__sin_inf_error.py │ │ │ ├── math__sqrt_negative_error.py │ │ │ ├── math__tan_inf_error.py │ │ │ ├── 
math__trunc_str_error.py │ │ │ ├── method__args_kwargs_unpacking.py │ │ │ ├── name_error__unbound_local_func.py │ │ │ ├── name_error__unbound_local_module.py │ │ │ ├── name_error__undefined_call_chained.py │ │ │ ├── name_error__undefined_call_in_expr.py │ │ │ ├── name_error__undefined_call_in_function.py │ │ │ ├── name_error__undefined_call_with_args.py │ │ │ ├── name_error__undefined_global.py │ │ │ ├── namedtuple__missing_attr.py │ │ │ ├── namedtuple__ops.py │ │ │ ├── nonlocal__error_module_level.py │ │ │ ├── nonlocal__ops.py │ │ │ ├── os__environ.py │ │ │ ├── os__getenv_key_list_error.py │ │ │ ├── os__getenv_key_type_error.py │ │ │ ├── parse_error__complex.py │ │ │ ├── pathlib__import.py │ │ │ ├── pathlib__os.py │ │ │ ├── pathlib__os_read_error.py │ │ │ ├── pathlib__pure.py │ │ │ ├── pyobject__cycle_dict_self.py │ │ │ ├── pyobject__cycle_list_dict.py │ │ │ ├── pyobject__cycle_list_self.py │ │ │ ├── pyobject__cycle_multiple_refs.py │ │ │ ├── range__error_no_args.py │ │ │ ├── range__error_step_zero.py │ │ │ ├── range__error_too_many_args.py │ │ │ ├── range__getitem_index_error.py │ │ │ ├── range__ops.py │ │ │ ├── re__basic.py │ │ │ ├── re__grouping.py │ │ │ ├── re__match.py │ │ │ ├── recursion__deep_drop.py │ │ │ ├── recursion__deep_eq.py │ │ │ ├── recursion__deep_hash.py │ │ │ ├── recursion__deep_repr.py │ │ │ ├── recursion__function_depth.py │ │ │ ├── refcount__cycle_mutual_reference.py │ │ │ ├── refcount__cycle_self_reference.py │ │ │ ├── refcount__dict_basic.py │ │ │ ├── refcount__dict_get.py │ │ │ ├── refcount__dict_keys_and.py │ │ │ ├── refcount__dict_overwrite.py │ │ │ ├── refcount__gather_cleanup.py │ │ │ ├── refcount__gather_exception.py │ │ │ ├── refcount__gather_nested_cancel.py │ │ │ ├── refcount__immediate_skipped.py │ │ │ ├── refcount__keyword_only_kwarg_arity_errors.py │ │ │ ├── refcount__kwargs_unpacking.py │ │ │ ├── refcount__list_append_multiple.py │ │ │ ├── refcount__list_append_ref.py │ │ │ ├── refcount__list_concat.py │ │ │ ├── 
refcount__list_getitem.py │ │ │ ├── refcount__list_iadd.py │ │ │ ├── refcount__min_max_key_error_paths.py │ │ │ ├── refcount__nested_list.py │ │ │ ├── refcount__re_pattern_sub_error_paths.py │ │ │ ├── refcount__re_search_match.py │ │ │ ├── refcount__re_sub_error_paths.py │ │ │ ├── refcount__shared_reference.py │ │ │ ├── refcount__single_list.py │ │ │ ├── repr__cycle_detection.py │ │ │ ├── set__ops.py │ │ │ ├── set__review_bugs.py │ │ │ ├── set__unpack_type_error.py │ │ │ ├── slice__invalid_indices.py │ │ │ ├── slice__kwargs.py │ │ │ ├── slice__no_args.py │ │ │ ├── slice__ops.py │ │ │ ├── slice__step_zero.py │ │ │ ├── slice__step_zero_bytes.py │ │ │ ├── slice__step_zero_range.py │ │ │ ├── slice__step_zero_str.py │ │ │ ├── slice__step_zero_tuple.py │ │ │ ├── slice__too_many_args.py │ │ │ ├── str__getitem_index_error.py │ │ │ ├── str__index_not_found.py │ │ │ ├── str__join_no_args.py │ │ │ ├── str__join_non_string.py │ │ │ ├── str__join_not_iterable.py │ │ │ ├── str__join_too_many_args.py │ │ │ ├── str__methods.py │ │ │ ├── str__ops.py │ │ │ ├── str__partition_empty.py │ │ │ ├── str__rsplit_empty_sep.py │ │ │ ├── str__split_empty_sep.py │ │ │ ├── sys__types.py │ │ │ ├── traceback__division_error.py │ │ │ ├── traceback__index_error.py │ │ │ ├── traceback__insert_as_int.py │ │ │ ├── traceback__nested_call.py │ │ │ ├── traceback__nonlocal_module_scope.py │ │ │ ├── traceback__nonlocal_unbound.py │ │ │ ├── traceback__range_as_int.py │ │ │ ├── traceback__recursion_error.py │ │ │ ├── traceback__set_mutation.py │ │ │ ├── traceback__undefined_attr_call.py │ │ │ ├── traceback__undefined_call.py │ │ │ ├── traceback__undefined_raise.py │ │ │ ├── try_except__all.py │ │ │ ├── try_except__bare_raise_no_context.py │ │ │ ├── try_except__invalid_type.py │ │ │ ├── tuple__getitem_out_of_bounds.py │ │ │ ├── tuple__index_not_found.py │ │ │ ├── tuple__index_start_gt_end.py │ │ │ ├── tuple__methods.py │ │ │ ├── tuple__ops.py │ │ │ ├── tuple__unpack_type_error.py │ │ │ ├── 
type__builtin_attr_error.py │ │ │ ├── type__bytes_negative.py │ │ │ ├── type__cell_not_builtin.py │ │ │ ├── type__exception_attr_error.py │ │ │ ├── type__float_conversion_error.py │ │ │ ├── type__float_repr_both_quotes.py │ │ │ ├── type__float_repr_newline.py │ │ │ ├── type__float_repr_single_quote.py │ │ │ ├── type__int_conversion_error.py │ │ │ ├── type__list_not_iterable.py │ │ │ ├── type__non_builtin_name_error.py │ │ │ ├── type__ops.py │ │ │ ├── type__shadow_exc.py │ │ │ ├── type__shadow_int.py │ │ │ ├── type__shadow_len.py │ │ │ ├── type__tuple_not_iterable.py │ │ │ ├── type_error__int_add_list.py │ │ │ ├── type_error__int_div_str.py │ │ │ ├── type_error__int_floordiv_str.py │ │ │ ├── type_error__int_iadd_str.py │ │ │ ├── type_error__int_mod_str.py │ │ │ ├── type_error__int_pow_str.py │ │ │ ├── type_error__int_sub_str.py │ │ │ ├── type_error__list_add_int.py │ │ │ ├── type_error__list_add_str.py │ │ │ ├── type_error__list_iadd_int.py │ │ │ ├── type_error__str_add_int.py │ │ │ ├── type_error__str_iadd_int.py │ │ │ ├── type_error__unary_invert_str.py │ │ │ ├── type_error__unary_minus_str.py │ │ │ ├── type_error__unary_neg_str.py │ │ │ ├── type_error__unary_plus_str.py │ │ │ ├── typing__types.py │ │ │ ├── unpack__nested.py │ │ │ ├── unpack__non_sequence.py │ │ │ ├── unpack__not_enough.py │ │ │ ├── unpack__ops.py │ │ │ ├── unpack__star_not_enough.py │ │ │ ├── unpack__too_many.py │ │ │ ├── version__cpython.py │ │ │ ├── walrus__all.py │ │ │ └── while__all.py │ │ └── tests/ │ │ ├── asyncio.rs │ │ ├── binary_serde.rs │ │ ├── bytecode_limits.rs │ │ ├── datatest_runner.rs │ │ ├── inputs.rs │ │ ├── json_serde.rs │ │ ├── main.rs │ │ ├── math_module.rs │ │ ├── name_lookup.rs │ │ ├── os_tests.rs │ │ ├── parse_errors.rs │ │ ├── print_writer.rs │ │ ├── py_object.rs │ │ ├── regex.rs │ │ ├── repl.rs │ │ ├── resource_limits.rs │ │ └── try_from.rs │ ├── monty-cli/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── monty-js/ │ │ ├── .cargo/ │ │ │ └── config.toml │ │ ├── 
.gitignore │ │ ├── .prettierignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── __test__/ │ │ │ ├── async.spec.ts │ │ │ ├── basic.spec.ts │ │ │ ├── exceptions.spec.ts │ │ │ ├── external.spec.ts │ │ │ ├── inputs.spec.ts │ │ │ ├── limits.spec.ts │ │ │ ├── package.json │ │ │ ├── print.spec.ts │ │ │ ├── repl.spec.ts │ │ │ ├── serialize.spec.ts │ │ │ ├── start.spec.ts │ │ │ ├── type_check.spec.ts │ │ │ └── types.spec.ts │ │ ├── build.rs │ │ ├── index-header.d.ts │ │ ├── package.json │ │ ├── scripts/ │ │ │ └── smoke-test.sh │ │ ├── smoke-test/ │ │ │ ├── .gitignore │ │ │ ├── package.json │ │ │ ├── test.ts │ │ │ └── tsconfig.json │ │ ├── src/ │ │ │ ├── convert.rs │ │ │ ├── exceptions.rs │ │ │ ├── lib.rs │ │ │ ├── limits.rs │ │ │ └── monty_cls.rs │ │ ├── tsconfig.json │ │ └── wrapper.ts │ ├── monty-python/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── build.rs │ │ ├── example.py │ │ ├── exercise.py │ │ ├── pyproject.toml │ │ ├── python/ │ │ │ └── pydantic_monty/ │ │ │ ├── __init__.py │ │ │ ├── _monty.pyi │ │ │ ├── os_access.py │ │ │ └── py.typed │ │ ├── src/ │ │ │ ├── convert.rs │ │ │ ├── dataclass.rs │ │ │ ├── exceptions.rs │ │ │ ├── external.rs │ │ │ ├── lib.rs │ │ │ ├── limits.rs │ │ │ ├── monty_cls.rs │ │ │ ├── repl.rs │ │ │ └── serialization.rs │ │ └── tests/ │ │ ├── test_async.py │ │ ├── test_basic.py │ │ ├── test_dataclasses.py │ │ ├── test_exceptions.py │ │ ├── test_external.py │ │ ├── test_inputs.py │ │ ├── test_limits.py │ │ ├── test_os_access.py │ │ ├── test_os_access_compat.py │ │ ├── test_os_access_raw.py │ │ ├── test_os_calls.py │ │ ├── test_print.py │ │ ├── test_re.py │ │ ├── test_readme_examples.py │ │ ├── test_repl.py │ │ ├── test_serialize.py │ │ ├── test_start.py │ │ ├── test_threading.py │ │ ├── test_type_check.py │ │ └── test_types.py │ ├── monty-type-checking/ │ │ ├── Cargo.toml │ │ ├── src/ │ │ │ ├── db.rs │ │ │ ├── lib.rs │ │ │ └── type_check.rs │ │ └── tests/ │ │ ├── bad_types.py │ │ ├── bad_types_output.txt │ │ ├── good_types.py │ │ ├── main.rs │ │ 
├── reveal_types.py │ │ └── reveal_types_output.txt │ └── monty-typeshed/ │ ├── .gitignore │ ├── Cargo.toml │ ├── README.md │ ├── build.rs │ ├── custom/ │ │ ├── README.md │ │ ├── asyncio.pyi │ │ ├── os.pyi │ │ └── sys.pyi │ ├── src/ │ │ └── lib.rs │ ├── update.py │ └── vendor/ │ └── typeshed/ │ ├── source_commit.txt │ └── stdlib/ │ ├── VERSIONS │ ├── _collections_abc.pyi │ ├── _typeshed/ │ │ └── __init__.pyi │ ├── asyncio.pyi │ ├── builtins.pyi │ ├── collections/ │ │ ├── __init__.pyi │ │ └── abc.pyi │ ├── dataclasses.pyi │ ├── enum.pyi │ ├── math.pyi │ ├── os.pyi │ ├── pathlib/ │ │ ├── __init__.pyi │ │ └── types.pyi │ ├── re.pyi │ ├── sys.pyi │ ├── types.pyi │ ├── typing.pyi │ └── typing_extensions.pyi ├── examples/ │ ├── README.md │ ├── expense_analysis/ │ │ ├── README.md │ │ ├── data.py │ │ └── main.py │ ├── sql_playground/ │ │ ├── README.md │ │ ├── external_functions.py │ │ ├── main.py │ │ ├── sandbox_code.py │ │ └── type_stubs.pyi │ └── web_scraper/ │ ├── README.md │ ├── browser.py │ ├── example_code.py │ ├── external_functions.py │ ├── main.py │ └── sub_agent.py ├── pyproject.toml └── scripts/ ├── check_imports.py ├── codecov_diff.py ├── complete_tests.py ├── flamegraph_to_text.py ├── iter_test_methods.py ├── run_traceback.py └── startup_performance.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .cargo/config.toml ================================================ [env] # Use the project's venv Python for PyO3 builds PYO3_PYTHON = { value = ".venv/bin/python3", relative = true } ================================================ FILE: .claude/settings.json ================================================ { "permissions": { "allow": [ "Bash(cargo:*)", "Bash(timeout 30s cargo:*)", "Bash(timeout 10s cargo:*)", "Bash(timeout 15s cargo:*)", "Bash(RUST_BACKTRACE=1 cargo:*)", "Bash(make:*)", "Bash(INLINE_SNAPSHOT_DEFAULT_FLAGS=disable make:*)", 
"Bash(mkdir:*)", "Bash(uv:*)", "Bash(echo:*)", "Bash(rg:*)", "Bash(sed:*)", "Bash(grep:*)", "Bash(fastmod:*)", "Bash(find:*)", "Bash(ls:*)", "Bash(cat:*)", "Bash(python3:*)", "Bash(git show:*)", "Bash(git diff:*)", "Bash(git add:*)", "Bash(git stash:*)", "Bash(git checkout:*)", "Bash(git mv:*)", "Bash(git log:*)", "Bash(git grep:*)", "Bash(git status:*)", "Bash(gh pr view:*)", "Bash(gh api:*)", "Bash(gh run view:*)", "Bash(gh issue view:*)", "Skill(python-playground)", "Skill(fastmod)", "Skill(coverage)", "Bash(wc:*)", "Bash(xxd:*)", "Bash(head:*)", "Bash(rustup show:*)", "Bash(xargs:*)", "Bash(perl:*)", "Bash(du:*)", "Bash(npm run:*)", "Bash(mkdir:*)", "WebSearch", "WebFetch(domain:pypi.org)", "WebFetch(domain:docs.anthropic.com)", "WebFetch(domain:github.com)", "WebFetch(domain:15r10nk.github.io)", "WebFetch(domain:docs.rs)", "WebFetch(domain:pyo3.rs)", "WebFetch(domain:napi.rs)", "WebFetch(domain:app.codecov.io)" ], "deny": [], "ask": [] } } ================================================ FILE: .claude/skills/coverage/SKILL.md ================================================ --- name: coverage description: Fetch coverage diff from Codecov for the current branch or a specific PR. Shows uncovered lines, patch coverage, and overall coverage change. --- # Coverage Fetch line-by-line coverage information from Codecov for a GitHub pull request. ## Instructions Use this skill to check code coverage for your changes before merging. ### Current branch (auto-detect PR) ```bash uv run scripts/codecov_diff.py ``` This auto-detects the org, repo, and PR number using the `gh` CLI based on the current branch. 
### Specific PR number ```bash uv run scripts/codecov_diff.py 123 ``` ## Output The script outputs: - PR title and state - HEAD coverage (overall coverage on the branch) - Patch coverage (coverage of changed lines only) - Coverage change (+/- percentage) - Per-file breakdown with: - Missed line count - Patch coverage percentage - Specific uncovered line numbers (as ranges like `45-48, 52, 60-65`) - Partial coverage line numbers ## Requirements - The `gh` CLI must be installed and authenticated for auto-detection - The PR must have Codecov coverage data uploaded ================================================ FILE: .claude/skills/fastmod/SKILL.md ================================================ --- name: fastmod description: Use fastmod to make mass code updates to avoid many repetitive changes. --- # fastmod ## Instructions You can occasionally use `fastmod` or `sed` to make mass updates to the codebase and avoid wasting tokens changing each case one at a time. Before making many repetitive changes to the codebase, consider using `fastmod --accept-all`. THINK HARD about how best to use `fastmod` as it can dramatically improve your productivity. ## Examples Example of switching the `py_type` function to use `impl ResourceTracker` instead of `T: ResourceTracker`: ```bash fastmod --accept-all 'fn py_type<T: ResourceTracker>(\(.+?)<T>' 'fn py_type$1<impl ResourceTracker>' ``` ================================================ FILE: .claude/skills/python-playground/SKILL.md ================================================ --- name: python-playground description: Run and test Python code in a dedicated playground directory. Use when you need to execute Python scripts, test code snippets, investigate CPython behavior, or experiment with Python without affecting the main codebase. --- # Python Playground Run Python code in an isolated playground directory for testing and experimentation. ## Instructions 1. First, ensure the playground directory exists: `mkdir -p playground` 2. 
Use the Write tool to create the Python file at `playground/test.py` 3. Run with: `uv run playground/test.py` to test CPython behavior or `cargo run -- playground/test.py` to test Monty behavior IMPORTANT: Use separate tool calls for each step - do NOT chain commands with `&&` or use heredocs. This allows the pre-approved commands to work without prompting. ## Example workflow Step 1 - Create directory (Bash, already allowed): ```bash mkdir -p playground ``` Step 2 - Write code (use Write tool, not cat): Write to `playground/test.py`: ```python def foo(): raise ValueError('test') foo() ``` Step 3 - Run script (Bash, already allowed): ```bash uv run playground/test.py ``` ## Guidelines - The `playground/` directory is gitignored - Use a different file name for each test you want to run; give the files recognizable names like `test_value_error.py` - Use `uv run ...` to run scripts (uses project Python) - Or, `cargo run -- ...` to run scripts using Monty - Use Write tool for creating files (avoids permission prompts) - Run mkdir and uv as separate commands (not chained) - do NOT delete files from playground after you've finished testing ================================================ FILE: .codecov.yml ================================================ codecov: require_ci_to_pass: false coverage: precision: 2 # range: [90, 100] status: patch: false project: false comment: layout: 'header, diff, flags, files, footer' ================================================ FILE: .github/actions/build-pgo-wheel/action.yml ================================================ name: Build PGO wheel description: Builds a PGO-optimized wheel for pydantic-monty inputs: interpreter: description: 'Interpreter(s) to build the wheel for' required: true rust-toolchain: description: 'Rust toolchain to use' required: true outputs: wheel-dir: description: 'Path to the directory containing built wheels' value: ${{ steps.find_wheel.outputs.dir }} runs: using: "composite" steps: - name: prepare 
profiling directory shell: bash run: mkdir -p ${{ github.workspace }}/profdata - name: build initial wheel (instrumented) uses: PyO3/maturin-action@v1 with: manylinux: auto args: > --release --out pgo-wheel --interpreter 3.12 rust-toolchain: ${{ inputs.rust-toolchain }} docker-options: -e CI working-directory: crates/monty-python env: RUSTFLAGS: '-Cprofile-generate=${{ github.workspace }}/profdata' - name: detect rust host run: echo RUST_HOST=$(rustc -Vv | grep host | cut -d ' ' -f 2) >> "$GITHUB_ENV" shell: bash - name: generate pgo data run: | pip install pydantic-monty --no-index --no-deps --find-links pgo-wheel --force-reinstall python exercise.py rustup run ${{ inputs.rust-toolchain }} bash -c 'echo LLVM_PROFDATA=$RUSTUP_HOME/toolchains/$RUSTUP_TOOLCHAIN/lib/rustlib/$RUST_HOST/bin/llvm-profdata >> "$GITHUB_ENV"' shell: bash working-directory: crates/monty-python - name: merge pgo data # PowerShell handles paths on Windows better, and works well enough on Unix run: ${{ env.LLVM_PROFDATA }} merge -o ${{ github.workspace }}/merged.profdata ${{ github.workspace }}/profdata shell: pwsh - name: build pgo-optimized wheel uses: PyO3/maturin-action@v1 with: manylinux: auto args: > --release --out dist --interpreter ${{ inputs.interpreter }} rust-toolchain: ${{ inputs.rust-toolchain }} docker-options: -e CI working-directory: crates/monty-python env: RUSTFLAGS: '-Cprofile-use=${{ github.workspace }}/merged.profdata' - name: find built wheels id: find_wheel run: echo "dir=crates/monty-python/dist" >> "$GITHUB_OUTPUT" shell: bash ================================================ FILE: .github/workflows/ci.yml ================================================ name: ci on: push: branches: - main tags: - "**" pull_request: {} workflow_dispatch: inputs: run_release: description: "Run release jobs (for manual tag releases)" type: boolean default: false env: COLUMNS: 150 UV_PYTHON: "3.14" UV_FROZEN: "1" DEBUG: "napi:*" jobs: lint: runs-on: ubuntu-latest steps: - uses: 
actions/checkout@v6 - uses: dtolnay/rust-toolchain@nightly with: components: rustfmt, clippy - uses: Swatinem/rust-cache@v2 with: cache-on-failure: true prefix-key: "v1-rust" - uses: astral-sh/setup-uv@v7 - run: uv sync --all-packages --only-dev - uses: actions/cache@v5 with: path: ~/.cache/pre-commit key: pre-commit|${{ env.UV_PYTHON }}|${{ hashFiles('.pre-commit-config.yaml') }} - name: Setup node uses: actions/setup-node@v6 with: node-version: 24 cache: npm cache-dependency-path: crates/monty-js/package-lock.json - name: Install dependencies run: npm install working-directory: crates/monty-js - run: uvx pre-commit run --color=always --all-files --verbose env: SKIP: no-commit-to-branch - name: Show diff on failure if: failure() run: git diff test-rust: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable with: components: llvm-tools - uses: Swatinem/rust-cache@v2 with: cache-on-failure: true - uses: taiki-e/install-action@cargo-llvm-cov # this means pyo3 will use Python 3.14 in tests - uses: actions/setup-python@v6 with: python-version: "3.14" - run: rustc --version --verbose - run: python3 -V # don't use .venv python in CI - run: rm .cargo/config.toml # coverage for `make test-no-features` - run: cargo llvm-cov --no-report -p monty # coverage for `make test-ref-count-panic` - run: cargo llvm-cov --no-report -p monty --features ref-count-panic # coverage for `make test-ref-count-return` - run: cargo llvm-cov --no-report -p monty --features ref-count-return # coverage for `make test-type-checking` - run: cargo llvm-cov --no-report -p monty_type_checking -p monty_typeshed # Generating text report: - run: cargo llvm-cov report --ignore-filename-regex '(tests/|test_cases/|/tests\.rs$$)' # Generate codecov report (use `report` subcommand to avoid recompilation) - run: cargo llvm-cov report --codecov --output-path=codecov.json --ignore-filename-regex '(tests/|test_cases/|/tests\.rs$$)' - uses: codecov/codecov-action@v5 with: 
files: codecov.json token: ${{ secrets.CODECOV_TOKEN }} test-python: name: test python ${{ matrix.python-version }} runs-on: ubuntu-latest strategy: fail-fast: false matrix: python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] env: UV_PYTHON: ${{ matrix.python-version }} steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 with: cache-on-failure: true - uses: astral-sh/setup-uv@v7 - run: uv sync --all-packages --only-dev - run: make dev-py - run: make pytest # also test with a release build - run: make dev-py-release - run: make pytest # test uv run exercise script - run: uv run crates/monty-python/exercise.py bench-test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 with: cache-on-failure: true - uses: actions/setup-python@v6 with: python-version: "3.14" # don't use .venv python in CI - run: rm .cargo/config.toml - run: make dev-bench fuzz: name: fuzz ${{ matrix.target }} runs-on: ubuntu-latest strategy: fail-fast: false matrix: target: - tokens_input_panic # disable until https://github.com/astral-sh/ruff/issues/23198 is fixed # - string_input_panic steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@nightly - id: cache-rust uses: Swatinem/rust-cache@v2 with: cache-on-failure: true prefix-key: "v1-rust-fuzz" workspaces: "crates/fuzz -> target" - if: steps.cache-rust.outputs.cache-hit != 'true' run: cargo install cargo-fuzz # don't use .venv python in CI - run: rm .cargo/config.toml - name: Run ${{ matrix.target }} fuzzer run: | # Use --sanitizer none to avoid ASAN/SanitizerCoverage linking issues on CI # (undefined __sancov_gen_.* symbols). For short CI runs, we're mainly # catching panics, not memory bugs. 
cargo fuzz run --fuzz-dir crates/fuzz --sanitizer none ${{ matrix.target }} -- -max_total_time=60 # https://github.com/marketplace/actions/alls-green#why used for branch protection checks check: if: always() needs: - lint - test-rust - test-python - bench-test - fuzz runs-on: ubuntu-latest steps: - name: Decide whether the needed jobs succeeded or failed uses: re-actors/alls-green@release/v1 with: jobs: ${{ toJSON(needs) }} # Build source distribution build-sdist: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: "3.12" - uses: PyO3/maturin-action@v1 with: command: sdist args: --out dist rust-toolchain: stable working-directory: crates/monty-python - uses: actions/upload-artifact@v6 with: name: pypi_files-sdist path: crates/monty-python/dist # Build wheels for exotic architectures (non-PGO) build: name: build on ${{ matrix.os }} (${{ matrix.target }} - ${{ matrix.manylinux || 'auto' }}) # only run on push to main, on tags, or if 'Full Build' label is present if: startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'Full Build') || (github.event_name == 'workflow_dispatch' && inputs.run_release) strategy: fail-fast: false matrix: include: # Linux aarch64 - os: linux target: aarch64 # Linux i686 - os: linux target: i686 # Linux armv7 - os: linux target: armv7 # Linux ppc64le - os: linux target: ppc64le # Linux s390x - os: linux target: s390x # Linux x86_64 musl - os: linux target: x86_64 manylinux: musllinux_1_1 # Linux aarch64 musl - os: linux target: aarch64 manylinux: musllinux_1_1 # macOS x86_64 (Intel) - os: macos target: x86_64 # Windows i686 - os: windows target: i686 runs-on: ${{ (matrix.os == 'linux' && 'ubuntu-latest') || (matrix.os == 'macos' && 'macos-latest') || (matrix.os == 'windows' && 'windows-latest') }} steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: "3.12" - name: build 
wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.target }} manylinux: ${{ matrix.manylinux || 'auto' }} args: --release --out dist -i 3.10 3.11 3.12 3.13 3.14 rust-toolchain: stable docker-options: -e CI working-directory: crates/monty-python - uses: actions/upload-artifact@v6 with: name: pypi_files-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.manylinux || 'manylinux' }} path: crates/monty-python/dist # PGO-optimized builds for main platforms build-pgo: name: build pgo on ${{ matrix.os }} # only run on push to main, on tags, or if 'Full Build' label is present if: startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'Full Build') || (github.event_name == 'workflow_dispatch' && inputs.run_release) strategy: fail-fast: false matrix: include: # Linux x86_64 (manylinux) - os: linux runs-on: ubuntu-latest interpreter: 3.10 3.11 3.12 3.13 3.14 # Windows x86_64 - os: windows runs-on: windows-latest interpreter: 3.10 3.11 3.12 3.13 3.14 # macOS aarch64 (Apple Silicon) - os: macos runs-on: macos-latest interpreter: 3.10 3.11 3.12 3.13 3.14 runs-on: ${{ matrix.runs-on }} steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: "3.12" - uses: dtolnay/rust-toolchain@stable with: components: llvm-tools - name: build PGO wheel id: pgo uses: ./.github/actions/build-pgo-wheel with: interpreter: ${{ matrix.interpreter }} rust-toolchain: stable - uses: actions/upload-artifact@v6 with: name: pypi_files-${{ matrix.os }}-pgo path: ${{ steps.pgo.outputs.wheel-dir }} # Test wheels on exotic architectures via QEMU test-builds-arch: name: test build on ${{ matrix.target }} needs: [build] runs-on: ubuntu-latest strategy: fail-fast: false matrix: target: [aarch64, armv7, s390x, ppc64le] steps: - uses: actions/checkout@v6 - uses: actions/download-artifact@v7 with: pattern: pypi_files-linux-${{ matrix.target }}-* merge-multiple: true path: dist - uses: 
uraimo/run-on-arch-action@v3 name: install & test with: arch: ${{ matrix.target }} distro: ubuntu22.04 dockerRunArgs: --volume "${{ github.workspace }}/dist:/dist" install: | apt-get update apt-get install -y --no-install-recommends python3 python3-pip python3-venv run: | ls -lh /dist/ python3 -m venv venv source venv/bin/activate python3 -m pip install pydantic-monty --no-index --no-deps --find-links /dist --force-reinstall python3 -c "import pydantic_monty; print(pydantic_monty.Monty('1 + 2').run())" # Test wheels on main OS platforms test-builds-os: name: test build on ${{ matrix.os }} needs: [build, build-pgo] runs-on: ${{ matrix.runs-on }} strategy: fail-fast: false matrix: include: - os: linux runs-on: ubuntu-latest - os: macos runs-on: macos-latest - os: windows runs-on: windows-latest steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: "3.12" - uses: actions/download-artifact@v7 with: pattern: pypi_files-${{ matrix.os }}-* merge-multiple: true path: dist - run: pip install pydantic-monty --no-index --find-links dist --force-reinstall - run: python -c "import pydantic_monty; print(pydantic_monty.Monty('1 + 2').run())" # Inspect built artifacts inspect-python-assets: needs: [build, build-pgo, build-sdist] runs-on: ubuntu-latest steps: - uses: actions/download-artifact@v7 with: pattern: pypi_files-* merge-multiple: true path: dist - name: list files run: | ls -lhR dist/ ls -1 dist/ | wc -l echo "Expected ~50 wheel files (5 Python versions × 10 platform variants)" - uses: astral-sh/setup-uv@v7 - run: uvx twine check dist/* # Release to PyPI release-python: name: release to PyPI needs: [check, inspect-python-assets, test-builds-arch, test-builds-os] if: success() && (startsWith(github.ref, 'refs/tags/') || (github.event_name == 'workflow_dispatch' && inputs.run_release)) runs-on: ubuntu-latest environment: name: release-python url: https://pypi.org/project/pydantic-monty/${{ steps.check-version.outputs.VERSION }} permissions: 
id-token: write steps: - uses: actions/checkout@v6 - uses: astral-sh/setup-uv@v7 with: enable-cache: true - uses: actions/download-artifact@v7 with: pattern: pypi_files-* merge-multiple: true path: dist - id: check-version uses: samuelcolvin/check-python-version@v5 with: version_file_path: "Cargo.toml" - run: ls -lhR dist/ - name: Publish to PyPI run: "uv publish --trusted-publishing always dist/*" build-js: name: build JS - ${{ matrix.settings.target }} - node@22 runs-on: ${{ matrix.settings.host }} # only run on push to main, on tags, or if 'Full Build' label is present if: startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'Full Build') || (github.event_name == 'workflow_dispatch' && inputs.run_release) strategy: fail-fast: false matrix: settings: - host: macos-latest target: x86_64-apple-darwin build: npm run build:napi -- --target x86_64-apple-darwin && npm run build:ts - host: windows-latest target: x86_64-pc-windows-msvc build: npm run build:napi -- --target x86_64-pc-windows-msvc && npm run build:ts - host: ubuntu-latest target: x86_64-unknown-linux-gnu build: npm run build:napi -- --target x86_64-unknown-linux-gnu --use-napi-cross && npm run build:ts - host: macos-latest target: aarch64-apple-darwin build: npm run build:napi -- --target aarch64-apple-darwin && npm run build:ts - host: ubuntu-24.04-arm target: aarch64-unknown-linux-gnu build: npm run build:napi -- --target aarch64-unknown-linux-gnu && npm run build:ts - host: ubuntu-latest target: wasm32-wasip1-threads build: npm run build:napi -- --target wasm32-wasip1-threads && npm run build:ts steps: - uses: actions/checkout@v6 - name: Setup node uses: actions/setup-node@v6 with: node-version: 24 cache: npm cache-dependency-path: crates/monty-js/package-lock.json - name: Install uses: dtolnay/rust-toolchain@stable with: toolchain: stable targets: ${{ matrix.settings.target }} - name: Cache cargo uses: actions/cache@v5 with: path: 
| ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ ~/.napi-rs .cargo-cache target/ key: ${{ matrix.settings.target }}-cargo-${{ matrix.settings.host }} # don't use .venv python in CI - run: rm .cargo/config.toml - uses: mlugg/setup-zig@v2 if: ${{ contains(matrix.settings.target, 'musl') }} with: version: 0.14.1 - name: Install cargo-zigbuild uses: taiki-e/install-action@v2 if: ${{ contains(matrix.settings.target, 'musl') }} with: tool: cargo-zigbuild - name: Setup toolchain run: ${{ matrix.settings.setup }} if: ${{ matrix.settings.setup }} shell: bash - name: Install dependencies run: npm install working-directory: crates/monty-js - name: Build run: ${{ matrix.settings.build }} shell: bash working-directory: crates/monty-js - name: Upload artifact uses: actions/upload-artifact@v6 with: name: js-bindings-${{ matrix.settings.target }} path: | crates/monty-js/monty.*.node crates/monty-js/monty.*.wasm if-no-files-found: error # need to upload the .js files generated by napi; they are identical for whatever target # so might as well upload from the linux job - if: ${{ matrix.settings.target == 'x86_64-unknown-linux-gnu' }} name: Upload artifact uses: actions/upload-artifact@v6 with: name: js-stubs path: | crates/monty-js/browser.js crates/monty-js/index.js crates/monty-js/index.d.ts crates/monty-js/wrapper.js crates/monty-js/wrapper.d.ts crates/monty-js/monty.wasi.cjs crates/monty-js/monty.wasi-browser.js crates/monty-js/wasi-worker.mjs crates/monty-js/wasi-worker-browser.mjs if-no-files-found: error env: MACOSX_DEPLOYMENT_TARGET: "10.13" CARGO_INCREMENTAL: "1" test-js-macOS-windows-binding: name: Test JS bindings on ${{ matrix.settings.target }} - node@${{ matrix.node }} needs: - build-js strategy: fail-fast: false matrix: settings: - host: windows-latest target: x86_64-pc-windows-msvc architecture: x64 - host: macos-latest target: aarch64-apple-darwin architecture: arm64 - host: macos-latest target: x86_64-apple-darwin architecture: x64 node: - "20" 
- "22" runs-on: ${{ matrix.settings.host }} steps: - uses: actions/checkout@v6 - name: Setup node uses: actions/setup-node@v6 with: node-version: ${{ matrix.node }} cache: npm cache-dependency-path: crates/monty-js/package-lock.json architecture: ${{ matrix.settings.architecture }} - name: Install dependencies run: npm install working-directory: crates/monty-js - name: Download artifacts uses: actions/download-artifact@v7 with: name: js-bindings-${{ matrix.settings.target }} path: crates/monty-js - name: Download artifacts uses: actions/download-artifact@v7 with: name: js-stubs path: crates/monty-js - name: List packages run: ls -R . shell: bash working-directory: crates/monty-js - name: Test bindings run: npm test working-directory: crates/monty-js test-js-linux-binding: name: Test JS ${{ matrix.target }} - node@${{ matrix.node }} needs: - build-js strategy: fail-fast: false matrix: target: - x86_64-unknown-linux-gnu - aarch64-unknown-linux-gnu node: - "20" - "22" runs-on: ${{ contains(matrix.target, 'aarch64') && 'ubuntu-24.04-arm' || 'ubuntu-latest' }} steps: - uses: actions/checkout@v6 - name: Setup node uses: actions/setup-node@v6 with: node-version: ${{ matrix.node }} cache: npm cache-dependency-path: crates/monty-js/package-lock.json - name: Output docker params id: docker run: | node -e " if ('${{ matrix.target }}'.startsWith('aarch64')) { console.log('PLATFORM=linux/arm64') } else if ('${{ matrix.target }}'.startsWith('armv7')) { console.log('PLATFORM=linux/arm/v7') } else { console.log('PLATFORM=linux/amd64') } " >> $GITHUB_OUTPUT node -e " if ('${{ matrix.target }}'.endsWith('-musl')) { console.log('IMAGE=node:${{ matrix.node }}-alpine') } else { console.log('IMAGE=node:${{ matrix.node }}-slim') } " >> $GITHUB_OUTPUT - name: Install dependencies run: npm install working-directory: crates/monty-js - name: Download artifacts uses: actions/download-artifact@v7 with: name: js-bindings-${{ matrix.target }} path: crates/monty-js - name: Download artifacts 
uses: actions/download-artifact@v7 with: name: js-stubs path: crates/monty-js - name: List packages run: ls -R . shell: bash working-directory: crates/monty-js - name: Set up QEMU uses: docker/setup-qemu-action@v3 if: ${{ contains(matrix.target, 'armv7') }} with: platforms: all - run: docker run --rm --privileged multiarch/qemu-user-static --reset -p yes if: ${{ contains(matrix.target, 'armv7') }} - name: Test bindings run: > docker run --rm -v ${{ github.workspace }}:${{ github.workspace }} -w ${{ github.workspace }}/crates/monty-js --platform ${{ steps.docker.outputs.PLATFORM }} ${{ steps.docker.outputs.IMAGE }} npm test test-js-wasi: name: Test WASI target needs: - build-js runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Setup node uses: actions/setup-node@v6 with: node-version: 24 cache: npm cache-dependency-path: crates/monty-js/package-lock.json - name: Install dependencies run: npm install --cpu wasm32 working-directory: crates/monty-js - name: Download artifacts uses: actions/download-artifact@v7 with: name: js-bindings-wasm32-wasip1-threads path: crates/monty-js - name: Download artifacts uses: actions/download-artifact@v7 with: name: js-stubs path: crates/monty-js - name: List packages run: ls -R . 
shell: bash working-directory: crates/monty-js - name: Test bindings run: npm test env: NAPI_RS_FORCE_WASI: 1 working-directory: crates/monty-js release-js: name: Release to NPM runs-on: ubuntu-latest needs: - check - inspect-python-assets - test-js-macOS-windows-binding - test-js-linux-binding - test-js-wasi if: success() && (startsWith(github.ref, 'refs/tags/') || (github.event_name == 'workflow_dispatch' && inputs.run_release)) permissions: contents: write id-token: write steps: - uses: actions/checkout@v6 - name: Setup node uses: actions/setup-node@v6 with: node-version: 24 cache: npm cache-dependency-path: crates/monty-js/package-lock.json registry-url: "https://registry.npmjs.org" - name: Install dependencies run: npm install working-directory: crates/monty-js - name: create npm dirs run: npm run create-npm-dirs working-directory: crates/monty-js - name: Download all artifacts uses: actions/download-artifact@v7 with: pattern: js-bindings-* path: crates/monty-js/artifacts merge-multiple: true - name: Download artifacts uses: actions/download-artifact@v7 with: name: js-stubs path: crates/monty-js - name: Move artifacts run: npm run artifacts working-directory: crates/monty-js - name: List packages run: ls -R ./npm shell: bash working-directory: crates/monty-js - name: Publish run: | if [[ "$GITHUB_REF" =~ ^refs/tags/v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then echo "Publishing stable release" npm publish --provenance --access public else echo "Publishing pre-release with 'next' tag" npm publish --provenance --tag next --access public fi working-directory: crates/monty-js ================================================ FILE: .github/workflows/codspeed.yml ================================================ name: CodSpeed on: push: branches: - main pull_request: workflow_dispatch: permissions: contents: read id-token: write jobs: benchmarks: name: Run benchmarks runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - uses: 
Swatinem/rust-cache@v2 with: cache-on-failure: true - uses: actions/setup-python@v6 with: python-version: "3.14" - name: Remove .cargo config to use system Python run: rm .cargo/config.toml - name: Install cargo-codspeed run: cargo install cargo-codspeed - name: Build benchmarks run: cargo codspeed build -p monty --bench main - name: Run benchmarks uses: CodSpeedHQ/action@v4 with: mode: simulation run: cargo codspeed run -p monty --bench main ================================================ FILE: .github/workflows/init-npm-packages.yml ================================================ name: Initialize NPM Platform Packages # Creates placeholder packages on npm for any new napi platform targets. # npm requires packages to exist before OIDC/provenance publishing can work, # so this must be run once per new target before the first release that includes it. # # After running, configure Trusted Publishing on npmjs.com for each new package: # Package Settings > Trusted Publisher > GitHub Actions > pydantic/monty on: workflow_dispatch: {} jobs: init-packages: name: Initialize missing platform packages runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: actions/setup-node@v6 with: node-version: 24 registry-url: "https://registry.npmjs.org" - name: Install dependencies run: npm install working-directory: crates/monty-js - name: Create npm dirs run: npm run create-npm-dirs working-directory: crates/monty-js - name: Check and create missing platform packages working-directory: crates/monty-js env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} run: | created=() for pkg_dir in npm/*/; do name=$(node -p "require('./${pkg_dir}package.json').name") if npm view "$name" version > /dev/null 2>&1; then echo "✓ $name already exists" continue fi echo "Creating placeholder for $name..." 
tmp=$(mktemp -d) node -e " const pkg = require('./${pkg_dir}package.json'); const placeholder = { name: pkg.name, version: '0.0.0', description: 'Placeholder for OIDC trusted publishing setup — this version contains no code.', license: pkg.license, repository: pkg.repository, publishConfig: pkg.publishConfig, os: pkg.os, cpu: pkg.cpu, }; if (pkg.libc) placeholder.libc = pkg.libc; require('fs').writeFileSync('${tmp}/package.json', JSON.stringify(placeholder, null, 2)); " (cd "$tmp" && npm publish --access public) rm -rf "$tmp" created+=("$name") echo "✓ Created $name@0.0.0" done echo "" if [ ${#created[@]} -eq 0 ]; then echo "All platform packages already exist on npm." else echo "Created ${#created[@]} new package(s):" for name in "${created[@]}"; do echo " - $name" echo " Configure Trusted Publishing: https://www.npmjs.com/package/${name}/access" done echo "" echo "⚠ You must configure Trusted Publishing for each new package on npmjs.com" echo " before the next release will succeed with OIDC/provenance." 
fi ================================================ FILE: .gitignore ================================================ *.py[cod] *.so /.idea/ /target/ /env*/ /*.py /TODO.md /monty /.claude/settings.local.json /scratch/ /worktrees/ /flame/ /playground/ /type-sizes.txt /sandbox/ /starlark-rust/ /.playwright-mcp/ /crates/fuzz/artifacts/ /crates/fuzz/corpus/ /.worktrees/ /.zed/ ================================================ FILE: .pre-commit-config.yaml ================================================ fail_fast: true repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.0.1 hooks: - id: no-commit-to-branch # prevent direct commits to the `main` branch - id: check-yaml - id: check-toml - id: end-of-file-fixer - id: trailing-whitespace - id: check-added-large-files - repo: https://github.com/codespell-project/codespell # Configuration for codespell is in pyproject.toml rev: v2.3.0 hooks: - id: codespell additional_dependencies: - tomli - repo: local hooks: - id: format-rs name: Format Rust entry: make format-rs types: [rust] language: system pass_filenames: false - id: lint-rs name: Lint Rust entry: make lint-rs types: [rust] language: system pass_filenames: false - id: format-lint-py name: Format & Lint Python entry: make format-lint-py types: [python] language: system pass_filenames: false - id: format-lint-js name: Format & Lint TypeScript entry: make format-js lint-js types: [ts] language: system pass_filenames: false ================================================ FILE: .python-version ================================================ 3.14 ================================================ FILE: .rustfmt.toml ================================================ max_width = 120 imports_granularity = "Crate" group_imports = "StdExternalCrate" reorder_imports = true ================================================ FILE: CLAUDE.md ================================================ # CLAUDE.md This file provides guidance to Claude Code (claude.ai/code) when 
working with code in this repository. ## Project Overview Monty is a sandboxed Python interpreter written in Rust. It parses Python code using Ruff's `ruff_python_parser` but implements its own runtime execution model for safety and performance. This is a work-in-progress project that currently supports a subset of Python features. Project goals: - **Safety**: Execute untrusted Python code safely without FFI or C dependencies; instead, the sandbox calls back to the host to run foreign/external functions. - **Performance**: Fast execution through compile-time optimizations and efficient memory layout - **Simplicity**: Clean, understandable implementation focused on a Python subset - **Snapshotting and iteration**: Plan is to allow code to be iteratively executed and snapshotted at each function call - Targets the latest stable version of Python, currently Python 3.14 ## Important Security Notice It's ABSOLUTELY CRITICAL that there's no way for code run in a Monty sandbox to access the host filesystem, or environment or to in any way "escape the sandbox". **Monty will be used to run untrusted, potentially malicious code.** Make sure there's no risk of this, either in the implementation, or in a public API that makes it more likely that a developer using the pydantic_monty package might make such a mistake. Possible security risks to consider: * filesystem access * path traversal to access files the users did not intend to expose to the monty sandbox * memory errors - use of unsafe memory operations * excessive memory usage - evading monty's resource limits * infinite loops - evading monty's resource limits * network access - sockets, HTTP requests * subprocess/shell execution - os.system, subprocess, etc.
* import system abuse - importing modules with side effects or accessing `__import__` * external function/callback misuse - callbacks run in host environment * deserialization attacks - loading untrusted serialized Monty/snapshot data * regex/string DoS - catastrophic backtracking or operations bypassing limits * information leakage via timing or error messages * Python/Javascript/Rust APIs that accidentally allow developers to expose their host to monty code ## Bytecode VM Architecture Monty is implemented as a bytecode VM, same as CPython. ### Reference Count Safety All types that implement `DropWithHeap` hold heap references and **must** be cleaned up correctly on every code path — not just the happy path, but also early returns via `?`, `continue`, conditional branches, etc. A missed `drop_with_heap` on any branch leaks reference counts. There are three mechanisms for ensuring this, listed in order of preference: #### 1. `defer_drop!` macro (preferred) The simplest and safest approach. Use `defer_drop!` (or `defer_drop_mut!` when mutable access to the value is needed) to bind a value into a guard that automatically drops it when scope exits — whether that's normal completion, early return via `?`, `continue`, or any other branch. The macro rebinds the value and heap variables as borrows from the guard, so you keep using them by name as before: ```rust let value = self.pop(); defer_drop!(value, heap); // value is now &Value, heap is now &mut Heap let result = value.py_repr(heap)?; // guard handles cleanup on all paths ``` Beyond safety, `defer_drop!` is often much more concise than inserting `drop_with_heap` calls in every branch of complex control flow. `defer_drop!` gives you an immutable reference to the value. Use `defer_drop_mut!` when you need a mutable reference (e.g. iterators, values you may swap): ```rust let iter = vm.heap.get_iter(iter_ref); defer_drop_mut!(iter, vm); while let Some(item) = iter.for_next(vm)? { ... 
} ``` **Limitation:** because the macro rebinds the heap, it cannot be used inside `&mut self` methods where `self` owns the heap — first assign `let this = self;` and pass `this` instead. #### 2. `HeapGuard` (when you need control over the value's fate) Use `HeapGuard` directly when `defer_drop!` is too restrictive — specifically when you need to conditionally extract the value instead of dropping it. `HeapGuard` provides `into_inner()` and `into_parts()` to reclaim ownership, while its `Drop` impl still guarantees cleanup on all other paths: ```rust // HeapGuard needed here because on success we push lhs back onto the stack // instead of dropping it let mut lhs_guard = HeapGuard::new(self.pop(), self); let (lhs, this) = lhs_guard.as_parts_mut(); if lhs.py_iadd(rhs, this.heap)? { let (lhs, this) = lhs_guard.into_parts(); // reclaim lhs, don't drop this.push(lhs); return Ok(()); } // otherwise lhs_guard drops lhs automatically at scope exit ``` #### 3. Manual `drop_with_heap` (for trivially simple cases) For very simple cases with a single linear code path and no branching between acquiring and releasing the value, a direct `drop_with_heap` call is fine: ```rust let iter = self.pop(); iter.drop_with_heap(&mut self.heap); // single path, no branching ``` Avoid manual `drop_with_heap` whenever there are multiple code paths (branching, `?`, `continue`, early returns) between acquiring and releasing the value — that is exactly where `defer_drop!` or `HeapGuard` prevent leaks by guaranteeing cleanup on every path. ## Dev Commands DO NOT run `cargo build` or `cargo run`, it will fail because of issues with Python bindings. 
Instead use the following `make` commands: ```bash make install-py Install python dependencies make install-js Install JS package dependencies make install Install the package, dependencies, and pre-commit for local development make dev-py Install the python package for development make dev-js Build the JS package (debug) make lint-js Lint JS code with oxlint make test-js Build and test the JS package make dev-py-release Install the python package for development with a release build make dev-js-release Build the JS package (release) make dev-py-pgo Install the python package for development with profile-guided optimization make format-rs Format Rust code with fmt make format-py Format Python code - WARNING be careful about this command as it may modify code and break tests silently! make format-js Format JS code with prettier make format Format Rust code, this does not format Python code as we have to be careful with that make lint-rs Lint Rust code with clippy and import checks make clippy-fix Fix Rust code with clippy make lint-py Lint Python code with ruff make lint Lint the code with ruff and clippy make format-lint-rs Format and lint Rust code with fmt and clippy make format-lint-py Format and lint Python code with ruff make test-no-features Run rust tests without any features enabled make test-ref-count-panic Run rust tests with ref-count-panic enabled make test-ref-count-return Run rust tests with ref-count-return enabled make test-cases Run tests cases only make test-type-checking Run rust tests on monty_type_checking make pytest Run Python tests with pytest make test-py Build the python package (debug profile) and run tests make test-docs Test docs examples only make test Run rust tests make testcov Run Rust tests with coverage, print table, and generate HTML report make complete-tests Fill in incomplete test expectations using CPython make update-typeshed Update vendored typeshed from upstream make bench Run benchmarks make dev-bench Run benchmarks to 
test with dev profile make profile Profile the code with pprof and generate flamegraphs make type-sizes Write type sizes for the crate to ./type-sizes.txt (requires nightly and top-type-sizes) make main run linting and the most important tests make help Show this help (usage: make help) ``` Use the /python-playground skill to check cpython and monty behavior. ## Releasing See [RELEASING.md](RELEASING.md) for the release process. ## Exception It's important that exceptions raised/returned by this library match those raised by Python. Wherever you see an Exception with a repeated message, create a dedicated method to create that exception in `src/exceptions.rs`. When writing exception messages, always check `src/exceptions.rs` for existing methods to generate that message. ## Code style Avoid local imports, unless there's a very good reason, all imports should be at the top of the file. Avoid `fn my_func<T: MyTrait>(..., param: T)` style function definitions, STRONGLY prefer `fn my_func(param: impl MyTrait)` syntax since changes are more localized. This includes in trait definitions and implementations. Also avoid using functions and structs via a path like `std::borrow::Cow::Owned(...)`, instead import `Cow` globally with `use std::borrow::Cow;`. NEVER use `allow()` in rust lint markers, instead use `expect()` so any unnecessary markers are removed. E.g. use ```rs #[expect(clippy::too_many_arguments)] ``` NOT! ```rs #[allow(clippy::too_many_arguments)] ``` ### Docstrings and comments. IMPORTANT: every struct, enum and function should have a comprehensive but concise docstring to explain what it does and why and any considerations or potential foot-guns of using that type. The only exception is trait implementation methods where a docstring is not necessary if the method is self-explanatory. It's important that docstrings cover the motivation and primary usage patterns of code, not just the simple "what it does".
Similarly, you should add comments to code, especially if the code is complex or esoteric. Only add examples to docstrings of public functions and structs, examples should be <=8 lines, if the example is more, remove it. If you add example code to docstrings, it must be run in tests. NEVER add examples that are ignored. If you encounter a comment or docstring that's out of date - you MUST update it to be correct. Similarly, if you encounter code that has no docstrings or comments, or they are minimal, you should add more detail. NOTE: COMMENTS AND DOCSTRINGS ARE EXTREMELY IMPORTANT TO THE LONG TERM HEALTH OF THE PROJECT. ## Tests Do **NOT** write tests within modules unless explicitly prompted to do so. Tests should live in the relevant `tests/` directory. Commands: ```bash # Build the project cargo build # Run tests (this is the best way to run all tests as it enables the ref-count-panic feature) make test-ref-count-panic # Run crates/monty/test_cases tests only make test-cases # Run a specific test cargo test -p monty --test datatest_runner --features ref-count-panic str__ops # Run the interpreter on a Python file cargo run -- <file.py> ``` See more test commands above. ### Experimentation and Playground Read `Makefile` for other useful commands. DO NOT run `cargo run --`, it will fail because of issues with Python bindings. You can use the `./playground` directory (excluded from git, create with `mkdir -p playground`) to write files when you want to experiment by running a file with cpython or monty, e.g.: * `python3 playground/test.py` to run the file with cpython * `cargo run -- playground/test.py` to run the file with monty DO NOT use `/tmp` or pipe code to the interpreter as it requires extra permissions and can slow you down! More details in the "python-playground" skill. ### Test File Structure Most functionality should be tested via python files in the `crates/monty/test_cases` directory. **DO NOT create many small test files.** This would be unmaintainable.
ALWAYS consolidate related tests into single files using multiple `assert` statements. Follow `crates/monty/test_cases/fstring__all.py` as the gold standard pattern: ```python # === Section name === # brief comment if needed assert condition, 'descriptive message' assert another_condition, 'another descriptive message' # === Next section === x = setup_value assert x == expected, 'test description' ``` Each `assert` should have a descriptive message. Do NOT Write tests like `assert 'thing' in msg` it's lazy and inexact unless explicitly told to do so, instead write tests like `assert msg == 'expected message'` to ensure clarity and accuracy and most importantly, to identify differences between Monty and CPython. ### When to Create Separate Test Files Only create a separate test file when you MUST use one of these special expectation formats: - `"""TRACEBACK:..."""` - Test expects an exception with full traceback (PREFERRED for error tests) - `# Raise=Exception('message')` - Test expects an exception without traceback verification - NOT RECOMMENDED, use `TRACEBACK` instead - `# ref-counts={...}` - Test checks reference counts (special mode) - you're writing tests for a different behavior or section of the language For everything else, **add asserts to an existing test file** or create ONE consolidated file for the feature. ### File Naming Name files by feature, not by micro-variant: - ✅ `str__ops.py` - all string operations (add, iadd, len, etc.) 
- ✅ `list__methods.py` - all list method tests - ❌ `str__add_basic.py`, `str__add_empty.py`, `str__add_multiple.py` - TOO GRANULAR ### Expectation Formats (use sparingly) Only use these when `assert` won't work (on last line of file): - `# Return=value` - Check `repr()` output (prefer assert instead) - `# Return.str=value` - Check `str()` output (prefer assert instead) - `# Return.type=typename` - Check `type()` output (prefer assert instead) - `# Raise=Exception('message')` - Expect exception without traceback (REQUIRES separate file) - `"""TRACEBACK:..."""` - Expect exception with full traceback (PREFERRED over `# Raise=`) - `# ref-counts={...}` - Check reference counts (REQUIRES separate file) - No expectation comment - Assert-based test (PREFERRED) Do NOT use `# Return=` when you could use `assert` instead. ### Traceback Tests (Preferred for Errors) For tests that expect exceptions, **prefer traceback tests over `# Raise=`** because they verify: - The full traceback with all stack frames - Correct line numbers for each frame - Function names in the traceback - The caret markers (`~`) pointing to the error location Traceback test format - add a triple-quoted string at the end of the file starting with `\nTRACEBACK:`: ```python def foo(): raise ValueError('oops') foo() """ TRACEBACK: Traceback (most recent call last): File "my_test.py", line 4, in <module> foo() ~~~~~ File "my_test.py", line 2, in foo raise ValueError('oops') ValueError: oops """ ``` Key points: - The filename in the traceback should match the test file name (just the basename, not the full path) - Use `~` for caret markers (the test runner normalizes CPython's `^` to `~`) - The `<module>` frame name is used for top-level code - Tests run against both Monty and CPython, so the traceback must match both Only use `# Raise=` when you only care about the exception type/message and not the traceback.
### Python fixture markers You may mark python files with: * `# call-external` to support calling external functions * `# run-async` to support running async code NEVER MARK TESTS AS XFAIL UNDER ANY CIRCUMSTANCES!!! INSTEAD FIX THE BEHAVIOR SO THAT THE TEST PASSES. Never mark tests as: - `# xfail=cpython` - Test is required to fail on CPython - `# xfail=monty` - Test is required to fail on Monty NEVER MARK TESTS AS XFAIL UNDER ANY CIRCUMSTANCES!!! INSTEAD FIX THE BEHAVIOR SO THAT THE TEST PASSES. All these markers must be at the start of comment lines to be recognized. ### Other Notes - Prefer single quotes for strings in Python tests - Do NOT add `# noqa` or `# pyright: ignore` comments to test code, instead add the failing code to `pyproject.toml` - The ONLY exception is `await` expressions outside of async functions, where you should add `# pyright: ignore` - Run `make lint-py` after adding tests - Use `make complete-tests` to fill in blank expectations - Tests run via `datatest-stable` harness in `tests/datatest_runner.rs`, use `make test-cases` to run them ## Python Package (`pydantic-monty`) The Python package provides Python bindings for the Monty interpreter, located in `crates/monty-python/`. ### Structure - `crates/monty-python/src/` - Rust source for PyO3 bindings - `crates/monty-python/python/pydantic_monty/_monty.pyi` - Type stubs for the Python module - `crates/monty-python/tests/` - Python tests using pytest ### Building and Testing Dependencies needed for python testing are installed in `crates/monty-python/pyproject.toml`. To install these dependencies, use `uv sync --all-packages --only-dev`. 
```bash # Build the Python package for development (required before running tests) make dev-py # Run Python tests make test-py # Or run pytest directly (after dev-py) uv run pytest # Run a specific test file uv run pytest crates/monty-python/tests/test_basic.py # Run a specific test uv run pytest crates/monty-python/tests/test_basic.py::test_simple_expression ``` ### Python Test Guidelines Check and follow the style of other python tests. Make sure you put tests in the correct file. **DO NOT use python/pytest tests for `monty` core functionality!** When testing core functionality, add tests to `crates/monty/test_cases/` or `crates/monty/tests/`. Only use python/pytest tests for `pydantic_monty` functionality testing. **NEVER use class-based tests.** All tests should be simple functions. Use `@pytest.mark.parametrize` whenever testing multiple similar cases. Use `snapshot` from `inline-snapshot` for all test asserts. NEVER do the lazy `assert '...' in ...` instead always do `assert value == snapshot()`, then run the test and inline-snapshot will fill in the missing value in the `snapshot()` call. Use `pytest.raises` for expected exceptions, like this ```py with pytest.raises(ValueError) as exc_info: m.run(print_callback=callback) assert exc_info.value.args[0] == snapshot('stopped at 3') ``` ## Reference Counting Heap-allocated values (`Value::Ref`) use manual reference counting. Key rules: - **Cloning**: Use `clone_with_heap(heap)` which increments refcounts for `Ref` variants. - **Dropping**: Call `drop_with_heap(heap)` when discarding an `Value` that may be a `Ref`. Container types (`List`, `Tuple`, `Dict`) also have `clone_with_heap()` methods. **Resource limits**: When resource limits (allocations, memory, time) are exceeded, execution terminates with a `ResourceError`. No guarantees are made about the state of the heap or reference counts after a resource limit is exceeded. The heap may contain orphaned objects with incorrect refcounts. 
This is acceptable because resource exhaustion is a terminal error - the execution context should be discarded. ## NOTES ALWAYS consider code quality when adding new code, if functions are getting too complex or code is duplicated, move relevant logic to a new file. Make sure functions are added in the most logical place, e.g. as methods on a struct where appropriate. The code should follow the "newspaper" style where public and primary functions are at the top of the file, followed by private functions and utilities. ALWAYS put utility, private functions and "sub functions" underneath the function they're used in. It is important to the long term health of the project and maintainability of the codebase that code is well structured and organized, this is very important. ALWAYS run `make format-rs` and `make lint-rs` after making changes to rust code and fix all suggestions to maintain code quality. ALWAYS run `make lint-py` after making changes to python code and fix all suggestions to maintain code quality. ALWAYS update this file when it is out of date. NEVER add imports anywhere except at the top of the file, this applies to both python and rust. NEVER write `unsafe` code, if you think you need to write unsafe code, explicitly ask the user or leave a `todo!()` with a suggestion and explanation. ## JavaScript Package (`monty-js`) The JavaScript package provides Node.js bindings for the Monty interpreter via napi-rs, located in `crates/monty-js/`. 
### Structure - `crates/monty-js/src/lib.rs` - Rust source for napi-rs bindings - `crates/monty-js/index.js` - Auto-generated JS loader that detects platform and loads the appropriate native binding - `crates/monty-js/index.d.ts` - TypeScript type declarations (auto-generated) - `crates/monty-js/__test__/` - Tests using ava ### Current API The package exposes: - `Monty` class - Parse and execute Python code with inputs, external functions, and resource limits - `MontySnapshot` / `MontyComplete` - For iterative execution with `start()` / `resume()` - `runMontyAsync()` - Helper for async external functions - `MontySyntaxError` / `MontyRuntimeError` / `MontyTypingError` - Error classes ```ts import { Monty, MontySnapshot, runMontyAsync } from '@pydantic/monty' // Basic execution const m = new Monty('x + 1', { inputs: ['x'] }) const result = m.run({ inputs: { x: 10 } }) // returns 11 // Iterative execution for external functions const m2 = new Monty('fetch(url)', { inputs: ['url'], externalFunctions: ['fetch'] }) let progress = m2.start({ inputs: { url: 'https://...' } }) if (progress instanceof MontySnapshot) { progress = progress.resume({ returnValue: 'response data' }) } ``` See `crates/monty-js/README.md` for full API documentation. 
### Building and Testing ```bash # Install dependencies make install-js # Build native binding (debug) make build-js # Build native binding (release) make build-js-release # Run tests make test-js # Format JavaScript code make format-js # Lint JavaScript code make lint-js ``` Or run directly in `crates/monty-js`: ```bash npm install npm run build # release build npm run build:debug # debug build npm test ``` ### JavaScript Test Guidelines - Tests use [ava](https://github.com/avajs/ava) and live in `crates/monty-js/__test__/` - Tests are written in TypeScript - Follow the existing test style in the `__test__/` directory ================================================ FILE: Cargo.toml ================================================ [workspace] resolver = "2" members = [ "crates/monty", "crates/monty-cli", "crates/monty-python", "crates/monty-js", "crates/monty-type-checking", "crates/monty-typeshed", "crates/fuzz" ] default-members = ["crates/monty-cli"] [workspace.package] edition = "2024" version = "0.0.8" rust-version = "1.90" license = "MIT" authors = ["Samuel Colvin "] description = "A sandboxed, snapshotable Python interpreter written in Rust." 
categories = ["compilers", "emulators", "development-tools"] keywords = ["python", "interpreter", "sandbox", "embedded"] homepage = "https://github.com/pydantic/monty/" repository = "https://github.com/pydantic/monty/" documentation = "https://github.com/pydantic/monty/" [profile.release] lto = "fat" codegen-units = 1 strip = true [profile.profiling] inherits = "release" debug = true strip = false lto = false [workspace.dependencies] # ruff, ty and related crates ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", package = "ruff_python_parser", rev = "6ded4bed1651e30b34dd04cdaa50c763036abb0d" } ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", package = "ruff_python_ast", rev = "6ded4bed1651e30b34dd04cdaa50c763036abb0d" } ruff_text_size = { git = "https://github.com/astral-sh/ruff.git", package = "ruff_text_size", rev = "6ded4bed1651e30b34dd04cdaa50c763036abb0d" } ruff_db = { git = "https://github.com/astral-sh/ruff.git", package = "ruff_db", rev = "6ded4bed1651e30b34dd04cdaa50c763036abb0d", features = ["serde"] } ty_python_semantic = { git = "https://github.com/astral-sh/ruff.git", package = "ty_python_semantic", rev = "6ded4bed1651e30b34dd04cdaa50c763036abb0d" } ty_module_resolver = { git = "https://github.com/astral-sh/ruff.git", package = "ty_module_resolver", rev = "6ded4bed1651e30b34dd04cdaa50c763036abb0d" } ty_vendored = { git = "https://github.com/astral-sh/ruff.git", package = "ty_vendored", rev = "6ded4bed1651e30b34dd04cdaa50c763036abb0d" } # salsa version matches current main of ruff salsa = { git = "https://github.com/salsa-rs/salsa.git", rev = "53421c2fff87426fa0bb51cab06632b87646de13", default-features = false, features = [ "compact_str", "macros", "salsa_unstable", "inventory", ] } # bigint and related crates num-bigint = { version = "0.4", features = ["serde"] } num-traits = "0.2" num-integer = "0.1" # others indexmap = { version = "2.9", features = ["serde"] } serde = { version = "1.0", features = ["derive"] } 
postcard = { version = "1.1", features = ["alloc"] } sha2 = "0.10" pretty_assertions = "1.4" [workspace.lints.rust] # codspeed cfg is set by codspeed-criterion-compat when running in CodSpeed environment unexpected_cfgs = { level = "warn", check-cfg = ['cfg(codspeed)'] } [workspace.lints.rustdoc] invalid_codeblock_attributes = "allow" [workspace.lints.clippy] dbg_macro = "warn" use_self = "warn" allow_attributes = "warn" undocumented_unsafe_blocks = "warn" redundant_clone = "warn" # in general we lint against the pedantic group, but we will whitelist # certain lints which we don't want to enforce pedantic = { level = "warn", priority = -1 } cast_precision_loss = "allow" doc_markdown = "allow" match_same_arms = "allow" missing_errors_doc = "allow" similar_names = "allow" too_many_lines = "allow" ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) Pydantic Services Inc. 2026 to present Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: Makefile ================================================ .DEFAULT_GOAL := main .PHONY: .cargo .cargo: ## Check that cargo is installed @cargo --version || echo 'Please install cargo: https://github.com/rust-lang/cargo' .PHONY: .uv .uv: ## Check that uv is installed @uv --version || echo 'Please install uv: https://docs.astral.sh/uv/getting-started/installation/' .PHONY: .pre-commit .pre-commit: ## Check that pre-commit is installed @pre-commit -V || echo 'Please install pre-commit: https://pre-commit.com/' .PHONY: install-py install-py: .uv ## Install python dependencies # --only-dev to avoid building the python package, use make dev-py for that uv sync --all-packages --only-dev .PHONY: install-js install-js: ## Install JS package dependencies cd crates/monty-js && npm install .PHONY: install install: .cargo .pre-commit install-py install-js ## Install the package, dependencies, and pre-commit for local development cargo check --workspace pre-commit install --install-hooks .PHONY: dev-py dev-py: ## Install the python package for development uv run maturin develop --uv -m crates/monty-python/Cargo.toml .PHONY: dev-js dev-js: ## Build the JS package (debug) cd crates/monty-js && npm run build:debug .PHONY: lint-js lint-js: install-js ## Lint JS code with oxlint cd crates/monty-js && npm run lint .PHONY: test-js test-js: dev-js ## Build and test the JS package cd crates/monty-js && npm test .PHONY: smoke-test-js smoke-test-js: ## Run smoke test for JS package (builds, packs, and tests installation) cd crates/monty-js && npm run smoke-test .PHONY: dev-py-release dev-py-release: ## Install the python package for development with a release build uv run maturin 
develop --uv -m crates/monty-python/Cargo.toml --release .PHONY: dev-js-release dev-js-release: ## Build the JS package (release) cd crates/monty-js && npm run build .PHONY: dev-py-pgo dev-py-pgo: ## Install the python package for development with profile-guided optimization $(eval PROFDATA := $(shell mktemp -d)) RUSTFLAGS='-Cprofile-generate=$(PROFDATA)' uv run maturin develop --uv -m crates/monty-python/Cargo.toml --release uv run --package pydantic-monty --only-dev pytest crates/monty-python/tests -k "not test_parallel_exec" $(eval LLVM_PROFDATA := $(shell rustup run stable bash -c 'echo $$RUSTUP_HOME/toolchains/$$RUSTUP_TOOLCHAIN/lib/rustlib/$$(rustc -Vv | grep host | cut -d " " -f 2)/bin/llvm-profdata')) $(LLVM_PROFDATA) merge -o $(PROFDATA)/merged.profdata $(PROFDATA) RUSTFLAGS='-Cprofile-use=$(PROFDATA)/merged.profdata' $(uv-run-no-sync) maturin develop --uv -m crates/monty-python/Cargo.toml --release @rm -rf $(PROFDATA) .PHONY: format-rs format-rs: ## Format Rust code with fmt @cargo +nightly fmt --version cargo +nightly fmt --all .PHONY: format-py format-py: ## Format Python code - WARNING be careful about this command as it may modify code and break tests silently! 
uv run ruff format uv run ruff check --fix --fix-only .PHONY: format-js format-js: install-js ## Format JS code with prettier cd crates/monty-js && npm run format:prettier .PHONY: format format: format-rs format-py format-js ## Format Rust code, this does not format Python code as we have to be careful with that .PHONY: lint-rs lint-rs: ## Lint Rust code with clippy and import checks @cargo clippy --version cargo clippy --workspace --tests --bench main -- -D warnings cargo clippy --workspace --tests --all-features -- -D warnings uv run scripts/check_imports.py .PHONY: clippy-fix clippy-fix: ## Fix Rust code with clippy cargo clippy --workspace --tests --bench main --all-features --fix --allow-dirty .PHONY: lint-py lint-py: dev-py ## Lint Python code with ruff uv run ruff format --check uv run ruff check uv run basedpyright # mypy-stubtest requires a build of the python package, hence dev-py uv run -m mypy.stubtest pydantic_monty._monty --ignore-disjoint-bases .PHONY: lint lint: lint-rs lint-py ## Lint the code with ruff and clippy .PHONY: format-lint-rs format-lint-rs: format-rs lint-rs ## Format and lint Rust code with fmt and clippy .PHONY: format-lint-py format-lint-py: format-py lint-py ## Format and lint Python code with ruff .PHONY: test-no-features test-no-features: ## Run rust tests without any features enabled cargo test -p monty .PHONY: test-ref-count-panic test-ref-count-panic: ## Run rust tests with ref-count-panic enabled cargo test -p monty --features ref-count-panic .PHONY: test-ref-count-return test-ref-count-return: ## Run rust tests with ref-count-return enabled cargo test -p monty --features ref-count-return .PHONY: test-cases test-cases: ## Run tests cases only cargo test -p monty --test datatest_runner .PHONY: test-type-checking test-type-checking: ## Run rust tests on monty_type_checking cargo test -p monty_type_checking -p monty_typeshed .PHONY: pytest pytest: ## Run Python tests with pytest uv run --package pydantic-monty --only-dev pytest 
crates/monty-python/tests .PHONY: test-py test-py: dev-py pytest ## Build the python package (debug profile) and run tests .PHONY: test-docs test-docs: dev-py ## Test docs examples only uv run --package pydantic-monty --only-dev pytest crates/monty-python/tests/test_readme_examples.py cargo test --doc -p monty .PHONY: test test: test-ref-count-panic test-ref-count-return test-no-features test-type-checking test-py ## Run rust tests .PHONY: testcov testcov: ## Run Rust tests with coverage, print table, and generate HTML report @cargo llvm-cov --version > /dev/null 2>&1 || echo 'Please run: `cargo install cargo-llvm-cov`' cargo llvm-cov clean --workspace echo "coverage for `make test-no-features`" cargo llvm-cov --no-report -p monty echo "coverage for `make test-ref-count-panic`" cargo llvm-cov --no-report -p monty --features ref-count-panic echo "coverage for `make test-ref-count-return`" cargo llvm-cov --no-report -p monty --features ref-count-return echo "coverage for `make test-type-checking`" cargo llvm-cov --no-report -p monty_type_checking -p monty_typeshed echo "Generating reports:" cargo llvm-cov report --ignore-filename-regex '(tests/|test_cases/|/tests\.rs$$)' cargo llvm-cov report --html --ignore-filename-regex '(tests/|test_cases/|/tests\.rs$$)' @echo "" @echo "HTML report: $${CARGO_TARGET_DIR:-target}/llvm-cov/html/index.html" .PHONY: complete-tests complete-tests: ## Fill in incomplete test expectations using CPython uv run scripts/complete_tests.py .PHONY: update-typeshed update-typeshed: ## Update vendored typeshed from upstream uv run crates/monty-typeshed/update.py uv run ruff format uv run ruff check --fix --fix-only --silent .PHONY: bench bench: ## Run benchmarks cargo bench -p monty --bench main .PHONY: dev-bench dev-bench: ## Run benchmarks to test with dev profile cargo bench --profile dev -p monty --bench main -- --test .PHONY: profile profile: ## Profile the code with pprof and generate flamegraphs cargo bench -p monty --bench main --profile 
profiling -- --profile-time=10 uv run scripts/flamegraph_to_text.py .PHONY: type-sizes type-sizes: ## Write type sizes for the crate to ./type-sizes.txt (requires nightly and top-type-sizes) RUSTFLAGS="-Zprint-type-sizes" cargo +nightly build -j1 2>&1 | top-type-sizes -f '^monty.*' > type-sizes.txt @echo "Type sizes written to ./type-sizes.txt" .PHONY: fuzz-string_input_panic fuzz-string_input_panic: ## Run the `string_input_panic` fuzz target cargo +nightly fuzz run --fuzz-dir crates/fuzz string_input_panic .PHONY: fuzz-tokens_input_panic fuzz-tokens_input_panic: ## Run the `tokens_input_panic` fuzz target (structured token input) cargo +nightly fuzz run --fuzz-dir crates/fuzz tokens_input_panic .PHONY: main main: lint test-ref-count-panic test-py ## run linting and the most important tests # (must stay last!) .PHONY: help help: ## Show this help (usage: make help) @echo "Usage: make [recipe]" @echo "Recipes:" @awk '/^[a-zA-Z0-9_-]+:.*?##/ { \ helpMessage = match($$0, /## (.*)/); \ if (helpMessage) { \ recipe = $$1; \ sub(/:/, "", recipe); \ printf " \033[36mmake %-20s\033[0m %s\n", recipe, substr($$0, RSTART + 3, RLENGTH); \ } \ }' $(MAKEFILE_LIST) ================================================ FILE: README.md ================================================

Monty

A minimal, secure Python interpreter written in Rust for use by AI.

CI Codspeed Coverage PyPI versions license Join Slack
--- **Experimental** - This project is still in development, and not ready for prime time. A minimal, secure Python interpreter written in Rust for use by AI. Monty avoids the cost, latency, complexity and general faff of using a full container based sandbox for running LLM generated code. Instead, it lets you safely run Python code written by an LLM embedded in your agent, with startup times measured in single digit microseconds not hundreds of milliseconds. What Monty **can** do: - Run a reasonable subset of Python code - enough for your agent to express what it wants to do - Completely block access to the host environment: filesystem, env variables and network access are all implemented via external function calls the developer can control - Call functions on the host - only functions you give it access to - Run typechecking - monty supports full modern python type hints and comes with [ty](https://docs.astral.sh/ty/) included in a single binary to run typechecking - Be snapshotted to bytes at external function calls, meaning you can store the interpreter state in a file or database, and resume later - Start up extremely fast (<1μs to go from code to execution result), with runtime performance that is similar to CPython (generally between 5x faster and 5x slower) - Be called from Rust, Python, or Javascript - because Monty has no dependencies on cpython, you can use it anywhere you can run Rust - Control resource usage - Monty can track memory usage, allocations, stack depth, and execution time and cancel execution if it exceeds preset limits - Collect stdout and stderr and return them to the caller - Run async or sync code on the host via async or sync code on the host - Use a small subset of the standard library: `sys`, `os`, `typing`, `asyncio`, `re`, `datetime` (soon), `dataclasses` (soon), `json` (soon) What Monty **cannot** do: - Use the rest of the standard library - Use third party libraries (like Pydantic), support for external python libraries is not
a goal - define classes (support should come soon) - use match statements (again, support should come soon) --- In short, Monty is extremely limited and designed for **one** use case: **To run code written by agents.** For motivation on why you might want to do this, see: - [Codemode](https://blog.cloudflare.com/code-mode/) from Cloudflare - [Programmatic Tool Calling](https://platform.claude.com/docs/en/agents-and-tools/tool-use/programmatic-tool-calling) from Anthropic - [Code Execution with MCP](https://www.anthropic.com/engineering/code-execution-with-mcp) from Anthropic - [Smol Agents](https://github.com/huggingface/smolagents) from Hugging Face In very simple terms, the idea of all the above is that LLMs can work faster, cheaper and more reliably if they're asked to write Python (or Javascript) code, instead of relying on traditional tool calling. Monty makes that possible without the complexity of a sandbox or risk of running code directly on the host. **Note:** Monty will (soon) be used to implement `codemode` in [Pydantic AI](https://github.com/pydantic/pydantic-ai) ## Usage Monty can be called from Python, JavaScript/TypeScript or Rust. 
### Python To install: ```bash uv add pydantic-monty ``` (Or `pip install pydantic-monty` for the boomers) Usage: ```python from typing import Any import pydantic_monty code = """ async def agent(prompt: str, messages: Messages): while True: print(f'messages so far: {messages}') output = await call_llm(prompt, messages) if isinstance(output, str): return output messages.extend(output) await agent(prompt, []) """ type_definitions = """ from typing import Any Messages = list[dict[str, Any]] async def call_llm(prompt: str, messages: Messages) -> str | Messages: raise NotImplementedError() prompt: str = '' """ m = pydantic_monty.Monty( code, inputs=['prompt'], script_name='agent.py', type_check=True, type_check_stubs=type_definitions, ) Messages = list[dict[str, Any]] async def call_llm(prompt: str, messages: Messages) -> str | Messages: if len(messages) < 2: return [{'role': 'system', 'content': 'example response'}] else: return f'example output, message count {len(messages)}' async def main(): output = await pydantic_monty.run_monty_async( m, inputs={'prompt': 'testing'}, external_functions={'call_llm': call_llm}, ) print(output) #> example output, message count 2 if __name__ == '__main__': import asyncio asyncio.run(main()) ``` #### Iterative Execution with External Functions Use `start()` and `resume()` to handle external function calls iteratively, giving you control over each call: ```python import pydantic_monty code = """ data = fetch(url) len(data) """ m = pydantic_monty.Monty(code, inputs=['url']) # Start execution - pauses when fetch() is called result = m.start(inputs={'url': 'https://example.com'}) print(type(result)) #> print(result.function_name) # fetch #> fetch print(result.args) #> ('https://example.com',) # Perform the actual fetch, then resume with the result result = result.resume(return_value='hello world') print(type(result)) #> print(result.output) #> 11 ``` #### Serialization Both `Monty` and snapshot types like `FunctionSnapshot` can be 
serialized to bytes and restored later. This allows caching parsed code or suspending execution across process boundaries: ```python import pydantic_monty # Serialize parsed code to avoid re-parsing m = pydantic_monty.Monty('x + 1', inputs=['x']) data = m.dump() # Later, restore and run m2 = pydantic_monty.Monty.load(data) print(m2.run(inputs={'x': 41})) #> 42 # Serialize execution state mid-flight m = pydantic_monty.Monty('fetch(url)', inputs=['url']) progress = m.start(inputs={'url': 'https://example.com'}) state = progress.dump() # Later, restore and resume (e.g., in a different process) progress2 = pydantic_monty.load_snapshot(state) result = progress2.resume(return_value='response data') print(result.output) #> response data ``` ### Rust ```rust use monty::{MontyRun, MontyObject, NoLimitTracker, PrintWriter}; let code = r#" def fib(n): if n <= 1: return n return fib(n - 1) + fib(n - 2) fib(x) "#; let runner = MontyRun::new(code.to_owned(), "fib.py", vec!["x".to_owned()]).unwrap(); let result = runner.run(vec![MontyObject::Int(10)], NoLimitTracker, PrintWriter::Stdout).unwrap(); assert_eq!(result, MontyObject::Int(55)); ``` #### Serialization `MontyRun` and `RunProgress` can be serialized using the `dump()` and `load()` methods: ```rust use monty::{MontyRun, MontyObject, NoLimitTracker, PrintWriter}; // Serialize parsed code let runner = MontyRun::new("x + 1".to_owned(), "main.py", vec!["x".to_owned()]).unwrap(); let bytes = runner.dump().unwrap(); // Later, restore and run let runner2 = MontyRun::load(&bytes).unwrap(); let result = runner2.run(vec![MontyObject::Int(41)], NoLimitTracker, PrintWriter::Stdout).unwrap(); assert_eq!(result, MontyObject::Int(42)); ``` ## PydanticAI Integration Monty will power code-mode in [Pydantic AI](https://github.com/pydantic/pydantic-ai). Instead of making sequential tool calls, the LLM writes Python code that calls your tools as functions and Monty executes it safely. 
```python test="skip" import asyncio import json import logfire from httpx import AsyncClient from pydantic_ai import Agent, RunContext from pydantic_ai.toolsets.code_mode import CodeModeToolset from pydantic_ai.toolsets.function import FunctionToolset from typing_extensions import TypedDict logfire.configure() logfire.instrument_pydantic_ai() class LatLng(TypedDict): lat: float lng: float weather_toolset: FunctionToolset[AsyncClient] = FunctionToolset() @weather_toolset.tool async def get_lat_lng( ctx: RunContext[AsyncClient], location_description: str ) -> LatLng: """Get the latitude and longitude of a location.""" # NOTE: the response here will be random, and is not related to the location description. r = await ctx.deps.get( 'https://demo-endpoints.pydantic.workers.dev/latlng', params={'location': location_description}, ) r.raise_for_status() return json.loads(r.content) @weather_toolset.tool async def get_temp(ctx: RunContext[AsyncClient], lat: float, lng: float) -> float: """Get the temp at a location.""" # NOTE: the responses here will be random, and are not related to the lat and lng. r = await ctx.deps.get( 'https://demo-endpoints.pydantic.workers.dev/number', params={'min': 10, 'max': 30}, ) r.raise_for_status() return float(r.text) @weather_toolset.tool async def get_weather_description( ctx: RunContext[AsyncClient], lat: float, lng: float ) -> str: """Get the weather description at a location.""" # NOTE: the responses here will be random, and are not related to the lat and lng. 
r = await ctx.deps.get( 'https://demo-endpoints.pydantic.workers.dev/weather', params={'lat': lat, 'lng': lng}, ) r.raise_for_status() return r.text agent = Agent( 'gateway/anthropic:claude-sonnet-4-5', # toolsets=[weather_toolset], toolsets=[CodeModeToolset(weather_toolset)], deps_type=AsyncClient, ) async def main(): async with AsyncClient() as client: await agent.run('Compare the weather of London, Paris, and Tokyo.', deps=client) if __name__ == '__main__': asyncio.run(main()) ``` # Alternatives There are generally two responses when you show people Monty: 1. Oh my god, this solves so many problems, I want it. 2. Why not X? Where X is some alternative technology. Oddly often these responses are combined, suggesting people have not yet found an alternative that works for them, but are incredulous that there's really no good alternative to creating an entire Python implementation from scratch. I'll try to run through the most obvious alternatives, and why they aren't right for what we wanted. NOTE: all these technologies are impressive and have widespread uses, this commentary on their limitations for our use case should not be seen as a criticism. Most of these solutions were not conceived with the goal of providing an LLM sandbox, which is why they're not necessarily great at it. | Tech | Language completeness | Security | Start latency | FOSS | Setup complexity | File mounting | Snapshotting | | ------------------ | --------------------- | ------------ | ------------- | ---------- | ---------------- | -------------- | ------------ | | Monty | partial | strict | 0.06ms | free / OSS | easy | easy | easy | | Docker | full | good | 195ms | free / OSS | intermediate | easy | intermediate | | Pyodide | full | poor | 2800ms | free / OSS | intermediate | easy | hard | | starlark-rust | very limited | good | 1.7ms | free / OSS | easy | not available? | impossible?
| | WASI / Wasmer | partial, almost full | strict | 66ms | free \* | intermediate | easy | intermediate | | sandboxing service | full | strict | 1033ms | not free | intermediate | hard | intermediate | | YOLO Python | full | non-existent | 0.1ms / 30ms | free / OSS | easy | easy / scary | hard | See [./scripts/startup_performance.py](scripts/startup_performance.py) for the script used to calculate the startup performance numbers. Details on each row below: ### Monty - **Language completeness**: No classes (yet), limited stdlib, no third-party libraries - **Security**: Explicitly controlled filesystem, network, and env access, strict limits on execution time and memory usage - **Start latency**: Starts in microseconds - **Setup complexity**: just `pip install pydantic-monty` or `npm install @pydantic/monty`, ~4.5MB download - **File mounting**: Strictly controlled, see [#85](https://github.com/pydantic/monty/pull/85) - **Snapshotting**: Monty's pause and resume functionality with `dump()` and `load()` makes it trivial to pause, resume and fork execution ### Docker - **Language completeness**: Full CPython with any library - **Security**: Process and filesystem isolation, network policies, but container escapes exist, memory limitation is possible - **Start latency**: Container startup overhead (~195ms measured) - **Setup complexity**: Requires Docker daemon, container images, orchestration, `python:3.14-alpine` is 50MB - docker can't be installed from PyPI - **File mounting**: Volume mounts work well - **Snapshotting**: Possible with durable execution solutions like Temporal, or snapshotting an image and saving it as a Docker image. 
### Pyodide - **Language completeness**: Full CPython compiled to WASM, almost all libraries available - **Security**: Relies on browser/WASM sandbox - not designed for server-side isolation, python code can run arbitrary code in the JS runtime, only deno allows isolation, memory limits are hard/impossible to enforce with deno - **Start latency**: WASM runtime loading is slow (~2800ms cold start) - **Setup complexity**: Need to load WASM runtime, handle async initialization, pyodide NPM package is ~12MB, deno is ~50MB - Pyodide can't be called with just PyPI packages - **File mounting**: Virtual filesystem via browser APIs - **Snapshotting**: Possible with durable execution solutions like Temporal presumably, but hard ### starlark-rust See [starlark-rust](https://github.com/facebook/starlark-rust). - **Language completeness**: Configuration language, not Python - no classes, exceptions, async - **Security**: Deterministic and hermetic by design - **Start latency**: runs embedded in the process like Monty, hence impressive startup time - **Setup complexity**: Usable in python via [starlark-pyo3](https://github.com/inducer/starlark-pyo3) - **File mounting**: No file handling by design AFAIK? - **Snapshotting**: Impossible AFAIK? ### WASI / Wasmer Running Python in WebAssembly via [Wasmer](https://wasmer.io/). - **Language completeness**: Full CPython, pure Python external packages work via mounting, external packages with C bindings don't work - **Security**: In principle WebAssembly should provide strong sandboxing guarantees. - **Start latency**: The [wasmer](https://pypi.org/project/wasmer/) python package hasn't been updated for 3 years and I couldn't find docs on calling Python in wasmer from Python, so I called it via subprocess. Start latency was 66ms. - **Setup complexity**: wasmer download is 100mb, the "python/python" package is 50mb. - **FOSS**: I marked this as "free \*" since the cost is zero but not everything seems to be open source. 
As of 2026-02-10 the [`python/python` wasmer package](https://wasmer.io/python/python) has no readme, no license, no source link and no indication of how it's built, the recently uploaded versions show size as "0B" although the download is ~50MB - the build process for the Python binary is not clear and transparent. _(If I'm wrong here, please create an issue to correct me)_ - **File mounting**: Supported - **Snapshotting**: Supported via journaling ### sandboxing service Services like [Daytona](https://daytona.io), [E2B](https://e2b.dev), [Modal](https://modal.com). There are similar challenges, more setup complexity but lower network latency for setting up your own sandbox setup with k8s. - **Language completeness**: Full CPython with any library - **Security**: Professionally managed container isolation - **Start latency**: Network round-trip and container startup time. I got ~1s cold start time with Daytona EU from London, Daytona advertise sub 90ms latency, presumably that's for an existing container, not clear if it includes network latency - **FOSS**: Pay per execution or compute time, some implementations are open source - **Setup complexity**: API integration, auth tokens - fine for startups but generally a non-starter for enterprises - **File mounting**: Upload/download via API calls - **Snapshotting**: Possible with durable execution solutions like Temporal, also the services offer some solutions for this, I think based on Docker containers ### YOLO Python Running Python directly via `exec()` (~0.1ms) or subprocess (~30ms).
- **Language completeness**: Full CPython with any library - **Security**: None - full filesystem, network, env vars, system commands - **Start latency**: Near-zero for `exec()`, ~30ms for subprocess - **Setup complexity**: None - **File mounting**: Direct filesystem access (that's the problem) - **Snapshotting**: Possible with durable execution solutions like Temporal ================================================ FILE: RELEASING.md ================================================ # Release Process ## 1. Bump Version Update version in both files: ```bash # Edit Cargo.toml - update workspace.package.version # Edit crates/monty-js/package.json - update version # Update Cargo.lock make lint-rs ``` Both `Cargo.toml` and `package.json` should have the same version (e.g., `0.0.2`). ## 2. Commit and Push ```bash git add Cargo.toml Cargo.lock crates/monty-js/package.json git commit -m "Bump version to X.Y.Z" git push ``` ## 3. Create Release via GitHub UI 1. Go to https://github.com/pydantic/monty/releases/new 2. Click "Choose a tag" and type the new tag name (e.g., `v0.0.2`) 3. Select "Create new tag on publish" 4. Set the release title (e.g., `v0.0.2`) 5. Add release notes 6. Click "Publish release" ## 4. 
CI Handles Publishing Once the tag is pushed, CI will: - Build wheels for all platforms - Publish to PyPI (`pydantic-monty`) - Publish to NPM (`@pydantic/monty`) Monitor the workflow at https://github.com/pydantic/monty/actions ## Pre-release Tags For pre-releases (alpha, beta, rc), use a tag like `v0.0.2-beta.1`: - PyPI: Published normally - NPM: Published with `--tag next` (not `latest`) ================================================ FILE: crates/fuzz/Cargo.toml ================================================ [package] name = "monty-fuzz" publish = false version = { workspace = true } edition = { workspace = true } [package.metadata] cargo-fuzz = true [dependencies] arbitrary = { version = "1", features = ["derive"] } libfuzzer-sys = "0.4" monty = { path = "../monty" } [[bin]] name = "string_input_panic" path = "fuzz_targets/string_input_panic.rs" test = false doc = false bench = false [[bin]] name = "tokens_input_panic" path = "fuzz_targets/tokens_input_panic.rs" test = false doc = false bench = false [lints] workspace = true ================================================ FILE: crates/fuzz/fuzz_targets/string_input_panic.rs ================================================ //! Fuzz target for testing that arbitrary Python code doesn't cause panics or crashes. //! //! This target feeds arbitrary byte sequences to the Monty interpreter and verifies that //! neither parsing nor execution causes the interpreter to panic or crash. Errors (parse //! errors, runtime errors, etc.) are expected and ignored - we only care about panics. //! //! Resource limits are enforced to prevent infinite loops and memory exhaustion. #![no_main] use std::time::Duration; use libfuzzer_sys::fuzz_target; use monty::{LimitedTracker, MontyRun, PrintWriter, ResourceLimits}; /// Resource limits for fuzzing - restrictive to prevent hangs and memory issues. 
fn fuzz_limits() -> LimitedTracker { LimitedTracker::new( ResourceLimits::new() .max_allocations(10_000) .max_memory(1024 * 1024) // 1 MB .max_duration(Duration::from_millis(100)), ) } fuzz_target!(|code: String| { // Try to parse the code let Ok(runner) = MontyRun::new( code.to_owned(), "fuzz.py", vec![], // no inputs ) else { return; // Parse errors are expected for random input }; // Try to execute with resource limits - ignore all errors, we only care about panics/crashes let _ = runner.run(vec![], fuzz_limits(), PrintWriter::Disabled); }); ================================================ FILE: crates/fuzz/fuzz_targets/tokens_input_panic.rs ================================================ //! Fuzz target using structured token input instead of random strings. //! //! This generates more syntactically plausible Python code by combining //! tokens that represent common Python constructs. The fuzzer explores //! combinations of these tokens to find edge cases. #![no_main] use std::{ fmt::{self, Display}, time::Duration, }; use arbitrary::Arbitrary; use libfuzzer_sys::fuzz_target; use monty::{LimitedTracker, MontyRun, PrintWriter, ResourceLimits}; /// A token representing a piece of Python syntax. #[derive(Debug, Clone, Arbitrary)] enum Token { // === Literals === String(StringLit), Int(i64), Float(FloatLit), Bool(bool), None, // === Identifiers === Var(VarName), Attr(AttrName), // === Operators === BinOp(BinOp), UnaryOp(UnaryOp), CompareOp(CompareOp), AugAssign(AugAssign), // === Keywords === Keyword(Keyword), // === Punctuation === LParen, RParen, LBracket, RBracket, LBrace, RBrace, Comma, Colon, Semicolon, Dot, Arrow, Assign, Walrus, // === Whitespace/Structure === Space, Newline, Indent(IndentLevel), Comment, } /// String literal variants. #[derive(Debug, Clone, Arbitrary)] enum StringLit { Empty, Short(ShortString), FString(ShortString), Raw(ShortString), Bytes(ShortString), } /// Short string content (limited to avoid huge inputs). 
#[derive(Debug, Clone, Arbitrary)]
enum ShortString {
    Hello,
    World,
    Test,
    Foo,
    Bar,
    // Rendered as nothing at all (empty content between the quotes).
    Empty,
    Space,
    // Rendered as the two-character escape sequence `\n`, not a raw line break.
    Newline,
    // Rendered as the digits `123`.
    Number,
    // Rendered as a literal `{}` pair.
    Special,
}

/// Float literal (avoiding infinity/NaN issues).
///
/// Each variant maps to one fixed, finite literal in the `Display` impl.
#[derive(Debug, Clone, Arbitrary)]
enum FloatLit {
    // `0.0`
    Zero,
    // `1.0`
    One,
    // `0.5`
    Half,
    // `3.14159`
    Pi,
    // `-1.5`
    Negative,
    // `0.001`
    Small,
    // `1e10`
    Large,
}

/// Common variable names.
#[derive(Debug, Clone, Arbitrary)]
enum VarName {
    X,
    Y,
    Z,
    A,
    B,
    C,
    I,
    J,
    N,
    Foo,
    Bar,
    Baz,
    Spam,
    Eggs,
    Result,
    Value,
    Item,
    Data,
    Args,
    Kwargs,
    // Rendered as `self`; the trailing underscore is needed because `Self` is a Rust keyword.
    Self_,
    Cls,
}

/// Common attribute names.
///
/// Rendered as the lowercase method name (e.g. `Append` -> `append`).
#[derive(Debug, Clone, Arbitrary)]
enum AttrName {
    Append,
    Pop,
    Get,
    Set,
    Keys,
    Values,
    Items,
    Join,
    Split,
    Strip,
    Lower,
    Upper,
    Format,
    Replace,
    Find,
    Count,
    Sort,
    Reverse,
    Copy,
    Clear,
    Update,
    Add,
    Remove,
}

/// Binary operators.
///
/// Includes the boolean operators `and` / `or` alongside the arithmetic and bitwise ones.
#[derive(Debug, Clone, Arbitrary)]
enum BinOp {
    Add,
    Sub,
    Mul,
    Div,
    FloorDiv,
    Mod,
    Pow,
    MatMul,
    BitAnd,
    BitOr,
    BitXor,
    LShift,
    RShift,
    And,
    Or,
}

/// Unary operators.
#[derive(Debug, Clone, Arbitrary)]
enum UnaryOp {
    Neg,
    Pos,
    Not,
    Invert,
}

/// Comparison operators.
#[derive(Debug, Clone, Arbitrary)]
enum CompareOp {
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
    Is,
    IsNot,
    In,
    NotIn,
}

/// Augmented assignment operators.
#[derive(Debug, Clone, Arbitrary)]
enum AugAssign {
    AddEq,
    SubEq,
    MulEq,
    DivEq,
    FloorDivEq,
    ModEq,
    PowEq,
    // `&=` (bitwise and)
    AndEq,
    // `|=` (bitwise or)
    OrEq,
    // `^=` (bitwise xor)
    XorEq,
    LShiftEq,
    RShiftEq,
}

/// Python keywords.
#[derive(Debug, Clone, Arbitrary)]
enum Keyword {
    If,
    Elif,
    Else,
    For,
    While,
    Break,
    Continue,
    Pass,
    Return,
    Def,
    Lambda,
    Async,
    Await,
    Try,
    Except,
    Finally,
    Raise,
    Assert,
    Import,
    From,
    As,
    Global,
    Nonlocal,
}

/// Indentation levels (0-4 levels deep).
#[derive(Debug, Clone, Arbitrary)]
enum IndentLevel {
    L0,
    L1,
    L2,
    L3,
    L4,
}

/// Renders a token as the Python source fragment it stands for.
impl Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Payload-carrying variants defer to the payload's own rendering and
        // return immediately; every other variant maps to a fixed string.
        let fixed = match self {
            Self::String(lit) => return write!(f, "{lit}"),
            Self::Int(value) => return write!(f, "{value}"),
            Self::Float(lit) => return write!(f, "{lit}"),
            Self::Var(name) => return write!(f, "{name}"),
            Self::Attr(name) => return write!(f, "{name}"),
            Self::BinOp(op) => return write!(f, "{op}"),
            Self::UnaryOp(op) => return write!(f, "{op}"),
            Self::CompareOp(op) => return write!(f, "{op}"),
            Self::AugAssign(op) => return write!(f, "{op}"),
            Self::Keyword(kw) => return write!(f, "{kw}"),
            Self::Indent(level) => return write!(f, "{level}"),
            Self::Bool(true) => "True",
            Self::Bool(false) => "False",
            Self::None => "None",
            Self::LParen => "(",
            Self::RParen => ")",
            Self::LBracket => "[",
            Self::RBracket => "]",
            Self::LBrace => "{",
            Self::RBrace => "}",
            Self::Comma => ",",
            Self::Colon => ":",
            Self::Semicolon => ";",
            Self::Dot => ".",
            Self::Arrow => "->",
            Self::Assign => "=",
            Self::Walrus => ":=",
            Self::Space => " ",
            Self::Newline => "\n",
            Self::Comment => "# comment",
        };
        f.write_str(fixed)
    }
}

/// Renders a single-quoted Python string literal with an optional one-letter prefix.
impl Display for StringLit {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (prefix, content) = match self {
            Self::Empty => return f.write_str("''"),
            Self::Short(s) => ("", s),
            Self::FString(s) => ("f", s),
            Self::Raw(s) => ("r", s),
            Self::Bytes(s) => ("b", s),
        };
        write!(f, "{prefix}'{content}'")
    }
}

/// Renders the fixed text placed between the quotes of a string literal.
impl Display for ShortString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Hello => "hello",
            Self::World => "world",
            Self::Test => "test",
            Self::Foo => "foo",
            Self::Bar => "bar",
            Self::Empty => "",
            Self::Space => " ",
            // Backslash followed by `n`: an escape sequence, not a real line break.
            Self::Newline => "\\n",
            Self::Number => "123",
            Self::Special => "{}",
        })
    }
}

/// Renders one fixed, finite float literal per variant.
impl Display for FloatLit {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Zero => "0.0",
            Self::One => "1.0",
            Self::Half => "0.5",
            Self::Pi => "3.14159",
            Self::Negative => "-1.5",
            Self::Small => "0.001",
            Self::Large => "1e10",
        })
    }
}

/// Renders the identifier text for a variable name.
impl Display for VarName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::X => "x",
            Self::Y => "y",
            Self::Z => "z",
            Self::A => "a",
            Self::B => "b",
            Self::C => "c",
            Self::I => "i",
            Self::J => "j",
            Self::N => "n",
            Self::Foo => "foo",
            Self::Bar => "bar",
            Self::Baz => "baz",
            Self::Spam => "spam",
            Self::Eggs => "eggs",
            Self::Result => "result",
            Self::Value => "value",
            Self::Item => "item",
            Self::Data => "data",
            Self::Args => "args",
            Self::Kwargs => "kwargs",
            Self::Self_ => "self",
            Self::Cls => "cls",
        })
    }
}

/// Renders the identifier text for an attribute / method name.
impl Display for AttrName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Append => "append",
            Self::Pop => "pop",
            Self::Get => "get",
            Self::Set => "set",
            Self::Keys => "keys",
            Self::Values => "values",
            Self::Items => "items",
            Self::Join => "join",
            Self::Split => "split",
            Self::Strip => "strip",
            Self::Lower => "lower",
            Self::Upper => "upper",
            Self::Format => "format",
            Self::Replace => "replace",
            Self::Find => "find",
            Self::Count => "count",
            Self::Sort => "sort",
            Self::Reverse => "reverse",
            Self::Copy => "copy",
            Self::Clear => "clear",
            Self::Update => "update",
            Self::Add => "add",
            Self::Remove => "remove",
        })
    }
}
/// Renders a binary operator surrounded by single spaces.
impl Display for BinOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Add => write!(f, " + "),
            Self::Sub => write!(f, " - "),
            Self::Mul => write!(f, " * "),
            Self::Div => write!(f, " / "),
            Self::FloorDiv => write!(f, " // "),
            Self::Mod => write!(f, " % "),
            Self::Pow => write!(f, " ** "),
            Self::MatMul => write!(f, " @ "),
            Self::BitAnd => write!(f, " & "),
            Self::BitOr => write!(f, " | "),
            Self::BitXor => write!(f, " ^ "),
            Self::LShift => write!(f, " << "),
            Self::RShift => write!(f, " >> "),
            Self::And => write!(f, " and "),
            Self::Or => write!(f, " or "),
        }
    }
}

/// Renders a prefix operator; only `not` carries a trailing space.
impl Display for UnaryOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Neg => write!(f, "-"),
            Self::Pos => write!(f, "+"),
            Self::Not => write!(f, "not "),
            Self::Invert => write!(f, "~"),
        }
    }
}

/// Renders a comparison operator surrounded by single spaces.
impl Display for CompareOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Eq => write!(f, " == "),
            Self::Ne => write!(f, " != "),
            Self::Lt => write!(f, " < "),
            Self::Le => write!(f, " <= "),
            Self::Gt => write!(f, " > "),
            Self::Ge => write!(f, " >= "),
            Self::Is => write!(f, " is "),
            Self::IsNot => write!(f, " is not "),
            Self::In => write!(f, " in "),
            Self::NotIn => write!(f, " not in "),
        }
    }
}

/// Renders an augmented-assignment operator surrounded by single spaces.
impl Display for AugAssign {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::AddEq => write!(f, " += "),
            Self::SubEq => write!(f, " -= "),
            Self::MulEq => write!(f, " *= "),
            Self::DivEq => write!(f, " /= "),
            Self::FloorDivEq => write!(f, " //= "),
            Self::ModEq => write!(f, " %= "),
            Self::PowEq => write!(f, " **= "),
            Self::AndEq => write!(f, " &= "),
            Self::OrEq => write!(f, " |= "),
            Self::XorEq => write!(f, " ^= "),
            Self::LShiftEq => write!(f, " <<= "),
            Self::RShiftEq => write!(f, " >>= "),
        }
    }
}

/// Renders a keyword; keywords that are normally followed by an operand carry a
/// trailing space, and `as` is padded on both sides.
impl Display for Keyword {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::If => write!(f, "if "),
            Self::Elif => write!(f, "elif "),
            Self::Else => write!(f, "else"),
            Self::For => write!(f, "for "),
            Self::While => write!(f, "while "),
            Self::Break => write!(f, "break"),
            Self::Continue => write!(f, "continue"),
            Self::Pass => write!(f, "pass"),
            Self::Return => write!(f, "return "),
            Self::Def => write!(f, "def "),
            Self::Lambda => write!(f, "lambda "),
            Self::Async => write!(f, "async "),
            Self::Await => write!(f, "await "),
            Self::Try => write!(f, "try"),
            Self::Except => write!(f, "except "),
            Self::Finally => write!(f, "finally"),
            Self::Raise => write!(f, "raise "),
            Self::Assert => write!(f, "assert "),
            Self::Import => write!(f, "import "),
            Self::From => write!(f, "from "),
            Self::As => write!(f, " as "),
            Self::Global => write!(f, "global "),
            Self::Nonlocal => write!(f, "nonlocal "),
        }
    }
}

/// Renders the indentation as four spaces per level.
impl Display for IndentLevel {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let spaces = match self {
            Self::L0 => 0,
            Self::L1 => 4,
            Self::L2 => 8,
            Self::L3 => 12,
            Self::L4 => 16,
        };
        for _ in 0..spaces {
            write!(f, " ")?;
        }
        Ok(())
    }
}

/// Wrapper for `Vec<Token>` with custom Debug that shows both tokens and generated code.
struct Tokens(Vec<Token>);

impl<'a> Arbitrary<'a> for Tokens {
    /// Builds the wrapper by delegating to the `Vec<Token>` implementation.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Vec::<Token>::arbitrary(u).map(Tokens)
    }
}

impl fmt::Debug for Tokens {
    /// Shows the raw token list next to the source text it renders to.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Tokens")
            .field("tokens", &self.0)
            .field("code", &self.to_code())
            .finish()
    }
}

impl Tokens {
    /// Convert the tokens to Python source code.
    ///
    /// Tokens are concatenated in order with no separator; any spacing comes
    /// from the tokens' own `Display` output.
    fn to_code(&self) -> String {
        self.0.iter().map(|t| t.to_string()).collect()
    }
}

/// Resource limits for fuzzing.
fn fuzz_limits() -> LimitedTracker { LimitedTracker::new( ResourceLimits::new() .max_allocations(10_000) .max_memory(1024 * 1024) // 1 MB .max_duration(Duration::from_millis(100)), ) } fuzz_target!(|tokens: Tokens| { let code = tokens.to_code(); // Try to parse the code let Ok(runner) = MontyRun::new(code, "fuzz.py", vec![]) else { return; // Parse errors are expected }; // Try to execute with resource limits let _ = runner.run(vec![], fuzz_limits(), PrintWriter::Disabled); }); ================================================ FILE: crates/monty/Cargo.toml ================================================ [package] name = "monty" readme = "../../README.md" version = { workspace = true } license = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } authors = { workspace = true } description = { workspace = true } keywords = { workspace = true } categories = { workspace = true } homepage = { workspace = true } repository = { workspace = true } [lib] name = "monty" path = "src/lib.rs" [dependencies] ruff_python_parser = { workspace = true } ruff_python_ast = { workspace = true } ruff_text_size = { workspace = true } ahash = { version = "0.8.0", features = ["serde"] } indexmap = { workspace = true } serde = { workspace = true } postcard = { workspace = true } strum = { version = "0.27", features = ["derive"] } hashbrown = "0.16" num-bigint = { workspace = true } num-traits = { workspace = true } num-integer = { workspace = true } smallvec = { version = "1.13", features = ["serde"] } fancy-regex = "0.17.0" libm = "0.2" itertools = "0.14.0" [features] # ref-count-return changes behavior to return information on reference counts to check they're correct # should be used for testing only ref-count-return = [] # ref-count-panic enables a Drop implementation on Value which catches heap allocated values that are dropped # without being dereferenced. 
# should be used for testing only ref-count-panic = [] [dev-dependencies] pyo3 = { version = "0.28", features = ["auto-initialize"] } # Use codspeed-criterion-compat for CI benchmarks, real criterion for local flamegraphs codspeed-criterion-compat = "4.2.1" criterion = "0.5" datatest-stable = "0.2" serde_json = "1.0" pprof = { version = "0.15", features = ["flamegraph", "criterion"] } similar = "2.7.0" [build-dependencies] pyo3-build-config = { version = "0.28", features = ["resolve-config"] } [[bench]] name = "main" harness = false [[test]] name = "datatest_runner" harness = false [lints] workspace = true ================================================ FILE: crates/monty/benches/main.rs ================================================ // Use codspeed-criterion-compat when running on CodSpeed (CI), real criterion otherwise (for flamegraphs) #[cfg(not(codspeed))] use std::ffi::CString; #[cfg(codspeed)] use codspeed_criterion_compat::{Bencher, Criterion, black_box, criterion_group, criterion_main}; #[cfg(not(codspeed))] use criterion::{Bencher, Criterion, black_box, criterion_group, criterion_main}; use monty::MontyRun; #[cfg(not(codspeed))] use pprof::criterion::{Output, PProfProfiler}; // CPython benchmarks are only run locally, not on CodSpeed CI (requires Python + pyo3 setup) #[cfg(not(codspeed))] use pyo3::prelude::*; /// Runs a benchmark using the Monty interpreter. /// Parses once, then benchmarks repeated execution. fn run_monty(bench: &mut Bencher, code: &str, expected: i64) { let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let r = ex.run_no_limits(vec![]).unwrap(); let int_value: i64 = r.as_ref().try_into().unwrap(); assert_eq!(int_value, expected); bench.iter(|| { let r = ex.run_no_limits(vec![]).unwrap(); let int_value: i64 = r.as_ref().try_into().unwrap(); black_box(int_value); }); } /// Runs a benchmark using CPython. /// Wraps code in main(), parses once, then benchmarks repeated execution. 
#[cfg(not(codspeed))] fn run_cpython(bench: &mut Bencher, code: &str, expected: i64) { Python::attach(|py| { let wrapped = wrap_for_cpython(code); let code_cstr = CString::new(wrapped).expect("Invalid C string in code"); let fun: Py = PyModule::from_code(py, &code_cstr, c"test.py", c"main") .unwrap() .getattr("main") .unwrap() .into(); let r_py = fun.call0(py).unwrap(); let r: i64 = r_py.extract(py).unwrap(); assert_eq!(r, expected); bench.iter(|| { let r_py = fun.call0(py).unwrap(); let r: i64 = r_py.extract(py).unwrap(); black_box(r); }); }); } /// Wraps code in a main() function for CPython execution. /// Indents each line and converts the last expression to a return statement. #[cfg(not(codspeed))] fn wrap_for_cpython(code: &str) -> String { let mut lines: Vec = Vec::new(); let mut last_expr = String::new(); for line in code.lines() { // Skip test metadata comments if line.starts_with("# Return=") || line.starts_with("# Raise=") || line.starts_with("# skip=") { continue; } // Track the last non-empty, non-comment line as potential return expression let trimmed = line.trim(); if !trimmed.is_empty() && !trimmed.starts_with('#') { last_expr = line.to_string(); } lines.push(format!(" {line}")); } // Replace last expression with return statement if let Some(last) = lines.iter().rposition(|l| l.trim() == last_expr.trim()) { lines[last] = format!(" return {}", last_expr.trim()); } format!("def main():\n{}", lines.join("\n")) } const ADD_TWO: &str = "1 + 2"; const LIST_APPEND: &str = " a = [] a.append(42) a[0] "; const LOOP_MOD_13: &str = " v = '' for i in range(1_000): if i % 13 == 0: v += 'x' len(v) "; /// Comprehensive benchmark exercising most supported Python features. 
/// Code is shared with test_cases/bench__kitchen_sink.py const KITCHEN_SINK: &str = include_str!("../test_cases/bench__kitchen_sink.py"); const FUNC_CALL_KWARGS: &str = " def add(a, b=2): return a + b add(a=1) "; const LIST_APPEND_STR: &str = " a = [] for i in range(100_000): a.append(str(i)) len(a) "; const LIST_APPEND_INT: &str = " a = [] for i in range(100_000): a.append(i) sum(a) "; const FIB_25: &str = " def fib(n): if n <= 1: return n return fib(n - 1) + fib(n - 2) fib(25) "; /// List comprehension benchmark - creates 1000 elements. const LIST_COMP: &str = "len([x * 2 for x in range(1000)])"; /// Dict comprehension benchmark - creates 500 unique keys (i // 2 deduplicates pairs). const DICT_COMP: &str = "len({i // 2: i * 2 for i in range(1000)})"; /// Empty tuple creation benchmark - creates 100,000 empty tuples in a list. const EMPTY_TUPLES: &str = "len([() for _ in range(100_000)])"; /// 2-tuple creation benchmark - creates 100,000 2-tuples in a list. const PAIR_TUPLES: &str = "len([(i, i + 1) for i in range(100_000)])"; /// Benchmarks end-to-end execution (parsing + running) using Monty. /// This is different from other benchmarks as it includes parsing in the loop. fn end_to_end_monty(bench: &mut Bencher) { bench.iter(|| { let ex = MontyRun::new(black_box("1 + 2").to_owned(), "test.py", vec![]).unwrap(); let r = ex.run_no_limits(vec![]).unwrap(); let int_value: i64 = r.as_ref().try_into().unwrap(); black_box(int_value); }); } /// Benchmarks end-to-end execution (parsing + running) using CPython. /// This is different from other benchmarks as it includes parsing in the loop. 
#[cfg(not(codspeed))] fn end_to_end_cpython(bench: &mut Bencher) { Python::attach(|py| { bench.iter(|| { let fun: Py = PyModule::from_code(py, black_box(c"def main():\n return 1 + 2"), c"test.py", c"main") .unwrap() .getattr("main") .unwrap() .into(); let r_py = fun.call0(py).unwrap(); let r: i64 = r_py.extract(py).unwrap(); black_box(r); }); }); } /// Configures all benchmarks in a single group. fn criterion_benchmark(c: &mut Criterion) { c.bench_function("add_two__monty", |b| run_monty(b, ADD_TWO, 3)); #[cfg(not(codspeed))] c.bench_function("add_two__cpython", |b| run_cpython(b, ADD_TWO, 3)); c.bench_function("list_append__monty", |b| run_monty(b, LIST_APPEND, 42)); #[cfg(not(codspeed))] c.bench_function("list_append__cpython", |b| run_cpython(b, LIST_APPEND, 42)); c.bench_function("loop_mod_13__monty", |b| run_monty(b, LOOP_MOD_13, 77)); #[cfg(not(codspeed))] c.bench_function("loop_mod_13__cpython", |b| run_cpython(b, LOOP_MOD_13, 77)); c.bench_function("end_to_end__monty", end_to_end_monty); #[cfg(not(codspeed))] c.bench_function("end_to_end__cpython", end_to_end_cpython); c.bench_function("kitchen_sink__monty", |b| run_monty(b, KITCHEN_SINK, 373)); #[cfg(not(codspeed))] c.bench_function("kitchen_sink__cpython", |b| run_cpython(b, KITCHEN_SINK, 373)); c.bench_function("func_call_kwargs__monty", |b| run_monty(b, FUNC_CALL_KWARGS, 3)); #[cfg(not(codspeed))] c.bench_function("func_call_kwargs__cpython", |b| run_cpython(b, FUNC_CALL_KWARGS, 3)); c.bench_function("list_append_str__monty", |b| run_monty(b, LIST_APPEND_STR, 100_000)); #[cfg(not(codspeed))] c.bench_function("list_append_str__cpython", |b| run_cpython(b, LIST_APPEND_STR, 100_000)); c.bench_function("list_append_int__monty", |b| { run_monty(b, LIST_APPEND_INT, 4_999_950_000); }); #[cfg(not(codspeed))] c.bench_function("list_append_int__cpython", |b| { run_cpython(b, LIST_APPEND_INT, 4_999_950_000); }); c.bench_function("fib__monty", |b| run_monty(b, FIB_25, 75_025)); #[cfg(not(codspeed))] 
c.bench_function("fib__cpython", |b| run_cpython(b, FIB_25, 75_025)); c.bench_function("list_comp__monty", |b| run_monty(b, LIST_COMP, 1000)); #[cfg(not(codspeed))] c.bench_function("list_comp__cpython", |b| run_cpython(b, LIST_COMP, 1000)); c.bench_function("dict_comp__monty", |b| run_monty(b, DICT_COMP, 500)); #[cfg(not(codspeed))] c.bench_function("dict_comp__cpython", |b| run_cpython(b, DICT_COMP, 500)); c.bench_function("empty_tuples__monty", |b| run_monty(b, EMPTY_TUPLES, 100_000)); #[cfg(not(codspeed))] c.bench_function("empty_tuples__cpython", |b| run_cpython(b, EMPTY_TUPLES, 100_000)); c.bench_function("pair_tuples__monty", |b| run_monty(b, PAIR_TUPLES, 100_000)); #[cfg(not(codspeed))] c.bench_function("pair_tuples__cpython", |b| run_cpython(b, PAIR_TUPLES, 100_000)); } // Use pprof flamegraph profiler when running locally (not on CodSpeed) #[cfg(not(codspeed))] criterion_group!( name = benches; config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); targets = criterion_benchmark ); // Use default config when running on CodSpeed (pprof's Profiler trait is incompatible) #[cfg(codspeed)] criterion_group!(benches, criterion_benchmark); criterion_main!(benches); ================================================ FILE: crates/monty/build.rs ================================================ fn main() { // This ensures that tests can find the libpython shared library at runtime, even if it's not on // the system library path. This makes running tests much easier on e.g. Linux with a uv venv. // // This is technically a bit wasteful because the main `lib` doesn't need this, just tests, but it // won't affect downstream executables other than requiring them to have a valid Python in their system. // // If that becomes a big problem, we can rethink. 
pyo3_build_config::add_libpython_rpath_link_args(); } ================================================ FILE: crates/monty/src/args.rs ================================================ use std::vec::IntoIter; use crate::{ MontyObject, ResourceTracker, bytecode::VM, defer_drop, defer_drop_mut, exception_private::{ExcType, RunError, RunResult, SimpleException}, expressions::{ExprLoc, Identifier}, heap::{ContainsHeap, DropWithHeap, Heap, HeapGuard}, intern::{Interns, StringId}, parse::ParseError, types::{Dict, dict::DictIntoIter}, value::Value, }; /// Type for method call arguments. /// /// Uses specific variants for common cases (0-2 arguments). /// Most Python method calls have at most 2 arguments, so this optimization /// eliminates the Vec heap allocation overhead for the vast majority of calls. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) enum ArgValues { Empty, One(Value), Two(Value, Value), Kwargs(KwargsValues), ArgsKargs { args: Vec, kwargs: KwargsValues }, } impl ArgValues { /// Checks that zero arguments were passed. /// /// On error, properly drops all contained values to maintain reference counts. pub fn check_zero_args(self, name: &str, heap: &mut Heap) -> RunResult<()> { match self { Self::Empty => Ok(()), other => { let count = other.count(); other.drop_with_heap(heap); Err(ExcType::type_error_no_args(name, count)) } } } /// Checks that exactly one positional argument was passed, returning it. /// /// On error, properly drops all contained values to maintain reference counts. pub fn get_one_arg(self, name: &str, heap: &mut Heap) -> RunResult { match self { Self::One(a) => Ok(a), other => { let count = other.count(); other.drop_with_heap(heap); Err(ExcType::type_error_arg_count(name, 1, count)) } } } /// Checks that exactly two positional arguments were passed, returning them as a tuple. /// /// On error, properly drops all contained values to maintain reference counts. 
pub fn get_two_args(self, name: &str, heap: &mut Heap) -> RunResult<(Value, Value)> { match self { Self::Two(a1, a2) => Ok((a1, a2)), other => { let count = other.count(); other.drop_with_heap(heap); Err(ExcType::type_error_arg_count(name, 2, count)) } } } /// Checks that one or two arguments were passed, returning them as a tuple. /// /// On error, properly drops all contained values to maintain reference counts. pub fn get_one_two_args( self, name: &str, heap: &mut Heap, ) -> RunResult<(Value, Option)> { match self { Self::One(a) => Ok((a, None)), Self::Two(a1, a2) => Ok((a1, Some(a2))), other => { let count = other.count(); other.drop_with_heap(heap); if count == 0 { Err(ExcType::type_error_at_least(name, 1, count)) } else { Err(ExcType::type_error_at_most(name, 2, count)) } } } } /// Checks that zero or one argument was passed, returning the optional value. /// /// On error, properly drops all contained values to maintain reference counts. pub fn get_zero_one_arg(self, name: &str, heap: &mut Heap) -> RunResult> { match self { Self::Empty => Ok(None), Self::One(a) => Ok(Some(a)), other => { let count = other.count(); other.drop_with_heap(heap); Err(ExcType::type_error_at_most(name, 1, count)) } } } /// Checks that zero, one, or two arguments were passed. /// /// Returns (None, None) for 0 args, (Some(a), None) for 1 arg, (Some(a), Some(b)) for 2 args. /// On error, properly drops all contained values to maintain reference counts. pub fn get_zero_one_two_args( self, name: &str, heap: &mut Heap, ) -> RunResult<(Option, Option)> { match self { Self::Empty => Ok((None, None)), Self::One(a) => Ok((Some(a), None)), Self::Two(a, b) => Ok((Some(a), Some(b))), other => { let count = other.count(); other.drop_with_heap(heap); Err(ExcType::type_error_at_most(name, 2, count)) } } } /// Extracts a keyword-only pair by name. /// /// Validates that no positional arguments are provided and only the specified /// keyword arguments are present. 
Returns `(None, None)` when neither keyword /// is provided. /// /// # Arguments /// * `method_name` - Method name for error messages (e.g., "list.sort") /// * `kwarg1` - Name of the first keyword argument /// * `kwarg2` - Name of the second keyword argument /// /// # Errors /// Returns an error if: /// - Any positional arguments are provided /// - A keyword argument other than `kwarg1` or `kwarg2` is provided /// - A keyword is not a string pub fn extract_keyword_only_pair( self, method_name: &str, kwarg1: &str, kwarg2: &str, heap: &mut Heap, interns: &Interns, ) -> RunResult<(Option, Option)> { let (pos, kwargs) = self.into_parts(); defer_drop!(pos, heap); // Check no positional arguments if pos.len() > 0 { kwargs.drop_with_heap(heap); return Err(ExcType::type_error_no_args(method_name, 1)); } kwargs.parse_named_kwargs_pair(method_name, kwarg1, kwarg2, heap, interns, |method_name, key_str| { ExcType::type_error(format!( "'{key_str}' is an invalid keyword argument for {method_name}()" )) }) } /// Prepends a value as the first positional argument. /// /// Used to insert `self` when dispatching dataclass method calls to the host. /// The dataclass instance becomes the first arg so the host can reconstruct /// the original object and call the method on it. pub fn prepend(self, value: Value) -> Self { match self { Self::Empty => Self::One(value), Self::One(a) => Self::Two(value, a), Self::Two(a, b) => Self::ArgsKargs { args: vec![value, a, b], kwargs: KwargsValues::Empty, }, Self::Kwargs(kw) => Self::ArgsKargs { args: vec![value], kwargs: kw, }, Self::ArgsKargs { mut args, kwargs } => { args.insert(0, value); Self::ArgsKargs { args, kwargs } } } } /// Splits into positional iterator and keyword values without allocating /// for the common One/Two cases. 
pub fn into_parts(self) -> (ArgPosIter, KwargsValues) { match self { Self::Empty => (ArgPosIter::Empty, KwargsValues::Empty), Self::One(v) => (ArgPosIter::One(v), KwargsValues::Empty), Self::Two(v1, v2) => (ArgPosIter::Two([v1, v2]), KwargsValues::Empty), Self::Kwargs(kwargs) => (ArgPosIter::Empty, kwargs), Self::ArgsKargs { args, kwargs } => (ArgPosIter::Vec(args.into_iter()), kwargs), } } /// Variant of [`into_parts()`](Self::into_parts) that accepts no kwargs, returning an error if any are present. pub fn into_pos_only(self, method_name: &str, heap: &mut Heap) -> RunResult { match self { Self::Empty => Ok(ArgPosIter::Empty), Self::One(v) => Ok(ArgPosIter::One(v)), Self::Two(v1, v2) => Ok(ArgPosIter::Two([v1, v2])), Self::Kwargs(kwargs) => { if kwargs.is_empty() { Ok(ArgPosIter::Empty) } else { Err(Self::unexpected_kwargs_error(kwargs, method_name, heap)) } } Self::ArgsKargs { args, kwargs } => { if kwargs.is_empty() { Ok(ArgPosIter::Vec(args.into_iter())) } else { args.drop_with_heap(heap); Err(Self::unexpected_kwargs_error(kwargs, method_name, heap)) } } } } #[cold] fn unexpected_kwargs_error( kwargs: KwargsValues, method_name: &str, heap: &mut Heap, ) -> RunError { kwargs.drop_with_heap(heap); ExcType::type_error_no_kwargs(method_name) } /// Converts the arguments into a Vec of MontyObjects. /// /// This is used when passing arguments to external functions. pub fn into_py_objects( self, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> (Vec, Vec<(MontyObject, MontyObject)>) { match self { Self::Empty => (vec![], vec![]), Self::One(a) => (vec![MontyObject::new(a, vm)], vec![]), Self::Two(a1, a2) => (vec![MontyObject::new(a1, vm), MontyObject::new(a2, vm)], vec![]), Self::Kwargs(kwargs) => (vec![], kwargs.into_py_objects(vm)), Self::ArgsKargs { args, kwargs } => ( args.into_iter().map(|v| MontyObject::new(v, vm)).collect(), kwargs.into_py_objects(vm), ), } } /// Returns the number of positional arguments. 
/// /// For `Kwargs` returns 0, for `ArgsKargs` returns only the positional args count. fn count(&self) -> usize { match self { Self::Empty => 0, Self::One(_) => 1, Self::Two(_, _) => 2, Self::Kwargs(_) => 0, Self::ArgsKargs { args, .. } => args.len(), } } } impl DropWithHeap for ArgValues { fn drop_with_heap(self, heap: &mut H) { match self { Self::Empty => {} Self::One(v) => v.drop_with_heap(heap), Self::Two(v1, v2) => { v1.drop_with_heap(heap); v2.drop_with_heap(heap); } Self::Kwargs(kwargs) => { kwargs.drop_with_heap(heap); } Self::ArgsKargs { args, kwargs } => { args.drop_with_heap(heap); kwargs.drop_with_heap(heap); } } } } /// Iterator over positional arguments without allocation. /// /// Supports iterating over `ArgValues::One/Two` without converting to Vec. /// This iterator must be fully consumed OR explicitly dropped with /// `drop_remaining_with_heap()` to maintain correct reference counts. /// /// The iterator yields values by ownership transfer. Once a value is yielded, /// the caller is responsible for either using it or calling `drop_with_heap()` on it. pub(crate) enum ArgPosIter { Empty, One(Value), Two([Value; 2]), Vec(IntoIter), } impl ArgPosIter { /// Returns a slice of the remaining positional arguments without consuming them. 
pub fn as_slice(&self) -> &[Value] {
    match self {
        Self::Empty => &[],
        Self::One(v) => std::slice::from_ref(v),
        Self::Two(array) => array.as_slice(),
        Self::Vec(iter) => iter.as_slice(),
    }
}
}

impl Iterator for ArgPosIter {
    type Item = Value;

    /// Yields the next positional argument by value.
    ///
    /// The inline variants step down `Two -> One -> Empty` as values are taken.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Self::Empty => None,
            Self::One(_) => {
                // Swap in `Empty` to move the value out from behind `&mut self`.
                let Self::One(v) = std::mem::replace(self, Self::Empty) else {
                    unreachable!()
                };
                Some(v)
            }
            Self::Two(_) => {
                let Self::Two([v1, v2]) = std::mem::replace(self, Self::Empty) else {
                    unreachable!()
                };
                // Keep the second value for the following call.
                *self = Self::One(v2);
                Some(v1)
            }
            Self::Vec(iter) => iter.next(),
        }
    }

    /// Reports the exact number of remaining values, as `ExactSizeIterator` requires.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self {
            Self::Empty => (0, Some(0)),
            Self::One(_) => (1, Some(1)),
            Self::Two(_) => (2, Some(2)),
            Self::Vec(iter) => iter.size_hint(),
        }
    }
}

impl ExactSizeIterator for ArgPosIter {}

impl DropWithHeap for ArgPosIter {
    /// Drops every value that has not been yielded yet.
    fn drop_with_heap<H: ContainsHeap>(self, heap: &mut H) {
        match self {
            Self::Empty => {}
            Self::One(v1) => v1.drop_with_heap(heap),
            Self::Two(v12) => v12.drop_with_heap(heap),
            Self::Vec(iter) => iter.drop_with_heap(heap),
        }
    }
}

/// Type for keyword arguments.
///
/// Used to capture both the case of inline keyword arguments `foo(foo=1, bar=2)`
/// and the case of a dictionary passed as a single argument `foo(**kwargs)`.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub(crate) enum KwargsValues {
    Empty,
    Inline(Vec<(StringId, Value)>),
    Dict(Dict),
}

impl KwargsValues {
    /// Returns the number of keyword arguments.
    #[must_use]
    pub fn len(&self) -> usize {
        match self {
            Self::Empty => 0,
            Self::Inline(kvs) => kvs.len(),
            Self::Dict(dict) => dict.len(),
        }
    }

    /// Returns true if there are no keyword arguments.
    ///
    /// This is also true for an `Inline` or `Dict` variant that holds no entries.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Converts the arguments into a Vec of MontyObjects.
    ///
    /// This is used when passing arguments to external functions.
fn into_py_objects(self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Vec<(MontyObject, MontyObject)> {
    match self {
        Self::Empty => vec![],
        Self::Inline(kvs) => kvs
            .into_iter()
            .map(|(k, v)| {
                // Inline keys are interned strings; resolve them to owned text.
                let key = MontyObject::String(vm.interns.get_str(k).to_owned());
                let value = MontyObject::new(v, vm);
                (key, value)
            })
            .collect(),
        Self::Dict(dict) => dict
            .into_iter()
            .map(|(k, v)| (MontyObject::new(k, vm), MontyObject::new(v, vm)))
            .collect(),
    }
}

/// Helper for functions which do not yet support kwargs, returns an `Err` if there are kwargs.
///
/// On error the kwargs are dropped against `heap` before the `TypeError` is returned.
pub fn not_supported_yet(self, method_name: &str, heap: &mut Heap<impl ResourceTracker>) -> RunResult<()> {
    if self.is_empty() {
        Ok(())
    } else {
        self.drop_with_heap(heap);
        Err(SimpleException::new_msg(
            ExcType::TypeError,
            format!("{method_name}() does not support keyword arguments yet"),
        )
        .into())
    }
}

/// Parses a fixed pair of named keyword arguments with duplicate checking.
///
/// This helper is intentionally narrow: it covers the common builtin/method
/// pattern of accepting a tiny fixed keyword surface such as `key/default`
/// or `key/reverse`, while leaving positional-argument validation and any
/// post-processing to the caller.
///
/// `unexpected_keyword` formats the call-site-specific error for keywords
/// other than `kwarg1` and `kwarg2`.
///
/// # Errors
/// Returns an error if a key is not a string, if `kwarg1` or `kwarg2` is
/// supplied more than once, or if any other keyword is present.
pub fn parse_named_kwargs_pair(
    self,
    func_name: &str,
    kwarg1: &str,
    kwarg2: &str,
    heap: &mut Heap<impl ResourceTracker>,
    interns: &Interns,
    unexpected_keyword: impl Fn(&str, &str) -> RunError,
) -> RunResult<(Option<Value>, Option<Value>)> {
    let kwargs = self.into_iter();
    defer_drop_mut!(kwargs, heap);

    // Guards are reversed so that destructure can pull them.
    let mut val2_guard = HeapGuard::new(None::<Value>, heap);
    let (val2, heap) = val2_guard.as_parts_mut();
    let mut val1_guard = HeapGuard::new(None::<Value>, heap);
    let (val1, heap) = val1_guard.as_parts_mut();

    for (key, value) in kwargs {
        defer_drop!(key, heap);
        // Guard the value so the early returns below do not leak it.
        let mut value = HeapGuard::new(value, heap);

        let Some(keyword_name) = key.as_either_str(value.heap()) else {
            return Err(ExcType::type_error_kwargs_nonstring_key());
        };

        let key_str = keyword_name.as_str(interns);
        if key_str == kwarg1 {
            if val1.is_some() {
                return Err(ExcType::type_error_multiple_values(func_name, key_str));
            }
            *val1 = Some(value.into_inner());
        } else if key_str == kwarg2 {
            if val2.is_some() {
                return Err(ExcType::type_error_multiple_values(func_name, key_str));
            }
            *val2 = Some(value.into_inner());
        } else {
            return Err(unexpected_keyword(func_name, key_str));
        }
    }

    Ok((val1_guard.into_inner(), val2_guard.into_inner()))
}
}

impl DropWithHeap for KwargsValues {
    /// Properly drops all values in the arguments, decrementing reference counts.
    fn drop_with_heap<H: ContainsHeap>(self, heap: &mut H) {
        match self {
            Self::Empty => {}
            Self::Inline(kvs) => {
                // Inline keys are `StringId`s, so only the values are dropped.
                for (_, v) in kvs {
                    v.drop_with_heap(heap);
                }
            }
            Self::Dict(dict) => {
                for (k, v) in dict {
                    k.drop_with_heap(heap);
                    v.drop_with_heap(heap);
                }
            }
        }
    }
}

impl IntoIterator for KwargsValues {
    type Item = (Value, Value);
    type IntoIter = KwargsValuesIter;

    /// Wraps the underlying storage in the matching `KwargsValuesIter` variant.
    fn into_iter(self) -> Self::IntoIter {
        match self {
            Self::Empty => KwargsValuesIter::Empty,
            Self::Inline(kvs) => KwargsValuesIter::Inline(kvs.into_iter()),
            Self::Dict(dict) => KwargsValuesIter::Dict(dict.into_iter()),
        }
    }
}

/// Iterator over keyword argument (key, value) pairs.
///
/// For `Inline` kwargs, converts `StringId` keys to `Value::InternString`.
/// For `Dict` kwargs, iterates directly over the dict's entries without
/// intermediate allocation.
pub(crate) enum KwargsValuesIter {
    Empty,
    Inline(IntoIter<(StringId, Value)>),
    Dict(DictIntoIter),
}

impl Iterator for KwargsValuesIter {
    type Item = (Value, Value);

    /// Yields the next `(key, value)` pair, wrapping inline keys as `Value::InternString`.
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Self::Empty => None,
            Self::Inline(iter) => iter.next().map(|(k, v)| (Value::InternString(k), v)),
            Self::Dict(iter) => iter.next(),
        }
    }

    /// Forwards the size hint of the underlying iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self {
            Self::Empty => (0, Some(0)),
            Self::Inline(iter) => iter.size_hint(),
            Self::Dict(iter) => iter.size_hint(),
        }
    }
}

impl ExactSizeIterator for KwargsValuesIter {}

impl DropWithHeap for KwargsValuesIter {
    /// Drops every pair that has not been yielded yet.
    fn drop_with_heap<H: ContainsHeap>(self, heap: &mut H) {
        match self {
            Self::Empty => {}
            Self::Inline(iter) => {
                // Inline keys are `StringId`s, so only the values are dropped.
                for (_, v) in iter {
                    v.drop_with_heap(heap);
                }
            }
            Self::Dict(iter) => {
                for (k, v) in iter {
                    k.drop_with_heap(heap);
                    v.drop_with_heap(heap);
                }
            }
        }
    }
}

/// A keyword argument in a function call expression.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Kwarg {
    pub key: Identifier,
    pub value: ExprLoc,
}

/// A positional argument item in a generalized function call (PEP 448).
///
/// Used in `ArgExprs::GeneralizedCall` when a call has multiple `*unpacks`
/// or positional arguments after a `*unpack`. Each item is either a plain
/// value or a `*expr` iterable to be unpacked into the argument tuple.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub(crate) enum CallArg {
    /// A plain positional argument.
    Value(ExprLoc),
    /// A `*expr` unpack — the iterable is spread into consecutive arguments.
    Unpack(ExprLoc),
}

/// A keyword argument item in a generalized function call (PEP 448).
///
/// Used in `ArgExprs::GeneralizedCall` when a call has multiple `**unpacks`
/// or named kwargs interspersed with `**unpacks`. Duplicate keys from any
/// combination raise `TypeError` (both `f(**a, **b)` with shared keys and
/// `f(x=1, **{'x': 2})` are errors). This is enforced by `DictMerge` in
/// the compiler.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub(crate) enum CallKwarg { /// A named keyword argument: `key=value`. Named(Kwarg), /// A `**expr` unpack — the mapping's entries are merged into kwargs. Unpack(ExprLoc), } /// Expressions that make up a function call's arguments. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum ArgExprs { Empty, One(ExprLoc), Two(ExprLoc, ExprLoc), Args(Vec), Kwargs(Vec), ArgsKargs { args: Option>, var_args: Option, kwargs: Option>, var_kwargs: Option, }, /// Generalized call with PEP 448 unpacking. /// /// Used when a call has multiple `*args` unpacks, positional arguments /// after a `*unpack`, or multiple `**kwargs` unpacks. The compiler /// builds the args tuple incrementally using `BuildList(0)` + /// `ListAppend`/`ListExtend` + `ListToTuple`, and the kwargs dict /// using `BuildDict(0)` + `DictMerge` (which raises `TypeError` on /// duplicate keys). GeneralizedCall { args: Vec, kwargs: Vec, }, } impl ArgExprs { /// Creates a `GeneralizedCall` for PEP 448 calls with multiple unpacks. /// /// Use this when a function call has multiple `*args` unpacks, positional /// arguments after a `*unpack`, or multiple `**kwargs` unpacks. The compiler /// will emit `BuildList(0)` + `ListAppend`/`ListExtend` + `ListToTuple` for /// the args tuple, and `BuildDict(0)` + `DictMerge` for the kwargs dict. pub(crate) fn new_generalized(args: Vec, kwargs: Vec) -> Self { Self::GeneralizedCall { args, kwargs } } /// Creates a new `ArgExprs` with optional `*args` and `**kwargs` unpacking expressions. /// /// This is used when parsing function calls that may include `*expr` / `**expr` /// syntax for unpacking iterables or mappings into arguments. 
pub fn new_with_var_kwargs( args: Vec, var_args: Option, kwargs: Vec, var_kwargs: Option, ) -> Self { // Full generality requires ArgsKargs when we have unpacking or mixed arg/kwarg usage if var_args.is_some() || var_kwargs.is_some() || (!kwargs.is_empty() && !args.is_empty()) { Self::ArgsKargs { args: if args.is_empty() { None } else { Some(args) }, var_args, kwargs: if kwargs.is_empty() { None } else { Some(kwargs) }, var_kwargs, } } else if !kwargs.is_empty() { Self::Kwargs(kwargs) } else if args.len() > 2 { Self::Args(args) } else { let mut iter = args.into_iter(); if let Some(first) = iter.next() { if let Some(second) = iter.next() { Self::Two(first, second) } else { Self::One(first) } } else { Self::Empty } } } /// Applies a transformation function to all `ExprLoc` elements in the args. /// /// This is used during the preparation phase to recursively prepare all /// argument expressions before execution. pub fn prepare_args( &mut self, mut f: impl FnMut(ExprLoc) -> Result, ) -> Result<(), ParseError> { // Swap self with Empty to take ownership, then rebuild let taken = std::mem::replace(self, Self::Empty); *self = match taken { Self::Empty => Self::Empty, Self::One(arg) => Self::One(f(arg)?), Self::Two(arg1, arg2) => Self::Two(f(arg1)?, f(arg2)?), Self::Args(args) => Self::Args(args.into_iter().map(&mut f).collect::, _>>()?), Self::Kwargs(kwargs) => Self::Kwargs( kwargs .into_iter() .map(|kwarg| { Ok(Kwarg { key: kwarg.key, value: f(kwarg.value)?, }) }) .collect::, ParseError>>()?, ), Self::ArgsKargs { args, var_args, kwargs, var_kwargs, } => { let args = args .map(|a| a.into_iter().map(&mut f).collect::, ParseError>>()) .transpose()?; let var_args = var_args.map(&mut f).transpose()?; let kwargs = kwargs .map(|k| { k.into_iter() .map(|kwarg| { Ok(Kwarg { key: kwarg.key, value: f(kwarg.value)?, }) }) .collect::, ParseError>>() }) .transpose()?; let var_kwargs = var_kwargs.map(&mut f).transpose()?; Self::ArgsKargs { args, var_args, kwargs, var_kwargs, } } 
Self::GeneralizedCall { args, kwargs } => { let args = args .into_iter() .map(|arg| match arg { CallArg::Value(e) => Ok(CallArg::Value(f(e)?)), CallArg::Unpack(e) => Ok(CallArg::Unpack(f(e)?)), }) .collect::, ParseError>>()?; let kwargs = kwargs .into_iter() .map(|kwarg| match kwarg { CallKwarg::Named(kw) => Ok(CallKwarg::Named(Kwarg { key: kw.key, value: f(kw.value)?, })), CallKwarg::Unpack(e) => Ok(CallKwarg::Unpack(f(e)?)), }) .collect::, ParseError>>()?; Self::GeneralizedCall { args, kwargs } } }; Ok(()) } } ================================================ FILE: crates/monty/src/asyncio.rs ================================================ //! Async/await support types for Monty. //! //! This module contains all async-related types including coroutines, futures, //! and task identifiers. The host acts as the event loop - external function //! calls return `ExternalFuture` objects that can be awaited. use crate::{heap::HeapId, intern::FunctionId, value::Value}; /// Unique identifier for external function calls. /// /// Sequential integers allocated by the scheduler. Used to correlate /// external function calls with their results when the host resolves them. /// The counter always increments, even for sync resolution, to keep IDs unique. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub(crate) struct CallId(u32); impl CallId { /// Creates a new CallId from a raw value. #[inline] pub fn new(id: u32) -> Self { Self(id) } /// Returns the raw u32 value. #[inline] pub fn raw(self) -> u32 { self.0 } } /// Unique identifier for an async task. /// /// Sequential integers allocated by the scheduler. Task 0 is always the main task /// which uses the VM's stack/frames directly. Spawned tasks (1+) store their own context, /// hence `TaskId::default()` is the main task. 
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub(crate) struct TaskId(u32); impl TaskId { /// Creates a new TaskId from a raw value. #[inline] pub fn new(id: u32) -> Self { Self(id) } /// Returns the raw u32 value. #[inline] pub fn raw(self) -> u32 { self.0 } /// Returns true if this is the main task (task 0). #[inline] pub fn is_main(self) -> bool { self.0 == 0 } } /// Coroutine execution state (single-shot semantics). /// /// Coroutines in Monty follow single-shot semantics - they can only be awaited once. /// This differs from Python generators which can be resumed multiple times. #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub(crate) enum CoroutineState { /// Coroutine has been created but not yet awaited. New, /// Coroutine is currently executing (has been awaited). Running, /// Coroutine has finished execution. Completed, } /// A coroutine object representing an async function call result. /// /// Created when an `async def` function is called. Argument binding happens at call time; /// awaiting the coroutine starts execution. Coroutines use single-shot semantics - /// they can only be awaited once. /// /// # Namespace Layout /// /// The `namespace` vector is pre-sized to match the function's namespace size and contains: /// ```text /// [params...][cell_vars...][free_vars...][locals...] /// ``` /// - Parameter slots are filled with bound argument values at call time /// - Cell/free var slots contain `Value::Ref` to captured cells /// - Local slots start as `Value::Undefined` /// /// When the coroutine is awaited, these values are pushed onto the VM's stack /// as inline locals, and a new frame is pushed to execute the async function body. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) struct Coroutine { /// The async function to execute. pub func_id: FunctionId, /// Pre-bound namespace values (sized to function namespace). 
/// Contains bound parameters, captured cells, and uninitialized locals. pub namespace: Vec, /// Current execution state. pub state: CoroutineState, } impl Coroutine { /// Creates a new coroutine for an async function call. /// /// # Arguments /// * `func_id` - The async function to execute /// * `namespace` - Pre-bound namespace with parameters and captured variables pub fn new(func_id: FunctionId, namespace: Vec) -> Self { Self { func_id, namespace, state: CoroutineState::New, } } } /// An item that can be gathered - either a coroutine or an external future. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub(crate) enum GatherItem { /// A coroutine to spawn as a task. Coroutine(HeapId), /// An external future to wait for resolution. ExternalFuture(CallId), } /// A gather() result tracking multiple coroutines/tasks and external futures. /// /// Created by `asyncio.gather(*awaitables)`. Does NOT spawn tasks immediately - /// tasks are spawned when the GatherFuture is awaited in Await. /// /// # Lifecycle /// /// 1. **Creation**: `gather(coro1, coro2, ...)` stores coroutine HeapIds and external CallIds /// 2. **Await**: `await gather_future` spawns tasks and blocks the current task /// 3. **Completion**: As tasks/futures complete, results are stored in order /// 4. **Return**: When all items complete, returns list of results /// /// # Error Handling /// /// On any task failure, sibling tasks are cancelled and the exception propagates /// to the task that awaited the gather. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) struct GatherFuture { /// Items to gather (coroutines or external futures). pub items: Vec, /// TaskIds of spawned tasks (only for coroutine items, set when awaited). /// Length matches the number of Coroutine items. pub task_ids: Vec, /// Results from each item, in order (filled as items complete). /// Indices align with `items`. pub results: Vec>, /// Task waiting on this gather (set when awaited). 
pub waiter: Option, /// CallIds of external futures we're waiting on. /// Used to check if all external futures have resolved. pub pending_calls: Vec, } impl GatherFuture { /// Creates a new GatherFuture with the given items. /// /// # Arguments /// * `items` - Coroutines or external futures to run concurrently pub fn new(items: Vec) -> Self { let count = items.len(); Self { items, task_ids: Vec::new(), results: (0..count).map(|_| None).collect(), waiter: None, pending_calls: Vec::new(), } } /// Returns the number of items to gather. #[inline] pub fn item_count(&self) -> usize { self.items.len() } } ================================================ FILE: crates/monty/src/builtins/abs.rs ================================================ //! Implementation of the abs() builtin function. use num_bigint::BigInt; use num_traits::Signed; use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::{ExcType, RunResult, SimpleException}, heap::HeapData, resource::ResourceTracker, types::{LongInt, PyTrait}, value::Value, }; /// Implementation of the abs() builtin function. /// /// Returns the absolute value of a number. Works with integers, floats, and LongInts. /// For `i64::MIN`, which overflows on negation, promotes to LongInt. pub fn builtin_abs(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let value = args.get_one_arg("abs", vm.heap)?; defer_drop!(value, vm); match value { Value::Int(n) => { // Handle potential overflow for i64::MIN → promote to LongInt if let Some(abs_val) = n.checked_abs() { Ok(Value::Int(abs_val)) } else { // i64::MIN.abs() overflows, promote to LongInt let bi = BigInt::from(*n).abs(); Ok(LongInt::new(bi).into_value(vm.heap)?) } } Value::Float(f) => Ok(Value::Float(f.abs())), Value::Bool(b) => Ok(Value::Int(i64::from(*b))), Value::Ref(id) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { Ok(li.abs().into_value(vm.heap)?) 
} else { Err(SimpleException::new_msg( ExcType::TypeError, format!("bad operand type for abs(): '{}'", value.py_type(vm.heap)), ) .into()) } } _ => Err(SimpleException::new_msg( ExcType::TypeError, format!("bad operand type for abs(): '{}'", value.py_type(vm.heap)), ) .into()), } } ================================================ FILE: crates/monty/src/builtins/all.rs ================================================ //! Implementation of the all() builtin function. use crate::{ args::ArgValues, bytecode::VM, defer_drop, defer_drop_mut, exception_private::RunResult, resource::ResourceTracker, types::{MontyIter, PyTrait}, value::Value, }; /// Implementation of the all() builtin function. /// /// Returns True if all elements of the iterable are true (or if the iterable is empty). /// Short-circuits on the first falsy value. pub fn builtin_all(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let iterable = args.get_one_arg("all", vm.heap)?; let iter = MontyIter::new(iterable, vm)?; defer_drop_mut!(iter, vm); while let Some(item) = iter.for_next(vm)? { defer_drop!(item, vm); if !item.py_bool(vm) { return Ok(Value::Bool(false)); } } Ok(Value::Bool(true)) } ================================================ FILE: crates/monty/src/builtins/any.rs ================================================ //! Implementation of the any() builtin function. use crate::{ args::ArgValues, bytecode::VM, defer_drop, defer_drop_mut, exception_private::RunResult, resource::ResourceTracker, types::{MontyIter, PyTrait}, value::Value, }; /// Implementation of the any() builtin function. /// /// Returns True if any element of the iterable is true. /// Returns False for an empty iterable. Short-circuits on the first truthy value. 
pub fn builtin_any(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult<Value> {
    let iterable = args.get_one_arg("any", vm.heap)?;
    let iter = MontyIter::new(iterable, vm)?;
    defer_drop_mut!(iter, vm);

    while let Some(item) = iter.for_next(vm)? {
        defer_drop!(item, vm);
        if item.py_bool(vm) {
            return Ok(Value::Bool(true));
        }
    }

    Ok(Value::Bool(false))
}

================================================
FILE: crates/monty/src/builtins/bin.rs
================================================
//! Implementation of the bin() builtin function.

use num_bigint::BigInt;
use num_traits::Signed;

use crate::{
    args::ArgValues,
    bytecode::VM,
    defer_drop,
    exception_private::{ExcType, RunResult},
    heap::HeapData,
    resource::ResourceTracker,
    types::{PyTrait, Str},
    value::Value,
};

/// Implementation of the bin() builtin function.
///
/// Converts an integer to a binary string prefixed with '0b'.
/// Supports both i64 and BigInt integers. Negative values are rendered as
/// `-0b` followed by the digits of the magnitude; booleans render as `0b1`
/// or `0b0`. Any other operand raises `TypeError`.
pub fn builtin_bin(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult<Value> {
    let value = args.get_one_arg("bin", vm.heap)?;
    defer_drop!(value, vm);

    match value {
        Value::Int(n) => {
            // `unsigned_abs` avoids the overflow that negating `i64::MIN` would hit.
            let abs_digits = format!("{:b}", n.unsigned_abs());
            let prefix = if *n < 0 { "-0b" } else { "0b" };
            let heap_id = vm
                .heap
                .allocate(HeapData::Str(Str::new(format!("{prefix}{abs_digits}"))))?;
            Ok(Value::Ref(heap_id))
        }
        Value::Bool(b) => {
            let s = if *b { "0b1" } else { "0b0" };
            let heap_id = vm.heap.allocate(HeapData::Str(Str::new(s.to_string())))?;
            Ok(Value::Ref(heap_id))
        }
        Value::Ref(id) => {
            // Of the heap-allocated values, only LongInt is an integer.
            if let HeapData::LongInt(li) = vm.heap.get(*id) {
                let bin_str = format_bigint_bin(li.inner());
                let heap_id = vm.heap.allocate(HeapData::Str(Str::new(bin_str)))?;
                Ok(Value::Ref(heap_id))
            } else {
                Err(ExcType::type_error_not_integer(value.py_type(vm.heap)))
            }
        }
        _ => Err(ExcType::type_error_not_integer(value.py_type(vm.heap))),
    }
}

/// Formats a BigInt as a binary string with '0b' prefix.
fn format_bigint_bin(bi: &BigInt) -> String {
    // Format the magnitude and put the sign in front of the prefix ("-0b...").
    let is_negative = bi.is_negative();
    let abs_bi = bi.abs();
    let bin_digits = format!("{abs_bi:b}");
    let prefix = if is_negative { "-0b" } else { "0b" };
    format!("{prefix}{bin_digits}")
}

================================================
FILE: crates/monty/src/builtins/chr.rs
================================================
//! Implementation of the chr() builtin function.

use crate::{
    args::ArgValues,
    bytecode::VM,
    defer_drop,
    exception_private::{ExcType, RunResult, SimpleException},
    resource::ResourceTracker,
    types::{PyTrait, str::allocate_char},
    value::Value,
};

/// Implementation of the chr() builtin function.
///
/// Returns a string representing a character whose Unicode code point is the integer.
/// The valid range for the argument is from 0 through 1,114,111 (0x10FFFF).
///
/// # Errors
/// - `ValueError` if the integer is outside that range, or is a code point that
///   Rust's `char` cannot represent (the surrogate range 0xD800..=0xDFFF)
/// - `TypeError` if the argument is neither an `Int` nor a `Bool`
pub fn builtin_chr(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult<Value> {
    let value = args.get_one_arg("chr", vm.heap)?;
    defer_drop!(value, vm);

    match value {
        Value::Int(n) => {
            if *n < 0 || *n > 0x0010_FFFF {
                Err(SimpleException::new_msg(ExcType::ValueError, "chr() arg not in range(0x110000)").into())
            } else if let Some(c) = char::from_u32(u32::try_from(*n).expect("chr() range check failed")) {
                Ok(allocate_char(c, vm.heap)?)
            } else {
                // `char::from_u32` rejects surrogate code points (0xD800..=0xDFFF),
                // which pass the range check above; report them with the same error.
                Err(SimpleException::new_msg(ExcType::ValueError, "chr() arg not in range(0x110000)").into())
            }
        }
        Value::Bool(b) => {
            // bool is subclass of int
            let c = if *b { '\x01' } else { '\x00' };
            Ok(allocate_char(c, vm.heap)?)
        }
        _ => {
            let type_name = value.py_type(vm.heap);
            Err(SimpleException::new_msg(
                ExcType::TypeError,
                format!("an integer is required (got type {type_name})"),
            )
            .into())
        }
    }
}

================================================
FILE: crates/monty/src/builtins/divmod.rs
================================================
//! Implementation of the divmod() builtin function.
use num_bigint::BigInt; use num_integer::Integer; use smallvec::smallvec; use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::{ExcType, RunResult, SimpleException}, heap::HeapData, resource::{ResourceTracker, check_div_size}, types::{LongInt, PyTrait, allocate_tuple}, value::{Value, floor_divmod}, }; /// Implementation of the divmod() builtin function. /// /// Returns a tuple (quotient, remainder) from integer division. /// Equivalent to (a // b, a % b). pub fn builtin_divmod(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (a, b) = args.get_two_args("divmod", vm.heap)?; let a = super::round::normalize_bool_to_int(a); let b = super::round::normalize_bool_to_int(b); defer_drop!(a, vm); defer_drop!(b, vm); let heap = &mut *vm.heap; match (a, b) { (Value::Int(x), Value::Int(y)) => { if *y == 0 { Err(ExcType::divmod_by_zero()) } else if let Some((quot, rem)) = floor_divmod(*x, *y) { Ok(allocate_tuple(smallvec![Value::Int(quot), Value::Int(rem)], heap)?) } else { // Overflow - promote to BigInt check_div_size(64, heap.tracker())?; let (quot, rem) = bigint_floor_divmod(&BigInt::from(*x), &BigInt::from(*y)); let quot_val = LongInt::new(quot).into_value(heap)?; let rem_val = LongInt::new(rem).into_value(heap)?; Ok(allocate_tuple(smallvec![quot_val, rem_val], heap)?) } } (Value::Int(x), Value::Ref(id)) => { if let HeapData::LongInt(li) = heap.get(*id) { if li.is_zero() { Err(ExcType::divmod_by_zero()) } else { let x_bi = BigInt::from(*x); let (quot, rem) = bigint_floor_divmod(&x_bi, li.inner()); let quot_val = LongInt::new(quot).into_value(heap)?; let rem_val = LongInt::new(rem).into_value(heap)?; Ok(allocate_tuple(smallvec![quot_val, rem_val], heap)?) 
} } else { let a_type = a.py_type(heap); let b_type = b.py_type(heap); Err(SimpleException::new_msg( ExcType::TypeError, format!("unsupported operand type(s) for divmod(): '{a_type}' and '{b_type}'"), ) .into()) } } (Value::Ref(id), Value::Int(y)) => { if let HeapData::LongInt(li) = heap.get(*id) { if *y == 0 { Err(ExcType::divmod_by_zero()) } else { let y_bi = BigInt::from(*y); let (quot, rem) = bigint_floor_divmod(li.inner(), &y_bi); let quot_val = LongInt::new(quot).into_value(heap)?; let rem_val = LongInt::new(rem).into_value(heap)?; Ok(allocate_tuple(smallvec![quot_val, rem_val], heap)?) } } else { let a_type = a.py_type(heap); let b_type = b.py_type(heap); Err(SimpleException::new_msg( ExcType::TypeError, format!("unsupported operand type(s) for divmod(): '{a_type}' and '{b_type}'"), ) .into()) } } (Value::Ref(id1), Value::Ref(id2)) => { let x_bi = if let HeapData::LongInt(li) = heap.get(*id1) { li.inner().clone() } else { let a_type = a.py_type(heap); let b_type = b.py_type(heap); return Err(SimpleException::new_msg( ExcType::TypeError, format!("unsupported operand type(s) for divmod(): '{a_type}' and '{b_type}'"), ) .into()); }; if let HeapData::LongInt(li) = heap.get(*id2) { if li.is_zero() { Err(ExcType::divmod_by_zero()) } else { let (quot, rem) = bigint_floor_divmod(&x_bi, li.inner()); let quot_val = LongInt::new(quot).into_value(heap)?; let rem_val = LongInt::new(rem).into_value(heap)?; Ok(allocate_tuple(smallvec![quot_val, rem_val], heap)?) } } else { let a_type = a.py_type(heap); let b_type = b.py_type(heap); Err(SimpleException::new_msg( ExcType::TypeError, format!("unsupported operand type(s) for divmod(): '{a_type}' and '{b_type}'"), ) .into()) } } (Value::Float(x), Value::Float(y)) => { if *y == 0.0 { Err(ExcType::divmod_by_zero()) } else { let quot = (x / y).floor(); let rem = x - quot * y; Ok(allocate_tuple(smallvec![Value::Float(quot), Value::Float(rem)], heap)?) 
} } (Value::Int(x), Value::Float(y)) => { if *y == 0.0 { Err(ExcType::divmod_by_zero()) } else { let xf = *x as f64; let quot = (xf / y).floor(); let rem = xf - quot * y; Ok(allocate_tuple(smallvec![Value::Float(quot), Value::Float(rem)], heap)?) } } (Value::Float(x), Value::Int(y)) => { if *y == 0 { Err(ExcType::divmod_by_zero()) } else { let yf = *y as f64; let quot = (x / yf).floor(); let rem = x - quot * yf; Ok(allocate_tuple(smallvec![Value::Float(quot), Value::Float(rem)], heap)?) } } _ => { let a_type = a.py_type(heap); let b_type = b.py_type(heap); Err(SimpleException::new_msg( ExcType::TypeError, format!("unsupported operand type(s) for divmod(): '{a_type}' and '{b_type}'"), ) .into()) } } } /// Computes Python-style floor division and modulo for BigInts. /// /// Uses `div_mod_floor` from num_integer for correct floor semantics. fn bigint_floor_divmod(a: &BigInt, b: &BigInt) -> (BigInt, BigInt) { a.div_mod_floor(b) } ================================================ FILE: crates/monty/src/builtins/enumerate.rs ================================================ //! Implementation of the enumerate() builtin function. use smallvec::smallvec; use crate::{ args::ArgValues, bytecode::VM, defer_drop, defer_drop_mut, exception_private::{ExcType, RunResult, SimpleException}, heap::HeapData, resource::ResourceTracker, types::{List, MontyIter, PyTrait, allocate_tuple}, value::Value, }; /// Implementation of the enumerate() builtin function. /// /// Returns a list of (index, value) tuples. /// Note: In Python this returns an iterator, but we return a list for simplicity. 
pub fn builtin_enumerate(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (iterable, start) = args.get_one_two_args("enumerate", vm.heap)?; let iter = MontyIter::new(iterable, vm)?; defer_drop_mut!(iter, vm); defer_drop!(start, vm); // Get start index (default 0) let mut index: i64 = match start { Some(Value::Int(n)) => *n, Some(Value::Bool(b)) => i64::from(*b), Some(v) => { let type_name = v.py_type(vm.heap); return Err(SimpleException::new_msg( ExcType::TypeError, format!("'{type_name}' object cannot be interpreted as an integer"), ) .into()); } None => 0, }; let mut result: Vec = Vec::new(); while let Some(item) = iter.for_next(vm)? { // Create tuple (index, item) let tuple_val = allocate_tuple(smallvec![Value::Int(index), item], vm.heap)?; result.push(tuple_val); index += 1; } let heap_id = vm.heap.allocate(HeapData::List(List::new(result)))?; Ok(Value::Ref(heap_id)) } ================================================ FILE: crates/monty/src/builtins/filter.rs ================================================ //! Implementation of the filter() builtin function. //! //! This module provides the filter() builtin which filters elements from an iterable //! based on a predicate function. The implementation supports: //! - `None` as predicate (filters falsy values) //! - Builtin functions (len, abs, etc.) //! - Type constructors (int, str, float, etc.) //! - User-defined functions (via `vm.evaluate_function`) use crate::{ args::ArgValues, bytecode::VM, defer_drop, defer_drop_mut, exception_private::RunResult, heap::{HeapData, HeapGuard}, resource::ResourceTracker, types::{List, MontyIter, PyTrait}, value::Value, }; /// Implementation of the filter() builtin function. /// /// Filters elements from an iterable based on a predicate function. /// If the predicate is None, filters out falsy values. /// /// Note: In Python this returns an iterator, but we return a list for simplicity. 
///
/// Examples:
/// ```python
/// filter(lambda x: x > 0, [-1, 0, 1, 2])  # [1, 2]
/// filter(None, [0, 1, False, True, ''])   # [1, True]
/// ```
pub fn builtin_filter(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let (function, iterable) = args.get_two_args("filter", vm.heap)?;
    defer_drop!(function, vm);
    let iter = MontyIter::new(iterable, vm)?;
    defer_drop_mut!(iter, vm);

    // The output vector lives behind a guard for the whole loop and is only taken out
    // again (`into_parts`) once iteration has finished without error.
    // NOTE(review): presumably the guard releases the collected values if an early `?`
    // return drops it - confirm against HeapGuard.
    let out: Vec = Vec::new();
    let mut out_guard = HeapGuard::new(out, vm);
    let (out, vm) = out_guard.as_parts_mut();

    while let Some(item) = iter.for_next(vm)? {
        // Each item gets its own guard; it is only moved out (`into_inner`) when kept.
        let mut item_guard = HeapGuard::new(item, vm);
        let (item, vm) = item_guard.as_parts_mut();
        let should_include = if let Value::None = function {
            // No predicate - use truthiness of element
            item.py_bool(vm)
        } else {
            // Clone for predicate call - the clone is consumed by evaluate_function
            let item_for_predicate = item.clone_with_heap(vm);
            let result = vm.evaluate_function("filter()", function, ArgValues::One(item_for_predicate))?;
            // Only the truthiness of the predicate's return value is needed, so the
            // value itself is released immediately.
            let is_truthy = result.py_bool(vm);
            result.drop_with_heap(vm);
            is_truthy
        };
        if should_include {
            out.push(item_guard.into_inner());
        }
    }

    let (out, vm) = out_guard.into_parts();
    let heap_id = vm.heap.allocate(HeapData::List(List::new(out)))?;
    Ok(Value::Ref(heap_id))
}

================================================
FILE: crates/monty/src/builtins/getattr.rs
================================================
//! Implementation of the getattr() builtin function.

use crate::{
    ExcType,
    args::ArgValues,
    bytecode::{CallResult, VM},
    defer_drop,
    exception_private::{RunResult, SimpleException},
    resource::ResourceTracker,
    types::PyTrait,
    value::Value,
};

/// Implementation of the getattr() builtin function.
///
/// Returns the value of the named attribute of an object.
/// If the attribute doesn't exist and a default is provided, returns the default.
/// If no default is provided and the attribute doesn't exist, raises AttributeError.
///
/// Note: name must be a string.
Per Python docs, "Since private name mangling happens /// at compilation time, one must manually mangle a private attribute's (attributes with /// two leading underscores) name in order to retrieve it with getattr()." /// /// Examples: /// ```python /// getattr(obj, 'x') # Get obj.x /// getattr(obj, 'y', None) # Get obj.y or None if not found /// getattr(module, 'function') # Get module.function /// ``` pub fn builtin_getattr(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let positional = args.into_pos_only("getattr", vm.heap)?; defer_drop!(positional, vm); let (object, name, default) = match positional.as_slice() { too_few @ ([] | [_]) => return Err(ExcType::type_error_at_least("getattr", 2, too_few.len())), [object, name] => (object, name, None), [object, name, default] => (object, name, Some(default)), too_many => return Err(ExcType::type_error_at_most("getattr", 3, too_many.len())), }; let Some(attr) = name.as_either_str(vm.heap) else { let ty = name.py_type(vm.heap); return Err( SimpleException::new_msg(ExcType::TypeError, format!("attribute name must be string, not '{ty}'")).into(), ); }; match object.py_getattr(&attr, vm) { Ok(CallResult::Value(value)) => Ok(value), Ok(_) => { // getattr() only retrieves attribute values — OS calls, external calls, // method calls, and awaits are not supported here // // TODO: might need to support this case? Err(SimpleException::new_msg(ExcType::TypeError, "getattr(): attribute is not a simple value").into()) } Err(e) => { if let Some(d) = default { Ok(d.clone_with_heap(vm)) } else { Err(e) } } } } ================================================ FILE: crates/monty/src/builtins/hash.rs ================================================ //! Implementation of the hash() builtin function. use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::{ExcType, RunResult}, resource::ResourceTracker, types::PyTrait, value::Value, }; /// Implementation of the hash() builtin function. 
/// /// Returns the hash value of an object (if it has one). /// Raises TypeError for unhashable types like lists and dicts. pub fn builtin_hash(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let value = args.get_one_arg("hash", vm.heap)?; defer_drop!(value, vm); match value.py_hash(vm.heap, vm.interns)? { Some(hash) => { // Python's hash() returns a signed integer; reinterpret bits for large values let hash_i64 = i64::from_ne_bytes(hash.to_ne_bytes()); Ok(Value::Int(hash_i64)) } None => Err(ExcType::type_error_unhashable(value.py_type(vm.heap))), } } ================================================ FILE: crates/monty/src/builtins/hex.rs ================================================ //! Implementation of the hex() builtin function. use num_bigint::BigInt; use num_traits::Signed; use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::{ExcType, RunResult}, heap::HeapData, resource::ResourceTracker, types::{PyTrait, Str}, value::Value, }; /// Implementation of the hex() builtin function. /// /// Converts an integer to a lowercase hexadecimal string prefixed with '0x'. /// Supports both i64 and BigInt integers. 
pub fn builtin_hex(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let value = args.get_one_arg("hex", vm.heap)?; defer_drop!(value, vm); let heap = &mut *vm.heap; match value { Value::Int(n) => { let abs_digits = format!("{:x}", n.unsigned_abs()); let prefix = if *n < 0 { "-0x" } else { "0x" }; let heap_id = heap.allocate(HeapData::Str(Str::new(format!("{prefix}{abs_digits}"))))?; Ok(Value::Ref(heap_id)) } Value::Bool(b) => { let s = if *b { "0x1" } else { "0x0" }; let heap_id = heap.allocate(HeapData::Str(Str::new(s.to_string())))?; Ok(Value::Ref(heap_id)) } Value::Ref(id) => { if let HeapData::LongInt(li) = heap.get(*id) { let hex_str = format_bigint_hex(li.inner()); let heap_id = heap.allocate(HeapData::Str(Str::new(hex_str)))?; Ok(Value::Ref(heap_id)) } else { Err(ExcType::type_error_not_integer(value.py_type(heap))) } } _ => Err(ExcType::type_error_not_integer(value.py_type(heap))), } } /// Formats a BigInt as a hexadecimal string with '0x' prefix. fn format_bigint_hex(bi: &BigInt) -> String { let is_negative = bi.is_negative(); let abs_bi = bi.abs(); let hex_digits = format!("{abs_bi:x}"); let prefix = if is_negative { "-0x" } else { "0x" }; format!("{prefix}{hex_digits}") } ================================================ FILE: crates/monty/src/builtins/id.rs ================================================ //! Implementation of the id() builtin function. use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::RunResult, resource::ResourceTracker, value::Value, }; /// Implementation of the id() builtin function. /// /// Returns the identity of an object (unique integer for the object's lifetime). 
pub fn builtin_id(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let value = args.get_one_arg("id", vm.heap)?; defer_drop!(value, vm); let id = value.id(); // Python's id() returns a signed integer; reinterpret bits for large values // On 64-bit: large addresses wrap to negative; on 32-bit: always fits positive #[expect( clippy::cast_possible_wrap, reason = "Python id() returns signed; wrapping intentional" )] let id_i64 = id as i64; Ok(Value::Int(id_i64)) } ================================================ FILE: crates/monty/src/builtins/isinstance.rs ================================================ //! Implementation of the isinstance() builtin function. use super::Builtins; use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::{ExcType, RunResult}, heap::{Heap, HeapData}, resource::ResourceTracker, types::{PyTrait, Type}, value::Value, }; /// Implementation of the isinstance() builtin function. /// /// Checks if an object is an instance of a class or a tuple of classes. pub fn builtin_isinstance(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (obj, classinfo) = args.get_two_args("isinstance", vm.heap)?; defer_drop!(obj, vm); defer_drop!(classinfo, vm); let heap = &mut *vm.heap; let obj_type = obj.py_type(heap); match isinstance_check(obj_type, classinfo, heap) { Ok(result) => Ok(Value::Bool(result)), Err(()) => Err(ExcType::isinstance_arg2_error()), } } /// Recursively checks if obj_type matches classinfo for isinstance(). /// /// Returns `Ok(true)` if the type matches, `Ok(false)` if it doesn't, /// or `Err(())` if classinfo is invalid (not a type or tuple of types). 
/// /// Supports: /// - Single types: `isinstance(x, int)` /// - Exception types: `isinstance(err, ValueError)` /// - Exception hierarchy: `isinstance(err, LookupError)` for KeyError/IndexError /// - Nested tuples: `isinstance(x, (int, (str, bytes)))` fn isinstance_check(obj_type: Type, classinfo: &Value, heap: &Heap) -> Result { match classinfo { // Single type: isinstance(x, int) Value::Builtin(Builtins::Type(t)) => Ok(obj_type.is_instance_of(*t)), // Exception type: isinstance(err, ValueError) or isinstance(err, LookupError) Value::Builtin(Builtins::ExcType(handler_type)) => { // Check exception hierarchy using is_subclass_of Ok(matches!(obj_type, Type::Exception(exc_type) if exc_type.is_subclass_of(*handler_type))) } // Tuple of types (possibly nested): isinstance(x, (int, (str, bytes))) Value::Ref(id) => { if let HeapData::Tuple(tuple) = heap.get(*id) { for v in tuple.as_slice() { if isinstance_check(obj_type, v, heap)? { return Ok(true); } } Ok(false) } else { Err(()) // Not a tuple - invalid } } _ => Err(()), // Invalid classinfo } } ================================================ FILE: crates/monty/src/builtins/len.rs ================================================ //! Implementation of the len() builtin function. use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::{ExcType, RunResult, SimpleException}, resource::ResourceTracker, types::PyTrait, value::Value, }; /// Implementation of the len() builtin function. /// /// Returns the length of an object (number of items in a container). 
pub fn builtin_len(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("len", vm.heap)?;
    defer_drop!(value, vm);

    // `py_len` yields `None` for values that have no length.
    let Some(len) = value.py_len(vm) else {
        let type_name = value.py_type(vm.heap);
        let message = format!("object of type '{type_name}' has no len()");
        return Err(SimpleException::new_msg(ExcType::TypeError, message).into());
    };

    let len = i64::try_from(len).expect("len exceeds i64::MAX");
    Ok(Value::Int(len))
}

================================================
FILE: crates/monty/src/builtins/map.rs
================================================
//! Implementation of the map() builtin function.

use crate::{
    args::{ArgValues, KwargsValues},
    bytecode::VM,
    defer_drop, defer_drop_mut,
    exception_private::{ExcType, RunResult, SimpleException},
    heap::{DropWithHeap, HeapData},
    resource::ResourceTracker,
    types::{List, MontyIter},
    value::Value,
};

/// Implementation of the map() builtin function.
///
/// Applies a function to every item of one or more iterables and returns a list of results.
/// With multiple iterables, stops when the shortest iterable is exhausted.
///
/// Note: In Python this returns an iterator, but we return a list for simplicity.
/// Note: The `strict=` parameter is not yet supported.
///
/// Examples:
/// ```python
/// map(abs, [-1, 0, 1, 2])           # [1, 0, 1, 2]
/// map(pow, [2, 3], [3, 2])          # [8, 9]
/// map(str, [1, 2, 3])               # ['1', '2', '3']
/// ```
pub fn builtin_map(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let (positional, kwargs) = args.into_parts();
    defer_drop_mut!(positional, vm);
    kwargs.not_supported_yet("map", vm.heap)?;

    // Need the function plus at least one iterable.
    if positional.len() < 2 {
        return Err(SimpleException::new_msg(ExcType::TypeError, "map() must have at least two arguments.").into());
    }

    // Both `next()` calls are covered by the length check above.
    let function = positional.next().unwrap();
    defer_drop!(function, vm);
    let first_iterable = positional.next().expect("checked length above");
    let first_iter = MontyIter::new(first_iterable, vm)?;
    defer_drop_mut!(first_iter, vm);

    // Every remaining positional argument becomes an additional iterator.
    let extra_iterators: Vec = Vec::with_capacity(positional.len());
    defer_drop_mut!(extra_iterators, vm);
    for iterable in positional {
        extra_iterators.push(MontyIter::new(iterable, vm)?);
    }

    // NOTE(review): unlike filter()'s output, `out` is a plain Vec with no guard -
    // confirm whether collected results need releasing when a later call fails.
    let mut out = Vec::with_capacity(first_iter.size_hint(vm.heap));

    // map function over iterables until the shortest iter is exhausted
    match extra_iterators.as_mut_slice() {
        // map(f, iter)
        [] => {
            while let Some(item) = first_iter.for_next(vm)? {
                let args = ArgValues::One(item);
                out.push(vm.evaluate_function("map()", function, args)?);
            }
        }
        // map(f, iter1, iter2)
        [single] => {
            while let Some(arg1) = first_iter.for_next(vm)? {
                // The second iterator ran out first: release the already-fetched first
                // argument and stop.
                let Some(arg2) = single.for_next(vm)? else {
                    arg1.drop_with_heap(vm);
                    break;
                };
                let args = ArgValues::Two(arg1, arg2);
                out.push(vm.evaluate_function("map()", function, args)?);
            }
        }
        // map(f, iter1, iter2, *iterables)
        multiple => 'outer: loop {
            // Collect one item per iterator; the first exhausted iterator ends the
            // whole mapping and the partially collected row is released.
            let mut items = Vec::with_capacity(1 + multiple.len());
            for iter in std::iter::once(&mut *first_iter).chain(multiple.iter_mut()) {
                if let Some(item) = iter.for_next(vm)? {
                    items.push(item);
                } else {
                    items.drop_with_heap(vm);
                    break 'outer;
                }
            }
            let args = ArgValues::ArgsKargs {
                args: items,
                kwargs: KwargsValues::Empty,
            };
            out.push(vm.evaluate_function("map()", function, args)?);
        },
    }

    let heap_id = vm.heap.allocate(HeapData::List(List::new(out)))?;
    Ok(Value::Ref(heap_id))
}

================================================
FILE: crates/monty/src/builtins/min_max.rs
================================================
//! Implementation of the min() and max() builtin functions.

use std::cmp::Ordering;

use crate::{
    args::{ArgValues, KwargsValues},
    bytecode::VM,
    defer_drop, defer_drop_mut,
    exception_private::{ExcType, RunError, RunResult, SimpleException},
    heap::{Heap, HeapGuard},
    heap_traits::DropWithHeap,
    resource::ResourceTracker,
    types::{MontyIter, PyTrait},
    value::Value,
};

/// Implementation of the min() builtin function.
///
/// Returns the smallest item in an iterable or the smallest of two or more arguments.
/// Supports two forms:
/// - `min(iterable)` - returns smallest item from iterable
/// - `min(arg1, arg2, ...)` - returns smallest of the arguments
pub fn builtin_min(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    // Thin wrapper: the shared implementation is selected with `is_min = true`.
    builtin_min_max(vm, args, true)
}

/// Implementation of the max() builtin function.
///
/// Returns the largest item in an iterable or the largest of two or more arguments.
/// Supports two forms:
/// - `max(iterable)` - returns largest item from iterable
/// - `max(arg1, arg2, ...)` - returns largest of the arguments
pub fn builtin_max(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    // Thin wrapper: the shared implementation is selected with `is_min = false`.
    builtin_min_max(vm, args, false)
}

/// Shared implementation for min() and max().
///
/// When `is_min` is true, returns the minimum; otherwise returns the maximum.
fn builtin_min_max(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues, is_min: bool) -> RunResult {
    // `func_name` and `key_context` only feed error messages and the key-call context.
    let func_name = if is_min { "min" } else { "max" };
    let key_context = if is_min {
        "min() key argument"
    } else {
        "max() key argument"
    };
    let (positional, kwargs) = args.into_parts();
    defer_drop_mut!(positional, vm);

    // No positional arguments at all: release the kwargs and report the arity error.
    let Some(first_arg) = positional.next() else {
        kwargs.drop_with_heap(vm);
        return Err(SimpleException::new_msg(
            ExcType::TypeError,
            format!("{func_name} expected at least 1 argument, got 0"),
        )
        .into());
    };
    // `first_arg` stays behind a guard while the kwargs are parsed, so a kwargs error
    // returns with it still guarded; it is taken back out right afterwards.
    let mut first_arg_guard = HeapGuard::new(first_arg, vm);
    let (key_fn, default_value) = parse_min_max_kwargs(kwargs, func_name, first_arg_guard.heap())?;
    let (first_arg, vm) = first_arg_guard.into_parts();
    defer_drop!(key_fn, vm);
    let mut default_guard = HeapGuard::new(default_value, vm);
    let (default_value, vm) = default_guard.as_parts_mut();

    // decide what to do based on remaining arguments
    if positional.len() == 0 {
        // Single argument: iterate over it
        let iter = MontyIter::new(first_arg, vm)?;
        defer_drop_mut!(iter, vm);

        // Empty iterable: hand back `default=` if it was supplied, otherwise ValueError.
        let Some(result) = iter.for_next(vm)? else {
            if let Some(default) = default_value.take() {
                return Ok(default);
            }
            return Err(SimpleException::new_msg(
                ExcType::ValueError,
                format!("{func_name}() iterable argument is empty"),
            )
            .into());
        };

        if let Some(key_fn) = key_fn {
            // Track the best item together with its key; both are swapped whenever a
            // later candidate wins, so the loser ends up in the per-iteration guards.
            let mut result_guard = HeapGuard::new(result, vm);
            {
                let (result, vm) = result_guard.as_parts_mut();
                let result_key = evaluate_key(result.clone_with_heap(vm), key_fn, key_context, vm)?;
                let mut result_key_guard = HeapGuard::new(result_key, vm);
                {
                    let (result_key, vm) = result_key_guard.as_parts_mut();
                    while let Some(item) = iter.for_next(vm)? {
                        defer_drop_mut!(item, vm);
                        let item_key = evaluate_key(item.clone_with_heap(vm), key_fn, key_context, vm)?;
                        defer_drop_mut!(item_key, vm);
                        if candidate_wins(result_key, item_key, is_min, vm)? {
                            std::mem::swap(result, item);
                            std::mem::swap(result_key, item_key);
                        }
                    }
                }
                // The winning key was only needed for comparisons; release it.
                let result_key = result_key_guard.into_inner();
                result_key.drop_with_heap(vm);
            }
            Ok(result_guard.into_inner())
        } else {
            // No key function: compare the items themselves.
            let mut result_guard = HeapGuard::new(result, vm);
            let (result, vm) = result_guard.as_parts_mut();
            while let Some(item) = iter.for_next(vm)? {
                defer_drop_mut!(item, vm);
                if candidate_wins(result, item, is_min, vm)? {
                    std::mem::swap(result, item);
                }
            }
            Ok(result_guard.into_inner())
        }
    } else {
        // Multiple arguments: compare them directly
        if default_value.is_some() {
            // `default=` is rejected in this form; `first_arg` is not guarded at this
            // point, so it is released by hand before returning.
            first_arg.drop_with_heap(vm);
            return Err(default_with_multiple_args(func_name));
        }

        if let Some(key_fn) = key_fn {
            // Same best-item/best-key tracking as the iterable form, but the candidates
            // are the remaining positional arguments.
            let mut result_guard = HeapGuard::new(first_arg, vm);
            {
                let (result, vm) = result_guard.as_parts_mut();
                let result_key = evaluate_key(result.clone_with_heap(vm), key_fn, key_context, vm)?;
                let mut result_key_guard = HeapGuard::new(result_key, vm);
                {
                    let (result_key, vm) = result_key_guard.as_parts_mut();
                    for item in positional {
                        defer_drop_mut!(item, vm);
                        let item_key = evaluate_key(item.clone_with_heap(vm), key_fn, key_context, vm)?;
                        defer_drop_mut!(item_key, vm);
                        if candidate_wins(result_key, item_key, is_min, vm)? {
                            std::mem::swap(result, item);
                            std::mem::swap(result_key, item_key);
                        }
                    }
                }
                // The winning key was only needed for comparisons; release it.
                let result_key = result_key_guard.into_inner();
                result_key.drop_with_heap(vm);
            }
            Ok(result_guard.into_inner())
        } else {
            // No key function: compare the arguments themselves.
            let mut result_guard = HeapGuard::new(first_arg, vm);
            let (result, vm) = result_guard.as_parts_mut();
            for item in positional {
                defer_drop_mut!(item, vm);
                if candidate_wins(result, item, is_min, vm)? {
                    std::mem::swap(result, item);
                }
            }
            Ok(result_guard.into_inner())
        }
    }
}

/// Parses `key=` and `default=` for min()/max().
///
/// Returns `(key_fn, default_value)`. Passing `key=None` is normalized to `None`
/// so the comparison logic can treat it the same as omitting the keyword.
fn parse_min_max_kwargs( kwargs: KwargsValues, func_name: &str, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<(Option, Option)> { let (key_fn, default_value) = kwargs.parse_named_kwargs_pair( func_name, "key", "default", vm.heap, vm.interns, ExcType::type_error_unexpected_keyword, )?; let key_fn = match key_fn { Some(value) if matches!(value, Value::None) => { value.drop_with_heap(vm); None } other => other, }; Ok((key_fn, default_value)) } /// Calls the user-provided key function for a single candidate value. /// /// The caller passes an owned clone of the candidate so this helper can forward it /// into the function call without changing ownership of the original item being /// tracked as the eventual min/max result. fn evaluate_key( item: Value, key_fn: &Value, key_context: &'static str, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult { vm.evaluate_function(key_context, key_fn, ArgValues::One(item)) } /// Returns whether `candidate` should replace `current` as the best value seen so far. /// /// `min()` replaces the current winner when the new candidate compares smaller, /// while `max()` replaces it when the new candidate compares larger. Equal values /// keep the existing winner so ties preserve the first-seen item, matching CPython. fn candidate_wins( current: &Value, candidate: &Value, is_min: bool, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult { let Some(ordering) = candidate.py_cmp(current, vm)? else { return Err(ord_not_supported(candidate, current, is_min, vm.heap)); }; Ok((is_min && ordering == Ordering::Less) || (!is_min && ordering == Ordering::Greater)) } /// Creates the CPython-compatible error for `default=` with multiple positional args. 
#[cold]
fn default_with_multiple_args(func_name: &str) -> RunError {
    SimpleException::new_msg(
        ExcType::TypeError,
        format!("Cannot specify a default for {func_name}() with multiple positional arguments"),
    )
    .into()
}

/// Builds the TypeError raised when two values cannot be ordered against each other.
///
/// The operator shown in the message is `<` for min() and `>` for max(); `left` and
/// `right` are reported in the order given.
#[cold]
fn ord_not_supported(left: &Value, right: &Value, is_min: bool, heap: &Heap) -> RunError {
    let left_type = left.py_type(heap);
    let right_type = right.py_type(heap);
    let operator = if is_min { '<' } else { '>' };
    ExcType::type_error(format!(
        "'{operator}' not supported between instances of '{left_type}' and '{right_type}'"
    ))
}

================================================
FILE: crates/monty/src/builtins/mod.rs
================================================
//! Python builtin functions, types, and exception constructors.
//!
//! This module provides the interpreter-native implementation of Python builtins.
//! Each builtin function has its own submodule for organization.

mod abs;
mod all;
mod any;
mod bin;
mod chr;
mod divmod;
mod enumerate;
mod filter;
mod getattr;
mod hash;
mod hex;
mod id;
mod isinstance;
mod len;
mod map;
mod min_max; // min and max share implementation
mod next;
mod oct;
mod ord;
mod pow;
mod print;
mod repr;
mod reversed;
mod round;
mod sorted;
mod sum;
mod type_;
mod zip;

use std::{fmt::Write, str::FromStr};

use strum::{Display, EnumString, FromRepr, IntoStaticStr};

use crate::{
    args::ArgValues,
    bytecode::VM,
    exception_private::{ExcType, RunResult},
    resource::ResourceTracker,
    types::Type,
    value::Value,
};

/// Enumerates every interpreter-native Python builtin.
///
/// A builtin is a builtin function, an exception type constructor, or a type
/// constructor. This enum itself has no strum derives; parsing a name goes through the
/// hand-written `FromStr` impl below, which delegates to the wrapped types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub(crate) enum Builtins {
    /// A builtin function like `print`, `len`, `type`, etc.
    Function(BuiltinsFunctions),
    /// An exception type constructor like `ValueError`, `TypeError`, etc.
    ExcType(ExcType),
    /// A type constructor like `list`, `dict`, `int`, etc.
    Type(Type),
}

impl Builtins {
    /// Calls this builtin with the given arguments.
    ///
    /// Dispatches to the `call` method of whichever variant is wrapped.
    pub fn call(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
        match self {
            Self::Function(b) => b.call(vm, args),
            Self::ExcType(exc) => exc.call(vm, args),
            Self::Type(t) => t.call(vm, args),
        }
    }

    /// Writes the Python repr() string for this callable to a formatter.
    pub fn py_repr_fmt(self, f: &mut W) -> std::fmt::Result {
        // NOTE(review): as shown here every arm writes an empty literal and leaves its
        // binding unused - confirm the intended repr text against the original source.
        match self {
            Self::Function(b) => write!(f, ""),
            Self::ExcType(e) => write!(f, ""),
            Self::Type(t) => write!(f, ""),
        }
    }

    /// Returns the type of this builtin.
    ///
    /// Functions report `Type::BuiltinFunction`; exception and type constructors both
    /// report `Type::Type`.
    pub fn py_type(self) -> Type {
        match self {
            Self::Function(_) => Type::BuiltinFunction,
            Self::ExcType(_) => Type::Type,
            Self::Type(_) => Type::Type,
        }
    }
}

impl FromStr for Builtins {
    type Err = ();

    /// Resolves a builtin by name, returning `Err(())` when the name matches none.
    fn from_str(s: &str) -> Result {
        // Priority: BuiltinsFunctions > ExcType > Type
        // Only matches names that are true Python builtins (accessible without imports).
        if let Ok(b) = BuiltinsFunctions::from_str(s) {
            Ok(Self::Function(b))
        } else if let Ok(exc) = ExcType::from_str(s) {
            Ok(Self::ExcType(exc))
        } else if let Some(t) = Type::from_builtin_name(s) {
            Ok(Self::Type(t))
        } else {
            Err(())
        }
    }
}

/// Enumerates every interpreter-native Python builtin function.
///
/// Listed alphabetically per https://docs.python.org/3/library/functions.html
/// Commented-out variants are not yet implemented.
///
/// Note: Type constructors are handled by the `Type` enum, not here.
///
/// Uses strum derives for automatic `Display`, `FromStr`, and `IntoStaticStr` implementations.
/// All variants serialize to lowercase (e.g., `Print` -> "print").
#[derive(
    Debug,
    Clone,
    Copy,
    Display,
    EnumString,
    FromRepr,
    IntoStaticStr,
    PartialEq,
    Eq,
    Hash,
    serde::Serialize,
    serde::Deserialize,
)]
#[strum(serialize_all = "lowercase")]
// With `repr(u8)` and no explicit discriminants, each variant's numeric value follows
// its position in this list, so inserting or reordering variants changes the value
// `FromRepr` maps back to a variant.
#[repr(u8)]
pub enum BuiltinsFunctions {
    Abs,
    // Aiter,
    All,
    // Anext,
    Any,
    // Ascii,
    Bin,
    // bool - handled by Type enum
    // Breakpoint,
    // bytearray - handled by Type enum
    // bytes - handled by Type enum
    // Callable,
    Chr,
    // Classmethod,
    // Compile,
    // complex - handled by Type enum
    // Delattr,
    // dict - handled by Type enum
    // Dir,
    Divmod,
    Enumerate,
    // Eval,
    // Exec,
    Filter,
    // float - handled by Type enum
    // Format,
    // frozenset - handled by Type enum
    Getattr,
    // Globals,
    // Hasattr,
    Hash,
    // Help,
    Hex,
    Id,
    // Input,
    // int - handled by Type enum
    Isinstance,
    // Issubclass,
    // Iter - handled by Type enum
    Len,
    // list - handled by Type enum
    // Locals,
    Map,
    Max,
    // memoryview - handled by Type enum
    Min,
    Next,
    // object - handled by Type enum
    Oct,
    // Open,
    Ord,
    Pow,
    Print,
    // Property,
    // range - handled by Type enum
    Repr,
    Reversed,
    Round,
    // set - handled by Type enum
    // Setattr,
    // Slice,
    Sorted,
    // Staticmethod,
    // str - handled by Type enum
    Sum,
    // Super,
    // tuple - handled by Type enum
    Type,
    // Vars,
    Zip,
    // __import__ - not planned
}

impl BuiltinsFunctions {
    /// Executes the builtin with the provided arguments.
    ///
    /// All builtins receive the full VM context, which provides access to the heap,
    /// interned strings, and print output.
    ///
    /// The match is exhaustive, with one arm per variant forwarding to the matching
    /// `builtin_*` function in its submodule; `Min` and `Max` both live in `min_max`.
    pub(crate) fn call(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
        match self {
            Self::Abs => abs::builtin_abs(vm, args),
            Self::All => all::builtin_all(vm, args),
            Self::Any => any::builtin_any(vm, args),
            Self::Bin => bin::builtin_bin(vm, args),
            Self::Chr => chr::builtin_chr(vm, args),
            Self::Divmod => divmod::builtin_divmod(vm, args),
            Self::Enumerate => enumerate::builtin_enumerate(vm, args),
            Self::Filter => filter::builtin_filter(vm, args),
            Self::Getattr => getattr::builtin_getattr(vm, args),
            Self::Hash => hash::builtin_hash(vm, args),
            Self::Hex => hex::builtin_hex(vm, args),
            Self::Id => id::builtin_id(vm, args),
            Self::Isinstance => isinstance::builtin_isinstance(vm, args),
            Self::Len => len::builtin_len(vm, args),
            Self::Map => map::builtin_map(vm, args),
            Self::Max => min_max::builtin_max(vm, args),
            Self::Min => min_max::builtin_min(vm, args),
            Self::Next => next::builtin_next(vm, args),
            Self::Oct => oct::builtin_oct(vm, args),
            Self::Ord => ord::builtin_ord(vm, args),
            Self::Pow => pow::builtin_pow(vm, args),
            Self::Print => print::builtin_print(vm, args),
            Self::Repr => repr::builtin_repr(vm, args),
            Self::Reversed => reversed::builtin_reversed(vm, args),
            Self::Round => round::builtin_round(vm, args),
            Self::Sorted => sorted::builtin_sorted(vm, args),
            Self::Sum => sum::builtin_sum(vm, args),
            Self::Type => type_::builtin_type(vm, args),
            Self::Zip => zip::builtin_zip(vm, args),
        }
    }
}

================================================
FILE: crates/monty/src/builtins/next.rs
================================================
//! Implementation of the next() builtin function.

use crate::{
    args::ArgValues, bytecode::VM, defer_drop, exception_private::RunResult, resource::ResourceTracker,
    types::iter::iterator_next, value::Value,
};

/// Implementation of the next() builtin function.
///
/// Retrieves the next item from an iterator.
///
/// Two forms are supported:
/// - `next(iterator)` - Returns the next item from the iterator.
Raises /// `StopIteration` when the iterator is exhausted. /// - `next(iterator, default)` - Returns the next item from the iterator, or /// `default` if the iterator is exhausted. pub fn builtin_next(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (iterator, default) = args.get_one_two_args("next", vm.heap)?; defer_drop!(iterator, vm); iterator_next(iterator, default, vm.heap, vm.interns) } ================================================ FILE: crates/monty/src/builtins/oct.rs ================================================ //! Implementation of the oct() builtin function. use num_bigint::BigInt; use num_traits::Signed; use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::{ExcType, RunResult}, heap::HeapData, resource::ResourceTracker, types::{PyTrait, Str}, value::Value, }; /// Implementation of the oct() builtin function. /// /// Converts an integer to an octal string prefixed with '0o'. /// Supports both i64 and BigInt integers. pub fn builtin_oct(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let value = args.get_one_arg("oct", vm.heap)?; defer_drop!(value, vm); match value { Value::Int(n) => { let abs_digits = format!("{:o}", n.unsigned_abs()); let prefix = if *n < 0 { "-0o" } else { "0o" }; let heap_id = vm .heap .allocate(HeapData::Str(Str::new(format!("{prefix}{abs_digits}"))))?; Ok(Value::Ref(heap_id)) } Value::Bool(b) => { let s = if *b { "0o1" } else { "0o0" }; let heap_id = vm.heap.allocate(HeapData::Str(Str::new(s.to_string())))?; Ok(Value::Ref(heap_id)) } Value::Ref(id) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { let oct_str = format_bigint_oct(li.inner()); let heap_id = vm.heap.allocate(HeapData::Str(Str::new(oct_str)))?; Ok(Value::Ref(heap_id)) } else { Err(ExcType::type_error_not_integer(value.py_type(vm.heap))) } } _ => Err(ExcType::type_error_not_integer(value.py_type(vm.heap))), } } /// Formats a BigInt as an octal string with '0o' prefix. 
fn format_bigint_oct(bi: &BigInt) -> String {
    // The sign goes in front of the `0o` marker; the digits are those of the magnitude.
    let sign = if bi.is_negative() { "-" } else { "" };
    format!("{sign}0o{:o}", bi.abs())
}

================================================
FILE: crates/monty/src/builtins/ord.rs
================================================
//! Implementation of the ord() builtin function.

use crate::{
    args::ArgValues,
    bytecode::VM,
    defer_drop,
    exception_private::{ExcType, RunResult, SimpleException},
    heap::HeapData,
    resource::ResourceTracker,
    types::PyTrait,
    value::Value,
};

/// Implementation of the ord() builtin function.
///
/// Returns the Unicode code point of a one-character string.
///
/// Both interned strings and heap-allocated strings are accepted. A string whose
/// character count is not exactly one, or a non-string argument, produces a
/// `TypeError`.
pub fn builtin_ord(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("ord", vm.heap)?;
    defer_drop!(value, vm);

    // Resolve the argument to its text, wherever the string is stored.
    let text = match value {
        Value::InternString(string_id) => Some(vm.interns.get_str(*string_id)),
        Value::Ref(id) => match vm.heap.get(*id) {
            HeapData::Str(s) => Some(s.as_str()),
            _ => None,
        },
        _ => None,
    };

    // Anything that is not a string is reported by type name.
    let Some(text) = text else {
        let type_name = value.py_type(vm.heap);
        return Err(SimpleException::new_msg(
            ExcType::TypeError,
            format!("ord() expected string of length 1, but {type_name} found"),
        )
        .into());
    };

    // Exactly one character: a first item exists and a second does not.
    let mut chars = text.chars();
    match (chars.next(), chars.next()) {
        (Some(c), None) => Ok(Value::Int(c as i64)),
        _ => {
            let len = text.chars().count();
            Err(SimpleException::new_msg(
                ExcType::TypeError,
                format!("ord() expected a character, but string of length {len} found"),
            )
            .into())
        }
    }
}
================================================ FILE: crates/monty/src/builtins/pow.rs ================================================ //! Implementation of the pow() builtin function. use num_bigint::BigInt; use num_traits::{Signed, ToPrimitive, Zero}; use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::{ExcType, RunResult, SimpleException}, heap::{Heap, HeapData}, resource::{ResourceTracker, check_pow_size}, types::{LongInt, PyTrait}, value::Value, }; /// Implementation of the pow() builtin function. /// /// Returns base to the power exp. With three arguments, returns (base ** exp) % mod. /// Handles negative exponents by returning a float. pub fn builtin_pow(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { // pow() accepts 2 or 3 arguments let positional = args.into_pos_only("pow", vm.heap)?; defer_drop!(positional, vm); match positional.as_slice() { [base, exp] => { let base = normalize_bool(base); let exp = normalize_bool(exp); two_arg_pow(base, exp, vm.heap) } [base, exp, m] => { let base = normalize_bool(base); let exp = normalize_bool(exp); let m = normalize_bool(m); // Three-argument pow: modular exponentiation match (base, exp, m) { (Value::Int(b), Value::Int(e), Value::Int(m_val)) => { if *m_val == 0 { Err(SimpleException::new_msg(ExcType::ValueError, "pow() 3rd argument cannot be 0").into()) } else if *e < 0 { Err(SimpleException::new_msg( ExcType::ValueError, "pow() 2nd argument cannot be negative when 3rd argument specified", ) .into()) } else { // Use modular exponentiation let result = mod_pow( *b, u64::try_from(*e).expect("pow exponent >= 0 but failed u64 conversion"), *m_val, ); Ok(Value::Int(result)) } } _ => Err(SimpleException::new_msg( ExcType::TypeError, "pow() 3rd argument not allowed unless all arguments are integers", ) .into()), } } args => Err(SimpleException::new_msg( ExcType::TypeError, format!("pow expected 2 or 3 arguments, got {}", args.len()), ) .into()), } } /// Normalizes a 
`Bool` to its `Int` equivalent by reference. /// /// Returns `&Value::Int(0)` or `&Value::Int(1)` for bools (using static storage), /// and the original reference unchanged for all other types. fn normalize_bool(value: &Value) -> &Value { static FALSE_INT: Value = Value::Int(0); static TRUE_INT: Value = Value::Int(1); match value { Value::Bool(false) => &FALSE_INT, Value::Bool(true) => &TRUE_INT, other => other, } } /// Computes (base^exp) % modulo using binary exponentiation. /// /// Handles negative bases correctly using Python's modulo semantics. fn mod_pow(base: i64, exp: u64, modulo: i64) -> i64 { if modulo == 1 { return 0; } let modulo_u = u128::from(modulo.unsigned_abs()); let mut result: u128 = 1; let mut b = base.rem_euclid(modulo) as u128; let mut e = exp; while e > 0 { if e % 2 == 1 { result = (result * b) % modulo_u; } e /= 2; b = (b * b) % modulo_u; } // Convert back to signed, handling negative modulo // result < modulo_u <= i64::MAX as u128, so this conversion is safe let result_i64 = i64::try_from(result).expect("mod_pow result exceeds i64::MAX"); if modulo < 0 && result_i64 > 0 { result_i64 + modulo } else { result_i64 } } fn checked_pow_i64(mut base: i64, mut exp: u32) -> Option { let mut result: i64 = 1; while exp > 0 { if exp & 1 == 1 { result = result.checked_mul(base)?; } exp >>= 1; if exp > 0 { base = base.checked_mul(base)?; } } Some(result) } /// Implements two-argument pow with LongInt support. /// /// On overflow, promotes to LongInt instead of returning an error. 
// Dispatches on the operand representations: `Value::Int` (i64), `Value::Ref`
// (expected to be a heap `LongInt`) and `Value::Float`. Combinations not listed,
// and heap references that are not a `LongInt`, produce a binary-operator type
// error for "** or pow()". A zero base with a negative exponent produces
// `zero_negative_power` in the float arms.
fn two_arg_pow(base: &Value, exp: &Value, heap: &mut Heap) -> RunResult {
    match (base, exp) {
        (Value::Int(b), Value::Int(e)) => int_pow_int(*b, *e, heap),
        (Value::Int(b), Value::Ref(id)) => {
            // Clone to avoid borrow conflict with heap mutation
            let e_bi = if let HeapData::LongInt(li) = heap.get(*id) {
                li.inner().clone()
            } else {
                return Err(ExcType::binary_type_error(
                    "** or pow()",
                    base.py_type(heap),
                    exp.py_type(heap),
                ));
            };
            int_pow_longint(*b, &e_bi, heap)
        }
        (Value::Ref(id), Value::Int(e)) => {
            // Clone to avoid borrow conflict with heap mutation
            let b_bi = if let HeapData::LongInt(li) = heap.get(*id) {
                li.inner().clone()
            } else {
                return Err(ExcType::binary_type_error(
                    "** or pow()",
                    base.py_type(heap),
                    exp.py_type(heap),
                ));
            };
            longint_pow_int(&b_bi, *e, heap)
        }
        (Value::Ref(id1), Value::Ref(id2)) => {
            // Clone both to avoid borrow conflict with heap mutation
            let b_bi = if let HeapData::LongInt(li) = heap.get(*id1) {
                li.inner().clone()
            } else {
                return Err(ExcType::binary_type_error(
                    "** or pow()",
                    base.py_type(heap),
                    exp.py_type(heap),
                ));
            };
            let e_bi = if let HeapData::LongInt(li) = heap.get(*id2) {
                li.inner().clone()
            } else {
                return Err(ExcType::binary_type_error(
                    "** or pow()",
                    base.py_type(heap),
                    exp.py_type(heap),
                ));
            };
            longint_pow_longint(&b_bi, &e_bi, heap)
        }
        (Value::Float(b), Value::Float(e)) => {
            if *b == 0.0 && *e < 0.0 {
                Err(ExcType::zero_negative_power())
            } else {
                Ok(Value::Float(b.powf(*e)))
            }
        }
        (Value::Int(b), Value::Float(e)) => {
            if *b == 0 && *e < 0.0 {
                Err(ExcType::zero_negative_power())
            } else {
                Ok(Value::Float((*b as f64).powf(*e)))
            }
        }
        (Value::Float(b), Value::Int(e)) => {
            if *b == 0.0 && *e < 0 {
                Err(ExcType::zero_negative_power())
            } else if let Ok(exp_i32) = i32::try_from(*e) {
                // Integer exponents that fit in i32 use `powi`; larger ones fall back
                // to `powf` with the exponent converted to f64.
                Ok(Value::Float(b.powi(exp_i32)))
            } else {
                Ok(Value::Float(b.powf(*e as f64)))
            }
        }
        _ => Err(ExcType::binary_type_error(
            "** or pow()",
            base.py_type(heap),
            exp.py_type(heap),
        )),
    }
}

/// int ** int with LongInt promotion on overflow.
// Three regimes: a negative exponent yields a float (or `zero_negative_power` for a
// zero base); an exponent that fits in u32 is tried in i64 first and recomputed as
// a BigInt on overflow; a larger exponent goes through `bigint_pow_large`.
fn int_pow_int(b: i64, e: i64, heap: &mut Heap) -> RunResult {
    if e < 0 {
        // Negative exponent returns float
        if b == 0 {
            return Err(ExcType::zero_negative_power());
        }
        Ok(Value::Float((b as f64).powf(e as f64)))
    } else if let Ok(exp_u32) = u32::try_from(e) {
        if let Some(v) = checked_pow_i64(b, exp_u32) {
            Ok(Value::Int(v))
        } else {
            // Overflow - promote to LongInt
            // Check size before computing to prevent DoS
            check_pow_size(i64_bits(b), u64::from(exp_u32), heap.tracker())?;
            let bi = BigInt::from(b).pow(exp_u32);
            Ok(LongInt::new(bi).into_value(heap)?)
        }
    } else {
        // Exponent too large for u32 - use BigInt for result
        // Safety: e >= 0 at this point
        #[expect(clippy::cast_sign_loss)]
        let exp_u64 = e as u64;
        // Check size before computing to prevent DoS
        check_pow_size(i64_bits(b), exp_u64, heap.tracker())?;
        let base_bi = BigInt::from(b);
        let bi = bigint_pow_large(&base_bi, exp_u64)?;
        Ok(LongInt::new(bi).into_value(heap)?)
    }
}

/// int ** LongInt.
///
/// A negative exponent yields a float (0.0 when the exponent has no `f64`
/// conversion) or `zero_negative_power` for a zero base. For non-negative
/// exponents the bases 0, 1 and -1 are answered directly as small ints; any other
/// base is computed as a BigInt when the exponent fits in u32 and otherwise fails
/// with `overflow_exponent_too_large`.
fn int_pow_longint(b: i64, e: &BigInt, heap: &mut Heap) -> RunResult {
    if b == 0 && e.is_negative() {
        return Err(ExcType::zero_negative_power());
    }

    if e.is_negative() {
        // Negative LongInt exponent: return float
        if let Some(e_f64) = e.to_f64() {
            Ok(Value::Float((b as f64).powf(e_f64)))
        } else {
            Ok(Value::Float(0.0))
        }
    } else if e.is_zero() {
        // x ** 0 = 1 for all x (including 0 ** 0 = 1)
        Ok(Value::Int(1))
    } else if b == 0 {
        Ok(Value::Int(0))
    } else if b == 1 {
        Ok(Value::Int(1))
    } else if b == -1 {
        // (-1) ** n = 1 if n is even, -1 if n is odd
        let is_even = (e % 2i32).is_zero();
        Ok(Value::Int(if is_even { 1 } else { -1 }))
    } else if let Some(exp_u32) = e.to_u32() {
        // Check size before computing to prevent DoS
        check_pow_size(i64_bits(b), u64::from(exp_u32), heap.tracker())?;
        let bi = BigInt::from(b).pow(exp_u32);
        Ok(LongInt::new(bi).into_value(heap)?)
    } else {
        // Exponent too large
        Err(ExcType::overflow_exponent_too_large())
    }
}

/// LongInt ** int with LongInt result.
fn longint_pow_int(b: &BigInt, e: i64, heap: &mut Heap) -> RunResult { if b.is_zero() && e < 0 { return Err(ExcType::zero_negative_power()); } if e < 0 { // Negative exponent: return float if let (Some(b_f64), Some(e_f64)) = (b.to_f64(), Some(e as f64)) { Ok(Value::Float(b_f64.powf(e_f64))) } else { Ok(Value::Float(0.0)) } } else if let Ok(exp_u32) = u32::try_from(e) { // Check size before computing to prevent DoS check_pow_size(b.bits(), u64::from(exp_u32), heap.tracker())?; let bi = b.pow(exp_u32); Ok(LongInt::new(bi).into_value(heap)?) } else { // Exponent too large for u32 // Safety: e >= 0 at this point #[expect(clippy::cast_sign_loss)] let exp_u64 = e as u64; // Check size before computing to prevent DoS check_pow_size(b.bits(), exp_u64, heap.tracker())?; let bi = bigint_pow_large(b, exp_u64)?; Ok(LongInt::new(bi).into_value(heap)?) } } /// LongInt ** LongInt with LongInt result. fn longint_pow_longint(b: &BigInt, e: &BigInt, heap: &mut Heap) -> RunResult { if b.is_zero() && e.is_negative() { return Err(ExcType::zero_negative_power()); } if e.is_negative() { // Negative exponent: return float if let (Some(b_f64), Some(e_f64)) = (b.to_f64(), e.to_f64()) { Ok(Value::Float(b_f64.powf(e_f64))) } else { Ok(Value::Float(0.0)) } } else if let Some(exp_u32) = e.to_u32() { // Check size before computing to prevent DoS check_pow_size(b.bits(), u64::from(exp_u32), heap.tracker())?; let bi = b.pow(exp_u32); Ok(LongInt::new(bi).into_value(heap)?) } else { // Exponent too large Err(ExcType::overflow_exponent_too_large()) } } /// BigInt power for large exponents (> u32::MAX). /// /// This handles exponents that are too large for the standard pow function. /// For most bases, the result would be astronomically large, so we only handle /// special cases (0, 1, -1) and return an error for others. 
fn bigint_pow_large(base: &BigInt, exp: u64) -> RunResult { if base.is_zero() { Ok(BigInt::from(0)) } else if *base == BigInt::from(1) { Ok(BigInt::from(1)) } else if *base == BigInt::from(-1) { // (-1) ** n = 1 if n is even, -1 if n is odd if exp.is_multiple_of(2) { Ok(BigInt::from(1)) } else { Ok(BigInt::from(-1)) } } else { // For any other base, exponent > u32::MAX would produce an astronomically large result Err(ExcType::overflow_exponent_too_large()) } } /// Computes the number of significant bits in an i64. fn i64_bits(value: i64) -> u64 { if value == 0 { 0 } else { u64::from(64 - value.unsigned_abs().leading_zeros()) } } ================================================ FILE: crates/monty/src/builtins/print.rs ================================================ //! Implementation of the print() builtin function. use crate::{ args::{ArgValues, KwargsValues}, bytecode::VM, defer_drop, exception_private::{ExcType, RunError, RunResult, SimpleException}, heap::{Heap, HeapData}, intern::Interns, resource::ResourceTracker, types::PyTrait, value::Value, }; /// Implementation of the print() builtin function. /// /// Supports the following keyword arguments: /// - `sep`: separator between values (default: " ") /// - `end`: string appended after the last value (default: "\n") /// - `flush`: whether to flush the stream (accepted but ignored) /// /// The `file` kwarg is not supported. 
pub fn builtin_print(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    // Split into positional args and kwargs
    let (positional, kwargs) = args.into_parts();
    defer_drop!(positional, vm);

    // Extract kwargs first
    let (sep, end) = extract_print_kwargs(kwargs, vm.heap, vm.interns)?;

    // Write each positional value, emitting the separator between consecutive values
    let mut first = true;
    for value in positional.as_slice() {
        if first {
            first = false;
        } else if let Some(sep) = &sep {
            vm.print_writer.stdout_write(sep.as_str().into())?;
        } else {
            // No custom separator: fall back to a single space.
            vm.print_writer.stdout_push(' ')?;
        }
        vm.print_writer.stdout_write(value.py_str(vm))?;
    }

    // Append end string
    if let Some(end) = end {
        vm.print_writer.stdout_write(end.into())?;
    } else {
        // No custom terminator: fall back to a newline.
        vm.print_writer.stdout_push('\n')?;
    }

    Ok(Value::None)
}

/// Extracts sep and end kwargs from print() arguments.
///
/// Consumes the kwargs, dropping all values after extraction.
/// Returns `Ok((sep, end))` on success, where each element is `None` when the kwarg
/// was omitted or passed as `None`. Returns `Err` with the first kwarg error
/// encountered: a non-string keyword, an invalid `sep`/`end` value, the unsupported
/// `file` kwarg, or an unexpected keyword name.
fn extract_print_kwargs(
    kwargs: KwargsValues,
    heap: &mut Heap,
    interns: &Interns,
) -> RunResult<(Option, Option)> {
    let mut sep: Option = None;
    let mut end: Option = None;
    // The first error is remembered instead of returned immediately so the loop
    // still visits (and releases) every remaining key/value pair.
    let mut error: Option = None;

    for (key, value) in kwargs {
        // defer_drop! ensures key and value are cleaned up on every path through
        // the loop body — including continue, early return, and normal iteration
        defer_drop!(key, heap);
        defer_drop!(value, heap);

        // If we already hit an error, just drop remaining values
        if error.is_some() {
            continue;
        }

        let Some(keyword_name) = key.as_either_str(heap) else {
            error = Some(SimpleException::new_msg(ExcType::TypeError, "keywords must be strings").into());
            continue;
        };

        let key_str = keyword_name.as_str(interns);
        match key_str {
            "sep" => match extract_string_kwarg(value, "sep", heap, interns) {
                Ok(custom_sep) => sep = custom_sep,
                Err(e) => error = Some(e),
            },
            "end" => match extract_string_kwarg(value, "end", heap, interns) {
                Ok(custom_end) => end = custom_end,
                Err(e) => error = Some(e),
            },
            "flush" => {} // Accepted but ignored (we don't buffer output)
            "file" => {
                error = Some(
                    SimpleException::new_msg(ExcType::TypeError, "print() 'file' argument is not supported").into(),
                );
            }
            _ => {
                error = Some(ExcType::type_error_unexpected_keyword("print", key_str));
            }
        }
    }

    if let Some(error) = error {
        Err(error)
    } else {
        Ok((sep, end))
    }
}

/// Extracts a string value from a print() kwarg.
///
/// The kwarg can be `None` (returns `Ok(None)`, so the caller uses its default) or a
/// string (returns an owned copy of it).
/// Raises TypeError for other types.
// `name` is the kwarg name and is only used to build the error message.
fn extract_string_kwarg(
    value: &Value,
    name: &str,
    heap: &Heap,
    interns: &Interns,
) -> RunResult> {
    match value {
        Value::None => Ok(None),
        // Interned strings are copied out of the intern table.
        Value::InternString(string_id) => Ok(Some(interns.get_str(*string_id).to_owned())),
        Value::Ref(id) => {
            // A heap reference is accepted only when it holds a string.
            if let HeapData::Str(s) = heap.get(*id) {
                return Ok(Some(s.as_str().to_owned()));
            }
            Err(SimpleException::new_msg(
                ExcType::TypeError,
                format!("{} must be None or a string, not {}", name, value.py_type(heap)),
            )
            .into())
        }
        _ => Err(SimpleException::new_msg(
            ExcType::TypeError,
            format!("{} must be None or a string, not {}", name, value.py_type(heap)),
        )
        .into()),
    }
}

================================================
FILE: crates/monty/src/builtins/repr.rs
================================================
//! Implementation of the repr() builtin function.

use crate::{
    args::ArgValues, bytecode::VM, defer_drop, exception_private::RunResult, heap::HeapData,
    resource::ResourceTracker, types::PyTrait, value::Value,
};

/// Implementation of the repr() builtin function.
///
/// Returns a string containing a printable representation of an object.
///
/// The text produced by `py_repr` is copied into a newly allocated heap string and a
/// reference to that string is returned.
pub fn builtin_repr(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("repr", vm.heap)?;
    defer_drop!(value, vm);
    let heap_id = vm.heap.allocate(HeapData::Str(value.py_repr(vm).into_owned().into()))?;
    Ok(Value::Ref(heap_id))
}

================================================
FILE: crates/monty/src/builtins/reversed.rs
================================================
//! Implementation of the reversed() builtin function.

use crate::{
    args::ArgValues,
    bytecode::VM,
    exception_private::RunResult,
    heap::HeapData,
    resource::ResourceTracker,
    types::{List, MontyIter},
    value::Value,
};

/// Implementation of the reversed() builtin function.
///
/// Returns a list with elements in reverse order.
/// Note: In Python this returns an iterator, but we return a list for simplicity.
pub fn builtin_reversed(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("reversed", vm.heap)?;

    // Collect all items
    // `value` is passed to `MontyIter::new` by value, so it is not dropped explicitly here.
    let mut items: Vec<_> = MontyIter::new(value, vm)?.collect(vm)?;

    // Reverse in place
    items.reverse();

    let heap_id = vm.heap.allocate(HeapData::List(List::new(items)))?;
    Ok(Value::Ref(heap_id))
}

================================================
FILE: crates/monty/src/builtins/round.rs
================================================
//! Implementation of the round() builtin function.

use crate::{
    args::ArgValues,
    bytecode::VM,
    defer_drop,
    exception_private::{ExcType, RunResult, SimpleException},
    resource::ResourceTracker,
    types::PyTrait,
    value::Value,
};

/// Converts a `Value::Bool` into the equivalent `Value::Int` (`false` -> 0,
/// `true` -> 1) and returns every other value unchanged.
pub fn normalize_bool_to_int(value: Value) -> Value {
    match value {
        Value::Bool(b) => Value::Int(i64::from(b)),
        other => other,
    }
}

/// Implementation of the round() builtin function.
///
/// Rounds a number to a given precision in decimal digits.
/// If ndigits is omitted or None, returns the nearest integer.
/// Uses banker's rounding (round half to even).
pub fn builtin_round(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (number, ndigits) = args.get_one_two_args("round", vm.heap)?; let number = normalize_bool_to_int(number); defer_drop!(number, vm); defer_drop!(ndigits, vm); // Determine the number of digits (None means round to integer) // Extract digits value before potentially consuming ndigits for error handling let digits: Option = match ndigits { Some(Value::None) => None, Some(Value::Int(n)) => Some(*n), Some(Value::Bool(b)) => Some(i64::from(*b)), Some(v) => { let type_name = v.py_type(vm.heap); return Err(SimpleException::new_msg( ExcType::TypeError, format!("'{type_name}' object cannot be interpreted as an integer"), ) .into()); } None => None, }; match number { Value::Int(n) => { if let Some(d) = digits { if d >= 0 { // Positive or zero digits: return the integer unchanged Ok(Value::Int(*n)) } else { // Negative digits: round to tens, hundreds, etc. using banker's rounding // -d is positive since d < 0; use try_from to safely convert let exp = u32::try_from(-d).unwrap_or(u32::MAX); let factor = 10_i64.saturating_pow(exp); let rounded_f = bankers_round(*n as f64 / factor as f64); let rounded = f64_to_i64(rounded_f) * factor; Ok(Value::Int(rounded)) } } else { // No digits specified: return the integer unchanged Ok(Value::Int(*n)) } } Value::Float(f) => { if let Some(d) = digits { // Round to `d` decimal places using banker's rounding. 
Ok(Value::Float(round_float_to_digits(*f, d))) } else { // No digits: round to nearest integer and return int (banker's rounding) if f.is_nan() { Err(SimpleException::new_msg(ExcType::ValueError, "cannot convert float NaN to integer").into()) } else if f.is_infinite() { Err( SimpleException::new_msg(ExcType::OverflowError, "cannot convert float infinity to integer") .into(), ) } else { Ok(Value::Int(f64_to_i64(bankers_round(*f)))) } } } _ => { let type_name = number.py_type(vm.heap); Err(SimpleException::new_msg( ExcType::TypeError, format!("type {type_name} doesn't define __round__ method"), ) .into()) } } } /// Implements banker's rounding (round half to even). /// /// This is the rounding mode used by Python's `round()` function. /// When the value is exactly halfway between two integers, it rounds to the nearest even integer. fn bankers_round(value: f64) -> f64 { let floor = value.floor(); let frac = value - floor; if frac < 0.5 { floor } else if frac > 0.5 { floor + 1.0 } else { // Exactly 0.5 - round to even if f64_to_i64(floor) % 2 == 0 { floor } else { floor + 1.0 } } } /// Rounds a finite float to a given number of decimal digits using banker's rounding. /// /// This is used for `round(x, ndigits)` where Python always returns a float. /// /// For large `ndigits` values where scaling by `10**ndigits` would overflow/underflow `f64`, /// CPython returns either the original value (large positive `ndigits`) or a signed zero /// (large negative `ndigits`). We mirror that behavior and also preserve the sign of `0.0`. 
fn round_float_to_digits(value: f64, digits: i64) -> f64 {
    // NaN and infinities are returned unchanged.
    if !value.is_finite() {
        return value;
    }

    let rounded = if digits >= 0 {
        // Whenever the scale factor or the scaled value is not representable, the
        // input is returned unchanged.
        let Ok(exp) = i32::try_from(digits) else {
            return value;
        };
        let multiplier = 10_f64.powi(exp);
        if !multiplier.is_finite() {
            return value;
        }
        let scaled = value * multiplier;
        if !scaled.is_finite() {
            return value;
        }
        bankers_round(scaled) / multiplier
    } else {
        // Negative digits scale by a fractional power of ten; when that factor is not
        // representable or underflows to zero the result is a zero with the input's sign.
        let Ok(exp) = i32::try_from(digits) else {
            return 0.0_f64.copysign(value);
        };
        let multiplier = 10_f64.powi(exp);
        if multiplier == 0.0 {
            return 0.0_f64.copysign(value);
        }
        let scaled = value * multiplier;
        bankers_round(scaled) / multiplier
    };

    // Keep the sign of the input when the result rounds to zero.
    if rounded == 0.0 {
        0.0_f64.copysign(value)
    } else {
        rounded
    }
}

/// Converts `f64` to `i64` using saturating float-to-int casting.
///
/// Monty uses `i64` for integer values, so float-to-int conversion must pick a
/// bounded representation:
/// - Values outside the `i64` range saturate to `i64::MIN`/`i64::MAX`
/// - `NaN` converts to `0`
///
/// This behavior is provided by Rust's `as` casting rules for float-to-int.
fn f64_to_i64(value: f64) -> i64 {
    #[expect(
        clippy::cast_possible_truncation,
        reason = "intentional truncation; float-to-int casts saturate and map NaN to 0"
    )]
    let result = value as i64;
    result
}

================================================
FILE: crates/monty/src/builtins/sorted.rs
================================================
//! Implementation of the sorted() builtin function.

use itertools::Itertools;

use crate::{
    args::ArgValues,
    bytecode::VM,
    defer_drop,
    exception_private::{ExcType, RunResult, SimpleException},
    heap::{DropWithHeap, HeapData, HeapGuard},
    resource::ResourceTracker,
    sorting::{apply_permutation, sort_indices},
    types::{List, MontyIter, PyTrait},
    value::Value,
};

/// Implementation of the sorted() builtin function.
///
/// Returns a new sorted list from the items in an iterable.
/// Supports `key` and `reverse` keyword arguments matching Python's
/// `sorted(iterable, /, *, key=None, reverse=False)` signature.
///
/// The items are collected first; sorting is then done on a vector of indices
/// (comparing either the computed keys or the items themselves) and the resulting
/// permutation is applied to the items before they are stored in a new list.
pub fn builtin_sorted(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let (iterable, key_fn, reverse) = parse_sorted_args(args, vm)?;
    defer_drop!(key_fn, vm);

    let items: Vec<_> = MontyIter::new(iterable, vm)?.collect(vm)?;
    // Guard the collected items so they are cleaned up if a later step fails.
    let mut items_guard = HeapGuard::new(items, vm);
    let (items, vm) = items_guard.as_parts_mut();

    // Inner scope: the borrows taken from the guards end here, before the items are
    // moved out of `items_guard` below.
    {
        // Compute key values if a key function was provided, otherwise we'll sort by the items themselves
        // Declared before the branch so the guard outlives the slice borrowed from it.
        let mut keys_guard;
        let (compare_values, vm) = if let Some(f) = key_fn {
            let keys: Vec = Vec::with_capacity(items.len());
            // Use a HeapGuard to ensure that if key function evaluation fails partway through,
            // we clean up any keys that were successfully computed
            keys_guard = HeapGuard::new(keys, vm);
            let (keys, vm) = keys_guard.as_parts_mut();
            items
                .iter()
                .map(|item| {
                    // The key function receives its own copy of the item.
                    let item = item.clone_with_heap(vm);
                    vm.evaluate_function("sorted() key argument", f, ArgValues::One(item))
                })
                // Stops at the first failing key call and propagates its error.
                .process_results(|keys_iter| keys.extend(keys_iter))?;
            keys_guard.as_parts()
        } else {
            (&*items, vm)
        };

        // Sort indices by comparing key values (or items themselves if no key)
        let len = compare_values.len();
        let mut indices: Vec = (0..len).collect();
        sort_indices(&mut indices, compare_values, reverse, vm)?;

        // Rearrange items in-place according to the sorted permutation
        apply_permutation(items, &mut indices);
    }

    let (items, vm) = items_guard.into_parts();
    let heap_id = vm.heap.allocate(HeapData::List(List::new(items)))?;
    Ok(Value::Ref(heap_id))
}

/// Parses the arguments for `sorted(iterable, /, *, key=None, reverse=False)`.
///
/// Returns `(iterable, key_fn, reverse)` where `key_fn` is `None` when no key
/// function was provided (or `None` was explicitly passed), and `reverse` defaults
/// to `false`.
// Exactly one positional argument is required; zero or more than one is a
// `TypeError`. Every early-return path releases the values it still owns before
// returning.
fn parse_sorted_args(
    args: ArgValues,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<(Value, Option, bool)> {
    let (mut positional, kwargs) = args.into_parts();

    // Extract the single required positional argument
    // The count is captured before `next()` so the error message reports the total.
    let positional_len = positional.len();
    let Some(iterable) = positional.next() else {
        kwargs.drop_with_heap(vm);
        positional.drop_with_heap(vm);
        return Err(SimpleException::new_msg(
            ExcType::TypeError,
            format!("sorted expected 1 argument, got {positional_len}"),
        )
        .into());
    };

    // Reject extra positional arguments
    if positional.len() > 0 {
        let total = positional_len;
        kwargs.drop_with_heap(vm);
        iterable.drop_with_heap(vm);
        positional.drop_with_heap(vm);
        return Err(
            SimpleException::new_msg(ExcType::TypeError, format!("sorted expected 1 argument, got {total}")).into(),
        );
    }

    // Parse keyword arguments: key and reverse
    // The iterable is held in a guard so it is cleaned up if kwarg parsing fails.
    let mut iterable_guard = HeapGuard::new(iterable, vm);
    let vm = iterable_guard.heap();
    let (key_arg, reverse_arg) = kwargs.parse_named_kwargs_pair(
        "sorted",
        "key",
        "reverse",
        vm.heap,
        vm.interns,
        |_func_name, key_str| {
            // CPython currently reuses the list.sort()-style wording here rather than
            // saying "sorted() got ...", so match that exact user-visible message.
            ExcType::type_error_unexpected_keyword("sort", key_str)
        },
    )?;

    // Convert reverse to bool (default false)
    // Any value is accepted; it is reduced to a bool via `py_bool` and then released.
    let reverse = if let Some(v) = reverse_arg {
        let result = v.py_bool(vm);
        v.drop_with_heap(vm);
        result
    } else {
        false
    };

    // Handle key function (None means no key function)
    let key_fn = match key_arg {
        Some(v) if matches!(v, Value::None) => {
            v.drop_with_heap(iterable_guard.heap());
            None
        }
        other => other,
    };

    Ok((iterable_guard.into_inner(), key_fn, reverse))
}

================================================
FILE: crates/monty/src/builtins/sum.rs
================================================
//! Implementation of the sum() builtin function.
use crate::{ args::ArgValues, bytecode::VM, defer_drop, defer_drop_mut, exception_private::{ExcType, RunResult, SimpleException}, heap::HeapGuard, resource::ResourceTracker, types::{MontyIter, PyTrait, Type}, value::Value, }; /// Implementation of the sum() builtin function. /// /// Sums the items of an iterable from left to right with an optional start value. /// The default start value is 0. String start values are explicitly rejected /// (use `''.join(seq)` instead for string concatenation). pub fn builtin_sum(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (iterable, start) = args.get_one_two_args("sum", vm.heap)?; defer_drop_mut!(start, vm); let iter = MontyIter::new(iterable, vm)?; defer_drop_mut!(iter, vm); // Get the start value, defaulting to 0 let accumulator = match start.take() { Some(v) => { // Reject string start values - Python explicitly forbids this if matches!(v.py_type(vm.heap), Type::Str) { v.drop_with_heap(vm); return Err(SimpleException::new_msg( ExcType::TypeError, "sum() can't sum strings [use ''.join(seq) instead]", ) .into()); } v } None => Value::Int(0), }; // HeapGuard for accumulator: on success we extract it via into_inner(), // on any error path it's dropped automatically let mut acc_guard = HeapGuard::new(accumulator, vm); let (accumulator, vm) = acc_guard.as_parts_mut(); // Sum all items while let Some(item) = iter.for_next(vm)? { defer_drop!(item, vm); // Try to add the item to accumulator if let Some(new_value) = accumulator.py_add(item, vm)? 
{ // Replace the old accumulator with the new value, dropping the old one let old = std::mem::replace(accumulator, new_value); old.drop_with_heap(vm); } else { // Types don't support addition let acc_type = accumulator.py_type(vm.heap); let item_type = item.py_type(vm.heap); return Err(ExcType::binary_type_error("+", acc_type, item_type)); } } Ok(acc_guard.into_inner()) } ================================================ FILE: crates/monty/src/builtins/type_.rs ================================================ //! Implementation of the type() builtin function. use super::Builtins; use crate::{ args::ArgValues, bytecode::VM, defer_drop, exception_private::RunResult, resource::ResourceTracker, types::PyTrait, value::Value, }; /// Implementation of the type() builtin function. /// /// Returns the type of an object. pub fn builtin_type(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let value = args.get_one_arg("type", vm.heap)?; defer_drop!(value, vm); Ok(Value::Builtin(Builtins::Type(value.py_type(vm.heap)))) } ================================================ FILE: crates/monty/src/builtins/zip.rs ================================================ //! Implementation of the zip() builtin function. use crate::{ args::ArgValues, bytecode::VM, defer_drop_mut, exception_private::RunResult, heap::HeapData, resource::ResourceTracker, types::{List, MontyIter, allocate_tuple, tuple::TupleVec}, value::Value, }; /// Implementation of the zip() builtin function. /// /// Returns a list of tuples, where the i-th tuple contains the i-th element /// from each of the argument iterables. Stops when the shortest iterable is exhausted. /// Note: In Python this returns an iterator, but we return a list for simplicity. 
pub fn builtin_zip(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (positional, kwargs) = args.into_parts(); defer_drop_mut!(positional, vm); // TODO: support kwargs (strict) kwargs.not_supported_yet("zip", vm.heap)?; if positional.len() == 0 { // zip() with no arguments returns empty list let heap_id = vm.heap.allocate(HeapData::List(List::new(Vec::new())))?; return Ok(Value::Ref(heap_id)); } // Create iterators for each iterable let mut iterators: Vec = Vec::with_capacity(positional.len()); for iterable in positional { match MontyIter::new(iterable, vm) { Ok(iter) => iterators.push(iter), Err(e) => { // Clean up already-created iterators for iter in iterators { iter.drop_with_heap(vm); } return Err(e); } } } let mut result: Vec = Vec::new(); // Zip until shortest iterator is exhausted 'outer: loop { let mut tuple_items = TupleVec::with_capacity(iterators.len()); for iter in &mut iterators { if let Some(item) = iter.for_next(vm)? { tuple_items.push(item); } else { // This iterator is exhausted - drop partial tuple items and stop for item in tuple_items { item.drop_with_heap(vm); } break 'outer; } } // Create tuple from collected items let tuple_val = allocate_tuple(tuple_items, vm.heap)?; result.push(tuple_val); } // Clean up iterators for iter in iterators { iter.drop_with_heap(vm); } let heap_id = vm.heap.allocate(HeapData::List(List::new(result)))?; Ok(Value::Ref(heap_id)) } ================================================ FILE: crates/monty/src/bytecode/builder.rs ================================================ //! Builder for emitting bytecode during compilation. //! //! `CodeBuilder` provides methods for emitting opcodes and operands, handling //! forward jumps with patching, and tracking source locations for tracebacks. 
use std::collections::HashSet;

use super::{
    code::{Code, ConstPool, ExceptionEntry, LocationEntry},
    op::Opcode,
};
use crate::{intern::StringId, parse::CodeRange, value::Value};

/// Builder for emitting bytecode during compilation.
///
/// Handles encoding opcodes and operands into raw bytes, managing forward jumps
/// that need patching, and tracking source locations for traceback generation.
///
/// # Usage
///
/// ```ignore
/// let mut builder = CodeBuilder::new();
/// builder.set_location(some_range, None);
/// builder.emit(Opcode::LoadNone);
/// builder.emit_u8(Opcode::LoadLocal, 0);
/// let jump = builder.emit_jump(Opcode::JumpIfFalse);
/// // ... emit more code ...
/// builder.patch_jump(jump);
/// let code = builder.build(num_locals);
/// ```
#[derive(Debug, Default)]
pub struct CodeBuilder {
    /// The bytecode being built.
    bytecode: Vec,

    /// Constants collected during compilation.
    constants: Vec,

    /// Source location entries for traceback generation.
    location_table: Vec,

    /// Exception handler entries.
    exception_table: Vec,

    /// Current source location (set before emitting instructions).
    current_location: Option,

    /// Current focus location within the source range.
    current_focus: Option,

    /// Current stack depth for tracking max stack usage.
    current_stack_depth: u16,

    /// Maximum stack depth seen during compilation.
    max_stack_depth: u16,

    /// Local variable names indexed by slot number.
    ///
    /// Populated during compilation to enable proper NameError messages
    /// when accessing undefined local variables.
    local_names: Vec>,

    /// Local variable slots that are assigned somewhere in this function.
    ///
    /// Used to determine whether to raise `UnboundLocalError` or `NameError`
    /// when loading an undefined local variable.
    assigned_locals: HashSet,
}

impl CodeBuilder {
    /// Creates a new empty CodeBuilder.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the current source location for subsequent instructions.
    ///
    /// This location will be recorded in the location table when the next
    /// instruction is emitted. Call this before emitting instructions that
    /// correspond to source code.
    ///
    /// The location stays in effect until it is set again, so every instruction
    /// emitted afterwards gets its own location-table entry with this range.
    pub fn set_location(&mut self, range: CodeRange, focus: Option) {
        self.current_location = Some(range);
        self.current_focus = focus;
    }

    /// Emits a no-operand instruction and updates stack depth tracking.
    pub fn emit(&mut self, op: Opcode) {
        self.record_location();
        self.bytecode.push(op as u8);
        // Track stack effect for opcodes with known fixed effects
        if let Some(effect) = op.stack_effect() {
            self.adjust_stack(effect);
        }
    }

    /// Emits an instruction with a u8 operand and updates stack depth tracking.
    pub fn emit_u8(&mut self, op: Opcode, operand: u8) {
        self.record_location();
        self.bytecode.push(op as u8);
        self.bytecode.push(operand);
        // Track stack effect - some need operand-based calculation
        self.track_stack_effect_u8(op, operand);
    }

    /// Emits an instruction with an i8 operand and updates stack depth tracking.
    pub fn emit_i8(&mut self, op: Opcode, operand: i8) {
        self.record_location();
        self.bytecode.push(op as u8);
        // Reinterpret i8 as u8 for bytecode encoding
        self.bytecode.push(operand.to_ne_bytes()[0]);
        // Track stack effect for opcodes with known fixed effects
        if let Some(effect) = op.stack_effect() {
            self.adjust_stack(effect);
        }
    }

    /// Emits an instruction with two u8 operands and updates stack depth tracking.
    ///
    /// Used for UnpackEx: before_count (u8) + after_count (u8)
    pub fn emit_u8_u8(&mut self, op: Opcode, operand1: u8, operand2: u8) {
        self.record_location();
        self.bytecode.push(op as u8);
        self.bytecode.push(operand1);
        self.bytecode.push(operand2);
        // UnpackEx: pops 1, pushes (before + 1 + after) = before + after + 1
        // Net effect: before + after
        if op == Opcode::UnpackEx {
            self.adjust_stack(i16::from(operand1) + i16::from(operand2));
        } else if let Some(effect) = op.stack_effect() {
            self.adjust_stack(effect);
        }
    }

    /// Emits an instruction with a u16 operand (little-endian) and updates stack depth tracking.
    pub fn emit_u16(&mut self, op: Opcode, operand: u16) {
        self.record_location();
        self.bytecode.push(op as u8);
        self.bytecode.extend_from_slice(&operand.to_le_bytes());
        // Track stack effect - some need operand-based calculation
        self.track_stack_effect_u16(op, operand);
    }

    /// Emits an instruction with a u16 operand followed by a u8 operand.
    ///
    /// Used for MakeFunction: func_id (u16) + defaults_count (u8)
    /// Used for CallAttr: attr_name_id (u16) + arg_count (u8)
    pub fn emit_u16_u8(&mut self, op: Opcode, operand1: u16, operand2: u8) {
        self.record_location();
        self.bytecode.push(op as u8);
        self.bytecode.extend_from_slice(&operand1.to_le_bytes());
        self.bytecode.push(operand2);
        // Track stack effects based on opcode
        match op {
            Opcode::MakeFunction => {
                // pops defaults_count defaults, pushes function: 1 - defaults_count
                self.adjust_stack(1 - i16::from(operand2));
            }
            Opcode::CallAttr => {
                // pops obj + args, pushes result: 1 - (1 + arg_count) = -arg_count
                self.adjust_stack(-i16::from(operand2));
            }
            _ => {
                if let Some(effect) = op.stack_effect() {
                    self.adjust_stack(effect);
                }
            }
        }
    }

    /// Emits an instruction with a u16 operand followed by two u8 operands.
    ///
    /// Used for MakeClosure: func_id (u16) + defaults_count (u8) + cell_count (u8)
    pub fn emit_u16_u8_u8(&mut self, op: Opcode, operand1: u16, operand2: u8, operand3: u8) {
        self.record_location();
        self.bytecode.push(op as u8);
        self.bytecode.extend_from_slice(&operand1.to_le_bytes());
        self.bytecode.push(operand2);
        self.bytecode.push(operand3);
        // MakeClosure: pops defaults_count defaults, pushes closure
        // Cell values are captured from locals, not popped from stack
        // Stack effect: 1 - defaults_count
        if op == Opcode::MakeClosure {
            self.adjust_stack(1 - i16::from(operand2));
        } else if let Some(effect) = op.stack_effect() {
            self.adjust_stack(effect);
        }
    }

    /// Emits `CallBuiltinFunction` instruction.
    ///
    /// Operands: builtin_id (u8) + arg_count (u8)
    ///
    /// The builtin_id is the `#[repr(u8)]` discriminant of `BuiltinsFunctions`.
    /// This is an optimization that avoids constant pool lookup and stack manipulation.
    pub fn emit_call_builtin_function(&mut self, builtin_id: u8, arg_count: u8) {
        self.record_location();
        self.bytecode.push(Opcode::CallBuiltinFunction as u8);
        self.bytecode.push(builtin_id);
        self.bytecode.push(arg_count);
        // CallBuiltinFunction: pops args, pushes result. No callable on stack.
        // Stack effect: 1 - arg_count
        self.adjust_stack(1 - i16::from(arg_count));
    }

    /// Emits `CallBuiltinType` instruction.
    ///
    /// Operands: type_id (u8) + arg_count (u8)
    ///
    /// The type_id is the `#[repr(u8)]` discriminant of `BuiltinsTypes`.
    /// This is an optimization for type constructors like `list()`, `int()`, `str()`.
    pub fn emit_call_builtin_type(&mut self, type_id: u8, arg_count: u8) {
        self.record_location();
        self.bytecode.push(Opcode::CallBuiltinType as u8);
        self.bytecode.push(type_id);
        self.bytecode.push(arg_count);
        // CallBuiltinType: pops args, pushes result. No callable on stack.
        // Stack effect: 1 - arg_count
        self.adjust_stack(1 - i16::from(arg_count));
    }

    /// Emits CallFunctionKw with inline keyword names.
    ///
    /// Operands: pos_count (u8) + kw_count (u8) + kw_count * name_id (u16 each)
    ///
    /// The kwname_ids slice contains StringId indices for each keyword argument
    /// name, in order matching how the values were pushed to the stack.
    ///
    /// # Panics
    ///
    /// Panics if `kwname_ids` has more than 255 entries, because the keyword count
    /// is encoded as a single u8 operand.
    pub fn emit_call_function_kw(&mut self, pos_count: u8, kwname_ids: &[u16]) {
        self.record_location();
        self.bytecode.push(Opcode::CallFunctionKw as u8);
        self.bytecode.push(pos_count);
        self.bytecode
            .push(u8::try_from(kwname_ids.len()).expect("keyword count exceeds u8"));
        for &name_id in kwname_ids {
            self.bytecode.extend_from_slice(&name_id.to_le_bytes());
        }
        // CallFunctionKw: pops callable + pos_args + kw_args, pushes result
        // Stack effect: 1 - (1 + pos_count + kw_count) = -pos_count - kw_count
        let kw_count = i16::try_from(kwname_ids.len()).expect("keyword count exceeds i16");
        let total_args = i16::from(pos_count) + kw_count;
        self.adjust_stack(-total_args);
    }

    /// Emits CallAttrKw with inline keyword names.
    ///
    /// Operands: attr_name_id (u16) + pos_count (u8) + kw_count (u8) + kw_count * name_id (u16 each)
    ///
    /// The kwname_ids slice contains StringId indices for each keyword argument
    /// name, in order matching how the values were pushed to the stack.
    ///
    /// # Panics
    ///
    /// Panics if `kwname_ids` has more than 255 entries, because the keyword count
    /// is encoded as a single u8 operand.
    pub fn emit_call_attr_kw(&mut self, attr_name_id: u16, pos_count: u8, kwname_ids: &[u16]) {
        self.record_location();
        self.bytecode.push(Opcode::CallAttrKw as u8);
        self.bytecode.extend_from_slice(&attr_name_id.to_le_bytes());
        self.bytecode.push(pos_count);
        self.bytecode
            .push(u8::try_from(kwname_ids.len()).expect("keyword count exceeds u8"));
        for &name_id in kwname_ids {
            self.bytecode.extend_from_slice(&name_id.to_le_bytes());
        }
        // CallAttrKw: pops obj + pos_args + kw_args, pushes result
        // Stack effect: 1 - (1 + pos_count + kw_count) = -pos_count - kw_count
        let kw_count = i16::try_from(kwname_ids.len()).expect("keyword count exceeds i16");
        let total_args = i16::from(pos_count) + kw_count;
        self.adjust_stack(-total_args);
    }

    /// Emits a forward jump instruction, returning a label to patch later.
    ///
    /// The jump offset is initially set to 0 and must be patched with
    /// `patch_jump()` once the target location is known.
    #[must_use]
    pub fn emit_jump(&mut self, op: Opcode) -> JumpLabel {
        self.record_location();
        // The label remembers where the opcode byte sits; the operand follows it.
        let label = JumpLabel(self.bytecode.len());
        self.bytecode.push(op as u8);
        // Placeholder for i16 offset (will be patched)
        self.bytecode.extend_from_slice(&0i16.to_le_bytes());
        // Track stack effect
        match op {
            // ForIter: when successful (not jumping), pushes next value (+1)
            // When exhausted (jumping), pops iterator (-1), but that's after loop
            Opcode::ForIter => self.adjust_stack(1),
            // JumpIfTrueOrPop/JumpIfFalseOrPop: pops when not jumping (fallthrough)
            Opcode::JumpIfTrueOrPop | Opcode::JumpIfFalseOrPop => self.adjust_stack(-1),
            _ => {
                if let Some(effect) = op.stack_effect() {
                    self.adjust_stack(effect);
                }
            }
        }
        label
    }

    /// Patches a forward jump to point to the current bytecode location.
    ///
    /// The offset is calculated relative to the position after the jump
    /// instruction's operand (i.e., where execution would continue if
    /// the jump is not taken).
    ///
    /// # Panics
    ///
    /// Panics if the jump offset exceeds i16 range (-32768..32767), which
    /// indicates the function is too large. This is a compile-time error
    /// rather than silent truncation.
    pub fn patch_jump(&mut self, label: JumpLabel) {
        let target = self.bytecode.len();
        // Offset is relative to position after the jump instruction (opcode + i16 = 3 bytes)
        let target_i64 = i64::try_from(target).expect("bytecode target exceeds i64");
        let label_i64 = i64::try_from(label.0).expect("bytecode label exceeds i64");
        let raw_offset = target_i64 - label_i64 - 3;
        let offset =
            i16::try_from(raw_offset).expect("jump offset exceeds i16 range (-32768..32767); function too large");
        // Overwrite the two placeholder bytes that follow the opcode (little-endian).
        let bytes = offset.to_le_bytes();
        self.bytecode[label.0 + 1] = bytes[0];
        self.bytecode[label.0 + 2] = bytes[1];
    }

    /// Emits a backward jump to a known target offset.
    ///
    /// Unlike forward jumps, backward jumps have a known target at emit time,
    /// so no patching is needed.
    ///
    /// # Panics
    ///
    /// Panics if the jump offset does not fit in an i16, which indicates the
    /// function is too large.
    pub fn emit_jump_to(&mut self, op: Opcode, target: usize) {
        self.record_location();
        let current = self.bytecode.len();
        // Offset is relative to position after this instruction (current + 3)
        let target_i64 = i64::try_from(target).expect("bytecode target exceeds i64");
        let current_i64 = i64::try_from(current).expect("bytecode offset exceeds i64");
        let raw_offset = target_i64 - (current_i64 + 3);
        let offset =
            i16::try_from(raw_offset).expect("jump offset exceeds i16 range (-32768..32767); function too large");
        self.bytecode.push(op as u8);
        self.bytecode.extend_from_slice(&offset.to_le_bytes());
        // Track stack effect (jump instructions pop condition)
        if let Some(effect) = op.stack_effect() {
            self.adjust_stack(effect);
        }
    }

    /// Returns the current bytecode offset.
    ///
    /// Use this to record loop start positions for backward jumps.
    #[must_use]
    pub fn current_offset(&self) -> usize {
        self.bytecode.len()
    }

    /// Registers a local variable name for a given slot.
    ///
    /// This is called during compilation when we encounter a variable access.
    /// The name is used to generate proper NameError messages.
    pub fn register_local_name(&mut self, slot: u16, name: StringId) {
        let slot_idx = slot as usize;
        // Extend the vector if needed
        if slot_idx >= self.local_names.len() {
            self.local_names.resize(slot_idx + 1, None);
        }
        // Only set if not already set (first occurrence determines the name)
        if self.local_names[slot_idx].is_none() {
            self.local_names[slot_idx] = Some(name);
        }
    }

    /// Registers a local variable slot as "assigned" (vs undefined reference).
    ///
    /// Called during compilation for variables that are assigned somewhere in the function.
    /// Used at runtime to determine whether to raise `UnboundLocalError` (assigned local
    /// accessed before assignment) or `NameError` (name doesn't exist anywhere).
    pub fn register_assigned_local(&mut self, slot: u16) {
        self.assigned_locals.insert(slot);
    }

    /// Emits `LoadLocal`, using specialized opcodes for slots 0-3.
    ///
    /// Slots 0-3 use zero-operand opcodes (`LoadLocal0`, etc.) for efficiency.
    /// Slots 4-255 use `LoadLocal` with a u8 operand.
    /// Slots 256+ use `LoadLocalW` with a u16 operand.
    pub fn emit_load_local(&mut self, slot: u16) {
        match slot {
            0 => self.emit(Opcode::LoadLocal0),
            1 => self.emit(Opcode::LoadLocal1),
            2 => self.emit(Opcode::LoadLocal2),
            3 => self.emit(Opcode::LoadLocal3),
            _ => {
                if let Ok(s) = u8::try_from(slot) {
                    self.emit_u8(Opcode::LoadLocal, s);
                } else {
                    self.emit_u16(Opcode::LoadLocalW, slot);
                }
            }
        }
    }

    /// Emits a `LoadLocalCallable` instruction for call-context loads.
    ///
    /// Unlike `emit_load_local`, this does NOT use specialized 0-3 variants since
    /// external function calls are rare enough that the optimization isn't worth
    /// the extra opcode slots. The `name_id` is encoded directly in the operand
    /// to avoid needing to look up the name from the code's local_names array.
    ///
    /// Slots that fit in a u8 use `LoadLocalCallable`; larger slots use
    /// `LoadLocalCallableW` with a u16 slot operand.
    ///
    /// # Panics
    ///
    /// Panics if the index of `name_id` does not fit in a u16.
    pub fn emit_load_local_callable(&mut self, slot: u16, name_id: StringId) {
        let name_id_u16 = u16::try_from(name_id.index()).expect("name_id exceeds u16");
        if let Ok(s) = u8::try_from(slot) {
            // Emit LoadLocalCallable with u8 slot + u16 name_id
            self.record_location();
            self.bytecode.push(Opcode::LoadLocalCallable as u8);
            self.bytecode.push(s);
            self.bytecode.extend_from_slice(&name_id_u16.to_le_bytes());
            self.adjust_stack(1);
        } else {
            // Emit LoadLocalCallableW with u16 slot + u16 name_id
            self.record_location();
            self.bytecode.push(Opcode::LoadLocalCallableW as u8);
            self.bytecode.extend_from_slice(&slot.to_le_bytes());
            self.bytecode.extend_from_slice(&name_id_u16.to_le_bytes());
            self.adjust_stack(1);
        }
    }

    /// Emits a `LoadGlobalCallable` instruction for call-context loads.
    ///
    /// The `name_id` is encoded directly in the operand to avoid the ambiguity
    /// of looking up global names from a function's local_names array (global slots
    /// and local slots use different namespaces).
    ///
    /// # Panics
    ///
    /// Panics if the index of `name_id` does not fit in a u16.
    pub fn emit_load_global_callable(&mut self, slot: u16, name_id: StringId) {
        let name_id_u16 = u16::try_from(name_id.index()).expect("name_id exceeds u16");
        self.record_location();
        self.bytecode.push(Opcode::LoadGlobalCallable as u8);
        self.bytecode.extend_from_slice(&slot.to_le_bytes());
        self.bytecode.extend_from_slice(&name_id_u16.to_le_bytes());
        self.adjust_stack(1);
    }

    /// Emits `StoreLocal`, using wide variant for slots > 255.
    pub fn emit_store_local(&mut self, slot: u16) {
        if let Ok(s) = u8::try_from(slot) {
            self.emit_u8(Opcode::StoreLocal, s);
        } else {
            self.emit_u16(Opcode::StoreLocalW, slot);
        }
    }

    /// Adds a constant to the pool, returning its index.
    ///
    /// Every call appends a new entry; equal values are not deduplicated.
    ///
    /// # Panics
    ///
    /// Panics if the constant pool exceeds 65535 entries. This is a compile-time
    /// error indicating the function has too many constants.
    #[must_use]
    pub fn add_const(&mut self, value: Value) -> u16 {
        let idx = self.constants.len();
        let idx_u16 = u16::try_from(idx).expect("constant pool exceeds u16 range (65535); too many constants");
        self.constants.push(value);
        idx_u16
    }

    /// Adds an exception handler entry.
    ///
    /// Entries should be added in innermost-first order for nested try blocks.
    pub fn add_exception_entry(&mut self, entry: ExceptionEntry) {
        self.exception_table.push(entry);
    }

    /// Returns the current tracked stack depth.
    #[must_use]
    pub fn stack_depth(&self) -> u16 {
        self.current_stack_depth
    }

    /// Builds the final Code object.
    ///
    /// Consumes the builder and returns a Code object containing the
    /// compiled bytecode and all metadata.
    #[must_use]
    pub fn build(self, num_locals: u16) -> Code {
        // Convert local_names from Vec<Option<StringId>> to Vec<StringId>,
        // using StringId::default() for slots with no recorded name
        let local_names: Vec = self.local_names.into_iter().map(Option::unwrap_or_default).collect();
        Code::new(
            self.bytecode,
            ConstPool::from_vec(self.constants),
            self.location_table,
            self.exception_table,
            num_locals,
            self.max_stack_depth,
            local_names,
            self.assigned_locals,
        )
    }

    /// Records the current location in the location table if set.
    ///
    /// Called at the start of every `emit*` method, before the opcode byte is pushed,
    /// so the recorded offset is the offset of the instruction about to be emitted.
    fn record_location(&mut self) {
        if let Some(range) = self.current_location {
            let offset = u32::try_from(self.bytecode.len()).expect("bytecode length exceeds u32");
            self.location_table
                .push(LocationEntry::new(offset, range, self.current_focus));
        }
    }

    /// Sets the current stack depth to an absolute value.
    ///
    /// Used when compiling code paths that branch and reconverge with different
    /// stack states (e.g., break/continue through finally blocks).
    /// Updates `max_stack_depth` if the new depth exceeds it.
    pub fn set_stack_depth(&mut self, depth: u16) {
        self.current_stack_depth = depth;
        self.max_stack_depth = self.max_stack_depth.max(depth);
    }

    /// Adjusts the stack depth by the given delta.
    ///
    /// Positive values indicate pushes, negative values indicate pops.
    /// Updates `max_stack_depth` if the new depth exceeds it.
    fn adjust_stack(&mut self, delta: i16) {
        // Computed in i32 so that neither the addition nor a negative result can overflow.
        let new_depth = i32::from(self.current_stack_depth) + i32::from(delta);
        // Stack depth shouldn't go negative (indicates compiler bug)
        debug_assert!(new_depth >= 0, "Stack depth went negative: {new_depth}");
        // Safe cast: new_depth is non-negative and stack won't exceed u16::MAX in practice
        // (when debug assertions are off a negative depth is clamped to 0, and a depth
        // above u16::MAX saturates to u16::MAX).
        self.current_stack_depth = u16::try_from(new_depth.max(0)).unwrap_or(u16::MAX);
        self.max_stack_depth = self.max_stack_depth.max(self.current_stack_depth);
    }

    /// Tracks stack effect for opcodes with u8 operand.
    ///
    /// For opcodes with variable effects (like `CallFunction`, `UnpackSequence`),
    /// calculates the effect based on the operand.
    fn track_stack_effect_u8(&mut self, op: Opcode, operand: u8) {
        let effect: i16 = match op {
            // CallFunction pops (callable + args), pushes result: -(1 + arg_count) + 1 = -arg_count
            Opcode::CallFunction => -i16::from(operand),
            // UnpackSequence pops 1, pushes n: n - 1
            Opcode::UnpackSequence => i16::from(operand) - 1,
            // ListAppend/SetAdd pop value: -1 (depth operand doesn't affect stack count)
            Opcode::ListAppend | Opcode::SetAdd => -1,
            // DictSetItem pops key and value: -2
            Opcode::DictSetItem => -2,
            // Default: use fixed effect if available
            _ => op.stack_effect().unwrap_or(0),
        };
        self.adjust_stack(effect);
    }

    /// Tracks stack effect for opcodes with u16 operand.
    ///
    /// For opcodes with variable effects (like `BuildList`, `BuildTuple`),
    /// calculates the effect based on the operand.
    fn track_stack_effect_u16(&mut self, op: Opcode, operand: u16) {
        // Safe cast: operand won't exceed i16::MAX in practice (would be a huge list)
        // NOTE(review): `cast_signed` reinterprets the bits, so an operand above `i16::MAX`
        // would become negative here, and for `BuildDict` the product `2 * operand_i16`
        // already exceeds `i16::MAX` once the operand is above 16383 — confirm the compiler
        // never emits counts that large.
        let operand_i16 = operand.cast_signed();
        let effect: i16 = match op {
            // BuildList/BuildTuple/BuildSet: pop n, push 1: -(n - 1) = 1 - n
            Opcode::BuildList | Opcode::BuildTuple | Opcode::BuildSet => 1 - operand_i16,
            // BuildDict: pop 2n (key-value pairs), push 1: 1 - 2n
            Opcode::BuildDict => 1 - 2 * operand_i16,
            // BuildFString: pop n parts, push 1: 1 - n
            Opcode::BuildFString => 1 - operand_i16,
            // Default: use fixed effect if available
            _ => op.stack_effect().unwrap_or(0),
        };
        self.adjust_stack(effect);
    }

    /// Manually adjust stack depth for complex scenarios.
    ///
    /// Use this when the compiler knows the exact stack effect that can't
    /// be determined from the opcode alone (e.g., exception handlers pushing
    /// an exception value).
    pub fn adjust_stack_depth(&mut self, delta: i16) {
        self.adjust_stack(delta);
    }
}

/// Label for a forward jump that needs patching.
///
/// Stores the bytecode offset where the jump instruction was emitted.
/// Pass this to `patch_jump()` once the target location is known.
#[derive(Debug, Clone, Copy)]
pub struct JumpLabel(usize);

#[cfg(test)]
mod tests {
    use super::*;

    // Two no-operand opcodes are encoded as exactly two bytes.
    #[test]
    fn test_emit_basic() {
        let mut builder = CodeBuilder::new();
        builder.emit(Opcode::LoadNone);
        builder.emit(Opcode::Pop);

        let code = builder.build(0);
        assert_eq!(code.bytecode(), &[Opcode::LoadNone as u8, Opcode::Pop as u8]);
    }

    // A u8 operand is written directly after the opcode byte.
    #[test]
    fn test_emit_u8_operand() {
        let mut builder = CodeBuilder::new();
        builder.emit_u8(Opcode::LoadLocal, 42);

        let code = builder.build(0);
        assert_eq!(code.bytecode(), &[Opcode::LoadLocal as u8, 42]);
    }

    // A u16 operand is written little-endian: 0x1234 becomes 0x34, 0x12.
    #[test]
    fn test_emit_u16_operand() {
        let mut builder = CodeBuilder::new();
        builder.emit_u16(Opcode::LoadConst, 0x1234);

        let code = builder.build(0);
        assert_eq!(code.bytecode(), &[Opcode::LoadConst as u8, 0x34, 0x12]);
    }

    // The placeholder written by `emit_jump` is replaced by `patch_jump` with the
    // distance from the end of the jump instruction to the patch point.
    #[test]
    fn test_forward_jump() {
        let mut builder = CodeBuilder::new();
        let jump = builder.emit_jump(Opcode::Jump);
        builder.emit(Opcode::LoadNone); // 1 byte, skipped by jump
        builder.emit(Opcode::LoadNone); // 1 byte, skipped by jump
        builder.patch_jump(jump);
        builder.emit(Opcode::LoadNone); // Return value
        builder.emit(Opcode::ReturnValue);

        let code = builder.build(0);
        // Jump at offset 0, target at offset 5 (after 2x LoadNone)
        // Offset = 5 - 0 - 3 = 2
        assert_eq!(
            code.bytecode(),
            &[
                Opcode::Jump as u8,
                2,
                0, // i16 little-endian = 2
                Opcode::LoadNone as u8,
                Opcode::LoadNone as u8,
                Opcode::LoadNone as u8,
                Opcode::ReturnValue as u8,
            ]
        );
    }

    // A backward jump is encoded immediately with a negative offset.
    #[test]
    fn test_backward_jump() {
        let mut builder = CodeBuilder::new();
        let loop_start = builder.current_offset();
        builder.emit(Opcode::LoadNone); // offset 0, 1 byte
        builder.emit(Opcode::Pop); // offset 1, 1 byte
        builder.emit_jump_to(Opcode::Jump, loop_start); // offset 2, target 0

        let code = builder.build(0);
        // Jump at offset 2, target at offset 0
        // Offset = 0 - (2 + 3) = -5
        let expected_offset = (-5i16).to_le_bytes();
        assert_eq!(
            code.bytecode(),
            &[
                Opcode::LoadNone as u8,
                Opcode::Pop as u8,
                Opcode::Jump as u8,
                expected_offset[0],
                expected_offset[1],
            ]
        );
    }

    // Slots 0-3 use the dedicated opcodes, slot 4 the u8 form and slot 256 the wide form.
    #[test]
    fn test_load_local_specialization() {
        let mut builder = CodeBuilder::new();
        builder.emit_load_local(0);
        builder.emit_load_local(1);
        builder.emit_load_local(2);
        builder.emit_load_local(3);
        builder.emit_load_local(4);
        builder.emit_load_local(256);

        let code = builder.build(0);
        assert_eq!(
            code.bytecode(),
            &[
                Opcode::LoadLocal0 as u8,
                Opcode::LoadLocal1 as u8,
                Opcode::LoadLocal2 as u8,
                Opcode::LoadLocal3 as u8,
                Opcode::LoadLocal as u8,
                4,
                Opcode::LoadLocalW as u8,
                0,
                1, // 256 in little-endian
            ]
        );
    }

    // Constants receive consecutive indices in insertion order.
    #[test]
    fn test_add_const() {
        let mut builder = CodeBuilder::new();
        let idx1 = builder.add_const(Value::Int(42));
        let idx2 = builder.add_const(Value::None);

        assert_eq!(idx1, 0);
        assert_eq!(idx2, 1);
    }
}

================================================
FILE: crates/monty/src/bytecode/code.rs
================================================
//! Code object containing compiled bytecode and metadata.
//!
//! A `Code` object represents a compiled function or module. It contains the raw
//! bytecode instructions, a constant pool, source location information for tracebacks,
//! and an exception handler table.

use std::collections::HashSet;

use crate::{intern::StringId, parse::CodeRange, value::Value};

/// Compiled bytecode for a function or module.
///
/// This is the output of the bytecode compiler and the input to the VM.
/// Each function has its own Code object; module-level code also gets one.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Code {
    /// Raw bytecode instructions as a byte vector.
    ///
    /// Opcodes are 1 byte each, followed by their operands. Most instructions carry
    /// 0-3 operand bytes; a few are wider (e.g. `MakeClosure` has a u16 plus two u8
    /// operands, and the keyword-call opcodes carry one u16 name id per keyword).
    /// The variable-width encoding gives better cache locality than fixed-width
    /// alternatives.
    bytecode: Vec,

    /// Constant pool for this code object.
    ///
    /// Values referenced by `LoadConst` instructions. Includes numbers, strings
    /// (as `Value::InternString`), and other literal values.
    constants: ConstPool,

    /// Source location table for tracebacks.
    ///
    /// Maps bytecode offsets to source locations. Used to generate Python-style
    /// tracebacks with line numbers and caret markers when exceptions occur.
    location_table: Vec,

    /// Exception handler table.
    ///
    /// Maps protected bytecode ranges to their exception handlers. Consulted when
    /// an exception is raised to find the appropriate handler. Entries are ordered
    /// innermost-first for nested try blocks.
    exception_table: Vec,

    /// Number of local variables (namespace slots needed).
    ///
    /// Used to pre-allocate the namespace when entering this code.
    num_locals: u16,

    /// Maximum stack depth needed during execution.
    ///
    /// Used as a hint for pre-allocating the operand stack. Computed during
    /// compilation by tracking push/pop operations.
    stack_size: u16,

    /// Local variable names for error messages.
    ///
    /// Maps slot indices to variable names. Used to generate proper NameError
    /// messages when accessing undefined local variables (e.g., "name 'x' is not defined").
    local_names: Vec,

    /// Local variable slots that are assigned somewhere in this function.
    ///
    /// Used to determine whether to raise `UnboundLocalError` (slot is assigned somewhere
    /// but accessed before assignment) or `NameError` (name doesn't exist in any scope).
    assigned_locals: HashSet,
}

impl Code {
    /// Creates a new Code object with all components.
    ///
    /// This is typically called by `CodeBuilder::build()` after compilation.
    /// The arguments are stored as given; no validation is performed here.
    #[must_use]
    #[expect(clippy::too_many_arguments)]
    pub fn new(
        bytecode: Vec,
        constants: ConstPool,
        location_table: Vec,
        exception_table: Vec,
        num_locals: u16,
        stack_size: u16,
        local_names: Vec,
        assigned_locals: HashSet,
    ) -> Self {
        Self {
            bytecode,
            constants,
            location_table,
            exception_table,
            num_locals,
            stack_size,
            local_names,
            assigned_locals,
        }
    }

    /// Returns the raw bytecode bytes.
    #[must_use]
    pub fn bytecode(&self) -> &[u8] {
        &self.bytecode
    }

    /// Returns the constant pool.
    #[must_use]
    pub fn constants(&self) -> &ConstPool {
        &self.constants
    }

    /// Returns the local variable name for a given slot index.
    ///
    /// Used to generate proper NameError messages when accessing undefined locals.
    /// Returns `None` when `slot` is beyond the end of the recorded names.
    #[must_use]
    pub fn local_name(&self, slot: u16) -> Option {
        self.local_names.get(slot as usize).copied()
    }

    /// Returns whether the slot is an assigned local (vs an undefined reference).
    ///
    /// Used to determine whether to raise `UnboundLocalError` (true) or `NameError` (false)
    /// when loading an undefined local variable.
    #[must_use]
    pub fn is_assigned_local(&self, slot: u16) -> bool {
        self.assigned_locals.contains(&slot)
    }

    /// Finds the location entry for a given bytecode offset.
    ///
    /// Location entries are recorded at instruction boundaries. This method finds
    /// the most recent entry at or before the given offset.
    ///
    /// Returns `None` if the location table is empty or the offset is before
    /// the first recorded location.
    ///
    /// # Panics
    ///
    /// Panics if `offset` does not fit in a u32.
    #[must_use]
    pub fn location_for_offset(&self, offset: usize) -> Option<&LocationEntry> {
        // Location entries are in order by bytecode offset.
        // Find the last entry where bytecode_offset <= offset.
        let offset_u32 = u32::try_from(offset).expect("bytecode offset exceeds u32");
        // Linear scan from the end of the table; the first hit is the latest entry.
        self.location_table
            .iter()
            .rev()
            .find(|entry| entry.bytecode_offset <= offset_u32)
    }

    /// Finds an exception handler for the given bytecode offset.
    ///
    /// Searches the exception table for an entry whose protected range contains
    /// the given offset. Returns the first (innermost) matching handler, since
    /// entries are ordered innermost-first for nested try blocks.
    ///
    /// Returns `None` if no handler covers this offset.
    #[must_use]
    pub fn find_exception_handler(&self, offset: u32) -> Option<&ExceptionEntry> {
        self.exception_table.iter().find(|entry| entry.contains(offset))
    }
}

/// TODO remove, this doesn't add any value
/// Constant pool for a code object.
///
/// Stores literal values referenced by `LoadConst` instructions.
Strings are stored /// as `Value::InternString(StringId)` pointing to the global `Interns` table, not /// duplicated here. At runtime, constants are loaded via `clone_with_heap()` to /// handle reference counting properly. #[derive(Debug, Default, serde::Serialize, serde::Deserialize)] pub(crate) struct ConstPool { /// The constant values, indexed by the operand of `LoadConst`. values: Vec, } impl Clone for ConstPool { fn clone(&self) -> Self { let values = self.values.iter().map(Value::clone_immediate).collect(); Self { values } } } impl ConstPool { /// Creates a constant pool from a vector of values. #[must_use] pub fn from_vec(values: Vec) -> Self { Self { values } } /// Returns the constant at the given index. /// /// # Panics /// /// Panics if the index is out of bounds. This should never happen with /// valid bytecode since indices come from the compiler. #[must_use] pub fn get(&self, index: u16) -> &Value { &self.values[index as usize] } } /// Source location for a bytecode instruction, used for tracebacks. /// /// Python 3.11+ tracebacks show carets under the relevant expression: /// ```text /// File "test.py", line 2, in foo /// return a + b + c /// ~~^~~ /// ``` /// /// The `range` covers the full expression (`a + b`), while `focus` points /// to the specific operator (`+`) that caused the error. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct LocationEntry { /// Bytecode offset this entry applies to. /// /// The entry applies from this offset until the next entry's offset /// (or end of bytecode). bytecode_offset: u32, /// Full source range of the expression (for the underline). range: CodeRange, /// Optional focus point within the range (for the ^ caret). /// /// If None, the entire range is underlined without a focus caret. /// This can be populated later for Python 3.11-style focused tracebacks. focus: Option, } impl LocationEntry { /// Creates a new location entry. 
#[must_use] pub fn new(bytecode_offset: u32, range: CodeRange, focus: Option) -> Self { Self { bytecode_offset, range, focus, } } /// Returns the full source range. #[must_use] pub fn range(&self) -> CodeRange { self.range } } /// Entry in the exception table - maps a protected bytecode range to its handler. /// /// Instead of maintaining a runtime stack of handlers (push/pop during execution), /// we use a static table that's consulted when an exception is raised. This is /// simpler and matches CPython 3.11+'s approach. /// /// For nested try blocks, multiple entries may cover the same bytecode offset. /// Entries are ordered innermost-first, so the VM uses the first matching entry. /// /// # Example /// /// For `try: x = bar(); y = baz() except ValueError as e: print(e)`: /// ```text /// 0: LOAD_GLOBAL 'bar' /// 4: CALL_FUNCTION 0 /// 8: STORE_LOCAL 'x' /// ... /// 24: JUMP 50 # skip handler if no exception /// 30: # exception handler starts here /// ``` /// Entry: `{ start: 0, end: 24, handler: 30, stack_depth: 0 }` #[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] pub struct ExceptionEntry { /// Start of protected bytecode range (inclusive). start: u32, /// End of protected bytecode range (exclusive). end: u32, /// Bytecode offset of the exception handler. handler: u32, /// Stack depth when entering the try block. /// /// Used to unwind the operand stack before jumping to handler. /// The VM pops values until the stack reaches this depth, then /// pushes the exception value. stack_depth: u16, } impl ExceptionEntry { /// Creates a new exception table entry. #[must_use] pub fn new(start: u32, end: u32, handler: u32, stack_depth: u16) -> Self { Self { start, end, handler, stack_depth, } } /// Returns the handler bytecode offset. #[must_use] pub fn handler(&self) -> u32 { self.handler } /// Returns the stack depth to unwind to. 
#[must_use] pub fn stack_depth(&self) -> u16 { self.stack_depth } /// Returns true if the given bytecode offset is within this entry's protected range. #[must_use] pub fn contains(&self, offset: u32) -> bool { offset >= self.start && offset < self.end } } ================================================ FILE: crates/monty/src/bytecode/compiler.rs ================================================ //! Bytecode compiler for transforming AST to bytecode. //! //! The compiler traverses the prepared AST (`PreparedNode` and `Expr` types from `expressions.rs`) //! and emits bytecode instructions using `CodeBuilder`. It handles variable scoping, //! control flow, and expression evaluation order following Python semantics. //! //! Functions are compiled recursively: when a `PreparedFunctionDef` is encountered, //! its body is compiled to bytecode and a `Function` struct is created. All compiled //! functions are collected and returned along with the module code. use std::borrow::Cow; use super::{ builder::{CodeBuilder, JumpLabel}, code::{Code, ExceptionEntry}, op::Opcode, }; use crate::{ args::{ArgExprs, CallArg, CallKwarg, Kwarg}, builtins::Builtins, exception_private::ExcType, exception_public::{MontyException, StackFrame}, expressions::{ Callable, CmpOperator, Comprehension, DictItem, Expr, ExprLoc, Identifier, Literal, NameScope, Node, Operator, PreparedFunctionDef, PreparedNode, SequenceItem, UnpackTarget, }, fstring::{ConversionFlag, FStringPart, FormatSpec, ParsedFormatSpec, encode_format_spec}, function::Function, intern::{Interns, StringId}, modules::BuiltinModule, parse::{CodeRange, ExceptHandler, Try}, value::{EitherStr, Value}, }; /// Maximum number of arguments allowed in a function call. /// /// This limit comes from the bytecode format: `CallFunction` and `CallAttr` /// use a u8 operand for the argument count, so max 255. Python itself has no /// such limit but we need one for our bytecode encoding. 
const MAX_CALL_ARGS: usize = 255;

/// Compiles prepared AST nodes to bytecode.
///
/// The compiler traverses the AST and emits bytecode instructions using
/// `CodeBuilder`. It handles variable scoping, control flow, and expression
/// evaluation order following Python semantics.
///
/// Functions are compiled recursively and collected in the `functions` vector.
/// When a `PreparedFunctionDef` is encountered, its body is compiled first,
/// creating a `Function` struct that is added to the vector. The index of the
/// function in this vector becomes the operand for MakeFunction/MakeClosure opcodes.
pub struct Compiler<'a> {
    /// Current code being built.
    code: CodeBuilder,

    /// Reference to interns for string/function lookups.
    interns: &'a Interns,

    /// Compiled functions, indexed by their position in this vector.
    ///
    /// Functions are added in the order they are encountered during compilation.
    /// Nested functions are compiled before their containing function's code
    /// finishes, so inner functions have lower indices.
    functions: Vec<Function>,

    /// Loop stack for break/continue handling.
    /// Each entry tracks the loop start offset and pending break jumps.
    loop_stack: Vec<LoopInfo>,

    /// Stack of finally targets for handling returns inside try-finally.
    ///
    /// When a return statement is compiled inside a try-finally block, instead
    /// of immediately returning, we store the return value and jump to the
    /// finally block. The finally block will then execute the return.
    finally_targets: Vec<FinallyTarget>,

    /// Tracks nesting depth inside exception handlers.
    ///
    /// When break/continue/return is inside an except handler, we need to
    /// clear the current exception (`ClearException`) and pop the exception
    /// value from the stack before jumping to the finally path or loop target.
    except_handler_depth: usize,

    /// Whether the compiler is currently compiling module-level code.
    ///
    /// At module level, `Local` and `LocalUnassigned` scopes map to global opcodes
    /// (`LoadGlobal`/`StoreGlobal`/`DeleteGlobal`) because module locals live in the
    /// globals array. In function bodies this is `false` and these scopes use local
    /// opcodes that index into the stack.
    is_module_scope: bool,
}

/// Information about a loop for break/continue handling.
///
/// Tracks the bytecode locations needed for compiling break and continue statements:
/// - `start`: where continue should jump to (the ForIter instruction for `for` loops,
///   or condition evaluation for `while` loops)
/// - `break_jumps`: pending jumps from break statements that need to be patched
///   to jump past the loop's else block
/// - `has_iterator_on_stack`: whether this loop has an iterator on the stack that
///   needs to be popped on break (true for `for` loops, false for `while` loops)
struct LoopInfo {
    /// Bytecode offset of loop start (for continue).
    start: usize,
    /// Jump labels that need patching to loop end (for break).
    break_jumps: Vec<JumpLabel>,
    /// Whether this loop has an iterator on the stack.
    /// True for `for` loops, false for `while` loops.
    has_iterator_on_stack: bool,
}

/// A break or continue that needs to go through a finally block.
///
/// When break/continue is inside a try-finally, we need to run the finally block
/// before executing the break/continue. This struct tracks the jump and which
/// loop it targets.
struct BreakContinueThruFinally {
    /// The jump instruction that needs to be patched.
    jump: JumpLabel,
    /// The loop depth (index in loop_stack) being targeted.
    target_loop_depth: usize,
}

/// Tracks a finally block for handling returns/break/continue inside try-finally.
///
/// When compiling a try-finally, we push a `FinallyTarget` to track jumps
/// from return/break/continue statements that need to go through the finally block.
struct FinallyTarget {
    /// Jump labels for returns inside the try block that need to go to finally.
    return_jumps: Vec<JumpLabel>,
    /// Break statements that need to go through this finally block.
    break_jumps: Vec<BreakContinueThruFinally>,
    /// Continue statements that need to go through this finally block.
    continue_jumps: Vec<BreakContinueThruFinally>,
    /// The loop depth when this finally was entered.
    /// Used to determine if break/continue targets a loop outside this finally.
    loop_depth_at_entry: usize,
}

/// Result of module compilation: the module code and all compiled functions.
pub struct CompileResult {
    /// The compiled module code.
    pub code: Code,
    /// All functions compiled during module compilation, indexed by their function ID.
    pub functions: Vec<Function>,
}

impl<'a> Compiler<'a> {
    /// Creates a new compiler with access to the string interner.
    ///
    /// `functions` seeds the function table; newly compiled functions are
    /// appended after it. The compiler starts in function scope
    /// (`is_module_scope == false`); module compilation flips the flag itself.
    fn new(interns: &'a Interns, functions: Vec<Function>) -> Self {
        Self {
            code: CodeBuilder::new(),
            interns,
            functions,
            loop_stack: Vec::new(),
            finally_targets: Vec::new(),
            except_handler_depth: 0,
            is_module_scope: false,
        }
    }

    /// Compiles module-level code (a sequence of statements).
    ///
    /// Returns the compiled module Code and all compiled Functions, or a compile
    /// error if limits were exceeded. The emitted code always ends with
    /// `LoadNone` + `ReturnValue`, so a module that runs to completion returns None.
    pub fn compile_module(
        nodes: &[PreparedNode],
        interns: &Interns,
        num_locals: u16,
    ) -> Result<CompileResult, CompileError> {
        Self::compile_module_with_functions(nodes, interns, num_locals, Vec::new())
    }

    /// Compiles module-level code while preserving an existing function table prefix.
    ///
    /// This is used by incremental REPL compilation so previously created
    /// `FunctionId`s remain stable: new function IDs are allocated after
    /// `existing_functions.len()`.
    pub fn compile_module_with_functions(
        nodes: &[PreparedNode],
        interns: &Interns,
        num_locals: u16,
        existing_functions: Vec<Function>,
    ) -> Result<CompileResult, CompileError> {
        // Seed the function table directly instead of building an empty one
        // and overwriting it afterwards.
        let mut compiler = Compiler::new(interns, existing_functions);
        compiler.is_module_scope = true;
        compiler.compile_block(nodes)?;

        // Module returns None if no explicit return
        compiler.code.emit(Opcode::LoadNone);
        compiler.code.emit(Opcode::ReturnValue);

        Ok(CompileResult {
            code: compiler.code.build(num_locals),
            functions: compiler.functions,
        })
    }

    /// Compiles a function body to bytecode, returning the Code and any nested functions.
    ///
    /// Used internally when compiling function definitions. The function body is
    /// compiled to bytecode with an implicit `return None` at the end if there's
    /// no explicit return statement.
    ///
    /// The `functions` parameter receives any previously compiled functions, and
    /// any nested functions found in the body will be added to it.
    fn compile_function_body(
        body: &[PreparedNode],
        interns: &Interns,
        functions: Vec<Function>,
        num_locals: u16,
    ) -> Result<(Code, Vec<Function>), CompileError> {
        let mut compiler = Compiler::new(interns, functions);
        compiler.compile_block(body)?;

        // Implicit return None if no explicit return
        compiler.code.emit(Opcode::LoadNone);
        compiler.code.emit(Opcode::ReturnValue);

        Ok((compiler.code.build(num_locals), compiler.functions))
    }

    /// Compiles a block of statements.
    ///
    /// Stops at, and propagates, the first statement that fails to compile.
    fn compile_block(&mut self, nodes: &[PreparedNode]) -> Result<(), CompileError> {
        for node in nodes {
            self.compile_stmt(node)?;
        }
        Ok(())
    }

    // ========================================================================
    // Statement Compilation
    // ========================================================================

    /// Compiles a single statement.
fn compile_stmt(&mut self, node: &PreparedNode) -> Result<(), CompileError> { // Node is an alias, use qualified path for matching match node { Node::Expr(expr) => { self.compile_expr(expr)?; self.code.emit(Opcode::Pop); // Discard result } Node::Return(expr) => { self.compile_expr(expr)?; self.compile_return(); } Node::ReturnNone => { self.code.emit(Opcode::LoadNone); self.compile_return(); } Node::Assign { target, object } => { self.compile_expr(object)?; self.compile_store(target); } Node::UnpackAssign { targets, targets_position, object, } => { self.compile_expr(object)?; // Check if there's a starred target let star_idx = targets.iter().position(|t| matches!(t, UnpackTarget::Starred(_))); // Set location to targets for proper caret in tracebacks self.code.set_location(*targets_position, None); if let Some(star_idx) = star_idx { // Has starred target - use UnpackEx let before = u8::try_from(star_idx).expect("too many targets before star"); let after = u8::try_from(targets.len() - star_idx - 1).expect("too many targets after star"); self.code.emit_u8_u8(Opcode::UnpackEx, before, after); } else { // No starred target - use UnpackSequence let count = u8::try_from(targets.len()).expect("too many targets in unpack"); self.code.emit_u8(Opcode::UnpackSequence, count); } // After UnpackSequence/UnpackEx, values are on stack with first item on top // Store them in order (first target gets first item), handling nesting for target in targets { self.compile_unpack_target(target); } } Node::OpAssign { target, op, object } => { let Some(opcode) = operator_to_inplace_opcode(op) else { return Err(CompileError::new( "matrix multiplication augmented assignment (@=) is not yet supported", target.position, )); }; self.compile_name(target); self.compile_expr(object)?; self.code.emit(opcode); self.compile_store(target); } Node::SubscriptOpAssign { target, index, op, object, target_position, } => { let Some(opcode) = operator_to_inplace_opcode(op) else { return Err(CompileError::new( 
"matrix multiplication augmented assignment (@=) is not yet supported", *target_position, )); }; self.compile_name(target); self.compile_expr(index)?; self.code.emit(Opcode::Dup2); self.code.set_location(*target_position, None); self.code.emit(Opcode::BinarySubscr); self.compile_expr(object)?; self.code.emit(opcode); self.code.emit(Opcode::Rot3); self.code.set_location(*target_position, None); self.code.emit(Opcode::StoreSubscr); } Node::SubscriptAssign { target, index, value, target_position, } => { // Stack order for StoreSubscr: value, obj, index self.compile_expr(value)?; self.compile_name(target); self.compile_expr(index)?; // Set location to the target (e.g., `lst[10]`) for proper caret in tracebacks self.code.set_location(*target_position, None); self.code.emit(Opcode::StoreSubscr); } Node::AttrAssign { object, attr, target_position, value, } => { // Stack order for StoreAttr: value, obj self.compile_expr(value)?; self.compile_expr(object)?; let name_id = attr.string_id().expect("StoreAttr requires interned attr name"); // Set location to the target (e.g., `x.foo`) for proper caret in tracebacks self.code.set_location(*target_position, None); self.code.emit_u16( Opcode::StoreAttr, u16::try_from(name_id.index()).expect("name index exceeds u16"), ); } Node::If { test, body, or_else } => self.compile_if(test, body, or_else)?, Node::For { target, iter, body, or_else, } => self.compile_for(target, iter, body, or_else)?, Node::While { test, body, or_else } => self.compile_while(test, body, or_else)?, Node::Assert { test, msg } => self.compile_assert(test, msg.as_ref())?, Node::Raise(expr) => { if let Some(exc) = expr { self.compile_expr(exc)?; self.code.emit(Opcode::Raise); } else { self.code.emit(Opcode::Reraise); } } Node::FunctionDef(func_def) => self.compile_function_def(func_def)?, Node::Try(try_block) => self.compile_try(try_block)?, Node::Import { module_name, binding } => self.compile_import(*module_name, binding), Node::ImportFrom { module_name, names, 
position, } => self.compile_import_from(*module_name, names, *position), Node::Break { position } => self.compile_break(*position)?, Node::Continue { position } => self.compile_continue(*position)?, // These are handled during the prepare phase and produce no bytecode Node::Pass | Node::Global { .. } | Node::Nonlocal { .. } => {} } Ok(()) } /// Compiles a function definition. /// /// This involves: /// 1. Recursively compiling the function body to bytecode /// 2. Creating a Function struct with the compiled Code /// 3. Adding the Function to the compiler's functions vector /// 4. Emitting bytecode to evaluate defaults and create the function at runtime fn compile_function_def(&mut self, func_def: &PreparedFunctionDef) -> Result<(), CompileError> { let func_pos = func_def.name.position; // Check bytecode operand limits if func_def.default_exprs.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} default parameter values"), func_pos, )); } if func_def.free_var_enclosing_slots.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} closure variables"), func_pos, )); } // 1. Compile the function body recursively // Take ownership of functions for the recursive compile, then restore let functions = std::mem::take(&mut self.functions); let namespace_size = u16::try_from(func_def.namespace_size).expect("function namespace size exceeds u16"); let (body_code, mut functions) = Self::compile_function_body(&func_def.body, self.interns, functions, namespace_size)?; // 2. Create the compiled Function and add to the vector let func_id = functions.len(); let function = Function::new( func_def.name, func_def.signature.clone(), func_def.namespace_size, func_def.free_var_enclosing_slots.clone(), func_def.cell_var_count, func_def.cell_param_indices.clone(), func_def.default_exprs.len(), func_def.is_async, body_code, ); functions.push(function); // Restore functions to self self.functions = functions; // 3. 
Compile and push default values (evaluated at definition time) for default_expr in &func_def.default_exprs { self.compile_expr(default_expr)?; } let defaults_count = u8::try_from(func_def.default_exprs.len()).expect("function default argument count exceeds u8"); let func_id_u16 = u16::try_from(func_id).expect("function count exceeds u16"); // 4. Emit MakeFunction or MakeClosure (if has free vars) if func_def.free_var_enclosing_slots.is_empty() { // MakeFunction: func_id (u16) + defaults_count (u8) self.code.emit_u16_u8(Opcode::MakeFunction, func_id_u16, defaults_count); } else { // Push captured cells from enclosing scope for &slot in &func_def.free_var_enclosing_slots { // Load the cell reference from the enclosing namespace let slot_u16 = u16::try_from(slot.index()).expect("closure slot index exceeds u16"); self.code.emit_load_local(slot_u16); } let cell_count = u8::try_from(func_def.free_var_enclosing_slots.len()).expect("closure cell count exceeds u8"); // MakeClosure: func_id (u16) + defaults_count (u8) + cell_count (u8) self.code .emit_u16_u8_u8(Opcode::MakeClosure, func_id_u16, defaults_count, cell_count); } // 5. Store the function object to its name slot self.compile_store(&func_def.name); Ok(()) } /// Compiles a lambda expression. /// /// This is similar to `compile_function_def` but: /// - Does NOT store the function to a name slot (it stays on the stack as an expression result) /// /// The lambda's `PreparedFunctionDef` already has `` as its name. fn compile_lambda(&mut self, func_def: &PreparedFunctionDef) -> Result<(), CompileError> { let func_pos = func_def.name.position; // Check bytecode operand limits if func_def.default_exprs.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} default parameter values"), func_pos, )); } if func_def.free_var_enclosing_slots.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} closure variables"), func_pos, )); } // 1. 
Compile the function body recursively let functions = std::mem::take(&mut self.functions); let namespace_size = u16::try_from(func_def.namespace_size).expect("function namespace size exceeds u16"); let (body_code, mut functions) = Self::compile_function_body(&func_def.body, self.interns, functions, namespace_size)?; // 2. Create the compiled Function and add to the vector let func_id = functions.len(); let function = Function::new( func_def.name, func_def.signature.clone(), func_def.namespace_size, func_def.free_var_enclosing_slots.clone(), func_def.cell_var_count, func_def.cell_param_indices.clone(), func_def.default_exprs.len(), func_def.is_async, body_code, ); functions.push(function); // Restore functions to self self.functions = functions; // 3. Compile and push default values (evaluated at definition time) for default_expr in &func_def.default_exprs { self.compile_expr(default_expr)?; } let defaults_count = u8::try_from(func_def.default_exprs.len()).expect("function default argument count exceeds u8"); let func_id_u16 = u16::try_from(func_id).expect("function count exceeds u16"); // 4. Emit MakeFunction or MakeClosure (if has free vars) if func_def.free_var_enclosing_slots.is_empty() { // MakeFunction: func_id (u16) + defaults_count (u8) self.code.emit_u16_u8(Opcode::MakeFunction, func_id_u16, defaults_count); } else { // Push captured cells from enclosing scope for &slot in &func_def.free_var_enclosing_slots { let slot_u16 = u16::try_from(slot.index()).expect("closure slot index exceeds u16"); self.code.emit_load_local(slot_u16); } let cell_count = u8::try_from(func_def.free_var_enclosing_slots.len()).expect("closure cell count exceeds u8"); // MakeClosure: func_id (u16) + defaults_count (u8) + cell_count (u8) self.code .emit_u16_u8_u8(Opcode::MakeClosure, func_id_u16, defaults_count, cell_count); } // NOTE: Unlike compile_function_def, we do NOT call compile_store here. // The function object stays on the stack as an expression result. 
Ok(()) } /// Compiles an import statement. /// /// Emits `LoadModule` to create the module, then stores it to the binding name. /// If the module is unknown, emits `RaiseImportError` to defer the error to runtime. /// This allows imports inside `if TYPE_CHECKING:` blocks to compile successfully. fn compile_import(&mut self, module_name: StringId, binding: &Identifier) { let position = binding.position; self.code.set_location(position, None); // Look up the module by name if let Some(builtin_module) = BuiltinModule::from_string_id(module_name) { // Known module - emit LoadModule self.code.emit_u8(Opcode::LoadModule, builtin_module as u8); // Store to the binding (respects Local/Global/Cell scope) self.compile_store(binding); } else { // Unknown module - defer error to runtime with RaiseImportError // This allows TYPE_CHECKING imports to compile without error let name_const = self.code.add_const(Value::InternString(module_name)); self.code.emit_u16(Opcode::RaiseImportError, name_const); } } /// Compiles a `from module import name, ...` statement. /// /// Creates the module once, then loads each attribute and stores to the binding. /// Invalid attribute names will raise `AttributeError` at runtime. /// If the module is unknown, emits `RaiseImportError` to defer the error to runtime. /// This allows imports inside `if TYPE_CHECKING:` blocks to compile successfully. 
fn compile_import_from(&mut self, module_name: StringId, names: &[(StringId, Identifier)], position: CodeRange) { self.code.set_location(position, None); // Look up the module if let Some(builtin_module) = BuiltinModule::from_string_id(module_name) { // Known module - emit LoadModule self.code.emit_u8(Opcode::LoadModule, builtin_module as u8); // For each name to import for (i, (import_name, binding)) in names.iter().enumerate() { // Dup the module if this isn't the last import (last one consumes the module) if i < names.len() - 1 { self.code.emit(Opcode::Dup); } // Load the attribute from the module (raises ImportError if not found) let name_idx = u16::try_from(import_name.index()).expect("name index exceeds u16"); self.code.emit_u16(Opcode::LoadAttrImport, name_idx); // Store to the binding self.compile_store(binding); } } else { // Unknown module - defer error to runtime with RaiseImportError // This allows TYPE_CHECKING imports to compile without error let name_const = self.code.add_const(Value::InternString(module_name)); self.code.emit_u16(Opcode::RaiseImportError, name_const); } } // ======================================================================== // Expression Compilation // ======================================================================== /// Compiles an expression, leaving its value on the stack. 
fn compile_expr(&mut self, expr_loc: &ExprLoc) -> Result<(), CompileError> {
    // Set source location for traceback info. Arms that compile sub-expressions
    // re-set it afterwards, because the recursive calls overwrite it with the
    // sub-expression's own position.
    self.code.set_location(expr_loc.position, None);

    match &expr_loc.expr {
        Expr::Literal(lit) => self.compile_literal(lit),
        Expr::Name(ident) => self.compile_name(ident),
        Expr::Builtin(builtin) => {
            // Builtins are loaded from the constant pool.
            let idx = self.code.add_const(Value::Builtin(*builtin));
            self.code.emit_u16(Opcode::LoadConst, idx);
        }
        Expr::Op { left, op, right } => {
            self.compile_binary_op(left, op, right, expr_loc.position)?;
        }
        Expr::CmpOp { left, op, right } => {
            self.compile_expr(left)?;
            self.compile_expr(right)?;
            // Restore the full comparison expression's position for traceback caret range
            self.code.set_location(expr_loc.position, None);
            // ModEq needs special handling - it has a constant operand
            if let CmpOperator::ModEq(value) = op {
                let const_idx = self.code.add_const(Value::Int(*value));
                self.code.emit_u16(Opcode::CompareModEq, const_idx);
            } else {
                self.code.emit(cmp_operator_to_opcode(op));
            }
        }
        Expr::ChainCmp { left, comparisons } => {
            self.compile_chain_comparison(left, comparisons, expr_loc.position)?;
        }
        Expr::Not(operand) => {
            self.compile_expr(operand)?;
            // Restore the full expression's position for traceback caret range
            self.code.set_location(expr_loc.position, None);
            self.code.emit(Opcode::UnaryNot);
        }
        Expr::UnaryMinus(operand) => {
            self.compile_expr(operand)?;
            // Restore the full expression's position for traceback caret range
            self.code.set_location(expr_loc.position, None);
            self.code.emit(Opcode::UnaryNeg);
        }
        Expr::UnaryPlus(operand) => {
            self.compile_expr(operand)?;
            // Restore the full expression's position for traceback caret range
            self.code.set_location(expr_loc.position, None);
            self.code.emit(Opcode::UnaryPos);
        }
        Expr::UnaryInvert(operand) => {
            self.compile_expr(operand)?;
            // Restore the full expression's position for traceback caret range
            self.code.set_location(expr_loc.position, None);
            self.code.emit(Opcode::UnaryInvert);
        }
        Expr::List(elements) => {
            if has_unpack_seq(elements) {
                // Generalized path: build incrementally for PEP 448 *unpacks
                self.code.emit_u16(Opcode::BuildList, 0);
                for item in elements {
                    match item {
                        SequenceItem::Value(e) => {
                            self.compile_expr(e)?;
                            self.code.emit_u8(Opcode::ListAppend, 0);
                        }
                        SequenceItem::Unpack(e) => {
                            self.compile_expr(e)?;
                            self.code.emit(Opcode::ListExtend);
                        }
                    }
                }
            } else {
                // Fast path: all values, single BuildList.
                // Invariant: has_unpack_seq(elements) is false, so every item is Value.
                for item in elements {
                    let SequenceItem::Value(e) = item else {
                        unreachable!("list fast path: only Value items")
                    };
                    self.compile_expr(e)?;
                }
                self.code.emit_u16(
                    Opcode::BuildList,
                    u16::try_from(elements.len()).expect("elements count exceeds u16"),
                );
            }
        }
        Expr::Tuple(elements) => {
            if has_unpack_seq(elements) {
                // Generalized path: build via list then convert for PEP 448 *unpacks
                self.code.emit_u16(Opcode::BuildList, 0);
                for item in elements {
                    match item {
                        SequenceItem::Value(e) => {
                            self.compile_expr(e)?;
                            self.code.emit_u8(Opcode::ListAppend, 0);
                        }
                        SequenceItem::Unpack(e) => {
                            self.compile_expr(e)?;
                            self.code.emit(Opcode::ListExtend);
                        }
                    }
                }
                self.code.emit(Opcode::ListToTuple);
            } else {
                // Fast path: all values, single BuildTuple.
                // Invariant: has_unpack_seq(elements) is false, so every item is Value.
                for item in elements {
                    let SequenceItem::Value(e) = item else {
                        unreachable!("tuple fast path: only Value items")
                    };
                    self.compile_expr(e)?;
                }
                self.code.emit_u16(
                    Opcode::BuildTuple,
                    u16::try_from(elements.len()).expect("elements count exceeds u16"),
                );
            }
        }
        Expr::Dict(dict_items) => {
            if has_unpack_dict(dict_items) {
                // Generalized path: build incrementally for PEP 448 **unpacks
                self.code.emit_u16(Opcode::BuildDict, 0);
                for item in dict_items {
                    match item {
                        DictItem::Pair(key, value) => {
                            self.compile_expr(key)?;
                            self.compile_expr(value)?;
                            // depth=0: dict is at TOS after key/value are popped
                            self.code.emit_u8(Opcode::DictSetItem, 0);
                        }
                        DictItem::Unpack(e) => {
                            self.compile_expr(e)?;
                            // depth=0: dict is directly below mapping on stack
                            self.code.emit_u8(Opcode::DictUpdate, 0);
                        }
                    }
                }
            } else {
                // Fast path: all pairs, single BuildDict.
                // Invariant: has_unpack_dict(dict_items) is false, so every item is Pair.
                for item in dict_items {
                    let DictItem::Pair(key, value) = item else {
                        unreachable!("dict fast path: only Pair items")
                    };
                    self.compile_expr(key)?;
                    self.compile_expr(value)?;
                }
                // The operand is the number of key/value pairs, not the number of stack items.
                self.code.emit_u16(
                    Opcode::BuildDict,
                    u16::try_from(dict_items.len()).expect("pairs count exceeds u16"),
                );
            }
        }
        Expr::Set(elements) => {
            if has_unpack_seq(elements) {
                // Generalized path: build incrementally for PEP 448 *unpacks
                self.code.emit_u16(Opcode::BuildSet, 0);
                for item in elements {
                    match item {
                        SequenceItem::Value(e) => {
                            self.compile_expr(e)?;
                            self.code.emit_u8(Opcode::SetAdd, 0);
                        }
                        SequenceItem::Unpack(e) => {
                            self.compile_expr(e)?;
                            self.code.emit_u8(Opcode::SetExtend, 0);
                        }
                    }
                }
            } else {
                // Fast path: all values, single BuildSet.
                // Invariant: has_unpack_seq(elements) is false, so every item is Value.
                for item in elements {
                    let SequenceItem::Value(e) = item else {
                        unreachable!("set fast path: only Value items")
                    };
                    self.compile_expr(e)?;
                }
                self.code.emit_u16(
                    Opcode::BuildSet,
                    u16::try_from(elements.len()).expect("elements count exceeds u16"),
                );
            }
        }
        Expr::Subscript { object, index } => {
            self.compile_expr(object)?;
            self.compile_expr(index)?;
            // Restore the full subscript expression's position for traceback
            self.code.set_location(expr_loc.position, None);
            self.code.emit(Opcode::BinarySubscr);
        }
        Expr::IfElse { test, body, orelse } => {
            self.compile_if_else_expr(test, body, orelse)?;
        }
        Expr::AttrGet { object, attr } => {
            self.compile_expr(object)?;
            // Restore the full expression's position for traceback caret range
            self.code.set_location(expr_loc.position, None);
            // The attribute name is passed to LoadAttr as its interned-string index.
            let name_id = attr.string_id().expect("LoadAttr requires interned attr name");
            self.code.emit_u16(
                Opcode::LoadAttr,
                u16::try_from(name_id.index()).expect("name index exceeds u16"),
            );
        }
        Expr::Call { callable, args } => {
            self.compile_call(callable, args, expr_loc.position)?;
        }
        Expr::AttrCall { object, attr, args } => {
            // Compile the object (will be on the stack)
            self.compile_expr(object)?;
            // Compile the attribute call arguments and emit CallAttr
            self.compile_method_call(attr, args, expr_loc.position)?;
        }
        Expr::IndirectCall { callable, args } => {
            // Compile the callable expression (e.g., a lambda)
            self.compile_expr(callable)?;
            // Compile arguments and emit the call
            self.compile_call_args(args, expr_loc.position)?;
        }
        Expr::FString(parts) => {
            // Compile each part and build the f-string
            let part_count = self.compile_fstring_parts(parts)?;
            self.code.emit_u16(Opcode::BuildFString, part_count);
        }
        Expr::ListComp { elt, generators } => {
            self.compile_list_comp(elt, generators)?;
        }
        Expr::SetComp { elt, generators } => {
            self.compile_set_comp(elt, generators)?;
        }
        Expr::DictComp { key, value, generators } => {
            self.compile_dict_comp(key, value, generators)?;
        }
        Expr::Lambda { func_def } => {
            self.compile_lambda(func_def)?;
        }
        Expr::LambdaRaw { .. } => {
            // LambdaRaw should be converted to Lambda during prepare phase
            unreachable!("Expr::LambdaRaw should not exist after prepare phase")
        }
        Expr::Await(value) => {
            // Await expressions: compile the inner expression, then emit Await
            // Await handles ExternalFuture, Coroutine, and GatherFuture
            self.compile_expr(value)?;
            // Restore the full expression's position for traceback caret range
            self.code.set_location(expr_loc.position, None);
            self.code.emit(Opcode::Await);
        }
        Expr::Slice { lower, upper, step } => {
            // Compile slice components: start, stop, step (push None for missing)
            if let Some(lower) = lower {
                self.compile_expr(lower)?;
            } else {
                self.code.emit(Opcode::LoadNone);
            }
            if let Some(upper) = upper {
                self.compile_expr(upper)?;
            } else {
                self.code.emit(Opcode::LoadNone);
            }
            if let Some(step) = step {
                self.compile_expr(step)?;
            } else {
                self.code.emit(Opcode::LoadNone);
            }
            self.code.emit(Opcode::BuildSlice);
        }
        Expr::Named { target, value } => {
            // Compile the value expression (leaves result on stack)
            self.compile_expr(value)?;
            // Duplicate so value remains after store
            self.code.emit(Opcode::Dup);
            // Store to target (pops one copy)
            self.compile_store(target);
        }
    }
    Ok(())
}

// ========================================================================
// Literal Compilation
// ========================================================================

/// Compiles a literal value.
fn compile_literal(&mut self, literal: &Literal) { match literal { Literal::None => { self.code.emit(Opcode::LoadNone); } Literal::Bool(true) => { self.code.emit(Opcode::LoadTrue); } Literal::Bool(false) => { self.code.emit(Opcode::LoadFalse); } Literal::Int(n) => { // Use LoadSmallInt for values that fit in i8 if let Ok(small) = i8::try_from(*n) { self.code.emit_i8(Opcode::LoadSmallInt, small); } else { let idx = self.code.add_const(Value::from(*literal)); self.code.emit_u16(Opcode::LoadConst, idx); } } // For Float, Str, Bytes, Ellipsis - use LoadConst with Value::from _ => { let idx = self.code.add_const(Value::from(*literal)); self.code.emit_u16(Opcode::LoadConst, idx); } } } // ======================================================================== // Variable Operations // ======================================================================== /// Compiles loading a variable onto the stack. /// /// At module level, `Local` and `LocalUnassigned` scopes emit global opcodes /// because module-level locals live in the globals array. 
fn compile_name(&mut self, ident: &Identifier) { let slot = u16::try_from(ident.namespace_id().index()).expect("local slot exceeds u16"); match ident.scope { NameScope::Local => { // True local - register name and mark as assigned for UnboundLocalError self.code.register_local_name(slot, ident.name_id); self.code.register_assigned_local(slot); if self.is_module_scope { self.code.emit_u16(Opcode::LoadGlobal, slot); } else { self.code.emit_load_local(slot); } } NameScope::LocalUnassigned => { // Undefined reference - register name but NOT as assigned for NameError self.code.register_local_name(slot, ident.name_id); if self.is_module_scope { self.code.emit_u16(Opcode::LoadGlobal, slot); } else { self.code.emit_load_local(slot); } } NameScope::Global => { // Register the name for NameError/NameLookup messages self.code.register_local_name(slot, ident.name_id); self.code.emit_u16(Opcode::LoadGlobal, slot); } NameScope::Cell => { // Register the name for NameError messages (unbound free variable) self.code.register_local_name(slot, ident.name_id); // Emit local slot index — the VM reads the cell HeapId from the stack self.code.emit_u16(Opcode::LoadCell, slot); } } } /// Compiles loading a variable in call context (e.g., `foo()` loads `foo`). /// /// For `LocalUnassigned` and `Global` scopes, emits callable-aware load opcodes /// that push `ExtFunction(name_id)` for undefined names instead of yielding /// `NameLookup`. This allows execution to reach `CallFunction`, which naturally /// yields `FunctionCall` — giving the host a chance to handle external function calls. /// /// For `Local` and `Cell` scopes, delegates to `compile_name` since those can't /// be external functions (they're always defined locally or captured). 
fn compile_name_callable(&mut self, ident: &Identifier) { let slot = u16::try_from(ident.namespace_id().index()).expect("local slot exceeds u16"); match ident.scope { NameScope::LocalUnassigned => { // Undefined reference in call context - use callable-aware load. // At module level, use global callable since locals are in the globals array. self.code.register_local_name(slot, ident.name_id); if self.is_module_scope { self.code.emit_load_global_callable(slot, ident.name_id); } else { self.code.emit_load_local_callable(slot, ident.name_id); } } NameScope::Global => { // Global scope - name_id is encoded in the operand because global slot // indices are in a different namespace from local slots, so looking up // the name from the current frame's local_names would be incorrect self.code.emit_load_global_callable(slot, ident.name_id); } // Local and Cell can't be external functions - use regular load NameScope::Local | NameScope::Cell => self.compile_name(ident), } } /// Compiles storing the top of stack to a variable. /// /// At module level, `Local` and `LocalUnassigned` scopes emit `StoreGlobal` /// because module-level locals live in the globals array. fn compile_store(&mut self, target: &Identifier) { let slot = u16::try_from(target.namespace_id().index()).expect("local slot exceeds u16"); match target.scope { NameScope::Local | NameScope::LocalUnassigned => { self.code.register_local_name(slot, target.name_id); if self.is_module_scope { self.code.emit_u16(Opcode::StoreGlobal, slot); } else { self.code.emit_store_local(slot); } } NameScope::Global => { self.code.emit_u16(Opcode::StoreGlobal, slot); } NameScope::Cell => { // Emit local slot index — the VM reads the cell HeapId from the stack self.code.emit_u16(Opcode::StoreCell, slot); } } } // ======================================================================== // Binary Operator Compilation // ======================================================================== /// Compiles a binary operation. 
///
/// `parent_pos` is the position of the full binary expression (e.g., `1 / 0`),
/// which we restore before emitting the opcode so tracebacks show the right range.
fn compile_binary_op(
    &mut self,
    left: &ExprLoc,
    op: &Operator,
    right: &ExprLoc,
    parent_pos: CodeRange,
) -> Result<(), CompileError> {
    // `and` / `or` short-circuit: the jump keeps the left value as the result
    // when it already decides the outcome, otherwise pops it and falls through.
    let short_circuit = match op {
        Operator::And => Some(Opcode::JumpIfFalseOrPop),
        Operator::Or => Some(Opcode::JumpIfTrueOrPop),
        _ => None,
    };

    self.compile_expr(left)?;

    if let Some(jump_opcode) = short_circuit {
        let end_jump = self.code.emit_jump(jump_opcode);
        self.compile_expr(right)?;
        self.code.patch_jump(end_jump);
    } else {
        // Regular operator: both operands on the stack, then the opcode.
        self.compile_expr(right)?;
        // Restore the full expression's position for traceback caret range
        self.code.set_location(parent_pos, None);
        self.code.emit(operator_to_opcode(op));
    }
    Ok(())
}

/// Compiles a chain comparison expression like `a < b < c < d`.
///
/// Chain comparisons evaluate each intermediate value only once and short-circuit
/// on the first false result. Uses stack manipulation to avoid namespace pollution.
/// /// Bytecode strategy for `a < b < c`: /// ```text /// eval a # Stack: [a] /// eval b # Stack: [a, b] /// Dup # Stack: [a, b, b] /// Rot3 # Stack: [b, a, b] /// CompareLt # Stack: [b, result1] /// JumpIfFalseOrPop # if false: jump to cleanup; if true: pop, stack=[b] /// eval c # Stack: [b, c] /// CompareLt # Stack: [result2] /// Jump @end /// @cleanup: # Stack: [b, False] /// Rot2 # Stack: [False, b] /// Pop # Stack: [False] /// @end: /// ``` fn compile_chain_comparison( &mut self, left: &ExprLoc, comparisons: &[(CmpOperator, ExprLoc)], position: CodeRange, ) -> Result<(), CompileError> { let n = comparisons.len(); // Remember stack depth before the chain for cleanup calculation let base_depth = self.code.stack_depth(); // Compile leftmost operand self.compile_expr(left)?; // Track jump targets for short-circuit cleanup let mut cleanup_jumps = Vec::with_capacity(n - 1); for (i, (op, right)) in comparisons.iter().enumerate() { let is_last = i == n - 1; // Compile the right operand self.compile_expr(right)?; if !is_last { // Keep a copy of the intermediate for the next comparison self.code.emit(Opcode::Dup); // Reorder: [prev, curr, curr] -> [curr, prev, curr] self.code.emit(Opcode::Rot3); } // Emit comparison self.code.set_location(position, None); if let CmpOperator::ModEq(value) = op { let const_idx = self.code.add_const(Value::Int(*value)); self.code.emit_u16(Opcode::CompareModEq, const_idx); } else { self.code.emit(cmp_operator_to_opcode(op)); } if !is_last { // Short-circuit: if false, jump to cleanup let jump = self.code.emit_jump(Opcode::JumpIfFalseOrPop); cleanup_jumps.push(jump); } } // Jump past cleanup (result already on stack) let end_jump = self.code.emit_jump(Opcode::Jump); // Cleanup: remove the saved intermediate value, keep False result // The cleanup is only reached via JumpIfFalseOrPop which doesn't pop, // so the stack has: [intermediate, False] (2 extra items from base) for jump in cleanup_jumps { self.code.patch_jump(jump); } 
self.code.set_stack_depth(base_depth + 2); // [intermediate, False] self.code.emit(Opcode::Rot2); // [False, intermediate] self.code.emit(Opcode::Pop); // [False] self.code.patch_jump(end_jump); // Final result is on stack: base_depth + 1 self.code.set_stack_depth(base_depth + 1); Ok(()) } // ======================================================================== // Control Flow Compilation // ======================================================================== /// Compiles an if/else statement. fn compile_if( &mut self, test: &ExprLoc, body: &[PreparedNode], or_else: &[PreparedNode], ) -> Result<(), CompileError> { self.compile_expr(test)?; if or_else.is_empty() { // Simple if without else let end_jump = self.code.emit_jump(Opcode::JumpIfFalse); self.compile_block(body)?; self.code.patch_jump(end_jump); } else { // If with else let else_jump = self.code.emit_jump(Opcode::JumpIfFalse); self.compile_block(body)?; let end_jump = self.code.emit_jump(Opcode::Jump); self.code.patch_jump(else_jump); self.compile_block(or_else)?; self.code.patch_jump(end_jump); } Ok(()) } /// Compiles a ternary conditional expression. fn compile_if_else_expr(&mut self, test: &ExprLoc, body: &ExprLoc, orelse: &ExprLoc) -> Result<(), CompileError> { self.compile_expr(test)?; let else_jump = self.code.emit_jump(Opcode::JumpIfFalse); self.compile_expr(body)?; let end_jump = self.code.emit_jump(Opcode::Jump); self.code.patch_jump(else_jump); self.compile_expr(orelse)?; self.code.patch_jump(end_jump); Ok(()) } /// Compiles a function call expression. /// /// For builtin calls with positional-only arguments, emits the optimized `CallBuiltin` /// opcode which avoids pushing/popping the callable on the stack. /// /// For other calls, pushes the callable onto the stack, then all arguments, then emits /// `CallFunction` or `CallFunctionKw`. /// /// The `call_pos` is the position of the full call expression for proper traceback caret. 
fn compile_call(&mut self, callable: &Callable, args: &ArgExprs, call_pos: CodeRange) -> Result<(), CompileError> { // Check if we can use the optimized CallBuiltinFunction path: // - Callable must be a builtin function (known at compile time) // - Arguments must be positional-only (Empty, One, Two, or Args) if let Callable::Builtin(Builtins::Function(builtin_func)) = callable && let Some(arg_count) = self.compile_builtin_call(args, call_pos)? { // Optimization applied - CallBuiltinFunction emitted self.code.set_location(call_pos, None); self.code.emit_call_builtin_function(*builtin_func as u8, arg_count); return Ok(()); } // Fall through to standard path for kwargs/unpacking // Check if we can use the optimized CallBuiltinType path: // - Callable must be a builtin type constructor (known at compile time) // - Arguments must be positional-only (Empty, One, Two, or Args) if let Callable::Builtin(Builtins::Type(t)) = callable && let Some(type_id) = t.callable_to_u8() && let Some(arg_count) = self.compile_builtin_call(args, call_pos)? 
{ // Optimization applied - CallBuiltinType emitted self.code.set_location(call_pos, None); self.code.emit_call_builtin_type(type_id, arg_count); return Ok(()); } // Fall through to standard path for kwargs/unpacking or non-callable types // Standard path: push callable, compile args, emit CallFunction/CallFunctionKw // Push the callable (use name position for NameError caret range) match callable { Callable::Builtin(builtin) => { let idx = self.code.add_const(Value::Builtin(*builtin)); self.code.emit_u16(Opcode::LoadConst, idx); } Callable::Name(ident) => { // Use callable-aware load opcodes so undefined names produce ExtFunction // instead of yielding NameLookup, allowing CallFunction to yield FunctionCall self.code.set_location(ident.position, None); self.compile_name_callable(ident); } } // Compile arguments and emit the call // Restore full call position before CallFunction for call-related errors match args { ArgExprs::Empty => { self.code.set_location(call_pos, None); self.code.emit_u8(Opcode::CallFunction, 0); } ArgExprs::One(arg) => { self.compile_expr(arg)?; self.code.set_location(call_pos, None); self.code.emit_u8(Opcode::CallFunction, 1); } ArgExprs::Two(arg1, arg2) => { self.compile_expr(arg1)?; self.compile_expr(arg2)?; self.code.set_location(call_pos, None); self.code.emit_u8(Opcode::CallFunction, 2); } ArgExprs::Args(args) => { // Check argument count limit before compiling if args.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} positional arguments in function call"), call_pos, )); } for arg in args { self.compile_expr(arg)?; } let arg_count = u8::try_from(args.len()).expect("argument count exceeds u8"); self.code.set_location(call_pos, None); self.code.emit_u8(Opcode::CallFunction, arg_count); } ArgExprs::Kwargs(kwargs) => { // Check keyword argument count limit if kwargs.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} keyword arguments in function call"), call_pos, 
)); } // Keyword-only call: compile kwarg values and emit CallFunctionKw let mut kwname_ids = Vec::with_capacity(kwargs.len()); for kwarg in kwargs { self.compile_expr(&kwarg.value)?; kwname_ids.push(u16::try_from(kwarg.key.name_id.index()).expect("name index exceeds u16")); } self.code.set_location(call_pos, None); self.code.emit_call_function_kw(0, &kwname_ids); } ArgExprs::ArgsKargs { args, var_args, kwargs, var_kwargs, } => { // Mixed positional and keyword arguments - may include *args or **kwargs unpacking if var_args.is_some() || var_kwargs.is_some() { // Use CallFunctionEx for unpacking - no limit on this path since // args are built into a tuple dynamically at runtime self.compile_call_with_unpacking( callable, args.as_ref(), var_args.as_ref(), kwargs.as_ref(), var_kwargs.as_ref(), call_pos, )?; } else { // No unpacking - use CallFunctionKw for efficiency // Check limits before compiling let pos_count = args.as_ref().map_or(0, Vec::len); let kw_count = kwargs.as_ref().map_or(0, Vec::len); if pos_count > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} positional arguments in function call"), call_pos, )); } if kw_count > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} keyword arguments in function call"), call_pos, )); } // Compile positional args if let Some(args) = args { for arg in args { self.compile_expr(arg)?; } } // Compile kwarg values and collect names let mut kwname_ids = Vec::new(); if let Some(kwargs) = kwargs { for kwarg in kwargs { self.compile_expr(&kwarg.value)?; kwname_ids.push(u16::try_from(kwarg.key.name_id.index()).expect("name index exceeds u16")); } } self.code.set_location(call_pos, None); self.code.emit_call_function_kw( u8::try_from(pos_count).expect("positional arg count exceeds u8"), &kwname_ids, ); } } ArgExprs::GeneralizedCall { args, kwargs } => { // PEP 448: generalized unpacking — multiple *args or **kwargs. 
// Callable was already pushed above this match; delegate to the helper. let func_name_id = self.get_callable_name_id(callable); self.compile_generalized_call_body(args, kwargs, func_name_id, call_pos)?; } } Ok(()) } /// Compiles function call arguments and emits the call instruction. /// /// This is used when the callable is already on the stack (e.g., from compiling an expression). /// It compiles the arguments, then emits `CallFunction` or `CallFunctionKw` as appropriate. fn compile_call_args(&mut self, args: &ArgExprs, call_pos: CodeRange) -> Result<(), CompileError> { match args { ArgExprs::Empty => { self.code.set_location(call_pos, None); self.code.emit_u8(Opcode::CallFunction, 0); } ArgExprs::One(arg) => { self.compile_expr(arg)?; self.code.set_location(call_pos, None); self.code.emit_u8(Opcode::CallFunction, 1); } ArgExprs::Two(arg1, arg2) => { self.compile_expr(arg1)?; self.compile_expr(arg2)?; self.code.set_location(call_pos, None); self.code.emit_u8(Opcode::CallFunction, 2); } ArgExprs::Args(args) => { if args.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} positional arguments in function call"), call_pos, )); } for arg in args { self.compile_expr(arg)?; } let arg_count = u8::try_from(args.len()).expect("argument count exceeds u8"); self.code.set_location(call_pos, None); self.code.emit_u8(Opcode::CallFunction, arg_count); } ArgExprs::Kwargs(kwargs) => { if kwargs.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} keyword arguments in function call"), call_pos, )); } let mut kwname_ids = Vec::with_capacity(kwargs.len()); for kwarg in kwargs { self.compile_expr(&kwarg.value)?; kwname_ids.push(u16::try_from(kwarg.key.name_id.index()).expect("name index exceeds u16")); } self.code.set_location(call_pos, None); self.code.emit_call_function_kw(0, &kwname_ids); } ArgExprs::ArgsKargs { args, kwargs, var_args, var_kwargs, } => { // Mixed positional and keyword arguments - may 
include *args or **kwargs unpacking if var_args.is_some() || var_kwargs.is_some() { // Use CallFunctionExtended for unpacking - no limit on this path since // args are built into a tuple dynamically at runtime. // Callable is already on stack, so we just need to build args and kwargs. self.compile_call_args_with_unpacking( args.as_ref(), var_args.as_ref(), kwargs.as_ref(), var_kwargs.as_ref(), call_pos, )?; } else { // No unpacking - use CallFunctionKw for efficiency let pos_args = args.as_deref().unwrap_or(&[]); let kw_args = kwargs.as_deref().unwrap_or(&[]); let pos_count = pos_args.len(); let kw_count = kw_args.len(); // Check limits separately (same as direct calls) if pos_count > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} positional arguments in function call"), call_pos, )); } if kw_count > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} keyword arguments in function call"), call_pos, )); } // Compile positional args for arg in pos_args { self.compile_expr(arg)?; } // Compile keyword args let mut kwname_ids = Vec::with_capacity(kw_count); for kwarg in kw_args { self.compile_expr(&kwarg.value)?; kwname_ids.push(u16::try_from(kwarg.key.name_id.index()).expect("name index exceeds u16")); } self.code.set_location(call_pos, None); self.code.emit_call_function_kw( u8::try_from(pos_count).expect("positional arg count exceeds u8"), &kwname_ids, ); } } ArgExprs::GeneralizedCall { args, kwargs } => { // PEP 448: generalized unpacking — callable is already on the stack. // Use 0xFFFF as func_name_id since we don't know the callee name here. self.compile_generalized_call_body(args, kwargs, 0xFFFF, call_pos)?; } } Ok(()) } /// Compiles arguments with `*args` and/or `**kwargs` unpacking when callable is already on stack. /// /// This is used for expression calls (e.g., `(lambda *a: a)(*xs)`) where the callable /// is compiled as an expression and is already on the stack. 
///
/// Stack layout: callable (on stack) -> callable, args_tuple, kwargs_dict?
///
/// The kwargs dict is pushed only when `kwargs` or `var_kwargs` is present; the
/// operand of `CallFunctionExtended` is 1 in that case and 0 otherwise.
/// `call_pos` becomes the current location right before the call instruction.
fn compile_call_args_with_unpacking(
    &mut self,
    args: Option<&Vec>,
    var_args: Option<&ExprLoc>,
    kwargs: Option<&Vec>,
    var_kwargs: Option<&ExprLoc>,
    call_pos: CodeRange,
) -> Result<(), CompileError> {
    // 1. Build args tuple
    // Push regular positional args and build list
    let pos_count = args.map_or(0, Vec::len);
    if let Some(args) = args {
        for arg in args {
            self.compile_expr(arg)?;
        }
    }
    // With no positional args this emits BuildList with a count of 0.
    self.code.emit_u16(
        Opcode::BuildList,
        u16::try_from(pos_count).expect("positional arg count exceeds u16"),
    );

    // Extend with *args if present
    if let Some(var_args_expr) = var_args {
        self.compile_expr(var_args_expr)?;
        self.code.emit(Opcode::ListExtend);
    }

    // Convert list to tuple
    self.code.emit(Opcode::ListToTuple);

    // 2. Build kwargs dict (if we have kwargs or var_kwargs)
    let has_kwargs = kwargs.is_some() || var_kwargs.is_some();
    if has_kwargs {
        // Build dict from regular kwargs
        let kw_count = kwargs.map_or(0, Vec::len);
        if let Some(kwargs) = kwargs {
            for kwarg in kwargs {
                // Push key as interned string constant
                let key_const = self.code.add_const(Value::InternString(kwarg.key.name_id));
                self.code.emit_u16(Opcode::LoadConst, key_const);
                // Push value
                self.compile_expr(&kwarg.value)?;
            }
        }
        // The operand is the number of key/value pairs pushed above.
        self.code.emit_u16(
            Opcode::BuildDict,
            u16::try_from(kw_count).expect("keyword count exceeds u16"),
        );

        // Merge **kwargs if present
        // Use 0xFFFF for func_name_id (like builtins) since we don't have a name
        if let Some(var_kwargs_expr) = var_kwargs {
            self.compile_expr(var_kwargs_expr)?;
            self.code.emit_u16(Opcode::DictMerge, 0xFFFF);
        }
    }

    // 3. Call the function
    self.code.set_location(call_pos, None);
    // flags: 1 when a kwargs dict was pushed, 0 otherwise
    let flags = u8::from(has_kwargs);
    self.code.emit_u8(Opcode::CallFunctionExtended, flags);
    Ok(())
}

/// Compiles arguments for a builtin call and returns the arg count if optimization can be used.
///
/// Returns `Some(arg_count)` if the call uses positional-only arguments (CallBuiltinFunction applicable).
/// Returns `None` if the call uses kwargs or unpacking (must use standard CallFunction path).
///
/// When `Some` is returned, arguments have been compiled onto the stack.
/// When `None` is returned, nothing has been emitted, so the caller can still
/// push the callable first.
///
/// Returns a `CompileError` if an `Args` call has more than `MAX_CALL_ARGS` arguments.
fn compile_builtin_call(&mut self, args: &ArgExprs, call_pos: CodeRange) -> Result, CompileError> {
    match args {
        ArgExprs::Empty => Ok(Some(0)),
        ArgExprs::One(arg) => {
            self.compile_expr(arg)?;
            Ok(Some(1))
        }
        ArgExprs::Two(arg1, arg2) => {
            self.compile_expr(arg1)?;
            self.compile_expr(arg2)?;
            Ok(Some(2))
        }
        ArgExprs::Args(args) => {
            // Enforce the limit before compiling any argument.
            if args.len() > MAX_CALL_ARGS {
                return Err(CompileError::new(
                    format!("more than {MAX_CALL_ARGS} positional arguments in function call"),
                    call_pos,
                ));
            }
            for arg in args {
                self.compile_expr(arg)?;
            }
            Ok(Some(u8::try_from(args.len()).expect("argument count exceeds u8")))
        }
        // Kwargs or unpacking - fall back to standard path
        ArgExprs::Kwargs(_) | ArgExprs::ArgsKargs { .. } | ArgExprs::GeneralizedCall { .. } => Ok(None),
    }
}

/// Compiles a function call with `*args` and/or `**kwargs` unpacking.
///
/// This generates bytecode to build an args tuple and kwargs dict dynamically,
/// then calls the function using `CallFunctionExtended`.
///
/// `callable` is used only to look up the name passed to `DictMerge`; it is
/// not compiled here.
///
/// Stack layout for call:
/// - callable (already on stack)
/// - args tuple
/// - kwargs dict (if present)
fn compile_call_with_unpacking(
    &mut self,
    callable: &Callable,
    args: Option<&Vec>,
    var_args: Option<&ExprLoc>,
    kwargs: Option<&Vec>,
    var_kwargs: Option<&ExprLoc>,
    call_pos: CodeRange,
) -> Result<(), CompileError> {
    // Get function name for error messages. Builtins use their real interned name
    // so duplicate-kwargs errors from **unpacking match CPython.
    let func_name_id = self.get_callable_name_id(callable);

    // 1. Build args tuple
    // Push regular positional args and build list
    let pos_count = args.map_or(0, Vec::len);
    if let Some(args) = args {
        for arg in args {
            self.compile_expr(arg)?;
        }
    }
    // With no positional args this emits BuildList with a count of 0.
    self.code.emit_u16(
        Opcode::BuildList,
        u16::try_from(pos_count).expect("positional arg count exceeds u16"),
    );

    // Extend with *args if present
    if let Some(var_args_expr) = var_args {
        self.compile_expr(var_args_expr)?;
        self.code.emit(Opcode::ListExtend);
    }

    // Convert list to tuple
    self.code.emit(Opcode::ListToTuple);

    // 2. Build kwargs dict (if we have kwargs or var_kwargs)
    let has_kwargs = kwargs.is_some() || var_kwargs.is_some();
    if has_kwargs {
        // Build dict from regular kwargs
        let kw_count = kwargs.map_or(0, Vec::len);
        if let Some(kwargs) = kwargs {
            for kwarg in kwargs {
                // Push key as interned string constant
                let key_const = self.code.add_const(Value::InternString(kwarg.key.name_id));
                self.code.emit_u16(Opcode::LoadConst, key_const);
                // Push value
                self.compile_expr(&kwarg.value)?;
            }
        }
        // The operand is the number of key/value pairs pushed above.
        self.code.emit_u16(
            Opcode::BuildDict,
            u16::try_from(kw_count).expect("keyword count exceeds u16"),
        );

        // Merge **kwargs if present; the operand carries the callee name id
        if let Some(var_kwargs_expr) = var_kwargs {
            self.compile_expr(var_kwargs_expr)?;
            self.code.emit_u16(Opcode::DictMerge, func_name_id);
        }
    }

    // 3. Call the function
    self.code.set_location(call_pos, None);
    // flags: 1 when a kwargs dict was pushed, 0 otherwise
    let flags = u8::from(has_kwargs);
    self.code.emit_u8(Opcode::CallFunctionExtended, flags);
    Ok(())
}

/// Returns the best available function name id for call-site error messages.
///
/// This is primarily used by `DictMerge` during `**kwargs` unpacking so
/// duplicate-key and non-mapping errors can mention the actual callee name.
/// When the callable is not a named local/global, we still try to resolve
/// builtin functions, builtin exception constructors, and builtin types to
/// their interned public names.
fn get_callable_name_id(&self, callable: &Callable) -> u16 {
    match callable {
        // Named callables carry their interned name directly.
        Callable::Name(ident) => u16::try_from(ident.name_id.index()).expect("name index exceeds u16"),
        // Builtins are looked up by public name; `0xFFFF` marks "name unknown".
        Callable::Builtin(builtin) => self.get_builtin_name_id(*builtin).unwrap_or(0xFFFF),
    }
}

/// Resolves a builtin callable to its interned public name, if available.
///
/// Returns `None` when the builtin has no public name, when that name has not
/// been interned, or when its index does not fit in a `u16`. The caller
/// (`get_callable_name_id`) then substitutes the `0xFFFF` "unknown" sentinel,
/// which is still correct but gives less helpful error messages. In practice
/// these names should already be interned during preparation because builtin
/// names are resolved from source.
fn get_builtin_name_id(&self, builtin: Builtins) -> Option<u16> {
    let name_id = match builtin {
        Builtins::Function(function) => {
            let name: &'static str = function.into();
            self.interns.get_string_id_by_name(name)?
        }
        Builtins::ExcType(exc_type) => self.interns.get_string_id_by_name(&exc_type.to_string())?,
        Builtins::Type(type_) => {
            // Not every builtin type exposes a public name.
            let name = type_.builtin_name()?;
            self.interns.get_string_id_by_name(name)?
        }
    };
    u16::try_from(name_id.index()).ok()
}

/// Compiles an attribute call on an object.
///
/// The object should already be on the stack. This compiles the arguments
/// and emits a CallAttr opcode with the attribute name and arg count.
fn compile_method_call( &mut self, attr: &EitherStr, args: &ArgExprs, call_pos: CodeRange, ) -> Result<(), CompileError> { // Get the interned attribute name let name_id = attr.string_id().expect("CallAttr requires interned attr name"); // Compile arguments based on the argument type match args { ArgExprs::Empty => { self.code.set_location(call_pos, None); self.code.emit_u16_u8( Opcode::CallAttr, u16::try_from(name_id.index()).expect("name index exceeds u16"), 0, ); } ArgExprs::One(arg) => { self.compile_expr(arg)?; self.code.set_location(call_pos, None); self.code.emit_u16_u8( Opcode::CallAttr, u16::try_from(name_id.index()).expect("name index exceeds u16"), 1, ); } ArgExprs::Two(arg1, arg2) => { self.compile_expr(arg1)?; self.compile_expr(arg2)?; self.code.set_location(call_pos, None); self.code.emit_u16_u8( Opcode::CallAttr, u16::try_from(name_id.index()).expect("name index exceeds u16"), 2, ); } ArgExprs::Args(args) => { // Check argument count limit if args.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} arguments in method call"), call_pos, )); } for arg in args { self.compile_expr(arg)?; } let arg_count = u8::try_from(args.len()).expect("argument count exceeds u8"); self.code.set_location(call_pos, None); self.code.emit_u16_u8( Opcode::CallAttr, u16::try_from(name_id.index()).expect("name index exceeds u16"), arg_count, ); } ArgExprs::Kwargs(kwargs) => { // Keyword-only method call if kwargs.len() > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} keyword arguments in method call"), call_pos, )); } // Compile kwarg values and collect names let mut kwname_ids = Vec::with_capacity(kwargs.len()); for kwarg in kwargs { self.compile_expr(&kwarg.value)?; kwname_ids.push(u16::try_from(kwarg.key.name_id.index()).expect("name index exceeds u16")); } self.code.set_location(call_pos, None); self.code.emit_call_attr_kw( u16::try_from(name_id.index()).expect("name index exceeds u16"), 0, // no 
positional args &kwname_ids, ); } ArgExprs::ArgsKargs { args, kwargs, var_args, var_kwargs, } => { // Check if there's unpacking - use CallAttrExtended if var_args.is_some() || var_kwargs.is_some() { return self.compile_method_call_with_unpacking( name_id, args.as_ref(), var_args.as_ref(), kwargs.as_ref(), var_kwargs.as_ref(), call_pos, ); } // No unpacking - use CallAttrKw for efficiency let pos_count = args.as_ref().map_or(0, Vec::len); let kw_count = kwargs.as_ref().map_or(0, Vec::len); if pos_count > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} positional arguments in method call"), call_pos, )); } if kw_count > MAX_CALL_ARGS { return Err(CompileError::new( format!("more than {MAX_CALL_ARGS} keyword arguments in method call"), call_pos, )); } // Compile positional args if let Some(args) = args { for arg in args { self.compile_expr(arg)?; } } // Compile kwarg values and collect names let mut kwname_ids = Vec::new(); if let Some(kwargs) = kwargs { for kwarg in kwargs { self.compile_expr(&kwarg.value)?; kwname_ids.push(u16::try_from(kwarg.key.name_id.index()).expect("name index exceeds u16")); } } self.code.set_location(call_pos, None); self.code.emit_call_attr_kw( u16::try_from(name_id.index()).expect("name index exceeds u16"), u8::try_from(pos_count).expect("positional arg count exceeds u8"), &kwname_ids, ); } ArgExprs::GeneralizedCall { args, kwargs } => { // PEP 448: generalized unpacking on a method call. // Receiver is already on the stack; build args tuple and kwargs dict, // then emit CallAttrExtended. let func_name_id = u16::try_from(name_id.index()).expect("name index exceeds u16"); let has_kwargs = !kwargs.is_empty(); // 1. 
Build args tuple self.code.emit_u16(Opcode::BuildList, 0); for arg in args { match arg { CallArg::Value(e) => { self.compile_expr(e)?; self.code.emit_u8(Opcode::ListAppend, 0); } CallArg::Unpack(e) => { self.compile_expr(e)?; self.code.emit(Opcode::ListExtend); } } } self.code.emit(Opcode::ListToTuple); // 2. Build kwargs dict (if any) if has_kwargs { self.code.emit_u16(Opcode::BuildDict, 0); for kwarg in kwargs { match kwarg { CallKwarg::Named(kw) => { let key_const = self.code.add_const(Value::InternString(kw.key.name_id)); self.code.emit_u16(Opcode::LoadConst, key_const); self.compile_expr(&kw.value)?; self.code.emit_u16(Opcode::BuildDict, 1); self.code.emit_u16(Opcode::DictMerge, func_name_id); } CallKwarg::Unpack(e) => { self.compile_expr(e)?; self.code.emit_u16(Opcode::DictMerge, func_name_id); } } } } // 3. Emit CallAttrExtended self.code.set_location(call_pos, None); let flags = u8::from(has_kwargs); self.code.emit_u16_u8(Opcode::CallAttrExtended, func_name_id, flags); } } Ok(()) } /// Compiles a method call with `*args` and/or `**kwargs` unpacking. /// /// The receiver object should already be on the stack. This builds the args tuple /// and optional kwargs dict, then emits `CallAttrExtended`. fn compile_method_call_with_unpacking( &mut self, name_id: StringId, args: Option<&Vec>, var_args: Option<&ExprLoc>, kwargs: Option<&Vec>, var_kwargs: Option<&ExprLoc>, call_pos: CodeRange, ) -> Result<(), CompileError> { // 1. Build args tuple // Push regular positional args and build list let pos_count = args.map_or(0, Vec::len); if let Some(args) = args { for arg in args { self.compile_expr(arg)?; } } self.code.emit_u16( Opcode::BuildList, u16::try_from(pos_count).expect("positional arg count exceeds u16"), ); // Extend with *args if present if let Some(var_args_expr) = var_args { self.compile_expr(var_args_expr)?; self.code.emit(Opcode::ListExtend); } // Convert list to tuple self.code.emit(Opcode::ListToTuple); // 2. 
Build kwargs dict (if we have kwargs or var_kwargs) let has_kwargs = kwargs.is_some() || var_kwargs.is_some(); if has_kwargs { // Build dict from regular kwargs let kw_count = kwargs.map_or(0, Vec::len); if let Some(kwargs) = kwargs { for kwarg in kwargs { // Push key as interned string constant let key_const = self.code.add_const(Value::InternString(kwarg.key.name_id)); self.code.emit_u16(Opcode::LoadConst, key_const); // Push value self.compile_expr(&kwarg.value)?; } } self.code.emit_u16( Opcode::BuildDict, u16::try_from(kw_count).expect("keyword count exceeds u16"), ); // Merge **kwargs if present if let Some(var_kwargs_expr) = var_kwargs { self.compile_expr(var_kwargs_expr)?; // Use the method name for error messages self.code.emit_u16( Opcode::DictMerge, u16::try_from(name_id.index()).expect("name index exceeds u16"), ); } } // 3. Call the method with CallAttrExtended self.code.set_location(call_pos, None); let name_idx = u16::try_from(name_id.index()).expect("name index exceeds u16"); let flags = u8::from(has_kwargs); self.code.emit_u16_u8(Opcode::CallAttrExtended, name_idx, flags); Ok(()) } /// Shared body for PEP 448 generalized calls with multiple `*args` and/or `**kwargs`. /// /// Assumes the callable is already on the stack (pushed by the caller). /// Emits: /// 1. `BuildList(0)` + per-item `ListAppend`/`ListExtend` + `ListToTuple` for args. /// 2. `BuildDict(0)` + per-item `BuildDict(1)+DictMerge`/`DictMerge` for kwargs (if any). /// 3. `CallFunctionExtended(flags)`. /// /// `func_name_id` is used in `DictMerge` error messages; pass `0xFFFF` when unknown. /// /// Stack transition (callable already on stack): /// `[callable]` → `[callable, args_tuple]` → `[callable, args_tuple, kwargs_dict?]` /// → `[result]` fn compile_generalized_call_body( &mut self, args: &[CallArg], kwargs: &[CallKwarg], func_name_id: u16, call_pos: CodeRange, ) -> Result<(), CompileError> { // 1. 
Build args tuple self.code.emit_u16(Opcode::BuildList, 0); for arg in args { match arg { CallArg::Value(e) => { self.compile_expr(e)?; self.code.emit_u8(Opcode::ListAppend, 0); } CallArg::Unpack(e) => { self.compile_expr(e)?; self.code.emit(Opcode::ListExtend); } } } self.code.emit(Opcode::ListToTuple); // 2. Build kwargs dict (if any) let has_kwargs = !kwargs.is_empty(); if has_kwargs { // Start with an empty dict, then merge each kwarg one at a time via DictMerge // so that duplicates (including Named+Unpack ordering) raise TypeError correctly. self.code.emit_u16(Opcode::BuildDict, 0); for kwarg in kwargs { match kwarg { CallKwarg::Named(kw) => { // Wrap key+value in a single-item dict, then merge into kwargs dict. let key_const = self.code.add_const(Value::InternString(kw.key.name_id)); self.code.emit_u16(Opcode::LoadConst, key_const); self.compile_expr(&kw.value)?; self.code.emit_u16(Opcode::BuildDict, 1); self.code.emit_u16(Opcode::DictMerge, func_name_id); } CallKwarg::Unpack(e) => { self.compile_expr(e)?; self.code.emit_u16(Opcode::DictMerge, func_name_id); } } } } // 3. Emit the extended call self.code.set_location(call_pos, None); let flags = u8::from(has_kwargs); self.code.emit_u8(Opcode::CallFunctionExtended, flags); Ok(()) } /// Compiles a for loop. 
fn compile_for( &mut self, target: &UnpackTarget, iter: &ExprLoc, body: &[PreparedNode], or_else: &[PreparedNode], ) -> Result<(), CompileError> { // Record stack depth at loop start (before iterator is pushed) // This is the depth we return to when the loop finishes (iterator popped) let loop_exit_depth = self.code.stack_depth(); // Compile iterator expression self.compile_expr(iter)?; // Convert to iterator self.code.emit(Opcode::GetIter); // Loop start let loop_start = self.code.current_offset(); // Push loop info for break/continue self.loop_stack.push(LoopInfo { start: loop_start, break_jumps: Vec::new(), has_iterator_on_stack: true, }); // ForIter: advance iterator or jump to end let end_jump = self.code.emit_jump(Opcode::ForIter); // Store current value to target (handles both single identifiers and tuple unpacking) self.compile_unpack_target(target); // Compile body self.compile_block(body)?; // Jump back to loop start self.code.emit_jump_to(Opcode::Jump, loop_start); // End of loop - ForIter jumps here when iterator is exhausted self.code.patch_jump(end_jump); // Iterator is popped when loop ends normally, so restore depth to before loop self.code.set_stack_depth(loop_exit_depth); // Pop loop info before compiling else block let loop_info = self.loop_stack.pop().expect("loop stack underflow"); // Compile else block (runs if loop completed without break) if !or_else.is_empty() { self.compile_block(or_else)?; } // Patch break jumps to here - AFTER the else block so break skips else for break_jump in loop_info.break_jumps { self.code.patch_jump(break_jump); } Ok(()) } /// Compiles a while loop. 
/// /// The bytecode structure: /// ```text /// loop_start: /// [evaluate condition] /// JumpIfFalse -> end_jump /// [body] /// Jump -> loop_start /// end_jump: /// [else block] /// [break patches here] /// ``` /// /// Key differences from `for` loops: /// - No `GetIter` (no iterator) /// - No `ForIter` (use `JumpIfFalse` instead) /// - `continue` jumps to condition evaluation /// - `break` doesn't need to pop iterator (nothing extra on stack) fn compile_while( &mut self, test: &ExprLoc, body: &[PreparedNode], or_else: &[PreparedNode], ) -> Result<(), CompileError> { let loop_start = self.code.current_offset(); self.loop_stack.push(LoopInfo { start: loop_start, break_jumps: Vec::new(), has_iterator_on_stack: false, }); self.compile_expr(test)?; let end_jump = self.code.emit_jump(Opcode::JumpIfFalse); self.compile_block(body)?; self.code.emit_jump_to(Opcode::Jump, loop_start); self.code.patch_jump(end_jump); let loop_info = self.loop_stack.pop().expect("loop stack underflow"); if !or_else.is_empty() { self.compile_block(or_else)?; } for break_jump in loop_info.break_jumps { self.code.patch_jump(break_jump); } Ok(()) } /// Compiles a break statement. /// /// Break exits the innermost loop and skips its else block. If inside a /// try-finally, the finally block must run first. /// /// The bytecode without finally: /// 1. Clean up exception state if inside except handler /// 2. Pop the iterator if in a `for` loop (still on stack during loop body) /// 3. Jump to after the else block /// /// With finally: /// 1. Clean up exception state if inside except handler /// 2. Pop the iterator if in a `for` loop /// 3. Jump to "finally with break" path (patched when try compilation completes) /// 4. 
That path runs finally, then jumps to after the else block fn compile_break(&mut self, position: CodeRange) -> Result<(), CompileError> { if self.loop_stack.is_empty() { return Err(CompileError::new("'break' outside loop", position)); } // `break` never falls through, but we still compile following statements in the same // block. Preserve the statement-entry depth for that unreachable compilation so // stack-effect tracking remains stable across dead code. let dead_code_depth = self.code.stack_depth(); let target_loop_depth = self.loop_stack.len() - 1; // If inside except handlers, clean up ALL exception states // Each nested except handler has pushed an exception onto the stack, // so we need to clear/pop each one when breaking out for _ in 0..self.except_handler_depth { self.code.emit(Opcode::ClearException); self.code.emit(Opcode::Pop); // Pop the exception value } // Pop the iterator only for `for` loops (has iterator on stack) // `while` loops don't have an iterator to pop if self.loop_stack[target_loop_depth].has_iterator_on_stack { self.code.emit(Opcode::Pop); } // Check if we need to go through any finally blocks // We need to run finally if break crosses the try boundary, i.e., if // we're breaking from a loop that existed before the try started. if let Some(finally_target) = self.finally_targets.last_mut() && target_loop_depth < finally_target.loop_depth_at_entry { // Breaking from a loop that's outside (or at the start of) this try-finally, // so finally must run before the break let jump = self.code.emit_jump(Opcode::Jump); finally_target.break_jumps.push(BreakContinueThruFinally { jump, target_loop_depth, }); self.code.set_stack_depth(dead_code_depth); return Ok(()); } // No finally to go through, jump directly to loop end let jump = self.code.emit_jump(Opcode::Jump); self.loop_stack[target_loop_depth].break_jumps.push(jump); self.code.set_stack_depth(dead_code_depth); Ok(()) } /// Compiles a continue statement. 
/// /// Continue jumps back to the loop start (the ForIter instruction) which /// advances the iterator and either enters the next iteration or exits the loop. /// If inside a try-finally, the finally block must run first. fn compile_continue(&mut self, position: CodeRange) -> Result<(), CompileError> { if self.loop_stack.is_empty() { return Err(CompileError::new("'continue' not properly in loop", position)); } // `continue` never falls through. Preserve the statement-entry stack depth so // subsequent dead statements in this block are compiled with the right abstract stack. let dead_code_depth = self.code.stack_depth(); let target_loop_depth = self.loop_stack.len() - 1; // If inside except handlers, clean up ALL exception states // Each nested except handler has pushed an exception onto the stack, // so we need to clear/pop each one when continuing for _ in 0..self.except_handler_depth { self.code.emit(Opcode::ClearException); self.code.emit(Opcode::Pop); // Pop the exception value } // Check if we need to go through any finally blocks // We need to run finally if continue crosses the try boundary if let Some(finally_target) = self.finally_targets.last_mut() && target_loop_depth < finally_target.loop_depth_at_entry { // Continuing a loop that's outside (or at the start of) this try-finally, // so finally must run before the continue let jump = self.code.emit_jump(Opcode::Jump); finally_target.continue_jumps.push(BreakContinueThruFinally { jump, target_loop_depth, }); self.code.set_stack_depth(dead_code_depth); return Ok(()); } // No finally to go through, jump directly to loop start let loop_start = self.loop_stack[target_loop_depth].start; self.code.emit_jump_to(Opcode::Jump, loop_start); self.code.set_stack_depth(dead_code_depth); Ok(()) } /// Compiles break or continue after a finally block has run. /// /// Called from `compile_try` after the finally block code. 
Each control flow /// statement may target a different loop, so we check if there's another finally /// to go through or if we can jump directly to the loop's target. /// /// Note: All items in the list jumped to the same finally block, so they all /// have the same starting point. After finally runs, we need to route each /// to its target loop, potentially through more finally blocks. fn compile_control_flow_after_finally(&mut self, items: &[BreakContinueThruFinally], is_break: bool) { // All items went through the same finally, now we need to dispatch to // potentially different loops. For simplicity, we assume all items in // a single finally target the same loop (the innermost one at the time). // This is always true since break/continue only targets the innermost loop. let Some(first) = items.first() else { return; }; let target_loop_depth = first.target_loop_depth; // Check if there's another finally between us and the target loop if let Some(finally_target) = self.finally_targets.last_mut() && target_loop_depth < finally_target.loop_depth_at_entry { // Need to go through another finally let jump = self.code.emit_jump(Opcode::Jump); let jump_info = BreakContinueThruFinally { jump, target_loop_depth, }; if is_break { finally_target.break_jumps.push(jump_info); } else { // else continue finally_target.continue_jumps.push(jump_info); } return; } // No more finally blocks, jump directly to the loop target if is_break { let jump = self.code.emit_jump(Opcode::Jump); self.loop_stack[target_loop_depth].break_jumps.push(jump); } else { // else continue let loop_start = self.loop_stack[target_loop_depth].start; self.code.emit_jump_to(Opcode::Jump, loop_start); } } // ======================================================================== // Comprehension Compilation // ======================================================================== /// Compiles a list comprehension: `[elt for target in iter if cond...]` /// /// Bytecode structure: /// ```text /// BUILD_LIST 
0 ; empty result /// /// GET_ITER /// loop_start: /// FOR_ITER end_loop /// STORE_LOCAL target /// /// [nested generators...] /// /// LIST_APPEND depth /// JUMP loop_start /// end_loop: /// ; result list on stack /// ``` fn compile_list_comp(&mut self, elt: &ExprLoc, generators: &[Comprehension]) -> Result<(), CompileError> { // Build empty list self.code.emit_u16(Opcode::BuildList, 0); // Compile the nested generators, which will eventually append to the list let depth = u8::try_from(generators.len()).expect("too many generators in list comprehension"); self.compile_comprehension_generators(generators, 0, |compiler| { compiler.compile_expr(elt)?; compiler.code.emit_u8(Opcode::ListAppend, depth); Ok(()) })?; Ok(()) } /// Compiles a set comprehension: `{elt for target in iter if cond...}` fn compile_set_comp(&mut self, elt: &ExprLoc, generators: &[Comprehension]) -> Result<(), CompileError> { // Build empty set self.code.emit_u16(Opcode::BuildSet, 0); // Compile the nested generators, which will eventually add to the set let depth = u8::try_from(generators.len()).expect("too many generators in set comprehension"); self.compile_comprehension_generators(generators, 0, |compiler| { compiler.compile_expr(elt)?; compiler.code.emit_u8(Opcode::SetAdd, depth); Ok(()) })?; Ok(()) } /// Compiles a dict comprehension: `{key: value for target in iter if cond...}` fn compile_dict_comp( &mut self, key: &ExprLoc, value: &ExprLoc, generators: &[Comprehension], ) -> Result<(), CompileError> { // Build empty dict self.code.emit_u16(Opcode::BuildDict, 0); // Compile the nested generators, which will eventually set items in the dict let depth = u8::try_from(generators.len()).expect("too many generators in dict comprehension"); self.compile_comprehension_generators(generators, 0, |compiler| { compiler.compile_expr(key)?; compiler.compile_expr(value)?; compiler.code.emit_u8(Opcode::DictSetItem, depth); Ok(()) })?; Ok(()) } /// Recursively compiles comprehension generators (the for/if 
clauses). /// /// For each generator: /// 1. Compile the iterator expression and get iterator /// 2. Start loop: FOR_ITER to get next value or exit /// 3. Store to target variable /// 4. Compile filter conditions (jump back to loop start if any fails) /// 5. Either recurse for inner generator, or call the body callback /// 6. Jump back to loop start /// /// The `body_fn` callback is called at the innermost level to emit the element/key-value code. fn compile_comprehension_generators( &mut self, generators: &[Comprehension], index: usize, body_fn: impl FnOnce(&mut Self) -> Result<(), CompileError>, ) -> Result<(), CompileError> { let generator = &generators[index]; // Record stack depth before iterator expression // This is the depth we return to when the loop finishes (iterator popped) let loop_exit_depth = self.code.stack_depth(); // Compile iterator expression self.compile_expr(&generator.iter)?; self.code.emit(Opcode::GetIter); // Loop start let loop_start = self.code.current_offset(); // FOR_ITER: advance iterator or jump to end let end_jump = self.code.emit_jump(Opcode::ForIter); // Store current value to target (single variable or tuple unpacking) self.compile_unpack_target(&generator.target); // Compile filter conditions - jump back to loop start if any fails for cond in &generator.ifs { self.compile_expr(cond)?; // If condition is false, skip to next iteration self.code.emit_jump_to(Opcode::JumpIfFalse, loop_start); } // Either recurse for inner generator, or emit body if index + 1 < generators.len() { // Recurse for inner generator self.compile_comprehension_generators(generators, index + 1, body_fn)?; } else { // Innermost level - emit body (the element/key-value expression and append/add/set) body_fn(self)?; } // Jump back to loop start self.code.emit_jump_to(Opcode::Jump, loop_start); // End of loop self.code.patch_jump(end_jump); // Iterator is popped when loop ends normally, so restore depth to before loop self.code.set_stack_depth(loop_exit_depth); 
Ok(()) } /// Compiles storage of an unpack target - either a single identifier, nested tuple, or starred. /// /// For single identifiers: emits a simple store. /// For nested tuples: emits `UnpackSequence` (or `UnpackEx` with starred) and recursively /// handles each sub-target. fn compile_unpack_target(&mut self, target: &UnpackTarget) { match target { UnpackTarget::Name(ident) => { // Single identifier - just store directly self.compile_store(ident); } UnpackTarget::Starred(ident) => { // Starred target by itself (shouldn't happen at top level normally) // Just store as if it were a name self.compile_store(ident); } UnpackTarget::Tuple { targets, position } => { // Check if there's a starred target let star_idx = targets.iter().position(|t| matches!(t, UnpackTarget::Starred(_))); self.code.set_location(*position, None); if let Some(star_idx) = star_idx { // Has starred target - use UnpackEx let before = u8::try_from(star_idx).expect("too many targets before star"); let after = u8::try_from(targets.len() - star_idx - 1).expect("too many targets after star"); self.code.emit_u8_u8(Opcode::UnpackEx, before, after); } else { // No starred target - use UnpackSequence let count = u8::try_from(targets.len()).expect("too many targets in nested unpack"); self.code.emit_u8(Opcode::UnpackSequence, count); } // After UnpackSequence/UnpackEx, values are on stack with first item on top // Store them in order, recursively handling further nesting for target in targets { self.compile_unpack_target(target); } } } } // ======================================================================== // Statement Helpers // ======================================================================== /// Compiles an assert statement. 
fn compile_assert(&mut self, test: &ExprLoc, msg: Option<&ExprLoc>) -> Result<(), CompileError> { // Compile test self.compile_expr(test)?; // Jump over raise if truthy let skip_jump = self.code.emit_jump(Opcode::JumpIfTrue); // Raise AssertionError let exc_idx = self .code .add_const(Value::Builtin(Builtins::ExcType(ExcType::AssertionError))); self.code.emit_u16(Opcode::LoadConst, exc_idx); if let Some(msg_expr) = msg { // Call AssertionError(msg) self.compile_expr(msg_expr)?; self.code.emit_u8(Opcode::CallFunction, 1); } else { // Call AssertionError() self.code.emit_u8(Opcode::CallFunction, 0); } self.code.emit(Opcode::Raise); self.code.patch_jump(skip_jump); Ok(()) } /// Compiles f-string parts, returning the number of string parts to concatenate. /// /// Each part is compiled to leave a string value on the stack: /// - `Literal(StringId)`: Push the interned string directly /// - `Interpolation`: Compile expr, emit FormatValue to convert to string fn compile_fstring_parts(&mut self, parts: &[FStringPart]) -> Result { let mut count = 0u16; for part in parts { match part { FStringPart::Literal(string_id) => { // Push the interned string as a constant let const_idx = self.code.add_const(Value::InternString(*string_id)); self.code.emit_u16(Opcode::LoadConst, const_idx); count += 1; } FStringPart::Interpolation { expr, conversion, format_spec, debug_prefix, } => { // If debug prefix present, push it first if let Some(prefix_id) = debug_prefix { let const_idx = self.code.add_const(Value::InternString(*prefix_id)); self.code.emit_u16(Opcode::LoadConst, const_idx); count += 1; } // Compile the expression self.compile_expr(expr)?; // For debug expressions without explicit conversion, Python uses repr by default let effective_conversion = if debug_prefix.is_some() && matches!(conversion, ConversionFlag::None) { ConversionFlag::Repr } else { *conversion }; // Emit FormatValue with appropriate flags let flags = self.compile_format_value(effective_conversion, 
format_spec.as_ref())?; self.code.emit_u8(Opcode::FormatValue, flags); count += 1; } } } Ok(count) } /// Compiles format value flags and optionally pushes format spec to stack. /// /// Returns the flags byte encoding conversion and format spec presence. /// If a format spec is present, it's pushed to the stack before the value. fn compile_format_value( &mut self, conversion: ConversionFlag, format_spec: Option<&FormatSpec>, ) -> Result { // Conversion flag: bits 0-1 let conv_bits = match conversion { ConversionFlag::None => 0, ConversionFlag::Str => 1, ConversionFlag::Repr => 2, ConversionFlag::Ascii => 3, }; match format_spec { None => Ok(conv_bits), Some(FormatSpec::Static(parsed)) => { // Static format spec - push a marker constant with the parsed spec info // We store this as a special format spec value in the constant pool // The VM will recognize this and use the pre-parsed spec let const_idx = self.add_format_spec_const(parsed); self.code.emit_u16(Opcode::LoadConst, const_idx); Ok(conv_bits | 0x04) // has format spec on stack } Some(FormatSpec::Dynamic(dynamic_parts)) => { // Compile dynamic format spec parts to build a format spec string // Then parse it at runtime let part_count = self.compile_fstring_parts(dynamic_parts)?; if part_count > 1 { self.code.emit_u16(Opcode::BuildFString, part_count); } // Format spec string is now on stack Ok(conv_bits | 0x04) // has format spec on stack } } } /// Adds a format spec to the constant pool as an encoded integer. /// /// Uses the encoding from `fstring::encode_format_spec` and stores it as /// a negative integer to distinguish from regular ints. 
fn add_format_spec_const(&mut self, spec: &ParsedFormatSpec) -> u16 { let encoded = encode_format_spec(spec); // Use negative to distinguish from regular ints (format spec marker) // We negate and subtract 1 to ensure it's negative and recoverable let encoded_i64 = i64::try_from(encoded).expect("format spec encoding exceeds i64::MAX"); let marker = -(encoded_i64 + 1); self.code.add_const(Value::Int(marker)) } // ======================================================================== // Exception Handling Compilation // ======================================================================== /// Compiles a return statement, handling finally blocks properly. /// /// If we're inside a try-finally block, the return value is kept on the stack /// and we jump to a "finally with return" section that runs finally then returns. /// Otherwise, we emit a direct `ReturnValue`. fn compile_return(&mut self) { if let Some(finally_target) = self.finally_targets.last_mut() { // Inside a try-finally: jump to finally, then return // Return value is already on stack let jump = self.code.emit_jump(Opcode::Jump); finally_target.return_jumps.push(jump); } else { // Normal return self.code.emit(Opcode::ReturnValue); } } /// Compiles a try/except/else/finally block. /// /// The bytecode structure is: /// ```text /// # protected range /// JUMP to_else_or_finally # skip handlers if no exception /// handler_dispatch: # exception pushed by VM /// # for each handler: /// /// /// CLEAR_EXCEPTION /// JUMP to_finally /// reraise: /// RERAISE # no handler matched /// else_block: /// /// finally_block: /// /// end: /// ``` /// /// For finally blocks, exceptions that propagate through the handler dispatch /// (including RERAISE when no handler matches) are caught by a second exception /// entry that ensures finally runs before propagation. /// /// Returns inside try/except/else jump to a "finally with return" path that /// runs the finally code then returns the value. 
///
/// **Note:** The finally block code is emitted multiple times (once for each
/// control flow path: normal, exception, return, break, continue). This is the
/// same approach CPython uses - each path has different stack state at entry
/// (e.g., return has a value on stack, break has popped the iterator), so we
/// can't easily share a single copy. The duplication is intentional.
///
/// Emission order in the generated code is: try body, handler dispatch, finally
/// cleanup (exception path), finally-with-return, finally-with-break,
/// finally-with-continue, else block, normal finally. The exception table entries
/// are registered last, once every offset is known.
fn compile_try(&mut self, try_block: &Try) -> Result<(), CompileError> {
    let has_finally = !try_block.finally.is_empty();
    let has_handlers = !try_block.handlers.is_empty();
    let has_else = !try_block.or_else.is_empty();

    // Record stack depth at try entry (for unwinding on exception)
    let stack_depth = self.code.stack_depth();

    // If there's a finally block, track returns/break/continue inside try/handlers/else
    if has_finally {
        self.finally_targets.push(FinallyTarget {
            return_jumps: Vec::new(),
            break_jumps: Vec::new(),
            continue_jumps: Vec::new(),
            loop_depth_at_entry: self.loop_stack.len(),
        });
    }

    // === Compile try body ===
    let try_start = self.code.current_offset();
    self.compile_block(&try_block.body)?;
    let try_end = self.code.current_offset();

    // Jump to else/finally if no exception (skip handlers)
    let after_try_jump = self.code.emit_jump(Opcode::Jump);

    // === Handler dispatch starts here ===
    let handler_start = self.code.current_offset();

    // VM pushes exception onto stack when entering handler.
    // Adjust compiler's stack depth tracking to reflect this.
    self.code.adjust_stack_depth(1);

    // Track jumps that go to finally (for patching later)
    // NOTE(review): the element type parameter of `Vec` is missing on the next line
    // (and on the `finally_jumps` parameter of `compile_exception_handlers`);
    // presumably it is the label type returned by `emit_jump` - confirm and restore.
    let mut finally_jumps: Vec = Vec::new();

    if has_handlers {
        // Compile exception handlers
        // handler_entry_depth = stack_depth + 1 (exception on stack)
        let handler_entry_depth = stack_depth + 1;
        self.compile_exception_handlers(&try_block.handlers, &mut finally_jumps, handler_entry_depth)?;
    } else {
        // No handlers - just reraise (this only happens with try-finally)
        self.code.emit(Opcode::Reraise);
    }

    // After handler dispatch, each handler path either:
    // 1. Matched and popped the exception (via Pop), then jumped to finally
    // 2. Didn't match and reraised (for last handler)
    // The handlers' Pop instructions already account for the exception,
    // so no additional stack depth adjustment is needed here.

    // Mark end of handler dispatch (for finally exception entry)
    let handler_dispatch_end = self.code.current_offset();

    // === Finally cleanup handler (for exceptions during handler dispatch) ===
    // This catches exceptions from RERAISE (and any other exceptions in handlers)
    // and ensures finally runs before the exception propagates.
    let finally_cleanup_start = if has_finally {
        let cleanup_start = self.code.current_offset();
        // Exception value is on stack (pushed by VM), so stack = stack_depth + 1
        self.code.set_stack_depth(stack_depth + 1);
        // We need to pop it, run finally, then reraise
        // But we can't easily save the exception, so we use a different approach:
        // The exception is already on the exception_stack from handle_exception,
        // so we can just pop from operand stack, run finally, then reraise.
        self.code.emit(Opcode::Pop); // Pop exception from operand stack
        self.compile_block(&try_block.finally)?;
        self.code.emit(Opcode::Reraise); // Re-raise from exception_stack
        Some(cleanup_start)
    } else {
        None
    };

    // === Finally with return/break/continue paths ===
    // Pop finally target and get all the jumps that need to go through finally
    let finally_with_return_start = if has_finally {
        let finally_target = self.finally_targets.pop().expect("finally_targets should not be empty");

        // === Finally with return path ===
        let return_start = if finally_target.return_jumps.is_empty() {
            None
        } else {
            let start = self.code.current_offset();
            for jump in finally_target.return_jumps {
                self.code.patch_jump(jump);
            }
            // Return value is on stack, stack = stack_depth + 1
            self.code.set_stack_depth(stack_depth + 1);
            self.compile_block(&try_block.finally)?;
            // This target was popped above, so `compile_return` now either returns
            // directly or defers to the next enclosing finally target.
            self.compile_return();
            Some(start)
        };

        // === Finally with break path ===
        // For each break, run finally then either:
        // - Jump to outer finally's break path (if there's an outer finally between us and the loop)
        // - Jump directly to the loop's break target
        if !finally_target.break_jumps.is_empty() {
            for break_info in &finally_target.break_jumps {
                self.code.patch_jump(break_info.jump);
            }
            // Break already popped the iterator, so stack = stack_depth - 1
            // (the iterator was on stack at try entry, break removed it)
            self.code.set_stack_depth(stack_depth.saturating_sub(1));
            self.compile_block(&try_block.finally)?;
            // After finally, compile the break again (handles nested finally or direct jump)
            self.compile_control_flow_after_finally(&finally_target.break_jumps, true);
        }

        // === Finally with continue path ===
        if !finally_target.continue_jumps.is_empty() {
            for continue_info in &finally_target.continue_jumps {
                self.code.patch_jump(continue_info.jump);
            }
            // Continue doesn't pop the iterator, stack = stack_depth
            self.code.set_stack_depth(stack_depth);
            self.compile_block(&try_block.finally)?;
            // After finally, compile the continue again (handles nested finally or direct jump)
            self.compile_control_flow_after_finally(&finally_target.continue_jumps, false);
        }

        return_start
    } else {
        None
    };

    // === Else block (runs if no exception) ===
    self.code.patch_jump(after_try_jump);
    // Normal path from try body, stack = stack_depth
    self.code.set_stack_depth(stack_depth);
    let else_start = self.code.current_offset();
    if has_else {
        self.compile_block(&try_block.or_else)?;
    }
    let else_end = self.code.current_offset();

    // === Normal finally path (no exception pending, no return) ===
    // Patch all jumps from handlers to go here
    for jump in finally_jumps {
        self.code.patch_jump(jump);
    }

    if has_finally {
        // Stack = stack_depth (no exception, no return value)
        self.code.set_stack_depth(stack_depth);
        self.compile_block(&try_block.finally)?;
    }

    // === Add exception table entries ===
    // Order matters: entries are searched in order, so inner entries must come first.

    // Entry 1: Try body -> handler dispatch
    if has_handlers || has_finally {
        self.code.add_exception_entry(ExceptionEntry::new(
            u32::try_from(try_start).expect("bytecode offset exceeds u32"),
            u32::try_from(try_end).expect("bytecode offset exceeds u32") + 3, // +3 to include the JUMP instruction
            u32::try_from(handler_start).expect("bytecode offset exceeds u32"),
            stack_depth,
        ));
    }

    // Entry 2: Handler dispatch -> finally cleanup (only if has_finally)
    // This ensures finally runs when RERAISE is executed or any exception occurs in handlers
    if let Some(cleanup_start) = finally_cleanup_start {
        self.code.add_exception_entry(ExceptionEntry::new(
            u32::try_from(handler_start).expect("bytecode offset exceeds u32"),
            u32::try_from(handler_dispatch_end).expect("bytecode offset exceeds u32"),
            u32::try_from(cleanup_start).expect("bytecode offset exceeds u32"),
            stack_depth,
        ));
    }

    // Entry 3: Finally with return -> finally cleanup
    // If an exception occurs while running finally (in the return path), catch it
    // NOTE(review): the range runs from `return_start` to `else_start`, so it also
    // spans the break/continue finally copies emitted after the return path, and the
    // cleanup handler compiles the finally body again - confirm this is intended.
    if let (Some(return_start), Some(cleanup_start)) = (finally_with_return_start, finally_cleanup_start) {
        self.code.add_exception_entry(ExceptionEntry::new(
            u32::try_from(return_start).expect("bytecode offset exceeds u32"),
            u32::try_from(else_start).expect("bytecode offset exceeds u32"), // End at else_start (before else block)
            u32::try_from(cleanup_start).expect("bytecode offset exceeds u32"),
            stack_depth,
        ));
    }

    // Entry 4: Else block -> finally cleanup (only if has_finally and has_else)
    // Exceptions in else block should go through finally
    if has_else && let Some(cleanup_start) = finally_cleanup_start {
        self.code.add_exception_entry(ExceptionEntry::new(
            u32::try_from(else_start).expect("bytecode offset exceeds u32"),
            u32::try_from(else_end).expect("bytecode offset exceeds u32"),
            u32::try_from(cleanup_start).expect("bytecode offset exceeds u32"),
            stack_depth,
        ));
    }

    Ok(())
}

/// Compiles the exception handlers for a try block.
///
/// Each handler checks if the exception matches its type, and if so,
/// executes the handler body. If no handler matches, the exception is re-raised.
///
/// `handler_entry_depth` is the stack depth when entering handler dispatch
/// (i.e., base stack_depth + 1 for the exception value).
///
/// Every handler that runs to completion ends with an unconditional jump whose
/// label is appended to `finally_jumps`; the caller patches those labels to the
/// normal finally path.
fn compile_exception_handlers(
    &mut self,
    handlers: &[ExceptHandler],
    finally_jumps: &mut Vec,
    handler_entry_depth: u16,
) -> Result<(), CompileError> {
    // Track jumps from non-matching handlers to next handler
    let mut next_handler_jumps: Vec = Vec::new();

    for (i, handler) in handlers.iter().enumerate() {
        let is_last = i == handlers.len() - 1;

        // Patch jumps from previous handler's non-match to here
        // If jumping from a previous handler's no-match, stack has [exc, exc] (duplicate)
        // We need to pop the duplicate before starting this handler's check
        if !next_handler_jumps.is_empty() {
            for jump in next_handler_jumps.drain(..) {
                self.code.patch_jump(jump);
            }
            // Reset stack depth for jump target: [exc, exc] = handler_entry_depth + 1
            self.code.set_stack_depth(handler_entry_depth + 1);
            // Pop the duplicate from previous handler's check
            self.code.emit(Opcode::Pop);
        }

        if let Some(exc_type) = &handler.exc_type {
            // Typed handler: except ExcType: or except ExcType as e:
            // Stack: [exception]

            // Duplicate exception for type check
            self.code.emit(Opcode::Dup);
            // Stack: [exception, exception]

            // Load the exception type to match against
            self.compile_expr(exc_type)?;
            // Stack: [exception, exception, exc_type]

            // Check if exception matches the type
            // This validates exc_type is a valid exception type and performs the match
            // CheckExcMatch pops exc_type, peeks exception, pushes bool
            self.code.emit(Opcode::CheckExcMatch);
            // Stack: [exception, exception, bool]

            // Jump to next handler if match returned False
            // JumpIfFalse pops the bool, leaving [exception, exception]
            let no_match_jump = self.code.emit_jump(Opcode::JumpIfFalse);

            if is_last {
                // Last handler - if no match, reraise
                // But first we need to handle the exception var cleanup
                // (the jump is patched to the reraise sequence after the handler body below)
            } else {
                next_handler_jumps.push(no_match_jump);
            }

            // After JumpIfFalse (match succeeded), stack is [exception, exception]
            // Pop the duplicate that was used for the type check
            self.code.emit(Opcode::Pop);
            // Stack: [exception]

            // Exception matched! Bind to variable if needed
            if let Some(name) = &handler.name {
                // Stack: [exception]
                // Store to variable (don't pop - we still need it for current_exception)
                self.code.emit(Opcode::Dup);
                self.compile_store(name);
            }

            // Track that we're inside an except handler (for break/continue cleanup)
            self.except_handler_depth += 1;

            // Compile handler body
            self.compile_block(&handler.body)?;

            // Exit except handler context
            self.except_handler_depth -= 1;

            // Delete exception variable (Python 3 behavior)
            if let Some(name) = &handler.name {
                self.compile_delete(name);
            }

            // Clear current_exception
            self.code.emit(Opcode::ClearException);

            // Pop the exception from stack
            self.code.emit(Opcode::Pop);

            // Jump to finally
            finally_jumps.push(self.code.emit_jump(Opcode::Jump));

            // If this was last handler and no match, we need to reraise
            if is_last {
                self.code.patch_jump(no_match_jump);
                // Coming from JumpIfFalse no-match path, stack has [exception, exception]
                // Reset stack depth for jump target
                self.code.set_stack_depth(handler_entry_depth + 1);
                // We need to pop the duplicate before reraising
                self.code.emit(Opcode::Pop);
                self.code.emit(Opcode::Reraise);
            }
        } else {
            // Bare except: catches everything
            // Stack: [exception]

            // Bind to variable if needed
            if let Some(name) = &handler.name {
                self.code.emit(Opcode::Dup);
                self.compile_store(name);
            }

            // Track that we're inside an except handler (for break/continue cleanup)
            self.except_handler_depth += 1;

            // Compile handler body
            self.compile_block(&handler.body)?;

            // Exit except handler context
            self.except_handler_depth -= 1;

            // Delete exception variable
            if let Some(name) = &handler.name {
                self.compile_delete(name);
            }

            // Clear current_exception
            self.code.emit(Opcode::ClearException);

            // Pop the exception from stack
            self.code.emit(Opcode::Pop);

            // Jump to finally
            finally_jumps.push(self.code.emit_jump(Opcode::Jump));
        }
    }

    Ok(())
}

/// Compiles deletion of a variable.
/// /// At module level, `Local` and `LocalUnassigned` scopes emit `DeleteGlobal` /// because module-level locals live in the globals array. fn compile_delete(&mut self, target: &Identifier) { let slot = u16::try_from(target.namespace_id().index()).expect("local slot exceeds u16"); match target.scope { NameScope::Local | NameScope::LocalUnassigned => { if self.is_module_scope { self.code.emit_u16(Opcode::DeleteGlobal, slot); } else if let Ok(s) = u8::try_from(slot) { self.code.emit_u8(Opcode::DeleteLocal, s); } else { // Wide variant not implemented yet todo!("DeleteLocalW for slot > 255"); } } NameScope::Global => { self.code.emit_u16(Opcode::DeleteGlobal, slot); } NameScope::Cell => { // Delete cell not commonly needed // For now, just store None self.code.emit(Opcode::LoadNone); self.compile_store(target); } } } } /// Error that can occur during bytecode compilation. /// /// These are typically limit violations that can't be represented in the bytecode /// format (e.g., too many arguments, too many local variables), or import errors /// detected at compile time. #[derive(Debug, Clone)] pub struct CompileError { /// Error message describing the issue. message: Cow<'static, str>, /// Source location where the error occurred. position: CodeRange, /// Exception type to use (defaults to SyntaxError). exc_type: ExcType, } impl CompileError { /// Creates a new compile error with the given message and position. /// /// Defaults to `SyntaxError` exception type. fn new(message: impl Into>, position: CodeRange) -> Self { Self { message: message.into(), position, exc_type: ExcType::SyntaxError, } } /// Converts this compile error into a Python exception. /// /// Uses the stored exception type (SyntaxError or ModuleNotFoundError). 
/// - SyntaxError: hides the `, in ` part (CPython's format) /// - ModuleNotFoundError: hides caret markers (CPython doesn't show them) pub fn into_python_exc(self, filename: &str, source: &str) -> MontyException { let mut frame = if self.exc_type == ExcType::SyntaxError { // SyntaxError uses different format: no `, in ` StackFrame::from_position_syntax_error(self.position, filename, source) } else { StackFrame::from_position(self.position, filename, source) }; // CPython doesn't show carets for module not found errors if self.exc_type == ExcType::ModuleNotFoundError { frame.hide_caret = true; } MontyException::new_full(self.exc_type, Some(self.message.into_owned()), vec![frame]) } } // ============================================================================ // Operator Mapping Functions // ============================================================================ /// Maps a binary `Operator` to its corresponding `Opcode`. fn operator_to_opcode(op: &Operator) -> Opcode { match op { Operator::Add => Opcode::BinaryAdd, Operator::Sub => Opcode::BinarySub, Operator::Mult => Opcode::BinaryMul, Operator::Div => Opcode::BinaryDiv, Operator::FloorDiv => Opcode::BinaryFloorDiv, Operator::Mod => Opcode::BinaryMod, Operator::Pow => Opcode::BinaryPow, Operator::MatMult => Opcode::BinaryMatMul, Operator::LShift => Opcode::BinaryLShift, Operator::RShift => Opcode::BinaryRShift, Operator::BitOr => Opcode::BinaryOr, Operator::BitXor => Opcode::BinaryXor, Operator::BitAnd => Opcode::BinaryAnd, // And/Or are handled separately for short-circuit evaluation Operator::And | Operator::Or => { unreachable!("And/Or operators handled in compile_binary_op") } } } /// Maps an `Operator` to its in-place (augmented assignment) `Opcode`. /// /// Returns `None` for operators that don't have an in-place opcode (currently `MatMult`, /// since matrix multiplication is not yet supported). Returns `Some(opcode)` for all /// other valid augmented assignment operators. 
///
/// # Panics
///
/// Panics if called with `And` or `Or` operators, which cannot be used in augmented
/// assignments (this would be a parser bug).
fn operator_to_inplace_opcode(op: &Operator) -> Option<Opcode> {
    match op {
        Operator::Add => Some(Opcode::InplaceAdd),
        Operator::Sub => Some(Opcode::InplaceSub),
        Operator::Mult => Some(Opcode::InplaceMul),
        Operator::Div => Some(Opcode::InplaceDiv),
        Operator::FloorDiv => Some(Opcode::InplaceFloorDiv),
        Operator::Mod => Some(Opcode::InplaceMod),
        Operator::Pow => Some(Opcode::InplacePow),
        Operator::BitAnd => Some(Opcode::InplaceAnd),
        Operator::BitOr => Some(Opcode::InplaceOr),
        Operator::BitXor => Some(Opcode::InplaceXor),
        Operator::LShift => Some(Opcode::InplaceLShift),
        Operator::RShift => Some(Opcode::InplaceRShift),
        // No in-place opcode exists for `@=`; the caller decides how to report it.
        Operator::MatMult => None,
        Operator::And | Operator::Or => {
            unreachable!("And/Or operators cannot be used in augmented assignment")
        }
    }
}

/// Maps a `CmpOperator` to its corresponding `Opcode`.
///
/// # Panics
///
/// Panics (via `unreachable!`) for `ModEq`, which needs a constant operand and is
/// therefore emitted by the caller instead of going through this table.
fn cmp_operator_to_opcode(op: &CmpOperator) -> Opcode {
    match op {
        CmpOperator::Eq => Opcode::CompareEq,
        CmpOperator::NotEq => Opcode::CompareNe,
        CmpOperator::Lt => Opcode::CompareLt,
        CmpOperator::LtE => Opcode::CompareLe,
        CmpOperator::Gt => Opcode::CompareGt,
        CmpOperator::GtE => Opcode::CompareGe,
        CmpOperator::Is => Opcode::CompareIs,
        CmpOperator::IsNot => Opcode::CompareIsNot,
        CmpOperator::In => Opcode::CompareIn,
        CmpOperator::NotIn => Opcode::CompareNotIn,
        // ModEq is handled specially at the call site (needs constant operand)
        CmpOperator::ModEq(_) => unreachable!("ModEq handled at call site"),
    }
}

/// Returns `true` if any item in the sequence is a PEP 448 unpack (`*expr`).
///
/// Used to choose between the fast single-`Build*(N)` path and the generalized
/// incremental `Build*(0)` + `ListAppend`/`ListExtend` (or `SetAdd`/`SetExtend`) path.
/// Only the generalized path is needed when at least one `Unpack` variant is present.
fn has_unpack_seq(items: &[SequenceItem]) -> bool { items.iter().any(|i| matches!(i, SequenceItem::Unpack(_))) } /// Returns `true` if any item in the dict literal is a PEP 448 `**expr` unpack. /// /// Used to choose between the fast single-`BuildDict(N)` path and the generalized /// incremental `BuildDict(0)` + `DictSetItem`/`DictUpdate` path. fn has_unpack_dict(items: &[DictItem]) -> bool { items.iter().any(|i| matches!(i, DictItem::Unpack(_))) } ================================================ FILE: crates/monty/src/bytecode/mod.rs ================================================ //! Bytecode VM module for Monty. //! //! This module contains the bytecode representation, compiler, and virtual machine //! for executing Python code. The bytecode VM replaces the tree-walking interpreter //! with a stack-based execution model. //! //! # Module Structure //! //! - `op` - Opcode enum definitions //! - `code` - Code object containing bytecode and metadata //! - `builder` - CodeBuilder for emitting bytecode during compilation //! - `compiler` - AST to bytecode compiler //! - `vm` - Virtual machine for bytecode execution mod builder; mod code; mod compiler; mod op; mod vm; pub use code::Code; pub use compiler::Compiler; pub(crate) use vm::CallResult; pub use vm::{FrameExit, VM, VMSnapshot}; ================================================ FILE: crates/monty/src/bytecode/op.rs ================================================ //! Opcode definitions for the bytecode VM. //! //! Bytecode is stored as raw `Vec` for cache efficiency. The `Opcode` enum is a pure //! discriminant with no data - operands are fetched separately from the byte stream. //! //! # Operand Encoding //! //! - No suffix, 0 bytes: `BinaryAdd`, `Pop`, `LoadNone` //! - No suffix, 1 byte (u8/i8): `LoadLocal`, `StoreLocal`, `LoadSmallInt` //! - `W` suffix, 2 bytes (u16/i16): `LoadLocalW`, `Jump`, `LoadConst` //! 
- Compound (multiple operands): `CallFunctionKw` (u8 + u8), `MakeClosure` (u16 + u8) use strum::FromRepr; /// Opcode discriminant - just identifies the instruction type. /// /// Operands (if any) follow in the bytecode stream and are fetched separately. /// With `#[repr(u8)]`, each opcode is exactly 1 byte. Uses `strum::FromRepr` for /// efficient byte-to-opcode conversion (bounds check + transmute). /// /// Opcode bytes are part of Monty's serialized `Code` format, so existing values /// must remain stable across releases. Append new opcodes to the end of the enum /// instead of inserting them into the middle. #[repr(u8)] #[derive(Debug, Clone, Copy, PartialEq, Eq, FromRepr)] pub enum Opcode { // === Stack Operations (no operand) === /// Discard top of stack. Pop, /// Duplicate top of stack. Dup, /// Swap top two: [a, b] -> [b, a]. Rot2, /// Rotate top three: [a, b, c] -> [c, a, b]. Rot3, // === Constants & Literals === /// Push constant from pool. Operand: u16 const_id. LoadConst, /// Push None. LoadNone, /// Push True. LoadTrue, /// Push False. LoadFalse, /// Push small integer (-128 to 127). Operand: i8. LoadSmallInt, // === Variables === // Specialized no-operand versions for common slots (hot path) /// Push local slot 0 (often 'self'). LoadLocal0, /// Push local slot 1. LoadLocal1, /// Push local slot 2. LoadLocal2, /// Push local slot 3. LoadLocal3, // General versions with operand /// Push local variable. Operand: u8 slot. LoadLocal, /// Push local (wide, slot > 255). Operand: u16 slot. LoadLocalW, /// Pop and store to local. Operand: u8 slot. StoreLocal, /// Store local (wide). Operand: u16 slot. StoreLocalW, /// Push from global namespace. Operand: u16 slot. LoadGlobal, /// Store to global. Operand: u16 slot. StoreGlobal, /// Load from closure cell. Operand: u16 slot. LoadCell, /// Store to closure cell. Operand: u16 slot. StoreCell, /// Delete local variable. Operand: u8 slot. 
DeleteLocal, /// Load local in call context: pushes `ExtFunction(name_id)` for undefined names /// instead of yielding `NameLookup`. Operands: u8 slot, u16 name_id. /// /// Used when compiling function calls like `foo()` where `foo` is `LocalUnassigned`. /// If the variable is defined, behaves identically to `LoadLocal`. /// If undefined, pushes an `ExtFunction` value so execution continues to `CallFunction`, /// which naturally yields `FunctionCall` instead of `NameLookup`. /// The name_id is encoded in the operand to avoid namespace lookup ambiguity. LoadLocalCallable, /// Wide variant of `LoadLocalCallable`. Operands: u16 slot, u16 name_id. LoadLocalCallableW, /// Load global in call context: pushes `ExtFunction(name_id)` for undefined names /// instead of yielding `NameLookup`. Operands: u16 slot, u16 name_id. /// /// Used when compiling function calls like `foo()` where `foo` is a global. /// If the variable is defined, behaves identically to `LoadGlobal`. /// If undefined, pushes an `ExtFunction` value so execution continues to `CallFunction`, /// which naturally yields `FunctionCall` instead of `NameLookup`. /// The name_id is encoded in the operand because global and local slot indices /// belong to different namespaces — using the current frame's local_names would /// return the wrong name when called from inside a function. LoadGlobalCallable, // === Binary Operations (no operand) === /// Add: a + b. BinaryAdd, /// Subtract: a - b. BinarySub, /// Multiply: a * b. BinaryMul, /// Divide: a / b. BinaryDiv, /// Floor divide: a // b. BinaryFloorDiv, /// Modulo: a % b. BinaryMod, /// Power: a ** b. BinaryPow, /// Bitwise AND: a & b. BinaryAnd, /// Bitwise OR: a | b. BinaryOr, /// Bitwise XOR: a ^ b. BinaryXor, /// Left shift: a << b. BinaryLShift, /// Right shift: a >> b. BinaryRShift, /// Matrix multiply: a @ b. BinaryMatMul, // === Comparison Operations (no operand) === /// Equal: a == b. CompareEq, /// Not equal: a != b. CompareNe, /// Less than: a < b. 
CompareLt, /// Less than or equal: a <= b. CompareLe, /// Greater than: a > b. CompareGt, /// Greater than or equal: a >= b. CompareGe, /// Identity: a is b. CompareIs, /// Not identity: a is not b. CompareIsNot, /// Membership: a in b. CompareIn, /// Not membership: a not in b. CompareNotIn, /// Modulo equality: a % b == k (operand: u16 constant index for k). /// /// This is an optimization for patterns like `x % 3 == 0` which are common /// in Python code. Pops b then a, computes `a % b`, then compares with k. CompareModEq, // === Unary Operations (no operand) === /// Logical not: not a. UnaryNot, /// Negation: -a. UnaryNeg, /// Positive: +a. UnaryPos, /// Bitwise invert: ~a. UnaryInvert, // === In-place Operations (no operand) === /// In-place add: a += b. InplaceAdd, /// In-place subtract: a -= b. InplaceSub, /// In-place multiply: a *= b. InplaceMul, /// In-place divide: a /= b. InplaceDiv, /// In-place floor divide: a //= b. InplaceFloorDiv, /// In-place modulo: a %= b. InplaceMod, /// In-place power: a **= b. InplacePow, /// In-place bitwise AND: a &= b. InplaceAnd, /// In-place bitwise OR: a |= b. InplaceOr, /// In-place bitwise XOR: a ^= b. InplaceXor, /// In-place left shift: a <<= b. InplaceLShift, /// In-place right shift: a >>= b. InplaceRShift, // === Collection Building === /// Pop n items, build list. Operand: u16 count. BuildList, /// Pop n items, build tuple. Operand: u16 count. BuildTuple, /// Pop 2n items (k/v pairs), build dict. Operand: u16 count. BuildDict, /// Pop n items, build set. Operand: u16 count. BuildSet, /// Format a value for f-string interpolation. Operand: u8 flags. /// /// Flags encoding: /// - bits 0-1: conversion (0=none, 1=str, 2=repr, 3=ascii) /// - bit 2: has format spec on stack (pop fmt_spec first, then value) /// - bit 3: has static format spec (operand includes u16 const_id after flags) /// /// Pops the value (and optionally format spec), pushes the formatted string. 
FormatValue, /// Pop n parts, concatenate for f-string. Operand: u16 count. BuildFString, /// Build a slice object from stack values. No operand. /// /// Pops 3 values from stack: step, stop, start (TOS order). /// Each value can be None (for default) or an integer. /// Creates a `HeapData::Slice` and pushes a `Value::Ref` to it. BuildSlice, /// Pop iterable, pop list, extend list with iterable items. /// /// Used for `*args` unpacking: builds a list of positional args, /// then extends it with unpacked iterables. ListExtend, /// Pop TOS (list), push tuple containing the same elements. /// /// Used after building the args list to create the final args tuple /// for `CallFunctionEx`. ListToTuple, /// Pop mapping, pop dict, update dict with mapping. Operand: u16 func_name_id. /// /// Used for `**kwargs` unpacking. The func_name_id is used for error messages /// when the mapping contains non-string keys. DictMerge, // === Comprehension Building === /// Append TOS to list for comprehension. Operand: u8 depth (number of iterators). /// /// Stack: [..., list, iter1, ..., iterN, value] -> [..., list, iter1, ..., iterN] /// Pops value (TOS), appends to list at stack position (len - 2 - depth). /// Depth equals the number of nested iterators (generators) in the comprehension. ListAppend, /// Add TOS to set for comprehension. Operand: u8 depth (number of iterators). /// /// Stack: [..., set, iter1, ..., iterN, value] -> [..., set, iter1, ..., iterN] /// Pops value (TOS), adds to set at stack position (len - 2 - depth). /// May raise TypeError if value is unhashable. SetAdd, /// Set dict[key] = value for comprehension. Operand: u8 depth (number of iterators). /// /// Stack: [..., dict, iter1, ..., iterN, key, value] -> [..., dict, iter1, ..., iterN] /// Pops value (TOS) and key (TOS-1), sets dict[key] = value. /// Dict is at stack position (len - 3 - depth). /// May raise TypeError if key is unhashable. 
DictSetItem, // === Subscript & Attribute === /// a[b]: pop index, pop obj, push result. BinarySubscr, /// a[b] = c: pop value, pop index, pop obj. StoreSubscr, // NOTE: DeleteSubscr removed - `del` statement not supported by parser /// Pop obj, push obj.attr. Operand: u16 name_id. LoadAttr, /// Pop module, push module.attr for `from ... import`. Operand: u16 name_id. /// /// Like `LoadAttr` but raises `ImportError` instead of `AttributeError` /// when the attribute is not found. Used for `from module import name`. LoadAttrImport, /// Pop value, pop obj, set obj.attr. Operand: u16 name_id. StoreAttr, // NOTE: DeleteAttr removed - `del` statement not supported by parser // === Function Calls === /// Call TOS with n positional args. Operand: u8 arg_count. CallFunction, /// Call a builtin function directly. Operands: u8 builtin_id, u8 arg_count. /// /// The builtin_id is the discriminant of `BuiltinsFunctions` (via `FromRepr`). /// This is an optimization over `LoadConst + CallFunction` that avoids: /// - Constant pool lookup /// - Pushing/popping the callable on the stack /// - Runtime type dispatch in call_function CallBuiltinFunction, /// Call a builtin type constructor directly. Operands: u8 type_id, u8 arg_count. /// /// The type_id is the discriminant of `BuiltinsTypes` (via `FromRepr`). /// This is an optimization for type constructors like `list()`, `int()`, `str()`. CallBuiltinType, /// Call with positional and keyword args. /// /// Operands: u8 pos_count, u8 kw_count, then kw_count u16 name indices. /// /// Stack: [callable, pos_args..., kw_values...] /// After the two count bytes, there are kw_count little-endian u16 values, /// each being a StringId index for the corresponding keyword argument name. CallFunctionKw, /// Call attribute on object. Operands: u16 name_id, u8 arg_count. /// /// This is used for both method calls (`obj.method(args)`) and module /// attribute calls (`module.func(args)`). 
The attribute is looked up /// on the object and called with the given arguments. CallAttr, /// Call attribute with keyword args. Operands: u16 name_id, u8 pos_count, u8 kw_count, then kw_count u16 name indices. /// /// Stack: [obj, pos_args..., kw_values...] /// After the operands, there are kw_count little-endian u16 values, /// each being a StringId index for the corresponding keyword argument name. CallAttrKw, /// Call a defined function with *args tuple and **kwargs dict. Operand: u8 flags. /// /// Flags: /// - bit 0: has kwargs dict on stack /// /// Stack layout (bottom to top): /// - callable /// - args tuple /// - kwargs dict (if flag bit 0 set) /// /// Used for calls with `*args` and/or `**kwargs` unpacking. CallFunctionExtended, /// Call attribute with *args tuple and **kwargs dict. Operands: u16 name_id, u8 flags. /// /// Flags: /// - bit 0: has kwargs dict on stack /// /// Stack layout (bottom to top): /// - receiver object /// - args tuple /// - kwargs dict (if flag bit 0 set) /// /// Used for method calls with `*args` and/or `**kwargs` unpacking. CallAttrExtended, // === Control Flow === /// Unconditional relative jump. Operand: i16 offset. Jump, /// Jump if TOS truthy, always pop. Operand: i16 offset. JumpIfTrue, /// Jump if TOS falsy, always pop. Operand: i16 offset. JumpIfFalse, /// Jump if TOS truthy (keep), else pop. Operand: i16 offset. JumpIfTrueOrPop, /// Jump if TOS falsy (keep), else pop. Operand: i16 offset. JumpIfFalseOrPop, // === Iteration === /// Convert TOS to iterator. GetIter, /// Advance iterator or jump to end. Operand: i16 offset. ForIter, // === Function Definition === /// Create function object. Operand: u16 func_id. MakeFunction, /// Create closure. Operands: u16 func_id, u8 cell_count. MakeClosure, // === Exception Handling === // Note: No SetupTry/PopExceptHandler - we use static exception_table /// Raise TOS as exception. Raise, // NOTE: RaiseFrom removed - `raise ... 
from ...` not supported by parser /// Re-raise current exception (bare `raise`). Reraise, /// Clear current_exception when exiting except block. ClearException, /// Check if exception matches type for except clause. /// /// Stack: [..., exception, exc_type] -> [..., exception, bool] /// Validates that exc_type is a valid exception type (ExcType or tuple of ExcTypes). /// If invalid, raises TypeError. If valid, pushes True if exception matches, else False. CheckExcMatch, // === Return === /// Return TOS from function. ReturnValue, // === Async/Await === /// Await the TOS value. /// /// Handles `ExternalFuture`, `Coroutine`, and `GatherFuture` awaitables. /// For `ExternalFuture`: if resolved, pushes result; if pending, blocks task. /// For `Coroutine`: validates state is `New`, then starts execution. /// For `GatherFuture`: spawns all coroutines as tasks and blocks until completion. /// /// Raises `TypeError` if TOS is not awaitable. /// Raises `RuntimeError` if coroutine/future has already been awaited. Await, // === Unpacking === /// Unpack TOS into n values. Operand: u8 count. UnpackSequence, /// Unpack with *rest. Operands: u8 before, u8 after. UnpackEx, // === Special === /// No operation (for patching/alignment). Nop, // === Module Operations === /// Load a built-in module onto the stack. Operand: u8 module_id. /// /// The module_id maps to `BuiltinModule` (0=sys, 1=typing). /// Creates the module on the heap and pushes a `Value::Ref` to it. LoadModule, /// Raises `ModuleNotFoundError` at runtime. Operand: u16 constant index for module name. /// /// This opcode is emitted when the compiler encounters an import of an unknown module. /// Instead of failing at compile time, the error is deferred to runtime so that /// imports inside `if TYPE_CHECKING:` blocks or other non-executed code paths /// don't cause errors. /// /// The operand is an index into the constant pool where the module name string is stored. 
RaiseImportError, /// Duplicate the top two stack values, preserving order: `[a, b] -> [a, b, a, b]`. /// /// Appended at the end to preserve the serialized byte values of all older opcodes. Dup2, /// Delete global variable (set to Undefined). Operand: u16 slot. /// /// Appended at the end to preserve the serialized byte values of all older opcodes. DeleteGlobal, /// Pop a mapping, silently merge into the dict at `depth`. Operand: u8 depth. /// /// Used for `**expr` unpack inside dict literals, where later keys overwrite earlier ones /// (unlike `DictMerge` which raises `TypeError` on duplicate keys). /// /// Stack: [..., dict, iter1, ..., iterN, mapping] -> [..., dict, iter1, ..., iterN] /// Pops mapping (TOS), merges into dict at stack position `len - 2 - depth`. /// Raises `TypeError` if `mapping` is not a dict. DictUpdate, /// Pop an iterable, add all items to set at `depth`. Operand: u8 depth. /// /// Used for `*expr` unpack inside set literals (e.g., `{*a, 1}`). /// Follows the same depth convention as `ListAppend`/`SetAdd`. /// /// Stack: [..., set, iter1, ..., iterN, iterable] -> [..., set, iter1, ..., iterN] /// Pops iterable (TOS), adds each item to set at stack position `len - 2 - depth`. /// Raises `TypeError` if iterable is not iterable. SetExtend, } impl TryFrom for Opcode { type Error = InvalidOpcodeError; fn try_from(byte: u8) -> Result { Self::from_repr(byte).ok_or(InvalidOpcodeError(byte)) } } impl Opcode { /// Returns the stack effect of this opcode (positive = push, negative = pop). /// /// Some opcodes have variable effects (e.g., `BuildList` depends on its operand). /// For those, this returns `None` and the caller must compute the effect. /// /// For opcodes that have known, fixed stack effects, returns `Some(i16)`. 
#[must_use] pub const fn stack_effect(self) -> Option { #![expect(clippy::allow_attributes, reason = "expect seems broken with enum_glob_use")] #[allow(clippy::enum_glob_use, reason = "simplifies churn")] use Opcode::*; Some(match self { // Stack operations Pop => -1, Dup => 1, Dup2 => 2, Rot2 | Rot3 => 0, // reorder, no net change // Constants & Literals (all push 1) LoadConst | LoadNone | LoadTrue | LoadFalse | LoadSmallInt => 1, // Variables - loads push, stores pop LoadLocal0 | LoadLocal1 | LoadLocal2 | LoadLocal3 => 1, LoadLocal | LoadLocalW | LoadLocalCallable | LoadLocalCallableW | LoadGlobal | LoadGlobalCallable | LoadCell => 1, StoreLocal | StoreLocalW | StoreGlobal | StoreCell => -1, DeleteLocal | DeleteGlobal => 0, // doesn't affect stack // Binary operations: pop 2, push 1 = -1 BinaryAdd | BinarySub | BinaryMul | BinaryDiv | BinaryFloorDiv | BinaryMod | BinaryPow | BinaryAnd | BinaryOr | BinaryXor | BinaryLShift | BinaryRShift | BinaryMatMul => -1, // Comparisons: pop 2, push 1 = -1 CompareEq | CompareNe | CompareLt | CompareLe | CompareGt | CompareGe | CompareIs | CompareIsNot | CompareIn | CompareNotIn | CompareModEq => -1, // Unary operations: pop 1, push 1 = 0 UnaryNot | UnaryNeg | UnaryPos | UnaryInvert => 0, // In-place operations: pop 1 (rhs), leave target on stack = -1 InplaceAdd | InplaceSub | InplaceMul | InplaceDiv | InplaceFloorDiv | InplaceMod | InplacePow | InplaceAnd | InplaceOr | InplaceXor | InplaceLShift | InplaceRShift => -1, // Collection building - depends on operand, return None BuildList | BuildTuple | BuildDict | BuildSet | BuildFString => return None, // FormatValue: pops 1 value (+ optional fmt_spec), pushes 1. Variable. 
FormatValue => return None, // BuildSlice: pop 3, push 1 = -2 BuildSlice => -2, // ListExtend: pop 2 (iterable + list), push 1 (list) = -1 ListExtend => -1, // ListToTuple: pop 1, push 1 = 0 ListToTuple => 0, // DictMerge: pop 2, push 1 = -1 DictMerge => -1, // Comprehension building - pops value, no push (stores in collection below) ListAppend | SetAdd => -1, DictSetItem => -2, // pops key and value // Subscript & Attribute BinarySubscr => -1, // pop 2, push 1 StoreSubscr => -3, // pop 3, push 0 LoadAttr | LoadAttrImport => 0, // pop 1, push 1 StoreAttr => -2, // pop 2, push 0 // Function calls - depend on arg count CallFunction | CallBuiltinFunction | CallBuiltinType | CallFunctionKw | CallAttr | CallAttrKw | CallFunctionExtended | CallAttrExtended => return None, // Control flow - no stack effect (jumps don't push/pop) Jump => 0, JumpIfTrue | JumpIfFalse => -1, // always pop condition JumpIfTrueOrPop | JumpIfFalseOrPop => return None, // variable (0 or -1) // Iteration GetIter => 0, // pop iterable, push iterator ForIter => return None, // pushes value or jumps (variable) // Async/await Await => 0, // pop awaitable, push result // Function definition - push 1 (the function/closure) MakeFunction | MakeClosure => 1, // Exception handling Raise => -1, // pop exception Reraise => 0, // no stack change (reads from exception_stack) ClearException => 0, // clears exception_stack, no operand stack change CheckExcMatch => 0, // pop exc_type, push bool (net 0, but exc stays) // Return ReturnValue => -1, // Unpacking - depends on operand UnpackSequence | UnpackEx => return None, // Dict/set literal extensions (PEP 448): // DictUpdate: pop mapping, silently merge into dict below = -1 DictUpdate => -1, // SetExtend: pop iterable, add all items to set below = -1 SetExtend => -1, // Special Nop => 0, // Module LoadModule => 1, // push module RaiseImportError => 0, // raises exception, no stack change before that }) } } /// Error returned when attempting to convert an invalid 
byte to an Opcode. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct InvalidOpcodeError(pub u8); impl std::fmt::Display for InvalidOpcodeError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "invalid opcode byte: {}", self.0) } } impl std::error::Error for InvalidOpcodeError {} #[cfg(test)] mod tests { use super::*; #[test] fn test_opcode_roundtrip() { // Verify that all opcodes from 0 to DeleteGlobal (last opcode) can be converted to u8 and back. for byte in 0..=Opcode::DeleteGlobal as u8 { let opcode = Opcode::try_from(byte).unwrap(); assert_eq!(opcode as u8, byte, "opcode {opcode:?} has wrong discriminant"); } } #[test] fn test_serialized_opcode_values_remain_stable() { // `RaiseImportError` was the tail opcode before `Dup2` was introduced. Keeping it at // byte 110 preserves compatibility for serialized runners and snapshots compiled by // older versions. assert_eq!(Opcode::RaiseImportError as u8, 110); assert_eq!(Opcode::Dup2 as u8, 111); assert_eq!(Opcode::DeleteGlobal as u8, 112); assert_eq!(Opcode::DictUpdate as u8, 113); assert_eq!(Opcode::SetExtend as u8, 114); } #[test] fn test_invalid_opcode() { // Byte just after the last valid opcode should fail let result = Opcode::try_from(Opcode::SetExtend as u8 + 1); assert!(result.is_err()); // 255 should also fail let result = Opcode::try_from(255u8); assert!(result.is_err()); } #[test] fn test_opcode_size() { // Verify opcode is 1 byte assert_eq!(std::mem::size_of::(), 1); } } ================================================ FILE: crates/monty/src/bytecode/vm/async_exec.rs ================================================ //! Async execution support for the VM. //! //! This module contains all async-related methods for the VM including: //! - Awaiting coroutines, external futures, and gather futures //! - Task scheduling and context switching //! - Task completion and failure handling //! 
//! - External future resolution

use super::{AwaitResult, CallFrame, VM};
use crate::{
    InvalidInputError, MontyObject,
    args::ArgValues,
    asyncio::{CallId, CoroutineState, GatherItem, TaskId},
    bytecode::vm::scheduler::{PendingCallData, SerializedTaskFrame, TaskState},
    defer_drop,
    exception_private::{ExcType, RunError, SimpleException},
    heap::{HeapData, HeapGuard, HeapId},
    heap_data::HeapDataMut,
    intern::FunctionId,
    resource::ResourceTracker,
    types::{List, PyTrait},
    value::Value,
};

impl<T: ResourceTracker> VM<'_, '_, T> {
    /// Executes the Await opcode.
    ///
    /// Pops the awaitable from the stack and handles it based on its type:
    /// - `Coroutine`: validates state is New, then pushes a frame to execute it
    /// - `ExternalFuture`: blocks until resolved or yields if not ready
    /// - `GatherFuture`: spawns tasks for coroutines and tracks external futures
    ///
    /// Returns `AwaitResult` indicating what action the VM should take.
    /// Any other value produces the `object_not_awaitable` error for its type.
    pub(super) fn exec_get_awaitable(&mut self) -> Result<AwaitResult, RunError> {
        let awaitable = self.pop();
        let mut awaitable_guard = HeapGuard::new(awaitable, self);
        let (awaitable, this) = awaitable_guard.as_parts();

        match awaitable {
            Value::Ref(heap_id) => {
                let heap_id = *heap_id;
                // Classify first so the heap borrow ends before the guard is consumed.
                let heap_data_type = match this.heap.get(heap_id) {
                    HeapData::Coroutine(_) => Some(AwaitableType::Coroutine),
                    HeapData::GatherFuture(_) => Some(AwaitableType::GatherFuture),
                    _ => None,
                };

                match heap_data_type {
                    Some(AwaitableType::Coroutine) => {
                        // Ownership of the awaitable moves to the handler.
                        let (awaitable, this) = awaitable_guard.into_parts();
                        this.await_coroutine(heap_id, awaitable)
                    }
                    Some(AwaitableType::GatherFuture) => {
                        let (awaitable, this) = awaitable_guard.into_parts();
                        this.await_gather_future(heap_id, awaitable)
                    }
                    None => Err(ExcType::object_not_awaitable(awaitable.py_type(this.heap))),
                }
            }
            &Value::ExternalFuture(call_id) => this.await_external_future(call_id),
            _ => Err(ExcType::object_not_awaitable(awaitable.py_type(this.heap))),
        }
    }

    /// Awaits a coroutine by pushing a frame to execute it.
/// /// Validates the coroutine is in `New` state, extracts its captured namespace /// and cells, marks it as `Running`, and pushes a frame to execute the coroutine body. fn await_coroutine(&mut self, heap_id: HeapId, awaitable: Value) -> Result { let this = self; defer_drop!(awaitable, this); let HeapData::Coroutine(coro) = this.heap.get(heap_id) else { unreachable!("await_coroutine called with non-coroutine heap_id") }; // Check if coroutine can be awaited (must be New) if coro.state != CoroutineState::New { return Err( SimpleException::new_msg(ExcType::RuntimeError, "cannot reuse already awaited coroutine").into(), ); } // Extract coroutine data before mutating let func_id = coro.func_id; let namespace_values: Vec = coro.namespace.iter().map(|v| v.clone_with_heap(this.heap)).collect(); // Mark coroutine as Running if let HeapDataMut::Coroutine(coro_mut) = this.heap.get_mut(heap_id) { coro_mut.state = CoroutineState::Running; } // Create namespace and push frame (guard drops awaitable at scope exit) this.start_coroutine_frame(func_id, namespace_values)?; Ok(AwaitResult::FramePushed) } /// Awaits a gather future by spawning tasks for coroutines and tracking external futures. /// /// For each item in the gather: /// - Coroutines are spawned as tasks /// - External futures are checked for resolution or registered for tracking /// /// If all items are already resolved, returns immediately. Otherwise blocks /// the current task and switches to a ready task or yields to the host. 
fn await_gather_future(&mut self, heap_id: HeapId, awaitable: Value) -> Result { let this = self; let mut awaitable_guard = HeapGuard::new(awaitable, this); let (_, this) = awaitable_guard.as_parts(); let HeapData::GatherFuture(gather) = this.heap.get(heap_id) else { unreachable!("await_gather_future called with non-gather heap_id") }; // Check if already being waited on (double-await) if gather.waiter.is_some() { return Err(SimpleException::new_msg(ExcType::RuntimeError, "cannot reuse already awaited gather").into()); } // If no items to gather, return empty list immediately if gather.item_count() == 0 { let list_id = this.heap.allocate(HeapData::List(List::new(vec![])))?; return Ok(AwaitResult::ValueReady(Value::Ref(list_id))); } // Set waiter and clone items to process // Note: We clone instead of mem::take because GatherItem::Coroutine holds HeapIds // that need to stay in gather.items for proper ref counting when the gather is dropped. let current_task = this.scheduler.current_task_id(); let items: Vec = if let HeapDataMut::GatherFuture(gather_mut) = this.heap.get_mut(heap_id) { gather_mut.waiter = current_task; gather_mut.items.clone() } else { vec![] }; // Process each item let mut task_ids = Vec::new(); let mut pending_calls = Vec::new(); for (idx, item) in items.iter().enumerate() { match item { GatherItem::Coroutine(coro_id) => { // Spawn as task with the item index as result index let task_id = this.scheduler.spawn(*coro_id, Some(heap_id), Some(idx)); task_ids.push(task_id); } GatherItem::ExternalFuture(call_id) => { // Check if already resolved this.scheduler.mark_consumed(*call_id); if let Some(value) = this.scheduler.take_resolved(*call_id) { // Already resolved - store result immediately if let HeapDataMut::GatherFuture(gather_mut) = this.heap.get_mut(heap_id) { gather_mut.results[idx] = Some(value); } } else { // Not resolved yet - track it pending_calls.push(*call_id); // Register gather as waiting on this call 
this.scheduler.register_gather_for_call(*call_id, heap_id, idx); } } } } // Store task IDs and pending calls in the gather if let HeapDataMut::GatherFuture(gather_mut) = this.heap.get_mut(heap_id) { gather_mut.task_ids = task_ids; gather_mut.pending_calls.clone_from(&pending_calls); } // Check if all items are already complete (only external futures, all resolved) let all_complete = { if let HeapData::GatherFuture(gather) = this.heap.get(heap_id) { gather.task_ids.is_empty() && gather.pending_calls.is_empty() } else { false } }; if all_complete { // All external futures were already resolved - return results immediately // Steal results using mem::take - avoids refcount dance since we're dropping // the GatherFuture anyway via awaitable.drop_with_heap below let results: Vec = if let HeapDataMut::GatherFuture(gather) = this.heap.get_mut(heap_id) { std::mem::take(&mut gather.results) .into_iter() .map(|r| r.expect("all results should be filled")) .collect() } else { vec![] }; let list_id = this.heap.allocate(HeapData::List(List::new(results)))?; return Ok(AwaitResult::ValueReady(Value::Ref(list_id))); } // Block current task on this gather this.scheduler.block_current_on_gather(heap_id); // Consume the awaitable without decrementing refcount - the GatherFuture // must stay alive for result collection. It will be dec_ref'd when // the gather completes (in handle_task_completion). let (awaitable, this) = awaitable_guard.into_parts(); #[cfg_attr( not(feature = "ref-count-panic"), expect(clippy::forget_non_drop, reason = "has Drop with ref-count-panic feature") )] std::mem::forget(awaitable); // Switch to next ready task (spawned tasks) or yield for external futures this.switch_or_yield() } /// Awaits an external future by blocking until it's resolved. /// /// If the future is already resolved, returns the value immediately. /// Otherwise blocks the current task and switches to a ready task or yields to the host. 
fn await_external_future(&mut self, call_id: CallId) -> Result { // Check if already consumed (double-await error) if self.scheduler.is_consumed(call_id) { return Err(SimpleException::new_msg(ExcType::RuntimeError, "cannot reuse already awaited future").into()); } // Mark as consumed self.scheduler.mark_consumed(call_id); // Check if the future is already resolved if let Some(value) = self.scheduler.take_resolved(call_id) { Ok(AwaitResult::ValueReady(value)) } else { // Block current task on this call self.scheduler.block_current_on_call(call_id); // Switch to next ready task or yield to host self.switch_or_yield() } } /// Starts execution of a coroutine by pushing its locals onto the stack. /// /// Extends the VM stack with the coroutine's pre-bound namespace values /// and pushes a new frame to execute the coroutine's function body. fn start_coroutine_frame(&mut self, func_id: FunctionId, namespace_values: Vec) -> Result<(), RunError> { let call_position = self.current_position(); let func = self.interns.get_function(func_id); let locals_count = u16::try_from(namespace_values.len()).expect("coroutine namespace size exceeds u16"); // Track memory for the locals let size = namespace_values.len() * std::mem::size_of::(); self.heap.tracker_mut().on_allocate(|| size)?; // Extend the stack with the coroutine's pre-bound locals let stack_base = self.stack.len(); self.stack.extend(namespace_values); // Push frame to execute the coroutine self.push_frame(CallFrame::new_function( &func.code, stack_base, locals_count, func_id, Some(call_position), ))?; Ok(()) } /// Attempts to switch to the next ready task or yields if all tasks are blocked. /// /// This method is called when the current task blocks (e.g., awaiting an unresolved /// future or gather). It performs task context switching: /// 1. Saves current VM context to the current task in the scheduler /// 2. Gets the next ready task from the scheduler /// 3. 
Loads that task's context into the VM (or initializes a new task from its coroutine) /// /// Returns `Yield(pending_calls)` if no ready tasks (all blocked), or continues /// the run loop if a task was switched to. fn switch_or_yield(&mut self) -> Result { if let Some(next_task_id) = self.scheduler.next_ready_task() { // Save current task context ONLY when switching to another task. // This is critical: if we're about to yield (no ready tasks), the main task's // frames must stay in the VM so they're included in the snapshot. if let Some(current_task_id) = self.scheduler.current_task_id() { self.save_task_context(current_task_id); } self.scheduler.set_current_task(Some(next_task_id)); // Load or initialize the next task's context self.load_or_init_task(next_task_id)?; // Continue execution - return FramePushed to reload cache and continue run loop Ok(AwaitResult::FramePushed) } else { // No ready tasks - yield control to host. // Don't save the main task's context - frames stay in VM for the snapshot. Ok(AwaitResult::Yield(self.scheduler.pending_call_ids())) } } /// Handles completion of a spawned task. /// /// Called when a spawned task's coroutine returns. This: /// 1. Marks the task as completed in the scheduler /// 2. If the task belongs to a gather, stores the result and checks if gather is complete /// 3. If gather is complete, unblocks the waiter and provides the collected results /// 4. 
Otherwise, switches to the next ready task pub(super) fn handle_task_completion(&mut self, result: Value) -> Result { let task_id = self .scheduler .current_task_id() .expect("handle_task_completion called without current task"); let task = self.scheduler.get_task(task_id); let gather_id = task.gather_id; let gather_result_idx = task.gather_result_idx; let coroutine_id = task.coroutine_id; // Mark coroutine as completed if let Some(coro_id) = coroutine_id && let HeapDataMut::Coroutine(coro) = self.heap.get_mut(coro_id) { coro.state = CoroutineState::Completed; } // Mark task as completed and store result in task state let task_result = result.clone_with_heap(self.heap); self.scheduler.complete_task(task_id, task_result); // If task belongs to a gather, store result and check if gather is complete if let Some(gid) = gather_id { // Store result in gather.results at the correct index if let Some(idx) = gather_result_idx && let HeapDataMut::GatherFuture(gather) = self.heap.get_mut(gid) { gather.results[idx] = Some(result); } else { result.drop_with_heap(self.heap); } // Extract gather metadata - clone task_ids since we need to check completion // but gather might not be complete yet. We only take task_ids later when // we know gather is complete and will be destroyed. 
let (task_ids, waiter, pending_calls_empty) = if let HeapData::GatherFuture(gather) = self.heap.get(gid) { (gather.task_ids.clone(), gather.waiter, gather.pending_calls.is_empty()) } else { (vec![], None, true) }; // Check if all tasks are complete AND all external futures are resolved let all_tasks_complete = task_ids.iter().all(|tid| { matches!( self.scheduler.get_task(*tid).state, TaskState::Completed(_) | TaskState::Failed(_) ) }); let all_complete = all_tasks_complete && pending_calls_empty; if all_complete { // First check if any task failed let failed_task = task_ids .iter() .find(|tid| matches!(self.scheduler.get_task(**tid).state, TaskState::Failed(_))); if let Some(&failed_tid) = failed_task { // Get the error from the failed task let task = self.scheduler.get_task_mut(failed_tid); if let TaskState::Failed(err) = std::mem::replace(&mut task.state, TaskState::Ready) { self.heap.dec_ref(gid); // Switch to waiter so error is raised in its context if let Some(waiter_id) = waiter { self.cleanup_current_task(); self.scheduler.set_current_task(Some(waiter_id)); self.load_or_init_task(waiter_id)?; } return Err(err); } } // Steal results from gather using mem::take - avoids refcount dance // (copy + inc_ref + dec_ref on gather drop). Since gather is being // destroyed, we can take ownership of the values directly. 
let results: Vec = if let HeapDataMut::GatherFuture(gather) = self.heap.get_mut(gid) { std::mem::take(&mut gather.results) .into_iter() .map(|r| r.expect("all results should be filled when gather is complete")) .collect() } else { vec![] }; // Create result list let list_id = self.heap.allocate(HeapData::List(List::new(results)))?; // Release the GatherFuture - this will cascade to release coroutines self.heap.dec_ref(gid); // Unblock waiter and switch to it if let Some(waiter_id) = waiter { self.scheduler.make_ready(waiter_id); // Remove from ready queue since we're switching directly to it self.scheduler.remove_from_ready_queue(waiter_id); // Clear current task's state since it's done self.cleanup_current_task(); // Switch to waiter self.scheduler.set_current_task(Some(waiter_id)); self.load_or_init_task(waiter_id)?; // Push the result onto the waiter's stack self.push(Value::Ref(list_id)); return Ok(AwaitResult::FramePushed); } // No waiter (shouldn't happen but handle gracefully) return Ok(AwaitResult::ValueReady(Value::Ref(list_id))); } } else { // Drop the result (it's stored in the task state now) result.drop_with_heap(self); } // Gather not complete or no gather - switch to next task self.cleanup_current_task(); self.scheduler.set_current_task(None); if let Some(next_task_id) = self.scheduler.next_ready_task() { self.scheduler.set_current_task(Some(next_task_id)); self.load_or_init_task(next_task_id)?; Ok(AwaitResult::FramePushed) } else { Ok(AwaitResult::Yield(self.scheduler.pending_call_ids())) } } /// Returns true if the current task is a spawned task (not main). /// /// Used by exception handling to determine if an unhandled exception /// should fail the task rather than propagate out. #[inline] pub(super) fn is_spawned_task(&self) -> bool { self.scheduler.current_task_id().is_some_and(|id| !id.is_main()) } /// Handles failure of a spawned task due to an unhandled exception. /// /// Called when an exception escapes all frames in a spawned task. 
This: /// 1. Marks the task as failed in the scheduler /// 2. If the task belongs to a gather, cleans up and propagates to waiter /// 3. Otherwise, switches to the next ready task /// /// # Returns /// - `Ok(())` - Switched to next task, continue execution /// - `Err(error)` - Switched to waiter, handle error in waiter's context /// /// # Panics /// Panics if called for the main task. pub(super) fn handle_task_failure(&mut self, error: RunError) -> Result<(), RunError> { let task_id = self .scheduler .current_task_id() .expect("handle_task_failure called without current task"); debug_assert!(!task_id.is_main(), "handle_task_failure called for main task"); // Get task's gather_id before marking failed let gather_id = self.scheduler.get_task(task_id).gather_id; // If part of a gather, propagate error to waiter if let Some(gid) = gather_id { // Get waiter and take task_ids from GatherFuture - gather is being destroyed anyway let (waiter, task_ids) = if let HeapDataMut::GatherFuture(gather) = self.heap.get_mut(gid) { (gather.waiter, std::mem::take(&mut gather.task_ids)) } else { (None, vec![]) }; // Mark task as failed self.scheduler.fail_task(task_id, error); // Cancel sibling tasks (filter out self and already-finished tasks inline) for sibling_id in task_ids { if sibling_id != task_id && !self.scheduler.get_task(sibling_id).is_finished() { self.scheduler.cancel_task(sibling_id, self.heap); } } // Clean up the gather self.heap.dec_ref(gid); // Switch to waiter and propagate the error if let Some(waiter_id) = waiter { self.cleanup_current_task(); self.scheduler.set_current_task(Some(waiter_id)); self.load_or_init_task(waiter_id)?; // Get error back from task state to return let task = self.scheduler.get_task_mut(task_id); if let TaskState::Failed(err) = std::mem::replace(&mut task.state, TaskState::Ready) { return Err(err); } } } else { // No gather - just mark task as failed (ignore returned gather_id which is None) let _ = self.scheduler.fail_task(task_id, error); } 
// No gather or no waiter - switch to next task self.cleanup_current_task(); self.scheduler.set_current_task(None); if let Some(next_task_id) = self.scheduler.next_ready_task() { self.scheduler.set_current_task(Some(next_task_id)); self.load_or_init_task(next_task_id)?; } // If no ready tasks, frames will be empty and run loop will yield Ok(()) } /// Saves the current VM context into the given task in the scheduler. /// /// Serializes frames, moves stack/exception_stack, stores instruction_ip, /// and adjusts the global recursion depth counter. fn save_task_context(&mut self, task_id: TaskId) { let frames: Vec = self .frames .drain(..) .map(|f| SerializedTaskFrame { function_id: f.function_id, ip: f.ip, stack_base: f.stack_base, locals_count: f.locals_count, call_position: f.call_position, }) .collect(); // Count this task's recursion depth contribution and subtract it from // the global counter so the next task gets a clean budget. let task_depth = frames.len().saturating_sub(1); // root frame doesn't contribute to recursion depth let global_depth = self.heap.get_recursion_depth(); self.heap.set_recursion_depth(global_depth - task_depth); // Save VM state into the task let task = self.scheduler.get_task_mut(task_id); task.frames = frames; task.stack = std::mem::take(&mut self.stack); task.exception_stack = std::mem::take(&mut self.exception_stack); task.instruction_ip = self.instruction_ip; } /// Loads an existing task's context or initializes a new task from its coroutine. /// /// If the task has stored frames, restores them into the VM. If the task was /// unblocked by an external future resolution, pushes the resolved value onto /// the restored stack so execution can continue past the AWAIT opcode. /// If the task has a coroutine_id but no frames, starts the coroutine. /// /// Restores the task's recursion depth contribution to the global counter /// (balances the subtraction in `save_task_context`). 
    ///
    /// # Panics
    /// Panics if the task has neither stored frames nor a `coroutine_id`, or if a
    /// stored frame without a `function_id` is restored while `module_code` is unset.
    fn load_or_init_task(&mut self, task_id: TaskId) -> Result<(), RunError> {
        // Move the saved state out of the task; the task's own copies are left empty.
        let task = self.scheduler.get_task_mut(task_id);
        let frames = std::mem::take(&mut task.frames);
        let stack = std::mem::take(&mut task.stack);
        let exception_stack = std::mem::take(&mut task.exception_stack);
        let instruction_ip = task.instruction_ip;
        let coroutine_id = task.coroutine_id;

        // Restore this task's recursion depth contribution to the global counter
        let task_depth = frames.len().saturating_sub(1); // root frame doesn't contribute to recursion depth
        let global_depth = self.heap.get_recursion_depth();
        self.heap.set_recursion_depth(global_depth + task_depth);

        if !frames.is_empty() {
            // Task has existing context - restore it
            self.stack = stack;
            self.exception_stack = exception_stack;
            self.instruction_ip = instruction_ip;

            // Reconstruct CallFrames from serialized form
            self.frames = frames
                .into_iter()
                .map(|sf| {
                    // The code reference is not serialized; look it up again from the function id.
                    let code = match sf.function_id {
                        Some(func_id) => &self.interns.get_function(func_id).code,
                        None => {
                            // This happens for the main task's module-level code
                            self.module_code.expect("module_code not set for main task frame")
                        }
                    };
                    CallFrame {
                        code,
                        ip: sf.ip,
                        stack_base: sf.stack_base,
                        locals_count: sf.locals_count,
                        function_id: sf.function_id,
                        call_position: sf.call_position,
                        should_return: false,
                    }
                })
                .collect();
        } else if let Some(coro_id) = coroutine_id {
            // New task - start from coroutine
            self.init_task_from_coroutine(coro_id)?;
        } else {
            // This shouldn't happen - task with no frames and no coroutine
            panic!("task has no frames and no coroutine_id");
        }

        // If this task was unblocked by a resolved external future, push the
        // resolved value onto the stack. The AWAIT opcode already advanced the IP
        // past itself before the task was saved, so execution will continue with
        // the resolved value on top of the stack.
        if let Some(value) = self.scheduler.take_resolved_for_task(task_id) {
            self.push(value);
        }

        Ok(())
    }

    /// Initializes the VM state to run a coroutine for a spawned task.
///
/// Similar to exec_get_awaitable's coroutine handling, but for task initialization.
///
/// Returns a `RuntimeError` if the coroutine is not in the `New` state, and
/// propagates errors from the resource tracker or from pushing the frame.
/// Panics if `coroutine_id` does not refer to a coroutine on the heap.
fn init_task_from_coroutine(&mut self, coroutine_id: HeapId) -> Result<(), RunError> {
    // Get coroutine data
    let heap_data = self.heap.get(coroutine_id);
    let HeapData::Coroutine(coro) = heap_data else {
        panic!("task coroutine_id doesn't point to a Coroutine")
    };

    // Check state
    if coro.state != CoroutineState::New {
        return Err(
            SimpleException::new_msg(ExcType::RuntimeError, "cannot reuse already awaited coroutine").into(),
        );
    }

    // Extract coroutine data
    let func_id = coro.func_id;
    let namespace_values: Vec<Value> = coro.namespace.iter().map(|v| v.clone_with_heap(self)).collect();

    // Mark coroutine as Running
    if let HeapDataMut::Coroutine(coro_mut) = self.heap.get_mut(coroutine_id) {
        coro_mut.state = CoroutineState::Running;
    }

    // Push locals onto stack and push frame directly (can't use start_coroutine_frame
    // because that needs a current frame for call_position, but spawned tasks
    // don't have a parent frame — the coroutine is the root)
    let func = self.interns.get_function(func_id);
    let locals_count = u16::try_from(namespace_values.len()).expect("coroutine namespace size exceeds u16");

    // Track memory for the locals
    let size = namespace_values.len() * std::mem::size_of::<Value>();
    self.heap.tracker_mut().on_allocate(|| size)?;

    let stack_base = self.stack.len();
    self.stack.extend(namespace_values);

    self.push_frame(CallFrame::new_function(
        &func.code,
        stack_base,
        locals_count,
        func_id,
        None, // No call position — this is the root frame for a spawned task
    ))?;

    Ok(())
}

/// Resolves an external future with a value.
///
/// Called by the host when an async external call completes.
/// Stores the result in the scheduler, which will unblock any task
/// waiting on this CallId.
///
/// If the task that created this call has been cancelled or failed,
/// the result is silently ignored and the value is dropped.
pub fn resolve_future(&mut self, call_id: u32, obj: MontyObject) -> Result<(), InvalidInputError> { let call_id = CallId::new(call_id); // Check if the creator task has been cancelled/failed if let Some(creator_task) = self.scheduler.get_pending_call_creator(call_id) && self.scheduler.is_task_failed(creator_task) { // Task was cancelled - silently ignore the result return Ok(()); } let value = obj.to_value(self)?; // Check if a gather is waiting on this CallId if let Some((gather_id, result_idx)) = self.scheduler.take_gather_waiter(call_id) { self.scheduler.remove_pending_call(call_id); // Store result directly in gather (move, not clone) and check completion let (pending_empty, task_ids, waiter) = if let HeapDataMut::GatherFuture(gather) = self.heap.get_mut(gather_id) { gather.results[result_idx] = Some(value); // Move value directly, no clone needed // Remove from pending_calls gather.pending_calls.retain(|&cid| cid != call_id); // Take task_ids to avoid clone - we're checking completion so gather may be destroyed ( gather.pending_calls.is_empty(), std::mem::take(&mut gather.task_ids), gather.waiter, ) } else { (true, vec![], None) }; // Check if gather is now complete (all external futures resolved and all tasks complete) if pending_empty { let all_tasks_complete = task_ids.is_empty() || task_ids.iter().all(|tid| { matches!( self.scheduler.get_task(*tid).state, TaskState::Completed(_) | TaskState::Failed(_) ) }); if all_tasks_complete { // Gather is complete - build result and push to waiter's stack if let Some(waiter_id) = waiter { // Steal results from gather using mem::take - avoids refcount dance // (copy + inc_ref + dec_ref on gather drop). Since gather is being // destroyed, we can take ownership of the values directly. 
let results: Vec = if let HeapDataMut::GatherFuture(gather) = self.heap.get_mut(gather_id) { std::mem::take(&mut gather.results) .into_iter() .map(|r| r.expect("all results should be filled when gather is complete")) .collect() } else { vec![] }; // Create result list - if this fails, we can't do much, just skip if let Ok(list_id) = self.heap.allocate(HeapData::List(List::new(results))) { // Release the GatherFuture (results already taken, so no double-drop) self.heap.dec_ref(gather_id); // Push result onto waiter's stack and mark as ready. // Check if the waiter's context is currently in the VM (frames not saved // to the task). This is the case when the waiter is the current task // and hasn't been switched away from (e.g., external-only gather). let waiter_context_in_vm = self.scheduler.current_task_id() == Some(waiter_id) && !self.frames.is_empty(); if waiter_context_in_vm { // Waiter's frames are in the VM - push directly onto VM stack self.stack.push(Value::Ref(list_id)); // Mark as ready but don't add to ready_queue self.scheduler.get_task_mut(waiter_id).state = TaskState::Ready; } else { // Waiter's context is saved in the task (either spawned task, // or main task that was saved when switching to spawned tasks) self.scheduler.get_task_mut(waiter_id).stack.push(Value::Ref(list_id)); self.scheduler.make_ready(waiter_id); } } } } } } else { // Normal resolution for single awaiter self.scheduler.resolve(call_id, value); } Ok(()) } /// Fails an external future with an error. /// /// Called by the host when an async external call fails with an exception. /// Finds the task blocked on this CallId and fails it with the error. /// If the task is part of a gather, cancels sibling tasks. 
pub fn fail_future(&mut self, call_id: u32, error: RunError) { let call_id = CallId::new(call_id); // Check if a gather is waiting on this CallId if let Some((gather_id, _result_idx)) = self.scheduler.take_gather_waiter(call_id) { // Remove from pending_calls so it doesn't appear in get_pending_call_ids() // (fail_for_call handles this for the non-gather case) self.scheduler.remove_pending_call(call_id); // Get the gather's waiter, task_ids, and OTHER pending calls // We need to remove all pending calls for this gather from gather_waiters // before we dec_ref the gather, otherwise subsequent errors for the same // gather would try to access a freed heap object. // Use get_mut and take to avoid allocations - gather is being destroyed anyway. let (waiter, task_ids, other_pending_calls) = if let HeapDataMut::GatherFuture(gather) = self.heap.get_mut(gather_id) { let mut other_calls = std::mem::take(&mut gather.pending_calls); other_calls.retain(|&cid| cid != call_id); (gather.waiter, std::mem::take(&mut gather.task_ids), other_calls) } else { (None, vec![], vec![]) }; // Remove all other pending calls for this gather from gather_waiters and pending_calls // This prevents subsequent errors from trying to access the freed gather for other_call_id in other_pending_calls { self.scheduler.take_gather_waiter(other_call_id); self.scheduler.remove_pending_call(other_call_id); } // Cancel all sibling tasks in the gather for sibling_id in task_ids { self.scheduler.cancel_task(sibling_id, self.heap); } // Fail the waiter task (the task that awaited the gather) if let Some(waiter_id) = waiter { self.scheduler.fail_task(waiter_id, error); // Release the GatherFuture self.heap.dec_ref(gather_id); } } else if let Some((task_id, Some(gid))) = self.scheduler.fail_for_call(call_id, error) { // Original path: task is directly BlockedOnCall and part of a gather // Take task_ids from GatherFuture - gather is being destroyed anyway let task_ids: Vec = if let 
HeapDataMut::GatherFuture(gather) = self.heap.get_mut(gid) { std::mem::take(&mut gather.task_ids) } else { vec![] }; // Cancel sibling tasks (filter out self and already-finished tasks) for sibling_id in task_ids { if sibling_id != task_id && !self.scheduler.get_task(sibling_id).is_finished() { self.scheduler.cancel_task(sibling_id, self.heap); } } } } /// Adds pending call data for an external function call. /// /// Called by `run_pending()` when the host chooses async resolution. /// This stores the call data in the scheduler so we can: /// 1. Track which task created the call (to ignore results if cancelled) /// 2. Return pending call info when all tasks are blocked /// /// Note: The args are empty because the host already has them from the /// `FunctionCall` return value. We only need to track the creator task. pub fn add_pending_call(&mut self, call_id: CallId) { let current_task = self.scheduler.current_task_id().unwrap_or_default(); self.scheduler.add_pending_call( call_id, PendingCallData { args: ArgValues::Empty, creator_task: current_task, }, ); } /// Prepares the current task to continue after futures are resolved. /// /// When the current task (main or spawned) was blocked on an external future and /// that future is now resolved, this method takes the resolved value from the /// scheduler and pushes it onto the VM's stack so execution can continue. /// /// This is called by `FutureSnapshot::resume()` after resolving futures but before /// calling `vm.run()`. It handles the task whose frames are currently in the VM. /// Other unblocked tasks get their resolved values during task switching in /// `load_or_init_task`. /// /// # Returns /// `true` if a value was pushed, `false` if no task was ready to continue. 
pub fn prepare_current_task_after_resolve(&mut self) -> bool { // Check if there's a current task (main or spawned) let Some(current_task_id) = self.scheduler.current_task_id() else { return false; }; // Take the resolved value for the current task (if it was unblocked) if let Some(value) = self.scheduler.take_resolved_for_task(current_task_id) { // Remove task from ready_queue since we're handling it directly. // resolve() added it to ready_queue, but since frames are already // in the VM (not saved/restored), we handle it here instead of via task switching. self.scheduler.remove_from_ready_queue(current_task_id); self.push(value); true } else { false } } /// Loads a ready task if the VM needs one. /// /// This is called by `FutureSnapshot::resume()` after resolving futures but before /// calling `vm.run()`. It handles two cases: /// 1. **No frames in VM**: A task context needs to be loaded from the scheduler /// (e.g., gather completed while tasks were running and we yielded with no frames). /// 2. **Current task is blocked**: The current task's frames are in the VM but it's /// still blocked (e.g., only some futures were resolved in incremental resolution). /// Saves the blocked task's context and switches to a ready task. /// /// # Returns /// - `Ok(true)` if a task was loaded and execution can continue /// - `Ok(false)` if no task switch is needed (current task is runnable or no ready tasks) /// - `Err(error)` if loading the task failed pub fn load_ready_task_if_needed(&mut self) -> Result { // If frames exist, check if the current task is blocked. If it's not blocked // (i.e., it was just unblocked), there's nothing to do - it will continue running. 
if !self.frames.is_empty() { let current_blocked = self.scheduler.current_task_id().is_some_and(|tid| { matches!( self.scheduler.get_task(tid).state, TaskState::BlockedOnCall(_) | TaskState::BlockedOnGather(_) ) }); if !current_blocked { return Ok(false); } // Current task is blocked - save its context before switching if let Some(tid) = self.scheduler.current_task_id() { self.save_task_context(tid); } } // Check if there's a ready task to load let Some(next_task_id) = self.scheduler.next_ready_task() else { return Ok(false); }; self.scheduler.set_current_task(Some(next_task_id)); self.load_or_init_task(next_task_id)?; Ok(true) } /// Gets the pending call IDs from the scheduler. pub fn get_pending_call_ids(&self) -> Vec { self.scheduler.pending_call_ids() } /// Takes the error from a failed task if the current task has failed. /// /// Returns `Some(error)` if the current task is in `TaskState::Failed`, `None` otherwise. /// Used by `FutureSnapshot::resume` to propagate errors after resolving futures. /// /// Only replaces the state when the task has actually failed - other states /// (e.g., `BlockedOnCall`) are left untouched. pub fn take_failed_task_error(&mut self) -> Option { let current_task_id = self.scheduler.current_task_id()?; let task = self.scheduler.get_task_mut(current_task_id); // Only replace state if it's actually Failed - otherwise we'd corrupt // the task's real state (e.g., BlockedOnCall) by overwriting it with Ready. if matches!(task.state, TaskState::Failed(_)) && let TaskState::Failed(error) = std::mem::replace(&mut task.state, TaskState::Ready) { return Some(error); } None } } /// Internal enum for dispatching await operations by heap data type. /// /// Used in `exec_get_awaitable` to determine which handler to call after /// inspecting the heap data type. This avoids borrow conflicts between /// the heap reference and `&mut self` needed by the handler methods. 
enum AwaitableType { Coroutine, GatherFuture, } ================================================ FILE: crates/monty/src/bytecode/vm/attr.rs ================================================ //! Attribute access helpers for the VM. use super::VM; use crate::{ bytecode::vm::CallResult, defer_drop, exception_private::{ExcType, RunError}, intern::StringId, resource::ResourceTracker, value::EitherStr, }; impl VM<'_, '_, T> { /// Loads an attribute from an object and pushes it onto the stack. /// /// Returns an AttributeError if the attribute doesn't exist. pub(super) fn load_attr(&mut self, name_id: StringId) -> Result { let this = self; let obj = this.pop(); defer_drop!(obj, this); let attr = EitherStr::Interned(name_id); obj.py_getattr(&attr, this) } /// Loads an attribute from a module for `from ... import` and pushes it onto the stack. /// /// Returns an ImportError (not AttributeError) if the attribute doesn't exist, /// matching CPython's behavior for `from module import name`. pub(super) fn load_attr_import(&mut self, name_id: StringId) -> Result { let this = self; let obj = this.pop(); defer_drop!(obj, this); let attr = EitherStr::Interned(name_id); match obj.py_getattr(&attr, this) { Ok(result) => Ok(result), Err(RunError::Exc(exc)) if exc.exc.exc_type() == ExcType::AttributeError => { // Only compute module_name when we need it for the error message let module_name = obj.module_name(this.heap, this.interns); let name_str = this.interns.get_str(name_id); Err(ExcType::cannot_import_name(name_str, &module_name)) } Err(e) => Err(e), } } /// Stores a value as an attribute on an object. /// /// Returns an AttributeError if the attribute cannot be set. 
pub(super) fn store_attr(&mut self, name_id: StringId) -> Result<(), RunError> { let this = self; let obj = this.pop(); defer_drop!(obj, this); let value = this.pop(); // py_set_attr takes ownership of value and drops it on error obj.py_set_attr(name_id, value, this) } } ================================================ FILE: crates/monty/src/bytecode/vm/binary.rs ================================================ //! Binary and in-place operation helpers for the VM. use super::VM; use crate::{ defer_drop, exception_private::{ExcType, RunError}, heap::{Heap, HeapData, HeapGuard}, resource::ResourceTracker, types::{PyTrait, Set, dict_view::collect_iterable_to_set, set::SetBinaryOp}, value::BitwiseOp, }; impl VM<'_, '_, T> { /// Binary addition with proper refcount handling. /// /// Uses lazy type capture: only calls `py_type()` in error paths to avoid /// overhead on the success path (99%+ of operations). pub(super) fn binary_add(&mut self) -> Result<(), RunError> { let this = self; let rhs = this.pop(); defer_drop!(rhs, this); let lhs = this.pop(); defer_drop!(lhs, this); match lhs.py_add(rhs, this) { Ok(Some(v)) => { this.push(v); Ok(()) } Ok(None) => { let lhs_type = lhs.py_type(this.heap); let rhs_type = rhs.py_type(this.heap); Err(ExcType::binary_type_error("+", lhs_type, rhs_type)) } Err(e) => Err(e.into()), } } /// Binary subtraction with proper refcount handling. /// /// Handles both numeric subtraction and set difference (`-` operator). /// For sets/frozensets, delegates to [`binary_set_op`] which needs `interns` /// for element hashing and equality. Uses lazy type capture: only calls /// `py_type()` in error paths. pub(super) fn binary_sub(&mut self) -> Result<(), RunError> { let this = self; let rhs = this.pop(); defer_drop!(rhs, this); let lhs = this.pop(); defer_drop!(lhs, this); if let Some(result) = this.binary_dict_view_op(lhs, rhs, DictViewBinaryOp::Sub)? 
{ this.push(result); return Ok(()); } if let Some(result) = this.binary_set_op(lhs, rhs, SetBinaryOp::Sub)? { this.push(result); return Ok(()); } match lhs.py_sub(rhs, this) { Ok(Some(v)) => { this.push(v); Ok(()) } Ok(None) => { let lhs_type = lhs.py_type(this.heap); let rhs_type = rhs.py_type(this.heap); Err(ExcType::binary_type_error("-", lhs_type, rhs_type)) } Err(e) => Err(e.into()), } } /// Binary multiplication with proper refcount handling. /// /// Uses lazy type capture: only calls `py_type()` in error paths. pub(super) fn binary_mult(&mut self) -> Result<(), RunError> { let this = self; let rhs = this.pop(); defer_drop!(rhs, this); let lhs = this.pop(); defer_drop!(lhs, this); match lhs.py_mult(rhs, this) { Ok(Some(v)) => { this.push(v); Ok(()) } Ok(None) => { let lhs_type = lhs.py_type(this.heap); let rhs_type = rhs.py_type(this.heap); Err(ExcType::binary_type_error("*", lhs_type, rhs_type)) } Err(e) => Err(e), } } /// Binary division with proper refcount handling. /// /// Uses lazy type capture: only calls `py_type()` in error paths. pub(super) fn binary_div(&mut self) -> Result<(), RunError> { let this = self; let rhs = this.pop(); defer_drop!(rhs, this); let lhs = this.pop(); defer_drop!(lhs, this); match lhs.py_div(rhs, this) { Ok(Some(v)) => { this.push(v); Ok(()) } Ok(None) => { let lhs_type = lhs.py_type(this.heap); let rhs_type = rhs.py_type(this.heap); Err(ExcType::binary_type_error("/", lhs_type, rhs_type)) } Err(e) => Err(e), } } /// Binary floor division with proper refcount handling. /// /// Uses lazy type capture: only calls `py_type()` in error paths. 
pub(super) fn binary_floordiv(&mut self) -> Result<(), RunError> {
    let this = self;

    // Operands come off the stack right-hand side first
    let rhs = this.pop();
    defer_drop!(rhs, this);
    let lhs = this.pop();
    defer_drop!(lhs, this);

    // Success path: push the quotient and finish
    if let Some(quotient) = lhs.py_floordiv(rhs, this)? {
        this.push(quotient);
        return Ok(());
    }

    // Unsupported operand types - only now look the type names up
    let lhs_type = lhs.py_type(this.heap);
    let rhs_type = rhs.py_type(this.heap);
    Err(ExcType::binary_type_error("//", lhs_type, rhs_type))
}

/// Binary modulo with proper refcount handling.
///
/// Operand types are only inspected when building the `TypeError`, keeping
/// the success path free of `py_type()` calls.
pub(super) fn binary_mod(&mut self) -> Result<(), RunError> {
    let this = self;

    // Operands come off the stack right-hand side first
    let rhs = this.pop();
    defer_drop!(rhs, this);
    let lhs = this.pop();
    defer_drop!(lhs, this);

    // Success path: push the remainder and finish
    if let Some(remainder) = lhs.py_mod(rhs, this)? {
        this.push(remainder);
        return Ok(());
    }

    // Unsupported operand types - only now look the type names up
    let lhs_type = lhs.py_type(this.heap);
    let rhs_type = rhs.py_type(this.heap);
    Err(ExcType::binary_type_error("%", lhs_type, rhs_type))
}

/// Binary power with proper refcount handling.
///
/// Operand types are only inspected when building the `TypeError`, keeping
/// the success path free of `py_type()` calls.
#[inline(never)]
pub(super) fn binary_pow(&mut self) -> Result<(), RunError> {
    let this = self;

    // Operands come off the stack right-hand side first
    let rhs = this.pop();
    defer_drop!(rhs, this);
    let lhs = this.pop();
    defer_drop!(lhs, this);

    // Success path: push the power and finish
    if let Some(power) = lhs.py_pow(rhs, this)? {
        this.push(power);
        return Ok(());
    }

    // Unsupported operand types - only now look the type names up
    let lhs_type = lhs.py_type(this.heap);
    let rhs_type = rhs.py_type(this.heap);
    Err(ExcType::binary_type_error("** or pow()", lhs_type, rhs_type))
}

/// Binary bitwise operation on integers and sets.
///
/// For integers, performs standard bitwise operations (AND, OR, XOR, shifts).
/// For sets/frozensets, `|` maps to union, `&` to intersection, and `^` to
/// symmetric difference. Set operations are handled here because `py_bitwise`
/// doesn't have access to `interns`, which set operations need for hashing.
pub(super) fn binary_bitwise(&mut self, op: BitwiseOp) -> Result<(), RunError> {
    let this = self;

    let rhs = this.pop();
    defer_drop!(rhs, this);
    let lhs = this.pop();
    defer_drop!(lhs, this);

    // `|`, `&` and `^` double as set operators (union, intersection,
    // symmetric difference); the shifts have no set meaning.
    let as_set_op = match op {
        BitwiseOp::LShift | BitwiseOp::RShift => None,
        BitwiseOp::Or => Some(SetBinaryOp::Or),
        BitwiseOp::And => Some(SetBinaryOp::And),
        BitwiseOp::Xor => Some(SetBinaryOp::Xor),
    };

    // Try the set interpretation first when one exists
    let set_result = match as_set_op {
        Some(set_op) => this.binary_set_op(lhs, rhs, set_op)?,
        None => None,
    };

    // Otherwise fall back to the plain bitwise operation
    let result = match set_result {
        Some(value) => value,
        None => lhs.py_bitwise(rhs, op, this.heap)?,
    };
    this.push(result);
    Ok(())
}

/// Binary `&`: dict views first, then sets, then the numeric fallback.
///
/// Milestone one only needs one non-numeric behavior here: `dict_keys & iterable`
/// should iterate the right-hand side, return a plain `set`, and raise
/// `TypeError("'X' object is not iterable")` for non-iterable operands.
pub(super) fn binary_and(&mut self) -> Result<(), RunError> {
    let this = self;

    let rhs = this.pop();
    defer_drop!(rhs, this);
    let lhs = this.pop();
    defer_drop!(lhs, this);

    let result = match this.binary_dict_view_op(lhs, rhs, DictViewBinaryOp::And)? {
        Some(view_result) => view_result,
        None => match this.binary_set_op(lhs, rhs, SetBinaryOp::And)? {
            Some(set_result) => set_result,
            None => lhs.py_bitwise(rhs, BitwiseOp::And, this.heap)?,
        },
    };
    this.push(result);
    Ok(())
}

/// Binary `|`: dict views first, then sets, then the numeric fallback.
pub(super) fn binary_or(&mut self) -> Result<(), RunError> {
    let this = self;

    let rhs = this.pop();
    defer_drop!(rhs, this);
    let lhs = this.pop();
    defer_drop!(lhs, this);

    let result = match this.binary_dict_view_op(lhs, rhs, DictViewBinaryOp::Or)? {
        Some(view_result) => view_result,
        None => match this.binary_set_op(lhs, rhs, SetBinaryOp::Or)? {
            Some(set_result) => set_result,
            None => lhs.py_bitwise(rhs, BitwiseOp::Or, this.heap)?,
        },
    };
    this.push(result);
    Ok(())
}

/// Binary `^`: dict views first, then sets, then the numeric fallback.
pub(super) fn binary_xor(&mut self) -> Result<(), RunError> {
    let this = self;

    let rhs = this.pop();
    defer_drop!(rhs, this);
    let lhs = this.pop();
    defer_drop!(lhs, this);

    let result = match this.binary_dict_view_op(lhs, rhs, DictViewBinaryOp::Xor)? {
        Some(view_result) => view_result,
        None => match this.binary_set_op(lhs, rhs, SetBinaryOp::Xor)? {
            Some(set_result) => set_result,
            None => lhs.py_bitwise(rhs, BitwiseOp::Xor, this.heap)?,
        },
    };
    this.push(result);
    Ok(())
}

/// In-place addition (`+=`): tries `py_iadd` first, then falls back to `py_add`.
///
/// When `py_iadd` reports success the left operand itself is pushed back onto
/// the stack; otherwise the result of regular addition is pushed.
///
/// Operand types are only looked up when building the `TypeError`.
///
/// Note: `defer_drop!` cannot be used for `lhs` here because after a successful
/// in-place operation `lhs` must go back onto the stack rather than be dropped.
pub(super) fn inplace_add(&mut self) -> Result<(), RunError> {
    let this = self;

    let rhs = this.pop();
    defer_drop!(rhs, this);

    // The guard owns lhs so it can either be released back to the stack or
    // cleaned up when this function exits another way
    let mut lhs_guard = HeapGuard::new(this.pop(), this);
    let (lhs, this) = lhs_guard.as_parts_mut();

    // In-place attempt first (for mutable types like lists)
    if lhs.py_iadd(rhs, this, lhs.ref_id())? {
        // Succeeded - reclaim lhs from the guard and push it back
        let (lhs, this) = lhs_guard.into_parts();
        this.push(lhs);
        return Ok(());
    }

    // Then ordinary addition producing a new value
    if let Some(sum) = lhs.py_add(rhs, this)? {
        this.push(sum);
        return Ok(());
    }

    let lhs_type = lhs.py_type(this.heap);
    let rhs_type = rhs.py_type(this.heap);
    Err(ExcType::binary_type_error("+=", lhs_type, rhs_type))
}

/// Binary matrix multiplication (`@` operator).
///
/// Currently not implemented - returns a `NotImplementedError`.
/// Matrix multiplication requires numpy-like array types which Monty doesn't support.
pub(super) fn binary_matmul(&mut self) -> Result<(), RunError> {
    // Both operands are still popped and released so the stack and refcounts
    // stay balanced even though the operation always fails.
    let rhs = self.pop();
    let lhs = self.pop();
    lhs.drop_with_heap(self);
    rhs.drop_with_heap(self);
    Err(ExcType::not_implemented("matrix multiplication (@) is not supported").into())
}

/// Implements dict-view set-like operators before falling back to other dispatch.
///
/// Both operands are materialized into temporary sets, the operator is applied,
/// and the result is allocated on the heap as a plain `set`.
///
/// Returning `Ok(None)` means the left operand was not a set-like dict view, so the
/// caller should continue with ordinary numeric or pure-set dispatch.
fn binary_dict_view_op(
    &mut self,
    lhs: &crate::value::Value,
    rhs: &crate::value::Value,
    op: DictViewBinaryOp,
) -> Result<Option<crate::value::Value>, RunError> {
    let this = self;

    // Only heap references can be dict views
    let crate::value::Value::Ref(lhs_id) = lhs else {
        return Ok(None);
    };

    let lhs_set = match this.heap.get(*lhs_id) {
        HeapData::DictKeysView(view) => view.to_set(this)?,
        HeapData::DictItemsView(view) => view.to_set(this)?,
        _ => return Ok(None),
    };
    defer_drop!(lhs_set, this);

    // The right-hand side may be any iterable; collect it into a set as well
    let rhs_set = collect_iterable_to_set(rhs.clone_with_heap(this), this)?;
    defer_drop!(rhs_set, this);

    let result = apply_dict_view_binary_op(lhs_set, rhs_set, op, this)?;
    let result_id = this.heap.allocate(HeapData::Set(result))?;
    Ok(Some(crate::value::Value::Ref(result_id)))
}

/// Implements pure set/frozenset binary operators with strict operand checks.
///
/// Method forms accept arbitrary iterables, but the operator forms handled here
/// must reject non-set operands so Monty matches CPython's `TypeError` behavior.
fn binary_set_op(
    &mut self,
    lhs: &crate::value::Value,
    rhs: &crate::value::Value,
    op: SetBinaryOp,
) -> Result<Option<crate::value::Value>, RunError> {
    let this = self;

    // Only heap references can be sets; `Ok(None)` tells the caller to keep dispatching
    let crate::value::Value::Ref(lhs_id) = lhs else {
        return Ok(None);
    };

    // The result keeps the left operand's kind: set -> set, frozenset -> frozenset
    let result = Heap::with_entry_mut(this, *lhs_id, |this, data| match data {
        crate::heap_data::HeapDataMut::Set(set) => set.binary_op_value(rhs, op, this).map(|v| v.map(HeapData::Set)),
        crate::heap_data::HeapDataMut::FrozenSet(set) => {
            set.binary_op_value(rhs, op, this).map(|v| v.map(HeapData::FrozenSet))
        }
        _ => Ok(None),
    })?;

    let Some(result) = result else {
        return Ok(None);
    };
    let result_id = this.heap.allocate(result)?;
    Ok(Some(crate::value::Value::Ref(result_id)))
}
}

/// Supported dict-view set-like operators.
#[derive(Debug, Clone, Copy)]
enum DictViewBinaryOp {
    And,
    Or,
    Xor,
    Sub,
}

/// Applies a set-like operator to two temporary sets and returns a plain `set`.
///
/// Neither input is modified; every element placed in the result is cloned
/// with `clone_with_heap`.
fn apply_dict_view_binary_op(
    lhs: &Set,
    rhs: &Set,
    op: DictViewBinaryOp,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> Result<Set, RunError> {
    // Pre-size the result with an upper bound on its element count
    let mut result = match op {
        DictViewBinaryOp::And => Set::with_capacity(lhs.len().min(rhs.len())),
        DictViewBinaryOp::Or => Set::with_capacity(lhs.len() + rhs.len()),
        DictViewBinaryOp::Xor => Set::with_capacity(lhs.len() + rhs.len()),
        DictViewBinaryOp::Sub => Set::with_capacity(lhs.len()),
    };

    match op {
        DictViewBinaryOp::And => {
            // Iterate the smaller side and probe the larger one
            let (smaller, larger) = if lhs.len() <= rhs.len() {
                (lhs, rhs)
            } else {
                (rhs, lhs)
            };
            for value in smaller.iter() {
                if larger.contains(value, vm)? {
                    result.add(value.clone_with_heap(vm), vm)?;
                }
            }
        }
        DictViewBinaryOp::Or => {
            for value in lhs.iter() {
                result.add(value.clone_with_heap(vm), vm)?;
            }
            for value in rhs.iter() {
                result.add(value.clone_with_heap(vm), vm)?;
            }
        }
        DictViewBinaryOp::Xor => {
            // Elements present in exactly one of the two sets
            for value in lhs.iter() {
                if !rhs.contains(value, vm)? {
                    result.add(value.clone_with_heap(vm), vm)?;
                }
            }
            for value in rhs.iter() {
                if !lhs.contains(value, vm)? {
                    result.add(value.clone_with_heap(vm), vm)?;
                }
            }
        }
        DictViewBinaryOp::Sub => {
            for value in lhs.iter() {
                if !rhs.contains(value, vm)? {
                    result.add(value.clone_with_heap(vm), vm)?;
                }
            }
        }
    }

    Ok(result)
}

================================================
FILE: crates/monty/src/bytecode/vm/call.rs
================================================
//! Function call helpers for the VM.
//!
//! This module contains the implementation of call-related opcodes and helper
//! functions for executing function calls. The main entry points are the `exec_*`
//! methods which are called from the VM's main dispatch loop.

use super::{CallFrame, VM};
use crate::{
    args::{ArgValues, KwargsValues},
    asyncio::Coroutine,
    builtins::{Builtins, BuiltinsFunctions},
    bytecode::FrameExit,
    defer_drop,
    exception_private::{ExcType, RunError},
    heap::{DropWithHeap, Heap, HeapData, HeapGuard, HeapId},
    heap_data::CellValue,
    intern::{FunctionId, StringId},
    os::OsFunction,
    resource::ResourceTracker,
    types::{Dict, PyTrait, Type, bytes::call_bytes_method, str::call_str_method, r#type::call_type_method},
    value::{EitherStr, Value},
};

/// Result of executing a call or attribute method.
///
/// Used by the `exec_*` methods and `py_call_attr` implementations to communicate
/// what action the VM's main loop should take after the call completes.
///
/// For attribute methods that complete synchronously, use `CallResult::Value`.
/// For operations requiring host involvement (OS calls, external functions, etc.),
/// use the appropriate variant to signal the VM to yield.
pub(crate) enum CallResult {
    /// Call completed synchronously with a return value.
    Value(Value),
    /// A new frame was pushed for a defined function call.
    /// The VM should reload its cached frame state.
    FramePushed,
    /// External function call requested - VM should pause and return to caller.
    /// The `EitherStr` is the name of the external function (interned or heap-owned).
    External(EitherStr, ArgValues),
    /// OS operation call requested - VM should yield `FrameExit::OsCall` to host.
    ///
    /// The host executes the OS operation and resumes the VM with the result.
    OsCall(OsFunction, ArgValues),
    /// Dataclass method call requested - VM should yield `FrameExit::MethodCall` to host.
    ///
    /// The method name (e.g. `"distance"`) and the args include the dataclass instance
    /// as the first argument (`self`). The name is an `EitherStr` rather than a bare
    /// `StringId` because method names are only known at runtime when dataclass
    /// inputs are provided.
    MethodCall(EitherStr, ArgValues),
    /// The call returned a value that should be implicitly awaited.
    ///
    /// Used by `asyncio.run()` to execute a coroutine without an explicit `await`.
    /// The VM will push the value onto the stack and execute `exec_get_awaitable`.
    AwaitValue(Value),
}

impl<T: ResourceTracker> VM<'_, '_, T> {
    // ========================================================================
    // Call Opcode Executors
    // ========================================================================
    // These methods are called from the VM's main dispatch loop to execute
    // call-related opcodes. They handle stack operations and return a result
    // indicating what the VM should do next.

    /// Executes `CallFunction` opcode.
    ///
    /// Pops the callable and arguments from the stack, calls the function,
    /// and returns the result.
    pub(super) fn exec_call_function(&mut self, arg_count: usize) -> Result<CallResult, RunError> {
        // Arguments sit above the callable on the stack, so they come off first
        let args = self.pop_n_args(arg_count);
        let callable = self.pop();
        let this = self;
        defer_drop!(callable, this);
        this.call_function(callable, args)
    }

    /// Executes `CallBuiltinFunction` opcode.
    ///
    /// Calls a builtin function directly without stack manipulation for the callable.
    /// This is an optimization that avoids constant pool lookup and stack manipulation.
pub(super) fn exec_call_builtin_function(&mut self, builtin_id: u8, arg_count: usize) -> Result { // Convert u8 to BuiltinsFunctions via FromRepr if let Some(builtin) = BuiltinsFunctions::from_repr(builtin_id) { let args = self.pop_n_args(arg_count); builtin.call(self, args) } else { Err(RunError::internal("CallBuiltinFunction: invalid builtin_id")) } } /// Executes `CallBuiltinType` opcode. /// /// Calls a builtin type constructor directly without stack manipulation for the callable. /// This is an optimization for type constructors like `list()`, `int()`, `str()`. pub(super) fn exec_call_builtin_type(&mut self, type_id: u8, arg_count: usize) -> Result { // Convert u8 to Type via callable_from_u8 if let Some(t) = Type::callable_from_u8(type_id) { let args = self.pop_n_args(arg_count); t.call(self, args) } else { Err(RunError::internal("CallBuiltinType: invalid type_id")) } } /// Executes `CallFunctionKw` opcode. /// /// Pops the callable, positional args, and keyword args from the stack, /// builds the appropriate `ArgValues`, and calls the function. pub(super) fn exec_call_function_kw( &mut self, pos_count: usize, kwname_ids: Vec, ) -> Result { let kw_count = kwname_ids.len(); // Pop keyword values (TOS is last kwarg value) let kw_values = self.pop_n(kw_count); // Pop positional arguments let pos_args = self.pop_n(pos_count); // Pop the callable let callable = self.pop(); let this = self; defer_drop!(callable, this); // Build kwargs as Vec<(StringId, Value)> let kwargs_inline: Vec<(StringId, Value)> = kwname_ids.into_iter().zip(kw_values).collect(); // Build ArgValues with both positional and keyword args let args = if pos_args.is_empty() && kwargs_inline.is_empty() { ArgValues::Empty } else if pos_args.is_empty() { ArgValues::Kwargs(KwargsValues::Inline(kwargs_inline)) } else { ArgValues::ArgsKargs { args: pos_args, kwargs: KwargsValues::Inline(kwargs_inline), } }; this.call_function(callable, args) } /// Executes `CallAttr` opcode. 
///
/// Pops the arguments and then the receiver object from the stack and
/// dispatches the named attribute call. The returned `CallResult` may indicate
/// an OS or external call.
pub(super) fn exec_call_attr(&mut self, name_id: StringId, arg_count: usize) -> Result {
    let call_args = self.pop_n_args(arg_count);
    let receiver = self.pop();
    self.call_attr(receiver, name_id, call_args)
}

/// Executes `CallAttrKw` opcode.
///
/// The stack holds, bottom to top: the receiver object, `pos_count` positional
/// arguments, then one value per entry of `kwname_ids`. The values are paired
/// with their names, packed into an `ArgValues` and passed to the attribute call.
/// The returned `CallResult` may indicate an OS or external call.
pub(super) fn exec_call_attr_kw(
    &mut self,
    name_id: StringId,
    pos_count: usize,
    kwname_ids: Vec,
) -> Result {
    // Unwind the operands in reverse push order: kwargs, positionals, receiver
    let kw_values = self.pop_n(kwname_ids.len());
    let pos_args = self.pop_n(pos_count);
    let receiver = self.pop();

    // Pair every keyword name with its value
    let kwargs_inline: Vec<(StringId, Value)> = kwname_ids.into_iter().zip(kw_values).collect();

    // Pick the variant from which of the two argument groups are populated
    let call_args = match (pos_args.is_empty(), kwargs_inline.is_empty()) {
        (true, true) => ArgValues::Empty,
        (true, false) => ArgValues::Kwargs(KwargsValues::Inline(kwargs_inline)),
        (false, _) => ArgValues::ArgsKargs {
            args: pos_args,
            kwargs: KwargsValues::Inline(kwargs_inline),
        },
    };

    self.call_attr(receiver, name_id, call_args)
}

/// Executes `CallFunctionExtended` opcode.
///
/// Used for calls that unpack `*args` and/or `**kwargs`. The stack holds,
/// bottom to top: the callable, the args tuple, and (only when `has_kwargs`)
/// the kwargs dict.
pub(super) fn exec_call_function_extended(&mut self, has_kwargs: bool) -> Result {
    // The kwargs dict is on top, but only when the call site supplied one
    let kwargs = has_kwargs.then(|| self.pop());
    let args_tuple = self.pop();
    let callable = self.pop();

    self.call_function_extended(callable, args_tuple, kwargs)
}

/// Executes `CallAttrExtended` opcode.
///
/// Handles method calls with `*args` and/or `**kwargs` unpacking.
pub(super) fn exec_call_attr_extended( &mut self, name_id: StringId, has_kwargs: bool, ) -> Result { // Pop kwargs dict if present let kwargs = if has_kwargs { Some(self.pop()) } else { None }; // Pop args tuple let args_tuple = self.pop(); // Pop the receiver object let obj = self.pop(); // Unpack and call self.call_attr_extended(obj, name_id, args_tuple, kwargs) } // ======================================================================== // Internal Call Helpers // ======================================================================== /// Pops n arguments from the stack and wraps them in `ArgValues`. fn pop_n_args(&mut self, n: usize) -> ArgValues { match n { 0 => ArgValues::Empty, 1 => ArgValues::One(self.pop()), 2 => { let b = self.pop(); let a = self.pop(); ArgValues::Two(a, b) } _ => ArgValues::ArgsKargs { args: self.pop_n(n), kwargs: KwargsValues::Empty, }, } } /// Calls an attribute on an object. /// /// For heap-allocated objects (`Value::Ref`), dispatches to the type's /// attribute call implementation via `Heap::call_attr()`, which may return /// `CallResult::OsCall`, `CallResult::External`, or /// `CallResult::MethodCall` for operations that require host involvement. /// /// For interned strings (`Value::InternString`), uses the unified `call_str_method`. /// For interned bytes (`Value::InternBytes`), uses the unified `call_bytes_method`. 
fn call_attr(&mut self, obj: Value, name_id: StringId, args: ArgValues) -> Result {
    let this = self;
    let attr = EitherStr::Interned(name_id);

    match obj {
        Value::Ref(heap_id) => {
            // The receiver is owned by this call; the guard releases it on every exit path
            defer_drop!(obj, this);
            Heap::call_attr(this, heap_id, &attr, args)
        }
        Value::InternString(string_id) => {
            // Call string method on interned string literal using the unified dispatcher
            let s = this.interns.get_str(string_id);
            call_str_method(s, name_id, args, this).map(CallResult::Value)
        }
        Value::InternBytes(bytes_id) => {
            // Call bytes method on interned bytes literal using the unified dispatcher
            let b = this.interns.get_bytes(bytes_id);
            call_bytes_method(b, name_id, args, this).map(CallResult::Value)
        }
        Value::Builtin(Builtins::Type(t)) => {
            // Handle classmethods on type objects like dict.fromkeys()
            call_type_method(t, name_id, args, this).map(CallResult::Value)
        }
        _ => {
            // Non-heap values without method support: release the unused arguments
            // and report an AttributeError naming the receiver's type and the attribute
            let type_name = obj.py_type(this.heap);
            args.drop_with_heap(this);
            Err(ExcType::attribute_error(type_name, this.interns.get_str(name_id)))
        }
    }
}

/// Evaluates a function in a position that doesn't yet support suspending.
///
/// Calls the function and, if it's a user-defined function that pushes a frame,
/// runs the VM until that frame returns.
///
/// `ctx` is a short label for the calling context; it is only used as the prefix
/// of the internal error message.
///
/// Returns an error for external/OS functions since those require the host to
/// execute them and resume, which this synchronous context cannot support. The
/// same internal error is produced for dataclass method calls, implicit awaits,
/// and any other non-`Return` exit of the nested `run()` loop.
pub(crate) fn evaluate_function(
    &mut self,
    ctx: &'static str,
    callable: &Value,
    args: ArgValues,
) -> Result {
    match self.call_function(callable, args)? {
        CallResult::Value(v) => Ok(v),
        CallResult::FramePushed => {
            // A new frame was pushed for a defined function call - we need to run it
            // to completion.
            // NOTE(review): `stack_depth` is sampled after the callee frame was pushed, so
            // the unwind loop below only removes frames above the callee - confirm the
            // callee frame itself is cleaned up elsewhere on this error path.
            let stack_depth = self.frames.len();
            // Mark the frame as an exit point from the `run()` loop
            self.current_frame_mut().should_return = true;
            match self.run()? {
                FrameExit::Return(v) => Ok(v),
                FrameExit::ResolveFutures(_)
                | FrameExit::ExternalCall { .. }
                | FrameExit::OsCall { .. }
                | FrameExit::MethodCall { .. }
                | FrameExit::NameLookup { .. } => {
                    // Pop frames off the stack from this failed evaluation
                    while self.frames.len() > stack_depth {
                        self.pop_frame();
                    }
                    Err(RunError::internal(format!(
                        "{ctx}: external functions are not yet supported in this context"
                    )))
                }
            }
        }
        CallResult::External(_, _)
        | CallResult::OsCall(_, _)
        | CallResult::MethodCall(_, _)
        | CallResult::AwaitValue(_) => {
            // External calls are not supported in this context since the caller doesn't support suspending
            Err(RunError::internal(format!(
                "{ctx}: external functions are not yet supported in this context"
            )))
        }
    }
}

/// Calls a callable value with the given arguments.
///
/// Dispatches based on the callable type:
/// - `Value::Builtin`: calls the builtin directly and wraps the result in `CallResult::Value`
/// - `Value::ModuleFunction`: calls the module function and forwards whatever `CallResult` it produces
/// - `Value::ExtFunction`: returns `CallResult::External` for the caller to execute
/// - `Value::DefFunction`: delegates to `call_def_function` with no defaults and no captured cells
/// - `Value::Ref`: checks for closure/function on heap via `call_heap_callable`
///
/// Any other value releases `args` and raises `TypeError: '<type>' object is not callable`.
/// The callable is only borrowed; the caller remains responsible for releasing it.
pub(crate) fn call_function(&mut self, callable: &Value, args: ArgValues) -> Result {
    match callable {
        Value::Builtin(builtin) => {
            let result = builtin.call(self, args)?;
            Ok(CallResult::Value(result))
        }
        Value::ModuleFunction(mf) => mf.call(self, args),
        Value::ExtFunction(name_id) => {
            // External function - return to caller to execute
            Ok(CallResult::External(EitherStr::Interned(*name_id), args))
        }
        Value::DefFunction(func_id) => {
            // Defined function without defaults or captured variables
            self.call_def_function(*func_id, &[], &[], args)
        }
        Value::Ref(heap_id) => {
            // Could be a closure or function with defaults - check heap
            self.call_heap_callable(*heap_id, args)
        }
        _ => {
            // Not callable: the arguments will never be consumed, so release them first
            args.drop_with_heap(self);
            let ty = callable.py_type(self.heap);
            Err(ExcType::type_error(format!("'{ty}' object is not callable")))
        }
    }
}

/// Handles calling a heap-allocated callable (closure, function with defaults, or external function).
fn call_heap_callable(&mut self, heap_id: HeapId, args: ArgValues) -> Result {
    // Copy what the call needs out of the heap entry so the heap borrow ends before the call
    let (func_id, cells, defaults) = match self.heap.get(heap_id) {
        HeapData::Closure(closure) => {
            let cloned_cells = closure.cells.clone();
            let cloned_defaults: Vec = closure.defaults.iter().map(|v| v.clone_with_heap(self)).collect();
            (closure.func_id, cloned_cells, cloned_defaults)
        }
        HeapData::FunctionDefaults(fd) => {
            // Function with defaults but no captured cells
            let cloned_defaults: Vec = fd.defaults.iter().map(|v| v.clone_with_heap(self)).collect();
            (fd.func_id, Vec::new(), cloned_defaults)
        }
        HeapData::ExtFunction(name) => {
            // Heap-allocated external function with a non-interned name
            let name = name.clone();
            return Ok(CallResult::External(EitherStr::Heap(name), args));
        }
        _ => {
            // NOTE(review): unlike `call_function`, this message omits the object's type
            // name - confirm whether `'<type>' object is not callable` is wanted here too.
            args.drop_with_heap(self);
            return Err(ExcType::type_error("object is not callable"));
        }
    };

    // The cloned defaults are only needed for the duration of the call
    let this = self;
    defer_drop!(defaults, this);
    this.call_def_function(func_id, &cells, defaults, args)
}

/// Calls a function with unpacked args tuple and optional kwargs dict.
///
/// Used for `f(*args)` and `f(**kwargs)` style calls.
///
/// Takes ownership of `callable`, `args_tuple` and `kwargs`. The callable and
/// the tuple are released by `defer_drop!` guards when this method returns,
/// including when building the arguments fails; the kwargs dict is handed to
/// `build_args_with_kwargs`.
fn call_function_extended(
    &mut self,
    callable: Value,
    args_tuple: Value,
    kwargs: Option,
) -> Result {
    let this = self;
    defer_drop!(args_tuple, this);
    defer_drop!(callable, this);

    // Extract positional args from tuple
    let copied_args = this.extract_args_tuple(args_tuple);

    // Build ArgValues from positional args and optional kwargs
    let args = if let Some(kwargs_ref) = kwargs {
        this.build_args_with_kwargs(copied_args, kwargs_ref)?
    } else {
        Self::build_args_positional_only(copied_args)
    };

    // Call the function (args_tuple guard drops at scope exit)
    this.call_function(callable, args)
}

/// Calls a method with unpacked args tuple and optional kwargs dict.
///
/// Used for `obj.method(*args)` and `obj.method(**kwargs)` style calls.
fn call_attr_extended( &mut self, obj: Value, name_id: StringId, args_tuple: Value, kwargs: Option, ) -> Result { let this = self; defer_drop!(args_tuple, this); // Extract positional args from tuple let copied_args = this.extract_args_tuple_for_attr(args_tuple); // Build ArgValues from positional args and optional kwargs let args = if let Some(kwargs_ref) = kwargs { this.build_args_with_kwargs_for_attr(copied_args, kwargs_ref)? } else { Self::build_args_positional_only(copied_args) }; // Call the method (args_tuple guard drops at scope exit) this.call_attr(obj, name_id, args) } /// Extracts arguments from a tuple for `CallFunctionExtended`. /// /// # Panics /// Panics if `args_tuple` is not a tuple. This indicates a compiler bug since /// the compiler always emits `ListToTuple` before `CallFunctionExtended`. fn extract_args_tuple(&mut self, args_tuple: &Value) -> Vec { let Value::Ref(id) = args_tuple else { unreachable!("CallFunctionExtended: args_tuple must be a Ref") }; let HeapData::Tuple(tuple) = self.heap.get(*id) else { unreachable!("CallFunctionExtended: args_tuple must be a Tuple") }; tuple.as_slice().iter().map(|v| v.clone_with_heap(self)).collect() } /// Builds `ArgValues` with kwargs for `CallFunctionExtended`. /// /// # Panics /// Panics if `kwargs_ref` is not a dict. This indicates a compiler bug since /// the compiler always emits `BuildDict` before `CallFunctionExtended` with kwargs. 
fn build_args_with_kwargs(&mut self, copied_args: Vec, kwargs_ref: Value) -> Result { let this = self; defer_drop!(kwargs_ref, this); // Extract kwargs dict items let Value::Ref(id) = kwargs_ref else { unreachable!("CallFunctionExtended: kwargs must be a Ref") }; let HeapData::Dict(dict) = this.heap.get(*id) else { unreachable!("CallFunctionExtended: kwargs must be a Dict") }; let copied_kwargs: Vec<(Value, Value)> = dict .iter() .map(|(k, v)| (k.clone_with_heap(this), v.clone_with_heap(this))) .collect(); let kwargs_values = if copied_kwargs.is_empty() { KwargsValues::Empty } else { let kwargs_dict = Dict::from_pairs(copied_kwargs, this)?; KwargsValues::Dict(kwargs_dict) }; Ok( if copied_args.is_empty() && matches!(kwargs_values, KwargsValues::Empty) { ArgValues::Empty } else if copied_args.is_empty() { ArgValues::Kwargs(kwargs_values) } else { ArgValues::ArgsKargs { args: copied_args, kwargs: kwargs_values, } }, ) } /// Builds `ArgValues` from positional args only. fn build_args_positional_only(copied_args: Vec) -> ArgValues { match copied_args.len() { 0 => ArgValues::Empty, 1 => ArgValues::One(copied_args.into_iter().next().unwrap()), 2 => { let mut iter = copied_args.into_iter(); ArgValues::Two(iter.next().unwrap(), iter.next().unwrap()) } _ => ArgValues::ArgsKargs { args: copied_args, kwargs: KwargsValues::Empty, }, } } /// Extracts arguments from a tuple for `CallAttrExtended`. /// /// # Panics /// Panics if `args_tuple` is not a tuple. This indicates a compiler bug since /// the compiler always emits `ListToTuple` before `CallAttrExtended`. fn extract_args_tuple_for_attr(&mut self, args_tuple: &Value) -> Vec { let Value::Ref(id) = args_tuple else { unreachable!("CallAttrExtended: args_tuple must be a Ref") }; let HeapData::Tuple(tuple) = self.heap.get(*id) else { unreachable!("CallAttrExtended: args_tuple must be a Tuple") }; tuple.as_slice().iter().map(|v| v.clone_with_heap(self)).collect() } /// Builds `ArgValues` with kwargs for `CallAttrExtended`. 
///
/// # Panics
/// Panics if `kwargs_ref` is not a dict. This indicates a compiler bug since
/// the compiler always emits `BuildDict` before `CallAttrExtended` with kwargs.
fn build_args_with_kwargs_for_attr(
    &mut self,
    copied_args: Vec,
    kwargs_ref: Value,
) -> Result {
    let this = self;
    // The kwargs dict is owned by this call and released on every exit path
    defer_drop!(kwargs_ref, this);

    // Extract kwargs dict items
    let Value::Ref(id) = kwargs_ref else {
        unreachable!("CallAttrExtended: kwargs must be a Ref")
    };
    let HeapData::Dict(dict) = this.heap.get(*id) else {
        unreachable!("CallAttrExtended: kwargs must be a Dict")
    };
    let copied_kwargs: Vec<(Value, Value)> = dict
        .iter()
        .map(|(k, v)| (k.clone_with_heap(this.heap), v.clone_with_heap(this.heap)))
        .collect();

    let kwargs_values = if copied_kwargs.is_empty() {
        KwargsValues::Empty
    } else {
        match Dict::from_pairs(copied_kwargs, this) {
            Ok(kwargs_dict) => KwargsValues::Dict(kwargs_dict),
            Err(err) => {
                // The positional clones are owned by this call and would otherwise
                // be discarded without being released from the heap.
                for arg in copied_args {
                    arg.drop_with_heap(this);
                }
                return Err(err);
            }
        }
    };

    // Pick the smallest variant that fits what was actually supplied
    Ok(
        if copied_args.is_empty() && matches!(kwargs_values, KwargsValues::Empty) {
            ArgValues::Empty
        } else if copied_args.is_empty() {
            ArgValues::Kwargs(kwargs_values)
        } else {
            ArgValues::ArgsKargs {
                args: copied_args,
                kwargs: kwargs_values,
            }
        },
    )
}

// ========================================================================
// Frame Setup
// ========================================================================

/// Calls a defined function by pushing a new frame or creating a coroutine.
///
/// For sync functions: sets up the function's namespace with bound arguments,
/// cell variables, and free variables, then pushes a new frame.
///
/// For async functions: binds arguments immediately but returns a Coroutine
/// instead of pushing a frame. The coroutine stores the pre-bound namespace
/// and will be executed when awaited.
fn call_def_function(
    &mut self,
    func_id: FunctionId,
    cells: &[HeapId],
    defaults: &[Value],
    args: ArgValues,
) -> Result {
    let func = self.interns.get_function(func_id);

    // Async functions produce a coroutine object; sync functions run in a new frame
    if func.is_async {
        self.create_coroutine(func_id, cells, defaults, args)
    } else {
        self.call_sync_function(func_id, cells, defaults, args)
    }
}

/// Creates a Coroutine for an async function call.
///
/// Arguments are bound and the namespace (parameters, cell variables, captured
/// free variables) is fully built up front; the finished namespace is stored in
/// a heap-allocated `Coroutine`, and a reference to it is returned as
/// `CallResult::Value`. No frame is pushed here.
///
/// The coroutine is executed when awaited via Await.
fn create_coroutine(
    &mut self,
    func_id: FunctionId,
    cells: &[HeapId],
    defaults: &[Value],
    args: ArgValues,
) -> Result {
    let func = self.interns.get_function(func_id);

    // 1. Create namespace for the coroutine with bound arguments and captured cells.
    // The namespace is held in a `HeapGuard` until it is handed to the coroutine.
    let namespace = Vec::with_capacity(func.namespace_size);
    let mut namespace_guard = HeapGuard::new(namespace, self);
    let (namespace, this) = namespace_guard.as_parts_mut();

    // 2. Bind arguments to parameters
    func.signature.bind(args, defaults, this, func.name, namespace)?;

    // 3. Create cells for variables captured by nested functions
    {
        let param_count = func.signature.total_slots();
        for (i, maybe_param_idx) in func.cell_param_indices.iter().enumerate() {
            let cell_slot = param_count + i;
            // A cell backed by a parameter starts with a clone of that parameter's value
            let cell_value = if let Some(param_idx) = maybe_param_idx {
                namespace[*param_idx].clone_with_heap(this.heap)
            } else {
                Value::Undefined
            };
            let cell_id = this.heap.allocate(HeapData::Cell(CellValue(cell_value)))?;
            // Pad up to the cell's slot, then append the cell reference at that slot
            namespace.resize_with(cell_slot, || Value::Undefined);
            namespace.push(Value::Ref(cell_id));
        }

        // 4. Copy captured cells (free vars) into namespace
        let free_var_start = param_count + func.cell_var_count;
        for (i, &cell_id) in cells.iter().enumerate() {
            // The namespace holds its own reference to each captured cell
            this.heap.inc_ref(cell_id);
            let slot = free_var_start + i;
            namespace.resize_with(slot, || Value::Undefined);
            namespace.push(Value::Ref(cell_id));
        }

        // 5. Fill remaining slots with Undefined
        namespace.resize_with(func.namespace_size, || Value::Undefined);
    }

    // 6. Create Coroutine on heap
    let (namespace, this) = namespace_guard.into_parts();
    let coroutine = Coroutine::new(func_id, namespace);
    let coroutine_id = this.heap.allocate(HeapData::Coroutine(coroutine))?;

    Ok(CallResult::Value(Value::Ref(coroutine_id)))
}

/// Calls a sync function by pushing a new frame.
///
/// Sets up the function's namespace with bound arguments, cell variables,
/// and free variables (captured from enclosing scope for closures).
///
/// Locals are first assembled in a temporary vector held by a [`HeapGuard`],
/// and only appended to the VM stack once fully built. The frame's `stack_base`
/// is the stack length recorded before the locals are appended, so it points to
/// the start of the locals region; operands are pushed above it.
///
/// The size of the locals region is reported to the resource tracker before
/// the namespace is built, and released again if argument binding fails.
///
/// # Panics
/// Panics if the function's namespace size does not fit in a `u16`.
fn call_sync_function(
    &mut self,
    func_id: FunctionId,
    cells: &[HeapId],
    defaults: &[Value],
    args: ArgValues,
) -> Result {
    // Captured before anything changes: where the call happened and where locals will start
    let call_position = self.current_position();
    let stack_base = self.stack.len();

    let func = self.interns.get_function(func_id);
    let namespace_size = func.namespace_size;
    let locals_count = u16::try_from(namespace_size).expect("function namespace size exceeds u16");

    // Track memory for this frame's locals
    let size = namespace_size * std::mem::size_of::();
    self.heap.tracker_mut().on_allocate(|| size)?;

    // 1. Create namespace for the frame in a temporary vec, will extend to stack later
    let namespace = Vec::with_capacity(func.namespace_size);
    let mut namespace_guard = HeapGuard::new(namespace, self);
    let (namespace, this) = namespace_guard.as_parts_mut();

    // 2. Bind arguments to parameters
    {
        let bind_result = func.signature.bind(args, defaults, this, func.name, namespace);
        if let Err(e) = bind_result {
            // No frame will exist for these locals, so undo the tracker accounting
            this.heap.tracker_mut().on_free(|| size);
            return Err(e);
        }
    }

    // 3. Create cells for variables captured by nested functions
    {
        let param_count = func.signature.total_slots();
        for (i, maybe_param_idx) in func.cell_param_indices.iter().enumerate() {
            let cell_slot = param_count + i;
            // A cell backed by a parameter starts with a clone of that parameter's value
            let cell_value = if let Some(param_idx) = maybe_param_idx {
                namespace[*param_idx].clone_with_heap(this.heap)
            } else {
                Value::Undefined
            };
            // NOTE(review): an allocation failure here returns without the `on_free`
            // that the bind-failure path performs - confirm whether that is intended.
            let cell_id = this.heap.allocate(HeapData::Cell(CellValue(cell_value)))?;
            namespace.resize_with(cell_slot, || Value::Undefined);
            namespace.push(Value::Ref(cell_id));
        }

        // 4. Copy captured cells (free vars) into namespace
        let free_var_start = param_count + func.cell_var_count;
        for (i, &cell_id) in cells.iter().enumerate() {
            // The namespace holds its own reference to each captured cell
            this.heap.inc_ref(cell_id);
            let slot = free_var_start + i;
            namespace.resize_with(slot, || Value::Undefined);
            namespace.push(Value::Ref(cell_id));
        }

        // 5. Fill remaining slots with Undefined
        namespace.resize_with(namespace_size, || Value::Undefined);
    }

    let code = &func.code;
    // 6. Commit the guard (no rollback) and push the frame
    let (namespace, this) = namespace_guard.into_parts();
    this.stack.extend(namespace);
    this.push_frame(CallFrame::new_function(
        code,
        stack_base,
        locals_count,
        func_id,
        Some(call_position),
    ))?;

    Ok(CallResult::FramePushed)
}
}

================================================
FILE: crates/monty/src/bytecode/vm/collections.rs
================================================
//! Collection building and unpacking helpers for the VM.

use smallvec::SmallVec;

use super::VM;
use crate::{
    defer_drop, defer_drop_mut,
    exception_private::{ExcType, RunError, SimpleException},
    heap::{Heap, HeapData, HeapGuard},
    heap_data::HeapDataMut,
    intern::StringId,
    resource::ResourceTracker,
    types::{Dict, List, PyTrait, Set, Slice, Type, allocate_tuple, slice::value_to_option_i64, str::allocate_char},
    value::Value,
};

impl VM<'_, '_, T> {
/// Builds a list from the top n stack values.
pub(super) fn build_list(&mut self, count: usize) -> Result<(), RunError> {
    let items = self.pop_n(count);
    let list = List::new(items);
    let heap_id = self.heap.allocate(HeapData::List(list))?;
    self.push(Value::Ref(heap_id));
    Ok(())
}

/// Builds a tuple from the top n stack values.
///
/// Uses the empty tuple singleton when count is 0, and SmallVec
/// optimization for small tuples (≤2 elements).
pub(super) fn build_tuple(&mut self, count: usize) -> Result<(), RunError> {
    let items = self.pop_n(count);
    let value = allocate_tuple(items.into(), self.heap)?;
    self.push(value);
    Ok(())
}

/// Builds a dict from the top 2n stack values (key/value pairs).
///
/// Pairs are inserted in stack order, so for a repeated key the last value
/// wins. The value displaced by a repeated key is released from the heap
/// rather than silently discarded.
pub(super) fn build_dict(&mut self, count: usize) -> Result<(), RunError> {
    let items = self.pop_n(count * 2);
    let mut dict = Dict::new();
    // Consume the operands by value, two at a time: key first, then its value
    let mut iter = items.into_iter();
    while let (Some(key), Some(value)) = (iter.next(), iter.next()) {
        // `set` hands back the previous value when the key was already present
        // (e.g. `{'a': x, 'a': y}`); it is owned by us and must be released.
        if let Some(displaced) = dict.set(key, value, self)? {
            displaced.drop_with_heap(self);
        }
    }
    let heap_id = self.heap.allocate(HeapData::Dict(dict))?;
    self.push(Value::Ref(heap_id));
    Ok(())
}

/// Builds a set from the top n stack values.
///
/// Items are added in stack order; an error from adding an item is propagated.
pub(super) fn build_set(&mut self, count: usize) -> Result<(), RunError> {
    let items = self.pop_n(count);
    let mut set = Set::new();
    for item in items {
        set.add(item, self)?;
    }
    let heap_id = self.heap.allocate(HeapData::Set(set))?;
    self.push(Value::Ref(heap_id));
    Ok(())
}

/// Builds a slice object from the top 3 stack values.
///
/// Stack: [start, stop, step] -> [slice]
/// Each value can be None (for default) or an integer.
pub(super) fn build_slice(&mut self) -> Result<(), RunError> {
    let this = self;

    // Operands come off in reverse push order: step, stop, start.
    // Each one is released by its guard when this method returns.
    let stride = this.pop();
    defer_drop!(stride, this);
    let upper = this.pop();
    defer_drop!(upper, this);
    let lower = this.pop();
    defer_drop!(lower, this);

    // Convert in start/stop/step order so the first invalid operand is the one reported
    let bounds = Slice::new(
        value_to_option_i64(lower)?,
        value_to_option_i64(upper)?,
        value_to_option_i64(stride)?,
    );

    let slice_id = this.heap.allocate(HeapData::Slice(bounds))?;
    this.push(Value::Ref(slice_id));
    Ok(())
}

/// Extends a list with items from an iterable, for PEP 448 `*expr` literal unpacking.
///
/// Stack: [list, iterable] -> [list]
/// Pops the iterable, extends the list in place, leaves list on stack.
///
/// Raises `TypeError("Value after * must be an iterable, not {type}")` for non-iterables,
/// matching CPython's message for list/tuple literal unpacking (`[*x]`, `(*x,)`).
///
/// Uses `HeapGuard` for `list_ref` because it is pushed back on success,
/// and `defer_drop!` for `iterable` because it is always dropped.
pub(super) fn list_extend(&mut self) -> Result<(), RunError> {
    let this = self;
    let iterable = this.pop();
    defer_drop!(iterable, this);
    // HeapGuard for list_ref: pushed back on success via into_parts, dropped on error
    let mut list_ref_guard = HeapGuard::new(this.pop(), this);
    let (list_ref, this) = list_ref_guard.as_parts();

    // Clone the source items up front so the source borrow ends before the target
    // list is mutated. Only list, tuple, set, dict (keys only) and str sources are
    // handled here; every other value is reported as a non-iterable.
    let copied_items: Vec = match iterable {
        Value::Ref(id) => match this.heap.get(*id) {
            HeapData::List(list) => list.as_slice().iter().map(|v| v.clone_with_heap(this.heap)).collect(),
            HeapData::Tuple(tuple) => tuple.as_slice().iter().map(|v| v.clone_with_heap(this.heap)).collect(),
            HeapData::Set(set) => set.storage().iter().map(|v| v.clone_with_heap(this.heap)).collect(),
            HeapData::Dict(dict) => dict.iter().map(|(k, _)| k.clone_with_heap(this.heap)).collect(),
            HeapData::Str(s) => {
                // Need to allocate strings for each character
                let chars: Vec = s.as_str().chars().collect();
                let mut items = Vec::with_capacity(chars.len());
                for c in chars {
                    items.push(allocate_char(c, this.heap)?);
                }
                items
            }
            _ => {
                let type_ = iterable.py_type(this.heap);
                return Err(ExcType::type_error_value_after_star(type_));
            }
        },
        Value::InternString(id) => {
            // Interned string literal: same per-character expansion as a heap string
            let s = this.interns.get_str(*id);
            let chars: Vec = s.chars().collect();
            let mut items = Vec::with_capacity(chars.len());
            for c in chars {
                items.push(allocate_char(c, this.heap)?);
            }
            items
        }
        _ => {
            let type_ = iterable.py_type(this.heap);
            return Err(ExcType::type_error_value_after_star(type_));
        }
    };

    // Check if any copied items are refs (for updating contains_refs)
    let has_refs = copied_items.iter().any(|v| matches!(v, Value::Ref(_)));

    // Extend the list
    if let Value::Ref(id) = list_ref
        && let HeapDataMut::List(list) = this.heap.get_mut(*id)
    {
        // Update contains_refs before extending
        if has_refs {
            list.set_contains_refs();
        }
        list.as_vec_mut().extend(copied_items);
    }

    // Mark potential cycle after the mutable borrow ends
    if has_refs {
        this.heap.mark_potential_cycle();
    }

    // Push list_ref back on the stack (don't drop it)
    let (list_ref, this) = list_ref_guard.into_parts();
    this.push(list_ref);
    Ok(())
}

/// Converts a list to a tuple.
///
/// Stack: [list] -> [tuple]
///
/// The list is popped and released; its elements are cloned into a newly
/// allocated tuple, which is pushed in its place.
///
/// # Errors
///
/// Returns an internal error if the top of the stack is not a reference to a list.
pub(super) fn list_to_tuple(&mut self) -> Result<(), RunError> {
    let this = self;
    let list_ref = this.pop();
    defer_drop!(list_ref, this);

    // Clone the elements out; the list itself is released by the guard
    let copied_items: SmallVec<_> = if let Value::Ref(id) = list_ref {
        if let HeapData::List(list) = this.heap.get(*id) {
            list.as_slice().iter().map(|v| v.clone_with_heap(this.heap)).collect()
        } else {
            return Err(RunError::internal("ListToTuple: expected list"));
        }
    } else {
        return Err(RunError::internal("ListToTuple: expected list ref"));
    };

    // list_ref is dropped by the guard at scope exit; allocate the tuple
    let value = allocate_tuple(copied_items, this.heap)?;
    this.push(value);
    Ok(())
}

/// Merges a mapping into a dict for **kwargs unpacking.
///
/// Stack: [dict, mapping] -> [dict]
/// Validates that mapping is a dict and that keys are strings.
///
/// Uses `defer_drop!` for `mapping` (always dropped) and `HeapGuard` for
/// `dict_ref` (pushed back on success, dropped on error).
pub(super) fn dict_merge(&mut self, func_name_id: u16) -> Result<(), RunError> { let this = self; let mapping = this.pop(); defer_drop!(mapping, this); // HeapGuard for dict_ref: pushed back on success via into_parts, dropped on error let mut dict_ref_guard = HeapGuard::new(this.pop(), this); let (dict_ref, this) = dict_ref_guard.as_parts(); // Get function name for error messages let func_name = if func_name_id == 0xFFFF { "".to_string() } else { this.interns.get_str(StringId::from_index(func_name_id)).to_string() }; // Check that mapping is a dict (Ref pointing to Dict) and clone key-value pairs let copied_items: Vec<(Value, Value)> = if let Value::Ref(id) = mapping { if let HeapData::Dict(dict) = this.heap.get(*id) { dict.iter() .map(|(k, v)| (k.clone_with_heap(this.heap), v.clone_with_heap(this.heap))) .collect() } else { let type_name = mapping.py_type(this.heap).to_string(); return Err(ExcType::type_error_kwargs_not_mapping(&func_name, &type_name)); } } else { let type_name = mapping.py_type(this.heap).to_string(); return Err(ExcType::type_error_kwargs_not_mapping(&func_name, &type_name)); }; // Merge into the dict, validating string keys let dict_id = if let Value::Ref(id) = dict_ref { *id } else { return Err(RunError::internal("DictMerge: expected dict ref")); }; for (key, value) in copied_items { // Validate key is a string (InternString or heap-allocated Str) let is_string = match &key { Value::InternString(_) => true, Value::Ref(id) => matches!(this.heap.get(*id), HeapData::Str(_)), _ => false, }; if !is_string { key.drop_with_heap(this); value.drop_with_heap(this); return Err(ExcType::type_error_kwargs_nonstring_key()); } // Get the string key for error messages (needed before moving key into closure) let key_str = match &key { Value::InternString(id) => this.interns.get_str(*id).to_string(), Value::Ref(id) => { if let HeapData::Str(s) = this.heap.get(*id) { s.as_str().to_string() } else { "".to_string() } } _ => "".to_string(), }; // Use with_entry_mut 
to avoid borrow conflict: takes data out temporarily let result = Heap::with_entry_mut(this, dict_id, |this, data| { if let HeapDataMut::Dict(dict) = data { dict.set(key, value, this) } else { Err(RunError::internal("DictMerge: entry is not a Dict")) } }); // If set returned Some, the key already existed (duplicate kwarg) if let Some(old_value) = result? { old_value.drop_with_heap(this); return Err(ExcType::type_error_multiple_values(&func_name, &key_str)); } } // Push dict_ref back on the stack (don't drop it) let (dict_ref, this) = dict_ref_guard.into_parts(); this.push(dict_ref); Ok(()) } // ======================================================================== // PEP 448 Literal Building // ======================================================================== /// Silently merges a mapping into the dict literal at `depth` on the stack. /// /// Used for `{**x, ...}` dict literals where later keys silently overwrite /// earlier ones (unlike [`dict_merge`] which raises `TypeError` on duplicate keys /// and is used for function-call `**kwargs`). /// /// Stack (depth = 0): `[..., dict, mapping]` → `[..., dict]` /// /// # Errors /// /// Returns `TypeError: '{type}' object is not a mapping` if the TOS is not a dict. 
pub(super) fn dict_update(&mut self, depth: usize) -> Result<(), RunError> { let this = self; let mapping = this.pop(); defer_drop!(mapping, this); // Clone all key/value pairs out of the mapping before mutating the target dict let copied_items: Vec<(Value, Value)> = if let Value::Ref(id) = mapping { if let HeapData::Dict(dict) = this.heap.get(*id) { dict.iter() .map(|(k, v)| (k.clone_with_heap(this.heap), v.clone_with_heap(this.heap))) .collect() } else { let type_ = mapping.py_type(this.heap); return Err(ExcType::type_error_not_mapping(type_)); } } else { let type_ = mapping.py_type(this.heap); return Err(ExcType::type_error_not_mapping(type_)); }; // The target dict sits at `depth` positions below TOS (which is now gone after pop) let stack_len = this.stack.len(); let dict_pos = stack_len - 1 - depth; // SAFETY: the compiler always emits BuildDict before DictUpdate, so the // target is always a Value::Ref. This is a VM invariant: reaching this else // arm means a compiler bug. let Value::Ref(dict_id) = this.stack[dict_pos] else { unreachable!("DictUpdate: target is always a Ref — compiler invariant") }; for (key, value) in copied_items { let old = Heap::with_entry_mut(this, dict_id, |this, data| { if let HeapDataMut::Dict(dict) = data { dict.set(key, value, this) } else { // SAFETY: dict_id was obtained from a Value::Ref on the stack that // was created by BuildDict; it always refers to a HeapData::Dict. unreachable!("DictUpdate: heap entry is always a Dict — compiler invariant") } })?; // Silently drop any old value — PEP 448 dict literals allow duplicate keys if let Some(old_val) = old { old_val.drop_with_heap(this.heap); } } Ok(()) } /// Extends a set literal with all items from an iterable. /// /// Used for `{*x, ...}` set literals (PEP 448). Follows the same item-copying /// pattern as [`list_extend`]; raises `TypeError` for non-iterable sources. 
/// /// Stack (depth = 0): `[..., set, iterable]` → `[..., set]` /// /// # Errors /// /// Returns `TypeError: '{type}' object is not iterable` if TOS is not iterable. pub(super) fn set_extend(&mut self, depth: usize) -> Result<(), RunError> { let this = self; let iterable = this.pop(); defer_drop!(iterable, this); // Clone items from the iterable (same sources as list_extend) let copied_items: Vec = match iterable { Value::Ref(id) => match this.heap.get(*id) { HeapData::List(list) => list.as_slice().iter().map(|v| v.clone_with_heap(this.heap)).collect(), HeapData::Tuple(tuple) => tuple.as_slice().iter().map(|v| v.clone_with_heap(this.heap)).collect(), HeapData::Set(set) => set.storage().iter().map(|v| v.clone_with_heap(this.heap)).collect(), HeapData::Dict(dict) => dict.iter().map(|(k, _)| k.clone_with_heap(this.heap)).collect(), HeapData::Str(s) => { let chars: Vec = s.as_str().chars().collect(); let mut items = Vec::with_capacity(chars.len()); for c in chars { items.push(allocate_char(c, this.heap)?); } items } _ => { let type_ = iterable.py_type(this.heap); return Err(ExcType::type_error_not_iterable(type_)); } }, Value::InternString(id) => { let s = this.interns.get_str(*id); let chars: Vec = s.chars().collect(); let mut items = Vec::with_capacity(chars.len()); for c in chars { items.push(allocate_char(c, this.heap)?); } items } _ => { let type_ = iterable.py_type(this.heap); return Err(ExcType::type_error_not_iterable(type_)); } }; // The target set sits at `depth` positions below TOS (which is now gone after pop) let stack_len = this.stack.len(); let set_pos = stack_len - 1 - depth; // SAFETY: the compiler always emits BuildSet before SetExtend, so the // target is always a Value::Ref. This is a VM invariant: reaching this else // arm means a compiler bug. 
let Value::Ref(set_id) = this.stack[set_pos] else { unreachable!("SetExtend: target is always a Ref — compiler invariant") }; for item in copied_items { Heap::with_entry_mut(this, set_id, |this, data| { if let HeapDataMut::Set(set) = data { set.add(item, this) } else { // SAFETY: set_id was obtained from a Value::Ref on the stack that // was created by BuildSet; it always refers to a HeapData::Set. unreachable!("SetExtend: heap entry is always a Set — compiler invariant") } })?; } Ok(()) } // ======================================================================== // Comprehension Building // ======================================================================== /// Appends TOS to list for comprehension. /// /// Stack: [..., list, iter1, ..., iterN, value] -> [..., list, iter1, ..., iterN] /// The `depth` parameter is the number of iterators between the list and the value. /// List is at stack position: len - 2 - depth (0-indexed from bottom). pub(super) fn list_append(&mut self, depth: usize) -> Result<(), RunError> { let value = self.pop(); let stack_len = self.stack.len(); let list_pos = stack_len - 1 - depth; // Get the list reference let Value::Ref(list_id) = self.stack[list_pos] else { value.drop_with_heap(self); return Err(RunError::internal("ListAppend: expected list ref on stack")); }; // Append to the list using with_entry_mut to handle proper contains_refs tracking Heap::with_entry_mut(self, list_id, |this, data| { if let HeapDataMut::List(list) = data { list.append(this.heap, value); Ok(()) } else { value.drop_with_heap(this); Err(RunError::internal("ListAppend: expected list on heap")) } }) } /// Adds TOS to set for comprehension. /// /// Stack: [..., set, iter1, ..., iterN, value] -> [..., set, iter1, ..., iterN] /// The `depth` parameter is the number of iterators between the set and the value. /// May raise TypeError if value is unhashable. 
pub(super) fn set_add(&mut self, depth: usize) -> Result<(), RunError> { let value = self.pop(); let stack_len = self.stack.len(); let set_pos = stack_len - 1 - depth; // Get the set reference let Value::Ref(set_id) = self.stack[set_pos] else { value.drop_with_heap(self); return Err(RunError::internal("SetAdd: expected set ref on stack")); }; // Add to the set using with_entry_mut to avoid borrow conflicts Heap::with_entry_mut(self, set_id, |this, data| { if let HeapDataMut::Set(set) = data { set.add(value, this) } else { value.drop_with_heap(this); Err(RunError::internal("SetAdd: expected set on heap")) } })?; Ok(()) } /// Sets dict[key] = value for comprehension. /// /// Stack: [..., dict, iter1, ..., iterN, key, value] -> [..., dict, iter1, ..., iterN] /// The `depth` parameter is the number of iterators between the dict and the key-value pair. /// May raise TypeError if key is unhashable. pub(super) fn dict_set_item(&mut self, depth: usize) -> Result<(), RunError> { let value = self.pop(); let key = self.pop(); let stack_len = self.stack.len(); let dict_pos = stack_len - 1 - depth; // Get the dict reference let Value::Ref(dict_id) = self.stack[dict_pos] else { key.drop_with_heap(self); value.drop_with_heap(self); return Err(RunError::internal("DictSetItem: expected dict ref on stack")); }; // Set item in the dict using with_entry_mut to avoid borrow conflicts let old_value = Heap::with_entry_mut(self, dict_id, |this, data| { if let HeapDataMut::Dict(dict) = data { dict.set(key, value, this) } else { key.drop_with_heap(this); value.drop_with_heap(this); Err(RunError::internal("DictSetItem: expected dict on heap")) } })?; // Drop old value if key already existed if let Some(old) = old_value { old.drop_with_heap(self); } Ok(()) } // ======================================================================== // Unpacking // ======================================================================== /// Unpacks a sequence into n values on the stack. 
///
/// Supports lists, tuples, and strings. For strings, each character becomes
/// a separate single-character string.
///
/// # Errors
///
/// Returns `ValueError` when the sequence length differs from `count`, and
/// `TypeError` when the value is not one of the supported sequence types.
pub(super) fn unpack_sequence(&mut self, count: usize) -> Result<(), RunError> {
    let this = self;
    let value = this.pop();
    defer_drop!(value, this);

    // Every branch produces the unpacked items in source order; they are pushed
    // once, in reverse, at the end.
    let items: Vec<Value> = match value {
        // Interned strings (string literals stored inline, not on heap)
        Value::InternString(string_id) => {
            let s = this.interns.get_str(*string_id);
            let str_len = s.chars().count();
            if str_len != count {
                return Err(unpack_size_error(count, str_len));
            }
            // Allocate each character as a new string
            let mut items = Vec::with_capacity(str_len);
            for c in s.chars() {
                items.push(allocate_char(c, this.heap)?);
            }
            items
        }
        // Heap-allocated sequences
        Value::Ref(heap_id) => match this.heap.get(*heap_id) {
            HeapData::List(list) => {
                let list_len = list.len();
                if list_len != count {
                    return Err(unpack_size_error(count, list_len));
                }
                list.as_slice().iter().map(|v| v.clone_with_heap(this.heap)).collect()
            }
            HeapData::Tuple(tuple) => {
                let tuple_len = tuple.as_slice().len();
                if tuple_len != count {
                    return Err(unpack_size_error(count, tuple_len));
                }
                tuple.as_slice().iter().map(|v| v.clone_with_heap(this.heap)).collect()
            }
            HeapData::Str(s) => {
                // Collect chars once: gives the length for the size check and ends
                // the borrow of the heap string before allocating new strings.
                let chars: Vec<char> = s.as_str().chars().collect();
                if chars.len() != count {
                    return Err(unpack_size_error(count, chars.len()));
                }
                let mut items = Vec::with_capacity(chars.len());
                for c in chars {
                    items.push(allocate_char(c, this.heap)?);
                }
                items
            }
            other => {
                let type_name = other.py_type(this.heap);
                return Err(unpack_type_error(type_name));
            }
        },
        // Non-iterable types
        _ => {
            let type_name = value.py_type(this.heap);
            return Err(unpack_type_error(type_name));
        }
    };

    // Push items in reverse order so first item is on top
    for item in items.into_iter().rev() {
        this.push(item);
    }
    Ok(())
}

/// Unpacks a sequence with a starred target.
///
/// `before` is the number of targets before the star, `after` is the number after.
/// The starred target collects all middle items into a list.
///
/// For example, `first, *rest, last = [1, 2, 3, 4, 5]` has before=1, after=1.
/// After execution, the stack has: first (top), rest_list, last.
///
/// # Errors
///
/// Returns `ValueError` when the sequence has fewer than `before + after` items,
/// and `TypeError` when the value is not a list, tuple, or string.
pub(super) fn unpack_ex(&mut self, before: usize, after: usize) -> Result<(), RunError> {
    let this = self;
    let value = this.pop();
    defer_drop_mut!(value, this);

    let min_items = before + after;

    // Extract items from the sequence
    let items: Vec<Value> = match value {
        Value::InternString(string_id) => {
            let s = this.interns.get_str(*string_id);
            // Collect chars once to avoid double iteration over UTF-8 data
            let chars: Vec<char> = s.chars().collect();
            if chars.len() < min_items {
                return Err(unpack_ex_too_few_error(min_items, chars.len()));
            }
            // Allocate each character as a new string
            let mut items = Vec::with_capacity(chars.len());
            for c in chars {
                items.push(allocate_char(c, this.heap)?);
            }
            items
        }
        Value::Ref(heap_id) => match this.heap.get(*heap_id) {
            HeapData::List(list) => {
                let list_len = list.len();
                if list_len < min_items {
                    return Err(unpack_ex_too_few_error(min_items, list_len));
                }
                list.as_slice().iter().map(|v| v.clone_with_heap(this.heap)).collect()
            }
            HeapData::Tuple(tuple) => {
                let tuple_len = tuple.as_slice().len();
                if tuple_len < min_items {
                    return Err(unpack_ex_too_few_error(min_items, tuple_len));
                }
                tuple.as_slice().iter().map(|v| v.clone_with_heap(this.heap)).collect()
            }
            HeapData::Str(s) => {
                // Collect chars once to avoid double iteration over UTF-8 data
                let chars: Vec<char> = s.as_str().chars().collect();
                if chars.len() < min_items {
                    return Err(unpack_ex_too_few_error(min_items, chars.len()));
                }
                let mut items = Vec::with_capacity(chars.len());
                for c in chars {
                    items.push(allocate_char(c, this.heap)?);
                }
                items
            }
            other => {
                let type_name = other.py_type(this.heap);
                return Err(unpack_type_error(type_name));
            }
        },
        _ => {
            let type_name = value.py_type(this.heap);
            return Err(unpack_type_error(type_name));
        }
    };

    this.push_unpack_ex_results(items, before, after)
}

/// Helper to push unpacked items with starred target onto the stack.
///
/// Takes ownership of the unpacked items (in source order) and moves the ones
/// between the first `before` and the last `after` into a new list. Callers
/// must have checked that `items.len() >= before + after`.
fn push_unpack_ex_results(&mut self, items: Vec<Value>, before: usize, after: usize) -> Result<(), RunError> {
    let this = self;
    defer_drop_mut!(items, this);

    // Items get pushed onto the stack backwards, so a lot of .rev() calls
    for item in items.drain(items.len() - after..).rev() {
        this.push(item);
    }

    // Middle items as a list (starred target)
    let middle_list: Vec<Value> = items.drain(before..).collect();
    let list_id = this.heap.allocate(HeapData::List(List::new(middle_list)))?;
    this.push(Value::Ref(list_id));

    // Before items
    for item in items.drain(..).rev() {
        this.push(item);
    }

    Ok(())
}
}

/// Creates the ValueError for star unpacking when there are too few values.
fn unpack_ex_too_few_error(min_needed: usize, actual: usize) -> RunError {
    let message = format!("not enough values to unpack (expected at least {min_needed}, got {actual})");
    SimpleException::new_msg(ExcType::ValueError, message).into()
}

/// Creates the appropriate ValueError for unpacking size mismatches.
/// /// Python uses different messages depending on whether there are too few or too many values: /// - Too few: "not enough values to unpack (expected X, got Y)" /// - Too many: "too many values to unpack (expected X, got Y)" fn unpack_size_error(expected: usize, actual: usize) -> RunError { let message = if actual < expected { format!("not enough values to unpack (expected {expected}, got {actual})") } else { format!("too many values to unpack (expected {expected}, got {actual})") }; SimpleException::new_msg(ExcType::ValueError, message).into() } /// Creates a TypeError for attempting to unpack a non-iterable type. fn unpack_type_error(type_name: Type) -> RunError { SimpleException::new_msg( ExcType::TypeError, format!("cannot unpack non-iterable {type_name} object"), ) .into() } ================================================ FILE: crates/monty/src/bytecode/vm/compare.rs ================================================ //! Comparison operation helpers for the VM. use super::VM; use crate::{ defer_drop, exception_private::{ExcType, RunError}, resource::ResourceTracker, types::{LongInt, PyTrait}, value::Value, }; impl VM<'_, '_, T> { /// Equality comparison. pub(super) fn compare_eq(&mut self) -> Result<(), RunError> { let this = self; let rhs = this.pop(); defer_drop!(rhs, this); let lhs = this.pop(); defer_drop!(lhs, this); let result = lhs.py_eq(rhs, this)?; this.push(Value::Bool(result)); Ok(()) } /// Inequality comparison. pub(super) fn compare_ne(&mut self) -> Result<(), RunError> { let this = self; let rhs = this.pop(); defer_drop!(rhs, this); let lhs = this.pop(); defer_drop!(lhs, this); let result = !lhs.py_eq(rhs, this)?; this.push(Value::Bool(result)); Ok(()) } /// Ordering comparison with a predicate. 
pub(super) fn compare_ord(&mut self, check: F) -> Result<(), RunError> where F: FnOnce(std::cmp::Ordering) -> bool, { let this = self; let rhs = this.pop(); defer_drop!(rhs, this); let lhs = this.pop(); defer_drop!(lhs, this); let result = lhs.py_cmp(rhs, this)?.is_some_and(check); this.push(Value::Bool(result)); Ok(()) } /// Identity comparison (is/is not). /// /// Compares identity using `Value::is()` which compares IDs. /// /// Identity is determined by `Value::id()` which uses: /// - Fixed IDs for singletons (None, True, False, Ellipsis) /// - Interned string/bytes index for InternString/InternBytes /// - HeapId for heap-allocated values (Ref) /// - Value-based hashing for immediate types (Int, Float, Function, etc.) pub(super) fn compare_is(&mut self, negate: bool) { let this = self; let rhs = this.pop(); defer_drop!(rhs, this); let lhs = this.pop(); defer_drop!(lhs, this); let result = lhs.is(rhs); this.push(Value::Bool(if negate { !result } else { result })); } /// Membership test (in/not in). pub(super) fn compare_in(&mut self, negate: bool) -> Result<(), RunError> { let this = self; let container = this.pop(); // container (rhs) defer_drop!(container, this); let item = this.pop(); // item to find (lhs) defer_drop!(item, this); let contained = container.py_contains(item, this)?; this.push(Value::Bool(if negate { !contained } else { contained })); Ok(()) } /// Modulo equality comparison: a % b == k /// /// This is an optimization for patterns like `x % 3 == 0`. The constant k /// is provided by the caller (fetched from the constant pool using the /// cached code reference in the run loop). /// /// Uses a fast path for Int/Float types via `py_mod_eq`, and falls back to /// computing `py_mod` then comparing with `py_eq` for other types (e.g., LongInt). 
pub(super) fn compare_mod_eq(&mut self, k: &Value) -> Result<(), RunError> { let this = self; let rhs = this.pop(); // divisor (b) defer_drop!(rhs, this); let lhs = this.pop(); // dividend (a) defer_drop!(lhs, this); // Try fast path for Int/Float types let mod_result = match k { Value::Int(k_val) => lhs.py_mod_eq(rhs, *k_val), _ => None, }; if let Some(is_equal) = mod_result { // Fast path succeeded this.push(Value::Bool(is_equal)); Ok(()) } else { // Fallback: compute py_mod then compare with py_eq // This handles LongInt and other Ref types let mod_value = lhs.py_mod(rhs, this); match mod_value { Ok(Some(v)) => { defer_drop!(v, this); // Handle InternLongInt by converting to heap LongInt for comparison let k_value = if let Value::InternLongInt(id) = k { let bi = this.interns.get_long_int(*id).clone(); LongInt::new(bi).into_value(this.heap)? } else { // k is from the constant pool and is always an immediate value k.clone_immediate() }; defer_drop!(k_value, this); let is_equal = v.py_eq(k_value, this)?; this.push(Value::Bool(is_equal)); Ok(()) } Ok(None) => Err(ExcType::type_error("unsupported operand type(s) for %")), Err(e) => Err(e), } } } } ================================================ FILE: crates/monty/src/bytecode/vm/exceptions.rs ================================================ //! Exception handling helpers for the VM. use super::VM; use crate::{ builtins::Builtins, defer_drop, exception_private::{ExcType, ExceptionRaise, RawStackFrame, RunError, SimpleException}, heap::{HeapData, HeapGuard}, intern::{StaticStrings, StringId}, resource::ResourceTracker, types::{PyTrait, Type}, value::Value, }; impl VM<'_, '_, T> { /// Returns the current frame's name for traceback generation. /// /// Returns the function name for user-defined functions, or `` for /// module-level code. 
fn current_frame_name(&self) -> StringId {
    // A frame with a function id belongs to a user-defined function; use its name.
    if let Some(func_id) = self.current_frame().function_id {
        return self.interns.get_function(func_id).name.name_id;
    }
    // Otherwise this is module-level code.
    StaticStrings::Module.into()
}

/// Builds the `RawStackFrame` describing the current execution point.
///
/// Combines the current source position with the current frame name; used to
/// record traceback information when an exception is raised.
fn make_stack_frame(&self) -> RawStackFrame {
    let position = self.current_position();
    let name = self.current_frame_name();
    RawStackFrame::new(position, name, None)
}

/// Gives an error its innermost traceback frame when it has none yet.
///
/// An exception that already carries a frame is returned untouched, and
/// internal errors are never given one. Caller frames are appended separately
/// while the exception propagates.
///
/// The new frame copies the exception's `hide_caret` flag, which is set by the
/// code that created the error to say whether the traceback should show a caret.
fn attach_frame_to_error(&self, mut error: RunError) -> RunError {
    match &mut error {
        RunError::Exc(exc) => {
            if exc.frame.is_none() {
                let mut frame = self.make_stack_frame();
                // Carry over the caret preference chosen by the error's creator
                frame.hide_caret = exc.hide_caret;
                exc.frame = Some(frame);
            }
        }
        RunError::UncatchableExc(exc) => {
            if exc.frame.is_none() {
                let mut frame = self.make_stack_frame();
                frame.hide_caret = exc.hide_caret;
                exc.frame = Some(frame);
            }
        }
        RunError::Internal(_) => {}
    }
    error
}

/// Creates a RunError from a Value that should be an exception.
///
/// Takes ownership of the exception value and drops it properly.
/// The `is_raise` flag indicates if this is from a `raise` statement (hide caret).
pub(super) fn make_exception(&mut self, exc_value: Value, is_raise: bool) -> RunError { let this = self; defer_drop!(exc_value, this); let simple_exc = match exc_value { // Exception instance on heap Value::Ref(heap_id) => { if let HeapData::Exception(exc) = this.heap.get(*heap_id) { // Clone the exception (guard handles cleanup at scope exit) exc.clone() } else { // Not an exception type SimpleException::new_msg(ExcType::TypeError, "exceptions must derive from BaseException") } } // Exception type (e.g., `raise ValueError` instead of `raise ValueError()`) // Instantiate with no message Value::Builtin(Builtins::ExcType(exc_type)) => SimpleException::new_none(*exc_type), // Invalid exception value _ => SimpleException::new_msg(ExcType::TypeError, "exceptions must derive from BaseException"), }; // Create frame with appropriate hide_caret setting let frame = if is_raise { RawStackFrame::from_raise(this.current_position(), this.current_frame_name()) } else { this.make_stack_frame() }; RunError::Exc(ExceptionRaise { exc: simple_exc, frame: Some(frame), hide_caret: false, }) } /// Handles an exception by searching for a handler in the exception table. /// /// Returns: /// - `Some(VMResult)` if the exception was not caught (should return from run loop) /// - `None` if the exception was caught (continue execution) /// /// When an exception is caught: /// 1. Unwinds the stack to the handler's expected depth /// 2. Pushes the exception value onto the stack /// 3. Sets `current_exception` for bare `raise` /// 4. 
Jumps to the handler code pub(super) fn handle_exception(&mut self, mut error: RunError) -> Option { // Ensure exception has initial frame info error = self.attach_frame_to_error(error); // For uncatchable exceptions (ResourceError like RecursionError), // we still need to unwind the stack to collect all frames for the traceback if matches!(error, RunError::UncatchableExc(_) | RunError::Internal(_)) { return Some(self.unwind_for_traceback(error)); } // Only catchable exceptions can be handled let exc_info = match &error { RunError::Exc(exc) => exc.clone(), RunError::UncatchableExc(_) | RunError::Internal(_) => unreachable!(), }; // Create exception value to push on stack let exc_value = self.create_exception_value(&exc_info); let exc_value = match exc_value { Ok(v) => v, Err(e) => return Some(e), }; // Use HeapGuard because exc_value is conditionally consumed (pushed onto // exception_stack when handler found) or dropped (when no handler found) let mut exc_guard = HeapGuard::new(exc_value, self); // Search for handler in current and outer frames loop { let (exc_value, this) = exc_guard.as_parts(); let frame = this.current_frame(); let ip = u32::try_from(this.instruction_ip).expect("instruction IP exceeds u32"); // Search exception table for a handler covering this IP if let Some(entry) = frame.code.find_exception_handler(ip) { // Found a handler! Unwind stack and jump to it. 
let handler_offset = usize::try_from(entry.handler()).expect("handler offset exceeds usize"); let target_stack_depth = frame.stack_base + frame.locals_count as usize + entry.stack_depth() as usize; // Unwind stack to target depth (drop excess values) while this.stack.len() > target_stack_depth { let value = this.stack.pop().unwrap(); value.drop_with_heap(this); } // Push exception value onto stack (handler expects it) let exc_for_stack = exc_value.clone_with_heap(this.heap); this.push(exc_for_stack); // Reclaim exc_value from guard - it's being pushed onto exception_stack let (exc_value, this) = exc_guard.into_parts(); // Push exception onto the exception_stack for bare raise // This allows nested except handlers to restore outer exception context this.exception_stack.push(exc_value); // Jump to handler this.current_frame_mut().ip = handler_offset; return None; // Continue execution at handler } // No handler in this frame - pop frame and try outer if this.frames.len() <= 1 { // No more frames - exception is unhandled let is_spawned = this.is_spawned_task(); // Drop exc_value before potentially switching tasks drop(exc_guard); // For spawned tasks, fail the task instead of propagating if is_spawned { match self.handle_task_failure(error) { Ok(()) => { // Switched to next task - continue execution return None; } Err(waiter_error) => { // Switched to waiter - handle error in waiter's context return self.handle_exception(waiter_error); } } } return Some(error); } // Get the call site position before popping frame // This is where the caller invoked the function that's failing let call_position = this.current_frame().call_position; // Pop this frame if this.pop_frame() { // The frame indicated evaluation should stop - e.g. inside `evaluate_function` - return the error // now to stop unwinding. 
return Some(error); } // Add caller frame info to traceback (if we have call position) if let Some(pos) = call_position { let frame_name = this.current_frame_name(); match &mut error { RunError::Exc(exc) => exc.add_caller_frame(pos, frame_name), RunError::UncatchableExc(exc) => exc.add_caller_frame(pos, frame_name), RunError::Internal(_) => {} } } } } /// Unwinds the call stack to collect all frames for a traceback. /// /// Used for uncatchable exceptions (like RecursionError) that can't be handled /// but still need a complete traceback showing all active call frames. fn unwind_for_traceback(&mut self, mut error: RunError) -> RunError { // Pop frames and add caller frame info to the traceback while self.frames.len() > 1 { // Get the call site position before popping frame let call_position = self.current_frame().call_position; // Pop this frame (cleans up namespace, etc.) self.pop_frame(); // Add caller frame info to traceback if let Some(pos) = call_position { let frame_name = self.current_frame_name(); match &mut error { RunError::Exc(exc) => exc.add_caller_frame(pos, frame_name), RunError::UncatchableExc(exc) => exc.add_caller_frame(pos, frame_name), RunError::Internal(_) => {} } } } error } /// Creates an exception Value from exception info. /// /// Allocates an Exception on the heap and returns a Value::Ref to it. fn create_exception_value(&mut self, exc: &ExceptionRaise) -> Result { let exception = exc.exc.clone(); let heap_id = self.heap.allocate(HeapData::Exception(exception))?; Ok(Value::Ref(heap_id)) } /// Checks if an exception matches an exception type for except clause matching. /// /// Validates that `exc_type` is a valid exception type (ExcType or tuple of ExcTypes). /// Returns `Ok(true)` if exception matches, `Ok(false)` if not, or `Err` if exc_type is invalid. 
pub(super) fn check_exc_match(&self, exception: &Value, exc_type: &Value) -> Result { let exc_type_enum = exception.py_type(self.heap); self.check_exc_match_inner(exc_type_enum, exc_type) } /// Inner recursive helper for check_exc_match that handles tuples. fn check_exc_match_inner(&self, exc_type_enum: Type, exc_type: &Value) -> Result { match exc_type { // Valid exception type Value::Builtin(Builtins::ExcType(handler_type)) => { // Check if exception is an instance of handler_type Ok(matches!(exc_type_enum, Type::Exception(et) if et.is_subclass_of(*handler_type))) } // Tuple of exception types Value::Ref(id) => { if let HeapData::Tuple(tuple) = self.heap.get(*id) { for v in tuple.as_slice() { if self.check_exc_match_inner(exc_type_enum, v)? { return Ok(true); } } Ok(false) } else { // Not a tuple - invalid exception type Err(ExcType::except_invalid_type_error()) } } // Any other type is invalid for except clause _ => Err(ExcType::except_invalid_type_error()), } } } ================================================ FILE: crates/monty/src/bytecode/vm/format.rs ================================================ //! F-string and value formatting helpers for the VM. use super::VM; use crate::{ defer_drop, exception_private::{ExcType, RunError, SimpleException}, fstring::{ParsedFormatSpec, ascii_escape, decode_format_spec, format_string, format_with_spec}, resource::{ResourceTracker, check_repeat_size}, types::{PyTrait, str::allocate_string}, value::Value, }; impl VM<'_, '_, T> { /// Builds an f-string by concatenating n string parts from the stack. 
pub(super) fn build_fstring(&mut self, count: usize) -> Result<(), RunError> { let parts = self.pop_n(count); let mut result = String::new(); for part in parts { // Each part should be a string (interned or heap-allocated) let part_str = part.py_str(self); result.push_str(&part_str); part.drop_with_heap(self); } let value = allocate_string(result, self.heap)?; self.push(value); Ok(()) } /// Formats a value for f-string interpolation. /// /// Flags encoding: /// - bits 0-1: conversion (0=none, 1=str, 2=repr, 3=ascii) /// - bit 2: has format spec on stack /// /// Python f-string formatting order: /// 1. Apply format spec to original value (type-specific formatting) /// 2. Apply conversion flag to the result /// /// However, conversion flags like !s, !r, !a are applied BEFORE formatting /// if the value would be repr'd. The key insight is: /// - No conversion: format the original value type /// - !s conversion: convert to str first, then format as string /// - !r conversion: convert to repr first, then format as string /// - !a conversion: convert to ascii repr first, then format as string pub(super) fn format_value(&mut self, flags: u8) -> Result<(), RunError> { let this = self; let conversion = flags & 0x03; let has_format_spec = (flags & 0x04) != 0; // Pop format spec if present (pushed before value, so popped after) let format_spec = if has_format_spec { Some(this.pop()) } else { None }; let value = this.pop(); defer_drop!(value, this); // Format with spec applied to original value type, or convert and format as string let formatted = if let Some(spec_value) = format_spec { defer_drop!(spec_value, this); let spec = this.get_format_spec(spec_value, value)?; // Pre-check: reject format specs with huge width before pad_string // allocates an untracked Rust String. 
check_repeat_size(spec.width, spec.fill.len_utf8(), this.heap.tracker())?; match conversion { // No conversion - format original value 0 => format_with_spec(value, &spec, this)?, // !s - convert to str, format as string 1 => { let s = value.py_str(this); format_string(&s, &spec)? } // !r - convert to repr, format as string 2 => { let s = value.py_repr(this); format_string(&s, &spec)? } // !a - convert to ascii, format as string 3 => { let s = ascii_escape(&value.py_repr(this)); format_string(&s, &spec)? } _ => format_with_spec(value, &spec, this)?, } } else { // No format spec - just convert based on conversion flag match conversion { 0 => value.py_str(this).into_owned(), 1 => value.py_str(this).into_owned(), 2 => value.py_repr(this).into_owned(), 3 => ascii_escape(&value.py_repr(this)), _ => value.py_str(this).into_owned(), } }; let result = allocate_string(formatted, this.heap)?; this.push(result); Ok(()) } /// Gets a ParsedFormatSpec from a format spec value. /// /// The `value_for_error` parameter is used to include the value type in error messages. /// Uses lazy type capture: only calls `py_type()` in error paths. fn get_format_spec(&self, spec_value: &Value, value_for_error: &Value) -> Result { match spec_value { Value::Int(n) if *n < 0 => { // Decode the encoded format spec; n < 0 ensures (-n - 1) >= 0 let encoded = u64::try_from((-*n) - 1).expect("format spec encoding validated non-negative"); Ok(decode_format_spec(encoded)) } _ => { // Dynamic format spec - parse the string let spec_str = spec_value.py_str(self); spec_str.parse::().map_err(|invalid| { // Only fetch type in error path let value_type = value_for_error.py_type(self.heap); RunError::Exc( SimpleException::new_msg( ExcType::ValueError, format!("Invalid format specifier '{invalid}' for object of type '{value_type}'"), ) .into(), ) }) } } } } ================================================ FILE: crates/monty/src/bytecode/vm/mod.rs ================================================ //! 
Bytecode virtual machine for executing compiled Python code. //! //! The VM uses a stack-based execution model with an operand stack for computation //! and a call stack for function frames. Each frame owns its instruction pointer (IP). mod async_exec; mod attr; mod binary; mod call; mod collections; mod compare; mod exceptions; mod format; mod scheduler; use std::cmp::Ordering; pub(crate) use call::CallResult; use scheduler::Scheduler; use crate::{ MontyObject, args::ArgValues, asyncio::{CallId, TaskId}, bytecode::{code::Code, op::Opcode}, exception_private::{ExcType, RunError, RunResult, SimpleException}, heap::{ContainsHeap, DropWithHeap, Heap, HeapData, HeapGuard, HeapId}, heap_data::{Closure, FunctionDefaults, HeapDataMut}, intern::{FunctionId, Interns, StringId}, io::PrintWriter, modules::BuiltinModule, os::OsFunction, parse::CodeRange, resource::ResourceTracker, types::{LongInt, MontyIter, PyTrait, iter::advance_on_heap}, value::{BitwiseOp, EitherStr, Value}, }; /// Result of executing Await opcode. /// /// Indicates what the VM should do after awaiting a value: /// - `ValueReady`: the awaited value resolved immediately, push it /// - `FramePushed`: a new frame was pushed for coroutine execution /// - `Yield`: all tasks blocked, yield to caller with pending futures enum AwaitResult { /// The awaited value resolved immediately (e.g., resolved ExternalFuture). ValueReady(Value), /// A new frame was pushed to execute a coroutine. FramePushed, /// All tasks are blocked - yield to caller with pending futures. Yield(Vec), } /// Tries an operation and handles exceptions, reloading cached frame state. /// /// Use this in the main run loop where `cached_frame` /// are used. After catching an exception, reloads the cache since the handler /// may be in a different frame. macro_rules! 
try_catch_sync {
    ($self:expr, $cached_frame:ident, $expr:expr) => {
        if let Err(e) = $expr {
            // `handle_exception` returning `Some` means no handler caught the
            // error: propagate it out of the enclosing function.
            if let Some(result) = $self.handle_exception(e) {
                return Err(result);
            }
            // Exception was caught - handler may be in different frame, reload cache
            reload_cache!($self, $cached_frame);
        }
    };
}

/// Handles an exception and reloads cached frame state if caught.
///
/// Use this in the main run loop, where a local `cached_frame` is in use.
/// The error is passed to `handle_exception`; if that returns `Some(result)`
/// the enclosing function returns `Err(result)`. Otherwise the exception was
/// caught, and the cache is reloaded since the handler may be in a different
/// frame.
///
/// Wrapped in a block to allow use in match arm expressions.
macro_rules! catch_sync {
    ($self:expr, $cached_frame:ident, $err:expr) => {{
        if let Some(result) = $self.handle_exception($err) {
            return Err(result);
        }
        // Exception was caught - handler may be in different frame, reload cache
        reload_cache!($self, $cached_frame);
    }};
}

/// Fetches a byte from bytecode using cached code/ip, advancing ip.
///
/// Used in the run loop for fast operand fetching without frame access.
/// Evaluates to the byte at `ip` and leaves `ip` incremented by one.
macro_rules! fetch_byte {
    ($cached_frame:expr) => {{
        let byte = $cached_frame.code.bytecode()[$cached_frame.ip];
        $cached_frame.ip += 1;
        byte
    }};
}

/// Fetches a u8 operand using cached code/ip.
///
/// Thin alias for `fetch_byte!`.
macro_rules! fetch_u8 {
    ($cached_frame:expr) => {
        fetch_byte!($cached_frame)
    };
}

/// Fetches an i8 operand using cached code/ip.
///
/// Fetches one byte via `fetch_byte!` and reinterprets it as a signed value.
macro_rules! fetch_i8 {
    ($cached_frame:expr) => {{ i8::from_ne_bytes([fetch_byte!($cached_frame)]) }};
}

/// Fetches a u16 operand (little-endian) using cached code/ip.
///
/// Reads the bytes at `ip` and `ip + 1`, then advances `ip` by two.
macro_rules! fetch_u16 {
    ($cached_frame:expr) => {{
        let lo = $cached_frame.code.bytecode()[$cached_frame.ip];
        let hi = $cached_frame.code.bytecode()[$cached_frame.ip + 1];
        $cached_frame.ip += 2;
        u16::from_le_bytes([lo, hi])
    }};
}

/// Fetches an i16 operand (little-endian) using cached code/ip.
///
/// Reads the bytes at `ip` and `ip + 1`, then advances `ip` by two.
macro_rules! fetch_i16 {
    ($cached_frame:expr) => {{
        let lo = $cached_frame.code.bytecode()[$cached_frame.ip];
        let hi = $cached_frame.code.bytecode()[$cached_frame.ip + 1];
        $cached_frame.ip += 2;
        i16::from_le_bytes([lo, hi])
    }};
}

/// Reloads cached frame state from the current frame.
///
/// Call this after any operation that modifies the frame stack (calls, returns,
/// exception handling). Overwrites the local `cached_frame` with the result of
/// `new_cached_frame()`.
macro_rules! reload_cache {
    ($self:expr, $cached_frame:ident) => {{
        $cached_frame = $self.new_cached_frame();
    }};
}

/// Applies a relative jump offset to the cached IP.
///
/// The IP is widened to `i64`, the signed offset is added, and the sum is
/// converted back with `usize::try_from`. Panics if the IP does not fit in an
/// `i64`, or if the resulting IP is negative or does not fit in a `usize`.
/// Note that the addition itself is a plain `+`; only the two conversions are
/// checked.
macro_rules! jump_relative {
    ($ip:expr, $offset:expr) => {{
        let ip_i64 = i64::try_from($ip).expect("instruction pointer exceeds i64");
        let new_ip = ip_i64 + i64::from($offset);
        $ip = usize::try_from(new_ip).expect("jump resulted in negative or overflowing IP");
    }};
}

/// Handles the result of a load operation that may yield a `FrameExit::NameLookup`.
///
/// `load_local` and `load_global` return `Result<Option<FrameExit>, RunError>`:
/// - `Ok(None)`: load succeeded, value is on the stack
/// - `Ok(Some(FrameExit::NameLookup { .. }))`: unresolved name, yield to host
/// - `Err(e)`: exception (e.g., UnboundLocalError)
///
/// When yielding, the cached IP is written back to the current frame before
/// the enclosing function returns `Ok(frame_exit)`. Errors go through
/// `catch_sync!`.
macro_rules! handle_load_result {
    ($self:expr, $cached_frame:ident, $result:expr) => {
        match $result {
            Ok(None) => {}
            Ok(Some(frame_exit)) => {
                $self.current_frame_mut().ip = $cached_frame.ip;
                return Ok(frame_exit);
            }
            Err(e) => catch_sync!($self, $cached_frame, e),
        }
    };
}

/// Handles the result of a call operation that returns `CallResult`.
///
/// This macro eliminates the repetitive pattern of matching on `CallResult`
/// variants that appears in LoadAttr, CallFunction, CallFunctionKw, CallAttr,
/// CallAttrKw, and CallFunctionExtended opcodes.
///
/// Actions taken for each variant:
/// - `Value(value)`: Push the value onto the stack
/// - `FramePushed`: Reload the cached frame (a new frame was pushed)
/// - `External(name, args)`: Return `FrameExit::ExternalCall` to yield to host
/// - `OsCall(func, args)`: Return `FrameExit::OsCall` to yield to host
/// - `MethodCall(name, args)`: Return `FrameExit::MethodCall` to yield to host
/// - `AwaitValue(value)`: Push value, then implicitly await it via `exec_get_awaitable`
/// - `Err(err)`: Handle the exception via `catch_sync!`
///
/// Every branch that returns a `FrameExit` first allocates a call ID (where
/// the exit carries one) and writes the cached IP back to the current frame.
macro_rules! handle_call_result {
    ($self:expr, $cached_frame:ident, $result:expr) => {
        match $result {
            Ok(CallResult::Value(result)) => $self.push(result),
            Ok(CallResult::FramePushed) => reload_cache!($self, $cached_frame),
            Ok(CallResult::External(name, args)) => {
                let call_id = $self.allocate_call_id();
                // `take()` clears the saved load IP so it is reported at most once.
                let name_load_ip = $self.ext_function_load_ip.take();
                // Sync cached IP back to frame before snapshot for resume
                $self.current_frame_mut().ip = $cached_frame.ip;
                return Ok(FrameExit::ExternalCall {
                    function_name: name,
                    args,
                    call_id,
                    name_load_ip,
                });
            }
            Ok(CallResult::OsCall(func, args)) => {
                let call_id = $self.allocate_call_id();
                // Sync cached IP back to frame before snapshot for resume
                $self.current_frame_mut().ip = $cached_frame.ip;
                return Ok(FrameExit::OsCall {
                    function: func,
                    args,
                    call_id,
                });
            }
            Ok(CallResult::MethodCall(method_name, args)) => {
                let call_id = $self.allocate_call_id();
                // Sync cached IP back to frame before snapshot for resume
                $self.current_frame_mut().ip = $cached_frame.ip;
                return Ok(FrameExit::MethodCall {
                    method_name,
                    args,
                    call_id,
                });
            }
            Ok(CallResult::AwaitValue(value)) => {
                // Push the value and implicitly await it (used by asyncio.run())
                $self.push(value);
                // Sync the IP before awaiting: the await may push a frame or yield.
                $self.current_frame_mut().ip = $cached_frame.ip;
                match $self.exec_get_awaitable() {
                    Ok(AwaitResult::ValueReady(value)) => {
                        $self.push(value);
                    }
                    Ok(AwaitResult::FramePushed) => {
                        reload_cache!($self, $cached_frame);
                    }
                    Ok(AwaitResult::Yield(pending_calls)) => {
                        return Ok(FrameExit::ResolveFutures(pending_calls));
                    }
                    Err(e) => {
                        catch_sync!($self, $cached_frame, e);
                    }
                }
            }
            Err(err) => catch_sync!($self, $cached_frame, err),
        }
    };
}

/// Result of VM execution.
pub enum FrameExit {
    /// Execution completed successfully with a return value.
    Return(Value),
    /// Execution paused for an external function call.
    ///
    /// The caller should execute the external function and call `resume()`
    /// with the result. The `call_id` allows the host to use async resolution
    /// by calling `run_pending()` instead of `run(result)`.
    ExternalCall {
        /// Name of the external function to call (interned or heap-owned).
        function_name: EitherStr,
        /// Arguments for the external function (includes both positional and keyword args).
        args: ArgValues,
        /// Unique ID for this call, used for async correlation.
        call_id: CallId,
        /// Optional bytecode IP of the load instruction that produced this `ExtFunction`.
        ///
        /// When a `LoadGlobalCallable`/`LoadLocalCallable` opcode auto-injects an `ExtFunction`
        /// for an undefined name, the load instruction's IP is saved here. In standard execution
        /// (without external function support), this IP is used to restore the frame pointer
        /// before raising `NameError`, so the traceback points to the name rather than the call.
        name_load_ip: Option,
    },
    /// Execution paused for an os function call.
    ///
    /// The caller should execute a function corresponding to the `os_call` and call `resume()`
    /// with the result. The `call_id` allows the host to use async resolution
    /// by calling `run_pending()` instead of `run(result)`.
    OsCall {
        /// ID of the os function to call.
        function: OsFunction,
        /// Arguments for the os function (includes both positional and keyword args).
        args: ArgValues,
        /// Unique ID for this call, used for async correlation.
        call_id: CallId,
    },
    /// Execution paused for a dataclass method call.
    ///
    /// The caller should invoke the method on the original Python dataclass and call
    /// `resume()` with the result. The `method_name` is the attribute name (e.g.
    /// `"distance"`) and `args` includes the dataclass instance as the first argument
    /// (`self`).
    MethodCall {
        /// Method name (e.g., "distance").
        method_name: EitherStr,
        /// Arguments including the dataclass instance as the first positional arg.
        args: ArgValues,
        /// Unique ID for this call, used for async correlation.
        call_id: CallId,
    },
    /// All tasks are blocked waiting for external futures to resolve.
    ///
    /// The caller must resolve the pending CallIds before calling `resume()`.
    /// This happens when await is called on an ExternalFuture that hasn't
    /// been resolved yet, and there are no other ready tasks to switch to.
    ResolveFutures(Vec),
    /// Execution paused for an unresolved name lookup.
    ///
    /// When the VM encounters an `Undefined` value in a `LocalUnassigned` slot
    /// (module level) or a global slot, it yields to the host to resolve the name.
    /// The host can return a value to cache in the slot, or indicate the name is
    /// truly undefined (which will raise `NameError`).
    ///
    /// This enables auto-detection of external functions without requiring upfront
    /// declaration: unresolved names are lazily resolved by the host at runtime.
    NameLookup {
        /// The interned name being looked up.
        name_id: StringId,
        /// The namespace slot where the resolved value should be cached.
        namespace_slot: u16,
        /// Whether this is a global slot (true) or a local/function slot (false).
        is_global: bool,
    },
}

/// A single function activation record.
///
/// Each frame represents one level in the call stack and owns its own
/// instruction pointer. This design avoids sync bugs on call/return.
#[derive(Debug)]
pub struct CallFrame<'code> {
    /// Bytecode being executed.
    code: &'code Code,
    /// Instruction pointer within this frame's bytecode.
    ip: usize,
    /// Base index into the VM stack for this frame's locals region.
    ///
    /// The frame's locals occupy `stack[stack_base..stack_base + locals_count]`,
    /// and operands are pushed above that.
    stack_base: usize,
    /// Number of local variable slots in this frame.
    ///
    /// Zero for module-level frames (globals are stored separately).
    /// For function frames, this equals `func.namespace_size`.
    locals_count: u16,
    /// Function ID (for tracebacks). None for module-level code.
    function_id: Option,
    /// Call site position (for tracebacks).
    call_position: Option,
    /// When this frame returns (or exits with an exception) the VM should exit the run loop
    /// and return to the caller. Supports `evaluate_function`.
    should_return: bool,
}

impl<'code> CallFrame<'code> {
    /// Creates a new call frame for module-level code.
    ///
    /// Module frames have `locals_count = 0` because module-level variables
    /// are stored in the VM's `globals` vec, not in the stack. The frame starts
    /// at `ip = 0` with `stack_base = 0`, no function ID and no call position.
    pub fn new_module(code: &'code Code) -> Self {
        Self {
            code,
            ip: 0,
            stack_base: 0,
            locals_count: 0,
            function_id: None,
            call_position: None,
            should_return: false,
        }
    }

    /// Creates a new call frame for a function call.
    ///
    /// The frame's locals occupy `stack[stack_base..stack_base + locals_count]`.
    /// Operands are pushed above the locals region. The frame starts at `ip = 0`
    /// with `should_return` cleared.
    pub fn new_function(
        code: &'code Code,
        stack_base: usize,
        locals_count: u16,
        function_id: FunctionId,
        call_position: Option,
    ) -> Self {
        Self {
            code,
            ip: 0,
            stack_base,
            locals_count,
            function_id: Some(function_id),
            call_position,
            should_return: false,
        }
    }
}

/// Cached state of the VM derived from the current frame as an optimization.
///
/// Holds the hot fields from the current `CallFrame` to avoid repeated
/// `frames.last()` lookups in the main opcode loop.
#[derive(Debug, Copy, Clone)]
pub struct CachedFrame<'code> {
    /// Bytecode being executed.
    code: &'code Code,
    /// Instruction pointer within this frame's bytecode.
    ip: usize,
    /// Base index into the VM stack for this frame's locals.
    stack_base: usize,
}

impl<'code> From<&CallFrame<'code>> for CachedFrame<'code> {
    /// Copies `code`, `ip` and `stack_base` out of the frame.
    fn from(frame: &CallFrame<'code>) -> Self {
        Self {
            code: frame.code,
            ip: frame.ip,
            stack_base: frame.stack_base,
        }
    }
}

/// Serializable representation of a call frame.
///
/// Cannot store `&Code` (a reference) — instead stores `FunctionId` to look up
/// the pre-compiled Code object on resume. Module-level code uses `None`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SerializedFrame {
    /// Which function's code this frame executes (None = module-level).
    function_id: Option,
    /// Instruction pointer within this frame's bytecode.
    ip: usize,
    /// Base index into the VM stack for this frame's locals region.
    stack_base: usize,
    /// Number of local variable slots (0 for module-level frames).
    locals_count: u16,
    /// Call site position (for tracebacks).
    call_position: Option,
}

impl CallFrame<'_> {
    /// Converts this frame to a serializable representation.
    ///
    /// The `code` reference and the `should_return` flag are not carried over.
    ///
    /// # Panics
    /// Panics if `should_return` is set, since such frames cannot be serialized yet.
    fn serialize(&self) -> SerializedFrame {
        assert!(
            !self.should_return,
            "cannot serialize frame marked for return - not yet supported"
        );
        SerializedFrame {
            function_id: self.function_id,
            ip: self.ip,
            stack_base: self.stack_base,
            locals_count: self.locals_count,
            call_position: self.call_position,
        }
    }
}

/// VM state for pause/resume at external function calls.
///
/// **Ownership:** This struct OWNS the values (refcounts were already incremented).
/// Must be used with the serialized Heap - HeapId values are indices into that heap.
///
/// **Usage:** When the VM pauses for an external call, call `snapshot()` to
/// create this snapshot. The snapshot can be serialized and stored. On resume,
/// use `restore()` to reconstruct the VM and continue execution.
///
/// Note: This struct does not implement `Clone` because `Value` uses manual
/// reference counting. Snapshots transfer ownership - they are not copied.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct VMSnapshot {
    /// Operand stack — locals and operands interleaved per frame.
    ///
    /// Each function frame's locals occupy `stack[frame.stack_base..frame.stack_base + frame.locals_count]`,
    /// with operands pushed above.
    pub(crate) stack: Vec,
    /// Module-level (global) variable storage.
    pub(crate) globals: Vec,
    /// Call frames (serializable form — stores FunctionId, not &Code).
    frames: Vec,
    /// Stack of exceptions being handled for nested except blocks.
    ///
    /// When entering an except handler, the exception is pushed onto this stack.
    /// When exiting via `ClearException`, the top is popped. This allows nested
    /// except handlers to restore the outer exception context.
    exception_stack: Vec,
    /// IP of the instruction that caused the pause (for exception handling).
    instruction_ip: usize,
    /// Scheduler state (always present).
    ///
    /// Contains call ID counter, task state, pending calls, and resolved futures.
    scheduler: Scheduler,
}

// ============================================================================
// Virtual Machine
// ============================================================================

/// The bytecode virtual machine.
///
/// Executes compiled bytecode using a stack-based execution model.
/// The instruction pointer (IP) lives in each `CallFrame`, not here,
/// to avoid sync bugs on call/return.
///
/// # Lifetimes
/// * `'a` - Lifetime of the heap, namespaces, and interns
/// * `'p` - Lifetime of the print writer's internal references
pub struct VM<'a, 'p, T: ResourceTracker> {
    /// Operand stack — locals and operands interleaved per frame.
    ///
    /// Each function frame's locals occupy `stack[frame.stack_base..frame.stack_base + frame.locals_count]`,
    /// with operands pushed above. Module-level frames have `locals_count = 0`
    /// because globals are stored separately.
    pub(crate) stack: Vec,
    /// Module-level (global) variable storage.
    ///
    /// Indexed by slot number from `LoadGlobal`/`StoreGlobal` opcodes.
    /// Separated from the stack because globals persist across function calls
    /// and are accessed via dedicated opcodes.
    pub(crate) globals: Vec,
    /// Call stack — function frames (each frame has its own IP).
    frames: Vec>,
    /// Heap for reference-counted objects.
    pub(crate) heap: &'a mut Heap,
    /// Interned strings/bytes.
    pub(crate) interns: &'a Interns,
    /// Print output writer, borrowed so callers retain access to collected output.
    pub(crate) print_writer: PrintWriter<'p>,
    /// Stack of exceptions being handled for nested except blocks.
    ///
    /// Used by bare `raise` to re-raise the current exception.
    /// When entering an except handler, the exception is pushed onto this stack.
    /// When exiting via `ClearException`, the top is popped. This allows nested
    /// except handlers to restore the outer exception context.
    exception_stack: Vec,
    /// IP of the instruction being executed (for exception table lookup).
    ///
    /// Updated at the start of each instruction before operands are fetched.
    /// This allows us to find the correct exception handler when an error occurs.
    instruction_ip: usize,
    /// Scheduler for task management and call ID allocation.
    ///
    /// Always present — owns `next_call_id` (used by both sync and async paths)
    /// plus async task state. Internal collections don't allocate until first use,
    /// so sync-only code pays only for the main task entry.
    scheduler: Scheduler,
    /// Module-level code (for restoring main task frames).
    ///
    /// Stored here because the main task's frames have `function_id: None` and
    /// need a reference to the module code when being restored after task switching.
    /// `None` until `run_module` or `restore` sets it.
    module_code: Option<&'a Code>,
    /// Bytecode IP of the most recent `LoadGlobalCallable`/`LoadLocalCallable` that
    /// pushed an `ExtFunction` for an undefined name.
    ///
    /// Used to restore the frame IP when standard execution converts an `ExternalCall`
    /// back to a `NameError`, so the traceback points to the name reference rather than
    /// the call expression.
    ext_function_load_ip: Option,
}

impl<'a, 'p, T: ResourceTracker> VM<'a, 'p, T> {
    /// Creates a new VM with the given runtime context.
    ///
    /// The VM starts with no frames, an empty exception stack, a fresh
    /// scheduler and no module code. The operand stack and frame stack are
    /// pre-allocated with capacity 64 and 16 respectively.
    pub fn new(
        globals: Vec,
        heap: &'a mut Heap,
        interns: &'a Interns,
        print_writer: PrintWriter<'p>,
    ) -> Self {
        Self {
            stack: Vec::with_capacity(64),
            globals,
            frames: Vec::with_capacity(16),
            heap,
            interns,
            print_writer,
            exception_stack: Vec::new(),
            instruction_ip: 0,
            scheduler: Scheduler::new(),
            ext_function_load_ip: None, // Set by LoadGlobalCallable/LoadLocalCallable
            module_code: None,
        }
    }

    /// Reconstructs a VM from a snapshot.
    ///
    /// The heap must already be deserialized. `FunctionId` values
    /// in frames are used to look up pre-compiled `Code` objects from the `Interns`.
    /// The `module_code` is used for frames with `function_id = None`.
    ///
    /// Restored frames always have `should_return` cleared, and the heap's
    /// recursion depth is reset to the number of restored frames minus one.
    ///
    /// # Arguments
    /// * `snapshot` - The VM snapshot to restore
    /// * `module_code` - Compiled module code (for frames with function_id = None)
    /// * `heap` - The deserialized heap
    /// * `interns` - Interns for looking up function code
    /// * `print_writer` - Writer for print output
    pub fn restore(
        snapshot: VMSnapshot,
        module_code: &'a Code,
        heap: &'a mut Heap,
        interns: &'a Interns,
        print_writer: PrintWriter<'p>,
    ) -> Self {
        // Reconstruct call frames from serialized form
        let frames: Vec> = snapshot
            .frames
            .into_iter()
            .map(|sf| {
                // Function frames resolve their code through the interns;
                // module-level frames fall back to the supplied module code.
                let code = match sf.function_id {
                    Some(func_id) => &interns.get_function(func_id).code,
                    None => module_code,
                };
                CallFrame {
                    code,
                    ip: sf.ip,
                    stack_base: sf.stack_base,
                    locals_count: sf.locals_count,
                    function_id: sf.function_id,
                    call_position: sf.call_position,
                    should_return: false,
                }
            })
            .collect();
        // Restore recursion depth to match the number of active function frames.
        // During serialization, recursion_depth is transient (defaults to 0),
        // but cleanup paths call decr_recursion_depth for each non-root frame.
        let current_frame_depth = frames.len().saturating_sub(1); // Subtract 1 for root frame which doesn't contribute to depth
        heap.set_recursion_depth(current_frame_depth);
        Self {
            stack: snapshot.stack,
            globals: snapshot.globals,
            frames,
            heap,
            interns,
            print_writer,
            exception_stack: snapshot.exception_stack,
            instruction_ip: snapshot.instruction_ip,
            scheduler: snapshot.scheduler,
            module_code: Some(module_code),
            ext_function_load_ip: None,
        }
    }

    /// Consumes the VM and creates a snapshot for pause/resume.
    ///
    /// **Ownership transfer:** This method takes `self` by value, consuming the VM.
    /// The snapshot owns all Values (refcounts already correct from the live VM).
    /// The heap and namespaces must be serialized alongside this snapshot.
    ///
    /// This is NOT a clone - it's a transfer. After calling this, the original VM
    /// is gone and only the snapshot (+ serialized heap/namespaces) represents the state.
    ///
    /// # Panics
    /// Panics if any frame has `should_return` set (see `CallFrame::serialize`).
    pub fn snapshot(self) -> VMSnapshot {
        VMSnapshot {
            // Move values directly — no clone, no refcount increment needed
            // (the VM owned them, now the snapshot owns them)
            stack: self.stack,
            globals: self.globals,
            frames: self.frames.into_iter().map(|f| f.serialize()).collect(),
            exception_stack: self.exception_stack,
            instruction_ip: self.instruction_ip,
            scheduler: self.scheduler,
        }
    }

    /// Pushes an initial frame for module-level code and runs the VM.
    ///
    /// Returns whatever `run()` returns; a failure to push the initial frame
    /// is propagated with `?`.
    pub fn run_module(&mut self, code: &'a Code) -> Result {
        // Store module code for restoring main task frames during task switching
        self.module_code = Some(code);
        self.push_frame(CallFrame::new_module(code))?;
        self.run()
    }

    /// Cleans up VM state before the VM is dropped.
    ///
    /// This method must be called before the VM goes out of scope to ensure
    /// proper reference counting cleanup for any exception values and scheduler state.
pub fn cleanup(&mut self) { // Drop all exceptions in the exception stack self.exception_stack.drain(..).drop_with_heap(self.heap); // Clean up current task's stack values and frame cell references self.cleanup_current_task(); // Clean up scheduler state (task stacks, pending calls, resolved values, frame cells) self.scheduler.cleanup(self.heap); self.globals.drain(..).drop_with_heap(self.heap); } /// Returns the `stack_base` of the current (topmost) call frame. /// /// Used by `NameLookup` resolution to determine which stack region to cache /// resolved values into when the lookup originated from a function scope. pub fn current_stack_base(&self) -> usize { self.frames .last() .expect("VM should have at least one frame") .stack_base } /// Takes ownership of the globals vector, replacing it with an empty vec. /// /// Used by the REPL to reclaim globals after VM execution completes, /// before calling `cleanup()` (which would destroy them in ref-count-panic mode). pub fn take_globals(&mut self) -> Vec { std::mem::take(&mut self.globals) } /// Allocates a new `CallId` for an external function call. fn allocate_call_id(&mut self) -> CallId { self.scheduler.allocate_call_id() } /// Returns true if we're on the main task (or no async at all). /// /// This is used to determine whether a `ReturnValue` at the last frame means /// module-level completion (return to host) or spawned task completion /// (handle task completion and switch). fn is_main_task(&self) -> bool { self.scheduler.current_task_id().is_none_or(TaskId::is_main) } /// Main execution loop. /// /// Fetches opcodes from the current frame's bytecode and executes them. /// Returns when execution completes, an error occurs, or an external /// call is needed. /// /// Uses locally cached `code` and `ip` variables to avoid repeated /// `frames.last_mut().expect()` calls during operand fetching. The cache /// is reloaded after any operation that modifies the frame stack. 
pub fn run(&mut self) -> Result { // Cache frame state locally to avoid repeated frames.last_mut() calls. // The Code reference has lifetime 'a (lives in Interns), independent of frame borrow. let mut cached_frame: CachedFrame<'a> = self.new_cached_frame(); loop { // Check time limit and trigger GC if needed at each instruction. // For NoLimitTracker, these are inlined no-ops that compile away. self.heap.check_time()?; if self.heap.should_gc() { // Sync IP before GC for safety self.current_frame_mut().ip = cached_frame.ip; self.run_gc(); } // Track instruction IP for exception table lookup self.instruction_ip = cached_frame.ip; // Fetch opcode using cached values (no frame access) let opcode = { let byte = cached_frame.code.bytecode()[cached_frame.ip]; cached_frame.ip += 1; Opcode::try_from(byte).expect("invalid opcode in bytecode") }; match opcode { // ============================================================ // Stack Operations // ============================================================ Opcode::Pop => { let value = self.pop(); value.drop_with_heap(self); } Opcode::Dup => { let value = self.peek().clone_with_heap(self); self.push(value); } Opcode::Dup2 => { let len = self.stack.len(); let first = self.stack[len - 2].clone_with_heap(self); let second = self.stack[len - 1].clone_with_heap(self); self.push(first); self.push(second); } Opcode::Rot2 => { // Swap top two: [a, b] → [b, a] let len = self.stack.len(); self.stack.swap(len - 1, len - 2); } Opcode::Rot3 => { // Rotate top three: [a, b, c] → [c, a, b] // Uses in-place rotation without cloning let len = self.stack.len(); // Move c out, then shift a→b→c, then put c at a's position // Equivalent to: [..rest, a, b, c] → [..rest, c, a, b] self.stack[len - 3..].rotate_right(1); } // Constants & Literals Opcode::LoadConst => { let idx = fetch_u16!(cached_frame); let value = cached_frame.code.constants().get(idx); // Handle InternLongInt specially - convert to heap-allocated LongInt if let 
Value::InternLongInt(long_int_id) = value { let bi = self.interns.get_long_int(*long_int_id).clone(); match LongInt::new(bi).into_value(self.heap) { Ok(v) => self.push(v), Err(e) => catch_sync!(self, cached_frame, RunError::from(e)), } } else { self.push(value.clone_with_heap(self)); } } Opcode::LoadNone => self.push(Value::None), Opcode::LoadTrue => self.push(Value::Bool(true)), Opcode::LoadFalse => self.push(Value::Bool(false)), Opcode::LoadSmallInt => { let n = fetch_i8!(cached_frame); self.push(Value::Int(i64::from(n))); } // Variables - Specialized Local Loads (no operand) Opcode::LoadLocal0 => handle_load_result!(self, cached_frame, self.load_local(&cached_frame, 0)), Opcode::LoadLocal1 => handle_load_result!(self, cached_frame, self.load_local(&cached_frame, 1)), Opcode::LoadLocal2 => handle_load_result!(self, cached_frame, self.load_local(&cached_frame, 2)), Opcode::LoadLocal3 => handle_load_result!(self, cached_frame, self.load_local(&cached_frame, 3)), // Variables - General Local Operations Opcode::LoadLocal => { let slot = u16::from(fetch_u8!(cached_frame)); handle_load_result!(self, cached_frame, self.load_local(&cached_frame, slot)); } Opcode::LoadLocalW => { let slot = fetch_u16!(cached_frame); handle_load_result!(self, cached_frame, self.load_local(&cached_frame, slot)); } Opcode::StoreLocal => { let slot = u16::from(fetch_u8!(cached_frame)); self.store_local(&cached_frame, slot); } Opcode::StoreLocalW => { let slot = fetch_u16!(cached_frame); self.store_local(&cached_frame, slot); } Opcode::DeleteLocal => { let slot = u16::from(fetch_u8!(cached_frame)); self.delete_local(&cached_frame, slot); } Opcode::DeleteGlobal => { let slot = fetch_u16!(cached_frame); self.delete_global(slot); } // Variables - Callable-context Local Loads Opcode::LoadLocalCallable => { let slot = u16::from(fetch_u8!(cached_frame)); let name_id = StringId::from_index(fetch_u16!(cached_frame)); self.load_local_callable(&cached_frame, slot, name_id); } Opcode::LoadLocalCallableW 
=> { let slot = fetch_u16!(cached_frame); let name_id = StringId::from_index(fetch_u16!(cached_frame)); self.load_local_callable(&cached_frame, slot, name_id); } // Variables - Global Operations Opcode::LoadGlobal => { let slot = fetch_u16!(cached_frame); handle_load_result!(self, cached_frame, self.load_global(slot)); } Opcode::LoadGlobalCallable => { let slot = fetch_u16!(cached_frame); let name_id = StringId::from_index(fetch_u16!(cached_frame)); self.load_global_callable(slot, name_id); } Opcode::StoreGlobal => { let slot = fetch_u16!(cached_frame); self.store_global(slot); } // Variables - Cell Operations (closures) Opcode::LoadCell => { let slot = fetch_u16!(cached_frame); try_catch_sync!(self, cached_frame, self.load_cell(&cached_frame, slot)); } Opcode::StoreCell => { let slot = fetch_u16!(cached_frame); self.store_cell(&cached_frame, slot); } // Binary Operations - route through exception handling for tracebacks Opcode::BinaryAdd => try_catch_sync!(self, cached_frame, self.binary_add()), Opcode::BinarySub => try_catch_sync!(self, cached_frame, self.binary_sub()), Opcode::BinaryMul => try_catch_sync!(self, cached_frame, self.binary_mult()), Opcode::BinaryDiv => try_catch_sync!(self, cached_frame, self.binary_div()), Opcode::BinaryFloorDiv => try_catch_sync!(self, cached_frame, self.binary_floordiv()), Opcode::BinaryMod => try_catch_sync!(self, cached_frame, self.binary_mod()), Opcode::BinaryPow => try_catch_sync!(self, cached_frame, self.binary_pow()), // Bitwise operations - only work on integers Opcode::BinaryAnd => try_catch_sync!(self, cached_frame, self.binary_and()), Opcode::BinaryOr => try_catch_sync!(self, cached_frame, self.binary_or()), Opcode::BinaryXor => try_catch_sync!(self, cached_frame, self.binary_xor()), Opcode::BinaryLShift => { try_catch_sync!(self, cached_frame, self.binary_bitwise(BitwiseOp::LShift)); } Opcode::BinaryRShift => { try_catch_sync!(self, cached_frame, self.binary_bitwise(BitwiseOp::RShift)); } Opcode::BinaryMatMul => 
try_catch_sync!(self, cached_frame, self.binary_matmul()), // Comparison Operations Opcode::CompareEq => try_catch_sync!(self, cached_frame, self.compare_eq()), Opcode::CompareNe => try_catch_sync!(self, cached_frame, self.compare_ne()), Opcode::CompareLt => try_catch_sync!(self, cached_frame, self.compare_ord(Ordering::is_lt)), Opcode::CompareLe => try_catch_sync!(self, cached_frame, self.compare_ord(Ordering::is_le)), Opcode::CompareGt => try_catch_sync!(self, cached_frame, self.compare_ord(Ordering::is_gt)), Opcode::CompareGe => try_catch_sync!(self, cached_frame, self.compare_ord(Ordering::is_ge)), Opcode::CompareIs => self.compare_is(false), Opcode::CompareIsNot => self.compare_is(true), Opcode::CompareIn => try_catch_sync!(self, cached_frame, self.compare_in(false)), Opcode::CompareNotIn => try_catch_sync!(self, cached_frame, self.compare_in(true)), Opcode::CompareModEq => { let const_idx = fetch_u16!(cached_frame); let k = cached_frame.code.constants().get(const_idx); try_catch_sync!(self, cached_frame, self.compare_mod_eq(k)); } // Unary Operations Opcode::UnaryNot => { let value = self.pop(); let result = !value.py_bool(self); value.drop_with_heap(self); self.push(Value::Bool(result)); } Opcode::UnaryNeg => { // Unary minus - negate numeric value let value = self.pop(); match value { Value::Int(n) => { // Use checked_neg to handle i64::MIN overflow if let Some(negated) = n.checked_neg() { self.push(Value::Int(negated)); } else { // i64::MIN negated overflows to LongInt let li = -LongInt::from(n); match li.into_value(self.heap) { Ok(v) => self.push(v), Err(e) => catch_sync!(self, cached_frame, RunError::from(e)), } } } Value::Float(f) => self.push(Value::Float(-f)), Value::Bool(b) => self.push(Value::Int(if b { -1 } else { 0 })), Value::Ref(id) => { if let HeapData::LongInt(li) = self.heap.get(id) { let negated = -LongInt::new(li.inner().clone()); value.drop_with_heap(self); match negated.into_value(self.heap) { Ok(v) => self.push(v), Err(e) => 
catch_sync!(self, cached_frame, RunError::from(e)), } } else { let value_type = value.py_type(self.heap); value.drop_with_heap(self); catch_sync!(self, cached_frame, ExcType::unary_type_error("-", value_type)); } } _ => { let value_type = value.py_type(self.heap); value.drop_with_heap(self); catch_sync!(self, cached_frame, ExcType::unary_type_error("-", value_type)); } } } Opcode::UnaryPos => { // Unary plus - converts bools to int, no-op for other numbers let value = self.pop(); match value { Value::Int(_) | Value::Float(_) => self.push(value), Value::Bool(b) => self.push(Value::Int(i64::from(b))), Value::Ref(id) => { if matches!(self.heap.get(id), HeapData::LongInt(_)) { // LongInt - return as-is (value already has correct refcount) self.push(value); } else { let value_type = value.py_type(self.heap); value.drop_with_heap(self); catch_sync!(self, cached_frame, ExcType::unary_type_error("+", value_type)); } } _ => { let value_type = value.py_type(self.heap); value.drop_with_heap(self); catch_sync!(self, cached_frame, ExcType::unary_type_error("+", value_type)); } } } Opcode::UnaryInvert => { // Bitwise NOT let value = self.pop(); match value { Value::Int(n) => self.push(Value::Int(!n)), Value::Bool(b) => self.push(Value::Int(!i64::from(b))), Value::Ref(id) => { if let HeapData::LongInt(li) = self.heap.get(id) { // LongInt bitwise NOT: ~x = -(x + 1) let inverted = -(li.inner() + 1i32); value.drop_with_heap(self); match LongInt::new(inverted).into_value(self.heap) { Ok(v) => self.push(v), Err(e) => catch_sync!(self, cached_frame, RunError::from(e)), } } else { let value_type = value.py_type(self.heap); value.drop_with_heap(self); catch_sync!(self, cached_frame, ExcType::unary_type_error("~", value_type)); } } _ => { let value_type = value.py_type(self.heap); value.drop_with_heap(self); catch_sync!(self, cached_frame, ExcType::unary_type_error("~", value_type)); } } } // In-place Operations - route through exception handling Opcode::InplaceAdd => 
try_catch_sync!(self, cached_frame, self.inplace_add()), // Other in-place ops use the same logic as binary ops for now Opcode::InplaceSub => try_catch_sync!(self, cached_frame, self.binary_sub()), Opcode::InplaceMul => try_catch_sync!(self, cached_frame, self.binary_mult()), Opcode::InplaceDiv => try_catch_sync!(self, cached_frame, self.binary_div()), Opcode::InplaceFloorDiv => try_catch_sync!(self, cached_frame, self.binary_floordiv()), Opcode::InplaceMod => try_catch_sync!(self, cached_frame, self.binary_mod()), Opcode::InplacePow => try_catch_sync!(self, cached_frame, self.binary_pow()), Opcode::InplaceAnd => { try_catch_sync!(self, cached_frame, self.binary_bitwise(BitwiseOp::And)); } Opcode::InplaceOr => try_catch_sync!(self, cached_frame, self.binary_bitwise(BitwiseOp::Or)), Opcode::InplaceXor => { try_catch_sync!(self, cached_frame, self.binary_bitwise(BitwiseOp::Xor)); } Opcode::InplaceLShift => { try_catch_sync!(self, cached_frame, self.binary_bitwise(BitwiseOp::LShift)); } Opcode::InplaceRShift => { try_catch_sync!(self, cached_frame, self.binary_bitwise(BitwiseOp::RShift)); } // Collection Building - route through exception handling Opcode::BuildList => { let count = fetch_u16!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.build_list(count)); } Opcode::BuildTuple => { let count = fetch_u16!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.build_tuple(count)); } Opcode::BuildDict => { let count = fetch_u16!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.build_dict(count)); } Opcode::BuildSet => { let count = fetch_u16!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.build_set(count)); } Opcode::FormatValue => { let flags = fetch_u8!(cached_frame); try_catch_sync!(self, cached_frame, self.format_value(flags)); } Opcode::BuildFString => { let count = fetch_u16!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.build_fstring(count)); } Opcode::BuildSlice => { 
try_catch_sync!(self, cached_frame, self.build_slice()); } Opcode::ListExtend => { try_catch_sync!(self, cached_frame, self.list_extend()); } Opcode::ListToTuple => { try_catch_sync!(self, cached_frame, self.list_to_tuple()); } Opcode::DictMerge => { let func_name_id = fetch_u16!(cached_frame); try_catch_sync!(self, cached_frame, self.dict_merge(func_name_id)); } // PEP 448 literal building Opcode::DictUpdate => { let depth = fetch_u8!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.dict_update(depth)); } Opcode::SetExtend => { let depth = fetch_u8!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.set_extend(depth)); } // Comprehension Building - append/add/set items during iteration Opcode::ListAppend => { let depth = fetch_u8!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.list_append(depth)); } Opcode::SetAdd => { let depth = fetch_u8!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.set_add(depth)); } Opcode::DictSetItem => { let depth = fetch_u8!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.dict_set_item(depth)); } // Subscript & Attribute - route through exception handling Opcode::BinarySubscr => { let index = self.pop(); let obj = self.pop(); let result = obj.py_getitem(&index, self); obj.drop_with_heap(self); index.drop_with_heap(self); match result { Ok(v) => self.push(v), Err(e) => catch_sync!(self, cached_frame, e), } } Opcode::StoreSubscr => { // Stack order: value, obj, index (TOS) let index = self.pop(); let mut obj = self.pop(); let value = self.pop(); let result = obj.py_setitem(index, value, self); obj.drop_with_heap(self); if let Err(e) = result { catch_sync!(self, cached_frame, e); } } Opcode::LoadAttr => { let name_idx = fetch_u16!(cached_frame); let name_id = StringId::from_index(name_idx); handle_call_result!(self, cached_frame, self.load_attr(name_id)); } Opcode::LoadAttrImport => { let name_idx = fetch_u16!(cached_frame); let name_id = 
StringId::from_index(name_idx); handle_call_result!(self, cached_frame, self.load_attr_import(name_id)); } Opcode::StoreAttr => { let name_idx = fetch_u16!(cached_frame); let name_id = StringId::from_index(name_idx); try_catch_sync!(self, cached_frame, self.store_attr(name_id)); } // Control Flow - use cached_frame.ip directly for jumps Opcode::Jump => { let offset = fetch_i16!(cached_frame); jump_relative!(cached_frame.ip, offset); } Opcode::JumpIfTrue => { let offset = fetch_i16!(cached_frame); let cond = self.pop(); if cond.py_bool(self) { jump_relative!(cached_frame.ip, offset); } cond.drop_with_heap(self); } Opcode::JumpIfFalse => { let offset = fetch_i16!(cached_frame); let cond = self.pop(); if !cond.py_bool(self) { jump_relative!(cached_frame.ip, offset); } cond.drop_with_heap(self); } Opcode::JumpIfTrueOrPop => { let offset = fetch_i16!(cached_frame); if self.peek().py_bool(self) { jump_relative!(cached_frame.ip, offset); } else { let value = self.pop(); value.drop_with_heap(self); } } Opcode::JumpIfFalseOrPop => { let offset = fetch_i16!(cached_frame); if self.peek().py_bool(self) { let value = self.pop(); value.drop_with_heap(self); } else { jump_relative!(cached_frame.ip, offset); } } // Iteration - route through exception handling Opcode::GetIter => { let value = self.pop(); // Create a MontyIter from the value and store on heap match MontyIter::new(value, self) { Ok(iter) => match self.heap.allocate(HeapData::Iter(iter)) { Ok(heap_id) => self.push(Value::Ref(heap_id)), Err(e) => catch_sync!(self, cached_frame, e.into()), }, Err(e) => catch_sync!(self, cached_frame, e), } } Opcode::ForIter => { let offset = fetch_i16!(cached_frame); // Peek at the iterator on TOS and extract heap_id let Value::Ref(heap_id) = *self.peek() else { return Err(RunError::internal("ForIter: expected iterator ref on stack")); }; // Use advance_iterator which avoids std::mem::replace overhead // by using a two-phase approach: read state, get value, update index match 
advance_on_heap(self.heap, heap_id, self.interns) { Ok(Some(value)) => self.push(value), Ok(None) => { // Iterator exhausted - pop it and jump to end let iter = self.pop(); iter.drop_with_heap(self); jump_relative!(cached_frame.ip, offset); } Err(e) => { // Error during iteration (e.g., dict size changed) let iter = self.pop(); iter.drop_with_heap(self); catch_sync!(self, cached_frame, e); } } } // Function Calls - sync IP before call, reload cache after frame changes Opcode::CallFunction => { let arg_count = fetch_u8!(cached_frame) as usize; // Sync IP before call (call_function may access frame for traceback) self.current_frame_mut().ip = cached_frame.ip; handle_call_result!(self, cached_frame, self.exec_call_function(arg_count)); } Opcode::CallBuiltinFunction => { // Fetch operands: builtin_id (u8) + arg_count (u8) let builtin_id = fetch_u8!(cached_frame); let arg_count = fetch_u8!(cached_frame) as usize; // Sync IP before call (builtins like map() may call evaluate_function // which pushes frames and runs a nested run() loop) self.current_frame_mut().ip = cached_frame.ip; match self.exec_call_builtin_function(builtin_id, arg_count) { Ok(result) => self.push(result), Err(err) => catch_sync!(self, cached_frame, err), } } Opcode::CallBuiltinType => { // Fetch operands: type_id (u8) + arg_count (u8) let type_id = fetch_u8!(cached_frame); let arg_count = fetch_u8!(cached_frame) as usize; match self.exec_call_builtin_type(type_id, arg_count) { Ok(result) => self.push(result), // IP sync deferred to error path (no frame push possible) Err(err) => catch_sync!(self, cached_frame, err), } } Opcode::CallFunctionKw => { // Fetch operands: pos_count, kw_count, then kw_count name indices let pos_count = fetch_u8!(cached_frame) as usize; let kw_count = fetch_u8!(cached_frame) as usize; // Read keyword name StringIds let mut kwname_ids = Vec::with_capacity(kw_count); for _ in 0..kw_count { kwname_ids.push(StringId::from_index(fetch_u16!(cached_frame))); } // Sync IP before 
call (call_function may access frame for traceback) self.current_frame_mut().ip = cached_frame.ip; handle_call_result!(self, cached_frame, self.exec_call_function_kw(pos_count, kwname_ids)); } Opcode::CallAttr => { // CallAttr: u16 name_id, u8 arg_count // Stack: [obj, arg1, arg2, ..., argN] -> [result] let name_idx = fetch_u16!(cached_frame); let arg_count = fetch_u8!(cached_frame) as usize; let name_id = StringId::from_index(name_idx); // Sync IP before call (may yield to host for OS/external calls) self.current_frame_mut().ip = cached_frame.ip; handle_call_result!(self, cached_frame, self.exec_call_attr(name_id, arg_count)); } Opcode::CallAttrKw => { // CallAttrKw: u16 name_id, u8 pos_count, u8 kw_count, then kw_count u16 name indices // Stack: [obj, pos_args..., kw_values...] -> [result] let name_idx = fetch_u16!(cached_frame); let pos_count = fetch_u8!(cached_frame) as usize; let kw_count = fetch_u8!(cached_frame) as usize; let name_id = StringId::from_index(name_idx); // Read keyword name StringIds let mut kwname_ids = Vec::with_capacity(kw_count); for _ in 0..kw_count { kwname_ids.push(StringId::from_index(fetch_u16!(cached_frame))); } // Sync IP before call (may yield to host for OS/external calls) self.current_frame_mut().ip = cached_frame.ip; handle_call_result!( self, cached_frame, self.exec_call_attr_kw(name_id, pos_count, kwname_ids) ); } Opcode::CallFunctionExtended => { let flags = fetch_u8!(cached_frame); let has_kwargs = (flags & 0x01) != 0; // Sync IP before call self.current_frame_mut().ip = cached_frame.ip; handle_call_result!(self, cached_frame, self.exec_call_function_extended(has_kwargs)); } Opcode::CallAttrExtended => { let name_idx = fetch_u16!(cached_frame); let flags = fetch_u8!(cached_frame); let name_id = StringId::from_index(name_idx); let has_kwargs = (flags & 0x01) != 0; // Sync IP before call (may yield to host for OS/external calls) self.current_frame_mut().ip = cached_frame.ip; handle_call_result!(self, cached_frame, 
self.exec_call_attr_extended(name_id, has_kwargs)); } // Function Definition Opcode::MakeFunction => { let func_idx = fetch_u16!(cached_frame); let defaults_count = fetch_u8!(cached_frame) as usize; let func_id = FunctionId::from_index(func_idx); if defaults_count == 0 { // No defaults - use inline Value::Function (no heap allocation) self.push(Value::DefFunction(func_id)); } else { // Pop default values from stack (drain maintains order: first pushed = first in vec) let defaults = self.pop_n(defaults_count); // Create FunctionDefaults on heap and push reference let heap_id = self .heap .allocate(HeapData::FunctionDefaults(FunctionDefaults { func_id, defaults }))?; self.push(Value::Ref(heap_id)); } } Opcode::MakeClosure => { let func_idx = fetch_u16!(cached_frame); let defaults_count = fetch_u8!(cached_frame) as usize; let cell_count = fetch_u8!(cached_frame) as usize; let func_id = FunctionId::from_index(func_idx); // Pop cells from stack (pushed after defaults, so on top) // Cells are Value::Ref pointing to HeapData::Cell // We use individual pops which reverses order, so we need to reverse back let mut cells = Vec::with_capacity(cell_count); for _ in 0..cell_count { // mut needed for dec_ref_forget when ref-count-panic feature is enabled #[cfg_attr(not(feature = "ref-count-panic"), expect(unused_mut))] let mut cell_val = self.pop(); match &cell_val { Value::Ref(heap_id) => { // Keep the reference - the Closure will own the HeapId cells.push(*heap_id); // Mark the Value as dereferenced since Closure takes ownership // of the reference count (we don't call drop_with_heap because // we're not decrementing the refcount, just transferring it) #[cfg(feature = "ref-count-panic")] cell_val.dec_ref_forget(); } _ => { return Err(RunError::internal("MakeClosure: expected cell reference on stack")); } } } // Reverse to get original order (individual pops reverse the order) cells.reverse(); // Pop default values from stack (drain maintains order: first pushed = first in vec) 
let defaults = self.pop_n(defaults_count); // Create Closure on heap and push reference let heap_id = self.heap.allocate(HeapData::Closure(Closure { func_id, cells, defaults, }))?; self.push(Value::Ref(heap_id)); } // Exception Handling Opcode::Raise => { let exc = self.pop(); let error = self.make_exception(exc, true); // is_raise=true, hide caret catch_sync!(self, cached_frame, error); } Opcode::Reraise => { // Pop the current exception from the stack to re-raise it // If caught, handle_exception will push it back let error = if let Some(exc) = self.exception_stack.pop() { self.make_exception(exc, true) // is_raise=true for reraise } else { // No active exception - create a RuntimeError SimpleException::new_msg(ExcType::RuntimeError, "No active exception to reraise").into() }; catch_sync!(self, cached_frame, error); } Opcode::ClearException => { // Pop the current exception from the stack // This restores the previous exception context (if any) if let Some(exc) = self.exception_stack.pop() { exc.drop_with_heap(self); } } Opcode::CheckExcMatch => { // Stack: [exception, exc_type] -> [exception, bool] let exc_type = self.pop(); let exception = self.peek(); let result = self.check_exc_match(exception, &exc_type); exc_type.drop_with_heap(self); let result = result?; self.push(Value::Bool(result)); } // Return - reload cache after popping frame Opcode::ReturnValue => { let value = self.pop(); if self.frames.len() == 1 { // Last frame - check if this is main task or spawned task let is_main_task = self.is_main_task(); if is_main_task { // Module-level return - we're done return Ok(FrameExit::Return(value)); } // Spawned task completed - handle task completion let result = self.handle_task_completion(value); match result { Ok(AwaitResult::ValueReady(v)) => { self.push(v); } Ok(AwaitResult::FramePushed) => { // Switched to another task - reload cache reload_cache!(self, cached_frame); } Ok(AwaitResult::Yield(pending)) => { // All tasks blocked - return to host return 
Ok(FrameExit::ResolveFutures(pending)); } Err(e) => { catch_sync!(self, cached_frame, e); } } continue; } // Pop current frame and push return value if self.pop_frame() { // This frame indicated evaluation should stop - return to host with value // e.g. `evaluate_function` return Ok(FrameExit::Return(value)); } self.push(value); // Reload cache from parent frame reload_cache!(self, cached_frame); } // Async/Await Opcode::Await => { // Sync IP before exec (may push new frame for coroutine) self.current_frame_mut().ip = cached_frame.ip; let result = self.exec_get_awaitable(); match result { Ok(AwaitResult::ValueReady(value)) => { self.push(value); } Ok(AwaitResult::FramePushed) => { // Reload cache after pushing a new frame reload_cache!(self, cached_frame); } Ok(AwaitResult::Yield(pending_calls)) => { // All tasks are blocked - return control to host return Ok(FrameExit::ResolveFutures(pending_calls)); } Err(e) => { catch_sync!(self, cached_frame, e); } } } // Unpacking - route through exception handling Opcode::UnpackSequence => { let count = fetch_u8!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.unpack_sequence(count)); } Opcode::UnpackEx => { let before = fetch_u8!(cached_frame) as usize; let after = fetch_u8!(cached_frame) as usize; try_catch_sync!(self, cached_frame, self.unpack_ex(before, after)); } // Special Opcode::Nop => { // No operation } // Module Operations Opcode::LoadModule => { let module_id = fetch_u8!(cached_frame); try_catch_sync!(self, cached_frame, self.load_module(module_id)); } Opcode::RaiseImportError => { // Fetch the module name from the constant pool and raise ModuleNotFoundError let const_idx = fetch_u16!(cached_frame); let module_name = cached_frame.code.constants().get(const_idx); // The constant should be an InternString from compile_import/compile_import_from let name_str = match module_name { Value::InternString(id) => self.interns.get_str(*id), _ => "", }; let error = ExcType::module_not_found_error(name_str); 
catch_sync!(self, cached_frame, error);
}
}
}
}

/// Loads a built-in module and pushes a reference to it onto the stack.
///
/// # Errors
/// Propagates any error returned while creating the module on the heap.
///
/// # Panics
/// Panics with "unknown module id" if `module_id` does not correspond to a `BuiltinModule`.
fn load_module(&mut self, module_id: u8) -> RunResult<()> {
    let module = BuiltinModule::from_repr(module_id).expect("unknown module id");

    // Create the module on the heap using pre-interned strings
    let heap_id = module.create(self)?;
    self.push(Value::Ref(heap_id));
    Ok(())
}

/// Resumes execution after an external call completes.
///
/// Converts `obj` into a VM value, pushes it onto the stack as the call's return
/// value, and continues execution via `run()`.
///
/// # Errors
/// If `obj` cannot be converted, returns a `RuntimeError` whose message starts with
/// "invalid return type: ". Otherwise returns whatever `run()` returns.
pub fn resume(&mut self, obj: MontyObject) -> Result {
    let value = obj
        .to_value(self)
        .map_err(|e| SimpleException::new(ExcType::RuntimeError, Some(format!("invalid return type: {e}"))))?;
    self.push(value);
    self.run()
}

/// Sets the instruction IP used for exception table lookup and traceback generation.
///
/// Used by `run()` to restore the IP to the load instruction's position before
/// raising `NameError` for auto-injected `ExtFunction` values, so the traceback
/// points to the name reference rather than the call expression.
pub fn set_instruction_ip(&mut self, ip: usize) {
    self.instruction_ip = ip;
}

/// Resumes execution after an external call raised an exception.
///
/// Uses the exception handling mechanism to try to catch the exception.
/// If caught, continues execution at the handler. If not, propagates the error.
///
/// # Errors
/// Returns the error handed back by `handle_exception` when no handler caught it;
/// otherwise returns whatever `run()` returns.
pub fn resume_with_exception(&mut self, error: RunError) -> Result {
    // Use the normal exception handling mechanism
    // handle_exception returns None if caught, Some(error) if not caught
    if let Some(uncaught_error) = self.handle_exception(error) {
        return Err(uncaught_error);
    }
    // Exception was caught, continue execution
    self.run()
}

// ========================================================================
// Stack Operations
// ========================================================================

/// Pushes a value onto the operand stack.
#[inline]
pub(crate) fn push(&mut self, value: Value) {
    self.stack.push(value);
}

/// Pops a value from the operand stack.
///
/// # Panics
/// Panics with "stack underflow" if the stack is empty.
#[inline]
pub(super) fn pop(&mut self) -> Value {
    self.stack.pop().expect("stack underflow")
}

/// Peeks at the top of the operand stack without removing it.
///
/// # Panics
/// Panics with "stack underflow" if the stack is empty.
#[inline]
pub(super) fn peek(&self) -> &Value {
    self.stack.last().expect("stack underflow")
}

/// Removes the top `n` values from the stack and returns them in stack order.
///
/// The returned vec is ordered bottom-to-top: the value that was pushed first comes
/// first and the former top of stack comes last.
///
/// # Panics
/// Panics with "stack underflow" if fewer than `n` values are on the stack,
/// matching the message used by `pop` and `peek`.
pub(super) fn pop_n(&mut self, n: usize) -> Vec {
    // checked_sub turns an underflow into the same diagnostic as pop/peek instead of
    // an arithmetic-overflow or slice-range panic.
    let start = self.stack.len().checked_sub(n).expect("stack underflow");
    self.stack.split_off(start)
}

// ========================================================================
// Frame Operations
// ========================================================================

/// Returns a reference to the current (topmost) call frame.
///
/// # Panics
/// Panics with "no active frame" if the call stack is empty.
#[inline]
pub(crate) fn current_frame(&self) -> &CallFrame<'a> {
    self.frames.last().expect("no active frame")
}

/// Creates a new cached frame from the current frame.
#[inline]
pub(super) fn new_cached_frame(&self) -> CachedFrame<'a> {
    self.current_frame().into()
}

/// Returns a mutable reference to the current call frame.
///
/// # Panics
/// Panics with "no active frame" if the call stack is empty.
#[inline]
pub(super) fn current_frame_mut(&mut self) -> &mut CallFrame<'a> {
    self.frames.last_mut().expect("no active frame")
}

/// Pushes the given frame onto the call stack.
///
/// Returns an error if the recursion depth limit is exceeded by pushing this frame.
/// In that case the frame's stack region is cleaned up and the frame is not pushed.
pub(super) fn push_frame(&mut self, frame: CallFrame<'a>) -> RunResult<()> {
    // root frame doesn't count towards recursion depth, so only check if there's already a frame on the stack
    if !self.frames.is_empty()
        && let Err(e) = self.heap.incr_recursion_depth()
    {
        self.cleanup_frame_state(&frame);
        return Err(e.into());
    }
    self.frames.push(frame);
    Ok(())
}

/// Pops the current frame from the call stack.
///
/// Cleans up the frame's stack region and namespace (except for global namespace).
/// Syncs `instruction_ip` to the parent frame's IP so that exception handling
/// looks up handlers in the correct frame's exception table.
///
/// Returns `true` if this frame indicated evaluation should stop when popped.
///
/// # Panics
/// Panics with "no frame to pop" if the call stack is empty.
pub(super) fn pop_frame(&mut self) -> bool {
    let frame = self.frames.pop().expect("no frame to pop");
    self.cleanup_frame_state(&frame);
    // Sync instruction_ip to the parent frame so exception table lookups
    // target the correct frame after returning from a nested run() call.
    if let Some(parent) = self.frames.last() {
        self.instruction_ip = parent.ip;
    }
    // Decrement recursion depth if this wasn't the root frame
    // (mirrors push_frame, which only increments when a frame is already present)
    if !self.frames.is_empty() {
        self.heap.decr_recursion_depth();
    }
    frame.should_return
}

/// Releases the stack region owned by `frame` and reports its locals as freed.
///
/// Every value from `frame.stack_base` to the top of the stack is dropped with
/// `drop_with_heap`. If the frame has any locals, the size of the locals region is
/// reported to the heap's tracker through `on_free`.
fn cleanup_frame_state(&mut self, frame: &CallFrame<'_>) {
    // Clean up frame's stack region (locals + operands).
    // Locals occupy stack[frame.stack_base..frame.stack_base + frame.locals_count],
    // operands are above that. Draining from stack_base covers both.
    self.stack
        .drain(frame.stack_base..)
        .for_each(|value| value.drop_with_heap(&mut *self.heap));
    // Track freed memory for locals
    if frame.locals_count > 0 {
        let size = frame.locals_count as usize * std::mem::size_of::();
        self.heap.tracker_mut().on_free(|| size);
    }
}

/// Cleans up all frames and stack values for the current task.
///
/// Used when a task completes or fails and we need to switch to another task.
/// Drains the entire stack with proper `drop_with_heap` for each value (since locals
/// are inlined on the stack), then clears the frame list.
///
/// NOTE(review): unlike `cleanup_frame_state`, this does not call the tracker's
/// `on_free` for locals and does not adjust the recursion depth for the discarded
/// frames. Confirm that callers account for both.
pub(super) fn cleanup_current_task(&mut self) {
    self.stack.drain(..).drop_with_heap(self.heap);
    self.frames.clear();
}

/// Runs garbage collection with proper GC roots.
///
/// GC roots include values in the stack (locals + operands), globals, and exception stack.
fn run_gc(&mut self) {
    // Collect roots from all reachable values. Values for which `Value::ref_id`
    // returns `None` are skipped by `filter_map`.
    let stack_roots = self.stack.iter().filter_map(Value::ref_id);
    let globals_roots = self.globals.iter().filter_map(Value::ref_id);
    let exc_roots = self.exception_stack.iter().filter_map(Value::ref_id);

    // Collect all roots into a vec to avoid lifetime issues
    // (the iterators borrow fields of `self` while `collect_garbage` needs the heap mutably)
    let roots: Vec = stack_roots.chain(globals_roots).chain(exc_roots).collect();

    self.heap.collect_garbage(roots);
}

/// Returns the current source position for traceback generation.
///
/// Uses `instruction_ip` which is set at the start of each instruction in the run loop,
/// ensuring accurate position tracking even when using cached IP for bytecode fetching.
///
/// Falls back to the default `CodeRange` when the current frame's code has no
/// location entry for that offset.
pub(super) fn current_position(&self) -> CodeRange {
    let frame = self.current_frame();
    // Use instruction_ip which points to the start of the current instruction
    // (set at the beginning of each loop iteration in run())
    frame
        .code
        .location_for_offset(self.instruction_ip)
        .map(crate::bytecode::code::LocationEntry::range)
        .unwrap_or_default()
}

// ========================================================================
// Variable Operations
// ========================================================================

/// Loads a local variable and pushes it onto the stack.
///
/// For true locals (assigned somewhere in the function), returns `UnboundLocalError`
/// if accessed before assignment. For unassigned names (never assigned in this scope),
/// returns `NameLookup` to signal that the host should resolve the name.
///
/// Returns `Ok(None)` for normal loads, `Ok(Some(FrameExit::NameLookup))` when
/// the host needs to resolve an unknown name, or `Err` for true unbound locals.
fn load_local(&mut self, cached_frame: &CachedFrame<'a>, slot: u16) -> Result, RunError> {
    // Locals live inline on the operand stack, starting at the frame's stack_base.
    let value = &self.stack[cached_frame.stack_base + slot as usize];

    // Check for undefined value — raise appropriate error based on whether
    // this is a true local (assigned somewhere) or an undefined reference
    if matches!(value, Value::Undefined) {
        let name = cached_frame.code.local_name(slot);
        if cached_frame.code.is_assigned_local(slot) {
            // True local accessed before assignment
            return Err(self.unbound_local_error(slot, name));
        }
        // Name doesn't exist in any scope — yield to host for resolution.
        // Panics if the slot has no recorded name.
        let name_id = name.expect("LocalUnassigned should always have a name");
        return Ok(Some(FrameExit::NameLookup {
            name_id,
            namespace_slot: slot,
            is_global: false,
        }));
    }

    // Defined value: push a clone so the local slot keeps its own reference.
    self.push(value.clone_with_heap(self.heap));
    Ok(None)
}

/// Loads a local variable in call context, pushing `ExtFunction` for undefined names.
///
/// Unlike `load_local`, this never yields `NameLookup`. When the variable is undefined
/// (a `LocalUnassigned` name), it pushes `Value::ExtFunction(name_id)` so that the
/// subsequent `CallFunction` opcode can yield `FunctionCall` instead.
///
/// In the undefined case the current `instruction_ip` is also recorded in
/// `ext_function_load_ip`.
fn load_local_callable(&mut self, cached_frame: &CachedFrame<'a>, slot: u16, name_id: StringId) {
    let value = &self.stack[cached_frame.stack_base + slot as usize];
    if matches!(value, Value::Undefined) {
        // LocalUnassigned in call context — push ExtFunction for the host to handle.
        // Remember where the name was loaded, as load_global_callable does.
        self.ext_function_load_ip = Some(self.instruction_ip);
        self.push(Value::ExtFunction(name_id));
    } else {
        self.push(value.clone_with_heap(self.heap));
    }
}

/// Loads a global variable in call context, pushing `ExtFunction` for undefined names.
///
/// Unlike `load_global`, this never yields `NameLookup`. When the variable is undefined,
/// it pushes `Value::ExtFunction(name_id)` so that the subsequent `CallFunction` opcode
/// can yield `FunctionCall` instead.
fn load_global_callable(&mut self, slot: u16, name_id: StringId) {
    let value = self.globals[slot as usize].clone_with_heap(self.heap);
    if matches!(value, Value::Undefined) {
        // Save the load instruction's IP so NameError tracebacks point to the name
        self.ext_function_load_ip = Some(self.instruction_ip);
        self.push(Value::ExtFunction(name_id));
    } else {
        self.push(value);
    }
}

/// Creates an UnboundLocalError for a local variable accessed before assignment.
///
/// Uses the interned string for `name` when present, otherwise the fallback below.
fn unbound_local_error(&self, slot: u16, name: Option) -> RunError {
    let name_str = match name {
        Some(id) => self.interns.get_str(id).to_string(),
        // NOTE(review): the fallback is an empty string and `slot` is not used anywhere
        // in this function — confirm whether the placeholder was meant to mention the slot.
        None => format!(""),
    };
    ExcType::unbound_local_error(&name_str).into()
}

/// Creates a NameError for an undefined global variable.
///
/// Uses the interned string for `name` when present, otherwise the fallback below.
fn name_error(&self, slot: u16, name: Option) -> RunError {
    let name_str = match name {
        Some(id) => self.interns.get_str(id).to_string(),
        // NOTE(review): the fallback is an empty string and `slot` is not used anywhere
        // in this function — confirm whether the placeholder was meant to mention the slot.
        None => format!(""),
    };
    ExcType::name_error(&name_str).into()
}

/// Pops the top of stack and stores it in a local variable.
///
/// The value previously held in the slot is dropped with `drop_with_heap`.
fn store_local(&mut self, cached_frame: &CachedFrame<'a>, slot: u16) {
    let value = self.pop();
    let target = &mut self.stack[cached_frame.stack_base + slot as usize];
    let old_value = std::mem::replace(target, value);
    old_value.drop_with_heap(self);
}

/// Deletes a local variable (sets it to Undefined).
///
/// The value previously held in the slot is dropped with `drop_with_heap`.
fn delete_local(&mut self, cached_frame: &CachedFrame<'a>, slot: u16) {
    let target = &mut self.stack[cached_frame.stack_base + slot as usize];
    let old_value = std::mem::replace(target, Value::Undefined);
    old_value.drop_with_heap(self);
}

/// Loads a global variable and pushes it onto the stack.
///
/// When the variable is undefined, yields `NameLookup` to the host for resolution
/// instead of immediately raising `NameError`. This allows the host to provide
/// external function bindings lazily.
fn load_global(&mut self, slot: u16) -> Result, RunError> {
    let value = self.globals[slot as usize].clone_with_heap(self.heap);

    // Check for undefined value — raise appropriate error or yield to host
    if matches!(value, Value::Undefined) {
        // NOTE(review): the name and assigned-local lookups below go through the
        // *current frame's* code using a global slot index — confirm this is valid
        // when the current frame is not the module-level frame.
        let name = self.current_frame().code.local_name(slot);
        // If the name is registered as an assigned local (e.g. a module-level
        // variable or comprehension loop variable), raise UnboundLocalError
        // immediately rather than yielding NameLookup.
        if self.current_frame().code.is_assigned_local(slot) {
            return Err(self.unbound_local_error(slot, name));
        }
        let Some(name_id) = name else {
            // No name available — raise NameError directly
            return Err(self.name_error(slot, None));
        };
        Ok(Some(FrameExit::NameLookup {
            name_id,
            namespace_slot: slot,
            is_global: true,
        }))
    } else {
        self.push(value);
        Ok(None)
    }
}

/// Pops the top of stack and stores it in a global variable.
///
/// The value previously held in the slot is dropped with `drop_with_heap`.
fn store_global(&mut self, slot: u16) {
    let value = self.pop();
    let old_value = std::mem::replace(&mut self.globals[slot as usize], value);
    old_value.drop_with_heap(self);
}

/// Deletes a global variable (sets it to Undefined).
///
/// The value previously held in the slot is dropped with `drop_with_heap`.
fn delete_global(&mut self, slot: u16) {
    let old_value = std::mem::replace(&mut self.globals[slot as usize], Value::Undefined);
    old_value.drop_with_heap(self);
}

/// Loads from a closure cell and pushes onto the stack.
///
/// The cell `HeapId` is read from the frame's local variable slot on the stack
/// (cells are stored as `Value::Ref(cell_id)` at known positions in the locals region).
/// Returns a `NameError` if the cell value is undefined (free variable not bound).
fn load_cell(&mut self, cached_frame: &CachedFrame<'a>, slot: u16) -> RunResult<()> {
    let cell_id = self.cell_id_from_local(cached_frame, slot);
    // Panics if the heap entry behind the local slot is not a Cell.
    let value = match self.heap.get(cell_id) {
        HeapData::Cell(c) => c.0.clone_with_heap(self),
        _ => panic!("LoadCell: entry is not a Cell"),
    };

    // Check for undefined value - raise NameError for unbound free variable
    if matches!(value, Value::Undefined) {
        // Release the clone taken above before returning the error.
        value.drop_with_heap(self);
        let name = cached_frame.code.local_name(slot);
        return Err(self.free_var_error(name));
    }

    self.push(value);
    Ok(())
}

/// Extracts the cell `HeapId` from a local variable slot on the stack.
///
/// Cell variables are stored as `Value::Ref(cell_id)` in the frame's locals region.
///
/// # Panics
/// Panics if the slot holds anything other than a `Value::Ref`.
fn cell_id_from_local(&self, cached_frame: &CachedFrame<'_>, slot: u16) -> HeapId {
    match &self.stack[cached_frame.stack_base + slot as usize] {
        Value::Ref(cell_id) => *cell_id,
        other => panic!("LoadCell/StoreCell: expected cell reference in local slot {slot}, found {other:?}"),
    }
}

/// Creates a NameError for an unbound free variable.
///
/// Uses the interned string for `name` when present; otherwise the name passed to the
/// error constructor is the empty string.
fn free_var_error(&self, name: Option) -> RunError {
    let name_str = match name {
        Some(id) => self.interns.get_str(id).to_string(),
        None => "".to_string(),
    };
    ExcType::name_error_free_variable(&name_str).into()
}

/// Pops the top of stack and stores it in a closure cell.
///
/// The cell `HeapId` is read from the frame's local variable slot on the stack.
fn store_cell(&mut self, cached_frame: &CachedFrame<'_>, slot: u16) {
    let value = self.pop();
    // The guard will clean up the new value if we panic, or the old value if we swap
    let mut guard = HeapGuard::new(value, self);
    let (value, this) = guard.as_parts_mut();
    let cell_id = this.cell_id_from_local(cached_frame, slot);
    // Swap the new value into the cell; the guard is left holding the cell's previous value.
    match this.heap.get_mut(cell_id) {
        HeapDataMut::Cell(c) => std::mem::swap(&mut c.0, value),
        _ => panic!("StoreCell: entry is not a Cell"),
    }
}
}

// `heap` is not a public field on VM, so this implementation needs to go here rather than in `heap.rs`
impl ContainsHeap for VM<'_, '_, T> {
    type ResourceTracker = T;

    // Shared access to the VM's heap.
    fn heap(&self) -> &Heap {
        self.heap
    }

    // Mutable access to the VM's heap.
    fn heap_mut(&mut self) -> &mut Heap {
        self.heap
    }
}

================================================
FILE: crates/monty/src/bytecode/vm/scheduler.rs
================================================
//! Task scheduler for async execution and call ID allocation.
//!
//! # Task Model
//!
//! - Task 0 is the "main task" which uses the VM's stack/frames directly
//! - Spawned tasks (1+) store their own execution context in the Task struct
//! - When switching tasks, the scheduler swaps contexts with the VM

use std::collections::VecDeque;

use ahash::{AHashMap, AHashSet};

use crate::{
    args::ArgValues,
    asyncio::{CallId, TaskId},
    exception_private::RunError,
    heap::{DropWithHeap, HeapId},
    heap_data::HeapDataMut,
    parse::CodeRange,
    value::Value,
};

/// Task execution state for async scheduling.
///
/// Tracks whether a task is ready to run, blocked waiting for something,
/// or has completed (successfully or with an error).
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub(crate) enum TaskState {
    /// Task is ready to execute (in the ready queue).
    Ready,
    /// Task is blocked waiting for an external call to resolve.
    BlockedOnCall(CallId),
    /// Task is blocked waiting for a GatherFuture to complete.
    BlockedOnGather(HeapId),
    /// Task completed successfully with a return value.
Completed(Value),
    /// Task failed with an error.
    Failed(RunError),
}

/// A single async task with its own execution context.
///
/// The main task (task 0) doesn't store its own frames/stack - it uses the VM's
/// directly. Spawned tasks store their execution context here so they can be
/// swapped in and out.
///
/// # Context Switching
///
/// When switching away from a non-main task, its context is saved here.
/// When switching to it, the context is loaded into the VM.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub(crate) struct Task {
    /// Unique identifier for this task.
    pub id: TaskId,
    /// Serialized call frames for this task's execution.
    /// Empty for the main task (which uses VM's frames directly).
    pub frames: Vec,
    /// Operand stack for this task.
    /// Empty for the main task (which uses VM's stack directly).
    pub stack: Vec,
    /// Exception stack for nested except blocks.
    pub exception_stack: Vec,
    /// Saved copy of the VM-level `instruction_ip` (for exception table lookup).
    pub instruction_ip: usize,
    /// Coroutine being executed by this task (if any).
    /// Used to mark the coroutine as Completed when the task finishes.
    pub coroutine_id: Option,
    /// GatherFuture this task belongs to (if spawned by gather).
    /// Used to cancel sibling tasks when this task fails.
    pub gather_id: Option,
    /// Index in the gather's results where this task's result should be stored.
    /// Only set for tasks spawned by gather.
    pub gather_result_idx: Option,
    /// Current execution state.
    pub state: TaskState,
    /// CallId that unblocked this task (set when task transitions from Blocked to Ready).
    /// Used to retrieve the resolved value when the task resumes.
    pub unblocked_by: Option,
}

/// Serialized call frame for task storage.
///
/// Similar to `SerializedFrame` but used within the scheduler for task context.
/// Cannot store `&Code` references - uses `FunctionId` to look up code on resume.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub(crate) struct SerializedTaskFrame {
    /// Which function's code this frame executes (None = module-level).
    pub function_id: Option,
    /// Instruction pointer within this frame's bytecode.
    pub ip: usize,
    /// Base index into the VM stack for this frame's locals region.
    pub stack_base: usize,
    /// Number of local variable slots (0 for module-level frames).
    pub locals_count: u16,
    /// Call site position (for tracebacks).
    pub call_position: Option,
}

impl Task {
    /// Creates a new task in the Ready state.
    ///
    /// The task starts with empty frames, stack and exception stack, an
    /// `instruction_ip` of 0 and no `unblocked_by` call.
    ///
    /// # Arguments
    /// * `id` - Unique task identifier
    /// * `coroutine_id` - Optional HeapId of the coroutine being executed
    /// * `gather_id` - Optional HeapId of the GatherFuture this task belongs to
    /// * `gather_result_idx` - Optional index in the gather's results where this
    ///   task's result should be stored
    pub fn new(
        id: TaskId,
        coroutine_id: Option,
        gather_id: Option,
        gather_result_idx: Option,
    ) -> Self {
        Self {
            id,
            frames: Vec::new(),
            stack: Vec::new(),
            exception_stack: Vec::new(),
            instruction_ip: 0,
            coroutine_id,
            gather_id,
            gather_result_idx,
            state: TaskState::Ready,
            unblocked_by: None,
        }
    }

    /// Returns true if this task has completed (successfully or with failure).
    #[inline]
    pub fn is_finished(&self) -> bool {
        matches!(self.state, TaskState::Completed(_) | TaskState::Failed(_))
    }
}

/// Internal representation of a pending external call.
///
/// Stores the data needed to retry or resume an external function call,
/// along with tracking information for the task that created it.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub(crate) struct PendingCallData {
    /// Arguments for the function (includes both positional and keyword args).
    pub args: ArgValues,
    /// Task that created this call (for ignoring results if task is cancelled).
    pub creator_task: TaskId,
}

/// Scheduler for managing call IDs, async tasks, and external call tracking.
///
/// Always present on the VM (not optional). Owns the `next_call_id` counter
/// used by both sync and async code paths, plus all async-related state:
/// - Task management (creation, scheduling, completion)
/// - External call tracking and resolution
///
/// # Main Task
///
/// Task 0 is the "main task" which executes using the VM's stack/frames directly.
/// It's always created at scheduler initialization but doesn't store its own context
/// (the VM holds it). Spawned tasks (1+) store their context in the Task struct.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub(crate) struct Scheduler {
    /// All tasks (main task at index 0, spawned tasks follow).
    /// Indexed directly by the raw task ID in `get_task` / `get_task_mut`.
    tasks: Vec,
    /// Queue of task IDs ready to execute.
    ready_queue: VecDeque,
    /// Currently executing task (None only during task switching).
    current_task: Option,
    /// Counter for generating new task IDs.
    next_task_id: u32,
    /// Counter for external call IDs (always incremented, even for sync resolution).
    next_call_id: u32,
    /// Maps CallId -> pending call data for unresolved external calls.
    /// Populated when host calls `run_pending()`.
    pending_calls: AHashMap,
    /// Maps CallId -> resolved Value for futures that have been resolved.
    /// Entry is removed when the value is consumed by awaiting.
    resolved: AHashMap,
    /// CallIds that have been awaited (to detect double-await).
    consumed: AHashSet,
    /// Maps CallId -> (gather_heap_id, result_index) for gathers waiting on external futures.
    /// When a CallId is resolved, the result is stored in the gather's results at the given index.
    gather_waiters: AHashMap,
}

impl Scheduler {
    /// Creates a new scheduler with the main task (task 0) as current.
    ///
    /// The main task uses the VM's stack/frames directly and is always present.
    /// It starts as the current task (not in the ready queue) since it runs
    /// immediately without needing to be scheduled.
pub fn new() -> Self { let mut main_task = Task::new(TaskId::default(), None, None, None); // Main task starts Running, not Ready (it's the current task, not waiting) main_task.state = TaskState::Ready; // Will be set properly when it blocks Self { tasks: vec![main_task], ready_queue: VecDeque::new(), // Main task is current, not in ready queue current_task: Some(TaskId::default()), next_task_id: 1, next_call_id: 0, pending_calls: AHashMap::new(), resolved: AHashMap::new(), consumed: AHashSet::new(), gather_waiters: AHashMap::new(), } } /// Returns the currently executing task ID. /// /// Returns `None` only during task switching operations. #[inline] pub fn current_task_id(&self) -> Option { self.current_task } /// Returns a reference to a task by ID. /// /// # Panics /// Panics if the task ID doesn't exist. #[inline] pub fn get_task(&self, task_id: TaskId) -> &Task { &self.tasks[task_id.raw() as usize] } /// Returns a mutable reference to a task by ID. /// /// # Panics /// Panics if the task ID doesn't exist. #[inline] pub fn get_task_mut(&mut self, task_id: TaskId) -> &mut Task { &mut self.tasks[task_id.raw() as usize] } /// Allocates a new CallId for an external function call. /// /// The counter always increments, even for sync resolution, to keep IDs unique. pub fn allocate_call_id(&mut self) -> CallId { let id = CallId::new(self.next_call_id); self.next_call_id += 1; id } /// Stores pending call data for an external function call. /// /// Called when the host uses async resolution (`run_pending()`). pub fn add_pending_call(&mut self, call_id: CallId, data: PendingCallData) { self.pending_calls.insert(call_id, data); } /// Removes a call_id from the pending_calls map. /// /// Called when resolving a gather's external future - the call is no longer /// pending once the result has been stored in the gather's results. 
pub fn remove_pending_call(&mut self, call_id: CallId) {
    // The removed entry, if there was one, is simply dropped.
    drop(self.pending_calls.remove(&call_id));
}

/// Reports whether `call_id` has already been awaited (consumed).
#[inline]
pub fn is_consumed(&self, call_id: CallId) -> bool {
    self.consumed.contains(&call_id)
}

/// Records `call_id` as consumed (awaited).
pub fn mark_consumed(&mut self, call_id: CallId) {
    let _newly_inserted = self.consumed.insert(call_id);
}

/// Registers a gather as waiting on an external future.
///
/// The pair `(gather_id, result_index)` is stored under `call_id`, replacing
/// any waiter previously registered for the same call.
pub fn register_gather_for_call(&mut self, call_id: CallId, gather_id: HeapId, result_index: usize) {
    let waiter = (gather_id, result_index);
    self.gather_waiters.insert(call_id, waiter);
}

/// Removes and returns the gather waiting on `call_id`, if any.
///
/// The result is `(gather_heap_id, result_index)`, or `None` when no gather
/// was registered for this call.
pub fn take_gather_waiter(&mut self, call_id: CallId) -> Option<(HeapId, usize)> {
    self.gather_waiters.remove(&call_id)
}

/// Resolves a CallId with a value.
///
/// The value is stored in `resolved` for later retrieval. The pending-call
/// entry is removed, and if its creator task is currently blocked on exactly
/// this call, that task becomes `Ready`, remembers the call in
/// `unblocked_by`, and is appended to the ready queue.
///
/// The creator is found through `pending_calls` rather than by scanning
/// all tasks.
pub fn resolve(&mut self, call_id: CallId, value: Value) {
    // Look up the creator while removing the pending entry.
    let creator = match self.pending_calls.remove(&call_id) {
        Some(data) => Some(data.creator_task),
        None => None,
    };

    self.resolved.insert(call_id, value);

    let task_id = match creator {
        Some(id) => id,
        None => return,
    };

    let task = self.get_task_mut(task_id);
    let waiting_on_this_call = match task.state {
        TaskState::BlockedOnCall(blocked_on) => blocked_on == call_id,
        _ => false,
    };
    if waiting_on_this_call {
        task.state = TaskState::Ready;
        task.unblocked_by = Some(call_id);
        self.ready_queue.push_back(task_id);
    }
}

/// Takes the resolved value for a CallId, if available.
///
/// Removes the value from the resolved map and returns it.
/// Returns `None` if the call hasn't been resolved yet.
pub fn take_resolved(&mut self, call_id: CallId) -> Option<Value> {
    self.resolved.remove(&call_id)
}

/// Takes the resolved value for a task that was unblocked.
///
/// If the task has an `unblocked_by` CallId set, takes the resolved value
/// for that call and clears the `unblocked_by` field.
/// Returns `None` if the task wasn't unblocked by a resolved call.
///
/// # Panics
/// Panics if the task ID doesn't exist.
pub fn take_resolved_for_task(&mut self, task_id: TaskId) -> Option<Value> {
    // `take()` clears the marker even when no value is stored for the call.
    let unblocked_by = self.get_task_mut(task_id).unblocked_by.take();
    unblocked_by.and_then(|call_id| self.resolved.remove(&call_id))
}

/// Marks the current task as blocked on an external call.
///
/// The task will be unblocked when `resolve()` is called with the matching CallId.
/// Does nothing when there is no current task.
pub fn block_current_on_call(&mut self, call_id: CallId) {
    if let Some(task_id) = self.current_task {
        self.get_task_mut(task_id).state = TaskState::BlockedOnCall(call_id);
    }
}

/// Marks the current task as blocked on a GatherFuture.
///
/// The task will be unblocked when all gathered tasks complete.
/// Does nothing when there is no current task.
pub fn block_current_on_gather(&mut self, gather_id: HeapId) {
    if let Some(task_id) = self.current_task {
        self.get_task_mut(task_id).state = TaskState::BlockedOnGather(gather_id);
    }
}

/// Returns all pending (unresolved) CallIds.
///
/// The ids come from iterating a hash map, so their order is unspecified.
pub fn pending_call_ids(&self) -> Vec<CallId> {
    self.pending_calls.keys().copied().collect()
}

/// Removes a task from the ready queue.
///
/// Used when handling the main task directly (via `prepare_main_task_after_resolve`)
/// instead of through the normal task switching mechanism.
pub fn remove_from_ready_queue(&mut self, task_id: TaskId) {
    self.ready_queue.retain(|&id| id != task_id);
}

/// Spawns a new task from a coroutine.
///
/// Creates a new task that will execute the given coroutine when scheduled.
/// The task is added to the ready queue.
/// /// # Arguments /// * `coroutine_id` - HeapId of the coroutine to execute /// * `gather_id` - Optional HeapId of the GatherFuture this task belongs to /// * `gather_result_idx` - Optional index in the gather's results for this task /// /// # Returns /// The TaskId of the newly created task. pub fn spawn( &mut self, coroutine_id: HeapId, gather_id: Option, gather_result_idx: Option, ) -> TaskId { let task_id = TaskId::new(self.next_task_id); self.next_task_id += 1; let task = Task::new(task_id, Some(coroutine_id), gather_id, gather_result_idx); self.tasks.push(task); self.ready_queue.push_back(task_id); task_id } /// Gets the next ready task from the queue. /// /// Returns `None` if no tasks are ready. pub fn next_ready_task(&mut self) -> Option { self.ready_queue.pop_front() } /// Adds a task back to the ready queue. pub fn make_ready(&mut self, task_id: TaskId) { let task = self.get_task_mut(task_id); task.state = TaskState::Ready; self.ready_queue.push_back(task_id); } /// Sets the current task. pub fn set_current_task(&mut self, task_id: Option) { self.current_task = task_id; } /// Marks a task as completed with a result value. /// /// If the task is part of a gather, updates the gather's results. /// If this completes the gather, unblocks the waiting task. pub fn complete_task(&mut self, task_id: TaskId, result: Value) { let task = self.get_task_mut(task_id); task.state = TaskState::Completed(result); // Note: gather wake-up logic will be implemented when gather is fully integrated } /// Marks a task as failed with an error. /// /// If the task is part of a gather, returns the gather_id so the caller /// can collect siblings from `GatherFuture.task_ids` on the heap. /// /// # Returns /// The gather_id if this task belongs to a gather (for sibling lookup). 
pub fn fail_task(&mut self, task_id: TaskId, error: RunError) -> Option { let task = self.get_task_mut(task_id); let gather_id = task.gather_id; task.state = TaskState::Failed(error); gather_id } /// Cancels a task, cleaning up its resources. /// /// This marks the task as Failed with a cancellation error and cleans up: /// - Stack values /// - Exception stack values /// - Frame cell references /// - Frame namespaces /// - Nested gathers (if the task was blocked on one) /// - Completed task results (if task finished before cancellation) /// /// The caller is responsible for cleaning up the task's coroutine on the heap. /// /// # Arguments /// * `task_id` - ID of the task to cancel /// * `heap` - Heap for dropping values and cell cleanup pub fn cancel_task( &mut self, task_id: TaskId, heap: &mut crate::heap::Heap, ) { // If task already finished, clean up its result value and return if self.get_task(task_id).is_finished() { let task = self.get_task_mut(task_id); if let TaskState::Completed(value) = std::mem::replace(&mut task.state, TaskState::Ready) { value.drop_with_heap(heap); } // Note: Failed tasks don't have values to clean up (RunError doesn't contain Values) return; } // Remove from ready queue if present (do this before getting mutable task reference) self.ready_queue.retain(|&id| id != task_id); // Check if task is blocked on a gather and get the gather info before mutating task let inner_gather_info = { let task = self.get_task(task_id); if let TaskState::BlockedOnGather(gather_id) = task.state { // Get inner gather's task IDs from heap if let crate::heap::HeapData::GatherFuture(gather) = heap.get(gather_id) { Some((gather_id, gather.task_ids.clone())) } else { None } } else { None } }; // Recursively cancel inner gather's tasks first if let Some((inner_gather_id, inner_task_ids)) = inner_gather_info { for inner_task_id in inner_task_ids { self.cancel_task(inner_task_id, heap); } // Cleanup the inner GatherFuture - extract data first to avoid borrow 
conflict let (items, results) = if let HeapDataMut::GatherFuture(gather) = heap.get_mut(inner_gather_id) { (std::mem::take(&mut gather.items), std::mem::take(&mut gather.results)) } else { (vec![], vec![]) }; // Now cleanup the extracted data with mutable heap access for item in items { if let crate::asyncio::GatherItem::Coroutine(coro_id) = item { heap.dec_ref(coro_id); } } for value in results.into_iter().flatten() { value.drop_with_heap(heap); } // Dec_ref the gather itself heap.dec_ref(inner_gather_id); } // Now get mutable reference to the task for cleanup let task = self.get_task_mut(task_id); // Clean up stack values for value in std::mem::take(&mut task.stack) { value.drop_with_heap(heap); } // Clean up exception stack values for value in std::mem::take(&mut task.exception_stack) { value.drop_with_heap(heap); } // Restore this task's depth contribution before cleanup, // since save_task_context subtracted it. let task_depth = task.frames.len(); let global_depth = heap.get_recursion_depth(); heap.set_recursion_depth(global_depth + task_depth); task.frames.clear(); // Mark as failed with a cancellation error task.state = TaskState::Failed( crate::exception_private::SimpleException::new_msg( crate::exception_private::ExcType::RuntimeError, "task was cancelled", ) .into(), ); } /// Fails the task blocked on a specific CallId with an error. /// /// Used when an external function returns an error via `FutureSnapshot::resume`. /// Uses `pending_calls` for O(1) lookup of the blocked task. /// /// # Returns /// A tuple of (task_id, gather_id) if a task was found, /// or None if no task was blocked on this CallId. /// Callers should get siblings from `GatherFuture.task_ids` if gather_id is Some. 
pub fn fail_for_call(&mut self, call_id: CallId, error: RunError) -> Option<(TaskId, Option)> { // Get blocked task from pending_calls (O(1) lookup) let task_id = self.pending_calls.remove(&call_id)?.creator_task; let gather_id = self.fail_task(task_id, error); Some((task_id, gather_id)) } /// Returns the task that created a specific pending call. /// /// Used to check if a pending call's creator task has been cancelled. #[inline] pub fn get_pending_call_creator(&self, call_id: CallId) -> Option { self.pending_calls.get(&call_id).map(|data| data.creator_task) } /// Returns true if a task has been cancelled or failed. #[inline] pub fn is_task_failed(&self, task_id: TaskId) -> bool { matches!(self.tasks.get(task_id.raw() as usize), Some(task) if matches!(task.state, TaskState::Failed(_))) } /// Cleans up all scheduler resources: pending calls, resolved values, task /// stacks/exception stacks, completed results, and task frame cell references. /// /// Each task's `recursion_depth` is restored to the global counter before /// dropping cells, because `save_task_context` subtracted the recursion depth /// and cleanup needs the correct depth to avoid underflow. 
// NOTE(review): `Heap` is written without a type argument in this text; other
// generic arguments in this file were lost in extraction, so this one may have
// been as well -- confirm against the upstream signature.
pub fn cleanup(&mut self, heap: &mut crate::heap::Heap) {
    // Drop pending call arguments
    // (`mem::take` leaves an empty map behind, so the scheduler stays usable.)
    for (_, data) in std::mem::take(&mut self.pending_calls) {
        data.args.drop_with_heap(heap);
    }
    // Drop resolved values
    for (_, value) in std::mem::take(&mut self.resolved) {
        value.drop_with_heap(heap);
    }
    // Drop task stack/exception values and completed results
    for task in &mut self.tasks {
        for value in std::mem::take(&mut task.stack) {
            value.drop_with_heap(heap);
        }
        for value in std::mem::take(&mut task.exception_stack) {
            value.drop_with_heap(heap);
        }
        // Every task is reset to `Ready` here, whatever its previous state;
        // only a `Completed` state carries a value that needs dropping.
        if let TaskState::Completed(value) = std::mem::replace(&mut task.state, TaskState::Ready) {
            value.drop_with_heap(heap);
        }
        // Restore recursion depth and clear frames
        // (one unit of depth is added back per saved frame).
        let task_depth = task.frames.len();
        let global_depth = heap.get_recursion_depth();
        heap.set_recursion_depth(global_depth + task_depth);
        task.frames.clear();
    }
}
} // closes `impl Scheduler`

// `Default` is the same as `Scheduler::new()`.
impl Default for Scheduler {
    fn default() -> Self {
        Self::new()
    }
}

================================================
FILE: crates/monty/src/exception_private.rs
================================================
use std::{
    borrow::Cow,
    fmt::{self, Display, Write},
};

use serde::{Deserialize, Serialize};
use smallvec::smallvec;
use strum::{Display, EnumString, IntoStaticStr};

use crate::{
    args::ArgValues,
    bytecode::{CallResult, VM},
    defer_drop,
    exception_public::{MontyException, StackFrame},
    fstring::FormatError,
    heap::{Heap, HeapData},
    intern::{Interns, StaticStrings, StringId},
    parse::CodeRange,
    resource::ResourceTracker,
    types::{
        PyTrait, Str, Type, allocate_tuple,
        str::{StringRepr, string_repr_fmt},
    },
    value::{EitherStr, Value},
};

/// Result type alias for operations that can produce a runtime error.
// NOTE(review): the alias appears without its generic parameters in this text
// (presumably an alias over `Result` with `RunError` as the error type) --
// confirm against the upstream file.
pub type RunResult = Result;

/// Python exception types supported by the interpreter.
///
/// Uses strum derives for automatic `Display`, `FromStr`, and `Into<&'static str>` implementations.
/// The string representation matches the variant name exactly (e.g., `ValueError` -> "ValueError"),
/// unless a variant overrides it with `#[strum(serialize = ...)]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, EnumString, IntoStaticStr, Serialize, Deserialize)]
pub enum ExcType {
    /// Base class for ordinary exceptions. As a handler type it matches every
    /// exception type except `BaseException`, `KeyboardInterrupt` and `SystemExit`.
    Exception,
    /// Root of the hierarchy. As a handler type it matches every exception type.
    BaseException,
    /// Not matched by an `Exception` handler; only by itself or `BaseException`.
    SystemExit,
    /// Not matched by an `Exception` handler; only by itself or `BaseException`.
    KeyboardInterrupt,

    // --- ArithmeticError hierarchy ---
    /// Intermediate class for arithmetic errors.
    ArithmeticError,
    /// Subclass of ArithmeticError.
    OverflowError,
    /// Subclass of ArithmeticError.
    ZeroDivisionError,

    // --- LookupError hierarchy ---
    /// Intermediate class for lookup errors.
    LookupError,
    /// Subclass of LookupError.
    IndexError,
    /// Subclass of LookupError.
    KeyError,

    // --- RuntimeError hierarchy ---
    /// Intermediate class for runtime errors.
    RuntimeError,
    /// Subclass of RuntimeError.
    NotImplementedError,
    /// Subclass of RuntimeError.
    RecursionError,

    // --- AttributeError hierarchy ---
    AttributeError,
    /// Subclass of AttributeError (from dataclasses module).
    FrozenInstanceError,

    // --- NameError hierarchy ---
    NameError,
    /// Subclass of NameError - for accessing local variable before assignment.
    UnboundLocalError,

    // --- ValueError hierarchy ---
    ValueError,
    /// Subclass of ValueError - for encoding/decoding errors.
    UnicodeDecodeError,

    // --- ImportError hierarchy ---
    /// Import-related errors (module not found, name not in module).
    ImportError,
    /// Subclass of ImportError - for when a module cannot be found.
    ModuleNotFoundError,

    // --- OSError hierarchy ---
    /// OS-related errors (file not found, permission denied, etc.)
    OSError,
    /// Subclass of OSError - for when a file or directory cannot be found.
    FileNotFoundError,
    /// Subclass of OSError - for when a file already exists.
    FileExistsError,
    /// Subclass of OSError - for when a path is a directory but a file was expected.
    IsADirectoryError,
    /// Subclass of OSError - for when a path is not a directory but one was expected.
    NotADirectoryError,

    // --- Standalone exception types ---
    AssertionError,
    MemoryError,
    StopIteration,
    SyntaxError,
    TimeoutError,
    TypeError,

    // --- Module-specific exception types ---

    // --- re module ---
    /// `re.PatternError` - raised for invalid regex patterns or unsupported regex features.
    ///
    /// The strum string form of this variant is `"re.PatternError"` rather than
    /// the variant name.
    ///
    /// # Behavior Note
    ///
    /// Limited to monty's exception type, `PatternError` does not provide `pattern`, `pos`,
    /// `lineno` and `colno` attributes.
    ///
    /// As per CPython's implementation, it would be hard to convert `fancy-regex`'s error
    /// representations into the required attributes.
    #[strum(serialize = "re.PatternError")]
    RePatternError,
}

impl ExcType {
    /// Checks if this exception type is a subclass of another exception type.
    ///
    /// Implements Python's exception hierarchy for try/except matching:
    /// - every type matches a handler for exactly that type
    /// - `BaseException` matches every type
    /// - `Exception` matches every type except `BaseException`,
    ///   `KeyboardInterrupt` and `SystemExit`
    /// - `LookupError` is the base for `KeyError` and `IndexError`
    /// - `ArithmeticError` is the base for `ZeroDivisionError` and `OverflowError`
    /// - `RuntimeError` is the base for `RecursionError` and `NotImplementedError`
    /// - `AttributeError`, `NameError`, `ValueError` and `ImportError` are the bases for
    ///   `FrozenInstanceError`, `UnboundLocalError`, `UnicodeDecodeError` and
    ///   `ModuleNotFoundError` respectively
    /// - `OSError` is the base for the file-system errors (`FileNotFoundError`,
    ///   `FileExistsError`, `IsADirectoryError`, `NotADirectoryError`)
    ///
    /// Returns true if `self` would be caught by `except handler_type:`.
#[must_use] pub fn is_subclass_of(self, handler_type: Self) -> bool { if self == handler_type { return true; } match handler_type { // BaseException catches all exceptions Self::BaseException => true, // Exception catches everything except BaseException, and direct subclasses: KeyboardInterrupt, SystemExit Self::Exception => !matches!(self, Self::BaseException | Self::KeyboardInterrupt | Self::SystemExit), // LookupError catches KeyError and IndexError Self::LookupError => matches!(self, Self::KeyError | Self::IndexError), // ArithmeticError catches ZeroDivisionError and OverflowError Self::ArithmeticError => matches!(self, Self::ZeroDivisionError | Self::OverflowError), // RuntimeError catches RecursionError and NotImplementedError Self::RuntimeError => matches!(self, Self::RecursionError | Self::NotImplementedError), // AttributeError catches FrozenInstanceError Self::AttributeError => matches!(self, Self::FrozenInstanceError), // NameError catches UnboundLocalError Self::NameError => matches!(self, Self::UnboundLocalError), // ValueError catches UnicodeDecodeError Self::ValueError => matches!(self, Self::UnicodeDecodeError), // ImportError catches ModuleNotFoundError Self::ImportError => matches!(self, Self::ModuleNotFoundError), // OSError catches FileNotFoundError, FileExistsError, IsADirectoryError, NotADirectoryError Self::OSError => matches!( self, Self::FileNotFoundError | Self::FileExistsError | Self::IsADirectoryError | Self::NotADirectoryError ), // All other types only match exactly (handled by self == handler_type above) _ => false, } } /// Creates an exception instance from an exception type and arguments. /// /// Handles exception constructors like `ValueError('message')`. /// Currently supports zero or one string argument. /// /// The `interns` parameter provides access to interned string content. /// Returns a heap-allocated exception value. 
pub(crate) fn call(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult<Value> {
    // Single source for the message shared by every rejected argument shape.
    const BAD_ARGS: &str = "exceptions can only be called with zero or one string argument";

    defer_drop!(args, vm);
    let exc = match args {
        // `ValueError()` - no message.
        ArgValues::Empty => Ok(SimpleException::new_none(self)),
        ArgValues::One(value) => match value {
            // Interned string: copy its text out of the intern table.
            Value::InternString(string_id) => Ok(SimpleException::new_msg(
                self,
                vm.interns.get_str(*string_id).to_owned(),
            )),
            // Heap reference: accepted only when it points at a string.
            Value::Ref(heap_id) => {
                if let HeapData::Str(s) = vm.heap.get(*heap_id) {
                    Ok(SimpleException::new_msg(self, s.as_str().to_owned()))
                } else {
                    Err(RunError::internal(BAD_ARGS))
                }
            }
            _ => Err(RunError::internal(BAD_ARGS)),
        },
        _ => Err(RunError::internal(BAD_ARGS)),
    }?;
    let heap_id = vm.heap.allocate(HeapData::Exception(exc))?;
    Ok(Value::Ref(heap_id))
}

/// Creates an AttributeError for when an attribute is not found (GET operation).
///
/// Message: `'{type_name}' object has no attribute '{attr}'`.
/// Sets `hide_caret: true` because CPython doesn't show carets for attribute GET errors.
#[must_use]
pub(crate) fn attribute_error(type_name: impl Display, attr: &str) -> RunError {
    let exc = SimpleException::new_msg(
        Self::AttributeError,
        format!("'{type_name}' object has no attribute '{attr}'"),
    );
    RunError::Exc(ExceptionRaise {
        exc,
        frame: None,
        hide_caret: true, // CPython doesn't show carets for attribute GET errors
    })
}

/// Creates an AttributeError for attribute assignment on types that don't support it.
///
/// Matches CPython's format for setting attributes on built-in types.
#[must_use]
pub(crate) fn attribute_error_no_setattr(type_: Type, attr_name: &str) -> RunError {
    SimpleException::new_msg(
        Self::AttributeError,
        format!("'{type_}' object has no attribute '{attr_name}' and no __dict__ for setting new attributes"),
    )
    .into()
}

/// Creates an AttributeError for a missing module attribute.
///
/// Matches CPython's format: `AttributeError: module 'name' has no attribute 'attr'`
/// Sets `hide_caret: true` because CPython doesn't show carets for attribute GET errors.
#[must_use]
pub(crate) fn attribute_error_module(module_name: &str, attr_name: &str) -> RunError {
    let message = format!("module '{module_name}' has no attribute '{attr_name}'");
    // Built as an explicit raise record so the caret can be suppressed.
    let raise = ExceptionRaise {
        exc: SimpleException::new_msg(Self::AttributeError, message),
        frame: None,
        hide_caret: true,
    };
    RunError::Exc(raise)
}

/// Builds the FrozenInstanceError raised when a field of a frozen dataclass is assigned.
///
/// Mirrors CPython's `dataclasses.FrozenInstanceError`, a subclass of `AttributeError`.
/// Message format: "cannot assign to field 'attr_name'"
#[must_use]
pub(crate) fn frozen_instance_error(attr_name: &str) -> RunError {
    let message = format!("cannot assign to field '{attr_name}'");
    SimpleException::new_msg(Self::FrozenInstanceError, message).into()
}

/// Builds the TypeError with message `'{type}' object is not subscriptable`.
#[must_use]
pub(crate) fn type_error_not_sub(type_: Type) -> RunError {
    let message = format!("'{type_}' object is not subscriptable");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the TypeError raised when a non-awaitable object is awaited.
///
/// Mirrors CPython's format: `TypeError: '{type}' object can't be awaited`
#[must_use]
pub(crate) fn object_not_awaitable(type_: Type) -> RunError {
    let message = format!("'{type_}' object can't be awaited");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the TypeError raised for item assignment on a type that lacks it.
///
/// Mirrors CPython's format: `TypeError: '{type}' object does not support item assignment`
#[must_use]
pub(crate) fn type_error_not_sub_assignment(type_: Type) -> RunError {
    let message = format!("'{type_}' object does not support item assignment");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Creates a TypeError for unhashable types when calling `hash()`.
///
/// This matches Python 3.14's error message: `TypeError: unhashable type: 'list'`
#[must_use]
pub(crate) fn type_error_unhashable(type_: Type) -> RunError {
    let message = format!("unhashable type: '{type_}'");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the TypeError raised when an unhashable type is used as a dict key.
///
/// Mirrors Python 3.14's message:
/// `TypeError: cannot use 'list' as a dict key (unhashable type: 'list')`
#[must_use]
pub(crate) fn type_error_unhashable_dict_key(type_: Type) -> RunError {
    let message = format!("cannot use '{type_}' as a dict key (unhashable type: '{type_}')");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the TypeError raised when an unhashable type is used as a set element.
///
/// Mirrors Python 3.14's message:
/// `TypeError: cannot use 'list' as a set element (unhashable type: 'list')`
#[must_use]
pub(crate) fn type_error_unhashable_set_element(type_: Type) -> RunError {
    let message = format!("cannot use '{type_}' as a set element (unhashable type: '{type_}')");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the KeyError raised for a missing dict key.
///
/// The exception message is the key's `py_str` rendering; for string keys
/// that is the raw string value without extra quoting.
#[must_use]
pub(crate) fn key_error(key: &Value, vm: &VM<'_, '_, impl ResourceTracker>) -> RunError {
    SimpleException::new_msg(Self::KeyError, key.py_str(vm).into_owned()).into()
}

/// Builds the KeyError raised when popping from an empty set.
///
/// Mirrors CPython's error format: `KeyError: 'pop from an empty set'`
#[must_use]
pub(crate) fn key_error_pop_empty_set() -> RunError {
    let exc = SimpleException::new_msg(Self::KeyError, "pop from an empty set");
    exc.into()
}

/// Creates a TypeError for when a function receives the wrong number of arguments.
///
/// Matches CPython's error format exactly:
/// - For 1 expected arg: `{name}() takes exactly one argument ({actual} given)`
/// - For N expected args: `{name} expected {expected} arguments, got {actual}`
///
/// # Arguments
/// * `name` - The function name (e.g., "len" for builtins, "list.append" for methods)
/// * `expected` - Number of expected arguments
/// * `actual` - Number of arguments actually provided
#[must_use]
pub(crate) fn type_error_arg_count(name: &str, expected: usize, actual: usize) -> RunError {
    let message = match expected {
        // CPython: "len() takes exactly one argument (2 given)"
        1 => format!("{name}() takes exactly one argument ({actual} given)"),
        // CPython: "insert expected 2 arguments, got 1"
        _ => format!("{name} expected {expected} arguments, got {actual}"),
    };
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the TypeError raised when a method taking no arguments is given some.
///
/// Mirrors CPython's format: `{name}() takes no arguments ({actual} given)`
///
/// # Arguments
/// * `name` - The method name (e.g., "dict.keys")
/// * `actual` - Number of arguments actually provided
#[must_use]
pub(crate) fn type_error_no_args(name: &str, actual: usize) -> RunError {
    // e.g. "dict.keys() takes no arguments (1 given)"
    let message = format!("{name}() takes no arguments ({actual} given)");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Creates a TypeError for when a function receives fewer arguments than required.
///
/// Matches CPython's format: `{name} expected at least {min} argument, got {actual}`
///
/// # Arguments
/// * `name` - The function name (e.g., "get", "pop")
/// * `min` - Minimum number of required arguments
/// * `actual` - Number of arguments actually provided
#[must_use]
pub(crate) fn type_error_at_least(name: &str, min: usize, actual: usize) -> RunError {
    // CPython: "get expected at least 1 argument, got 0"
    let plural = if min == 1 { "" } else { "s" };
    SimpleException::new_msg(
        Self::TypeError,
        format!("{name} expected at least {min} argument{plural}, got {actual}"),
    )
    .into()
}

/// Creates a TypeError for when a function receives more arguments than allowed.
///
/// Matches CPython's format: `{name} expected at most {max} argument(s), got {actual}`.
/// The noun is singular when `max` is 1, as in `type_error_at_least`.
///
/// # Arguments
/// * `name` - The function name (e.g., "get", "pop")
/// * `max` - Maximum number of allowed arguments
/// * `actual` - Number of arguments actually provided
#[must_use]
pub(crate) fn type_error_at_most(name: &str, max: usize, actual: usize) -> RunError {
    // CPython: "get expected at most 2 arguments, got 3"
    //          "pop expected at most 1 argument, got 2"
    let plural = if max == 1 { "" } else { "s" };
    SimpleException::new_msg(
        Self::TypeError,
        format!("{name} expected at most {max} argument{plural}, got {actual}"),
    )
    .into()
}

/// Creates a TypeError for missing positional arguments.
///
/// Matches CPython's format: `{name}() missing {count} required positional argument(s): 'a' and 'b'`
///
/// The name list is rendered by `format_param_names`; the noun is singular
/// only when exactly one name is missing.
#[must_use]
pub(crate) fn type_error_missing_positional_with_names(name: &str, missing_names: &[&str]) -> RunError {
    let count = missing_names.len();
    let names_str = format_param_names(missing_names);
    let message = if count == 1 {
        format!("{name}() missing 1 required positional argument: {names_str}")
    } else {
        format!("{name}() missing {count} required positional arguments: {names_str}")
    };
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Creates a TypeError for missing keyword-only arguments.
///
/// Matches CPython's format: `{name}() missing {count} required keyword-only argument(s): 'a' and 'b'`
#[must_use]
pub(crate) fn type_error_missing_kwonly_with_names(name: &str, missing_names: &[&str]) -> RunError {
    let count = missing_names.len();
    let names_str = format_param_names(missing_names);
    let message = if count == 1 {
        format!("{name}() missing 1 required keyword-only argument: {names_str}")
    } else {
        format!("{name}() missing {count} required keyword-only arguments: {names_str}")
    };
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Creates a TypeError for too many positional arguments.
///
/// Matches CPython's format:
/// - Simple: `{name}() takes {max} positional argument(s) but {actual} was/were given`
///   ("was" only when exactly one argument was given)
/// - With kwonly: `{name}() takes {max} positional argument(s) but {actual} positional argument(s) (and N keyword-only argument(s)) were given`
///
/// # Arguments
/// * `name` - The function name
/// * `max` - Number of positional arguments the function accepts
/// * `actual` - Number of positional arguments actually provided
/// * `kwonly_given` - Number of keyword-only arguments provided; a non-zero
///   value selects the long form of the message
#[must_use]
pub(crate) fn type_error_too_many_positional(
    name: &str,
    max: usize,
    actual: usize,
    kwonly_given: usize,
) -> RunError {
    // With `max == 0` this yields "arguments", so no separate zero case is needed.
    let takes_word = if max == 1 { "argument" } else { "arguments" };

    let message = if kwonly_given > 0 {
        // CPython includes keyword-only args in the "given" part when present
        let given_word = if actual == 1 { "argument" } else { "arguments" };
        let kwonly_word = if kwonly_given == 1 { "argument" } else { "arguments" };
        format!(
            "{name}() takes {max} positional {takes_word} but {actual} positional {given_word} (and {kwonly_given} keyword-only {kwonly_word}) were given"
        )
    } else {
        // CPython: "f() takes 0 positional arguments but 1 was given"
        //          "f() takes 1 positional argument but 2 were given"
        let verb = if actual == 1 { "was" } else { "were" };
        format!("{name}() takes {max} positional {takes_word} but {actual} {verb} given")
    };
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Creates a TypeError for positional-only parameter passed as keyword.
///
/// Matches CPython's format: `{name}() got some positional-only arguments passed as keyword arguments: '{param}'`
#[must_use]
pub(crate) fn type_error_positional_only(name: &str, param: &str) -> RunError {
    let message = format!("{name}() got some positional-only arguments passed as keyword arguments: '{param}'");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the TypeError raised when one argument receives more than one value.
///
/// Mirrors CPython's format: `{name}() got multiple values for argument '{param}'`
#[must_use]
pub(crate) fn type_error_duplicate_arg(name: &str, param: &str) -> RunError {
    let message = format!("{name}() got multiple values for argument '{param}'");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the TypeError raised when a keyword argument is supplied more than once.
///
/// Mirrors CPython's format: `{name}() got multiple values for keyword argument '{key}'`
#[must_use]
pub(crate) fn type_error_multiple_values(name: &str, key: &str) -> RunError {
    let message = format!("{name}() got multiple values for keyword argument '{key}'");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the TypeError raised for a keyword argument the callee does not accept.
///
/// Mirrors CPython's format: `{name}() got an unexpected keyword argument '{key}'`
#[must_use]
pub(crate) fn type_error_unexpected_keyword(name: &str, key: &str) -> RunError {
    let message = format!("{name}() got an unexpected keyword argument '{key}'");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Builds the TypeError raised when the value after `**` in a call is not a mapping.
///
/// Mirrors CPython's format: `{name}() argument after ** must be a mapping, not {type_name}`
#[must_use]
pub(crate) fn type_error_kwargs_not_mapping(name: &str, type_name: &str) -> RunError {
    let message = format!("{name}() argument after ** must be a mapping, not {type_name}");
    SimpleException::new_msg(Self::TypeError, message).into()
}

/// Creates a TypeError for `{**x}` dict-literal unpacking where `x` is not a mapping.
///
    /// Matches CPython's format: `'{type_name}' object is not a mapping`
    ///
    /// Note: this differs from [`type_error_kwargs_not_mapping`] which is used for
    /// function-call `**kwargs` and includes the function name in the message.
    #[must_use]
    pub(crate) fn type_error_not_mapping(type_: Type) -> RunError {
        let message = format!("'{type_}' object is not a mapping");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the TypeError raised when `**kwargs` contains a non-string key.
    ///
    /// Matches CPython's format: `TypeError: keywords must be strings`
    /// (the message carries no function-name prefix; this helper takes no name).
    #[must_use]
    pub(crate) fn type_error_kwargs_nonstring_key() -> RunError {
        RunError::from(SimpleException::new_msg(Self::TypeError, "keywords must be strings"))
    }

    /// Builds a TypeError whose message is exactly `msg`.
    #[must_use]
    pub(crate) fn type_error(msg: impl fmt::Display) -> RunError {
        RunError::from(SimpleException::new_msg(Self::TypeError, msg))
    }

    /// Builds the TypeError raised when `bytes()` is given a value it cannot convert.
    ///
    /// Matches CPython's format: `TypeError: cannot convert '{type}' object to bytes`
    #[must_use]
    pub(crate) fn type_error_bytes_init(type_: Type) -> RunError {
        let message = format!("cannot convert '{type_}' object to bytes");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the TypeError raised when a type that cannot be instantiated is called.
    ///
    /// Matches CPython's format: `TypeError: cannot create '{type}' instances`
    #[must_use]
    pub(crate) fn type_error_not_callable(type_: Type) -> RunError {
        let message = format!("cannot create '{type_}' instances");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the TypeError raised when a non-callable object is called.
    ///
    /// Matches CPython's format: `TypeError: '{type}' object is not callable`
    #[must_use]
    pub(crate) fn type_error_not_callable_object(type_: Type) -> RunError {
        let message = format!("'{type_}' object is not callable");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Creates a TypeError for non-iterable type in list/tuple/etc constructors.
///
    /// Matches CPython's format: `TypeError: '{type}' object is not iterable`
    #[must_use]
    pub(crate) fn type_error_not_iterable(type_: Type) -> RunError {
        let message = format!("'{type_}' object is not iterable");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the TypeError raised when a PEP 448 `*value` literal unpack gets a non-iterable.
    ///
    /// Applies to `[*expr]` and `(*expr,)`; kept separate from [`type_error_not_iterable`]
    /// because CPython words this context differently.
    ///
    /// Matches CPython's format: `TypeError: Value after * must be an iterable, not {type}`
    #[must_use]
    pub(crate) fn type_error_value_after_star(type_: Type) -> RunError {
        let message = format!("Value after * must be an iterable, not {type_}");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the TypeError raised when `int()` is given an unsupported type.
    ///
    /// Matches CPython's format: `TypeError: int() argument must be a string, a bytes-like object or a real number, not '{type}'`
    #[must_use]
    pub(crate) fn type_error_int_conversion(type_: Type) -> RunError {
        let message = format!("int() argument must be a string, a bytes-like object or a real number, not '{type_}'");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the TypeError raised when `float()` is given an unsupported type.
    ///
    /// Matches CPython's format: `TypeError: float() argument must be a string or a real number, not '{type}'`
    #[must_use]
    pub(crate) fn type_error_float_conversion(type_: Type) -> RunError {
        let message = format!("float() argument must be a string or a real number, not '{type_}'");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the ValueError raised when `bytes()` is given a negative count.
    ///
    /// Matches CPython's format: `ValueError: negative count`
    #[must_use]
    pub(crate) fn value_error_negative_bytes_count() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ValueError, "negative count"))
    }

    /// Creates a TypeError for isinstance() arg 2.
///
    /// Matches CPython's format: `TypeError: isinstance() arg 2 must be a type, a tuple of types, or a union`
    #[must_use]
    pub(crate) fn isinstance_arg2_error() -> RunError {
        let message = "isinstance() arg 2 must be a type, a tuple of types, or a union";
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the TypeError raised when an `except` clause names something that is not an exception type.
    ///
    /// Matches CPython's format: `TypeError: catching classes that do not inherit from BaseException is not allowed`
    #[must_use]
    pub(crate) fn except_invalid_type_error() -> RunError {
        let message = "catching classes that do not inherit from BaseException is not allowed";
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the ValueError raised when `range()` is given a step of zero.
    ///
    /// Matches CPython's format: `ValueError: range() arg 3 must not be zero`
    #[must_use]
    pub(crate) fn value_error_range_step_zero() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ValueError, "range() arg 3 must not be zero"))
    }

    /// Builds the ValueError raised when a slice has a step of zero.
    ///
    /// Matches CPython's format: `ValueError: slice step cannot be zero`
    #[must_use]
    pub(crate) fn value_error_slice_step_zero() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ValueError, "slice step cannot be zero"))
    }

    /// Builds the TypeError raised when slice bounds are neither integers nor `None`.
    ///
    /// Matches CPython's format: `TypeError: slice indices must be integers or None or have an __index__ method`
    #[must_use]
    pub(crate) fn type_error_slice_indices() -> RunError {
        let message = "slice indices must be integers or None or have an __index__ method";
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Creates a RuntimeError for dict mutation during iteration.
///
    /// Matches CPython's format: `RuntimeError: dictionary changed size during iteration`
    #[must_use]
    pub(crate) fn runtime_error_dict_changed_size() -> RunError {
        let message = "dictionary changed size during iteration";
        RunError::from(SimpleException::new_msg(Self::RuntimeError, message))
    }

    /// Builds the RuntimeError raised when a set is resized while being iterated.
    ///
    /// Matches CPython's format: `RuntimeError: Set changed size during iteration`
    #[must_use]
    pub(crate) fn runtime_error_set_changed_size() -> RunError {
        let message = "Set changed size during iteration";
        RunError::from(SimpleException::new_msg(Self::RuntimeError, message))
    }

    /// Builds the TypeError raised when keyword arguments are passed to a callable that takes none.
    ///
    /// Matches CPython's format: `TypeError: {name}() takes no keyword arguments`
    #[must_use]
    pub(crate) fn type_error_no_kwargs(name: &str) -> RunError {
        let message = format!("{name}() takes no keyword arguments");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the IndexError raised when reading a list at an out-of-range index (getitem).
    ///
    /// Matches CPython's format: `IndexError('list index out of range')`
    #[must_use]
    pub(crate) fn list_index_error() -> RunError {
        RunError::from(SimpleException::new_msg(Self::IndexError, "list index out of range"))
    }

    /// Builds the IndexError raised when assigning to a list at an out-of-range index (setitem).
    ///
    /// Matches CPython's format: `IndexError('list assignment index out of range')`
    #[must_use]
    pub(crate) fn list_assignment_index_error() -> RunError {
        let message = "list assignment index out of range";
        RunError::from(SimpleException::new_msg(Self::IndexError, message))
    }

    /// Builds the IndexError raised when indexing a tuple out of range.
    ///
    /// Matches CPython's format: `IndexError('tuple index out of range')`
    #[must_use]
    pub(crate) fn tuple_index_error() -> RunError {
        RunError::from(SimpleException::new_msg(Self::IndexError, "tuple index out of range"))
    }

    /// Creates an IndexError for string index out of range.
///
    /// Matches CPython's format: `IndexError('string index out of range')`
    #[must_use]
    pub(crate) fn str_index_error() -> RunError {
        RunError::from(SimpleException::new_msg(Self::IndexError, "string index out of range"))
    }

    /// Builds the IndexError raised when indexing bytes out of range.
    ///
    /// Matches CPython's format: `IndexError('index out of range')`
    #[must_use]
    pub(crate) fn bytes_index_error() -> RunError {
        RunError::from(SimpleException::new_msg(Self::IndexError, "index out of range"))
    }

    /// Builds the IndexError raised when indexing a range out of range.
    ///
    /// Matches CPython's format: `IndexError('range object index out of range')`
    #[must_use]
    pub(crate) fn range_index_error() -> RunError {
        let message = "range object index out of range";
        RunError::from(SimpleException::new_msg(Self::IndexError, message))
    }

    /// Builds the IndexError raised when a `re.Match` group index does not exist.
    ///
    /// Matches CPython's format: `IndexError('no such group')`
    #[must_use]
    pub(crate) fn re_match_group_index_error() -> RunError {
        RunError::from(SimpleException::new_msg(Self::IndexError, "no such group"))
    }

    /// Builds the TypeError raised when a sequence is read with a non-integer index (getitem).
    ///
    /// Message shape: `{type} indices must be integers, not '{index_type}'` - the container
    /// type is written bare and only the index type is quoted.
    #[must_use]
    pub(crate) fn type_error_indices(type_str: Type, index_type: Type) -> RunError {
        let message = format!("{type_str} indices must be integers, not '{index_type}'");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the TypeError raised when a list is assigned through a non-integer index (setitem).
    ///
    /// Matches CPython's format: `TypeError('list indices must be integers or slices, not {index_type}')`
    #[must_use]
    pub(crate) fn type_error_list_assignment_indices(index_type: Type) -> RunError {
        let message = format!("list indices must be integers or slices, not {index_type}");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Creates a NameError for accessing a free variable (nonlocal/closure) before it's assigned.
///
    /// Matches CPython's format: `NameError: cannot access free variable 'x' where it is not
    /// associated with a value in enclosing scope`
    #[must_use]
    pub(crate) fn name_error_free_variable(name: &str) -> SimpleException {
        let message =
            format!("cannot access free variable '{name}' where it is not associated with a value in enclosing scope");
        SimpleException::new_msg(Self::NameError, message)
    }

    /// Builds the NameError raised when an undefined name is read.
    ///
    /// Matches CPython's format: `NameError: name 'x' is not defined`
    ///
    /// For the module names listed in the body, the "Did you forget to import" hint
    /// is appended as well.
    #[must_use]
    pub(crate) fn name_error(name: &str) -> SimpleException {
        // add the same suffix as cpython, but only for the modules supported by Monty
        let suggest_import = matches!(name, "asyncio" | "sys" | "typing" | "types" | "re");
        let msg = if suggest_import {
            format!("name '{name}' is not defined. Did you forget to import '{name}'?")
        } else {
            format!("name '{name}' is not defined")
        };
        SimpleException::new_msg(Self::NameError, msg)
    }

    /// Builds the UnboundLocalError raised when a local variable is read before it is assigned.
    ///
    /// Matches CPython's format: `UnboundLocalError: cannot access local variable 'x' where it is not associated with a value`
    #[must_use]
    pub(crate) fn unbound_local_error(name: &str) -> SimpleException {
        let message = format!("cannot access local variable '{name}' where it is not associated with a value");
        SimpleException::new_msg(Self::UnboundLocalError, message)
    }

    /// Builds the ModuleNotFoundError raised when an imported module does not exist.
    ///
    /// Matches CPython's format: `ModuleNotFoundError: No module named 'name'`
    /// The result has no frame and sets `hide_caret: true` because CPython doesn't show
    /// carets for module not found errors.
    #[must_use]
    pub(crate) fn module_not_found_error(module_name: &str) -> RunError {
        let message = format!("No module named '{module_name}'");
        // The conversion yields `frame: None, hide_caret: false`; only the caret flag changes.
        let mut raise = ExceptionRaise::from(SimpleException::new_msg(Self::ModuleNotFoundError, message));
        raise.hide_caret = true; // CPython doesn't show carets for module not found errors
        RunError::Exc(raise)
    }

    /// Creates a NotImplementedError for an unimplemented Python feature.
///
    /// Used during parsing when encountering Python syntax that Monty doesn't yet support.
    /// `msg` is stored unchanged; per the original note, callers are expected to phrase it as
    /// "The monty syntax parser does not yet support {feature}".
    #[must_use]
    pub(crate) fn not_implemented(msg: impl fmt::Display) -> SimpleException {
        let exc_type = Self::NotImplementedError;
        SimpleException::new_msg(exc_type, msg)
    }

    /// Builds the ZeroDivisionError raised on division by zero.
    ///
    /// Matches CPython 3.14's format: `ZeroDivisionError('division by zero')`
    #[must_use]
    pub(crate) fn zero_division() -> SimpleException {
        let message = "division by zero";
        SimpleException::new_msg(Self::ZeroDivisionError, message)
    }

    /// Builds the OverflowError raised when a string/sequence repetition count is too large.
    ///
    /// Matches CPython's format: `OverflowError('cannot fit 'int' into an index-sized integer')`
    #[must_use]
    pub(crate) fn overflow_repeat_count() -> SimpleException {
        let message = "cannot fit 'int' into an index-sized integer";
        SimpleException::new_msg(Self::OverflowError, message)
    }

    /// Builds the IndexError raised when an integer index is too large to fit in i64.
    ///
    /// Matches CPython's format: `IndexError: cannot fit 'int' into an index-sized integer`
    #[must_use]
    pub(crate) fn index_error_int_too_large() -> RunError {
        let message = "cannot fit 'int' into an index-sized integer";
        RunError::from(SimpleException::new_msg(Self::IndexError, message))
    }

    /// Builds the ImportError raised when `name` cannot be imported from `module_name`.
    ///
    /// Matches CPython's format for built-in modules:
    /// `ImportError: cannot import name 'name' from 'module' (unknown location)`
    ///
    /// The result has no frame and sets `hide_caret: true` because CPython doesn't show
    /// carets for import errors.
    #[must_use]
    pub(crate) fn cannot_import_name(name: &str, module_name: &str) -> RunError {
        let message = format!("cannot import name '{name}' from '{module_name}' (unknown location)");
        // The conversion yields `frame: None, hide_caret: false`; only the caret flag changes.
        let mut raise = ExceptionRaise::from(SimpleException::new_msg(Self::ImportError, message));
        raise.hide_caret = true;
        RunError::Exc(raise)
    }

    /// Creates a ValueError for negative shift count in bitwise shift operations.
///
    /// Matches CPython's format: `ValueError: negative shift count`
    #[must_use]
    pub(crate) fn value_error_negative_shift_count() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ValueError, "negative shift count"))
    }

    /// Builds the OverflowError raised when a shift count exceeds the integer size.
    ///
    /// Matches CPython's format: `OverflowError: Python int too large to convert to C ssize_t`
    /// Note: CPython uses this message because it tries to convert to ssize_t for the shift amount.
    #[must_use]
    pub(crate) fn overflow_shift_count() -> RunError {
        let message = "Python int too large to convert to C ssize_t";
        RunError::from(SimpleException::new_msg(Self::OverflowError, message))
    }

    /// Builds the TypeError raised for an unsupported binary operation.
    ///
    /// When `op` is `+` or `+=` and the left operand is a str or list, CPython's
    /// concatenation wording is used:
    /// `can only concatenate {type} (not "{other}") to {type}`
    ///
    /// Every other combination gets the generic wording:
    /// `unsupported operand type(s) for {op}: '{left}' and '{right}'`
    #[must_use]
    pub(crate) fn binary_type_error(op: &str, lhs_type: Type, rhs_type: Type) -> RunError {
        let is_addition = matches!(op, "+" | "+=");
        let lhs_concatenates = lhs_type == Type::Str || lhs_type == Type::List;
        let message = if is_addition && lhs_concatenates {
            format!("can only concatenate {lhs_type} (not \"{rhs_type}\") to {lhs_type}")
        } else {
            format!("unsupported operand type(s) for {op}: '{lhs_type}' and '{rhs_type}'")
        };
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the TypeError raised for an unsupported unary operation.
    ///
    /// Uses CPython's format: `bad operand type for unary {op}: '{type}'`
    #[must_use]
    pub(crate) fn unary_type_error(op: &str, value_type: Type) -> RunError {
        let message = format!("bad operand type for unary {op}: '{value_type}'");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Creates a TypeError for functions that require an integer argument.
///
    /// Matches CPython's format: `TypeError: '{type}' object cannot be interpreted as an integer`
    #[must_use]
    pub(crate) fn type_error_not_integer(type_: Type) -> RunError {
        let message = format!("'{type_}' object cannot be interpreted as an integer");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the ZeroDivisionError raised when zero is raised to a negative power.
    ///
    /// Matches CPython's format: `ZeroDivisionError: zero to a negative power`
    /// Note: CPython uses the same message for both int and float zero ** negative.
    #[must_use]
    pub(crate) fn zero_negative_power() -> RunError {
        let message = "zero to a negative power";
        RunError::from(SimpleException::new_msg(Self::ZeroDivisionError, message))
    }

    /// Builds the OverflowError raised when an exponent is too large.
    ///
    /// Matches CPython's format: `OverflowError: exponent too large`
    #[must_use]
    pub(crate) fn overflow_exponent_too_large() -> RunError {
        RunError::from(SimpleException::new_msg(Self::OverflowError, "exponent too large"))
    }

    /// Builds the ZeroDivisionError raised by divmod with a zero divisor (integer or float).
    ///
    /// Matches CPython's format: `ZeroDivisionError: division by zero`
    /// Note: CPython uses the same message for both integer and float divmod.
    #[must_use]
    pub(crate) fn divmod_by_zero() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ZeroDivisionError, "division by zero"))
    }

    /// Builds the TypeError raised when an item passed to `str.join()` is not a string.
    ///
    /// Matches CPython's format: `TypeError: sequence item {index}: expected str instance, {type} found`
    #[must_use]
    pub(crate) fn type_error_join_item(index: usize, item_type: Type) -> RunError {
        let message = format!("sequence item {index}: expected str instance, {item_type} found");
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Creates a TypeError for str.join() when the argument is not iterable.
///
    /// Matches CPython's format: `TypeError: can only join an iterable`
    #[must_use]
    pub(crate) fn type_error_join_not_iterable() -> RunError {
        RunError::from(SimpleException::new_msg(Self::TypeError, "can only join an iterable"))
    }

    /// Builds the ValueError raised by `str.index()`/`str.rindex()` when the substring is absent.
    ///
    /// Matches CPython's format: `ValueError: substring not found`
    #[must_use]
    pub(crate) fn value_error_substring_not_found() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ValueError, "substring not found"))
    }

    /// Builds the ValueError raised by `str.partition()`/`str.rpartition()` for an empty separator.
    ///
    /// Matches CPython's format: `ValueError: empty separator`
    #[must_use]
    pub(crate) fn value_error_empty_separator() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ValueError, "empty separator"))
    }

    /// Builds the TypeError raised when a fillchar argument is not a single character.
    ///
    /// Matches CPython's format: `TypeError: The fill character must be exactly one character long`
    #[must_use]
    pub(crate) fn type_error_fillchar_must_be_single_char() -> RunError {
        let message = "The fill character must be exactly one character long";
        RunError::from(SimpleException::new_msg(Self::TypeError, message))
    }

    /// Builds the message-less StopIteration raised when an iterator is exhausted.
    ///
    /// Matches CPython's format: `StopIteration`
    #[must_use]
    pub(crate) fn stop_iteration() -> RunError {
        RunError::from(SimpleException::new_none(Self::StopIteration))
    }

    /// Builds the ValueError raised by `list.index()` when the item is absent.
    ///
    /// Matches CPython's format: `ValueError: list.index(x): x not in list`
    #[must_use]
    pub(crate) fn value_error_not_in_list() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ValueError, "list.index(x): x not in list"))
    }

    /// Creates a ValueError for tuple.index() when item is not found.
///
    /// Matches CPython's format: `ValueError: tuple.index(x): x not in tuple`
    #[must_use]
    pub(crate) fn value_error_not_in_tuple() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ValueError, "tuple.index(x): x not in tuple"))
    }

    /// Builds the ValueError raised by `list.remove()` when the item is absent.
    ///
    /// Matches CPython's format: `ValueError: list.remove(x): x not in list`
    #[must_use]
    pub(crate) fn value_error_remove_not_in_list() -> RunError {
        RunError::from(SimpleException::new_msg(Self::ValueError, "list.remove(x): x not in list"))
    }

    /// Builds the IndexError raised when popping from an empty list.
    ///
    /// Matches CPython's format: `IndexError: pop from empty list`
    #[must_use]
    pub(crate) fn index_error_pop_empty_list() -> RunError {
        RunError::from(SimpleException::new_msg(Self::IndexError, "pop from empty list"))
    }

    /// Builds the IndexError raised by `list.pop(index)` for an invalid index.
    ///
    /// Matches CPython's format: `IndexError: pop index out of range`
    #[must_use]
    pub(crate) fn index_error_pop_out_of_range() -> RunError {
        RunError::from(SimpleException::new_msg(Self::IndexError, "pop index out of range"))
    }

    /// Builds the KeyError raised when `popitem()` is called on an empty dict.
    ///
    /// Matches CPython's format: `KeyError: 'popitem(): dictionary is empty'`
    /// Note that the single quotes are part of the stored message text itself.
    #[must_use]
    pub(crate) fn key_error_popitem_empty_dict() -> RunError {
        let message = "'popitem(): dictionary is empty'";
        RunError::from(SimpleException::new_msg(Self::KeyError, message))
    }

    /// Builds the LookupError raised for an unknown encoding name.
    ///
    /// Matches CPython's format: `LookupError: unknown encoding: {encoding}`
    #[must_use]
    pub(crate) fn lookup_error_unknown_encoding(encoding: &str) -> RunError {
        let message = format!("unknown encoding: {encoding}");
        RunError::from(SimpleException::new_msg(Self::LookupError, message))
    }

    /// Creates a UnicodeDecodeError for invalid UTF-8 bytes in decode().
/// /// Matches CPython's format: `UnicodeDecodeError: 'utf-8' codec can't decode bytes...` #[must_use] pub(crate) fn unicode_decode_error_invalid_utf8() -> RunError { SimpleException::new_msg( Self::UnicodeDecodeError, "'utf-8' codec can't decode bytes: invalid utf-8 sequence", ) .into() } /// Creates a ValueError for subsequence not found in bytes/str. /// /// Matches CPython's format: `ValueError: subsection not found` #[must_use] pub(crate) fn value_error_subsequence_not_found() -> RunError { SimpleException::new_msg(Self::ValueError, "subsection not found").into() } /// Creates a LookupError for unknown error handler. /// /// Matches CPython's format: `LookupError: unknown error handler name '{name}'` #[must_use] pub(crate) fn lookup_error_unknown_error_handler(name: &str) -> RunError { SimpleException::new_msg(Self::LookupError, format!("unknown error handler name '{name}'")).into() } /// Creates a `re.PatternError` for an invalid regex pattern or unsupported regex feature. /// /// Matches CPython's exception type: `re.PatternError: {message}` #[must_use] pub(crate) fn re_pattern_error(msg: impl fmt::Display) -> RunError { SimpleException::new_msg(Self::RePatternError, msg).into() } } /// Simple lightweight representation of an exception. /// /// This is used for performance reasons for common exception patterns. /// Exception messages use `String` for owned storage. #[derive(Debug, Clone, PartialEq, Hash, serde::Serialize, serde::Deserialize)] pub(crate) struct SimpleException { exc_type: ExcType, arg: Option, } impl fmt::Display for SimpleException { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.py_repr_fmt(f) } } impl From for SimpleException { fn from(exc: MontyException) -> Self { Self { exc_type: exc.exc_type(), arg: exc.into_message(), } } } impl SimpleException { /// Creates a new exception with the given type and optional argument message. 
#[must_use]
    pub fn new(exc_type: ExcType, arg: Option<String>) -> Self {
        Self { exc_type, arg }
    }

    /// Creates a new exception with the given type and argument message.
    ///
    /// The message is rendered through `Display` and stored as an owned `String`.
    #[must_use]
    pub fn new_msg(exc_type: ExcType, arg: impl fmt::Display) -> Self {
        Self {
            exc_type,
            arg: Some(arg.to_string()),
        }
    }

    /// Creates a new exception with the given type and no argument message.
    #[must_use]
    pub fn new_none(exc_type: ExcType) -> Self {
        Self { exc_type, arg: None }
    }

    /// Returns the exception type.
    #[must_use]
    pub fn exc_type(&self) -> ExcType {
        self.exc_type
    }

    /// Returns the argument message, if one was supplied.
    #[must_use]
    pub fn arg(&self) -> Option<&String> {
        self.arg.as_ref()
    }

    /// str() for an exception
    ///
    /// Returns the message unchanged, an empty string when there is no message, or the
    /// repr of the message for `KeyError`.
    #[must_use]
    pub fn py_str(&self) -> String {
        match (self.exc_type, &self.arg) {
            // KeyError specifically uses repr of the key for str(exc)
            (ExcType::KeyError, Some(exc)) => StringRepr(exc).to_string(),
            (_, Some(arg)) => arg.to_owned(),
            (_, None) => String::new(),
        }
    }

    /// Returns the Python-level type of this exception value.
    pub(crate) fn py_type(&self) -> Type {
        Type::Exception(self.exc_type)
    }

    /// Returns the exception formatted as Python would repr it.
    ///
    /// Writes `TypeName(` followed by the repr of the message (when present) and `)`.
    pub fn py_repr_fmt(&self, f: &mut impl Write) -> std::fmt::Result {
        let type_str: &'static str = self.exc_type.into();
        write!(f, "{type_str}(")?;

        if let Some(arg) = &self.arg {
            string_repr_fmt(arg, f)?;
        }

        f.write_char(')')
    }

    /// Wraps this exception in an `ExceptionRaise` carrying `frame`, with the caret shown.
    pub(crate) fn with_frame(self, frame: RawStackFrame) -> ExceptionRaise {
        ExceptionRaise {
            exc: self,
            frame: Some(frame),
            hide_caret: false,
        }
    }

    /// Wraps this exception in an `ExceptionRaise` whose frame has a position but no name yet.
    pub(crate) fn with_position(self, position: CodeRange) -> ExceptionRaise {
        ExceptionRaise {
            exc: self,
            frame: Some(RawStackFrame::from_position(position)),
            hide_caret: false,
        }
    }

    /// Gets an attribute from this exception.
    ///
    /// Handles the `.args` attribute by allocating a tuple containing the message.
    /// Returns `Ok(None)` for all other attributes.
pub fn py_getattr( &self, attr: &EitherStr, heap: &mut Heap, interns: &Interns, ) -> RunResult> { // Fast path: interned strings can be matched by ID let is_args = attr .static_string() .map_or_else(|| attr.as_str(interns) == "args", |ss| ss == StaticStrings::Args); if is_args { // Construct tuple with 0 or 1 elements based on whether arg exists let elements = if let Some(arg_str) = &self.arg { let str_id = heap.allocate(HeapData::Str(Str::from(arg_str.clone())))?; smallvec![Value::Ref(str_id)] } else { smallvec![] }; Ok(Some(CallResult::Value(allocate_tuple(elements, heap)?))) } else { Ok(None) } } } /// A raised exception with optional stack frame for traceback. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct ExceptionRaise { pub exc: SimpleException, /// The stack frame where the exception was raised (first in vec is closest "bottom" frame). pub frame: Option, /// Whether to hide the caret marker when creating the stack frame. /// /// CPython doesn't show carets for attribute GET errors, but does show them /// for attribute SET errors. This flag allows error creators to specify /// whether the caret should be hidden. #[serde(default)] pub hide_caret: bool, } impl From for ExceptionRaise { fn from(exc: SimpleException) -> Self { Self { exc, frame: None, hide_caret: false, } } } impl From for ExceptionRaise { fn from(exc: MontyException) -> Self { Self { exc: exc.into(), frame: None, hide_caret: false, } } } impl ExceptionRaise { /// Adds a caller's frame as the outermost frame in the traceback chain. /// /// This is used when an exception propagates up through call frames. /// The new frame becomes the ultimate parent (displayed first in traceback, /// since tracebacks show "most recent call last"). /// /// Special case: If the innermost frame has no name yet (created with `with_position`), /// this sets its name instead of creating a new parent. 
/// This happens when the error
    /// is raised from a namespace lookup - the initial frame has the position but not
    /// the function name, which gets filled in as the error propagates.
    pub(crate) fn add_caller_frame(&mut self, position: CodeRange, name: StringId) {
        self.add_caller_frame_inner(position, name, false);
    }

    /// Shared implementation of `add_caller_frame` with an explicit `hide_caret` flag.
    ///
    /// Three cases:
    /// - the innermost frame is nameless: it receives `name` and `hide_caret`, and `position` is unused;
    /// - frames already exist: a new frame is appended as the parent of the outermost one;
    /// - no frame exists: the new frame becomes the only frame.
    fn add_caller_frame_inner(&mut self, position: CodeRange, name: StringId, hide_caret: bool) {
        if let Some(ref mut frame) = self.frame {
            // If innermost frame has no name, set it instead of adding a parent
            // This handles errors from namespace lookups which create nameless frames
            if frame.frame_name.is_none() {
                frame.frame_name = Some(name);
                frame.hide_caret = hide_caret;
                return;
            }
            // Find the outermost frame (the one with no parent) and add the new frame as its parent
            let mut current = frame;
            while current.parent.is_some() {
                current = current.parent.as_mut().unwrap();
            }
            let mut new_frame = RawStackFrame::new(position, name, None);
            new_frame.hide_caret = hide_caret;
            current.parent = Some(Box::new(new_frame));
        } else {
            // No frame yet - create one
            let mut new_frame = RawStackFrame::new(position, name, None);
            new_frame.hide_caret = hide_caret;
            self.frame = Some(new_frame);
        }
    }

    /// Converts this exception to a `MontyException` for the public API.
    ///
    /// Uses `Interns` to resolve `StringId` references to actual strings.
    /// Extracts preview lines from the source code for traceback display.
#[must_use] pub fn into_python_exception(self, interns: &Interns, source: &str) -> MontyException { let traceback = self .frame .map(|frame| { let mut frames = Vec::new(); let mut current = Some(&frame); while let Some(f) = current { frames.push(StackFrame::from_raw(f, interns, source)); current = f.parent.as_deref(); } // Reverse so outermost frame is first (Python's "most recent call last" ordering) frames.reverse(); frames }) .unwrap_or_default(); MontyException::new_full(self.exc.exc_type(), self.exc.arg().cloned(), traceback) } } /// A stack frame for traceback information. /// /// Stores position information and optional function name as StringId. /// The actual name string must be looked up externally when formatting the traceback. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct RawStackFrame { pub position: CodeRange, /// The name of the frame (function name StringId, or None for module-level code). pub frame_name: Option, pub parent: Option>, /// Whether to hide the caret marker in the traceback for this frame. /// /// Set to `true` for: /// - `raise` statements (CPython doesn't show carets for raise) /// - `AttributeError` on attribute access (CPython doesn't show carets for these) pub hide_caret: bool, } impl RawStackFrame { pub(crate) fn new(position: CodeRange, frame_name: StringId, parent: Option<&Self>) -> Self { Self { position, frame_name: Some(frame_name), parent: parent.map(|p| Box::new(p.clone())), hide_caret: false, } } fn from_position(position: CodeRange) -> Self { Self { position, frame_name: None, parent: None, hide_caret: false, } } /// Creates a new frame for a raise statement (no caret will be shown). pub(crate) fn from_raise(position: CodeRange, frame_name: StringId) -> Self { Self { position, frame_name: Some(frame_name), parent: None, hide_caret: true, } } } /// Runtime error types that can occur during execution. 
/// /// Three variants: /// - `Internal`: Bug in interpreter implementation (static message) /// - `Exc`: Python exception that can be caught by try/except (when implemented) /// - `UncatchableExc`: Python exception from resource limits that CANNOT be caught #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) enum RunError { /// Internal interpreter error - indicates a bug in Monty, not user code. Internal(Cow<'static, str>), /// Catchable Python exception (e.g., ValueError, TypeError). Exc(ExceptionRaise), /// Uncatchable Python exception from resource limits (MemoryError, TimeoutError). /// /// These exceptions display with proper tracebacks like normal Python exceptions, /// but cannot be caught by try/except blocks. This prevents untrusted code from /// suppressing resource limit violations. UncatchableExc(ExceptionRaise), } impl From for RunError { fn from(exc: ExceptionRaise) -> Self { Self::Exc(exc) } } impl From for RunError { fn from(exc: SimpleException) -> Self { Self::Exc(exc.into()) } } impl From for RunError { fn from(exc: MontyException) -> Self { Self::Exc(exc.into()) } } impl From for RunError { fn from(err: FormatError) -> Self { let exc_type = match &err { FormatError::Overflow(_) => ExcType::OverflowError, FormatError::InvalidAlignment(_) | FormatError::ValueError(_) => ExcType::ValueError, }; Self::Exc(SimpleException::new_msg(exc_type, err).into()) } } impl RunError { /// Converts this runtime error to a `MontyException` for the public API. /// /// Internal errors are converted to `RuntimeError` exceptions with no traceback. 
#[must_use]
    pub fn into_python_exception(self, interns: &Interns, source: &str) -> MontyException {
        match self {
            Self::Exc(exc) | Self::UncatchableExc(exc) => exc.into_python_exception(interns, source),
            Self::Internal(err) => MontyException::runtime_error(format!("Internal error in monty: {err}")),
        }
    }

    /// Creates an `Internal` error from a static or owned message.
    pub fn internal(msg: impl Into<Cow<'static, str>>) -> Self {
        Self::Internal(msg.into())
    }
}

/// Formats a list of parameter names for error messages.
///
/// Each name is wrapped in single quotes; names are separated by `, ` except the final
/// pair, which is joined with ` and `. An empty slice yields an empty string.
///
/// Examples:
/// - `["a"]` -> `'a'`
/// - `["a", "b"]` -> `'a' and 'b'`
/// - `["a", "b", "c"]` -> `'a', 'b' and 'c'`
fn format_param_names(names: &[&str]) -> String {
    match names {
        [] => String::new(),
        [only] => format!("'{only}'"),
        // Two or more names: everything before the last is comma-separated.
        [rest @ .., last] => {
            let quoted: Vec<String> = rest.iter().map(|n| format!("'{n}'")).collect();
            format!("{} and '{last}'", quoted.join(", "))
        }
    }
}

================================================
FILE: crates/monty/src/exception_public.rs
================================================
use std::fmt::{self, Write};

use crate::{
    exception_private::{ExcType, RawStackFrame},
    intern::Interns,
    parse::CodeRange,
    types::str::StringRepr,
};

/// Public representation of a Monty exception.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct MontyException {
    /// The exception type raised
    exc_type: ExcType,
    /// Optional exception message explaining what went wrong
    message: Option<String>,
    /// Stack trace of the exception, first is the outermost frame shown first in the traceback
    traceback: Vec<StackFrame>,
}

/// Number of identical consecutive frames to show before collapsing.
///
/// CPython shows 3 identical frames, then "[Previous line repeated N more times]".
const REPEAT_FRAMES_SHOWN: usize = 3;

/// Display implementation for MontyException should exactly match python traceback format.
impl fmt::Display for MontyException { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // Print the traceback header if we have frames if !self.traceback.is_empty() { writeln!(f, "Traceback (most recent call last):")?; } // Print frames, collapsing consecutive identical frames like CPython does let mut i = 0; while i < self.traceback.len() { let frame = &self.traceback[i]; // Count consecutive identical frames let mut repeat_count = 1; while i + repeat_count < self.traceback.len() && frames_are_identical(frame, &self.traceback[i + repeat_count]) { repeat_count += 1; } if repeat_count > REPEAT_FRAMES_SHOWN { // Show first REPEAT_FRAMES_SHOWN frames, then collapse the rest for j in 0..REPEAT_FRAMES_SHOWN { write!(f, "{}", &self.traceback[i + j])?; } let collapsed = repeat_count - REPEAT_FRAMES_SHOWN; writeln!(f, " [Previous line repeated {collapsed} more times]")?; i += repeat_count; } else { // Show all frames in this group for j in 0..repeat_count { write!(f, "{}", &self.traceback[i + j])?; } i += repeat_count; } } if let Some(msg) = &self.message { write!(f, "{}: {}", self.exc_type, msg) } else { write!(f, "{}", self.exc_type) } } } impl std::error::Error for MontyException {} impl MontyException { /// Create a new MontyException with the given exception type and message. /// /// You can't provide a traceback here, it's send when raising the exception. #[must_use] pub fn new(exc_type: ExcType, message: Option) -> Self { Self { exc_type, message, traceback: vec![], } } /// The exception type raised. #[must_use] pub fn exc_type(&self) -> ExcType { self.exc_type } /// Optional exception message explaining what went wrong. /// /// Equivalent of python's `exc.args[0]` #[must_use] pub fn message(&self) -> Option<&str> { self.message.as_deref() } /// Optional exception message explaining what went wrong. /// /// This takes ownership of the MontyException and returns an owned String. 
/// /// Equivalent of python's `exc.args[0]` #[must_use] pub fn into_message(self) -> Option { self.message } /// Stack trace of the exception, first is the outermost frame shown first in the traceback #[must_use] pub fn traceback(&self) -> &[StackFrame] { &self.traceback } /// Returns a compact summary of the exception. /// /// Format: `ExceptionType: message` (e.g., `NotImplementedError: feature not supported`) /// If there's no message, just returns the exception type name. #[must_use] pub fn summary(&self) -> String { if let Some(msg) = &self.message { format!("{}: {}", self.exc_type, msg) } else { self.exc_type.to_string() } } /// Returns the exception formatted as Python's repr() would display it. /// /// Format: `ExceptionType('message')` (e.g., `ValueError('invalid value')`) /// Uses appropriate quoting for messages containing quotes. #[must_use] pub fn py_repr(&self) -> String { let type_str: &'static str = self.exc_type.into(); if let Some(msg) = &self.message { format!("{}({})", type_str, StringRepr(msg)) } else { format!("{type_str}()") } } pub(crate) fn new_full(exc_type: ExcType, message: Option, traceback: Vec) -> Self { Self { exc_type, message, traceback, } } pub(crate) fn runtime_error(err: impl fmt::Display) -> Self { Self { exc_type: ExcType::RuntimeError, message: Some(err.to_string()), traceback: vec![], } } } /// Check if two stack frames are identical for the purpose of collapsing repeated frames. /// /// Two frames are identical if they have the same filename, line number, and function name. fn frames_are_identical(a: &StackFrame, b: &StackFrame) -> bool { a.filename == b.filename && a.start.line == b.start.line && a.frame_name == b.frame_name } /// A single frame in a Python traceback. /// /// Contains all the information needed to display a traceback line: /// the file location, function name, and optional source code preview. 
/// /// # Caret Markers /// /// Monty uses only `~` characters for caret markers in tracebacks, unlike CPython 3.11+ /// which uses `~` for the function name and `^` for arguments (e.g., `~~~~~~~~~~~^^^^^^^^^^^`). /// This simplification is intentional - Monty marks the entire expression span uniformly. #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)] pub struct StackFrame { /// The filename where the code is located. pub filename: String, /// Start position in the source code. pub start: CodeLoc, /// End position in the source code. pub end: CodeLoc, /// The name of the frame (function name, or None for module-level code). pub frame_name: Option, /// The source code line for preview in the traceback. pub preview_line: Option, /// Whether to hide the caret marker in the traceback for this frame. /// /// Set to `true` for: /// - `raise` statements (CPython doesn't show carets for raise) /// - `AttributeError` on attribute access (CPython doesn't show carets for these) pub hide_caret: bool, /// Whether to hide the `, in ` part of the frame line. /// /// Set to `true` for `SyntaxError` where CPython doesn't show the frame name. /// CPython's SyntaxError format: ` File "...", line N` /// vs runtime error format: ` File "...", line N, in ` pub hide_frame_name: bool, } impl fmt::Display for StackFrame { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // SyntaxError format: ` File "...", line N` // Runtime error format: ` File "...", line N, in ` if self.hide_frame_name { write!(f, r#" File "{}", line {}"#, self.filename, self.start.line)?; } else { write!(f, r#" File "{}", line {}, in "#, self.filename, self.start.line)?; if let Some(frame_name) = &self.frame_name { f.write_str(frame_name)?; } else { f.write_str("")?; } } if let Some(line) = &self.preview_line { // Strip leading whitespace like CPython does let trimmed = line.trim_start(); writeln!(f, "\n {trimmed}")?; // Hide caret for raise statements, AttributeError, etc. 
if !self.hide_caret { let leading_spaces = line.len() - trimmed.len(); // Calculate caret position relative to the trimmed line // Column is 1-indexed, so subtract 1, then subtract leading spaces we stripped let caret_start = if self.start.column as usize > leading_spaces { 4 + self.start.column as usize - leading_spaces - 1 } else { 4 }; f.write_str(&" ".repeat(caret_start))?; writeln!(f, "{}", "~".repeat((self.end.column - self.start.column) as usize))?; } } else { f.write_char('\n')?; } Ok(()) } } impl StackFrame { pub(crate) fn from_raw(f: &RawStackFrame, interns: &Interns, source: &str) -> Self { let filename = interns.get_str(f.position.filename).to_string(); Self { filename, start: f.position.start(), end: f.position.end(), frame_name: f.frame_name.map(|id| interns.get_str(id).to_string()), preview_line: f .position .preview_line_number() .and_then(|ln| source.lines().nth(ln as usize)) .map(str::to_string), hide_caret: f.hide_caret, hide_frame_name: false, } } /// Creates a `StackFrame` from a `CodeRange` for SyntaxError. /// /// Sets `hide_frame_name: true` because CPython's SyntaxError format doesn't /// show the `, in ` part. pub(crate) fn from_position_syntax_error(position: CodeRange, filename: &str, source: &str) -> Self { Self { filename: filename.to_string(), start: position.start(), end: position.end(), frame_name: None, preview_line: position .preview_line_number() .and_then(|ln| source.lines().nth(ln as usize)) .map(str::to_string), hide_caret: false, hide_frame_name: true, } } pub(crate) fn from_position(position: CodeRange, filename: &str, source: &str) -> Self { Self { filename: filename.to_string(), start: position.start(), end: position.end(), frame_name: None, preview_line: position .preview_line_number() .and_then(|ln| source.lines().nth(ln as usize)) .map(str::to_string), hide_caret: false, hide_frame_name: false, } } /// Creates a `StackFrame` from a `CodeRange` without caret markers. 
/// /// Used for errors like `ImportError` where CPython doesn't show caret markers. pub(crate) fn from_position_no_caret(position: CodeRange, filename: &str, source: &str) -> Self { Self { filename: filename.to_string(), start: position.start(), end: position.end(), frame_name: None, preview_line: position .preview_line_number() .and_then(|ln| source.lines().nth(ln as usize)) .map(str::to_string), hide_caret: true, hide_frame_name: false, } } } /// A line and column position in source code. /// /// Uses 1-based indexing for both line and column to match Python's conventions. #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)] pub struct CodeLoc { /// Line number (1-based). pub line: u16, /// Column number (1-based). pub column: u16, } impl Default for CodeLoc { fn default() -> Self { Self { line: 1, column: 1 } } } impl CodeLoc { /// Creates a new CodeLoc from usize values. /// /// Lines and columns numbers are 1-indexed for display, hence `+1` /// /// # Panics /// Panics if the line or column number overflows `u16`. #[must_use] pub fn new(line: usize, column: usize) -> Self { Self { line: u16::try_from(line).expect("Line number overflow") + 1, column: u16::try_from(column).expect("Column number overflow") + 1, } } } ================================================ FILE: crates/monty/src/expressions.rs ================================================ use crate::{ args::ArgExprs, builtins::Builtins, fstring::FStringPart, intern::{BytesId, LongIntId, StringId}, namespace::NamespaceId, parse::{CodeRange, ParsedSignature, Try}, signature::Signature, value::{EitherStr, Marker, Value}, }; /// Indicates which namespace a variable reference belongs to. 
/// /// This is determined at prepare time based on Python's scoping rules: /// - Variables assigned in a function are Local (unless declared `global`) /// - Variables only read (not assigned) that exist at module level are Global /// - The `global` keyword explicitly marks a variable as Global /// - Variables declared `nonlocal` or implicitly captured from enclosing scopes /// are accessed through Cells #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)] pub enum NameScope { /// Variable is in the current frame's local namespace (assigned somewhere in this function). /// /// If accessed before assignment, raises `UnboundLocalError`. #[default] Local, /// Variable reference that doesn't exist in any scope. /// /// A local slot is allocated but never assigned. Accessing raises `NameError` /// (not `UnboundLocalError`) because the name was never defined anywhere. LocalUnassigned, /// Variable is in the module-level global namespace Global, /// Variable accessed through a cell (heap-allocated container). /// /// Used for both: /// - Variables captured from enclosing scopes (free variables) /// - Variables in this function that are captured by nested functions (cell variables) /// /// The namespace slot contains `Value::Ref(cell_id)` pointing to a `HeapData::Cell`. /// Access requires dereferencing through the cell. Cell, } /// An identifier (variable or function name) with source location and scope information. /// /// The name is stored as a `StringId` which indexes into the string interner. /// To get the actual string, look it up in the `Interns` storage. #[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] pub struct Identifier { pub position: CodeRange, /// Interned name ID - look up in Interns to get the actual string. 
pub name_id: StringId, opt_namespace_id: Option, /// Which namespace this identifier refers to (determined at prepare time) pub scope: NameScope, } impl Identifier { /// Creates a new identifier with unknown scope (to be resolved during prepare phase). pub fn new(name_id: StringId, position: CodeRange) -> Self { Self { name_id, position, opt_namespace_id: None, scope: NameScope::Local, } } /// Creates a new identifier with resolved namespace index and explicit scope. pub fn new_with_scope(name_id: StringId, position: CodeRange, namespace_id: NamespaceId, scope: NameScope) -> Self { Self { name_id, position, opt_namespace_id: Some(namespace_id), scope, } } pub fn namespace_id(&self) -> NamespaceId { self.opt_namespace_id .expect("Identifier not prepared with namespace_id") } } /// Target of a function call expression. /// /// Represents a callable that can be either: /// - A builtin function or exception resolved at parse time (`print`, `len`, `ValueError`, etc.) /// - A name that will be looked up in the namespace at runtime (for callable variables) /// /// Separate from Value to allow deriving Clone without Value's Clone restrictions. #[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] pub enum Callable { /// A builtin function like `print`, `len`, `str`, etc. Builtin(Builtins), /// A name to be looked up in the namespace at runtime (e.g., `x` in `x = len; x('abc')`). Name(Identifier), } /// An item in a list, tuple, or set literal. /// /// PEP 448 allows any number of `*expr` unpack items to appear alongside /// regular values in list/tuple/set literals (e.g., `[1, *a, 2]`). /// This enum represents either a plain value or an iterable to be unpacked. /// /// Used in `Expr::List`, `Expr::Tuple`, and `Expr::Set` to represent each /// element of the literal. When the fast path is taken (no unpack items), /// only `Value` variants are present and the compiler emits a single /// `BuildList`/`BuildTuple`/`BuildSet` instruction. 
When any `Unpack` item /// is present, the compiler emits `Build*(0)` followed by per-item /// `ListAppend`/`SetAdd` and `ListExtend`/`SetExtend` instructions. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub(crate) enum SequenceItem { /// A plain expression value in the literal. Value(ExprLoc), /// An `*expr` unpack — the iterable is expanded in-place. Unpack(ExprLoc), } /// An item in a dict literal. /// /// PEP 448 allows `**expr` unpack items to appear alongside normal key:value /// pairs in dict literals (e.g., `{'a': 1, **d, 'b': 2}`). Duplicate keys /// from later items silently overwrite earlier ones (unlike `**kwargs` in /// function calls, where duplicates raise `TypeError`). /// /// Used in `Expr::Dict`. When no `Unpack` items are present the compiler /// emits a single `BuildDict` instruction. Otherwise it emits `BuildDict(0)` /// followed by per-item `DictSetItem` and `DictUpdate` instructions. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub(crate) enum DictItem { /// A plain `key: value` pair. Pair(ExprLoc, ExprLoc), /// A `**expr` unpack — the mapping is merged in-place, later keys win. Unpack(ExprLoc), } /// An expression in the AST. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum Expr { Literal(Literal), Builtin(Builtins), Name(Identifier), /// Function call expression. /// /// The `callable` can be a Builtin, ExcType (resolved at parse time), or a Name /// that will be looked up in the namespace at runtime. Call { callable: Callable, /// ArgExprs is relatively large and would require Box anyway since it uses ExprLoc, so keep Expr small /// by using a box here args: Box, }, /// Method call on an object (e.g., `obj.method(args)`). /// /// The object expression is evaluated first, then the method is looked up /// and called with the given arguments. Supports chained attribute access /// like `a.b.c.method()`. 
AttrCall { object: Box, attr: EitherStr, /// same as above for Box args: Box, }, /// Expression call (e.g., `(lambda x: x + 1)(5)` or `get_func()(args)`). /// /// Calls an arbitrary expression as a callable. The callable expression /// is evaluated first, then called with the given arguments. IndirectCall { /// The expression that evaluates to a callable. callable: Box, args: Box, }, /// Attribute access expression (e.g., `point.x` or `a.b.c`). /// /// Retrieves the value of an attribute from an object. For dataclasses, /// this returns the field value. For other types, this may trigger /// special attribute handling. Supports chained attribute access. AttrGet { object: Box, attr: EitherStr, }, Op { left: Box, op: Operator, right: Box, }, CmpOp { left: Box, op: CmpOperator, right: Box, }, /// Chain comparison expression: `a < b < c < d` /// /// Unlike single comparisons, chain comparisons evaluate intermediate values /// only once and short-circuit on the first false result. Compiled to bytecode /// that uses stack manipulation (Dup, Rot) rather than temporary variables, /// avoiding namespace pollution. ChainCmp { /// The leftmost operand in the chain. left: Box, /// Sequence of (operator, operand) pairs: `[(op1, b), (op2, c), ...]` comparisons: Vec<(CmpOperator, ExprLoc)>, }, /// List literal: `[a, *b, c]` /// /// Each element is a `SequenceItem` which may be a plain value or an `*unpack`. /// When no unpack items are present (common case), the compiler emits a single /// `BuildList(N)`. When any unpack is present it emits `BuildList(0)` followed /// by per-item `ListAppend`/`ListExtend` instructions. List(Vec), /// Tuple literal: `(a, *b, c)` or `a, *b, c` /// /// Same compilation strategy as `List` but ends with `ListToTuple`. Tuple(Vec), Subscript { object: Box, index: Box, }, /// Slice literal expression from `x[start:stop:step]` syntax. /// /// Each component is optional (None means use the default for that position). 
/// This expression creates a `slice` object when evaluated. Slice { lower: Option>, upper: Option>, step: Option>, }, /// Dict literal: `{'a': 1, **d, 'b': 2}` /// /// Each element is a `DictItem` which may be a plain `key: value` pair or a `**unpack`. /// When no unpack items are present the compiler emits `BuildDict(N)`. Otherwise it /// emits `BuildDict(0)` followed by per-item `DictSetItem`/`DictUpdate` instructions. /// Duplicate keys from later items silently overwrite earlier ones. Dict(Vec), /// Set literal expression: `{1, *a, 2}`. /// /// Note: `{}` is always a dict, not an empty set. Use `set()` for empty sets. /// Compilation strategy mirrors `List` but uses `SetAdd`/`SetExtend`. Set(Vec), /// Unary `not` expression - evaluates to the boolean negation of the operand's truthiness. Not(Box), /// Unary minus expression - negates a numeric value. UnaryMinus(Box), /// Unary plus expression - returns value as-is for numbers, converts bools to int. UnaryPlus(Box), /// Unary bitwise NOT expression - inverts all bits of an integer. UnaryInvert(Box), /// Await expression - suspends execution until the awaited value resolves. /// /// Can await `ExternalFuture`, `Coroutine`, or `GatherFuture` values. /// Raises `TypeError` for non-awaitable values. /// Unlike standard Python, `await` is allowed at module level (like Jupyter notebooks). Await(Box), /// F-string expression containing literal and interpolated parts. /// /// At evaluation time, each part is processed in sequence: /// - Literal parts are used directly /// - Interpolation parts have their expression evaluated, converted, and formatted /// /// The results are concatenated to produce the final string. FString(Vec), /// Conditional expression (ternary operator): `body if test else orelse` /// /// Only one of body/orelse is evaluated based on the truthiness of test. /// This implements short-circuit evaluation - the branch not taken is never executed. 
IfElse { test: Box, body: Box, orelse: Box, }, /// List comprehension: `[elt for target in iter if cond...]` /// /// Builds a new list by iterating and optionally filtering. Loop variables /// are scoped to the comprehension and do not leak to the enclosing scope. ListComp { elt: Box, generators: Vec, }, /// Set comprehension: `{elt for target in iter if cond...}` /// /// Builds a new set by iterating and optionally filtering. Duplicate values /// are deduplicated. Loop variables are scoped to the comprehension. SetComp { elt: Box, generators: Vec, }, /// Dict comprehension: `{key: value for target in iter if cond...}` /// /// Builds a new dict by iterating and optionally filtering. Later values /// overwrite earlier ones for duplicate keys. Loop variables are scoped /// to the comprehension. DictComp { key: Box, value: Box, generators: Vec, }, /// Raw lambda expression from the parser, before preparation. /// /// This variant is produced during parsing and contains unprepared data. /// During the prepare phase, it gets converted to `Expr::Lambda` with a /// fully prepared `PreparedFunctionDef`. LambdaRaw { /// The interned `` name ID. name_id: StringId, /// The parsed lambda signature (parameters and defaults). signature: ParsedSignature, /// The lambda body expression (not yet prepared). body: Box, }, /// Lambda expression: `lambda args: body` (prepared form). /// /// A lambda is an anonymous function that returns a single expression. /// It's compiled identically to a regular function, but with the name `` /// and an implicit return of the body expression. The resulting function value /// stays on the stack as an expression result (not stored to a name). Lambda { /// The prepared function definition containing signature, body, and closure info. /// The body is wrapped as `[Node::Return(body_expr)]` during preparation. 
func_def: Box, }, /// Named expression (walrus operator): `(target := value)` /// /// Evaluates `value`, assigns it to `target`, and returns the value as the /// expression result. The target is treated as an assignment for scope analysis, /// so it creates a local binding in the enclosing scope. /// /// Per PEP 572, in comprehensions the target binds in the enclosing scope, /// not the comprehension's implicit scope. Named { target: Identifier, value: Box, }, } /// Target for tuple unpacking - can be a single name, nested tuple, or starred target. /// /// Supports recursive structures like `(a, b), c` or `a, (b, c)`. /// Also supports starred targets like `first, *rest = [1, 2, 3, 4]`. /// Used in assignment statements, for loop targets, and comprehension targets. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum UnpackTarget { /// Single identifier: `a` Name(Identifier), /// Nested tuple: `(a, b)` - can contain further nested tuples Tuple { /// The targets to unpack into (can be names or nested tuples) targets: Vec, /// Source position covering all targets (for error caret placement) position: CodeRange, }, /// Starred target: `*rest` - captures remaining values into a list. /// /// Only one starred target is allowed per unpacking level. Starred(Identifier), } /// A generator clause in a comprehension: `for target in iter [if cond1] [if cond2]...` /// /// Represents one `for` clause with zero or more `if` filters. Multiple generators /// create nested iteration (the rightmost varies fastest). #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct Comprehension { /// Loop variable - either single identifier or tuple unpacking pattern. pub target: UnpackTarget, /// Iterable expression to loop over. pub iter: ExprLoc, /// Zero or more filter conditions (all must be truthy for the element to be included). 
pub ifs: Vec, } impl Expr { pub fn is_none(&self) -> bool { matches!(self, Self::Literal(Literal::None)) } } /// Represents values that can be produced purely from the parser/prepare pipeline. /// /// Const values are intentionally detached from the runtime heap so we can keep /// parse-time transformations (constant folding, namespace seeding, etc.) free from /// reference-count semantics. Only once execution begins are these literals turned /// into real `Value`s that participate in the interpreter's runtime rules. /// /// Note: unlike the AST `Constant` type, we store tuples only as expressions since they /// can't always be recorded as constants. #[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] pub enum Literal { Ellipsis, None, Bool(bool), Int(i64), Float(f64), /// An interned string literal. The StringId references the string in the Interns table. Str(StringId), /// An interned bytes literal. The BytesId references the bytes in the Interns table. Bytes(BytesId), /// An interned long integer literal. The `LongIntId` references the value in the Interns table. /// Used for integer literals that exceed the i64 range. LongInt(LongIntId), /// A marker value (e.g., typing constructs like Any, Optional, etc.). Marker(Marker), } impl From for Value { /// Converts the literal into its runtime `Value` counterpart. /// /// This is the only place parse-time data crosses the boundary into runtime /// semantics, ensuring every literal follows the same conversion path. 
fn from(literal: Literal) -> Self { match literal { Literal::Ellipsis => Self::Ellipsis, Literal::None => Self::None, Literal::Bool(b) => Self::Bool(b), Literal::Int(v) => Self::Int(v), Literal::Float(v) => Self::Float(v), Literal::Str(string_id) => Self::InternString(string_id), Literal::Bytes(bytes_id) => Self::InternBytes(bytes_id), Literal::LongInt(long_int_id) => Self::InternLongInt(long_int_id), Literal::Marker(marker) => Self::Marker(marker), } } } /// An expression with its source location. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct ExprLoc { pub position: CodeRange, pub expr: Expr, } impl ExprLoc { pub fn new(position: CodeRange, expr: Expr) -> Self { Self { position, expr } } } /// An AST node parameterized by the function definition type. /// /// This generic enum represents statements in both parsed and prepared forms: /// - `Node` (aka `ParseNode`): Output of the parser, contains unprepared function bodies /// - `Node` (aka `PreparedNode`): Output of prepare phase, has resolved names /// /// Some variants (`Pass`, `Global`, `Nonlocal`) only appear in parsed form and are filtered /// out during the prepare phase. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum Node { /// No-op statement. Only present in parsed form, filtered out during prepare. Pass, Expr(ExprLoc), Return(ExprLoc), ReturnNone, Raise(Option), Assert { test: ExprLoc, msg: Option, }, Assign { target: Identifier, object: ExprLoc, }, /// Tuple unpacking assignment (e.g., `a, b = some_tuple` or `(a, b), c = nested`). /// /// The right-hand side is evaluated, then unpacked into the targets in order. /// Supports nested unpacking like `(a, b), c = ((1, 2), 'x')`. 
UnpackAssign { /// The targets to unpack into (can be names or nested tuples) targets: Vec, /// Source position covering all targets (for error message caret placement) targets_position: CodeRange, object: ExprLoc, }, OpAssign { target: Identifier, op: Operator, object: ExprLoc, }, /// Augmented subscript assignment (e.g., `totals[key] += value`). /// /// This evaluates the container and index exactly once, then performs the /// inplace operation on the current item before storing the result back. /// Limiting duplicate evaluation is important because index expressions may /// have side effects and CPython only evaluates them once. SubscriptOpAssign { target: Identifier, index: ExprLoc, op: Operator, object: ExprLoc, /// Position of the subscript expression (e.g., `totals[key]`) for traceback carets. target_position: CodeRange, }, SubscriptAssign { target: Identifier, index: ExprLoc, value: ExprLoc, /// Position of the subscript expression (e.g., `lst[10]`) for traceback carets. target_position: CodeRange, }, /// Attribute assignment (e.g., `point.x = 5` or `a.b.c = 5`). /// /// Assigns a value to an attribute on an object. For mutable dataclasses, /// this sets the field value. Returns an error for immutable objects. /// Supports chained attribute access on the left-hand side. AttrAssign { object: ExprLoc, attr: EitherStr, target_position: CodeRange, value: ExprLoc, }, For { /// Loop target - either a single identifier or tuple unpacking pattern. target: UnpackTarget, iter: ExprLoc, body: Vec, or_else: Vec, }, /// While loop statement: `while test: body [else: orelse]` /// /// Executes body repeatedly while test is truthy. If the loop exits normally /// (not via break), the else block runs. While { test: ExprLoc, body: Vec, or_else: Vec, }, /// Break statement - exits the innermost loop. /// /// When executed, control flow jumps past the loop's else block (if any). /// Must be inside a loop, otherwise a `SyntaxError` is raised at compile time. 
Break { position: CodeRange, }, /// Continue statement - jumps to the next iteration of the innermost loop. /// /// When executed, control flow jumps back to the loop's iterator advancement. /// Must be inside a loop, otherwise a `SyntaxError` is raised at compile time. Continue { position: CodeRange, }, If { test: ExprLoc, body: Vec, or_else: Vec, }, FunctionDef(F), /// Global variable declaration. Only present in parsed form, consumed during prepare. /// /// Declares that the listed names refer to module-level (global) variables, /// allowing functions to read and write them instead of creating local variables. Global { position: CodeRange, names: Vec, }, /// Nonlocal variable declaration. Only present in parsed form, consumed during prepare. /// /// Declares that the listed names refer to variables in enclosing function scopes, /// allowing nested functions to read and write them instead of creating local variables. Nonlocal { position: CodeRange, names: Vec, }, /// Try/except/else/finally block. /// /// Executes body, catches matching exceptions with handlers, runs else if no exception, /// and always runs finally. Try(Try), /// Import statement (e.g., `import sys`, `import sys as s`). /// /// Loads a module and binds it to a name in the current namespace. Import { /// The module name to import (e.g., "sys", "typing"). module_name: StringId, /// The binding target - contains the name (or alias), position, and namespace slot. /// After prepare phase, this includes the resolved namespace slot for storing the module. binding: Identifier, }, /// From-import statement (e.g., `from typing import TYPE_CHECKING`). /// /// Imports specific names from a module into the current namespace. ImportFrom { /// The module name to import from (e.g., "typing"). module_name: StringId, /// Names to import: (import_name, binding) pairs. 
/// The import_name is the name in the module, the binding is the local name /// (alias if provided, otherwise the import name) with resolved namespace slot. names: Vec<(StringId, Identifier)>, /// Source position for error reporting. position: CodeRange, }, } /// A prepared function definition with resolved names and scope information. /// /// This is created during the prepare phase and contains everything needed to /// compile the function to bytecode. The function body has all names resolved /// to namespace indices with proper scoping. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct PreparedFunctionDef { /// The function name identifier with resolved namespace index. pub name: Identifier, /// The function signature with parameter names and default counts. pub signature: Signature, /// The prepared function body with resolved names. pub body: Vec>, /// Number of local variable slots needed in the namespace. pub namespace_size: usize, /// Enclosing namespace slots for variables captured from enclosing scopes. /// /// At definition time: look up cell HeapId from enclosing namespace at each slot. /// At call time: captured cells are pushed sequentially (our slots are implicit). pub free_var_enclosing_slots: Vec, /// Number of cell variables (captured by nested functions). /// /// At call time, this many cells are created and pushed right after params. /// Their slots are implicitly params.len()..params.len()+cell_var_count. pub cell_var_count: usize, /// Maps cell variable indices to their corresponding parameter indices, if any. /// /// When a parameter is also captured by nested functions (cell variable), its value /// must be copied into the cell after binding. Each entry corresponds to a cell /// (index 0..cell_var_count), and contains `Some(param_index)` if that cell is for /// a parameter, or `None` otherwise. pub cell_param_indices: Vec>, /// Prepared default value expressions, evaluated at function definition time. 
/// /// Layout: `[pos_defaults...][arg_defaults...][kwarg_defaults...]` /// Each group contains only the parameters that have defaults, in declaration order. /// The counts in `signature` indicate how many defaults exist for each group. pub default_exprs: Vec, /// Whether this is an async function (`async def`). /// /// When true, calling this function creates a `Coroutine` object instead of /// immediately pushing a frame. pub is_async: bool, } /// Type alias for prepared AST nodes (output of prepare phase). pub type PreparedNode = Node; /// Binary operators for arithmetic, bitwise, and boolean operations. /// /// Uses strum `Display` derive with per-variant serialization for operator symbols. #[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)] pub enum Operator { // `+` Add, // `-` Sub, // `*` Mult, // `@` MatMult, // `/` Div, // `%` Mod, // `**` Pow, // `<<` LShift, // `>>` RShift, // `|` BitOr, // `^` BitXor, // `&` BitAnd, // `//` FloorDiv, // bool operators // `and` And, // `or` Or, } /// Defined separately since these operators always return a bool #[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)] pub enum CmpOperator { Eq, NotEq, Lt, LtE, Gt, GtE, Is, IsNot, In, NotIn, // we should support floats too, either via a Number type, or ModEqInt and ModEqFloat ModEq(i64), } ================================================ FILE: crates/monty/src/fstring.rs ================================================ //! F-string type definitions and formatting functions. //! //! This module contains the AST types for f-strings (formatted string literals) //! and the runtime formatting functions used by the bytecode VM. //! //! F-strings can contain literal text and interpolated expressions with optional //! conversion flags (`!s`, `!r`, `!a`) and format specifications. 
use std::str::FromStr; use crate::{ bytecode::VM, exception_private::{ExcType, RunError, SimpleException}, expressions::ExprLoc, intern::StringId, resource::ResourceTracker, types::{PyTrait, Type}, value::Value, }; // ============================================================================ // F-string type definitions // ============================================================================ /// Conversion flags for f-string interpolations. /// /// These control how the value is converted to string before formatting: /// - `None`: Use default string conversion (equivalent to `str()`) /// - `Str` (`!s`): Explicitly call `str()` /// - `Repr` (`!r`): Call `repr()` for debugging representation /// - `Ascii` (`!a`): Call `ascii()` for ASCII-safe representation #[derive(Debug, Clone, Copy, Default, PartialEq, serde::Serialize, serde::Deserialize)] pub enum ConversionFlag { #[default] None, /// `!s` - convert using `str()` Str, /// `!r` - convert using `repr()` Repr, /// `!a` - convert using `ascii()` (escapes non-ASCII characters) Ascii, } /// A single part of an f-string. /// /// F-strings are composed of literal text segments and interpolated expressions. /// For example, `f"Hello {name}!"` has three parts: /// - `Literal(interned_hello)` (StringId for "Hello ") /// - `Interpolation { expr: name, ... }` /// - `Literal(interned_exclaim)` (StringId for "!") #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum FStringPart { /// Literal text segment (e.g., "Hello " in `f"Hello {name}"`) /// The StringId references the interned string in the Interns table. 
Literal(StringId), /// Interpolated expression with optional conversion and format spec Interpolation { /// The expression to evaluate expr: Box, /// Conversion flag: `None`, `!s` (str), `!r` (repr), `!a` (ascii) conversion: ConversionFlag, /// Optional format specification (can contain nested interpolations) format_spec: Option, /// Debug prefix for `=` specifier (e.g., "a=" for f'{a=}', " a = " for f'{ a = }'). /// When present, this text is prepended to the output and repr conversion is used /// by default (unless an explicit conversion is specified). debug_prefix: Option, }, } /// Format specification for f-string interpolations. /// /// Can be either a pre-parsed static spec or contain nested interpolations. /// For example: /// - `f"{value:>10}"` has `FormatSpec::Static(ParsedFormatSpec { ... })` /// - `f"{value:{width}}"` has `FormatSpec::Dynamic` with the `width` variable #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum FormatSpec { /// Pre-parsed static format spec (e.g., ">10s", ".2f") /// /// Parsing happens at parse time to avoid runtime string parsing overhead. /// Invalid specs cause a parse error immediately. Static(ParsedFormatSpec), /// Dynamic format spec with nested f-string parts /// /// These must be evaluated at runtime, then parsed into a `ParsedFormatSpec`. Dynamic(Vec), } /// Parsed format specification following Python's format mini-language. /// /// Format: `[[fill]align][sign][z][#][0][width][grouping_option][.precision][type]` /// /// This struct is parsed at parse time for static format specs, avoiding runtime /// string parsing. For dynamic format specs, parsing happens after evaluation. 
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] pub struct ParsedFormatSpec { /// Fill character for padding (default: space) pub fill: char, /// Alignment: '<' (left), '>' (right), '^' (center), '=' (sign-aware) pub align: Option, /// Sign handling: '+' (always), '-' (negative only), ' ' (space for positive) pub sign: Option, /// Whether to zero-pad numbers pub zero_pad: bool, /// Minimum field width pub width: usize, /// Precision for floats or max width for strings pub precision: Option, /// Type character: 's', 'd', 'f', 'e', 'g', etc. pub type_char: Option, } impl FromStr for ParsedFormatSpec { type Err = String; /// Parses a format specification string into its components. /// /// Returns an error if the specifier contains invalid or unrecognized characters. /// The error includes the original specifier for use in error messages. fn from_str(spec: &str) -> Result { if spec.is_empty() { return Ok(Self { fill: ' ', ..Default::default() }); } let mut result = Self { fill: ' ', ..Default::default() }; let mut chars = spec.chars().peekable(); // Parse fill and align: [[fill]align] let first = chars.peek().copied(); let second_pos = spec.chars().nth(1); if let Some(second) = second_pos { if matches!(second, '<' | '>' | '^' | '=') { // First char is fill, second is align result.fill = first.unwrap_or(' '); chars.next(); result.align = chars.next(); } else if matches!(first, Some('<' | '>' | '^' | '=')) { result.align = chars.next(); } } else if matches!(first, Some('<' | '>' | '^' | '=')) { result.align = chars.next(); } // Parse sign: +, -, or space if matches!(chars.peek(), Some('+' | '-' | ' ')) { result.sign = chars.next(); } // Skip '#' (alternate form) for now if chars.peek() == Some(&'#') { chars.next(); } // Parse zero-padding flag (must come before width) if chars.peek() == Some(&'0') { result.zero_pad = true; chars.next(); } // Parse width let mut width_str = String::new(); while let Some(&c) = chars.peek() { if c.is_ascii_digit() { 
width_str.push(c); chars.next(); } else { break; } } if !width_str.is_empty() { result.width = width_str.parse().unwrap_or(0); } // Skip grouping option (comma or underscore) if matches!(chars.peek(), Some(',' | '_')) { chars.next(); } // Parse precision: .N if chars.peek() == Some(&'.') { chars.next(); let mut prec_str = String::new(); while let Some(&c) = chars.peek() { if c.is_ascii_digit() { prec_str.push(c); chars.next(); } else { break; } } if !prec_str.is_empty() { result.precision = Some(prec_str.parse().unwrap_or(0)); } } // Parse type character: s, d, f, e, g, etc. if let Some(&c) = chars.peek() && matches!( c, 's' | 'd' | 'f' | 'F' | 'e' | 'E' | 'g' | 'G' | 'n' | '%' | 'b' | 'o' | 'x' | 'X' | 'c' ) { result.type_char = Some(c); chars.next(); } // Error if there are any unconsumed characters if chars.peek().is_some() { return Err(spec.to_owned()); } Ok(result) } } // ============================================================================ // Format errors // ============================================================================ /// Error type for format specification failures. /// /// These errors are returned from formatting functions and should be converted /// to appropriate Python exceptions (usually ValueError) by the VM. #[derive(Debug, Clone)] pub enum FormatError { /// Invalid alignment for the given type (e.g., '=' alignment on strings). InvalidAlignment(String), /// Value out of range (e.g., character code > 0x10FFFF). Overflow(String), /// Generic value error (e.g., invalid base, invalid Unicode). ValueError(String), } impl std::fmt::Display for FormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::InvalidAlignment(msg) | Self::Overflow(msg) | Self::ValueError(msg) => { write!(f, "{msg}") } } } } /// Formats a value according to a format specification, applying type-appropriate formatting. 
/// /// Dispatches to the appropriate formatting function based on the value type and format spec: /// - Integers: `format_int`, `format_int_base`, `format_char` /// - Floats: `format_float_f`, `format_float_e`, `format_float_g`, `format_float_percent` /// - Strings: `format_string` /// /// Returns a `ValueError` if the format type character is incompatible with the value type. pub fn format_with_spec( value: &Value, spec: &ParsedFormatSpec, vm: &VM<'_, '_, impl ResourceTracker>, ) -> Result { let value_type = value.py_type(vm.heap); match (value, spec.type_char) { // Integer formatting (Value::Int(n), None | Some('d')) => Ok(format_int(*n, spec)), (Value::Int(n), Some('b')) => Ok(format_int_base(*n, 2, spec)?), (Value::Int(n), Some('o')) => Ok(format_int_base(*n, 8, spec)?), (Value::Int(n), Some('x')) => Ok(format_int_base(*n, 16, spec)?), (Value::Int(n), Some('X')) => Ok(format_int_base(*n, 16, spec)?.to_uppercase()), (Value::Int(n), Some('c')) => Ok(format_char(*n, spec)?), // Float formatting (Value::Float(f), None | Some('g' | 'G')) => Ok(format_float_g(*f, spec)), (Value::Float(f), Some('f' | 'F')) => Ok(format_float_f(*f, spec)), (Value::Float(f), Some('e')) => Ok(format_float_e(*f, spec, false)), (Value::Float(f), Some('E')) => Ok(format_float_e(*f, spec, true)), (Value::Float(f), Some('%')) => Ok(format_float_percent(*f, spec)), // Int to float formatting (Python allows this) (Value::Int(n), Some('f' | 'F')) => Ok(format_float_f(*n as f64, spec)), (Value::Int(n), Some('e')) => Ok(format_float_e(*n as f64, spec, false)), (Value::Int(n), Some('E')) => Ok(format_float_e(*n as f64, spec, true)), (Value::Int(n), Some('g' | 'G')) => Ok(format_float_g(*n as f64, spec)), (Value::Int(n), Some('%')) => Ok(format_float_percent(*n as f64, spec)), // String formatting (including InternString and heap strings) (_, None | Some('s')) if value_type == Type::Str => { let s = value.py_str(vm); Ok(format_string(&s, spec)?) 
} // Bool as int (Value::Bool(b), Some('d')) => Ok(format_int(i64::from(*b), spec)), // No type specifier: convert to string and format (_, None) => { let s = value.py_str(vm); Ok(format_string(&s, spec)?) } // Type mismatch errors (_, Some(c)) => Err(SimpleException::new_msg( ExcType::ValueError, format!("Unknown format code '{c}' for object of type '{value_type}'"), ) .into()), } } /// Encodes a ParsedFormatSpec into a u64 for storage in bytecode constants. /// /// Encoding layout (fits in 48 bits): /// - bits 0-7: fill character (as ASCII, default space=32) /// - bits 8-10: align (0=none, 1='<', 2='>', 3='^', 4='=') /// - bits 11-12: sign (0=none, 1='+', 2='-', 3=' ') /// - bit 13: zero_pad /// - bits 14-29: width (16 bits, max 65535) /// - bits 30-45: precision (16 bits, using 0xFFFF as "no precision") /// - bits 46-50: type_char (0=none, 1-15=explicit type mapping: b,c,d,e,E,f,F,g,G,n,o,s,x,X,%) pub fn encode_format_spec(spec: &ParsedFormatSpec) -> u64 { let fill = spec.fill as u64; let align = match spec.align { None => 0u64, Some('<') => 1, Some('>') => 2, Some('^') => 3, Some('=') => 4, Some(_) => 0, }; let sign = match spec.sign { None => 0u64, Some('+') => 1, Some('-') => 2, Some(' ') => 3, Some(_) => 0, }; let zero_pad = u64::from(spec.zero_pad); let width = spec.width as u64; let precision = spec.precision.map_or(0xFFFFu64, |p| p as u64); let type_char = spec.type_char.map_or(0u64, |c| match c { 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'E' => 5, 'f' => 6, 'F' => 7, 'g' => 8, 'G' => 9, 'n' => 10, 'o' => 11, 's' => 12, 'x' => 13, 'X' => 14, '%' => 15, _ => 0, }); fill | (align << 8) | (sign << 11) | (zero_pad << 13) | (width << 14) | (precision << 30) | (type_char << 46) } /// Decodes a u64 back into a ParsedFormatSpec. /// /// Reverses the bit-packing done by `encode_format_spec`. Used by the VM /// when executing `FormatValue` to retrieve the format specification from /// the constant pool (where it's stored as a negative integer marker). 
pub fn decode_format_spec(encoded: u64) -> ParsedFormatSpec { let fill = (encoded & 0xFF) as u8 as char; let align_bits = (encoded >> 8) & 0x07; let sign_bits = (encoded >> 11) & 0x03; let zero_pad = ((encoded >> 13) & 0x01) != 0; let width = ((encoded >> 14) & 0xFFFF) as usize; let precision_raw = ((encoded >> 30) & 0xFFFF) as usize; let type_bits = ((encoded >> 46) & 0x1F) as u8; let align = match align_bits { 1 => Some('<'), 2 => Some('>'), 3 => Some('^'), 4 => Some('='), _ => None, }; let sign = match sign_bits { 1 => Some('+'), 2 => Some('-'), 3 => Some(' '), _ => None, }; let precision = if precision_raw == 0xFFFF { None } else { Some(precision_raw) }; let type_char = match type_bits { 1 => Some('b'), 2 => Some('c'), 3 => Some('d'), 4 => Some('e'), 5 => Some('E'), 6 => Some('f'), 7 => Some('F'), 8 => Some('g'), 9 => Some('G'), 10 => Some('n'), 11 => Some('o'), 12 => Some('s'), 13 => Some('x'), 14 => Some('X'), 15 => Some('%'), _ => None, }; ParsedFormatSpec { fill, align, sign, zero_pad, width, precision, type_char, } } // ============================================================================ // Formatting functions // ============================================================================ /// Formats a string value according to a format specification. /// /// Applies the following transformations in order: /// 1. Truncation: If `precision` is set, limits the string to that many characters /// 2. Alignment: Pads to `width` using `fill` character (default left-aligned for strings) /// /// Returns an error if `=` alignment is used (sign-aware padding only valid for numbers). 
pub fn format_string(value: &str, spec: &ParsedFormatSpec) -> Result { // Handle precision (string truncation) let value = if let Some(prec) = spec.precision { value.chars().take(prec).collect::() } else { value.to_owned() }; // Validate alignment for strings (= is only for numbers) if spec.align == Some('=') { return Err(FormatError::InvalidAlignment( "'=' alignment not allowed in string format specifier".to_owned(), )); } // Default alignment for strings is left ('<') let align = spec.align.unwrap_or('<'); Ok(pad_string(&value, spec.width, align, spec.fill)) } /// Formats an integer in decimal with a format specification. /// /// Applies the following: /// - Sign prefix based on `sign` spec: `+` (always show), `-` (negatives only), ` ` (space for positive) /// - Zero-padding: When `zero_pad` is true or `=` alignment, inserts zeros between sign and digits /// - Alignment: Right-aligned by default for numbers, pads to `width` with `fill` character pub fn format_int(n: i64, spec: &ParsedFormatSpec) -> String { let is_negative = n < 0; let abs_str = n.abs().to_string(); // Build the sign prefix let sign = if is_negative { "-" } else { match spec.sign { Some('+') => "+", Some(' ') => " ", _ => "", } }; // Default alignment for numbers is right ('>') let align = spec.align.unwrap_or('>'); // Handle sign-aware zero-padding or regular padding if spec.zero_pad || align == '=' { let fill = if spec.zero_pad { '0' } else { spec.fill }; let total_len = sign.len() + abs_str.len(); if spec.width > total_len { let padding = spec.width - total_len; let pad_str: String = std::iter::repeat_n(fill, padding).collect(); format!("{sign}{pad_str}{abs_str}") } else { format!("{sign}{abs_str}") } } else { let value = format!("{sign}{abs_str}"); pad_string(&value, spec.width, align, spec.fill) } } /// Formats an integer in binary (base 2), octal (base 8), or hexadecimal (base 16). /// /// Used for format types `b`, `o`, `x`, and `X`. The sign is prepended for negative numbers. 
/// Does not include base prefixes like `0b`, `0o`, `0x` (those require the `#` flag which /// is not yet implemented). Returns an error for invalid base values. pub fn format_int_base(n: i64, base: u32, spec: &ParsedFormatSpec) -> Result { let is_negative = n < 0; let abs_val = n.unsigned_abs(); let abs_str = match base { 2 => format!("{abs_val:b}"), 8 => format!("{abs_val:o}"), 16 => format!("{abs_val:x}"), _ => return Err(FormatError::ValueError("Invalid base".to_owned())), }; let sign = if is_negative { "-" } else { "" }; let value = format!("{sign}{abs_str}"); let align = spec.align.unwrap_or('>'); Ok(pad_string(&value, spec.width, align, spec.fill)) } /// Formats an integer as a Unicode character (format type `c`). /// /// Converts the integer to its corresponding Unicode code point. Valid range is 0 to 0x10FFFF. /// Returns `Overflow` error if out of range, `ValueError` if not a valid Unicode scalar value /// (e.g., surrogate code points). Left-aligned by default like strings. pub fn format_char(n: i64, spec: &ParsedFormatSpec) -> Result { if !(0..=0x0010_FFFF).contains(&n) { return Err(FormatError::Overflow("%c arg not in range(0x110000)".to_owned())); } let n_u32 = u32::try_from(n).expect("format_char n validated in 0..=0x10FFFF range"); let c = char::from_u32(n_u32).ok_or_else(|| FormatError::ValueError("Invalid Unicode code point".to_owned()))?; let value = c.to_string(); let align = spec.align.unwrap_or('<'); Ok(pad_string(&value, spec.width, align, spec.fill)) } /// Formats a float in fixed-point notation (format types `f` and `F`). /// /// Always includes a decimal point with `precision` digits after it (default 6). /// Handles sign prefix, zero-padding between sign and digits when `zero_pad` or `=` alignment. /// Right-aligned by default. NaN and infinity are formatted as `nan`/`inf` (or `NAN`/`INF` for `F`). 
pub fn format_float_f(f: f64, spec: &ParsedFormatSpec) -> String { let precision = spec.precision.unwrap_or(6); let is_negative = f.is_sign_negative() && !f.is_nan(); let abs_val = f.abs(); let abs_str = format!("{abs_val:.precision$}"); let sign = if is_negative { "-" } else { match spec.sign { Some('+') => "+", Some(' ') => " ", _ => "", } }; let align = spec.align.unwrap_or('>'); if spec.zero_pad || align == '=' { let fill = if spec.zero_pad { '0' } else { spec.fill }; let total_len = sign.len() + abs_str.len(); if spec.width > total_len { let padding = spec.width - total_len; let pad_str: String = std::iter::repeat_n(fill, padding).collect(); format!("{sign}{pad_str}{abs_str}") } else { format!("{sign}{abs_str}") } } else { let value = format!("{sign}{abs_str}"); pad_string(&value, spec.width, align, spec.fill) } } /// Formats a float in exponential/scientific notation (format types `e` and `E`). /// /// Produces output like `1.234568e+03` with `precision` digits after decimal (default 6). /// The `uppercase` parameter controls whether to use `E` or `e` for the exponent marker. /// Exponent is always formatted with a sign and at least 2 digits (Python convention). pub fn format_float_e(f: f64, spec: &ParsedFormatSpec, uppercase: bool) -> String { let precision = spec.precision.unwrap_or(6); let is_negative = f.is_sign_negative() && !f.is_nan(); let abs_val = f.abs(); let abs_str = if uppercase { format!("{abs_val:.precision$E}") } else { format!("{abs_val:.precision$e}") }; // Fix exponent format to match Python (e+03 not e3) let abs_str = fix_exp_format(&abs_str); let sign = if is_negative { "-" } else { match spec.sign { Some('+') => "+", Some(' ') => " ", _ => "", } }; let value = format!("{sign}{abs_str}"); let align = spec.align.unwrap_or('>'); pad_string(&value, spec.width, align, spec.fill) } /// Formats a float in "general" format (format types `g` and `G`). 
/// /// Chooses between fixed-point and exponential notation based on the magnitude: /// - Uses exponential if exponent < -4 or >= precision /// - Otherwise uses fixed-point notation /// /// Unlike `f` and `e` formats, trailing zeros are stripped from the result. /// Default precision is 6, but minimum is 1 significant digit. pub fn format_float_g(f: f64, spec: &ParsedFormatSpec) -> String { let precision = spec.precision.unwrap_or(6).max(1); let is_negative = f.is_sign_negative() && !f.is_nan(); let abs_val = f.abs(); // Python's g format: use exponential if exponent < -4 or >= precision let exp = if abs_val == 0.0 { 0 } else { // log10 of valid floats fits in i32; floor() returns a finite f64 f64_to_i32_trunc(abs_val.log10().floor()) }; // precision is typically small (default 6), safe to convert to i32 let prec_i32 = i32::try_from(precision).unwrap_or(i32::MAX); let abs_str = if exp < -4 || exp >= prec_i32 { // Use exponential notation let exp_prec = precision.saturating_sub(1); let formatted = format!("{abs_val:.exp_prec$e}"); // Python strips trailing zeros from the mantissa strip_trailing_zeros_exp(&formatted) } else { // Use fixed notation - result is non-negative due to .max(0) let sig_digits_i32 = (prec_i32 - exp - 1).max(0); let sig_digits = usize::try_from(sig_digits_i32).expect("sig_digits guaranteed non-negative"); let formatted = format!("{abs_val:.sig_digits$}"); strip_trailing_zeros(&formatted) }; let sign = if is_negative { "-" } else { match spec.sign { Some('+') => "+", Some(' ') => " ", _ => "", } }; let value = format!("{sign}{abs_str}"); let align = spec.align.unwrap_or('>'); pad_string(&value, spec.width, align, spec.fill) } /// Applies ASCII conversion to a string (escapes non-ASCII characters). /// /// Used for the `!a` conversion flag in f-strings. Takes a string (typically a repr) /// and escapes all non-ASCII characters using `\xNN`, `\uNNNN`, or `\UNNNNNNNN`. 
pub fn ascii_escape(s: &str) -> String {
    use std::fmt::Write;

    let mut result = String::with_capacity(s.len());
    for c in s.chars() {
        if c.is_ascii() {
            result.push(c);
            continue;
        }
        // Pick the shortest escape form that can hold the code point.
        let code = c as u32;
        if code <= 0xFF {
            write!(result, "\\x{code:02x}")
        } else if code <= 0xFFFF {
            write!(result, "\\u{code:04x}")
        } else {
            write!(result, "\\U{code:08x}")
        }
        .expect("string write should be infallible");
    }
    result
}

/// Formats a float as a percentage (format type `%`).
///
/// Multiplies the value by 100 and appends a `%` sign. Uses fixed-point notation
/// with `precision` decimal places (default 6). For example, `0.1234` becomes `12.340000%`.
/// NaN is rendered as `nan%`. With the `0` flag or `=` alignment the padding is
/// inserted between the sign and the digits; otherwise the text is right-aligned
/// by default.
pub fn format_float_percent(f: f64, spec: &ParsedFormatSpec) -> String {
    let precision = spec.precision.unwrap_or(6);
    let percent_val = f * 100.0;
    let is_negative = percent_val.is_sign_negative() && !percent_val.is_nan();
    let abs_val = percent_val.abs();

    // Rust prints NaN as "NaN"; Python uses lower-case "nan".
    let number = if abs_val.is_nan() {
        "nan".to_owned()
    } else {
        format!("{abs_val:.precision$}")
    };
    let body = format!("{number}%");

    let sign = if is_negative {
        "-"
    } else {
        match spec.sign {
            Some('+') => "+",
            Some(' ') => " ",
            _ => "",
        }
    };

    let align = spec.align.unwrap_or('>');
    if spec.zero_pad || align == '=' {
        // Sign-aware padding: the fill goes between the sign and the digits.
        let fill = if spec.zero_pad { '0' } else { spec.fill };
        let used = sign.len() + body.chars().count();
        let padding = spec.width.saturating_sub(used);
        let pad_str: String = std::iter::repeat_n(fill, padding).collect();
        format!("{sign}{pad_str}{body}")
    } else {
        pad_string(&format!("{sign}{body}"), spec.width, align, spec.fill)
    }
}

// ============================================================================
// Helper functions
// ============================================================================

/// Pads a string to a given width with alignment.
/// /// Alignment options: /// - '<': left-align (pad on right) /// - '>': right-align (pad on left) /// - '^': center (pad both sides) fn pad_string(value: &str, width: usize, align: char, fill: char) -> String { let value_len = value.chars().count(); if width <= value_len { return value.to_owned(); } let padding = width - value_len; match align { '<' => { let mut s = value.to_owned(); for _ in 0..padding { s.push(fill); } s } '>' => { let mut s = String::new(); for _ in 0..padding { s.push(fill); } s.push_str(value); s } '^' => { let left_pad = padding / 2; let right_pad = padding - left_pad; let mut s = String::new(); for _ in 0..left_pad { s.push(fill); } s.push_str(value); for _ in 0..right_pad { s.push(fill); } s } _ => value.to_owned(), } } /// Strips trailing zeros from a decimal float string. /// /// Used by the `:g` format to remove insignificant trailing zeros. /// Also removes the decimal point if all fractional digits are stripped. /// Has no effect if the string doesn't contain a decimal point. fn strip_trailing_zeros(s: &str) -> String { if !s.contains('.') { return s.to_owned(); } let trimmed = s.trim_end_matches('0'); if let Some(stripped) = trimmed.strip_suffix('.') { stripped.to_owned() } else { trimmed.to_owned() } } /// Strips trailing zeros from a float in exponential notation. /// /// Splits the string at `e` or `E`, strips zeros from the mantissa part, /// then recombines with the exponent. Also normalizes the exponent format /// to Python's convention (sign and at least 2 digits). fn strip_trailing_zeros_exp(s: &str) -> String { if let Some(e_pos) = s.find(['e', 'E']) { let (mantissa, exp_part) = s.split_at(e_pos); let trimmed_mantissa = strip_trailing_zeros(mantissa); let fixed_exp = fix_exp_format(exp_part); format!("{trimmed_mantissa}{fixed_exp}") } else { strip_trailing_zeros(s) } } /// Converts Rust's exponential format to Python's format. /// /// Rust produces "e3" or "e-3" but Python expects "e+03" or "e-03". 
/// This function ensures the exponent has: /// 1. A sign character ('+' or '-') /// 2. At least 2 digits fn fix_exp_format(s: &str) -> String { // Find the 'e' or 'E' marker let Some(e_pos) = s.find(['e', 'E']) else { return s.to_owned(); }; let (before_e, e_and_rest) = s.split_at(e_pos); let e_char = e_and_rest.chars().next().unwrap(); let exp_part = &e_and_rest[1..]; // Parse the exponent sign and value let (sign, digits) = if let Some(stripped) = exp_part.strip_prefix('-') { ('-', stripped) } else if let Some(stripped) = exp_part.strip_prefix('+') { ('+', stripped) } else { ('+', exp_part) }; // Ensure at least 2 digits let padded_digits = if digits.len() < 2 { format!("{digits:0>2}") } else { digits.to_owned() }; format!("{before_e}{e_char}{sign}{padded_digits}") } /// Truncates f64 to i32 with clamping for out-of-range values. /// /// Used for exponent calculations where the result should fit in i32. fn f64_to_i32_trunc(value: f64) -> i32 { if value >= f64::from(i32::MAX) { i32::MAX } else if value <= f64::from(i32::MIN) { i32::MIN } else { // SAFETY for clippy: value is guaranteed to be in (i32::MIN, i32::MAX) // after the bounds checks above, so truncation cannot overflow #[expect(clippy::cast_possible_truncation, reason = "bounds checked above")] let result = value as i32; result } } ================================================ FILE: crates/monty/src/function.rs ================================================ use std::fmt::Write; use crate::{bytecode::Code, expressions::Identifier, intern::Interns, namespace::NamespaceId, signature::Signature}; /// A defined function once compiled and ready for execution. /// /// This is created during the compilation phase from a `PreparedFunctionDef`. /// Contains everything needed to execute a user-defined function: compiled bytecode, /// metadata, and closure information. Functions are stored on the heap and /// referenced via HeapId. 
/// /// # Namespace Layout /// /// The namespace has a predictable layout that allows sequential construction: /// ```text /// [params...][cell_vars...][free_vars...][locals...] /// ``` /// - Slots 0..signature.param_count(): function parameters (see `Signature` for layout) /// - Slots after params: cell refs for variables captured by nested functions /// - Slots after cell_vars: free_var refs (captured from enclosing scope) /// - Remaining slots: local variables /// /// # Closure Support /// /// - `free_var_enclosing_slots`: Enclosing namespace slots for captured variables. /// At definition time, cells are captured from these slots and stored in a Closure. /// At call time, they're pushed sequentially after cell_vars. /// - `cell_var_count`: Number of cells to create for variables captured by nested functions. /// At call time, cells are created and pushed sequentially after params. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub(crate) struct Function { /// The function name (used for error messages and repr). pub name: Identifier, /// The function signature. pub signature: Signature, /// Size of the initial namespace (number of local variable slots). pub namespace_size: usize, /// Enclosing namespace slots for variables captured from enclosing scopes. /// /// At definition time: look up cell HeapId from enclosing namespace at each slot. /// At call time: captured cells are pushed sequentially (our slots are implicit). pub free_var_enclosing_slots: Vec, /// Number of cell variables (captured by nested functions). /// /// At call time, this many cells are created and pushed right after params. /// Their slots are implicitly params.len()..params.len()+cell_var_count. pub cell_var_count: usize, /// Maps cell variable indices to their corresponding parameter indices, if any. /// /// When a parameter is also captured by nested functions (cell variable), its value /// must be copied into the cell after binding. 
Each entry corresponds to a cell /// (index 0..cell_var_count), and contains `Some(param_index)` if that cell is for /// a parameter, or `None` otherwise. pub cell_param_indices: Vec>, /// Number of default parameter values. /// /// At function definition time, this many default values are evaluated and stored /// in a separate defaults array. The signature indicates how these map to parameters. pub defaults_count: usize, /// Whether this is an async function (`async def`). /// /// When true, calling this function creates a `Coroutine` object instead of /// immediately pushing a frame. The coroutine captures the bound arguments /// and starts execution only when awaited. pub is_async: bool, /// Compiled bytecode for this function body. pub code: Code, } impl Function { /// Create a new compiled function. /// /// This is typically called by the bytecode compiler after compiling a `PreparedFunctionDef`. /// /// # Arguments /// * `name` - The function name identifier /// * `signature` - The function signature with parameter names and defaults /// * `namespace_size` - Number of local variable slots needed /// * `free_var_enclosing_slots` - Enclosing namespace slots for captured variables /// * `cell_var_count` - Number of cells to create for variables captured by nested functions /// * `cell_param_indices` - Maps cell indices to parameter indices for captured parameters /// * `defaults_count` - Number of default parameter values /// * `is_async` - Whether this is an async function /// * `code` - The compiled bytecode for the function body #[expect(clippy::too_many_arguments)] pub fn new( name: Identifier, signature: Signature, namespace_size: usize, free_var_enclosing_slots: Vec, cell_var_count: usize, cell_param_indices: Vec>, defaults_count: usize, is_async: bool, code: Code, ) -> Self { Self { name, signature, namespace_size, free_var_enclosing_slots, cell_var_count, cell_param_indices, defaults_count, is_async, code, } } /// Writes the Python repr() string for this 
function to a formatter. pub fn py_repr_fmt(&self, f: &mut W, interns: &Interns, py_id: usize) -> std::fmt::Result { write!( f, "", interns.get_str(self.name.name_id), py_id ) } } ================================================ FILE: crates/monty/src/heap.rs ================================================ use std::{ cell::Cell, collections::hash_map::DefaultHasher, hash::{Hash, Hasher}, mem::size_of, vec, }; use smallvec::SmallVec; // Re-export items moved to `heap_traits` so that `crate::heap::HeapGuard` etc. continue // to resolve (used by the `defer_drop!` macros and throughout the codebase). pub(crate) use crate::heap_data::HeapData; pub(crate) use crate::heap_traits::{ContainsHeap, DropWithHeap, HeapGuard, HeapItem, ImmutableHeapGuard}; use crate::{ args::ArgValues, asyncio::GatherItem, bytecode::{CallResult, VM}, exception_private::{ExcType, RunResult}, heap_data::HeapDataMut, intern::Interns, resource::{ResourceError, ResourceTracker, check_mult_size, check_repeat_size}, types::{List, LongInt, PyTrait, Tuple, allocate_tuple}, value::{EitherStr, Value}, }; /// Unique identifier for values stored inside the heap arena. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub struct HeapId(usize); impl HeapId { /// Returns the raw index value. #[inline] pub fn index(self) -> usize { self.0 } } /// The empty tuple is a singleton which is allocated at startup. const EMPTY_TUPLE_ID: HeapId = HeapId(0); /// Hash caching state stored alongside each heap entry. #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] enum HashState { /// Hash has not yet been computed but the value might be hashable. Unknown, /// Cached hash value for immutable types that have been hashed at least once. Cached(u64), /// Value is unhashable (mutable types or tuples containing unhashables). 
Unhashable, } impl HashState { fn for_data(data: &HeapData) -> Self { match data { // Cells are hashable by identity (like all Python objects without __hash__ override) // FrozenSet is immutable and hashable // Range is immutable and hashable // Slice is immutable and hashable (like in CPython) // LongInt is immutable and hashable // NamedTuple is immutable and hashable (like Tuple) HeapData::Str(_) | HeapData::Bytes(_) | HeapData::Tuple(_) | HeapData::NamedTuple(_) | HeapData::FrozenSet(_) | HeapData::Cell(_) | HeapData::Closure(_) | HeapData::FunctionDefaults(_) | HeapData::Range(_) | HeapData::Slice(_) | HeapData::LongInt(_) => Self::Unknown, // Dataclass hashability depends on the mutable flag HeapData::Dataclass(dc) => { if dc.is_frozen() { Self::Unknown } else { Self::Unhashable } } // Path is immutable and hashable HeapData::Path(_) => Self::Unknown, // ExtFunction is hashable (by identity, like closures) HeapData::ExtFunction(_) => Self::Unknown, // other types are unhashable _ => Self::Unhashable, } } } /// A single entry inside the heap arena, storing refcount, payload, and hash metadata. /// /// The `hash_state` field tracks whether the heap entry is hashable and, if so, /// caches the computed hash. Mutable types (List, Dict) start as `Unhashable` and /// will raise TypeError if used as dict keys. /// /// The `data` field is an Option to support temporary borrowing: when methods like /// `with_entry_mut` or `call_attr` need mutable access to both the data and the heap, /// they can `.take()` the data out (leaving `None`), pass `&mut Heap` to user code, /// then restore the data. This avoids unsafe code while keeping `refcount` accessible /// for `inc_ref`/`dec_ref` during the borrow. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub struct HeapValue { refcount: Cell, /// The payload data. Temporarily `None` while borrowed via `with_entry_mut`/`call_attr`. 
data: Option, /// Current hashing status / cached hash value hash_state: HashState, } /// Zero-size token returned by [`Heap::incr_recursion_depth`]. /// /// Represents one level of recursion depth that must be released when the /// recursive operation completes. There are two ways to release the token: /// /// - **`DropWithHeap`** — for `&mut Heap` paths (e.g., `py_eq`). Compatible with /// `defer_drop!` and `HeapGuard` for automatic cleanup on all code paths. /// - **`DropWithImmutableHeap`** — for `&Heap` paths (e.g., `py_repr_fmt`) where /// only shared access is available. Compatible with `defer_drop_immutable_heap!` /// and `ImmutableHeapGuard`. #[derive(Debug)] pub(crate) struct RecursionToken(()); impl DropWithHeap for RecursionToken { #[inline] fn drop_with_heap(self, heap: &mut H) { heap.heap().decr_recursion_depth(); } } /// Reference-counted arena that backs all heap-only runtime values. /// /// Uses a free list to reuse slots from freed values, keeping memory usage /// constant for long-running loops that repeatedly allocate and free values. /// When a value is freed via `dec_ref`, its slot ID is added to the free list. /// New allocations pop from the free list when available, otherwise append. /// /// Generic over `T: ResourceTracker` to support different resource tracking strategies. /// When `T = NoLimitTracker` (the default), all resource checks compile away to no-ops. /// /// Serialization requires `T: Serialize` and `T: Deserialize`. The custom serde implementations /// serialize each persisted field by reference (nothing is moved out of the heap) and skip /// `recursion_depth`, which deserialization resets to 0. #[derive(Debug)] pub(crate) struct Heap { entries: Vec>, /// IDs of freed slots available for reuse. Populated by `dec_ref` and `collect_garbage`, consumed by `allocate`. free_list: Vec, /// Resource tracker for enforcing limits and scheduling GC. tracker: T, /// True if reference cycles may exist. Set when a container stores a Ref, /// cleared after GC completes. When false, GC can skip mark-sweep entirely. 
may_have_cycles: bool, /// Number of GC applicable allocations since the last GC. allocations_since_gc: u32, /// Current recursion depth — incremented on function calls and data structure traversals. /// /// Uses `Cell` for interior mutability so that methods with only `&Heap` /// (like `py_repr_fmt`) can still increment/decrement the depth counter. recursion_depth: Cell, } // Serializes the five persisted heap fields by reference. `recursion_depth` is runtime-only state: // it is not written here and is reset to 0 by the `Deserialize` impl. impl serde::Serialize for Heap { fn serialize(&self, serializer: S) -> Result { use serde::ser::SerializeStruct; // The length passed to `serialize_struct` must equal the number of `serialize_field` calls below (five). let mut state = serializer.serialize_struct("Heap", 5)?; state.serialize_field("entries", &self.entries)?; state.serialize_field("free_list", &self.free_list)?; state.serialize_field("tracker", &self.tracker)?; state.serialize_field("may_have_cycles", &self.may_have_cycles)?; state.serialize_field("allocations_since_gc", &self.allocations_since_gc)?; state.end() } } // Rebuilds a heap from the five serialized fields via a private mirror struct; the recursion depth // always starts at 0 and callers may adjust it afterwards with `set_recursion_depth`. impl<'de, T: ResourceTracker + serde::Deserialize<'de>> serde::Deserialize<'de> for Heap { fn deserialize>(deserializer: D) -> Result { #[derive(serde::Deserialize)] struct HeapFields { entries: Vec>, free_list: Vec, tracker: T, may_have_cycles: bool, allocations_since_gc: u32, } let fields = HeapFields::::deserialize(deserializer)?; Ok(Self { entries: fields.entries, free_list: fields.free_list, tracker: fields.tracker, may_have_cycles: fields.may_have_cycles, allocations_since_gc: fields.allocations_since_gc, recursion_depth: Cell::new(0), }) } } // Takes the `data` payload out of the entry at `$id`, leaving `None` in its place. Panics with a // `Heap::<func_name>` message if the slot is missing, the object was freed, or the data is already borrowed. macro_rules! take_data { ($self:ident, $id:expr, $func_name:literal) => { $self .entries .get_mut($id.index()) .expect(concat!("Heap::", $func_name, ": slot missing")) .as_mut() .expect(concat!("Heap::", $func_name, ": object already freed")) .data .take() .expect(concat!("Heap::", $func_name, ": data already borrowed")) }; } macro_rules! 
restore_data { ($self:ident, $id:expr, $new_data:expr, $func_name:literal) => {{ let entry = $self .entries .get_mut($id.index()) .expect(concat!("Heap::", $func_name, ": slot missing")) .as_mut() .expect(concat!("Heap::", $func_name, ": object already freed")); entry.data = Some($new_data); }}; } /// GC interval - run GC every 100,000 applicable allocations. /// /// This is intentionally infrequent to minimize overhead while still /// eventually collecting reference cycles. const GC_INTERVAL: u32 = 100_000; impl Heap { /// Creates a new heap with the given resource tracker. /// /// Use this to create heaps with custom resource limits or GC scheduling. pub fn new(capacity: usize, tracker: T) -> Self { let mut this = Self { entries: Vec::with_capacity(capacity), free_list: Vec::new(), tracker, may_have_cycles: false, allocations_since_gc: 0, recursion_depth: Cell::new(0), }; // TBC: should the empty tuple contribute to the resource limits? // If not, can just place it in `entries` directly without going through `allocate()`. let empty_tuple = this .allocate(HeapData::Tuple(Tuple::default())) .expect("Failed to allocate empty tuple singleton"); debug_assert_eq!(empty_tuple, EMPTY_TUPLE_ID); this } /// Returns a reference to the resource tracker. pub fn tracker(&self) -> &T { &self.tracker } /// Returns a mutable reference to the resource tracker. pub fn tracker_mut(&mut self) -> &mut T { &mut self.tracker } /// Checks whether the configured time limit has been exceeded. /// /// Delegates to the resource tracker's `check_time()`. For `NoLimitTracker`, /// this is inlined as a no-op with zero runtime cost. For `LimitTracker`, /// it compares elapsed time against the configured `max_duration_secs`. /// /// Call this inside Rust-side loops (builtins, sort, iterator collection) /// that execute within a single bytecode instruction and would otherwise /// bypass the VM's per-instruction timeout check. 
#[inline] pub fn check_time(&self) -> Result<(), ResourceError> { self.tracker.check_time() } /// Increments the recursion depth and checks the limit via the `ResourceTracker`. /// /// Returns `Ok(RecursionToken)` if within limits. The caller must ensure the /// token is released on all code paths — either via `defer_drop!`/`HeapGuard` /// (for `&mut Heap` contexts) or via `defer_drop_immutable_heap!`/`ImmutableHeapGuard` /// (for `&Heap` contexts), matching the release paths documented on `RecursionToken`. /// /// Returns `Err(ResourceError::Recursion)` if the limit would be exceeded. #[inline] pub fn incr_recursion_depth(&self) -> Result { let depth = self.recursion_depth.get(); self.tracker.check_recursion_depth(depth)?; self.recursion_depth.set(depth + 1); Ok(RecursionToken(())) } /// Increments the recursion depth, returning `Some(RecursionToken)` if within /// limits, or `None` if the limit is exceeded. /// /// Use this in repr-like contexts where exceeding the limit should produce /// truncated output (e.g., `[...]`) rather than an error. #[inline] pub fn incr_recursion_depth_for_repr(&self) -> Option { self.incr_recursion_depth().ok() } /// Decrements the recursion depth. /// /// Called internally by `RecursionToken` — prefer releasing the token /// rather than calling this directly. #[inline] pub(crate) fn decr_recursion_depth(&self) { let depth = self.recursion_depth.get(); debug_assert!(depth > 0, "decr_recursion_depth called when depth is 0"); self.recursion_depth.set(depth - 1); } /// Returns the current recursion depth. /// /// Used during async task switching to compute a task's depth contribution /// before adjusting the global counter. pub(crate) fn get_recursion_depth(&self) -> usize { self.recursion_depth.get() } /// Sets the recursion depth to an explicit value. /// /// Used after deserialization to restore the recursion depth to match /// the number of active (non-global) namespace frames that were serialized. 
/// Also used during async task switching to subtract/add a task's depth /// contribution when switching away from/to that task. pub(crate) fn set_recursion_depth(&self, depth: usize) { self.recursion_depth.set(depth); } /// Number of entries in the heap pub fn size(&self) -> usize { self.entries.len() } /// Marks that a reference cycle may exist in the heap. /// /// Call this when a container (list, dict, tuple, etc.) stores a reference /// to another heap object. This enables the GC to skip mark-sweep entirely /// when no cycles are possible. #[inline] pub fn mark_potential_cycle(&mut self) { self.may_have_cycles = true; } /// Returns the number of GC-tracked allocations since the last garbage collection. /// /// This counter increments for each allocation of a GC-tracked type (List, Dict, etc.) /// and resets to 0 when `collect_garbage` runs. Useful for testing GC behavior. #[cfg(feature = "ref-count-return")] pub fn get_allocations_since_gc(&self) -> u32 { self.allocations_since_gc } /// Allocates a new heap entry. /// /// Returns `Err(ResourceError)` if allocation would exceed configured limits. /// Use this when you need to handle resource limit errors gracefully. /// /// Only GC-tracked types (containers that can hold references) count toward the /// GC allocation threshold. Leaf types like strings don't trigger GC. /// /// When allocating a container that contains heap references, marks potential /// cycles to enable garbage collection. pub fn allocate(&mut self, data: HeapData) -> Result { self.tracker.on_allocate(|| data.py_estimate_size())?; if data.is_gc_tracked() { self.allocations_since_gc = self.allocations_since_gc.wrapping_add(1); // Mark potential cycles if this container has heap references. // This is essential for types like Dict where setitem doesn't call // mark_potential_cycle() - the allocation is the only place to detect refs. 
if data.has_refs() { self.may_have_cycles = true; } } let hash_state = HashState::for_data(&data); let new_entry = HeapValue { refcount: Cell::new(1), data: Some(data), hash_state, }; let id = if let Some(id) = self.free_list.pop() { // Reuse a freed slot self.entries[id.index()] = Some(new_entry); id } else { // No free slots, append new entry let id = self.entries.len(); self.entries.push(Some(new_entry)); HeapId(id) }; Ok(id) } /// Returns the singleton empty tuple. /// /// In Python, `() is ()` is always `True` because empty tuples are interned. /// This method provides the same optimization by returning the same `HeapId` /// for all empty tuple allocations. /// /// The returned `Value` has its reference count incremented, so the caller /// owns a reference and must call `dec_ref` when done. pub fn get_empty_tuple(&mut self) -> Value { // Return existing singleton with incremented refcount self.inc_ref(EMPTY_TUPLE_ID); Value::Ref(EMPTY_TUPLE_ID) } /// Increments the reference count for an existing heap entry. /// /// # Panics /// Panics if the value ID is invalid or the value has already been freed. pub fn inc_ref(&self, id: HeapId) { let value = self .entries .get(id.index()) .expect("Heap::inc_ref: slot missing") .as_ref() .expect("Heap::inc_ref: object already freed"); value.refcount.update(|r| r + 1); } /// Decrements the reference count and frees the value (plus children) once it hits zero. /// /// Uses an iterative work stack instead of recursion to avoid Rust stack overflow /// when freeing deeply nested containers (e.g., a list nested 10,000 levels deep). /// This is analogous to CPython's "trashcan" mechanism for safe deallocation. /// /// # Panics /// Panics if the value ID is invalid or the value has already been freed. 
pub fn dec_ref(&mut self, id: HeapId) { let mut current_id = id; let mut work_stack = Vec::new(); loop { let slot = self .entries .get_mut(current_id.index()) .expect("Heap::dec_ref: slot missing"); let entry = slot.as_mut().expect("Heap::dec_ref: object already freed"); if entry.refcount.get() > 1 { entry.refcount.update(|r| r - 1); } else if let Some(value) = slot.take() { // refcount == 1, free the value and add slot to free list for reuse self.free_list.push(current_id); // Notify tracker of freed memory if let Some(ref data) = value.data { self.tracker.on_free(|| data.py_estimate_size()); } // Collect child IDs and push onto work stack for iterative processing if let Some(mut data) = value.data { data.py_dec_ref_ids(&mut work_stack); drop(data); } } let Some(next_id) = work_stack.pop() else { break; }; current_id = next_id; } } /// Returns an immutable reference to the heap data stored at the given ID. /// /// # Panics /// Panics if the value ID is invalid, the value has already been freed, /// or the data is currently borrowed via `with_entry_mut`/`call_attr`. #[must_use] pub fn get(&self, id: HeapId) -> &HeapData { self.entries .get(id.index()) .expect("Heap::get: slot missing") .as_ref() .expect("Heap::get: object already freed") .data .as_ref() .expect("Heap::get: data currently borrowed") } /// Returns a mutable reference to the heap data stored at the given ID. /// /// # Panics /// Panics if the value ID is invalid, the value has already been freed, /// or the data is currently borrowed via `with_entry_mut`/`call_attr`. pub fn get_mut(&mut self, id: HeapId) -> HeapDataMut<'_> { self.entries .get_mut(id.index()) .expect("Heap::get_mut: slot missing") .as_mut() .expect("Heap::get_mut: object already freed") .data .as_mut() .expect("Heap::get_mut: data currently borrowed") .to_mut() } /// Returns or computes the hash for the heap entry at the given ID. /// /// Hashes are computed lazily on first use and then cached. 
Returns /// `Ok(Some(hash))` for immutable types, `Ok(None)` for mutable types, /// or `Err(ResourceError::Recursion)` if the recursion limit is exceeded. /// /// # Panics /// Panics if the value ID is invalid, the value has already been freed, /// or the hash must be computed while the entry's data is currently borrowed. pub fn get_or_compute_hash(&mut self, id: HeapId, interns: &Interns) -> Result, ResourceError> { let entry = self .entries .get_mut(id.index()) .expect("Heap::get_or_compute_hash: slot missing") .as_mut() .expect("Heap::get_or_compute_hash: object already freed"); match entry.hash_state { HashState::Unhashable => return Ok(None), HashState::Cached(hash) => return Ok(Some(hash)), HashState::Unknown => {} } // Handle Cell specially - uses identity-based hashing (like Python cell objects) if let Some(HeapData::Cell(_)) = &entry.data { let mut hasher = DefaultHasher::new(); id.hash(&mut hasher); let hash = hasher.finish(); entry.hash_state = HashState::Cached(hash); return Ok(Some(hash)); } // Compute hash lazily - need to temporarily take data to avoid borrow conflict. // IMPORTANT: data must be restored to the entry on ALL paths (including errors) // to avoid dropping HeapData containing Value::Ref without proper cleanup. let mut data = entry.data.take().expect("Heap::get_or_compute_hash: data borrowed"); let hash = data.to_mut().compute_hash_if_immutable(self, interns); // Restore data before handling the result let entry = self .entries .get_mut(id.index()) .expect("Heap::get_or_compute_hash: slot missing after compute") .as_mut() .expect("Heap::get_or_compute_hash: object freed during compute"); entry.data = Some(data); // Now handle the result and cache if successful (an error leaves `hash_state` as `Unknown`) let hash = hash?; entry.hash_state = match hash { Some(value) => HashState::Cached(value), None => HashState::Unhashable, }; Ok(hash) } /// Calls an attribute on the heap entry, returning a `CallResult` that may signal /// OS, external, or method calls. 
/// /// Temporarily takes ownership of the payload to avoid borrow conflicts when attribute /// implementations also need mutable heap access (e.g. for refcounting). /// /// Returns `CallResult` which may be: /// - `Value(v)` - Method completed synchronously with value `v` /// - `OsCall(func, args)` - Method needs OS operation; VM should yield to host /// - `ExternalCall(id, args)` - Method needs external function call /// - `MethodCall(name, args)` - Dataclass method call; VM should yield to host pub fn call_attr(vm: &mut VM<'_, '_, T>, id: HeapId, attr: &EitherStr, args: ArgValues) -> RunResult { // Take data out so the borrow of self.entries ends let heap = &mut *vm.heap; let mut data = take_data!(heap, id, "call_attr"); let result = data.py_call_attr(id, vm, attr, args); // Restore data let heap = &mut *vm.heap; restore_data!(heap, id, data, "call_attr"); result } /// Gives mutable access to a heap entry while allowing reentrant heap usage /// inside the closure (e.g. to read other values or allocate results). /// /// The data is temporarily taken from the heap entry, so the closure can safely /// mutate both the entry data and the heap (e.g. to allocate new values). /// The data is automatically restored after the closure completes. pub fn with_entry_mut<'a, 'p, F, R>(vm: &mut VM<'a, 'p, T>, id: HeapId, f: F) -> R where F: FnOnce(&mut VM<'a, 'p, T>, HeapDataMut) -> R, { // Take data out in a block so the borrow of self.entries ends let heap = &mut *vm.heap; let mut data = take_data!(heap, id, "with_entry_mut"); let result = f(vm, data.to_mut()); // Restore data let heap = &mut *vm.heap; restore_data!(heap, id, data, "with_entry_mut"); result } /// Temporarily takes ownership of two heap entries so their data can be borrowed /// simultaneously while still permitting mutable access to the VM (e.g. to /// allocate results). Automatically restores both entries after the closure /// finishes executing. 
/// /// This is a static method that takes `&mut VM` instead of `&mut self` so that /// the closure receives `&mut VM` — matching the `with_entry_mut` pattern and /// allowing the closure to call methods that need `vm` (e.g. `py_eq`). pub fn with_two<'a, 'p, F, R>(vm: &mut VM<'a, 'p, T>, left: HeapId, right: HeapId, f: F) -> R where F: FnOnce(&mut VM<'a, 'p, T>, &HeapData, &HeapData) -> R, { if left == right { // Same value - take data once and pass it twice let heap = &mut *vm.heap; let data = take_data!(heap, left, "with_two"); let result = f(vm, &data, &data); let heap = &mut *vm.heap; restore_data!(heap, left, data, "with_two"); result } else { // Different values - take both let heap = &mut *vm.heap; let left_data = take_data!(heap, left, "with_two (left)"); let right_data = take_data!(heap, right, "with_two (right)"); let result = f(vm, &left_data, &right_data); // Restore in reverse order let heap = &mut *vm.heap; restore_data!(heap, right, right_data, "with_two (right)"); restore_data!(heap, left, left_data, "with_two (left)"); result } } /// Returns the reference count for the heap entry at the given ID. /// /// This is primarily used for testing reference counting behavior. /// /// # Panics /// Panics if the value ID is invalid or the value has already been freed. #[must_use] #[cfg(feature = "ref-count-return")] pub fn get_refcount(&self, id: HeapId) -> usize { self.entries .get(id.index()) .expect("Heap::get_refcount: slot missing") .as_ref() .expect("Heap::get_refcount: object already freed") .refcount .get() } /// Returns the number of live (non-freed) values on the heap. /// /// This is primarily used for testing to verify that all heap entries /// are accounted for in reference count tests. /// /// Excludes the empty tuple singleton since it's an internal optimization /// detail that persists even when not explicitly used by user code. #[must_use] #[cfg(feature = "ref-count-return")] pub fn entry_count(&self) -> usize { // 1.. 
to skip index 0 which is the empty tuple singleton self.entries[1..].iter().filter(|o| o.is_some()).count() } /// Helper for List in-place add: extends the destination vec with items from a heap list. /// /// This method exists to work around borrow checker limitations when List::py_iadd /// needs to read from one heap entry while extending another. By keeping both /// the read and the refcount increments within Heap's impl block, we can use the /// take/restore pattern to avoid the lifetime propagation issues. /// /// Returns `true` if successful, `false` if the source ID is not a List. pub fn iadd_extend_list(&mut self, source_id: HeapId, dest: &mut Vec) -> bool { if let HeapData::List(list) = self.get(source_id) { let items: Vec = list.as_slice().iter().map(|v| v.clone_with_heap(self)).collect(); dest.extend(items); true } else { false } } /// Multiplies a heap-allocated value by an `i64`. /// /// If `id` refers to a `LongInt`, performs integer multiplication with a size /// pre-check. Otherwise, treats `id` as a sequence and `int_val` as the repeat /// count. This avoids multiple `heap.get()` calls by looking up the data once. /// /// Returns `Ok(None)` if the heap entry is neither a LongInt nor a sequence type. pub fn mult_ref_by_i64(&mut self, id: HeapId, int_val: i64) -> RunResult> { if let HeapData::LongInt(li) = self.get(id) { check_mult_size(li.bits(), i64_bits(int_val), &self.tracker)?; let result = LongInt::new(li.inner().clone()) * LongInt::from(int_val); Ok(Some(result.into_value(self)?)) } else { let count = i64_to_repeat_count(int_val)?; self.mult_sequence(id, count) } } /// Multiplies two heap-allocated values. /// /// Returns Ok(None) for unsupported type combinations. 
pub fn mult_heap_values(&mut self, id1: HeapId, id2: HeapId) -> RunResult> { let (seq_id, count) = match (self.get(id1), self.get(id2)) { (HeapData::LongInt(a), HeapData::LongInt(b)) => { check_mult_size(a.bits(), b.bits(), &self.tracker)?; let result = LongInt::new(a.inner() * b.inner()); return Ok(Some(result.into_value(self)?)); } (HeapData::LongInt(li), _) => { let count = longint_to_repeat_count(li)?; (id2, count) } (_, HeapData::LongInt(li)) => { let count = longint_to_repeat_count(li)?; (id1, count) } _ => return Ok(None), }; self.mult_sequence(seq_id, count) } /// Multiplies (repeats) a sequence by an integer count. /// /// This method handles sequence repetition for Python's `*` operator when applied /// to sequences (str, bytes, list, tuple). It creates a new heap-allocated sequence /// with the elements repeated `count` times. /// /// # Arguments /// * `id` - HeapId of the sequence to repeat /// * `count` - Number of times to repeat (0 returns empty sequence) /// /// # Returns /// * `Ok(Some(Value))` - The new repeated sequence /// * `Ok(None)` - If the heap entry is not a sequence type /// * `Err` - If allocation fails due to resource limits pub fn mult_sequence(&mut self, id: HeapId, count: usize) -> RunResult> { match self.get(id) { HeapData::Str(s) => { check_repeat_size(s.len(), count, &self.tracker)?; Ok(Some(Value::Ref( self.allocate(HeapData::Str(s.as_str().repeat(count).into()))?, ))) } HeapData::Bytes(b) => { check_repeat_size(b.len(), count, &self.tracker)?; Ok(Some(Value::Ref( self.allocate(HeapData::Bytes(b.as_slice().repeat(count).into()))?, ))) } HeapData::List(list) => { check_repeat_size(list.len().saturating_mul(size_of::()), count, &self.tracker)?; let mut result = Vec::with_capacity(list.as_slice().len() * count); for _ in 0..count { result.extend(list.as_slice().iter().map(|v| v.clone_with_heap(self))); self.check_time()?; } Ok(Some(Value::Ref(self.allocate(HeapData::List(List::new(result)))?))) } HeapData::Tuple(tuple) => { if count 
== 0 { return Ok(Some(self.get_empty_tuple())); } check_repeat_size( tuple.as_slice().len().saturating_mul(size_of::()), count, &self.tracker, )?; let mut result = SmallVec::with_capacity(tuple.as_slice().len() * count); for _ in 0..count { result.extend(tuple.as_slice().iter().map(|v| v.clone_with_heap(self))); self.check_time()?; } Ok(Some(allocate_tuple(result, self)?)) } _ => Ok(None), } } /// Returns whether garbage collection should run. /// /// True if reference cycles may exist in the heap /// and the number of GC-tracked allocations since the last GC has reached `GC_INTERVAL`. #[inline] pub fn should_gc(&self) -> bool { self.may_have_cycles && self.allocations_since_gc >= GC_INTERVAL } /// Runs mark-sweep garbage collection to free unreachable cycles. /// /// This method takes the root HeapIds as a vector /// (typically from the VM's globals and stack). It marks all reachable objects starting /// from roots, then sweeps (frees) any unreachable objects. /// /// This is necessary because reference counting alone cannot free cycles /// where objects reference each other but are unreachable from the program. /// /// # Caller Responsibility /// The caller should check `should_gc()` before calling this method. /// If no cycles are possible, the caller can skip GC entirely. 
/// /// # Arguments /// * `root` - HeapIds that are roots; the heap-owned empty-tuple singleton is always treated as an additional root pub fn collect_garbage(&mut self, root: Vec) { // Mark phase: collect all reachable IDs with an explicit work list (popped LIFO, so traversal is depth-first) // Use Vec instead of HashSet for O(1) operations without hashing overhead let mut reachable: Vec = vec![false; self.entries.len()]; let mut work_list: Vec = root; // The empty-tuple singleton is owned by the heap itself (allocated in `Heap::new`), so it must // survive the sweep even when no caller-supplied root references it; otherwise slot 0 would be // freed and `get_empty_tuple` would panic or alias a reused slot. work_list.push(EMPTY_TUPLE_ID); while let Some(id) = work_list.pop() { let idx = id.index(); // Skip if out of bounds or already visited if idx >= reachable.len() || reachable[idx] { continue; } reachable[idx] = true; // Add children to work list if let Some(Some(entry)) = self.entries.get(idx) && let Some(ref data) = entry.data { collect_child_ids(data, &mut work_list); } } // Sweep phase: free unreachable values (refcounts are ignored; only reachability matters) for (id, value) in self.entries.iter_mut().enumerate() { if reachable[id] { continue; } // This entry is unreachable - free it if let Some(value) = value.take() { // Notify tracker of freed memory if let Some(ref data) = value.data { self.tracker.on_free(|| data.py_estimate_size()); } self.free_list.push(HeapId(id)); // Mark Values as Dereferenced when ref-count-panic is enabled #[cfg(feature = "ref-count-panic")] if let Some(mut data) = value.data { data.py_dec_ref_ids(&mut Vec::new()); } } } // Reset cycle flag after GC - cycles have been collected self.may_have_cycles = false; self.allocations_since_gc = 0; } } /// Computes the number of significant bits in an `i64`. /// /// Returns 0 for zero, otherwise returns the position of the highest set bit /// plus one. Uses unsigned absolute value to handle negative numbers correctly. fn i64_bits(value: i64) -> u64 { if value == 0 { 0 } else { u64::from(64 - value.unsigned_abs().leading_zeros()) } } /// Converts an `i64` repeat count to `usize` for sequence repetition. /// /// Returns 0 for negative values (Python treats negative repeat counts as 0). /// Returns `OverflowError` if the value exceeds `usize::MAX`. 
fn i64_to_repeat_count(n: i64) -> RunResult { if n <= 0 { Ok(0) } else { usize::try_from(n).map_err(|_| ExcType::overflow_repeat_count().into()) } } /// Converts a `LongInt` repeat count to `usize` for sequence repetition. /// /// Returns 0 for negative values (Python treats negative repeat counts as 0). /// Returns `OverflowError` if the value exceeds `usize::MAX`. fn longint_to_repeat_count(li: &LongInt) -> RunResult { if li.is_negative() { Ok(0) } else if let Some(count) = li.to_usize() { Ok(count) } else { Err(ExcType::overflow_repeat_count().into()) } } /// Collects child HeapIds from a HeapData value for GC traversal. fn collect_child_ids(data: &HeapData, work_list: &mut Vec) { match data { HeapData::List(list) => { // Skip iteration if no refs - major GC optimization for lists of primitives if !list.contains_refs() { return; } for value in list.as_slice() { if let Value::Ref(id) = value { work_list.push(*id); } } } HeapData::Tuple(tuple) => { // Skip iteration if no refs - GC optimization for tuples of primitives if !tuple.contains_refs() { return; } for value in tuple.as_slice() { if let Value::Ref(id) = value { work_list.push(*id); } } } HeapData::NamedTuple(nt) => { // Skip iteration if no refs - GC optimization for namedtuples of primitives if !nt.contains_refs() { return; } for value in nt.as_vec() { if let Value::Ref(id) = value { work_list.push(*id); } } } HeapData::Dict(dict) => { // Skip iteration if no refs - major GC optimization for dicts of primitives if !dict.has_refs() { return; } for (k, v) in dict { if let Value::Ref(id) = k { work_list.push(*id); } if let Value::Ref(id) = v { work_list.push(*id); } } } HeapData::DictKeysView(view) => { work_list.push(view.dict_id()); } HeapData::DictItemsView(view) => { work_list.push(view.dict_id()); } HeapData::DictValuesView(view) => { work_list.push(view.dict_id()); } HeapData::Set(set) => { for value in set.storage().iter() { if let Value::Ref(id) = value { work_list.push(*id); } } } 
HeapData::FrozenSet(frozenset) => { for value in frozenset.storage().iter() { if let Value::Ref(id) = value { work_list.push(*id); } } } HeapData::Closure(closure) => { // Add captured cells to work list for cell_id in &closure.cells { work_list.push(*cell_id); } // Add default values that are heap references for default in &closure.defaults { if let Value::Ref(id) = default { work_list.push(*id); } } } HeapData::FunctionDefaults(fd) => { // Add default values that are heap references for default in &fd.defaults { if let Value::Ref(id) = default { work_list.push(*id); } } } HeapData::Cell(cell) => { // Cell can contain a reference to another heap value if let Value::Ref(id) = &cell.0 { work_list.push(*id); } } HeapData::Dataclass(dc) => { // Dataclass attrs are stored in a Dict - iterate through entries for (k, v) in dc.attrs() { if let Value::Ref(id) = k { work_list.push(*id); } if let Value::Ref(id) = v { work_list.push(*id); } } } HeapData::Iter(iter) => { // Iterator holds a reference to the iterable being iterated if let Value::Ref(id) = iter.value() { work_list.push(*id); } } HeapData::Module(m) => { // Module attrs can contain references to heap values if !m.has_refs() { return; } for (k, v) in m.attrs() { if let Value::Ref(id) = k { work_list.push(*id); } if let Value::Ref(id) = v { work_list.push(*id); } } } HeapData::Coroutine(coro) => { // Add namespace values that are heap references for value in &coro.namespace { if let Value::Ref(id) = value { work_list.push(*id); } } } HeapData::GatherFuture(gather) => { // Add coroutine HeapIds to work list for item in &gather.items { if let GatherItem::Coroutine(coro_id) = item { work_list.push(*coro_id); } } // Add result values that are heap references for result in gather.results.iter().flatten() { if let Value::Ref(id) = result { work_list.push(*id); } } } // Leaf types with no heap references _ => {} } } /// Drop implementation for Heap that marks all contained Objects as Dereferenced /// before dropping to 
prevent panics when the `ref-count-panic` feature is enabled. #[cfg(feature = "ref-count-panic")] impl Drop for Heap { fn drop(&mut self) { // Mark all contained Objects as Dereferenced before dropping. // We use py_dec_ref_ids for this since it handles the marking // (we ignore the collected IDs since we're dropping everything anyway). let mut dummy_stack = Vec::new(); for value in self.entries.iter_mut().flatten() { if let Some(data) = &mut value.data { data.py_dec_ref_ids(&mut dummy_stack); } } } } ================================================ FILE: crates/monty/src/heap_data.rs ================================================ use std::{ borrow::Cow, fmt::Write, hash::{DefaultHasher, Hash, Hasher}, mem::discriminant, }; use ahash::AHashSet; use num_integer::Integer; use crate::{ ExcType, ResourceError, ResourceTracker, args::ArgValues, asyncio::{Coroutine, GatherFuture, GatherItem}, bytecode::{CallResult, VM}, defer_drop, exception_private::{RunResult, SimpleException}, heap::{Heap, HeapId, HeapItem}, intern::{FunctionId, Interns}, types::{ Bytes, Dataclass, Dict, DictItemsView, DictKeysView, DictValuesView, FrozenSet, List, LongInt, Module, MontyIter, NamedTuple, Path, PyTrait, Range, ReMatch, RePattern, Set, Slice, Str, Tuple, Type, }, value::{EitherStr, Value}, }; /// HeapData captures every runtime value that must live in the arena. /// /// Each variant wraps a type that implements `PyTrait`, providing /// Python-compatible operations. The trait is manually implemented to dispatch /// to the appropriate variant's implementation. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) enum HeapData { Str(Str), Bytes(Bytes), List(List), Tuple(Tuple), NamedTuple(NamedTuple), Dict(Dict), DictKeysView(DictKeysView), DictItemsView(DictItemsView), DictValuesView(DictValuesView), Set(Set), FrozenSet(FrozenSet), Closure(Closure), FunctionDefaults(FunctionDefaults), /// A cell wrapping a single mutable value for closure support. 
/// /// Cells enable nonlocal variable access by providing a heap-allocated /// container that can be shared between a function and its nested functions. /// Both the outer function and inner function hold references to the same /// cell, allowing modifications to propagate across scope boundaries. Cell(CellValue), /// A range object (e.g., `range(10)` or `range(1, 10, 2)`). /// /// Stored on the heap to keep `Value` enum small (16 bytes). Range objects /// are immutable and hashable. Range(Range), /// A slice object (e.g., `slice(1, 10, 2)` or from `x[1:10:2]`). /// /// Stored on the heap to keep `Value` enum small. Slice objects represent /// start:stop:step indices for sequence slicing operations. Slice(Slice), /// An exception instance (e.g., `ValueError('message')`). /// /// Stored on the heap to keep `Value` enum small (16 bytes). Exceptions /// are created when exception types are called or when `raise` is executed. Exception(SimpleException), /// A dataclass instance with fields and method references. /// /// Contains a class name, a Dict of field name -> value mappings, and a set /// of method names that trigger external function calls when invoked. Dataclass(Dataclass), /// An iterator for for-loop iteration and the `iter()` type constructor. /// /// Created by the `GetIter` opcode or `iter()` builtin, advanced by `ForIter`. /// Stores iteration state for lists, tuples, strings, ranges, dicts, and sets. Iter(MontyIter), /// An arbitrary precision integer (LongInt). /// /// Stored on the heap to keep `Value` enum at 16 bytes. Python has one `int` type, /// so LongInt is an implementation detail - we use `Value::Int(i64)` for performance /// when values fit, and promote to LongInt on overflow. When LongInt results fit back /// in i64, they are demoted back to `Value::Int` for performance. LongInt(LongInt), /// A Python module (e.g., `sys`, `typing`). /// /// Modules have a name and a dictionary of attributes. 
They are created by /// import statements and can have refs to other heap values in their attributes. Module(Module), /// A coroutine object from an async function call. /// /// Contains pre-bound arguments and captured cells, ready to be awaited. /// When awaited, a new frame is pushed using the stored namespace. Coroutine(Coroutine), /// A gather() result tracking multiple coroutines/tasks. /// /// Created by asyncio.gather() and spawns tasks when awaited. GatherFuture(GatherFuture), /// A filesystem path from `pathlib.Path`. /// /// Stored on the heap to provide Python-compatible path operations. /// Pure methods (name, parent, etc.) are handled directly by the VM. /// I/O methods (exists, read_text, etc.) yield external function calls. Path(Path), /// A compiled regex pattern from `re.compile()`. /// /// Contains the original pattern string, flags, and compiled regex engine. /// Leaf type: no heap references, not GC-tracked. RePattern(Box), /// A regex match result from a successful regex operation. /// /// Contains the matched text, capture groups, positions, and input string. /// Leaf type: no heap references, not GC-tracked. ReMatch(ReMatch), /// Reference to an external function whose name was not found in the intern table. /// /// Created when the host resolves a `NameLookup` to a callable whose name does not /// match any interned string (e.g., the host returns a function with a different /// `__name__` than the variable it was assigned to). When called, the VM yields /// `FrameExit::ExternalCall` with an `EitherStr::Heap` containing this name. ExtFunction(String), } impl HeapData { /// Returns whether this heap data type can participate in reference cycles. /// /// Only container types that can hold references to other heap objects need to be /// tracked for GC purposes. Leaf types like Str, Bytes, Range, and Exception cannot /// form cycles and should not count toward the GC allocation threshold. 
/// /// This optimization allows programs that allocate many leaf objects (like strings) /// to avoid triggering unnecessary GC cycles. #[inline] pub(crate) fn is_gc_tracked(&self) -> bool { matches!( self, Self::List(_) | Self::Tuple(_) | Self::NamedTuple(_) | Self::Dict(_) | Self::DictKeysView(_) | Self::DictItemsView(_) | Self::DictValuesView(_) | Self::Set(_) | Self::FrozenSet(_) | Self::Closure(_) | Self::FunctionDefaults(_) | Self::Cell(_) | Self::Dataclass(_) | Self::Iter(_) | Self::Module(_) | Self::Coroutine(_) | Self::GatherFuture(_) ) } /// Returns whether this heap data currently contains any heap references (`Value::Ref`). /// /// Used during allocation to determine if this data could create reference cycles. /// When true, `mark_potential_cycle()` should be called to enable GC. /// /// Note: This is separate from `is_gc_tracked()` - a container may be GC-tracked /// (capable of holding refs) but not currently contain any refs. #[inline] pub(crate) fn has_refs(&self) -> bool { match self { Self::List(list) => list.contains_refs(), Self::Tuple(tuple) => tuple.contains_refs(), Self::NamedTuple(nt) => nt.contains_refs(), Self::Dict(dict) => dict.has_refs(), Self::DictKeysView(_) | Self::DictItemsView(_) | Self::DictValuesView(_) => true, Self::Set(set) => set.has_refs(), Self::FrozenSet(fset) => fset.has_refs(), // Closures always have refs when they have captured cells (HeapIds) Self::Closure(closure) => { !closure.cells.is_empty() || closure.defaults.iter().any(|v| matches!(v, Value::Ref(_))) } Self::FunctionDefaults(fd) => fd.defaults.iter().any(|v| matches!(v, Value::Ref(_))), Self::Cell(cell) => matches!(&cell.0, Value::Ref(_)), Self::Dataclass(dc) => dc.has_refs(), Self::Iter(iter) => iter.has_refs(), Self::Module(m) => m.has_refs(), // Coroutines have refs from namespace values (params, cell/free vars) Self::Coroutine(coro) => coro.namespace.iter().any(|v| matches!(v, Value::Ref(_))), // GatherFutures have refs from coroutine items and results 
Self::GatherFuture(gather) => { gather.items.iter().any(|item| matches!(item, GatherItem::Coroutine(_))) || gather .results .iter() .any(|r| r.as_ref().is_some_and(|v| matches!(v, Value::Ref(_)))) } // Leaf types cannot have refs _ => false, } } /// Returns true if this heap data is a coroutine. #[inline] pub fn is_coroutine(&self) -> bool { matches!(self, Self::Coroutine(_)) } /// Re-cast this as `HeapDataMut` for mutation. /// /// This is an important part of the Heap invariants: we never allow `&mut HeapData` /// outside of the heap module to prevent heap data changing type during execution. pub(crate) fn to_mut(&mut self) -> HeapDataMut<'_> { match self { Self::Str(s) => HeapDataMut::Str(s), Self::Bytes(b) => HeapDataMut::Bytes(b), Self::List(l) => HeapDataMut::List(l), Self::Tuple(t) => HeapDataMut::Tuple(t), Self::NamedTuple(nt) => HeapDataMut::NamedTuple(nt), Self::Dict(d) => HeapDataMut::Dict(d), Self::DictKeysView(view) => HeapDataMut::DictKeysView(view), Self::DictItemsView(view) => HeapDataMut::DictItemsView(view), Self::DictValuesView(view) => HeapDataMut::DictValuesView(view), Self::Set(s) => HeapDataMut::Set(s), Self::FrozenSet(fs) => HeapDataMut::FrozenSet(fs), Self::Closure(closure) => HeapDataMut::Closure(closure), Self::FunctionDefaults(fd) => HeapDataMut::FunctionDefaults(fd), Self::Cell(cell) => HeapDataMut::Cell(cell), Self::Range(r) => HeapDataMut::Range(r), Self::Slice(s) => HeapDataMut::Slice(s), Self::Exception(e) => HeapDataMut::Exception(e), Self::Dataclass(dc) => HeapDataMut::Dataclass(dc), Self::Iter(iter) => HeapDataMut::Iter(iter), Self::LongInt(li) => HeapDataMut::LongInt(li), Self::Module(m) => HeapDataMut::Module(m), Self::Coroutine(coro) => HeapDataMut::Coroutine(coro), Self::GatherFuture(gather) => HeapDataMut::GatherFuture(gather), Self::Path(p) => HeapDataMut::Path(p), Self::ReMatch(m) => HeapDataMut::ReMatch(m), Self::RePattern(p) => HeapDataMut::RePattern(p), Self::ExtFunction(s) => HeapDataMut::ExtFunction(s), } } } /// 
Mutable reference to `HeapData` inner values #[derive(Debug)] pub(crate) enum HeapDataMut<'a> { Str(&'a mut Str), Bytes(&'a mut Bytes), List(&'a mut List), Tuple(&'a mut Tuple), NamedTuple(&'a mut NamedTuple), Dict(&'a mut Dict), DictKeysView(&'a mut DictKeysView), DictItemsView(&'a mut DictItemsView), DictValuesView(&'a mut DictValuesView), Set(&'a mut Set), FrozenSet(&'a mut FrozenSet), Closure(&'a mut Closure), FunctionDefaults(&'a mut FunctionDefaults), /// A cell wrapping a single mutable value for closure support. /// /// Cells enable nonlocal variable access by providing a heap-allocated /// container that can be shared between a function and its nested functions. /// Both the outer function and inner function hold references to the same /// cell, allowing modifications to propagate across scope boundaries. Cell(&'a mut CellValue), /// A range object (e.g., `range(10)` or `range(1, 10, 2)`). /// /// Stored on the heap to keep `Value` enum small (16 bytes). Range objects /// are immutable and hashable. Range(&'a mut Range), /// A slice object (e.g., `slice(1, 10, 2)` or from `x[1:10:2]`). /// /// Stored on the heap to keep `Value` enum small. Slice objects represent /// start:stop:step indices for sequence slicing operations. Slice(&'a mut Slice), /// An exception instance (e.g., `ValueError('message')`). /// /// Stored on the heap to keep `Value` enum small (16 bytes). Exceptions /// are created when exception types are called or when `raise` is executed. Exception(&'a mut SimpleException), /// A dataclass instance with fields and method references. /// /// Contains a class name, a Dict of field name -> value mappings, and a set /// of method names that trigger external function calls when invoked. Dataclass(&'a mut Dataclass), /// An iterator for for-loop iteration and the `iter()` type constructor. /// /// Created by the `GetIter` opcode or `iter()` builtin, advanced by `ForIter`. 
/// Stores iteration state for lists, tuples, strings, ranges, dicts, and sets. Iter(&'a mut MontyIter), /// An arbitrary precision integer (LongInt). /// /// Stored on the heap to keep `Value` enum at 16 bytes. Python has one `int` type, /// so LongInt is an implementation detail - we use `Value::Int(i64)` for performance /// when values fit, and promote to LongInt on overflow. When LongInt results fit back /// in i64, they are demoted back to `Value::Int` for performance. LongInt(&'a mut LongInt), /// A Python module (e.g., `sys`, `typing`). /// /// Modules have a name and a dictionary of attributes. They are created by /// import statements and can have refs to other heap values in their attributes. Module(&'a mut Module), /// A coroutine object from an async function call. /// /// Contains pre-bound arguments and captured cells, ready to be awaited. /// When awaited, a new frame is pushed using the stored namespace. Coroutine(&'a mut Coroutine), /// A gather() result tracking multiple coroutines/tasks. /// /// Created by asyncio.gather() and spawns tasks when awaited. GatherFuture(&'a mut GatherFuture), /// A filesystem path from `pathlib.Path`. /// /// Stored on the heap to provide Python-compatible path operations. /// Pure methods (name, parent, etc.) are handled directly by the VM. /// I/O methods (exists, read_text, etc.) yield external function calls. Path(&'a mut Path), /// A regex match result from `re.match()`, `re.search()`, etc. /// /// Stores matched text, capture groups, and positions. All data is owned /// (no heap references), so reference counting is trivial. ReMatch(&'a mut ReMatch), /// A compiled regex pattern from `re.compile()`. /// /// Wraps a compiled regex with the original pattern string and flags. /// Custom serde serializes only the pattern and flags, recompiling on deserialize. RePattern(&'a mut RePattern), /// Reference to an external function where the name was not interned. 
/// /// Created when the host resolves a name lookup to a callable whose name /// does not match any interned string (e.g., the host returns a function /// with a different `__name__` than the variable it was assigned to). ExtFunction(&'a mut String), } /// Thin wrapper around `Value` which is used in the `Cell` variant above. /// /// The inner value is the cell's mutable payload. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(transparent)] #[repr(transparent)] pub(crate) struct CellValue(pub(crate) Value); impl std::ops::Deref for CellValue { type Target = Value; fn deref(&self) -> &Self::Target { &self.0 } } /// A closure: a function that captures variables from enclosing scopes. /// /// Contains a reference to the function definition, a vector of captured cell HeapIds, /// and evaluated default values (if any). When the closure is called, these cells are /// passed to the RunFrame for variable access. When the closure is dropped, we must /// decrement the ref count on each captured cell and each default value. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) struct Closure { /// The function definition being captured. pub func_id: FunctionId, /// Captured cells from enclosing scopes. pub cells: Vec, /// Evaluated default parameter values (if any). pub defaults: Vec, } /// A function with evaluated default parameter values (non-closure). /// /// Contains a reference to the function definition and the evaluated default values. /// When the function is called, defaults are cloned for missing optional parameters. /// When dropped, we must decrement the ref count on each default value. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) struct FunctionDefaults { /// The function definition being captured. pub func_id: FunctionId, /// Evaluated default parameter values (if any). 
pub defaults: Vec, } impl HeapItem for CellValue { fn py_estimate_size(&self) -> usize { std::mem::size_of::() } fn py_dec_ref_ids(&mut self, stack: &mut Vec) { self.0.py_dec_ref_ids(stack); } } impl HeapItem for Closure { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.cells.len() * std::mem::size_of::() + self.defaults.len() * std::mem::size_of::() } fn py_dec_ref_ids(&mut self, stack: &mut Vec) { // Decrement ref count for captured cells stack.extend(self.cells.iter().copied()); // Decrement ref count for default values that are heap references for default in &mut self.defaults { default.py_dec_ref_ids(stack); } } } impl HeapItem for FunctionDefaults { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.defaults.len() * std::mem::size_of::() } fn py_dec_ref_ids(&mut self, stack: &mut Vec) { // Decrement ref count for default values that are heap references for default in &mut self.defaults { default.py_dec_ref_ids(stack); } } } impl HeapItem for SimpleException { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.arg().map_or(0, String::len) } fn py_dec_ref_ids(&mut self, _stack: &mut Vec) { // Exceptions don't contain heap references } } impl HeapItem for LongInt { fn py_estimate_size(&self) -> usize { self.estimate_size() } fn py_dec_ref_ids(&mut self, _stack: &mut Vec) { // LongInt doesn't contain heap references } } impl HeapItem for Coroutine { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.namespace.len() * std::mem::size_of::() } fn py_dec_ref_ids(&mut self, stack: &mut Vec) { // Decrement ref count for namespace values that are heap references for value in &mut self.namespace { value.py_dec_ref_ids(stack); } } } impl HeapItem for GatherFuture { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.items.len() * std::mem::size_of::() + self.results.len() * std::mem::size_of::>() + self.pending_calls.len() * std::mem::size_of::() } fn py_dec_ref_ids(&mut self, stack: &mut 
Vec) { // Decrement ref count for coroutine HeapIds for item in &self.items { if let GatherItem::Coroutine(id) = item { stack.push(*id); } } // Decrement ref count for result values that are heap references for result in self.results.iter_mut().flatten() { result.py_dec_ref_ids(stack); } } } impl HeapDataMut<'_> { /// Computes hash for immutable heap types that can be used as dict keys. /// /// Returns `Ok(Some(hash))` for hashable values: Str, Bytes, Range, Slice, Path, LongInt, /// ExtFunction, Closure and FunctionDefaults always; Tuple and NamedTuple when every /// element is hashable. FrozenSet and Dataclass delegate to their own `compute_hash`. /// Returns `Ok(None)` for every other variant (e.g. List, Dict), which cannot be dict keys, /// and for a Tuple or NamedTuple containing an unhashable element. /// Returns `Err(ResourceError::Recursion)` if the recursion limit is exceeded /// while hashing deeply nested containers (e.g., tuples of tuples). /// /// This is called lazily when the value is first used as a dict key, /// avoiding unnecessary hash computation for values that are never used as keys. pub fn compute_hash_if_immutable( &self, heap: &mut Heap, interns: &Interns, ) -> Result, ResourceError> { match self { // Hash just the actual string or bytes content for consistency with Value::InternString/InternBytes // hence we don't include the discriminant Self::Str(s) => { let mut hasher = DefaultHasher::new(); s.as_str().hash(&mut hasher); Ok(Some(hasher.finish())) } Self::Bytes(b) => { let mut hasher = DefaultHasher::new(); b.as_slice().hash(&mut hasher); Ok(Some(hasher.finish())) } Self::FrozenSet(fs) => { // FrozenSet hash is XOR of element hashes (order-independent) // Recursion depth is checked inside compute_hash fs.compute_hash(heap, interns) } Self::Tuple(t) => { let token = heap.incr_recursion_depth()?; crate::defer_drop!(token, heap); let mut hasher = DefaultHasher::new(); discriminant(self).hash(&mut hasher); // Tuple is hashable only if all elements are hashable for obj in t.as_slice() { match obj.py_hash(heap, interns)?
{ Some(h) => h.hash(&mut hasher), None => return Ok(None), } } Ok(Some(hasher.finish())) } Self::NamedTuple(nt) => { let token = heap.incr_recursion_depth()?; crate::defer_drop!(token, heap); let mut hasher = DefaultHasher::new(); discriminant(self).hash(&mut hasher); // Hash only by elements (not type_name) to match equality semantics // NOTE(review): the discriminant hashed just above is the NamedTuple one, while the Tuple arm // hashes the Tuple one, so a NamedTuple and an element-wise equal Tuple (which `py_eq` // treats as equal) get different hashes - confirm this is intended for dict/set keys for obj in nt.as_vec() { match obj.py_hash(heap, interns)? { Some(h) => h.hash(&mut hasher), None => return Ok(None), } } Ok(Some(hasher.finish())) } Self::Closure(closure) => { let mut hasher = DefaultHasher::new(); discriminant(self).hash(&mut hasher); // TODO, this is NOT proper hashing, we should somehow hash the function properly closure.func_id.hash(&mut hasher); Ok(Some(hasher.finish())) } Self::FunctionDefaults(fd) => { let mut hasher = DefaultHasher::new(); discriminant(self).hash(&mut hasher); // TODO, this is NOT proper hashing, we should somehow hash the function properly fd.func_id.hash(&mut hasher); Ok(Some(hasher.finish())) } Self::Range(range) => { let mut hasher = DefaultHasher::new(); discriminant(self).hash(&mut hasher); range.start.hash(&mut hasher); range.stop.hash(&mut hasher); range.step.hash(&mut hasher); Ok(Some(hasher.finish())) } // Dataclass hashability depends on the mutable flag // Recursion depth is checked inside compute_hash Self::Dataclass(dc) => dc.compute_hash(heap, interns), // Slices are immutable and hashable (like in CPython) Self::Slice(slice) => { let mut hasher = DefaultHasher::new(); discriminant(self).hash(&mut hasher); slice.start.hash(&mut hasher); slice.stop.hash(&mut hasher); slice.step.hash(&mut hasher); Ok(Some(hasher.finish())) } // Path is immutable and hashable Self::Path(path) => { let mut hasher = DefaultHasher::new(); discriminant(self).hash(&mut hasher); path.as_str().hash(&mut hasher); Ok(Some(hasher.finish())) } // LongInt is immutable and hashable Self::LongInt(li) => Ok(Some(li.hash())), // ExtFunction is hashable by name Self::ExtFunction(name) => { let mut hasher =
DefaultHasher::new(); discriminant(self).hash(&mut hasher); name.hash(&mut hasher); Ok(Some(hasher.finish())) } // other types cannot be hashed (Cell is handled specially in get_or_compute_hash) _ => Ok(None), } } } /// Shared dispatch macro for `PyTrait` methods on `HeapData` and `HeapDataMut`. /// /// Both enums have identical variants (owned vs borrowed) and identical dispatch /// logic. This macro eliminates the duplication by generating the match arms for /// each method. The caller supplies only the implementing type (`$self_ty`); the /// macro itself emits the whole `impl PyTrait` block, method bodies included. macro_rules! impl_py_trait_dispatch { ($self_ty:ty) => { impl PyTrait for $self_ty { fn py_type(&self, heap: &Heap) -> Type { match self { Self::Str(s) => s.py_type(heap), Self::Bytes(b) => b.py_type(heap), Self::List(l) => l.py_type(heap), Self::Tuple(t) => t.py_type(heap), Self::NamedTuple(nt) => nt.py_type(heap), Self::Dict(d) => d.py_type(heap), Self::DictKeysView(view) => view.py_type(heap), Self::DictItemsView(view) => view.py_type(heap), Self::DictValuesView(view) => view.py_type(heap), Self::Set(s) => s.py_type(heap), Self::FrozenSet(fs) => fs.py_type(heap), Self::Closure(_) | Self::FunctionDefaults(_) | Self::ExtFunction(_) => Type::Function, Self::Cell(_) => Type::Cell, Self::Range(_) => Type::Range, Self::Slice(_) => Type::Slice, Self::Exception(e) => e.py_type(), Self::Dataclass(dc) => dc.py_type(heap), Self::Iter(_) => Type::Iterator, // LongInt is still `int` in Python - it's an implementation detail Self::LongInt(_) => Type::Int, Self::Module(_) => Type::Module, Self::Coroutine(_) | Self::GatherFuture(_) => Type::Coroutine, Self::Path(p) => p.py_type(heap), Self::ReMatch(m) => m.py_type(heap), Self::RePattern(p) => p.py_type(heap), } } fn py_len(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Option { match self { Self::Str(s) => s.py_len(vm), Self::Bytes(b) => b.py_len(vm), Self::List(l) => l.py_len(vm), Self::Tuple(t) => t.py_len(vm), Self::NamedTuple(nt) => nt.py_len(vm), Self::Dict(d) => d.py_len(vm),
Self::DictKeysView(view) => view.py_len(vm), Self::DictItemsView(view) => view.py_len(vm), Self::DictValuesView(view) => view.py_len(vm), Self::Set(s) => s.py_len(vm), Self::FrozenSet(fs) => fs.py_len(vm), Self::Range(r) => Some(r.len()), // other types don't have length _ => None, } } fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { match (self, other) { (Self::Str(a), Self::Str(b)) => a.py_eq(b, vm), (Self::Bytes(a), Self::Bytes(b)) => a.py_eq(b, vm), (Self::List(a), Self::List(b)) => a.py_eq(b, vm), (Self::Tuple(a), Self::Tuple(b)) => a.py_eq(b, vm), (Self::NamedTuple(a), Self::NamedTuple(b)) => a.py_eq(b, vm), // NamedTuple can compare with Tuple by elements (matching CPython behavior) (Self::NamedTuple(nt), Self::Tuple(t)) | (Self::Tuple(t), Self::NamedTuple(nt)) => { let nt_items = nt.as_vec(); let t_items = t.as_slice(); if nt_items.len() != t_items.len() { return Ok(false); } // Comparing elements re-enters `py_eq`, so take a recursion-depth token first; // the `?` returns early if `incr_recursion_depth` fails. The token is handed to // `defer_drop!` together with `vm` (presumably released when this arm exits). let token = vm.heap.incr_recursion_depth()?; defer_drop!(token, vm); for (a, b) in nt_items.iter().zip(t_items.iter()) { if !a.py_eq(b, vm)?
{ return Ok(false); } } Ok(true) } (Self::Dict(a), Self::Dict(b)) => a.py_eq(b, vm), (Self::DictKeysView(a), Self::DictKeysView(b)) => a.py_eq(b, vm), (Self::DictItemsView(a), Self::DictItemsView(b)) => a.py_eq(b, vm), (Self::DictValuesView(_), Self::DictValuesView(_)) => Ok(false), (Self::DictKeysView(a), Self::Set(b)) | (Self::Set(b), Self::DictKeysView(a)) => a.eq_set(b, vm), (Self::DictKeysView(a), Self::FrozenSet(b)) | (Self::FrozenSet(b), Self::DictKeysView(a)) => { a.eq_frozenset(b, vm) } (Self::DictItemsView(a), Self::Set(b)) | (Self::Set(b), Self::DictItemsView(a)) => a.eq_set(b, vm), (Self::DictItemsView(a), Self::FrozenSet(b)) | (Self::FrozenSet(b), Self::DictItemsView(a)) => { a.eq_frozenset(b, vm) } (Self::Set(a), Self::Set(b)) => a.py_eq(b, vm), (Self::FrozenSet(a), Self::FrozenSet(b)) => a.py_eq(b, vm), (Self::Closure(a), Self::Closure(b)) => Ok(a.func_id == b.func_id && a.cells == b.cells), (Self::FunctionDefaults(a), Self::FunctionDefaults(b)) => Ok(a.func_id == b.func_id), (Self::Range(a), Self::Range(b)) => a.py_eq(b, vm), (Self::Dataclass(a), Self::Dataclass(b)) => a.py_eq(b, vm), // LongInt equality (Self::LongInt(a), Self::LongInt(b)) => Ok(a == b), // Slice equality (Self::Slice(a), Self::Slice(b)) => a.py_eq(b, vm), // Path equality (Self::Path(a), Self::Path(b)) => a.py_eq(b, vm), // ReMatch pairs are delegated to the ReMatch value's own `py_eq` // NOTE(review): this arm used to be described as "not comparable" - confirm that impl returns false (Self::ReMatch(a), Self::ReMatch(b)) => a.py_eq(b, vm), // RePattern equality by pattern string and flags (Self::RePattern(a), Self::RePattern(b)) => a.py_eq(b, vm), // Cells, Exceptions, Iterators, Modules, and async types compare by identity only // (handled at Value level via HeapId comparison) (Self::Cell(_), Self::Cell(_)) | (Self::Exception(_), Self::Exception(_)) | (Self::Iter(_), Self::Iter(_)) | (Self::Module(_), Self::Module(_)) | (Self::Coroutine(_), Self::Coroutine(_)) | (Self::GatherFuture(_), Self::GatherFuture(_)) => Ok(false), _ => Ok(false), // Any pairing not matched above (including ExtFunction with ExtFunction) is unequal } } fn py_cmp( &self, other:
&Self, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, ResourceError> { match (self, other) { (Self::Str(a), Self::Str(b)) => a.py_cmp(b, vm), (Self::Bytes(a), Self::Bytes(b)) => a.py_cmp(b, vm), (Self::Tuple(a), Self::Tuple(b)) => a.py_cmp(b, vm), _ => Ok(None), } } fn py_bool(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> bool { match self { Self::Str(s) => s.py_bool(vm), Self::Bytes(b) => b.py_bool(vm), Self::List(l) => l.py_bool(vm), Self::Tuple(t) => t.py_bool(vm), Self::NamedTuple(nt) => nt.py_bool(vm), Self::Dict(d) => d.py_bool(vm), Self::DictKeysView(view) => view.py_bool(vm), Self::DictItemsView(view) => view.py_bool(vm), Self::DictValuesView(view) => view.py_bool(vm), Self::Set(s) => s.py_bool(vm), Self::FrozenSet(fs) => fs.py_bool(vm), Self::Closure(_) | Self::FunctionDefaults(_) | Self::ExtFunction(_) => true, Self::Cell(_) => true, // Cells are always truthy Self::Range(r) => r.py_bool(vm), Self::Slice(s) => s.py_bool(vm), Self::Exception(_) => true, // Exceptions are always truthy Self::Dataclass(dc) => dc.py_bool(vm), Self::Iter(_) => true, // Iterators are always truthy Self::LongInt(li) => !li.is_zero(), Self::Module(_) => true, // Modules are always truthy Self::Coroutine(_) => true, // Coroutines are always truthy Self::GatherFuture(_) => true, // GatherFutures are always truthy Self::Path(p) => p.py_bool(vm), Self::ReMatch(m) => m.py_bool(vm), Self::RePattern(p) => p.py_bool(vm), } } fn py_repr_fmt( &self, f: &mut impl Write, vm: &VM<'_, '_, impl ResourceTracker>, heap_ids: &mut AHashSet, ) -> std::fmt::Result { match self { Self::Str(s) => s.py_repr_fmt(f, vm, heap_ids), Self::Bytes(b) => b.py_repr_fmt(f, vm, heap_ids), Self::List(l) => l.py_repr_fmt(f, vm, heap_ids), Self::Tuple(t) => t.py_repr_fmt(f, vm, heap_ids), Self::NamedTuple(nt) => nt.py_repr_fmt(f, vm, heap_ids), Self::Dict(d) => d.py_repr_fmt(f, vm, heap_ids), Self::DictKeysView(view) => view.py_repr_fmt(f, vm, heap_ids), Self::DictItemsView(view) => view.py_repr_fmt(f, 
vm, heap_ids), Self::DictValuesView(view) => view.py_repr_fmt(f, vm, heap_ids), Self::Set(s) => s.py_repr_fmt(f, vm, heap_ids), Self::FrozenSet(fs) => fs.py_repr_fmt(f, vm, heap_ids), Self::Closure(closure) => vm .interns .get_function(closure.func_id) .py_repr_fmt(f, vm.interns, 0), Self::FunctionDefaults(fd) => vm.interns.get_function(fd.func_id).py_repr_fmt(f, vm.interns, 0), // Cell repr shows the contained value's type Self::Cell(cell) => write!(f, "", cell.0.py_type(vm.heap)), Self::Range(r) => r.py_repr_fmt(f, vm, heap_ids), Self::Slice(s) => s.py_repr_fmt(f, vm, heap_ids), Self::Exception(e) => e.py_repr_fmt(f), Self::Dataclass(dc) => dc.py_repr_fmt(f, vm, heap_ids), Self::Iter(_) => write!(f, ""), Self::LongInt(li) => write!(f, "{li}"), Self::Module(m) => write!(f, "", vm.interns.get_str(m.name())), Self::Coroutine(coro) => { let func = vm.interns.get_function(coro.func_id); let name = vm.interns.get_str(func.name.name_id); write!(f, "") } Self::GatherFuture(gather) => write!(f, "", gather.item_count()), Self::Path(p) => p.py_repr_fmt(f, vm, heap_ids), Self::ReMatch(m) => m.py_repr_fmt(f, vm, heap_ids), Self::RePattern(p) => p.py_repr_fmt(f, vm, heap_ids), Self::ExtFunction(name) => write!(f, ""), } } fn py_str(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Cow<'static, str> { match self { // Strings return their value directly without quotes Self::Str(s) => s.py_str(vm), // LongInt returns its string representation Self::LongInt(li) => Cow::Owned(li.to_string()), // Exceptions return just the message (or empty string if no message) Self::Exception(e) => Cow::Owned(e.py_str()), // Paths return the path string without the PosixPath() wrapper Self::Path(p) => Cow::Owned(p.as_str().to_owned()), // All other types use repr _ => self.py_repr(vm), } } fn py_add( &self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, ResourceError> { match (self, other) { (Self::Str(a), Self::Str(b)) => a.py_add(b, vm), (Self::Bytes(a), Self::Bytes(b)) => 
a.py_add(b, vm), (Self::List(a), Self::List(b)) => a.py_add(b, vm), (Self::Tuple(a), Self::Tuple(b)) => a.py_add(b, vm), (Self::Dict(a), Self::Dict(b)) => a.py_add(b, vm), (Self::LongInt(a), Self::LongInt(b)) => { let bi = a.inner() + b.inner(); Ok(LongInt::new(bi).into_value(vm.heap).map(Some)?) } // Every other pairing (mixed variants, or variants such as Cell and Dataclass // that have no arm above) yields `None` _ => Ok(None), } } fn py_sub( &self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, ResourceError> { match (self, other) { (Self::Str(a), Self::Str(b)) => a.py_sub(b, vm), (Self::Bytes(a), Self::Bytes(b)) => a.py_sub(b, vm), (Self::List(a), Self::List(b)) => a.py_sub(b, vm), (Self::Tuple(a), Self::Tuple(b)) => a.py_sub(b, vm), (Self::Dict(a), Self::Dict(b)) => a.py_sub(b, vm), (Self::Set(a), Self::Set(b)) => a.py_sub(b, vm), (Self::FrozenSet(a), Self::FrozenSet(b)) => a.py_sub(b, vm), (Self::LongInt(a), Self::LongInt(b)) => { let bi = a.inner() - b.inner(); Ok(LongInt::new(bi).into_value(vm.heap).map(Some)?) } // Every other pairing (not only cells) yields `None`; note Set with FrozenSet has no arm _ => Ok(None), } } fn py_mod(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { match (self, other) { (Self::Str(a), Self::Str(b)) => a.py_mod(b, vm), (Self::Bytes(a), Self::Bytes(b)) => a.py_mod(b, vm), (Self::List(a), Self::List(b)) => a.py_mod(b, vm), (Self::Tuple(a), Self::Tuple(b)) => a.py_mod(b, vm), (Self::Dict(a), Self::Dict(b)) => a.py_mod(b, vm), (Self::LongInt(a), Self::LongInt(b)) => { if b.is_zero() { Err(ExcType::zero_division().into()) } else { let bi = a.inner().mod_floor(b.inner()); Ok(LongInt::new(bi).into_value(vm.heap).map(Some)?)
} } // Every other pairing (not only cells) yields `None` _ => Ok(None), } } fn py_mod_eq(&self, other: &Self, right_value: i64) -> Option { match (self, other) { (Self::Str(a), Self::Str(b)) => a.py_mod_eq(b, right_value), (Self::Bytes(a), Self::Bytes(b)) => a.py_mod_eq(b, right_value), (Self::List(a), Self::List(b)) => a.py_mod_eq(b, right_value), (Self::Tuple(a), Self::Tuple(b)) => a.py_mod_eq(b, right_value), (Self::Dict(a), Self::Dict(b)) => a.py_mod_eq(b, right_value), // Every other pairing (not only cells, and including LongInt) yields `None` _ => None, } } fn py_iadd( &mut self, other: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>, self_id: Option, ) -> Result { match self { Self::List(list) => list.py_iadd(other, vm, self_id), Self::Dict(dict) => dict.py_iadd(other, vm, self_id), _ => Ok(false), } } fn py_call_attr( &mut self, self_id: HeapId, vm: &mut VM<'_, '_, impl ResourceTracker>, attr: &EitherStr, args: ArgValues, ) -> RunResult { match self { Self::Str(s) => s.py_call_attr(self_id, vm, attr, args), Self::Bytes(b) => b.py_call_attr(self_id, vm, attr, args), Self::List(l) => l.py_call_attr(self_id, vm, attr, args), Self::Tuple(t) => t.py_call_attr(self_id, vm, attr, args), Self::Dict(d) => d.py_call_attr(self_id, vm, attr, args), Self::DictKeysView(view) => view.py_call_attr(self_id, vm, attr, args), Self::DictItemsView(view) => view.py_call_attr(self_id, vm, attr, args), Self::DictValuesView(view) => view.py_call_attr(self_id, vm, attr, args), Self::Set(s) => s.py_call_attr(self_id, vm, attr, args), Self::FrozenSet(fs) => fs.py_call_attr(self_id, vm, attr, args), Self::Dataclass(dc) => dc.py_call_attr(self_id, vm, attr, args), Self::Path(p) => p.py_call_attr(self_id, vm, attr, args), Self::Module(m) => m.py_call_attr(self_id, vm, attr, args), Self::ReMatch(m) => m.py_call_attr(self_id, vm, attr, args), Self::RePattern(p) => p.py_call_attr(self_id, vm, attr, args), _ => Err(ExcType::attribute_error( self.py_type(vm.heap), attr.as_str(vm.interns), )), } } fn py_getitem(&self, key:
&Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { match self { Self::Str(s) => s.py_getitem(key, vm), Self::Bytes(b) => b.py_getitem(key, vm), Self::List(l) => l.py_getitem(key, vm), Self::Tuple(t) => t.py_getitem(key, vm), Self::NamedTuple(nt) => nt.py_getitem(key, vm), Self::Dict(d) => d.py_getitem(key, vm), Self::Range(r) => r.py_getitem(key, vm), Self::ReMatch(m) => m.py_getitem(key, vm), _ => Err(ExcType::type_error_not_sub(self.py_type(vm.heap))), } } fn py_setitem( &mut self, key: Value, value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<()> { match self { Self::Str(s) => s.py_setitem(key, value, vm), Self::Bytes(b) => b.py_setitem(key, value, vm), Self::List(l) => l.py_setitem(key, value, vm), Self::Tuple(t) => t.py_setitem(key, value, vm), Self::Dict(d) => d.py_setitem(key, value, vm), _ => Err(ExcType::type_error_not_sub_assignment(self.py_type(vm.heap))), } } fn py_getattr( &self, attr: &EitherStr, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult> { match self { Self::Dataclass(dc) => dc.py_getattr(attr, vm), Self::Module(m) => Ok(m.py_getattr(attr, vm.heap, vm.interns)), Self::NamedTuple(nt) => nt.py_getattr(attr, vm), Self::Slice(s) => s.py_getattr(attr, vm), Self::Exception(exc) => exc.py_getattr(attr, vm.heap, vm.interns), Self::Path(p) => p.py_getattr(attr, vm), Self::ReMatch(m) => m.py_getattr(attr, vm), Self::RePattern(p) => p.py_getattr(attr, vm), // All other types don't support attribute access via py_getattr _ => Ok(None), } } } }; } impl_py_trait_dispatch!(HeapDataMut<'_>); impl_py_trait_dispatch!(HeapData); /// Shared dispatch macro for `HeapItem` methods on `HeapData` and `HeapDataMut`. /// /// Dispatches `py_estimate_size` and `py_dec_ref_ids` to the inner type's /// `HeapItem` implementation. For types without a dedicated `HeapItem` impl /// (like `ExtFunction` wrapping `String`), the logic is inlined here. macro_rules! 
impl_heap_item_dispatch {
    // Single rule: `$self_ty` is the type that receives the `HeapItem` impl.
    // The macro is invoked twice below, for `HeapDataMut<'_>` and for `HeapData`.
    ($self_ty:ty) => {
        impl HeapItem for $self_ty {
            // Forwards to the `py_estimate_size` of whichever variant `self` holds.
            fn py_estimate_size(&self) -> usize {
                // No wildcard arm here: the compiler rejects this match if a variant
                // is left out, so every variant has to report a size.
                match self {
                    Self::Str(s) => s.py_estimate_size(),
                    Self::Bytes(b) => b.py_estimate_size(),
                    Self::List(l) => l.py_estimate_size(),
                    Self::Tuple(t) => t.py_estimate_size(),
                    Self::NamedTuple(nt) => nt.py_estimate_size(),
                    Self::Dict(d) => d.py_estimate_size(),
                    Self::DictKeysView(view) => view.py_estimate_size(),
                    Self::DictItemsView(view) => view.py_estimate_size(),
                    Self::DictValuesView(view) => view.py_estimate_size(),
                    Self::Set(s) => s.py_estimate_size(),
                    Self::FrozenSet(fs) => fs.py_estimate_size(),
                    Self::Closure(closure) => closure.py_estimate_size(),
                    Self::FunctionDefaults(fd) => fd.py_estimate_size(),
                    Self::Cell(cell) => cell.py_estimate_size(),
                    Self::Range(r) => r.py_estimate_size(),
                    Self::Slice(s) => s.py_estimate_size(),
                    Self::Exception(e) => e.py_estimate_size(),
                    Self::Dataclass(dc) => dc.py_estimate_size(),
                    Self::Iter(iter) => iter.py_estimate_size(),
                    Self::LongInt(li) => li.py_estimate_size(),
                    Self::Module(m) => m.py_estimate_size(),
                    Self::Coroutine(coro) => coro.py_estimate_size(),
                    Self::GatherFuture(gather) => gather.py_estimate_size(),
                    Self::Path(p) => p.py_estimate_size(),
                    Self::ReMatch(m) => m.py_estimate_size(),
                    Self::RePattern(p) => p.py_estimate_size(),
                    // Inlined because this variant has no inner `HeapItem` impl: fixed overhead plus
                    // the string's byte length.
                    // NOTE(review): the type argument of `size_of` is missing in this copy of the
                    // source; presumably `String`, which the macro's doc names as the wrapped type — confirm.
                    Self::ExtFunction(s) => std::mem::size_of::() + s.len(),
                }
            }

            // Forwards to the variant's `py_dec_ref_ids`, which pushes onto `stack`.
            fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
                match self {
                    Self::Str(s) => s.py_dec_ref_ids(stack),
                    Self::Bytes(b) => b.py_dec_ref_ids(stack),
                    Self::List(l) => l.py_dec_ref_ids(stack),
                    Self::Tuple(t) => t.py_dec_ref_ids(stack),
                    Self::NamedTuple(nt) => nt.py_dec_ref_ids(stack),
                    Self::Dict(d) => d.py_dec_ref_ids(stack),
                    Self::DictKeysView(view) => view.py_dec_ref_ids(stack),
                    Self::DictItemsView(view) => view.py_dec_ref_ids(stack),
                    Self::DictValuesView(view) => view.py_dec_ref_ids(stack),
                    Self::Set(s) => s.py_dec_ref_ids(stack),
                    Self::FrozenSet(fs) => fs.py_dec_ref_ids(stack),
                    Self::Closure(closure) => closure.py_dec_ref_ids(stack),
                    Self::FunctionDefaults(fd) => fd.py_dec_ref_ids(stack),
                    Self::Cell(cell) => cell.py_dec_ref_ids(stack),
                    Self::Dataclass(dc) => dc.py_dec_ref_ids(stack),
                    Self::Iter(iter) => iter.py_dec_ref_ids(stack),
                    Self::Module(m) => m.py_dec_ref_ids(stack),
                    Self::Coroutine(coro) => coro.py_dec_ref_ids(stack),
                    Self::GatherFuture(gather) => gather.py_dec_ref_ids(stack),
                    // Types with no nested heap references.
                    // Compared with `py_estimate_size` above, this arm absorbs Range, Slice,
                    // Exception, LongInt, Path, ReMatch, RePattern and ExtFunction.
                    // NOTE(review): unlike the exhaustive match above, a newly added variant that
                    // does hold references would land here silently — confirm when adding variants.
                    _ => {}
                }
            }
        }
    };
}

impl_heap_item_dispatch!(HeapDataMut<'_>);
impl_heap_item_dispatch!(HeapData);

================================================
FILE: crates/monty/src/heap_traits.rs
================================================
use std::{mem::ManuallyDrop, ptr::addr_of};

use crate::{
    ResourceTracker,
    heap::{Heap, HeapId, RecursionToken},
    value::Value,
};

/// Heap lifecycle operations for memory tracking and reference cleanup.
///
/// This trait captures the two responsibilities shared by all heap-stored types:
///
/// 1. **Memory estimation** (`py_estimate_size`): reporting approximate byte footprint
///    for resource tracking and memory limit enforcement.
///
/// 2. **Reference collection** (`py_dec_ref_ids`): collecting contained `HeapId`s during
///    reference count decrement so child objects can be freed iteratively.
///
/// Unlike `PyTrait`, which provides Python-level operations (equality, repr, arithmetic),
/// `HeapItem` is purely about heap lifecycle management. This separation allows types like
/// `Closure` and `FunctionDefaults` to participate in heap bookkeeping without needing
/// the full `PyTrait` interface.
///
/// Every `HeapData` variant must implement this trait (either directly on the inner type,
/// or inline in the dispatch for types we don't own like `String`).
pub(crate) trait HeapItem {
    /// Estimates the memory size in bytes of this value.
    ///
    /// Used by resource tracking to enforce memory limits. Returns the approximate
    /// heap footprint including struct overhead and variable-length data (e.g., string
    /// contents, list elements).
///
    /// Note: For containers holding `Value::Ref` entries, this counts the size of
    /// the reference slots, not the referenced objects. Nested objects are sized
    /// separately when they are allocated.
    fn py_estimate_size(&self) -> usize;

    /// Pushes any contained `HeapId`s onto the stack for reference counting.
    ///
    /// This is called during `dec_ref` to find nested heap references that
    /// need their refcounts decremented when this value is freed.
    ///
    /// When the `ref-count-panic` feature is enabled, this method also marks all
    /// contained `Value`s as `Dereferenced` to prevent Drop panics. This
    /// co-locates the cleanup logic with the reference collection logic.
    fn py_dec_ref_ids(&mut self, stack: &mut Vec);
}

/// This trait represents types that contain a `Heap`; it allows for more complex structures
/// to participate in the `HeapGuard` pattern.
pub(crate) trait ContainsHeap {
    /// The resource tracker type associated with the contained heap.
    type ResourceTracker: ResourceTracker;

    /// Shared access to the contained heap.
    fn heap(&self) -> &Heap;

    /// Exclusive access to the contained heap.
    fn heap_mut(&mut self) -> &mut Heap;
}

// The heap trivially contains itself: both accessors return `self`.
// NOTE(review): the generic parameter list of this impl is missing in this copy of the source;
// `T` below is presumably the heap's resource-tracker parameter — confirm against the repository.
impl ContainsHeap for Heap {
    type ResourceTracker = T;

    fn heap(&self) -> &Self {
        self
    }

    #[inline]
    fn heap_mut(&mut self) -> &mut Self {
        self
    }
}

/// Trait for types that require heap access for proper cleanup.
///
/// Rust's standard `Drop` trait cannot decrement heap reference counts because it has no
/// access to the `Heap`. This trait provides an explicit drop-with-heap method so that
/// ref-counted values (and containers of them) can properly decrement their counts when
/// they are no longer needed.
///
/// **All types implementing this trait must be cleaned up on every code path** — not just
/// the happy path, but also early returns, conditional branches, `continue`, etc. A missed
/// call on any branch leaks reference counts. Prefer [`defer_drop!`] or [`HeapGuard`] to
/// guarantee cleanup automatically rather than inserting manual calls in every branch.
///
/// Implemented for `Value`, `Option`, `Vec`, `ArgValues`, iterators, and other
/// types that hold heap references.
pub(crate) trait DropWithHeap: Sized {
    /// Consume `self` and decrement reference counts for any heap-allocated values contained within.
    fn drop_with_heap(self, heap: &mut H);
}

// NOTE(review): the generic parameter lists of the impls below (the `H` heap parameter, the
// element parameters `U`/`V`, the array length `N`) are missing in this copy of the source;
// the bodies are reproduced exactly as shown.

impl DropWithHeap for Value {
    #[inline]
    fn drop_with_heap(self, heap: &mut H) {
        // NOTE(review): presumably resolves to an inherent `Value::drop_with_heap` (inherent
        // items take precedence over trait items in `Self::` paths); if no such inherent
        // method exists this call would recurse into itself — confirm in value.rs.
        Self::drop_with_heap(self, heap);
    }
}

// `None` holds nothing to release; `Some` forwards to the inner value.
impl DropWithHeap for Option {
    #[inline]
    fn drop_with_heap(self, heap: &mut H) {
        if let Some(value) = self {
            value.drop_with_heap(heap);
        }
    }
}

// Releases every element in iteration order.
impl DropWithHeap for Vec {
    fn drop_with_heap(self, heap: &mut H) {
        for value in self {
            value.drop_with_heap(heap);
        }
    }
}

// Releases only the elements the iterator has not yet yielded.
impl DropWithHeap for std::vec::IntoIter {
    fn drop_with_heap(self, heap: &mut H) {
        for value in self {
            value.drop_with_heap(heap);
        }
    }
}

// Releases the elements remaining in the drained range.
impl DropWithHeap for std::vec::Drain<'_, U> {
    fn drop_with_heap(self, heap: &mut H) {
        for value in self {
            value.drop_with_heap(heap);
        }
    }
}

// Fixed-size arrays of `Value`s: each element is released in turn.
impl DropWithHeap for [Value; N] {
    fn drop_with_heap(self, heap: &mut H) {
        for value in self {
            value.drop_with_heap(heap);
        }
    }
}

// Pairs: the left element is released before the right one.
impl DropWithHeap for (U, V) {
    fn drop_with_heap(self, heap: &mut H) {
        let (left, right) = self;
        left.drop_with_heap(heap);
        right.drop_with_heap(heap);
    }
}

/// Trait for types that require only an immutable heap reference for cleanup.
///
/// Unlike [`DropWithHeap`], which requires `&mut Heap`, this trait works with `&Heap`.
/// This is needed for cleanup in contexts that only have shared access to the heap,
/// such as `py_repr_fmt` and `py_str` formatting methods.
///
/// Currently implemented for [`RecursionToken`], which decrements the recursion depth
/// counter via interior mutability (`Cell`).
pub(crate) trait DropWithImmutableHeap {
    /// Consume `self` and perform cleanup using an immutable heap reference.
    fn drop_with_immutable_heap(self, heap: &Heap);
}

impl DropWithImmutableHeap for RecursionToken {
    #[inline]
    fn drop_with_immutable_heap(self, heap: &Heap) {
        // The token carries no data that is used here; dropping it just undoes one level of
        // recursion depth on the heap.
        heap.decr_recursion_depth();
    }
}

/// RAII guard that ensures a [`DropWithImmutableHeap`] value is cleaned up on every code path.
///
/// Like [`HeapGuard`], but holds an immutable `&Heap` instead of requiring `&mut` access
/// via [`ContainsHeap`]. This is useful in contexts that only have shared access to the heap,
/// such as `py_repr_fmt` formatting methods.
///
/// On the normal path, the guarded value can be borrowed via [`as_parts`](Self::as_parts).
/// The guard's `Drop` impl calls [`DropWithImmutableHeap::drop_with_immutable_heap`]
/// automatically, so cleanup happens on all exit paths.
pub(crate) struct ImmutableHeapGuard<'a, H: ContainsHeap, V: DropWithImmutableHeap> {
    // Wrapped in `ManuallyDrop` so that `Drop::drop` (which only gets `&mut self`) can move the
    // value out and pass it by value to `drop_with_immutable_heap`.
    // NOTE(review): the type argument of `ManuallyDrop` is missing in this copy of the source
    // (presumably `V`) — confirm.
    value: ManuallyDrop,
    // Shared borrow of the heap owner, held for the guard's whole lifetime.
    heap: &'a H,
}

impl<'a, H: ContainsHeap, V: DropWithImmutableHeap> ImmutableHeapGuard<'a, H, V> {
    /// Creates a new `ImmutableHeapGuard` for the given value and immutable heap reference.
    #[inline]
    pub fn new(value: V, heap: &'a H) -> Self {
        Self {
            value: ManuallyDrop::new(value),
            heap,
        }
    }

    /// Borrows the value (immutably) and heap (immutably) out of the guard.
    ///
    /// This is what [`defer_drop_immutable_heap!`] calls internally. The returned
    /// references are tied to the guard's lifetime, so the value cannot escape.
    #[inline]
    pub fn as_parts(&self) -> (&V, &'a H) {
        (&self.value, self.heap)
    }
}

// NOTE(review): the generic parameter list of this impl is missing in this copy of the source.
impl Drop for ImmutableHeapGuard<'_, H, V> {
    fn drop(&mut self) {
        // SAFETY: [DH] - value is never manually dropped until this point
        unsafe { ManuallyDrop::take(&mut self.value) }.drop_with_immutable_heap(self.heap.heap());
    }
}

/// RAII guard that ensures a [`DropWithHeap`] value is cleaned up on every code path.
///
/// The guard's `Drop` impl calls [`DropWithHeap::drop_with_heap`] automatically, so
/// cleanup happens whether the scope exits normally, via `?`, `continue`, early return,
/// or any other branch. This eliminates the need to manually insert `drop_with_heap`
/// calls in every branch.
///
/// On the normal path, the guarded value can be borrowed via [`as_parts`](Self::as_parts) /
/// [`as_parts_mut`](Self::as_parts_mut), or reclaimed via [`into_inner`](Self::into_inner) /
/// [`into_parts`](Self::into_parts) (which consume the guard without dropping the value).
///
/// Prefer the [`defer_drop!`] macro for the common case where you just need to ensure a
/// value is dropped at scope exit. Use `HeapGuard` directly when you need to conditionally
/// reclaim the value (e.g. push it back onto the stack on success) or need mutable access
/// to both the value and heap through [`as_parts_mut`](Self::as_parts_mut).
pub(crate) struct HeapGuard<'a, H: ContainsHeap, V: DropWithHeap> {
    // manually dropped because it needs to be dropped by move.
    // NOTE(review): the type argument of `ManuallyDrop` is missing in this copy of the source
    // (presumably `V`) — confirm.
    value: ManuallyDrop,
    // Exclusive borrow of the heap owner, held for the guard's whole lifetime.
    heap: &'a mut H,
}

impl<'a, H: ContainsHeap, V: DropWithHeap> HeapGuard<'a, H, V> {
    /// Creates a new `HeapGuard` for the given value and heap.
    #[inline]
    pub fn new(value: V, heap: &'a mut H) -> Self {
        Self {
            value: ManuallyDrop::new(value),
            heap,
        }
    }

    /// Consumes the guard and returns the contained value without dropping it.
    ///
    /// Use this when the value should survive beyond the guard's scope (e.g. returning
    /// a computed result from a function that used the guard for error-path safety).
    #[inline]
    pub fn into_inner(self) -> V {
        // Wrapping `self` suppresses the guard's own `Drop`, which would otherwise release the value.
        let mut this = ManuallyDrop::new(self);
        // SAFETY: [DH] - `ManuallyDrop::new(self)` prevents `Drop` on self, so we can take the value out
        unsafe { ManuallyDrop::take(&mut this.value) }
    }

    /// Borrows the value (immutably) and heap (mutably) out of the guard.
    ///
    /// This is what [`defer_drop!`] calls internally. The returned references are tied
    /// to the guard's lifetime, so the value cannot escape.
    #[inline]
    pub fn as_parts(&mut self) -> (&V, &mut H) {
        (&self.value, self.heap)
    }

    /// Borrows the value (mutably) and heap (mutably) out of the guard.
    ///
    /// This is what [`defer_drop_mut!`] calls internally. Use this when the value needs
    /// to be mutated in place (e.g. advancing an iterator, swapping during min/max).
    #[inline]
    pub fn as_parts_mut(&mut self) -> (&mut V, &mut H) {
        (&mut self.value, self.heap)
    }

    /// Consumes the guard and returns the value and heap separately, without dropping.
    ///
    /// Use this when you need to reclaim both the value *and* the heap reference — for
    /// example, to push the value back onto the VM stack via the heap owner.
    #[inline]
    pub fn into_parts(self) -> (V, &'a mut H) {
        let mut this = ManuallyDrop::new(self);
        // The `&'a mut H` field is copied out bitwise with `addr_of!(..).read()`; `this` is
        // never touched again afterwards, so only the returned reference remains in use.
        // SAFETY: [DH] - `ManuallyDrop` prevents `Drop` on self, so we can recover the parts
        unsafe { (ManuallyDrop::take(&mut this.value), addr_of!(this.heap).read()) }
    }

    /// Borrows just the heap out of the guard
    #[inline]
    pub fn heap(&mut self) -> &mut H {
        self.heap
    }
}

// NOTE(review): the generic parameter list of this impl is missing in this copy of the source.
impl Drop for HeapGuard<'_, H, V> {
    fn drop(&mut self) {
        // SAFETY: [DH] - value is never manually dropped until this point
        unsafe { ManuallyDrop::take(&mut self.value) }.drop_with_heap(self.heap.heap_mut());
    }
}

/// The preferred way to ensure a [`DropWithHeap`] value is cleaned up on every code path.
///
/// Creates a [`HeapGuard`] and immediately rebinds `$value` as `&V` and `$heap` as
/// `&mut H` via [`HeapGuard::as_parts`]. The original owned value is moved into the
/// guard, which will call [`DropWithHeap::drop_with_heap`] when scope exits — whether
/// that's normal completion, early return via `?`, `continue`, or any other branch.
///
/// Beyond safety, this is often much more concise than inserting `drop_with_heap` calls
/// in every branch of complex control flow. For mutable access to the value, use
/// [`defer_drop_mut!`].
///
/// # Limitation
///
/// The macro rebinds `$heap` as a new `let` binding, so it cannot be used when `$heap`
/// is `self`. In `&mut self` methods, first assign `let this = self;` and pass `this`.
#[macro_export]
macro_rules!
defer_drop {
    // Both arguments must be plain identifiers because they are re-declared with `let` below.
    ($value:ident, $heap:ident) => {
        // The expansion consists of statements, so `_guard` lives in the caller's block and is
        // dropped when that block ends.
        // NOTE(review): the guard is referenced through `$crate::heap::`, not this module —
        // presumably re-exported from `heap`; confirm.
        let mut _guard = $crate::heap::HeapGuard::new($value, $heap);
        #[allow(
            clippy::allow_attributes,
            reason = "the reborrowed parts may not both be used in every case, so allow unused vars to avoid warnings"
        )]
        #[allow(unused_variables)]
        let ($value, $heap) = _guard.as_parts();
    };
}

/// Like [`defer_drop!`], but rebinds `$value` as `&mut V` via [`HeapGuard::as_parts_mut`].
///
/// Use this when the value needs to be mutated in place — for example, advancing an
/// iterator with `for_next()`, or swapping values during a min/max comparison.
#[macro_export]
macro_rules! defer_drop_mut {
    ($value:ident, $heap:ident) => {
        let mut _guard = $crate::heap::HeapGuard::new($value, $heap);
        #[allow(
            clippy::allow_attributes,
            reason = "the reborrowed parts may not both be used in every case, so allow unused vars to avoid warnings"
        )]
        #[allow(unused_variables)]
        let ($value, $heap) = _guard.as_parts_mut();
    };
}

/// Like [`defer_drop!`], but for [`DropWithImmutableHeap`] values that only need `&Heap`
/// for cleanup.
///
/// Creates an [`ImmutableHeapGuard`] and immediately rebinds `$value` as `&V` and `$heap`
/// as `&Heap`. The guard will call [`DropWithImmutableHeap::drop_with_immutable_heap`]
/// when scope exits. Use this for values like [`RecursionToken`] in contexts that only have
/// shared access to the heap (e.g., `py_repr_fmt` formatting methods).
#[macro_export]
macro_rules! defer_drop_immutable_heap {
    ($value:ident, $heap:ident) => {
        // No `mut` here: `ImmutableHeapGuard::as_parts` takes `&self`.
        let _guard = $crate::heap::ImmutableHeapGuard::new($value, $heap);
        #[allow(
            clippy::allow_attributes,
            reason = "the reborrowed parts may not both be used in every case, so allow unused vars to avoid warnings"
        )]
        #[allow(unused_variables)]
        let ($value, $heap) = _guard.as_parts();
    };
}

================================================
FILE: crates/monty/src/intern.rs
================================================
//! String, bytes, and long integer interning for efficient storage of literals and identifiers.
//!
//! This module provides interners that store unique strings, bytes, and long integers in vectors
//! and return indices (`StringId`, `BytesId`, `LongIntId`) for efficient storage and comparison.
//! This avoids the overhead of cloning strings or using atomic reference counting.
//!
//! The interners are populated during parsing and preparation, then owned by the `Executor`.
//! During execution, lookups are needed only for error messages and repr output.
//!
//! StringIds are laid out as follows:
//! * 0 to 127 - single-character strings for the 128 ASCII characters
//! * 1000 up to 1000 + count(StaticStrings) - the compile-time `StaticStrings` variants
//! * 10_000+ - strings interned per executor
use std::{str::FromStr, sync::LazyLock};

use ahash::AHashMap;
use num_bigint::BigInt;
use strum::{EnumString, FromRepr, IntoStaticStr};

use crate::{function::Function, value::Value};

/// Index into the string interner's storage.
///
/// Uses `u32` to save space (4 bytes vs 8 bytes for `usize`). This limits us to
/// ~4 billion unique interns, which is more than sufficient.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)]
pub struct StringId(u32);

impl StringId {
    /// Creates a StringId from a raw index value.
    ///
    /// Used by the bytecode VM to reconstruct StringIds from operands stored
    /// in bytecode. The caller is responsible for ensuring the index is valid.
    ///
    /// NOTE(review): the operand is a `u16`, so only IDs up to 65_535 can be rebuilt this
    /// way, while per-executor IDs start at 10_000 — confirm how larger IDs are encoded.
    #[inline]
    pub fn from_index(index: u16) -> Self {
        Self(u32::from(index))
    }

    /// Returns the raw index value.
    ///
    /// This is the ID itself, including any range offset — not a position in a storage vector.
    #[inline]
    pub fn index(self) -> usize {
        self.0 as usize
    }

    /// Returns the StringId for an ASCII byte.
    ///
    /// The byte value is used as the ID unchanged. No range check is performed, so bytes
    /// 128-255 produce IDs outside the 0-127 ASCII range described in the module docs.
    #[must_use]
    pub fn from_ascii(byte: u8) -> Self {
        Self(u32::from(byte))
    }
}

/// StringId offsets
// First ID used for `StaticStrings` variants.
const STATIC_STRING_ID_OFFSET: u32 = 1000;
// First ID used for strings interned at parse time.
const INTERN_STRING_ID_OFFSET: usize = 10_000;

/// Static strings for all 128 ASCII characters, built once on first access.
///
/// Uses `LazyLock` to build the array at runtime (once), leaking the strings to get
/// `'static` lifetime. The leak is intentional and bounded (128 single-byte strings).
static ASCII_STRS: LazyLock<[&'static str; 128]> = LazyLock::new(|| {
    std::array::from_fn(|i| {
        // Safe: i is always 0-127 for a 128-element array
        let s = char::from(u8::try_from(i).expect("index out of u8 range")).to_string();
        // Leak to get 'static lifetime - this is intentional and bounded (128 bytes total)
        // Reborrow as immutable since we won't mutate
        &*Box::leak(s.into_boxed_str())
    })
});

/// Static string values which are known at compile time and don't need to be interned.
///
/// Each variant's discriminant (its position in this list) plus `STATIC_STRING_ID_OFFSET`
/// is its `StringId`, so inserting or reordering variants changes those IDs. With
/// `#[repr(u8)]` the enum can hold at most 256 variants.
///
/// Unless overridden with `#[strum(serialize = "...")]`, a variant's string is its name in
/// snake_case.
#[repr(u8)]
#[derive(
    Debug, Clone, Copy, FromRepr, EnumString, IntoStaticStr, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize,
)]
#[strum(serialize_all = "snake_case")]
pub enum StaticStrings {
    #[strum(serialize = "")]
    EmptyString,
    // NOTE(review): this literal is identical to `EmptyString`'s in this copy of the source;
    // presumably an angle-bracketed module name was lost in extraction — confirm against the repository.
    #[strum(serialize = "")]
    Module,
    // ==========================
    // List methods
    // Also uses shared: POP, CLEAR, COPY, REMOVE
    // Also uses string-shared: INDEX, COUNT
    Append, Insert, Extend, Reverse, Sort,
    // ==========================
    // Dict methods
    // Also uses shared: POP, CLEAR, COPY, UPDATE
    Get, Keys, Values, Items, Setdefault, Popitem, Fromkeys,
    // ==========================
    // Shared methods
    // Used by multiple container types: list, dict, set
    Pop, Clear, Copy,
    // ==========================
    // Set methods
    // Also uses shared: POP, CLEAR, COPY
    Add, Remove, Discard, Update, Union, Intersection, Difference, SymmetricDifference, Issubset, Issuperset, Isdisjoint,
    // ==========================
    // String methods
    // Some methods shared with bytes: FIND, INDEX, COUNT, STARTSWITH, ENDSWITH
    // Some methods shared with list/tuple: INDEX, COUNT
    Join,
    // Simple transformations
    Lower, Upper, Capitalize, Title, Swapcase, Casefold,
    // Predicate methods
    Isalpha, Isdigit, Isalnum, Isnumeric, Isspace, Islower, Isupper, Isascii, Isdecimal,
    // Search methods (some shared with bytes, list, tuple)
    Find, Rfind, Index, Rindex, Count, Startswith, Endswith,
    // Strip/trim methods
    Strip, Lstrip, Rstrip, Removeprefix, Removesuffix,
    // Split methods
    Split, Rsplit, Splitlines, Partition, Rpartition,
    // Replace/padding methods
    Replace, Center, Ljust, Rjust, Zfill,
    // Additional string methods
    Encode, Isidentifier, Istitle,
    // ==========================
    // Bytes methods
    // Also uses string-shared: FIND, INDEX, COUNT, STARTSWITH, ENDSWITH
    // Also uses most string methods: LOWER, UPPER, CAPITALIZE, TITLE, SWAPCASE,
    // ISALPHA, ISDIGIT, ISALNUM, ISSPACE, ISLOWER, ISUPPER, ISASCII, ISTITLE,
    // RFIND, RINDEX, STRIP, LSTRIP, RSTRIP, REMOVEPREFIX, REMOVESUFFIX,
    // SPLIT, RSPLIT, SPLITLINES, PARTITION, RPARTITION, REPLACE,
    // CENTER, LJUST, RJUST, ZFILL, JOIN
    Decode, Hex, Fromhex,
    // ==========================
    // sys module strings
    Sys,
    #[strum(serialize = "sys.version_info")]
    SysVersionInfo,
    Version, VersionInfo, Platform, Stdout, Stderr, Major, Minor, Micro, Releaselevel, Serial, Final,
    #[strum(serialize = "3.14.0 (Monty)")]
    MontyVersionString,
    Monty,
    // ==========================
    // os.stat_result fields
    #[strum(serialize = "StatResult")]
    OsStatResult,
    StMode, StIno, StDev, StNlink, StUid, StGid, StSize, StAtime, StMtime, StCtime,
    // ==========================
    // typing module strings
    Typing,
    #[strum(serialize = "TYPE_CHECKING")]
    TypeChecking,
    #[strum(serialize = "Any")]
    Any,
    #[strum(serialize = "Optional")]
    Optional,
    #[strum(serialize = "Union")]
    UnionType,
    #[strum(serialize = "List")]
    ListType,
    #[strum(serialize = "Dict")]
    DictType,
    #[strum(serialize = "Tuple")]
    TupleType,
    #[strum(serialize = "Set")]
    SetType,
    #[strum(serialize = "FrozenSet")]
    FrozenSet,
    #[strum(serialize = "Callable")]
    Callable,
    #[strum(serialize = "Type")]
    Type,
    #[strum(serialize = "Sequence")]
    Sequence,
    #[strum(serialize = "Mapping")]
    Mapping,
    #[strum(serialize = "Iterable")]
    Iterable,
    #[strum(serialize = "Iterator")]
    IteratorType,
    #[strum(serialize = "Generator")]
    Generator,
    #[strum(serialize = "ClassVar")]
    ClassVar,
    #[strum(serialize = "Final")]
    FinalType,
    #[strum(serialize = "Literal")]
    Literal,
    #[strum(serialize = "TypeVar")]
    TypeVar,
    #[strum(serialize = "Generic")]
    Generic,
    #[strum(serialize = "Protocol")]
    Protocol,
    #[strum(serialize = "Annotated")]
    Annotated,
    #[strum(serialize = "Self")]
    SelfType,
    #[strum(serialize = "Never")]
    Never,
    #[strum(serialize = "NoReturn")]
    NoReturn,
    // ==========================
    // asyncio module strings
    Asyncio, Gather, Run,
    // ==========================
    // os module strings
    Os, Getenv, Environ, Default,
    // ==========================
    // Exception attributes
    Args,
    // ==========================
    // Type attributes
    #[strum(serialize = "__name__")]
    DunderName,
    // ==========================
    // pathlib module strings
    Pathlib,
    #[strum(serialize = "Path")]
    PathClass,
    // Path properties (pure - no I/O)
    Name, Parent, Stem, Suffix, Suffixes, Parts,
    // Path pure methods (no I/O)
    IsAbsolute, Joinpath, WithName, WithStem, WithSuffix, AsPosix,
    #[strum(serialize = "__fspath__")]
    Fspath,
    // Path filesystem methods (require OsAccess - yield external calls)
    Exists, IsFile, IsDir, IsSymlink,
    #[strum(serialize = "stat")]
    StatMethod,
    ReadBytes, ReadText, Iterdir, Resolve, Absolute,
    // Path write methods (require OsAccess - yield external calls)
    WriteText, WriteBytes, Mkdir, Unlink, Rmdir, Rename,
    // Slice attributes
    Start, Stop, Step,
    // ==========================
    // module strings
    // ==========================
    // math module strings
    Math,
    // Rounding
    Floor, Ceil, Trunc,
    // Roots & powers
    Sqrt, Isqrt, Cbrt, Pow, Exp, Exp2, Expm1,
    // Logarithms
    Log, Log1p, Log2, Log10,
    // Float properties
    Fabs, Isnan, Isinf, Isfinite, Copysign, Isclose, Nextafter, Ulp,
    // Trigonometric
    Sin, Cos, Tan, Asin, Acos, Atan, Atan2,
    // Hyperbolic
    Sinh, Cosh, Tanh, Asinh, Acosh, Atanh,
    // Angular conversion
    Degrees, Radians,
    // Integer math
    Factorial, Gcd, Lcm, Comb, Perm,
    // Modular / decomposition
    Fmod, Remainder, Modf, Frexp, Ldexp,
    // Special functions
    Gamma, Lgamma, Erf, Erfc,
    // Constants
    /// `math.pi` constant
    Pi,
    /// `math.e` constant
    // NOTE(review): this and the other one-character variants below (`I`, `M`, `S`, `A`)
    // serialize to a single byte; `InternerBuilder::intern` returns the ASCII id for any
    // one-byte string before consulting this enum — confirm lookups account for that.
    #[strum(serialize = "e")]
    MathE,
    /// `math.tau` constant
    Tau,
    /// `math.inf` constant
    #[strum(serialize = "inf")]
    MathInf,
    /// `math.nan` constant
    #[strum(serialize = "nan")]
    MathNan,
    // re module strings
    /// Module name for `import re`.
    Re,
    /// `re.compile()` function
    Compile,
    /// `re.match()` / `pattern.match()` method
    Match,
    /// `re.search()` / `pattern.search()` method
    Search,
    /// `re.fullmatch()` / `pattern.fullmatch()` method
    Fullmatch,
    /// `re.findall()` / `pattern.findall()` method
    Findall,
    /// `re.sub()` / `pattern.sub()` method
    Sub,
    /// `match.group()` method
    Group,
    /// `match.groups()` method
    Groups,
    /// `match.span()` method
    Span,
    /// `match.end()` method
    End,
    /// `re.Pattern`
    #[strum(serialize = "Pattern")]
    PatternClass,
    /// `re.Match`
    #[strum(serialize = "Match")]
    MatchClass,
    /// `pattern.pattern`
    #[strum(serialize = "pattern")]
    PatternAttr,
    /// `match.string`
    #[strum(serialize = "string")]
    StringAttr,
    /// `pattern.flags`
    Flags,
    /// `re.IGNORECASE` flag
    #[strum(serialize = "IGNORECASE")]
    Ignorecase,
    /// `re.I` flag, alias
    #[strum(serialize = "I")]
    I,
    /// `re.MULTILINE` flag
    #[strum(serialize = "MULTILINE")]
    MultilineFlag,
    /// `re.M` flag, alias
    #[strum(serialize = "M")]
    M,
    /// `re.DOTALL` flag
    #[strum(serialize = "DOTALL")]
    DotallFlag,
    /// `re.S` flag, alias
    #[strum(serialize = "S")]
    S,
    /// `re.NOFLAG` flag
    #[strum(serialize = "NOFLAG")]
    NoFlag,
    /// `re.ASCII` flag
    #[strum(serialize = "ASCII")]
    AsciiFlag,
    /// `re.A` flag, alias
    #[strum(serialize = "A")]
    A,
    /// `re.PatternError` exception
    #[strum(serialize = "PatternError")]
    PatternError,
    /// `re.error` exception alias (same as `re.PatternError`)
    #[strum(serialize = "error")]
    Error,
    /// `re.escape()` function
    Escape,
    /// `re.finditer()` / `pattern.finditer()` method
    Finditer,
    /// `match.groupdict()` method
    Groupdict,
}

impl StaticStrings {
    /// Attempts to convert a `StringId` back to a `StaticStrings` variant.
///
    /// Returns `None` if the `StringId` doesn't correspond to a static string
    /// (e.g., it's an ASCII char or a dynamically interned string).
    pub fn from_string_id(id: StringId) -> Option {
        // IDs below the static offset (the ASCII range) underflow here and yield `None`.
        let enum_id = id.0.checked_sub(STATIC_STRING_ID_OFFSET)?;
        // Anything that does not fit in `u8`, or has no matching discriminant, is not a static string.
        u8::try_from(enum_id).ok().and_then(Self::from_repr)
    }
}

// NOTE(review): the trait type arguments of the four impls below are missing in this copy of
// the source; the method signatures show the intended pairings (`StaticStrings` -> `StringId`,
// `StaticStrings` -> `Value`, and `StringId` == `StaticStrings` in both directions).

/// Converts this static string variant to its corresponding `StringId`.
impl From for StringId {
    fn from(value: StaticStrings) -> Self {
        // The enum discriminant shifted into the static-string ID range.
        let string_id = value as u32;
        Self(string_id + STATIC_STRING_ID_OFFSET)
    }
}

/// Wraps the variant's `StringId` in `Value::InternString`.
impl From for Value {
    fn from(value: StaticStrings) -> Self {
        Self::InternString(value.into())
    }
}

/// Compares an ID with a static string by converting the static string to its ID.
impl PartialEq for StringId {
    fn eq(&self, other: &StaticStrings) -> bool {
        *self == Self::from(*other)
    }
}

/// Mirror of the comparison above with the operands swapped.
impl PartialEq for StaticStrings {
    fn eq(&self, other: &StringId) -> bool {
        StringId::from(*self) == *other
    }
}

/// Index into the bytes interner's storage.
///
/// Separate from `StringId` to distinguish string vs bytes literals at the type level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct BytesId(u32);

impl BytesId {
    /// Returns the raw index value.
    #[inline]
    pub fn index(self) -> usize {
        self.0 as usize
    }
}

/// Index into the long integer interner's storage.
///
/// Used for integer literals that exceed i64 range. The actual `BigInt` values
/// are stored in the `Interns` table and looked up by index at runtime.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct LongIntId(u32);

impl LongIntId {
    /// Returns the raw index value.
    #[inline]
    pub fn index(self) -> usize {
        self.0 as usize
    }
}

/// Unique identifier for functions
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]
pub struct FunctionId(u32);

impl FunctionId {
    /// Creates a FunctionId from a raw index value.
    ///
    /// Used by the bytecode VM to reconstruct FunctionIds from operands stored
    /// in bytecode.
The caller is responsible for ensuring the index is valid. #[inline] pub fn from_index(index: u16) -> Self { Self(u32::from(index)) } /// Returns the raw index value. #[inline] pub fn index(self) -> usize { self.0 as usize } } /// A string, bytes, and long integer interner that stores unique values and returns indices for lookup. /// /// Interns are deduplicated on insertion - interning the same string twice returns /// the same `StringId`. Bytes and long integers are NOT deduplicated (rare enough that it's not worth it). /// The interner owns all strings/bytes/long integers and provides lookup by index. /// /// # Thread Safety /// /// The interner is not thread-safe. It's designed to be used single-threaded during /// parsing/preparation, then the values are accessed read-only during execution. #[derive(Debug, Default, Clone)] pub struct InternerBuilder { /// Maps strings to their indices for deduplication during interning. string_map: AHashMap, /// Storage for interned interns, indexed by `StringId`. strings: Vec, /// Storage for interned bytes literals, indexed by `BytesId`. /// Not deduplicated since bytes literals are rare. bytes: Vec>, /// Storage for interned long integer literals, indexed by `LongIntId`. /// Not deduplicated since long integer literals are rare. long_ints: Vec, } impl InternerBuilder { /// Creates a new string interner with pre-interned strings. /// /// Clones from a lazily-initialized base interner that contains all pre-interned /// strings (``, attribute names, ASCII chars). This avoids rebuilding /// the base set on every call. /// /// # Arguments /// * `code` - The code being parsed, used for a very rough guess at how many /// additional strings will be interned beyond the base set. /// /// Pre-interns (via `BASE_INTERNER`): /// - Index 0: `""` for module-level code /// - Indices 1-MAX_ATTR_ID: Known attribute names (append, insert, get, join, etc.) 
/// - Indices MAX_ATTR_ID+1..: ASCII single-character strings
    pub fn new(code: &str) -> Self {
        // Reserve capacity for code-specific strings
        // Rough guess: count quotes and divide by 2 (open+close per string)
        let capacity = code.bytes().filter(|&b| b == b'"' || b == b'\'').count() >> 1;
        Self {
            string_map: AHashMap::with_capacity(capacity),
            strings: Vec::with_capacity(capacity),
            bytes: Vec::new(),
            long_ints: Vec::new(),
        }
    }

    /// Creates a builder pre-seeded from an existing [`Interns`] table.
    ///
    /// This is used by REPL incremental compilation: previously compiled interned
    /// values keep stable IDs, and newly interned values are appended.
    ///
    /// # Panics
    ///
    /// Panics if a seeded string's ID does not fit in a `u32`.
    pub(crate) fn from_interns(interns: &Interns, code: &str) -> Self {
        let mut builder = Self::new(code);
        builder.strings.clone_from(&interns.strings);
        builder.bytes.clone_from(&interns.bytes);
        builder.long_ints.clone_from(&interns.long_ints);
        // Rebuild the dedup map so each existing string resolves to the ID it already has
        // (its position in `strings` plus `INTERN_STRING_ID_OFFSET`).
        builder.string_map = builder
            .strings
            .iter()
            .enumerate()
            .map(|(index, value)| {
                let id = StringId(
                    u32::try_from(INTERN_STRING_ID_OFFSET + index).expect("StringId overflow while seeding interner"),
                );
                (value.clone(), id)
            })
            .collect();
        builder
    }

    /// Interns a string, returning its `StringId`.
    ///
    /// * If the string is a single byte (and therefore ASCII), return the pre-interned string id
    /// * If the string is a known static string, return the pre-interned string id
    /// * If the string was already interned, returns the existing string id
    /// * Otherwise, stores the string and returns a new string id
    ///
    /// # Panics
    ///
    /// Panics with "StringId overflow" if the new ID does not fit the ID type.
    pub fn intern(&mut self, s: &str) -> StringId {
        if s.len() == 1 {
            StringId::from_ascii(s.as_bytes()[0])
        } else if let Ok(ss) = StaticStrings::from_str(s) {
            ss.into()
        } else if let Some(&id) = self.string_map.get(s) {
            // Hit path: probe with the borrowed `&str` so no `String` is allocated
            // for strings that are already interned.
            id
        } else {
            // Miss path: the new ID is the next free slot, shifted past the reserved ranges.
            let string_id = self.strings.len() + INTERN_STRING_ID_OFFSET;
            let id = StringId(string_id.try_into().expect("StringId overflow"));
            self.strings.push(s.to_owned());
            self.string_map.insert(s.to_owned(), id);
            id
        }
    }

    /// Interns bytes, returning its `BytesId`.
    ///
    /// Unlike strings, bytes are not deduplicated (bytes literals are rare).
pub fn intern_bytes(&mut self, b: &[u8]) -> BytesId {
        // The new ID is the position the bytes are about to occupy in `self.bytes`.
        let id = BytesId(self.bytes.len().try_into().expect("BytesId overflow"));
        self.bytes.push(b.to_vec());
        id
    }

    /// Interns a long integer, returning its `LongIntId`.
    ///
    /// Big integers are not deduplicated since literals exceeding i64 are rare.
    ///
    /// # Panics
    ///
    /// Panics with "LongIntId overflow" if the current count does not fit the ID type.
    pub fn intern_long_int(&mut self, bi: BigInt) -> LongIntId {
        let id = LongIntId(self.long_ints.len().try_into().expect("LongIntId overflow"));
        self.long_ints.push(bi);
        id
    }

    /// Looks up a string by its `StringId`.
    ///
    /// Delegates to the module-level `get_str` helper using this builder's string storage.
    #[inline]
    pub fn get_str(&self, id: StringId) -> &str {
        get_str(&self.strings, id)
    }
}

/// Looks up a string by its `StringId`.
///
/// # Panics
///
/// Panics if the `StringId` is invalid - not from this interner or ascii chars or StaticStrings.
fn get_str(strings: &[String], id: StringId) -> &str {
    if let Ok(c) = u8::try_from(id.0) {
        // Raw IDs that fit in a `u8` index the `ASCII_STRS` table directly.
        ASCII_STRS[c as usize]
    } else if let Some(intern_index) = id.index().checked_sub(INTERN_STRING_ID_OFFSET) {
        // Indices at or above `INTERN_STRING_ID_OFFSET` address the caller-provided storage.
        &strings[intern_index]
    } else {
        // Anything in between has to decode to a `StaticStrings` variant.
        let static_str = StaticStrings::from_string_id(id).expect("Invalid static string ID");
        static_str.into()
    }
}

/// Read-only storage for interned strings, bytes, and long integers.
///
/// This provides lookup by `StringId`, `BytesId`, `LongIntId` and `FunctionId` for interned literals and functions.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub(crate) struct Interns {
    strings: Vec,
    bytes: Vec>,
    long_ints: Vec,
    functions: Vec,
}

impl Interns {
    /// Builds the read-only table by taking ownership of the builder's storage
    /// and pairing it with the given function table.
    pub fn new(interner: InternerBuilder, functions: Vec) -> Self {
        Self {
            strings: interner.strings,
            bytes: interner.bytes,
            long_ints: interner.long_ints,
            functions,
        }
    }

    /// Looks up a string by its `StringId`.
    ///
    /// # Panics
    ///
    /// Panics if the `StringId` is invalid.
    #[inline]
    pub fn get_str(&self, id: StringId) -> &str {
        get_str(&self.strings, id)
    }

    /// Looks up bytes by their `BytesId`.
    ///
    /// # Panics
    ///
    /// Panics if the `BytesId` is invalid.
#[inline]
    pub fn get_bytes(&self, id: BytesId) -> &[u8] {
        &self.bytes[id.index()]
    }

    /// Looks up a long integer by its `LongIntId`.
    ///
    /// # Panics
    ///
    /// Panics if the `LongIntId` is invalid.
    #[inline]
    pub fn get_long_int(&self, id: LongIntId) -> &BigInt {
        &self.long_ints[id.index()]
    }

    /// Looks up a function by its `FunctionId`.
    ///
    /// # Panics
    ///
    /// Panics with "Function not found" if the `FunctionId` is invalid.
    #[inline]
    pub fn get_function(&self, id: FunctionId) -> &Function {
        self.functions.get(id.index()).expect("Function not found")
    }

    /// Looks up the `StringId` for a string, checking ASCII, static strings, and interned strings.
    ///
    /// This is the reverse of `get_str`: given a string, find its StringId.
    /// Used when the host provides a name (e.g., from a NameLookup response) that was
    /// previously interned during preparation.
    ///
    /// Returns `None` if the string was never interned (or if its ID would not fit in a `u32`).
    /// The interned-string check is a linear scan over the stored strings.
    pub fn get_string_id_by_name(&self, s: &str) -> Option {
        // Check single ASCII char
        if s.len() == 1 {
            return Some(StringId::from_ascii(s.as_bytes()[0]));
        }

        // Check static strings
        if let Ok(ss) = StaticStrings::from_str(s) {
            return Some(ss.into());
        }

        // Check interned strings
        for (i, interned) in self.strings.iter().enumerate() {
            if interned == s {
                return u32::try_from(INTERN_STRING_ID_OFFSET + i).ok().map(StringId);
            }
        }

        None
    }

    /// Sets the compiled functions.
    ///
    /// This is called after compilation to populate the functions that were
    /// compiled from `PreparedFunctionDef` nodes. Any previous table is replaced.
    pub fn set_functions(&mut self, functions: Vec) {
        self.functions = functions;
    }

    /// Returns a clone of the compiled function table.
    ///
    /// Used by REPL incremental compilation to preserve existing function IDs.
    pub(crate) fn functions_clone(&self) -> Vec {
        self.functions.clone()
    }
}

================================================
FILE: crates/monty/src/io.rs
================================================
use std::borrow::Cow;

use crate::exception_public::MontyException;

/// Output handler for the `print()` builtin function.
///
/// Provides common output modes as enum variants to avoid trait object overhead
/// in the typical cases (stdout, disabled, collect). For custom output handling,
/// use the `Callback` variant with a [`PrintWriterCallback`] implementation.
///
/// # Variants
/// - `Disabled` - Silently discards all output (useful for benchmarking or suppressing output)
/// - `Stdout` - Writes to standard output (the default behavior)
/// - `Collect` - Accumulates output into a target `String` for programmatic access
/// - `Callback` - Delegates to a user-provided [`PrintWriterCallback`] implementation
pub enum PrintWriter<'a> {
    /// Silently discard all output.
    Disabled,
    /// Write to standard output.
    Stdout,
    /// Collect all output into a string. The string is borrowed, so the caller keeps ownership.
    Collect(&'a mut String),
    /// Delegate to a custom callback, borrowed as a trait object.
    Callback(&'a mut dyn PrintWriterCallback),
}

impl PrintWriter<'_> {
    /// Creates a new `PrintWriter` that reborrows the same underlying target.
    ///
    /// This is useful in iterative execution (`start`/`resume` loops) where each
    /// step takes `PrintWriter` by value but you want all steps to write to the
    /// same output target. The original writer remains valid after the reborrowed
    /// copy is dropped.
    pub fn reborrow(&mut self) -> PrintWriter<'_> {
        match self {
            Self::Disabled => PrintWriter::Disabled,
            Self::Stdout => PrintWriter::Stdout,
            Self::Collect(buf) => PrintWriter::Collect(buf),
            // `&mut **cb` reborrows the trait object itself rather than the outer reference.
            Self::Callback(cb) => PrintWriter::Callback(&mut **cb),
        }
    }

    /// Called once for each formatted argument passed to `print()`.
    ///
    /// This method writes only the given argument's text, without adding
    /// separators or a trailing newline. Separators (spaces) and the final
    /// terminator (newline) are emitted via [`stdout_push`](Self::stdout_push).
pub fn stdout_write(&mut self, output: Cow<'_, str>) -> Result<(), MontyException> { match self { Self::Disabled => Ok(()), Self::Stdout => { print!("{output}"); Ok(()) } Self::Collect(buf) => { buf.push_str(&output); Ok(()) } Self::Callback(cb) => cb.stdout_write(output), } } /// Appends a single character to the output. /// /// Generally called to add spaces (separators) and newlines (terminators) /// within print output. pub fn stdout_push(&mut self, end: char) -> Result<(), MontyException> { match self { Self::Disabled => Ok(()), Self::Stdout => { print!("{end}"); Ok(()) } Self::Collect(buf) => { buf.push(end); Ok(()) } Self::Callback(cb) => cb.stdout_push(end), } } } /// Trait for custom output handling from the `print()` builtin function. /// /// Implement this trait and pass it via [`PrintWriter::Callback`] to capture /// or redirect print output from sandboxed Python code. pub trait PrintWriterCallback { /// Called once for each formatted argument passed to `print()`. /// /// This method is responsible for writing only the given argument's text, and must /// not add separators or a trailing newline. Separators (such as spaces) and the /// final terminator (such as a newline) are emitted via [`stdout_push`](Self::stdout_push). /// /// # Arguments /// * `output` - The formatted output string for a single argument (without /// separators or trailing newline). fn stdout_write(&mut self, output: Cow<'_, str>) -> Result<(), MontyException>; /// Add a single character to stdout. /// /// Generally called to add spaces and newlines within print output. /// /// # Arguments /// * `end` - The character to print after the formatted output. 
fn stdout_push(&mut self, end: char) -> Result<(), MontyException>; } ================================================ FILE: crates/monty/src/lib.rs ================================================ #![doc = include_str!("../../../README.md")] // first to include defer_drop macro mod heap_traits; mod args; mod asyncio; mod builtins; mod bytecode; mod exception_private; mod exception_public; mod expressions; mod fstring; mod function; mod heap; mod heap_data; mod intern; mod io; mod modules; mod namespace; mod object; mod os; mod parse; mod prepare; mod repl; mod resource; mod run; mod run_progress; mod signature; mod sorting; mod types; mod value; #[cfg(feature = "ref-count-return")] pub use crate::run::RefCountOutput; pub use crate::{ exception_private::ExcType, exception_public::{CodeLoc, MontyException, StackFrame}, io::{PrintWriter, PrintWriterCallback}, object::{DictPairs, InvalidInputError, MontyObject}, os::{OsFunction, dir_stat, file_stat, stat_result, symlink_stat}, repl::{ MontyRepl, ReplContinuationMode, ReplFunctionCall, ReplNameLookup, ReplOsCall, ReplProgress, ReplResolveFutures, ReplStartError, detect_repl_continuation_mode, }, resource::{ DEFAULT_MAX_RECURSION_DEPTH, LimitedTracker, NoLimitTracker, ResourceError, ResourceLimits, ResourceTracker, }, run::MontyRun, run_progress::{ ExtFunctionResult, FunctionCall, NameLookup, NameLookupResult, OsCall, ResolveFutures, RunProgress, }, }; ================================================ FILE: crates/monty/src/modules/asyncio.rs ================================================ //! Implementation of the `asyncio` module. //! //! Provides a minimal implementation of Python's `asyncio` module with: //! - `run(coro)`: Runs a coroutine to completion, equivalent to `await coro` //! - `gather(*awaitables)`: Collects coroutines for concurrent execution //! //! Other asyncio functions (`create_task`, `sleep`, `wait`, etc.) are not implemented. //! 
//! The host acts as the event loop - Monty yields control when tasks are blocked.

use crate::{
    args::ArgValues,
    asyncio::{GatherFuture, GatherItem},
    bytecode::{CallResult, VM},
    defer_drop_mut,
    exception_private::{ExcType, RunResult},
    heap::{Heap, HeapData, HeapId},
    intern::StaticStrings,
    modules::ModuleFunctions,
    resource::{ResourceError, ResourceTracker},
    types::Module,
    value::Value,
};

/// Functions exposed by the `asyncio` module.
///
/// `strum` renders each variant in lowercase (`gather`, `run`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, strum::Display, serde::Serialize, serde::Deserialize)]
#[strum(serialize_all = "lowercase")]
pub(crate) enum AsyncioFunctions {
    Gather,
    Run,
}

/// Creates the `asyncio` module and allocates it on the heap.
///
/// The module exposes the `gather` and `run` functions. Other asyncio functions
/// are not implemented as they would require additional VM/scheduler features.
///
/// # Returns
/// A HeapId pointing to the newly allocated module.
///
/// # Errors
/// Returns the error produced by `vm.heap.allocate` if the module cannot be allocated.
///
/// # Panics
/// Panics if the required strings have not been pre-interned during prepare phase.
pub fn create_module(vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
    let mut module = Module::new(StaticStrings::Asyncio);

    module.set_attr(
        StaticStrings::Gather,
        Value::ModuleFunction(ModuleFunctions::Asyncio(AsyncioFunctions::Gather)),
        vm,
    );
    module.set_attr(
        StaticStrings::Run,
        Value::ModuleFunction(ModuleFunctions::Asyncio(AsyncioFunctions::Run)),
        vm,
    );

    vm.heap.allocate(HeapData::Module(module))
}

/// Dispatches a call to an `asyncio` module function.
///
/// `gather` produces a plain value, which is wrapped in `CallResult::Value`;
/// `run` returns its own `CallResult` unchanged.
pub(super) fn call(
    heap: &mut Heap,
    functions: AsyncioFunctions,
    args: ArgValues,
) -> RunResult {
    match functions {
        AsyncioFunctions::Gather => gather(heap, args).map(CallResult::Value),
        AsyncioFunctions::Run => run(heap, args),
    }
}

/// Implementation of `asyncio.run(coro)`.
///
/// Runs a single coroutine to completion, equivalent to `await coro` at the top level.
/// Accepts exactly one positional argument (the coroutine) and no keyword arguments.
///
/// Returns `CallResult::AwaitValue` so the VM executes `exec_get_awaitable` on
/// the value, which handles validation that it's actually a coroutine/awaitable.
fn run(heap: &mut Heap, args: ArgValues) -> RunResult {
    // Argument-count validation is delegated to `get_one_arg`; its error is propagated.
    let coroutine = args.get_one_arg("asyncio.run", heap)?;
    Ok(CallResult::AwaitValue(coroutine))
}

/// Implementation of `asyncio.gather(*awaitables)`.
///
/// Collects coroutines and external futures for concurrent execution. Does NOT
/// spawn tasks immediately - just validates and stores the references. Tasks are
/// spawned when the returned `GatherFuture` is awaited (in the `Await` opcode handler).
///
/// # Behavior when awaited
///
/// 1. Each coroutine is spawned as a separate Task
/// 2. External futures are tracked for resolution by the host
/// 3. The current task blocks until all items complete
/// 4. Results are collected in order and returned as a list
/// 5. On any task failure, sibling tasks are cancelled and the exception propagates
///
/// # Arguments
/// * `heap` - The heap for allocating the GatherFuture
/// * `args` - Variadic awaitable arguments (coroutines or external futures)
///
/// # Errors
/// Returns `TypeError` if any argument is not awaitable. Errors from
/// `kwargs.not_supported_yet` and from allocating the `GatherFuture` are propagated.
pub(crate) fn gather(heap: &mut Heap, args: ArgValues) -> RunResult {
    let (pos_args, kwargs) = args.into_parts();
    defer_drop_mut!(pos_args, heap);

    // TODO: support keyword arguments (e.g. return_exceptions)
    kwargs.not_supported_yet("gather", heap)?;

    // Validate all positional args are awaitable and collect them
    let mut items = Vec::new();
    // Coroutine IDs collected so far, kept so they can be released if a later argument is rejected.
    let mut coroutine_ids_to_cleanup: Vec = Vec::new();

    #[cfg_attr(not(feature = "ref-count-panic"), expect(unused_mut))]
    for mut arg in pos_args {
        match &arg {
            Value::Ref(id) if heap.get(*id).is_coroutine() => {
                coroutine_ids_to_cleanup.push(*id);
                items.push(GatherItem::Coroutine(*id));
                // Transfer ownership to GatherFuture - mark Value as consumed without dec_ref
                #[cfg(feature = "ref-count-panic")]
                arg.dec_ref_forget();
            }
            Value::ExternalFuture(call_id) => {
                items.push(GatherItem::ExternalFuture(*call_id));
                // ExternalFuture is Copy, no refcount to manage
            }
            _ => {
                // Not awaitable - clean up and error
                arg.drop_with_heap(heap);
                // Drop already-collected coroutine refs
                for cid in coroutine_ids_to_cleanup {
                    heap.dec_ref(cid);
                }
                return Err(ExcType::type_error(
                    "An asyncio.Future, a coroutine or an awaitable is required",
                ));
            }
        }
    }

    // Create GatherFuture on heap
    // NOTE(review): if `allocate` fails, the coroutine refs moved into `items` are not
    // released here - confirm whether the allocation error path handles them.
    let gather_future = GatherFuture::new(items);
    let id = heap.allocate(HeapData::GatherFuture(gather_future))?;
    Ok(Value::Ref(id))
}

================================================
FILE: crates/monty/src/modules/math.rs
================================================
//! Implementation of Python's `math` module.
//!
//! Provides mathematical functions and constants matching CPython 3.14 behavior
//! and error messages. All functions are pure computations that don't require
//! host involvement, so they return `Value` directly rather than `AttrCallResult`.
//!
//! ## Implemented functions
//!
//! **Rounding**: `floor`, `ceil`, `trunc`
//! **Roots & powers**: `sqrt`, `isqrt`, `cbrt`, `pow`, `exp`, `exp2`, `expm1`
//! **Logarithms**: `log`, `log2`, `log10`, `log1p`
//! **Trigonometric**: `sin`, `cos`, `tan`, `asin`, `acos`, `atan`, `atan2`
//! **Hyperbolic**: `sinh`, `cosh`, `tanh`, `asinh`, `acosh`, `atanh`
//! **Angular**: `degrees`, `radians`
//!
**Float properties**: `fabs`, `isnan`, `isinf`, `isfinite`, `copysign`, `isclose`, //! `nextafter`, `ulp` //! **Integer math**: `factorial`, `gcd`, `lcm`, `comb`, `perm` //! **Modular**: `fmod`, `remainder`, `modf`, `frexp`, `ldexp` //! **Special**: `gamma`, `lgamma`, `erf`, `erfc` //! //! ## Constants //! //! `pi`, `e`, `tau`, `inf`, `nan` use num_bigint::BigInt; use smallvec::smallvec; use crate::{ args::ArgValues, bytecode::VM, defer_drop, defer_drop_mut, exception_private::{ExcType, RunResult, SimpleException}, heap::{Heap, HeapData, HeapId}, intern::{Interns, StaticStrings}, modules::ModuleFunctions, resource::{ResourceError, ResourceTracker}, types::{LongInt, Module, PyTrait, allocate_tuple}, value::Value, }; // ========================== // Shared constants and error helpers // ========================== /// Returns a `ValueError` with the standard CPython "math domain error" message. fn math_domain_error() -> crate::exception_private::RunError { SimpleException::new_msg(ExcType::ValueError, "math domain error").into() } /// Returns an `OverflowError` with the standard CPython "math range error" message. fn math_range_error() -> crate::exception_private::RunError { SimpleException::new_msg(ExcType::OverflowError, "math range error").into() } /// Checks whether a computation overflowed (finite input produced infinite result). /// /// Returns `Err(OverflowError("math range error"))` if `result` is infinite /// but `input` was finite. fn check_range_error(result: f64, input: f64) -> RunResult<()> { if result.is_infinite() && input.is_finite() { Err(math_range_error()) } else { Ok(()) } } /// Checks that a value is in the `[-1, 1]` range, raising `ValueError` if not. /// /// NaN passes through (it will propagate through the subsequent math operation). /// Used by `math.asin` and `math.acos`. 
fn require_unit_range(f: f64) -> RunResult<()> {
    // NaN is let through on purpose; only real values outside [-1, 1] are rejected.
    if f.is_nan() || (-1.0..=1.0).contains(&f) {
        return Ok(());
    }
    let message = format!("expected a number in range from -1 up to 1, got {f:?}");
    Err(SimpleException::new_msg(ExcType::ValueError, message).into())
}

/// Rejects the poles of the Gamma function: zero and the negative integers.
///
/// Only finite values are treated as poles, so `-inf` is accepted here. Callers
/// that must reject `-inf` (such as `math.gamma`) have to do so themselves,
/// because `lgamma(-inf)` is valid and returns `inf`.
#[expect(
    clippy::float_cmp,
    reason = "exact comparison detects integer poles of gamma function"
)]
fn check_gamma_pole(f: f64) -> RunResult<()> {
    // A pole is a finite, non-positive value with no fractional part.
    let is_pole = f.is_finite() && f <= 0.0 && f == f.floor();
    if !is_pole {
        return Ok(());
    }
    let message = format!("expected a noninteger or positive integer, got {f:?}");
    Err(SimpleException::new_msg(ExcType::ValueError, message).into())
}

/// The functions of the `math` module, one variant per Python-visible function.
///
/// `strum` renders each variant name in lowercase.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, strum::Display, serde::Serialize, serde::Deserialize)]
#[strum(serialize_all = "lowercase")]
pub(crate) enum MathFunctions {
    // Rounding
    Floor,
    Ceil,
    Trunc,
    // Roots & powers
    Sqrt,
    Isqrt,
    Cbrt,
    Pow,
    Exp,
    Exp2,
    Expm1,
    // Logarithms
    Log,
    Log1p,
    Log2,
    Log10,
    // Float properties
    Fabs,
    Isnan,
    Isinf,
    Isfinite,
    Copysign,
    Isclose,
    Nextafter,
    Ulp,
    // Trigonometric
    Sin,
    Cos,
    Tan,
    Asin,
    Acos,
    Atan,
    Atan2,
    // Hyperbolic
    Sinh,
    Cosh,
    Tanh,
    Asinh,
    Acosh,
    Atanh,
    // Angular conversion
    Degrees,
    Radians,
    // Integer math
    Factorial,
    Gcd,
    Lcm,
    Comb,
    Perm,
    // Modular / decomposition
    Fmod,
    Remainder,
    Modf,
    Frexp,
    Ldexp,
    // Special functions
    Gamma,
    Lgamma,
    Erf,
    Erfc,
}

/// Creates the `math` module and allocates it on the heap.
///
/// Registers all math functions and constants (`pi`, `e`, `tau`, `inf`, `nan`)
/// matching CPython's `math` module. Functions are registered as
/// `ModuleFunctions::Math` variants.
///
/// # Returns
/// A `HeapId` pointing to the newly allocated module.
///
/// # Errors
/// Returns the error produced by `vm.heap.allocate` if the module cannot be allocated.
///
/// # Panics
/// Panics if the required strings have not been pre-interned during prepare phase.
pub fn create_module(vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
    let mut module = Module::new(StaticStrings::Math);

    // Register all math functions
    for (name, func) in MATH_FUNCTIONS {
        module.set_attr(*name, Value::ModuleFunction(ModuleFunctions::Math(*func)), vm);
    }

    // Constants
    module.set_attr(StaticStrings::Pi, Value::Float(std::f64::consts::PI), vm);
    module.set_attr(StaticStrings::MathE, Value::Float(std::f64::consts::E), vm);
    module.set_attr(StaticStrings::Tau, Value::Float(std::f64::consts::TAU), vm);
    module.set_attr(StaticStrings::MathInf, Value::Float(f64::INFINITY), vm);
    module.set_attr(StaticStrings::MathNan, Value::Float(f64::NAN), vm);

    vm.heap.allocate(HeapData::Module(module))
}

/// Static mapping of attribute names to math functions for module creation.
///
/// `create_module` iterates this table to register every function, so a new
/// `MathFunctions` variant only becomes visible on the module once it is listed here.
const MATH_FUNCTIONS: &[(StaticStrings, MathFunctions)] = &[
    // Rounding
    (StaticStrings::Floor, MathFunctions::Floor),
    (StaticStrings::Ceil, MathFunctions::Ceil),
    (StaticStrings::Trunc, MathFunctions::Trunc),
    // Roots & powers
    (StaticStrings::Sqrt, MathFunctions::Sqrt),
    (StaticStrings::Isqrt, MathFunctions::Isqrt),
    (StaticStrings::Cbrt, MathFunctions::Cbrt),
    (StaticStrings::Pow, MathFunctions::Pow),
    (StaticStrings::Exp, MathFunctions::Exp),
    (StaticStrings::Exp2, MathFunctions::Exp2),
    (StaticStrings::Expm1, MathFunctions::Expm1),
    // Logarithms
    (StaticStrings::Log, MathFunctions::Log),
    (StaticStrings::Log1p, MathFunctions::Log1p),
    (StaticStrings::Log2, MathFunctions::Log2),
    (StaticStrings::Log10, MathFunctions::Log10),
    // Float properties
    (StaticStrings::Fabs, MathFunctions::Fabs),
    (StaticStrings::Isnan, MathFunctions::Isnan),
    (StaticStrings::Isinf, MathFunctions::Isinf),
    (StaticStrings::Isfinite, MathFunctions::Isfinite),
    (StaticStrings::Copysign, MathFunctions::Copysign),
    (StaticStrings::Isclose, MathFunctions::Isclose),
    (StaticStrings::Nextafter, MathFunctions::Nextafter),
    (StaticStrings::Ulp, MathFunctions::Ulp),
    // Trigonometric
    (StaticStrings::Sin, MathFunctions::Sin),
    (StaticStrings::Cos, MathFunctions::Cos),
    (StaticStrings::Tan, MathFunctions::Tan),
    (StaticStrings::Asin, MathFunctions::Asin),
    (StaticStrings::Acos, MathFunctions::Acos),
    (StaticStrings::Atan, MathFunctions::Atan),
    (StaticStrings::Atan2, MathFunctions::Atan2),
    // Hyperbolic
    (StaticStrings::Sinh, MathFunctions::Sinh),
    (StaticStrings::Cosh, MathFunctions::Cosh),
    (StaticStrings::Tanh, MathFunctions::Tanh),
    (StaticStrings::Asinh, MathFunctions::Asinh),
    (StaticStrings::Acosh, MathFunctions::Acosh),
    (StaticStrings::Atanh, MathFunctions::Atanh),
    // Angular conversion
    (StaticStrings::Degrees, MathFunctions::Degrees),
    (StaticStrings::Radians, MathFunctions::Radians),
    // Integer math
    (StaticStrings::Factorial, MathFunctions::Factorial),
    (StaticStrings::Gcd, MathFunctions::Gcd),
    (StaticStrings::Lcm, MathFunctions::Lcm),
    (StaticStrings::Comb, MathFunctions::Comb),
    (StaticStrings::Perm, MathFunctions::Perm),
    // Modular / decomposition
    (StaticStrings::Fmod, MathFunctions::Fmod),
    (StaticStrings::Remainder, MathFunctions::Remainder),
    (StaticStrings::Modf, MathFunctions::Modf),
    (StaticStrings::Frexp, MathFunctions::Frexp),
    (StaticStrings::Ldexp, MathFunctions::Ldexp),
    // Special functions
    (StaticStrings::Gamma, MathFunctions::Gamma),
    (StaticStrings::Lgamma, MathFunctions::Lgamma),
    (StaticStrings::Erf, MathFunctions::Erf),
    (StaticStrings::Erfc, MathFunctions::Erfc),
];

/// Dispatches a call to a math module function.
///
/// All math functions are pure computations and return `Value` directly.
pub(super) fn call(
    vm: &mut VM<'_, '_, impl ResourceTracker>,
    function: MathFunctions,
    args: ArgValues,
) -> RunResult {
    // Every implementation takes the heap and the arguments; `isclose` is the only
    // one that is additionally handed `vm.interns`.
    match function {
        // Rounding
        MathFunctions::Floor => math_floor(vm.heap, args),
        MathFunctions::Ceil => math_ceil(vm.heap, args),
        MathFunctions::Trunc => math_trunc(vm.heap, args),
        // Roots & powers
        MathFunctions::Sqrt => math_sqrt(vm.heap, args),
        MathFunctions::Isqrt => math_isqrt(vm.heap, args),
        MathFunctions::Cbrt => math_cbrt(vm.heap, args),
        MathFunctions::Pow => math_pow(vm.heap, args),
        MathFunctions::Exp => math_exp(vm.heap, args),
        MathFunctions::Exp2 => math_exp2(vm.heap, args),
        MathFunctions::Expm1 => math_expm1(vm.heap, args),
        // Logarithms
        MathFunctions::Log => math_log(vm.heap, args),
        MathFunctions::Log1p => math_log1p(vm.heap, args),
        MathFunctions::Log2 => math_log2(vm.heap, args),
        MathFunctions::Log10 => math_log10(vm.heap, args),
        // Float properties
        MathFunctions::Fabs => math_fabs(vm.heap, args),
        MathFunctions::Isnan => math_isnan(vm.heap, args),
        MathFunctions::Isinf => math_isinf(vm.heap, args),
        MathFunctions::Isfinite => math_isfinite(vm.heap, args),
        MathFunctions::Copysign => math_copysign(vm.heap, args),
        MathFunctions::Isclose => math_isclose(vm.heap, args, vm.interns),
        MathFunctions::Nextafter => math_nextafter(vm.heap, args),
        MathFunctions::Ulp => math_ulp(vm.heap, args),
        // Trigonometric
        MathFunctions::Sin => math_sin(vm.heap, args),
        MathFunctions::Cos => math_cos(vm.heap, args),
        MathFunctions::Tan => math_tan(vm.heap, args),
        MathFunctions::Asin => math_asin(vm.heap, args),
        MathFunctions::Acos => math_acos(vm.heap, args),
        MathFunctions::Atan => math_atan(vm.heap, args),
        MathFunctions::Atan2 => math_atan2(vm.heap, args),
        // Hyperbolic
        MathFunctions::Sinh => math_sinh(vm.heap, args),
        MathFunctions::Cosh => math_cosh(vm.heap, args),
        MathFunctions::Tanh => math_tanh(vm.heap, args),
        MathFunctions::Asinh => math_asinh(vm.heap, args),
        MathFunctions::Acosh => math_acosh(vm.heap, args),
        MathFunctions::Atanh => math_atanh(vm.heap, args),
        // Angular conversion
        MathFunctions::Degrees => math_degrees(vm.heap, args),
        MathFunctions::Radians => math_radians(vm.heap, args),
        // Integer math
        MathFunctions::Factorial => math_factorial(vm.heap, args),
        MathFunctions::Gcd => math_gcd(vm.heap, args),
        MathFunctions::Lcm => math_lcm(vm.heap, args),
        MathFunctions::Comb => math_comb(vm.heap, args),
        MathFunctions::Perm => math_perm(vm.heap, args),
        // Modular / decomposition
        MathFunctions::Fmod => math_fmod(vm.heap, args),
        MathFunctions::Remainder => math_remainder(vm.heap, args),
        MathFunctions::Modf => math_modf(vm.heap, args),
        MathFunctions::Frexp => math_frexp(vm.heap, args),
        MathFunctions::Ldexp => math_ldexp(vm.heap, args),
        // Special functions
        MathFunctions::Gamma => math_gamma(vm.heap, args),
        MathFunctions::Lgamma => math_lgamma(vm.heap, args),
        MathFunctions::Erf => math_erf(vm.heap, args),
        MathFunctions::Erfc => math_erfc(vm.heap, args),
    }
}

// ==========================
// Rounding functions
// ==========================

/// `math.floor(x)` — returns the largest integer less than or equal to x.
///
/// Accepts int, float, or bool. Returns int.
/// Raises `OverflowError` for infinity, `ValueError` for NaN.
/// Any other argument type raises `TypeError` ("must be real number, not <type>").
fn math_floor(heap: &mut Heap, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("math.floor", heap)?;
    defer_drop!(value, heap);
    match value {
        // The float path goes through `float_to_int_checked`, which receives both
        // the rounded value and the original input.
        Value::Float(f) => float_to_int_checked(f.floor(), *f, heap),
        // Ints are already integral and are returned unchanged.
        Value::Int(n) => Ok(Value::Int(*n)),
        // Bools become the ints 0 or 1.
        Value::Bool(b) => Ok(Value::Int(i64::from(*b))),
        _ => Err(ExcType::type_error(format!(
            "must be real number, not {}",
            value.py_type(heap)
        ))),
    }
}

/// `math.ceil(x)` — returns the smallest integer greater than or equal to x.
///
/// Accepts int, float, or bool. Returns int.
/// Raises `OverflowError` for infinity, `ValueError` for NaN.
fn math_ceil(heap: &mut Heap, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("math.ceil", heap)?;
    defer_drop!(value, heap);
    match value {
        // The float path goes through `float_to_int_checked`, which receives both
        // the rounded value and the original input.
        Value::Float(f) => float_to_int_checked(f.ceil(), *f, heap),
        // Ints are already integral and are returned unchanged.
        Value::Int(n) => Ok(Value::Int(*n)),
        // Bools become the ints 0 or 1.
        Value::Bool(b) => Ok(Value::Int(i64::from(*b))),
        _ => Err(ExcType::type_error(format!(
            "must be real number, not {}",
            value.py_type(heap)
        ))),
    }
}

/// `math.trunc(x)` — truncates x to the nearest integer toward zero.
///
/// Accepts int, float, or bool. Returns int.
/// Any other argument type raises `TypeError`
/// ("type <type> doesn't define __trunc__ method").
fn math_trunc(heap: &mut Heap, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("math.trunc", heap)?;
    defer_drop!(value, heap);
    match value {
        // Same checked float-to-int conversion as `floor`/`ceil`, applied to the truncated value.
        Value::Float(f) => float_to_int_checked(f.trunc(), *f, heap),
        Value::Int(n) => Ok(Value::Int(*n)),
        Value::Bool(b) => Ok(Value::Int(i64::from(*b))),
        _ => Err(ExcType::type_error(format!(
            "type {} doesn't define __trunc__ method",
            value.py_type(heap)
        ))),
    }
}

// ==========================
// Roots & powers
// ==========================

/// `math.sqrt(x)` — returns the square root of x.
///
/// Always returns a float. Raises `ValueError` for negative inputs with a
/// descriptive message matching CPython 3.14: "expected a nonnegative input, got <value>".
fn math_sqrt(heap: &mut Heap, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("math.sqrt", heap)?;
    defer_drop!(value, heap);
    let f = value_to_float(value, heap)?;
    // NaN fails the `< 0.0` test, so it reaches `f.sqrt()` and propagates as NaN.
    if f < 0.0 {
        Err(SimpleException::new_msg(ExcType::ValueError, format!("expected a nonnegative input, got {f:?}")).into())
    } else {
        Ok(Value::Float(f.sqrt()))
    }
}

/// `math.isqrt(n)` — returns the integer square root of a non-negative integer.
///
/// Returns the largest integer `r` such that `r * r <= n`.
/// Only accepts non-negative integers (and bools).
fn math_isqrt(heap: &mut Heap, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("math.isqrt", heap)?;
    defer_drop!(value, heap);
    let n = value_to_int(value, heap)?;
    // Negative input raises `ValueError`.
    if n < 0 {
        return Err(SimpleException::new_msg(ExcType::ValueError, "isqrt() argument must be nonnegative").into());
    }
    // Handle zero up front so the correction loops below never divide by zero.
    if n == 0 {
        return Ok(Value::Int(0));
    }
    // Integer square root via f64 estimate + correction.
    // For i64 inputs, f64 sqrt is accurate to within ±1, so we need to
    // correct both overshoot and undershoot. The cast truncates toward zero,
    // so undershoot is possible for perfect squares near f64 precision limits.
    #[expect(
        clippy::cast_precision_loss,
        clippy::cast_possible_truncation,
        reason = "initial estimate doesn't need to be exact, correction refines it"
    )]
    let mut x = (n as f64).sqrt() as i64;
    // Correct overshoot: use `x > n / x` instead of `x * x > n` to avoid i64 overflow.
    while x > n / x {
        x -= 1;
    }
    // Correct undershoot: check if (x+1)² ≤ n using division to avoid overflow.
    while x < n / (x + 1) {
        x += 1;
    }
    Ok(Value::Int(x))
}

/// `math.cbrt(x)` — returns the cube root of x.
///
/// Always returns a float. Unlike `sqrt`, works for negative inputs.
fn math_cbrt(heap: &mut Heap, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("math.cbrt", heap)?;
    defer_drop!(value, heap);
    let f = value_to_float(value, heap)?;
    Ok(Value::Float(f.cbrt()))
}

/// `math.pow(x, y)` — returns x raised to the power y.
///
/// Always returns a float. Unlike the builtin `pow()`, does not support
/// three-argument modular exponentiation. Raises `ValueError` for
/// negative base with non-integer exponent.
fn math_pow(heap: &mut Heap, args: ArgValues) -> RunResult {
    let (x_val, y_val) = args.get_two_args("math.pow", heap)?;
    defer_drop!(x_val, heap);
    defer_drop!(y_val, heap);
    let x = value_to_float(x_val, heap)?;
    let y = value_to_float(y_val, heap)?;

    let result = x.powf(y);
    // CPython raises ValueError for domain errors: 0**negative, negative**non-integer
    // A NaN result only counts as a domain error when neither input was NaN.
    if result.is_nan() && !x.is_nan() && !y.is_nan() {
        return Err(math_domain_error());
    }
    // An infinite result from two finite inputs is either a pole or an overflow.
    if result.is_infinite() && x.is_finite() && y.is_finite() {
        // 0**negative is a domain error (ValueError), not overflow
        if x == 0.0 && y < 0.0 {
            return Err(math_domain_error());
        }
        return Err(math_range_error());
    }
    Ok(Value::Float(result))
}

/// `math.exp(x)` — returns e raised to the power x.
///
/// Raises `OverflowError` ("math range error") when a finite input produces an infinite result.
fn math_exp(heap: &mut Heap, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("math.exp", heap)?;
    defer_drop!(value, heap);
    let f = value_to_float(value, heap)?;
    let result = f.exp();
    check_range_error(result, f)?;
    Ok(Value::Float(result))
}

/// `math.exp2(x)` — returns 2 raised to the power x.
///
/// Raises `OverflowError` ("math range error") when a finite input produces an infinite result.
fn math_exp2(heap: &mut Heap, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("math.exp2", heap)?;
    defer_drop!(value, heap);
    let f = value_to_float(value, heap)?;
    let result = f.exp2();
    check_range_error(result, f)?;
    Ok(Value::Float(result))
}

/// `math.expm1(x)` — returns e**x - 1.
///
/// More accurate than `exp(x) - 1` for small values of x.
/// Raises `OverflowError` ("math range error") when a finite input produces an infinite result.
fn math_expm1(heap: &mut Heap, args: ArgValues) -> RunResult {
    let value = args.get_one_arg("math.expm1", heap)?;
    defer_drop!(value, heap);
    let f = value_to_float(value, heap)?;
    let result = f.exp_m1();
    check_range_error(result, f)?;
    Ok(Value::Float(result))
}

// ==========================
// Logarithms
// ==========================

/// `math.log(x[, base])` — returns the logarithm of x.
///
/// With one argument, returns the natural logarithm (base e).
/// With two arguments, returns `log(x) / log(base)`.
/// Raises `ValueError` for non-positive inputs (CPython 3.14: "expected a positive input").
fn math_log(heap: &mut Heap, args: ArgValues) -> RunResult {
    let (x_val, base_val) = args.get_one_two_args("math.log", heap)?;
    defer_drop!(x_val, heap);
    defer_drop!(base_val, heap);

    let x = value_to_float(x_val, heap)?;
    if x <= 0.0 {
        return Err(SimpleException::new_msg(ExcType::ValueError, "expected a positive input").into());
    }

    // Single-argument form: plain natural logarithm.
    let Some(base_v) = base_val else {
        return Ok(Value::Float(x.ln()));
    };

    let base = value_to_float(base_v, heap)?;
    // A base of exactly 1.0 would make the denominator `ln(base)` zero; report it
    // as ZeroDivisionError, which is what CPython does for this case.
    #[expect(
        clippy::float_cmp,
        reason = "exact comparison with 1.0 is intentional — log(1.0) is exactly 0.0"
    )]
    if base == 1.0 {
        return Err(SimpleException::new_msg(ExcType::ZeroDivisionError, "division by zero").into());
    }
    if base <= 0.0 {
        return Err(SimpleException::new_msg(ExcType::ValueError, "expected a positive input").into());
    }

    let numerator = x.ln();
    let denominator = base.ln();
    Ok(Value::Float(numerator / denominator))
}

/// `math.log1p(x)` — natural logarithm of `1 + x`.
///
/// Computed with `ln_1p`, which stays accurate for x close to zero.
/// Inputs at or below -1 raise `ValueError` with the message
/// "expected argument value > -1, got " followed by the offending value.
fn math_log1p(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.log1p", heap)?;
    defer_drop!(arg, heap);
    let f = value_to_float(arg, heap)?;

    if f <= -1.0 {
        let message = format!("expected argument value > -1, got {f:?}");
        return Err(SimpleException::new_msg(ExcType::ValueError, message).into());
    }

    Ok(Value::Float(f.ln_1p()))
}

/// `math.log2(x)` — returns the base-2 logarithm of x.
///
/// Returns `inf` for positive infinity, `nan` for NaN.
/// Raises `ValueError` for non-positive finite inputs.
fn math_log2(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.log2", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;

    // Guard clause: anything that compares `<= 0.0` is rejected (NaN does not).
    if x <= 0.0 {
        return Err(SimpleException::new_msg(ExcType::ValueError, "expected a positive input").into());
    }
    Ok(Value::Float(x.log2()))
}

/// `math.log10(x)` — base-10 logarithm of x.
///
/// Any input that compares `<= 0.0` raises `ValueError` ("expected a positive input");
/// every other input, including NaN and positive infinity, is forwarded to `f64::log10`.
fn math_log10(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.log10", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;

    if x <= 0.0 {
        return Err(SimpleException::new_msg(ExcType::ValueError, "expected a positive input").into());
    }
    Ok(Value::Float(x.log10()))
}

// ==========================
// Float properties
// ==========================

/// `math.fabs(x)` — absolute value of x as a float.
///
/// The argument is converted to a float first, so the result is a float even
/// for integer inputs (the builtin `abs()` would keep the integer type).
fn math_fabs(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.fabs", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;
    let magnitude = x.abs();
    Ok(Value::Float(magnitude))
}

/// `math.isnan(x)` — True when x is NaN.
fn math_isnan(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.isnan", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;
    let is_nan = x.is_nan();
    Ok(Value::Bool(is_nan))
}

/// `math.isinf(x)` — True when x is positive or negative infinity.
fn math_isinf(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.isinf", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;
    let is_inf = x.is_infinite();
    Ok(Value::Bool(is_inf))
}

/// `math.isfinite(x)` — returns True if x is neither infinity nor NaN.
fn math_isfinite(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.isfinite", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; Ok(Value::Bool(f.is_finite())) } /// `math.copysign(x, y)` — returns x with the sign of y. /// /// Always returns a float. fn math_copysign(heap: &mut Heap, args: ArgValues) -> RunResult { let (x_val, y_val) = args.get_two_args("math.copysign", heap)?; defer_drop!(x_val, heap); defer_drop!(y_val, heap); let x = value_to_float(x_val, heap)?; let y = value_to_float(y_val, heap)?; Ok(Value::Float(x.copysign(y))) } /// `math.isclose(a, b, *, rel_tol=1e-9, abs_tol=0.0)` — returns True if a and b are close. /// /// Supports keyword-only `rel_tol` and `abs_tol` parameters matching CPython. /// Raises `ValueError` if either tolerance is negative. fn math_isclose(heap: &mut Heap, args: ArgValues, interns: &Interns) -> RunResult { let (positional, kwargs) = args.into_parts(); defer_drop_mut!(positional, heap); // Extract exactly two positional args let Some(a_val) = positional.next() else { return Err(ExcType::type_error_at_least("math.isclose", 2, 0)); }; defer_drop!(a_val, heap); let Some(b_val) = positional.next() else { return Err(ExcType::type_error_at_least("math.isclose", 2, 1)); }; defer_drop!(b_val, heap); if positional.len() > 0 { return Err(ExcType::type_error_at_most("math.isclose", 2, 2 + positional.len())); } let a = value_to_float(a_val, heap)?; let b = value_to_float(b_val, heap)?; // Parse optional keyword arguments rel_tol and abs_tol let (rel_tol, abs_tol) = extract_isclose_kwargs(kwargs, heap, interns)?; if rel_tol < 0.0 { return Err(SimpleException::new_msg(ExcType::ValueError, "tolerances must be non-negative").into()); } if abs_tol < 0.0 { return Err(SimpleException::new_msg(ExcType::ValueError, "tolerances must be non-negative").into()); } // Exact equality check matches CPython's isclose() behavior — two identical // values (including infinities) are always considered 
close. #[expect( clippy::float_cmp, reason = "exact equality check matches CPython's isclose() semantics" )] if a == b { return Ok(Value::Bool(true)); } if a.is_infinite() || b.is_infinite() { return Ok(Value::Bool(false)); } if a.is_nan() || b.is_nan() { return Ok(Value::Bool(false)); } let diff = (a - b).abs(); let result = diff <= (rel_tol * a.abs().max(b.abs())).max(abs_tol); Ok(Value::Bool(result)) } /// Extracts `rel_tol` and `abs_tol` keyword arguments for `math.isclose`. /// /// Returns `(rel_tol, abs_tol)` with defaults of `(1e-9, 0.0)`. fn extract_isclose_kwargs( kwargs: crate::args::KwargsValues, heap: &mut Heap, interns: &Interns, ) -> RunResult<(f64, f64)> { let mut rel_tol: f64 = 1e-9; let mut abs_tol: f64 = 0.0; if kwargs.is_empty() { return Ok((rel_tol, abs_tol)); } for (key, value) in kwargs { defer_drop!(key, heap); defer_drop!(value, heap); let Some(keyword_name) = key.as_either_str(heap) else { return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(interns); match key_str { "rel_tol" => { rel_tol = value_to_float(value, heap)?; } "abs_tol" => { abs_tol = value_to_float(value, heap)?; } other => { return Err(ExcType::type_error(format!( "isclose() got an unexpected keyword argument '{other}'" ))); } } } Ok((rel_tol, abs_tol)) } /// `math.nextafter(x, y)` — returns the next float after x towards y. fn math_nextafter(heap: &mut Heap, args: ArgValues) -> RunResult { let (x_val, y_val) = args.get_two_args("math.nextafter", heap)?; defer_drop!(x_val, heap); defer_drop!(y_val, heap); let x = value_to_float(x_val, heap)?; let y = value_to_float(y_val, heap)?; Ok(Value::Float(libm::nextafter(x, y))) } /// `math.ulp(x)` — returns the value of the least significant bit of x. /// /// For finite non-zero x, returns the smallest float `u` such that `x + u != x`. /// Special cases: `ulp(nan)` returns nan, `ulp(inf)` returns inf. 
fn math_ulp(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.ulp", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; if f.is_nan() { return Ok(Value::Float(f64::NAN)); } if f.is_infinite() { return Ok(Value::Float(f64::INFINITY)); } let f = f.abs(); if f == 0.0 { // CPython returns the smallest positive subnormal: 5e-324 return Ok(Value::Float(f64::from_bits(1))); } // ULP = nextafter(f, inf) - f let next = libm::nextafter(f, f64::INFINITY); Ok(Value::Float(next - f)) } // ========================== // Trigonometric functions // ========================== /// `math.sin(x)` — returns the sine of x (in radians). /// /// CPython 3.14 raises ValueError for infinity: "expected a finite input, got inf". fn math_sin(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.sin", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; require_finite(f)?; Ok(Value::Float(f.sin())) } /// `math.cos(x)` — returns the cosine of x (in radians). fn math_cos(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.cos", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; require_finite(f)?; Ok(Value::Float(f.cos())) } /// `math.tan(x)` — returns the tangent of x (in radians). fn math_tan(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.tan", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; require_finite(f)?; Ok(Value::Float(f.tan())) } /// `math.asin(x)` — returns the arc sine of x (in radians). /// /// CPython 3.14: "expected a number in range from -1 up to 1, got ". fn math_asin(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.asin", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; require_unit_range(f)?; Ok(Value::Float(f.asin())) } /// `math.acos(x)` — returns the arc cosine of x (in radians). 
///
/// CPython 3.14: "expected a number in range from -1 up to 1, got ".
fn math_acos(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.acos", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;
    // Range validation is delegated to the shared helper.
    require_unit_range(x)?;
    let angle = x.acos();
    Ok(Value::Float(angle))
}

/// `math.atan(x)` — arc tangent of x, in radians.
///
/// No domain check is applied; the converted float goes straight to `f64::atan`.
fn math_atan(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.atan", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;
    let angle = x.atan();
    Ok(Value::Float(angle))
}

/// `math.atan2(y, x)` — arc tangent of `y / x`, in radians.
///
/// Note the argument order: the first argument is y, the second is x. The signs
/// of both are used to choose the quadrant.
fn math_atan2(heap: &mut Heap, args: ArgValues) -> RunResult {
    let (ordinate_val, abscissa_val) = args.get_two_args("math.atan2", heap)?;
    defer_drop!(ordinate_val, heap);
    defer_drop!(abscissa_val, heap);
    let ordinate = value_to_float(ordinate_val, heap)?;
    let abscissa = value_to_float(abscissa_val, heap)?;
    Ok(Value::Float(ordinate.atan2(abscissa)))
}

// ==========================
// Hyperbolic functions
// ==========================

/// `math.sinh(x)` — hyperbolic sine of x.
///
/// The result is passed through `check_range_error` together with the input
/// before being returned.
fn math_sinh(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.sinh", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;
    let y = x.sinh();
    check_range_error(y, x)?;
    Ok(Value::Float(y))
}

/// `math.cosh(x)` — hyperbolic cosine of x.
///
/// The result is passed through `check_range_error` together with the input
/// before being returned.
fn math_cosh(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.cosh", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;
    let y = x.cosh();
    check_range_error(y, x)?;
    Ok(Value::Float(y))
}

/// `math.tanh(x)` — returns the hyperbolic tangent of x.
fn math_tanh(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.tanh", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;
    let y = x.tanh();
    Ok(Value::Float(y))
}

/// `math.asinh(x)` — inverse hyperbolic sine of x.
fn math_asinh(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.asinh", heap)?;
    defer_drop!(arg, heap);
    let x = value_to_float(arg, heap)?;
    let y = x.asinh();
    Ok(Value::Float(y))
}

/// `math.acosh(x)` — inverse hyperbolic cosine of x.
///
/// Inputs below 1 raise `ValueError` with the message
/// "expected argument value not less than 1, got " followed by the offending value.
fn math_acosh(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.acosh", heap)?;
    defer_drop!(arg, heap);
    let f = value_to_float(arg, heap)?;

    if f < 1.0 {
        let message = format!("expected argument value not less than 1, got {f:?}");
        return Err(SimpleException::new_msg(ExcType::ValueError, message).into());
    }

    Ok(Value::Float(f.acosh()))
}

/// `math.atanh(x)` — inverse hyperbolic tangent of x.
///
/// Only the open interval (-1, 1) is accepted; any input whose magnitude is 1 or
/// more raises `ValueError` with the message "expected a number between -1 and 1, got "
/// followed by the offending value. NaN is not rejected.
fn math_atanh(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.atanh", heap)?;
    defer_drop!(arg, heap);
    let f = value_to_float(arg, heap)?;

    // `|f| >= 1` covers both `f <= -1` and `f >= 1`, and is false for NaN.
    if f.abs() >= 1.0 {
        let message = format!("expected a number between -1 and 1, got {f:?}");
        return Err(SimpleException::new_msg(ExcType::ValueError, message).into());
    }

    Ok(Value::Float(f.atanh()))
}

// ==========================
// Angular conversion
// ==========================

/// `math.degrees(x)` — converts the angle x from radians to degrees.
fn math_degrees(heap: &mut Heap, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("math.degrees", heap)?;
    defer_drop!(arg, heap);
    let radians = value_to_float(arg, heap)?;
    Ok(Value::Float(radians.to_degrees()))
}

/// `math.radians(x)` — converts angle x from degrees to radians.
fn math_radians(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.radians", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; Ok(Value::Float(f.to_radians())) } // ========================== // Integer math // ========================== /// `math.factorial(n)` — returns n factorial. /// /// Only accepts non-negative integers (and bools). Raises `ValueError` for /// negative values, `TypeError` for non-integer types. fn math_factorial(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.factorial", heap)?; defer_drop!(value, heap); let n = match value { Value::Int(n) => *n, Value::Bool(b) => i64::from(*b), _ => { return Err(ExcType::type_error(format!( "'{}' object cannot be interpreted as an integer", value.py_type(heap) ))); } }; if n < 0 { return Err( SimpleException::new_msg(ExcType::ValueError, "factorial() not defined for negative values").into(), ); } // Compute factorial iteratively let mut result: i64 = 1; for i in 2..=n { match result.checked_mul(i) { Some(v) => result = v, None => { // Overflow — for simplicity, return an error for very large factorials // since we don't have LongInt factorial support yet return Err( SimpleException::new_msg(ExcType::OverflowError, "int too large to convert to factorial").into(), ); } } } Ok(Value::Int(result)) } /// `math.gcd(*integers)` — returns the greatest common divisor of the arguments. /// /// Supports 0 or more arguments, matching CPython 3.9+. `gcd()` returns 0, /// `gcd(n)` returns `abs(n)`, and for multiple args reduces pairwise. /// The result is always non-negative. 
fn math_gcd(heap: &mut Heap, args: ArgValues) -> RunResult {
    let positional = args.into_pos_only("math.gcd", heap)?;
    defer_drop_mut!(positional, heap);

    // gcd(0, n) == |n|, so 0 is the identity for the pairwise reduction.
    let mut result: u64 = 0;
    for arg in positional.by_ref() {
        defer_drop!(arg, heap);
        let n = value_to_int(arg, heap)?;
        result = gcd(result, n.unsigned_abs());
    }

    u64_to_value(result, heap)
}

/// `math.lcm(*integers)` — returns the least common multiple of the arguments.
///
/// Supports 0 or more arguments, matching CPython 3.9+. `lcm()` returns 1,
/// `lcm(n)` returns `abs(n)`, and for multiple args reduces pairwise.
/// The result is always non-negative. Returns 0 if any argument is 0.
///
/// Every argument is type-checked, even after a zero has been seen, so
/// `lcm(0, "a")` raises `TypeError` rather than returning 0. An intermediate
/// product that exceeds the `u64` range raises `OverflowError` only when no
/// argument is zero, since a zero anywhere makes the mathematical result 0.
fn math_lcm(heap: &mut Heap, args: ArgValues) -> RunResult {
    let positional = args.into_pos_only("math.lcm", heap)?;
    defer_drop_mut!(positional, heap);

    let mut result: u64 = 1;
    // Set when an intermediate product no longer fits in u64. Reporting is deferred
    // until all arguments were inspected, because a later zero still yields 0.
    let mut overflowed = false;
    for arg in positional.by_ref() {
        defer_drop!(arg, heap);
        let abs_n = value_to_int(arg, heap)?.unsigned_abs();
        if abs_n == 0 {
            result = 0;
        }
        // Once the outcome is decided (zero or overflow) keep looping purely to
        // validate the types of the remaining arguments.
        if result == 0 || overflowed {
            continue;
        }
        // lcm(a, b) = |a| / gcd(a,b) * |b| — dividing first avoids intermediate overflow
        match (result / gcd(result, abs_n)).checked_mul(abs_n) {
            Some(next) => result = next,
            None => overflowed = true,
        }
    }

    if result == 0 {
        return Ok(Value::Int(0));
    }
    if overflowed {
        return Err(SimpleException::new_msg(ExcType::OverflowError, "integer overflow in lcm").into());
    }
    u64_to_value(result, heap)
}

/// `math.comb(n, k)` — returns the number of ways to choose k items from n.
///
/// Both arguments must be non-negative integers.
fn math_comb(heap: &mut Heap, args: ArgValues) -> RunResult { let (n_val, k_val) = args.get_two_args("math.comb", heap)?; defer_drop!(n_val, heap); defer_drop!(k_val, heap); let n = value_to_int(n_val, heap)?; let k = value_to_int(k_val, heap)?; if n < 0 { return Err(SimpleException::new_msg(ExcType::ValueError, "n must be a non-negative integer").into()); } if k < 0 { return Err(SimpleException::new_msg(ExcType::ValueError, "k must be a non-negative integer").into()); } if k > n { return Ok(Value::Int(0)); } // Use the smaller of k and n-k for efficiency: C(n, k) = C(n, n-k) let k = k.min(n - k); let mut result: i64 = 1; for i in 0..k { // Use GCD reduction to keep intermediates small: // result = result * (n - i) / (i + 1) // By dividing both numerator and denominator by their GCD first, // we reduce the chance of overflow in the multiplication step. let mut numerator = n - i; let mut denominator = i + 1; #[expect(clippy::cast_sign_loss, reason = "both values are known non-negative at this point")] let g = gcd(numerator as u64, denominator as u64).cast_signed(); numerator /= g; denominator /= g; // Also reduce against the running result #[expect(clippy::cast_sign_loss, reason = "result and denominator are known non-negative")] let g2 = gcd(result as u64, denominator as u64).cast_signed(); result /= g2; denominator /= g2; debug_assert!(denominator == 1, "denominator should be 1 after GCD reduction in comb"); match result.checked_mul(numerator) { Some(v) => result = v, None => { return Err(SimpleException::new_msg(ExcType::OverflowError, "integer overflow in comb").into()); } } } Ok(Value::Int(result)) } /// `math.perm(n, k=None)` — returns the number of k-length permutations from n items. /// /// Both arguments must be non-negative integers. When `k` is omitted, defaults to `n` /// (i.e., `perm(n)` returns `n!`), matching CPython behavior. 
fn math_perm(heap: &mut Heap, args: ArgValues) -> RunResult { let (n_val, k_val) = args.get_one_two_args("math.perm", heap)?; defer_drop!(n_val, heap); let n = value_to_int(n_val, heap)?; let k_explicit = k_val.is_some(); let k = match k_val { Some(kv) => { defer_drop!(kv, heap); value_to_int(kv, heap)? } None => n, }; if n < 0 { // When called as perm(n) without k, CPython uses the factorial error message let msg = if k_explicit { "n must be a non-negative integer" } else { "factorial() not defined for negative values" }; return Err(SimpleException::new_msg(ExcType::ValueError, msg).into()); } if k < 0 { return Err(SimpleException::new_msg(ExcType::ValueError, "k must be a non-negative integer").into()); } if k > n { return Ok(Value::Int(0)); } let mut result: i64 = 1; for i in 0..k { match result.checked_mul(n - i) { Some(v) => result = v, None => { return Err(SimpleException::new_msg(ExcType::OverflowError, "integer overflow in perm").into()); } } } Ok(Value::Int(result)) } // ========================== // Modular / decomposition // ========================== /// `math.fmod(x, y)` — returns x modulo y as a float. /// /// Unlike `x % y`, the result has the same sign as x. Raises `ValueError` /// when y is zero (CPython: "math domain error"). fn math_fmod(heap: &mut Heap, args: ArgValues) -> RunResult { let (x_val, y_val) = args.get_two_args("math.fmod", heap)?; defer_drop!(x_val, heap); defer_drop!(y_val, heap); let x = value_to_float(x_val, heap)?; let y = value_to_float(y_val, heap)?; if y == 0.0 || x.is_infinite() { // CPython raises for both fmod(x, 0) and fmod(inf, y) // but NaN inputs propagate if !x.is_nan() && !y.is_nan() { return Err(math_domain_error()); } } Ok(Value::Float(x % y)) } /// `math.remainder(x, y)` — IEEE 754 remainder of x with respect to y. /// /// The result is `x - n*y` where n is the closest integer to `x/y`. 
fn math_remainder(heap: &mut Heap, args: ArgValues) -> RunResult { let (x_val, y_val) = args.get_two_args("math.remainder", heap)?; defer_drop!(x_val, heap); defer_drop!(y_val, heap); let x = value_to_float(x_val, heap)?; let y = value_to_float(y_val, heap)?; // NaN propagates if x.is_nan() || y.is_nan() { return Ok(Value::Float(f64::NAN)); } if y == 0.0 { return Err(math_domain_error()); } if x.is_infinite() { return Err(math_domain_error()); } if y.is_infinite() { return Ok(Value::Float(x)); } Ok(Value::Float(libm::remainder(x, y))) } /// `math.modf(x)` — returns the fractional and integer parts of x as a tuple. /// /// Both values carry the sign of x. Returns `(fractional, integer)`. fn math_modf(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.modf", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; let (fractional, integer) = libm::modf(f); let tuple = allocate_tuple(smallvec![Value::Float(fractional), Value::Float(integer)], heap)?; Ok(tuple) } /// `math.frexp(x)` — returns (mantissa, exponent) such that `x == mantissa * 2**exponent`. /// /// The mantissa is always in the range [0.5, 1.0) or zero. /// Returns a tuple `(float, int)`. fn math_frexp(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.frexp", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; let (m, exp) = libm::frexp(f); let tuple = allocate_tuple(smallvec![Value::Float(m), Value::Int(i64::from(exp))], heap)?; Ok(tuple) } /// `math.ldexp(x, i)` — returns `x * 2**i`, the inverse of `frexp`. /// /// Clamps the exponent to `i32` range before calling `libm::ldexp`, which is safe /// because IEEE 754 double exponents only span -1074 to +1023 — any `i64` outside /// `i32` range would trivially overflow or underflow anyway. 
fn math_ldexp(heap: &mut Heap, args: ArgValues) -> RunResult { let (x_val, i_val) = args.get_two_args("math.ldexp", heap)?; defer_drop!(x_val, heap); defer_drop!(i_val, heap); let x = value_to_float(x_val, heap)?; let i = value_to_int(i_val, heap)?; // Special cases: inf/nan/zero pass through regardless of exponent if x.is_nan() || x.is_infinite() || x == 0.0 { return Ok(Value::Float(x)); } // Clamp i64 to i32 range — exponents beyond ±2 billion trivially overflow/underflow #[expect(clippy::cast_possible_truncation, reason = "clamped to i32 range first")] let exp = i.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32; let result = libm::ldexp(x, exp); // If the result overflowed to infinity, CPython raises OverflowError if result.is_infinite() { return Err(math_range_error()); } Ok(Value::Float(result)) } // ========================== // Special functions // ========================== /// `math.gamma(x)` — returns the Gamma function at x. /// /// CPython 3.14 raises ValueError for non-positive integers: /// "expected a noninteger or positive integer, got ". fn math_gamma(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.gamma", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; // CPython also rejects -inf for gamma (but not lgamma, where lgamma(-inf) = inf) if f == f64::NEG_INFINITY { return Err(SimpleException::new_msg( ExcType::ValueError, format!("expected a noninteger or positive integer, got {f:?}"), ) .into()); } check_gamma_pole(f)?; let result = libm::tgamma(f); check_range_error(result, f)?; Ok(Value::Float(result)) } /// `math.lgamma(x)` — returns the natural log of the absolute value of Gamma(x). 
fn math_lgamma(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.lgamma", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; check_gamma_pole(f)?; let result = libm::lgamma(f); check_range_error(result, f)?; Ok(Value::Float(result)) } /// `math.erf(x)` — returns the error function at x. fn math_erf(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.erf", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; Ok(Value::Float(libm::erf(f))) } /// `math.erfc(x)` — returns the complementary error function at x (1 - erf(x)). /// /// More accurate than `1 - erf(x)` for large x. fn math_erfc(heap: &mut Heap, args: ArgValues) -> RunResult { let value = args.get_one_arg("math.erfc", heap)?; defer_drop!(value, heap); let f = value_to_float(value, heap)?; Ok(Value::Float(libm::erfc(f))) } // ========================== // Helper functions // ========================== /// Converts a rounded float to an integer `Value`, checking for infinity/NaN. /// /// `rounded` is the already-rounded float value (e.g., from `floor()`, `ceil()`, `trunc()`). /// `original` is the original input float, used only to determine the error type: /// infinity produces `OverflowError`, NaN produces `ValueError`. /// /// For finite values outside the i64 range, promotes to `LongInt` to match CPython's /// behavior of returning arbitrary-precision integers from `math.floor`/`ceil`/`trunc`. 
fn float_to_int_checked(rounded: f64, original: f64, heap: &mut Heap) -> RunResult {
    // The error checks look at `original`, not `rounded`: the kind of non-finite
    // input decides which exception type is raised.
    if original.is_infinite() {
        Err(SimpleException::new_msg(ExcType::OverflowError, "cannot convert float infinity to integer").into())
    } else if original.is_nan() {
        Err(SimpleException::new_msg(ExcType::ValueError, "cannot convert float NaN to integer").into())
    } else if rounded >= i64::MIN as f64 && rounded < i64::MAX as f64 {
        // Note: `i64::MAX as f64` rounds up to 2^63 (9223372036854775808.0), so we use
        // strict less-than to exclude that value. `i64::MIN as f64` is exact (-2^63).
        #[expect(
            clippy::cast_possible_truncation,
            reason = "intentional: value is within i64 range after bounds check"
        )]
        let result = rounded as i64;
        Ok(Value::Int(result))
    } else {
        // Value exceeds i64 range — promote to LongInt.
        // Format with no decimal places and parse as BigInt. This is correct because
        // `rounded` is already an integer-valued float from floor/ceil/trunc.
        let s = format!("{rounded:.0}");
        // NOTE(review): the turbofish below has no type argument in this copy of the
        // source — presumably it named the big-integer type passed to `LongInt::new`;
        // confirm against the original file.
        let bi = s
            .parse::()
            .map_err(|_| SimpleException::new_msg(ExcType::ValueError, "float too large to convert to integer"))?;
        Ok(LongInt::new(bi).into_value(heap)?)
    }
}

/// Converts a `Value` to `f64`, raising `TypeError` if the value is not numeric.
///
/// Accepts `Float`, `Int`, and `Bool` values. For other types, raises a `TypeError`
/// with a message matching CPython's format: "must be real number, not ".
///
/// `Bool` maps to `1.0` / `0.0`; `Int` is converted with a plain `as f64` cast.
#[expect(
    clippy::cast_precision_loss,
    reason = "i64-to-f64 can lose precision for large integers (beyond 2^53), but this matches CPython's conversion semantics"
)]
fn value_to_float(value: &Value, heap: &Heap) -> RunResult {
    match value {
        Value::Float(f) => Ok(*f),
        Value::Int(n) => Ok(*n as f64),
        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
        // Any other variant is reported using its Python type name.
        _ => Err(ExcType::type_error(format!(
            "must be real number, not {}",
            value.py_type(heap)
        ))),
    }
}

/// Converts a `Value` to `i64`, raising `TypeError` if the value is not an integer.
///
/// Accepts `Int` and `Bool` values.
For other types, raises a `TypeError` /// with a message matching CPython's format. fn value_to_int(value: &Value, heap: &Heap) -> RunResult { match value { Value::Int(n) => Ok(*n), Value::Bool(b) => Ok(i64::from(*b)), _ => Err(ExcType::type_error(format!( "'{}' object cannot be interpreted as an integer", value.py_type(heap) ))), } } /// Requires that a float is finite, raising ValueError if it's inf or nan. /// /// CPython 3.14 uses "expected a finite input, got inf" for trig functions. fn require_finite(f: f64) -> RunResult<()> { if f.is_infinite() { Err(SimpleException::new_msg(ExcType::ValueError, format!("expected a finite input, got {f:?}")).into()) } else { Ok(()) } } /// Euclidean GCD algorithm for unsigned 64-bit integers. fn gcd(mut a: u64, mut b: u64) -> u64 { while b != 0 { let t = b; b = a % b; a = t; } a } /// Converts a `u64` result to a `Value`, promoting to `LongInt` if it exceeds `i64::MAX`. /// /// This is needed for operations like `gcd(i64::MIN, 0)` where the unsigned result /// (`2^63`) doesn't fit in a signed `i64`. fn u64_to_value(n: u64, heap: &mut Heap) -> RunResult { if let Ok(signed) = i64::try_from(n) { Ok(Value::Int(signed)) } else { Ok(LongInt::new(BigInt::from(n)).into_value(heap)?) } } ================================================ FILE: crates/monty/src/modules/mod.rs ================================================ //! Built-in module implementations. //! //! This module provides implementations for Python built-in modules like `sys`, `typing`, //! and `asyncio`. These are created on-demand when import statements are executed. 
use std::fmt::{self, Write};

use strum::FromRepr;

use crate::{
    args::ArgValues,
    bytecode::{CallResult, VM},
    exception_private::RunResult,
    heap::HeapId,
    intern::{StaticStrings, StringId},
    resource::{ResourceError, ResourceTracker},
};

pub(crate) mod asyncio;
pub(crate) mod math;
pub(crate) mod os;
pub(crate) mod pathlib;
pub(crate) mod re;
pub(crate) mod sys;
pub(crate) mod typing;

/// Built-in modules that can be imported.
///
/// The enum is `#[repr(u8)]` and derives `FromRepr`, so a variant can be
/// recovered from its discriminant.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, FromRepr)]
pub(crate) enum BuiltinModule {
    /// The `sys` module providing system-specific parameters and functions.
    Sys,
    /// The `typing` module providing type hints support.
    Typing,
    /// The `asyncio` module providing async/await support (only `gather()` implemented).
    Asyncio,
    /// The `pathlib` module providing object-oriented filesystem paths.
    Pathlib,
    /// The `os` module providing operating system interface (only `getenv()` implemented).
    Os,
    /// The `math` module providing mathematical functions and constants.
    Math,
    /// The `re` module providing regular expression matching.
    Re,
}

impl BuiltinModule {
    /// Get the module from a string ID.
    ///
    /// Returns `None` when the ID is not a static string, or when it is a static
    /// string that does not name one of the built-in modules.
    pub fn from_string_id(string_id: StringId) -> Option {
        match StaticStrings::from_string_id(string_id)? {
            StaticStrings::Sys => Some(Self::Sys),
            StaticStrings::Typing => Some(Self::Typing),
            StaticStrings::Asyncio => Some(Self::Asyncio),
            StaticStrings::Pathlib => Some(Self::Pathlib),
            StaticStrings::Os => Some(Self::Os),
            StaticStrings::Math => Some(Self::Math),
            StaticStrings::Re => Some(Self::Re),
            _ => None,
        }
    }

    /// Creates a new instance of this module on the heap.
    ///
    /// Dispatches to the `create_module` function of the matching submodule and
    /// returns a HeapId pointing to the newly allocated module.
    ///
    /// # Panics
    ///
    /// Panics if the required strings have not been pre-interned during prepare phase.
    pub fn create(self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
        match self {
            Self::Sys => sys::create_module(vm),
            Self::Typing => typing::create_module(vm),
            Self::Asyncio => asyncio::create_module(vm),
            Self::Pathlib => pathlib::create_module(vm),
            Self::Os => os::create_module(vm),
            Self::Math => math::create_module(vm),
            Self::Re => re::create_module(vm),
        }
    }
}

/// All stdlib module functions (but not builtins).
///
/// Each variant wraps the function enum of one module; calls and formatting are
/// forwarded to that inner enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub(crate) enum ModuleFunctions {
    Asyncio(asyncio::AsyncioFunctions),
    Math(math::MathFunctions),
    Os(os::OsFunctions),
    Re(re::ReFunctions),
}

impl fmt::Display for ModuleFunctions {
    /// Formats as the wrapped function enum's own `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Asyncio(func) => write!(f, "{func}"),
            Self::Math(func) => write!(f, "{func}"),
            Self::Os(func) => write!(f, "{func}"),
            Self::Re(func) => write!(f, "{func}"),
        }
    }
}

impl ModuleFunctions {
    /// Calls the module function with the given arguments.
    ///
    /// Returns `CallResult` to support both immediate values and OS calls that
    /// require host involvement (e.g., `os.getenv()` needs the host to provide environment variables).
    pub fn call(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
        match self {
            Self::Asyncio(functions) => asyncio::call(vm.heap, functions, args),
            // `math::call` yields a plain value, so it is wrapped in `CallResult::Value` here.
            Self::Math(functions) => math::call(vm, functions, args).map(CallResult::Value),
            Self::Os(functions) => os::call(vm.heap, functions, args),
            Self::Re(functions) => re::call(vm, functions, args),
        }
    }

    /// Writes the Python repr() string for this function to a formatter.
    // NOTE(review): in this copy of the source the type parameter `W` is not declared
    // on the function and the format string is empty — both look like extraction
    // damage; confirm against the original file.
    pub fn py_repr_fmt(self, f: &mut W, py_id: usize) -> std::fmt::Result {
        write!(f, "")
    }
}

================================================
FILE: crates/monty/src/modules/os.rs
================================================
//! Implementation of the `os` module.
//!
//! Provides a minimal implementation of Python's `os` module with:
//!
- `getenv(key, default=None)`: Get a single environment variable //! - `environ`: Property that returns the entire environment as a dict //! //! Other os functions are not implemented. OS operations require host involvement //! via the `OsFunction` callback mechanism - Monty yields control to the host //! which executes the operation and returns the result. use crate::{ args::ArgValues, bytecode::{CallResult, VM}, exception_private::{ExcType, RunResult}, heap::{Heap, HeapData, HeapId}, intern::StaticStrings, modules::ModuleFunctions, os::OsFunction, resource::{ResourceError, ResourceTracker}, types::{Module, Property, PyTrait}, value::Value, }; /// OS module functions. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, strum::Display, serde::Serialize, serde::Deserialize)] #[strum(serialize_all = "lowercase")] pub(crate) enum OsFunctions { Getenv, } /// Creates the `os` module and allocates it on the heap. /// /// The module provides: /// - `getenv(key, default=None)`: Get a single environment variable /// - `environ`: Property that returns the entire environment as a dict /// /// Both operations yield to the host via `OsFunction` callbacks. /// /// # Returns /// A HeapId pointing to the newly allocated module. /// /// # Panics /// Panics if the required strings have not been pre-interned during prepare phase. pub fn create_module(vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { let mut module = Module::new(StaticStrings::Os); // os.getenv - function to get a single environment variable module.set_attr( StaticStrings::Getenv, Value::ModuleFunction(ModuleFunctions::Os(OsFunctions::Getenv)), vm, ); // os.environ - property that returns the entire environment as a dict module.set_attr( StaticStrings::Environ, Value::Property(Property::Os(OsFunction::GetEnviron)), vm, ); vm.heap.allocate(HeapData::Module(module)) } /// Dispatches a call to an os module function. 
/// /// Returns `CallResult::OsCall` for functions that need host involvement, /// or `CallResult::Value` for functions that can be computed immediately. pub(super) fn call( heap: &mut Heap, functions: OsFunctions, args: ArgValues, ) -> RunResult { match functions { OsFunctions::Getenv => getenv(heap, args), } } /// Implementation of `os.getenv(key, default=None)`. /// /// Returns the value of the environment variable `key` if it exists, or `default` if it doesn't. /// This function yields to the host to perform the actual environment lookup. /// /// # Arguments /// * `heap` - The heap for any allocations /// * `args` - Function arguments: `key` (required string), `default` (optional, defaults to None) /// /// # Returns /// `CallResult::OsCall` with `OsFunction::Getenv` - the host should look up the /// environment variable and return the value, or the default if not found. /// /// # Errors /// Returns `TypeError` if: /// - No arguments are provided /// - More than 2 arguments are provided /// - `key` is not a string fn getenv(heap: &mut Heap, args: ArgValues) -> RunResult { // getenv(key, default=None) - accepts 1 or 2 positional arguments let (key, default) = args.get_one_two_args("os.getenv", heap)?; // Validate key is a string if key.is_str(heap) { // Build args to pass to host: (key, default) // The default is Value::None if not provided let final_default = default.unwrap_or(Value::None); let args = ArgValues::Two(key, final_default); Ok(CallResult::OsCall(OsFunction::Getenv, args)) } else { let type_name = key.py_type(heap); key.drop_with_heap(heap); if let Some(d) = default { d.drop_with_heap(heap); } Err(ExcType::type_error(format!("str expected, not {type_name}"))) } } ================================================ FILE: crates/monty/src/modules/pathlib.rs ================================================ //! Implementation of the `pathlib` module. //! //! Provides a minimal implementation of Python's `pathlib` module with: //! 
- `Path`: A class for filesystem path operations //! //! The `Path` class supports both pure methods (no I/O, handled directly) and //! filesystem methods (require I/O, yield external function calls for host resolution). use crate::{ builtins::Builtins, bytecode::VM, heap::{HeapData, HeapId}, intern::StaticStrings, resource::{ResourceError, ResourceTracker}, types::{Module, Type}, value::Value, }; /// Creates the `pathlib` module and allocates it on the heap. /// /// Returns a HeapId pointing to the newly allocated module. /// /// # Panics /// /// Panics if the required strings have not been pre-interned during prepare phase. pub fn create_module(vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { let mut module = Module::new(StaticStrings::Pathlib); // pathlib.Path - the Path class (callable to create Path instances) module.set_attr(StaticStrings::PathClass, Value::Builtin(Builtins::Type(Type::Path)), vm); vm.heap.allocate(HeapData::Module(module)) } ================================================ FILE: crates/monty/src/modules/re.rs ================================================ //! Implementation of the `re` module. //! //! Provides regular expression matching operations. //! Uses the Rust `fancy-regex` crate. //! //! # Supported module-level functions //! //! - `re.compile(pattern, flags=0)` → `re.Pattern` //! - `re.search(pattern, string, flags=0)` → `re.Match` or `None` //! - `re.match(pattern, string, flags=0)` → `re.Match` or `None` //! - `re.fullmatch(pattern, string, flags=0)` → `re.Match` or `None` //! - `re.findall(pattern, string, flags=0)` → `list` //! - `re.sub(pattern, repl, string, count=0, flags=0)` → `str` //! - `re.split(pattern, string, maxsplit=0, flags=0)` → `list` //! - `re.finditer(pattern, string, flags=0)` → iterator of `re.Match` //! - `re.escape(pattern)` → `str` //! //! # Module attributes //! //! - `re.NOFLAG` - no flag (value: 0) //! - `re.IGNORECASE` / `re.I` — case-insensitive matching (value: 2) //! 
- `re.MULTILINE` / `re.M` — `^`/`$` match at line boundaries (value: 8) //! - `re.DOTALL` / `re.S` — `.` matches newlines (value: 16) //! - `re.ASCII` / `re.A` — ASCII-only matching for `\w`, `\d`, `\s` (value: 256) //! - `re.PatternError` / `re.error` — exception type for invalid patterns use std::borrow::Cow; use crate::{ args::ArgValues, builtins::Builtins, bytecode::{CallResult, VM}, defer_drop, defer_drop_mut, exception_private::{ExcType, RunResult}, heap::{DropWithHeap, Heap, HeapData, HeapId}, intern::StaticStrings, modules::ModuleFunctions, resource::{ResourceError, ResourceTracker}, types::{Module, PyTrait, RePattern, Str, Type, re_pattern::value_to_str}, value::Value, }; /// Python regex flag: no flag being applied. pub(crate) const NOFLAG: u16 = 0; /// Python regex flag: case-insensitive matching. pub(crate) const IGNORECASE: u16 = 2; /// Python regex flag: `^` and `$` match at line boundaries. pub(crate) const MULTILINE: u16 = 8; /// Python regex flag: `.` matches newlines. pub(crate) const DOTALL: u16 = 16; /// Python regex flag: ASCII-only matching for `\w`, `\b`, `\d`, `\s`. pub(crate) const ASCII: u16 = 256; /// Functions exposed by the `re` module. /// /// Each variant corresponds to a module-level function that can be called directly /// (e.g., `re.search(pattern, string)`). These are convenience wrappers that compile /// the pattern on each call — for repeated use, `re.compile()` avoids recompilation. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, strum::Display, serde::Serialize, serde::Deserialize)] #[strum(serialize_all = "lowercase")] pub(crate) enum ReFunctions { /// `re.compile(pattern, flags=0)` — compile a pattern into a `re.Pattern` object. Compile, /// `re.search(pattern, string, flags=0)` — find first match anywhere in the string. Search, /// `re.match(pattern, string, flags=0)` — match anchored at the start. Match, /// `re.fullmatch(pattern, string, flags=0)` — match the entire string. 
Fullmatch, /// `re.findall(pattern, string, flags=0)` — return all non-overlapping matches. Findall, /// `re.sub(pattern, repl, string, count=0, flags=0)` — substitute matches. Sub, /// `re.split(pattern, string, maxsplit=0, flags=0)` — split string by pattern. Split, /// `re.finditer(pattern, string, flags=0)` — return iterator over all matches. Finditer, /// `re.escape(pattern)` — escape all non-alphanumeric characters in pattern. Escape, } /// Creates the `re` module and allocates it on the heap. /// /// The module provides regex functions (`compile`, `search`, `match`, `fullmatch`, /// `findall`, `sub`) and flag constants (`IGNORECASE`, `MULTILINE`, `DOTALL`). /// /// # Returns /// A `HeapId` pointing to the newly allocated module. /// /// # Panics /// Panics if the required strings have not been pre-interned during prepare phase. pub fn create_module(vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { let mut module = Module::new(StaticStrings::Re); // Functions module.set_attr( StaticStrings::Compile, Value::ModuleFunction(ModuleFunctions::Re(ReFunctions::Compile)), vm, ); module.set_attr( StaticStrings::Search, Value::ModuleFunction(ModuleFunctions::Re(ReFunctions::Search)), vm, ); module.set_attr( StaticStrings::Match, Value::ModuleFunction(ModuleFunctions::Re(ReFunctions::Match)), vm, ); module.set_attr( StaticStrings::Fullmatch, Value::ModuleFunction(ModuleFunctions::Re(ReFunctions::Fullmatch)), vm, ); module.set_attr( StaticStrings::Findall, Value::ModuleFunction(ModuleFunctions::Re(ReFunctions::Findall)), vm, ); module.set_attr( StaticStrings::Sub, Value::ModuleFunction(ModuleFunctions::Re(ReFunctions::Sub)), vm, ); module.set_attr( StaticStrings::Split, Value::ModuleFunction(ModuleFunctions::Re(ReFunctions::Split)), vm, ); module.set_attr( StaticStrings::Finditer, Value::ModuleFunction(ModuleFunctions::Re(ReFunctions::Finditer)), vm, ); module.set_attr( StaticStrings::Escape, Value::ModuleFunction(ModuleFunctions::Re(ReFunctions::Escape)), vm, ); // 
Flag constants module.set_attr(StaticStrings::NoFlag, Value::Int(i64::from(NOFLAG)), vm); module.set_attr(StaticStrings::Ignorecase, Value::Int(i64::from(IGNORECASE)), vm); module.set_attr(StaticStrings::I, Value::Int(i64::from(IGNORECASE)), vm); module.set_attr(StaticStrings::MultilineFlag, Value::Int(i64::from(MULTILINE)), vm); module.set_attr(StaticStrings::M, Value::Int(i64::from(MULTILINE)), vm); module.set_attr(StaticStrings::DotallFlag, Value::Int(i64::from(DOTALL)), vm); module.set_attr(StaticStrings::S, Value::Int(i64::from(DOTALL)), vm); module.set_attr(StaticStrings::AsciiFlag, Value::Int(i64::from(ASCII)), vm); module.set_attr(StaticStrings::A, Value::Int(i64::from(ASCII)), vm); // Exception types module.set_attr( StaticStrings::PatternError, Value::Builtin(Builtins::ExcType(ExcType::RePatternError)), vm, ); // `re.error` is the historical alias for `re.PatternError` (still widely used) module.set_attr( StaticStrings::Error, Value::Builtin(Builtins::ExcType(ExcType::RePatternError)), vm, ); // Constructed types module.set_attr( StaticStrings::PatternClass, Value::Builtin(Builtins::Type(Type::RePattern)), vm, ); module.set_attr( StaticStrings::MatchClass, Value::Builtin(Builtins::Type(Type::ReMatch)), vm, ); vm.heap.allocate(HeapData::Module(module)) } /// Dispatches a call to a `re` module function. /// /// Extracts arguments, compiles patterns as needed, and delegates to the appropriate /// `RePattern` method. All functions return `CallResult::Value` since regex /// operations don't need host involvement. 
pub(super) fn call( vm: &mut VM<'_, '_, impl ResourceTracker>, function: ReFunctions, args: ArgValues, ) -> RunResult { match function { ReFunctions::Compile => call_compile(vm, args).map(CallResult::Value), ReFunctions::Search => call_search(vm, args).map(CallResult::Value), ReFunctions::Match => call_match(vm, args).map(CallResult::Value), ReFunctions::Fullmatch => call_fullmatch(vm, args).map(CallResult::Value), ReFunctions::Findall => call_findall(vm, args).map(CallResult::Value), ReFunctions::Sub => call_sub(vm, args).map(CallResult::Value), ReFunctions::Split => call_split(vm, args).map(CallResult::Value), ReFunctions::Finditer => call_finditer(vm, args).map(CallResult::Value), ReFunctions::Escape => call_escape(vm, args).map(CallResult::Value), } } /// `re.compile(pattern, flags=0)` — compile a regular expression pattern. /// /// Returns a `re.Pattern` object that can be reused for multiple match operations. /// The pattern is compiled once and stored, avoiding recompilation overhead. fn call_compile(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (pattern_val, flags) = extract_pattern_and_flags(args, "re.compile", vm)?; let compiled = RePattern::compile(pattern_val, flags)?; Ok(Value::Ref(vm.heap.allocate(HeapData::RePattern(Box::new(compiled)))?)) } /// `re.search(pattern, string, flags=0)` — scan through string looking for a match. /// /// Compiles the pattern, then delegates to `RePattern::search`. Returns a `re.Match` /// object on success, or `None` if no position in the string matches. fn call_search(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (pattern, text, flags) = extract_pattern_string_flags(args, "re.search", vm)?; let compiled = RePattern::compile(pattern, flags)?; compiled.search(&text, vm.heap) } /// `re.match(pattern, string, flags=0)` — match at the beginning of the string. /// /// Compiles the pattern, then delegates to `RePattern::match_start`. 
Returns a `re.Match` /// object if the pattern matches at position 0, or `None` otherwise. fn call_match(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (pattern, text, flags) = extract_pattern_string_flags(args, "re.match", vm)?; let compiled = RePattern::compile(pattern, flags)?; compiled.match_start(&text, vm.heap) } /// `re.fullmatch(pattern, string, flags=0)` — match the entire string. /// /// Compiles the pattern, then delegates to `RePattern::fullmatch`. Returns a `re.Match` /// object if the pattern matches the whole string, or `None` otherwise. fn call_fullmatch(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (pattern, text, flags) = extract_pattern_string_flags(args, "re.fullmatch", vm)?; let compiled = RePattern::compile(pattern, flags)?; compiled.fullmatch(&text, vm.heap) } /// `re.findall(pattern, string, flags=0)` — find all non-overlapping matches. /// /// Compiles the pattern, then delegates to `RePattern::findall`. Returns a list of /// strings or tuples depending on the number of capture groups (matching CPython semantics). fn call_findall(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (pattern, text, flags) = extract_pattern_string_flags(args, "re.findall", vm)?; let compiled = RePattern::compile(pattern, flags)?; compiled.findall(&text, vm.heap) } /// `re.sub(pattern, repl, string, count=0, flags=0)` — substitute matches with a replacement. /// /// Compiles the pattern, then delegates to `RePattern::sub`. Replaces occurrences of the /// pattern with the replacement string. When `count` is 0, all matches are replaced. /// Supports both positional and keyword arguments for `count` and `flags`. 
fn call_sub(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let (pos, kwargs) = args.into_parts(); defer_drop_mut!(pos, vm); let kwargs = kwargs.into_iter(); defer_drop_mut!(kwargs, vm); let Some(pattern_val) = pos.next() else { return Err(ExcType::type_error("re.sub() missing required argument: 'pattern'")); }; defer_drop!(pattern_val, vm); let Some(repl_val) = pos.next() else { return Err(ExcType::type_error("re.sub() missing required argument: 'repl'")); }; defer_drop!(repl_val, vm); let Some(string_val) = pos.next() else { return Err(ExcType::type_error("re.sub() missing required argument: 'string'")); }; defer_drop!(string_val, vm); // Extract count and flags from remaining positional args let pos_count = pos.next(); let pos_flags = pos.next(); if let Some(extra) = pos.next() { extra.drop_with_heap(vm); return Err(ExcType::type_error("re.sub() takes at most 5 positional arguments")); } // Extract count and flags from kwargs (if not given positionally) let (mut kw_count, mut kw_flags): (Option, Option) = (None, None); for (key, value) in kwargs { defer_drop!(key, vm); let Some(keyword_name) = key.as_either_str(vm.heap) else { value.drop_with_heap(vm); return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(vm.interns); match key_str { "count" => { if pos_count.is_some() { value.drop_with_heap(vm); return Err(ExcType::type_error("re.sub() got multiple values for argument 'count'")); } kw_count.replace(value).drop_with_heap(vm); } "flags" => { if pos_flags.is_some() { value.drop_with_heap(vm); return Err(ExcType::type_error("re.sub() got multiple values for argument 'flags'")); } kw_flags.replace(value).drop_with_heap(vm); } _ => { value.drop_with_heap(vm); return Err(ExcType::type_error(format!( "'{key_str}' is an invalid keyword argument for re.sub()" ))); } } } let count_val = pos_count.or(kw_count); let flags_val = pos_flags.or(kw_flags); #[expect( clippy::cast_sign_loss, 
clippy::cast_possible_truncation, reason = "n is checked non-negative above" )] let count = match count_val { Some(Value::Int(n)) if n >= 0 => n as usize, Some(Value::Bool(b)) => usize::from(b), Some(Value::Int(_)) => { // Negative count — return original string unchanged let _flags = extract_flags(flags_val, vm.heap)?; let text = value_to_str(string_val, vm.heap, vm.interns)?.into_owned(); let s = Str::new(text); return Ok(Value::Ref(vm.heap.allocate(HeapData::Str(s))?)); } Some(other) => { let t = other.py_type(vm.heap); other.drop_with_heap(vm); return Err(ExcType::type_error(format!( "'{t}' object cannot be interpreted as an integer for 'count' argument" ))); } None => 0, }; let flags = extract_flags(flags_val, vm.heap)?; let pattern = value_to_str(pattern_val, vm.heap, vm.interns)?.into_owned(); // Check that repl is a string — callable replacement is not supported if !repl_val.is_str(vm.heap) { return Err(ExcType::type_error( "callable replacement is not yet supported in re.sub()", )); } let repl = value_to_str(repl_val, vm.heap, vm.interns)?.into_owned(); let text = value_to_str(string_val, vm.heap, vm.interns)?.into_owned(); let compiled = RePattern::compile(pattern, flags)?; compiled.sub(&repl, &text, count, vm.heap) } /// `re.split(pattern, string, maxsplit=0, flags=0)` — split string by pattern occurrences. /// /// Returns a list of strings. If `maxsplit` is non-zero, at most `maxsplit` splits occur /// and the remainder of the string is returned as the final list element. /// Supports both positional and keyword arguments for `maxsplit` and `flags`. 
fn call_split(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let (pos, kwargs) = args.into_parts();
    defer_drop_mut!(pos, vm);
    let kwargs = kwargs.into_iter();
    defer_drop_mut!(kwargs, vm);

    // Required positional arguments; each is handed to `defer_drop!` as soon as it is taken.
    let Some(pattern_val) = pos.next() else {
        return Err(ExcType::type_error("re.split() missing required argument: 'pattern'"));
    };
    defer_drop!(pattern_val, vm);
    let Some(string_val) = pos.next() else {
        return Err(ExcType::type_error("re.split() missing required argument: 'string'"));
    };
    defer_drop!(string_val, vm);

    // Optional `maxsplit` / `flags` given positionally. These are plain owned values with no
    // `defer_drop!` registration, so every early exit below releases them explicitly with
    // `drop_with_heap`, the same way this file releases other owned values.
    let pos_maxsplit = pos.next();
    let pos_flags = pos.next();
    if let Some(extra) = pos.next() {
        extra.drop_with_heap(vm);
        pos_maxsplit.drop_with_heap(vm);
        pos_flags.drop_with_heap(vm);
        return Err(ExcType::type_error("re.split() takes at most 4 positional arguments"));
    }

    // Optional `maxsplit` / `flags` given by keyword. A keyword problem is recorded and the
    // loop is left, so that all four optional values are cleaned up in one place.
    let mut kw_maxsplit: Option<Value> = None;
    let mut kw_flags: Option<Value> = None;
    let mut kw_error = None;
    for (key, value) in kwargs {
        defer_drop!(key, vm);
        let Some(keyword_name) = key.as_either_str(vm.heap) else {
            value.drop_with_heap(vm);
            kw_error = Some(ExcType::type_error("keywords must be strings"));
            break;
        };
        let key_str = keyword_name.as_str(vm.interns);
        match key_str {
            "maxsplit" => {
                if pos_maxsplit.is_some() {
                    value.drop_with_heap(vm);
                    kw_error = Some(ExcType::type_error(
                        "re.split() got multiple values for argument 'maxsplit'",
                    ));
                    break;
                }
                // A repeated keyword replaces the earlier value, which is released here.
                kw_maxsplit.replace(value).drop_with_heap(vm);
            }
            "flags" => {
                if pos_flags.is_some() {
                    value.drop_with_heap(vm);
                    kw_error = Some(ExcType::type_error(
                        "re.split() got multiple values for argument 'flags'",
                    ));
                    break;
                }
                kw_flags.replace(value).drop_with_heap(vm);
            }
            _ => {
                value.drop_with_heap(vm);
                kw_error = Some(ExcType::type_error(format!(
                    "'{key_str}' is an invalid keyword argument for re.split()"
                )));
                break;
            }
        }
    }
    if let Some(err) = kw_error {
        pos_maxsplit.drop_with_heap(vm);
        pos_flags.drop_with_heap(vm);
        kw_maxsplit.drop_with_heap(vm);
        kw_flags.drop_with_heap(vm);
        return Err(err);
    }

    // Resolve `flags` up front so it can be released if `maxsplit` turns out to be invalid.
    let flags_val = pos_flags.or(kw_flags);
    let maxsplit = match extract_maxsplit(pos_maxsplit.or(kw_maxsplit), vm.heap) {
        Ok(n) => n,
        Err(err) => {
            flags_val.drop_with_heap(vm);
            return Err(err);
        }
    };
    // `extract_flags` takes ownership of the flags value and releases it on error.
    let flags = extract_flags(flags_val, vm.heap)?;

    let pattern = value_to_str(pattern_val, vm.heap, vm.interns)?.into_owned();
    let text = value_to_str(string_val, vm.heap, vm.interns)?.into_owned();

    let compiled = RePattern::compile(pattern, flags)?;
    compiled.split(&text, maxsplit, vm.heap)
}

/// `re.finditer(pattern, string, flags=0)` — return all matches as a list.
///
/// Eagerly collects all match objects into a list. When the user iterates with
/// `for m in re.finditer(...)`, the VM's `GetIter` opcode handles iteration
/// over the returned list automatically.
fn call_finditer(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let (pattern, text, flags) = extract_pattern_string_flags(args, "re.finditer", vm)?;
    let compiled = RePattern::compile(pattern, flags)?;
    compiled.finditer(&text, vm.heap)
}

/// `re.escape(pattern)` — escape special regex characters in a string.
///
/// Returns a new string in which every character selected by `should_escape` is
/// prefixed with a backslash; all other characters are copied unchanged.
///
/// Escaped characters: space and `\t \n \v \f \r # $ & ( ) * + - . ? [ \ ] ^ { | } ~`
fn call_escape(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let arg = args.get_one_arg("re.escape", vm.heap)?;
    defer_drop!(arg, vm);
    let text = value_to_str(arg, vm.heap, vm.interns)?.into_owned();

    // Worst case every character gains a backslash, hence twice the input length.
    let mut result = String::with_capacity(text.len() * 2);
    for c in text.chars() {
        if should_escape(c) {
            result.push('\\');
        }
        result.push(c);
    }

    let s = Str::new(result);
    Ok(Value::Ref(vm.heap.allocate(HeapData::Str(s))?))
}

/// Returns whether a character should be escaped by `re.escape()`.
///
/// Matches CPython's `_special_chars_map` — only regex metacharacters and whitespace
/// (tab, newline, vertical tab `\x0b`, form feed `\x0c`, carriage return and space).
fn should_escape(c: char) -> bool {
    matches!(
        c,
        '\t' | '\n'
            | '\x0b'
            | '\x0c'
            | '\r'
            | ' '
            | '#'
            | '$'
            | '&'
            | '('
            | ')'
            | '*'
            | '+'
            | '-'
            | '.'
            | '?'
            | '['
            | '\\'
            | ']'
            | '^'
            | '{'
            | '|'
            | '}'
            | '~'
    )
}

/// Extracts a `maxsplit` value from an optional `Value`.
///
/// Returns 0 if not provided. Negative values are treated as 0 (split all).
fn extract_maxsplit(val: Option, heap: &mut Heap) -> RunResult {
    match val {
        None => Ok(0),
        // NOTE(review): CPython's `re.split` appears to perform no splits at all for a
        // negative `maxsplit`, whereas this maps it to 0 ("no limit") — confirm the
        // divergence is intended.
        Some(Value::Int(n)) if n <= 0 => Ok(0),
        #[expect(
            clippy::cast_sign_loss,
            clippy::cast_possible_truncation,
            reason = "n is checked positive above"
        )]
        Some(Value::Int(n)) => Ok(n as usize),
        // Bools count as ints: True=1, False=0.
        Some(Value::Bool(b)) => Ok(usize::from(b)),
        Some(other) => {
            // Read the type name before releasing the rejected value.
            let t = other.py_type(heap);
            other.drop_with_heap(heap);
            Err(ExcType::type_error(format!("expected int for maxsplit, not {t}")))
        }
    }
}

/// Extracts pattern string and optional flags from arguments for `re.compile()`.
///
/// Accepts 1 or 2 positional arguments: `(pattern)` or `(pattern, flags)`.
/// The pattern must be a string, and flags must be a non-negative integer.
///
/// The pattern is validated before the flags, so an invalid pattern is the error that
/// gets reported when both arguments are wrong. In that case the flags value is released
/// here, because it would otherwise never reach `extract_flags` (which is what normally
/// takes ownership of it).
fn extract_pattern_and_flags(
    args: ArgValues,
    func_name: &str,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<(String, u16)> {
    let (pattern_val, mut flags_val) = args.get_one_two_args(func_name, vm.heap)?;
    defer_drop!(pattern_val, vm);

    // Convert to an owned `String` right away so the result no longer borrows from `vm`.
    let pattern = value_to_str(pattern_val, vm.heap, vm.interns).map(|s| s.into_owned());
    if pattern.is_err() {
        // `take()` leaves `None` behind, so `flags_val` stays usable on the success path.
        flags_val.take().drop_with_heap(vm);
    }
    let pattern = pattern?;

    let flags = extract_flags(flags_val, vm.heap)?;
    Ok((pattern, flags))
}

/// Extracts a flags value from an optional `Value`, validating it is a non-negative integer
/// that fits in a `u16`.
///
/// A missing value yields 0. A non-integer value is released and reported as a `TypeError`.
fn extract_flags(flags_val: Option, heap: &mut Heap) -> RunResult {
    match flags_val {
        Some(Value::Int(n)) => {
            // Negative values and values above `u16::MAX` both fail the conversion.
            u16::try_from(n).map_err(|_| ExcType::type_error("flags must be a non-negative integer"))
        }
        // CPython treats bool as int subclass: True=1, False=0.
        Some(Value::Bool(b)) => Ok(u16::from(b)),
        Some(other) => {
            let t = other.py_type(heap);
            other.drop_with_heap(heap);
            Err(ExcType::type_error(format!("expected int for flags, not {t}")))
        }
        None => Ok(0),
    }
}

/// Extracts pattern, string, and optional flags for `re.search()`, `re.match()`,
/// `re.fullmatch()`, `re.findall()`, and `re.finditer()`.
///
/// Accepts 2 or 3 positional arguments: `(pattern, string)` or `(pattern, string, flags)`.
fn extract_pattern_string_flags(
    args: ArgValues,
    func_name: &str,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<(String, Cow<'static, str>, u16)> {
    let positional = args.into_pos_only(func_name, vm.heap)?;
    defer_drop_mut!(positional, vm);

    // First argument: the pattern.
    let Some(pattern_arg) = positional.next() else {
        return Err(ExcType::type_error(format!(
            "{func_name}() missing required argument: 'pattern'"
        )));
    };
    defer_drop!(pattern_arg, vm);

    // Second argument: the text to search.
    let Some(text_arg) = positional.next() else {
        return Err(ExcType::type_error(format!(
            "{func_name}() missing required argument: 'string'"
        )));
    };
    defer_drop!(text_arg, vm);

    // Optional third argument: flags (validated before the string conversions below).
    let flags = extract_flags(positional.next(), vm.heap)?;

    // Anything beyond three arguments is rejected.
    if let Some(surplus) = positional.next() {
        surplus.drop_with_heap(vm);
        return Err(ExcType::type_error(format!(
            "{func_name}() takes at most 3 positional arguments"
        )));
    }

    // Both strings are copied out, so the returned values do not borrow from the VM.
    let pattern = value_to_str(pattern_arg, vm.heap, vm.interns)?.into_owned();
    let owned_text = value_to_str(text_arg, vm.heap, vm.interns)?.into_owned();
    Ok((pattern, Cow::Owned(owned_text), flags))
}

================================================
FILE: crates/monty/src/modules/sys.rs
================================================
//! Implementation of the `sys` module.
//!
//! Provides a minimal implementation of Python's `sys` module with:
//! - `version`: Python version string (e.g., "3.14.0 (Monty)")
//! - `version_info`: Named tuple (3, 14, 0, 'final', 0)
//! - `platform`: Platform identifier ("monty")
//! - `stdout`: Marker for standard output (no real functionality)
//! - `stderr`: Marker for standard error (no real functionality)

use crate::{
    bytecode::VM,
    heap::{HeapData, HeapId},
    intern::StaticStrings,
    resource::{ResourceError, ResourceTracker},
    types::{Module, NamedTuple},
    value::{Marker, Value},
};

/// Creates the `sys` module and allocates it on the heap.
///
/// Returns a HeapId pointing to the newly allocated module.
///
/// # Panics
///
/// Panics if the required strings have not been pre-interned during prepare phase.
pub fn create_module(vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { let mut module = Module::new(StaticStrings::Sys); // sys.platform module.set_attr(StaticStrings::Platform, StaticStrings::Monty.into(), vm); // sys.stdout / sys.stderr - markers for standard output/error module.set_attr(StaticStrings::Stdout, Value::Marker(Marker(StaticStrings::Stdout)), vm); module.set_attr(StaticStrings::Stderr, Value::Marker(Marker(StaticStrings::Stderr)), vm); // sys.version module.set_attr(StaticStrings::Version, StaticStrings::MontyVersionString.into(), vm); // sys.version_info - named tuple (major=3, minor=14, micro=0, releaselevel='final', serial=0) let version_info = NamedTuple::new( StaticStrings::SysVersionInfo, vec![ StaticStrings::Major.into(), StaticStrings::Minor.into(), StaticStrings::Micro.into(), StaticStrings::Releaselevel.into(), StaticStrings::Serial.into(), ], vec![ Value::Int(3), Value::Int(14), Value::Int(0), Value::InternString(StaticStrings::Final.into()), Value::Int(0), ], ); let version_info_id = vm.heap.allocate(HeapData::NamedTuple(version_info))?; module.set_attr(StaticStrings::VersionInfo, Value::Ref(version_info_id), vm); vm.heap.allocate(HeapData::Module(module)) } ================================================ FILE: crates/monty/src/modules/typing.rs ================================================ //! Implementation of the `typing` module. //! //! Provides a minimal implementation of Python's `typing` module with: //! - `TYPE_CHECKING`: Always False (used for conditional imports) //! - Common type hints as `Marker` values (Any, Optional, List, Dict, etc.) //! //! These markers exist so code that imports typing constructs works correctly, //! though Monty doesn't perform static type checking. use crate::{ bytecode::VM, heap::{HeapData, HeapId}, intern::StaticStrings, resource::{ResourceError, ResourceTracker}, types::Module, value::{Marker, Value}, }; /// Creates the `typing` module and allocates it on the heap. 
/// /// Returns a HeapId pointing to the newly allocated module. /// /// # Panics /// /// Panics if the required strings have not been pre-interned during prepare phase. pub fn create_module(vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { let mut module = Module::new(StaticStrings::Typing); // typing.TYPE_CHECKING - always False module.set_attr(StaticStrings::TypeChecking, Value::Bool(false), vm); // Export all typing markers as module attributes for ss in MARKER_ATTRS { module.set_attr(*ss, Value::Marker(Marker(*ss)), vm); } vm.heap.allocate(HeapData::Module(module)) } /// Typing marker attributes exported by this module. /// /// Each marker wraps its corresponding `StaticStrings` variant as both the /// attribute name and the marker value. const MARKER_ATTRS: &[StaticStrings] = &[ StaticStrings::Any, StaticStrings::Optional, StaticStrings::UnionType, StaticStrings::ListType, StaticStrings::DictType, StaticStrings::TupleType, StaticStrings::SetType, StaticStrings::FrozenSet, StaticStrings::Callable, StaticStrings::Type, StaticStrings::Sequence, StaticStrings::Mapping, StaticStrings::Iterable, StaticStrings::IteratorType, StaticStrings::Generator, StaticStrings::ClassVar, StaticStrings::FinalType, StaticStrings::Literal, StaticStrings::TypeVar, StaticStrings::Generic, StaticStrings::Protocol, StaticStrings::Annotated, StaticStrings::SelfType, StaticStrings::Never, StaticStrings::NoReturn, ]; ================================================ FILE: crates/monty/src/namespace.rs ================================================ /// Unique identifier for variable slots in namespaces (globals and function locals). /// /// Used by the bytecode compiler to emit slot indices for variable access. /// The VM uses these indices to read/write values in the globals vector /// or the stack-inlined locals region. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize)] pub(crate) struct NamespaceId(u32); impl NamespaceId { pub fn new(index: usize) -> Self { Self(index.try_into().expect("Invalid namespace id")) } /// Returns the raw index value. /// /// Used by the bytecode compiler to emit slot indices for variable access. #[inline] pub fn index(self) -> usize { self.0 as usize } } ================================================ FILE: crates/monty/src/object.rs ================================================ use std::{ borrow::Cow, fmt::{self, Write}, hash::{Hash, Hasher}, }; use ahash::AHashSet; use indexmap::IndexMap; use num_bigint::BigInt; use num_traits::Zero; use crate::{ builtins::{Builtins, BuiltinsFunctions}, bytecode::VM, exception_private::{ExcType, SimpleException}, heap::{HeapData, HeapId}, resource::{ResourceError, ResourceTracker}, types::{ LongInt, NamedTuple, Path, PyTrait, Type, allocate_tuple, bytes::{Bytes, bytes_repr}, dict::Dict, list::List, set::{FrozenSet, Set}, str::{Str, StringRepr, string_repr_fmt}, }, value::{EitherStr, Value}, }; /// A Python value that can be passed to or returned from the interpreter. /// /// This is the public-facing type for Python values. It owns all its data and can be /// freely cloned, serialized, or stored. Unlike the internal `Value` type, `MontyObject` /// does not require a heap for operations. /// /// # Input vs Output Variants /// /// Most variants can be used both as inputs (passed to `Executor::run()`) and outputs /// (returned from execution). However: /// - `Repr` is output-only: represents values that have no direct `MontyObject` mapping /// - `Exception` can be used as input (to raise) or output (when code raises) /// /// # Hashability /// /// Only immutable variants (`None`, `Ellipsis`, `Bool`, `Int`, `Float`, `String`, `Bytes`) /// implement `Hash`. Attempting to hash mutable variants (`List`, `Dict`) will panic. 
/// /// # JSON Serialization /// /// `MontyObject` supports JSON serialization with natural mappings: /// /// **Bidirectional (can serialize and deserialize):** /// - `None` ↔ JSON `null` /// - `Bool` ↔ JSON `true`/`false` /// - `Int` ↔ JSON integer /// - `Float` ↔ JSON float /// - `String` ↔ JSON string /// - `List` ↔ JSON array /// - `Dict` ↔ JSON object (keys must be interns) /// /// **Output-only (serialize only, cannot deserialize from JSON):** /// - `Ellipsis` → `{"$ellipsis": true}` /// - `Tuple` → `{"$tuple": [...]}` /// - `Bytes` → `{"$bytes": [...]}` /// - `Exception` → `{"$exception": {"type": "...", "arg": "..."}}` /// - `Repr` → `{"$repr": "..."}` /// /// # Binary Serialization /// /// For binary serialization (e.g., with postcard), `MontyObject` uses derived serde /// with internally tagged format. This differs from the natural JSON format. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum MontyObject { /// Python's `Ellipsis` singleton (`...`). Ellipsis, /// Python's `None` singleton. None, /// Python boolean (`True` or `False`). Bool(bool), /// Python integer (64-bit signed). Int(i64), /// Python arbitrary-precision integer (larger than i64). BigInt(BigInt), /// Python float (64-bit IEEE 754). Float(f64), /// Python string (UTF-8). String(String), /// Python bytes object. Bytes(Vec), /// Python list (mutable sequence). List(Vec), /// Python tuple (immutable sequence). Tuple(Vec), /// Python named tuple (immutable sequence with named fields). /// /// Named tuples behave like tuples but also support attribute access by field name. /// The type_name is used in repr (e.g., "os.stat_result"), and field_names provides /// the attribute names for each position. NamedTuple { /// Type name for repr (e.g., "os.stat_result"). type_name: String, /// Field names in order. field_names: Vec, /// Values in order (same length as field_names). values: Vec, }, /// Python dictionary (insertion-ordered mapping). 
Dict(DictPairs), /// Python set (mutable, unordered collection of unique elements). Set(Vec), /// Python frozenset (immutable, unordered collection of unique elements). FrozenSet(Vec), /// Python exception with type and optional message argument. Exception { /// The exception type (e.g., `ValueError`, `TypeError`). exc_type: ExcType, /// Optional string argument passed to the exception constructor. arg: Option, }, /// A Python type object (e.g., `int`, `str`, `list`). /// /// Returned by the `type()` builtin and can be compared with other types. Type(Type), BuiltinFunction(BuiltinsFunctions), /// Python `pathlib.Path` object (or technically a `PurePosixPath`). /// /// Represents a filesystem path. Can be used both as input (from host) and output. Path(String), /// A dataclass instance with class name, field names, attributes, and mutability. /// /// Method calls are detected lazily at runtime: when `call_attr` is invoked /// on a dataclass and the attribute name is not found in `attrs`, it is /// dispatched as a `MethodCall` to the host (provided the name is public). Dataclass { /// The class name (e.g., "Point", "User"). name: String, /// Identifier of the type, from `id(type(dc))` in python. type_id: u64, /// Declared field names in definition order (for repr). field_names: Vec, /// All attribute name -> value mapping (includes fields and extra attrs). attrs: DictPairs, /// Whether this dataclass instance is immutable. frozen: bool, }, /// An external function provided by the host. /// /// Returned by the host in response to a `NameLookup` to provide a callable /// that the VM can invoke. When called, the VM yields `FunctionCall` to the host. Function { /// The function name (used for repr, error messages, and function call identification). name: String, /// Optional docstring for the function. docstring: Option, }, /// Fallback for values that cannot be represented as other variants. /// /// Contains the `repr()` string of the original value. 
/// /// This is output-only and cannot be used as an input to `Executor::run()`. Repr(String), /// Represents a cycle detected during Value-to-MontyObject conversion. /// /// When converting cyclic structures (e.g., `a = []; a.append(a)`), this variant /// is used to break the infinite recursion. Contains the heap ID and the type-specific /// placeholder string (e.g., `"[...]"` for lists, `"{...}"` for dicts). /// /// This is output-only and cannot be used as an input to `Executor::run()`. Cycle(HeapId, String), } impl fmt::Display for MontyObject { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::String(s) => f.write_str(s), Self::Cycle(_, placeholder) => f.write_str(placeholder), Self::Type(t) => write!(f, ""), Self::Function { name, .. } => write!(f, ""), _ => self.repr_fmt(f), } } } impl MontyObject { /// Converts a `Value` into a `MontyObject`, properly handling reference counting. /// /// Takes ownership of the `Value`, extracts its content to create a MontyObject, /// then properly drops the Value via `drop_with_heap` to maintain reference counting. /// /// The `interns` parameter is used to look up interned string/bytes content. pub(crate) fn new(value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Self { let py_obj = Self::from_value(&value, vm); value.drop_with_heap(vm.heap); py_obj } /// Creates a new `MontyObject` from something that can be converted into a `DictPairs`. pub fn dict(dict: impl Into) -> Self { Self::Dict(dict.into()) } /// Converts this `MontyObject` into an `Value`, allocating on the heap if needed. /// /// Immediate values (None, Bool, Int, Float, Ellipsis, Exception) are created directly. /// Heap-allocated values (String, Bytes, List, Tuple, Dict) are allocated /// via the heap and wrapped in `Value::Ref`. /// /// # Errors /// Returns `InvalidInputError` if called on the `Repr` variant, /// as it is only valid as an output from code execution, not as an input. 
pub(crate) fn to_value(self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { match self { Self::Ellipsis => Ok(Value::Ellipsis), Self::None => Ok(Value::None), Self::Bool(b) => Ok(Value::Bool(b)), Self::Int(i) => Ok(Value::Int(i)), Self::BigInt(bi) => Ok(LongInt::new(bi).into_value(vm.heap)?), Self::Float(f) => Ok(Value::Float(f)), Self::String(s) => Ok(Value::Ref(vm.heap.allocate(HeapData::Str(Str::new(s)))?)), Self::Bytes(b) => Ok(Value::Ref(vm.heap.allocate(HeapData::Bytes(Bytes::new(b)))?)), Self::List(items) => { let values: Vec = items .into_iter() .map(|item| item.to_value(vm)) .collect::>()?; Ok(Value::Ref(vm.heap.allocate(HeapData::List(List::new(values)))?)) } Self::Tuple(items) => { let values = items .into_iter() .map(|item| item.to_value(vm)) .collect::>()?; allocate_tuple(values, vm.heap).map_err(InvalidInputError::Resource) } Self::NamedTuple { type_name, field_names, values, } => { let values: Vec = values .into_iter() .map(|item| item.to_value(vm)) .collect::>()?; let field_name_strs: Vec = field_names.into_iter().map(Into::into).collect(); let nt = NamedTuple::new(type_name, field_name_strs, values); Ok(Value::Ref(vm.heap.allocate(HeapData::NamedTuple(nt))?)) } Self::Dict(map) => { let pairs: Result, InvalidInputError> = map .into_iter() .map(|(k, v)| Ok((k.to_value(vm)?, v.to_value(vm)?))) .collect(); let dict = Dict::from_pairs(pairs?, vm) .map_err(|_| InvalidInputError::invalid_type("unhashable dict keys"))?; Ok(Value::Ref(vm.heap.allocate(HeapData::Dict(dict))?)) } Self::Set(items) => { let mut set = Set::new(); for item in items { let value = item.to_value(vm)?; set.add(value, vm) .map_err(|_| InvalidInputError::invalid_type("unhashable set element"))?; } Ok(Value::Ref(vm.heap.allocate(HeapData::Set(set))?)) } Self::FrozenSet(items) => { let mut set = Set::new(); for item in items { let value = item.to_value(vm)?; set.add(value, vm) .map_err(|_| InvalidInputError::invalid_type("unhashable frozenset element"))?; } // Convert to 
frozenset by extracting storage let frozenset = FrozenSet::from_set(set); Ok(Value::Ref(vm.heap.allocate(HeapData::FrozenSet(frozenset))?)) } Self::Exception { exc_type, arg } => { let exc = SimpleException::new(exc_type, arg); Ok(Value::Ref(vm.heap.allocate(HeapData::Exception(exc))?)) } Self::Dataclass { name, type_id, field_names, attrs, frozen, } => { use crate::types::Dataclass; // Convert attrs to Dict let pairs: Result, InvalidInputError> = attrs .into_iter() .map(|(k, v)| Ok((k.to_value(vm)?, v.to_value(vm)?))) .collect(); let dict = Dict::from_pairs(pairs?, vm) .map_err(|_| InvalidInputError::invalid_type("unhashable dataclass attr keys"))?; let dc = Dataclass::new(name, type_id, field_names, dict, frozen); Ok(Value::Ref(vm.heap.allocate(HeapData::Dataclass(dc))?)) } Self::Path(s) => Ok(Value::Ref(vm.heap.allocate(HeapData::Path(Path::new(s)))?)), Self::Type(t) => Ok(Value::Builtin(Builtins::Type(t))), Self::BuiltinFunction(f) => Ok(Value::Builtin(Builtins::Function(f))), Self::Function { name, .. } => { // Try to intern the function name. If the name is already interned // (common case: the function has the same name as the variable it was // assigned to), use the lightweight `Value::ExtFunction(StringId)`. // Otherwise, allocate a `HeapData::ExtFunction(String)` on the heap. if let Some(string_id) = vm.interns.get_string_id_by_name(&name) { Ok(Value::ExtFunction(string_id)) } else { Ok(Value::Ref(vm.heap.allocate(HeapData::ExtFunction(name))?)) } } Self::Repr(_) => Err(InvalidInputError::invalid_type("'Repr' is not a valid input value")), Self::Cycle(_, _) => Err(InvalidInputError::invalid_type("'Cycle' is not a valid input value")), } } fn from_value(object: &Value, vm: &VM<'_, '_, impl ResourceTracker>) -> Self { let mut visited = AHashSet::new(); Self::from_value_inner(object, vm, &mut visited) } /// Internal helper for converting Value to MontyObject with cycle detection. /// /// The `visited` set tracks HeapIds we're currently processing. 
When we encounter /// a HeapId already in the set, we've found a cycle and return `MontyObject::Cycle` /// with an appropriate placeholder string. /// /// Recursion depth is tracked via `heap.incr_recursion_depth_for_repr()`. fn from_value_inner(object: &Value, vm: &VM<'_, '_, impl ResourceTracker>, visited: &mut AHashSet) -> Self { // Check depth limit before processing let Some(token) = vm.heap.incr_recursion_depth_for_repr() else { return Self::Repr("".to_owned()); }; crate::defer_drop_immutable_heap!(token, vm); match object { Value::Undefined => panic!("Undefined found while converting to MontyObject"), Value::Ellipsis => Self::Ellipsis, Value::None => Self::None, Value::Bool(b) => Self::Bool(*b), Value::Int(i) => Self::Int(*i), Value::Float(f) => Self::Float(*f), Value::InternString(string_id) => Self::String(vm.interns.get_str(*string_id).to_owned()), Value::InternBytes(bytes_id) => Self::Bytes(vm.interns.get_bytes(*bytes_id).to_owned()), Value::Ref(id) => { // Check for cycle if visited.contains(id) { // Cycle detected - return appropriate placeholder return match vm.heap.get(*id) { HeapData::List(_) => Self::Cycle(*id, "[...]".to_owned()), HeapData::Tuple(_) | HeapData::NamedTuple(_) => Self::Cycle(*id, "(...)".to_owned()), HeapData::Dict(_) => Self::Cycle(*id, "{...}".to_owned()), _ => Self::Cycle(*id, "...".to_owned()), }; } // Mark this id as being visited visited.insert(*id); let result = match vm.heap.get(*id) { HeapData::Str(s) => Self::String(s.as_str().to_owned()), HeapData::Bytes(b) => Self::Bytes(b.as_slice().to_owned()), HeapData::List(list) => Self::List( list.as_slice() .iter() .map(|obj| Self::from_value_inner(obj, vm, visited)) .collect(), ), HeapData::Tuple(tuple) => Self::Tuple( tuple .as_slice() .iter() .map(|obj| Self::from_value_inner(obj, vm, visited)) .collect(), ), HeapData::NamedTuple(nt) => Self::NamedTuple { type_name: nt.name(vm.interns).to_owned(), field_names: nt .field_names() .iter() .map(|field_name| 
field_name.as_str(vm.interns).to_owned()) .collect(), values: nt .as_vec() .iter() .map(|obj| Self::from_value_inner(obj, vm, visited)) .collect(), }, HeapData::Dict(dict) => Self::Dict(DictPairs( dict.into_iter() .map(|(k, v)| { ( Self::from_value_inner(k, vm, visited), Self::from_value_inner(v, vm, visited), ) }) .collect(), )), HeapData::Set(set) => Self::Set( set.storage() .iter() .map(|obj| Self::from_value_inner(obj, vm, visited)) .collect(), ), HeapData::FrozenSet(frozenset) => Self::FrozenSet( frozenset .storage() .iter() .map(|obj| Self::from_value_inner(obj, vm, visited)) .collect(), ), // Cells are internal closure implementation details HeapData::Cell(cell) => { // Show the cell's contents Self::from_value_inner(&cell.0, vm, visited) } HeapData::Closure(..) | HeapData::FunctionDefaults(..) => { Self::Repr(object.py_repr(vm).into_owned()) } HeapData::Range(range) => { // Represent Range as a repr string since MontyObject doesn't have a Range variant let mut s = String::new(); let _ = range.py_repr_fmt(&mut s, vm, visited); Self::Repr(s) } HeapData::Exception(exc) => Self::Exception { exc_type: exc.exc_type(), arg: exc.arg().map(ToString::to_string), }, HeapData::Dataclass(dc) => { // Convert attrs to DictPairs let attrs = DictPairs( dc.attrs() .into_iter() .map(|(k, v)| { ( Self::from_value_inner(k, vm, visited), Self::from_value_inner(v, vm, visited), ) }) .collect(), ); Self::Dataclass { name: dc.name(vm.interns).to_owned(), type_id: dc.type_id(), field_names: dc.field_names().to_vec(), attrs, frozen: dc.is_frozen(), } } HeapData::Iter(_) => { // Iterators are internal objects - represent as a type string Self::Repr("".to_owned()) } HeapData::DictKeysView(_) | HeapData::DictItemsView(_) | HeapData::DictValuesView(_) => { Self::Repr(object.py_repr(vm).into_owned()) } HeapData::LongInt(li) => Self::BigInt(li.inner().clone()), HeapData::Module(m) => { // Modules are represented as a repr string Self::Repr(format!("", vm.interns.get_str(m.name()))) } 
HeapData::Slice(slice) => { // Represent Slice as a repr string since MontyObject doesn't have a Slice variant let mut s = String::new(); let _ = slice.py_repr_fmt(&mut s, vm, visited); Self::Repr(s) } HeapData::Coroutine(coro) => { // Coroutines are represented as a repr string let func = vm.interns.get_function(coro.func_id); let name = vm.interns.get_str(func.name.name_id); Self::Repr(format!("")) } HeapData::GatherFuture(gather) => { // GatherFutures are represented as a repr string Self::Repr(format!("", gather.item_count())) } HeapData::Path(path) => Self::Path(path.as_str().to_owned()), HeapData::RePattern(_) | HeapData::ReMatch(_) => Self::Repr(object.py_repr(vm).into_owned()), HeapData::ExtFunction(name) => Self::Function { name: name.clone(), docstring: None, }, }; // Remove from visited set after processing visited.remove(id); result } Value::Builtin(Builtins::Type(t)) => Self::Type(*t), Value::Builtin(Builtins::ExcType(e)) => Self::Type(Type::Exception(*e)), Value::Builtin(Builtins::Function(f)) => Self::BuiltinFunction(*f), #[cfg(feature = "ref-count-panic")] Value::Dereferenced => panic!("Dereferenced found while converting to MontyObject"), _ => Self::Repr(object.py_repr(vm).into_owned()), } } /// Returns the Python `repr()` string for this value. /// /// # Panics /// Could panic if out of memory. 
#[must_use]
    pub fn py_repr(&self) -> String {
        let mut s = String::new();
        self.repr_fmt(&mut s).expect("Unable to format repr display value");
        s
    }

    /// Writes the Python `repr()` of this value into `f`.
    ///
    /// Containers recurse into their elements. Non-finite floats are written the way
    /// Python spells them (`nan`, `inf`, `-inf`) and one-element tuples keep their
    /// trailing comma (`(x,)`).
    fn repr_fmt(&self, f: &mut impl Write) -> fmt::Result {
        match self {
            Self::Ellipsis => f.write_str("Ellipsis"),
            Self::None => f.write_str("None"),
            Self::Bool(true) => f.write_str("True"),
            Self::Bool(false) => f.write_str("False"),
            Self::Int(v) => write!(f, "{v}"),
            Self::BigInt(v) => write!(f, "{v}"),
            Self::Float(v) => {
                // Rust prints `NaN` / `inf` / `-inf`; none of them contains a '.', so the
                // generic path below would emit `NaN.0` / `inf.0`. Python's repr is
                // `nan` / `inf` / `-inf`.
                if v.is_nan() {
                    return f.write_str("nan");
                }
                if v.is_infinite() {
                    return f.write_str(if v.is_sign_negative() { "-inf" } else { "inf" });
                }
                let s = v.to_string();
                f.write_str(&s)?;
                // Integral floats print without a fractional part in Rust; Python shows `.0`.
                if !s.contains('.') {
                    f.write_str(".0")?;
                }
                Ok(())
            }
            Self::String(s) => string_repr_fmt(s, f),
            Self::Bytes(b) => f.write_str(&bytes_repr(b)),
            Self::List(l) => {
                f.write_char('[')?;
                Self::repr_join(l.iter(), f)?;
                f.write_char(']')
            }
            Self::Tuple(t) => {
                f.write_char('(')?;
                Self::repr_join(t.iter(), f)?;
                // Python writes a one-element tuple as `(x,)`, not `(x)`.
                if t.len() == 1 {
                    f.write_char(',')?;
                }
                f.write_char(')')
            }
            Self::NamedTuple {
                type_name,
                field_names,
                values,
            } => {
                // Format: type_name(field1=value1, field2=value2, ...)
                f.write_str(type_name)?;
                f.write_char('(')?;
                for (idx, (name, value)) in field_names.iter().zip(values).enumerate() {
                    if idx > 0 {
                        f.write_str(", ")?;
                    }
                    f.write_str(name)?;
                    f.write_char('=')?;
                    value.repr_fmt(f)?;
                }
                f.write_char(')')
            }
            Self::Dict(d) => {
                f.write_char('{')?;
                for (idx, (k, v)) in d.iter().enumerate() {
                    if idx > 0 {
                        f.write_str(", ")?;
                    }
                    k.repr_fmt(f)?;
                    f.write_str(": ")?;
                    v.repr_fmt(f)?;
                }
                f.write_char('}')
            }
            Self::Set(s) => {
                // `{}` would read as an empty dict, so the empty set is spelled `set()`.
                if s.is_empty() {
                    f.write_str("set()")
                } else {
                    f.write_char('{')?;
                    Self::repr_join(s.iter(), f)?;
                    f.write_char('}')
                }
            }
            Self::FrozenSet(fs) => {
                f.write_str("frozenset(")?;
                if !fs.is_empty() {
                    f.write_char('{')?;
                    Self::repr_join(fs.iter(), f)?;
                    f.write_char('}')?;
                }
                f.write_char(')')
            }
            Self::Exception { exc_type, arg } => {
                let type_str: &'static str = exc_type.into();
                write!(f, "{type_str}(")?;
                if let Some(arg) = &arg {
                    string_repr_fmt(arg, f)?;
                }
                f.write_char(')')
            }
            Self::Dataclass {
                name,
                field_names,
                attrs,
                ..
            } => {
                // Format: ClassName(field1=value1, field2=value2, ...)
                // Only declared fields are shown, not extra attributes
                f.write_str(name)?;
                f.write_char('(')?;
                for (idx, field_name) in field_names.iter().enumerate() {
                    if idx > 0 {
                        f.write_str(", ")?;
                    }
                    f.write_str(field_name)?;
                    f.write_char('=')?;
                    // Look up value in attrs
                    let key = Self::String(field_name.clone());
                    if let Some(value) = attrs.iter().find(|(k, _)| k == &key).map(|(_, v)| v) {
                        value.repr_fmt(f)?;
                    } else {
                        // NOTE(review): this placeholder literal is empty in this copy of the
                        // file; an angle-bracketed marker appears to have been lost - restore
                        // it from upstream.
                        f.write_str("")?;
                    }
                }
                f.write_char(')')
            }
            Self::Path(p) => write!(f, "PosixPath('{p}')"),
            // NOTE(review): the next three format strings are empty in this copy of the file
            // and leave `t` / `func` / `name` unused; their `<...>` text appears to have been
            // lost - restore the literals from upstream.
            Self::Type(t) => write!(f, ""),
            Self::BuiltinFunction(func) => write!(f, ""),
            Self::Function { name, .. } => write!(f, ""),
            Self::Repr(s) => write!(f, "Repr({})", StringRepr(s)),
            Self::Cycle(_, placeholder) => f.write_str(placeholder),
        }
    }

    /// Writes the reprs of `items` separated by `", "` (no surrounding brackets).
    fn repr_join<'a>(items: impl Iterator<Item = &'a Self>, f: &mut impl Write) -> fmt::Result {
        for (idx, item) in items.enumerate() {
            if idx > 0 {
                f.write_str(", ")?;
            }
            item.repr_fmt(f)?;
        }
        Ok(())
    }

    /// Returns `true` if this value is "truthy" according to Python's truth testing rules.
    ///
    /// The following values are considered falsy:
    /// - `None`
    /// - `False`
    /// - Zero numeric values (`0`, `0.0`, a zero `BigInt`)
    /// - Empty sequences and collections (`""`, `b""`, `[]`, `()`, `{}`, empty sets,
    ///   and named tuples without values)
    ///
    /// All other values are truthy, including `Ellipsis`, `Exception`, `Path`,
    /// `Dataclass`, `Type`, functions, `Repr` and `Cycle`.
    #[must_use]
    pub fn is_truthy(&self) -> bool {
        match self {
            Self::None => false,
            Self::Ellipsis => true,
            Self::Bool(b) => *b,
            Self::Int(i) => *i != 0,
            Self::BigInt(bi) => !bi.is_zero(),
            Self::Float(f) => *f != 0.0,
            Self::String(s) => !s.is_empty(),
            Self::Bytes(b) => !b.is_empty(),
            Self::List(l) => !l.is_empty(),
            Self::Tuple(t) => !t.is_empty(),
            Self::NamedTuple { values, .. } => !values.is_empty(),
            Self::Dict(d) => !d.is_empty(),
            Self::Set(s) => !s.is_empty(),
            Self::FrozenSet(fs) => !fs.is_empty(),
            Self::Exception { .. } => true,
            Self::Path(_) => true,          // Path instances are always truthy
            Self::Dataclass { .. } => true, // Dataclass instances are always truthy
            Self::Type(_) | Self::BuiltinFunction(_) | Self::Function { .. } | Self::Repr(_) | Self::Cycle(_, _) => {
                true
            }
        }
    }

    /// Returns a static type name for this value (e.g., `"int"`, `"str"`, `"list"`).
    ///
    /// For variants with a direct Python counterpart this is the `type(x).__name__`
    /// spelling; `NamedTuple`, `Dataclass`, `Repr` and `Cycle` use fixed generic labels.
    #[must_use]
    pub fn type_name(&self) -> &'static str {
        match self {
            Self::None => "NoneType",
            Self::Ellipsis => "ellipsis",
            Self::Bool(_) => "bool",
            Self::Int(_) | Self::BigInt(_) => "int",
            Self::Float(_) => "float",
            Self::String(_) => "str",
            Self::Bytes(_) => "bytes",
            Self::List(_) => "list",
            Self::Tuple(_) => "tuple",
            Self::NamedTuple { .. } => "namedtuple",
            Self::Dict(_) => "dict",
            Self::Set(_) => "set",
            Self::FrozenSet(_) => "frozenset",
            Self::Exception { .. } => "Exception",
            Self::Path(_) => "PosixPath",
            Self::Dataclass { .. } => "dataclass",
            Self::Type(_) => "type",
            Self::BuiltinFunction(_) => "builtin_function_or_method",
            Self::Function { .. } => "function",
            Self::Repr(_) => "repr",
            Self::Cycle(_, _) => "cycle",
        }
    }
}

/// Hashing is supported for `Ellipsis`, `None`, `Bool`, `Int`, `BigInt`, `Float`,
/// `String`, `Bytes`, `Path` and `Type`.
///
/// # Panics
///
/// Panics for every other variant.
impl Hash for MontyObject {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Hash the discriminant first (but Int and BigInt share discriminant for consistency)
        match self {
            Self::Int(_) | Self::BigInt(_) => {
                // Use Int discriminant for both to maintain hash consistency
                std::mem::discriminant(&Self::Int(0)).hash(state);
            }
            _ => std::mem::discriminant(self).hash(state),
        }

        match self {
            Self::Ellipsis | Self::None => {}
            Self::Bool(bool) => bool.hash(state),
            Self::Int(i) => i.hash(state),
            Self::BigInt(bi) => {
                // For hash consistency, if BigInt fits in i64, hash as i64
                if let Ok(i) = i64::try_from(bi) {
                    i.hash(state);
                } else {
                    // For large BigInts, hash the signed bytes
                    bi.to_signed_bytes_le().hash(state);
                }
            }
            Self::Float(f) => f.to_bits().hash(state),
            Self::String(string) => string.hash(state),
            Self::Bytes(bytes) => bytes.hash(state),
            Self::Path(path) => path.hash(state),
            Self::Type(t) => t.to_string().hash(state),
            Self::Cycle(_, _) => panic!("cycle values are not hashable"),
            _ => panic!("{} python values are not hashable", self.type_name()),
        }
    }
}

/// Structural equality between `MontyObject` values.
///
/// `Int` and `BigInt` compare by numeric value, floats compare by bit pattern (to agree
/// with `Hash`), a `NamedTuple` equals a `Tuple` holding the same values, and `Cycle`
/// values compare by heap id only. Variants of different kinds are otherwise unequal.
impl PartialEq for MontyObject {
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Ellipsis, Self::Ellipsis) => true,
            (Self::None, Self::None) => true,
            (Self::Bool(a), Self::Bool(b)) => a == b,
            (Self::Int(a), Self::Int(b)) => a == b,
            (Self::BigInt(a), Self::BigInt(b)) => a == b,
            // Cross-compare Int and BigInt
            (Self::Int(a), Self::BigInt(b)) | (Self::BigInt(b), Self::Int(a)) => BigInt::from(*a) == *b,
            // Use to_bits() for float comparison to be consistent with Hash
            (Self::Float(a), Self::Float(b)) => a.to_bits() == b.to_bits(),
            (Self::String(a), Self::String(b)) => a == b,
            (Self::Bytes(a), Self::Bytes(b)) => a == b,
            (Self::List(a), Self::List(b)) => a == b,
            (Self::Tuple(a), Self::Tuple(b)) => a == b,
            (
                Self::NamedTuple {
                    type_name: a_type,
                    field_names: a_fields,
                    values: a_values,
                },
                Self::NamedTuple {
                    type_name: b_type,
                    field_names: b_fields,
                    values: b_values,
                },
            ) => a_type == b_type && a_fields == b_fields && a_values == b_values,
            // NamedTuple can compare with Tuple by values only (matching Python semantics)
            (Self::NamedTuple { values, .. }, Self::Tuple(t)) | (Self::Tuple(t), Self::NamedTuple { values, .. }) => {
                values == t
            }
            (Self::Dict(a), Self::Dict(b)) => a == b,
            (Self::Set(a), Self::Set(b)) => a == b,
            (Self::FrozenSet(a), Self::FrozenSet(b)) => a == b,
            (
                Self::Exception {
                    exc_type: a_type,
                    arg: a_arg,
                },
                Self::Exception {
                    exc_type: b_type,
                    arg: b_arg,
                },
            ) => a_type == b_type && a_arg == b_arg,
            (
                Self::Dataclass {
                    name: a_name,
                    type_id: a_type_id,
                    field_names: a_field_names,
                    attrs: a_attrs,
                    frozen: a_frozen,
                },
                Self::Dataclass {
                    name: b_name,
                    type_id: b_type_id,
                    field_names: b_field_names,
                    attrs: b_attrs,
                    frozen: b_frozen,
                },
            ) => {
                a_name == b_name
                    && a_type_id == b_type_id
                    && a_field_names == b_field_names
                    && a_attrs == b_attrs
                    && a_frozen == b_frozen
            }
            (Self::Path(a), Self::Path(b)) => a == b,
            (
                Self::Function {
                    name: a_name,
                    docstring: a_doc,
                },
                Self::Function {
                    name: b_name,
                    docstring: b_doc,
                },
            ) => a_name == b_name && a_doc == b_doc,
            (Self::Repr(a), Self::Repr(b)) => a == b,
            (Self::Cycle(a, _), Self::Cycle(b, _)) => a == b,
            (Self::Type(a), Self::Type(b)) => a == b,
            // Without this arm a builtin function is not equal to itself, which breaks the
            // reflexivity promised by the `Eq` impl below.
            // NOTE(review): relies on `BuiltinsFunctions: PartialEq` - confirm.
            (Self::BuiltinFunction(a), Self::BuiltinFunction(b)) => a == b,
            _ => false,
        }
    }
}

impl Eq for MontyObject {}

impl AsRef<MontyObject> for MontyObject {
    fn as_ref(&self) -> &Self {
        self
    }
}

/// Error returned when a `MontyObject` cannot be converted to the requested Rust type.
///
/// This error is returned by the `TryFrom` implementations when attempting to extract
/// a specific type from a `MontyObject` that holds a different variant.
#[derive(Debug)]
pub struct ConversionError {
    /// The type name that was expected (e.g., "int", "str").
    pub expected: &'static str,
    /// The actual type name of the `MontyObject` (e.g., "list", "NoneType").
    pub actual: &'static str,
}

impl ConversionError {
    /// Creates a new `ConversionError` with the expected and actual type names.
    #[must_use]
    pub fn new(expected: &'static str, actual: &'static str) -> Self {
        Self { expected, actual }
    }
}

impl fmt::Display for ConversionError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "expected {}, got {}", self.expected, self.actual)
    }
}

impl std::error::Error for ConversionError {}

/// Error returned when a `MontyObject` cannot be used as an input to code execution.
///
/// This can occur when:
/// - A `MontyObject` variant (like `Repr`) is only valid as an output, not an input
/// - A resource limit (memory, allocations) is exceeded during conversion
#[derive(Debug, Clone)]
pub enum InvalidInputError {
    /// The input type is not valid for conversion to a runtime Value.
    /// Message explaining why the type is invalid.
    InvalidType(Cow<'static, str>),
    /// A resource limit was exceeded during conversion.
    Resource(ResourceError),
}

impl InvalidInputError {
    /// Creates an `InvalidInputError::InvalidType` carrying the given message.
    #[must_use]
    pub fn invalid_type(msg: impl Into<Cow<'static, str>>) -> Self {
        Self::InvalidType(msg.into())
    }
}

impl fmt::Display for InvalidInputError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidType(msg) => write!(f, "{msg}"),
            Self::Resource(e) => write!(f, "{e}"),
        }
    }
}

impl std::error::Error for InvalidInputError {}

impl From<crate::resource::ResourceError> for InvalidInputError {
    fn from(err: crate::resource::ResourceError) -> Self {
        Self::Resource(err)
    }
}

/// Attempts to convert a MontyObject to an i64 integer.
/// Returns an error if the object is not an Int variant.
impl TryFrom<&MontyObject> for i64 {
    type Error = ConversionError;

    fn try_from(value: &MontyObject) -> Result<Self, Self::Error> {
        match value {
            MontyObject::Int(i) => Ok(*i),
            _ => Err(ConversionError::new("int", value.type_name())),
        }
    }
}

/// Attempts to convert a MontyObject to an f64 float.
/// Returns an error if the object is not a Float or Int variant.
/// Int values are automatically converted to f64 to match python's behavior.
impl TryFrom<&MontyObject> for f64 {
    type Error = ConversionError;

    fn try_from(value: &MontyObject) -> Result<Self, Self::Error> {
        match value {
            MontyObject::Float(f) => Ok(*f),
            MontyObject::Int(i) => Ok(*i as Self),
            _ => Err(ConversionError::new("float", value.type_name())),
        }
    }
}

/// Attempts to convert a MontyObject to a String.
/// Returns an error if the object is not a `String` variant.
impl TryFrom<&MontyObject> for String {
    type Error = ConversionError;

    fn try_from(value: &MontyObject) -> Result<Self, Self::Error> {
        if let MontyObject::String(s) = value {
            Ok(s.clone())
        } else {
            Err(ConversionError::new("str", value.type_name()))
        }
    }
}

/// Attempts to convert a `MontyObject` to a bool.
/// Returns an error if the object is not a `Bool` variant.
/// Note: This does NOT use Python's truthiness rules (use `MontyObject::is_truthy` for that).
impl TryFrom<&MontyObject> for bool {
    type Error = ConversionError;

    fn try_from(value: &MontyObject) -> Result<Self, Self::Error> {
        match value {
            MontyObject::Bool(b) => Ok(*b),
            _ => Err(ConversionError::new("bool", value.type_name())),
        }
    }
}

/// A collection of key-value pairs representing Python dictionary contents.
///
/// Used internally by `MontyObject::Dict` to store dictionary entries while preserving
/// insertion order. Keys and values are both `MontyObject` instances.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)] pub struct DictPairs(Vec<(MontyObject, MontyObject)>); impl From> for DictPairs { fn from(pairs: Vec<(MontyObject, MontyObject)>) -> Self { Self(pairs) } } impl From> for DictPairs { fn from(map: IndexMap) -> Self { Self(map.into_iter().collect()) } } impl From for IndexMap { fn from(pairs: DictPairs) -> Self { pairs.into_iter().collect() } } impl IntoIterator for DictPairs { type Item = (MontyObject, MontyObject); type IntoIter = std::vec::IntoIter; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } impl<'a> IntoIterator for &'a DictPairs { type Item = &'a (MontyObject, MontyObject); type IntoIter = std::slice::Iter<'a, (MontyObject, MontyObject)>; fn into_iter(self) -> Self::IntoIter { self.0.iter() } } impl FromIterator<(MontyObject, MontyObject)> for DictPairs { fn from_iter>(iter: T) -> Self { Self(iter.into_iter().collect()) } } impl DictPairs { fn is_empty(&self) -> bool { self.0.is_empty() } fn iter(&self) -> impl Iterator { self.0.iter() } } ================================================ FILE: crates/monty/src/os.rs ================================================ //! OS-level operations that require host system access. //! //! This module defines the `OsFunction` enum, which represents operations that //! cannot be performed in a sandboxed environment. When a type method needs to //! perform one of these operations, it returns an `CallResult::OsCall` variant //! with the function and arguments. The VM then yields control to the host via //! `FrameExit::OsCall`, allowing the host to execute the operation and resume. //! //! This design enables sandboxed execution: the interpreter never directly performs //! I/O, filesystem, or network operations. Instead, the host decides whether to //! permit and execute such operations. use crate::{MontyObject, intern::StaticStrings}; /// OS operations that require host system access. 
/// /// These represent operations that Monty cannot perform in isolation because /// they require interacting with the operating system (filesystem, network, etc.). /// The host application decides whether to permit and execute these operations. /// /// # Extension /// /// When adding new operations, add both the variant here and update the /// `TryFrom` implementation to map method names to operations. // #[repr(u8)] #[derive( Debug, Clone, Copy, PartialEq, Eq, Hash, strum::EnumString, strum::Display, serde::Serialize, serde::Deserialize, )] pub enum OsFunction { /// Check if a path exists #[strum(serialize = "Path.exists")] Exists, /// Check if path is a file #[strum(serialize = "Path.is_file")] IsFile, /// Check if path is a directory #[strum(serialize = "Path.is_dir")] IsDir, /// Check if path is a symbolic link #[strum(serialize = "Path.is_symlink")] IsSymlink, /// Read file contents as text #[strum(serialize = "Path.read_text")] ReadText, /// Read file contents as bytes #[strum(serialize = "Path.read_bytes")] ReadBytes, /// Write text to file #[strum(serialize = "Path.write_text")] WriteText, /// Write bytes to file #[strum(serialize = "Path.write_bytes")] WriteBytes, /// Create directory #[strum(serialize = "Path.mkdir")] Mkdir, /// Remove file #[strum(serialize = "Path.unlink")] Unlink, /// Remove directory #[strum(serialize = "Path.rmdir")] Rmdir, /// List directory contents #[strum(serialize = "Path.iterdir")] Iterdir, /// Get file stats #[strum(serialize = "Path.stat")] Stat, /// Rename/move file #[strum(serialize = "Path.rename")] Rename, /// Get resolved absolute path #[strum(serialize = "Path.resolve")] Resolve, /// Get absolute path (without resolving symlinks) #[strum(serialize = "Path.absolute")] Absolute, /// Get an environment variable value #[strum(serialize = "os.getenv")] Getenv, /// Get the entire environment as a dictionary #[strum(serialize = "os.environ")] GetEnviron, } impl TryFrom for OsFunction { type Error = (); /// Attempts to 
convert a method name (as a `StaticStrings` variant) to an `OsFunction`. /// /// Returns `Err(())` if the method name doesn't correspond to an OS operation. fn try_from(method: StaticStrings) -> Result { match method { // Read operations StaticStrings::Exists => Ok(Self::Exists), StaticStrings::IsFile => Ok(Self::IsFile), StaticStrings::IsDir => Ok(Self::IsDir), StaticStrings::IsSymlink => Ok(Self::IsSymlink), StaticStrings::ReadText => Ok(Self::ReadText), StaticStrings::ReadBytes => Ok(Self::ReadBytes), StaticStrings::StatMethod => Ok(Self::Stat), StaticStrings::Iterdir => Ok(Self::Iterdir), StaticStrings::Resolve => Ok(Self::Resolve), StaticStrings::Absolute => Ok(Self::Absolute), // Write operations StaticStrings::WriteText => Ok(Self::WriteText), StaticStrings::WriteBytes => Ok(Self::WriteBytes), StaticStrings::Mkdir => Ok(Self::Mkdir), StaticStrings::Unlink => Ok(Self::Unlink), StaticStrings::Rmdir => Ok(Self::Rmdir), StaticStrings::Rename => Ok(Self::Rename), _ => Err(()), } } } // ============================================================================= // stat_result builders // ============================================================================= // These functions create MontyObject::NamedTuple values that match Python's // os.stat_result structure. The stat_result has 10 fields: // st_mode, st_ino, st_dev, st_nlink, st_uid, st_gid, st_size, st_atime, st_mtime, st_ctime const STAT_RESULT_TYPE_NAME: &str = "StatResult"; const STAT_RESULT_FIELDS: &[&str] = &[ "st_mode", "st_ino", "st_dev", "st_nlink", "st_uid", "st_gid", "st_size", "st_atime", "st_mtime", "st_ctime", ]; /// Creates a stat_result for a regular file. /// /// The file type bits (`0o100_000`) are automatically added if not present. /// /// # Arguments /// * `mode` - File permissions as octal. 
Common values: /// - `0o644` - rw-r--r-- (owner read/write, others read) /// - `0o600` - rw------- (owner read/write only) /// - `0o755` - rwxr-xr-x (executable, owner full, others read/execute) /// - `0o100644` - same as 0o644 with explicit file type bits /// * `size` - File size in bytes /// * `mtime` - Modification time as Unix timestamp #[must_use] pub fn file_stat(mode: i64, size: i64, mtime: f64) -> MontyObject { // If only permission bits provided (no file type), add regular file type let mode = if mode < 0o1000 { mode | 0o100_000 } else { mode }; stat_result(mode, 0, 0, 1, 0, 0, size, mtime, mtime, mtime) } /// Creates a stat_result for a directory. /// /// The directory type bits (`0o040_000`) are automatically added if not present. /// /// # Arguments /// * `mode` - Directory permissions as octal. Common values: /// - `0o755` - rwxr-xr-x (owner full, others read/execute) /// - `0o700` - rwx------ (owner only) /// - `0o040755` - same as 0o755 with explicit directory type bits /// * `mtime` - Modification time as Unix timestamp #[must_use] pub fn dir_stat(mode: i64, mtime: f64) -> MontyObject { // If only permission bits provided (no file type), add directory type let mode = if mode < 0o1000 { mode | 0o040_000 } else { mode }; stat_result(mode, 0, 0, 2, 0, 0, 4096, mtime, mtime, mtime) } /// Creates a stat_result for a symbolic link. /// /// The symlink type bits (`0o120_000`) are automatically added if not present. /// /// # Arguments /// * `mode` - Symlink permissions as octal. 
Common values: /// - `0o777` - rwxrwxrwx (symlinks typically have full permissions) /// - `0o120777` - same as 0o777 with explicit symlink type bits /// * `mtime` - Modification time as Unix timestamp #[must_use] pub fn symlink_stat(mode: i64, mtime: f64) -> MontyObject { // If only permission bits provided (no file type), add symlink type let mode = if mode < 0o1000 { mode | 0o120_000 } else { mode }; stat_result(mode, 0, 0, 1, 0, 0, 0, mtime, mtime, mtime) } /// Creates a full stat_result with all 10 fields specified. /// /// This is the low-level builder; prefer `file_stat()`, `dir_stat()`, or `symlink_stat()` /// for common cases. #[must_use] #[expect(clippy::too_many_arguments)] pub fn stat_result( st_mode: i64, st_ino: i64, st_dev: i64, st_nlink: i64, st_uid: i64, st_gid: i64, st_size: i64, st_atime: f64, st_mtime: f64, st_ctime: f64, ) -> MontyObject { MontyObject::NamedTuple { type_name: STAT_RESULT_TYPE_NAME.to_owned(), field_names: STAT_RESULT_FIELDS.iter().map(|s| (*s).to_owned()).collect(), values: vec![ MontyObject::Int(st_mode), MontyObject::Int(st_ino), MontyObject::Int(st_dev), MontyObject::Int(st_nlink), MontyObject::Int(st_uid), MontyObject::Int(st_gid), MontyObject::Int(st_size), MontyObject::Float(st_atime), MontyObject::Float(st_mtime), MontyObject::Float(st_ctime), ], } } ================================================ FILE: crates/monty/src/parse.rs ================================================ use std::{borrow::Cow, fmt}; use num_bigint::BigInt; use ruff_python_ast::{ self as ast, BoolOp, CmpOp, ConversionFlag as RuffConversionFlag, ElifElseClause, Expr as AstExpr, InterpolatedStringElement, Keyword, Number, Operator as AstOperator, ParameterWithDefault, Stmt, UnaryOp, name::Name, }; use ruff_python_parser::parse_module; use ruff_text_size::{Ranged, TextRange}; use crate::{ StackFrame, args::{ArgExprs, CallArg, CallKwarg, Kwarg}, exception_private::ExcType, exception_public::{CodeLoc, MontyException}, expressions::{ Callable, 
CmpOperator, Comprehension, DictItem, Expr, ExprLoc, Identifier, Literal, Node, Operator, SequenceItem, UnpackTarget, }, fstring::{ConversionFlag, FStringPart, FormatSpec}, intern::{InternerBuilder, StringId}, value::EitherStr, }; /// Maximum nesting depth for AST structures during parsing. /// Matches CPython's limit of ~200 for nested parentheses. /// This prevents stack overflow from deeply nested structures like `((((x,),),),)`. #[cfg(not(debug_assertions))] pub const MAX_NESTING_DEPTH: u16 = 200; /// In debug builds, we use a lower limit because stack frames are much larger /// (no inlining, debug info, etc.). The limit is set conservatively to prevent /// stack overflow while still catching the error before the recursion limit. #[cfg(debug_assertions)] pub const MAX_NESTING_DEPTH: u16 = 35; /// A parameter in a function signature with optional default value. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct ParsedParam { /// The parameter name. pub name: StringId, /// The default value expression (evaluated at definition time). pub default: Option, } /// A parsed function signature with all parameter types. /// /// This intermediate representation captures the structure of Python function /// parameters before name resolution. Default value expressions are stored /// as unevaluated AST and will be evaluated during the prepare phase. #[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] pub struct ParsedSignature { /// Positional-only parameters (before `/`). pub pos_args: Vec, /// Positional-or-keyword parameters. pub args: Vec, /// Variable positional parameter (`*args`). pub var_args: Option, /// Keyword-only parameters (after `*` or `*args`). pub kwargs: Vec, /// Variable keyword parameter (`**kwargs`). pub var_kwargs: Option, } impl ParsedSignature { /// Returns an iterator over all parameter names in the signature. 
/// /// Order: pos_args, args, var_args, kwargs, var_kwargs pub fn param_names(&self) -> impl Iterator + '_ { self.pos_args .iter() .map(|p| p.name) .chain(self.args.iter().map(|p| p.name)) .chain(self.var_args.iter().copied()) .chain(self.kwargs.iter().map(|p| p.name)) .chain(self.var_kwargs.iter().copied()) } } /// A raw (unprepared) function definition from the parser. /// /// Contains the function name, signature, and body as parsed AST nodes. /// During the prepare phase, this is transformed into `PreparedFunctionDef` /// with resolved names and scope information. #[derive(Debug, Clone)] pub struct RawFunctionDef { /// The function name identifier (not yet resolved to a namespace index). pub name: Identifier, /// The parsed function signature with parameter names and default expressions. pub signature: ParsedSignature, /// The unprepared function body (names not yet resolved). pub body: Vec, /// Whether this is an async function (`async def`). pub is_async: bool, } /// Type alias for parsed AST nodes (output of the parser). /// /// This uses `Node` where function definitions contain their /// full unprepared body. After the prepare phase, this becomes `PreparedNode` /// (aka `Node`). pub type ParseNode = Node; #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct Try { pub body: Vec, pub handlers: Vec>, pub or_else: Vec, pub finally: Vec, } /// A parsed exception handler (except clause). /// /// Represents `except ExcType as name:` or bare `except:` clauses. /// The exception type and variable binding are both optional. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct ExceptHandler { /// Exception type(s) to catch. None = bare except (catches all). pub exc_type: Option, /// Variable name for `except X as e:`. None = no binding. pub name: Option, /// Handler body statements. pub body: Vec, } /// Result of parsing: the AST nodes and the string interner with all interned names. 
#[derive(Debug)] pub struct ParseResult { pub nodes: Vec, pub interner: InternerBuilder, } pub(crate) fn parse(code: &str, filename: &str) -> Result { parse_with_interner(code, filename, InternerBuilder::new(code)) } /// Parses code using a caller-provided interner seed. /// /// This enables incremental compilation flows (e.g. REPL) where existing /// interned IDs must remain stable across parse invocations. pub(crate) fn parse_with_interner( code: &str, filename: &str, interner: InternerBuilder, ) -> Result { let mut parser = Parser::new(code, filename, interner); let parsed = parse_module(code).map_err(|e| ParseError::syntax(e.to_string(), parser.convert_range(e.range())))?; let module = parsed.into_syntax(); let nodes = parser.parse_statements(module.body)?; Ok(ParseResult { nodes, interner: parser.interner, }) } /// Parser for converting ruff AST to Monty's intermediate ParseNode representation. /// /// Holds references to the source code and owns a string interner for names. /// The filename is interned once at construction and reused for all CodeRanges. pub struct Parser<'a> { line_ends: Vec, code: &'a str, /// Interned filename ID, used for all CodeRanges created by this parser. filename_id: StringId, /// String interner for names (variables, functions, etc). pub interner: InternerBuilder, /// Remaining nesting depth budget for recursive structures. /// Starts at MAX_NESTING_DEPTH and decrements on each nested level. /// When it reaches zero, we return a "too many nested parentheses" error. 
depth_remaining: u16, } impl<'a> Parser<'a> { fn new(code: &'a str, filename: &'a str, mut interner: InternerBuilder) -> Self { // Position of each line in the source code, to convert indexes to line number and column number let mut line_ends = vec![]; for (i, c) in code.chars().enumerate() { if c == '\n' { line_ends.push(i); } } let filename_id = interner.intern(filename); Self { line_ends, code, filename_id, interner, depth_remaining: MAX_NESTING_DEPTH, } } fn parse_statements(&mut self, statements: Vec) -> Result, ParseError> { statements.into_iter().map(|f| self.parse_statement(f)).collect() } fn parse_elif_else_clauses(&mut self, clauses: Vec) -> Result, ParseError> { let mut tail: Vec = Vec::new(); for clause in clauses.into_iter().rev() { match clause.test { Some(test) => { let test = self.parse_expression(test)?; let body = self.parse_statements(clause.body)?; let or_else = tail; let nested = Node::If { test, body, or_else }; tail = vec![nested]; } None => { tail = self.parse_statements(clause.body)?; } } } Ok(tail) } /// Parses an exception handler (except clause). /// /// Handles `except:`, `except ExcType:`, and `except ExcType as name:` forms. 
fn parse_except_handler( &mut self, handler: ruff_python_ast::ExceptHandler, ) -> Result, ParseError> { let ruff_python_ast::ExceptHandler::ExceptHandler(h) = handler; let exc_type = match h.type_ { Some(expr) => Some(self.parse_expression(*expr)?), None => None, }; let name = h.name.map(|n| self.identifier(&n.id, n.range)); let body = self.parse_statements(h.body)?; Ok(ExceptHandler { exc_type, name, body }) } fn parse_statement(&mut self, statement: Stmt) -> Result { self.decr_depth_remaining(|| statement.range())?; let result = self.parse_statement_impl(statement); self.depth_remaining += 1; result } fn parse_statement_impl(&mut self, statement: Stmt) -> Result { match statement { Stmt::FunctionDef(function) => { let params = &function.parameters; // Parse positional-only parameters (before /) let pos_args = self.parse_params_with_defaults(¶ms.posonlyargs)?; // Parse positional-or-keyword parameters let args = self.parse_params_with_defaults(¶ms.args)?; // Parse *args let var_args = params.vararg.as_ref().map(|p| self.interner.intern(&p.name.id)); // Parse keyword-only parameters (after * or *args) let kwargs = self.parse_params_with_defaults(¶ms.kwonlyargs)?; // Parse **kwargs let var_kwargs = params.kwarg.as_ref().map(|p| self.interner.intern(&p.name.id)); let signature = ParsedSignature { pos_args, args, var_args, kwargs, var_kwargs, }; let name = self.identifier(&function.name.id, function.name.range); // Parse function body recursively let body = self.parse_statements(function.body)?; let is_async = function.is_async; Ok(Node::FunctionDef(RawFunctionDef { name, signature, body, is_async, })) } Stmt::ClassDef(c) => Err(ParseError::not_implemented( "class definitions", self.convert_range(c.range), )), Stmt::Return(ast::StmtReturn { value, .. 
}) => match value { Some(value) => Ok(Node::Return(self.parse_expression(*value)?)), None => Ok(Node::ReturnNone), }, Stmt::Delete(d) => Err(ParseError::not_implemented( "the 'del' statement", self.convert_range(d.range), )), Stmt::TypeAlias(t) => Err(ParseError::not_implemented("type aliases", self.convert_range(t.range))), Stmt::Assign(ast::StmtAssign { targets, value, range, .. }) => self.parse_assignment(first(targets, self.convert_range(range))?, *value), Stmt::AugAssign(ast::StmtAugAssign { target, op, value, .. }) => { let op = convert_op(op); let value = self.parse_expression(*value)?; match *target { AstExpr::Subscript(ast::ExprSubscript { value: object, slice, range, .. }) => Ok(Node::SubscriptOpAssign { target: self.parse_identifier(*object)?, index: self.parse_expression(*slice)?, op, object: value, target_position: self.convert_range(range), }), other => Ok(Node::OpAssign { target: self.parse_identifier(other)?, op, object: value, }), } } Stmt::AnnAssign(ast::StmtAnnAssign { target, value, .. }) => match value { Some(value) => self.parse_assignment(*target, *value), None => Ok(Node::Pass), }, Stmt::For(ast::StmtFor { is_async, target, iter, body, orelse, range, .. }) => { if is_async { return Err(ParseError::not_implemented( "async for loops", self.convert_range(range), )); } Ok(Node::For { target: self.parse_unpack_target(*target)?, iter: self.parse_expression(*iter)?, body: self.parse_statements(body)?, or_else: self.parse_statements(orelse)?, }) } Stmt::While(ast::StmtWhile { test, body, orelse, .. }) => Ok(Node::While { test: self.parse_expression(*test)?, body: self.parse_statements(body)?, or_else: self.parse_statements(orelse)?, }), Stmt::If(ast::StmtIf { test, body, elif_else_clauses, .. }) => { let test = self.parse_expression(*test)?; let body = self.parse_statements(body)?; let or_else = self.parse_elif_else_clauses(elif_else_clauses)?; Ok(Node::If { test, body, or_else }) } Stmt::With(ast::StmtWith { is_async, range, .. 
}) => { if is_async { Err(ParseError::not_implemented( "async context managers (async with)", self.convert_range(range), )) } else { Err(ParseError::not_implemented( "context managers (with statements)", self.convert_range(range), )) } } Stmt::Match(m) => Err(ParseError::not_implemented( "pattern matching (match statements)", self.convert_range(m.range), )), Stmt::Raise(ast::StmtRaise { exc, .. }) => { // TODO add cause to Node::Raise let expr = match exc { Some(expr) => Some(self.parse_expression(*expr)?), None => None, }; Ok(Node::Raise(expr)) } Stmt::Try(ast::StmtTry { body, handlers, orelse, finalbody, is_star, range, .. }) => { if is_star { Err(ParseError::not_implemented( "exception groups (try*/except*)", self.convert_range(range), )) } else { let body = self.parse_statements(body)?; let handlers = handlers .into_iter() .map(|h| self.parse_except_handler(h)) .collect::, _>>()?; let or_else = self.parse_statements(orelse)?; let finally = self.parse_statements(finalbody)?; Ok(Node::Try(Try { body, handlers, or_else, finally, })) } } Stmt::Assert(ast::StmtAssert { test, msg, .. }) => { let test = self.parse_expression(*test)?; let msg = match msg { Some(m) => Some(self.parse_expression(*m)?), None => None, }; Ok(Node::Assert { test, msg }) } Stmt::Import(ast::StmtImport { names, range, .. 
}) => { // We only support single module imports (e.g., `import sys`) // Multi-module imports (e.g., `import sys, os`) are not supported let position = self.convert_range(range); if names.len() != 1 { return Err(ParseError::not_implemented("multi-module import statements", position)); } let alias_node = &names[0]; let module_name = self.interner.intern(&alias_node.name); // The binding name is the alias if present, otherwise the module name let binding_name = alias_node .asname .as_ref() .map_or(module_name, |n| self.interner.intern(&n.id)); // Create an unresolved identifier (namespace slot will be set during prepare) let binding = Identifier::new(binding_name, position); Ok(Node::Import { module_name, binding }) } Stmt::ImportFrom(ast::StmtImportFrom { module, names, level, range, .. }) => { let position = self.convert_range(range); // We only support absolute imports (level 0) if level != 0 { return Err(ParseError::import_error( "attempted relative import with no known parent package", position, )); } // Module name is required for absolute imports let module_name = match module { Some(m) => self.interner.intern(&m), None => { return Err(ParseError::import_error( "attempted relative import with no known parent package", position, )); } }; // Parse the imported names let names = names .iter() .map(|alias| { // Check for star import which is not supported if alias.name.as_str() == "*" { return Err(ParseError::not_supported( "Wildcard imports (`from ... 
import *`) are not supported", position, )); } let name = self.interner.intern(&alias.name); // The binding name is the alias if provided, otherwise the import name let binding_name = alias.asname.as_ref().map_or(name, |n| self.interner.intern(&n.id)); // Create an unresolved identifier (namespace slot will be set during prepare) let binding = Identifier::new(binding_name, position); Ok((name, binding)) }) .collect::, _>>()?; Ok(Node::ImportFrom { module_name, names, position, }) } Stmt::Global(ast::StmtGlobal { names, range, .. }) => { let names = names .iter() .map(|id| self.interner.intern(&self.code[id.range])) .collect(); Ok(Node::Global { position: self.convert_range(range), names, }) } Stmt::Nonlocal(ast::StmtNonlocal { names, range, .. }) => { let names = names .iter() .map(|id| self.interner.intern(&self.code[id.range])) .collect(); Ok(Node::Nonlocal { position: self.convert_range(range), names, }) } Stmt::Expr(ast::StmtExpr { value, .. }) => self.parse_expression(*value).map(Node::Expr), Stmt::Pass(_) => Ok(Node::Pass), Stmt::Break(b) => Ok(Node::Break { position: self.convert_range(b.range), }), Stmt::Continue(c) => Ok(Node::Continue { position: self.convert_range(c.range), }), Stmt::IpyEscapeCommand(i) => Err(ParseError::not_implemented( "IPython escape commands", self.convert_range(i.range), )), } } /// `lhs = rhs` -> `lhs, rhs` /// Handles simple assignments (x = value), subscript assignments (dict[key] = value), /// attribute assignments (obj.attr = value), and tuple unpacking (a, b = value) fn parse_assignment(&mut self, lhs: AstExpr, rhs: AstExpr) -> Result { match lhs { // Subscript assignment like dict[key] = value AstExpr::Subscript(ast::ExprSubscript { value, slice, range, .. 
}) => Ok(Node::SubscriptAssign { target: self.parse_identifier(*value)?, index: self.parse_expression(*slice)?, value: self.parse_expression(rhs)?, target_position: self.convert_range(range), }), // Attribute assignment like obj.attr = value (supports chained like a.b.c = value) AstExpr::Attribute(ast::ExprAttribute { value, attr, range, .. }) => Ok(Node::AttrAssign { object: self.parse_expression(*value)?, attr: EitherStr::Interned(self.interner.intern(attr.id())), target_position: self.convert_range(range), value: self.parse_expression(rhs)?, }), // Tuple unpacking like a, b = value or (a, b), c = nested AstExpr::Tuple(ast::ExprTuple { elts, range, .. }) => { let targets_position = self.convert_range(range); let targets = elts .into_iter() .map(|e| self.parse_unpack_target(e)) // Use parse_unpack_target for recursion .collect::, _>>()?; Ok(Node::UnpackAssign { targets, targets_position, object: self.parse_expression(rhs)?, }) } // List unpacking like [a, b] = value or [a, *rest] = value AstExpr::List(ast::ExprList { elts, range, .. }) => { let targets_position = self.convert_range(range); let targets = elts .into_iter() .map(|e| self.parse_unpack_target(e)) .collect::, _>>()?; Ok(Node::UnpackAssign { targets, targets_position, object: self.parse_expression(rhs)?, }) } // Simple identifier assignment like x = value _ => Ok(Node::Assign { target: self.parse_identifier(lhs)?, object: self.parse_expression(rhs)?, }), } } /// Parses an expression from the ruff AST into Monty's ExprLoc representation. /// /// Includes depth tracking to prevent stack overflow from deeply nested structures. /// Matches CPython's limit of 200 for nested parentheses. 
fn parse_expression(&mut self, expression: AstExpr) -> Result { self.decr_depth_remaining(|| expression.range())?; let result = self.parse_expression_impl(expression); self.depth_remaining += 1; result } fn parse_expression_impl(&mut self, expression: AstExpr) -> Result { match expression { AstExpr::BoolOp(ast::ExprBoolOp { op, values, range, .. }) => { // Handle chained boolean operations like `a and b and c` by right-folding // into nested binary operations: `a and (b and c)` let rust_op = convert_bool_op(op); let position = self.convert_range(range); let mut values_iter = values.into_iter().rev(); // Start with the rightmost value let last_value = values_iter.next().expect("Expected at least one value in boolean op"); let mut result = self.parse_expression(last_value)?; // Fold from right to left for value in values_iter { let left = Box::new(self.parse_expression(value)?); result = ExprLoc::new( position, Expr::Op { left, op: rust_op.clone(), right: Box::new(result), }, ); } Ok(result) } AstExpr::Named(ast::ExprNamed { target, value, range, .. }) => { let target_ident = self.parse_identifier(*target)?; let value_expr = self.parse_expression(*value)?; Ok(ExprLoc::new( self.convert_range(range), Expr::Named { target: target_ident, value: Box::new(value_expr), }, )) } AstExpr::BinOp(ast::ExprBinOp { left, op, right, range, .. }) => { let left = Box::new(self.parse_expression(*left)?); let right = Box::new(self.parse_expression(*right)?); Ok(ExprLoc { position: self.convert_range(range), expr: Expr::Op { left, op: convert_op(op), right, }, }) } AstExpr::UnaryOp(ast::ExprUnaryOp { op, operand, range, .. 
}) => match op { UnaryOp::Not => { let operand = Box::new(self.parse_expression(*operand)?); Ok(ExprLoc::new(self.convert_range(range), Expr::Not(operand))) } UnaryOp::USub => { let operand = Box::new(self.parse_expression(*operand)?); Ok(ExprLoc::new(self.convert_range(range), Expr::UnaryMinus(operand))) } UnaryOp::UAdd => { let operand = Box::new(self.parse_expression(*operand)?); Ok(ExprLoc::new(self.convert_range(range), Expr::UnaryPlus(operand))) } UnaryOp::Invert => { let operand = Box::new(self.parse_expression(*operand)?); Ok(ExprLoc::new(self.convert_range(range), Expr::UnaryInvert(operand))) } }, AstExpr::Lambda(ast::ExprLambda { parameters, body, range, .. }) => { let position = self.convert_range(range); // Intern the lambda name let name_id = self.interner.intern(""); // Parse lambda parameters (similar to function parameters) let signature = if let Some(params) = parameters { // Parse positional-only parameters (before /) let pos_args = self.parse_params_with_defaults(¶ms.posonlyargs)?; // Parse positional-or-keyword parameters let args = self.parse_params_with_defaults(¶ms.args)?; // Parse *args let var_args = params.vararg.as_ref().map(|p| self.interner.intern(&p.name.id)); // Parse keyword-only parameters (after * or *args) let kwargs = self.parse_params_with_defaults(¶ms.kwonlyargs)?; // Parse **kwargs let var_kwargs = params.kwarg.as_ref().map(|p| self.interner.intern(&p.name.id)); ParsedSignature { pos_args, args, var_args, kwargs, var_kwargs, } } else { // No parameters (e.g., `lambda: 42`) ParsedSignature::default() }; // Parse the body expression let body = Box::new(self.parse_expression(*body)?); Ok(ExprLoc::new( position, Expr::LambdaRaw { name_id, signature, body, }, )) } AstExpr::If(ast::ExprIf { test, body, orelse, range, .. 
}) => Ok(ExprLoc::new( self.convert_range(range), Expr::IfElse { test: Box::new(self.parse_expression(*test)?), body: Box::new(self.parse_expression(*body)?), orelse: Box::new(self.parse_expression(*orelse)?), }, )), AstExpr::Dict(ast::ExprDict { items, range, .. }) => { let position = self.convert_range(range); let mut dict_items = Vec::new(); for ast::DictItem { key, value } in items { // key is Option - None represents ** unpacking (PEP 448) if let Some(key_expr_ast) = key { let key_expr = self.parse_expression(key_expr_ast)?; let value_expr = self.parse_expression(value)?; dict_items.push(DictItem::Pair(key_expr, value_expr)); } else { // **expr unpack in a dict literal: later keys silently win let unpack_expr = self.parse_expression(value)?; dict_items.push(DictItem::Unpack(unpack_expr)); } } Ok(ExprLoc::new(position, Expr::Dict(dict_items))) } AstExpr::Set(ast::ExprSet { elts, range, .. }) => { let mut items = Vec::new(); for e in elts { items.push(self.parse_sequence_item(e)?); } Ok(ExprLoc::new(self.convert_range(range), Expr::Set(items))) } AstExpr::ListComp(ast::ExprListComp { elt, generators, range, .. }) => { let elt = Box::new(self.parse_expression(*elt)?); let generators = self.parse_comprehension_generators(generators)?; Ok(ExprLoc::new( self.convert_range(range), Expr::ListComp { elt, generators }, )) } AstExpr::SetComp(ast::ExprSetComp { elt, generators, range, .. }) => { let elt = Box::new(self.parse_expression(*elt)?); let generators = self.parse_comprehension_generators(generators)?; Ok(ExprLoc::new( self.convert_range(range), Expr::SetComp { elt, generators }, )) } AstExpr::DictComp(ast::ExprDictComp { key, value, generators, range, .. 
}) => { let key = Box::new(self.parse_expression(*key)?); let value = Box::new(self.parse_expression(*value)?); let generators = self.parse_comprehension_generators(generators)?; Ok(ExprLoc::new( self.convert_range(range), Expr::DictComp { key, value, generators }, )) } AstExpr::Generator(ast::ExprGenerator { elt, generators, range, .. }) => { // TODO: When proper generators are implemented, this should produce // Expr::Generator instead of Expr::ListComp. Currently we treat generator // expressions as list comprehensions since we don't have generator support. let elt = Box::new(self.parse_expression(*elt)?); let generators = self.parse_comprehension_generators(generators)?; Ok(ExprLoc::new( self.convert_range(range), Expr::ListComp { elt, generators }, )) } AstExpr::Await(a) => { let value = self.parse_expression(*a.value)?; Ok(ExprLoc::new(self.convert_range(a.range), Expr::Await(Box::new(value)))) } AstExpr::Yield(y) => Err(ParseError::not_implemented( "yield expressions", self.convert_range(y.range), )), AstExpr::YieldFrom(y) => Err(ParseError::not_implemented( "yield from expressions", self.convert_range(y.range), )), AstExpr::Compare(ast::ExprCompare { left, ops, comparators, range, .. }) => { let position = self.convert_range(range); let ops_vec = ops.into_vec(); let comparators_vec = comparators.into_vec(); // Simple case: single comparison (most common) if ops_vec.len() == 1 { return Ok(ExprLoc::new( position, Expr::CmpOp { left: Box::new(self.parse_expression(*left)?), op: convert_compare_op(ops_vec.into_iter().next().unwrap()), right: Box::new(self.parse_expression(comparators_vec.into_iter().next().unwrap())?), }, )); } // Chain comparison: transform to nested And expressions self.parse_chain_comparison(*left, ops_vec, comparators_vec, position) } AstExpr::Call(ast::ExprCall { func, arguments, range, .. }) => { let position = self.convert_range(range); let ast::Arguments { args, keywords, .. 
} = arguments; let args_vec = args.into_vec(); let keywords_vec = keywords.into_vec(); // Detect whether we need the generalized path (PEP 448): // - multiple *args unpacks, OR // - positional argument after *args, OR // - multiple **kwargs unpacks let needs_generalized = Self::needs_generalized_call(&args_vec, &keywords_vec); let args = if needs_generalized { self.parse_generalized_call_args(args_vec, keywords_vec)? } else { self.parse_simple_call_args(args_vec, keywords_vec)? }; match *func { AstExpr::Name(ast::ExprName { id, range, .. }) => { // Always create Callable::Name — builtin resolution happens in // the prepare phase with scope awareness, so local assignments // can shadow builtins. let ident = self.identifier(&id, range); let callable = Callable::Name(ident); Ok(ExprLoc::new( position, Expr::Call { callable, args: Box::new(args), }, )) } AstExpr::Attribute(ast::ExprAttribute { value, attr, .. }) => { let object = Box::new(self.parse_expression(*value)?); Ok(ExprLoc::new( position, Expr::AttrCall { object, attr: EitherStr::Interned(self.interner.intern(attr.id())), args: Box::new(args), }, )) } other => { // Handle arbitrary expression as callable (e.g., lambda calls) let callable = Box::new(self.parse_expression(other)?); Ok(ExprLoc::new( position, Expr::IndirectCall { callable, args: Box::new(args), }, )) } } } AstExpr::FString(ast::ExprFString { value, range, .. }) => self.parse_fstring(&value, range), AstExpr::TString(t) => Err(ParseError::not_implemented( "template strings (t-strings)", self.convert_range(t.range), )), AstExpr::StringLiteral(ast::ExprStringLiteral { value, range, .. }) => { let string_id = self.interner.intern(&value.to_string()); Ok(ExprLoc::new( self.convert_range(range), Expr::Literal(Literal::Str(string_id)), )) } AstExpr::BytesLiteral(ast::ExprBytesLiteral { value, range, .. 
}) => { let bytes: Cow<'_, [u8]> = Cow::from(&value); let bytes_id = self.interner.intern_bytes(&bytes); Ok(ExprLoc::new( self.convert_range(range), Expr::Literal(Literal::Bytes(bytes_id)), )) } AstExpr::NumberLiteral(ast::ExprNumberLiteral { value, range, .. }) => { let position = self.convert_range(range); let const_value = match value { Number::Int(i) => { if let Some(i) = i.as_i64() { Literal::Int(i) } else { // Integer too large for i64, parse string representation as BigInt // Handles radix prefixes (0x, 0o, 0b) and underscores let bi = parse_int_literal(&i.to_string()) .ok_or_else(|| ParseError::syntax(format!("invalid integer literal: {i}"), position))?; let long_int_id = self.interner.intern_long_int(bi); Literal::LongInt(long_int_id) } } Number::Float(f) => Literal::Float(f), Number::Complex { .. } => return Err(ParseError::not_implemented("complex constants", position)), }; Ok(ExprLoc::new(position, Expr::Literal(const_value))) } AstExpr::BooleanLiteral(ast::ExprBooleanLiteral { value, range, .. }) => Ok(ExprLoc::new( self.convert_range(range), Expr::Literal(Literal::Bool(value)), )), AstExpr::NoneLiteral(ast::ExprNoneLiteral { range, .. }) => { Ok(ExprLoc::new(self.convert_range(range), Expr::Literal(Literal::None))) } AstExpr::EllipsisLiteral(ast::ExprEllipsisLiteral { range, .. }) => Ok(ExprLoc::new( self.convert_range(range), Expr::Literal(Literal::Ellipsis), )), AstExpr::Attribute(ast::ExprAttribute { value, attr, range, .. }) => { let object = Box::new(self.parse_expression(*value)?); let position = self.convert_range(range); Ok(ExprLoc::new( position, Expr::AttrGet { object, attr: EitherStr::Interned(self.interner.intern(attr.id())), }, )) } AstExpr::Subscript(ast::ExprSubscript { value, slice, range, .. 
}) => { let object = Box::new(self.parse_expression(*value)?); let index = Box::new(self.parse_expression(*slice)?); Ok(ExprLoc::new( self.convert_range(range), Expr::Subscript { object, index }, )) } AstExpr::Starred(s) => Err(ParseError::not_implemented( "starred expressions (*expr)", self.convert_range(s.range), )), AstExpr::Name(ast::ExprName { id, range, .. }) => { let position = self.convert_range(range); // Always create Expr::Name — builtin resolution happens in the prepare // phase with scope awareness, so local assignments can shadow builtins. let expr = Expr::Name(self.identifier(&id, range)); Ok(ExprLoc::new(position, expr)) } AstExpr::List(ast::ExprList { elts, range, .. }) => { let mut items = Vec::new(); for e in elts { items.push(self.parse_sequence_item(e)?); } Ok(ExprLoc::new(self.convert_range(range), Expr::List(items))) } AstExpr::Tuple(ast::ExprTuple { elts, range, .. }) => { let mut items = Vec::new(); for e in elts { items.push(self.parse_sequence_item(e)?); } Ok(ExprLoc::new(self.convert_range(range), Expr::Tuple(items))) } AstExpr::Slice(ast::ExprSlice { lower, upper, step, range, .. }) => { let lower = lower.map(|e| self.parse_expression(*e)).transpose()?; let upper = upper.map(|e| self.parse_expression(*e)).transpose()?; let step = step.map(|e| self.parse_expression(*e)).transpose()?; Ok(ExprLoc::new( self.convert_range(range), Expr::Slice { lower: lower.map(Box::new), upper: upper.map(Box::new), step: step.map(Box::new), }, )) } AstExpr::IpyEscapeCommand(i) => Err(ParseError::not_implemented( "IPython escape commands", self.convert_range(i.range), )), } } /// Converts an AST expression into a `SequenceItem` for list/tuple/set literals. /// /// A `Starred` node becomes `SequenceItem::Unpack`; all other expressions /// become `SequenceItem::Value`. This is the entry point for PEP 448 unpack /// handling in collection literals. fn parse_sequence_item(&mut self, expr: AstExpr) -> Result { if let AstExpr::Starred(ast::ExprStarred { value, .. 
}) = expr {
        // `*expr` inside a collection literal: the inner expression is unpacked in place.
        Ok(SequenceItem::Unpack(self.parse_expression(*value)?))
    } else {
        Ok(SequenceItem::Value(self.parse_expression(expr)?))
    }
}

/// Detects whether a function call needs the generalized (PEP 448) argument path.
///
/// Returns `true` when the call has:
/// - More than one `*unpack` among positional args, OR
/// - A plain positional arg following a `*unpack`, OR
/// - More than one `**unpack` among keyword args.
///
/// In all these cases the single-`*` / single-`**` representation built by
/// `parse_simple_call_args` is insufficient and `parse_generalized_call_args`
/// must be used instead.
fn needs_generalized_call(args: &[AstExpr], keywords: &[Keyword]) -> bool {
    // Both positional conditions collapse into one: the first `*unpack` is followed by
    // *any* further positional argument (starred or plain).
    let star_not_last = args
        .iter()
        .position(|arg| matches!(arg, AstExpr::Starred(_)))
        .is_some_and(|first_star| first_star + 1 < args.len());

    // A keyword without a name is a `**expr` unpack.
    let multiple_kwargs_unpacks = keywords.iter().filter(|k| k.arg.is_none()).count() > 1;

    star_not_last || multiple_kwargs_unpacks
}

/// Parses function call args for the simple case (at most one `*` and one `**`).
///
/// Positional arguments are collected in order, a single `*expr` (if present) is kept
/// separately, and keywords are delegated to `parse_keywords`. The result is built with
/// `ArgExprs::new_with_var_kwargs(...)`.
///
/// Callers are expected to have ruled out the PEP 448 shapes with
/// `needs_generalized_call` first: a second `*expr` seen here would silently replace
/// the previous one.
fn parse_simple_call_args(
    &mut self,
    args_vec: Vec<AstExpr>,
    keywords_vec: Vec<Keyword>,
) -> Result<ArgExprs, ParseError> {
    let mut positional_args = Vec::new();
    let mut var_args_expr: Option<ExprLoc> = None;
    for arg_expr in args_vec {
        match arg_expr {
            AstExpr::Starred(ast::ExprStarred { value, .. }) => {
                var_args_expr = Some(self.parse_expression(*value)?);
            }
            other => {
                positional_args.push(self.parse_expression(other)?);
            }
        }
    }
    let (kwargs, var_kwargs) = self.parse_keywords(keywords_vec)?;
    Ok(ArgExprs::new_with_var_kwargs(
        positional_args,
        var_args_expr,
        kwargs,
        var_kwargs,
    ))
}

/// Parses function call args for the PEP 448 generalized case.
///
/// Builds `Vec<CallArg>` and `Vec<CallKwarg>` preserving the full order of
/// positional and keyword arguments so the compiler can emit correct
/// `ListAppend`/`ListExtend`/`DictMerge` sequences.
fn parse_generalized_call_args(
    &mut self,
    args_vec: Vec<AstExpr>,
    keywords_vec: Vec<Keyword>,
) -> Result<ArgExprs, ParseError> {
    // Positional side: every argument keeps its source position; `*expr` becomes an
    // `Unpack` entry, anything else a plain `Value` entry.
    let mut call_args = Vec::new();
    for arg_expr in args_vec {
        match arg_expr {
            AstExpr::Starred(ast::ExprStarred { value, .. }) => {
                call_args.push(CallArg::Unpack(self.parse_expression(*value)?));
            }
            other => {
                call_args.push(CallArg::Value(self.parse_expression(other)?));
            }
        }
    }

    // Keyword side: a keyword with a name is `key=value`, one without is `**expr`.
    let mut call_kwargs = Vec::new();
    for kwarg in keywords_vec {
        if let Some(key) = kwarg.arg {
            let key_ident = self.identifier(&key.id, key.range);
            let value = self.parse_expression(kwarg.value)?;
            call_kwargs.push(CallKwarg::Named(Kwarg { key: key_ident, value }));
        } else {
            let unpack_expr = self.parse_expression(kwarg.value)?;
            call_kwargs.push(CallKwarg::Unpack(unpack_expr));
        }
    }

    Ok(ArgExprs::new_generalized(call_args, call_kwargs))
}

/// Parses keyword arguments, separating regular kwargs from var_kwargs (`**expr`).
///
/// Returns `(kwargs, var_kwargs)` where kwargs is a vec of named keyword arguments
/// and var_kwargs is an optional expression for `**expr` unpacking.
///
/// # Errors
/// Returns a not-implemented error if more than one `**expr` is present; this
/// representation can hold only one.
fn parse_keywords(&mut self, keywords: Vec<Keyword>) -> Result<(Vec<Kwarg>, Option<ExprLoc>), ParseError> {
    let mut kwargs = Vec::new();
    let mut var_kwargs = None;
    for kwarg in keywords {
        if let Some(key) = kwarg.arg {
            // Regular kwarg: key=value
            let key = self.identifier(&key.id, key.range);
            let value = self.parse_expression(kwarg.value)?;
            kwargs.push(Kwarg { key, value });
        } else {
            // Var kwargs: **expr
            if var_kwargs.is_some() {
                return Err(ParseError::not_implemented(
                    "multiple **kwargs unpacking",
                    self.convert_range(kwarg.range),
                ));
            }
            var_kwargs = Some(self.parse_expression(kwarg.value)?);
        }
    }
    Ok((kwargs, var_kwargs))
}

/// Converts an expression that must be a bare name into an `Identifier`.
///
/// # Errors
/// Returns a syntax error (including the debug form of the offending node) for any
/// expression other than `AstExpr::Name`.
fn parse_identifier(&mut self, ast: AstExpr) -> Result<Identifier, ParseError> {
    match ast {
        AstExpr::Name(ast::ExprName { id, range, .. }) => Ok(self.identifier(&id, range)),
        other => Err(ParseError::syntax(
            format!("Expected name, got {other:?}"),
            self.convert_range(other.range()),
        )),
    }
}

/// Parses a chain comparison expression like `a < b < c < d`.
/// /// Chain comparisons evaluate each intermediate value only once and short-circuit /// on the first false result. This creates an `Expr::ChainCmp` node which is /// compiled to bytecode using stack manipulation (Dup, Rot) rather than /// temporary variables, avoiding namespace pollution. fn parse_chain_comparison( &mut self, left: AstExpr, ops: Vec, comparators: Vec, position: CodeRange, ) -> Result { let left_expr = self.parse_expression(left)?; let comparisons = ops .into_iter() .zip(comparators) .map(|(op, cmp)| Ok((convert_compare_op(op), self.parse_expression(cmp)?))) .collect::, ParseError>>()?; Ok(ExprLoc::new( position, Expr::ChainCmp { left: Box::new(left_expr), comparisons, }, )) } /// Parses an unpack target - either a single identifier or a nested tuple. /// /// Handles patterns like `a` (single variable), `a, b` (flat tuple), or `(a, b), c` (nested). /// Includes depth tracking to prevent stack overflow from deeply nested structures. fn parse_unpack_target(&mut self, ast: AstExpr) -> Result { self.decr_depth_remaining(|| ast.range())?; let result = self.parse_unpack_target_impl(ast); self.depth_remaining += 1; result } fn parse_unpack_target_impl(&mut self, ast: AstExpr) -> Result { match ast { AstExpr::Name(ast::ExprName { id, range, .. }) => Ok(UnpackTarget::Name(self.identifier(&id, range))), AstExpr::Tuple(ast::ExprTuple { elts, range, .. 
}) => { let position = self.convert_range(range); let targets = elts .into_iter() .map(|e| self.parse_unpack_target(e)) // Recursive call for nested tuples .collect::, _>>()?; if targets.is_empty() { return Err(ParseError::syntax("empty tuple in unpack target", position)); } // Validate at most one starred target let starred_count = targets.iter().filter(|t| matches!(t, UnpackTarget::Starred(_))).count(); if starred_count > 1 { return Err(ParseError::syntax( "multiple starred expressions in assignment", position, )); } Ok(UnpackTarget::Tuple { targets, position }) } AstExpr::Starred(ast::ExprStarred { value, range, .. }) => { // Starred target must be a simple name match *value { AstExpr::Name(ast::ExprName { id, range, .. }) => { Ok(UnpackTarget::Starred(self.identifier(&id, range))) } _ => Err(ParseError::syntax( "starred assignment target must be a name", self.convert_range(range), )), } } AstExpr::List(ast::ExprList { elts, range, .. }) => { // List unpacking target [a, b, *rest] - same as tuple let position = self.convert_range(range); let targets = elts .into_iter() .map(|e| self.parse_unpack_target(e)) .collect::, _>>()?; if targets.is_empty() { return Err(ParseError::syntax("empty list in unpack target", position)); } // Validate at most one starred target let starred_count = targets.iter().filter(|t| matches!(t, UnpackTarget::Starred(_))).count(); if starred_count > 1 { return Err(ParseError::syntax( "multiple starred expressions in assignment", position, )); } Ok(UnpackTarget::Tuple { targets, position }) } other => Err(ParseError::syntax( format!("invalid unpacking target: {other:?}"), self.convert_range(other.range()), )), } } fn identifier(&mut self, id: &Name, range: TextRange) -> Identifier { let string_id = self.interner.intern(id); Identifier::new(string_id, self.convert_range(range)) } /// Parses function parameters with optional default values. 
///
/// Handles parameters like `a`, `b=10`, `c=None` by extracting the parameter
/// name and parsing any default expression. Default expressions are stored
/// as unevaluated AST and will be evaluated during the prepare phase.
fn parse_params_with_defaults(&mut self, params: &[ParameterWithDefault]) -> Result<Vec<ParsedParam>, ParseError> {
    params
        .iter()
        .map(|p| {
            let name = self.interner.intern(&p.parameter.name.id);
            // The parameter list is only borrowed, so the default expression is cloned
            // before being handed to `parse_expression`, which takes ownership.
            let default = p
                .default
                .as_deref()
                .map(|expr| self.parse_expression(expr.clone()))
                .transpose()?;
            Ok(ParsedParam { name, default })
        })
        .collect()
}

/// Parses comprehension generators (the `for ... in ... if ...` clauses).
///
/// Each generator represents one `for` clause with zero or more `if` filters.
/// Multiple generators create nested iteration. Supports both single identifiers
/// (`for x in ...`) and tuple unpacking (`for x, y in ...`).
///
/// # Errors
/// Returns a not-implemented error for `async` comprehensions, and propagates any
/// error from parsing the target, the iterable or a filter condition.
fn parse_comprehension_generators(
    &mut self,
    generators: Vec<ast::Comprehension>,
) -> Result<Vec<Comprehension>, ParseError> {
    generators
        .into_iter()
        .map(|comp| {
            if comp.is_async {
                return Err(ParseError::not_implemented(
                    "async comprehensions",
                    self.convert_range(comp.range),
                ));
            }
            let target = self.parse_unpack_target(comp.target)?;
            let iter = self.parse_expression(comp.iter)?;
            let ifs = comp
                .ifs
                .into_iter()
                .map(|cond| self.parse_expression(cond))
                .collect::<Result<Vec<_>, _>>()?;
            Ok(Comprehension { target, iter, ifs })
        })
        .collect()
}

/// Parses an f-string value into expression parts.
///
/// F-strings in ruff AST are represented as `FStringValue` containing
/// `FStringPart`s, which can be either literal strings or `FString`
/// interpolated sections. Each `FString` contains `InterpolatedStringElements`.
fn parse_fstring(&mut self, value: &ast::FStringValue, range: TextRange) -> Result { let mut parts = Vec::new(); for fstring_part in value { match fstring_part { ast::FStringPart::Literal(lit) => { // Literal string segment - intern for use at runtime let processed = lit.value.to_string(); if !processed.is_empty() { let string_id = self.interner.intern(&processed); parts.push(FStringPart::Literal(string_id)); } } ast::FStringPart::FString(fstring) => { // Interpolated f-string section for element in &fstring.elements { let part = self.parse_fstring_element(element)?; parts.push(part); } } } } // Optimization: if only one literal part, return as simple string literal if parts.len() == 1 && let FStringPart::Literal(string_id) = parts[0] { return Ok(ExprLoc::new( self.convert_range(range), Expr::Literal(Literal::Str(string_id)), )); } Ok(ExprLoc::new(self.convert_range(range), Expr::FString(parts))) } /// Parses a single f-string element (literal or interpolation). fn parse_fstring_element(&mut self, element: &InterpolatedStringElement) -> Result { match element { InterpolatedStringElement::Literal(lit) => { // Intern the literal string for use at runtime let processed = lit.value.to_string(); let string_id = self.interner.intern(&processed); Ok(FStringPart::Literal(string_id)) } InterpolatedStringElement::Interpolation(interp) => { let expr = Box::new(self.parse_expression((*interp.expression).clone())?); let conversion = convert_conversion_flag(interp.conversion); let format_spec = match &interp.format_spec { Some(spec) => Some(self.parse_format_spec(spec)?), None => None, }; // Extract debug prefix for `=` specifier (e.g., f'{a=}' -> "a=") let debug_prefix = interp.debug_text.as_ref().map(|dt| { let expr_text = &self.code[interp.expression.range()]; self.interner .intern(&format!("{}{}{}", dt.leading, expr_text, dt.trailing)) }); Ok(FStringPart::Interpolation { expr, conversion, format_spec, debug_prefix, }) } } } /// Parses a format specification, which may 
/// contain nested interpolations.
///
/// For static specs (no interpolations), parses the format string into a
/// `ParsedFormatSpec` at parse time to avoid runtime parsing overhead.
fn parse_format_spec(&mut self, spec: &ast::InterpolatedStringFormatSpec) -> Result<FormatSpec, ParseError> {
    let mut parts = Vec::new();
    let mut has_interpolation = false;

    for element in &spec.elements {
        match element {
            InterpolatedStringElement::Literal(lit) => {
                // Intern the literal string
                let processed = lit.value.to_string();
                let string_id = self.interner.intern(&processed);
                parts.push(FStringPart::Literal(string_id));
            }
            InterpolatedStringElement::Interpolation(interp) => {
                has_interpolation = true;
                let expr = Box::new(self.parse_expression((*interp.expression).clone())?);
                let conversion = convert_conversion_flag(interp.conversion);
                // Format specs within format specs are not allowed in Python,
                // and debug_prefix doesn't apply to nested interpolations
                parts.push(FStringPart::Interpolation {
                    expr,
                    conversion,
                    format_spec: None,
                    debug_prefix: None,
                });
            }
        }
    }

    if has_interpolation {
        Ok(FormatSpec::Dynamic(parts))
    } else {
        // Combine all literal parts into a single static string and parse at parse time
        let static_spec: String = parts
            .into_iter()
            .filter_map(|p| {
                if let FStringPart::Literal(string_id) = p {
                    Some(self.interner.get_str(string_id).to_owned())
                } else {
                    None
                }
            })
            .collect();
        let parsed = static_spec.parse().map_err(|spec_str| {
            ParseError::syntax(
                format!("Invalid format specifier '{spec_str}'"),
                self.convert_range(spec.range),
            )
        })?;
        Ok(FormatSpec::Static(parsed))
    }
}

/// Converts a ruff `TextRange` (offsets into the source) into a `CodeRange` of
/// line/column positions.
///
/// Columns are the offset minus the offset at which the containing line starts.
fn convert_range(&self, range: TextRange) -> CodeRange {
    let start = range.start().into();
    let (start_line_no, start_line_start, _) = self.index_to_position(start);
    let start = CodeLoc::new(start_line_no, start - start_line_start);

    let end = range.end().into();
    let (end_line_no, end_line_start, _) = self.index_to_position(end);
    let end = CodeLoc::new(end_line_no, end - end_line_start);

    // Store line number for single-line ranges, None for multi-line
    let preview_line = if start_line_no == end_line_no {
        Some(u32::try_from(start_line_no).expect("line number exceeds u32"))
    } else {
        None
    };

    CodeRange::new(self.filename_id, start, end, preview_line)
}

/// Maps a source offset to `(line_no, line_start, line_end)`.
///
/// `line_no` is 0-indexed, `line_start` is the offset at which that line begins and
/// `line_end` is the matching entry of `self.line_ends`, or `None` when `index` lies
/// after the last recorded line end (file without trailing newline).
///
/// This is called twice per `convert_range`, so it uses a binary search instead of
/// scanning every line. NOTE(review): this relies on `self.line_ends` being in
/// ascending order, which the previous linear scan also needed for meaningful results.
fn index_to_position(&self, index: usize) -> (usize, usize, Option<usize>) {
    // First line whose end is at or after `index`; equals `line_ends.len()` if there is none.
    let line_no = self.line_ends.partition_point(|&line_end| line_end < index);
    // A line starts one past the previous line's end; the first line starts at 0.
    let line_start = match line_no.checked_sub(1) {
        Some(prev_line) => self.line_ends[prev_line] + 1,
        None => 0,
    };
    (line_no, line_start, self.line_ends.get(line_no).copied())
}

/// Decrements the depth remaining for nested parentheses.
/// Returns an error if the depth remaining goes to zero.
///
/// `get_range` is only invoked on failure, to locate the error.
fn decr_depth_remaining(&mut self, get_range: impl FnOnce() -> TextRange) -> Result<(), ParseError> {
    if let Some(depth_remaining) = self.depth_remaining.checked_sub(1) {
        self.depth_remaining = depth_remaining;
        Ok(())
    } else {
        let position = self.convert_range(get_range());
        Err(ParseError::syntax("too many nested parentheses", position))
    }
}
}

/// Returns the only element of `v`.
///
/// # Errors
/// Returns a syntax error at `position`, reporting the actual length and the debug
/// form of the vector, when `v` does not contain exactly one element.
fn first<T: fmt::Debug>(v: Vec<T>, position: CodeRange) -> Result<T, ParseError> {
    // `try_from` hands the vector back untouched when the length is not exactly 1.
    match <[T; 1]>::try_from(v) {
        Ok([only]) => Ok(only),
        Err(v) => Err(ParseError::syntax(
            format!("Expected 1 element, got {} (raw: {v:?})", v.len()),
            position,
        )),
    }
}

/// Converts ruff's binary operator to our `Operator`.
fn convert_op(op: AstOperator) -> Operator {
    match op {
        AstOperator::Add => Operator::Add,
        AstOperator::Sub => Operator::Sub,
        AstOperator::Mult => Operator::Mult,
        AstOperator::MatMult => Operator::MatMult,
        AstOperator::Div => Operator::Div,
        AstOperator::Mod => Operator::Mod,
        AstOperator::Pow => Operator::Pow,
        AstOperator::LShift => Operator::LShift,
        AstOperator::RShift => Operator::RShift,
        AstOperator::BitOr => Operator::BitOr,
        AstOperator::BitXor => Operator::BitXor,
        AstOperator::BitAnd => Operator::BitAnd,
        AstOperator::FloorDiv => Operator::FloorDiv,
    }
}

/// Converts ruff's boolean operator (`and` / `or`) to our `Operator`.
fn convert_bool_op(op: BoolOp) -> Operator {
    match op {
        BoolOp::And => Operator::And,
        BoolOp::Or => Operator::Or,
    }
}

/// Converts ruff's comparison operator to our `CmpOperator`.
fn convert_compare_op(op: CmpOp) -> CmpOperator {
    match op {
        CmpOp::Eq => CmpOperator::Eq,
        CmpOp::NotEq => CmpOperator::NotEq,
        CmpOp::Lt => CmpOperator::Lt,
        CmpOp::LtE => CmpOperator::LtE,
        CmpOp::Gt => CmpOperator::Gt,
        CmpOp::GtE => CmpOperator::GtE,
        CmpOp::Is => CmpOperator::Is,
        CmpOp::IsNot => CmpOperator::IsNot,
        CmpOp::In => CmpOperator::In,
        CmpOp::NotIn => CmpOperator::NotIn,
    }
}

/// Converts ruff's ConversionFlag to our ConversionFlag.
fn convert_conversion_flag(flag: RuffConversionFlag) -> ConversionFlag {
    match flag {
        RuffConversionFlag::None => ConversionFlag::None,
        RuffConversionFlag::Str => ConversionFlag::Str,
        RuffConversionFlag::Repr => ConversionFlag::Repr,
        RuffConversionFlag::Ascii => ConversionFlag::Ascii,
    }
}

/// Source code location information for error reporting.
///
/// Contains filename (as StringId), line/column positions, and optionally a line number for
/// extracting the preview line from source during traceback formatting.
///
/// To display the filename, the caller must provide access to the string storage.
#[derive(Clone, Copy, Default, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)]
pub struct CodeRange {
    /// Interned filename ID - look up in Interns to get the actual string.
    pub filename: StringId,
    /// Line number (0-indexed) for extracting preview from source. None if range spans multiple lines.
    preview_line: Option<u32>,
    start: CodeLoc,
    end: CodeLoc,
}

/// Custom Debug implementation to make displaying code much less verbose.
impl fmt::Debug for CodeRange {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `preview_line` is intentionally left out of the debug output.
        write!(
            f,
            "CodeRange{{filename: {:?}, start: {:?}, end: {:?}}}",
            self.filename, self.start, self.end
        )
    }
}

impl CodeRange {
fn new(filename: StringId, start: CodeLoc, end: CodeLoc, preview_line: Option<u32>) -> Self {
    Self {
        filename,
        preview_line,
        start,
        end,
    }
}

/// Returns the start position.
#[must_use]
pub fn start(&self) -> CodeLoc {
    self.start
}

/// Returns the end position.
#[must_use]
pub fn end(&self) -> CodeLoc {
    self.end
}

/// Returns the preview line number (0-indexed) if available.
#[must_use]
pub fn preview_line_number(&self) -> Option<u32> {
    self.preview_line
}
}

/// Errors that can occur during parsing or preparation of Python code.
#[derive(Debug, Clone)]
pub enum ParseError {
    /// Error in syntax
    Syntax {
        msg: Cow<'static, str>,
        position: CodeRange,
    },
    /// Missing feature from Monty, we hope to implement in the future.
    /// Message gets prefixed with "The monty syntax parser does not yet support ".
    NotImplemented {
        msg: Cow<'static, str>,
        position: CodeRange,
    },
    /// Missing feature with a custom full message (no prefix added).
    NotSupported {
        msg: Cow<'static, str>,
        position: CodeRange,
    },
    /// Import error (e.g., relative imports without a package).
    Import {
        msg: Cow<'static, str>,
        position: CodeRange,
    },
}

impl ParseError {
    /// Builds a `NotImplemented` error; `msg` names the unsupported feature only.
    fn not_implemented(msg: impl Into<Cow<'static, str>>, position: CodeRange) -> Self {
        Self::NotImplemented {
            msg: msg.into(),
            position,
        }
    }

    /// Builds a `NotSupported` error; `msg` is the complete message.
    fn not_supported(msg: impl Into<Cow<'static, str>>, position: CodeRange) -> Self {
        Self::NotSupported {
            msg: msg.into(),
            position,
        }
    }

    /// Builds an `Import` error.
    fn import_error(msg: impl Into<Cow<'static, str>>, position: CodeRange) -> Self {
        Self::Import {
            msg: msg.into(),
            position,
        }
    }

    /// Builds a `Syntax` error.
    pub(crate) fn syntax(msg: impl Into<Cow<'static, str>>, position: CodeRange) -> Self {
        Self::Syntax {
            msg: msg.into(),
            position,
        }
    }
}

impl ParseError {
    /// Converts this error into a `MontyException` with a single stack frame built
    /// from the error position, `filename` and `source`.
    ///
    /// - `Syntax` becomes `SyntaxError`.
    /// - `NotImplemented` becomes `NotImplementedError`, with the message prefixed by
    ///   "The monty syntax parser does not yet support ".
    /// - `NotSupported` becomes `NotImplementedError` with the message unchanged.
    /// - `Import` becomes `ImportError`.
    ///
    /// Each kind uses its own `StackFrame` constructor (`from_position_syntax_error`,
    /// `from_position`, `from_position_no_caret`).
    pub fn into_python_exc(self, filename: &str, source: &str) -> MontyException {
        match self {
            Self::Syntax { msg, position } => MontyException::new_full(
                ExcType::SyntaxError,
                Some(msg.into_owned()),
                vec![StackFrame::from_position_syntax_error(position, filename, source)],
            ),
            Self::NotImplemented { msg, position } => MontyException::new_full(
                ExcType::NotImplementedError,
                Some(format!("The monty syntax parser does not yet support {msg}")),
                vec![StackFrame::from_position(position, filename, source)],
            ),
            Self::NotSupported { msg, position } => MontyException::new_full(
                ExcType::NotImplementedError,
                Some(msg.into_owned()),
                vec![StackFrame::from_position(position, filename, source)],
            ),
            Self::Import { msg, position } => MontyException::new_full(
                ExcType::ImportError,
                Some(msg.into_owned()),
                vec![StackFrame::from_position_no_caret(position, filename, source)],
            ),
        }
    }
}

/// Parses an integer literal string into a `BigInt`, handling radix prefixes and underscores.
///
/// Supports Python integer literal formats:
/// - Decimal: `123`, `1_000_000`
/// - Hexadecimal: `0x1a2b`, `0X1A2B`
/// - Octal: `0o777`, `0O777`
/// - Binary: `0b1010`, `0B1010`
///
/// Returns `None` if the string cannot be parsed.
fn parse_int_literal(s: &str) -> Option<BigInt> {
    // Remove underscores (Python allows them as digit separators)
    let cleaned: String = s.chars().filter(|c| *c != '_').collect();

    // Detect radix from prefix. `str::get` is used instead of `[..2]` indexing so that
    // input whose byte offset 2 falls inside a multi-byte character yields `None`
    // (as documented) rather than panicking.
    if let (Some(prefix), Some(digits)) = (cleaned.get(..2), cleaned.get(2..)) {
        let radix = match prefix.as_bytes() {
            [b'0', b'x' | b'X'] => Some(16),
            [b'0', b'o' | b'O'] => Some(8),
            [b'0', b'b' | b'B'] => Some(2),
            _ => None,
        };
        if let Some(radix) = radix {
            return BigInt::parse_bytes(digits.as_bytes(), radix);
        }
    }

    // Default to decimal
    cleaned.parse::<BigInt>().ok()
}

================================================
FILE: crates/monty/src/prepare.rs
================================================
use std::collections::hash_map::Entry;

use ahash::{AHashMap, AHashSet};

use crate::{
    args::{ArgExprs, CallArg, CallKwarg},
    builtins::Builtins,
    expressions::{
        Callable, CmpOperator, Comprehension, DictItem, Expr, ExprLoc, Identifier, Literal, NameScope, Node, Operator,
        PreparedFunctionDef, PreparedNode, SequenceItem, UnpackTarget,
    },
    fstring::{FStringPart, FormatSpec},
    intern::{InternerBuilder, StringId},
    namespace::NamespaceId,
    parse::{CodeRange, ExceptHandler, ParseError, ParseNode, ParseResult, ParsedSignature, RawFunctionDef, Try},
    signature::Signature,
};

/// Result of the prepare phase, containing
everything needed to compile and execute code. /// /// This struct holds the outputs of name resolution and AST transformation: /// - The namespace size (number of slots needed at module level) /// - A mapping from variable names to their namespace indices (for ref-count testing) /// - The transformed AST nodes with all names resolved, ready for compilation /// - The string interner containing all interned identifiers and filenames pub struct PrepareResult { /// Number of items in the namespace (at module level, this IS the global namespace) pub namespace_size: usize, /// Maps variable names to their indices in the namespace. /// /// This map is used by: /// - ref-count tests for looking up variables by name /// - REPL incremental compilation to preserve stable global slot IDs across snippets pub name_map: AHashMap, /// The prepared AST nodes with all names resolved to namespace indices. /// Function definitions are inline as `PreparedFunctionDef` variants. pub nodes: Vec, /// The string interner containing all interned identifiers and filenames. pub interner: InternerBuilder, } /// Prepares parsed nodes for compilation by resolving names and building the initial namespace. /// /// The namespace will be converted to runtime Objects when execution begins and the heap is available. /// At module level, the local namespace IS the global namespace. pub(crate) fn prepare(parse_result: ParseResult, input_names: Vec) -> Result { let ParseResult { nodes, interner } = parse_result; let mut p = Prepare::new_module(input_names, &interner); let mut prepared_nodes = p.prepare_nodes(nodes)?; // In the root frame, the last expression is implicitly returned // if it's not None. This matches Python REPL behavior where the last expression // value is displayed/returned. 
if let Some(Node::Expr(expr_loc)) = prepared_nodes.last() && !expr_loc.expr.is_none() { let new_expr_loc = expr_loc.clone(); prepared_nodes.pop(); prepared_nodes.push(Node::Return(new_expr_loc)); } Ok(PrepareResult { namespace_size: p.namespace_size, name_map: p.name_map, nodes: prepared_nodes, interner, }) } /// Prepares parsed nodes for REPL-style incremental compilation using an existing global namespace map. /// /// Existing bindings keep their original namespace slots; any new names are appended with new slots. /// This ensures snippets can be compiled independently while sharing one persistent global namespace. pub(crate) fn prepare_with_existing_names( parse_result: ParseResult, existing_name_map: AHashMap, ) -> Result { let ParseResult { nodes, interner } = parse_result; let mut p = Prepare::new_module_with_name_map(existing_name_map, &interner); let mut prepared_nodes = p.prepare_nodes(nodes)?; // In the root frame, the last expression is implicitly returned to match REPL behavior. if let Some(Node::Expr(expr_loc)) = prepared_nodes.last() && !expr_loc.expr.is_none() { let new_expr_loc = expr_loc.clone(); prepared_nodes.pop(); prepared_nodes.push(Node::Return(new_expr_loc)); } Ok(PrepareResult { namespace_size: p.namespace_size, name_map: p.name_map, nodes: prepared_nodes, interner, }) } /// State machine for the preparation phase that transforms parsed AST nodes into a prepared form. /// /// This struct maintains the mapping between variable names and their namespace indices, /// and handles scope resolution. The preparation phase is crucial for converting string-based /// name lookups into efficient integer-indexed namespace access during compilation and execution. 
/// /// For functions, this struct also tracks: /// - Which variables are declared `global` (should resolve to module namespace) /// - Which variables are declared `nonlocal` (should resolve to enclosing scope via cells) /// - Which variables are assigned locally (determines local vs global scope) /// - Reference to the global name map for resolving global variable references /// - Enclosing scope information for closure analysis struct Prepare<'i> { /// Reference to the string interner for looking up names in error messages. interner: &'i InternerBuilder, /// Maps variable names to their indices in this scope's namespace vector name_map: AHashMap, /// Number of items in the namespace pub namespace_size: usize, /// Whether this is the module-level scope. /// At module level, all variables are global and `global` keyword is a no-op. is_module_scope: bool, /// Names declared as `global` in this scope. /// These names will resolve to the global namespace instead of local. global_names: AHashSet, /// Names that are assigned in this scope (from first-pass scan). /// Used in functions to determine if a variable is local (assigned) or global (only read). assigned_names: AHashSet, /// Names that have been assigned so far during the second pass (in order). /// Used to produce the correct error message for `global x` when x was assigned before. names_assigned_in_order: AHashSet, /// Copy of the module-level global name map. /// Used by functions to resolve global variable references. /// None at module level (not needed since all names are global there). global_name_map: Option>, /// Names that exist as locals in the enclosing function scope. /// Used to validate `nonlocal` declarations and resolve captured variables. /// None at module level or when there's no enclosing function. enclosing_locals: Option>, /// Maps free variable names (from nonlocal declarations and implicit captures) to their /// index in the free_vars vector. 
Pre-populated with nonlocal names at initialization, /// then extended with implicit captures discovered during preparation. free_var_map: AHashMap, /// Maps cell variable names to their index in the owned_cells vector. /// Pre-populated with cell_var names at initialization (excluding pass-through variables /// that are both nonlocal and captured by nested functions), then extended as new /// captures are discovered during nested function preparation. cell_var_map: AHashMap, /// Names that were resolved as `LocalUnassigned` in step 8 of `get_id`. /// /// These names are never assigned and not parameters - they were only referenced /// (e.g., external function names). Tracking them prevents step 6 from incorrectly /// classifying subsequent references as `Local` (like parameters) when the name /// appears in `name_map` from a previous `get_id` call. unassigned_ref_names: AHashSet, } impl<'i> Prepare<'i> { /// Creates a new Prepare instance for module-level code. /// /// At module level, all variables are global. The `global` keyword is a no-op /// since all variables are already in the global namespace. /// /// # Arguments /// * `input_names` - Names that should be pre-registered in the namespace (e.g., input variables) /// * `interner` - Reference to the string interner for looking up names fn new_module(input_names: Vec, interner: &'i InternerBuilder) -> Self { let mut name_map = AHashMap::with_capacity(input_names.len()); for (index, name) in input_names.into_iter().enumerate() { name_map.insert(name, NamespaceId::new(index)); } let namespace_size = name_map.len(); Self { interner, name_map, namespace_size, is_module_scope: true, global_names: AHashSet::new(), assigned_names: AHashSet::new(), names_assigned_in_order: AHashSet::new(), global_name_map: None, enclosing_locals: None, free_var_map: AHashMap::new(), cell_var_map: AHashMap::new(), unassigned_ref_names: AHashSet::new(), } } /// Creates a module-scope Prepare instance from an existing global name map. 
///
    /// Used by incremental REPL compilation to keep stable slot assignments across snippets.
    fn new_module_with_name_map(name_map: AHashMap, interner: &'i InternerBuilder) -> Self {
        // One past the highest slot already in use (0 for an empty map), so new names are
        // appended after every existing slot even if the existing indices have gaps.
        let namespace_size = name_map
            .values()
            .map(|id| id.index())
            .max()
            .map_or(0, |max_idx| max_idx + 1);
        Self {
            interner,
            name_map,
            namespace_size,
            is_module_scope: true,
            global_names: AHashSet::new(),
            assigned_names: AHashSet::new(),
            names_assigned_in_order: AHashSet::new(),
            global_name_map: None,
            enclosing_locals: None,
            free_var_map: AHashMap::new(),
            cell_var_map: AHashMap::new(),
            unassigned_ref_names: AHashSet::new(),
        }
    }

    /// Creates a new Prepare instance for function-level code.
    ///
    /// Pre-populates `free_var_map` with nonlocal declarations and implicit captures,
    /// and `cell_var_map` with cell variables (excluding pass-through variables).
    ///
    /// # Arguments
    /// * `capacity` - Expected number of nodes
    /// * `params` - Function parameter StringIds (pre-registered in namespace)
    /// * `assigned_names` - Names that are assigned in this function (from first-pass scan)
    /// * `global_names` - Names declared as `global` in this function
    /// * `nonlocal_names` - Names declared as `nonlocal` in this function
    /// * `implicit_captures` - Names captured from enclosing scope without explicit nonlocal
    /// * `global_name_map` - Copy of the module-level name map for global resolution
    /// * `enclosing_locals` - Names that exist as locals in the enclosing function (for nonlocal resolution)
    /// * `cell_var_names` - Names that are captured by nested functions (must be stored in cells)
    /// * `interner` - Reference to the string interner for looking up names
    #[expect(clippy::too_many_arguments)]
    fn new_function(
        capacity: usize,
        params: &[StringId],
        assigned_names: AHashSet,
        global_names: AHashSet,
        nonlocal_names: AHashSet,
        implicit_captures: AHashSet,
        global_name_map: AHashMap,
        enclosing_locals: Option>,
        cell_var_names: AHashSet,
        interner: &'i InternerBuilder,
    ) -> Self {
        // `capacity` is only a sizing hint for the name map.
        let mut name_map = AHashMap::with_capacity(capacity);
        // Parameters occupy slots 0..params.len() in declaration order.
        for (index, string_id) in params.iter().enumerate() {
            name_map.insert(interner.get_str(*string_id).to_string(), NamespaceId::new(index));
        }
        let namespace_size = name_map.len();

        // Namespace layout: [params][cell_vars][free_vars][locals]
        // This predictable layout allows sequential namespace construction at runtime.

        // Pre-populate cell_var_map with cell variables FIRST (right after params).
        // Excludes pass-through variables (names that are both nonlocal and captured by
        // nested functions - these stay in free_var_map since we receive the cell, not create it).
        // NOTE: We intentionally do NOT add these to name_map here, because the scope
        // validation checks name_map to detect "used before declaration" errors
        let mut cell_var_map = AHashMap::with_capacity(cell_var_names.len());
        // Rebind as mutable: from here on this is the running "next free slot" counter.
        let mut namespace_size = namespace_size;
        for name in cell_var_names {
            if !nonlocal_names.contains(&name) && !implicit_captures.contains(&name) {
                let slot = namespace_size;
                namespace_size += 1;
                cell_var_map.insert(name, NamespaceId::new(slot));
            }
        }

        // Pre-populate free_var_map with nonlocal declarations AND implicit captures SECOND (after cell_vars).
        // Each entry maps name -> namespace slot index where the cell reference will be stored.
        // NOTE: We intentionally do NOT add these to name_map here, because the nonlocal
        // validation in prepare_nodes checks name_map to detect "used before nonlocal declaration"
        let free_var_capacity = nonlocal_names.len() + implicit_captures.len();
        let mut free_var_map = AHashMap::with_capacity(free_var_capacity);
        for name in nonlocal_names {
            let slot = namespace_size;
            namespace_size += 1;
            free_var_map.insert(name, NamespaceId::new(slot));
        }
        // Implicit captures (variables accessed from enclosing scope without explicit nonlocal)
        // NOTE(review): a name present in both sets would be given a second slot here and
        // overwrite the first entry - presumably the two sets are disjoint; verify.
        for name in implicit_captures {
            let slot = namespace_size;
            namespace_size += 1;
            free_var_map.insert(name, NamespaceId::new(slot));
        }

        Self {
            interner,
            name_map,
            namespace_size,
            is_module_scope: false,
            global_names,
            assigned_names,
            names_assigned_in_order: AHashSet::new(),
            global_name_map: Some(global_name_map),
            enclosing_locals,
            free_var_map,
            cell_var_map,
            unassigned_ref_names: AHashSet::new(),
        }
    }

    /// Recursively prepares a sequence of AST nodes by resolving names and transforming expressions.
/// /// This method processes each node type differently: /// - Resolves variable names to namespace indices /// - Transforms function calls from identifier-based to builtin type-based /// - Handles special cases like implicit returns in root frames /// - Validates that names used in attribute calls are already defined /// /// # Returns /// A vector of prepared nodes ready for compilation fn prepare_nodes(&mut self, nodes: Vec) -> Result, ParseError> { let nodes_len = nodes.len(); let mut new_nodes = Vec::with_capacity(nodes_len); for node in nodes { match node { Node::Pass => (), Node::Expr(expr) => new_nodes.push(Node::Expr(self.prepare_expression(expr)?)), Node::Return(expr) => new_nodes.push(Node::Return(self.prepare_expression(expr)?)), Node::ReturnNone => new_nodes.push(Node::ReturnNone), Node::Raise(exc) => { let expr = match exc { Some(expr) => { let prepared = self.prepare_expression(expr)?; match prepared.expr { // Handle raising a builtin exception type without instantiation, // e.g. `raise TypeError`. Transform into `raise TypeError()` // so the exception is properly instantiated before being raised. 
Expr::Builtin(b) => { let call_expr = Expr::Call { callable: Callable::Builtin(b), args: Box::new(ArgExprs::Empty), }; Some(ExprLoc::new(prepared.position, call_expr)) } _ => Some(prepared), } } None => None, }; new_nodes.push(Node::Raise(expr)); } Node::Assert { test, msg } => { let test = self.prepare_expression(test)?; let msg = match msg { Some(m) => Some(self.prepare_expression(m)?), None => None, }; new_nodes.push(Node::Assert { test, msg }); } Node::Assign { target, object } => { let object = self.prepare_expression(object)?; // Track that this name was assigned before we call get_id self.names_assigned_in_order .insert(self.interner.get_str(target.name_id).to_string()); let (target, _) = self.get_id(target); new_nodes.push(Node::Assign { target, object }); } Node::UnpackAssign { targets, targets_position, object, } => { let object = self.prepare_expression(object)?; // Recursively resolve all targets (supports nested tuples) let targets = targets .into_iter() .map(|target| self.prepare_unpack_target(target)) .collect(); new_nodes.push(Node::UnpackAssign { targets, targets_position, object, }); } Node::OpAssign { target, op, object } => { // Track that this name was assigned self.names_assigned_in_order .insert(self.interner.get_str(target.name_id).to_string()); let target = self.get_id(target).0; let object = self.prepare_expression(object)?; new_nodes.push(Node::OpAssign { target, op, object }); } Node::SubscriptOpAssign { target, index, op, object, target_position, } => { let target = self.get_id(target).0; let index = self.prepare_expression(index)?; let object = self.prepare_expression(object)?; new_nodes.push(Node::SubscriptOpAssign { target, index, op, object, target_position, }); } Node::SubscriptAssign { target, index, value, target_position, } => { // SubscriptAssign doesn't assign to the target itself, just modifies it let target = self.get_id(target).0; let index = self.prepare_expression(index)?; let value = self.prepare_expression(value)?; 
new_nodes.push(Node::SubscriptAssign { target, index, value, target_position, }); } Node::AttrAssign { object, attr, target_position, value, } => { // AttrAssign doesn't assign to the object itself, just modifies its attribute let object = self.prepare_expression(object)?; let value = self.prepare_expression(value)?; new_nodes.push(Node::AttrAssign { object, attr, target_position, value, }); } Node::For { target, iter, body, or_else, } => { // Prepare target with normal scoping (not comprehension isolation) let target = self.prepare_unpack_target(target); new_nodes.push(Node::For { target, iter: self.prepare_expression(iter)?, body: self.prepare_nodes(body)?, or_else: self.prepare_nodes(or_else)?, }); } Node::Break { position } => { new_nodes.push(Node::Break { position }); } Node::Continue { position } => { new_nodes.push(Node::Continue { position }); } Node::While { test, body, or_else } => { new_nodes.push(Node::While { test: self.prepare_expression(test)?, body: self.prepare_nodes(body)?, or_else: self.prepare_nodes(or_else)?, }); } Node::If { test, body, or_else } => { let test = self.prepare_expression(test)?; let body = self.prepare_nodes(body)?; let or_else = self.prepare_nodes(or_else)?; new_nodes.push(Node::If { test, body, or_else }); } Node::FunctionDef(RawFunctionDef { name, signature, body, is_async, }) => { let func_node = self.prepare_function_def(name, &signature, body, is_async)?; new_nodes.push(func_node); } Node::Global { names, position } => { // At module level, `global` is a no-op since all variables are already global. // In functions, the global declarations are already collected in the first pass // (see prepare_function_def), so this is also a no-op at this point. // The actual effect happens in get_id where we check global_names. 
if !self.is_module_scope { // Validate that names weren't already used/assigned before `global` declaration for string_id in names { let name_str = self.interner.get_str(string_id); if self.names_assigned_in_order.contains(name_str) { // Name was assigned before the global declaration return Err(ParseError::syntax( format!("name '{name_str}' is assigned to before global declaration"), position, )); } else if self.name_map.contains_key(name_str) { // Name was used (but not assigned) before the global declaration return Err(ParseError::syntax( format!("name '{name_str}' is used prior to global declaration"), position, )); } } } // Global statements don't produce any runtime nodes } Node::Nonlocal { names, position } => { // Nonlocal can only be used inside a function, not at module level if self.is_module_scope { return Err(ParseError::syntax( "nonlocal declaration not allowed at module level", position, )); } // Validate that names weren't already used/assigned before `nonlocal` declaration // and that the binding exists in an enclosing scope for string_id in names { let name_str = self.interner.get_str(string_id); if self.names_assigned_in_order.contains(name_str) { // Name was assigned before the nonlocal declaration return Err(ParseError::syntax( format!("name '{name_str}' is assigned to before nonlocal declaration"), position, )); } else if self.name_map.contains_key(name_str) { // Name was used (but not assigned) before the nonlocal declaration return Err(ParseError::syntax( format!("name '{name_str}' is used prior to nonlocal declaration"), position, )); } // Validate that the binding exists in an enclosing scope if let Some(ref enclosing) = self.enclosing_locals { if !enclosing.contains(name_str) { return Err(ParseError::syntax( format!("no binding for nonlocal '{name_str}' found"), position, )); } } else { // No enclosing scope (function defined at module level) // The nonlocal must reference something in an enclosing function return Err(ParseError::syntax( 
format!("no binding for nonlocal '{name_str}' found"), position, )); } } // Nonlocal statements don't produce any runtime nodes } Node::Try(Try { body, handlers, or_else, finally, }) => { let body = self.prepare_nodes(body)?; let handlers = handlers .into_iter() .map(|h| self.prepare_except_handler(h)) .collect::, _>>()?; let or_else = self.prepare_nodes(or_else)?; let finally = self.prepare_nodes(finally)?; new_nodes.push(Node::Try(Try { body, handlers, or_else, finally, })); } Node::Import { module_name, binding } => { // Resolve the binding identifier to get the namespace slot let (resolved_binding, _) = self.get_id(binding); new_nodes.push(Node::Import { module_name, binding: resolved_binding, }); } Node::ImportFrom { module_name, names, position, } => { // Resolve each binding identifier to get namespace slots let resolved_names = names .into_iter() .map(|(import_name, binding)| { let (resolved_binding, _) = self.get_id(binding); (import_name, resolved_binding) }) .collect(); new_nodes.push(Node::ImportFrom { module_name, names: resolved_names, position, }); } } } Ok(new_nodes) } /// Prepares an exception handler by resolving names in the exception type and body. /// /// The exception variable (if present) is treated as an assigned name in the current scope. fn prepare_except_handler( &mut self, handler: ExceptHandler, ) -> Result, ParseError> { let exc_type = match handler.exc_type { Some(expr) => Some(self.prepare_expression(expr)?), None => None, }; // The exception variable binding (e.g., `as e:`) is an assignment let name = match handler.name { Some(ident) => { // Track that this name was assigned self.names_assigned_in_order .insert(self.interner.get_str(ident.name_id).to_string()); Some(self.get_id(ident).0) } None => None, }; let body = self.prepare_nodes(handler.body)?; Ok(ExceptHandler { exc_type, name, body }) } /// Prepares an expression by resolving names, transforming calls, and applying optimizations. 
///
    /// Key transformations performed:
    /// - Name lookups are resolved to namespace indices via `get_id`
    /// - Function calls are resolved from identifiers to builtin types
    /// - Attribute calls validate that the object is already defined (not a new name)
    /// - Lists and tuples are recursively prepared
    /// - Modulo equality patterns like `x % n == k` (constant right-hand side) are optimized to
    ///   `CmpOperator::ModEq`
    ///
    /// # Errors
    /// Returns a NameError if an attribute call references an undefined variable
    ///
    /// NOTE(review): no such check is visible in this function - the `AttrCall` arm only
    /// recurses into the object and arguments. Errors returned here are whatever the nested
    /// `prepare_*` calls propagate; confirm whether the NameError wording is still accurate.
    fn prepare_expression(&mut self, loc_expr: ExprLoc) -> Result {
        let ExprLoc { position, expr } = loc_expr;
        let expr = match expr {
            Expr::Literal(object) => Expr::Literal(object),
            Expr::Builtin(callable) => Expr::Builtin(callable),
            Expr::Name(name) => self.resolve_name_or_builtin(name),
            Expr::Op { left, op, right } => Expr::Op {
                left: Box::new(self.prepare_expression(*left)?),
                op,
                right: Box::new(self.prepare_expression(*right)?),
            },
            Expr::CmpOp { left, op, right } => Expr::CmpOp {
                left: Box::new(self.prepare_expression(*left)?),
                op,
                right: Box::new(self.prepare_expression(*right)?),
            },
            Expr::ChainCmp { left, comparisons } => Expr::ChainCmp {
                left: Box::new(self.prepare_expression(*left)?),
                comparisons: comparisons
                    .into_iter()
                    .map(|(op, expr)| Ok((op, self.prepare_expression(expr)?)))
                    .collect::, _>>()?,
            },
            Expr::Call { callable, mut args } => {
                // Prepare the arguments
                args.prepare_args(|expr| self.prepare_expression(expr))?;
                // For Name callables, resolve the identifier in the namespace
                // Don't error here if undefined - let runtime raise NameError with proper traceback
                let callable = match callable {
                    Callable::Name(ident) => match self.resolve_name_or_builtin(ident) {
                        Expr::Builtin(b) => Callable::Builtin(b),
                        Expr::Name(resolved) => Callable::Name(resolved),
                        _ => unreachable!("resolve_name_or_builtin returns Name or Builtin"),
                    },
                    other @ Callable::Builtin(_) => other,
                };
                Expr::Call { callable, args }
            }
            Expr::AttrCall { object, attr, mut args } => {
                // Prepare the object expression (supports chained access like a.b.c.method())
                let object = Box::new(self.prepare_expression(*object)?);
                args.prepare_args(|expr| self.prepare_expression(expr))?;
                Expr::AttrCall { object, attr, args }
            }
            Expr::IndirectCall { callable, mut args } => {
                // Prepare the callable expression (e.g., lambda or any expression returning a callable)
                let callable = Box::new(self.prepare_expression(*callable)?);
                args.prepare_args(|expr| self.prepare_expression(expr))?;
                Expr::IndirectCall { callable, args }
            }
            Expr::AttrGet { object, attr } => {
                // Prepare the object expression (supports chained access like a.b.c)
                let object = Box::new(self.prepare_expression(*object)?);
                Expr::AttrGet { object, attr }
            }
            Expr::List(elements) => {
                let items = elements
                    .into_iter()
                    .map(|item| self.prepare_sequence_item(item))
                    .collect::>()?;
                Expr::List(items)
            }
            Expr::Tuple(elements) => {
                let items = elements
                    .into_iter()
                    .map(|item| self.prepare_sequence_item(item))
                    .collect::>()?;
                Expr::Tuple(items)
            }
            Expr::Subscript { object, index } => Expr::Subscript {
                object: Box::new(self.prepare_expression(*object)?),
                index: Box::new(self.prepare_expression(*index)?),
            },
            Expr::Dict(dict_items) => {
                let prepared = dict_items
                    .into_iter()
                    .map(|item| match item {
                        DictItem::Pair(k, v) => {
                            Ok(DictItem::Pair(self.prepare_expression(k)?, self.prepare_expression(v)?))
                        }
                        DictItem::Unpack(e) => Ok(DictItem::Unpack(self.prepare_expression(e)?)),
                    })
                    .collect::>()?;
                Expr::Dict(prepared)
            }
            Expr::Set(elements) => {
                let items = elements
                    .into_iter()
                    .map(|item| self.prepare_sequence_item(item))
                    .collect::>()?;
                Expr::Set(items)
            }
            Expr::Not(operand) => Expr::Not(Box::new(self.prepare_expression(*operand)?)),
            Expr::UnaryMinus(operand) => Expr::UnaryMinus(Box::new(self.prepare_expression(*operand)?)),
            Expr::UnaryPlus(operand) => Expr::UnaryPlus(Box::new(self.prepare_expression(*operand)?)),
            Expr::UnaryInvert(operand) => Expr::UnaryInvert(Box::new(self.prepare_expression(*operand)?)),
            Expr::FString(parts) => {
                let prepared_parts = parts
                    .into_iter()
                    .map(|part| self.prepare_fstring_part(part))
                    .collect::, ParseError>>()?;
                Expr::FString(prepared_parts)
            }
            Expr::IfElse { test, body, orelse } => Expr::IfElse {
                test: Box::new(self.prepare_expression(*test)?),
                body: Box::new(self.prepare_expression(*body)?),
                orelse: Box::new(self.prepare_expression(*orelse)?),
            },
            Expr::ListComp { elt, generators } => {
                let (generators, elt, _) = self.prepare_comprehension(generators, Some(*elt), None)?;
                Expr::ListComp {
                    elt: Box::new(elt.expect("list comp must have elt")),
                    generators,
                }
            }
            Expr::SetComp { elt, generators } => {
                let (generators, elt, _) = self.prepare_comprehension(generators, Some(*elt), None)?;
                Expr::SetComp {
                    elt: Box::new(elt.expect("set comp must have elt")),
                    generators,
                }
            }
            Expr::DictComp { key, value, generators } => {
                let (generators, _, key_value) =
                    self.prepare_comprehension(generators, None, Some((*key, *value)))?;
                let (key, value) = key_value.expect("dict comp must have key/value");
                Expr::DictComp {
                    key: Box::new(key),
                    value: Box::new(value),
                    generators,
                }
            }
            Expr::LambdaRaw {
                name_id,
                signature,
                body,
            } => {
                // Convert the raw lambda into a prepared lambda expression
                // (early return: `prepare_lambda` already yields the final located expression,
                // so the ModEq rewrite below is skipped for lambdas)
                return self.prepare_lambda(name_id, &signature, &body, position);
            }
            Expr::Lambda { .. } => {
                // Lambda should only be created during prepare, never during parsing
                unreachable!("Expr::Lambda should not exist before prepare phase")
            }
            Expr::Slice { lower, upper, step } => Expr::Slice {
                lower: lower.map(|e| self.prepare_expression(*e)).transpose()?.map(Box::new),
                upper: upper.map(|e| self.prepare_expression(*e)).transpose()?.map(Box::new),
                step: step.map(|e| self.prepare_expression(*e)).transpose()?.map(Box::new),
            },
            Expr::Named { target, value } => {
                // Value first, so the right-hand side does not see the walrus binding.
                let value = Box::new(self.prepare_expression(*value)?);
                // Register the target as assigned in this scope
                self.names_assigned_in_order
                    .insert(self.interner.get_str(target.name_id).to_string());
                let (resolved_target, _) = self.get_id(target);
                Expr::Named {
                    target: resolved_target,
                    value,
                }
            }
            Expr::Await(value) => Expr::Await(Box::new(self.prepare_expression(*value)?)),
        };

        // Optimization: Transform `(x % n) == value` with any constant right-hand side into a
        // specialized ModEq operator.
        // This is a common pattern in competitive programming (e.g., FizzBuzz checks like `i % 3 == 0`)
        // and can be executed more efficiently with a single modulo operation + comparison
        // instead of separate modulo, then equality check.
        //
        // Only an integer literal on the right of `==` matches. The rewritten node keeps the
        // `%` operands as its left/right and carries the literal inside `ModEq`; it is
        // located at the position of the `%` sub-expression (`left.position`), not at the
        // position of the comparison.
        if let Expr::CmpOp { left, op, right } = &expr
            && op == &CmpOperator::Eq
            && let Expr::Literal(Literal::Int(value)) = right.expr
            && let Expr::Op {
                left: left2,
                op,
                right: right2,
            } = &left.expr
            && op == &Operator::Mod
        {
            let new_expr = Expr::CmpOp {
                left: left2.clone(),
                op: CmpOperator::ModEq(value),
                right: right2.clone(),
            };
            return Ok(ExprLoc {
                position: left.position,
                expr: new_expr,
            });
        }

        Ok(ExprLoc { position, expr })
    }

    /// Resolves a name to either `Expr::Builtin` or `Expr::Name` with scope-aware builtin detection.
    ///
    /// Python's name resolution follows LEGB order (Local, Enclosing, Global, Builtin).
    /// Builtins are only used when the name is not found in any other scope. This method
    /// ensures that local assignments (e.g., `int = 42`) properly shadow builtin names.
///
    /// We check before calling `get_id` to avoid allocating unnecessary namespace slots.
    /// At module level, a slot allocated for an unassigned builtin would leak into
    /// `global_name_map` for nested functions, causing incorrect resolution.
    ///
    /// Returns `Expr::Builtin` only when the name is not bound by anything this scope knows
    /// about AND the name parses as a builtin; in every other case the identifier is
    /// resolved through `get_id` and returned as `Expr::Name`.
    fn resolve_name_or_builtin(&mut self, name: Identifier) -> Expr {
        let name_str = self.interner.get_str(name.name_id);

        // Check if the name is assigned in the current scope. If so, it shadows
        // any builtin with the same name.
        let is_locally_assigned = if self.is_module_scope {
            // Module scope: sequential — only names assigned SO FAR shadow builtins
            self.names_assigned_in_order.contains(name_str)
        } else {
            // Function scope: lexical — ANY assignment in the function body makes
            // the name local for the entire function
            self.assigned_names.contains(name_str)
        };

        if !is_locally_assigned {
            // In function scope, also check if the name is bound by other mechanisms
            // (global declaration, parameter, closure capture, enclosing/global scope).
            // Only fall back to builtins if the name is truly unresolved.
            // (At module scope this is always false, so only the assignment check above
            // can prevent a builtin match there.)
            let is_otherwise_bound = !self.is_module_scope
                && (self.global_names.contains(name_str)
                    || self.free_var_map.contains_key(name_str)
                    || self.cell_var_map.contains_key(name_str)
                    || self.name_map.contains_key(name_str)
                    || self.enclosing_locals.as_ref().is_some_and(|l| l.contains(name_str))
                    || self.global_name_map.as_ref().is_some_and(|m| m.contains_key(name_str)));

            // A name that does not parse as a builtin falls through to `get_id` below.
            if !is_otherwise_bound && let Ok(builtin) = name_str.parse::() {
                return Expr::Builtin(builtin);
            }
        }

        Expr::Name(self.get_id(name).0)
    }

    /// Prepares a `SequenceItem` by recursively preparing its inner expression.
    ///
    /// Both `Value` and `Unpack` variants need their expressions prepared
    /// (name resolution, scope analysis, builtin detection, etc.).
fn prepare_sequence_item(&mut self, item: SequenceItem) -> Result {
        // The variant is preserved; only the wrapped expression is rewritten.
        match item {
            SequenceItem::Value(e) => Ok(SequenceItem::Value(self.prepare_expression(e)?)),
            SequenceItem::Unpack(e) => Ok(SequenceItem::Unpack(self.prepare_expression(e)?)),
        }
    }

    /// Prepares a comprehension with scope isolation for loop variables.
    ///
    /// Comprehension loop variables are isolated from the enclosing scope - they do not
    /// leak after the comprehension completes. CPython scoping rules require:
    ///
    /// 1. The FIRST generator's iter is evaluated in the enclosing scope
    /// 2. ALL loop variables from ALL generators are then shadowed as local
    /// 3. Subsequent generators' iters see all loop vars as local (even if unassigned)
    ///
    /// This means `[y for x in [1] for y in z for z in [[2]]]` raises UnboundLocalError
    /// because `z` is treated as local (it's a loop var in generator 3) when evaluating
    /// generator 2's iter.
    ///
    /// For list/set comprehensions, pass `elt` as Some and `key_value` as None.
    /// For dict comprehensions, pass `elt` as None and `key_value` as Some((key, value)).
    ///
    /// Returns the prepared generators together with the prepared `elt` / `key_value`,
    /// each `Some` exactly when the corresponding argument was `Some`.
    ///
    /// NOTE: on an early `?` return the saved scope state is NOT restored.
    #[expect(clippy::type_complexity)]
    fn prepare_comprehension(
        &mut self,
        generators: Vec,
        elt: Option,
        key_value: Option<(ExprLoc, ExprLoc)>,
    ) -> Result<(Vec, Option, Option<(ExprLoc, ExprLoc)>), ParseError> {
        // Per PEP 572, walrus operators inside comprehensions bind in the ENCLOSING scope.
        // Pre-register walrus targets before saving scope state, so they persist after restore.
        let mut walrus_targets: AHashSet = AHashSet::new();
        if let Some(ref e) = elt {
            collect_assigned_names_from_expr(e, &mut walrus_targets, self.interner);
        }
        if let Some((ref k, ref v)) = key_value {
            collect_assigned_names_from_expr(k, &mut walrus_targets, self.interner);
            collect_assigned_names_from_expr(v, &mut walrus_targets, self.interner);
        }
        for generator in &generators {
            // Note: we don't scan iter expressions here because walrus in iterable is not allowed
            for cond in &generator.ifs {
                collect_assigned_names_from_expr(cond, &mut walrus_targets, self.interner);
            }
        }

        // Pre-allocate slots for walrus targets in the enclosing scope
        // (names already present in `name_map` keep their slot and are left untouched here)
        for name in &walrus_targets {
            if !self.name_map.contains_key(name) {
                let slot = NamespaceId::new(self.namespace_size);
                self.namespace_size += 1;
                self.name_map.insert(name.clone(), slot);
                self.names_assigned_in_order.insert(name.clone());
            }
        }

        // Save current scope state for isolation
        // (`namespace_size` is deliberately not part of the snapshot: slots handed out to
        // loop variables stay reserved after the comprehension, only the name -> slot
        // mappings are rolled back)
        let saved_name_map = self.name_map.clone();
        let saved_assigned_names = self.names_assigned_in_order.clone();
        let saved_free_var_map = self.free_var_map.clone();
        let saved_cell_var_map = self.cell_var_map.clone();
        let saved_enclosing_locals = self.enclosing_locals.clone();
        let saved_unassigned_ref_names = self.unassigned_ref_names.clone();

        // Step 1: Prepare first generator's iter in enclosing scope (before any shadowing)
        let mut generators_iter = generators.into_iter();
        let first_gen = generators_iter
            .next()
            .expect("comprehension must have at least one generator");
        let first_iter = self.prepare_expression(first_gen.iter)?;

        // Step 2: Collect and shadow ALL loop variable names from ALL generators.
        // This must happen BEFORE evaluating any subsequent generator's iter expression.
        // We allocate slots but don't mark them as "assigned" yet - this causes
        // UnboundLocalError if a later generator's iter references an earlier-declared
        // but not-yet-assigned loop variable.
        let first_target = self.prepare_unpack_target_for_comprehension(first_gen.target);

        // Collect remaining generators so we can pre-shadow their targets
        let remaining_gens: Vec = generators_iter.collect();

        // Pre-shadow ALL remaining loop variables before evaluating their iters.
        // This is the key CPython behavior: all loop vars are local to the comprehension,
        // so referencing a later loop var in an earlier iter raises UnboundLocalError.
        let mut preshadowed_targets: Vec = Vec::with_capacity(remaining_gens.len());
        for generator in &remaining_gens {
            preshadowed_targets.push(self.prepare_unpack_target_shadow_only(generator.target.clone()));
        }

        // Prepare first generator's filters (can see first loop variable)
        let first_ifs = first_gen
            .ifs
            .into_iter()
            .map(|cond| self.prepare_expression(cond))
            .collect::, _>>()?;

        let mut prepared_generators = Vec::with_capacity(1 + remaining_gens.len());
        prepared_generators.push(Comprehension {
            target: first_target,
            iter: first_iter,
            ifs: first_ifs,
        });

        // Step 3: Process remaining generators - their iters now see all loop vars as local
        for (generator, preshadowed_target) in remaining_gens.into_iter().zip(preshadowed_targets) {
            let iter = self.prepare_expression(generator.iter)?;
            let ifs = generator
                .ifs
                .into_iter()
                .map(|cond| self.prepare_expression(cond))
                .collect::, _>>()?;
            prepared_generators.push(Comprehension {
                target: preshadowed_target,
                iter,
                ifs,
            });
        }

        // Prepare the element expression(s) - can see all loop variables
        let prepared_elt = match elt {
            Some(e) => Some(self.prepare_expression(e)?),
            None => None,
        };
        let prepared_key_value = match key_value {
            Some((k, v)) => Some((self.prepare_expression(k)?, self.prepare_expression(v)?)),
            None => None,
        };

        // Restore scope state - loop variables do not leak to enclosing scope
        self.name_map = saved_name_map;
        self.names_assigned_in_order = saved_assigned_names;
        self.free_var_map = saved_free_var_map;
        self.cell_var_map = saved_cell_var_map;
        self.enclosing_locals = saved_enclosing_locals;
        self.unassigned_ref_names = saved_unassigned_ref_names;

        Ok((prepared_generators, prepared_elt, prepared_key_value))
    }

    /// Prepares an unpack target by resolving identifiers recursively.
    ///
    /// Handles both single identifiers and nested tuples like `(a, b), c`.
    /// Every identifier reached is recorded in `names_assigned_in_order` before it is
    /// resolved through `get_id`.
    fn prepare_unpack_target(&mut self, target: UnpackTarget) -> UnpackTarget {
        match target {
            UnpackTarget::Name(ident) => {
                self.names_assigned_in_order
                    .insert(self.interner.get_str(ident.name_id).to_string());
                UnpackTarget::Name(self.get_id(ident).0)
            }
            UnpackTarget::Starred(ident) => {
                self.names_assigned_in_order
                    .insert(self.interner.get_str(ident.name_id).to_string());
                UnpackTarget::Starred(self.get_id(ident).0)
            }
            UnpackTarget::Tuple { targets, position } => {
                let resolved_targets: Vec = targets
                    .into_iter()
                    .map(|t| self.prepare_unpack_target(t)) // Recursive call
                    .collect();
                UnpackTarget::Tuple {
                    targets: resolved_targets,
                    position,
                }
            }
        }
    }

    /// Prepares an unpack target for comprehension by allocating fresh namespace slots.
    ///
    /// Unlike regular unpack targets, comprehension targets need new slots to shadow
    /// any existing bindings with the same name.
fn prepare_unpack_target_for_comprehension(&mut self, target: UnpackTarget) -> UnpackTarget { match target { UnpackTarget::Name(ident) => { let name_str = self.interner.get_str(ident.name_id).to_string(); let comp_var_id = NamespaceId::new(self.namespace_size); self.namespace_size += 1; // Shadow any existing binding self.shadow_for_comprehension(&name_str, comp_var_id); UnpackTarget::Name(Identifier::new_with_scope( ident.name_id, ident.position, comp_var_id, NameScope::Local, )) } UnpackTarget::Starred(ident) => { let name_str = self.interner.get_str(ident.name_id).to_string(); let comp_var_id = NamespaceId::new(self.namespace_size); self.namespace_size += 1; // Shadow any existing binding self.shadow_for_comprehension(&name_str, comp_var_id); UnpackTarget::Starred(Identifier::new_with_scope( ident.name_id, ident.position, comp_var_id, NameScope::Local, )) } UnpackTarget::Tuple { targets, position } => { let resolved_targets: Vec = targets .into_iter() .map(|t| self.prepare_unpack_target_for_comprehension(t)) // Recursive call .collect(); UnpackTarget::Tuple { targets: resolved_targets, position, } } } } /// Pre-shadows an unpack target for comprehension scoping. /// /// Allocates namespace slots without marking as assigned, causing UnboundLocalError /// if accessed before assignment. 
fn prepare_unpack_target_shadow_only(&mut self, target: UnpackTarget) -> UnpackTarget { match target { UnpackTarget::Name(ident) => { let name_str = self.interner.get_str(ident.name_id).to_string(); let comp_var_id = NamespaceId::new(self.namespace_size); self.namespace_size += 1; // Shadow but do NOT add to names_assigned_in_order yet self.name_map.insert(name_str.clone(), comp_var_id); self.free_var_map.remove(&name_str); self.cell_var_map.remove(&name_str); if let Some(ref mut enclosing) = self.enclosing_locals { enclosing.remove(&name_str); } UnpackTarget::Name(Identifier::new_with_scope( ident.name_id, ident.position, comp_var_id, NameScope::Local, )) } UnpackTarget::Starred(ident) => { let name_str = self.interner.get_str(ident.name_id).to_string(); let comp_var_id = NamespaceId::new(self.namespace_size); self.namespace_size += 1; // Shadow but do NOT add to names_assigned_in_order yet self.name_map.insert(name_str.clone(), comp_var_id); self.free_var_map.remove(&name_str); self.cell_var_map.remove(&name_str); if let Some(ref mut enclosing) = self.enclosing_locals { enclosing.remove(&name_str); } UnpackTarget::Starred(Identifier::new_with_scope( ident.name_id, ident.position, comp_var_id, NameScope::Local, )) } UnpackTarget::Tuple { targets, position } => { let resolved_targets: Vec = targets .into_iter() .map(|t| self.prepare_unpack_target_shadow_only(t)) // Recursive call .collect(); UnpackTarget::Tuple { targets: resolved_targets, position, } } } } /// Shadows a name in all scope maps for comprehension isolation. /// /// This ensures the comprehension loop variable takes precedence over any /// variable with the same name from enclosing scopes. fn shadow_for_comprehension(&mut self, name_str: &str, comp_var_id: NamespaceId) { // The lookup order in get_id is: global_declarations, free_var_map, cell_var_map, // assigned_names, enclosing_locals, then name_map. 
So we must update/remove from all maps // checked before name_map to ensure the comprehension variable shadows any captured // variable with the same name. self.name_map.insert(name_str.to_string(), comp_var_id); self.names_assigned_in_order.insert(name_str.to_string()); self.free_var_map.remove(name_str); self.cell_var_map.remove(name_str); // Also remove from enclosing_locals to prevent get_id from re-capturing the variable if let Some(ref mut enclosing) = self.enclosing_locals { enclosing.remove(name_str); } } /// Prepares a function definition using a two-pass approach for correct scope resolution. /// /// Pass 1: Scan the function body to collect: /// - Names declared as `global` /// - Names declared as `nonlocal` /// - Names that are assigned (these are local unless declared global/nonlocal) /// /// Pass 2: Prepare the function body with the scope information from pass 1. /// /// # Closure Analysis /// /// When the nested function uses `nonlocal` declarations, those names must exist /// in an enclosing scope. The enclosing scope's variable becomes a cell_var /// (stored in a heap cell), and the nested function captures it as a free_var. 
fn prepare_function_def( &mut self, name: Identifier, parsed_sig: &ParsedSignature, body: Vec, is_async: bool, ) -> Result { // Register the function name in the current scope let (name, _) = self.get_id(name); // Extract param names from the parsed signature for scope analysis let param_names: Vec = parsed_sig.param_names().collect(); // Pass 1: Collect scope information from the function body let scope_info = collect_function_scope_info(&body, ¶m_names, self.interner); // Get the global name map to pass to the function preparer // At module level, use our own name_map; otherwise use the inherited global_name_map let global_name_map = if self.is_module_scope { self.name_map.clone() } else { self.global_name_map.clone().unwrap_or_default() }; // Build enclosing_locals: names that are local to this scope (including params) // These are available for `nonlocal` declarations in nested functions let enclosing_locals: AHashSet = if self.is_module_scope { // At module level, there are no enclosing locals for nonlocal // (module-level variables are accessed via `global`, not `nonlocal`) AHashSet::new() } else { // In a function: our params + assigned_names + existing name_map keys // are all potentially available as enclosing locals let mut locals = self.assigned_names.clone(); for key in self.name_map.keys() { locals.insert(key.clone()); } locals }; // Filter potential_captures to get actual implicit captures. // Only names that are ALSO in enclosing_locals are true implicit captures. // Names NOT in enclosing_locals are either builtins or globals (handled at runtime). 
let implicit_captures: AHashSet = scope_info .potential_captures .into_iter() .filter(|name| enclosing_locals.contains(name)) .collect(); // Pass 2: Create child preparer for function body with scope info let mut inner_prepare = Prepare::new_function( body.len(), ¶m_names, scope_info.assigned_names, scope_info.global_names, scope_info.nonlocal_names, implicit_captures, global_name_map, Some(enclosing_locals), scope_info.cell_var_names, self.interner, ); // Prepare the function body let prepared_body = inner_prepare.prepare_nodes(body)?; // Mark variables that the inner function captures as our cell_vars // These are the names that appear in inner_prepare.free_var_map // Add to cell_var_map if not already present (may have been pre-populated or added earlier) for captured_name in inner_prepare.free_var_map.keys() { if !self.cell_var_map.contains_key(captured_name) && !self.free_var_map.contains_key(captured_name) { // Only add to cell_var_map if not already a free_var (pass-through case) // Allocate a namespace slot for the cell reference let slot = match self.name_map.entry(captured_name.clone()) { Entry::Occupied(e) => *e.get(), Entry::Vacant(e) => { let slot = NamespaceId::new(self.namespace_size); self.namespace_size += 1; e.insert(slot); slot } }; self.cell_var_map.insert(captured_name.clone(), slot); } } // Build free_var_enclosing_slots: enclosing namespace slots for captured variables // At call time, cells are pushed sequentially, so we only need the enclosing slots. // Sort by our slot index to ensure consistent ordering (matches namespace layout). 
let mut free_var_entries: Vec<_> = inner_prepare.free_var_map.into_iter().collect(); free_var_entries.sort_by_key(|(_, our_slot)| *our_slot); let free_var_enclosing_slots: Vec = free_var_entries .into_iter() .map(|(var_name, _our_slot)| { // Determine the namespace slot in the enclosing scope where the cell reference lives: // - If it's in cell_var_map, it's a cell we own (allocated in this scope) // - If it's in free_var_map, it's a cell we captured from further up // - Otherwise, this is a prepare-time bug if let Some(&slot) = self.cell_var_map.get(&var_name) { slot } else if let Some(&slot) = self.free_var_map.get(&var_name) { slot } else { panic!("free_var '{var_name}' not found in enclosing scope's cell_var_map or free_var_map"); } }) .collect(); // cell_var_count: number of cells to create at call time for variables captured by nested functions // Slots are implicitly params.len()..params.len()+cell_var_count in the namespace layout let cell_var_count = inner_prepare.cell_var_map.len(); let namespace_size = inner_prepare.namespace_size; // Build cell_param_indices: maps cell indices to parameter indices for captured parameters. // When a parameter is captured by a nested function, we need to copy its value into the cell. 
let cell_param_indices: Vec> = if cell_var_count == 0 { Vec::new() } else { // Build a map from param name (String) to param index let param_name_to_index: AHashMap = param_names .iter() .enumerate() .map(|(idx, &name_id)| (self.interner.get_str(name_id).to_string(), idx)) .collect(); // Sort cell_var_map entries by slot to get cells in order let mut cell_entries: Vec<_> = inner_prepare.cell_var_map.iter().collect(); cell_entries.sort_by_key(|&(_, slot)| slot); // For each cell (in slot order), check if it's a parameter cell_entries .into_iter() .map(|(name, _slot)| param_name_to_index.get(name).copied()) .collect() }; // Build the runtime Signature from the parsed signature let pos_args: Vec = parsed_sig.pos_args.iter().map(|p| p.name).collect(); let pos_defaults_count = parsed_sig.pos_args.iter().filter(|p| p.default.is_some()).count(); let args: Vec = parsed_sig.args.iter().map(|p| p.name).collect(); let arg_defaults_count = parsed_sig.args.iter().filter(|p| p.default.is_some()).count(); let mut kwargs: Vec = Vec::with_capacity(parsed_sig.kwargs.len()); let mut kwarg_default_map: Vec> = Vec::with_capacity(parsed_sig.kwargs.len()); let mut kwarg_default_index = 0; for param in &parsed_sig.kwargs { kwargs.push(param.name); if param.default.is_some() { kwarg_default_map.push(Some(kwarg_default_index)); kwarg_default_index += 1; } else { kwarg_default_map.push(None); } } let signature = Signature::new( pos_args, pos_defaults_count, args, arg_defaults_count, parsed_sig.var_args, kwargs, kwarg_default_map, parsed_sig.var_kwargs, ); // Collect and prepare default expressions in order: pos_args -> args -> kwargs // Only includes parameters that actually have defaults. 
let mut default_exprs = Vec::with_capacity(signature.total_defaults_count()); for param in &parsed_sig.pos_args { if let Some(ref expr) = param.default { default_exprs.push(self.prepare_expression(expr.clone())?); } } for param in &parsed_sig.args { if let Some(ref expr) = param.default { default_exprs.push(self.prepare_expression(expr.clone())?); } } for param in &parsed_sig.kwargs { if let Some(ref expr) = param.default { default_exprs.push(self.prepare_expression(expr.clone())?); } } // Return the prepared function definition inline in the AST Ok(Node::FunctionDef(PreparedFunctionDef { name, signature, body: prepared_body, namespace_size, free_var_enclosing_slots, cell_var_count, cell_param_indices, default_exprs, is_async, })) } /// Prepares a lambda expression, converting it into a prepared function definition. /// /// Lambdas are essentially anonymous functions with an implicit return of their body /// expression. This method follows the same preparation logic as `prepare_function_def` /// but: /// - Uses `` as the function name (not registered in scope) /// - Wraps the body expression as `Node::Return(body)` /// - Returns `ExprLoc` with `Expr::Lambda` instead of `PreparedNode` fn prepare_lambda( &mut self, lambda_name_id: StringId, parsed_sig: &ParsedSignature, body: &ExprLoc, position: CodeRange, ) -> Result { // Create a synthetic name identifier (not registered in scope) let lambda_name = Identifier::new_with_scope( lambda_name_id, position, NamespaceId::new(0), // Placeholder, not actually used for storage NameScope::Local, ); // Wrap the body expression as a return statement for scope analysis let body_as_node: ParseNode = Node::Return(body.clone()); let body_nodes = vec![body_as_node]; // Extract param names from the parsed signature for scope analysis let param_names: Vec = parsed_sig.param_names().collect(); // Pass 1: Collect scope information from the lambda body // (Lambdas can't have global/nonlocal declarations, but can have nested functions) 
let scope_info = collect_function_scope_info(&body_nodes, ¶m_names, self.interner); // Get the global name map to pass to the function preparer let global_name_map = if self.is_module_scope { self.name_map.clone() } else { self.global_name_map.clone().unwrap_or_default() }; // Build enclosing_locals: names that are local to this scope or captured from enclosing scope. // This includes free_vars so that nested lambdas can capture pass-through variables. let enclosing_locals: AHashSet = if self.is_module_scope { AHashSet::new() } else { let mut locals = self.assigned_names.clone(); for key in self.name_map.keys() { locals.insert(key.clone()); } // Include free_vars so nested functions/lambdas can capture pass-through variables for key in self.free_var_map.keys() { locals.insert(key.clone()); } locals }; // Filter potential_captures to get actual implicit captures let implicit_captures: AHashSet = scope_info .potential_captures .into_iter() .filter(|name| enclosing_locals.contains(name)) .collect(); // Pass 2: Create child preparer for lambda body with scope info let mut inner_prepare = Prepare::new_function( body_nodes.len(), ¶m_names, scope_info.assigned_names, scope_info.global_names, scope_info.nonlocal_names, implicit_captures, global_name_map, Some(enclosing_locals), scope_info.cell_var_names, self.interner, ); // Prepare the lambda body let prepared_body = inner_prepare.prepare_nodes(body_nodes)?; // Mark variables that the inner function captures as our cell_vars for captured_name in inner_prepare.free_var_map.keys() { if !self.cell_var_map.contains_key(captured_name) && !self.free_var_map.contains_key(captured_name) { let slot = match self.name_map.entry(captured_name.clone()) { Entry::Occupied(e) => *e.get(), Entry::Vacant(e) => { let slot = NamespaceId::new(self.namespace_size); self.namespace_size += 1; e.insert(slot); slot } }; self.cell_var_map.insert(captured_name.clone(), slot); } } // Build free_var_enclosing_slots let mut free_var_entries: Vec<_> = 
inner_prepare.free_var_map.into_iter().collect(); free_var_entries.sort_by_key(|(_, our_slot)| *our_slot); let free_var_enclosing_slots: Vec = free_var_entries .into_iter() .map(|(var_name, _our_slot)| { if let Some(&slot) = self.cell_var_map.get(&var_name) { slot } else if let Some(&slot) = self.free_var_map.get(&var_name) { slot } else { panic!("free_var '{var_name}' not found in enclosing scope's cell_var_map or free_var_map"); } }) .collect(); // Build cell_param_indices let cell_var_count = inner_prepare.cell_var_map.len(); let namespace_size = inner_prepare.namespace_size; let cell_param_indices: Vec> = if cell_var_count == 0 { Vec::new() } else { let param_name_to_index: AHashMap = param_names .iter() .enumerate() .map(|(idx, &name_id)| (self.interner.get_str(name_id).to_string(), idx)) .collect(); let mut cell_entries: Vec<_> = inner_prepare.cell_var_map.iter().collect(); cell_entries.sort_by_key(|&(_, slot)| slot); cell_entries .into_iter() .map(|(name, _slot)| param_name_to_index.get(name).copied()) .collect() }; // Build the runtime Signature from the parsed signature let pos_args: Vec = parsed_sig.pos_args.iter().map(|p| p.name).collect(); let pos_defaults_count = parsed_sig.pos_args.iter().filter(|p| p.default.is_some()).count(); let args: Vec = parsed_sig.args.iter().map(|p| p.name).collect(); let arg_defaults_count = parsed_sig.args.iter().filter(|p| p.default.is_some()).count(); let mut kwargs: Vec = Vec::with_capacity(parsed_sig.kwargs.len()); let mut kwarg_default_map: Vec> = Vec::with_capacity(parsed_sig.kwargs.len()); let mut kwarg_default_index = 0; for param in &parsed_sig.kwargs { kwargs.push(param.name); if param.default.is_some() { kwarg_default_map.push(Some(kwarg_default_index)); kwarg_default_index += 1; } else { kwarg_default_map.push(None); } } let signature = Signature::new( pos_args, pos_defaults_count, args, arg_defaults_count, parsed_sig.var_args, kwargs, kwarg_default_map, parsed_sig.var_kwargs, ); // Collect and prepare default 
expressions (evaluated in enclosing scope) let mut default_exprs = Vec::with_capacity(signature.total_defaults_count()); for param in &parsed_sig.pos_args { if let Some(ref expr) = param.default { default_exprs.push(self.prepare_expression(expr.clone())?); } } for param in &parsed_sig.args { if let Some(ref expr) = param.default { default_exprs.push(self.prepare_expression(expr.clone())?); } } for param in &parsed_sig.kwargs { if let Some(ref expr) = param.default { default_exprs.push(self.prepare_expression(expr.clone())?); } } // Create the prepared function definition (lambdas are never async) let func_def = PreparedFunctionDef { name: lambda_name, signature, body: prepared_body, namespace_size, free_var_enclosing_slots, cell_var_count, cell_param_indices, default_exprs, is_async: false, }; Ok(ExprLoc::new( position, Expr::Lambda { func_def: Box::new(func_def), }, )) } /// Resolves an identifier to its namespace index and scope, creating a new entry if needed. /// /// TODO This whole implementation seems ugly at best. /// /// This is the core name resolution mechanism with scope-aware resolution: /// /// **At module level:** All names go to the local namespace (which IS the global namespace). /// /// **In functions:** /// - If name is declared `global` → resolve to global namespace /// - If name is declared `nonlocal` → resolve to enclosing scope via Cell /// - If name is assigned in this function → resolve to local namespace /// - If name exists in global namespace (read-only access) → resolve to global namespace /// - Otherwise → resolve to local namespace (will be NameError at runtime) /// /// # Returns /// A tuple of (resolved Identifier with id and scope set, whether this is a new local name). fn get_id(&mut self, ident: Identifier) -> (Identifier, bool) { let name_str = self.interner.get_str(ident.name_id); // At module level, all names are local (which is also the global namespace). 
// The compiler emits global opcodes for these, so the VM reads/writes // directly from the globals array rather than the stack. if self.is_module_scope { return match self.name_map.entry(name_str.to_string()) { Entry::Occupied(e) => { // Name already exists (from prior reference or pre-registered). // Determine scope the same way as for vacant entries: if the name // has been assigned so far, it's a true local; otherwise it's an // unassigned reference that should yield NameLookup at runtime. let scope = if self.names_assigned_in_order.contains(name_str) { NameScope::Local } else { NameScope::LocalUnassigned }; ( Identifier::new_with_scope(ident.name_id, ident.position, *e.get(), scope), false, ) } Entry::Vacant(e) => { let id = NamespaceId::new(self.namespace_size); self.namespace_size += 1; e.insert(id); // Determine scope: if the name is assigned somewhere (even later in the file), // it's a true local that will raise UnboundLocalError if accessed before assignment. // If the name is never assigned, it's an undefined reference that raises NameError. let scope = if self.names_assigned_in_order.contains(name_str) { NameScope::Local } else { NameScope::LocalUnassigned }; ( Identifier::new_with_scope(ident.name_id, ident.position, id, scope), true, ) } }; } // In a function: determine scope based on global_names, nonlocal_names, assigned_names, global_name_map // 1. Check if declared `global` if self.global_names.contains(name_str) { if let Some(ref global_map) = self.global_name_map && let Some(&global_id) = global_map.get(name_str) { // Name exists in global namespace return ( Identifier::new_with_scope(ident.name_id, ident.position, global_id, NameScope::Global), false, ); } // Declared global but doesn't exist yet - it will be created when assigned // For now, we still need a global index. 
We'll use a placeholder approach: // allocate in global namespace (this is a simplification - in real Python, // the global would be created at module level when first assigned) // For our implementation, we'll resolve to global but the variable won't exist until assigned. // Return a "new" global - but we can't modify global_name_map here. // For simplicity, we'll resolve to local with Global scope - runtime will handle the lookup. let (id, is_new) = match self.name_map.entry(name_str.to_string()) { Entry::Occupied(e) => (*e.get(), false), Entry::Vacant(e) => { let id = NamespaceId::new(self.namespace_size); self.namespace_size += 1; e.insert(id); (id, true) } }; // Mark as Global scope - runtime will need to handle this specially return ( Identifier::new_with_scope(ident.name_id, ident.position, id, NameScope::Global), is_new, ); } // 2. Check if captured from enclosing scope (nonlocal declaration or implicit capture) // free_var_map stores namespace slot indices where the cell reference will be stored if let Some(&slot) = self.free_var_map.get(name_str) { // At runtime, the cell reference is in namespace[slot] as Value::Ref(cell_id) return ( Identifier::new_with_scope(ident.name_id, ident.position, slot, NameScope::Cell), false, // Not a new local - it's captured from enclosing scope ); } // 3. Check if this is a cell variable (captured by nested functions) // cell_var_map stores namespace slot indices where the cell reference will be stored // At call time, a cell is created and stored as Value::Ref(cell_id) at this slot if let Some(&slot) = self.cell_var_map.get(name_str) { // The namespace slot was already allocated when cell_var_map was populated return ( Identifier::new_with_scope(ident.name_id, ident.position, slot, NameScope::Cell), false, // Not a "new" local - it's a cell variable ); } // 4. 
Check if assigned in this function (local variable) if self.assigned_names.contains(name_str) { let (id, is_new) = match self.name_map.entry(name_str.to_string()) { Entry::Occupied(e) => (*e.get(), false), Entry::Vacant(e) => { let id = NamespaceId::new(self.namespace_size); self.namespace_size += 1; e.insert(id); (id, true) } }; return ( Identifier::new_with_scope(ident.name_id, ident.position, id, NameScope::Local), is_new, ); } // 5. Check if name was pre-populated in name_map (from function parameters) // This ensures parameters shadow both enclosing locals and global variables // with the same name. Parameters are added to name_map during // FunctionScope::new_function() but are NOT in assigned_names (since they're // not assigned in the function body). This MUST be checked before // enclosing_locals, otherwise a parameter like `def inner(x)` would be // incorrectly resolved as a closure capture when an outer scope also has `x`. // Excludes names tracked in `unassigned_ref_names` — those were added to // `name_map` by step 8 as `LocalUnassigned` references and must stay that way // to trigger NameLookup at runtime (e.g., for external function resolution). if !self.unassigned_ref_names.contains(name_str) && let Some(&id) = self.name_map.get(name_str) { return ( Identifier::new_with_scope(ident.name_id, ident.position, id, NameScope::Local), false, // Not new - was pre-populated from parameters ); } // 6. 
Check if exists in enclosing scope (implicit closure capture) // This handles reading variables from enclosing functions without explicit `nonlocal` if let Some(ref enclosing) = self.enclosing_locals && enclosing.contains(name_str) { // This is an implicit capture - add to free_var_map with a namespace slot let slot = if let Some(&existing_slot) = self.free_var_map.get(name_str) { existing_slot } else { // Allocate a namespace slot for this free variable let slot = NamespaceId::new(self.namespace_size); self.namespace_size += 1; self.name_map.insert(name_str.to_string(), slot); self.free_var_map.insert(name_str.to_string(), slot); slot }; return ( Identifier::new_with_scope(ident.name_id, ident.position, slot, NameScope::Cell), false, // Not a new local - it's captured from enclosing scope ); } // 7. Check if exists in global namespace (implicit global read) if let Some(ref global_map) = self.global_name_map && let Some(&global_id) = global_map.get(name_str) { return ( Identifier::new_with_scope(ident.name_id, ident.position, global_id, NameScope::Global), false, ); } // 8. Name not found anywhere - allocate a local slot (will be NameError at runtime) // This handles names that are only read (never assigned) and don't exist globally. // We allocate a local slot that will never be written to. // Mark as LocalUnassigned so runtime raises NameError (not UnboundLocalError). // Track in `unassigned_ref_names` so step 6 doesn't treat subsequent references // as `Local` (parameters). self.unassigned_ref_names.insert(name_str.to_string()); let (id, is_new) = match self.name_map.entry(name_str.to_string()) { Entry::Occupied(e) => (*e.get(), false), Entry::Vacant(e) => { let id = NamespaceId::new(self.namespace_size); self.namespace_size += 1; e.insert(id); (id, true) } }; ( Identifier::new_with_scope(ident.name_id, ident.position, id, NameScope::LocalUnassigned), is_new, ) } /// Prepares an f-string part by resolving names in interpolated expressions. 
fn prepare_fstring_part(&mut self, part: FStringPart) -> Result<FStringPart, ParseError> {
    match part {
        // Literal text contains no names; hand it back untouched.
        literal @ FStringPart::Literal(_) => Ok(literal),
        FStringPart::Interpolation {
            expr,
            conversion,
            format_spec,
            debug_prefix,
        } => {
            // Resolve the interpolated expression first, then the format spec.
            let expr = Box::new(self.prepare_expression(*expr)?);
            let format_spec = match format_spec {
                None => None,
                Some(FormatSpec::Static(s)) => Some(FormatSpec::Static(s)),
                // A dynamic spec is itself a list of f-string parts: prepare each one,
                // stopping at the first error.
                Some(FormatSpec::Dynamic(parts)) => {
                    let mut prepared = Vec::with_capacity(parts.len());
                    for nested in parts {
                        prepared.push(self.prepare_fstring_part(nested)?);
                    }
                    Some(FormatSpec::Dynamic(prepared))
                }
            };
            Ok(FStringPart::Interpolation {
                expr,
                conversion,
                format_spec,
                debug_prefix,
            })
        }
    }
}
} // closes the enclosing `impl` block (opened before this section)

/// Result of the first-pass scan over a function body.
///
/// Carries the scope facts that the second preparation pass and the closure
/// analysis need.
struct FunctionScopeInfo {
    /// Names appearing in `global` statements
    global_names: AHashSet<String>,
    /// Names appearing in `nonlocal` statements
    nonlocal_names: AHashSet<String>,
    /// Names bound (assigned) somewhere in this scope
    assigned_names: AHashSet<String>,
    /// Names of this scope that nested functions capture (must be stored in cells)
    cell_var_names: AHashSet<String>,
    /// Names that are referenced but are neither local, global, nor nonlocal.
    /// These are only POTENTIAL implicit captures: each one is either a capture from an
    /// enclosing function or a builtin/global read. The caller decides by filtering
    /// against the enclosing scope's locals.
    potential_captures: AHashSet<String>,
}

/// Scans a function body and gathers its scope information (first phase of preparation).
///
/// Three passes are made over `nodes`:
/// 1. Collect `global` names, `nonlocal` names and assigned names
/// 2. Find cell_vars (locals of this function captured by nested functions)
/// 3. Collect every referenced name, from which potential implicit captures are derived
///
/// `params` are the function's parameter names; together with the assigned names
/// (minus anything declared `global`) they form the function's own locals.
///
/// The result tells the caller whether each name reference should resolve to the local
/// namespace, the global namespace, or an enclosing scope via cells.
fn collect_function_scope_info(
    nodes: &[ParseNode],
    params: &[StringId],
    interner: &InternerBuilder,
) -> FunctionScopeInfo {
    // Pass 1: global / nonlocal declarations and assignments
    let mut global_names = AHashSet::new();
    let mut nonlocal_names = AHashSet::new();
    let mut assigned_names = AHashSet::new();
    for node in nodes {
        collect_scope_info_from_node(
            node,
            &mut global_names,
            &mut nonlocal_names,
            &mut assigned_names,
            interner,
        );
    }

    // Our locals: params + assigned names, except anything declared global
    let mut our_locals: AHashSet<String> = AHashSet::new();
    for string_id in params {
        let param = interner.get_str(*string_id);
        if !global_names.contains(param) {
            our_locals.insert(param.to_string());
        }
    }
    for assigned in &assigned_names {
        if !global_names.contains(assigned) {
            our_locals.insert(assigned.clone());
        }
    }

    // Pass 2: which of our locals do nested functions capture?
    let mut cell_var_names = AHashSet::new();
    for node in nodes {
        collect_cell_vars_from_node(node, &our_locals, &mut cell_var_names, interner);
    }

    // Pass 3: every referenced name. Whether one of them is a real capture cannot be
    // decided here, because the enclosing scope's locals are not known at this point.
    let mut referenced_names = AHashSet::new();
    for node in nodes {
        collect_referenced_names_from_node(node, &mut referenced_names, interner);
    }

    // Keep only names that are not local, not declared global and not declared nonlocal.
    referenced_names.retain(|name| {
        !(our_locals.contains(name) || global_names.contains(name) || nonlocal_names.contains(name))
    });

    FunctionScopeInfo {
        global_names,
        nonlocal_names,
        assigned_names,
        cell_var_names,
        potential_captures: referenced_names,
    }
}

/// Collects scope info from one statement, recursing into nested statement blocks.
///
/// Adds `global`/`nonlocal` declarations to the respective sets and every name the
/// statement binds (including walrus targets inside its expressions) to `assigned_names`.
/// Nested function bodies are not entered.
fn collect_scope_info_from_node(
    node: &ParseNode,
    global_names: &mut AHashSet<String>,
    nonlocal_names: &mut AHashSet<String>,
    assigned_names: &mut AHashSet<String>,
    interner: &InternerBuilder,
) {
    match node {
        // --- declarations ---
        Node::Global { names, .. } => {
            global_names.extend(names.iter().map(|id| interner.get_str(*id).to_string()));
        }
        Node::Nonlocal { names, .. } => {
            nonlocal_names.extend(names.iter().map(|id| interner.get_str(*id).to_string()));
        }

        // --- simple bindings; the value may still hide walrus operators ---
        Node::Assign { target, object } => {
            assigned_names.insert(interner.get_str(target.name_id).to_string());
            collect_assigned_names_from_expr(object, assigned_names, interner);
        }
        Node::OpAssign { target, object, .. } => {
            assigned_names.insert(interner.get_str(target.name_id).to_string());
            collect_assigned_names_from_expr(object, assigned_names, interner);
        }
        Node::UnpackAssign { targets, object, .. } => {
            // Every name in the (possibly nested) unpack targets is bound
            for target in targets {
                collect_names_from_unpack_target(target, assigned_names, interner);
            }
            collect_assigned_names_from_expr(object, assigned_names, interner);
        }

        // --- stores into containers/objects: no new name, only walrus scanning ---
        Node::SubscriptOpAssign { index, object, .. } => {
            collect_assigned_names_from_expr(index, assigned_names, interner);
            collect_assigned_names_from_expr(object, assigned_names, interner);
        }
        Node::SubscriptAssign { index, value, .. } => {
            collect_assigned_names_from_expr(index, assigned_names, interner);
            collect_assigned_names_from_expr(value, assigned_names, interner);
        }
        Node::AttrAssign { object, value, .. } => {
            collect_assigned_names_from_expr(object, assigned_names, interner);
            collect_assigned_names_from_expr(value, assigned_names, interner);
        }

        // --- compound statements: scan header expressions, then both blocks ---
        Node::For {
            target,
            iter,
            body,
            or_else,
        } => {
            // The loop target binds names
            collect_names_from_unpack_target(target, assigned_names, interner);
            collect_assigned_names_from_expr(iter, assigned_names, interner);
            for child in body.iter().chain(or_else.iter()) {
                collect_scope_info_from_node(child, global_names, nonlocal_names, assigned_names, interner);
            }
        }
        Node::While { test, body, or_else } => {
            collect_assigned_names_from_expr(test, assigned_names, interner);
            for child in body.iter().chain(or_else.iter()) {
                collect_scope_info_from_node(child, global_names, nonlocal_names, assigned_names, interner);
            }
        }
        Node::If { test, body, or_else } => {
            collect_assigned_names_from_expr(test, assigned_names, interner);
            for child in body.iter().chain(or_else.iter()) {
                collect_scope_info_from_node(child, global_names, nonlocal_names, assigned_names, interner);
            }
        }
        Node::Try(Try {
            body,
            handlers,
            or_else,
            finally,
        }) => {
            for child in body {
                collect_scope_info_from_node(child, global_names, nonlocal_names, assigned_names, interner);
            }
            for handler in handlers {
                // `except ... as name` binds the exception variable
                if let Some(exc_name) = &handler.name {
                    assigned_names.insert(interner.get_str(exc_name.name_id).to_string());
                }
                for child in &handler.body {
                    collect_scope_info_from_node(child, global_names, nonlocal_names, assigned_names, interner);
                }
            }
            for child in or_else.iter().chain(finally.iter()) {
                collect_scope_info_from_node(child, global_names, nonlocal_names, assigned_names, interner);
            }
        }

        // A nested `def` binds its own name here; its body is a separate scope and is
        // intentionally not scanned.
        Node::FunctionDef(RawFunctionDef { name, .. }) => {
            assigned_names.insert(interner.get_str(name.name_id).to_string());
        }

        // Import binds the module name (or alias)
        Node::Import { binding, .. } => {
            assigned_names.insert(interner.get_str(binding.name_id).to_string());
        }
        // ImportFrom binds each imported name (or alias)
        Node::ImportFrom { names, .. } => {
            assigned_names.extend(
                names
                    .iter()
                    .map(|(_import_name, binding)| interner.get_str(binding.name_id).to_string()),
            );
        }

        // --- expression-carrying statements: only walrus operators can bind ---
        Node::Expr(expr) | Node::Return(expr) => {
            collect_assigned_names_from_expr(expr, assigned_names, interner);
        }
        Node::Raise(Some(expr)) => {
            collect_assigned_names_from_expr(expr, assigned_names, interner);
        }
        Node::Assert { test, msg } => {
            collect_assigned_names_from_expr(test, assigned_names, interner);
            if let Some(message) = msg {
                collect_assigned_names_from_expr(message, assigned_names, interner);
            }
        }

        // Nothing is bound by these
        Node::Pass | Node::ReturnNone | Node::Raise(None) | Node::Break { .. } | Node::Continue { .. } => {}
    }
}

/// Collects names assigned by walrus operators (`:=`) within an expression.
///
/// Per PEP 572, walrus operator targets are assignments in the enclosing scope.
/// This function recursively scans expressions to find all `Named` expression targets.
/// It does NOT recurse into lambda bodies as those have their own scope.
fn collect_assigned_names_from_expr(expr: &ExprLoc, assigned_names: &mut AHashSet, interner: &InternerBuilder) { match &expr.expr { Expr::Named { target, value } => { // The target of a walrus operator is assigned in this scope assigned_names.insert(interner.get_str(target.name_id).to_string()); // Also scan the value expression collect_assigned_names_from_expr(value, assigned_names, interner); } // Recurse into sub-expressions Expr::List(items) | Expr::Tuple(items) | Expr::Set(items) => { for item in items { let expr = match item { SequenceItem::Value(e) | SequenceItem::Unpack(e) => e, }; collect_assigned_names_from_expr(expr, assigned_names, interner); } } Expr::Dict(dict_items) => { for item in dict_items { match item { DictItem::Pair(key, value) => { collect_assigned_names_from_expr(key, assigned_names, interner); collect_assigned_names_from_expr(value, assigned_names, interner); } DictItem::Unpack(e) => collect_assigned_names_from_expr(e, assigned_names, interner), } } } Expr::Op { left, right, .. } | Expr::CmpOp { left, right, .. } => { collect_assigned_names_from_expr(left, assigned_names, interner); collect_assigned_names_from_expr(right, assigned_names, interner); } Expr::ChainCmp { left, comparisons } => { collect_assigned_names_from_expr(left, assigned_names, interner); for (_, expr) in comparisons { collect_assigned_names_from_expr(expr, assigned_names, interner); } } Expr::Not(operand) | Expr::UnaryMinus(operand) | Expr::UnaryPlus(operand) | Expr::UnaryInvert(operand) | Expr::Await(operand) => { collect_assigned_names_from_expr(operand, assigned_names, interner); } Expr::Subscript { object, index } => { collect_assigned_names_from_expr(object, assigned_names, interner); collect_assigned_names_from_expr(index, assigned_names, interner); } Expr::Call { args, .. } => { collect_assigned_names_from_args(args, assigned_names, interner); } Expr::AttrCall { object, args, .. 
} => { collect_assigned_names_from_expr(object, assigned_names, interner); collect_assigned_names_from_args(args, assigned_names, interner); } Expr::IndirectCall { callable, args } => { collect_assigned_names_from_expr(callable, assigned_names, interner); collect_assigned_names_from_args(args, assigned_names, interner); } Expr::AttrGet { object, .. } => { collect_assigned_names_from_expr(object, assigned_names, interner); } Expr::IfElse { test, body, orelse } => { collect_assigned_names_from_expr(test, assigned_names, interner); collect_assigned_names_from_expr(body, assigned_names, interner); collect_assigned_names_from_expr(orelse, assigned_names, interner); } // Per PEP 572, walrus in comprehensions assigns to the ENCLOSING scope Expr::ListComp { elt, generators } | Expr::SetComp { elt, generators } => { collect_assigned_names_from_expr(elt, assigned_names, interner); for generator in generators { collect_assigned_names_from_expr(&generator.iter, assigned_names, interner); for cond in &generator.ifs { collect_assigned_names_from_expr(cond, assigned_names, interner); } } } Expr::DictComp { key, value, generators } => { collect_assigned_names_from_expr(key, assigned_names, interner); collect_assigned_names_from_expr(value, assigned_names, interner); for generator in generators { collect_assigned_names_from_expr(&generator.iter, assigned_names, interner); for cond in &generator.ifs { collect_assigned_names_from_expr(cond, assigned_names, interner); } } } Expr::FString(parts) => { for part in parts { if let FStringPart::Interpolation { expr, .. 
} = part { collect_assigned_names_from_expr(expr, assigned_names, interner); } } } Expr::Slice { lower, upper, step } => { if let Some(e) = lower { collect_assigned_names_from_expr(e, assigned_names, interner); } if let Some(e) = upper { collect_assigned_names_from_expr(e, assigned_names, interner); } if let Some(e) = step { collect_assigned_names_from_expr(e, assigned_names, interner); } } // Lambda bodies have their own scope - walrus inside them doesn't affect us Expr::LambdaRaw { .. } | Expr::Lambda { .. } => {} // Leaf expressions don't contain walrus operators Expr::Literal(_) | Expr::Builtin(_) | Expr::Name(_) => {} } } /// Helper to collect assigned names from argument expressions. fn collect_assigned_names_from_args( args: &ArgExprs, assigned_names: &mut AHashSet, interner: &InternerBuilder, ) { match args { ArgExprs::Empty => {} ArgExprs::One(arg) => collect_assigned_names_from_expr(arg, assigned_names, interner), ArgExprs::Two(arg1, arg2) => { collect_assigned_names_from_expr(arg1, assigned_names, interner); collect_assigned_names_from_expr(arg2, assigned_names, interner); } ArgExprs::Args(args) => { for arg in args { collect_assigned_names_from_expr(arg, assigned_names, interner); } } ArgExprs::Kwargs(kwargs) => { for kwarg in kwargs { collect_assigned_names_from_expr(&kwarg.value, assigned_names, interner); } } ArgExprs::ArgsKargs { args, kwargs, var_args, var_kwargs, } => { if let Some(args) = args { for arg in args { collect_assigned_names_from_expr(arg, assigned_names, interner); } } if let Some(kwargs) = kwargs { for kwarg in kwargs { collect_assigned_names_from_expr(&kwarg.value, assigned_names, interner); } } if let Some(var_args) = var_args { collect_assigned_names_from_expr(var_args, assigned_names, interner); } if let Some(var_kwargs) = var_kwargs { collect_assigned_names_from_expr(var_kwargs, assigned_names, interner); } } ArgExprs::GeneralizedCall { args, kwargs } => { for arg in args { match arg { CallArg::Value(e) | CallArg::Unpack(e) => { 
collect_assigned_names_from_expr(e, assigned_names, interner); } } } for kwarg in kwargs { match kwarg { CallKwarg::Named(kw) => { collect_assigned_names_from_expr(&kw.value, assigned_names, interner); } CallKwarg::Unpack(e) => { collect_assigned_names_from_expr(e, assigned_names, interner); } } } } } } /// Collects cell_vars by analyzing what nested functions capture from our scope. /// /// For each FunctionDef node, we recursively analyze its body to find what names it /// references. Any name that is in `our_locals` and referenced by the nested function /// (not as a local of the nested function) becomes a cell_var. fn collect_cell_vars_from_node( node: &ParseNode, our_locals: &AHashSet, cell_vars: &mut AHashSet, interner: &InternerBuilder, ) { match node { Node::FunctionDef(RawFunctionDef { signature, body, .. }) => { // Find what names are referenced inside this nested function let mut referenced = AHashSet::new(); for n in body { collect_referenced_names_from_node(n, &mut referenced, interner); } // Extract param names from signature for scope analysis let param_names: Vec = signature.param_names().collect(); // Collect the nested function's own locals (params + assigned) let nested_scope = collect_function_scope_info(body, ¶m_names, interner); // Any name that is: // - Referenced by the nested function // - Not a local of the nested function // - Not declared global in the nested function // - In our locals // becomes a cell_var for name in &referenced { if !nested_scope.assigned_names.contains(name) && !param_names.iter().any(|p| interner.get_str(*p) == name) && !nested_scope.global_names.contains(name) && our_locals.contains(name) { cell_vars.insert(name.clone()); } } // Also check what the nested function explicitly declares as nonlocal for name in &nested_scope.nonlocal_names { if our_locals.contains(name) { cell_vars.insert(name.clone()); } } } // Recurse into control flow structures Node::For { body, or_else, .. 
} => { for n in body { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } for n in or_else { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } } Node::While { body, or_else, .. } => { for n in body { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } for n in or_else { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } } Node::If { body, or_else, .. } => { for n in body { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } for n in or_else { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } } Node::Try(Try { body, handlers, or_else, finally, }) => { for n in body { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } for handler in handlers { for n in &handler.body { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } } for n in or_else { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } for n in finally { collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } } // Handle expressions that may contain lambdas Node::Expr(expr) | Node::Return(expr) => { collect_cell_vars_from_expr(expr, our_locals, cell_vars, interner); } Node::Assign { object, .. } | Node::UnpackAssign { object, .. } => { collect_cell_vars_from_expr(object, our_locals, cell_vars, interner); } Node::OpAssign { object, .. } => { collect_cell_vars_from_expr(object, our_locals, cell_vars, interner); } Node::SubscriptOpAssign { index, object, .. } => { collect_cell_vars_from_expr(index, our_locals, cell_vars, interner); collect_cell_vars_from_expr(object, our_locals, cell_vars, interner); } Node::SubscriptAssign { index, value, .. } => { collect_cell_vars_from_expr(index, our_locals, cell_vars, interner); collect_cell_vars_from_expr(value, our_locals, cell_vars, interner); } Node::AttrAssign { object, value, .. 
} => { collect_cell_vars_from_expr(object, our_locals, cell_vars, interner); collect_cell_vars_from_expr(value, our_locals, cell_vars, interner); } // Other nodes don't contain nested function definitions or lambdas _ => {} } } /// Collects cell_vars from lambda expressions within an expression. /// /// Recursively searches through an expression tree to find lambda expressions /// that capture variables from the enclosing scope. fn collect_cell_vars_from_expr( expr: &ExprLoc, our_locals: &AHashSet, cell_vars: &mut AHashSet, interner: &InternerBuilder, ) { use crate::expressions::Expr; match &expr.expr { Expr::LambdaRaw { signature, body, .. } => { // This lambda captures variables from our scope // Find what names are referenced in the lambda body let mut referenced = AHashSet::new(); collect_referenced_names_from_expr(body, &mut referenced, interner); // Also collect from default expressions for param in &signature.pos_args { if let Some(ref default) = param.default { collect_referenced_names_from_expr(default, &mut referenced, interner); } } for param in &signature.args { if let Some(ref default) = param.default { collect_referenced_names_from_expr(default, &mut referenced, interner); } } for param in &signature.kwargs { if let Some(ref default) = param.default { collect_referenced_names_from_expr(default, &mut referenced, interner); } } // Extract param names from signature let param_names: Vec = signature.param_names().collect(); // Any name that is: // - Referenced by the lambda // - Not a param of the lambda // - In our locals // becomes a cell_var for name in &referenced { if !param_names.iter().any(|p| interner.get_str(*p) == name) && our_locals.contains(name) { cell_vars.insert(name.clone()); } } // Recursively check the lambda body for nested lambdas. // For nested lambdas, extend our_locals to include this lambda's parameters // so that inner lambdas can find them for closure capture. 
let mut extended_locals = our_locals.clone(); for param_id in ¶m_names { extended_locals.insert(interner.get_str(*param_id).to_string()); } collect_cell_vars_from_expr(body, &extended_locals, cell_vars, interner); } // Recurse into sub-expressions Expr::List(items) | Expr::Tuple(items) | Expr::Set(items) => { for item in items { let expr = match item { SequenceItem::Value(e) | SequenceItem::Unpack(e) => e, }; collect_cell_vars_from_expr(expr, our_locals, cell_vars, interner); } } Expr::Dict(dict_items) => { for item in dict_items { match item { DictItem::Pair(key, value) => { collect_cell_vars_from_expr(key, our_locals, cell_vars, interner); collect_cell_vars_from_expr(value, our_locals, cell_vars, interner); } DictItem::Unpack(e) => collect_cell_vars_from_expr(e, our_locals, cell_vars, interner), } } } Expr::Op { left, right, .. } | Expr::CmpOp { left, right, .. } => { collect_cell_vars_from_expr(left, our_locals, cell_vars, interner); collect_cell_vars_from_expr(right, our_locals, cell_vars, interner); } Expr::ChainCmp { left, comparisons } => { collect_cell_vars_from_expr(left, our_locals, cell_vars, interner); for (_, expr) in comparisons { collect_cell_vars_from_expr(expr, our_locals, cell_vars, interner); } } Expr::Not(operand) | Expr::UnaryMinus(operand) | Expr::UnaryPlus(operand) | Expr::UnaryInvert(operand) => { collect_cell_vars_from_expr(operand, our_locals, cell_vars, interner); } Expr::Subscript { object, index } => { collect_cell_vars_from_expr(object, our_locals, cell_vars, interner); collect_cell_vars_from_expr(index, our_locals, cell_vars, interner); } Expr::Call { args, .. } => { collect_cell_vars_from_args(args, our_locals, cell_vars, interner); } Expr::AttrCall { object, args, .. 
} => { collect_cell_vars_from_expr(object, our_locals, cell_vars, interner); collect_cell_vars_from_args(args, our_locals, cell_vars, interner); } Expr::IndirectCall { callable, args } => { collect_cell_vars_from_expr(callable, our_locals, cell_vars, interner); collect_cell_vars_from_args(args, our_locals, cell_vars, interner); } Expr::AttrGet { object, .. } => { collect_cell_vars_from_expr(object, our_locals, cell_vars, interner); } Expr::IfElse { test, body, orelse } => { collect_cell_vars_from_expr(test, our_locals, cell_vars, interner); collect_cell_vars_from_expr(body, our_locals, cell_vars, interner); collect_cell_vars_from_expr(orelse, our_locals, cell_vars, interner); } Expr::ListComp { elt, generators } | Expr::SetComp { elt, generators } => { collect_cell_vars_from_expr(elt, our_locals, cell_vars, interner); for generator in generators { collect_cell_vars_from_expr(&generator.iter, our_locals, cell_vars, interner); for cond in &generator.ifs { collect_cell_vars_from_expr(cond, our_locals, cell_vars, interner); } } } Expr::DictComp { key, value, generators } => { collect_cell_vars_from_expr(key, our_locals, cell_vars, interner); collect_cell_vars_from_expr(value, our_locals, cell_vars, interner); for generator in generators { collect_cell_vars_from_expr(&generator.iter, our_locals, cell_vars, interner); for cond in &generator.ifs { collect_cell_vars_from_expr(cond, our_locals, cell_vars, interner); } } } Expr::FString(parts) => { for part in parts { if let crate::fstring::FStringPart::Interpolation { expr, .. } = part { collect_cell_vars_from_expr(expr, our_locals, cell_vars, interner); } } } Expr::Named { value, .. } => { // Only scan the value expression for cell vars collect_cell_vars_from_expr(value, our_locals, cell_vars, interner); } Expr::Await(value) => { collect_cell_vars_from_expr(value, our_locals, cell_vars, interner); } // Leaf expressions Expr::Literal(_) | Expr::Builtin(_) | Expr::Name(_) | Expr::Lambda { .. } | Expr::Slice { .. 
} => {} } } /// Helper to collect cell vars from argument expressions. fn collect_cell_vars_from_args( args: &ArgExprs, our_locals: &AHashSet, cell_vars: &mut AHashSet, interner: &InternerBuilder, ) { match args { ArgExprs::Empty => {} ArgExprs::One(arg) => collect_cell_vars_from_expr(arg, our_locals, cell_vars, interner), ArgExprs::Two(arg1, arg2) => { collect_cell_vars_from_expr(arg1, our_locals, cell_vars, interner); collect_cell_vars_from_expr(arg2, our_locals, cell_vars, interner); } ArgExprs::Args(args) => { for arg in args { collect_cell_vars_from_expr(arg, our_locals, cell_vars, interner); } } ArgExprs::Kwargs(kwargs) => { for kwarg in kwargs { collect_cell_vars_from_expr(&kwarg.value, our_locals, cell_vars, interner); } } ArgExprs::ArgsKargs { args, kwargs, var_args, var_kwargs, } => { if let Some(args) = args { for arg in args { collect_cell_vars_from_expr(arg, our_locals, cell_vars, interner); } } if let Some(kwargs) = kwargs { for kwarg in kwargs { collect_cell_vars_from_expr(&kwarg.value, our_locals, cell_vars, interner); } } if let Some(var_args) = var_args { collect_cell_vars_from_expr(var_args, our_locals, cell_vars, interner); } if let Some(var_kwargs) = var_kwargs { collect_cell_vars_from_expr(var_kwargs, our_locals, cell_vars, interner); } } ArgExprs::GeneralizedCall { args, kwargs } => { for arg in args { match arg { CallArg::Value(e) | CallArg::Unpack(e) => { collect_cell_vars_from_expr(e, our_locals, cell_vars, interner); } } } for kwarg in kwargs { match kwarg { CallKwarg::Named(kw) => { collect_cell_vars_from_expr(&kw.value, our_locals, cell_vars, interner); } CallKwarg::Unpack(e) => { collect_cell_vars_from_expr(e, our_locals, cell_vars, interner); } } } } } } /// Collects all names referenced (read) in a node and its descendants. /// /// This is used to find what names a nested function references from enclosing scopes. 
fn collect_referenced_names_from_node(node: &ParseNode, referenced: &mut AHashSet, interner: &InternerBuilder) { match node { Node::Expr(expr) => collect_referenced_names_from_expr(expr, referenced, interner), Node::Return(expr) => collect_referenced_names_from_expr(expr, referenced, interner), Node::Raise(Some(expr)) => collect_referenced_names_from_expr(expr, referenced, interner), Node::Raise(None) => {} Node::Assert { test, msg } => { collect_referenced_names_from_expr(test, referenced, interner); if let Some(m) = msg { collect_referenced_names_from_expr(m, referenced, interner); } } Node::Assign { object, .. } => { collect_referenced_names_from_expr(object, referenced, interner); } Node::UnpackAssign { object, .. } => { collect_referenced_names_from_expr(object, referenced, interner); } Node::OpAssign { target, object, .. } => { // OpAssign reads the target before writing referenced.insert(interner.get_str(target.name_id).to_string()); collect_referenced_names_from_expr(object, referenced, interner); } Node::SubscriptOpAssign { target, index, object, .. } => { referenced.insert(interner.get_str(target.name_id).to_string()); collect_referenced_names_from_expr(index, referenced, interner); collect_referenced_names_from_expr(object, referenced, interner); } Node::SubscriptAssign { target, index, value, .. } => { referenced.insert(interner.get_str(target.name_id).to_string()); collect_referenced_names_from_expr(index, referenced, interner); collect_referenced_names_from_expr(value, referenced, interner); } Node::AttrAssign { object, value, .. } => { collect_referenced_names_from_expr(object, referenced, interner); collect_referenced_names_from_expr(value, referenced, interner); } Node::For { iter, body, or_else, .. 
} => { collect_referenced_names_from_expr(iter, referenced, interner); for n in body { collect_referenced_names_from_node(n, referenced, interner); } for n in or_else { collect_referenced_names_from_node(n, referenced, interner); } } Node::While { test, body, or_else } => { collect_referenced_names_from_expr(test, referenced, interner); for n in body { collect_referenced_names_from_node(n, referenced, interner); } for n in or_else { collect_referenced_names_from_node(n, referenced, interner); } } Node::If { test, body, or_else } => { collect_referenced_names_from_expr(test, referenced, interner); for n in body { collect_referenced_names_from_node(n, referenced, interner); } for n in or_else { collect_referenced_names_from_node(n, referenced, interner); } } Node::FunctionDef(_) => { // Don't recurse into nested function bodies - they have their own scope } Node::Try(Try { body, handlers, or_else, finally, }) => { for n in body { collect_referenced_names_from_node(n, referenced, interner); } for handler in handlers { // Exception type expression may reference names if let Some(ref exc_type) = handler.exc_type { collect_referenced_names_from_expr(exc_type, referenced, interner); } for n in &handler.body { collect_referenced_names_from_node(n, referenced, interner); } } for n in or_else { collect_referenced_names_from_node(n, referenced, interner); } for n in finally { collect_referenced_names_from_node(n, referenced, interner); } } // Imports create bindings but don't reference names Node::Import { .. } | Node::ImportFrom { .. } => {} Node::Pass | Node::ReturnNone | Node::Global { .. } | Node::Nonlocal { .. } | Node::Break { .. } | Node::Continue { .. } => {} } } /// Collects all names referenced in an expression. 
fn collect_referenced_names_from_expr( expr: &crate::expressions::ExprLoc, referenced: &mut AHashSet, interner: &InternerBuilder, ) { use crate::expressions::Expr; match &expr.expr { Expr::Name(ident) => { referenced.insert(interner.get_str(ident.name_id).to_string()); } Expr::Literal(_) => {} Expr::Builtin(_) => {} Expr::List(items) | Expr::Tuple(items) | Expr::Set(items) => { for item in items { let expr = match item { SequenceItem::Value(e) | SequenceItem::Unpack(e) => e, }; collect_referenced_names_from_expr(expr, referenced, interner); } } Expr::Dict(dict_items) => { for item in dict_items { match item { DictItem::Pair(key, value) => { collect_referenced_names_from_expr(key, referenced, interner); collect_referenced_names_from_expr(value, referenced, interner); } DictItem::Unpack(e) => collect_referenced_names_from_expr(e, referenced, interner), } } } Expr::Op { left, right, .. } | Expr::CmpOp { left, right, .. } => { collect_referenced_names_from_expr(left, referenced, interner); collect_referenced_names_from_expr(right, referenced, interner); } Expr::ChainCmp { left, comparisons } => { collect_referenced_names_from_expr(left, referenced, interner); for (_, expr) in comparisons { collect_referenced_names_from_expr(expr, referenced, interner); } } Expr::Not(operand) | Expr::UnaryMinus(operand) | Expr::UnaryPlus(operand) | Expr::UnaryInvert(operand) => { collect_referenced_names_from_expr(operand, referenced, interner); } Expr::FString(parts) => { collect_referenced_names_from_fstring_parts(parts, referenced, interner); } Expr::Subscript { object, index } => { collect_referenced_names_from_expr(object, referenced, interner); collect_referenced_names_from_expr(index, referenced, interner); } Expr::Call { callable, args } => { // Check if the callable is a Name reference if let Callable::Name(ident) = callable { referenced.insert(interner.get_str(ident.name_id).to_string()); } collect_referenced_names_from_args(args, referenced, interner); } Expr::AttrCall { 
object, args, .. } => { collect_referenced_names_from_expr(object, referenced, interner); collect_referenced_names_from_args(args, referenced, interner); } Expr::AttrGet { object, .. } => { collect_referenced_names_from_expr(object, referenced, interner); } Expr::IndirectCall { callable, args } => { // Collect references from the callable expression and arguments collect_referenced_names_from_expr(callable, referenced, interner); collect_referenced_names_from_args(args, referenced, interner); } Expr::IfElse { test, body, orelse } => { collect_referenced_names_from_expr(test, referenced, interner); collect_referenced_names_from_expr(body, referenced, interner); collect_referenced_names_from_expr(orelse, referenced, interner); } Expr::ListComp { elt, generators } | Expr::SetComp { elt, generators } => { collect_referenced_names_from_comprehension(generators, Some(elt), None, referenced, interner); } Expr::DictComp { key, value, generators } => { collect_referenced_names_from_comprehension(generators, None, Some((key, value)), referenced, interner); } Expr::LambdaRaw { signature, body, .. } => { // Build set of parameter names (these are local to the lambda, not free variables) let lambda_params: AHashSet = signature .param_names() .map(|s| interner.get_str(s).to_string()) .collect(); // Collect references from the body expression into a temporary set let mut body_refs: AHashSet = AHashSet::new(); collect_referenced_names_from_expr(body, &mut body_refs, interner); // Filter out the lambda's own parameters before adding to referenced set. // The lambda's parameters are bound by the lambda, not free from outer scope. for name in body_refs { if !lambda_params.contains(&name) { referenced.insert(name); } } // Default value expressions are evaluated in the enclosing scope, not the lambda's // scope, so they can reference outer scope without filtering. 
for param in &signature.pos_args { if let Some(ref default) = param.default { collect_referenced_names_from_expr(default, referenced, interner); } } for param in &signature.args { if let Some(ref default) = param.default { collect_referenced_names_from_expr(default, referenced, interner); } } for param in &signature.kwargs { if let Some(ref default) = param.default { collect_referenced_names_from_expr(default, referenced, interner); } } } Expr::Lambda { .. } => { // Lambda should only exist after preparation; this function operates on raw expressions unreachable!("Expr::Lambda should not exist during scope analysis") } Expr::Named { value, .. } => { // Only the value is referenced; target is being assigned, not read collect_referenced_names_from_expr(value, referenced, interner); } Expr::Slice { lower, upper, step } => { if let Some(expr) = lower { collect_referenced_names_from_expr(expr, referenced, interner); } if let Some(expr) = upper { collect_referenced_names_from_expr(expr, referenced, interner); } if let Some(expr) = step { collect_referenced_names_from_expr(expr, referenced, interner); } } Expr::Await(value) => { collect_referenced_names_from_expr(value, referenced, interner); } } } /// Collects referenced names from comprehension expressions. /// /// Handles the special scoping rules: loop variables are local to the comprehension, /// so we collect references from iterators and conditions but exclude loop variable names. fn collect_referenced_names_from_comprehension( generators: &[Comprehension], elt: Option<&ExprLoc>, key_value: Option<(&ExprLoc, &ExprLoc)>, referenced: &mut AHashSet, interner: &InternerBuilder, ) { // Track loop variable names (these are local to the comprehension) let mut comp_locals: AHashSet = AHashSet::new(); // Collect references from expressions that can see prior loop variables. // These need to be filtered against comp_locals before adding to referenced. 
let mut inner_refs: AHashSet = AHashSet::new(); for (i, comp) in generators.iter().enumerate() { if i == 0 { // FIRST generator's iter expression truly references enclosing scope // (evaluated before any loop variable is defined). collect_referenced_names_from_expr(&comp.iter, referenced, interner); } else { // SUBSEQUENT generators' iter expressions can reference prior loop variables. // For example, in `[y for x in xs for y in x]`, the `x` in the second // generator's iter is the first generator's loop variable, not outer scope. collect_referenced_names_from_expr(&comp.iter, &mut inner_refs, interner); } // Add this generator's target(s) to local set collect_names_from_unpack_target(&comp.target, &mut comp_locals, interner); // Filter conditions can see prior loop variables - collect separately for cond in &comp.ifs { collect_referenced_names_from_expr(cond, &mut inner_refs, interner); } } // Element expression(s) can see all loop variables - collect separately if let Some(e) = elt { collect_referenced_names_from_expr(e, &mut inner_refs, interner); } if let Some((k, v)) = key_value { collect_referenced_names_from_expr(k, &mut inner_refs, interner); collect_referenced_names_from_expr(v, &mut inner_refs, interner); } // Add inner references that are NOT comprehension-locals to the outer referenced set. // Names that ARE comp_locals refer to the comprehension's loop variable, not enclosing scope. for name in inner_refs { if !comp_locals.contains(&name) { referenced.insert(name); } } } /// Collects referenced names from argument expressions. 
fn collect_referenced_names_from_args(args: &ArgExprs, referenced: &mut AHashSet, interner: &InternerBuilder) { match args { ArgExprs::Empty => {} ArgExprs::One(e) => collect_referenced_names_from_expr(e, referenced, interner), ArgExprs::Two(e1, e2) => { collect_referenced_names_from_expr(e1, referenced, interner); collect_referenced_names_from_expr(e2, referenced, interner); } ArgExprs::Args(exprs) => { for e in exprs { collect_referenced_names_from_expr(e, referenced, interner); } } ArgExprs::Kwargs(kwargs) => { for kwarg in kwargs { collect_referenced_names_from_expr(&kwarg.value, referenced, interner); } } ArgExprs::ArgsKargs { args, kwargs, var_args, var_kwargs, } => { if let Some(args) = args { for e in args { collect_referenced_names_from_expr(e, referenced, interner); } } if let Some(kwargs) = kwargs { for kwarg in kwargs { collect_referenced_names_from_expr(&kwarg.value, referenced, interner); } } if let Some(e) = var_args { collect_referenced_names_from_expr(e, referenced, interner); } if let Some(e) = var_kwargs { collect_referenced_names_from_expr(e, referenced, interner); } } ArgExprs::GeneralizedCall { args, kwargs } => { for arg in args { match arg { CallArg::Value(e) | CallArg::Unpack(e) => { collect_referenced_names_from_expr(e, referenced, interner); } } } for kwarg in kwargs { match kwarg { CallKwarg::Named(kw) => { collect_referenced_names_from_expr(&kw.value, referenced, interner); } CallKwarg::Unpack(e) => { collect_referenced_names_from_expr(e, referenced, interner); } } } } } } /// Collects referenced names from f-string parts (both expressions and dynamic format specs). fn collect_referenced_names_from_fstring_parts( parts: &[FStringPart], referenced: &mut AHashSet, interner: &InternerBuilder, ) { for part in parts { if let FStringPart::Interpolation { expr, format_spec, .. 
} = part { collect_referenced_names_from_expr(expr, referenced, interner); // Also check dynamic format specs which can contain interpolated expressions if let Some(FormatSpec::Dynamic(spec_parts)) = format_spec { collect_referenced_names_from_fstring_parts(spec_parts, referenced, interner); } } } } /// Collects all names from an unpack target into the given set. /// /// Recursively traverses nested tuples to find all identifier names. fn collect_names_from_unpack_target(target: &UnpackTarget, names: &mut AHashSet, interner: &InternerBuilder) { match target { UnpackTarget::Name(ident) | UnpackTarget::Starred(ident) => { names.insert(interner.get_str(ident.name_id).to_string()); } UnpackTarget::Tuple { targets, .. } => { for t in targets { collect_names_from_unpack_target(t, names, interner); } } } } ================================================ FILE: crates/monty/src/repl.rs ================================================ //! Stateful REPL execution support for Monty. //! //! This module implements incremental snippet execution where each new snippet //! is compiled and executed against persistent heap/namespace state without //! replaying previously executed snippets. use std::mem; use ahash::AHashMap; use ruff_python_ast::token::TokenKind; use ruff_python_parser::{InterpolatedStringErrorType, LexicalErrorType, ParseErrorType, parse_module}; use crate::{ ExcType, MontyException, asyncio::CallId, bytecode::{Code, Compiler, FrameExit, VM, VMSnapshot}, exception_private::{RunError, RunResult}, heap::{DropWithHeap, Heap}, intern::{InternerBuilder, Interns}, io::PrintWriter, namespace::NamespaceId, object::MontyObject, os::OsFunction, parse::parse_with_interner, prepare::prepare_with_existing_names, resource::ResourceTracker, run_progress::{ConvertedExit, ExtFunctionResult, NameLookupResult, convert_frame_exit}, value::Value, }; /// Stateful REPL session that executes snippets incrementally without replay. 
/// /// `MontyRepl` preserves heap and global variable state between snippets. /// Each `feed()` compiles and executes only the new snippet against the current /// state, avoiding the cost and semantic risks of replaying prior code. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub struct MontyRepl { /// Script name used for runtime error messages and REPL identification. /// /// Incremental `feed()` / `start()` snippets intentionally use internal script names /// like `` to match CPython's interactive traceback style. script_name: String, /// Counter for generated `` snippet filenames. next_input_id: u64, /// Stable mapping of global variable names to namespace slot IDs. global_name_map: AHashMap, /// Persistent intern table across snippets so intern/function IDs remain valid. interns: Interns, /// Persistent heap across snippets. heap: Heap, /// Persistent global variable values across snippets. /// /// Indexed by `NamespaceId` slots from `global_name_map`. Between snippet /// executions these are the only VM values that persist — stack and frames /// are transient. globals: Vec, } impl MontyRepl { /// Creates an empty REPL session with no code parsed or executed. /// /// All code execution is driven through `feed_run()` or `feed_start()`. This separates /// construction from execution, matching the pattern used by `MontyRun::new()`. #[must_use] pub fn new(script_name: &str, resource_tracker: T) -> Self { let heap = Heap::new(0, resource_tracker); Self { script_name: script_name.to_owned(), next_input_id: 0, global_name_map: AHashMap::new(), interns: Interns::new(InternerBuilder::default(), Vec::new()), heap, globals: Vec::new(), } } /// Starts executing a new snippet and returns suspendable REPL progress. 
/// /// This is the REPL equivalent of `MontyRun::start`: execution may complete, /// suspend at external calls / OS calls / unresolved futures, or raise a Python /// exception. Resume with the returned state object and eventually recover the /// updated REPL from `ReplProgress::into_complete`. /// /// Unlike `MontyRepl::feed`, this method consumes `self` so runtime state can be /// safely moved into snapshot objects for serialization and cross-process resume. /// /// On a Python-level runtime exception the REPL is **not** destroyed: it is /// returned inside `ReplStartError` so the caller can continue feeding /// subsequent snippets against the same heap and namespace state. /// /// # Errors /// Returns `Err(Box)` for syntax, compile-time, or runtime /// failures — the REPL session is always preserved inside the error. pub fn feed_start( self, code: &str, inputs: Vec<(String, MontyObject)>, print: PrintWriter<'_>, ) -> Result, Box>> { let mut this = self; if code.is_empty() { return Ok(ReplProgress::Complete { repl: this, value: MontyObject::None, }); } let (input_names, input_values): (Vec<_>, Vec<_>) = inputs.into_iter().unzip(); let input_script_name = this.next_input_script_name(); let executor = match ReplExecutor::new_repl_snippet( code.to_owned(), &input_script_name, this.global_name_map.clone(), &this.interns, input_names, ) { Ok(exec) => exec, Err(error) => return Err(Box::new(ReplStartError { repl: this, error })), }; this.ensure_globals_size(executor.namespace_size); let mut vm = VM::new(mem::take(&mut this.globals), &mut this.heap, &executor.interns, print); // Inject inputs with VM alive if let Err(error) = inject_inputs_into_vm(&executor, input_values, &mut vm) { this.globals = vm.take_globals(); vm.cleanup(); return Err(Box::new(ReplStartError { repl: this, error })); } let vm_result = vm.run_module(&executor.module_code); // Convert while VM alive, then snapshot or reclaim globals let converted = convert_frame_exit(vm_result, &mut vm); if 
converted.needs_snapshot() { let vm_state = vm.snapshot(); build_repl_progress(converted, Some(vm_state), executor, this) } else { this.globals = vm.take_globals(); vm.cleanup(); build_repl_progress(converted, None, executor, this) } } /// Feeds and executes a new snippet against the current REPL state to completion. /// /// This compiles only `code` using the existing global slot map, extends the /// global namespace if new names are introduced, and executes the snippet once. /// Previously executed snippets are never replayed. If execution raises after /// partially mutating globals, those mutations remain visible in later feeds, /// matching Python REPL semantics. /// /// # Errors /// Returns `MontyException` for syntax/compile/runtime failures. pub fn feed_run( &mut self, code: &str, inputs: Vec<(String, MontyObject)>, print: PrintWriter<'_>, ) -> Result { if code.is_empty() { return Ok(MontyObject::None); } let (input_names, input_values): (Vec<_>, Vec<_>) = inputs.into_iter().unzip(); let input_script_name = self.next_input_script_name(); let executor = ReplExecutor::new_repl_snippet( code.to_owned(), &input_script_name, self.global_name_map.clone(), &self.interns, input_names, )?; self.ensure_globals_size(executor.namespace_size); let mut vm = VM::new(mem::take(&mut self.globals), &mut self.heap, &executor.interns, print); if let Err(e) = inject_inputs_into_vm(&executor, input_values, &mut vm) { self.globals = vm.take_globals(); vm.cleanup(); return Err(e); } let mut frame_exit_result = vm.run_module(&executor.module_code); // Handle NameLookup exits by raising NameError through the VM so that // traceback information is properly captured. In the non-iterative REPL path, // there's no host to resolve names, so all NameLookup exits become NameErrors. while let Ok(FrameExit::NameLookup { name_id, .. 
}) = &frame_exit_result { let name = executor.interns.get_str(*name_id); let err = ExcType::name_error(name); frame_exit_result = vm.resume_with_exception(err.into()); } // Convert output while VM alive let result = frame_exit_to_object(frame_exit_result, &mut vm); // Reclaim globals before cleanup. self.globals = vm.take_globals(); vm.cleanup(); // Commit compiler metadata even on runtime errors. // Snippets can mutate globals before raising, and those values may contain // FunctionId/StringId values that must be interpreted with the updated tables. let ReplExecutor { name_map, interns, code, .. } = executor; self.global_name_map = name_map; self.interns = interns; result.map_err(|e| e.into_python_exception(&self.interns, &code)) } /// Grows the globals vector to at least `size` slots. /// /// Newly introduced slots are initialized to `Undefined` to keep slot alignment /// with the compiler's global-name map. fn ensure_globals_size(&mut self, size: usize) { if self.globals.len() < size { self.globals.resize_with(size, || Value::Undefined); } } /// Returns the generated filename for the next interactive snippet. /// /// CPython labels interactive snippets as `` and increments /// N for each feed attempt. Matching this improves traceback ergonomics and /// makes REPL errors easier to correlate with user input history. fn next_input_script_name(&mut self) -> String { let input_id = self.next_input_id; self.next_input_id += 1; format!("") } } impl MontyRepl { /// Serializes the REPL session state to bytes. /// /// This includes heap + globals + global slot mapping, allowing snapshot/restore /// of interactive state between process runs. /// /// # Errors /// Returns an error if serialization fails. pub fn dump(&self) -> Result, postcard::Error> { postcard::to_allocvec(self) } } impl MontyRepl { /// Restores a REPL session from bytes produced by `MontyRepl::dump`. /// /// # Errors /// Returns an error if deserialization fails. 
pub fn load(bytes: &[u8]) -> Result { postcard::from_bytes(bytes) } } impl Drop for MontyRepl { fn drop(&mut self) { self.globals.drain(..).drop_with_heap(&mut self.heap); } } // --------------------------------------------------------------------------- // ReplProgress and per-variant structs // --------------------------------------------------------------------------- /// Result of a single suspendable REPL snippet execution. /// /// This mirrors `RunProgress` but returns the updated `MontyRepl` on completion /// so callers can continue feeding additional snippets without replaying prior code. /// Each variant (except `Complete`) wraps a dedicated struct with only the relevant /// resume methods. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub enum ReplProgress { /// Execution paused at an external function call or dataclass method call. FunctionCall(ReplFunctionCall), /// Execution paused for an OS-level operation. OsCall(ReplOsCall), /// All async tasks are blocked waiting for external futures to resolve. ResolveFutures(ReplResolveFutures), /// Execution paused for an unresolved name lookup. NameLookup(ReplNameLookup), /// Snippet execution completed with the updated REPL and result value. Complete { /// Updated REPL session state to continue feeding snippets. repl: MontyRepl, /// Final result produced by the snippet. value: MontyObject, }, } /// Error returned when a REPL snippet raises a Python exception during `start()` or `resume()`. /// /// Unlike syntax/compile errors which consume the REPL, runtime errors preserve /// the full session state so the caller can inspect the error and continue feeding /// subsequent snippets. Any global mutations that occurred before the exception /// remain visible in the returned `repl`. #[derive(Debug)] pub struct ReplStartError { /// REPL session state after the failed snippet — ready for further use. 
pub repl: MontyRepl, /// The Python exception that was raised. pub error: MontyException, } impl ReplProgress { /// Consumes the progress and returns the `ReplFunctionCall` struct. #[must_use] pub fn into_function_call(self) -> Option> { match self { Self::FunctionCall(call) => Some(call), _ => None, } } /// Consumes the progress and returns the `ReplResolveFutures` struct. #[must_use] pub fn into_resolve_futures(self) -> Option> { match self { Self::ResolveFutures(state) => Some(state), _ => None, } } /// Consumes the progress and returns the `ReplNameLookup` struct. #[must_use] pub fn into_name_lookup(self) -> Option> { match self { Self::NameLookup(lookup) => Some(lookup), _ => None, } } /// Consumes the progress and returns the completed REPL and value. #[must_use] pub fn into_complete(self) -> Option<(MontyRepl, MontyObject)> { match self { Self::Complete { repl, value } => Some((repl, value)), _ => None, } } /// Extracts the REPL session from any progress variant, discarding /// the in-flight execution state. /// /// Use this to recover the REPL when you need to abandon the current /// snippet (e.g. because `feed_run` doesn't support async futures). /// The REPL state reflects any mutations that occurred before the /// snapshot was taken. #[must_use] pub fn into_repl(self) -> MontyRepl { match self { Self::FunctionCall(call) => call.into_repl(), Self::OsCall(call) => call.into_repl(), Self::ResolveFutures(state) => state.into_repl(), Self::NameLookup(lookup) => lookup.into_repl(), Self::Complete { repl, .. } => repl, } } } impl ReplProgress { /// Serializes the REPL execution progress to a binary format. /// /// # Errors /// Returns an error if serialization fails. pub fn dump(&self) -> Result, postcard::Error> { postcard::to_allocvec(self) } } impl ReplProgress { /// Deserializes REPL execution progress from a binary format. /// /// # Errors /// Returns an error if deserialization fails. 
pub fn load(bytes: &[u8]) -> Result { postcard::from_bytes(bytes) } } // --------------------------------------------------------------------------- // ReplFunctionCall // --------------------------------------------------------------------------- /// REPL execution paused at an external function call or dataclass method call. /// /// Resume with `resume(result, print)` to provide the return value and continue, /// or `resume_pending(print)` to push an `ExternalFuture` for async resolution. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub struct ReplFunctionCall { /// The name of the function or method being called. pub function_name: String, /// The positional arguments passed to the function. pub args: Vec, /// The keyword arguments passed to the function (key, value pairs). pub kwargs: Vec<(MontyObject, MontyObject)>, /// Unique identifier for this call (used for async correlation). pub call_id: u32, /// Whether this is a dataclass method call (first arg is `self`). pub method_call: bool, /// Internal REPL execution snapshot. snapshot: ReplSnapshot, } impl ReplFunctionCall { /// Extracts the REPL session, discarding the in-flight execution state. /// /// Restores globals from the VM snapshot so the REPL remains usable. #[must_use] pub fn into_repl(self) -> MontyRepl { self.snapshot.into_repl() } /// Resumes snippet execution with an external result. pub fn resume( self, result: impl Into, print: PrintWriter<'_>, ) -> Result, Box>> { self.snapshot.run(result, print) } /// Resumes execution by pushing an `ExternalFuture` for async resolution. /// /// Uses `self.call_id` internally — no need to pass it again. 
pub fn resume_pending(self, print: PrintWriter<'_>) -> Result, Box>> { self.snapshot.run(ExtFunctionResult::Future(self.call_id), print) } } // --------------------------------------------------------------------------- // ReplOsCall // --------------------------------------------------------------------------- /// REPL execution paused for an OS-level operation. /// /// Resume with `resume(result, print)` to provide the OS call result and continue. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub struct ReplOsCall { /// The OS function to execute. pub function: OsFunction, /// The positional arguments for the OS function. pub args: Vec, /// The keyword arguments passed to the function (key, value pairs). pub kwargs: Vec<(MontyObject, MontyObject)>, /// Unique identifier for this call (used for async correlation). pub call_id: u32, /// Internal REPL execution snapshot. snapshot: ReplSnapshot, } impl ReplOsCall { /// Extracts the REPL session, discarding the in-flight execution state. /// /// Restores globals from the VM snapshot so the REPL remains usable. #[must_use] pub fn into_repl(self) -> MontyRepl { self.snapshot.into_repl() } /// Resumes snippet execution with the OS call result. pub fn resume( self, result: impl Into, print: PrintWriter<'_>, ) -> Result, Box>> { self.snapshot.run(result, print) } } // --------------------------------------------------------------------------- // ReplNameLookup // --------------------------------------------------------------------------- /// REPL execution paused for an unresolved name lookup. /// /// The host should check if the name corresponds to a known external function or /// value. Call `resume(result, print)` with the appropriate `NameLookupResult`. /// The namespace slot and scope are managed internally. 
#[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub struct ReplNameLookup { /// The name being looked up. pub name: String, /// The namespace slot where the resolved value should be cached. namespace_slot: u16, /// Whether this is a global slot or a local/function slot. is_global: bool, /// Internal REPL execution snapshot. snapshot: ReplSnapshot, } impl ReplNameLookup { /// Extracts the REPL session, discarding the in-flight execution state. /// /// Restores globals from the VM snapshot so the REPL remains usable. #[must_use] pub fn into_repl(self) -> MontyRepl { self.snapshot.into_repl() } /// Resumes execution after name resolution. /// /// Caches the resolved value in the namespace slot before restoring the VM, /// then either pushes the value onto the stack or raises `NameError`. pub fn resume( self, result: NameLookupResult, print: PrintWriter<'_>, ) -> Result, Box>> { let Self { name, namespace_slot, is_global, snapshot, } = self; let ReplSnapshot { mut repl, executor, vm_state, } = snapshot; // Restore the VM first, then convert inside its lifetime let mut vm = VM::restore( vm_state, &executor.module_code, &mut repl.heap, &executor.interns, print, ); // Resolve the name lookup result with the VM alive let vm_result = match result { NameLookupResult::Value(obj) => { let value = match obj.to_value(&mut vm) { Ok(v) => v, Err(e) => { repl.globals = vm.take_globals(); vm.cleanup(); let error = MontyException::runtime_error(format!("invalid name lookup result: {e}")); return Err(Box::new(ReplStartError { repl, error })); } }; // Cache the resolved value in the appropriate slot let slot = namespace_slot as usize; if is_global { let cloned = value.clone_with_heap(vm.heap); let old = mem::replace(&mut vm.globals[slot], cloned); old.drop_with_heap(vm.heap); } else { let stack_base = vm.current_stack_base(); let cloned = value.clone_with_heap(vm.heap); let old = 
mem::replace(&mut vm.stack[stack_base + slot], cloned); old.drop_with_heap(vm.heap); } vm.push(value); vm.run() } NameLookupResult::Undefined => { let err: RunError = ExcType::name_error(&name).into(); vm.resume_with_exception(err) } }; // Convert while VM alive, then snapshot or reclaim globals let converted = convert_frame_exit(vm_result, &mut vm); if converted.needs_snapshot() { let vm_state = vm.snapshot(); build_repl_progress(converted, Some(vm_state), executor, repl) } else { repl.globals = vm.take_globals(); vm.cleanup(); build_repl_progress(converted, None, executor, repl) } } } // --------------------------------------------------------------------------- // ReplResolveFutures // --------------------------------------------------------------------------- /// REPL execution state blocked on unresolved external futures. /// /// This is the REPL-aware counterpart to `ResolveFutures`. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub struct ReplResolveFutures { /// Persistent REPL session state while this snippet is suspended. repl: MontyRepl, /// Compiled snippet and intern/function tables for this execution. executor: ReplExecutor, /// VM stack/frame state at suspension. vm_state: VMSnapshot, /// Pending call IDs expected by this snapshot. pending_call_ids: Vec, } impl ReplResolveFutures { /// Extracts the REPL session, discarding the in-flight execution state. #[must_use] pub fn into_repl(self) -> MontyRepl { self.repl } /// Returns unresolved call IDs for this suspended state. #[must_use] pub fn pending_call_ids(&self) -> &[u32] { &self.pending_call_ids } /// Resumes snippet execution with zero or more resolved futures. /// /// Supports incremental resolution: callers can provide only a subset of /// pending call IDs and continue resolving over multiple resumes. 
/// /// All errors — including API misuse (unknown `call_id`) and Python-level /// runtime failures — are returned as `Err(Box)` so the REPL /// session is always preserved. pub fn resume( self, results: Vec<(u32, ExtFunctionResult)>, print: PrintWriter<'_>, ) -> Result, Box>> { let Self { mut repl, executor, vm_state, pending_call_ids, } = self; let invalid_call_id = results .iter() .find(|(call_id, _)| !pending_call_ids.contains(call_id)) .map(|(call_id, _)| *call_id); let mut vm = VM::restore( vm_state, &executor.module_code, &mut repl.heap, &executor.interns, print, ); if let Some(call_id) = invalid_call_id { repl.globals = vm.take_globals(); vm.cleanup(); let error = MontyException::runtime_error(format!( "unknown call_id {call_id}, expected one of: {pending_call_ids:?}" )); return Err(Box::new(ReplStartError { repl, error })); } for (call_id, ext_result) in results { match ext_result { ExtFunctionResult::Return(obj) => { if let Err(e) = vm.resolve_future(call_id, obj) { repl.globals = vm.take_globals(); vm.cleanup(); let error = MontyException::runtime_error(format!("Invalid return type for call {call_id}: {e}")); return Err(Box::new(ReplStartError { repl, error })); } } ExtFunctionResult::Error(exc) => vm.fail_future(call_id, RunError::from(exc)), ExtFunctionResult::Future(_) => {} ExtFunctionResult::NotFound(function_name) => { vm.fail_future(call_id, ExtFunctionResult::not_found_exc(&function_name)); } } } if let Some(error) = vm.take_failed_task_error() { repl.globals = vm.take_globals(); vm.cleanup(); let error = error.into_python_exception(&executor.interns, &executor.code); return Err(Box::new(ReplStartError { repl, error })); } let main_task_ready = vm.prepare_current_task_after_resolve(); let loaded_task = match vm.load_ready_task_if_needed() { Ok(loaded) => loaded, Err(e) => { repl.globals = vm.take_globals(); vm.cleanup(); let error = e.into_python_exception(&executor.interns, &executor.code); return Err(Box::new(ReplStartError { repl, error })); } 
}; if !main_task_ready && !loaded_task { let pending_call_ids = vm.get_pending_call_ids(); if !pending_call_ids.is_empty() { let vm_state = vm.snapshot(); let pending_call_ids: Vec = pending_call_ids.iter().map(|id| id.raw()).collect(); return Ok(ReplProgress::ResolveFutures(Self { repl, executor, vm_state, pending_call_ids, })); } } let vm_result = vm.run(); // Convert while VM alive, then snapshot or reclaim globals let converted = convert_frame_exit(vm_result, &mut vm); if converted.needs_snapshot() { let vm_state = vm.snapshot(); build_repl_progress(converted, Some(vm_state), executor, repl) } else { repl.globals = vm.take_globals(); vm.cleanup(); build_repl_progress(converted, None, executor, repl) } } } // --------------------------------------------------------------------------- // ReplContinuationMode — public utility for interactive input collection // --------------------------------------------------------------------------- /// Parse-derived continuation state for interactive REPL input collection. /// /// `monty-cli` uses this to decide whether to execute the buffered snippet /// immediately, keep collecting continuation lines, or require a terminating /// blank line for block statements (`if:`, `def:`, etc.). #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ReplContinuationMode { /// The current snippet is syntactically complete and can run now. Complete, /// The snippet is incomplete and needs more continuation lines. IncompleteImplicit, /// The snippet opened an indented block and should wait for a trailing blank /// line before execution, matching CPython interactive behavior. IncompleteBlock, } /// Detects whether REPL source is complete or needs more input. /// /// This mirrors CPython's broad interactive behavior: /// - Incomplete bracketed / parenthesized / triple-quoted constructs continue. /// - Clause headers (`if:`, `def:`, etc.) require an indented body and then a /// terminating blank line before execution. 
/// - All other parse outcomes are treated as complete (either valid code or a /// syntax error that should be shown immediately). #[must_use] pub fn detect_repl_continuation_mode(source: &str) -> ReplContinuationMode { let Err(error) = parse_module(source) else { return ReplContinuationMode::Complete; }; match error.error { ParseErrorType::OtherError(msg) => { if msg.starts_with("Expected an indented block after ") { ReplContinuationMode::IncompleteBlock } else { ReplContinuationMode::Complete } } ParseErrorType::Lexical(LexicalErrorType::Eof) | ParseErrorType::ExpectedToken { found: TokenKind::EndOfFile, .. } | ParseErrorType::FStringError(InterpolatedStringErrorType::UnterminatedTripleQuotedString) | ParseErrorType::TStringError(InterpolatedStringErrorType::UnterminatedTripleQuotedString) => { ReplContinuationMode::IncompleteImplicit } _ => ReplContinuationMode::Complete, } } // --------------------------------------------------------------------------- // ReplExecutor — internal compilation helper // --------------------------------------------------------------------------- /// Compiled snippet representation used only by REPL execution. /// /// This intentionally mirrors the data shape needed by VM execution in /// `run.rs` but lives in the REPL module so REPL evolution does not require /// changing `run.rs`. #[derive(Debug, serde::Serialize, serde::Deserialize)] struct ReplExecutor { /// Number of slots needed in the global namespace. namespace_size: usize, /// Maps variable names to their indices in the namespace. /// /// Stable slot assignment is required across snippets so previously created /// objects continue to resolve names correctly. name_map: AHashMap, /// Compiled bytecode for the snippet. module_code: Code, /// Interned strings and compiled functions for this snippet. interns: Interns, /// Source code used for traceback/error rendering. code: String, /// Input variable names that were injected for this snippet. 
/// /// Stored so that `inject_inputs` can look up their namespace slots /// after compilation assigns them. input_names: Vec, } impl ReplExecutor { /// Compiles one REPL snippet against existing session metadata. /// /// This differs from normal compilation in three ways required for true /// no-replay execution: /// - Seeds parsing from `existing_interns` so old `StringId` values stay stable. /// - Seeds compilation with existing functions so old `FunctionId` values remain valid. /// - Reuses `existing_name_map` and appends new global names only. /// /// `input_names` are pre-registered in the name map before preparation so they /// receive stable namespace slots that `inject_inputs` can use to store values. fn new_repl_snippet( code: String, script_name: &str, mut existing_name_map: AHashMap, existing_interns: &Interns, input_names: Vec, ) -> Result { // Pre-register input names so they get stable slots before preparation. for name in &input_names { let next_slot = existing_name_map.len(); existing_name_map .entry(name.clone()) .or_insert_with(|| NamespaceId::new(next_slot)); } let seeded_interner = InternerBuilder::from_interns(existing_interns, &code); let parse_result = parse_with_interner(&code, script_name, seeded_interner) .map_err(|e| e.into_python_exc(script_name, &code))?; let prepared = prepare_with_existing_names(parse_result, existing_name_map) .map_err(|e| e.into_python_exc(script_name, &code))?; let existing_functions = existing_interns.functions_clone(); let mut interns = Interns::new(prepared.interner, Vec::new()); let namespace_size_u16 = u16::try_from(prepared.namespace_size).expect("module namespace size exceeds u16"); let compile_result = Compiler::compile_module_with_functions(&prepared.nodes, &interns, namespace_size_u16, existing_functions) .map_err(|e| e.into_python_exc(script_name, &code))?; interns.set_functions(compile_result.functions); Ok(Self { namespace_size: prepared.namespace_size, name_map: prepared.name_map, module_code: 
compile_result.code, interns, code, input_names, }) } } // --------------------------------------------------------------------------- // ReplSnapshot — internal execution state for suspend/resume // --------------------------------------------------------------------------- /// REPL execution state that can be resumed after an external call. /// /// This is the REPL-aware counterpart to `Snapshot`. It is `pub(crate)` — /// callers interact with the per-variant structs (`ReplFunctionCall`, etc.). #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub(crate) struct ReplSnapshot { /// Persistent REPL session state while this snippet is suspended. repl: MontyRepl, /// Compiled snippet and intern/function tables for this execution. executor: ReplExecutor, /// VM stack/frame state at suspension. vm_state: VMSnapshot, } impl ReplSnapshot { /// Extracts the REPL session, restoring globals from the VM snapshot. /// /// When a snapshot is taken, globals live inside the `VMSnapshot`. /// This method creates an empty snapshot from just the globals so the REPL /// can be used for further snippets. fn into_repl(self) -> MontyRepl { let Self { mut repl, vm_state, .. } = self; repl.globals = vm_state.globals; repl } /// Continues snippet execution with an external result. 
fn run( self, result: impl Into, print: PrintWriter<'_>, ) -> Result, Box>> { let Self { mut repl, executor, vm_state, } = self; let ext_result = result.into(); let mut vm = VM::restore( vm_state, &executor.module_code, &mut repl.heap, &executor.interns, print, ); let vm_result = match ext_result { ExtFunctionResult::Return(obj) => vm.resume(obj), ExtFunctionResult::Error(exc) => vm.resume_with_exception(exc.into()), ExtFunctionResult::Future(raw_call_id) => { let call_id = CallId::new(raw_call_id); vm.add_pending_call(call_id); vm.push(Value::ExternalFuture(call_id)); vm.run() } ExtFunctionResult::NotFound(function_name) => { vm.resume_with_exception(ExtFunctionResult::not_found_exc(&function_name)) } }; // Convert while VM alive, then snapshot or reclaim globals let converted = convert_frame_exit(vm_result, &mut vm); if converted.needs_snapshot() { let vm_state = vm.snapshot(); build_repl_progress(converted, Some(vm_state), executor, repl) } else { repl.globals = vm.take_globals(); vm.cleanup(); build_repl_progress(converted, None, executor, repl) } } } // --------------------------------------------------------------------------- // Private helper functions // --------------------------------------------------------------------------- /// Injects input values into the VM's global namespace slots. /// /// Converts each `MontyObject` to a `Value` while the VM is alive, then stores /// it in the global slot that the compiler assigned for the corresponding input name. 
///
/// The value previously held in each slot is dropped through the heap.
///
/// # Errors
/// Returns a runtime `MontyException` as soon as one input cannot be converted;
/// inputs injected before the failing one stay in their slots.
///
/// # Panics
/// Panics if an input name has no entry in `executor.name_map`.
fn inject_inputs_into_vm(
    executor: &ReplExecutor,
    input_values: Vec<MontyObject>,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> Result<(), MontyException> {
    for (name, obj) in executor.input_names.iter().zip(input_values) {
        let slot = executor
            .name_map
            .get(name)
            .expect("input name should have a namespace slot")
            .index();
        let value = obj
            .to_value(vm)
            .map_err(|e| MontyException::runtime_error(format!("invalid input type: {e}")))?;
        let old = mem::replace(&mut vm.globals[slot], value);
        old.drop_with_heap(vm.heap);
    }
    Ok(())
}

/// Converts module/frame exit results into plain `MontyObject` outputs.
///
/// Used by the non-iterative `feed_run` path where suspendable outcomes (external calls,
/// name lookups) are not supported and should produce errors.
///
/// # Errors
/// Propagates the incoming error, returns `NotImplementedError` for external, OS,
/// method-call and future exits, and `NameError` for name-lookup exits.
fn frame_exit_to_object(
    frame_exit_result: RunResult<FrameExit>,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<MontyObject> {
    match frame_exit_result? {
        FrameExit::Return(return_value) => Ok(MontyObject::new(return_value, vm)),
        FrameExit::ExternalCall {
            function_name, args, ..
        } => {
            // The call will never happen, so release its arguments through the heap.
            args.drop_with_heap(vm.heap);
            let function_name = function_name.as_str(vm.interns);
            Err(ExcType::not_implemented(format!(
                "External function '{function_name}' not implemented with standard execution"
            ))
            .into())
        }
        FrameExit::OsCall { function, args, .. } => {
            args.drop_with_heap(vm.heap);
            Err(ExcType::not_implemented(format!(
                "OS function '{function}' not implemented with standard execution"
            ))
            .into())
        }
        FrameExit::MethodCall { method_name, args, .. } => {
            args.drop_with_heap(vm.heap);
            let name = method_name.as_str(vm.interns);
            Err(
                ExcType::not_implemented(format!("Method call '{name}' not implemented with standard execution"))
                    .into(),
            )
        }
        FrameExit::ResolveFutures(_) => {
            Err(ExcType::not_implemented("async futures not supported by standard execution.").into())
        }
        FrameExit::NameLookup { name_id, .. } => {
            let name = vm.interns.get_str(name_id);
            Err(ExcType::name_error(name).into())
        }
    }
}

/// Assembles a `ReplProgress` from already-converted data.
/// /// This is the REPL equivalent of `build_run_progress`. On completion/error, /// compiler metadata is committed to the REPL so subsequent snippets see /// updated intern tables and name maps. fn build_repl_progress( converted: ConvertedExit, vm_state: Option, executor: ReplExecutor, mut repl: MontyRepl, ) -> Result, Box>> { macro_rules! new_repl_snapshot { () => { ReplSnapshot { repl, executor, vm_state: vm_state.expect("snapshot should exist"), } }; } match converted { ConvertedExit::Complete(obj) => { let ReplExecutor { name_map, interns, .. } = executor; repl.global_name_map = name_map; repl.interns = interns; Ok(ReplProgress::Complete { repl, value: obj }) } ConvertedExit::FunctionCall { function_name, args, kwargs, call_id, method_call, } => Ok(ReplProgress::FunctionCall(ReplFunctionCall { function_name, args, kwargs, call_id, method_call, snapshot: new_repl_snapshot!(), })), ConvertedExit::OsCall { function, args, kwargs, call_id, } => Ok(ReplProgress::OsCall(ReplOsCall { function, args, kwargs, call_id, snapshot: new_repl_snapshot!(), })), ConvertedExit::ResolveFutures(pending_call_ids) => Ok(ReplProgress::ResolveFutures(ReplResolveFutures { repl, executor, vm_state: vm_state.expect("snapshot should exist for ResolveFutures"), pending_call_ids, })), ConvertedExit::NameLookup { name, namespace_slot, is_global, } => Ok(ReplProgress::NameLookup(ReplNameLookup { name, namespace_slot, is_global, snapshot: new_repl_snapshot!(), })), ConvertedExit::Error(err) => { let error = err.into_python_exception(&executor.interns, &executor.code); // Commit compiler metadata even on runtime errors, matching feed() behavior. // Snippets can create new variables or functions before raising, and those // values may reference FunctionId/StringId values from the new tables. let ReplExecutor { name_map, interns, .. 
} = executor; repl.global_name_map = name_map; repl.interns = interns; Err(Box::new(ReplStartError { repl, error })) } } } ================================================ FILE: crates/monty/src/resource.rs ================================================ use std::{ fmt, sync::atomic::{AtomicU16, Ordering}, time::{Duration, Instant}, }; use crate::{ ExcType, MontyException, exception_private::{ExceptionRaise, RawStackFrame, RunError, SimpleException}, }; /// Threshold in bytes above which `check_large_result` is called. /// /// Operations that may produce results larger than this threshold (100KB) should call /// `check_large_result` before performing the operation. This prevents DoS attacks /// where operations like `2 ** 10_000_000` allocate huge amounts of memory before /// the allocation check can catch them. pub const LARGE_RESULT_THRESHOLD: usize = 100_000; /// Pre-checks that an operation producing `item_len * count` bytes won't exceed resource limits. /// /// Used for sequence repeats (`'x' * 999_999_999`), padding operations /// (`str.ljust`, `str.center`, `str.zfill`, etc.), and any other operation /// where the result size is a simple product of two known values. pub fn check_repeat_size(item_len: usize, count: usize, tracker: &impl ResourceTracker) -> Result<(), ResourceError> { check_estimated_size(item_len.saturating_mul(count), tracker) } /// Pre-checks that `base ** exponent` won't exceed resource limits before computing. /// /// The result of `base ** exp` has approximately `base_bits * exp` bits. /// For bases with 0 or 1 significant bits (0, 1, -1), the result is always /// small regardless of exponent, so the check is skipped. /// /// The estimate includes a 4× safety multiplier because `BigInt::pow` uses repeated squaring, /// which allocates intermediate values on the Rust heap (not tracked by the resource tracker). 
/// At peak, old/new base and old/new accumulator coexist simultaneously during each /// multiplication step, requiring roughly 4× the final result size in memory. pub fn check_pow_size(base_bits: u64, exponent: u64, tracker: &impl ResourceTracker) -> Result<(), ResourceError> { // 0**n = 0, 1**n = 1, (-1)**n = ±1 — always small if base_bits <= 1 { return Ok(()); } let result_bytes = estimate_bits_to_bytes(base_bits.saturating_mul(exponent)); // Repeated squaring needs ~4× result size in peak memory (old/new base + old/new accumulator // coexist during each multiplication step), and these are Rust heap allocations not tracked // by the resource tracker. check_estimated_size(result_bytes.saturating_mul(4), tracker) } /// Pre-checks that an integer multiplication won't exceed resource limits. /// /// The result of multiplying two numbers has at most `a_bits + b_bits` bits. pub fn check_mult_size(a_bits: u64, b_bits: u64, tracker: &impl ResourceTracker) -> Result<(), ResourceError> { check_estimated_size(estimate_bits_to_bytes(a_bits.saturating_add(b_bits)), tracker) } /// Pre-checks that a left shift won't exceed resource limits. /// /// The result of `value << shift` has approximately `value_bits + shift` bits. /// For zero values the result is always zero, so the check is skipped. pub fn check_lshift_size( value_bits: u64, shift_amount: u64, tracker: &impl ResourceTracker, ) -> Result<(), ResourceError> { if value_bits == 0 { return Ok(()); } check_estimated_size(estimate_bits_to_bytes(value_bits.saturating_add(shift_amount)), tracker) } /// Pre-checks that an integer division overflow promotion won't exceed resource limits. /// /// Division results are bounded by the dividend size, but we still check for consistency /// with other BigInt promotion paths. 
pub fn check_div_size(dividend_bits: u64, tracker: &impl ResourceTracker) -> Result<(), ResourceError> {
    // The quotient is bounded by the dividend, so the dividend's size is the estimate.
    let quotient_bytes = estimate_bits_to_bytes(dividend_bits);
    check_estimated_size(quotient_bytes, tracker)
}

/// Rejects a string/bytes `replace` whose worst-case output would exceed resource limits.
///
/// Must run before the replacement is performed: an expression such as
/// `('a' * 1000).replace('a', 'b' * 10_000_000)` turns a small tracked input into a
/// huge untracked Rust `String`/`Vec<u8>` inside `String::replace()`, before
/// `allocate_string()` gets a chance to inspect the result.
///
/// The worst case assumes the maximum possible number of replacements:
/// - non-empty `old`: at most `input_len / old_len` matches fit in the input;
/// - empty `old`: `input_len + 1` insertion points (before each element and after the last);
/// - a non-negative `count` additionally caps the number of replacements, while a
///   negative `count` leaves it uncapped.
///
/// Each replacement swaps `old_len` bytes for `new_len` bytes; the resulting size is
/// passed to `check_estimated_size`.
pub fn check_replace_size(
    input_len: usize,
    old_len: usize,
    new_len: usize,
    count: i64,
    tracker: &impl ResourceTracker,
) -> Result<(), ResourceError> {
    // `checked_div` returns `None` only for an empty pattern (division by zero).
    let upper_bound = match input_len.checked_div(old_len) {
        Some(matches_that_fit) => matches_that_fit,
        None => input_len.saturating_add(1),
    };

    // The conversion fails for a negative `count` (no cap requested) and for a
    // non-negative `count` too large for `usize` (a cap that could never bind);
    // both cases leave the upper bound untouched.
    let performed = usize::try_from(count).map_or(upper_bound, |cap| upper_bound.min(cap));

    // Result = input_len - (performed * old_len) + (performed * new_len)
    let bytes_removed = performed.saturating_mul(old_len);
    let bytes_added = performed.saturating_mul(new_len);
    let bytes_kept = input_len.saturating_sub(bytes_removed);
    check_estimated_size(bytes_kept.saturating_add(bytes_added), tracker)
}

/// Checks an estimated result size against the resource tracker.
///
/// Only calls the tracker when the estimate exceeds `LARGE_RESULT_THRESHOLD`
/// to avoid overhead on small operations.
pub(crate) fn check_estimated_size(
    estimated_bytes: usize,
    tracker: &impl ResourceTracker,
) -> Result<(), ResourceError> {
    // Estimates at or below the threshold never reach the tracker.
    if estimated_bytes > LARGE_RESULT_THRESHOLD {
        tracker.check_large_result(estimated_bytes)?;
    }
    Ok(())
}

/// Converts an estimated bit count to bytes, saturating to `usize::MAX` on overflow.
///
/// Overflow means the result is astronomically large, so saturating ensures
/// the resource limit check always triggers rather than being silently skipped.
fn estimate_bits_to_bytes(bits: u64) -> usize {
    // Round up to whole bytes; `saturating_add` keeps the `+ 7` from wrapping and
    // `try_from` guards targets where `usize` is narrower than `u64`.
    usize::try_from(bits.saturating_add(7) / 8).unwrap_or(usize::MAX)
}

/// Error returned when a resource limit is exceeded during execution.
///
/// This allows the sandbox to enforce strict limits on allocation count,
/// execution time, and memory usage.
#[derive(Debug, Clone)]
pub enum ResourceError {
    /// Maximum number of allocations exceeded.
    Allocation { limit: usize, count: usize },
    /// Maximum execution time exceeded.
    Time { limit: Duration, elapsed: Duration },
    /// Maximum memory usage exceeded.
    Memory { limit: usize, used: usize },
    /// Maximum recursion depth exceeded.
    Recursion { limit: usize, depth: usize },
    /// Any other error, e.g. when propagating a python exception
    Exception(MontyException),
}

impl fmt::Display for ResourceError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Allocation { limit, count } => {
                write!(f, "allocation limit exceeded: {count} > {limit}")
            }
            Self::Time { limit, elapsed } => {
                write!(f, "time limit exceeded: {elapsed:?} > {limit:?}")
            }
            Self::Memory { limit, used } => {
                write!(f, "memory limit exceeded: {used} bytes > {limit} bytes")
            }
            Self::Recursion { .. } => {
                write!(f, "maximum recursion depth exceeded")
            }
            Self::Exception(exc) => {
                write!(f, "{exc}")
            }
        }
    }
}

impl std::error::Error for ResourceError {}

impl ResourceError {
    /// Converts this resource error to a Python exception with optional stack frame.
    ///
    /// Maps resource error types to Python exception types:
    /// - `Allocation` → `MemoryError`
    /// - `Memory` → `MemoryError`
    /// - `Time` → `TimeoutError`
    /// - `Recursion` → `RecursionError`
    /// - `Exception` → the wrapped exception's own type and message
    ///
    /// When `frame` is `Some`, it is attached to the exception via `with_frame`;
    /// otherwise the exception is converted without frame information.
    #[must_use]
    pub(crate) fn into_exception(self, frame: Option<RawStackFrame>) -> ExceptionRaise {
        let (exc_type, msg) = match self {
            Self::Allocation { limit, count } => (
                ExcType::MemoryError,
                Some(format!("allocation limit exceeded: {count} > {limit}")),
            ),
            Self::Memory { limit, used } => (
                ExcType::MemoryError,
                Some(format!("memory limit exceeded: {used} bytes > {limit} bytes")),
            ),
            Self::Time { limit, elapsed } => (
                ExcType::TimeoutError,
                Some(format!("time limit exceeded: {elapsed:?} > {limit:?}")),
            ),
            Self::Recursion { .. } => (
                ExcType::RecursionError,
                Some("maximum recursion depth exceeded".to_string()),
            ),
            Self::Exception(exc) => (exc.exc_type(), exc.into_message()),
        };
        let exc = SimpleException::new(exc_type, msg);
        match frame {
            Some(f) => exc.with_frame(f),
            None => exc.into(),
        }
    }
}

impl From<ResourceError> for RunError {
    fn from(err: ResourceError) -> Self {
        // RecursionError is catchable in CPython, so it must be catchable here too.
        // Other resource errors (memory, time, allocation) remain uncatchable to prevent
        // untrusted code from suppressing resource limit violations.
        if matches!(err, ResourceError::Recursion { .. }) {
            Self::Exc(err.into_exception(None))
        } else {
            Self::UncatchableExc(err.into_exception(None))
        }
    }
}

/// Trait for tracking resource usage and scheduling garbage collection.
///
/// Implementations can enforce limits on allocations, time, and memory,
/// as well as schedule periodic garbage collection.
///
/// All implementations should eventually trigger garbage collection to handle
/// reference cycles. The `should_gc` method controls *frequency*, not whether
/// GC runs at all.
///
/// NOTE(review): this trait does not declare a `should_gc` method; the paragraph
/// above may be stale — confirm where GC frequency is decided and update it.
pub trait ResourceTracker: fmt::Debug {
    /// Called before each heap allocation.
    ///
    /// Returns `Ok(())` if the allocation should proceed, or `Err(ResourceError)`
    /// if a limit would be exceeded.
    ///
    /// # Arguments
    /// * `get_size` - Closure returning the approximate size in bytes of the allocation.
    ///   Passing a closure lets an implementation skip computing the size when it
    ///   does not need it.
    fn on_allocate(&mut self, get_size: impl FnOnce() -> usize) -> Result<(), ResourceError>;

    /// Called when memory is freed (during dec_ref or garbage collection).
    ///
    /// # Arguments
    /// * `get_size` - Closure returning the size in bytes of the freed allocation
    fn on_free(&mut self, get_size: impl FnOnce() -> usize);

    /// Called periodically (at statement boundaries) to check time limits.
    ///
    /// Returns `Ok(())` if within time limit, or `Err(ResourceError::Time)`
    /// if the limit is exceeded.
    ///
    /// Takes `&self` rather than `&mut self` because checking elapsed time is a
    /// read-only operation. This allows time checks in contexts that only have
    /// an immutable heap reference, such as `py_repr_fmt`.
    fn check_time(&self) -> Result<(), ResourceError>;

    /// Called before pushing a new call frame to check recursion depth.
    ///
    /// Returns `Ok(())` if within recursion limit, or `Err(ResourceError::Recursion)`
    /// if the limit would be exceeded.
    ///
    /// # Arguments
    /// * `current_depth` - Current call stack depth (before the new frame is pushed)
    fn check_recursion_depth(&self, current_depth: usize) -> Result<(), ResourceError>;

    /// Called before operations that may produce large results (>100KB).
    ///
    /// This allows pre-emptive rejection of operations like `2 ** 10_000_000`
    /// before the memory is actually allocated. The check only happens for
    /// estimated result sizes above `LARGE_RESULT_THRESHOLD` to avoid overhead
    /// on small operations.
    ///
    /// # Arguments
    /// * `estimated_bytes` - Approximate size of the result in bytes
    ///
    /// Returns `Ok(())` to allow the operation, or `Err(ResourceError)` to reject.
    fn check_large_result(&self, estimated_bytes: usize) -> Result<(), ResourceError>;
}

/// A resource tracker that imposes no limits except default recursion limit.
///
/// Recursion limit is set to the cpython default of 1000.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct NoLimitTracker;

impl ResourceTracker for NoLimitTracker {
    /// Always allows the allocation; the size closure is never evaluated.
    #[inline]
    fn on_allocate(&mut self, _: impl FnOnce() -> usize) -> Result<(), ResourceError> {
        Ok(())
    }

    /// No-op: nothing is tracked, so the size closure is never evaluated.
    #[inline]
    fn on_free(&mut self, _: impl FnOnce() -> usize) {}

    /// Always succeeds: no time limit is enforced.
    #[inline]
    fn check_time(&self) -> Result<(), ResourceError> {
        Ok(())
    }

    /// Enforces the default recursion limit, `DEFAULT_MAX_RECURSION_DEPTH` (1000).
    ///
    /// The high limit here may cause stack overflow errors in debug mode, but those
    /// errors should not occur with release builds.
    #[inline]
    fn check_recursion_depth(&self, current_depth: usize) -> Result<(), ResourceError> {
        // `current_depth` is the depth before the push, so the reported depth is one more.
        if current_depth >= DEFAULT_MAX_RECURSION_DEPTH {
            Err(ResourceError::Recursion {
                limit: DEFAULT_MAX_RECURSION_DEPTH,
                depth: current_depth + 1,
            })
        } else {
            Ok(())
        }
    }

    #[inline]
    fn check_large_result(&self, _estimated_bytes: usize) -> Result<(), ResourceError> {
        // No limit - always allow operations regardless of result size
        Ok(())
    }
}

/// Configuration for resource limits.
///
/// All limits are optional - set to `None` to disable a specific limit.
/// Use `ResourceLimits::default()` for no limits, or build custom limits
/// with the builder pattern.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct ResourceLimits {
    /// Maximum number of heap allocations allowed.
    pub max_allocations: Option<usize>,
    /// Maximum execution time.
    pub max_duration: Option<Duration>,
    /// Maximum heap memory in bytes (approximate).
    pub max_memory: Option<usize>,
    /// Run garbage collection every N allocations.
    pub gc_interval: Option<usize>,
    /// Maximum recursion depth (function call stack depth).
    pub max_recursion_depth: Option<usize>,
}

/// Recommended maximum recursion depth if not otherwise specified.
pub const DEFAULT_MAX_RECURSION_DEPTH: usize = 1000;

impl ResourceLimits {
    /// Creates a new ResourceLimits with all limits disabled, except max recursion which is
    /// set to `DEFAULT_MAX_RECURSION_DEPTH` (1000).
    ///
    /// Unlike `ResourceLimits::default()`, which leaves every limit (including
    /// recursion depth) as `None`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            max_recursion_depth: Some(DEFAULT_MAX_RECURSION_DEPTH),
            ..Default::default()
        }
    }

    /// Sets the maximum number of allocations.
    #[must_use]
    pub fn max_allocations(mut self, limit: usize) -> Self {
        self.max_allocations = Some(limit);
        self
    }

    /// Sets the maximum execution duration.
    #[must_use]
    pub fn max_duration(mut self, limit: Duration) -> Self {
        self.max_duration = Some(limit);
        self
    }

    /// Sets the maximum memory usage in bytes.
    #[must_use]
    pub fn max_memory(mut self, limit: usize) -> Self {
        self.max_memory = Some(limit);
        self
    }

    /// Sets the garbage collection interval (run GC every N allocations).
    #[must_use]
    pub fn gc_interval(mut self, interval: usize) -> Self {
        self.gc_interval = Some(interval);
        self
    }

    /// Sets the maximum recursion depth (function call stack depth).
    ///
    /// Takes an `Option` (unlike the other builder methods) so the limit can be
    /// disabled by passing `None`.
    #[must_use]
    pub fn max_recursion_depth(mut self, limit: Option<usize>) -> Self {
        self.max_recursion_depth = limit;
        self
    }
}

/// How often to actually check `Instant::elapsed()` in `check_time`.
///
/// Calling `Instant::elapsed()` on every `check_time` invocation adds measurable
/// overhead in tight loops (the VM calls `check_time` on every instruction).
/// By only checking every N calls, we reduce this overhead while still catching
/// timeouts promptly.
const TIME_CHECK_INTERVAL: u16 = 10;

/// A resource tracker that enforces configurable limits.
///
/// Tracks allocation count, memory usage, and execution time, returning
/// errors when limits are exceeded. Also schedules garbage collection
/// at configurable intervals.
///
/// When serialized/deserialized, the `start_time` is reset to `Instant::now()`.
/// This means time limits restart from zero after deserialization.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct LimitedTracker {
    limits: ResourceLimits,
    /// When execution started (for time limit checking).
    /// Reset to `Instant::now()` on deserialization.
    #[serde(skip, default = "Instant::now")]
    start_time: Instant,
    /// Total number of allocations made.
    allocation_count: usize,
    /// Current approximate memory usage in bytes.
    current_memory: usize,
    /// Counter for rate-limiting `Instant::elapsed()` calls in `check_time`.
    ///
    /// Uses `AtomicU16` for interior mutability since `check_time` takes `&self`
    /// and `LimitedTracker` must be `Sync` (it ends up inside PyO3 pyclass types).
    check_counter: AtomicU16,
}

impl LimitedTracker {
    /// Creates a new LimitedTracker with the given limits.
    ///
    /// The start time is recorded when the tracker is created, so create
    /// it immediately before starting execution.
    #[must_use]
    pub fn new(limits: ResourceLimits) -> Self {
        Self {
            limits,
            start_time: Instant::now(),
            allocation_count: 0,
            current_memory: 0,
            check_counter: AtomicU16::new(0),
        }
    }

    /// Returns the current allocation count.
    #[must_use]
    pub fn allocation_count(&self) -> usize {
        self.allocation_count
    }

    /// Returns the current approximate memory usage.
    #[must_use]
    pub fn current_memory(&self) -> usize {
        self.current_memory
    }

    /// Returns the elapsed time since tracker creation.
    #[must_use]
    pub fn elapsed(&self) -> Duration {
        self.start_time.elapsed()
    }

    /// Sets the maximum execution duration and resets the start time to now.
    ///
    /// This is useful when resuming execution after an external function call
    /// where you want to enforce a different (typically shorter) time limit
    /// for the resumed phase without counting the time spent in the host.
pub fn set_max_duration(&mut self, duration: Duration) {
        self.limits.max_duration = Some(duration);
        // Restart the clock so the new limit applies only to time spent from here on.
        self.start_time = Instant::now();
    }
}

impl ResourceTracker for LimitedTracker {
    /// Accounts for one allocation, rejecting it if the allocation-count or memory
    /// limit would be exceeded.
    ///
    /// The count limit is checked before `get_size` is evaluated, so the size is not
    /// computed for an allocation that is already rejected. Tracking state is only
    /// updated when the allocation is accepted.
    fn on_allocate(&mut self, get_size: impl FnOnce() -> usize) -> Result<(), ResourceError> {
        // Check allocation count limit
        if let Some(max) = self.limits.max_allocations
            && self.allocation_count >= max
        {
            return Err(ResourceError::Allocation {
                limit: max,
                count: self.allocation_count.saturating_add(1),
            });
        }

        let size = get_size();
        // Saturate instead of using `+`: a wrapped sum in a release build would look
        // small and slip past the limit below (and a debug build would panic).
        // `check_large_result` computes its projected total the same way.
        let new_memory = self.current_memory.saturating_add(size);

        // Check memory limit
        if let Some(max) = self.limits.max_memory
            && new_memory > max
        {
            return Err(ResourceError::Memory {
                limit: max,
                used: new_memory,
            });
        }

        // Update tracking state
        self.allocation_count = self.allocation_count.saturating_add(1);
        self.current_memory = new_memory;
        Ok(())
    }

    /// Subtracts the freed size from the tracked memory, clamping at zero.
    fn on_free(&mut self, get_size: impl FnOnce() -> usize) {
        self.current_memory = self.current_memory.saturating_sub(get_size());
    }

    /// Checks the time limit, sampling the clock only every `TIME_CHECK_INTERVAL` calls.
    ///
    /// When no `max_duration` is configured this returns `Ok(())` without touching
    /// the call counter.
    fn check_time(&self) -> Result<(), ResourceError> {
        if let Some(max) = self.limits.max_duration {
            // `fetch_add` returns the previous value; add one to get this call's ordinal.
            let count = self.check_counter.fetch_add(1, Ordering::Relaxed).wrapping_add(1);
            if count.is_multiple_of(TIME_CHECK_INTERVAL) {
                // Only call Instant::elapsed() every TIME_CHECK_INTERVAL calls
                let elapsed = self.start_time.elapsed();
                if elapsed > max {
                    // Reset counter so the very next check_time call also triggers
                    // an elapsed check. This is important because some callers
                    // (e.g. repr_sequence_fmt) catch the error and return normally,
                    // and we need the VM loop's next check_time to re-detect timeout.
                    self.check_counter
                        .store(TIME_CHECK_INTERVAL.wrapping_sub(1), Ordering::Relaxed);
                    return Err(ResourceError::Time { limit: max, elapsed });
                }
            }
        }
        Ok(())
    }

    /// Rejects a new call frame once the configured recursion limit is reached.
    ///
    /// Always succeeds when `max_recursion_depth` is `None`.
    fn check_recursion_depth(&self, current_depth: usize) -> Result<(), ResourceError> {
        if let Some(max) = self.limits.max_recursion_depth {
            // current_depth is before push, so new depth would be current_depth + 1
            if current_depth >= max {
                return Err(ResourceError::Recursion {
                    limit: max,
                    depth: current_depth + 1,
                });
            }
        }
        Ok(())
    }

    /// Rejects an operation whose estimated result, added to the memory already
    /// tracked, would exceed `max_memory`. Does not modify tracking state.
    fn check_large_result(&self, estimated_bytes: usize) -> Result<(), ResourceError> {
        // Check if this would exceed memory limit
        if let Some(max) = self.limits.max_memory {
            let new_memory = self.current_memory.saturating_add(estimated_bytes);
            if new_memory > max {
                return Err(ResourceError::Memory {
                    limit: max,
                    used: new_memory,
                });
            }
        }
        Ok(())
    }
}

================================================
FILE: crates/monty/src/run.rs
================================================
//! Public interface for running Monty code.
use std::sync::atomic::{AtomicUsize, Ordering};

use crate::{
    ExcType, MontyException,
    bytecode::{Code, Compiler, FrameExit, VM},
    exception_private::RunResult,
    heap::{DropWithHeap, Heap},
    intern::Interns,
    io::PrintWriter,
    object::MontyObject,
    parse::parse,
    prepare::prepare,
    resource::{NoLimitTracker, ResourceTracker},
    run_progress::{RunProgress, build_run_progress, check_snapshot_from_converted, convert_frame_exit},
    value::Value,
};

/// Primary interface for running Monty code.
/// /// `MontyRun` supports two execution modes: /// - **Simple execution**: Use `run()` or `run_no_limits()` to run code to completion /// - **Iterative execution**: Use `start()` to start execution which will pause at external function calls and /// can be resumed later /// /// # Example /// ``` /// use monty::{MontyRun, MontyObject}; /// /// let runner = MontyRun::new("x + 1".to_owned(), "test.py", vec!["x".to_owned()]).unwrap(); /// let result = runner.run_no_limits(vec![MontyObject::Int(41)]).unwrap(); /// assert_eq!(result, MontyObject::Int(42)); /// ``` #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct MontyRun { /// The underlying executor containing parsed AST and interns. executor: Executor, } impl MontyRun { /// Creates a new run snapshot by parsing the given code. /// /// This only parses and prepares the code - no heap or namespaces are created yet. /// Call `run_snapshot()` with inputs to start execution. /// /// # Arguments /// * `code` - The Python code to execute /// * `script_name` - The script name for error messages /// * `input_names` - Names of input variables /// /// # Errors /// Returns `MontyException` if the code cannot be parsed. pub fn new(code: String, script_name: &str, input_names: Vec) -> Result { Executor::new(code, script_name, input_names).map(|executor| Self { executor }) } /// Returns the code that was parsed to create this snapshot. #[must_use] pub fn code(&self) -> &str { &self.executor.code } /// Executes the code and returns both the result and reference count data, used for testing only. #[cfg(feature = "ref-count-return")] pub fn run_ref_counts(&self, inputs: Vec) -> Result { self.executor.run_ref_counts(inputs) } /// Executes the code to completion assuming not external functions or snapshotting. /// /// This is marginally faster than running with snapshotting enabled since we don't need /// to track the position in code, but does not allow calling of external functions. 
/// /// # Arguments /// * `inputs` - Values to fill the first N slots of the namespace /// * `resource_tracker` - Custom resource tracker implementation /// * `print` - print output writer pub fn run( &self, inputs: Vec, resource_tracker: impl ResourceTracker, print: PrintWriter<'_>, ) -> Result { self.executor.run(inputs, resource_tracker, print) } /// Executes the code to completion with no resource limits, printing to stdout/stderr. pub fn run_no_limits(&self, inputs: Vec) -> Result { self.run(inputs, NoLimitTracker, PrintWriter::Stdout) } /// Serializes the runner to a binary format. /// /// The serialized data can be stored and later restored with `load()`. /// This allows caching parsed code to avoid re-parsing on subsequent runs. /// /// # Errors /// Returns an error if serialization fails. pub fn dump(&self) -> Result, postcard::Error> { postcard::to_allocvec(self) } /// Deserializes a runner from binary format. /// /// # Arguments /// * `bytes` - The serialized runner data from `dump()` /// /// # Errors /// Returns an error if deserialization fails. pub fn load(bytes: &[u8]) -> Result { postcard::from_bytes(bytes) } /// Starts execution with the given inputs and resource tracker, consuming self. /// /// Creates the heap and namespaces, then begins execution. /// /// For iterative execution, `start()` consumes self and returns a `RunProgress`: /// - `RunProgress::FunctionCall(call)` - external function call, call `call.resume(return_value)` to resume /// - `RunProgress::Complete(value)` - execution finished /// /// This enables snapshotting execution state and returning control to the host /// application during long-running computations. 
/// /// # Arguments /// * `inputs` - Initial input values (must match length of `input_names` from `new()`) /// * `resource_tracker` - Resource tracker for the execution /// * `print` - Writer for print output /// /// # Errors /// Returns `MontyException` if: /// - The number of inputs doesn't match the expected count /// - An input value is invalid (e.g., `MontyObject::Repr`) /// - A runtime error occurs during execution /// /// # Panics /// This method should not panic under normal operation. Internal assertions /// may panic if the VM reaches an inconsistent state (indicating a bug). pub fn start( self, inputs: Vec, resource_tracker: T, print: PrintWriter<'_>, ) -> Result, MontyException> { let executor = self.executor; // Create heap and VM with empty globals, then populate inputs with VM alive let mut heap = Heap::new(executor.namespace_size, resource_tracker); let globals = executor.empty_globals(); let mut vm = VM::new(globals, &mut heap, &executor.interns, print); executor.populate_inputs(inputs, &mut vm)?; // Start execution let vm_result = vm.run_module(&executor.module_code); // Three-phase conversion: convert while VM alive, then snapshot, then build progress let converted = convert_frame_exit(vm_result, &mut vm); let vm_state = check_snapshot_from_converted(&converted, vm); build_run_progress(converted, vm_state, executor, heap) } } /// Lower level interface to parse code and run it to completion. /// /// This is an internal type used by [`MontyRun`]. It stores the compiled bytecode and source code /// for error reporting. Also used by `run_progress` and `repl` modules. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) struct Executor { /// Number of slots needed in the global namespace. pub(crate) namespace_size: usize, /// Maps variable names to their indices in the namespace. Used for ref-count testing. #[cfg(feature = "ref-count-return")] name_map: ahash::AHashMap, /// Compiled bytecode for the module. 
pub(crate) module_code: Code, /// Interned strings used for looking up names and filenames during execution. pub(crate) interns: Interns, /// Source code for error reporting (extracting preview lines for tracebacks). pub(crate) code: String, /// Estimated heap capacity for pre-allocation on subsequent runs. /// Uses AtomicUsize for thread-safety (required by PyO3's Sync bound). heap_capacity: AtomicUsize, } impl Clone for Executor { fn clone(&self) -> Self { Self { namespace_size: self.namespace_size, #[cfg(feature = "ref-count-return")] name_map: self.name_map.clone(), module_code: self.module_code.clone(), interns: self.interns.clone(), code: self.code.clone(), heap_capacity: AtomicUsize::new(self.heap_capacity.load(Ordering::Relaxed)), } } } impl Executor { /// Creates a new executor with the given code, filename, and input names. pub(crate) fn new(code: String, script_name: &str, input_names: Vec) -> Result { let parse_result = parse(&code, script_name).map_err(|e| e.into_python_exc(script_name, &code))?; let prepared = prepare(parse_result, input_names).map_err(|e| e.into_python_exc(script_name, &code))?; // Create interns with empty functions (functions will be set after compilation) let mut interns = Interns::new(prepared.interner, Vec::new()); // Compile the module to bytecode, which also compiles all nested functions let namespace_size_u16 = u16::try_from(prepared.namespace_size).expect("module namespace size exceeds u16"); let compile_result = Compiler::compile_module(&prepared.nodes, &interns, namespace_size_u16) .map_err(|e| e.into_python_exc(script_name, &code))?; // Set the compiled functions in the interns interns.set_functions(compile_result.functions); Ok(Self { namespace_size: prepared.namespace_size, #[cfg(feature = "ref-count-return")] name_map: prepared.name_map, module_code: compile_result.code, interns, code, heap_capacity: AtomicUsize::new(prepared.namespace_size), }) } /// Executes the code with a custom resource tracker. 
/// /// This provides full control over resource tracking and garbage collection /// scheduling. The tracker is called on each allocation and periodically /// during execution to check time limits and trigger GC. /// /// # Arguments /// * `inputs` - Values to fill the first N slots of the namespace /// * `resource_tracker` - Custom resource tracker implementation /// * `print` - Print output writer fn run( &self, inputs: Vec, resource_tracker: impl ResourceTracker, print: PrintWriter<'_>, ) -> Result { let heap_capacity = self.heap_capacity.load(Ordering::Relaxed); let mut heap = Heap::new(heap_capacity, resource_tracker); let globals = self.empty_globals(); // Create VM first, then populate inputs with VM alive let mut vm = VM::new(globals, &mut heap, &self.interns, print); self.populate_inputs(inputs, &mut vm)?; let mut frame_exit_result = vm.run_module(&self.module_code); // Handle NameLookup and ExternalCall exits by raising NameError through the VM // so that traceback information is properly captured. In the non-iterative path, // there's no host to resolve names or external functions, so these become NameErrors. loop { match frame_exit_result { Ok(FrameExit::NameLookup { name_id, .. }) => { let name = self.interns.get_str(name_id); let err = ExcType::name_error(name); frame_exit_result = vm.resume_with_exception(err.into()); } Ok(FrameExit::ExternalCall { function_name, args, name_load_ip, .. }) => { // In standard execution, an ExtFunction from LoadGlobalCallable/ // LoadLocalCallable means the name was undefined — raise NameError. // Restore the frame IP to the load instruction so the traceback // points to the name reference, not the call expression. 
if let Some(load_ip) = name_load_ip { vm.set_instruction_ip(load_ip); } let name = function_name.as_str(&self.interns); args.drop_with_heap(vm.heap); let err = ExcType::name_error(name); frame_exit_result = vm.resume_with_exception(err.into()); } _ => break, } } // Convert output while VM is still alive let result = frame_exit_to_object(frame_exit_result, &mut vm); // Clean up VM state before it goes out of scope vm.cleanup(); if heap.size() > heap_capacity { self.heap_capacity.store(heap.size(), Ordering::Relaxed); } result.map_err(|e| e.into_python_exception(&self.interns, &self.code)) } /// Executes the code and returns both the result and reference count data, used for testing only. /// /// This is used for testing reference counting behavior. Returns: /// - The execution result (`Exit`) /// - Reference count data as a tuple of: /// - A map from variable names to their reference counts (only for heap-allocated values) /// - The number of unique heap value IDs referenced by variables /// - The total number of live heap values /// /// For strict matching validation, compare unique_refs_count with heap_entry_count. /// If they're equal, all heap values are accounted for by named variables. /// /// Only available when the `ref-count-return` feature is enabled. #[cfg(feature = "ref-count-return")] fn run_ref_counts(&self, inputs: Vec) -> Result { use std::collections::HashSet; let mut heap = Heap::new(self.namespace_size, NoLimitTracker); let globals = self.empty_globals(); // Create VM, populate inputs, and run let mut vm = VM::new(globals, &mut heap, &self.interns, PrintWriter::Stdout); self.populate_inputs(inputs, &mut vm)?; let frame_exit_result = vm.run_module(&self.module_code); // Take globals out of the VM so we can inspect them, but keep VM alive // for heap access and later conversion. let globals = vm.take_globals(); // Read refcounts BEFORE converting the return value, because // `frame_exit_to_object` drops the return value (decrementing its refcount). 
let mut counts = ahash::AHashMap::new(); let mut unique_ids = HashSet::new(); for (name, &namespace_id) in &self.name_map { let idx = namespace_id.index(); if idx < globals.len() && let Value::Ref(id) = &globals[idx] { counts.insert(name.clone(), vm.heap.get_refcount(*id)); unique_ids.insert(*id); } } let unique_refs = unique_ids.len(); let heap_count = vm.heap.entry_count(); // Convert return value while VM is still alive (needs access to interns) let py_object = frame_exit_to_object(frame_exit_result, &mut vm) .map_err(|e| e.into_python_exception(&self.interns, &self.code))?; vm.cleanup(); // Drop globals with proper ref counting for value in globals { value.drop_with_heap(&mut heap); } let allocations_since_gc = heap.get_allocations_since_gc(); Ok(RefCountOutput { py_object, counts, unique_refs, heap_count, allocations_since_gc, }) } /// Creates an empty globals vector with all slots set to `Undefined`. /// /// Used to initialize global storage before input population. The VM is created /// with these empty globals, then [`populate_inputs`](Self::populate_inputs) fills /// the input slots while the VM is alive. pub(crate) fn empty_globals(&self) -> Vec { (0..self.namespace_size).map(|_| Value::Undefined).collect() } /// Converts `MontyObject` inputs to `Value`s and writes them into the VM's globals. /// /// This runs with the VM alive so that `to_value` has access to the full VM context. /// On error partway through, the VM's `cleanup()` (via drop) will drain globals and /// properly decrement refcounts for any already-converted values. 
pub(crate) fn populate_inputs( &self, inputs: Vec, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result<(), MontyException> { if inputs.len() > self.namespace_size { return Err(MontyException::runtime_error("too many inputs for namespace")); } for (i, input) in inputs.into_iter().enumerate() { let value = input .to_value(vm) .map_err(|e| MontyException::runtime_error(format!("invalid input type: {e}")))?; vm.globals[i] = value; } Ok(()) } } /// Converts module/frame exit results into plain `MontyObject` outputs. /// /// Used by non-iterative execution paths where suspendable outcomes (external calls, /// name lookups) are not supported and should produce errors. fn frame_exit_to_object( frame_exit_result: RunResult, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult { match frame_exit_result? { FrameExit::Return(return_value) => Ok(MontyObject::new(return_value, vm)), FrameExit::ExternalCall { function_name, args, .. } => { args.drop_with_heap(vm.heap); let function_name = function_name.as_str(vm.interns); Err(ExcType::not_implemented(format!( "External function '{function_name}' not implemented with standard execution" )) .into()) } FrameExit::OsCall { function, args, .. } => { args.drop_with_heap(vm.heap); Err(ExcType::not_implemented(format!( "OS function '{function}' not implemented with standard execution" )) .into()) } FrameExit::MethodCall { method_name, args, .. } => { args.drop_with_heap(vm.heap); let name = method_name.as_str(vm.interns); Err( ExcType::not_implemented(format!("Method call '{name}' not implemented with standard execution")) .into(), ) } FrameExit::ResolveFutures(_) => { Err(ExcType::not_implemented("async futures not supported by standard execution.").into()) } FrameExit::NameLookup { name_id, .. } => { let name = vm.interns.get_str(name_id); Err(ExcType::name_error(name).into()) } } } /// Output from `run_ref_counts` containing reference count and heap information. 
///
/// Used for testing GC behavior and reference counting correctness.
#[cfg(feature = "ref-count-return")]
#[derive(Debug)]
pub struct RefCountOutput {
    /// The run's return value, already converted to a `MontyObject`.
    pub py_object: MontyObject,
    /// Heap reference count recorded for each named global that holds a heap reference,
    /// keyed by the global's name.
    pub counts: ahash::AHashMap,
    /// Number of distinct heap ids referenced by those globals.
    pub unique_refs: usize,
    /// Heap entry count sampled before the VM is cleaned up.
    pub heap_count: usize,
    /// Number of GC-tracked allocations since the last garbage collection.
    ///
    /// If GC ran during execution, this will be lower than the total number of
    /// allocations. Compare this against expected allocation count to verify GC ran.
    pub allocations_since_gc: u32,
}

================================================
FILE: crates/monty/src/run_progress.rs
================================================
//! This module defines the public types returned by [`MontyRun::start()`](crate::MontyRun::start)
//! and their resume methods. Each variant of [`RunProgress`] wraps a dedicated struct
//! (`FunctionCall`, `OsCall`, `NameLookup`, `ResolveFutures`) that carries only the
//! fields and resume methods relevant to that suspension point.
//!
//! The internal [`Snapshot`] type is `pub(crate)` — callers interact exclusively with
//! the per-variant structs.

use std::mem;

use crate::{
    ExcType, MontyException,
    asyncio::CallId,
    bytecode::{FrameExit, VM, VMSnapshot},
    exception_private::{RunError, RunResult},
    heap::Heap,
    io::PrintWriter,
    object::MontyObject,
    os::OsFunction,
    resource::ResourceTracker,
    run::Executor,
    value::Value,
};

// ---------------------------------------------------------------------------
// RunProgress enum
// ---------------------------------------------------------------------------

/// Result of a single step of iterative execution.
///
/// Each variant wraps a dedicated struct that owns the execution state and
/// exposes only the resume methods relevant to that suspension reason.
///
/// # Type Parameters
/// * `T` — Resource tracker implementation (e.g. `NoLimitTracker` or `LimitedTracker`).
///
/// Serialization requires `T: Serialize + Deserialize`.
#[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub enum RunProgress { /// Execution paused at an external function call or dataclass method call. FunctionCall(FunctionCall), /// Execution paused for an OS-level operation (filesystem, network, etc.). OsCall(OsCall), /// All async tasks are blocked waiting for external futures to resolve. ResolveFutures(ResolveFutures), /// Execution paused for an unresolved name lookup. NameLookup(NameLookup), /// Execution completed with a final result. Complete(MontyObject), } impl RunProgress { /// Consumes the progress and returns the `FunctionCall` struct if this is a function call. #[must_use] pub fn into_function_call(self) -> Option> { match self { Self::FunctionCall(call) => Some(call), _ => None, } } /// Consumes the progress and returns the `OsCall` struct if this is an OS call. #[must_use] pub fn into_os_call(self) -> Option> { match self { Self::OsCall(call) => Some(call), _ => None, } } /// Consumes the progress and returns the final value if execution completed. #[must_use] pub fn into_complete(self) -> Option { match self { Self::Complete(value) => Some(value), _ => None, } } /// Consumes the progress and returns the `ResolveFutures` struct. #[must_use] pub fn into_resolve_futures(self) -> Option> { match self { Self::ResolveFutures(state) => Some(state), _ => None, } } /// Consumes the progress and returns the `NameLookup` struct. #[must_use] pub fn into_name_lookup(self) -> Option> { match self { Self::NameLookup(lookup) => Some(lookup), _ => None, } } } impl RunProgress { /// Serializes the execution state to a binary format. /// /// # Errors /// Returns an error if serialization fails. pub fn dump(&self) -> Result, postcard::Error> { postcard::to_allocvec(self) } } impl RunProgress { /// Deserializes execution state from binary format. /// /// # Errors /// Returns an error if deserialization fails. 
pub fn load(bytes: &[u8]) -> Result {
    postcard::from_bytes(bytes)
}
}

// ---------------------------------------------------------------------------
// FunctionCall
// ---------------------------------------------------------------------------

/// Execution paused at an external function call or dataclass method call.
///
/// The host can choose how to handle this:
/// - **Sync resolution**: Call `resume(return_value, print)` to push the result and continue.
/// - **Async resolution**: Call `resume_pending(print)` to push an `ExternalFuture` and continue.
///
/// When using async resolution, the code continues and may `await` the future later.
/// If the future isn't resolved when awaited, execution yields with `ResolveFutures`.
///
/// When `method_call` is true, this represents a dataclass method call where the first
/// positional arg is the dataclass instance (`self`).
#[derive(Debug, serde::Serialize, serde::Deserialize)]
#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))]
pub struct FunctionCall {
    /// The name of the function or method being called.
    pub function_name: String,
    /// The positional arguments passed to the function.
    pub args: Vec,
    /// The keyword arguments passed to the function (key, value pairs).
    pub kwargs: Vec<(MontyObject, MontyObject)>,
    /// Unique identifier for this call (used for async correlation).
    pub call_id: u32,
    /// Whether this is a dataclass method call (first arg is `self`).
    pub method_call: bool,
    /// Internal execution snapshot.
    snapshot: Snapshot,
}

impl FunctionCall {
    /// Bundles the call description with the snapshot needed to resume it.
    fn new(
        function_name: String,
        args: Vec,
        kwargs: Vec<(MontyObject, MontyObject)>,
        call_id: u32,
        method_call: bool,
        snapshot: Snapshot,
    ) -> Self {
        Self {
            function_name,
            args,
            kwargs,
            call_id,
            method_call,
            snapshot,
        }
    }

    /// Returns a mutable reference to the resource tracker.
    ///
    /// This allows modifying resource limits between execution phases,
    /// e.g. setting a time limit before resuming after an external function call.
    pub fn tracker_mut(&mut self) -> &mut T {
        // The tracker is owned by the heap stored inside the snapshot.
        let heap = &mut self.snapshot.heap;
        heap.tracker_mut()
    }

    /// Continues the run with the host's answer to this call.
    ///
    /// Consumes self and returns the next execution progress.
    ///
    /// # Arguments
    /// * `result` — The return value, exception, or pending future marker.
    /// * `print` — Writer for `print()` output.
    pub fn resume(
        self,
        result: impl Into,
        print: PrintWriter<'_>,
    ) -> Result, MontyException> {
        self.snapshot.run(result, print)
    }

    /// Continues the run with an `ExternalFuture` in place of a concrete value.
    ///
    /// This is the async resolution pattern: the host continues execution with a
    /// pending future. The code can then `await` this future later. If the code
    /// awaits the future before it's resolved, execution will yield with
    /// `RunProgress::ResolveFutures`.
    ///
    /// Uses `self.call_id` internally — no need to pass it again.
    ///
    /// # Arguments
    /// * `print` — Writer for print output.
    pub fn resume_pending(self, print: PrintWriter<'_>) -> Result, MontyException> {
        // The future is tagged with this call's own id so a later resolution can find it.
        let pending = ExtFunctionResult::Future(self.call_id);
        self.snapshot.run(pending, print)
    }
}

// ---------------------------------------------------------------------------
// OsCall
// ---------------------------------------------------------------------------

/// Execution paused for an OS-level operation.
///
/// The host should execute the OS operation (filesystem, network, etc.) and
/// call `resume(return_value, print)` to provide the result and continue.
///
/// This enables sandboxed execution where the interpreter never directly performs I/O.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))]
pub struct OsCall {
    /// The OS function to execute.
    pub function: OsFunction,
    /// The positional arguments for the OS function.
pub args: Vec, /// The keyword arguments passed to the function (key, value pairs). pub kwargs: Vec<(MontyObject, MontyObject)>, /// Unique identifier for this call (used for async correlation). pub call_id: u32, /// Internal execution snapshot. snapshot: Snapshot, } impl OsCall { /// Creates a new `OsCall` from its parts. fn new( function: OsFunction, args: Vec, kwargs: Vec<(MontyObject, MontyObject)>, call_id: u32, snapshot: Snapshot, ) -> Self { Self { function, args, kwargs, call_id, snapshot, } } /// Resumes execution with the OS call result. /// /// # Arguments /// * `result` — The return value or exception from the OS operation. /// * `print` — Writer for `print()` output. pub fn resume( self, result: impl Into, print: PrintWriter<'_>, ) -> Result, MontyException> { self.snapshot.run(result, print) } } // --------------------------------------------------------------------------- // NameLookup // --------------------------------------------------------------------------- /// Execution paused for an unresolved name lookup. /// /// The host should check if the name corresponds to a known external function or /// value. Call `resume(result, print)` with `NameLookupResult::Value(obj)` to /// cache it in the namespace and continue, or `NameLookupResult::Undefined` to /// raise `NameError`. /// /// The namespace slot and scope are managed internally — the host only needs to /// provide the name resolution result. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub struct NameLookup { /// The name being looked up. pub name: String, /// The namespace slot where the resolved value should be cached. namespace_slot: u16, /// Whether this is a global slot or a local/function slot. is_global: bool, /// Internal execution snapshot. snapshot: Snapshot, } impl NameLookup { /// Creates a new `NameLookup` from its parts. 
fn new(name: String, namespace_slot: u16, is_global: bool, snapshot: Snapshot) -> Self { Self { name, namespace_slot, is_global, snapshot, } } /// Resumes execution after name resolution. /// /// Caches the resolved value in the appropriate slot (globals or stack) /// before restoring the VM, then either pushes the value or raises `NameError`. /// /// # Arguments /// * `result` — The resolved value or `Undefined`. /// * `print` — Writer for print output. pub fn resume( mut self, result: impl Into, print: PrintWriter<'_>, ) -> Result, MontyException> { // Restore the VM first, then convert inside its lifetime let mut vm = VM::restore( self.snapshot.vm_state, &self.snapshot.executor.module_code, &mut self.snapshot.heap, &self.snapshot.executor.interns, print, ); // Resolve the name lookup result with the VM alive let vm_result = match result.into() { NameLookupResult::Value(obj) => { let value = obj.to_value(&mut vm).map_err(|e| { vm.cleanup(); MontyException::runtime_error(format!("invalid name lookup result: {e}")) })?; // Cache the resolved value in the appropriate slot let slot = self.namespace_slot as usize; if self.is_global { let cloned = value.clone_with_heap(vm.heap); let old = mem::replace(&mut vm.globals[slot], cloned); old.drop_with_heap(vm.heap); } else { let stack_base = vm.current_stack_base(); let cloned = value.clone_with_heap(vm.heap); let old = mem::replace(&mut vm.stack[stack_base + slot], cloned); old.drop_with_heap(vm.heap); } vm.push(value); vm.run() } NameLookupResult::Undefined => { let err = ExcType::name_error(&self.name); vm.resume_with_exception(err.into()) } }; // Three-phase: convert while VM alive, snapshot, build progress let converted = convert_frame_exit(vm_result, &mut vm); let vm_state = check_snapshot_from_converted(&converted, vm); build_run_progress(converted, vm_state, self.snapshot.executor, self.snapshot.heap) } } // --------------------------------------------------------------------------- // ResolveFutures // 
--------------------------------------------------------------------------- /// Execution state paused while waiting for external future results. /// /// Supports incremental resolution — you can provide partial results and Monty /// will continue running until all tasks are blocked again. /// /// Use `pending_call_ids()` to see which calls are pending, then call /// `resume(results, print)` with some or all of the results. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub struct ResolveFutures { /// The executor containing compiled code and interns. executor: Executor, /// The VM state containing stack, frames, globals, and exception state. vm_state: VMSnapshot, /// The heap containing all allocated objects. heap: Heap, /// The pending call_ids that this snapshot is waiting on. pending_call_ids: Vec, } impl ResolveFutures { /// Creates a new `ResolveFutures` from its parts. fn new(executor: Executor, vm_state: VMSnapshot, heap: Heap, pending_call_ids: Vec) -> Self { Self { executor, vm_state, heap, pending_call_ids, } } /// Returns unresolved call IDs for this suspended state. #[must_use] pub fn pending_call_ids(&self) -> &[u32] { &self.pending_call_ids } /// Resumes execution with results for some or all pending futures. /// /// **Incremental resolution**: You don't need to provide all results at once. /// If you provide a partial list, Monty will: /// 1. Mark those futures as resolved /// 2. Unblock any tasks waiting on those futures /// 3. Continue running until all tasks are blocked again /// 4. Return `ResolveFutures` with the remaining pending calls /// /// # Arguments /// * `results` — List of `(call_id, result)` pairs. Can be a subset of pending calls. /// * `print` — Writer for print output. /// /// # Errors /// Returns `Err(MontyException)` if any `call_id` in `results` is not in the pending set. 
pub fn resume( self, results: Vec<(u32, ExtFunctionResult)>, print: PrintWriter<'_>, ) -> Result, MontyException> { let Self { executor, vm_state, mut heap, pending_call_ids, } = self; // Validate that all provided call_ids are in the pending set before restoring VM. let invalid_call_id = results .iter() .find(|(call_id, _)| !pending_call_ids.contains(call_id)) .map(|(call_id, _)| *call_id); // Restore the VM from the snapshot (must happen before any error return to clean up properly). let mut vm = VM::restore(vm_state, &executor.module_code, &mut heap, &executor.interns, print); // Now check for invalid call_ids after VM is restored. if let Some(call_id) = invalid_call_id { vm.cleanup(); return Err(MontyException::runtime_error(format!( "unknown call_id {call_id}, expected one of: {pending_call_ids:?}" ))); } for (call_id, ext_result) in results { match ext_result { ExtFunctionResult::Return(obj) => vm.resolve_future(call_id, obj).map_err(|e| { MontyException::runtime_error(format!("Invalid return type for call {call_id}: {e}")) })?, ExtFunctionResult::Error(exc) => vm.fail_future(call_id, exc.into()), ExtFunctionResult::Future(_) => {} ExtFunctionResult::NotFound(function_name) => { vm.fail_future(call_id, ExtFunctionResult::not_found_exc(&function_name)); } } } // Check if the current task has failed. if let Some(error) = vm.take_failed_task_error() { vm.cleanup(); return Err(error.into_python_exception(&executor.interns, &executor.code)); } // Push resolved value for main task if it was blocked. let main_task_ready = vm.prepare_current_task_after_resolve(); let loaded_task = match vm.load_ready_task_if_needed() { Ok(loaded) => loaded, Err(e) => { vm.cleanup(); return Err(e.into_python_exception(&executor.interns, &executor.code)); } }; // If no task is ready and there are still pending calls, return ResolveFutures. 
if !main_task_ready && !loaded_task { let pending_call_ids = vm.get_pending_call_ids(); if !pending_call_ids.is_empty() { let vm_state = vm.snapshot(); let pending_call_ids: Vec = pending_call_ids.iter().map(|id| id.raw()).collect(); return Ok(RunProgress::ResolveFutures(Self { executor, vm_state, heap, pending_call_ids, })); } } let result = vm.run(); // Three-phase: convert while VM alive, snapshot, build progress let converted = convert_frame_exit(result, &mut vm); let vm_state = check_snapshot_from_converted(&converted, vm); build_run_progress(converted, vm_state, executor, heap) } } // --------------------------------------------------------------------------- // Snapshot (pub(crate)) // --------------------------------------------------------------------------- /// Internal execution state that can be resumed after suspension. /// /// This is a `pub(crate)` implementation detail wrapped by the per-variant /// structs (`FunctionCall`, `OsCall`, `NameLookup`). It is not exposed in the /// public API. #[derive(Debug, serde::Serialize, serde::Deserialize)] #[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: serde::de::DeserializeOwned"))] pub(crate) struct Snapshot { /// The executor containing compiled code and interns. pub(crate) executor: Executor, /// The VM state containing stack, frames, globals, and exception state. pub(crate) vm_state: VMSnapshot, /// The heap containing all allocated objects. pub(crate) heap: Heap, } impl Snapshot { /// Continues execution with the return value or exception from the external call. 
pub(crate) fn run( mut self, result: impl Into, print: PrintWriter<'_>, ) -> Result, MontyException> { let ext_result = result.into(); let mut vm = VM::restore( self.vm_state, &self.executor.module_code, &mut self.heap, &self.executor.interns, print, ); let vm_result = match ext_result { ExtFunctionResult::Return(obj) => vm.resume(obj), ExtFunctionResult::Error(exc) => vm.resume_with_exception(exc.into()), ExtFunctionResult::Future(raw_call_id) => { let call_id = CallId::new(raw_call_id); vm.add_pending_call(call_id); vm.push(Value::ExternalFuture(call_id)); vm.run() } ExtFunctionResult::NotFound(function_name) => { vm.resume_with_exception(ExtFunctionResult::not_found_exc(&function_name)) } }; // Three-phase: convert while VM alive, snapshot, build progress let converted = convert_frame_exit(vm_result, &mut vm); let vm_state = check_snapshot_from_converted(&converted, vm); build_run_progress(converted, vm_state, self.executor, self.heap) } } /// Result of a name lookup from the host. /// /// When the VM encounters an unresolved name, the host provides one of these: /// - `Value(obj)`: The name resolves to this value (cached in the namespace for future access). /// - `Undefined`: The name is truly undefined, causing `NameError`. #[derive(Debug)] pub enum NameLookupResult { /// The name resolves to this value. Value(MontyObject), /// The name is undefined — VM will raise `NameError`. Undefined, } impl From for NameLookupResult { fn from(value: MontyObject) -> Self { Self::Value(value) } } /// Return value or exception from an external function. #[derive(Debug)] pub enum ExtFunctionResult { /// Continues execution with the return value from the external function. Return(MontyObject), /// Continues execution with the exception raised by the external function. Error(MontyException), /// Pending future — the external function is a coroutine. /// /// The `u32` is the `call_id` from the `FunctionCall` that created this /// snapshot. 
It is used to track the pending future so it can be resolved /// later via `ResolveFutures::resume()`. Future(u32), /// The function was not found, should result in a `NameError` exception. NotFound(String), } impl ExtFunctionResult { pub(crate) fn not_found_exc(function_name: &str) -> RunError { let msg = format!("name '{function_name}' is not defined"); MontyException::new(ExcType::NameError, Some(msg)).into() } } impl From for ExtFunctionResult { fn from(value: MontyObject) -> Self { Self::Return(value) } } impl From for ExtFunctionResult { fn from(exception: MontyException) -> Self { Self::Error(exception) } } // --------------------------------------------------------------------------- // Executor (re-export from run.rs via pub(crate)) // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- // handle_vm_result // --------------------------------------------------------------------------- /// Pre-converted frame exit data, produced while the VM is still alive. /// /// This intermediate enum holds `MontyObject`s and `String`s instead of `Value`s /// and `StringId`s. It exists to separate the conversion phase (needs `&mut VM`) /// from the snapshot/progress construction phase (needs owned `Heap`). pub(crate) enum ConvertedExit { /// Execution completed with a final result. Complete(MontyObject), /// External function call or dataclass method call. FunctionCall { function_name: String, args: Vec, kwargs: Vec<(MontyObject, MontyObject)>, call_id: u32, method_call: bool, }, /// OS-level operation. OsCall { function: OsFunction, args: Vec, kwargs: Vec<(MontyObject, MontyObject)>, call_id: u32, }, /// All async tasks are blocked waiting for external futures. ResolveFutures(Vec), /// Unresolved name lookup. NameLookup { name: String, namespace_slot: u16, is_global: bool, }, /// Runtime error. 
Error(RunError),
}

impl ConvertedExit {
    /// Tells whether the run is suspended (and must be snapshotted) rather than finished.
    ///
    /// Only `Complete` and `Error` are terminal; every other variant can be resumed.
    pub(crate) fn needs_snapshot(&self) -> bool {
        match self {
            Self::Complete(_) | Self::Error(_) => false,
            _ => true,
        }
    }
}

/// Translates a raw `FrameExit` into host-facing data while the VM is still alive.
///
/// All `Value` → `MontyObject` and `StringId` → `String` conversions happen here,
/// while the VM (and its heap/interns) are still accessible.
pub(crate) fn convert_frame_exit(
    result: RunResult,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> ConvertedExit {
    // A run error needs no conversion work; pass it through.
    let exit = match result {
        Ok(exit) => exit,
        Err(err) => return ConvertedExit::Error(err),
    };

    match exit {
        FrameExit::Return(value) => ConvertedExit::Complete(MontyObject::new(value, vm)),
        FrameExit::ExternalCall {
            function_name,
            args,
            call_id,
            ..
        } => {
            let function_name = function_name.into_string(vm.interns);
            let (args, kwargs) = args.into_py_objects(vm);
            ConvertedExit::FunctionCall {
                function_name,
                args,
                kwargs,
                call_id: call_id.raw(),
                method_call: false,
            }
        }
        FrameExit::OsCall {
            function,
            args,
            call_id,
        } => {
            let (args, kwargs) = args.into_py_objects(vm);
            ConvertedExit::OsCall {
                function,
                args,
                kwargs,
                call_id: call_id.raw(),
            }
        }
        // Method calls share the `FunctionCall` shape and differ only in the flag.
        FrameExit::MethodCall {
            method_name,
            args,
            call_id,
        } => {
            let function_name = method_name.into_string(vm.interns);
            let (args, kwargs) = args.into_py_objects(vm);
            ConvertedExit::FunctionCall {
                function_name,
                args,
                kwargs,
                call_id: call_id.raw(),
                method_call: true,
            }
        }
        FrameExit::ResolveFutures(pending_call_ids) => {
            let raw_ids = pending_call_ids.iter().map(|id| id.raw()).collect();
            ConvertedExit::ResolveFutures(raw_ids)
        }
        FrameExit::NameLookup {
            name_id,
            namespace_slot,
            is_global,
        } => ConvertedExit::NameLookup {
            name: vm.interns.get_str(name_id).to_owned(),
            namespace_slot,
            is_global,
        },
    }
}

/// Decides whether to snapshot or clean up the VM based on the converted exit.
///
/// Consumes the VM.
Returns `Some(VMSnapshot)` for suspendable exits, `None` for /// completion/error (in which case the VM is cleaned up). pub(crate) fn check_snapshot_from_converted( converted: &ConvertedExit, mut vm: VM<'_, '_, impl ResourceTracker>, ) -> Option { if converted.needs_snapshot() { Some(vm.snapshot()) } else { vm.cleanup(); None } } /// Assembles a `RunProgress` from already-converted data and owned heap. /// /// This runs after the VM has been dropped (releasing the heap borrow), /// so the heap can be moved into `Snapshot` structs. pub(crate) fn build_run_progress( converted: ConvertedExit, vm_state: Option, executor: Executor, heap: Heap, ) -> Result, MontyException> { macro_rules! new_snapshot { () => { Snapshot { executor, vm_state: vm_state.expect("snapshot should exist"), heap, } }; } match converted { ConvertedExit::Complete(obj) => Ok(RunProgress::Complete(obj)), ConvertedExit::FunctionCall { function_name, args, kwargs, call_id, method_call, } => Ok(RunProgress::FunctionCall(FunctionCall::new( function_name, args, kwargs, call_id, method_call, new_snapshot!(), ))), ConvertedExit::OsCall { function, args, kwargs, call_id, } => Ok(RunProgress::OsCall(OsCall::new( function, args, kwargs, call_id, new_snapshot!(), ))), ConvertedExit::ResolveFutures(pending_call_ids) => Ok(RunProgress::ResolveFutures(ResolveFutures::new( executor, vm_state.expect("snapshot should exist for ResolveFutures"), heap, pending_call_ids, ))), ConvertedExit::NameLookup { name, namespace_slot, is_global, } => Ok(RunProgress::NameLookup(NameLookup::new( name, namespace_slot, is_global, new_snapshot!(), ))), ConvertedExit::Error(err) => Err(err.into_python_exception(&executor.interns, &executor.code)), } } ================================================ FILE: crates/monty/src/signature.rs ================================================ //! Function signature representation and argument binding. //! //! This module handles Python function signatures including all parameter types: //! 
positional-only, positional-or-keyword, *args, keyword-only, and **kwargs. //! It also handles default values and the argument binding algorithm. use crate::{ args::{ArgPosIter, ArgValues}, bytecode::VM, defer_drop_mut, exception_private::{ExcType, RunResult, SimpleException}, expressions::Identifier, heap::{HeapData, HeapGuard}, intern::{Interns, StringId}, resource::ResourceTracker, types::{Dict, allocate_tuple}, value::Value, }; /// Represents a Python function signature with all parameter types. /// /// A complete Python signature can include: /// - Positional-only parameters (before `/`) /// - Positional-or-keyword parameters (regular parameters) /// - Variable positional parameter (`*args`) /// - Keyword-only parameters (after `*` or `*args`) /// - Variable keyword parameter (`**kwargs`) /// /// # Default Values /// /// Default values are tracked by count per parameter group. The `*_defaults_count` fields /// indicate how many parameters (from the end of each group) have defaults. For example, /// if `args = [a, b, c]` and `arg_defaults_count = 2`, then `b` and `c` have defaults. /// /// Note: The actual default Values are evaluated at function definition time and stored /// separately (in the heap as part of the function/closure object). This struct only /// tracks the structure, not the values themselves. /// /// # Namespace Layout /// /// Parameters are laid out in the namespace in this order: /// ```text /// [pos_args][args][*args_slot?][kwargs][**kwargs_slot?] /// ``` /// The `*args` slot is only present if `var_args` is Some. /// The `**kwargs` slot is only present if `var_kwargs` is Some. #[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] pub(crate) struct Signature { /// Positional-only parameters, e.g. `a, b` in `def f(a, b, /): ...` /// /// These can only be passed by position, not by keyword. pos_args: Option>, /// Number of positional-only parameters with defaults (from the end). 
pos_defaults_count: usize, /// Positional-or-keyword parameters, e.g. `a, b` in `def f(a, b): ...` /// /// These can be passed either by position or by keyword. args: Option>, /// Number of positional-or-keyword parameters with defaults (from the end). arg_defaults_count: usize, /// Variable positional parameter name, e.g. `args` in `def f(*args): ...` /// /// Collects excess positional arguments into a tuple. var_args: Option, /// Keyword-only parameters, e.g. `c` in `def f(*, c): ...` or `def f(*args, c): ...` /// /// These can only be passed by keyword, not by position. kwargs: Option>, /// Mapping from each keyword-only parameter to its default index (if any). /// /// Each entry corresponds to the same index in `kwargs`. A value of `Some(i)` /// points into the kwarg section of the defaults array, while `None` means /// the parameter is required. kwarg_default_map: Option>>, /// Variable keyword parameter name, e.g. `kwargs` in `def f(**kwargs): ...` /// /// Collects excess keyword arguments into a dict. var_kwargs: Option, /// How simple the signature is, used for fast path when binding bind_mode: BindMode, } #[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] enum BindMode { /// If this is a simple signature (no defaults, no *args/**kwargs). /// /// Simple signatures can use a fast path for argument binding that avoids /// the full binding algorithm overhead. A simple signature has: /// - No positional-only parameters /// - No defaults for any parameters /// - No *args or **kwargs /// - No keyword-only parameters #[default] Simple, /// If this signature has only positional-or-keyword params with defaults. 
/// /// This identifies the common pattern `def f(a, b=1, c=2)` where: /// - No positional-only parameters /// - No *args or **kwargs /// - No keyword-only parameters /// - Has some default values /// /// These signatures can use a simplified binding that just fills positional /// args and applies defaults without the full algorithm overhead. SimpleWithDefaults, Complex, } impl Signature { /// Creates a full signature with all parameter types. /// /// # Arguments /// * `pos_args` - Positional-only parameter names /// * `pos_defaults_count` - Number of pos_args with defaults (from end) /// * `args` - Positional-or-keyword parameter names /// * `arg_defaults_count` - Number of args with defaults (from end) /// * `var_args` - Variable positional parameter name (*args) /// * `kwargs` - Keyword-only parameter names /// * `kwarg_default_map` - Mapping of kw-only parameters to default indices /// * `var_kwargs` - Variable keyword parameter name (**kwargs) #[expect(clippy::too_many_arguments)] pub fn new( pos_args: Vec, pos_defaults_count: usize, args: Vec, arg_defaults_count: usize, var_args: Option, kwargs: Vec, kwarg_default_map: Vec>, var_kwargs: Option, ) -> Self { let pos_args = if pos_args.is_empty() { None } else { Some(pos_args) }; let has_kwonly = !kwargs.is_empty(); let kwargs = if has_kwonly { Some(kwargs) } else { None }; let bind_mode = if pos_args.is_none() && pos_defaults_count == 0 && arg_defaults_count == 0 && var_args.is_none() && kwargs.is_none() && var_kwargs.is_none() { BindMode::Simple } else if pos_args.is_none() && var_args.is_none() && kwargs.is_none() && var_kwargs.is_none() && arg_defaults_count > 0 { BindMode::SimpleWithDefaults } else { BindMode::Complex }; Self { pos_args, pos_defaults_count, args: if args.is_empty() { None } else { Some(args) }, arg_defaults_count, var_args, kwargs, kwarg_default_map: if has_kwonly { Some(kwarg_default_map) } else { None }, var_kwargs, bind_mode, } } /// Binds arguments to parameters according to Python's 
calling conventions. /// /// This implements the full argument binding algorithm: /// 1. Bind positional args to pos_args, then args (in order) /// 2. Bind keyword args to args and kwargs (NOT pos_args - positional-only) /// 3. Collect excess positional args into *args tuple /// 4. Collect excess keyword args into **kwargs dict /// 5. Apply defaults for missing parameters /// /// Returns a Vec ready to be injected into the namespace, laid out as: /// `[pos_args][args][*args_slot?][kwargs][**kwargs_slot?]` /// /// # Arguments /// * `args` - The arguments from the call site /// * `defaults` - Evaluated default values (layout: pos_defaults, arg_defaults, kwarg_defaults) /// * `heap` - The heap for allocating *args tuple and **kwargs dict /// * `interns` - For looking up parameter names in error messages /// * `func_name` - Function name for error messages /// * `namespace` - The namespace to populate with bound arguments. This is mutated in place and will need to be cleaned up on error. /// /// # Errors /// Returns an error if: /// - Too few or too many positional arguments /// - Missing required keyword-only arguments /// - Unexpected keyword argument /// - Positional-only parameter passed as keyword /// - Same argument passed both positionally and by keyword pub fn bind( &self, args: ArgValues, defaults: &[Value], vm: &mut VM<'_, '_, impl ResourceTracker>, func_name: Identifier, namespace: &mut Vec, ) -> RunResult<()> { let (pos_iter, keyword_args) = args.into_parts(); // Convert kwargs to an iterator and guard it so remaining items are cleaned up // on any error path let kwonly_given = keyword_args.len(); let keyword_args = keyword_args.into_iter(); defer_drop_mut!(keyword_args, vm); let namespace_base = namespace.len(); // Fast path for simple signatures (no defaults, no special params) and // signatures with only positional-or-keyword params and defaults. // This avoids the full binding algorithm overhead for common cases. 
if matches!(self.bind_mode, BindMode::Simple | BindMode::SimpleWithDefaults) && kwonly_given == 0 { match pos_iter { ArgPosIter::Empty => {} ArgPosIter::One(a) => { namespace.push(a); } ArgPosIter::Two([a1, a2]) => { namespace.push(a1); namespace.push(a2); } ArgPosIter::Vec(args) => { namespace.extend(args); } } let actual_count = namespace.len() - namespace_base; let param_count = self.param_count(); if actual_count == param_count { // Exact match - no defaults needed return Ok(()); } else if self.bind_mode == BindMode::SimpleWithDefaults { let required = self.required_positional_count(); if actual_count >= required && actual_count < param_count { // Apply defaults for remaining parameters // Defaults are stored at the end of the defaults array for pos-or-kw params let defaults_needed = param_count - actual_count; let defaults_start = self.arg_defaults_count - defaults_needed; for i in 0..defaults_needed { namespace.push(defaults[defaults_start + i].clone_with_heap(vm)); } return Ok(()); } } return self.wrong_arg_count_error(actual_count, vm.interns, func_name); } // Full binding algorithm for complex signatures or kwargs // Extract interns before guards since HeapGuard borrows the full VM mutably // but we only need mutable access to the heap portion. 
let mut pos_iter_guard = HeapGuard::new(pos_iter, vm); let (pos_iter, vm) = pos_iter_guard.as_parts_mut(); // Calculate how many positional params we have let pos_param_count = self.pos_arg_count(); let arg_param_count = self.arg_count(); let total_positional_params = pos_param_count + arg_param_count; // Check positional argument count against maximum if let Some(max) = self.max_positional_count() { let positional_count = pos_iter.len(); if positional_count > max { let func = vm.interns.get_str(func_name.name_id); return Err(ExcType::type_error_too_many_positional( func, max, positional_count, kwonly_given, )); } } // Initialize result namespace with Undefined values for all slots // Layout: [pos_args][args][*args?][kwargs][**kwargs?] let var_args_offset = usize::from(self.var_args.is_some()); namespace.resize_with(namespace.len() + self.total_slots(), || Value::Undefined); // Track which parameters have been bound (for duplicate detection) // Uses a u64 bitmap - supports up to 64 named parameters which is sufficient // for any reasonable Python function (Python itself has practical limits). // Note: this tracks only named params, not *args/**kwargs slots let mut bound_params: u64 = 0; // 1. Bind positional args to pos_args, then args // Bind to pos_args for (i, slot) in namespace[namespace_base..].iter_mut().enumerate().take(pos_param_count) { if let Some(val) = pos_iter.next() { *slot = val; bound_params |= 1 << i; } } // Bind to args for (i, slot) in namespace[namespace_base..] .iter_mut() .enumerate() .take(total_positional_params) .skip(pos_param_count) { if let Some(val) = pos_iter.next() { *slot = val; bound_params |= 1 << i; } } // 2. Collect excess positional args into *args tuple if self.var_args.is_some() { namespace[namespace_base + total_positional_params] = allocate_tuple(pos_iter.collect(), vm.heap)?; } else { // If no *args, excess was already checked above via max_positional_count debug_assert_eq!(pos_iter.len(), 0); } // 3. 
Bind keyword args // Bind keywords to args and kwargs (not pos_args - those are positional-only) let mut excess_kwargs_guard = HeapGuard::new(self.var_kwargs.is_some().then(Dict::new), vm); let (excess_kwargs, vm) = excess_kwargs_guard.as_parts_mut(); 'kwargs: for (key, value) in keyword_args { // Guard key: dropped on most paths, consumed into **kwargs via into_parts(). let mut key_guard = HeapGuard::new(key, vm); let (key, vm) = key_guard.as_parts_mut(); // Guard value: consumed into namespace/excess_kwargs via into_inner(), // or dropped automatically on error paths. let mut value_guard = HeapGuard::new(value, vm); let vm = value_guard.heap(); let Some(keyword_name) = key.as_either_str(vm.heap) else { return Err(ExcType::type_error("keywords must be strings")); }; // Check if this keyword matches a positional-only param (error) if let Some(pos_args) = &self.pos_args && let Some(¶m_id) = pos_args .iter() .find(|&¶m_id| keyword_name.matches(param_id, vm.interns)) { let func = vm.interns.get_str(func_name.name_id); let param = vm.interns.get_str(param_id); return Err(ExcType::type_error_positional_only(func, param)); } // Try positional-or-keyword params if let Some(args) = &self.args { for (i, ¶m_id) in args.iter().enumerate() { if keyword_name.matches(param_id, vm.interns) { let ns_idx = pos_param_count + i; if (bound_params & (1 << ns_idx)) != 0 { let func = vm.interns.get_str(func_name.name_id); let param = vm.interns.get_str(param_id); return Err(ExcType::type_error_duplicate_arg(func, param)); } let (value, _) = value_guard.into_parts(); namespace[namespace_base + ns_idx] = value; bound_params |= 1 << ns_idx; continue 'kwargs; } } } // Try keyword-only params if let Some(kwargs) = &self.kwargs { for (i, ¶m_id) in kwargs.iter().enumerate() { if keyword_name.matches(param_id, vm.interns) { let ns_idx = total_positional_params + var_args_offset + i; let bit_idx = total_positional_params + i; if (bound_params & (1 << bit_idx)) != 0 { let func = 
vm.interns.get_str(func_name.name_id); let param = vm.interns.get_str(param_id); return Err(ExcType::type_error_duplicate_arg(func, param)); } let (value, _) = value_guard.into_parts(); namespace[namespace_base + ns_idx] = value; bound_params |= 1 << bit_idx; continue 'kwargs; } } } if let Some(excess_kwargs) = excess_kwargs { // Consume both value and key into **kwargs dict let (value, _) = value_guard.into_parts(); let (key, vm) = key_guard.into_parts(); excess_kwargs.set(key, value, vm)?; continue 'kwargs; } let func = vm.interns.get_str(func_name.name_id); let key_str = keyword_name.as_str(vm.interns); return Err(ExcType::type_error_unexpected_keyword(func, key_str)); } // 3.5. Apply default values to unbound optional parameters // Defaults layout: [pos_defaults...][arg_defaults...][kwarg_defaults...] // Each section only contains defaults for params that have them. let mut default_idx = 0; // Apply pos_args defaults (optional params at the end of pos_args) if self.pos_defaults_count > 0 { let first_optional = pos_param_count - self.pos_defaults_count; for i in first_optional..pos_param_count { if (bound_params & (1 << i)) == 0 { namespace[namespace_base + i] = defaults[default_idx + (i - first_optional)].clone_with_heap(vm); bound_params |= 1 << i; } } } default_idx += self.pos_defaults_count; // Apply args defaults (optional params at the end of args) if self.arg_defaults_count > 0 { let first_optional = arg_param_count - self.arg_defaults_count; for i in first_optional..arg_param_count { let ns_idx = pos_param_count + i; if (bound_params & (1 << ns_idx)) == 0 { namespace[namespace_base + ns_idx] = defaults[default_idx + (i - first_optional)].clone_with_heap(vm); bound_params |= 1 << ns_idx; } } } default_idx += self.arg_defaults_count; // Apply kwargs defaults using the explicit default map if let Some(ref default_map) = self.kwarg_default_map { for (i, default_slot) in default_map.iter().enumerate() { if let Some(slot_idx) = default_slot { let bound_idx = 
total_positional_params + i; // Skip past *args slot if present let ns_idx = total_positional_params + var_args_offset + i; if (bound_params & (1 << bound_idx)) == 0 { namespace[namespace_base + ns_idx] = defaults[default_idx + slot_idx].clone_with_heap(vm); bound_params |= 1 << bound_idx; } } } } // 4. Check that all required params are bound BEFORE building final namespace. // This ensures we can clean up properly on error without leaking heap values. // Check required positional params (pos_args + required args) let mut missing_positional: Vec<&str> = Vec::new(); // Check pos_args if let Some(ref pos_args) = self.pos_args { let required_pos_only = pos_args.len().saturating_sub(self.pos_defaults_count); for (i, ¶m_id) in pos_args.iter().enumerate() { if i < required_pos_only && (bound_params & (1 << i)) == 0 { missing_positional.push(vm.interns.get_str(param_id)); } } } // Check args (positional-or-keyword) if let Some(ref args_params) = self.args { let required_args = args_params.len().saturating_sub(self.arg_defaults_count); for (i, ¶m_id) in args_params.iter().enumerate() { if i < required_args && (bound_params & (1 << (pos_param_count + i))) == 0 { missing_positional.push(vm.interns.get_str(param_id)); } } } if !missing_positional.is_empty() { // Clean up bound values before returning error let func = vm.interns.get_str(func_name.name_id); return Err(ExcType::type_error_missing_positional_with_names( func, &missing_positional, )); } // Check required keyword-only args let mut missing_kwonly: Vec<&str> = Vec::new(); if let Some(ref kwargs_params) = self.kwargs { let default_map = self.kwarg_default_map.as_ref(); for (i, ¶m_id) in kwargs_params.iter().enumerate() { let has_default = default_map.and_then(|map| map.get(i)).is_some_and(Option::is_some); if !has_default && (bound_params & (1 << (total_positional_params + i))) == 0 { missing_kwonly.push(vm.interns.get_str(param_id)); } } } if !missing_kwonly.is_empty() { let func = 
vm.interns.get_str(func_name.name_id); return Err(ExcType::type_error_missing_kwonly_with_names(func, &missing_kwonly)); } // 5. Insert **kwargs dict if present (at the last slot) // Namespace layout: [pos_args][args][*args?][kwargs][**kwargs?] let (excess_kwargs, vm) = excess_kwargs_guard.into_parts(); if let Some(excess_kwargs) = excess_kwargs { let dict_id = vm.heap.allocate(HeapData::Dict(excess_kwargs))?; let last_slot = namespace.len() - 1; namespace[last_slot] = Value::Ref(dict_id); } Ok(()) } /// Returns the total number of named parameters (excluding *args/**kwargs slots). /// /// This is `pos_args.len() + args.len() + kwargs.len()`. pub fn param_count(&self) -> usize { self.pos_arg_count() + self.arg_count() + self.kwarg_count() } /// Returns the total number of namespace slots needed for parameters. /// /// This includes slots for: /// - All named parameters (pos_args + args + kwargs) /// - The *args tuple (if var_args is Some) /// - The **kwargs dict (if var_kwargs is Some) pub fn total_slots(&self) -> usize { let mut slots = self.param_count(); if self.var_args.is_some() { slots += 1; } if self.var_kwargs.is_some() { slots += 1; } slots } /// Returns the total number of default values across all parameter groups. pub fn total_defaults_count(&self) -> usize { self.pos_defaults_count + self.arg_defaults_count + self.kwarg_defaults_count() } /// Returns the minimum number of positional arguments required. /// /// This is the total positional param count minus the number of defaults. /// For a signature like `def f(a, b, c=1)`, this returns 2 (a and b are required). #[inline] fn required_positional_count(&self) -> usize { self.pos_arg_count() + self.arg_count() - self.pos_defaults_count - self.arg_defaults_count } fn kwarg_defaults_count(&self) -> usize { self.kwarg_default_map .as_deref() .map(|v| v.iter().filter(|&x| x.is_some()).count()) .unwrap_or_default() } /// Returns the number of positional-only parameters. 
fn pos_arg_count(&self) -> usize { self.pos_args.as_ref().map_or(0, Vec::len) } /// Returns the number of positional-or-keyword parameters. fn arg_count(&self) -> usize { self.args.as_ref().map_or(0, Vec::len) } /// Returns the number of keyword-only parameters. fn kwarg_count(&self) -> usize { self.kwargs.as_ref().map_or(0, Vec::len) } /// Returns an iterator over all parameter names in namespace slot order. /// /// Order: pos_args, args, var_args (if present), kwargs, var_kwargs (if present) fn param_names(&self) -> impl Iterator + '_ { let pos_args = self.pos_args.iter().flat_map(|v| v.iter().copied()); let args = self.args.iter().flat_map(|v| v.iter().copied()); let var_args = self.var_args.iter().copied(); let kwargs = self.kwargs.iter().flat_map(|v| v.iter().copied()); let var_kwargs = self.var_kwargs.iter().copied(); pos_args.chain(args).chain(var_args).chain(kwargs).chain(var_kwargs) } /// Returns the maximum number of positional arguments accepted. /// /// Returns None if *args is present (unlimited positional args). fn max_positional_count(&self) -> Option { if self.var_args.is_some() { None } else { Some(self.pos_arg_count() + self.arg_count()) } } /// Creates an error for wrong number of arguments. /// /// Handles both "missing required positional arguments" and "too many arguments" cases, /// formatting the error message to match CPython's style. 
/// /// # Arguments /// * `actual_count` - Number of arguments actually provided /// * `interns` - String storage for looking up interned names fn wrong_arg_count_error(&self, actual_count: usize, interns: &Interns, func_name: Identifier) -> RunResult { let name_str = interns.get_str(func_name.name_id); let param_count = self.param_count(); let msg = if let Some(missing_count) = param_count.checked_sub(actual_count) { // Missing arguments - show actual parameter names let mut msg = format!( "{}() missing {} required positional argument{}: ", name_str, missing_count, if missing_count == 1 { "" } else { "s" } ); // Collect parameter names, skipping the ones already provided let mut missing_names: Vec<_> = self .param_names() .skip(actual_count) .map(|string_id| format!("'{}'", interns.get_str(string_id))) .collect(); let last = missing_names.pop().unwrap(); if !missing_names.is_empty() { msg.push_str(&missing_names.join(", ")); msg.push_str(", and "); } msg.push_str(&last); msg } else { // Too many arguments format!( "{}() takes {} positional argument{} but {} {} given", name_str, param_count, if param_count == 1 { "" } else { "s" }, actual_count, if actual_count == 1 { "was" } else { "were" } ) }; Err(SimpleException::new_msg(ExcType::TypeError, msg) .with_position(func_name.position) .into()) } } ================================================ FILE: crates/monty/src/sorting.rs ================================================ //! Shared sorting utilities for `sorted()` and `list.sort()`. //! //! Both `sorted()` and `list.sort()` use index-based sorting: they build //! a vector of indices `[0, 1, 2, ...]`, sort the indices by comparing the //! corresponding items (or key values), then rearrange items according to //! the sorted indices. //! //! This module provides [`sort_indices`] for the comparison step and //! [`apply_permutation`] for the in-place rearrangement step. 
use std::cmp::Ordering;

use crate::{
    bytecode::VM,
    exception_private::{ExcType, RunError},
    resource::ResourceTracker,
    types::PyTrait,
    value::Value,
};

/// Compares `values[a]` with `values[b]` for [`sort_indices`].
///
/// Checks the time limit first, then asks `py_cmp` for an ordering and flips
/// it when `reverse` is set. An unorderable pair becomes the `TypeError`
/// Python raises for `<`.
fn cmp_indexed(
    values: &[Value],
    a: usize,
    b: usize,
    reverse: bool,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> Result<Ordering, RunError> {
    vm.heap.check_time()?;
    match values[a].py_cmp(&values[b], vm)? {
        Some(ord) if reverse => Ok(ord.reverse()),
        Some(ord) => Ok(ord),
        None => Err(ExcType::type_error(format!(
            "'<' not supported between instances of '{}' and '{}'",
            values[a].py_type(vm.heap),
            values[b].py_type(vm.heap)
        ))),
    }
}

/// Sorts `indices` so that they visit `values` in ascending order (descending
/// when `reverse` is set).
///
/// Elements are never moved: only the index vector is reordered, by comparing
/// `values[a]` against `values[b]` with `py_cmp`. `values` is typically either
/// the items themselves (no key function) or the pre-computed key values.
///
/// # Errors
/// Returns the first failure hit while comparing: the time-limit check, an
/// error from `py_cmp`, or a `TypeError` for a pair that cannot be ordered.
/// Once a failure is recorded no further comparisons are performed.
pub fn sort_indices(
    indices: &mut [usize],
    values: &[Value],
    reverse: bool,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> Result<(), RunError> {
    // `sort_by` cannot be aborted, so the first failure is parked here and
    // every later comparison is answered with `Equal` without doing any work.
    // NOTE(review): answering `Equal` after a failure is not a consistent
    // total order, and std documents that `sort_by` may panic in that case -
    // confirm this cannot fire here.
    let mut failure: Option<RunError> = None;

    indices.sort_by(|&lhs, &rhs| {
        if failure.is_some() {
            return Ordering::Equal;
        }
        match cmp_indexed(values, lhs, rhs, reverse, vm) {
            Ok(ordering) => ordering,
            Err(err) => {
                failure = Some(err);
                Ordering::Equal
            }
        }
    });

    failure.map_or(Ok(()), Err)
}

/// Rearranges `items` in-place according to a permutation of indices.
///
/// After calling this, `items[i]` will hold the value that was originally at
/// `items[indices[i]]`. The algorithm chases permutation cycles and swaps
/// elements into their final positions, using O(1) extra memory beyond the
/// `indices` slice (which is mutated to track visited positions).
///
/// Each element is moved at most twice (one swap = two moves), so the total
/// work is O(n) moves. This is at most 2x the moves of building a fresh
/// `Vec`, but avoids allocating a second buffer.
pub fn apply_permutation(items: &mut [Value], indices: &mut [usize]) {
    for cycle_start in 0..items.len() {
        // Follow the cycle that passes through `cycle_start`. Each visited
        // slot is marked done by pointing it at itself, so a slot that is
        // already in place (or was fixed by an earlier cycle) ends the walk on
        // its first step.
        let mut slot = cycle_start;
        loop {
            let source = std::mem::replace(&mut indices[slot], slot);
            if source == cycle_start {
                break;
            }
            // Pull the wanted element into `slot`; the displaced element rides
            // along to `source` and is placed on a later step of this cycle.
            items.swap(slot, source);
            slot = source;
        }
    }
}

================================================
FILE: crates/monty/src/types/bytes.rs
================================================
/// Python bytes type, wrapping a `Vec<u8>`.
///
/// This type provides Python bytes semantics with operations on ASCII bytes only.
/// Unlike str methods which operate on Unicode codepoints, bytes methods only
/// recognize ASCII characters (0-127) for case transformations and predicates.
///
/// # Implemented Methods
///
/// ## Encoding/Decoding
/// - `decode([encoding[, errors]])` - Decode to string (UTF-8 only)
/// - `hex([sep[, bytes_per_sep]])` - Return hex string representation
/// - `fromhex(string)` - Create bytes from hex string (classmethod)
///
/// ## Simple Transformations
/// - `lower()` - Convert ASCII uppercase to lowercase
/// - `upper()` - Convert ASCII lowercase to uppercase
/// - `capitalize()` - First byte uppercase, rest lowercase
/// - `title()` - Titlecase ASCII letters
/// - `swapcase()` - Swap ASCII case
///
/// ## Predicates
/// - `isalpha()` - All bytes are ASCII letters
/// - `isdigit()` - All bytes are ASCII digits
/// - `isalnum()` - All bytes are ASCII alphanumeric
/// - `isspace()` - All bytes are ASCII whitespace
/// - `islower()` - Has cased bytes, all lowercase
/// - `isupper()` - Has cased bytes, all uppercase
/// - `isascii()` - All bytes are ASCII (0-127)
/// - `istitle()` - Titlecased
///
/// ## Search Methods
/// - `count(sub[, start[, end]])` - Count non-overlapping occurrences
/// - `find(sub[, start[, end]])` - Find first occurrence (-1 if not found)
/// - `rfind(sub[, start[, end]])` - Find last occurrence (-1 if not found)
/// - `index(sub[, start[, end]])` - Find first occurrence (raises ValueError)
/// - `rindex(sub[, start[, end]])` - Find last occurrence (raises ValueError) /// - `startswith(prefix[, start[, end]])` - Check if starts with prefix /// - `endswith(suffix[, start[, end]])` - Check if ends with suffix /// /// ## Strip/Trim Methods /// - `strip([chars])` - Remove leading/trailing bytes /// - `lstrip([chars])` - Remove leading bytes /// - `rstrip([chars])` - Remove trailing bytes /// - `removeprefix(prefix)` - Remove prefix if present /// - `removesuffix(suffix)` - Remove suffix if present /// /// ## Split Methods /// - `split([sep[, maxsplit]])` - Split on separator /// - `rsplit([sep[, maxsplit]])` - Split from right /// - `splitlines([keepends])` - Split on line boundaries /// - `partition(sep)` - Split into 3 parts at first sep /// - `rpartition(sep)` - Split into 3 parts at last sep /// /// ## Replace/Padding Methods /// - `replace(old, new[, count])` - Replace occurrences /// - `center(width[, fillbyte])` - Center with fill byte /// - `ljust(width[, fillbyte])` - Left justify with fill byte /// - `rjust(width[, fillbyte])` - Right justify with fill byte /// - `zfill(width)` - Pad with zeros /// /// ## Other Methods /// - `join(iterable)` - Join bytes sequences /// /// # Unimplemented Methods /// - `expandtabs(tabsize=8)` - Tab expansion /// - `translate(table[, delete])` - Character translation /// - `maketrans(frm, to)` - Create translation table (staticmethod) use std::cmp::Ordering; use std::fmt::Write; use ahash::AHashSet; use smallvec::smallvec; use super::{MontyIter, PyTrait, Type, str::Str}; use crate::{ args::ArgValues, bytecode::{CallResult, VM}, defer_drop, defer_drop_mut, exception_private::{ExcType, RunResult, SimpleException}, heap::{DropWithHeap, Heap, HeapData, HeapGuard, HeapId, HeapItem}, intern::{Interns, StaticStrings, StringId}, resource::{ResourceError, ResourceTracker, check_repeat_size, check_replace_size}, types::List, value::{EitherStr, Value}, }; // 
============================================================================= // ASCII byte helper functions // ============================================================================= /// Returns true if the byte is Python ASCII whitespace. /// /// Python considers these bytes as whitespace: space, tab, newline, carriage return, /// vertical tab (0x0b), and form feed (0x0c). Note: Rust's `is_ascii_whitespace()` /// does not include vertical tab (0x0b). #[inline] fn is_py_whitespace(b: u8) -> bool { matches!(b, b' ' | b'\t' | b'\n' | b'\r' | 0x0b | 0x0c) } /// Gets the byte at a given index, handling negative indices. /// /// Returns `None` if the index is out of bounds. /// Negative indices count from the end: -1 is the last byte. pub fn get_byte_at_index(bytes: &[u8], index: i64) -> Option { let len = i64::try_from(bytes.len()).ok()?; let normalized = if index < 0 { index + len } else { index }; if normalized < 0 || normalized >= len { return None; } let idx = usize::try_from(normalized).ok()?; Some(bytes[idx]) } /// Extracts a slice of a byte array. /// /// Handles both positive and negative step values. For negative step, /// iterates backward from start down to (but not including) stop. /// The `stop` parameter uses a sentinel value of `len + 1` for negative /// step to indicate "go to the beginning". /// /// Note: step must be non-zero (callers should validate this via `slice.indices()`). 
pub(crate) fn get_bytes_slice(bytes: &[u8], start: usize, stop: usize, step: i64) -> Vec { let mut result = Vec::new(); // try_from succeeds for non-negative step; step==0 rejected upstream by slice.indices() if let Ok(step_usize) = usize::try_from(step) { // Positive step: iterate forward let mut i = start; while i < stop && i < bytes.len() { result.push(bytes[i]); i += step_usize; } } else { // Negative step: iterate backward // start is the highest index, stop is the sentinel // stop > bytes.len() means "go to the beginning" let step_abs = usize::try_from(-step).expect("step is negative so -step is positive"); let step_abs_i64 = i64::try_from(step_abs).expect("step magnitude fits in i64"); let mut i = i64::try_from(start).expect("start index fits in i64"); let stop_i64 = if stop > bytes.len() { -1 } else { i64::try_from(stop).expect("stop bounded by bytes.len() fits in i64") }; while let Ok(i_usize) = usize::try_from(i) { if i_usize >= bytes.len() || i <= stop_i64 { break; } result.push(bytes[i_usize]); i -= step_abs_i64; } } result } /// Python bytes value stored on the heap. /// /// Wraps a `Vec` and provides Python-compatible operations. /// See the module-level documentation for implemented and unimplemented methods. #[derive(Debug, Clone, PartialEq, Default, serde::Serialize, serde::Deserialize)] pub(crate) struct Bytes(Vec); impl Bytes { /// Creates a new Bytes from a byte vector. #[must_use] pub fn new(bytes: Vec) -> Self { Self(bytes) } /// Returns a reference to the inner byte slice. #[must_use] pub fn as_slice(&self) -> &[u8] { &self.0 } /// Creates bytes from the `bytes()` constructor call. /// /// - `bytes()` with no args returns empty bytes /// - `bytes(int)` returns bytes of that length filled with zeros /// - `bytes(string)` encodes the string as UTF-8 (simplified, no encoding param) /// - `bytes(bytes)` returns a copy of the bytes /// /// Note: Full Python semantics for bytes() are more complex (encoding, errors params). 
pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let heap = &mut *vm.heap; let interns = vm.interns; let value = args.get_zero_one_arg("bytes", heap)?; defer_drop!(value, heap); let new_data = match value { None => Vec::new(), Some(Value::Int(n)) => { if *n < 0 { return Err(ExcType::value_error_negative_bytes_count()); } let size = usize::try_from(*n).expect("bytes count validated non-negative"); vec![0u8; size] } Some(Value::InternString(string_id)) => { let s = interns.get_str(*string_id); s.as_bytes().to_vec() } Some(Value::InternBytes(bytes_id)) => { let b = interns.get_bytes(*bytes_id); b.to_vec() } Some(v @ Value::Ref(id)) => match heap.get(*id) { HeapData::Str(s) => s.as_str().as_bytes().to_vec(), HeapData::Bytes(b) => b.as_slice().to_vec(), _ => return Err(ExcType::type_error_bytes_init(v.py_type(heap))), }, Some(v) => return Err(ExcType::type_error_bytes_init(v.py_type(heap))), }; let heap_id = heap.allocate(HeapData::Bytes(Self::new(new_data)))?; Ok(Value::Ref(heap_id)) } } impl From> for Bytes { fn from(bytes: Vec) -> Self { Self(bytes) } } impl From<&[u8]> for Bytes { fn from(bytes: &[u8]) -> Self { Self(bytes.to_vec()) } } impl From for Vec { fn from(bytes: Bytes) -> Self { bytes.0 } } impl std::ops::Deref for Bytes { type Target = Vec; fn deref(&self) -> &Self::Target { &self.0 } } impl PyTrait for Bytes { fn py_type(&self, _heap: &Heap) -> Type { Type::Bytes } fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option { Some(self.0.len()) } fn py_getitem(&self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let heap = &mut *vm.heap; // Check for slice first (Value::Ref pointing to HeapData::Slice) if let Value::Ref(id) = key && let HeapData::Slice(slice) = heap.get(*id) { let (start, stop, step) = slice .indices(self.0.len()) .map_err(|()| ExcType::value_error_slice_step_zero())?; let sliced_bytes = get_bytes_slice(&self.0, start, stop, step); let heap_id = 
heap.allocate(HeapData::Bytes(Self::new(sliced_bytes)))?; return Ok(Value::Ref(heap_id)); } // Extract integer index, accepting Int, Bool (True=1, False=0), and LongInt let index = key.as_index(heap, Type::Bytes)?; // Use helper for byte indexing let byte = get_byte_at_index(&self.0, index).ok_or_else(ExcType::bytes_index_error)?; Ok(Value::Int(i64::from(byte))) } fn py_eq(&self, other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { Ok(self.0 == other.0) } fn py_cmp( &self, other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, ResourceError> { Ok(Some(self.0.cmp(&other.0))) } fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool { !self.0.is_empty() } fn py_repr_fmt( &self, f: &mut impl Write, _vm: &VM<'_, '_, impl ResourceTracker>, _heap_ids: &mut AHashSet, ) -> std::fmt::Result { bytes_repr_fmt(&self.0, f) } fn py_call_attr( &mut self, _self_id: HeapId, vm: &mut VM<'_, '_, impl ResourceTracker>, attr: &EitherStr, args: ArgValues, ) -> RunResult { let Some(method) = attr.static_string() else { args.drop_with_heap(vm.heap); return Err(ExcType::attribute_error(Type::Bytes, attr.as_str(vm.interns))); }; call_bytes_method_impl(self.as_slice(), method, args, vm).map(CallResult::Value) } } impl HeapItem for Bytes { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.0.len() } fn py_dec_ref_ids(&mut self, _stack: &mut Vec) { // No-op: bytes don't hold Value references } } /// Calls a bytes method on a byte slice by method name. /// /// This is the entry point for bytes method calls from the VM on interned bytes. /// Converts the `StringId` to `StaticStrings` and delegates to `call_bytes_method_impl`. 
pub fn call_bytes_method( bytes: &[u8], method_id: StringId, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult { let Some(method) = StaticStrings::from_string_id(method_id) else { args.drop_with_heap(vm.heap); return Err(ExcType::attribute_error(Type::Bytes, vm.interns.get_str(method_id))); }; call_bytes_method_impl(bytes, method, args, vm) } /// Calls a bytes method on a byte slice. /// /// This is the unified implementation for bytes method calls, used by both /// heap-allocated `Bytes` (via `py_call_attr`) and interned bytes literals /// (`Value::InternBytes`). fn call_bytes_method_impl( bytes: &[u8], method: StaticStrings, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult { match method { // Decode method StaticStrings::Decode => bytes_decode(bytes, args, vm), // Simple transformations (no arguments) StaticStrings::Lower => { args.check_zero_args("bytes.lower", vm.heap)?; bytes_lower(bytes, vm) } StaticStrings::Upper => { args.check_zero_args("bytes.upper", vm.heap)?; bytes_upper(bytes, vm) } StaticStrings::Capitalize => { args.check_zero_args("bytes.capitalize", vm.heap)?; bytes_capitalize(bytes, vm) } StaticStrings::Title => { args.check_zero_args("bytes.title", vm.heap)?; bytes_title(bytes, vm) } StaticStrings::Swapcase => { args.check_zero_args("bytes.swapcase", vm.heap)?; bytes_swapcase(bytes, vm) } // Predicate methods (no arguments, return bool) StaticStrings::Isalpha => { args.check_zero_args("bytes.isalpha", vm.heap)?; Ok(Value::Bool(bytes_isalpha(bytes))) } StaticStrings::Isdigit => { args.check_zero_args("bytes.isdigit", vm.heap)?; Ok(Value::Bool(bytes_isdigit(bytes))) } StaticStrings::Isalnum => { args.check_zero_args("bytes.isalnum", vm.heap)?; Ok(Value::Bool(bytes_isalnum(bytes))) } StaticStrings::Isspace => { args.check_zero_args("bytes.isspace", vm.heap)?; Ok(Value::Bool(bytes_isspace(bytes))) } StaticStrings::Islower => { args.check_zero_args("bytes.islower", vm.heap)?; 
Ok(Value::Bool(bytes_islower(bytes))) } StaticStrings::Isupper => { args.check_zero_args("bytes.isupper", vm.heap)?; Ok(Value::Bool(bytes_isupper(bytes))) } StaticStrings::Isascii => { args.check_zero_args("bytes.isascii", vm.heap)?; Ok(Value::Bool(bytes.iter().all(|&b| b <= 127))) } StaticStrings::Istitle => { args.check_zero_args("bytes.istitle", vm.heap)?; Ok(Value::Bool(bytes_istitle(bytes))) } // Search methods StaticStrings::Count => bytes_count(bytes, args, vm), StaticStrings::Find => bytes_find(bytes, args, vm), StaticStrings::Rfind => bytes_rfind(bytes, args, vm), StaticStrings::Index => bytes_index(bytes, args, vm), StaticStrings::Rindex => bytes_rindex(bytes, args, vm), StaticStrings::Startswith => bytes_startswith(bytes, args, vm), StaticStrings::Endswith => bytes_endswith(bytes, args, vm), // Strip/trim methods StaticStrings::Strip => bytes_strip(bytes, args, vm), StaticStrings::Lstrip => bytes_lstrip(bytes, args, vm), StaticStrings::Rstrip => bytes_rstrip(bytes, args, vm), StaticStrings::Removeprefix => bytes_removeprefix(bytes, args, vm), StaticStrings::Removesuffix => bytes_removesuffix(bytes, args, vm), // Split methods StaticStrings::Split => bytes_split(bytes, args, vm), StaticStrings::Rsplit => bytes_rsplit(bytes, args, vm), StaticStrings::Splitlines => bytes_splitlines(bytes, args, vm), StaticStrings::Partition => bytes_partition(bytes, args, vm), StaticStrings::Rpartition => bytes_rpartition(bytes, args, vm), // Replace/padding methods StaticStrings::Replace => bytes_replace(bytes, args, vm), StaticStrings::Center => bytes_center(bytes, args, vm), StaticStrings::Ljust => bytes_ljust(bytes, args, vm), StaticStrings::Rjust => bytes_rjust(bytes, args, vm), StaticStrings::Zfill => bytes_zfill(bytes, args, vm), // Join method StaticStrings::Join => { let iterable = args.get_one_arg("bytes.join", vm.heap)?; bytes_join(bytes, iterable, vm) } // Hex method StaticStrings::Hex => bytes_hex(bytes, args, vm), // fromhex is a classmethod but also 
accessible on instances StaticStrings::Fromhex => bytes_fromhex(args, vm), _ => { args.drop_with_heap(vm); Err(ExcType::attribute_error(Type::Bytes, method.into())) } } } /// Writes a CPython-compatible repr string for bytes to a formatter. /// /// Format: `b'...'` or `b"..."` depending on content. /// - Uses single quotes by default /// - Switches to double quotes if bytes contain `'` but not `"` /// - Escapes: `\\`, `\t`, `\n`, `\r`, `\xNN` for non-printable bytes pub fn bytes_repr_fmt(bytes: &[u8], f: &mut impl Write) -> std::fmt::Result { // Determine quote character: use double quotes if single quote present but not double let has_single = bytes.contains(&b'\''); let has_double = bytes.contains(&b'"'); let quote = if has_single && !has_double { '"' } else { '\'' }; f.write_char('b')?; f.write_char(quote)?; for &byte in bytes { match byte { b'\\' => f.write_str("\\\\")?, b'\t' => f.write_str("\\t")?, b'\n' => f.write_str("\\n")?, b'\r' => f.write_str("\\r")?, b'\'' if quote == '\'' => f.write_str("\\'")?, b'"' if quote == '"' => f.write_str("\\\"")?, // Printable ASCII (32-126) 0x20..=0x7e => f.write_char(byte as char)?, // Non-printable: use \xNN format _ => write!(f, "\\x{byte:02x}")?, } } f.write_char(quote) } /// Returns a CPython-compatible repr string for bytes. /// /// Convenience wrapper around `bytes_repr_fmt` that returns an owned String. #[must_use] pub fn bytes_repr(bytes: &[u8]) -> String { let mut result = String::new(); // Writing to String never fails bytes_repr_fmt(bytes, &mut result).unwrap(); result } /// Implements Python's `bytes.decode([encoding[, errors]])` method. /// /// Converts bytes to a string. Currently only supports UTF-8 encoding. 
fn bytes_decode(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (encoding, errors) = args.get_zero_one_two_args("bytes.decode", vm.heap)?; defer_drop!(encoding, vm); defer_drop!(errors, vm); // NB we don't use errors argument yet // Check encoding (default UTF-8) let encoding = if let Some(enc) = encoding { get_encoding_str(enc, vm.heap, vm.interns)?.to_ascii_lowercase() } else { "utf-8".to_owned() }; // Only support UTF-8 family if !matches!(encoding.as_str(), "utf-8" | "utf8" | "utf_8") { return Err(ExcType::lookup_error_unknown_encoding(&encoding)); } // Decode as UTF-8 match std::str::from_utf8(bytes) { Ok(s) => { let heap_id = vm.heap.allocate(HeapData::Str(Str::from(s.to_owned())))?; Ok(Value::Ref(heap_id)) } Err(_) => Err(ExcType::unicode_decode_error_invalid_utf8()), } } /// Helper function to extract encoding string from a value. fn get_encoding_str<'a>( encoding: &Value, heap: &'a Heap, interns: &'a Interns, ) -> RunResult<&'a str> { match encoding { Value::InternString(id) => Ok(interns.get_str(*id)), Value::Ref(id) => match heap.get(*id) { HeapData::Str(s) => Ok(s.as_str()), _ => Err(ExcType::type_error( "decode() argument 'encoding' must be str, not bytes", )), }, // FIXME: should use proper encoding.py_type() here _ => Err(ExcType::type_error("decode() argument 'encoding' must be str, not int")), } } /// Implements Python's `bytes.count(sub[, start[, end]])` method. /// /// Returns the number of non-overlapping occurrences of the subsequence. 
fn bytes_count(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sub, start, end) = parse_bytes_sub_args("bytes.count", bytes.len(), args, vm)?; let slice = &bytes[start..end]; let count = if sub.is_empty() { // Empty subsequence: count positions between each byte plus 1 slice.len() + 1 } else { count_non_overlapping(slice, &sub) }; let count_i64 = i64::try_from(count).expect("count exceeds i64::MAX"); Ok(Value::Int(count_i64)) } /// Counts non-overlapping occurrences of needle in haystack. fn count_non_overlapping(haystack: &[u8], needle: &[u8]) -> usize { let mut count = 0; let mut pos = 0; while pos + needle.len() <= haystack.len() { if &haystack[pos..pos + needle.len()] == needle { count += 1; pos += needle.len(); } else { pos += 1; } } count } /// Implements Python's `bytes.find(sub[, start[, end]])` method. /// /// Returns the lowest index where the subsequence is found, or -1 if not found. fn bytes_find(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sub, start, end) = parse_bytes_sub_args("bytes.find", bytes.len(), args, vm)?; let slice = &bytes[start..end]; let result = if sub.is_empty() { // Empty subsequence: always found at start position Some(0) } else { find_subsequence(slice, &sub) }; let idx = match result { Some(i) => i64::try_from(start + i).expect("index exceeds i64::MAX"), None => -1, }; Ok(Value::Int(idx)) } /// Finds the first occurrence of needle in haystack. fn find_subsequence(haystack: &[u8], needle: &[u8]) -> Option { haystack.windows(needle.len()).position(|window| window == needle) } /// Implements Python's `bytes.index(sub[, start[, end]])` method. /// /// Like find(), but raises ValueError if the subsequence is not found. 
fn bytes_index(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sub, start, end) = parse_bytes_sub_args("bytes.index", bytes.len(), args, vm)?; let slice = &bytes[start..end]; let result = if sub.is_empty() { // Empty subsequence: always found at start position Some(0) } else { find_subsequence(slice, &sub) }; match result { Some(i) => { let idx = i64::try_from(start + i).expect("index exceeds i64::MAX"); Ok(Value::Int(idx)) } None => Err(ExcType::value_error_subsequence_not_found()), } } /// Implements Python's `bytes.startswith(prefix[, start[, end]])` method. /// /// Returns True if bytes starts with the specified prefix. /// Accepts bytes or a tuple of bytes as prefix. If a tuple is given, returns True /// if any of the prefixes match. fn bytes_startswith(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (prefix_arg, start, end) = parse_bytes_prefix_suffix_args("bytes.startswith", bytes.len(), args, vm)?; let slice = &bytes[start..end]; let result = match prefix_arg { PrefixSuffixArg::Single(prefix_bytes) => slice.starts_with(&prefix_bytes), PrefixSuffixArg::Multiple(prefixes) => prefixes.iter().any(|p| slice.starts_with(p)), }; Ok(Value::Bool(result)) } /// Implements Python's `bytes.endswith(suffix[, start[, end]])` method. /// /// Returns True if bytes ends with the specified suffix. /// Accepts bytes or a tuple of bytes as suffix. If a tuple is given, returns True /// if any of the suffixes match. 
fn bytes_endswith(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (suffix_arg, start, end) = parse_bytes_prefix_suffix_args("bytes.endswith", bytes.len(), args, vm)?;
    // Matching is performed against the `[start, end)` window only.
    let slice = &bytes[start..end];

    let result = match suffix_arg {
        PrefixSuffixArg::Single(suffix_bytes) => slice.ends_with(&suffix_bytes),
        PrefixSuffixArg::Multiple(suffixes) => suffixes.iter().any(|s| slice.ends_with(s)),
    };

    Ok(Value::Bool(result))
}

/// Argument type for prefix/suffix matching methods.
///
/// Represents either a single bytes value or a tuple of bytes values
/// for matching in startswith/endswith.
enum PrefixSuffixArg {
    /// A single bytes value to match
    Single(Vec<u8>),
    /// Multiple bytes values to match (from a tuple)
    Multiple(Vec<Vec<u8>>),
}

/// Parses arguments for bytes.startswith/endswith methods.
///
/// Returns (prefix/suffix_arg, start, end) where start and end are normalized indices.
/// The prefix/suffix_arg can be a single bytes value or a tuple of bytes values.
/// Guarantees `start <= end` to prevent slice panics.
fn parse_bytes_prefix_suffix_args( method: &str, len: usize, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<(PrefixSuffixArg, usize, usize)> { let pos = args.into_pos_only(method, vm.heap)?; defer_drop!(pos, vm); let (prefix, start, end) = match pos.as_slice() { [prefix_value] => { let prefix = extract_bytes_for_prefix_suffix(prefix_value, method, vm)?; (prefix, 0, len) } [prefix_value, start_value] => { let prefix = extract_bytes_for_prefix_suffix(prefix_value, method, vm)?; let start = normalize_bytes_index(start_value.as_int(vm.heap)?, len); (prefix, start, len) } [prefix_value, start_value, end_value] => { let prefix = extract_bytes_for_prefix_suffix(prefix_value, method, vm)?; let start = normalize_bytes_index(start_value.as_int(vm.heap)?, len); let end = normalize_bytes_index(end_value.as_int(vm.heap)?, len); (prefix, start, end) } [] => return Err(ExcType::type_error_at_least(method, 1, 0)), _ => return Err(ExcType::type_error_at_most(method, 3, pos.len())), }; // Ensure start <= end to prevent slice panics Ok((prefix, start, end.max(start))) } /// Extracts bytes (or tuple of bytes) for startswith/endswith methods. /// /// Returns `PrefixSuffixArg::Single` for a single bytes value, or /// `PrefixSuffixArg::Multiple` for a tuple of bytes values. 
fn extract_bytes_for_prefix_suffix( value: &Value, method: &str, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult { // Extract the method name (e.g., "startswith" from "bytes.startswith") let method_name = method.strip_prefix("bytes.").unwrap_or(method); match value { Value::InternBytes(id) => Ok(PrefixSuffixArg::Single(vm.interns.get_bytes(*id).to_vec())), Value::InternString(_) => Err(ExcType::type_error(format!( "{method_name} first arg must be bytes or a tuple of bytes, not str" ))), Value::Ref(id) => match vm.heap.get(*id) { HeapData::Bytes(b) => Ok(PrefixSuffixArg::Single(b.as_slice().to_vec())), HeapData::Str(_) => Err(ExcType::type_error(format!( "{method_name} first arg must be bytes or a tuple of bytes, not str" ))), HeapData::Tuple(tuple) => { // Extract each element as bytes let items = tuple.as_slice(); let mut prefixes = Vec::with_capacity(items.len()); for (i, item) in items.iter().enumerate() { if let Ok(b) = extract_single_bytes_for_prefix_suffix(item, vm.heap, vm.interns) { prefixes.push(b); } else { let item_type = item.py_type(vm.heap); return Err(ExcType::type_error(format!( "{method_name} first arg must be bytes or a tuple of bytes, \ not tuple containing {item_type} at index {i}" ))); } } Ok(PrefixSuffixArg::Multiple(prefixes)) } _ => Err(ExcType::type_error(format!( "{method_name} first arg must be bytes or a tuple of bytes, not {}", value.py_type(vm.heap) ))), }, _ => Err(ExcType::type_error(format!( "{method_name} first arg must be bytes or a tuple of bytes, not {}", value.py_type(vm.heap) ))), } } /// Extracts a single bytes value for tuple element in startswith/endswith. 
fn extract_single_bytes_for_prefix_suffix( value: &Value, heap: &Heap, interns: &Interns, ) -> RunResult> { match value { Value::InternBytes(id) => Ok(interns.get_bytes(*id).to_vec()), Value::InternString(_) => Err(ExcType::type_error("expected bytes, not str")), Value::Ref(id) => match heap.get(*id) { HeapData::Bytes(b) => Ok(b.as_slice().to_vec()), _ => Err(ExcType::type_error("expected bytes")), }, _ => Err(ExcType::type_error("expected bytes")), } } /// Extracts bytes from a Value (bytes only, NOT str - matches CPython behavior). /// /// CPython raises `TypeError: a bytes-like object is required, not 'str'` when /// a str is passed to bytes methods like find, count, index, startswith, endswith. fn extract_bytes_only<'a>( value: &Value, heap: &'a Heap, interns: &'a Interns, ) -> RunResult<&'a [u8]> { match value { Value::InternBytes(id) => Ok(interns.get_bytes(*id)), Value::InternString(_) => Err(ExcType::type_error("a bytes-like object is required, not 'str'")), Value::Ref(id) => match heap.get(*id) { HeapData::Bytes(b) => Ok(b.as_slice()), HeapData::Str(_) => Err(ExcType::type_error("a bytes-like object is required, not 'str'")), _ => Err(ExcType::type_error("a bytes-like object is required")), }, _ => Err(ExcType::type_error("a bytes-like object is required")), } } /// Parses arguments for bytes.find/count/index methods. /// /// Returns (sub_bytes, start, end) where start and end are normalized indices. /// Guarantees `start <= end` to prevent slice panics. 
fn parse_bytes_sub_args( method: &str, len: usize, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<(Vec, usize, usize)> { let pos = args.into_pos_only(method, vm.heap)?; defer_drop!(pos, vm); let (sub, start, end) = match pos.as_slice() { [sub_value] => { let sub = extract_bytes_only(sub_value, vm.heap, vm.interns)?; (sub, 0, len) } [sub_value, start_value] => { let sub = extract_bytes_only(sub_value, vm.heap, vm.interns)?; let start = normalize_bytes_index(start_value.as_int(vm.heap)?, len); (sub, start, len) } [sub_value, start_value, end_value] => { let sub = extract_bytes_only(sub_value, vm.heap, vm.interns)?; let start = normalize_bytes_index(start_value.as_int(vm.heap)?, len); let end = normalize_bytes_index(end_value.as_int(vm.heap)?, len); (sub, start, end) } [] => return Err(ExcType::type_error_at_least(method, 1, 0)), _ => return Err(ExcType::type_error_at_most(method, 3, pos.len())), }; // Ensure start <= end to prevent slice panics (Python treats start > end as empty slice) Ok((sub.to_owned(), start, end.max(start))) } /// Normalizes a Python-style bytes index to a valid index in range [0, len]. fn normalize_bytes_index(index: i64, len: usize) -> usize { if index < 0 { let abs_index = usize::try_from(-index).unwrap_or(usize::MAX); len.saturating_sub(abs_index) } else { usize::try_from(index).unwrap_or(len).min(len) } } // ============================================================================= // Simple transformations (no arguments) // ============================================================================= /// Implements Python's `bytes.lower()` method. /// /// Returns a copy of the bytes with all ASCII uppercase characters converted to lowercase. fn bytes_lower(bytes: &[u8], vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let result: Vec = bytes.iter().map(|&b| b.to_ascii_lowercase()).collect(); allocate_bytes(result, vm.heap) } /// Implements Python's `bytes.upper()` method. 
///
/// Returns a copy of the bytes with all ASCII lowercase characters converted to uppercase.
fn bytes_upper(bytes: &[u8], vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let result: Vec<u8> = bytes.to_ascii_uppercase();
    allocate_bytes(result, vm.heap)
}

/// Implements Python's `bytes.capitalize()` method.
///
/// Returns a copy of the bytes with the first byte capitalized (if ASCII) and
/// the rest lowercased.
fn bytes_capitalize(bytes: &[u8], vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    // Lowercase everything, then raise the first byte. For ASCII this equals
    // uppercasing the original first byte.
    let mut result: Vec<u8> = bytes.to_ascii_lowercase();
    if let Some(first) = result.first_mut() {
        first.make_ascii_uppercase();
    }
    allocate_bytes(result, vm.heap)
}

/// Implements Python's `bytes.title()` method.
///
/// Returns a titlecased version of the bytes where words start with an uppercase
/// ASCII character and the remaining characters are lowercase.
fn bytes_title(bytes: &[u8], vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let mut result: Vec<u8> = Vec::with_capacity(bytes.len());
    // Whether the previous byte was an ASCII letter, i.e. we are inside a word.
    let mut prev_is_cased = false;

    for &b in bytes {
        if prev_is_cased {
            result.push(b.to_ascii_lowercase());
        } else {
            result.push(b.to_ascii_uppercase());
        }
        prev_is_cased = b.is_ascii_alphabetic();
    }

    allocate_bytes(result, vm.heap)
}

/// Implements Python's `bytes.swapcase()` method.
///
/// Returns a copy of the bytes with ASCII uppercase characters converted to
/// lowercase and vice versa.
fn bytes_swapcase(bytes: &[u8], vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let result: Vec = bytes .iter() .map(|&b| { if b.is_ascii_uppercase() { b.to_ascii_lowercase() } else if b.is_ascii_lowercase() { b.to_ascii_uppercase() } else { b } }) .collect(); allocate_bytes(result, vm.heap) } // ============================================================================= // Predicate methods (no arguments, return bool) // ============================================================================= /// Implements Python's `bytes.isalpha()` method. /// /// Returns True if all bytes in the bytes are ASCII letters and there is at least one byte. fn bytes_isalpha(bytes: &[u8]) -> bool { !bytes.is_empty() && bytes.iter().all(|&b| b.is_ascii_alphabetic()) } /// Implements Python's `bytes.isdigit()` method. /// /// Returns True if all bytes in the bytes are ASCII digits and there is at least one byte. fn bytes_isdigit(bytes: &[u8]) -> bool { !bytes.is_empty() && bytes.iter().all(|&b| b.is_ascii_digit()) } /// Implements Python's `bytes.isalnum()` method. /// /// Returns True if all bytes in the bytes are ASCII alphanumeric and there is at least one byte. fn bytes_isalnum(bytes: &[u8]) -> bool { !bytes.is_empty() && bytes.iter().all(|&b| b.is_ascii_alphanumeric()) } /// Implements Python's `bytes.isspace()` method. /// /// Returns True if all bytes in the bytes are ASCII whitespace and there is at least one byte. fn bytes_isspace(bytes: &[u8]) -> bool { !bytes.is_empty() && bytes.iter().all(|&b| is_py_whitespace(b)) } /// Implements Python's `bytes.islower()` method. /// /// Returns True if all cased bytes are lowercase and there is at least one cased byte. fn bytes_islower(bytes: &[u8]) -> bool { let mut has_cased = false; for &b in bytes { if b.is_ascii_uppercase() { return false; } if b.is_ascii_lowercase() { has_cased = true; } } has_cased } /// Implements Python's `bytes.isupper()` method. 
///
/// Returns True if all cased bytes are uppercase and there is at least one cased byte.
fn bytes_isupper(bytes: &[u8]) -> bool {
    let mut has_cased = false;
    for &b in bytes {
        // A single lowercase letter disqualifies the whole sequence.
        if b.is_ascii_lowercase() {
            return false;
        }
        if b.is_ascii_uppercase() {
            has_cased = true;
        }
    }
    has_cased
}

/// Implements Python's `bytes.istitle()` method.
///
/// Returns True if the bytes are titlecased: uppercase characters follow
/// uncased characters and lowercase characters follow cased characters.
fn bytes_istitle(bytes: &[u8]) -> bool {
    if bytes.is_empty() {
        return false;
    }

    // `prev_cased`: the previous byte was an ASCII letter (we are inside a word).
    // `has_cased`: at least one ASCII letter has been seen overall.
    let mut prev_cased = false;
    let mut has_cased = false;

    for &b in bytes {
        if b.is_ascii_uppercase() {
            // Uppercase is only allowed at the start of a word.
            if prev_cased {
                return false;
            }
            prev_cased = true;
            has_cased = true;
        } else if b.is_ascii_lowercase() {
            // Lowercase is only allowed inside a word.
            if !prev_cased {
                return false;
            }
            prev_cased = true;
            has_cased = true;
        } else {
            prev_cased = false;
        }
    }

    has_cased
}

// =============================================================================
// Search methods
// =============================================================================

/// Implements Python's `bytes.rfind(sub[, start[, end]])` method.
///
/// Returns the highest index where the subsequence is found, or -1 if not found.
fn bytes_rfind(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (sub, start, end) = parse_bytes_sub_args("bytes.rfind", bytes.len(), args, vm)?;
    let slice = &bytes[start..end];

    let result = if sub.is_empty() {
        // Empty subsequence: always found at end position
        Some(slice.len())
    } else {
        rfind_subsequence(slice, &sub)
    };

    let idx = match result {
        // `i` is relative to `slice`; shift it back into `bytes` coordinates.
        Some(i) => i64::try_from(start + i).expect("index exceeds i64::MAX"),
        None => -1,
    };
    Ok(Value::Int(idx))
}

/// Finds the last occurrence of needle in haystack.
///
/// An empty needle matches at the very end (`haystack.len()`). The explicit
/// guard is required because `slice::windows` panics for zero-sized windows.
/// A needle longer than the haystack yields no windows and therefore `None`.
fn rfind_subsequence(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(haystack.len());
    }
    haystack.windows(needle.len()).rposition(|window| window == needle)
}

/// Implements Python's `bytes.rindex(sub[, start[, end]])` method.
///
/// Like rfind(), but raises ValueError if the subsequence is not found.
fn bytes_rindex(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (sub, start, end) = parse_bytes_sub_args("bytes.rindex", bytes.len(), args, vm)?;
    let slice = &bytes[start..end];

    // An empty subsequence is reported at the end of the searched window.
    let result = rfind_subsequence(slice, &sub);

    match result {
        Some(i) => {
            // `i` is relative to `slice`; shift it back into `bytes` coordinates.
            let idx = i64::try_from(start + i).expect("index exceeds i64::MAX");
            Ok(Value::Int(idx))
        }
        None => Err(ExcType::value_error_subsequence_not_found()),
    }
}

// =============================================================================
// Strip/trim methods
// =============================================================================

/// Implements Python's `bytes.strip([chars])` method.
///
/// Returns a copy of the bytes with leading and trailing bytes removed.
/// If chars is not specified, ASCII whitespace bytes are removed.
fn bytes_strip(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let value = args.get_zero_one_arg("bytes.strip", vm.heap)?;
    defer_drop!(value, vm);

    // A missing argument and an explicit `None` both select whitespace stripping.
    let result = match value {
        None | Some(Value::None) => bytes_strip_whitespace_both(bytes),
        Some(v) => bytes_strip_both(bytes, extract_bytes_only(v, vm.heap, vm.interns)?),
    };

    allocate_bytes(result.to_vec(), vm.heap)
}

/// Implements Python's `bytes.lstrip([chars])` method.
///
/// Returns a copy of the bytes with leading bytes removed.
fn bytes_lstrip(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let value = args.get_zero_one_arg("bytes.lstrip", vm.heap)?;
    defer_drop!(value, vm);

    // A missing argument and an explicit `None` both select whitespace stripping.
    let result = match value {
        None | Some(Value::None) => bytes_strip_whitespace_start(bytes),
        Some(v) => bytes_strip_start(bytes, extract_bytes_only(v, vm.heap, vm.interns)?),
    };

    allocate_bytes(result.to_vec(), vm.heap)
}

/// Implements Python's `bytes.rstrip([chars])` method.
///
/// Returns a copy of the bytes with trailing bytes removed.
fn bytes_rstrip(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let value = args.get_zero_one_arg("bytes.rstrip", vm.heap)?;
    defer_drop!(value, vm);

    // A missing argument and an explicit `None` both select whitespace stripping.
    let result = match value {
        None | Some(Value::None) => bytes_strip_whitespace_end(bytes),
        Some(v) => bytes_strip_end(bytes, extract_bytes_only(v, vm.heap, vm.interns)?),
    };

    allocate_bytes(result.to_vec(), vm.heap)
}

/// Strips bytes in `chars` from both ends of the byte slice.
fn bytes_strip_both<'a>(bytes: &'a [u8], chars: &[u8]) -> &'a [u8] {
    // Stripping both ends is stripping the start, then the end of what remains.
    bytes_strip_end(bytes_strip_start(bytes, chars), chars)
}

/// Strips bytes in `chars` from the start of the byte slice.
fn bytes_strip_start<'a>(bytes: &'a [u8], chars: &[u8]) -> &'a [u8] {
    // Index of the first byte to keep; if every byte is strippable the result is empty.
    let start = bytes.iter().position(|b| !chars.contains(b)).unwrap_or(bytes.len());
    &bytes[start..]
}

/// Strips bytes in `chars` from the end of the byte slice.
fn bytes_strip_end<'a>(bytes: &'a [u8], chars: &[u8]) -> &'a [u8] {
    // One past the last byte to keep; if every byte is strippable the result is empty.
    let end = bytes.iter().rposition(|b| !chars.contains(b)).map_or(0, |pos| pos + 1);
    &bytes[..end]
}

/// Strips ASCII whitespace from both ends of the byte slice.
fn bytes_strip_whitespace_both(bytes: &[u8]) -> &[u8] {
    // Stripping both ends is stripping the start, then the end of what remains.
    bytes_strip_whitespace_end(bytes_strip_whitespace_start(bytes))
}

/// Strips ASCII whitespace from the start of the byte slice.
fn bytes_strip_whitespace_start(bytes: &[u8]) -> &[u8] {
    // Index of the first non-whitespace byte; all-whitespace input yields an empty slice.
    let start = bytes.iter().position(|b| !is_py_whitespace(*b)).unwrap_or(bytes.len());
    &bytes[start..]
}

/// Strips ASCII whitespace from the end of the byte slice.
fn bytes_strip_whitespace_end(bytes: &[u8]) -> &[u8] {
    // One past the last non-whitespace byte; all-whitespace input yields an empty slice.
    let end = bytes
        .iter()
        .rposition(|b| !is_py_whitespace(*b))
        .map_or(0, |pos| pos + 1);
    &bytes[..end]
}

/// Implements Python's `bytes.removeprefix(prefix)` method.
///
/// If the bytes start with the prefix, return bytes[len(prefix):].
/// Otherwise, return a copy of the original bytes.
fn bytes_removeprefix(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let prefix_value = args.get_one_arg("bytes.removeprefix", vm.heap)?;
    defer_drop!(prefix_value, vm);

    let prefix = extract_bytes_only(prefix_value, vm.heap, vm.interns)?;

    // `strip_prefix` returns `None` when the prefix is absent; fall back to the full input.
    let result = bytes.strip_prefix(prefix).unwrap_or(bytes).to_vec();

    allocate_bytes(result, vm.heap)
}

/// Implements Python's `bytes.removesuffix(suffix)` method.
///
/// If the bytes end with the suffix, return bytes[:-len(suffix)].
/// Otherwise, return a copy of the original bytes.
fn bytes_removesuffix(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let suffix_value = args.get_one_arg("bytes.removesuffix", vm.heap)?; defer_drop!(suffix_value, vm); let suffix = extract_bytes_only(suffix_value, vm.heap, vm.interns)?; let result = if bytes.ends_with(suffix) && !suffix.is_empty() { bytes[..bytes.len() - suffix.len()].to_vec() } else { bytes.to_vec() }; allocate_bytes(result, vm.heap) } // ============================================================================= // Split methods // ============================================================================= /// Implements Python's `bytes.split([sep[, maxsplit]])` method. /// /// Returns a list of the bytes split by the separator. fn bytes_split(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sep, maxsplit) = parse_bytes_split_args("bytes.split", args, vm)?; let parts: Vec<&[u8]> = match &sep { Some(sep) => { if sep.is_empty() { return Err(ExcType::value_error_empty_separator()); } if maxsplit < 0 { bytes_split_by_seq(bytes, sep) } else { let max = usize::try_from(maxsplit).unwrap_or(usize::MAX); bytes_splitn_by_seq(bytes, sep, max + 1) } } None => { if maxsplit < 0 { bytes_split_whitespace(bytes) } else { let max = usize::try_from(maxsplit).unwrap_or(usize::MAX); bytes_splitn_whitespace(bytes, max) } } }; let mut list_items = Vec::with_capacity(parts.len()); for part in parts { vm.heap.check_time()?; list_items.push(allocate_bytes(part.to_vec(), vm.heap)?); } let list = List::new(list_items); let heap_id = vm.heap.allocate(HeapData::List(list))?; Ok(Value::Ref(heap_id)) } /// Implements Python's `bytes.rsplit([sep[, maxsplit]])` method. /// /// Returns a list of the bytes split by the separator, splitting from the right. 
fn bytes_rsplit(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sep, maxsplit) = parse_bytes_split_args("bytes.rsplit", args, vm)?; let parts: Vec<&[u8]> = match &sep { Some(sep) => { if sep.is_empty() { return Err(ExcType::value_error_empty_separator()); } if maxsplit < 0 { bytes_split_by_seq(bytes, sep) } else { let max = usize::try_from(maxsplit).unwrap_or(usize::MAX); bytes_rsplitn_by_seq(bytes, sep, max + 1) } } None => { if maxsplit < 0 { bytes_split_whitespace(bytes) } else { let max = usize::try_from(maxsplit).unwrap_or(usize::MAX); bytes_rsplitn_whitespace(bytes, max) } } }; let mut list_items = Vec::with_capacity(parts.len()); for part in parts { vm.heap.check_time()?; list_items.push(allocate_bytes(part.to_vec(), vm.heap)?); } let list = List::new(list_items); let heap_id = vm.heap.allocate(HeapData::List(list))?; Ok(Value::Ref(heap_id)) } /// Parses arguments for bytes split methods. fn parse_bytes_split_args( method: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<(Option>, i64)> { let (pos_iter, kwargs) = args.into_parts(); defer_drop_mut!(pos_iter, vm); let kwargs_iter = kwargs.into_iter(); defer_drop_mut!(kwargs_iter, vm); let sep_value = pos_iter.next(); defer_drop_mut!(sep_value, vm); let maxsplit_value = pos_iter.next(); defer_drop_mut!(maxsplit_value, vm); // Check no extra positional arguments if pos_iter.len() != 0 { return Err(ExcType::type_error_at_most(method, 2, 3)); } // Process keyword arguments for (key, value) in kwargs_iter { defer_drop!(key, vm); let mut value_guard = HeapGuard::new(value, vm); let Some(keyword_name) = key.as_either_str(value_guard.heap().heap) else { return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(value_guard.heap().interns); match key_str { "sep" => { if let Some(previous_value) = sep_value.replace(value_guard.into_inner()) { previous_value.drop_with_heap(vm); return 
Err(ExcType::type_error(format!( "{method}() got multiple values for argument 'sep'" ))); } } "maxsplit" => { if let Some(previous_value) = maxsplit_value.replace(value_guard.into_inner()) { previous_value.drop_with_heap(vm); return Err(ExcType::type_error(format!( "{method}() got multiple values for argument 'maxsplit'" ))); } } _ => { return Err(ExcType::type_error(format!( "'{key_str}' is an invalid keyword argument for {method}()" ))); } } } // Extract sep (default None) let sep = if let Some(v) = sep_value { if matches!(v, Value::None) { None } else { Some(extract_bytes_only(v, vm.heap, vm.interns)?.to_owned()) } } else { None }; // Extract maxsplit (default -1) let maxsplit = if let Some(v) = maxsplit_value { v.as_int(vm.heap)? } else { -1 }; Ok((sep, maxsplit)) } /// Splits bytes by a separator sequence. fn bytes_split_by_seq<'a>(bytes: &'a [u8], sep: &[u8]) -> Vec<&'a [u8]> { let mut parts = Vec::new(); let mut start = 0; while let Some(pos) = find_subsequence(&bytes[start..], sep) { parts.push(&bytes[start..start + pos]); start = start + pos + sep.len(); } parts.push(&bytes[start..]); parts } /// Splits bytes by a separator sequence, returning at most n parts. fn bytes_splitn_by_seq<'a>(bytes: &'a [u8], sep: &[u8], n: usize) -> Vec<&'a [u8]> { let mut parts = Vec::new(); let mut start = 0; let mut count = 0; while count + 1 < n { if let Some(pos) = find_subsequence(&bytes[start..], sep) { parts.push(&bytes[start..start + pos]); start = start + pos + sep.len(); count += 1; } else { break; } } parts.push(&bytes[start..]); parts } /// Splits bytes by a separator sequence from the right, returning at most n parts. 
fn bytes_rsplitn_by_seq<'a>(bytes: &'a [u8], sep: &[u8], n: usize) -> Vec<&'a [u8]> {
    let mut pieces = Vec::new();
    // The still-unsplit prefix of the input; it shrinks from the right.
    let mut remaining = bytes;

    // At most `n - 1` splits (none when `n` is 0 or 1).
    for _ in 1..n {
        let Some(at) = rfind_subsequence(remaining, sep) else {
            break;
        };
        pieces.push(&remaining[at + sep.len()..]);
        remaining = &remaining[..at];
    }
    pieces.push(remaining);

    // Pieces were collected right-to-left; restore source order.
    pieces.reverse();
    pieces
}

/// Splits bytes by ASCII whitespace, filtering empty parts.
fn bytes_split_whitespace(bytes: &[u8]) -> Vec<&[u8]> {
    // Splitting on every whitespace byte yields empty pieces between adjacent
    // separators and at the edges; dropping them leaves the non-whitespace runs.
    bytes
        .split(|&b| is_py_whitespace(b))
        .filter(|piece| !piece.is_empty())
        .collect()
}

/// Splits bytes by ASCII whitespace, returning at most maxsplit+1 parts.
fn bytes_splitn_whitespace(bytes: &[u8], maxsplit: usize) -> Vec<&[u8]> {
    let mut pieces = Vec::new();
    // Invariant: `rest` never begins with whitespace.
    let mut rest = bytes_strip_whitespace_start(bytes);

    while !rest.is_empty() {
        if pieces.len() == maxsplit {
            // Split budget exhausted: the remainder is kept verbatim,
            // including any trailing whitespace.
            pieces.push(rest);
            break;
        }
        let word_len = rest
            .iter()
            .position(|&b| is_py_whitespace(b))
            .unwrap_or(rest.len());
        pieces.push(&rest[..word_len]);
        rest = bytes_strip_whitespace_start(&rest[word_len..]);
    }

    pieces
}

/// Splits bytes by ASCII whitespace from the right, returning at most maxsplit+1 parts.
fn bytes_rsplitn_whitespace(bytes: &[u8], maxsplit: usize) -> Vec<&[u8]> {
    let mut parts = Vec::new();
    // Exclusive end of the word currently being scanned, if any.
    let mut end = None;
    let mut count = 0;

    let trimmed = bytes_strip_whitespace_end(bytes);

    for i in (0..trimmed.len()).rev() {
        let b = trimmed[i];
        if is_py_whitespace(b) {
            // Close the current word only while split budget remains; afterwards
            // whitespace is kept as part of the final (leftmost) piece.
            if let Some(e) = end
                && count < maxsplit
            {
                parts.push(&trimmed[i + 1..e]);
                count += 1;
                end = None;
            }
        } else if end.is_none() {
            end = Some(i + 1);
        }
    }

    if let Some(e) = end {
        parts.push(&trimmed[..e]);
    }

    // Parts were collected right-to-left; restore source order.
    parts.reverse();
    parts
}

/// Implements Python's `bytes.splitlines([keepends])` method.
///
/// Returns a list of the lines in the bytes, breaking at line boundaries.
/// Recognised terminators are `\n`, `\r` and `\r\n`.
fn bytes_splitlines(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let keepends = parse_bytes_splitlines_args(args, vm)?;

    let mut lines = Vec::new();
    let mut start = 0;
    let len = bytes.len();

    while start < len {
        vm.heap.check_time()?;
        // `end` is one past the terminator; `line_end` is one past the line content.
        let mut end = start;
        let mut line_end = start;

        while end < len {
            match bytes[end] {
                b'\n' => {
                    line_end = end;
                    end += 1;
                    break;
                }
                b'\r' => {
                    line_end = end;
                    end += 1;
                    // Treat `\r\n` as a single terminator.
                    if end < len && bytes[end] == b'\n' {
                        end += 1;
                    }
                    break;
                }
                _ => {
                    end += 1;
                    line_end = end;
                }
            }
        }

        let line = if keepends {
            &bytes[start..end]
        } else {
            &bytes[start..line_end]
        };
        lines.push(allocate_bytes(line.to_vec(), vm.heap)?);
        start = end;
    }

    let list = List::new(lines);
    let heap_id = vm.heap.allocate(HeapData::List(list))?;
    Ok(Value::Ref(heap_id))
}

/// Parses arguments for bytes.splitlines method.
fn parse_bytes_splitlines_args(args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (pos_iter, kwargs) = args.into_parts(); defer_drop_mut!(pos_iter, vm); let kwargs = kwargs.into_iter(); defer_drop_mut!(kwargs, vm); let keepends_value = pos_iter.next(); defer_drop_mut!(keepends_value, vm); // Check no extra positional arguments if pos_iter.len() != 0 { return Err(ExcType::type_error_at_most("bytes.splitlines", 1, 2)); } // Process kwargs for (key, value) in kwargs { defer_drop!(key, vm); let mut value_guard = HeapGuard::new(value, vm); let Some(keyword_name) = key.as_either_str(value_guard.heap().heap) else { return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(value_guard.heap().interns); if key_str == "keepends" { if let Some(previous_value) = keepends_value.replace(value_guard.into_inner()) { previous_value.drop_with_heap(vm); return Err(ExcType::type_error( "bytes.splitlines() got multiple values for argument 'keepends'", )); } } else { return Err(ExcType::type_error(format!( "'{key_str}' is an invalid keyword argument for bytes.splitlines()" ))); } } // Extract keepends (default false) let keepends = if let Some(v) = keepends_value { v.py_bool(vm) } else { false }; Ok(keepends) } /// Implements Python's `bytes.partition(sep)` method. /// /// Splits the bytes at the first occurrence of sep, and returns a 3-tuple. 
fn bytes_partition(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let sep_value = args.get_one_arg("bytes.partition", vm.heap)?;
    defer_drop!(sep_value, vm);

    let sep = extract_bytes_only(sep_value, vm.heap, vm.interns)?;

    if sep.is_empty() {
        return Err(ExcType::value_error_empty_separator());
    }

    // Not found: the whole input goes first, followed by two empty values.
    let (before, sep_found, after) = match find_subsequence(bytes, sep) {
        Some(pos) => (bytes[..pos].to_vec(), sep.to_vec(), bytes[pos + sep.len()..].to_vec()),
        None => (bytes.to_vec(), Vec::new(), Vec::new()),
    };

    let before_val = allocate_bytes(before, vm.heap)?;
    let sep_val = allocate_bytes(sep_found, vm.heap)?;
    let after_val = allocate_bytes(after, vm.heap)?;

    Ok(crate::types::allocate_tuple(
        smallvec![before_val, sep_val, after_val],
        vm.heap,
    )?)
}

/// Implements Python's `bytes.rpartition(sep)` method.
///
/// Splits the bytes at the last occurrence of sep, and returns a 3-tuple.
fn bytes_rpartition(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let sep_value = args.get_one_arg("bytes.rpartition", vm.heap)?;
    defer_drop!(sep_value, vm);

    let sep = extract_bytes_only(sep_value, vm.heap, vm.interns)?;

    if sep.is_empty() {
        return Err(ExcType::value_error_empty_separator());
    }

    // Not found: two empty values come first, followed by the whole input.
    let (before, sep_found, after) = match rfind_subsequence(bytes, sep) {
        Some(pos) => (bytes[..pos].to_vec(), sep.to_vec(), bytes[pos + sep.len()..].to_vec()),
        None => (Vec::new(), Vec::new(), bytes.to_vec()),
    };

    let before_val = allocate_bytes(before, vm.heap)?;
    let sep_val = allocate_bytes(sep_found, vm.heap)?;
    let after_val = allocate_bytes(after, vm.heap)?;

    Ok(crate::types::allocate_tuple(
        smallvec![before_val, sep_val, after_val],
        vm.heap,
    )?)
}

// =============================================================================
// Replace/padding methods
// =============================================================================

/// Implements Python's `bytes.replace(old, new[, count])` method.
/// /// Returns a copy with all occurrences of old replaced by new. fn bytes_replace(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (old, new, count) = parse_bytes_replace_args("bytes.replace", args, vm)?; check_replace_size(bytes.len(), old.len(), new.len(), count, vm.heap.tracker())?; let result = if count < 0 { bytes_replace_all(bytes, &old, &new, vm)? } else { let n = usize::try_from(count).unwrap_or(usize::MAX); bytes_replace_n(bytes, &old, &new, n, vm)? }; allocate_bytes(result, vm.heap) } /// Parses arguments for bytes.replace method. fn parse_bytes_replace_args( method: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<(Vec, Vec, i64)> { let (pos_iter, kwargs) = args.into_parts(); defer_drop_mut!(pos_iter, vm); let kwargs_iter = kwargs.into_iter(); defer_drop_mut!(kwargs_iter, vm); let Some(old_value) = pos_iter.next() else { return Err(ExcType::type_error_at_least(method, 2, 0)); }; defer_drop!(old_value, vm); let Some(new_value) = pos_iter.next() else { return Err(ExcType::type_error_at_least(method, 2, 1)); }; defer_drop!(new_value, vm); let count_value = pos_iter.next(); defer_drop_mut!(count_value, vm); // Check no extra positional arguments if pos_iter.len() != 0 { return Err(ExcType::type_error_at_most(method, 3, pos_iter.len() + 3)); } // Process keyword arguments for (key, value) in kwargs_iter { defer_drop!(key, vm); let mut value_guard = HeapGuard::new(value, vm); let Some(keyword_name) = key.as_either_str(value_guard.heap().heap) else { return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(value_guard.heap().interns); match key_str { "count" => { if let Some(previous_value) = count_value.replace(value_guard.into_inner()) { previous_value.drop_with_heap(vm); return Err(ExcType::type_error(format!( "{method}() got multiple values for argument 'count'" ))); } } _ => { return Err(ExcType::type_error(format!( "'{key_str}' is an 
invalid keyword argument for {method}()" ))); } } } // Extract old bytes let old = extract_bytes_only(old_value, vm.heap, vm.interns)?.to_owned(); // Extract new bytes let new = extract_bytes_only(new_value, vm.heap, vm.interns)?.to_owned(); // Extract count (default -1) let count = if let Some(v) = count_value { v.as_int(vm.heap)? } else { -1 }; Ok((old, new, count)) } /// Replaces all occurrences of `old` with `new` in bytes. /// /// Checks the time limit periodically to enforce `max_duration` during /// potentially long replacement operations on large byte sequences. fn bytes_replace_all( bytes: &[u8], old: &[u8], new: &[u8], vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, ResourceError> { if old.is_empty() { // Empty pattern: insert new before each byte and at the end let mut result = Vec::with_capacity(bytes.len() + new.len() * (bytes.len() + 1)); for &b in bytes { vm.heap.check_time()?; result.extend_from_slice(new); result.push(b); } result.extend_from_slice(new); Ok(result) } else { let mut result = Vec::new(); let mut start = 0; while let Some(pos) = find_subsequence(&bytes[start..], old) { vm.heap.check_time()?; result.extend_from_slice(&bytes[start..start + pos]); result.extend_from_slice(new); start = start + pos + old.len(); } result.extend_from_slice(&bytes[start..]); Ok(result) } } /// Replaces at most n occurrences of `old` with `new` in bytes. /// /// Checks the time limit periodically to enforce `max_duration` during /// potentially long replacement operations on large byte sequences. 
fn bytes_replace_n( bytes: &[u8], old: &[u8], new: &[u8], n: usize, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, ResourceError> { if old.is_empty() { // Empty pattern: insert new before each byte (up to n times) let mut result = Vec::new(); let mut count = 0; for &b in bytes { vm.heap.check_time()?; if count < n { result.extend_from_slice(new); count += 1; } result.push(b); } if count < n { result.extend_from_slice(new); } Ok(result) } else { let mut result = Vec::new(); let mut start = 0; let mut count = 0; while count < n { vm.heap.check_time()?; if let Some(pos) = find_subsequence(&bytes[start..], old) { result.extend_from_slice(&bytes[start..start + pos]); result.extend_from_slice(new); start = start + pos + old.len(); count += 1; } else { break; } } result.extend_from_slice(&bytes[start..]); Ok(result) } } /// Implements Python's `bytes.center(width[, fillbyte])` method. /// /// Returns centered in a bytes of length width. fn bytes_center(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (width, fillbyte) = parse_bytes_justify_args("bytes.center", args, vm)?; let len = bytes.len(); let result = if width <= len { bytes.to_vec() } else { check_repeat_size(width, 1, vm.heap.tracker())?; let total_pad = width - len; let left_pad = total_pad / 2; let right_pad = total_pad - left_pad; let mut result = Vec::with_capacity(width); for _ in 0..left_pad { result.push(fillbyte); } result.extend_from_slice(bytes); for _ in 0..right_pad { result.push(fillbyte); } result }; allocate_bytes(result, vm.heap) } /// Implements Python's `bytes.ljust(width[, fillbyte])` method. /// /// Returns left-justified in a bytes of length width. 
fn bytes_ljust(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (width, fillbyte) = parse_bytes_justify_args("bytes.ljust", args, vm)?;

    // Already wide enough: return an unpadded copy.
    if width <= bytes.len() {
        return allocate_bytes(bytes.to_vec(), vm.heap);
    }

    check_repeat_size(width, 1, vm.heap.tracker())?;
    let mut padded = Vec::with_capacity(width);
    padded.extend_from_slice(bytes);
    // Content first, then fill bytes up to the requested width.
    padded.resize(width, fillbyte);
    allocate_bytes(padded, vm.heap)
}

/// Implements Python's `bytes.rjust(width[, fillbyte])` method.
///
/// Returns the bytes right-justified in a result of length width; the fill
/// bytes are placed before the content.
fn bytes_rjust(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (width, fillbyte) = parse_bytes_justify_args("bytes.rjust", args, vm)?;

    // Already wide enough: return an unpadded copy.
    if width <= bytes.len() {
        return allocate_bytes(bytes.to_vec(), vm.heap);
    }

    check_repeat_size(width, 1, vm.heap.tracker())?;
    let mut padded = Vec::with_capacity(width);
    // Fill bytes first, then the content.
    padded.resize(width - bytes.len(), fillbyte);
    padded.extend_from_slice(bytes);
    allocate_bytes(padded, vm.heap)
}

/// Parses arguments for bytes justify methods (center, ljust, rjust).
fn parse_bytes_justify_args(
    method: &str,
    args: ArgValues,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<(usize, u8)> {
    let pos = args.into_pos_only(method, vm.heap)?;
    defer_drop!(pos, vm);

    // Arity check first: exactly one or two positional arguments.
    let (width_value, fill_value) = match pos.as_slice() {
        [] => return Err(ExcType::type_error_at_least(method, 1, 0)),
        [width_value] => (width_value, None),
        [width_value, fill_value] => (width_value, Some(fill_value)),
        _ => return Err(ExcType::type_error_at_most(method, 2, pos.len())),
    };

    // Negative widths clamp to 0; widths too large for usize clamp to usize::MAX.
    let raw_width = width_value.as_int(vm.heap)?;
    let width = if raw_width < 0 {
        0
    } else {
        usize::try_from(raw_width).unwrap_or(usize::MAX)
    };

    // The fill byte defaults to an ASCII space.
    let Some(fill_value) = fill_value else {
        return Ok((width, b' '));
    };

    let fill_bytes = extract_bytes_only(fill_value, vm.heap, vm.interns)?;
    if fill_bytes.len() != 1 {
        return Err(ExcType::type_error(format!(
            "{method}() argument 2 must be a byte string of length 1, not bytes of length {}",
            fill_bytes.len()
        )));
    }
    Ok((width, fill_bytes[0]))
}

/// Implements Python's `bytes.zfill(width)` method.
///
/// Returns a copy of the bytes left filled with ASCII '0' digits.
fn bytes_zfill(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let width_value = args.get_one_arg("bytes.zfill", vm.heap)?; defer_drop!(width_value, vm); let width_i64 = width_value.as_int(vm.heap)?; let width = if width_i64 < 0 { 0 } else { usize::try_from(width_i64).unwrap_or(usize::MAX) }; let len = bytes.len(); let result = if width <= len { bytes.to_vec() } else { check_repeat_size(width, 1, vm.heap.tracker())?; let pad = width - len; let mut result = Vec::with_capacity(width); // Handle sign prefix if !bytes.is_empty() && (bytes[0] == b'+' || bytes[0] == b'-') { result.push(bytes[0]); result.resize(pad + 1, b'0'); result.extend_from_slice(&bytes[1..]); } else { result.resize(pad, b'0'); result.extend_from_slice(bytes); } result }; allocate_bytes(result, vm.heap) } // ============================================================================= // Join method // ============================================================================= /// Implements Python's `bytes.join(iterable)` method. /// /// Joins elements of the iterable with the separator bytes. fn bytes_join(separator: &[u8], iterable: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let Ok(iter) = MontyIter::new(iterable, vm) else { return Err(ExcType::type_error_join_not_iterable()); }; defer_drop_mut!(iter, vm); let mut result = Vec::new(); let mut index = 0usize; while let Some(item) = iter.for_next(vm)? 
{ defer_drop!(item, vm); if index > 0 { result.extend_from_slice(separator); } // Check item is bytes and extract its content match item { Value::InternBytes(id) => { result.extend_from_slice(vm.interns.get_bytes(*id)); } Value::Ref(heap_id) => { if let HeapData::Bytes(b) = vm.heap.get(*heap_id) { result.extend_from_slice(b.as_slice()); } else { let t = item.py_type(vm.heap); return Err(ExcType::type_error(format!( "sequence item {index}: expected a bytes-like object, {t} found" ))); } } _ => { let t = item.py_type(vm.heap); return Err(ExcType::type_error(format!( "sequence item {index}: expected a bytes-like object, {t} found" ))); } } index += 1; } allocate_bytes(result, vm.heap) } // ============================================================================= // Hex method // ============================================================================= /// Implements Python's `bytes.hex([sep[, bytes_per_sep]])` method. /// /// Returns a string containing the hexadecimal representation of the bytes. 
fn bytes_hex(bytes: &[u8], args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sep, bytes_per_sep) = parse_bytes_hex_args(args, vm)?; let hex_chars: Vec = bytes .iter() .flat_map(|b| { let hi = (b >> 4) & 0xf; let lo = b & 0xf; let hi_char = if hi < 10 { (b'0' + hi) as char } else { (b'a' + hi - 10) as char }; let lo_char = if lo < 10 { (b'0' + lo) as char } else { (b'a' + lo - 10) as char }; [hi_char, lo_char] }) .collect(); let result = if let Some(sep) = sep { if bytes_per_sep == 0 || bytes.is_empty() { hex_chars.iter().collect() } else { // Insert separator every `bytes_per_sep` bytes (2*bytes_per_sep hex chars) let chars_per_group = usize::try_from(bytes_per_sep.unsigned_abs()).unwrap_or(usize::MAX) * 2; let mut result = String::new(); if bytes_per_sep > 0 { // Positive: count from right, so partial group is at the START let total_len = hex_chars.len(); let first_chunk_len = total_len % chars_per_group; let first_chunk_len = if first_chunk_len == 0 { chars_per_group } else { first_chunk_len }; result.extend(&hex_chars[..first_chunk_len]); for chunk in hex_chars[first_chunk_len..].chunks(chars_per_group) { result.push(sep); result.extend(chunk); } } else { // Negative: count from left, so partial group is at the END for (i, chunk) in hex_chars.chunks(chars_per_group).enumerate() { if i > 0 { result.push(sep); } result.extend(chunk); } } result } } else { hex_chars.iter().collect() }; crate::types::str::allocate_string(result, vm.heap) } /// Parses arguments for bytes.hex method. 
fn parse_bytes_hex_args(args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<(Option, i64)> { let pos = args.into_pos_only("bytes.hex", vm.heap)?; defer_drop!(pos, vm); let (sep_value, bps_value) = match pos.as_slice() { [] => return Ok((None, 1)), [sep_value] => (sep_value, None), [sep_value, bps_value] => (sep_value, Some(bps_value)), other => return Err(ExcType::type_error_at_most("bytes.hex", 2, other.len())), }; let sep_bytes = match sep_value { Value::InternString(id) => vm.interns.get_str(*id).as_bytes(), Value::InternBytes(id) => vm.interns.get_bytes(*id), Value::Ref(heap_id) => match vm.heap.get(*heap_id) { HeapData::Str(s) => s.as_bytes(), HeapData::Bytes(b) => b.as_slice(), _ => return Err(ExcType::type_error("sep must be str or bytes")), }, _ => return Err(ExcType::type_error("sep must be str or bytes")), }; let sep = match sep_bytes { [b] if b.is_ascii() => *b as char, _ => return Err(SimpleException::new_msg(ExcType::ValueError, "sep must be a single ASCII character").into()), }; let bytes_per_sep = if let Some(bps_value) = bps_value { bps_value.as_int(vm.heap)? } else { 1 }; Ok((Some(sep), bytes_per_sep)) } // ============================================================================= // fromhex classmethod // ============================================================================= /// Implements Python's `bytes.fromhex(string)` classmethod. /// /// Creates bytes from a hexadecimal string. Whitespace is allowed between byte pairs, /// but not between the two digits of a byte. 
pub fn bytes_fromhex(args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let hex_value = args.get_one_arg("bytes.fromhex", vm.heap)?; defer_drop!(hex_value, vm); let hex_str = match hex_value { Value::InternString(id) => vm.interns.get_str(*id), Value::Ref(heap_id) => { if let HeapData::Str(s) = vm.heap.get(*heap_id) { s.as_str() } else { return Err(ExcType::type_error("fromhex() argument must be str, not bytes")); } } _ => { let t = hex_value.py_type(vm.heap); return Err(ExcType::type_error(format!("fromhex() argument must be str, not {t}"))); } }; // CPython allows whitespace BETWEEN byte pairs, but NOT within a pair. // - "de ad" is valid (whitespace between pairs) // - "d e" or "0 1" are NOT valid (whitespace within a pair) // - " 01 " is valid (whitespace before/after) // // Error messages: // - Invalid char (including whitespace in wrong place): "non-hexadecimal number found ... at position X" // - Odd number of valid hex digits: "must contain an even number of hexadecimal digits" let mut result = Vec::new(); let mut chars = hex_str.chars().enumerate().peekable(); loop { // Skip whitespace BETWEEN byte pairs (before the high nibble) while chars.peek().is_some_and(|(_, c)| c.is_whitespace()) { chars.next(); } // Get high nibble let Some((hi_pos, hi_char)) = chars.next() else { break; // End of string - we're done }; let Some(hi_val) = hex_char_to_value(hi_char) else { return Err(SimpleException::new_msg( ExcType::ValueError, format!("non-hexadecimal number found in fromhex() arg at position {hi_pos}"), ) .into()); }; // Get low nibble - must be IMMEDIATELY after high nibble (no whitespace) let Some((lo_pos, lo_char)) = chars.next() else { // End of string after high nibble = odd number of hex digits return Err(SimpleException::new_msg( ExcType::ValueError, "fromhex() arg must contain an even number of hexadecimal digits", ) .into()); }; let Some(lo_val) = hex_char_to_value(lo_char) else { // Invalid character (including whitespace) in 
low nibble position return Err(SimpleException::new_msg( ExcType::ValueError, format!("non-hexadecimal number found in fromhex() arg at position {lo_pos}"), ) .into()); }; result.push((hi_val << 4) | lo_val); } allocate_bytes(result, vm.heap) } /// Converts a hex character to its numeric value. fn hex_char_to_value(c: char) -> Option { match c { '0'..='9' => Some(c as u8 - b'0'), 'a'..='f' => Some(c as u8 - b'a' + 10), 'A'..='F' => Some(c as u8 - b'A' + 10), _ => None, } } // ============================================================================= // Helper function for bytes allocation // ============================================================================= /// Allocates bytes on the heap. fn allocate_bytes(bytes: Vec, heap: &mut Heap) -> RunResult { let heap_id = heap.allocate(HeapData::Bytes(Bytes::new(bytes)))?; Ok(Value::Ref(heap_id)) } ================================================ FILE: crates/monty/src/types/dataclass.rs ================================================ use std::fmt::Write; use ahash::AHashSet; use super::{Dict, PyTrait}; use crate::{ args::ArgValues, bytecode::{CallResult, VM}, defer_drop, exception_private::{ExcType, RunResult}, heap::{Heap, HeapId, HeapItem}, intern::Interns, resource::{ResourceError, ResourceTracker}, types::Type, value::{EitherStr, Value}, }; /// Python dataclass instance type. /// /// Represents an instance of a dataclass with a class name, field values, and /// frozen/mutable semantics. Method calls on dataclasses are detected lazily: /// when `call_attr` is invoked on a dataclass and the attribute name is not found /// in `attrs`, it is dispatched as a `MethodCall` to the host (provided the name /// is public — no leading underscore). 
///
/// # Fields
/// - `name`: The class name (e.g., "Point", "User")
/// - `field_names`: Declared field names in definition order (used for repr)
/// - `attrs`: All attributes including declared fields and dynamically added ones
/// - `frozen`: Whether the dataclass instance is immutable
///
/// # Hashability
/// When `frozen` is true, the dataclass is immutable and hashable. The hash
/// is computed from the class name and the declared fields (names and values) only.
/// When `frozen` is false, the dataclass is mutable and unhashable.
///
/// # Reference Counting
/// The `attrs` Dict contains Values that may be heap-allocated. The
/// `py_dec_ref_ids` method properly handles decrementing refcounts for
/// all attribute values when the dataclass instance is freed.
///
/// # Attribute Access
/// - Getting: Looks up the attribute name in the attrs Dict
/// - Setting: Updates or adds the attribute in attrs (only if not frozen)
/// - Method calls: If the attribute is a public name not found in attrs, dispatched to host
/// - repr: Only shows declared fields (from field_names), not extra attributes
#[derive(Debug)]
pub(crate) struct Dataclass {
    /// The class name (e.g., "Point", "User")
    name: EitherStr,
    /// Identifier of the type, from `id(type(dc))` in python.
    type_id: u64,
    /// Declared field names in definition order (for repr and hashing)
    field_names: Vec,
    /// All attributes (both declared fields and dynamically added)
    attrs: Dict,
    /// Whether this dataclass instance is immutable (affects hashability)
    frozen: bool,
}

impl Dataclass {
    /// Creates a new dataclass instance.
    ///
    /// # Arguments
    /// * `name` - The class name
    /// * `type_id` - The type ID of the dataclass
    /// * `field_names` - Declared field names in definition order
    /// * `attrs` - Dict of attribute name -> value pairs (ownership transferred)
    /// * `frozen` - Whether this dataclass instance is immutable (affects hashability)
    #[must_use]
    pub fn new(name: impl Into, type_id: u64, field_names: Vec, attrs: Dict, frozen: bool) -> Self {
        Self {
            name: name.into(),
            type_id,
            field_names,
            attrs,
            frozen,
        }
    }

    /// Returns the class name.
    #[must_use]
    pub fn name<'a>(&'a self, interns: &'a Interns) -> &'a str {
        self.name.as_str(interns)
    }

    /// Returns the type ID of the dataclass.
    #[must_use]
    pub fn type_id(&self) -> u64 {
        self.type_id
    }

    /// Returns a reference to the declared field names.
    #[must_use]
    pub fn field_names(&self) -> &[String] {
        &self.field_names
    }

    /// Returns whether this dataclass contains any heap references (`Value::Ref`).
    ///
    /// Delegates to the underlying attrs Dict.
    #[inline]
    #[must_use]
    pub fn has_refs(&self) -> bool {
        self.attrs.has_refs()
    }

    /// Returns a reference to the attrs Dict.
    #[must_use]
    pub fn attrs(&self) -> &Dict {
        &self.attrs
    }

    /// Returns whether this dataclass instance is frozen (immutable).
    #[must_use]
    pub fn is_frozen(&self) -> bool {
        self.frozen
    }

    /// Sets an attribute value.
    ///
    /// The caller transfers ownership of both `name` and `value`. Returns the
    /// old value if the attribute existed (caller must drop it), or None if this
    /// is a new attribute.
    ///
    /// Returns `FrozenInstanceError` if the dataclass is frozen; in that case
    /// both `name` and `value` are dropped here before returning.
    pub fn set_attr(
        &mut self,
        name: Value,
        value: Value,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
    ) -> RunResult> {
        if self.frozen {
            // Get attribute name for error message
            // (only interned strings are resolved; anything else uses the fallback literal)
            let attr_name = match &name {
                Value::InternString(id) => vm.interns.get_str(*id).to_string(),
                _ => "".to_string(),
            };
            // Drop the values we were given ownership of
            name.drop_with_heap(vm);
            value.drop_with_heap(vm);
            return Err(ExcType::frozen_instance_error(&attr_name));
        }
        self.attrs.set(name, value, vm)
    }

    /// Computes the hash for this dataclass if it's frozen.
    ///
    /// Returns `Ok(Some(hash))` for frozen (immutable) dataclasses, `Ok(None)` for mutable ones
    /// and for frozen ones holding a declared field whose value is unhashable.
    /// Returns `Err(ResourceError::Recursion)` if the recursion limit is exceeded.
    /// The hash is computed from the class name and, for each declared field in order,
    /// the field name and the hash of its value.
    pub fn compute_hash(
        &self,
        heap: &mut Heap,
        interns: &Interns,
    ) -> Result, ResourceError> {
        use std::{
            collections::hash_map::DefaultHasher,
            hash::{Hash, Hasher},
        };

        // Only frozen (immutable) dataclasses are hashable
        if !self.frozen {
            return Ok(None);
        }

        // Hashing field values can recurse, so count this call against the depth limit
        let token = heap.incr_recursion_depth()?;
        defer_drop!(token, heap);

        let mut hasher = DefaultHasher::new();
        // Hash the class name
        self.name.hash(&mut hasher);
        // Hash each declared field (name, value) pair in order
        for field_name in &self.field_names {
            field_name.hash(&mut hasher);
            // A declared field missing from attrs contributes only its name
            if let Some(value) = self.attrs.get_by_str(field_name, heap, interns) {
                match value.py_hash(heap, interns)? {
                    Some(h) => h.hash(&mut hasher),
                    None => return Ok(None),
                }
            }
        }
        Ok(Some(hasher.finish()))
    }
}

impl PyTrait for Dataclass {
    fn py_type(&self, _heap: &Heap) -> Type {
        Type::Dataclass
    }

    fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option {
        // Dataclasses don't have a length
        None
    }

    fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
        // Dataclasses are equal if they have the same name and equal attrs
        // (`type_id`, `field_names` and `frozen` are not compared)
        Ok(self.name == other.name && self.attrs.py_eq(&other.attrs, vm)?)
    }

    fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool {
        // Dataclass instances are always truthy (like Python objects)
        true
    }

    fn py_repr_fmt(
        &self,
        f: &mut impl Write,
        vm: &VM<'_, '_, impl ResourceTracker>,
        heap_ids: &mut AHashSet,
    ) -> std::fmt::Result {
        // Check depth limit before recursing
        let heap = &*vm.heap;
        let Some(token) = heap.incr_recursion_depth_for_repr() else {
            return f.write_str("...");
        };
        crate::defer_drop_immutable_heap!(token, heap);

        // Format: ClassName(field1=value1, field2=value2, ...)
        // Only declared fields are shown, not dynamically added attributes
        f.write_str(self.name(vm.interns))?;
        f.write_char('(')?;

        let mut first = true;
        for field_name in &self.field_names {
            if !first {
                f.write_str(", ")?;
            }
            first = false;

            // Write field name
            f.write_str(field_name)?;
            f.write_char('=')?;

            // Look up value in attrs
            if let Some(value) = self.attrs.get_by_str(field_name, heap, vm.interns) {
                value.py_repr_fmt(f, vm, heap_ids)?;
            } else {
                // Field not found - shouldn't happen for well-formed dataclasses
                f.write_str("")?;
            }
        }

        f.write_char(')')?;
        Ok(())
    }

    /// Performs lazy method detection for dataclass instances.
    ///
    /// If the attribute is a public name (no leading underscore) not found in the
    /// dataclass's attrs dict, returns `MethodCall` so the VM yields to the host.
    /// Otherwise handles the call directly:
    /// - Attributes that exist in attrs but aren't callable produce `TypeError`
    /// - Private/dunder attributes that aren't in attrs produce `AttributeError`
    fn py_call_attr(
        &mut self,
        self_id: HeapId,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
        attr: &EitherStr,
        args: ArgValues,
    ) -> RunResult {
        let heap = &mut *vm.heap;
        let interns = vm.interns;
        let attr_str = attr.as_str(interns);
        // Only public methods (no underscore prefix = no dunders, no private)
        if !attr_str.starts_with('_') && self.attrs.get_by_str(attr_str, heap, interns).is_none() {
            // Clone self and prepend to args for the method call
            // inc_ref works even when data is taken out (refcount metadata is separate)
            heap.inc_ref(self_id);
            let self_arg = Value::Ref(self_id);
            let args_with_self = args.prepend(self_arg);
            Ok(CallResult::MethodCall(attr.clone(), args_with_self))
        } else {
            // Not a method call — handle directly
            let method_name = attr.as_str(interns);
            // The arguments are not forwarded anywhere on this path, so release them
            defer_drop!(args, heap);
            // If the attribute exists in attrs, it's a data value (not callable)
            if let Some(value) = self.attrs.get_by_str(method_name, heap, interns) {
                let type_name = value.py_type(heap);
                Err(ExcType::type_error_not_callable_object(type_name))
            } else {
                // Attribute doesn't exist — use the class name (e.g., "Point") not "Dataclass"
                Err(ExcType::attribute_error(self.name(interns), method_name))
            }
        }
    }

    /// Looks up `attr` in the attrs Dict and returns a new reference to its value,
    /// or an `AttributeError` naming the dataclass's class when it is missing.
    fn py_getattr(&self, attr: &EitherStr, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> {
        let attr_name = attr.as_str(vm.interns);
        match self.attrs.get_by_str(attr_name, vm.heap, vm.interns) {
            Some(value) => Ok(Some(CallResult::Value(value.clone_with_heap(vm.heap)))),
            // Raise the AttributeError here, rather than returning `Ok(None)`, so the
            // message uses the class name instead of `self.py_type(heap)`
            None => Err(ExcType::attribute_error(self.name(vm.interns), attr_name)),
        }
    }
}

impl HeapItem for Dataclass {
    fn py_estimate_size(&self) -> usize {
        // Struct itself + class name + field-name string bytes + the attrs Dict
        std::mem::size_of::()
            + self.name.py_estimate_size()
            + self.field_names.iter().map(String::len).sum::()
            + self.attrs.py_estimate_size()
    }

    fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
        // Delegate to the attrs Dict which handles all nested heap references
        self.attrs.py_dec_ref_ids(stack);
    }
}

// Custom serde implementation for Dataclass.
// Serializes all five fields.
impl serde::Serialize for Dataclass {
    fn serialize(&self, serializer: S) -> Result {
        use serde::ser::SerializeStruct;
        let mut state = serializer.serialize_struct("Dataclass", 5)?;
        state.serialize_field("name", &self.name)?;
        state.serialize_field("type_id", &self.type_id)?;
        state.serialize_field("field_names", &self.field_names)?;
        state.serialize_field("attrs", &self.attrs)?;
        state.serialize_field("frozen", &self.frozen)?;
        state.end()
    }
}

impl<'de> serde::Deserialize<'de> for Dataclass {
    fn deserialize>(deserializer: D) -> Result {
        // Mirror struct so the field-by-field deserialization can be derived
        #[derive(serde::Deserialize)]
        struct DataclassData {
            name: EitherStr,
            type_id: u64,
            field_names: Vec,
            attrs: Dict,
            frozen: bool,
        }
        let dc = DataclassData::deserialize(deserializer)?;
        Ok(Self {
            name: dc.name,
            type_id: dc.type_id,
            field_names: dc.field_names,
            attrs: dc.attrs,
            frozen: dc.frozen,
        })
    }
}

================================================
FILE: crates/monty/src/types/dict.rs
================================================
use std::{
    collections::hash_map::DefaultHasher,
    fmt::Write,
    hash::{Hash, Hasher},
};

use ahash::AHashSet;
use hashbrown::{HashTable, hash_table::Entry};
use smallvec::smallvec;

use super::{DictItemsView, DictKeysView, DictValuesView, MontyIter, PyTrait, allocate_tuple};
use crate::{
    args::{ArgValues, KwargsValues},
    bytecode::{CallResult, VM},
    defer_drop, defer_drop_mut,
    exception_private::{ExcType, RunResult},
    heap::{ContainsHeap, DropWithHeap, Heap, HeapData, HeapGuard, HeapId, HeapItem},
    intern::{Interns, StaticStrings},
    resource::{ResourceError, ResourceTracker},
    types::Type,
    value::{EitherStr, Value},
};

/// Python dict type preserving insertion order.
///
/// This type provides Python dict semantics including dynamic key-value namespaces,
/// reference counting for heap values, and standard dict methods.
///
/// # Implemented Methods
/// - `get(key[, default])` - Get value or default
/// - `keys()` - Return view of keys
/// - `values()` - Return view of values
/// - `items()` - Return view of (key, value) pairs
/// - `pop(key[, default])` - Remove and return value
/// - `clear()` - Remove all items
/// - `copy()` - Shallow copy
/// - `update(other)` - Update from dict or iterable of pairs
/// - `setdefault(key[, default])` - Get or set default value
/// - `popitem()` - Remove and return last (key, value) pair
/// - `fromkeys(iterable[, value])` - Create dict from keys (classmethod)
///
/// All dict methods from Python's builtins are implemented.
///
/// # Storage Strategy
/// Uses a `HashTable<usize>` for hash lookups combined with a dense `Vec<DictEntry>`
/// to preserve insertion order (matching Python 3.7+ behavior). The hash table maps
/// key hashes to indices in the entries vector. This design provides O(1) lookups
/// while maintaining insertion order for iteration.
///
/// # Reference Counting
/// Both `set()` and `from_pairs()` take ownership of the keys and values they are
/// given without incrementing refcounts (caller must ensure the values' refcounts
/// already account for the dict's reference).
///
/// # GC Optimization
/// The `contains_refs` flag tracks whether the dict contains any `Value::Ref` items.
/// This allows `collect_child_ids` and `py_dec_ref_ids` to skip iteration when the
/// dict contains only primitive values (ints, bools, None, etc.), significantly
/// improving GC performance for dicts of primitives.
#[derive(Debug, Default)]
pub(crate) struct Dict {
    /// indices mapping from the entry hash to its index.
    indices: HashTable,
    /// entries is a dense vec maintaining entry order.
    entries: Vec,
    /// True if any key or value in the dict is a `Value::Ref`. Used to skip iteration
    /// in `collect_child_ids` and `py_dec_ref_ids` when no refs are present.
    /// Only transitions from false to true (never back) since tracking removals would be O(n).
    contains_refs: bool,
}

/// One key/value pair stored in `Dict::entries`, together with the key's cached hash.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
struct DictEntry {
    key: Value,
    value: Value,
    /// the hash is needed here for correct use of insert_unique
    hash: u64,
}

impl Dict {
    /// Creates a new empty dict.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates an empty dict with room for `capacity` entries in both the hash
    /// table and the entries vector.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            indices: HashTable::with_capacity(capacity),
            entries: Vec::with_capacity(capacity),
            contains_refs: false,
        }
    }

    /// Returns whether this dict contains any heap references (`Value::Ref`).
    ///
    /// Used during allocation to determine if this container could create cycles,
    /// and in `collect_child_ids` and `py_dec_ref_ids` to skip iteration when no refs
    /// are present.
    ///
    /// Note: This flag only transitions from false to true (never back). When a ref is
    /// removed via `pop()`, we do NOT recompute the flag because that would be O(n).
    /// This is conservative - we may iterate unnecessarily if all refs were removed,
    /// but we'll never skip iteration when refs exist.
    #[inline]
    #[must_use]
    pub fn has_refs(&self) -> bool {
        self.contains_refs
    }

    /// Creates a dict from a vector of (key, value) pairs.
    ///
    /// Assumes the caller is transferring ownership of all keys and values in the pairs.
    /// Does NOT increment reference counts since ownership is being transferred.
    /// Returns Err if any key is unhashable (e.g., list, dict).
    /// When a key occurs more than once, the later value wins and the replaced value is dropped.
    pub fn from_pairs(pairs: Vec<(Value, Value)>, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
        let pairs_iter = pairs.into_iter();
        defer_drop_mut!(pairs_iter, vm);
        let dict = Self::with_capacity(pairs_iter.len());
        let mut dict_guard = HeapGuard::new(dict, vm);
        let (dict, vm) = dict_guard.as_parts_mut();
        for (key, value) in pairs_iter {
            if let Some(old_value) = dict.set(key, value, vm)? {
                old_value.drop_with_heap(vm);
            }
        }
        Ok(dict_guard.into_inner())
    }

    /// Gets a value from the dict by key.
    ///
    /// Returns Ok(Some(value)) if key exists, Ok(None) if key doesn't exist.
    /// Returns Err if key is unhashable.
    pub fn get(&self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> {
        if let Some(index) = self.find_index_hash(key, vm)?.0 {
            Ok(Some(&self.entries[index].value))
        } else {
            Ok(None)
        }
    }

    /// Gets a value from the dict by string key name (immutable lookup).
    ///
    /// This is an O(1) lookup that doesn't require mutable heap access.
    /// Only works for string keys - returns None if the key is not found.
    pub fn get_by_str(&self, key_str: &str, heap: &Heap, interns: &Interns) -> Option<&Value> {
        // Compute hash for the string key
        // NOTE(review): presumably matches the hash stored for str keys by `set` — confirm
        let mut hasher = DefaultHasher::new();
        key_str.hash(&mut hasher);
        let hash = hasher.finish();

        // Find entry with matching hash and key
        self.indices
            .find(hash, |&idx| {
                let entry_key = &self.entries[idx].key;
                // Only interned and heap-allocated strings can match a str key
                match entry_key {
                    Value::InternString(id) => interns.get_str(*id) == key_str,
                    Value::Ref(id) => {
                        if let HeapData::Str(s) = heap.get(*id) {
                            s.as_str() == key_str
                        } else {
                            false
                        }
                    }
                    _ => false,
                }
            })
            .map(|&idx| &self.entries[idx].value)
    }

    /// Sets a key-value pair in the dict.
    ///
    /// The caller transfers ownership of `key` and `value` to the dict. Their refcounts
    /// are NOT incremented here - the caller is responsible for ensuring the refcounts
    /// were already incremented (e.g., via `clone_with_heap` or `evaluate_use`).
    ///
    /// If the key already exists, replaces the old value and returns it (caller now
    /// owns the old value and is responsible for its refcount).
    /// Returns Err if key is unhashable.
pub fn set( &mut self, key: Value, value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult> { // Track if we're adding a reference for GC optimization if matches!(key, Value::Ref(_)) || matches!(value, Value::Ref(_)) { self.contains_refs = true; } // Handle hash computation errors explicitly so we can drop key/value properly let (opt_index, hash) = match self.find_index_hash(&key, vm) { Ok(result) => result, Err(e) => { // Drop the key and value before returning the error key.drop_with_heap(vm); value.drop_with_heap(vm); return Err(e); } }; let entry = DictEntry { key, value, hash }; if let Some(index) = opt_index { // Key exists, replace in place to preserve insertion order let old_entry = std::mem::replace(&mut self.entries[index], entry); // Decrement refcount for old key (we're discarding it) old_entry.key.drop_with_heap(vm); // Transfer ownership of the old value to caller (no clone needed) Ok(Some(old_entry.value)) } else { // Key doesn't exist, add new pair to indices and entries let index = self.entries.len(); self.entries.push(entry); self.indices .insert_unique(hash, index, |index| self.entries[*index].hash); Ok(None) } } /// Removes and returns a key-value pair from the dict. /// /// Returns Ok(Some((key, value))) if key exists, Ok(None) if key doesn't exist. /// Returns Err if key is unhashable. /// /// Reference counting: does not decrement refcounts for removed key and value; /// caller assumes ownership and is responsible for managing their refcounts. pub fn pop(&mut self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { let hash = key .py_hash(vm.heap, vm.interns)? 
.ok_or_else(|| ExcType::type_error_unhashable_dict_key(key.py_type(vm.heap)))?; let entry = self.indices.entry( hash, |v| key.py_eq(&self.entries[*v].key, vm).unwrap_or(false), |index| self.entries[*index].hash, ); if let Entry::Occupied(occ_entry) = entry { let entry = self.entries.remove(*occ_entry.get()); occ_entry.remove(); // Don't decrement refcounts - caller now owns the values Ok(Some((entry.key, entry.value))) } else { Ok(None) } } /// Returns the number of key-value pairs in the dict. #[must_use] pub fn len(&self) -> usize { self.entries.len() } /// Returns true if the dict is empty. #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } /// Returns an iterator over references to (key, value) pairs. pub fn iter(&self) -> DictIter<'_> { self.into_iter() } /// Returns the key at the given iteration index, or None if out of bounds. /// /// Used for index-based iteration in for loops. Returns a reference to /// the key at the given position in insertion order. pub fn key_at(&self, index: usize) -> Option<&Value> { self.entries.get(index).map(|e| &e.key) } /// Returns the value at the given iteration index, or None if out of bounds. /// /// Dictionary views use this to produce live `dict_values` iteration directly /// from the underlying storage without copying the dictionary. pub fn value_at(&self, index: usize) -> Option<&Value> { self.entries.get(index).map(|e| &e.value) } /// Returns the key-value pair at the given iteration index, or None if out of bounds. /// /// This accessor keeps dict-view iteration logic out of the storage internals /// while still allowing `dict_items` to produce tuples on demand. pub fn item_at(&self, index: usize) -> Option<(&Value, &Value)> { self.entries.get(index).map(|entry| (&entry.key, &entry.value)) } /// Creates a dict from the `dict([mapping_or_pairs], **kwargs)` constructor call. /// /// Supported forms: /// - `dict()` returns an empty dict. /// - `dict(existing_dict)` returns a shallow copy of the dict. 
/// - `dict(iterable_of_pairs)` consumes `(key, value)` pairs from the iterable.
/// - `dict(**kwargs)` inserts keyword arguments as string keys.
///
/// Keyword arguments are applied after the optional positional source, matching
/// CPython precedence (`dict([('a', 1)], a=2)` yields `{'a': 2}`).
///
/// For now, only real `dict` values use mapping-copy semantics; other values
/// are interpreted as iterables of pairs.
///
/// Returns a `Value::Ref` to the newly allocated dict. More than one positional
/// argument yields the "at most 1" TypeError built by `type_error_at_most`.
pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let dict = Self::new();
    // The dict under construction is held in a guard until it is handed to the heap.
    let mut dict_guard = HeapGuard::new(dict, vm);
    {
        let (dict, vm) = dict_guard.as_parts_mut();

        let (pos_iter, kwargs) = args.into_parts();
        defer_drop_mut!(pos_iter, vm);
        let mut kwargs_guard = HeapGuard::new(kwargs, vm);

        if let Some(other_value) = pos_iter.next() {
            let other_value_guard = HeapGuard::new(other_value, kwargs_guard.heap());
            // One positional was already taken, so the reported count is remaining + 1.
            if pos_iter.len() != 0 {
                return Err(ExcType::type_error_at_most("dict", 1, pos_iter.len() + 1));
            }
            let other_value = other_value_guard.into_inner();
            dict_merge_from_value(dict, other_value, kwargs_guard.heap())?;
        }

        // Keyword arguments are merged last so they override positional entries.
        let kwargs = kwargs_guard.into_inner();
        dict_merge_from_kwargs(dict, kwargs, vm)?;
    }

    let dict = dict_guard.into_inner();
    let heap_id = vm.heap.allocate(HeapData::Dict(dict))?;
    Ok(Value::Ref(heap_id))
}

/// Hashes `key` and looks up its position in `entries`.
///
/// Returns `(Some(index), hash)` when an equal key is stored, `(None, hash)` when
/// it is not, and the unhashable-key TypeError when `py_hash` yields no hash.
fn find_index_hash(
    &self,
    key: &Value,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<(Option, u64)> {
    let hash = key
        .py_hash(vm.heap, vm.interns)?
        .ok_or_else(|| ExcType::type_error_unhashable_dict_key(key.py_type(vm.heap)))?;

    // Dict keys are typically shallow (strings, ints, tuples of primitives),
    // so recursion errors are unlikely. If one occurs, treat it as "not equal" -
    // the key lookup fails but doesn't crash.
    let opt_index = self
        .indices
        .find(hash, |v| key.py_eq(&self.entries[*v].key, vm).unwrap_or(false))
        .copied();

    Ok((opt_index, hash))
}
}

/// Iterator over borrowed (key, value) pairs in a dict.
pub(crate) struct DictIter<'a>(std::slice::Iter<'a, DictEntry>);

impl<'a> Iterator for DictIter<'a> {
    type Item = (&'a Value, &'a Value);

    /// Yields the next entry as a `(key, value)` pair of references.
    fn next(&mut self) -> Option {
        self.0.next().map(|e| (&e.key, &e.value))
    }
}

impl<'a> IntoIterator for &'a Dict {
    type Item = (&'a Value, &'a Value);
    type IntoIter = DictIter<'a>;

    /// Iterates the entries in storage (insertion) order without consuming the dict.
    fn into_iter(self) -> Self::IntoIter {
        DictIter(self.entries.iter())
    }
}

/// Iterator over owned (key, value) pairs from a consumed dict.
pub(crate) struct DictIntoIter(std::vec::IntoIter);

impl Iterator for DictIntoIter {
    type Item = (Value, Value);

    /// Yields the next entry as an owned `(key, value)` pair; the stored hash is discarded.
    fn next(&mut self) -> Option {
        self.0.next().map(|e| (e.key, e.value))
    }

    /// Forwards the size hint of the underlying vector iterator.
    fn size_hint(&self) -> (usize, Option) {
        self.0.size_hint()
    }
}

impl ExactSizeIterator for DictIntoIter {}

impl IntoIterator for Dict {
    type Item = (Value, Value);
    type IntoIter = DictIntoIter;

    /// Consumes the dict and iterates its entries in storage order.
    fn into_iter(self) -> Self::IntoIter {
        DictIntoIter(self.entries.into_iter())
    }
}

impl PyTrait for Dict {
    fn py_type(&self, _heap: &Heap) -> Type {
        Type::Dict
    }

    fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option {
        Some(self.len())
    }

    /// Compares two dicts: equal lengths, and every key of `self` maps to an equal
    /// value in `other`.
    fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
        if self.len() != other.len() {
            return Ok(false);
        }

        // Value comparison can recurse into nested containers, so track depth here.
        let token = vm.heap.incr_recursion_depth()?;
        defer_drop!(token, vm);

        for entry in &self.entries {
            vm.heap.check_time()?;
            // A failed lookup (missing key or lookup error) counts as "not equal".
            if let Ok(Some(other_v)) = other.get(&entry.key, vm) {
                if !entry.value.py_eq(other_v, vm)? {
                    return Ok(false);
                }
            } else {
                return Ok(false);
            }
        }
        Ok(true)
    }

    fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool {
        !self.is_empty()
    }

    /// Writes the `{key: value, ...}` repr of the dict.
    ///
    /// Writes `{...}` when the repr depth limit is reached, and truncates with
    /// `, ...[timeout]` when the time check fails between entries.
    fn py_repr_fmt(
        &self,
        f: &mut impl Write,
        vm: &VM<'_, '_, impl ResourceTracker>,
        heap_ids: &mut AHashSet,
    ) -> std::fmt::Result {
        if self.is_empty() {
            return f.write_str("{}");
        }

        let heap = &*vm.heap;
        // Check depth limit before recursing
        let Some(token) = heap.incr_recursion_depth_for_repr() else {
            return f.write_str("{...}");
        };
        crate::defer_drop_immutable_heap!(token, heap);

        f.write_char('{')?;
        let mut first = true;
        for entry in &self.entries {
            if !first {
                // The time check only runs between entries; the closing brace is
                // still written after the truncation marker.
                if heap.check_time().is_err() {
                    f.write_str(", ...[timeout]")?;
                    break;
                }
                f.write_str(", ")?;
            }
            first = false;
            entry.key.py_repr_fmt(f, vm, heap_ids)?;
            f.write_str(": ")?;
            entry.value.py_repr_fmt(f, vm, heap_ids)?;
        }
        f.write_char('}')?;

        Ok(())
    }

    /// Implements `dict[key]`: returns a clone of the stored value, or a KeyError
    /// when the key is absent.
    fn py_getitem(&self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
        match self.get(key, vm)? {
            Some(value) => Ok(value.clone_with_heap(vm)),
            None => Err(ExcType::key_error(key, vm)),
        }
    }

    /// Implements `dict[key] = value`, taking ownership of both arguments.
    fn py_setitem(&mut self, key: Value, value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<()> {
        // Drop the old value if one was replaced
        if let Some(old_value) = self.set(key, value, vm)? {
            old_value.drop_with_heap(vm);
        }
        Ok(())
    }

    /// Dispatches a method call on the dict by its statically known attribute name.
    ///
    /// Attributes that are not static strings, or that match no arm below, release
    /// `args` and return an AttributeError for `Type::Dict`.
    fn py_call_attr(
        &mut self,
        self_id: HeapId,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
        attr: &EitherStr,
        args: ArgValues,
    ) -> RunResult {
        let Some(method) = attr.static_string() else {
            args.drop_with_heap(vm.heap);
            return Err(ExcType::attribute_error(Type::Dict, attr.as_str(vm.interns)));
        };

        let value = match method {
            StaticStrings::Get => {
                // dict.get() accepts 1 or 2 arguments
                let (key, default) = args.get_one_two_args("get", vm.heap)?;
                defer_drop!(key, vm);
                let default = default.unwrap_or(Value::None);
                // Guard the default so it is released if the lookup below fails.
                let mut default_guard = HeapGuard::new(default, vm);
                let vm = default_guard.heap();
                // Handle the lookup - may fail for unhashable keys
                let value = match self.get(key, vm)? {
                    Some(v) => v.clone_with_heap(vm),
                    None => default_guard.into_inner(),
                };
                Ok(value)
            }
            StaticStrings::Keys => {
                args.check_zero_args("dict.keys", vm.heap)?;
                let view_id = vm.heap.allocate(HeapData::DictKeysView(DictKeysView::new(self_id)))?;
                // The view stores `self_id`, so the dict's refcount is bumped once
                // the view has been allocated.
                vm.heap.inc_ref(self_id);
                Ok(Value::Ref(view_id))
            }
            StaticStrings::Values => {
                args.check_zero_args("dict.values", vm.heap)?;
                let view_id = vm
                    .heap
                    .allocate(HeapData::DictValuesView(DictValuesView::new(self_id)))?;
                vm.heap.inc_ref(self_id);
                Ok(Value::Ref(view_id))
            }
            StaticStrings::Items => {
                args.check_zero_args("dict.items", vm.heap)?;
                let view_id = vm.heap.allocate(HeapData::DictItemsView(DictItemsView::new(self_id)))?;
                vm.heap.inc_ref(self_id);
                Ok(Value::Ref(view_id))
            }
            StaticStrings::Pop => {
                // dict.pop() accepts 1 or 2 arguments (key, optional default)
                let (key, default) = args.get_one_two_args("pop", vm.heap)?;
                defer_drop!(key, vm);
                let mut default_guard = HeapGuard::new(default, vm);
                let vm = default_guard.heap();
                if let Some((old_key, value)) = self.pop(key, vm)? {
                    // Drop the old key - we don't need it
                    old_key.drop_with_heap(vm);
                    Ok(value)
                } else {
                    let (default, vm) = default_guard.into_parts();
                    // No matching key - return default if provided, else KeyError
                    if let Some(d) = default {
                        Ok(d)
                    } else {
                        Err(ExcType::key_error(key, vm))
                    }
                }
            }
            StaticStrings::Clear => {
                args.check_zero_args("dict.clear", vm.heap)?;
                dict_clear(self, vm.heap);
                Ok(Value::None)
            }
            StaticStrings::Copy => {
                args.check_zero_args("dict.copy", vm.heap)?;
                dict_copy(self, vm)
            }
            StaticStrings::Update => dict_update(self, args, vm),
            StaticStrings::Setdefault => dict_setdefault(self, args, vm),
            StaticStrings::Popitem => {
                args.check_zero_args("dict.popitem", vm.heap)?;
                dict_popitem(self, vm.heap)
            }
            // fromkeys is a classmethod but also accessible on instances
            StaticStrings::Fromkeys => dict_fromkeys(args, vm),
            _ => {
                args.drop_with_heap(vm.heap);
                return Err(ExcType::attribute_error(Type::Dict, attr.as_str(vm.interns)));
            }
        };
        value.map(CallResult::Value)
    }
}

impl HeapItem for Dict {
    fn py_estimate_size(&self) -> usize {
        // Dict size: struct overhead + entries (2 Values per entry for key+value)
        std::mem::size_of::() + self.len() * 2 * std::mem::size_of::()
    }

    /// Pushes the heap id of every `Value::Ref` key and value onto `stack`.
    fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
        // Skip iteration if no refs - major GC optimization for dicts of primitives
        if !self.contains_refs {
            return;
        }
        for entry in &mut self.entries {
            if let Value::Ref(id) = &entry.key {
                stack.push(*id);
                #[cfg(feature = "ref-count-panic")]
                entry.key.dec_ref_forget();
            }
            if let Value::Ref(id) = &entry.value {
                stack.push(*id);
                #[cfg(feature = "ref-count-panic")]
                entry.value.dec_ref_forget();
            }
        }
    }
}

impl DropWithHeap for Dict {
    /// Releases every entry through the heap.
    fn drop_with_heap(self, heap: &mut H) {
        self.entries.drop_with_heap(heap);
    }
}

impl DropWithHeap for DictEntry {
    /// Releases the key, then the value, through the heap.
    fn drop_with_heap(self, heap: &mut H) {
        self.key.drop_with_heap(heap);
        self.value.drop_with_heap(heap);
    }
}

/// Implements Python's `dict.clear()` method.
///
/// Removes all items from the dict.
fn dict_clear(dict: &mut Dict, heap: &mut Heap) { dict.entries.drain(..).drop_with_heap(heap); dict.indices.clear(); // Note: contains_refs stays true even if all refs removed, per conservative GC strategy } /// Implements Python's `dict.copy()` method. /// /// Returns a shallow copy of the dict. fn dict_copy(dict: &Dict, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { // Copy all key-value pairs (incrementing refcounts) let pairs: Vec<(Value, Value)> = dict .iter() .map(|(k, v)| (k.clone_with_heap(vm), v.clone_with_heap(vm))) .collect(); let new_dict = Dict::from_pairs(pairs, vm)?; let heap_id = vm.heap.allocate(HeapData::Dict(new_dict))?; Ok(Value::Ref(heap_id)) } /// Implements Python's `dict.update([other], **kwargs)` method. /// /// Updates the dict with key-value pairs from `other` and/or `kwargs`. /// If `other` is a dict, copies its key-value pairs. /// If `other` is an iterable, expects pairs of (key, value). /// Keyword arguments are also added to the dict. fn dict_update(dict: &mut Dict, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (pos_iter, kwargs) = args.into_parts(); defer_drop_mut!(pos_iter, vm); let mut kwargs_guard = HeapGuard::new(kwargs, vm); if let Some(other_value) = pos_iter.next() { let other_value_guard = HeapGuard::new(other_value, kwargs_guard.heap()); if pos_iter.len() != 0 { return Err(ExcType::type_error_at_most("dict.update", 1, pos_iter.len() + 1)); } let other_value = other_value_guard.into_inner(); dict_merge_from_value(dict, other_value, kwargs_guard.heap())?; } let kwargs = kwargs_guard.into_inner(); dict_merge_from_kwargs(dict, kwargs, vm)?; Ok(Value::None) } /// Merges key-value pairs from either a dict or an iterable of 2-item pairs. /// /// This is shared between `dict()` construction and `dict.update()` so both /// entry points follow identical positional-source semantics. 
fn dict_merge_from_value( dict: &mut Dict, other_value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<()> { let mut other_value_guard = HeapGuard::new(other_value, vm); { let (other_value, vm) = other_value_guard.as_parts(); if let Value::Ref(id) = other_value && let HeapData::Dict(src_dict) = vm.heap.get(*id) { // Clone key-value pairs from the source dict. let pairs: Vec<(Value, Value)> = src_dict .iter() .map(|(k, v)| (k.clone_with_heap(vm.heap), v.clone_with_heap(vm.heap))) .collect(); // Apply pairs into the target dict. for (key, value) in pairs { let old_value = dict.set(key, value, vm)?; old_value.drop_with_heap(vm.heap); } return Ok(()); } } // Non-dict values are interpreted as iterable-of-pairs. let other_value = other_value_guard.into_inner(); dict_merge_from_iterable_pairs(dict, other_value, vm) } /// Merges key-value pairs from an iterable of 2-item iterables. /// /// Each item from `iterable` is treated as `(key, value)`. Items with length 0, 1, /// or greater than 2 raise the same TypeError messages used by `dict.update()`. fn dict_merge_from_iterable_pairs( dict: &mut Dict, iterable: Value, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<()> { let iter = MontyIter::new(iterable, vm)?; defer_drop_mut!(iter, vm); while let Some(item) = iter.for_next(vm)? { // Each item should be a pair (iterable of 2 elements). let pair_iter = MontyIter::new(item, vm)?; defer_drop_mut!(pair_iter, vm); let Some(key) = pair_iter.for_next(vm)? else { return Err(ExcType::type_error( "dictionary update sequence element has length 0; 2 is required", )); }; let mut key_guard = HeapGuard::new(key, vm); let Some(value) = pair_iter.for_next(key_guard.heap())? else { return Err(ExcType::type_error( "dictionary update sequence element has length 1; 2 is required", )); }; let mut value_guard = HeapGuard::new(value, key_guard.heap()); if let Some(extra) = pair_iter.for_next(value_guard.heap())? 
{ extra.drop_with_heap(value_guard.heap()); return Err(ExcType::type_error( "dictionary update sequence element has length > 2; 2 is required", )); } let value = value_guard.into_inner(); let key = key_guard.into_inner(); if let Some(old_value) = dict.set(key, value, vm)? { old_value.drop_with_heap(vm); } } Ok(()) } /// Merges keyword arguments into a dict. /// /// This helper drains `kwargs` safely on error so all values are dropped /// correctly, then inserts each key-value pair into `dict`. fn dict_merge_from_kwargs( dict: &mut Dict, kwargs: KwargsValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<()> { let kwargs_iter = kwargs.into_iter(); defer_drop_mut!(kwargs_iter, vm); for (key, value) in kwargs_iter { let old_value = dict.set(key, value, vm)?; old_value.drop_with_heap(vm); } Ok(()) } /// Implements Python's `dict.setdefault(key[, default])` method. /// /// If key is in the dict, return its value. /// If not, insert key with a value of default (or None) and return default. fn dict_setdefault(dict: &mut Dict, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (key, default) = args.get_one_two_args("setdefault", vm.heap)?; let default = default.unwrap_or(Value::None); let mut key_guard = HeapGuard::new(key, vm); let (key, vm) = key_guard.as_parts(); if let Some(existing) = dict.get(key, vm)? { // Key exists - return its value (cloned) let value = existing.clone_with_heap(vm); default.drop_with_heap(vm); Ok(value) } else { // Key doesn't exist - insert default and return it (cloned before insertion) let return_value = default.clone_with_heap(vm); let (key, vm) = key_guard.into_parts(); if let Some(old_value) = dict.set(key, default, vm)? { // This shouldn't happen since we checked, but handle it anyway old_value.drop_with_heap(vm); } Ok(return_value) } } /// Implements Python's `dict.popitem()` method. /// /// Removes and returns the last inserted key-value pair as a tuple. /// Raises KeyError if the dict is empty. 
fn dict_popitem(dict: &mut Dict, heap: &mut Heap) -> RunResult { if dict.is_empty() { return Err(ExcType::key_error_popitem_empty_dict()); } // Remove the last entry (LIFO order) let entry = dict.entries.pop().expect("dict is not empty"); // Remove from indices - need to find the entry with this index // Since we removed the last entry, we need to clear and rebuild indices // (This is simpler than trying to find and remove the specific hash entry) // TODO: This O(n) rebuild could be optimized by finding and removing the // specific hash entry directly from the hashbrown table. dict.indices.clear(); for (idx, e) in dict.entries.iter().enumerate() { dict.indices.insert_unique(e.hash, idx, |&i| dict.entries[i].hash); } // Create tuple (key, value) Ok(allocate_tuple(smallvec![entry.key, entry.value], heap)?) } // Custom serde implementation for Dict. // Serializes entries and contains_refs; rebuilds the indices hash table on deserialize. impl serde::Serialize for Dict { fn serialize(&self, serializer: S) -> Result { use serde::ser::SerializeStruct; let mut state = serializer.serialize_struct("Dict", 2)?; state.serialize_field("entries", &self.entries)?; state.serialize_field("contains_refs", &self.contains_refs)?; state.end() } } impl<'de> serde::Deserialize<'de> for Dict { fn deserialize>(deserializer: D) -> Result { #[derive(serde::Deserialize)] struct DictFields { entries: Vec, contains_refs: bool, } let fields = DictFields::deserialize(deserializer)?; // Rebuild the indices hash table from the entries let mut indices = HashTable::with_capacity(fields.entries.len()); for (idx, entry) in fields.entries.iter().enumerate() { indices.insert_unique(entry.hash, idx, |&i| fields.entries[i].hash); } Ok(Self { indices, entries: fields.entries, contains_refs: fields.contains_refs, }) } } /// Implements Python's `dict.fromkeys(iterable[, value])` classmethod. /// /// Creates a new dictionary with keys from `iterable` and all values set to `value` /// (default: None). 
///
/// This is a classmethod that can be called directly on the dict type:
/// ```python
/// dict.fromkeys(['a', 'b', 'c'])  # {'a': None, 'b': None, 'c': None}
/// dict.fromkeys(['a', 'b'], 0)  # {'a': 0, 'b': 0}
/// ```
///
/// Returns a `Value::Ref` to the newly allocated dict.
pub fn dict_fromkeys(args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    let (iterable, default) = args.get_one_two_args("dict.fromkeys", vm.heap)?;
    let default = default.unwrap_or(Value::None);
    defer_drop!(default, vm);

    let iter = MontyIter::new(iterable, vm)?;
    defer_drop_mut!(iter, vm);

    let dict = Dict::new();
    // The dict under construction is held in a guard until it is handed to the heap.
    let mut dict_guard = HeapGuard::new(dict, vm);

    {
        let (dict, vm) = dict_guard.as_parts_mut();

        while let Some(key) = iter.for_next(vm)? {
            // Every key gets its own clone of the default; a value replaced by a
            // repeated key is released.
            let old_value = dict.set(key, default.clone_with_heap(vm), vm)?;
            old_value.drop_with_heap(vm);
        }
    }

    let dict = dict_guard.into_inner();
    let heap_id = vm.heap.allocate(HeapData::Dict(dict))?;
    Ok(Value::Ref(heap_id))
}

================================================
FILE: crates/monty/src/types/dict_view.rs
================================================
use std::fmt::Write;

use ahash::AHashSet;
use smallvec::smallvec;

use crate::{
    args::ArgValues,
    bytecode::{CallResult, VM},
    defer_drop,
    exception_private::{ExcType, RunError, RunResult},
    heap::{Heap, HeapData, HeapId, HeapItem},
    heap_data::HeapDataMut,
    intern::StaticStrings,
    resource::{ResourceError, ResourceTracker},
    types::{Dict, FrozenSet, MontyIter, PyTrait, Set, Type, allocate_tuple, iter::advance_on_heap},
    value::Value,
};

/// Shared accessors for heap-backed dictionary view objects.
///
/// All dictionary views are thin live references to an underlying `dict`. They do
/// not snapshot keys, items, or values; instead every observable operation reads
/// through to the current dict state. Keeping that behavior centralized avoids
/// subtle divergence between keys/items/values views.
pub(crate) trait DictView {
    /// Returns the heap id of the underlying dictionary this view keeps alive.
    fn dict_id(&self) -> HeapId;

    /// Returns the live dictionary backing this view.
    ///
    /// Panics if the heap entry at `dict_id()` is not a dict.
    fn dict<'a>(&self, heap: &'a Heap) -> &'a Dict {
        let HeapData::Dict(dict) = heap.get(self.dict_id()) else {
            panic!("dict view must always reference a dict");
        };
        dict
    }
}

/// Live view returned by `dict.keys()`.
///
/// `dict_keys` is set-like in CPython, so this view supports the shared live-view
/// behavior plus equality against other keys views and ordinary set-like values.
/// The remaining set algebra operations are added incrementally in the VM layer.
#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
pub(crate) struct DictKeysView {
    // Heap id of the dict this view reads through to.
    dict_id: HeapId,
}

impl DictKeysView {
    /// Creates a new keys view over an existing dictionary heap entry.
    #[must_use]
    pub fn new(dict_id: HeapId) -> Self {
        Self { dict_id }
    }

    /// Returns the underlying dictionary heap id.
    #[must_use]
    pub fn dict_id(self) -> HeapId {
        self.dict_id
    }

    /// Compares this keys view to another keys view using set semantics.
    ///
    /// Two keys views compare equal when they expose the same live key set,
    /// even if they are distinct view objects over distinct dictionaries.
    pub(crate) fn eq_view(self, other: Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
        // Views over the same dict are equal without inspecting any keys.
        if self.dict_id == other.dict_id {
            return Ok(true);
        }

        Heap::with_two(vm, self.dict_id, other.dict_id, |vm, left, right| {
            let (HeapData::Dict(left_dict), HeapData::Dict(right_dict)) = (left, right) else {
                panic!("dict_keys view must always reference dicts");
            };
            dict_keys_eq_dict(left_dict, right_dict, vm)
        })
    }

    /// Compares this keys view to a mutable set using set membership semantics.
pub(crate) fn eq_set(self, other: &Set, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
    Heap::with_entry_mut(vm, self.dict_id, |vm, data| match data {
        // Equal when the sizes agree and every live key is a member of `other`;
        // a failed membership test counts as "not a member".
        HeapDataMut::Dict(dict) => dict_keys_eq_set_like(
            dict,
            other.len(),
            |key, vm| other.contains(key, vm).unwrap_or(false),
            vm,
        ),
        _ => panic!("dict_keys view must always reference a dict"),
    })
}

/// Checks whether the view's live keys match the members of a frozenset.
///
/// Uses the same size-then-membership comparison as the mutable-set variant.
pub(crate) fn eq_frozenset(
    self,
    other: &FrozenSet,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> Result {
    Heap::with_entry_mut(vm, self.dict_id, |vm, data| match data {
        HeapDataMut::Dict(dict) => dict_keys_eq_set_like(
            dict,
            other.len(),
            |key, vm| other.contains(key, vm).unwrap_or(false),
            vm,
        ),
        _ => panic!("dict_keys view must always reference a dict"),
    })
}

/// Snapshots the backing dict's current keys into a new `Set`.
///
/// Each key is cloned through the heap before being added, so the returned set
/// owns its own references to the keys.
pub(crate) fn to_set(self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    Heap::with_entry_mut(vm, self.dict_id, |vm, data| {
        let dict = match data {
            HeapDataMut::Dict(dict) => dict,
            _ => panic!("dict_keys view must always reference a dict"),
        };

        let mut snapshot = Set::with_capacity(dict.len());
        for pair in dict.iter() {
            let key_copy = pair.0.clone_with_heap(vm);
            snapshot.add(key_copy, vm)?;
        }
        Ok(snapshot)
    })
}

/// Implements `dict_keys.isdisjoint(iterable)` with CPython's iterable semantics.
pub(crate) fn isdisjoint_from_value(
    self,
    other: &Value,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult {
    // Both sides are materialized as temporary sets that are released on exit.
    let self_set = self.to_set(vm)?;
    defer_drop!(self_set, vm);
    let other_set = collect_iterable_to_set(other.clone_with_heap(vm), vm)?;
    defer_drop!(other_set, vm);
    sets_are_disjoint(self_set, other_set, vm)
}
}

impl DictView for DictKeysView {
    fn dict_id(&self) -> HeapId {
        self.dict_id
    }
}

impl PyTrait for DictKeysView {
    fn py_type(&self, _heap: &Heap) -> Type {
        Type::DictKeys
    }

    /// Reports the current length of the backing dict.
    fn py_len(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Option {
        Some(self.dict(vm.heap).len())
    }

    fn py_eq(
        &self,
        other: &Self,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
    ) -> Result {
        self.eq_view(*other, vm)
    }

    /// Writes `dict_keys([...])` with the reprs of the backing dict's current keys.
    fn py_repr_fmt(
        &self,
        f: &mut impl Write,
        vm: &VM<'_, '_, impl ResourceTracker>,
        heap_ids: &mut AHashSet,
    ) -> std::fmt::Result {
        f.write_str("dict_keys([")?;
        write_dict_keys_contents(f, self.dict(vm.heap), vm, heap_ids)?;
        f.write_str("])")
    }

    /// Dispatches a method call on the keys view.
    ///
    /// Only `isdisjoint` is handled; any other attribute releases `args` and
    /// returns an AttributeError for `Type::DictKeys`.
    fn py_call_attr(
        &mut self,
        _self_id: HeapId,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
        attr: &crate::value::EitherStr,
        args: ArgValues,
    ) -> RunResult {
        match attr.static_string() {
            Some(StaticStrings::Isdisjoint) => {
                let other = args.get_one_arg("dict_keys.isdisjoint", vm.heap)?;
                defer_drop!(other, vm);
                Ok(CallResult::Value(Value::Bool(self.isdisjoint_from_value(other, vm)?)))
            }
            _ => {
                // `args` is owned by this call and nothing else consumes it on this
                // path, so it is released through the heap before the error is
                // returned (Dict's `py_call_attr` drops its args on the same path).
                defer_drop!(args, vm);
                Err(ExcType::attribute_error(Type::DictKeys, attr.as_str(vm.interns)))
            }
        }
    }
}

impl HeapItem for DictKeysView {
    fn py_estimate_size(&self) -> usize {
        std::mem::size_of::()
    }

    /// Pushes the backing dict's id onto `stack`.
    fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
        stack.push(self.dict_id);
    }
}

/// Live view returned by `dict.items()`.
///
/// The view stays linked to the original dictionary so iteration, `len()`, and
/// repr all reflect subsequent dictionary mutations. Like CPython, equality is
/// set-like: items views compare by their live `(key, value)` pairs.
#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
pub(crate) struct DictItemsView {
    // Heap id of the dict this view reads through to.
    dict_id: HeapId,
}

impl DictItemsView {
    /// Builds an items view that reads through to the dict stored at `dict_id`.
    #[must_use]
    pub fn new(dict_id: HeapId) -> Self {
        Self { dict_id }
    }

    /// Heap id of the dict behind this view.
    #[must_use]
    pub fn dict_id(self) -> HeapId {
        self.dict_id
    }

    /// Checks whether two items views expose equal `(key, value)` pairs.
    ///
    /// Views over the same dict are equal immediately. Otherwise the dicts must
    /// have the same length and every key of the left dict must map to an equal
    /// value in the right dict.
    pub(crate) fn eq_view(self, other: Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
        if self.dict_id == other.dict_id {
            return Ok(true);
        }

        Heap::with_two(vm, self.dict_id, other.dict_id, |vm, left, right| {
            let (lhs, rhs) = match (left, right) {
                (HeapData::Dict(lhs), HeapData::Dict(rhs)) => (lhs, rhs),
                _ => panic!("dict_items view must always reference dicts"),
            };

            if lhs.len() != rhs.len() {
                return Ok(false);
            }

            // Value comparison may recurse, so depth is tracked for its duration.
            let token = vm.heap.incr_recursion_depth()?;
            defer_drop!(token, vm);

            for (key, value) in lhs.iter() {
                vm.heap.check_time()?;
                // A missing key or a failed lookup both mean the views differ.
                let Ok(Some(counterpart)) = rhs.get(key, vm) else {
                    return Ok(false);
                };
                if !value.py_eq(counterpart, vm)? {
                    return Ok(false);
                }
            }
            Ok(true)
        })
    }

    /// Checks whether the view's live items match the members of a mutable set.
    ///
    /// A failed membership test counts as "not a member".
    pub(crate) fn eq_set(self, other: &Set, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
        Heap::with_entry_mut(vm, self.dict_id, |vm, data| match data {
            HeapDataMut::Dict(dict) => dict_items_eq_set_like(
                dict,
                other.len(),
                |item, vm| other.contains(item, vm).unwrap_or(false),
                vm,
            ),
            _ => panic!("dict_items view must always reference a dict"),
        })
    }

    /// Compares this items view to a frozenset using set membership semantics.
pub(crate) fn eq_frozenset(
    self,
    other: &FrozenSet,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> Result {
    Heap::with_entry_mut(vm, self.dict_id, |vm, data| match data {
        // Equal when the sizes agree and every live item is a member of `other`;
        // a failed membership test counts as "not a member".
        HeapDataMut::Dict(dict) => dict_items_eq_set_like(
            dict,
            other.len(),
            |item, vm| other.contains(item, vm).unwrap_or(false),
            vm,
        ),
        _ => panic!("dict_items view must always reference a dict"),
    })
}

/// Snapshots the backing dict's current `(key, value)` pairs into a new `Set`.
///
/// Every pair is cloned and allocated as a 2-tuple before being added, so the
/// set's members are ordinary tuples.
pub(crate) fn to_set(self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    Heap::with_entry_mut(vm, self.dict_id, |vm, data| {
        let dict = match data {
            HeapDataMut::Dict(dict) => dict,
            _ => panic!("dict_items view must always reference a dict"),
        };

        let mut snapshot = Set::with_capacity(dict.len());
        for pair in dict.iter() {
            let (key, value) = pair;
            let tuple = allocate_tuple(smallvec![key.clone_with_heap(vm), value.clone_with_heap(vm)], vm.heap)?;
            snapshot.add(tuple, vm)?;
        }
        Ok(snapshot)
    })
}

/// Implements `dict_items.isdisjoint(iterable)` with CPython's iterable semantics.
pub(crate) fn isdisjoint_from_value(
    self,
    other: &Value,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult {
    // Both sides are materialized as temporary sets that are released on exit.
    let self_set = self.to_set(vm)?;
    defer_drop!(self_set, vm);
    let other_set = collect_iterable_to_set(other.clone_with_heap(vm), vm)?;
    defer_drop!(other_set, vm);
    sets_are_disjoint(self_set, other_set, vm)
}
}

impl DictView for DictItemsView {
    fn dict_id(&self) -> HeapId {
        self.dict_id
    }
}

impl PyTrait for DictItemsView {
    fn py_type(&self, _heap: &Heap) -> Type {
        Type::DictItems
    }

    /// Reports the current length of the backing dict.
    fn py_len(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Option {
        Some(self.dict(vm.heap).len())
    }

    fn py_eq(
        &self,
        other: &Self,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
    ) -> Result {
        self.eq_view(*other, vm)
    }

    /// Writes `dict_items([...])` with the reprs of the backing dict's current pairs.
    fn py_repr_fmt(
        &self,
        f: &mut impl Write,
        vm: &VM<'_, '_, impl ResourceTracker>,
        heap_ids: &mut AHashSet,
    ) -> std::fmt::Result {
        f.write_str("dict_items([")?;
        write_dict_items_contents(f, self.dict(vm.heap), vm, heap_ids)?;
        f.write_str("])")
    }

    /// Dispatches a method call on the items view.
    ///
    /// Only `isdisjoint` is handled; any other attribute releases `args` and
    /// returns an AttributeError for `Type::DictItems`.
    fn py_call_attr(
        &mut self,
        _self_id: HeapId,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
        attr: &crate::value::EitherStr,
        args: ArgValues,
    ) -> RunResult {
        match attr.static_string() {
            Some(StaticStrings::Isdisjoint) => {
                let other = args.get_one_arg("dict_items.isdisjoint", vm.heap)?;
                defer_drop!(other, vm);
                Ok(CallResult::Value(Value::Bool(self.isdisjoint_from_value(other, vm)?)))
            }
            _ => {
                // `args` is owned by this call and nothing else consumes it on this
                // path, so it is released through the heap before the error is
                // returned (Dict's `py_call_attr` drops its args on the same path).
                defer_drop!(args, vm);
                Err(ExcType::attribute_error(Type::DictItems, attr.as_str(vm.interns)))
            }
        }
    }
}

impl HeapItem for DictItemsView {
    fn py_estimate_size(&self) -> usize {
        std::mem::size_of::()
    }

    /// Pushes the backing dict's id onto `stack`.
    fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
        stack.push(self.dict_id);
    }
}

/// Live view returned by `dict.values()`.
///
/// Unlike keys/items views, `dict_values` is intentionally not set-like in
/// CPython. Milestone one only needs it to be a real view object with the same
/// live iteration, repr, and membership behavior users expect from Python.
#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] pub(crate) struct DictValuesView { dict_id: HeapId, } impl DictValuesView { /// Creates a new values view over an existing dictionary heap entry. #[must_use] pub fn new(dict_id: HeapId) -> Self { Self { dict_id } } /// Returns the underlying dictionary heap id. #[must_use] pub fn dict_id(self) -> HeapId { self.dict_id } } impl DictView for DictValuesView { fn dict_id(&self) -> HeapId { self.dict_id } } impl PyTrait for DictValuesView { fn py_type(&self, _heap: &Heap) -> Type { Type::DictValues } fn py_len(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Option { Some(self.dict(vm.heap).len()) } fn py_eq( &self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result { Ok(false) } fn py_repr_fmt( &self, f: &mut impl Write, vm: &VM<'_, '_, impl ResourceTracker>, heap_ids: &mut AHashSet, ) -> std::fmt::Result { f.write_str("dict_values([")?; write_dict_values_contents(f, self.dict(vm.heap), vm, heap_ids)?; f.write_str("])") } } impl HeapItem for DictValuesView { fn py_estimate_size(&self) -> usize { std::mem::size_of::() } fn py_dec_ref_ids(&mut self, stack: &mut Vec) { stack.push(self.dict_id); } } /// Compares two dicts for key-set equality using membership checks. fn dict_keys_eq_dict( left: &Dict, right: &Dict, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result { dict_keys_eq_set_like( left, right.len(), |key, vm| matches!(right.get(key, vm), Ok(Some(_))), vm, ) } /// Compares a dict's live keys to another set-like container by membership. 
fn dict_keys_eq_set_like(
    dict: &Dict,
    other_len: usize,
    mut contains: impl FnMut(&Value, &mut VM<'_, '_, T>) -> bool,
    vm: &mut VM<'_, '_, T>,
) -> Result {
    // Containers of different sizes cannot be equal, so skip the scan.
    if dict.len() != other_len {
        return Ok(false);
    }

    // The recursion-depth token is acquired before the scan and registered
    // with `defer_drop!`; acquiring it can fail, which aborts the comparison.
    let token = vm.heap.incr_recursion_depth()?;
    defer_drop!(token, vm);

    for (key, _) in dict {
        // Checked once per key so a long scan still observes the time limit.
        vm.heap.check_time()?;
        if !contains(key, vm) {
            return Ok(false);
        }
    }
    // Same size and every key found on the other side.
    Ok(true)
}

/// Compares a dict's live items to another set-like container by membership.
///
/// Each `(key, value)` pair is materialised as a freshly allocated tuple and
/// passed to `contains`; the comparison stops at the first pair not found.
fn dict_items_eq_set_like(
    dict: &Dict,
    other_len: usize,
    mut contains: impl FnMut(&Value, &mut VM<'_, '_, T>) -> bool,
    vm: &mut VM<'_, '_, T>,
) -> Result {
    // Containers of different sizes cannot be equal, so skip the scan.
    if dict.len() != other_len {
        return Ok(false);
    }

    let token = vm.heap.incr_recursion_depth()?;
    defer_drop!(token, vm);

    for (key, value) in dict {
        vm.heap.check_time()?;
        // Key and value are cloned into the temporary tuple, which is
        // registered with `defer_drop!` right after allocation.
        let item = allocate_tuple(smallvec![key.clone_with_heap(vm), value.clone_with_heap(vm)], vm.heap)?;
        defer_drop!(item, vm);
        if !contains(item, vm) {
            return Ok(false);
        }
    }
    Ok(true)
}

/// Writes the repr payload for a keys view without its outer wrapper.
///
/// Keys are written with their own repr, separated by `", "`.
fn write_dict_keys_contents(
    f: &mut impl Write,
    dict: &Dict,
    vm: &VM<'_, '_, impl ResourceTracker>,
    heap_ids: &mut AHashSet,
) -> std::fmt::Result {
    // `first` suppresses the separator before the first element only.
    let mut first = true;
    for (key, _) in dict {
        if !first {
            f.write_str(", ")?;
        }
        first = false;
        key.py_repr_fmt(f, vm, heap_ids)?;
    }
    Ok(())
}

/// Writes the repr payload for an items view without its outer wrapper.
///
/// Each entry is written as `(key_repr, value_repr)`, entries separated by `", "`.
fn write_dict_items_contents(
    f: &mut impl Write,
    dict: &Dict,
    vm: &VM<'_, '_, impl ResourceTracker>,
    heap_ids: &mut AHashSet,
) -> std::fmt::Result {
    // `first` suppresses the separator before the first element only.
    let mut first = true;
    for (key, value) in dict {
        if !first {
            f.write_str(", ")?;
        }
        first = false;
        f.write_char('(')?;
        key.py_repr_fmt(f, vm, heap_ids)?;
        f.write_str(", ")?;
        value.py_repr_fmt(f, vm, heap_ids)?;
        f.write_char(')')?;
    }
    Ok(())
}

/// Writes the repr payload for a values view without its outer wrapper.
fn write_dict_values_contents(
    f: &mut impl Write,
    dict: &Dict,
    vm: &VM<'_, '_, impl ResourceTracker>,
    heap_ids: &mut AHashSet,
) -> std::fmt::Result {
    // `first` suppresses the separator before the first element only.
    let mut first = true;
    for (_, value) in dict {
        if !first {
            f.write_str(", ")?;
        }
        first = false;
        value.py_repr_fmt(f, vm, heap_ids)?;
    }
    Ok(())
}

/// Collects an arbitrary iterable into a temporary `set`.
///
/// Dict-view operators accept any iterable on the right-hand side in CPython,
/// including one-shot iterator objects. Reusing the same collection path keeps
/// binary operators and `isdisjoint(...)` consistent with each other.
///
/// `value` is taken by value. Two paths exist:
/// - if `value` already refers to a heap iterator, that iterator is advanced
///   in place with `advance_on_heap` until it is exhausted;
/// - otherwise a fresh `MontyIter` is created over `value` and drained with
///   `for_next`, with the set pre-sized from the iterator's `size_hint`.
pub(crate) fn collect_iterable_to_set(
    value: Value,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> Result {
    let is_existing_iterator =
        matches!(&value, Value::Ref(heap_id) if matches!(vm.heap.get(*heap_id), HeapData::Iter(_)));

    if is_existing_iterator {
        // Both the iterator reference and the set under construction are held
        // in `HeapGuard`s while items are added; the set is taken back out
        // with `into_inner()` only on success.
        let mut iterable_guard = crate::heap::HeapGuard::new(value, vm);
        let (iterable, vm) = iterable_guard.as_parts_mut();
        let Value::Ref(iter_id) = iterable else {
            unreachable!("existing iterator check should guarantee a heap iterator");
        };
        let mut set_guard = crate::heap::HeapGuard::new(Set::new(), vm);
        let (set, vm) = set_guard.as_parts_mut();
        // NOTE(review): unlike `for_next`, `advance_on_heap` performs no time
        // check, so this loop does not observe the time limit - confirm intended.
        while let Some(item) = advance_on_heap(vm.heap, *iter_id, vm.interns)? {
            set.add(item, vm)?;
        }
        return Ok(set_guard.into_inner());
    }

    // `MontyIter::new` consumes `value` and fails if it is not iterable.
    let iter = MontyIter::new(value, vm)?;
    crate::defer_drop_mut!(iter, vm);
    let mut set_guard = crate::heap::HeapGuard::new(Set::with_capacity(iter.size_hint(vm.heap)), vm);
    let (set, vm) = set_guard.as_parts_mut();
    while let Some(item) = iter.for_next(vm)? {
        set.add(item, vm)?;
    }
    Ok(set_guard.into_inner())
}

/// Returns whether two temporary sets have no elements in common.
///
/// Iterates the smaller set and probes the larger one, returning `false` at
/// the first shared element. Errors from the membership probe are propagated.
fn sets_are_disjoint(left: &Set, right: &Set, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    let (smaller, larger) = if left.len() <= right.len() { (left, right) } else { (right, left) };
    for value in smaller.iter() {
        if larger.contains(value, vm)? {
            return Ok(false);
        }
    }
    Ok(true)
}

================================================
FILE: crates/monty/src/types/iter.rs
================================================
//! Iterator support for Python for loops and the `iter()` type constructor.
//!
//! This module provides the `MontyIter` struct which encapsulates iteration state
//! for different iterable types. It uses index-based iteration internally to avoid
//! borrow conflicts when accessing the heap during iteration.
//!
//! The design stores iteration state (indices) rather than Rust iterators, allowing
//! `for_next()` to take `&mut VM` for cloning values and allocating strings.
//!
//! For constructors like `list()` and `tuple()`, use `MontyIter::new()` followed
//! by `collect()` to materialize all items into a Vec.
//!
//! ## Efficient Iteration with `IterState`
//!
//! For the VM's `ForIter` opcode, `advance_on_heap()` uses two strategies:
//!
//! **Fast path** for simple iterators (Range, InternBytes, ASCII IterStr):
//! - Single `get_mut()` call to compute value and advance index
//! - No additional heap access needed during iteration
//!
//! **Multi-phase approach** for complex iterators (non-ASCII IterStr, HeapRef):
//! 1. `iter_state()` - reads current state without mutation, returns an optional
//!    `IterState` (`None` means exhausted)
//! 2. Get the value (may access other heap objects like strings or containers)
//! 3. `advance()` - updates the index after the caller has done its work
//!
//! This allows `advance_on_heap()` to coordinate access without extracting
//! the iterator from the heap (avoiding `std::mem::replace` overhead).
//!
//! ## Builtin Support
//!
//! The `iterator_next()` helper implements the `next()` builtin.
use crate::{
    args::ArgValues,
    bytecode::VM,
    exception_private::{ExcType, RunResult},
    heap::{ContainsHeap, DropWithHeap, Heap, HeapData, HeapGuard, HeapId, HeapItem},
    heap_data::HeapDataMut,
    intern::{BytesId, Interns, StringId},
    resource::ResourceTracker,
    types::{PyTrait, Range, dict_view::DictView, str::allocate_char},
    value::Value,
};

/// Iterator state for Python for loops.
///
/// Contains the current iteration index and the type-specific iteration data.
/// Uses index-based iteration to avoid borrow conflicts when accessing the heap.
///
/// For strings, stores the string content with a byte offset for O(1) UTF-8 iteration.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct MontyIter {
    /// Current iteration index, shared across all iterator types.
    index: usize,
    /// Type-specific iteration data.
    iter_value: IterValue,
    /// The value being iterated over.
    ///
    /// `MontyIter::new` replaces this with `Value::None` for range and string
    /// iterators, because their data is copied into `iter_value`.
    value: Value,
}

impl MontyIter {
    /// Creates an iterator from the `iter()` constructor call.
    ///
    /// - `iter(iterable)` - Returns an iterator for the iterable. If the argument is
    ///   already an iterator, returns the same object.
    /// - `iter(callable, sentinel)` - Not yet supported.
pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let (iterable, sentinel) = args.get_one_two_args("iter", vm.heap)?;

    if let Some(s) = sentinel {
        // Two-argument form: iter(callable, sentinel)
        // This is the sentinel iteration protocol, not yet supported.
        // Both arguments are owned here, so both are dropped before erroring.
        iterable.drop_with_heap(vm);
        s.drop_with_heap(vm);
        return Err(ExcType::type_error("iter(callable, sentinel) is not yet supported"));
    }

    // Check if already an iterator - return self
    if let Value::Ref(id) = &iterable
        && matches!(vm.heap.get(*id), HeapData::Iter(_))
    {
        // Already an iterator - return it (refcount already correct from caller)
        return Ok(iterable);
    }

    // Create new iterator and place it on the heap
    let iter = Self::new(iterable, vm)?;
    let id = vm.heap.allocate(HeapData::Iter(iter))?;
    Ok(Value::Ref(id))
}

/// Creates a new MontyIter from a Value.
///
/// Takes ownership of `value`. Returns a "not iterable" type error (after
/// dropping `value`) if `IterValue::new` does not recognise it.
/// For strings, copies the string content for byte-offset based iteration.
/// For ranges, the data is copied so the heap reference is dropped immediately.
pub fn new(mut value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    if let Some(iter_value) = IterValue::new(&value, vm) {
        // For Range, we copy next/step/len into IterValue::Range, so we don't need
        // to keep the heap object alive during iteration. Drop it immediately to avoid
        // GC issues (the Range isn't in any namespace slot, so GC wouldn't see it).
        // Same for IterStr which copies the string content.
        if matches!(iter_value, IterValue::Range { .. } | IterValue::IterStr { .. }) {
            value.drop_with_heap(vm);
            value = Value::None;
        }
        Ok(Self {
            index: 0,
            iter_value,
            value,
        })
    } else {
        // Build the error first: it needs the value's type, which is read
        // before the value is dropped.
        let err = ExcType::type_error_not_iterable(value.py_type(vm.heap));
        value.drop_with_heap(vm);
        Err(err)
    }
}

/// Drops the iterator and its held value properly.
///
/// Only `value` is passed to `drop_with_heap`; the other fields are not.
pub fn drop_with_heap(self, heap: &mut impl ContainsHeap) {
    self.value.drop_with_heap(heap);
}

/// Collects HeapIds from this iterator for reference counting cleanup.
pub fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
    // Only `value` is forwarded; it is the only field asked for heap ids.
    self.value.py_dec_ref_ids(stack);
}

/// Returns whether this iterator holds a heap reference (`Value::Ref`).
///
/// Used during allocation to determine if this container could create cycles.
#[inline]
#[must_use]
pub fn has_refs(&self) -> bool {
    matches!(self.value, Value::Ref(_))
}

/// Returns a reference to the underlying value being iterated.
///
/// Used by GC to traverse heap references held by the iterator.
pub fn value(&self) -> &Value {
    &self.value
}

/// Returns the current iterator state without mutation.
///
/// This is used by the multi-phase approach in `advance_on_heap()` for complex
/// iterator types (IterStr, HeapRef). Simple types (Range, InternBytes, ASCII
/// IterStr) are handled by the fast path and should not call this method.
///
/// Returns `None` if the iterator is exhausted.
///
/// # Panics
/// Hits `unreachable!` when called on a Range or InternBytes iterator.
fn iter_state(&self) -> Option {
    match &self.iter_value {
        // Range, InternBytes, and ASCII IterStr are handled by try_advance_simple() fast path
        IterValue::Range { .. } | IterValue::InternBytes { .. } => {
            unreachable!("Range and InternBytes use fast path, not iter_state")
        }
        IterValue::IterStr {
            string,
            byte_offset,
            len,
            ..
        } => {
            if self.index >= *len {
                None
            } else {
                // Get the next character at current byte offset
                let c = string[*byte_offset..]
                    .chars()
                    .next()
                    .expect("index < len implies char exists");
                Some(IterState::IterStr {
                    char: c,
                    char_len: c.len_utf8(),
                })
            }
        }
        IterValue::HeapRef {
            heap_id,
            len,
            checks_mutation,
        } => {
            // For types with captured len, check exhaustion here.
            // For List (len=None), exhaustion is checked in advance_on_heap().
            if let Some(l) = len
                && self.index >= *l
            {
                return None;
            }
            Some(IterState::HeapIndex {
                heap_id: *heap_id,
                index: self.index,
                // The captured length is only forwarded when the container
                // type is one that checks for size changes.
                expected_len: if *checks_mutation { *len } else { None },
            })
        }
    }
}

/// Advances the iterator by one step.
///
/// This is the final step (phase 3) of the multi-phase approach in `advance_on_heap()`.
Call this after /// successfully retrieving the value using the data from `iter_state()`. /// /// For string iterators, `string_char_len` must be provided (the UTF-8 byte /// length of the character that was just yielded) to update the byte offset. /// For other iterator types, pass `None`. #[inline] pub fn advance(&mut self, string_char_len: Option) { self.index += 1; if let Some(char_len) = string_char_len && let IterValue::IterStr { byte_offset, .. } = &mut self.iter_value { *byte_offset += char_len; } } /// Attempts to advance simple iterator types that don't need additional heap access. /// /// Returns `Some(result)` if handled (Range, InternBytes, ASCII IterStr), /// `None` if caller should use the multi-phase approach (non-ASCII IterStr, HeapRef). /// /// This optimization avoids two heap lookups for iterator types that can compute /// their next value without accessing other heap objects. #[inline] fn try_advance_simple(&mut self, interns: &Interns) -> Option>> { match &mut self.iter_value { IterValue::Range { next, step, len } => { if self.index >= *len { Some(Ok(None)) } else { let value = *next; *next += *step; self.index += 1; Some(Ok(Some(Value::Int(value)))) } } IterValue::IterStr { string, byte_offset, len, is_ascii, } => { if !*is_ascii { None } else if self.index >= *len { Some(Ok(None)) } else { let byte = string.as_bytes()[*byte_offset]; *byte_offset += 1; self.index += 1; Some(Ok(Some(Value::InternString(StringId::from_ascii(byte))))) } } IterValue::InternBytes { bytes_id, len } => { if self.index >= *len { Some(Ok(None)) } else { let i = self.index; self.index += 1; let bytes = interns.get_bytes(*bytes_id); Some(Ok(Some(Value::Int(i64::from(bytes[i]))))) } } IterValue::HeapRef { .. } => None, } } /// Returns the next item from the iterator, advancing the internal index. /// /// Returns `Ok(None)` when the iterator is exhausted. 
/// Returns `Err` if allocation fails (for string character iteration) or if /// a dict/set changes size during iteration (RuntimeError). pub fn for_next(&mut self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { // Check timeout on every iteration step. For NoLimitTracker this is // inlined as a no-op. For LimitTracker it ensures that Rust-side loops // (sum, sorted, min, max, etc.) cannot bypass the VM's per-instruction // timeout check by running entirely within a single bytecode instruction. vm.heap.check_time()?; match &mut self.iter_value { IterValue::Range { next, step, len } => { if self.index >= *len { return Ok(None); } let value = *next; *next += *step; self.index += 1; Ok(Some(Value::Int(value))) } IterValue::IterStr { string, byte_offset, len, is_ascii, } => { if self.index >= *len { Ok(None) } else if *is_ascii { let byte = string.as_bytes()[*byte_offset]; *byte_offset += 1; self.index += 1; Ok(Some(Value::InternString(StringId::from_ascii(byte)))) } else { // Get next char at current byte offset let c = string[*byte_offset..] .chars() .next() .expect("index < len implies char exists"); *byte_offset += c.len_utf8(); self.index += 1; Ok(Some(allocate_char(c, vm.heap)?)) } } IterValue::InternBytes { bytes_id, len } => { if self.index >= *len { return Ok(None); } let i = self.index; self.index += 1; let bytes = vm.interns.get_bytes(*bytes_id); Ok(Some(Value::Int(i64::from(bytes[i])))) } IterValue::HeapRef { heap_id, len, checks_mutation, } => { // Check exhaustion for types with captured len if let Some(l) = len && self.index >= *l { return Ok(None); } let i = self.index; let expected_len = if *checks_mutation { *len } else { None }; let item = get_heap_item(vm.heap, *heap_id, i, expected_len)?; // Check for list exhaustion (list can shrink during iteration) let Some(item) = item else { return Ok(None); }; self.index += 1; Ok(Some(item)) } } } /// Returns the remaining size for iterables based on current state. 
///
/// For immutable types (Range, Tuple, Str, Bytes, FrozenSet), returns the exact remaining count.
/// For List, returns current length minus index (may change if list is mutated).
/// For Dict, dict views and Set, returns the captured length minus index (used for size-change detection).
///
/// # Panics
/// Panics if a `HeapRef` without a captured length does not refer to a list.
pub fn size_hint(&self, heap: &Heap) -> usize {
    let len = match &self.iter_value {
        IterValue::Range { len, .. } | IterValue::IterStr { len, .. } | IterValue::InternBytes { len, .. } => *len,
        IterValue::HeapRef { heap_id, len, .. } => {
            // For List (len=None), check current length dynamically
            len.unwrap_or_else(|| {
                let HeapData::List(list) = heap.get(*heap_id) else {
                    panic!("HeapRef with len=None should only be List")
                };
                list.len()
            })
        }
    };
    // Saturating: a list may have shrunk below the current index.
    len.saturating_sub(self.index)
}

/// Collects all remaining items from the iterator into a Vec.
///
/// Consumes the iterator and returns all items. Used by `list()`, `tuple()`,
/// and similar constructors that need to materialize all items.
///
/// Pre-allocates capacity based on `size_hint()` for better performance.
pub fn collect>(self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    // The iterator is held in a `HeapGuard` for the duration of the collect;
    // the adapter below borrows it together with the VM.
    let mut guard = HeapGuard::new(self, vm);
    let (this, vm) = guard.as_parts_mut();
    HeapedMontyIter(this, vm).collect()
}
}

/// Adapter pairing a `MontyIter` with the VM it needs, so it can be driven
/// through Rust's `Iterator` trait (used by `MontyIter::collect`).
struct HeapedMontyIter<'this, 'a, 'p, T: ResourceTracker>(&'this mut MontyIter, &'this mut VM<'a, 'p, T>);

impl Iterator for HeapedMontyIter<'_, '_, '_, T> {
    type Item = RunResult;

    /// Forwards to `for_next`, turning its `Ok(None)` exhaustion signal into
    /// the end of iteration via `transpose()`.
    fn next(&mut self) -> Option {
        self.0.for_next(self.1).transpose()
    }

    /// Reports the inner iterator's remaining count as both bounds.
    fn size_hint(&self) -> (usize, Option) {
        let remaining = self.0.size_hint(self.1.heap);
        (remaining, Some(remaining))
    }
}

/// Advances an iterator stored on the heap and returns the next value.
///
/// Uses a fast path for simple iterators (Range, InternBytes, ASCII IterStr) that don't need
/// additional heap access - these are handled with a single mutable borrow.
///
/// For complex iterators (IterStr, HeapRef), uses a multi-phase approach:
/// 1.
/// Read iterator state (immutable borrow ends)
/// 2. Based on state, get the value (may access other heap objects)
/// 3. Update iterator index (mutable borrow)
///
/// This is more efficient than `std::mem::replace` with a placeholder because
/// it avoids creating and moving placeholder objects on every iteration.
///
/// Returns `Ok(None)` when the iterator is exhausted.
/// Returns `Err` for dict/set size changes or allocation failures.
///
/// # Panics
/// Panics if `iter_id` does not refer to an iterator on the heap.
pub(crate) fn advance_on_heap(
    heap: &mut Heap,
    iter_id: HeapId,
    interns: &Interns,
) -> RunResult> {
    // Fast path: Range, InternBytes and ASCII strings don't need additional
    // heap access, so we can handle them with a single mutable borrow.
    {
        let HeapDataMut::Iter(iter) = heap.get_mut(iter_id) else {
            panic!("advance_on_heap: expected Iterator on heap");
        };
        if let Some(result) = iter.try_advance_simple(interns) {
            return result;
        }
    }
    // Mutable borrow ends here, allowing the multi-phase approach below

    // Multi-phase approach for IterStr and HeapRef (need heap access during value retrieval)
    // Phase 1: Get iterator state (immutable borrow ends after this block)
    let HeapData::Iter(iter) = heap.get(iter_id) else {
        panic!("advance_on_heap: expected Iterator on heap");
    };
    let Some(state) = iter.iter_state() else {
        return Ok(None); // Iterator exhausted
    };

    // Phase 2: Based on state, get the value and determine char_len for strings
    let (value, string_char_len) = match state {
        IterState::IterStr { char, char_len } => {
            let value = allocate_char(char, heap)?;
            (value, Some(char_len))
        }
        IterState::HeapIndex {
            heap_id,
            index,
            expected_len,
        } => {
            let item = get_heap_item(heap, heap_id, index, expected_len)?;
            // Check for list exhaustion (list can shrink during iteration)
            let Some(item) = item else {
                return Ok(None);
            };
            (item, None)
        }
    };

    // Phase 3: Advance the iterator. This runs only after a value was
    // obtained, so an error or exhaustion above leaves the index untouched.
    let HeapDataMut::Iter(iter) = heap.get_mut(iter_id) else {
        panic!("advance_on_heap: expected Iterator on heap");
    };
    iter.advance(string_char_len);
    Ok(Some(value))
}

/// Gets an item from a
/// heap-allocated container at the given index.
///
/// Returns `Ok(None)` if the index is out of bounds (for lists that shrunk during iteration).
/// Returns `Err` if a dict/set changed size during iteration (RuntimeError).
///
/// `expected_len`, when present, is compared against the container's current
/// length for dicts, dict views and sets; other container types ignore it.
///
/// # Panics
/// Panics if `heap_id` refers to a heap type that is not handled here, or if
/// `index` is out of range for a container other than a list.
fn get_heap_item(
    heap: &mut Heap,
    heap_id: HeapId,
    index: usize,
    expected_len: Option,
) -> RunResult> {
    match heap.get(heap_id) {
        HeapData::List(list) => {
            // Check if list shrunk during iteration
            if index >= list.len() {
                return Ok(None);
            }
            Ok(Some(list.as_slice()[index].clone_with_heap(heap)))
        }
        HeapData::Tuple(tuple) => Ok(Some(tuple.as_slice()[index].clone_with_heap(heap))),
        HeapData::NamedTuple(namedtuple) => Ok(Some(namedtuple.as_vec()[index].clone_with_heap(heap))),
        HeapData::Dict(dict) => {
            // Check for dict mutation
            if let Some(expected) = expected_len
                && dict.len() != expected
            {
                return Err(ExcType::runtime_error_dict_changed_size());
            }
            Ok(Some(
                dict.key_at(index).expect("index should be valid").clone_with_heap(heap),
            ))
        }
        // The three dict views resolve their backing dict first and then
        // apply the same size check as a plain dict.
        HeapData::DictKeysView(view) => {
            let dict = view.dict(heap);
            if let Some(expected) = expected_len
                && dict.len() != expected
            {
                return Err(ExcType::runtime_error_dict_changed_size());
            }
            Ok(Some(
                dict.key_at(index).expect("index should be valid").clone_with_heap(heap),
            ))
        }
        HeapData::DictItemsView(view) => {
            let dict = view.dict(heap);
            if let Some(expected) = expected_len
                && dict.len() != expected
            {
                return Err(ExcType::runtime_error_dict_changed_size());
            }
            // Items are yielded as a freshly allocated (key, value) tuple.
            let (key, value) = dict.item_at(index).expect("index should be valid");
            Ok(Some(crate::types::allocate_tuple(
                smallvec::smallvec![key.clone_with_heap(heap), value.clone_with_heap(heap)],
                heap,
            )?))
        }
        HeapData::DictValuesView(view) => {
            let dict = view.dict(heap);
            if let Some(expected) = expected_len
                && dict.len() != expected
            {
                return Err(ExcType::runtime_error_dict_changed_size());
            }
            Ok(Some(
                dict.value_at(index)
                    .expect("index should be valid")
                    .clone_with_heap(heap),
            ))
        }
        HeapData::Bytes(bytes) => Ok(Some(Value::Int(i64::from(bytes.as_slice()[index])))),
        HeapData::Set(set) => {
            // Check for set mutation
            if let Some(expected) = expected_len
                && set.len() != expected
            {
                return Err(ExcType::runtime_error_set_changed_size());
            }
            Ok(Some(
                set.storage()
                    .value_at(index)
                    .expect("index should be valid")
                    .clone_with_heap(heap),
            ))
        }
        HeapData::FrozenSet(frozenset) => Ok(Some(
            frozenset
                .storage()
                .value_at(index)
                .expect("index should be valid")
                .clone_with_heap(heap),
        )),
        _ => panic!("get_heap_item: unexpected heap data type"),
    }
}

/// Gets the next item from an iterator.
///
/// If the iterator is exhausted:
/// - If `default` is `Some`, returns the default value
/// - If `default` is `None`, raises `StopIteration`
///
/// This implements Python's `next()` builtin semantics.
///
/// # Arguments
/// * `iter_value` - Must be an iterator (heap-allocated MontyIter)
/// * `default` - Optional default value to return when exhausted
/// * `heap` - The heap for memory operations
/// * `interns` - String interning table
///
/// # Errors
/// Returns a `TypeError` ("'T' object is not an iterator") if `iter_value` is
/// not a heap iterator, `StopIteration` if exhausted with no default, or
/// propagates errors from iteration. `default` is dropped on every path that
/// does not return it.
pub fn iterator_next(
    iter_value: &Value,
    default: Option,
    heap: &mut Heap,
    interns: &Interns,
) -> RunResult {
    let Value::Ref(iter_id) = iter_value else {
        // Not a heap value - can't be an iterator
        if let Some(d) = default {
            d.drop_with_heap(heap);
        }
        // Same wording as the heap branch below: `next()` reports that the
        // object is not an *iterator*, which differs from `iter()`'s
        // "not iterable" error.
        let data_type = iter_value.py_type(heap);
        return Err(ExcType::type_error(format!("'{data_type}' object is not an iterator")));
    };

    // Check that it's actually an iterator
    if !matches!(heap.get(*iter_id), HeapData::Iter(_)) {
        if let Some(d) = default {
            d.drop_with_heap(heap);
        }
        let data_type = heap.get(*iter_id).py_type(heap);
        return Err(ExcType::type_error(format!("'{data_type}' object is not an iterator")));
    }

    // Get next item using the `advance_on_heap` function
    match advance_on_heap(heap, *iter_id, interns)? {
        Some(item) => {
            // Drop default if provided since we don't need it
            if let Some(d) = default {
                d.drop_with_heap(heap);
            }
            Ok(item)
        }
        None => {
            // Iterator exhausted
            match default {
                Some(d) => Ok(d),
                None => Err(ExcType::stop_iteration()),
            }
        }
    }
}

/// Snapshot of iterator state needed to produce the next value.
///
/// This enum captures state for complex iterator types (IterStr, HeapRef) that
/// require the multi-phase approach in `advance_on_heap()`. Simple types (Range,
/// InternBytes, ASCII IterStr) are handled by the fast path and don't use this enum.
///
/// The multi-phase approach avoids borrow conflicts:
/// 1. Read an optional `IterState` from the iterator (immutable borrow ends, `None` means exhausted)
/// 2. Use the state to get the value (may access other heap objects)
/// 3. Call `advance()` to update the iterator index
#[derive(Debug, Clone, Copy)]
enum IterState {
    /// String iterator yields this character; char_len is UTF-8 byte length for advance().
    IterStr { char: char, char_len: usize },
    /// Heap-based iterator (List, Tuple, NamedTuple, Dict, dict views, Bytes, Set, FrozenSet).
    /// The expected_len is Some for types that check for mutation (Dict, dict views, Set).
    HeapIndex {
        heap_id: HeapId,
        index: usize,
        expected_len: Option,
    },
}

/// Type-specific iteration data for different Python iterable types.
///
/// Each variant stores the data needed to iterate over a specific type,
/// excluding the index which is stored in the parent `MontyIter` struct.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
enum IterValue {
    /// Iterating over a Range, yields `Value::Int`.
    Range {
        /// Next value to yield.
        next: i64,
        /// Step between values.
        step: i64,
        /// Total number of elements.
        len: usize,
    },
    /// Iterating over a string (heap or interned), yields single-char Str values.
    ///
    /// Stores a copy of the string content plus a byte offset for O(1) UTF-8 character access.
    /// We store the string rather than referencing the heap because `for_next()` needs mutable
    /// heap access to allocate the returned character strings, which would conflict with
    /// borrowing the source string from the heap.
    IterStr {
        /// Copy of the string content for iteration.
        string: String,
        /// Current byte offset into the string (points to next char to yield).
        byte_offset: usize,
        /// Total number of characters in the string.
        len: usize,
        /// Whether the string is ASCII (enables fast-path iteration).
        is_ascii: bool,
    },
    /// Iterating over interned bytes, yields `Value::Int` for each byte.
    InternBytes { bytes_id: BytesId, len: usize },
    /// Iterating over a heap-allocated container (List, Tuple, NamedTuple, Dict, dict views,
    /// Bytes, Set, FrozenSet).
    ///
    /// - `len`: `None` for List (checked dynamically since lists can mutate during iteration),
    ///   `Some(n)` for other types (captured at construction for exhaustion checking).
    /// - `checks_mutation`: `true` for Dict, dict views and Set (raises RuntimeError if size
    ///   changes), `false` for other types.
    HeapRef {
        heap_id: HeapId,
        len: Option,
        checks_mutation: bool,
    },
}

impl IterValue {
    /// Builds the iteration data for `value`, or `None` if it is not iterable.
    ///
    /// Interned strings and interned bytes are handled directly; heap
    /// references are delegated to `from_heap_data`. Every other value is
    /// treated as not iterable.
    fn new(value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Option {
        match &value {
            Value::InternString(string_id) => Some(Self::from_str(vm.interns.get_str(*string_id))),
            Value::InternBytes(bytes_id) => Some(Self::from_intern_bytes(*bytes_id, vm.interns)),
            Value::Ref(heap_id) => Self::from_heap_data(*heap_id, vm.heap),
            _ => None,
        }
    }

    /// Creates a Range iterator value.
    ///
    /// Copies `start`, `step` and the computed length out of the range.
    fn from_range(range: &Range) -> Self {
        Self::Range {
            next: range.start,
            step: range.step,
            len: range.len(),
        }
    }

    /// Creates an iterator value over a string.
    ///
    /// Copies the string content and counts characters for the length field.
    fn from_str(s: &str) -> Self {
        let is_ascii = s.is_ascii();
        // For ASCII the byte length equals the character count, which avoids
        // walking the string.
        let len = if is_ascii { s.len() } else { s.chars().count() };
        Self::IterStr {
            string: s.to_owned(),
            byte_offset: 0,
            len,
            is_ascii,
        }
    }

    /// Creates an iterator value over interned bytes.
fn from_intern_bytes(bytes_id: BytesId, interns: &Interns) -> Self {
    // The length is captured once here; the bytes are looked up again by id
    // on each iteration step.
    let bytes = interns.get_bytes(bytes_id);
    Self::InternBytes {
        bytes_id,
        len: bytes.len(),
    }
}

/// Creates an iterator value from heap data.
///
/// Returns `None` for heap types that are not listed below (not iterable).
fn from_heap_data(heap_id: HeapId, heap: &Heap) -> Option {
    match heap.get(heap_id) {
        // List: no captured len (checked dynamically), no mutation check
        HeapData::List(_) => Some(Self::HeapRef {
            heap_id,
            len: None,
            checks_mutation: false,
        }),
        // Tuple/NamedTuple/Bytes/FrozenSet: captured len, no mutation check
        HeapData::Tuple(tuple) => Some(Self::HeapRef {
            heap_id,
            len: Some(tuple.as_slice().len()),
            checks_mutation: false,
        }),
        HeapData::NamedTuple(namedtuple) => Some(Self::HeapRef {
            heap_id,
            len: Some(namedtuple.len()),
            checks_mutation: false,
        }),
        HeapData::Bytes(b) => Some(Self::HeapRef {
            heap_id,
            len: Some(b.len()),
            checks_mutation: false,
        }),
        HeapData::FrozenSet(frozenset) => Some(Self::HeapRef {
            heap_id,
            len: Some(frozenset.len()),
            checks_mutation: false,
        }),
        // Dict and dict views: captured len, WITH mutation check.
        // For views, `heap_id` stays the view's id while the captured length
        // is that of the dictionary behind it.
        HeapData::Dict(dict) => Some(Self::HeapRef {
            heap_id,
            len: Some(dict.len()),
            checks_mutation: true,
        }),
        HeapData::DictKeysView(view) => Some(Self::HeapRef {
            heap_id,
            len: Some(view.dict(heap).len()),
            checks_mutation: true,
        }),
        HeapData::DictItemsView(view) => Some(Self::HeapRef {
            heap_id,
            len: Some(view.dict(heap).len()),
            checks_mutation: true,
        }),
        HeapData::DictValuesView(view) => Some(Self::HeapRef {
            heap_id,
            len: Some(view.dict(heap).len()),
            checks_mutation: true,
        }),
        // Set: captured len, WITH mutation check
        HeapData::Set(set) => Some(Self::HeapRef {
            heap_id,
            len: Some(set.len()),
            checks_mutation: true,
        }),
        // String: copy content for iteration
        HeapData::Str(s) => Some(Self::from_str(s.as_str())),
        // Range: copy values for iteration
        HeapData::Range(range) => Some(Self::from_range(range)),
        // other types are not iterable
        _ => None,
    }
}
}

impl DropWithHeap for MontyIter {
    /// Forwards to the inherent `MontyIter::drop_with_heap`.
    #[inline]
    fn drop_with_heap(self, heap: &mut H) {
        Self::drop_with_heap(self, heap);
    }
}

impl HeapItem for MontyIter {
    /// Reports a fixed size obtained from `std::mem::size_of`.
    fn py_estimate_size(&self) -> usize {
        std::mem::size_of::()
    }

    /// Forwards to the held value, the only field asked for heap ids.
    fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
        self.value.py_dec_ref_ids(stack);
    }
}

================================================
FILE: crates/monty/src/types/list.rs
================================================
use std::fmt::Write;

use ahash::AHashSet;
use itertools::Itertools;
use smallvec::SmallVec;

use super::{MontyIter, PyTrait};
use crate::{
    args::ArgValues,
    bytecode::{CallResult, VM},
    defer_drop, defer_drop_mut,
    exception_private::{ExcType, RunError, RunResult},
    heap::{DropWithHeap, Heap, HeapData, HeapGuard, HeapId, HeapItem},
    intern::StaticStrings,
    resource::{ResourceError, ResourceTracker},
    sorting::{apply_permutation, sort_indices},
    types::Type,
    value::{EitherStr, Value},
};

/// Python list type, wrapping a Vec of Values.
///
/// This type provides Python list semantics including dynamic growth,
/// reference counting for heap values, and standard list methods.
///
/// # Implemented Methods
/// - `append(item)` - Add item to end
/// - `insert(index, item)` - Insert item at index
/// - `pop([index])` - Remove and return item (default: last)
/// - `remove(value)` - Remove first occurrence of value
/// - `clear()` - Remove all items
/// - `copy()` - Shallow copy
/// - `extend(iterable)` - Append items from iterable
/// - `index(value[, start[, end]])` - Find first index of value
/// - `count(value)` - Count occurrences
/// - `reverse()` - Reverse in place
/// - `sort([key][, reverse])` - Sort in place
///
/// Note: `sort(key=...)` supports builtin key functions (len, abs, etc.)
/// but not user-defined functions. This is handled at VM level for access
/// to function calling machinery.
///
/// All list methods from Python's builtins are implemented.
///
/// # Reference Counting
/// Values added via `append` or `insert` are moved into the list; those methods
/// do not increment reference counts, so the caller hands over an owned reference.
/// This ensures values remain valid while referenced by the list.
///
/// # GC Optimization
/// The `contains_refs` flag tracks whether the list contains any `Value::Ref` items.
/// This allows `collect_child_ids` and `py_dec_ref_ids` to skip iteration when the
/// list contains only primitive values (ints, bools, None, etc.), significantly
/// improving GC performance for lists of primitives.
#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
pub(crate) struct List {
    // The list elements, in order.
    items: Vec,
    /// True if any item in the list is a `Value::Ref`. Used to skip iteration
    /// in `collect_child_ids` and `py_dec_ref_ids` when no refs are present.
    contains_refs: bool,
}

impl List {
    /// Creates a new list from a vector of values.
    ///
    /// Automatically computes the `contains_refs` flag by checking if any value
    /// is a `Value::Ref`.
    ///
    /// Note: This does NOT increment reference counts - the caller must
    /// ensure refcounts are properly managed.
    #[must_use]
    pub fn new(vec: Vec) -> Self {
        let contains_refs = vec.iter().any(|v| matches!(v, Value::Ref(_)));
        Self {
            items: vec,
            contains_refs,
        }
    }

    /// Returns the list's elements as a slice.
    #[must_use]
    pub fn as_slice(&self) -> &[Value] {
        &self.items
    }

    /// Returns a mutable reference to the underlying vector.
    ///
    /// # Safety Considerations
    /// Be careful when mutating the vector directly - you must manually
    /// manage reference counts for any heap values you add or remove.
    /// The `contains_refs` flag is NOT automatically updated by direct
    /// vector mutations. Prefer using `append()` or `insert()` instead.
    pub fn as_vec_mut(&mut self) -> &mut Vec {
        &mut self.items
    }

    /// Returns the number of elements in the list.
    #[must_use]
    pub fn len(&self) -> usize {
        self.items.len()
    }

    /// Returns whether the list contains any heap references.
    ///
    /// When false, `collect_child_ids` and `py_dec_ref_ids` can skip iteration.
#[inline]
#[must_use]
pub fn contains_refs(&self) -> bool {
    self.contains_refs
}

/// Marks that the list contains heap references.
///
/// This should be called when directly mutating the list's items vector
/// (via `as_vec_mut()`) with values that include `Value::Ref` variants.
/// The flag is only ever set here, never cleared.
#[inline]
pub fn set_contains_refs(&mut self) {
    self.contains_refs = true;
}

/// Appends an element to the end of the list.
///
/// The caller transfers ownership of `item` to the list. The item's refcount
/// is NOT incremented here - the caller is responsible for ensuring the refcount
/// was already incremented (e.g., via `clone_with_heap` or `evaluate_use`).
///
/// Returns nothing; producing Python's `None` result for `list.append()` is
/// left to the caller.
pub fn append(&mut self, heap: &mut Heap, item: Value) {
    // Track if we're adding a reference and mark potential cycle
    if matches!(item, Value::Ref(_)) {
        self.contains_refs = true;
        heap.mark_potential_cycle();
    }
    // Ownership transfer - refcount was already handled by caller
    self.items.push(item);
}

/// Inserts an element at the specified index.
///
/// The caller transfers ownership of `item` to the list. The item's refcount
/// is NOT incremented here - the caller is responsible for ensuring the refcount
/// was already incremented.
///
/// # Arguments
/// * `index` - The position to insert at (0-based). If index >= len(),
///   the item is appended to the end (matching Python semantics).
///
/// Returns nothing; producing Python's `None` result for `list.insert()` is
/// left to the caller.
pub fn insert(&mut self, heap: &mut Heap, index: usize, item: Value) {
    // Track if we're adding a reference and mark potential cycle
    if matches!(item, Value::Ref(_)) {
        self.contains_refs = true;
        heap.mark_potential_cycle();
    }
    // Ownership transfer - refcount was already handled by caller
    // Python's insert() appends if index is out of bounds
    if index >= self.items.len() {
        self.items.push(item);
    } else {
        self.items.insert(index, item);
    }
}

/// Creates a list from the `list()` constructor call.
///
/// - `list()` with no args returns an empty list
/// - `list(iterable)` creates a list from any iterable (list, tuple, range, str, bytes, dict)
///
/// Errors from argument extraction, from creating or draining the iterator,
/// and from heap allocation are propagated.
pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
    let value = args.get_zero_one_arg("list", vm.heap)?;
    match value {
        None => {
            let heap_id = vm.heap.allocate(HeapData::List(Self::new(Vec::new())))?;
            Ok(Value::Ref(heap_id))
        }
        Some(v) => {
            // `MontyIter::new` takes ownership of the argument; `collect`
            // consumes the iterator and yields the items.
            let items = MontyIter::new(v, vm)?.collect(vm)?;
            let heap_id = vm.heap.allocate(HeapData::List(Self::new(items)))?;
            Ok(Value::Ref(heap_id))
        }
    }
}

/// Handles slice-based indexing for lists.
///
/// Returns a new list containing the selected elements.
fn getitem_slice(&self, slice: &crate::types::Slice, heap: &mut Heap) -> RunResult {
    // `indices` resolves the slice against the current length; its only error
    // is mapped to the "slice step cannot be zero" ValueError.
    let (start, stop, step) = slice
        .indices(self.items.len())
        .map_err(|()| ExcType::value_error_slice_step_zero())?;
    let items = get_slice_items(&self.items, start, stop, step, heap)?;
    let heap_id = heap.allocate(HeapData::List(Self::new(items)))?;
    Ok(Value::Ref(heap_id))
}
}

/// Unwraps a list into its backing vector of items.
impl From for Vec {
    fn from(list: List) -> Self {
        list.items
    }
}

impl PyTrait for List {
    fn py_type(&self, _heap: &Heap) -> Type {
        Type::List
    }

    /// Number of items currently stored; always `Some` for lists.
    fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option {
        Some(self.items.len())
    }

    /// Implements `list[key]` for slice keys and integer-like keys.
    ///
    /// A slice key produces a new list; an integer key returns a clone of the
    /// stored item. Negative integers count from the end, and an index outside
    /// the list yields `ExcType::list_index_error()`.
    fn py_getitem(&self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
        let heap = &mut *vm.heap;
        // Check for slice first (Value::Ref pointing to HeapData::Slice)
        if let Value::Ref(id) = key
            && let HeapData::Slice(slice) = heap.get(*id)
        {
            // Clone the slice to release the borrow on heap before calling getitem_slice
            let slice = slice.clone();
            return self.getitem_slice(&slice, heap);
        }
        // Extract integer index, accepting Int, Bool (True=1, False=0), and LongInt
        let index = key.as_index(heap, Type::List)?;
        // Convert to usize, handling negative indices (Python-style: -1 = last element)
        let len = i64::try_from(self.items.len()).expect("list length exceeds i64::MAX");
        let normalized_index = if index < 0 { index + len } else { index };
        // Bounds check
        if normalized_index < 0 || normalized_index >= len {
            return Err(ExcType::list_index_error());
        }
        // Return clone of the item with proper refcount increment
        // Safety: normalized_index is validated to be in [0, len) above
        let idx = usize::try_from(normalized_index).expect("list index validated non-negative");
        Ok(self.items[idx].clone_with_heap(heap))
    }

    /// Implements `list[key] = value` for integer-like keys.
    ///
    /// Both `key` and `value` are owned by this call: `key` is dropped on exit,
    /// and the `value` guard ends up dropping whichever item is left in it
    /// (the replaced item on success, the new value on an early error return).
    /// Any key that is not an Int, Bool or LongInt yields a TypeError.
    fn py_setitem(&mut self, key: Value, value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<()> {
        let heap = &mut *vm.heap;
        defer_drop!(key, heap);
        defer_drop_mut!(value, heap);
        // Extract integer index, accepting Int, Bool (True=1, False=0), and LongInt.
        // Note: The LongInt-to-i64 conversion is defensive code. In normal execution,
        // heap-allocated LongInt values always exceed i64 range because into_value()
        // demotes i64-fitting values to Value::Int. However, this could be reached via
        // deserialization of crafted snapshot data.
        let index = match key {
            Value::Int(i) => *i,
            Value::Bool(b) => i64::from(*b),
            Value::Ref(heap_id) => {
                if let HeapData::LongInt(li) = heap.get(*heap_id) {
                    if let Some(i) = li.to_i64() {
                        i
                    } else {
                        return Err(ExcType::index_error_int_too_large());
                    }
                } else {
                    let key_type = key.py_type(heap);
                    return Err(ExcType::type_error_list_assignment_indices(key_type));
                }
            }
            _ => {
                let key_type = key.py_type(heap);
                return Err(ExcType::type_error_list_assignment_indices(key_type));
            }
        };
        // Normalize negative indices (Python-style: -1 = last element)
        let len = i64::try_from(self.items.len()).expect("list length exceeds i64::MAX");
        let normalized_index = if index < 0 { index + len } else { index };
        // Bounds check
        if normalized_index < 0 || normalized_index >= len {
            return Err(ExcType::list_assignment_index_error());
        }
        let idx = usize::try_from(normalized_index).expect("index validated non-negative");
        // Update contains_refs if storing a Ref (must check before swap,
        // since after swap `value` holds the old item)
        if matches!(*value, Value::Ref(_)) {
            self.contains_refs = true;
            heap.mark_potential_cycle();
        }
        // Replace value (old one dropped by defer_drop_mut guard)
        std::mem::swap(&mut self.items[idx], value);
        Ok(())
    }

    /// Element-wise equality: lists of different length are unequal without
    /// comparing any items; otherwise items are compared pairwise in order,
    /// stopping at the first mismatch. The recursion depth is bumped for the
    /// duration of the comparison and the time limit is checked per pair.
    fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
        if self.items.len() != other.items.len() {
            return Ok(false);
        }
        let token = vm.heap.incr_recursion_depth()?;
        defer_drop!(token, vm);
        for (i1, i2) in self.items.iter().zip(&other.items) {
            vm.heap.check_time()?;
            if !i1.py_eq(i2, vm)? {
                return Ok(false);
            }
        }
        Ok(true)
    }

    /// A list is truthy exactly when it is non-empty.
    fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool {
        !self.items.is_empty()
    }

    /// Writes the list as `[item, item, ...]` via the shared sequence formatter.
    fn py_repr_fmt(
        &self,
        f: &mut impl Write,
        vm: &VM<'_, '_, impl ResourceTracker>,
        heap_ids: &mut AHashSet,
    ) -> std::fmt::Result {
        repr_sequence_fmt('[', ']', &self.items, f, vm, heap_ids)
    }

    /// Implements `list + list`: allocates a new list holding clones of this
    /// list's items followed by clones of `other`'s items.
    fn py_add(
        &self,
        other: &Self,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
    ) -> Result, crate::resource::ResourceError> {
        let heap = &mut *vm.heap;
        // Clone both lists' contents with proper refcounting
        let mut result: Vec = self.items.iter().map(|obj| obj.clone_with_heap(heap)).collect();
        let other_cloned: Vec = other.items.iter().map(|obj| obj.clone_with_heap(heap)).collect();
        result.extend(other_cloned);
        let id = heap.allocate(HeapData::List(Self::new(result)))?;
        Ok(Some(Value::Ref(id)))
    }

    /// Implements `list += other` in place.
    ///
    /// Returns `Ok(false)` when `other` is not a heap reference or when the
    /// heap helper declines to extend from it; `Ok(true)` once items were
    /// appended. `self_id` lets the method recognise `x += x`.
    fn py_iadd(
        &mut self,
        other: &Value,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
        self_id: Option,
    ) -> Result {
        let heap = &mut *vm.heap;
        // Extract the value ID first, keeping `other` around to drop later
        let Value::Ref(other_id) = other else { return Ok(false) };
        if Some(*other_id) == self_id {
            // Self-extend: clone our own items with proper refcounting
            let items = self
                .items
                .iter()
                .map(|obj| obj.clone_with_heap(heap))
                .collect::>();
            // If we're self-extending and have refs, mark potential cycle
            if self.contains_refs {
                heap.mark_potential_cycle();
            }
            self.items.extend(items);
        } else {
            // Get items from the other object using the heap's `iadd_extend_list` helper
            // This handles the borrow checker limitations with lifetime propagation
            let prev_len = self.items.len();
            if !heap.iadd_extend_list(*other_id, &mut self.items) {
                return Ok(false);
            }
            // Check if we added any refs and mark potential cycle
            if self.contains_refs {
                // Already had refs, but adding more may create cycles
                heap.mark_potential_cycle();
            } else {
                // Only the freshly appended tail needs scanning
                for item in &self.items[prev_len..] {
                    if matches!(item, Value::Ref(_)) {
                        self.contains_refs = true;
                        heap.mark_potential_cycle();
                        break;
                    }
                }
            }
        }
        Ok(true)
    }

    /// Intercepts `sort` to call `do_list_sort` (which needs `PrintWriter` for key functions),
    /// and delegates all other methods to `call_list_method`.
    ///
    /// A non-static attribute name yields an AttributeError; the guard around
    /// `args` covers that early-return path.
    fn py_call_attr(
        &mut self,
        _self_id: HeapId,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
        attr: &EitherStr,
        args: ArgValues,
    ) -> RunResult {
        if attr.static_string() == Some(StaticStrings::Sort) {
            do_list_sort(self, args, vm)?;
            return Ok(CallResult::Value(Value::None));
        }
        let args_guard = HeapGuard::new(args, vm.heap);
        let Some(method) = attr.static_string() else {
            return Err(ExcType::attribute_error(Type::List, attr.as_str(vm.interns)));
        };
        let args = args_guard.into_inner();
        call_list_method(self, method, args, vm).map(CallResult::Value)
    }
}

impl HeapItem for List {
    /// Size estimate: one fixed-size term plus one per-item term.
    fn py_estimate_size(&self) -> usize {
        std::mem::size_of::() + self.items.len() * std::mem::size_of::()
    }

    /// Pushes the heap id of every `Value::Ref` item onto `stack`.
    fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
        // Skip iteration if no refs - major GC optimization for lists of primitives
        if !self.contains_refs {
            return;
        }
        for obj in &mut self.items {
            if let Value::Ref(id) = obj {
                stack.push(*id);
                #[cfg(feature = "ref-count-panic")]
                obj.dec_ref_forget();
            }
        }
    }
}

/// Dispatches a method call on a list value.
///
/// This is the unified entry point for list method calls.
///
/// # Arguments
/// * `list` - The list to call the method on
/// * `method` - The method to call (e.g., `StaticStrings::Append`)
/// * `args` - The method arguments
/// * `vm` - The VM; its heap is used for allocation and reference counting
fn call_list_method(
    list: &mut List,
    method: StaticStrings,
    args: ArgValues,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult {
    match method {
        // Methods that produce their own result value.
        StaticStrings::Insert => return list_insert(list, args, vm.heap),
        StaticStrings::Pop => return list_pop(list, args, vm.heap),
        StaticStrings::Remove => return list_remove(list, args, vm),
        StaticStrings::Extend => return list_extend(list, args, vm),
        StaticStrings::Index => return list_index(list, args, vm),
        StaticStrings::Count => return list_count(list, args, vm),
        StaticStrings::Copy => {
            args.check_zero_args("list.copy", vm.heap)?;
            return Ok(list_copy(list, vm.heap)?);
        }
        // In-place mutators: each falls through to the shared `None` result below.
        StaticStrings::Append => {
            let item = args.get_one_arg("list.append", vm.heap)?;
            list.append(vm.heap, item);
        }
        StaticStrings::Clear => {
            args.check_zero_args("list.clear", vm.heap)?;
            list_clear(list, vm.heap);
        }
        StaticStrings::Reverse => {
            args.check_zero_args("list.reverse", vm.heap)?;
            list.items.reverse();
        }
        // Note: list.sort is handled by py_call_attr which intercepts it before reaching here
        _ => {
            // Unknown method: release the arguments before reporting the error.
            args.drop_with_heap(vm.heap);
            return Err(ExcType::attribute_error(Type::List, method.into()));
        }
    }
    Ok(Value::None)
}

/// Implements Python's `list.insert(index, item)` method.
fn list_insert(list: &mut List, args: ArgValues, heap: &mut Heap) -> RunResult { let (index_obj, item) = args.get_two_args("insert", heap)?; defer_drop!(index_obj, heap); let mut item_guard = HeapGuard::new(item, heap); let heap = item_guard.heap(); // Python's insert() handles negative indices by adding len // If still negative after adding len, clamps to 0 // If >= len, appends to end let index_i64 = index_obj.as_int(heap)?; let len = list.items.len(); let len_i64 = i64::try_from(len).expect("list length exceeds i64::MAX"); let index = if index_i64 < 0 { // Negative index: add length, clamp to 0 if still negative let adjusted = index_i64 + len_i64; usize::try_from(adjusted).unwrap_or(0) } else { // Positive index: clamp to len if too large usize::try_from(index_i64).unwrap_or(len) }; let (item, heap) = item_guard.into_parts(); list.insert(heap, index, item); Ok(Value::None) } /// Implements Python's `list.pop([index])` method. /// /// Removes the item at the given index (default: -1) and returns it. /// Raises IndexError if the list is empty or the index is out of range. fn list_pop(list: &mut List, args: ArgValues, heap: &mut Heap) -> RunResult { let index_arg = args.get_zero_one_arg("list.pop", heap)?; // Validate index type FIRST (if provided), matching Python's validation order. // Python raises TypeError for bad index type even on empty list. let index_i64 = if let Some(v) = index_arg { let result = v.as_int(heap); v.drop_with_heap(heap); result? 
} else { -1 }; // THEN check empty list if list.items.is_empty() { return Err(ExcType::index_error_pop_empty_list()); } // Normalize index let len = list.items.len(); let len_i64 = i64::try_from(len).expect("list length exceeds i64::MAX"); let normalized = if index_i64 < 0 { index_i64 + len_i64 } else { index_i64 }; // Bounds check if normalized < 0 || normalized >= len_i64 { return Err(ExcType::index_error_pop_out_of_range()); } // Remove and return the item let idx = usize::try_from(normalized).expect("index validated non-negative"); Ok(list.items.remove(idx)) } /// Implements Python's `list.remove(value)` method. /// /// Removes the first occurrence of value. Raises ValueError if not found. fn list_remove(list: &mut List, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let value = args.get_one_arg("list.remove", vm.heap)?; defer_drop!(value, vm); // Find the first matching element let mut found_idx = None; for (i, item) in list.items.iter().enumerate() { vm.heap.check_time()?; if value.py_eq(item, vm)? { found_idx = Some(i); break; } } match found_idx { Some(idx) => { // Remove the element and drop its refcount let removed = list.items.remove(idx); removed.drop_with_heap(vm.heap); Ok(Value::None) } None => Err(ExcType::value_error_remove_not_in_list()), } } /// Implements Python's `list.clear()` method. /// /// Removes all items from the list. fn list_clear(list: &mut List, heap: &mut Heap) { list.items.drain(..).drop_with_heap(heap); // Note: contains_refs stays true even if all refs removed, per conservative GC strategy } /// Implements Python's `list.copy()` method. /// /// Returns a shallow copy of the list. fn list_copy(list: &List, heap: &mut Heap) -> Result { let items: Vec = list.items.iter().map(|v| v.clone_with_heap(heap)).collect(); let heap_id = heap.allocate(HeapData::List(List::new(items)))?; Ok(Value::Ref(heap_id)) } /// Implements Python's `list.extend(iterable)` method. 
///
/// Extends the list by appending all items from the iterable.
fn list_extend(list: &mut List, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    let iterable = args.get_one_arg("list.extend", vm.heap)?;
    // Materialize the iterable first, then move every element in through
    // `append` so the list's reference bookkeeping stays up to date.
    let collected: SmallVec<[_; 2]> = MontyIter::new(iterable, vm)?.collect(vm)?;
    for element in collected {
        list.append(vm.heap, element);
    }
    Ok(Value::None)
}

/// Implements Python's `list.index(value[, start[, end]])` method.
///
/// Returns the position of the first item equal to `value` within `[start, end)`.
/// Fails with ValueError when no such item exists, and with TypeError when
/// called with zero or more than three positional arguments.
fn list_index(list: &List, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    let pos_args = args.into_pos_only("list.index", vm.heap)?;
    defer_drop!(pos_args, vm);

    let len = list.items.len();
    let (value, start, end) = match pos_args.as_slice() {
        [] => return Err(ExcType::type_error_at_least("list.index", 1, 0)),
        [value] => (value, 0, len),
        [value, start_arg] => (value, normalize_list_index(start_arg.as_int(vm.heap)?, len), len),
        [value, start_arg, end_arg] => {
            let start = normalize_list_index(start_arg.as_int(vm.heap)?, len);
            // Never let the window end before it starts.
            let end = normalize_list_index(end_arg.as_int(vm.heap)?, len).max(start);
            (value, start, end)
        }
        other => return Err(ExcType::type_error_at_most("list.index", 3, other.len())),
    };

    // Walk the window `[start, end)` keeping absolute positions.
    for (position, item) in list.items.iter().enumerate().take(end).skip(start) {
        vm.heap.check_time()?;
        if value.py_eq(item, vm)? {
            let idx = i64::try_from(position).expect("index exceeds i64::MAX");
            return Ok(Value::Int(idx));
        }
    }
    Err(ExcType::value_error_not_in_list())
}

/// Implements Python's `list.count(value)` method.
///
/// Returns the number of occurrences of value in the list.
fn list_count(list: &List, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let value = args.get_one_arg("list.count", vm.heap)?; defer_drop!(value, vm); let mut count: usize = 0; for item in &list.items { vm.heap.check_time()?; if value.py_eq(item, vm)? { count += 1; } } let count_i64 = i64::try_from(count).expect("count exceeds i64::MAX"); Ok(Value::Int(count_i64)) } /// Normalizes a Python-style list index to a valid index in range [0, len]. fn normalize_list_index(index: i64, len: usize) -> usize { if index < 0 { let abs_index = usize::try_from(-index).unwrap_or(usize::MAX); len.saturating_sub(abs_index) } else { usize::try_from(index).unwrap_or(len).min(len) } } /// Performs an in-place sort on a list with optional key function and reverse flag. fn do_list_sort(list: &mut List, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result<(), RunError> { // Parse keyword-only arguments: key and reverse let (key_arg, reverse_arg) = args.extract_keyword_only_pair("list.sort", "key", "reverse", vm.heap, vm.interns)?; // Convert reverse to bool (default false) let reverse = if let Some(v) = reverse_arg { let result = v.py_bool(vm); v.drop_with_heap(vm); result } else { false }; // Handle key function (None means no key function) let key_fn = match key_arg { Some(v) if matches!(v, Value::None) => { v.drop_with_heap(vm); None } other => other, }; defer_drop!(key_fn, vm); // Step 1: Borrow from the list for in-place sorting let items = list.as_vec_mut(); // 2. 
Compute key values if a key function was provided, otherwise we'll sort by the items themselves let mut keys_guard; let (compare_values, vm) = if let Some(f) = key_fn { let keys: Vec = Vec::with_capacity(items.len()); // Use a HeapGuard to ensure that if key function evaluation fails partway through, // we clean up any keys that were successfully computed keys_guard = HeapGuard::new(keys, vm); let (keys, vm) = keys_guard.as_parts_mut(); items .iter() .map(|item| { let item = item.clone_with_heap(vm); vm.evaluate_function("sorted() key argument", f, ArgValues::One(item)) }) .process_results(|keys_iter| keys.extend(keys_iter))?; keys_guard.as_parts() } else { (&*items, vm) }; // 3. Sort indices by comparing key values (or items themselves if no key) let len = compare_values.len(); let mut indices: Vec = (0..len).collect(); sort_indices(&mut indices, compare_values, reverse, vm)?; // 4. Rearrange items in-place according to the sorted permutation apply_permutation(items, &mut indices); Ok(()) } /// Writes a formatted sequence of values to a formatter. /// /// This helper function is used to implement `__repr__` for sequence types like /// lists and tuples. It writes items as comma-separated repr interns. 
///
/// # Arguments
/// * `start` - The opening character (e.g., '[' for lists, '(' for tuples)
/// * `end` - The closing character (e.g., ']' for lists, ')' for tuples)
/// * `items` - The slice of values to format
/// * `f` - The formatter to write to
/// * `vm` - The VM for resolving value references and looking up interned strings
/// * `heap_ids` - Set of heap IDs being repr'd (for cycle detection)
pub(crate) fn repr_sequence_fmt(
    start: char,
    end: char,
    items: &[Value],
    f: &mut impl Write,
    vm: &VM<'_, '_, impl ResourceTracker>,
    heap_ids: &mut AHashSet,
) -> std::fmt::Result {
    // Refuse to nest further once the repr depth limit is hit.
    let heap = &*vm.heap;
    let Some(token) = heap.incr_recursion_depth_for_repr() else {
        return f.write_str("...");
    };
    crate::defer_drop_immutable_heap!(token, heap);

    f.write_char(start)?;
    for (position, item) in items.iter().enumerate() {
        if position > 0 {
            // The time limit is checked before every item except the first;
            // on timeout the output is truncated but still closed properly.
            if heap.check_time().is_err() {
                f.write_str(", ...[timeout]")?;
                break;
            }
            f.write_str(", ")?;
        }
        item.py_repr_fmt(f, vm, heap_ids)?;
    }
    f.write_char(end)
}

/// Helper to extract items from a slice for list/tuple slicing.
///
/// Handles both positive and negative step values. For negative step,
/// iterates backward from start down to (but not including) stop.
///
/// Returns a new Vec of cloned values with proper refcount increments.
/// Checks the time limit on each iteration to enforce timeouts during slicing.
///
/// Note: step must be non-zero (callers should validate this via `slice.indices()`).
pub(crate) fn get_slice_items( items: &[Value], start: usize, stop: usize, step: i64, heap: &mut Heap, ) -> RunResult> { let mut result = Vec::new(); // try_from succeeds for non-negative step; step==0 rejected upstream by slice.indices() if let Ok(step_usize) = usize::try_from(step) { // Positive step: iterate forward let mut i = start; while i < stop && i < items.len() { heap.check_time()?; result.push(items[i].clone_with_heap(heap)); i += step_usize; } } else { // Negative step: iterate backward // start is the highest index, stop is the sentinel // stop > items.len() means "go to the beginning" let step_abs = usize::try_from(-step).expect("step is negative so -step is positive"); let step_abs_i64 = i64::try_from(step_abs).expect("step magnitude fits in i64"); let mut i = i64::try_from(start).expect("start index fits in i64"); let stop_i64 = if stop > items.len() { -1 } else { i64::try_from(stop).expect("stop bounded by items.len() fits in i64") }; while let Ok(i_usize) = usize::try_from(i) { if i_usize >= items.len() || i <= stop_i64 { break; } heap.check_time()?; result.push(items[i_usize].clone_with_heap(heap)); i -= step_abs_i64; } } Ok(result) } #[cfg(test)] mod tests { use num_bigint::BigInt; use super::*; use crate::{ PrintWriter, intern::{InternerBuilder, Interns}, resource::NoLimitTracker, types::LongInt, }; /// Creates a minimal Interns for testing. fn create_test_interns() -> Interns { let interner = InternerBuilder::new(""); Interns::new(interner, vec![]) } /// Creates a heap with a list and a LongInt index, bypassing into_value() demotion. /// /// This allows testing the defensive code path where a LongInt contains an i64-fitting value. 
fn create_heap_with_list_and_longint( list_items: Vec, index_value: BigInt, ) -> (Heap, HeapId, HeapId) { let mut heap = Heap::new(16, NoLimitTracker); let list = List::new(list_items); let list_id = heap.allocate(HeapData::List(list)).unwrap(); let long_int = LongInt::new(index_value); let index_id = heap.allocate(HeapData::LongInt(long_int)).unwrap(); (heap, list_id, index_id) } /// Tests py_setitem with a LongInt index that fits in i64. /// /// This is a defensive code path - normally unreachable because LongInt::into_value() /// demotes i64-fitting values to Value::Int. However, it could be reached via /// deserialization of crafted snapshot data. #[test] fn py_setitem_longint_fits_in_i64() { let (mut heap, list_id, index_id) = create_heap_with_list_and_longint(vec![Value::Int(10), Value::Int(20), Value::Int(30)], BigInt::from(1)); let interns = create_test_interns(); // Use heap.with_entry_mut to avoid double mutable borrow let key = Value::Ref(index_id); let new_value = Value::Int(99); heap.inc_ref(index_id); let mut vm = VM::new(Vec::new(), &mut heap, &interns, PrintWriter::Disabled); let result = Heap::with_entry_mut(&mut vm, list_id, |vm, mut data| data.py_setitem(key, new_value, vm)); assert!(result.is_ok()); // Verify the list was updated by checking it matches expected Int value let HeapData::List(list) = heap.get(list_id) else { panic!("expected list"); }; assert!(matches!(list.as_slice()[1], Value::Int(99))); // Clean up Value::Ref(list_id).drop_with_heap(&mut heap); } /// Tests py_setitem with a negative LongInt index that fits in i64. 
#[test] fn py_setitem_longint_negative_fits_in_i64() { let (mut heap, list_id, index_id) = create_heap_with_list_and_longint( vec![Value::Int(10), Value::Int(20), Value::Int(30)], BigInt::from(-1), // Last element ); let interns = create_test_interns(); let key = Value::Ref(index_id); let new_value = Value::Int(99); heap.inc_ref(index_id); let mut vm = VM::new(Vec::new(), &mut heap, &interns, PrintWriter::Disabled); let result = Heap::with_entry_mut(&mut vm, list_id, |vm, mut data| data.py_setitem(key, new_value, vm)); assert!(result.is_ok()); // Verify the last element was updated let HeapData::List(list) = heap.get(list_id) else { panic!("expected list"); }; assert!(matches!(list.as_slice()[2], Value::Int(99))); Value::Ref(list_id).drop_with_heap(&mut heap); } /// Tests py_setitem with i64::MAX as a LongInt index. #[test] fn py_setitem_longint_at_i64_max() { let (mut heap, list_id, index_id) = create_heap_with_list_and_longint(vec![Value::Int(10)], BigInt::from(i64::MAX)); let interns = create_test_interns(); let key = Value::Ref(index_id); let new_value = Value::Int(99); heap.inc_ref(index_id); // This should fail with IndexError because i64::MAX is out of bounds for a 1-element list let mut vm = VM::new(Vec::new(), &mut heap, &interns, PrintWriter::Disabled); let result = Heap::with_entry_mut(&mut vm, list_id, |vm, mut data| data.py_setitem(key, new_value, vm)); assert!(result.is_err()); Value::Ref(list_id).drop_with_heap(&mut heap); } } ================================================ FILE: crates/monty/src/types/long_int.rs ================================================ //! LongInt wrapper for arbitrary precision integer support. //! //! This module provides the `LongInt` wrapper type around `num_bigint::BigInt`. //! Named `LongInt` to avoid confusion with the external `BigInt` type. Python has //! one `int` type, and LongInt is an implementation detail - we use i64 for performance //! when values fit, and promote to LongInt on overflow. //! //! 
//! The design centralizes BigInt-related logic into methods on `LongInt` rather than
//! having freestanding functions scattered across the codebase.

use std::{
    collections::hash_map::DefaultHasher,
    fmt::{self, Display},
    hash::{Hash, Hasher},
    ops::{Add, Mul, Neg, Sub},
};

use num_bigint::BigInt;
use num_traits::{Signed, ToPrimitive, Zero};

use crate::{
    heap::{Heap, HeapData},
    resource::{ResourceError, ResourceTracker},
    value::Value,
};

/// Wrapper around `num_bigint::BigInt` for arbitrary precision integers.
///
/// Named `LongInt` to avoid confusion with the external `BigInt` type from `num_bigint`.
/// The inner `BigInt` is accessible via `.0` for arithmetic operations that need direct
/// access to the underlying type.
///
/// Python treats all integers as one type - we use `Value::Int(i64)` for values that fit
/// and `LongInt` for larger values. The `into_value()` method automatically demotes to
/// i64 when the value fits, maintaining this optimization.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]
pub struct LongInt(pub BigInt);

impl LongInt {
    /// Creates a new `LongInt` from a `BigInt`.
    ///
    /// No demotion happens here: the value is wrapped as-is even if it would fit in an i64.
    pub fn new(bi: BigInt) -> Self {
        Self(bi)
    }

    /// Converts to a `Value`, demoting to i64 if it fits.
    ///
    /// For performance, we want to keep values as `Value::Int(i64)` whenever possible.
    /// This method checks if the value fits in an i64 and returns `Value::Int` if so,
    /// otherwise allocates a `HeapData::LongInt` on the heap.
    ///
    /// # Errors
    ///
    /// Propagates the error from `heap.allocate` when the value has to be
    /// heap-allocated; the i64 path cannot fail.
    pub fn into_value(self, heap: &mut Heap) -> Result {
        // Try to demote back to i64 for performance
        if let Some(i) = self.0.to_i64() {
            Ok(Value::Int(i))
        } else {
            let heap_id = heap.allocate(HeapData::LongInt(self))?;
            Ok(Value::Ref(heap_id))
        }
    }

    /// Computes a hash consistent with i64 hashing.
    ///
    /// Critical: For values that fit in i64, this must return the same hash as
    /// hashing the i64 directly. This ensures dict key consistency - e.g.,
    /// `hash(5)` must equal `hash(LongInt(5))`.
    pub fn hash(&self) -> u64 {
        // If the LongInt fits in i64, hash as i64 for consistency
        if let Some(i) = self.0.to_i64() {
            let mut hasher = DefaultHasher::new();
            // Hash the i64 discriminant and value to match Value::Int hashing
            // NOTE(review): this presumes Value::Int hashing feeds the same two
            // items in the same order - confirm against the Value hash implementation.
            std::mem::discriminant(&Value::Int(0)).hash(&mut hasher);
            i.hash(&mut hasher);
            hasher.finish()
        } else {
            // For LongInts outside i64 range, use byte representation
            let mut hasher = DefaultHasher::new();
            // Use a unique discriminant for LongInt (we use the LongInt's sign and bytes)
            let (sign, bytes) = self.0.to_bytes_le();
            sign.hash(&mut hasher);
            bytes.hash(&mut hasher);
            hasher.finish()
        }
    }

    /// Estimates memory size in bytes.
    ///
    /// Used for resource tracking. The actual size includes the Vec overhead
    /// plus the digit storage. Rounds up bits to bytes to avoid underestimating
    /// (e.g., 1 bit = 1 byte, not 0 bytes).
    pub fn estimate_size(&self) -> usize {
        // Each BigInt digit is typically a u32 or u64
        // We estimate based on the number of significant bits
        let bits = self.0.bits();
        // Convert bits to bytes (round up), add overhead for Vec and sign
        // On 32-bit platforms, truncate to usize::MAX if bits is too large
        let bit_bytes = usize::try_from(bits).unwrap_or(usize::MAX).saturating_add(7) / 8;
        bit_bytes + std::mem::size_of::()
    }

    /// Returns a reference to the inner `BigInt`.
    ///
    /// Use this when you need read-only access to the underlying `BigInt`
    /// for operations like formatting or comparison.
    pub fn inner(&self) -> &BigInt {
        &self.0
    }

    /// Checks if the value is zero.
    pub fn is_zero(&self) -> bool {
        self.0.is_zero()
    }

    /// Checks if the value is negative.
    pub fn is_negative(&self) -> bool {
        self.0.is_negative()
    }

    /// Tries to convert to i64.
    ///
    /// Returns `Some(i64)` if the value fits, `None` otherwise.
    pub fn to_i64(&self) -> Option {
        self.0.to_i64()
    }

    /// Tries to convert to f64.
    ///
    /// Returns `Some(f64)` if the conversion is possible, `None` if the value
    /// is too large to represent as f64.
    ///
    /// NOTE(review): this simply forwards to `BigInt::to_f64`; some `num-bigint`
    /// versions appear to return `Some(±infinity)` rather than `None` for
    /// magnitudes beyond the f64 range - confirm against the pinned version
    /// before relying on `None` to signal overflow.
    pub fn to_f64(&self) -> Option {
        self.0.to_f64()
    }

    /// Tries to convert to u32.
    ///
    /// Returns `Some(u32)` if the value fits, `None` otherwise.
    pub fn to_u32(&self) -> Option {
        self.0.to_u32()
    }

    /// Tries to convert to usize.
    ///
    /// Returns `Some(usize)` if the value fits, `None` otherwise.
    /// Useful for sequence repetition counts.
    pub fn to_usize(&self) -> Option {
        self.0.to_usize()
    }

    /// Returns the absolute value as a new `LongInt`.
    pub fn abs(&self) -> Self {
        Self(self.0.abs())
    }

    /// Returns the number of significant bits in this LongInt.
    ///
    /// Zero returns 0 bits. For non-zero values, this is the position of the
    /// highest set bit plus one.
    pub fn bits(&self) -> u64 {
        self.0.bits()
    }
}

// === Trait Implementations ===
// Note: the arithmetic impls below return a plain `LongInt`; demotion back to
// `Value::Int` only happens when `into_value()` is called on the result.

impl From for LongInt {
    fn from(bi: BigInt) -> Self {
        Self(bi)
    }
}

impl From for LongInt {
    fn from(i: i64) -> Self {
        Self(BigInt::from(i))
    }
}

impl Add for LongInt {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        Self(self.0 + rhs.0)
    }
}

impl Sub for LongInt {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        Self(self.0 - rhs.0)
    }
}

impl Mul for LongInt {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        Self(self.0 * rhs.0)
    }
}

impl Neg for LongInt {
    type Output = Self;

    fn neg(self) -> Self::Output {
        Self(-self.0)
    }
}

impl Display for LongInt {
    /// Formats the value using the inner `BigInt`'s `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

================================================
FILE: crates/monty/src/types/mod.rs
================================================
//! Type definitions for Python runtime values.
//!
//! This module contains structured types that wrap heap-allocated data
//! and provide Python-like semantics for operations like append, insert, etc.
//!
//! The `AbstractValue` trait provides a common interface for all heap-allocated
//! types, enabling efficient dispatch via `enum_dispatch`.
pub mod bytes;
pub mod dataclass;
pub mod dict;
pub mod dict_view;
pub mod iter;
pub mod list;
pub mod long_int;
pub mod module;
pub mod namedtuple;
pub mod path;
pub mod property;
pub mod py_trait;
pub mod range;
pub mod re_match;
pub mod re_pattern;
pub mod set;
pub mod slice;
pub mod str;
pub mod tuple;
pub mod r#type;

// Crate-internal re-exports so callers can write `crate::types::List` etc.
pub(crate) use bytes::Bytes;
pub(crate) use dataclass::Dataclass;
pub(crate) use dict::Dict;
pub(crate) use dict_view::{DictItemsView, DictKeysView, DictValuesView};
pub(crate) use iter::MontyIter;
pub(crate) use list::List;
pub(crate) use long_int::LongInt;
pub(crate) use module::Module;
pub(crate) use namedtuple::NamedTuple;
pub(crate) use path::Path;
pub(crate) use property::Property;
pub(crate) use py_trait::PyTrait;
pub(crate) use range::Range;
pub(crate) use re_match::ReMatch;
pub(crate) use re_pattern::RePattern;
pub(crate) use set::{FrozenSet, Set};
pub(crate) use slice::Slice;
pub(crate) use str::Str;
pub(crate) use tuple::{Tuple, allocate_tuple};
pub(crate) use r#type::Type;

================================================
FILE: crates/monty/src/types/module.rs
================================================
//! Python module type for representing imported modules.

use crate::{
    args::ArgValues,
    bytecode::{CallResult, VM},
    defer_drop,
    exception_private::{ExcType, RunResult},
    heap::{Heap, HeapGuard, HeapId, HeapItem},
    intern::{Interns, StringId},
    resource::ResourceTracker,
    types::Dict,
    value::{EitherStr, Value},
};

/// A Python module with a name and attribute dictionary.
///
/// Modules in Monty are simplified compared to CPython - they just have a name
/// and a dictionary of attributes. This is sufficient for built-in modules like
/// `sys` and `typing` where we control the available attributes.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub(crate) struct Module {
    /// The module name (e.g., "sys", "typing").
    name: StringId,
    /// The module's attributes (e.g., `version`, `platform` for `sys`).
    attrs: Dict,
}

impl Module {
    /// Creates a new module with an empty attributes dictionary.
    ///
    /// The module name must be pre-interned during the prepare phase.
    ///
    /// # Panics
    ///
    /// Panics if the module name string has not been pre-interned.
    ///
    /// NOTE(review): no panic is visible in this constructor itself; presumably
    /// it would come from the `Into` conversion of `name` - confirm.
    pub fn new(name: impl Into) -> Self {
        Self {
            name: name.into(),
            attrs: Dict::new(),
        }
    }

    /// Returns the module's name StringId.
    pub fn name(&self) -> StringId {
        self.name
    }

    /// Returns a reference to the module's attribute dictionary.
    pub fn attrs(&self) -> &Dict {
        &self.attrs
    }

    /// Sets an attribute in the module's dictionary.
    ///
    /// The attribute name must be pre-interned during the prepare phase.
    ///
    /// # Panics
    ///
    /// Panics if the attribute name string has not been pre-interned.
    ///
    /// NOTE(review): the panic visible here is the `unwrap()` on `Dict::set`;
    /// whether `set` can fail for reasons other than hashing (e.g. resource
    /// limits) is not shown in this file - confirm.
    pub fn set_attr(&mut self, name: impl Into, value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) {
        let key = Value::InternString(name.into());
        // Unwrap is safe because InternString keys are always hashable
        self.attrs.set(key, value, vm).unwrap();
    }

    /// Looks up an attribute by name in the module's attribute dictionary.
    ///
    /// Returns `Some(value)` if the attribute exists, `None` otherwise.
    /// The returned value is cloned with proper refcount handling.
    /// A lookup error is also reported as `None`.
    pub fn get_attr(&self, attr_value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Option {
        // Dict::get returns Result because of hash computation, but InternString keys
        // are always hashable, so `.ok()` is safe here.
        self.attrs
            .get(attr_value, vm)
            .ok()
            .flatten()
            .map(|v| v.clone_with_heap(vm))
    }

    /// Returns whether this module has any heap references in its attributes.
    pub fn has_refs(&self) -> bool {
        self.attrs.has_refs()
    }

    /// Collects child HeapIds for reference counting.
    ///
    /// Delegates to the attribute dictionary; the `HeapItem` impl for `Module`
    /// provides the same behavior through the trait.
    pub fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
        self.attrs.py_dec_ref_ids(stack);
    }

    /// Gets an attribute by string ID for the `py_getattr` trait method.
    ///
    /// Returns the attribute value if found, or `None` if the attribute doesn't exist.
    /// For `Property` values, invokes the property getter rather than returning
    /// the Property itself - this implements Python's descriptor protocol.
    pub fn py_getattr(
        &self,
        attr: &EitherStr,
        heap: &mut Heap,
        interns: &Interns,
    ) -> Option {
        let value = self.attrs.get_by_str(attr.as_str(interns), heap, interns)?;

        // If the value is a Property, invoke its getter to compute the actual value
        if let Value::Property(prop) = *value {
            Some(prop.get())
        } else {
            // Ordinary attribute: hand back a clone wrapped as a plain value result
            Some(CallResult::Value(value.clone_with_heap(heap)))
        }
    }

    /// Calls an attribute as a function on this module.
    ///
    /// Modules don't have methods - they have callable attributes. This looks up
    /// the attribute and calls it if it's a `ModuleFunction`.
    ///
    /// Returns `CallResult` because module functions may need OS operations
    /// (e.g., `os.getenv()`) that require host involvement.
    ///
    /// NOTE(review): the code passes whatever value it finds to
    /// `vm.call_function`; any `ModuleFunction` check presumably happens
    /// there - confirm.
    pub fn py_call_attr(
        &self,
        _self_id: HeapId,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
        attr: &EitherStr,
        args: ArgValues,
    ) -> RunResult {
        // Guard the arguments so they are released on the error paths below
        let mut args_guard = HeapGuard::new(args, vm);
        let vm = args_guard.heap();

        // Only interned names are looked up; a heap-string name is reported as
        // a missing attribute straight away.
        let attr_key = match attr {
            EitherStr::Interned(id) => Value::InternString(*id),
            EitherStr::Heap(s) => {
                return Err(ExcType::attribute_error_module(vm.interns.get_str(self.name), s));
            }
        };

        match self.get_attr(&attr_key, vm) {
            Some(value) => {
                // Take the arguments back out of the guard to pass them to the call
                let (args, vm) = args_guard.into_parts();
                defer_drop!(value, vm);
                vm.call_function(value, args)
            }
            None => Err(ExcType::attribute_error_module(
                vm.interns.get_str(self.name),
                attr.as_str(vm.interns),
            )),
        }
    }
}

impl HeapItem for Module {
    /// Size estimate: one fixed-size term plus the attribute dictionary's own estimate.
    fn py_estimate_size(&self) -> usize {
        std::mem::size_of::() + self.attrs.py_estimate_size()
    }

    /// Forwards to the attribute dictionary to collect child heap ids.
    fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
        self.attrs.py_dec_ref_ids(stack);
    }
}

================================================
FILE: crates/monty/src/types/namedtuple.rs
================================================
/// Python named tuple type, combining tuple-like indexing with named attribute access.
/// /// Named tuples are like regular tuples but with field names, providing two ways /// to access elements: /// - By index: `version_info[0]` returns the major version /// - By name: `version_info.major` returns the same value /// /// Named tuples are: /// - Immutable (all tuple semantics apply) /// - Hashable (if all elements are hashable) /// - Have a descriptive repr: `sys.version_info(major=3, minor=14, ...)` /// - Support `len()` and iteration /// /// # Use Case /// /// This type is used for `sys.version_info` and similar structured tuples where /// named access improves usability and readability. use std::fmt::Write; use ahash::AHashSet; use super::PyTrait; use crate::{ bytecode::{CallResult, VM}, defer_drop, exception_private::{ExcType, RunResult}, heap::{Heap, HeapId, HeapItem}, intern::{Interns, StringId}, resource::{ResourceError, ResourceTracker}, types::Type, value::{EitherStr, Value}, }; /// Python named tuple value stored on the heap. /// /// Wraps a `Vec` with associated field names and provides both index-based /// and name-based access. Named tuples are conceptually immutable, though this is /// not enforced at the type level for internal operations. /// /// # Reference Counting /// /// When a named tuple is freed, all contained heap references have their refcounts /// decremented via `py_dec_ref_ids`. /// /// # GC Optimization /// /// The `contains_refs` flag tracks whether the tuple contains any `Value::Ref` items. /// This allows `py_dec_ref_ids` to skip iteration when the tuple contains only /// primitive values (ints, bools, None, etc.). #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) struct NamedTuple { /// Type name for repr (e.g., "sys.version_info"). name: EitherStr, /// Field names in order, e.g., `major`, `minor`, `micro`, `releaselevel`, `serial`. field_names: Vec, /// Values in order (same length as field_names). items: Vec, /// True if any item is a `Value::Ref`. 
Set at creation time since named tuples are immutable. contains_refs: bool, } impl NamedTuple { /// Creates a new named tuple. /// /// # Arguments /// /// * `type_name` - The type name for repr (e.g., "sys.version_info") /// * `field_names` - Field names as interned StringIds, in order /// * `items` - Values corresponding to each field name /// /// # Panics /// /// Panics if `field_names.len() != items.len()`. #[must_use] pub fn new(name: impl Into, field_names: Vec, items: Vec) -> Self { assert_eq!( field_names.len(), items.len(), "NamedTuple field_names and items must have same length" ); let contains_refs = items.iter().any(|v| matches!(v, Value::Ref(_))); Self { name: name.into(), field_names, items, contains_refs, } } /// Returns the type name (e.g., "sys.version_info"). #[must_use] pub fn name<'a>(&'a self, interns: &'a Interns) -> &'a str { self.name.as_str(interns) } /// Returns a reference to the field names. #[must_use] pub fn field_names(&self) -> &[EitherStr] { &self.field_names } /// Returns a reference to the underlying items vector. #[must_use] pub fn as_vec(&self) -> &Vec { &self.items } /// Returns the number of elements. #[must_use] pub fn len(&self) -> usize { self.items.len() } /// Returns whether the tuple contains any heap references. /// /// When false, `py_dec_ref_ids` can skip iteration. #[inline] #[must_use] pub fn contains_refs(&self) -> bool { self.contains_refs } /// Gets a field value by name. /// /// Compares field names by actual string content, not just variant type. /// This allows lookup to work regardless of whether the field name was /// stored as an interned `StringId` or a heap-allocated `String`. /// /// Returns `Some(value)` if the field exists, `None` otherwise. 
#[must_use] pub fn get_by_name(&self, name_str: &str, interns: &Interns) -> Option<&Value> { self.field_names .iter() .position(|field_name| field_name.as_str(interns) == name_str) .map(|idx| &self.items[idx]) } /// Gets a field value by index, supporting negative indexing. /// /// Returns `Some(value)` if the index is in bounds, `None` otherwise. /// Uses `index + len` instead of `-index` to avoid overflow on `i64::MIN`. #[must_use] pub fn get_by_index(&self, index: i64) -> Option<&Value> { let len = i64::try_from(self.items.len()).ok()?; let normalized = if index < 0 { index + len } else { index }; if normalized < 0 || normalized >= len { return None; } self.items.get(usize::try_from(normalized).ok()?) } } impl PyTrait for NamedTuple { fn py_type(&self, _heap: &Heap) -> Type { Type::NamedTuple } fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option { Some(self.items.len()) } fn py_getitem(&self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { // Extract integer index from key, returning TypeError if not an int let index = match key { Value::Int(i) => *i, _ => return Err(ExcType::type_error_indices(Type::NamedTuple, key.py_type(vm.heap))), }; // Get by index with bounds checking match self.get_by_index(index) { Some(value) => Ok(value.clone_with_heap(vm.heap)), None => Err(ExcType::tuple_index_error()), } } fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { // Compare only by items (not type_name) to match tuple semantics // This allows sys.version_info == (3, 14, 0, 'final', 0) to work if self.items.len() != other.items.len() { return Ok(false); } let token = vm.heap.incr_recursion_depth()?; defer_drop!(token, vm); for (i1, i2) in self.items.iter().zip(&other.items) { if !i1.py_eq(i2, vm)? 
{ return Ok(false); } } Ok(true) } fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool { !self.items.is_empty() } fn py_repr_fmt( &self, f: &mut impl Write, vm: &VM<'_, '_, impl ResourceTracker>, heap_ids: &mut AHashSet, ) -> std::fmt::Result { // Check depth limit before recursing let heap = &*vm.heap; let Some(token) = heap.incr_recursion_depth_for_repr() else { return f.write_str("..."); }; crate::defer_drop_immutable_heap!(token, heap); // Format: type_name(field1=value1, field2=value2, ...) write!(f, "{}(", self.name.as_str(vm.interns))?; let mut first = true; for (field_name, value) in self.field_names.iter().zip(&self.items) { if !first { f.write_str(", ")?; } first = false; f.write_str(field_name.as_str(vm.interns))?; f.write_char('=')?; value.py_repr_fmt(f, vm, heap_ids)?; } f.write_char(')')?; Ok(()) } fn py_getattr(&self, attr: &EitherStr, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { let attr_name = attr.as_str(vm.interns); if let Some(value) = self.get_by_name(attr_name, vm.interns) { Ok(Some(CallResult::Value(value.clone_with_heap(vm.heap)))) } else { // we use name here, not `self.py_type(heap)` hence returning a Ok(None) Err(ExcType::attribute_error(self.name(vm.interns), attr_name)) } } } impl HeapItem for NamedTuple { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.name.py_estimate_size() + self.field_names.len() * std::mem::size_of::() + self.items.len() * std::mem::size_of::() } /// Pushes all heap IDs contained in this named tuple onto the stack. /// /// Called during garbage collection to decrement refcounts of nested values. /// When `ref-count-panic` is enabled, also marks all Values as Dereferenced. 
fn py_dec_ref_ids(&mut self, stack: &mut Vec) { // Skip iteration if no refs - GC optimization for tuples of primitives if !self.contains_refs { return; } for obj in &mut self.items { if let Value::Ref(id) = obj { stack.push(*id); #[cfg(feature = "ref-count-panic")] obj.dec_ref_forget(); } } } } ================================================ FILE: crates/monty/src/types/path.rs ================================================ //! Python `pathlib.Path` type implementation. //! //! Provides a path object with both pure methods (no I/O) and filesystem methods //! (require `OsAccess` implementation). Pure methods are handled directly by the VM, //! while filesystem methods yield external function calls for the host to resolve. use std::fmt::Write; use ahash::AHashSet; use smallvec::SmallVec; use crate::{ args::{ArgValues, KwargsValues}, bytecode::{CallResult, VM}, defer_drop, exception_private::{ExcType, RunResult}, heap::{DropWithHeap, Heap, HeapData, HeapId, HeapItem}, intern::{Interns, StaticStrings}, os::OsFunction, resource::{ResourceError, ResourceTracker}, types::{PyTrait, Str, Type, allocate_tuple}, value::{EitherStr, Value}, }; /// Python `pathlib.Path` object representing a filesystem path. /// /// Stores a normalized POSIX path string. Windows-style paths are converted /// to POSIX format (backslashes to forward slashes). /// /// The path is immutable - all operations that would modify the path return /// new `Path` objects or strings. #[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub(crate) struct Path { /// The normalized path string. path: String, } impl Path { /// Creates a new `Path` from a path string. /// /// The path is normalized: /// - Backslashes are converted to forward slashes /// - Trailing slashes are preserved for root paths only #[must_use] pub fn new(path: String) -> Self { Self { path: normalize_path(path), } } /// Returns the path as a string slice. 
#[must_use] pub fn as_str(&self) -> &str { &self.path } /// Returns the final component of the path. /// /// Returns an empty string if the path ends with a separator or is empty. #[must_use] pub fn name(&self) -> &str { self.path.rsplit_once('/').map_or(self.path.as_str(), |(_, name)| name) } /// Returns the path without its final component (parent directory). /// /// For relative paths without a directory (like `file.txt`), returns `.`. /// Returns `None` only for the root path `/`. #[must_use] pub fn parent(&self) -> Option<&str> { if self.path == "/" { return None; } match self.path.rsplit_once('/') { Some((parent, _)) => Some(if parent.is_empty() { "/" } else { parent }), None => Some("."), // Relative path without directory component } } /// Returns the final component without its last suffix. /// /// If the name has multiple suffixes (e.g., "file.tar.gz"), only the /// last suffix is removed. #[must_use] pub fn stem(&self) -> &str { let name = self.name(); if name.starts_with('.') && !name[1..].contains('.') { // Hidden file without extension (e.g., ".bashrc") return name; } name.rsplit_once('.').map_or(name, |(stem, _)| stem) } /// Returns the file extension (last suffix), including the leading dot. /// /// Returns an empty string if there is no extension. #[must_use] pub fn suffix(&self) -> &str { let name = self.name(); if name.starts_with('.') && !name[1..].contains('.') { // Hidden file without extension (e.g., ".bashrc") return ""; } name.rfind('.').map_or("", |idx| &name[idx..]) } /// Returns all file extensions as a list of strings. /// /// Each suffix includes its leading dot. Returns an empty list if no extensions. #[must_use] pub fn suffixes(&self) -> Vec<&str> { let name = self.name(); if name.is_empty() || name == "." || name == ".." 
{ return Vec::new(); } let start_idx = usize::from(name.starts_with('.')); let search_str = &name[start_idx..]; let mut result = Vec::new(); let mut pos = 0; while let Some(idx) = search_str[pos..].find('.') { let abs_idx = pos + idx; // Each suffix is from this dot to the end or next dot let suffix_end = search_str[abs_idx + 1..] .find('.') .map_or(search_str.len(), |next| abs_idx + 1 + next); result.push(&name[start_idx + abs_idx..start_idx + suffix_end]); pos = abs_idx + 1; } result } /// Returns the path components as a list of strings. /// /// Absolute paths start with "/" as the first component. #[must_use] pub fn parts(&self) -> Vec<&str> { if self.path.is_empty() { return Vec::new(); } let mut parts = Vec::new(); if self.path.starts_with('/') { parts.push("/"); let rest = &self.path[1..]; if !rest.is_empty() { parts.extend(rest.split('/').filter(|s| !s.is_empty())); } } else { parts.extend(self.path.split('/').filter(|s| !s.is_empty())); } parts } /// Returns `true` if the path is absolute (starts with `/`). #[must_use] pub fn is_absolute(&self) -> bool { self.path.starts_with('/') } /// Joins this path with another path component. /// /// If `other` is an absolute path, it replaces `self` entirely. #[must_use] pub fn joinpath(&self, other: &str) -> String { if other.starts_with('/') || self.path.is_empty() || self.path == "." { normalize_path(other.to_owned()) } else if self.path.ends_with('/') { normalize_path(format!("{}{}", self.path, other)) } else { normalize_path(format!("{}/{}", self.path, other)) } } /// Returns a new path with the name changed. /// /// # Errors /// Returns an error if the path has no name or if the new name is empty. 
pub fn with_name(&self, name: &str) -> Result { if name.is_empty() { return Err("Invalid name: empty string".to_owned()); } if name.contains('/') { return Err(format!("Invalid name: {name:?} contains path separator")); } if self.name().is_empty() { return Err("Path has no name".to_owned()); } if let Some(parent) = self.parent() { if parent == "/" { Ok(format!("/{name}")) } else if parent == "." { // Relative path without directory - just use the new name Ok(name.to_owned()) } else { Ok(format!("{parent}/{name}")) } } else { Ok(name.to_owned()) } } /// Returns a new path with the stem changed (keeps the suffix). /// /// # Errors /// Returns an error if the path has no name or if the new stem is empty. pub fn with_stem(&self, stem: &str) -> Result { if stem.is_empty() { return Err("Invalid stem: empty string".to_owned()); } if stem.contains('/') { return Err(format!("Invalid stem: {stem:?} contains path separator")); } if self.name().is_empty() { return Err("Path has no name".to_owned()); } let suffix = self.suffix(); let new_name = format!("{stem}{suffix}"); self.with_name(&new_name) } /// Returns a new path with the suffix changed. /// /// If the suffix is empty, removes the existing suffix. /// If the suffix doesn't start with '.', it's added. pub fn with_suffix(&self, suffix: &str) -> Result { if self.name().is_empty() { return Err("Path has no name".to_owned()); } let suffix = if suffix.is_empty() || suffix.starts_with('.') { suffix.to_owned() } else { format!(".{suffix}") }; if suffix.contains('/') { return Err(format!("Invalid suffix: {suffix:?} contains path separator")); } let stem = self.stem(); let new_name = format!("{stem}{suffix}"); self.with_name(&new_name) } /// Returns the path as a POSIX string (forward slashes). /// /// Since paths are already stored in POSIX format, this just returns the path. #[must_use] pub fn as_posix(&self) -> &str { &self.path } /// Creates a `Path` from the `Path()` constructor call. 
/// /// Accepts zero or more path segments that are joined together. /// - `Path()` returns `Path('.')` /// - `Path('a')` returns `Path('a')` /// - `Path('a', 'b', 'c')` returns `Path('a/b/c')` /// - If an absolute path appears, it replaces everything before it. pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let heap = &mut *vm.heap; let interns = vm.interns; let pos_args = args.into_pos_only("Path", heap)?; defer_drop!(pos_args, heap); let path = match pos_args.as_slice() { [] => { // No arguments, return Path('.') Self::new(".".to_owned()) } [single] => { // Single argument, just convert to Path Self::new(extract_path_string(single, heap, interns)?.to_owned()) } [first_arg, rest @ ..] => { let base = Self::new(extract_path_string(first_arg, heap, interns)?.to_owned()); fold_joinpath(base, rest, heap, interns)? } }; Ok(Value::Ref(heap.allocate(HeapData::Path(path))?)) } } /// Extracts a string from a Value for use as a path. fn extract_path_string<'a>( val: &Value, heap: &'a Heap, interns: &'a Interns, ) -> RunResult<&'a str> { match val { Value::InternString(string_id) => Ok(interns.get_str(*string_id)), Value::Ref(heap_id) => match heap.get(*heap_id) { HeapData::Str(s) => Ok(s.as_str()), HeapData::Path(p) => Ok(p.as_str()), _ => Err(ExcType::type_error(format!( "expected str or Path, got {}", val.py_type(heap) ))), }, _ => Err(ExcType::type_error(format!( "expected str or Path, got {}", val.py_type(heap) ))), } } fn fold_joinpath( mut path: Path, parts: &[Value], heap: &mut Heap, interns: &Interns, ) -> RunResult { for part in parts { path = Path::new(path.joinpath(extract_path_string(part, heap, interns)?)); } Ok(path) } /// Handles the `/` operator for Path objects (path concatenation). /// /// In Python, `Path('/usr') / 'bin'` produces `Path('/usr/bin')`. 
pub(crate) fn path_div( path_id: HeapId, other: &Value, heap: &mut Heap, interns: &Interns, ) -> RunResult> { // Extract the right-hand side as a string let other_str = match other { Value::InternString(string_id) => interns.get_str(*string_id).to_owned(), Value::Ref(other_id) => match heap.get(*other_id) { HeapData::Str(s) => s.as_str().to_owned(), HeapData::Path(p) => p.as_str().to_owned(), _ => return Ok(None), }, _ => return Ok(None), }; // Get the path string let path_str = match heap.get(path_id) { HeapData::Path(p) => p.as_str().to_owned(), _ => return Ok(None), }; // Perform path concatenation let result = Path::new(path_str).joinpath(&other_str); Ok(Some(Value::Ref(heap.allocate(HeapData::Path(Path::new(result)))?))) } /// Normalizes a path string to POSIX format. /// /// - Converts backslashes to forward slashes /// - Removes trailing slashes (except for root "/") /// - Does NOT resolve `.` or `..` components (that requires I/O for symlinks) fn normalize_path(mut path: String) -> String { // Convert backslashes to forward slashes if path.contains('\\') { path = path.replace('\\', "/"); } // Remove trailing slashes, but keep root "/" while path.len() > 1 && path.ends_with('/') { path.pop(); } path } /// Prepends the path string argument to existing arguments for OS calls. /// /// OS functions expect the path as the first argument, so we need to /// combine it with any additional arguments passed to the method. 
fn prepend_path_arg(path_arg: Value, args: ArgValues) -> ArgValues { match args { ArgValues::Empty => ArgValues::One(path_arg), ArgValues::One(v) => ArgValues::Two(path_arg, v), ArgValues::Two(a, b) => ArgValues::ArgsKargs { args: vec![path_arg, a, b], kwargs: KwargsValues::Empty, }, ArgValues::Kwargs(kwargs) => ArgValues::ArgsKargs { args: vec![path_arg], kwargs, }, ArgValues::ArgsKargs { args: mut vals, kwargs } => { vals.insert(0, path_arg); ArgValues::ArgsKargs { args: vals, kwargs } } } } impl Path { /// Resolves a known attribute by its `StaticStrings` variant. /// /// Returns `Ok(Some(value))` for recognized property names (`name`, `parent`, /// `stem`, `suffix`, `suffixes`, `parts`), or `Ok(None)` if the variant doesn't /// correspond to a Path attribute. Used by `py_getattr` to share logic between /// the interned fast path and the heap string slow path. fn getattr_by_static(&self, ss: StaticStrings, heap: &mut Heap) -> RunResult> { let v = match ss { StaticStrings::Name => { let name = self.name(); Value::Ref(heap.allocate(HeapData::Str(Str::new(name.to_owned())))?) } StaticStrings::Parent => { if let Some(parent) = self.parent() { let parent_path = Self::new(parent.to_owned()); Value::Ref(heap.allocate(HeapData::Path(parent_path))?) } else { // Return self when there's no parent (root or relative path) let same_path = Self::new(self.as_str().to_owned()); Value::Ref(heap.allocate(HeapData::Path(same_path))?) } } StaticStrings::Stem => { let stem = self.stem(); Value::Ref(heap.allocate(HeapData::Str(Str::new(stem.to_owned())))?) } StaticStrings::Suffix => { let suffix = self.suffix(); Value::Ref(heap.allocate(HeapData::Str(Str::new(suffix.to_owned())))?) 
} StaticStrings::Suffixes => { use crate::types::List; let suffixes = self.suffixes(); let mut items = Vec::with_capacity(suffixes.len()); for suffix in suffixes { let str_id = heap.allocate(HeapData::Str(Str::new(suffix.to_owned())))?; items.push(Value::Ref(str_id)); } Value::Ref(heap.allocate(HeapData::List(List::new(items)))?) } StaticStrings::Parts => { let parts = self.parts(); let mut items = SmallVec::with_capacity(parts.len()); for part in parts { let str_id = heap.allocate(HeapData::Str(Str::new(part.to_owned())))?; items.push(Value::Ref(str_id)); } allocate_tuple(items, heap)? } _ => return Ok(None), }; Ok(Some(v)) } } impl PyTrait for Path { fn py_type(&self, _heap: &Heap) -> Type { Type::Path } fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option { // Paths don't have a length in Python None } fn py_eq(&self, other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { Ok(self.path == other.path) } fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool { // Paths are always truthy (even empty paths) true } fn py_repr_fmt( &self, f: &mut impl Write, _vm: &VM<'_, '_, impl ResourceTracker>, _heap_ids: &mut AHashSet, ) -> std::fmt::Result { // Format like: PosixPath('/usr/bin') write!(f, "PosixPath('{}')", self.path) } /// Handles attribute calls on Path objects, including both pure methods (no I/O) /// and OS methods that require host system access. /// /// OS methods (exists, read_text, etc.) are detected via `OsFunction::try_from` /// and returned as `CallResult::OsCall` for the VM to yield to the host. /// Pure methods (is_absolute, joinpath, etc.) are handled directly. 
fn py_call_attr( &mut self, _self_id: HeapId, vm: &mut VM<'_, '_, impl ResourceTracker>, attr: &EitherStr, args: ArgValues, ) -> RunResult { let heap = &mut *vm.heap; let interns = vm.interns; let Some(method) = attr.static_string() else { args.drop_with_heap(heap); return Err(ExcType::attribute_error(Type::Path, attr.as_str(interns))); }; // Check if this is an OS method that requires host system access if let Ok(os_fn) = OsFunction::try_from(method) { // Package path as first argument for OS call (as Path, not string) let path_arg = Value::Ref(heap.allocate(HeapData::Path(self.clone()))?); let os_args = prepend_path_arg(path_arg, args); return Ok(CallResult::OsCall(os_fn, os_args)); } // Pure methods (no I/O) let value = match method { StaticStrings::IsAbsolute => { args.check_zero_args("is_absolute", heap)?; Ok(Value::Bool(self.is_absolute())) } StaticStrings::Joinpath => { let pos_args = args.into_pos_only("joinpath", heap)?; defer_drop!(pos_args, heap); let path = fold_joinpath(self.clone(), pos_args.as_slice(), heap, interns)?; Ok(Value::Ref(heap.allocate(HeapData::Path(path))?)) } StaticStrings::WithName => { let name_val = args.get_one_arg("with_name", heap)?; defer_drop!(name_val, heap); let name = extract_path_string(name_val, heap, interns)?; let result = self .with_name(name) .map_err(|e| crate::exception_private::SimpleException::new_msg(ExcType::ValueError, &e))?; Ok(Value::Ref(heap.allocate(HeapData::Path(Self::new(result)))?)) } StaticStrings::WithStem => { let stem_val = args.get_one_arg("with_stem", heap)?; defer_drop!(stem_val, heap); let stem = extract_path_string(stem_val, heap, interns)?; let result = self .with_stem(stem) .map_err(|e| crate::exception_private::SimpleException::new_msg(ExcType::ValueError, &e))?; Ok(Value::Ref(heap.allocate(HeapData::Path(Self::new(result)))?)) } StaticStrings::WithSuffix => { let suffix_val = args.get_one_arg("with_suffix", heap)?; defer_drop!(suffix_val, heap); let suffix = extract_path_string(suffix_val, 
heap, interns)?; let result = self .with_suffix(suffix) .map_err(|e| crate::exception_private::SimpleException::new_msg(ExcType::ValueError, &e))?; Ok(Value::Ref(heap.allocate(HeapData::Path(Self::new(result)))?)) } StaticStrings::AsPosix | StaticStrings::Fspath => { args.check_zero_args(method.into(), heap)?; // Both as_posix() and __fspath__() return the string representation Ok(Value::Ref( heap.allocate(HeapData::Str(Str::new(self.as_posix().to_owned())))?, )) } _ => { args.drop_with_heap(heap); return Err(ExcType::attribute_error(Type::Path, attr.as_str(interns))); } }; value.map(CallResult::Value) } fn py_getattr(&self, attr: &EitherStr, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { // Fast path: interned strings can be matched by ID without string comparison if let Some(ss) = attr.static_string() { if let Some(v) = self.getattr_by_static(ss, vm.heap)? { return Ok(Some(CallResult::Value(v))); } return Err(ExcType::attribute_error(Type::Path, attr.as_str(vm.interns))); } // Slow path: heap-allocated strings need string comparison let attr_str = attr.as_str(vm.interns); let ss = match attr_str { "name" => StaticStrings::Name, "parent" => StaticStrings::Parent, "stem" => StaticStrings::Stem, "suffix" => StaticStrings::Suffix, "suffixes" => StaticStrings::Suffixes, "parts" => StaticStrings::Parts, _ => return Err(ExcType::attribute_error(Type::Path, attr_str)), }; let v = self .getattr_by_static(ss, vm.heap)? .expect("matched attribute must produce a value"); Ok(Some(CallResult::Value(v))) } } impl HeapItem for Path { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.path.capacity() } fn py_dec_ref_ids(&mut self, _stack: &mut Vec) { // Path doesn't contain heap references, nothing to do } } ================================================ FILE: crates/monty/src/types/property.rs ================================================ //! Python property descriptor for computed attributes. //! //! 
Properties are descriptors whose value is computed when accessed. //! When a Property is retrieved via `py_getattr`, its getter is invoked //! rather than returning the Property itself. use crate::{args::ArgValues, bytecode::CallResult, os::OsFunction}; /// Property descriptor for computed attributes. /// /// This mirrors Python's descriptor protocol for properties. When accessed, /// the property's getter is invoked to compute the value. /// /// # Variants /// /// Currently only supports OS properties. Future variants: /// - `Callable(FunctionId)` - user-defined getter functions (@property) /// - `External(StringId)` - external function getters #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub(crate) enum Property { /// A property backed by an OS function (e.g., `os.environ`). Os(OsFunction), } impl Property { /// Invokes the property getter, returning the appropriate `CallResult`. /// /// For OS properties, returns `CallResult::OsCall` to signal the VM /// should yield to the host for the value. pub fn get(self) -> CallResult { match self { Self::Os(os_fn) => CallResult::OsCall(os_fn, ArgValues::Empty), } } } ================================================ FILE: crates/monty/src/types/py_trait.rs ================================================ /// Trait for heap-allocated Python values that need common operations. /// /// This trait abstracts over container types (List, Tuple, Str, Bytes) stored /// in the heap, providing a unified interface for operations like length, /// equality, reference counting support, and attribute dispatch. /// /// The trait is designed to work with `enum_dispatch` for efficient virtual /// dispatch on `HeapData` without boxing overhead. 
use std::borrow::Cow; use std::{cmp::Ordering, fmt::Write}; use ahash::AHashSet; use super::Type; use crate::{ ResourceError, args::ArgValues, bytecode::{CallResult, VM}, exception_private::{ExcType, RunResult, SimpleException}, heap::{DropWithHeap, Heap, HeapId}, resource::ResourceTracker, value::{EitherStr, Value}, }; /// Common operations for heap-allocated Python values. /// /// Implementers should provide Python-compatible semantics for all operations. /// Most methods take a `&VM` or `&mut VM` reference to access the heap and interned /// strings for nested lookups in containers holding `Value::Ref` values. /// /// This trait is used with `enum_dispatch` on `HeapData` to enable efficient /// virtual dispatch without boxing overhead. /// /// Many methods are generic over `T: ResourceTracker` to work with any heap /// configuration. This allows the same trait to work with both unlimited and /// resource-limited execution contexts. pub trait PyTrait { /// Returns the Python type name for this value (e.g., "list", "str"). /// /// Used for error messages and the `type()` builtin. /// Takes heap reference for cases where nested Value lookups are needed. fn py_type(&self, heap: &Heap) -> Type; /// Returns the number of elements in this container. /// /// For interns, returns the number of Unicode codepoints (characters), matching Python. /// Returns `None` if the type doesn't support `len()`. fn py_len(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Option; /// Python equality comparison (`==`). /// /// For containers, this performs element-wise comparison using the heap /// to resolve nested references. Takes `&mut VM` to allow lazy hash /// computation for dict key lookups and access to interned string content. /// /// Recursion depth is tracked via `heap.incr_recursion_depth()`. /// /// Returns `Ok(true)` if equal, `Ok(false)` if not equal, or /// `Err(ResourceError::Recursion)` if maximum depth is exceeded. 
fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result; /// Python comparison (`<`, `>`, etc.). /// /// For containers, this performs element-wise comparison using the heap /// to resolve nested references. Takes `&mut VM` to allow lazy hash /// computation for dict key lookups and access to interned string content. /// /// Recursion depth is tracked via `heap.incr_recursion_depth()`. /// /// Returns `Ok(Some(Ordering))` for comparable values, `Ok(None)` if not comparable, /// or `Err(ResourceError::Recursion)` if maximum depth is exceeded. fn py_cmp( &self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, ResourceError> { Ok(None) } /// Returns the truthiness of the value following Python semantics. /// /// Container types should typically report `false` when empty. fn py_bool(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> bool { self.py_len(vm) != Some(0) } /// Writes the Python `repr()` string for this value to a formatter. /// /// This method enables cycle detection for self-referential structures by tracking /// visited heap IDs. When a cycle is detected (ID already in `heap_ids`), implementations /// should write an ellipsis (e.g., `[...]` for lists, `{...}` for dicts). /// /// Recursion depth is tracked via `heap.incr_recursion_depth_for_repr()`. /// /// # Arguments /// * `f` - The formatter to write to /// * `vm` - The VM for resolving value references and looking up interned strings /// * `heap_ids` - Set of heap IDs currently being repr'd (for cycle detection) fn py_repr_fmt( &self, f: &mut impl Write, vm: &VM<'_, '_, impl ResourceTracker>, heap_ids: &mut AHashSet, ) -> std::fmt::Result; /// Returns the Python `repr()` string for this value. /// /// Convenience wrapper around `py_repr_fmt` that returns an owned string. 
    fn py_repr(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Cow<'static, str> {
        let mut s = String::new();
        // Fresh cycle-detection set: this is the top-level entry point for a repr call.
        let mut heap_ids = AHashSet::new();
        // Unwrap is safe: writing to String never fails
        self.py_repr_fmt(&mut s, vm, &mut heap_ids).unwrap();
        Cow::Owned(s)
    }

    /// Returns the Python `str()` string for this value.
    ///
    /// Recursion depth is tracked via the heap's recursion depth counter.
    ///
    /// The default implementation falls back to `py_repr`.
    fn py_str(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Cow<'static, str> {
        self.py_repr(vm)
    }

    /// Python addition (`__add__`).
    ///
    /// Returns `Ok(None)` if the operation is not supported for these types,
    /// `Ok(Some(value))` on success, or `Err(ResourceError)` if allocation fails.
    // NOTE(review): return types such as `Result, ResourceError>`, `RunResult>` and bare
    // `Option` / `Result` in this group look like they lost generic arguments in
    // extraction — confirm against the original file.
    fn py_add(
        &self,
        _other: &Self,
        _vm: &mut VM<'_, '_, impl ResourceTracker>,
    ) -> Result, ResourceError> {
        Ok(None)
    }

    /// Python subtraction (`__sub__`).
    ///
    /// Returns `Ok(None)` if the operation is not supported for these types,
    /// `Ok(Some(value))` on success, or `Err(ResourceError)` if allocation fails.
    fn py_sub(
        &self,
        _other: &Self,
        _vm: &mut VM<'_, '_, impl ResourceTracker>,
    ) -> Result, ResourceError> {
        Ok(None)
    }

    /// Python modulus (`__mod__`).
    ///
    /// Returns `Ok(None)` if the operation is not supported for these types,
    /// `Ok(Some(value))` on success, or `Err(RunError)` if an error occurs.
    fn py_mod(&self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> {
        Ok(None)
    }

    /// Optimized helper for `(a % b) == c` comparisons.
    ///
    /// The default implementation returns `None`, i.e. no fast path is available.
    fn py_mod_eq(&self, _other: &Self, _right_value: i64) -> Option {
        None
    }

    /// Python in-place addition (`__iadd__`).
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the operation was successful, `Ok(false)` if not supported,
    /// or `Err(ResourceError)` if allocation fails.
    fn py_iadd(
        &mut self,
        _other: &Value,
        _vm: &mut VM<'_, '_, impl ResourceTracker>,
        _self_id: Option,
    ) -> Result {
        Ok(false)
    }

    /// Python multiplication (`__mul__`).
    ///
    /// Returns `Ok(None)` if the operation is not supported for these types.
    /// For numeric types: Int * Int, Float * Float, Int * Float, etc.
    /// For sequences: str * int, list * int for repetition.
    // NOTE(review): `RunResult>` / bare `RunResult` return types in this group look like
    // they lost generic arguments in extraction — confirm against the original file.
    fn py_mult(&self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> {
        Ok(None)
    }

    /// Python true division (`__truediv__`).
    ///
    /// Always returns float for numeric types. Returns `Ok(None)` if not supported.
    /// Returns `Err(ZeroDivisionError)` for division by zero.
    fn py_div(&self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> {
        Ok(None)
    }

    /// Python floor division (`__floordiv__`).
    ///
    /// Returns int for int//int, float for float operations.
    /// Returns `Ok(None)` if not supported.
    /// Returns `Err(ZeroDivisionError)` for division by zero.
    fn py_floordiv(&self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> {
        Ok(None)
    }

    /// Python power (`__pow__`).
    ///
    /// Int ** positive_int returns int, int ** negative_int returns float.
    /// Returns `Ok(None)` if not supported.
    /// Returns `Err(ZeroDivisionError)` for 0 ** negative.
    fn py_pow(&self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> {
        Ok(None)
    }

    /// Calls an attribute method on this value (e.g., `list.append()`), returning a
    /// `CallResult` that may signal OS, external, or method calls.
    ///
    /// This method enables types to signal that they need operations the VM cannot perform
    /// directly (OS operations, external function calls, dataclass method calls). The VM
    /// converts the result to the appropriate `FrameExit` variant.
    ///
    /// Types that only support synchronous attribute calls should wrap their return value
    /// with `CallResult::Value`. Types that need to perform OS/external operations,
    /// intercept specific methods (e.g. `list.sort`), or detect method calls (e.g. dataclass
    /// methods) should return the appropriate `CallResult` variant.
    ///
    /// # Arguments
    /// * `self_id` - The heap ID of this value, needed by types that must reference themselves
    ///   (e.g. dataclass method calls prepend `self` to args)
    ///
    /// # Returns
    ///
    /// - `Ok(CallResult::Value(v))` - Method completed synchronously with value `v`
    /// - `Ok(CallResult::OsCall(func, args))` - Method needs OS operation; VM yields to host
    /// - `Ok(CallResult::External(name, args))` - Method needs external function call
    /// - `Ok(CallResult::MethodCall(attr, args))` - Dataclass method call; VM yields to host
    /// - `Err(e)` - Method call failed with error
    ///
    /// The default implementation releases `args` and raises `AttributeError`.
    fn py_call_attr(
        &mut self,
        _self_id: HeapId,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
        attr: &EitherStr,
        args: ArgValues,
    ) -> RunResult {
        // `py_call_attr` takes ownership of the argument bundle. Implementations that
        // do not recognize the attribute still need to release those values before
        // reporting `AttributeError`, otherwise method calls on unsupported types leak
        // references on the error path (caught by `ref-count-panic`).
        args.drop_with_heap(vm);
        Err(ExcType::attribute_error(self.py_type(vm.heap), attr.as_str(vm.interns)))
    }

    /// Python subscript get operation (`__getitem__`), e.g., `d[key]`.
    ///
    /// Returns the value associated with the key, or an error if the key doesn't exist
    /// or the type doesn't support subscripting.
    ///
    /// Takes `&mut VM` for proper reference counting when cloning the returned value
    /// and access to interned string content.
    ///
    /// Default implementation returns TypeError.
    fn py_getitem(&self, _key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
        Err(ExcType::type_error_not_sub(self.py_type(vm.heap)))
    }

    /// Python subscript set operation (`__setitem__`), e.g., `d[key] = value`.
    ///
    /// Sets the value associated with the key, or returns an error if the key is invalid
    /// or the type doesn't support subscript assignment.
    ///
    /// Default implementation returns TypeError.
    fn py_setitem(&mut self, key: Value, value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<()> {
        // This method owns `key` and `value`, so both are released before the error is
        // reported.
        key.drop_with_heap(vm.heap);
        value.drop_with_heap(vm.heap);
        Err(SimpleException::new_msg(
            ExcType::TypeError,
            format!("'{}' object does not support item assignment", self.py_type(vm.heap)),
        )
        .into())
    }

    /// Python attribute get operation (`__getattr__`), e.g., `obj.attr`.
    ///
    /// Returns the value associated with the attribute (owned), or `Ok(None)` if the type
    /// doesn't support attribute access at all. Types that support attributes should return
    /// `Err(AttributeError)` when an attribute is not found, not `Ok(None)`.
    ///
    /// The returned `Value` is always owned:
    /// - For stored values (Dataclass, Module, NamedTuple fields): clone with `clone_with_heap`
    /// - For computed values (Exception.args, Slice.start, Path.name): return newly created value
    ///
    /// Takes `&mut VM` to allow:
    /// - Cloning stored values with proper reference counting
    /// - Allocating computed values that need heap storage
    ///
    /// Default implementation returns `Ok(None)`, indicating the type doesn't support
    /// attribute access and a generic `AttributeError` should be raised by the caller.
    // NOTE(review): the return type reads `RunResult>`; its generic arguments look lost in
    // extraction — confirm against the original file.
    fn py_getattr(
        &self,
        _attr: &EitherStr,
        _vm: &mut VM<'_, '_, impl ResourceTracker>,
    ) -> RunResult> {
        Ok(None)
    }
}

================================================
FILE: crates/monty/src/types/range.rs
================================================
//! Python range type implementation.
//!
//! Provides a range object that supports iteration over a sequence of integers
//! with configurable start, stop, and step values.

use std::fmt::Write;

use ahash::AHashSet;

use crate::{
    args::ArgValues,
    bytecode::VM,
    defer_drop,
    exception_private::{ExcType, RunResult},
    heap::{Heap, HeapData, HeapId, HeapItem},
    resource::{ResourceError, ResourceTracker},
    types::{PyTrait, Type},
    value::Value,
};

/// Python range object representing an immutable sequence of integers.
/// /// Supports three forms of construction: /// - `range(stop)` - integers from 0 to stop-1 /// - `range(start, stop)` - integers from start to stop-1 /// - `range(start, stop, step)` - integers from start, incrementing by step /// /// The range is computed lazily during iteration, not stored as a list. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub(crate) struct Range { /// The starting value (inclusive). Defaults to 0. pub start: i64, /// The ending value (exclusive). pub stop: i64, /// The step between values. Defaults to 1. Cannot be 0. pub step: i64, } impl Range { /// Creates a new range with the given start, stop, and step. /// /// # Panics /// Panics if step is 0. Use `new_checked` for fallible construction. #[must_use] fn new(start: i64, stop: i64, step: i64) -> Self { debug_assert!(step != 0, "range step cannot be 0"); Self { start, stop, step } } /// Creates a range from just a stop value (start=0, step=1). #[must_use] fn from_stop(stop: i64) -> Self { Self { start: 0, stop, step: 1, } } /// Creates a range from start and stop (step=1). #[must_use] fn from_start_stop(start: i64, stop: i64) -> Self { Self { start, stop, step: 1 } } /// Returns the length of the range (number of elements it will yield). #[must_use] pub fn len(&self) -> usize { if self.step > 0 { if self.stop > self.start { let len_i64 = (self.stop - self.start - 1) / self.step + 1; usize::try_from(len_i64).expect("range length guaranteed non-negative") } else { 0 } } else { // step < 0 if self.start > self.stop { let len_i64 = (self.start - self.stop - 1) / (-self.step) + 1; usize::try_from(len_i64).expect("range length guaranteed non-negative") } else { 0 } } } #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } /// Checks if an integer value is contained within this range (O(1)). /// /// A value is contained if it falls within the range bounds and is aligned /// with the step (i.e., `(n - start) % step == 0`). 
#[must_use] pub fn contains(&self, n: i64) -> bool { if self.step > 0 { // Forward range: start <= n < stop if n < self.start || n >= self.stop { return false; } } else { // Backward range: stop < n <= start if n > self.start || n <= self.stop { return false; } } // Check if n is on the step grid (n - self.start) % self.step == 0 } /// Creates a range from the `range()` constructor call. /// /// Supports: /// - `range(stop)` - range from 0 to stop /// - `range(start, stop)` - range from start to stop /// - `range(start, stop, step)` - range with custom step pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let heap = &mut *vm.heap; let pos_args = args.into_pos_only("range", heap)?; defer_drop!(pos_args, heap); let range = match pos_args.as_slice() { [] => return Err(ExcType::type_error_at_least("range", 1, 0)), [first_arg] => { let stop = first_arg.as_int(heap)?; Self::from_stop(stop) } [first_arg, second_arg] => { let start = first_arg.as_int(heap)?; let stop = second_arg.as_int(heap)?; Self::from_start_stop(start, stop) } [first_arg, second_arg, third_arg] => { let start = first_arg.as_int(heap)?; let stop = second_arg.as_int(heap)?; let step = third_arg.as_int(heap)?; if step == 0 { return Err(ExcType::value_error_range_step_zero()); } Self::new(start, stop, step) } _ => return Err(ExcType::type_error_at_most("range", 3, pos_args.len())), }; Ok(Value::Ref(heap.allocate(HeapData::Range(range))?)) } /// Handles slice-based indexing for ranges. /// /// Returns a new range object representing the sliced view. /// The new range has computed start, stop, and step values. 
fn getitem_slice(&self, slice: &crate::types::Slice, heap: &mut Heap) -> RunResult { let range_len = self.len(); let (start, stop, step) = slice .indices(range_len) .map_err(|()| ExcType::value_error_slice_step_zero())?; // Calculate the new range parameters // new_start = self.start + start * self.step // new_step = self.step * slice_step // new_stop needs to be computed based on the number of elements let new_step = self.step.saturating_mul(step); let start_i64 = i64::try_from(start).expect("start index fits in i64"); let new_start = self.start.saturating_add(start_i64.saturating_mul(self.step)); // Calculate the number of elements in the sliced range // try_from succeeds for non-negative step; step==0 rejected by slice.indices() let num_elements = if let Ok(step_usize) = usize::try_from(step) { // Forward iteration if start >= stop { 0 } else { ((stop - start - 1) / step_usize) + 1 } } else { // Backward iteration let step_abs = usize::try_from(-step).expect("step is negative so -step is positive"); if stop > range_len { // stop sentinel means "go to the beginning" (start / step_abs) + 1 } else if start <= stop { 0 } else { ((start - stop - 1) / step_abs) + 1 } }; // new_stop = new_start + num_elements * new_step let num_elements_i64 = i64::try_from(num_elements).expect("num_elements fits in i64"); let new_stop = new_start.saturating_add(num_elements_i64.saturating_mul(new_step)); let new_range = Self::new(new_start, new_stop, new_step); Ok(Value::Ref(heap.allocate(HeapData::Range(new_range))?)) } } impl Default for Range { fn default() -> Self { Self::from_stop(0) } } impl PyTrait for Range { fn py_type(&self, _heap: &Heap) -> Type { Type::Range } fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option { Some(self.len()) } fn py_getitem(&self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { // Check for slice first (Value::Ref pointing to HeapData::Slice) if let Value::Ref(id) = key && let HeapData::Slice(slice) = 
vm.heap.get(*id) { // Clone the slice to release the borrow on heap before calling getitem_slice let slice = slice.clone(); return self.getitem_slice(&slice, vm.heap); } // Extract integer index, accepting Int, Bool (True=1, False=0), and LongInt let index = key.as_index(vm.heap, Type::Range)?; // Get range length for normalization let len = i64::try_from(self.len()).expect("range length exceeds i64::MAX"); let normalized = if index < 0 { index + len } else { index }; // Bounds check if normalized < 0 || normalized >= len { return Err(ExcType::range_index_error()); } // Calculate: start + normalized * step // Use checked arithmetic to avoid overflow in intermediate calculations let offset = normalized .checked_mul(self.step) .and_then(|v| self.start.checked_add(v)) .expect("range element calculation overflowed"); Ok(Value::Int(offset)) } fn py_eq(&self, other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { // Compare ranges by their actual sequences, not parameters. // Two ranges are equal if they produce the same elements. 
let len1 = self.len(); let len2 = other.len(); if len1 != len2 { return Ok(false); } // Same length - compare first element and step (if non-empty) if len1 == 0 { return Ok(true); // Both empty } Ok(self.start == other.start && self.step == other.step) } fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool { !self.is_empty() } fn py_repr_fmt( &self, f: &mut impl Write, _vm: &VM<'_, '_, impl ResourceTracker>, _heap_ids: &mut AHashSet, ) -> std::fmt::Result { if self.step == 1 { write!(f, "range({}, {})", self.start, self.stop) } else { write!(f, "range({}, {}, {})", self.start, self.stop, self.step) } } } impl HeapItem for Range { fn py_estimate_size(&self) -> usize { std::mem::size_of::() } fn py_dec_ref_ids(&mut self, _stack: &mut Vec) { // Range doesn't contain heap references, nothing to do } } ================================================ FILE: crates/monty/src/types/re_match.rs ================================================ //! Regex match result type for the `re` module. //! //! `ReMatch` represents the result of a successful regex match operation. //! It stores the matched text, capture groups, and their positions, providing //! Python-compatible access via `.group()`, `.groups()`, `.start()`, `.end()`, //! and `.span()` methods. //! //! All data is stored as owned values (no heap references), so reference counting //! is trivial — `py_dec_ref_ids` is a no-op. use std::{cmp::Ordering, fmt::Write}; use ahash::AHashSet; use smallvec::smallvec; use crate::{ args::ArgValues, bytecode::{CallResult, VM}, exception_private::{ExcType, RunResult}, heap::{Heap, HeapData, HeapId, HeapItem}, intern::{Interns, StaticStrings}, resource::{ResourceError, ResourceTracker}, types::{Dict, PyTrait, Str, Type, allocate_tuple, str::string_repr_fmt}, value::{EitherStr, Value}, }; /// A regex match result, storing captured groups and positions. /// /// Created by `re.match()`, `re.search()`, `re.fullmatch()`, and their /// `Pattern` method equivalents. 
/// Stores all data as owned values (no heap
/// references), which simplifies reference counting — `py_dec_ref_ids` is
/// a no-op.
///
/// The `.re` attribute (reference back to the pattern) is intentionally omitted
/// to avoid circular references between Match and Pattern objects.
///
/// # Position semantics
///
/// Positions are returned as Unicode character offsets (not byte offsets) to
/// match CPython's behavior. The conversion from byte offsets (used internally
/// by the regex engine) happens at construction time in `from_captures`.
///
/// # Group Indexing
///
/// Group 0 is the full match, groups 1..N are capture groups.
/// Both integer and named group access are supported — named groups are looked
/// up via the `named_groups` mapping.
// NOTE(review): the `groups` and `group_spans` field types read `Vec>`; their generic
// arguments look lost in extraction — confirm against the original file.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub(crate) struct ReMatch {
    /// The full matched text (equivalent to `group(0)`).
    full_match: String,
    /// Start character position of the full match in the input string.
    start: usize,
    /// End character position of the full match in the input string.
    end: usize,
    /// Captured group strings (index 0 = group 1). `None` for unmatched optional groups.
    groups: Vec>,
    /// Span positions per captured group (index 0 = group 1). `None` for unmatched optional groups.
    group_spans: Vec>,
    /// Named groups: maps group name → 1-based group index.
    named_groups: Vec<(String, usize)>,
    /// Owned copy of the input string (returned by `.string` attribute).
    input_string: String,
    /// The original pattern string (used in repr output).
    pattern_string: String,
}

impl ReMatch {
    /// Creates a `ReMatch` from a `fancy_regex::Captures` result.
    ///
    /// Converts byte offsets from the regex crate into character offsets to match
    /// CPython's behavior. The full match (group 0) is always present when captures
    /// are successful.
    ///
    /// # Arguments
    /// * `caps` - The successful capture result from the regex engine
    /// * `input` - The full input string that was searched
    /// * `pattern` - The original pattern string (for repr)
    /// * `regex` - The compiled regex, used to extract named group mappings
    pub fn from_captures(
        caps: &fancy_regex::Captures<'_>,
        input: &str,
        pattern: &str,
        regex: &fancy_regex::Regex,
    ) -> Self {
        let full = caps.get(0).expect("group 0 always exists on a successful match");
        let full_match = full.as_str().to_owned();
        let start = byte_to_char_offset(input, full.start());
        let end = byte_to_char_offset(input, full.end());

        // `caps.len()` counts group 0 as well; only groups 1..N are stored below.
        let group_count = caps.len().saturating_sub(1);
        let mut groups = Vec::with_capacity(group_count);
        let mut group_spans = Vec::with_capacity(group_count);

        // `groups` and `group_spans` are filled in lockstep so they always have equal length.
        for cap in caps.iter().skip(1) {
            if let Some(m) = cap {
                groups.push(Some(m.as_str().to_owned()));
                group_spans.push(Some((
                    byte_to_char_offset(input, m.start()),
                    byte_to_char_offset(input, m.end()),
                )));
            } else {
                groups.push(None);
                group_spans.push(None);
            }
        }

        // Extract named group name→index mappings from the regex
        let mut named_groups = Vec::new();
        for (idx, name) in regex.capture_names().enumerate() {
            if let Some(name) = name {
                named_groups.push((name.to_owned(), idx));
            }
        }

        Self {
            full_match,
            start,
            end,
            groups,
            group_spans,
            named_groups,
            input_string: input.to_owned(),
            pattern_string: pattern.to_owned(),
        }
    }

    /// Returns the match for a given group number.
    ///
    /// Group 0 is the full match, groups 1..N are capture groups.
    /// Returns `Value::None` for unmatched optional groups.
    /// Raises `IndexError` for invalid group numbers.
fn get_group(&self, n: i64, heap: &mut Heap) -> RunResult { match n.cmp(&0) { Ordering::Equal => { let s = Str::new(self.full_match.clone()); Ok(Value::Ref(heap.allocate(HeapData::Str(s))?)) } Ordering::Less => Err(ExcType::re_match_group_index_error()), Ordering::Greater => { let idx = group_index(n); if idx >= self.groups.len() { return Err(ExcType::re_match_group_index_error()); } match &self.groups[idx] { Some(s) => { let s = Str::new(s.clone()); Ok(Value::Ref(heap.allocate(HeapData::Str(s))?)) } None => Ok(Value::None), } } } } /// Returns the match for a named group. /// /// Looks up the group name in `named_groups` and delegates to `get_group`. /// Raises `IndexError` if the name is not found. fn get_group_by_name(&self, name: &str, heap: &mut Heap) -> RunResult { for (group_name, idx) in &self.named_groups { if group_name == name { #[expect(clippy::cast_possible_wrap, reason = "group indices are always small")] return self.get_group(*idx as i64, heap); } } Err(ExcType::re_match_group_index_error()) } /// Implements `m[key]` subscript access on match objects. /// /// Supports integer indexing (like `m[0]`, `m[1]`), bool indexing, /// and string indexing for named groups (like `m['name']`). pub fn py_getitem(&self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { match key { Value::Int(n) => self.get_group(*n, vm.heap), Value::Bool(b) => self.get_group(i64::from(*b), vm.heap), Value::InternString(id) => { let name = vm.interns.get_str(*id); self.get_group_by_name(name, vm.heap) } Value::Ref(heap_id) => match vm.heap.get(*heap_id) { HeapData::Str(s) => { let name = s.as_str().to_owned(); self.get_group_by_name(&name, vm.heap) } _ => Err(ExcType::re_match_group_index_error()), }, _ => Err(ExcType::re_match_group_index_error()), } } /// Returns a dict mapping named group names to their matched strings. /// /// Groups that didn't participate in the match have the `default` value /// (typically `None`). 
    // NOTE(review): if an allocation below fails part-way through, the keys/values already
    // pushed into `pairs` are returned-over by `?` without an explicit release; whether
    // that leaks heap references depends on how `Value` is dropped — confirm.
    // NOTE(review): bare `RunResult` return types in this group look like they lost their
    // generic argument in extraction — confirm against the original file.
    fn get_groupdict(&self, default: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
        let mut pairs = Vec::with_capacity(self.named_groups.len());
        for (name, idx) in &self.named_groups {
            let key_str = Str::new(name.clone());
            let key = Value::Ref(vm.heap.allocate(HeapData::Str(key_str))?);
            // idx is 1-based, groups vec is 0-based (index 0 = group 1)
            let value = if *idx > 0 && (*idx - 1) < self.groups.len() {
                match &self.groups[*idx - 1] {
                    Some(s) => {
                        let s = Str::new(s.clone());
                        Value::Ref(vm.heap.allocate(HeapData::Str(s))?)
                    }
                    // Group exists but did not participate: hand out a fresh reference to `default`.
                    None => default.clone_with_heap(vm),
                }
            } else {
                default.clone_with_heap(vm)
            };
            pairs.push((key, value));
        }
        let dict = Dict::from_pairs(pairs, vm)?;
        Ok(Value::Ref(vm.heap.allocate(HeapData::Dict(dict))?))
    }

    /// Returns a tuple of all capture group strings.
    ///
    /// Unmatched optional groups appear as `None`. Group 0 (the full match) is not included.
    fn get_groups(&self, heap: &mut Heap) -> RunResult {
        let mut elements = smallvec![];
        for group in &self.groups {
            match group {
                Some(s) => {
                    let s = Str::new(s.clone());
                    elements.push(Value::Ref(heap.allocate(HeapData::Str(s))?));
                }
                None => elements.push(Value::None),
            }
        }
        Ok(allocate_tuple(elements, heap)?)
    }

    /// Returns the start character position for a given group.
    ///
    /// Group 0 is the full match. Returns -1 for unmatched optional groups.
    /// Negative or out-of-range group numbers raise `IndexError`.
    #[expect(clippy::cast_possible_wrap, reason = "positions are always small enough for i64")]
    fn get_start(&self, n: i64) -> RunResult {
        match n.cmp(&0) {
            Ordering::Equal => Ok(Value::Int(self.start as i64)),
            Ordering::Less => Err(ExcType::re_match_group_index_error()),
            Ordering::Greater => {
                let idx = group_index(n);
                if idx >= self.group_spans.len() {
                    return Err(ExcType::re_match_group_index_error());
                }
                match &self.group_spans[idx] {
                    Some((s, _)) => Ok(Value::Int(*s as i64)),
                    None => Ok(Value::Int(-1)),
                }
            }
        }
    }

    /// Returns the end character position for a given group.
    ///
    /// Group 0 is the full match.
Returns -1 for unmatched optional groups #[expect(clippy::cast_possible_wrap, reason = "positions are always small enough for i64")] fn get_end(&self, n: i64) -> RunResult { match n.cmp(&0) { Ordering::Equal => Ok(Value::Int(self.end as i64)), Ordering::Less => Err(ExcType::re_match_group_index_error()), Ordering::Greater => { let idx = group_index(n); if idx >= self.group_spans.len() { return Err(ExcType::re_match_group_index_error()); } match &self.group_spans[idx] { Some((_, e)) => Ok(Value::Int(*e as i64)), None => Ok(Value::Int(-1)), } } } } /// Returns a `(start, end)` tuple for a given group. /// /// Group 0 is the full match. Returns `(-1, -1)` for unmatched optional groups #[expect(clippy::cast_possible_wrap, reason = "positions are always small enough for i64")] fn get_span(&self, n: i64, heap: &mut Heap) -> RunResult { match n.cmp(&0) { Ordering::Equal => Ok(allocate_tuple( smallvec![Value::Int(self.start as i64), Value::Int(self.end as i64)], heap, )?), Ordering::Less => Err(ExcType::re_match_group_index_error()), Ordering::Greater => { let idx = group_index(n); if idx >= self.group_spans.len() { return Err(ExcType::re_match_group_index_error()); } let (s, e) = match &self.group_spans[idx] { Some((s, e)) => (*s as i64, *e as i64), None => (-1, -1), }; Ok(allocate_tuple(smallvec![Value::Int(s), Value::Int(e)], heap)?) 
} } } } impl PyTrait for ReMatch { fn py_type(&self, _heap: &Heap) -> Type { Type::ReMatch } fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option { None } fn py_eq(&self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { // Match objects are not comparable Ok(false) } fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool { // Match objects are always truthy true } fn py_repr_fmt( &self, f: &mut impl Write, _vm: &VM<'_, '_, impl ResourceTracker>, _heap_ids: &mut AHashSet, ) -> std::fmt::Result { write!(f, " { let default = args.get_zero_one_arg("re.Match.groupdict", vm.heap)?; let default = default.unwrap_or(Value::None); let result = self.get_groupdict(&default, vm)?; default.drop_with_heap(vm.heap); result } Some(StaticStrings::Start) => { let n = extract_optional_group_arg(args, "re.Match.start", 0, vm.heap)?; self.get_start(n)? } Some(StaticStrings::End) => { let n = extract_optional_group_arg(args, "re.Match.end", 0, vm.heap)?; self.get_end(n)? } Some(StaticStrings::Span) => { let n = extract_optional_group_arg(args, "re.Match.span", 0, vm.heap)?; self.get_span(n, vm.heap)? } _ => return Err(ExcType::attribute_error(Type::ReMatch, attr.as_str(vm.interns))), }; Ok(CallResult::Value(result)) } } impl HeapItem for ReMatch { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.full_match.len() + self.input_string.len() + self.pattern_string.len() + self .groups .iter() .map(|g| g.as_ref().map_or(0, String::len)) .sum::() + self .named_groups .iter() .map(|(name, _)| name.len() + std::mem::size_of::()) .sum::() } fn py_dec_ref_ids(&mut self, _stack: &mut Vec) { // No heap references — all data is owned strings and integers. } } /// Handles `m.group(...)` calls, supporting zero, one, or multiple arguments. 
///
/// - `m.group()` → equivalent to `m.group(0)`, returns full match string
/// - `m.group(n)` → returns the nth group (integer or named string)
/// - `m.group(n1, n2, ...)` → returns a tuple of groups
///
/// The argument values are owned by this function and are released on every path,
/// including when resolving one of several groups fails.
// NOTE(review): bare `RunResult` return types in this group look like they lost their
// generic argument in extraction — confirm against the original file.
fn call_group(
    m: &ReMatch,
    args: ArgValues,
    heap: &mut Heap,
    interns: &Interns,
) -> RunResult {
    match args {
        ArgValues::Empty => m.get_group(0, heap),
        ArgValues::One(v) => {
            let result = resolve_group_arg(m, &v, heap, interns);
            v.drop_with_heap(heap);
            result
        }
        other => {
            let pos = other.into_pos_only("re.Match.group", heap)?;
            // Collect the arguments so they can be borrowed while resolving and
            // released afterwards.
            let mut pos_guard = smallvec::SmallVec::<[Value; 4]>::new();
            for val in pos {
                pos_guard.push(val);
            }
            let mut elements = smallvec::smallvec![];
            for val in &pos_guard {
                let result = resolve_group_arg(m, val, heap, interns);
                if result.is_err() {
                    // Drop already-allocated elements
                    for elem in elements {
                        Value::drop_with_heap(elem, heap);
                    }
                    for val in pos_guard {
                        val.drop_with_heap(heap);
                    }
                    return result;
                }
                elements.push(result?);
            }
            for val in pos_guard {
                val.drop_with_heap(heap);
            }
            Ok(allocate_tuple(elements, heap)?)
        }
    }
}

/// Resolves a single group argument — integer, bool, or string (named group).
///
/// Any other value type raises `IndexError`. The argument is only borrowed; the
/// caller remains responsible for releasing it.
fn resolve_group_arg(
    m: &ReMatch,
    val: &Value,
    heap: &mut Heap,
    interns: &Interns,
) -> RunResult {
    match val {
        Value::Int(n) => m.get_group(*n, heap),
        Value::Bool(b) => m.get_group(i64::from(*b), heap),
        Value::InternString(id) => {
            let name = interns.get_str(*id);
            m.get_group_by_name(name, heap)
        }
        Value::Ref(heap_id) => match heap.get(*heap_id) {
            HeapData::Str(s) => {
                // Copy the name so the heap borrow ends before the lookup allocates.
                let name = s.as_str().to_owned();
                m.get_group_by_name(&name, heap)
            }
            _ => Err(ExcType::re_match_group_index_error()),
        },
        _ => Err(ExcType::re_match_group_index_error()),
    }
}

/// Extracts an optional integer argument for group-related methods.
///
/// Many `re.Match` methods accept an optional group number that defaults to 0.
/// This helper extracts the argument, validates it is an integer or bool (any other
/// value, including a string group name, is rejected), and returns the group number.
fn extract_optional_group_arg( args: ArgValues, name: &str, default: i64, heap: &mut Heap, ) -> RunResult { let opt = args.get_zero_one_arg(name, heap)?; match opt { None => Ok(default), Some(Value::Int(n)) => Ok(n), // CPython treats bool as int subclass: True=1, False=0. Some(Value::Bool(b)) => Ok(i64::from(b)), // String group names are not valid for start/end/span — they take integers only Some(other) => { other.drop_with_heap(heap); Err(ExcType::re_match_group_index_error()) } } } /// Converts a byte offset in a UTF-8 string to a character (code point) offset. /// /// The Rust `regex` crate operates on byte offsets, but Python's `re` module /// returns character positions. For ASCII-only strings, these are identical. /// For multi-byte UTF-8 characters, this counts actual code points up to the /// byte position. fn byte_to_char_offset(s: &str, byte_offset: usize) -> usize { s[..byte_offset].chars().count() } /// Converts a positive group number (1-based) to a 0-based index. /// /// The caller must ensure `n > 0`. #[expect( clippy::cast_sign_loss, clippy::cast_possible_truncation, reason = "n is always positive (checked by caller via match on Ordering::Greater)" )] fn group_index(n: i64) -> usize { (n - 1) as usize } ================================================ FILE: crates/monty/src/types/re_pattern.rs ================================================ //! Compiled regex pattern type for the `re` module. //! //! `RePattern` wraps a compiled `fancy_regex::Regex` with the original Python pattern //! string and flags. The `fancy_regex` crate supports backreferences, lookahead/lookbehind, //! and other advanced features, but uses backtracking which means patterns are susceptible //! to ReDoS. Monty's resource limits (time and allocation budgets) are the primary defense //! against catastrophic backtracking in untrusted patterns. //! //! Custom serde serializes only the pattern string and flags, recompiling the regex //! on deserialization. 
//! This supports Monty's snapshot/restore feature.

use std::{borrow::Cow, fmt::Write};

use ahash::AHashSet;
use fancy_regex::Regex;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use smallvec::SmallVec;

use crate::{
    args::ArgValues,
    bytecode::{CallResult, VM},
    defer_drop, defer_drop_mut,
    exception_private::{ExcType, RunResult},
    heap::{DropWithHeap, Heap, HeapData, HeapId, HeapItem},
    intern::{Interns, StaticStrings},
    modules::re::{ASCII, DOTALL, IGNORECASE, MULTILINE},
    resource::{ResourceError, ResourceTracker, check_estimated_size},
    types::{List, PyTrait, ReMatch, Str, Type, allocate_tuple, str::string_repr_fmt},
    value::{EitherStr, Value},
};

/// A compiled regular expression pattern.
///
/// Wraps a `fancy_regex::Regex` with the original Python pattern string and flags.
/// The `fancy_regex` crate supports backtracking features like backreferences and
/// lookaround, but this means patterns are susceptible to ReDoS — Monty's resource
/// limits are the defense against catastrophic backtracking.
///
/// Custom serde serializes only the pattern string and flags, recompiling the
/// regex on deserialization. This supports Monty's snapshot/restore feature.
#[derive(Debug)]
pub(crate) struct RePattern {
    /// The original Python regex pattern string.
    pattern: String,
    /// Python regex flags bitmask (IGNORECASE=2, MULTILINE=8, DOTALL=16, ASCII=256).
    flags: u16,
    /// The compiled Rust regex, unanchored.
    compiled: Regex,
    /// The compiled regex anchored with `\A(?:...)` for `match()`.
    ///
    /// Uses `\A` (absolute start anchor) instead of `^` so the MULTILINE flag
    /// doesn't cause it to match at line boundaries. This correctly handles
    /// alternations — e.g. `match('b|ab', 'ab')` must match `ab`, not fail
    /// because the engine found only `b` starting at position 1.
    compiled_match: Regex,
    /// The compiled regex anchored with `\A(?:...)\z` for `fullmatch()`.
    ///
    /// Uses `\A`/`\z` (absolute anchors) instead of `^`/`$` so the MULTILINE flag
    /// doesn't cause them to match at line boundaries. This correctly handles
    /// alternations — e.g. `fullmatch('a|ab', 'ab')` must match `ab`, not fail
    /// because the engine found `a` first.
    compiled_fullmatch: Regex,
}

impl RePattern {
    /// Creates a compiled pattern from a Python regex string and flags.
    ///
    /// Translates Python flag constants into inline regex flag prefixes and compiles
    /// the pattern. Also pre-compiles anchored variants for `match` (`\A(?:pattern)`)
    /// and `fullmatch` (`\A(?:pattern)\z`) to correctly handle alternations.
    ///
    /// # Errors
    ///
    /// Returns `re.PatternError` if the pattern is invalid.
    // NOTE(review): bare `RunResult` return types in this impl look like they lost their
    // generic argument in extraction — confirm against the original file.
    pub fn compile(pattern: String, flags: u16) -> RunResult {
        let compiled = compile_regex(&pattern, flags)?;
        // The wrappers use a non-capturing group, so group numbering is the same in
        // all three compiled variants.
        let compiled_match = compile_regex(&format!("\\A(?:{pattern})"), flags)?;
        let compiled_fullmatch = compile_regex(&format!("\\A(?:{pattern})\\z"), flags)?;
        Ok(Self {
            pattern,
            flags,
            compiled,
            compiled_match,
            compiled_fullmatch,
        })
    }

    /// `pattern.search(string)` — find first match anywhere in the string.
    ///
    /// Returns a `ReMatch` heap object on success, or `Value::None` if no match.
    /// An error reported by the regex engine while matching is converted with
    /// `ExcType::re_pattern_error`.
    pub fn search(&self, text: &str, heap: &mut Heap) -> RunResult {
        match self.compiled.captures(text) {
            Ok(Some(caps)) => {
                let m = ReMatch::from_captures(&caps, text, &self.pattern, &self.compiled);
                Ok(Value::Ref(heap.allocate(HeapData::ReMatch(m))?))
            }
            Ok(None) => Ok(Value::None),
            Err(err) => Err(ExcType::re_pattern_error(err)),
        }
    }

    /// `pattern.match(string)` — match anchored at the start of the string.
    ///
    /// Uses a pre-compiled `\A(?:pattern)` regex to correctly handle alternations.
    /// For example, `match('b|ab', 'ab')` correctly matches `ab` because the
    /// anchor forces the engine to try all alternatives at position 0.
    ///
    /// Returns a `ReMatch` heap object on success, or `Value::None` if no match.
pub fn match_start(&self, text: &str, heap: &mut Heap) -> RunResult { match self.compiled_match.captures(text) { Ok(Some(caps)) => { let match_obj = ReMatch::from_captures(&caps, text, &self.pattern, &self.compiled); Ok(Value::Ref(heap.allocate(HeapData::ReMatch(match_obj))?)) } Ok(None) => Ok(Value::None), Err(err) => Err(ExcType::re_pattern_error(err)), } } /// `pattern.fullmatch(string)` — match the entire string. /// /// Uses a pre-compiled `\A(?:pattern)\z` regex to correctly handle alternations. /// For example, `fullmatch('a|ab', 'ab')` correctly matches `ab` because the /// anchors force the engine to try all alternatives for a full-string match. /// /// Returns a `ReMatch` heap object on success, or `Value::None` if no match. pub fn fullmatch(&self, text: &str, heap: &mut Heap) -> RunResult { match self.compiled_fullmatch.captures(text) { Ok(Some(caps)) => { let match_obj = ReMatch::from_captures(&caps, text, &self.pattern, &self.compiled); Ok(Value::Ref(heap.allocate(HeapData::ReMatch(match_obj))?)) } Ok(None) => Ok(Value::None), Err(err) => Err(ExcType::re_pattern_error(err)), } } /// `pattern.findall(string)` — return all non-overlapping matches. 
/// /// Follows CPython's semantics: /// - No capture groups: returns a list of matched strings /// - One capture group: returns a list of the group's matched strings /// - Multiple capture groups: returns a list of tuples of matched strings pub fn findall(&self, text: &str, heap: &mut Heap) -> RunResult { let cap_count = self.compiled.captures_len(); let mut results = Vec::new(); match cap_count { // No capture groups — return list of full match strings 0 | 1 => { for m in self.compiled.find_iter(text) { let s = Str::new(m.map_err(ExcType::re_pattern_error)?.as_str().to_owned()); results.push(Value::Ref(heap.allocate(HeapData::Str(s))?)); } } // One capture group — return list of the group's strings 2 => { for caps in self.compiled.captures_iter(text) { let caps = caps.map_err(ExcType::re_pattern_error)?; let val = caps.get(1).map(|m| m.as_str().to_owned()).unwrap_or_default(); let s = Str::new(val); results.push(Value::Ref(heap.allocate(HeapData::Str(s))?)); } } // Multiple capture groups — return list of tuples _ => { for caps in self.compiled.captures_iter(text) { let caps = caps.map_err(ExcType::re_pattern_error)?; let mut elements: SmallVec<[Value; 3]> = SmallVec::with_capacity(cap_count - 1); for cap in caps.iter().skip(1) { let val = cap.map(|m| m.as_str().to_owned()).unwrap_or_default(); let s = Str::new(val); elements.push(Value::Ref(heap.allocate(HeapData::Str(s))?)); } results.push(allocate_tuple(elements, heap)?); } } } let list = List::new(results); Ok(Value::Ref(heap.allocate(HeapData::List(list))?)) } /// `pattern.sub(repl, string, count=0)` — substitute matches with a replacement. /// /// When `count` is 0, all matches are replaced. Otherwise, at most `count` /// replacements are made. The replacement string supports `$1`, `$2`, etc. /// for backreferences to captured groups. /// /// Builds the result string in a single pass by iterating matches and appending /// replacements directly. 
Checks the running output size against resource limits /// after each match, bailing out immediately if the budget is exceeded. This /// avoids both false rejections from conservative pre-estimates and untracked /// Rust heap allocations from delegating to `fancy_regex::replace_all()`. pub fn sub(&self, repl: &str, text: &str, count: usize, heap: &mut Heap) -> RunResult { // Translate Python-style backreferences (\1, \2) to regex crate style ($1, $2) let rust_repl = translate_replacement(repl); let effective_count = if count == 0 { usize::MAX } else { count }; let mut result = String::new(); let mut last_end = 0; for caps in self.compiled.captures_iter(text).take(effective_count) { let caps = caps.map_err(ExcType::re_pattern_error)?; let m = caps.get(0).expect("capture group 0 always exists"); result.push_str(&text[last_end..m.start()]); caps.expand(rust_repl.as_ref(), &mut result); last_end = m.end(); // Check running size: current result + remaining unprocessed text. check_estimated_size(result.len() + (text.len() - last_end), heap.tracker())?; } result.push_str(&text[last_end..]); let s = Str::new(result); Ok(Value::Ref(heap.allocate(HeapData::Str(s))?)) } /// `pattern.split(string, maxsplit=0)` — split string by pattern occurrences. /// /// Returns a list of strings. If `maxsplit` is non-zero, at most `maxsplit` /// splits occur and the remainder of the string is returned as the final element. pub fn split(&self, text: &str, maxsplit: usize, heap: &mut Heap) -> RunResult { let pieces: Vec<&str> = if maxsplit == 0 { self.compiled .split(text) .collect::, _>>() .map_err(ExcType::re_pattern_error)? } else { self.compiled .splitn(text, maxsplit + 1) .collect::, _>>() .map_err(ExcType::re_pattern_error)? 
}; let mut results = Vec::with_capacity(pieces.len()); for piece in pieces { let s = Str::new(piece.to_owned()); results.push(Value::Ref(heap.allocate(HeapData::Str(s))?)); } let list = List::new(results); Ok(Value::Ref(heap.allocate(HeapData::List(list))?)) } /// `pattern.finditer(string)` — return all matches as a list. /// /// Eagerly collects all match objects into a list. This differs from CPython's /// lazy iterator but produces the same results when iterated. The VM's `GetIter` /// opcode handles iteration over the returned list. pub fn finditer(&self, text: &str, heap: &mut Heap) -> RunResult { let mut results = Vec::new(); for caps in self.compiled.captures_iter(text) { let caps = caps.map_err(ExcType::re_pattern_error)?; let m = ReMatch::from_captures(&caps, text, &self.pattern, &self.compiled); results.push(Value::Ref(heap.allocate(HeapData::ReMatch(m))?)); } let list = List::new(results); Ok(Value::Ref(heap.allocate(HeapData::List(list))?)) } } impl PyTrait for RePattern { fn py_type(&self, _heap: &Heap) -> Type { Type::RePattern } fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option { None } fn py_eq(&self, other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { Ok(self.pattern == other.pattern && self.flags == other.flags) } fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool { // Pattern objects are always truthy (matching CPython). 
true } fn py_repr_fmt( &self, f: &mut impl Write, _vm: &VM<'_, '_, impl ResourceTracker>, _heap_ids: &mut AHashSet, ) -> std::fmt::Result { write!(f, "re.compile(")?; string_repr_fmt(&self.pattern, f)?; if self.flags != 0 { let mut flag_parts = smallvec::SmallVec::<[&'static str; 4]>::new(); if self.flags & IGNORECASE != 0 { flag_parts.push("re.IGNORECASE"); } if self.flags & MULTILINE != 0 { flag_parts.push("re.MULTILINE"); } if self.flags & DOTALL != 0 { flag_parts.push("re.DOTALL"); } if self.flags & ASCII != 0 { flag_parts.push("re.ASCII"); } write!(f, ", {}", flag_parts.join("|"))?; } write!(f, ")") } fn py_getattr(&self, attr: &EitherStr, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { match attr.static_string() { Some(StaticStrings::PatternAttr) => { let s = Str::new(self.pattern.clone()); let v = Value::Ref(vm.heap.allocate(HeapData::Str(s))?); Ok(Some(CallResult::Value(v))) } Some(StaticStrings::Flags) => Ok(Some(CallResult::Value(Value::Int(i64::from(self.flags))))), _ => Err(ExcType::attribute_error(Type::RePattern, attr.as_str(vm.interns))), } } fn py_call_attr( &mut self, _self_id: HeapId, vm: &mut VM<'_, '_, impl ResourceTracker>, attr: &EitherStr, args: ArgValues, ) -> RunResult { let result = match attr.static_string() { Some(StaticStrings::Search) => { let arg = args.get_one_arg("Pattern.search", vm.heap)?; defer_drop!(arg, vm); let text = value_to_str(arg, vm.heap, vm.interns)?.into_owned(); self.search(&text, vm.heap) } Some(StaticStrings::Match) => { let arg = args.get_one_arg("Pattern.match", vm.heap)?; defer_drop!(arg, vm); let text = value_to_str(arg, vm.heap, vm.interns)?.into_owned(); self.match_start(&text, vm.heap) } Some(StaticStrings::Fullmatch) => { let arg = args.get_one_arg("Pattern.fullmatch", vm.heap)?; defer_drop!(arg, vm); let text = value_to_str(arg, vm.heap, vm.interns)?.into_owned(); self.fullmatch(&text, vm.heap) } Some(StaticStrings::Findall) => { let arg = args.get_one_arg("Pattern.findall", vm.heap)?; 
defer_drop!(arg, vm); let text = value_to_str(arg, vm.heap, vm.interns)?.into_owned(); self.findall(&text, vm.heap) } Some(StaticStrings::Sub) => call_pattern_sub(self, args, vm.heap, vm.interns), Some(StaticStrings::Split) => call_pattern_split(self, args, vm.heap, vm.interns), Some(StaticStrings::Finditer) => { let arg = args.get_one_arg("Pattern.finditer", vm.heap)?; defer_drop!(arg, vm); let text = value_to_str(arg, vm.heap, vm.interns)?.into_owned(); self.finditer(&text, vm.heap) } _ => return Err(ExcType::attribute_error(Type::RePattern, attr.as_str(vm.interns))), }?; Ok(CallResult::Value(result)) } } impl HeapItem for RePattern { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.pattern.len() } fn py_dec_ref_ids(&mut self, _stack: &mut Vec) { // No heap references — all data is owned. } } /// Handles `pattern.sub(repl, string, count=0)` argument extraction and dispatch. /// /// Separated from the main `py_call_attr` match to keep the borrow checker happy — /// extracting multiple string arguments requires careful ordering of borrows. /// Supports `count` as either positional or keyword argument. 
fn call_pattern_sub( pattern: &RePattern, args: ArgValues, heap: &mut Heap, interns: &Interns, ) -> RunResult { let (pos, kwargs) = args.into_parts(); defer_drop_mut!(pos, heap); let kwargs = kwargs.into_iter(); defer_drop_mut!(kwargs, heap); let Some(repl_val) = pos.next() else { return Err(ExcType::type_error("Pattern.sub() missing required argument: 'repl'")); }; defer_drop!(repl_val, heap); let Some(string_val) = pos.next() else { return Err(ExcType::type_error("Pattern.sub() missing required argument: 'string'")); }; defer_drop!(string_val, heap); let pos_count = pos.next(); if let Some(extra) = pos.next() { extra.drop_with_heap(heap); return Err(ExcType::type_error( "Pattern.sub() takes at most 3 positional arguments", )); } // Extract count from kwargs if not given positionally let mut kw_count: Option = None; for (key, value) in kwargs { defer_drop!(key, heap); let Some(keyword_name) = key.as_either_str(heap) else { value.drop_with_heap(heap); return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(interns); if key_str == "count" { if pos_count.is_some() { value.drop_with_heap(heap); return Err(ExcType::type_error( "Pattern.sub() got multiple values for argument 'count'", )); } kw_count.replace(value).drop_with_heap(heap); } else { value.drop_with_heap(heap); return Err(ExcType::type_error(format!( "'{key_str}' is an invalid keyword argument for Pattern.sub()" ))); } } let count_val = pos_count.or(kw_count); #[expect( clippy::cast_sign_loss, clippy::cast_possible_truncation, reason = "n is checked non-negative above" )] let count = match count_val { Some(Value::Int(n)) if n >= 0 => n as usize, Some(Value::Bool(b)) => usize::from(b), Some(Value::Int(_)) => { let text = value_to_str(string_val, heap, interns)?.into_owned(); let s = Str::new(text); return Ok(Value::Ref(heap.allocate(HeapData::Str(s))?)); } Some(other) => { let t = other.py_type(heap); other.drop_with_heap(heap); return 
Err(ExcType::type_error(format!("expected int for count, not {t}"))); } None => 0, }; // Check that repl is a string — callable replacement is not supported if !repl_val.is_str(heap) { return Err(ExcType::type_error( "callable replacement is not yet supported in re.sub()", )); } let repl = value_to_str(repl_val, heap, interns)?.into_owned(); let text = value_to_str(string_val, heap, interns)?.into_owned(); pattern.sub(&repl, &text, count, heap) } /// Handles `pattern.split(string, maxsplit=0)` argument extraction and dispatch. /// /// Supports `maxsplit` as either positional or keyword argument. fn call_pattern_split( pattern: &RePattern, args: ArgValues, heap: &mut Heap, interns: &Interns, ) -> RunResult { let (pos, kwargs) = args.into_parts(); defer_drop_mut!(pos, heap); let kwargs = kwargs.into_iter(); defer_drop_mut!(kwargs, heap); let Some(string_val) = pos.next() else { return Err(ExcType::type_error( "Pattern.split() missing required argument: 'string'", )); }; defer_drop!(string_val, heap); let pos_maxsplit = pos.next(); if let Some(extra) = pos.next() { extra.drop_with_heap(heap); return Err(ExcType::type_error( "Pattern.split() takes at most 2 positional arguments", )); } let mut kw_maxsplit: Option = None; for (key, value) in kwargs { defer_drop!(key, heap); let Some(keyword_name) = key.as_either_str(heap) else { value.drop_with_heap(heap); return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(interns); if key_str == "maxsplit" { if pos_maxsplit.is_some() { value.drop_with_heap(heap); return Err(ExcType::type_error( "Pattern.split() got multiple values for argument 'maxsplit'", )); } kw_maxsplit.replace(value).drop_with_heap(heap); } else { value.drop_with_heap(heap); return Err(ExcType::type_error(format!( "'{key_str}' is an invalid keyword argument for Pattern.split()" ))); } } let maxsplit = extract_maxsplit(pos_maxsplit.or(kw_maxsplit), heap)?; let text = value_to_str(string_val, heap, 
interns)?.into_owned(); pattern.split(&text, maxsplit, heap) } /// Extracts a `maxsplit` value from an optional `Value`. /// /// Returns 0 if not provided. Negative values are treated as 0 (split all). fn extract_maxsplit(val: Option, heap: &mut Heap) -> RunResult { match val { None => Ok(0), Some(Value::Int(n)) if n <= 0 => Ok(0), #[expect( clippy::cast_sign_loss, clippy::cast_possible_truncation, reason = "n is checked positive above" )] Some(Value::Int(n)) => Ok(n as usize), Some(Value::Bool(b)) => Ok(usize::from(b)), Some(other) => { let t = other.py_type(heap); other.drop_with_heap(heap); Err(ExcType::type_error(format!("expected int for maxsplit, not {t}"))) } } } /// Compiles a Python regex pattern string with flags into a Rust `Regex`. /// /// Translates Python flag constants into inline regex flag prefixes: /// - `re.IGNORECASE` (2) → `(?i)` prefix /// - `re.MULTILINE` (8) → `(?m)` prefix /// - `re.DOTALL` (16) → `(?s)` prefix /// /// # Errors /// /// Returns `re.PatternError(...)` if the pattern is invalid. pub(crate) fn compile_regex(pattern: &str, flags: u16) -> RunResult { let mut prefix = String::new(); if flags & IGNORECASE != 0 { prefix.push('i'); } if flags & MULTILINE != 0 { prefix.push('m'); } if flags & DOTALL != 0 { prefix.push('s'); } // Note: re.ASCII (256) is accepted but has no effect on the regex compilation. // `fancy_regex` doesn't support `(?-u)` to disable Unicode mode, so `\w`, `\d`, `\s` // always match Unicode characters. This is a known limitation — Python 3 defaults to // Unicode mode anyway, so the behavioral difference only matters for non-ASCII input. let full_pattern = if prefix.is_empty() { pattern.to_owned() } else { format!("(?{prefix}){pattern}") }; Regex::new(&full_pattern).map_err(ExcType::re_pattern_error) } /// Translates Python-style replacement backreferences to `fancy_regex` syntax. /// /// Python uses `\1`, `\2`, `\g<1>`, `\g` for backreferences in replacement strings. 
/// Translates a Python `re.sub` replacement template into `fancy_regex` expansion syntax.
///
/// Python writes backreferences as `\1`, `\g<1>` or `\g<name>`; the expander
/// used by `Captures::expand` writes them as `${1}` / `${name}`.
///
/// # Translations
///
/// - `\N` / `\NN` → `${N}` / `${NN}` (one or two decimal digits)
/// - `\g<N>` → `${N}`, `\g<name>` → `${name}`
/// - `\\` → a single literal backslash
/// - `$` → `$$`, so a literal dollar sign is not read as a reference
/// - any other `\x` is passed through unchanged
///
/// Numeric references are always written in the brace form. The bare form
/// (`$1`) is ambiguous: the expander takes the longest run of identifier
/// characters as the group name, so `\1abc` written as `$1abc` would look up
/// a group called `1abc` and expand to nothing.
///
/// Like CPython, at most two digits are consumed for a reference, so `\10`
/// means group 10 and `\123` means group 12 followed by a literal `3`.
///
/// Returns a borrowed `Cow` when the template contains neither `\` nor `$`.
///
/// # Limitations
///
/// TODO: CPython's octal escapes (`\0`, `\012`, three octal digits) are not
/// implemented; such sequences are still treated as group references.
fn translate_replacement(repl: &str) -> Cow<'_, str> {
    // Fast path: no backslashes and no literal `$` means nothing to translate or escape.
    if !repl.contains('\\') && !repl.contains('$') {
        return Cow::Borrowed(repl);
    }

    let mut result = String::with_capacity(repl.len());
    let mut chars = repl.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            '\\' => match chars.peek().copied() {
                Some(first) if first.is_ascii_digit() => {
                    chars.next();
                    result.push_str("${");
                    result.push(first);
                    // A second digit belongs to the same reference (`\10` is group 10).
                    if let Some(second) = chars.peek().copied().filter(char::is_ascii_digit) {
                        result.push(second);
                        chars.next();
                    }
                    result.push('}');
                }
                Some('g') => {
                    chars.next(); // consume 'g'
                    translate_g_backref(&mut chars, &mut result);
                }
                Some('\\') => {
                    result.push('\\');
                    chars.next();
                }
                // Unknown escape (or trailing backslash): keep the backslash;
                // the following character is copied by the next iteration.
                _ => result.push('\\'),
            },
            // Escape literal `$` so the expander does not treat `$1` etc. as references.
            '$' => result.push_str("$$"),
            other => result.push(other),
        }
    }

    Cow::Owned(result)
}

/// Translates the remainder of a `\g<...>` reference into `${...}`.
///
/// Called with the iterator positioned just after `\g`. When a complete
/// `<name>` follows, `${name}` is appended to `result`. If the `<` is missing,
/// `\g` is appended literally; if the closing `>` is never found, the consumed
/// text (`\g<` plus everything read) is appended literally.
fn translate_g_backref(chars: &mut std::iter::Peekable<std::str::Chars<'_>>, result: &mut String) {
    if chars.peek() != Some(&'<') {
        // Not \g<...>, just literal \g
        result.push_str("\\g");
        return;
    }
    chars.next(); // consume '<'

    // Collect everything up to the closing '>'.
    let mut name = String::new();
    loop {
        match chars.next() {
            Some('>') => break,
            Some(ch) => name.push(ch),
            None => {
                // Unterminated \g<... — emit what was read, unchanged.
                result.push_str("\\g<");
                result.push_str(&name);
                return;
            }
        }
    }

    result.push_str("${");
    result.push_str(&name);
    result.push('}');
}
(&self.pattern, self.flags).serialize(serializer) } } impl<'de> Deserialize<'de> for RePattern { fn deserialize>(deserializer: D) -> Result { let (pattern, flags): (String, u16) = Deserialize::deserialize(deserializer)?; Self::compile(pattern, flags).map_err(|e| serde::de::Error::custom(format!("{e:?}"))) } } ================================================ FILE: crates/monty/src/types/set.rs ================================================ use std::fmt::Write; use ahash::AHashSet; use hashbrown::HashTable; use super::{MontyIter, PyTrait}; use crate::{ args::ArgValues, bytecode::{CallResult, VM}, defer_drop, defer_drop_mut, exception_private::{ExcType, RunResult}, heap::{ContainsHeap, DropWithHeap, Heap, HeapData, HeapGuard, HeapId, HeapItem}, intern::{Interns, StaticStrings}, resource::{ResourceError, ResourceTracker}, types::Type, value::{EitherStr, Value}, }; /// Entry in the set storage, containing a value and its cached hash. #[derive(Debug, serde::Serialize, serde::Deserialize)] struct SetEntry { pub(crate) value: Value, /// Cached hash for efficient lookup and reinsertion. pub(crate) hash: u64, } /// Internal storage shared between Set and FrozenSet. /// /// Uses a `HashTable` for O(1) lookups combined with a dense `Vec` /// to preserve insertion order (consistent with Python 3.7+ dict behavior). /// The hash table maps value hashes to indices in the entries vector. #[derive(Debug, Default)] pub(crate) struct SetStorage { /// Maps hash to index in entries vector. indices: HashTable, /// Dense vector of entries maintaining insertion order. entries: Vec, } impl SetStorage { /// Creates a new empty set storage. fn new() -> Self { Self::default() } /// Creates a new set storage with pre-allocated capacity. fn with_capacity(capacity: usize) -> Self { Self { indices: HashTable::with_capacity(capacity), entries: Vec::with_capacity(capacity), } } /// Creates a SetStorage from a vector of (value, hash) pairs. 
/// /// This is used to avoid borrow conflicts when we need to copy another set's /// contents and then perform operations requiring mutable heap access. /// The caller is responsible for handling reference counting. fn from_entries(entries: Vec<(Value, u64)>) -> Self { let mut storage = Self::with_capacity(entries.len()); for (idx, (value, hash)) in entries.into_iter().enumerate() { storage.entries.push(SetEntry { value, hash }); storage.indices.insert_unique(hash, idx, |&i| storage.entries[i].hash); } storage } /// Clones entries with proper reference counting. fn clone_entries(&self, heap: &impl ContainsHeap) -> Vec<(Value, u64)> { self.entries .iter() .map(|e| (e.value.clone_with_heap(heap), e.hash)) .collect() } /// Returns the number of elements in the set. fn len(&self) -> usize { self.entries.len() } /// Returns true if the set is empty. fn is_empty(&self) -> bool { self.entries.is_empty() } /// Returns whether this set contains any heap references (`Value::Ref`). /// /// Used during allocation to determine if this container could create cycles. #[inline] fn has_refs(&self) -> bool { self.entries.iter().any(|e| matches!(e.value, Value::Ref(_))) } /// Adds an element to the set, transferring ownership. /// /// Returns `Ok(true)` if the element was added (not already present), /// `Ok(false)` if the element was already in the set. /// Returns `Err` if the element is unhashable. /// /// The caller transfers ownership of `value`. If the value is already in /// the set, it will be dropped. fn add(&mut self, value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let hash = match value.py_hash(vm.heap, vm.interns) { Ok(Some(h)) => h, Ok(None) => { let err = ExcType::type_error_unhashable_set_element(value.py_type(vm.heap)); value.drop_with_heap(vm.heap); return Err(err); } Err(e) => { value.drop_with_heap(vm.heap); return Err(e.into()); } }; // Check if value already exists. 
let existing = self .indices .find(hash, |&idx| value.py_eq(&self.entries[idx].value, vm).unwrap_or(false)); if existing.is_some() { // Value already in set, drop the new value value.drop_with_heap(vm.heap); Ok(false) } else { // Add new entry let index = self.entries.len(); self.entries.push(SetEntry { value, hash }); self.indices.insert_unique(hash, index, |&idx| self.entries[idx].hash); Ok(true) } } /// Removes an element from the set. /// /// Returns `Ok(true)` if the element was removed, `Ok(false)` if not found. /// Returns `Err` if the key is unhashable. fn remove(&mut self, value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let hash = value .py_hash(vm.heap, vm.interns)? .ok_or_else(|| ExcType::type_error_unhashable_set_element(value.py_type(vm.heap)))?; let entry = self.indices.entry( hash, |&idx| value.py_eq(&self.entries[idx].value, vm).unwrap_or(false), |&idx| self.entries[idx].hash, ); if let hashbrown::hash_table::Entry::Occupied(occ) = entry { let index = *occ.get(); let removed_entry = self.entries.remove(index); occ.remove(); // Update indices for entries that shifted down for idx in &mut self.indices { if *idx > index { *idx -= 1; } } // Drop the removed value removed_entry.value.drop_with_heap(vm); Ok(true) } else { Ok(false) } } /// Removes an element from the set without raising an error if not found. /// /// Returns `Ok(())` always (unless the key is unhashable). fn discard(&mut self, value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<()> { self.remove(value, vm)?; Ok(()) } /// Removes and returns an arbitrary element from the set. /// /// Returns `Err(KeyError)` if the set is empty. 
fn pop(&mut self) -> RunResult { if self.entries.is_empty() { return Err(ExcType::key_error_pop_empty_set()); } // Remove the last entry (most efficient) let entry = self.entries.pop().expect("checked non-empty"); // Remove from hash table self.indices .find_entry(entry.hash, |&idx| idx == self.entries.len()) .expect("entry must exist") .remove(); Ok(entry.value) } /// Removes all elements from the set. fn clear(&mut self, heap: &mut Heap) { self.entries.drain(..).drop_with_heap(heap); self.indices.clear(); } /// Creates a deep clone with proper reference counting. fn clone_with_heap(&self, heap: &impl ContainsHeap) -> Self { Self { indices: self.indices.clone(), entries: self .entries .iter() .map(|entry| SetEntry { value: entry.value.clone_with_heap(heap), hash: entry.hash, }) .collect(), } } /// Checks if the set contains a value. pub fn contains(&self, value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let hash = value .py_hash(vm.heap, vm.interns)? .ok_or_else(|| ExcType::type_error_unhashable_set_element(value.py_type(vm.heap)))?; // Set values are typically shallow (strings, ints, tuples of primitives), // so recursion errors are unlikely. If one occurs, treat it as "not equal". Ok(self .indices .find(hash, |&idx| value.py_eq(&self.entries[idx].value, vm).unwrap_or(false)) .is_some()) } /// Returns an iterator over the values in the set. pub(crate) fn iter(&self) -> impl Iterator { self.entries.iter().map(|e| &e.value) } /// Returns the value at the given index, if valid. /// /// Used by MontyIter for index-based iteration. pub(crate) fn value_at(&self, index: usize) -> Option<&Value> { self.entries.get(index).map(|e| &e.value) } /// Collects heap IDs for reference counting cleanup. fn collect_dec_ref_ids(&mut self, stack: &mut Vec) { for entry in &mut self.entries { if let Value::Ref(id) = &entry.value { stack.push(*id); #[cfg(feature = "ref-count-panic")] entry.value.dec_ref_forget(); } } } /// Compares two sets for equality. 
fn eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { if self.len() != other.len() { return Ok(false); } let token = vm.heap.incr_recursion_depth()?; defer_drop!(token, vm); // Check that every element in self is in other for entry in &self.entries { if !matches!(other.contains(&entry.value, vm), Ok(true)) { return Ok(false); } } Ok(true) } /// Returns true if this set is a subset of other. fn is_subset(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { for entry in &self.entries { if !other.contains(&entry.value, vm)? { return Ok(false); } } Ok(true) } /// Returns true if this set is a superset of other. fn is_superset(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { other.is_subset(self, vm) } /// Returns true if this set has no elements in common with other. fn is_disjoint(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { // Iterate over the smaller set for efficiency let (smaller, larger) = if self.len() <= other.len() { (self, other) } else { (other, self) }; for entry in &smaller.entries { if larger.contains(&entry.value, vm)? { return Ok(false); } } Ok(true) } /// Returns a new set containing elements in either set (union). fn union(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let mut result_guard = HeapGuard::new(self.clone_with_heap(vm), vm); let (result, vm) = result_guard.as_parts_mut(); for entry in &other.entries { let value = entry.value.clone_with_heap(vm); result.add(value, vm)?; } Ok(result_guard.into_inner()) } /// Returns a new set containing elements in both sets (intersection). 
fn intersection(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let mut result_guard = HeapGuard::new(Self::new(), vm); let (result, vm) = result_guard.as_parts_mut(); // Iterate over the smaller set for efficiency let (smaller, larger) = if self.len() <= other.len() { (self, other) } else { (other, self) }; for entry in &smaller.entries { if larger.contains(&entry.value, vm)? { let value = entry.value.clone_with_heap(vm); result.add(value, vm)?; } } Ok(result_guard.into_inner()) } /// Returns a new set containing elements in self but not in other (difference). fn difference(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let mut result_guard = HeapGuard::new(Self::new(), vm); let (result, vm) = result_guard.as_parts_mut(); for entry in &self.entries { if !other.contains(&entry.value, vm)? { let value = entry.value.clone_with_heap(vm); result.add(value, vm)?; } } Ok(result_guard.into_inner()) } /// Returns a new set containing elements in either set but not both (symmetric difference). fn symmetric_difference(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let mut result_guard = HeapGuard::new(Self::new(), vm); let (result, vm) = result_guard.as_parts_mut(); // Add elements in self but not in other for entry in &self.entries { if !other.contains(&entry.value, vm)? { let value = entry.value.clone_with_heap(vm); result.add(value, vm)?; } } // Add elements in other but not in self for entry in &other.entries { if !self.contains(&entry.value, vm)? { let value = entry.value.clone_with_heap(vm); result.add(value, vm)?; } } Ok(result_guard.into_inner()) } /// Adds all elements from other to this set (in-place union). fn update(&mut self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<()> { for entry in &other.entries { let value = entry.value.clone_with_heap(vm); self.add(value, vm)?; } Ok(()) } /// Writes the repr format to a formatter. 
/// /// For sets, outputs `{elem1, elem2, ...}` (no type prefix). /// For frozensets, outputs `frozenset({elem1, elem2, ...})`. fn repr_fmt( &self, f: &mut impl Write, vm: &VM<'_, '_, impl ResourceTracker>, heap_ids: &mut AHashSet, type_name: &str, ) -> std::fmt::Result { if self.is_empty() { return write!(f, "{type_name}()"); } // Check depth limit before recursing let Some(token) = vm.heap.incr_recursion_depth_for_repr() else { return f.write_str("{...}"); }; crate::defer_drop_immutable_heap!(token, vm); // frozenset needs type prefix: frozenset({...}), but set doesn't: {...} let needs_prefix = type_name != "set"; if needs_prefix { write!(f, "{type_name}(")?; } f.write_char('{')?; let mut first = true; for entry in &self.entries { if !first { if vm.heap.check_time().is_err() { f.write_str(", ...[timeout]")?; break; } f.write_str(", ")?; } first = false; entry.value.py_repr_fmt(f, vm, heap_ids)?; } f.write_char('}')?; if needs_prefix { f.write_char(')')?; } Ok(()) } /// Estimates the memory size of this storage. fn estimate_size(&self) -> usize { std::mem::size_of::() + self.len() * std::mem::size_of::() } } /// Python set type - mutable, unordered collection of unique hashable elements. /// /// Sets support standard operations like add, remove, discard, pop, clear, as well /// as set algebra operations like union, intersection, difference, and symmetric /// difference. /// /// # Reference Counting /// When values are added, their reference counts are NOT incremented by the set - /// the caller transfers ownership. When values are removed or the set is cleared, /// their reference counts are decremented. #[derive(Debug, Default)] pub(crate) struct Set(SetStorage); impl Set { /// Creates a new empty set. #[must_use] pub fn new() -> Self { Self(SetStorage::new()) } /// Creates a set with pre-allocated capacity. #[must_use] pub fn with_capacity(capacity: usize) -> Self { Self(SetStorage::with_capacity(capacity)) } /// Returns the number of elements in the set. 
#[must_use] pub fn len(&self) -> usize { self.0.len() } /// Returns true if the set is empty. #[must_use] pub fn is_empty(&self) -> bool { self.0.is_empty() } /// Returns whether this set contains any heap references (`Value::Ref`). /// /// Used during allocation to determine if this container could create cycles. #[inline] #[must_use] pub fn has_refs(&self) -> bool { self.0.has_refs() } /// Adds an element to the set, transferring ownership. /// /// Returns `Ok(true)` if added, `Ok(false)` if already present. pub fn add(&mut self, value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { self.0.add(value, vm) } /// Removes an element from the set. /// /// Returns `Err(KeyError)` if the element is not present. pub fn remove(&mut self, value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<()> { if self.0.remove(value, vm)? { Ok(()) } else { Err(ExcType::key_error(value, vm)) } } /// Removes an element from the set if present. /// /// Does not raise an error if the element is not found. pub fn discard(&mut self, value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<()> { self.0.discard(value, vm) } /// Removes and returns an arbitrary element from the set. /// /// Returns `Err(KeyError)` if the set is empty. pub fn pop(&mut self) -> RunResult { self.0.pop() } /// Removes all elements from the set. pub fn clear(&mut self, heap: &mut Heap) { self.0.clear(heap); } /// Returns a shallow copy of the set. #[must_use] pub fn copy(&self, heap: &mut Heap) -> Self { Self(self.0.clone_with_heap(heap)) } /// Checks if the set contains a value. pub fn contains(&self, value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { self.0.contains(value, vm) } /// Returns the internal storage (for set operations between Set and FrozenSet). pub(crate) fn storage(&self) -> &SetStorage { &self.0 } /// Returns an iterator over the set's elements in insertion order. 
///
/// This is primarily used by other runtime helpers that need to implement
/// set-like protocols while still preserving Monty's single canonical set
/// storage implementation.
pub(crate) fn iter(&self) -> impl Iterator<Item = &Value> {
    // NOTE(review): the `Item = &Value` bound is reconstructed from how set
    // elements are used elsewhere in this file; confirm against `SetStorage::iter`.
    self.0.iter()
}

/// Creates a set from the `set()` constructor call.
///
/// - `set()` with no args returns an empty set
/// - `set(iterable)` creates a set from any iterable (list, tuple, set, dict, range, str, bytes)
///
/// The new set is allocated on the heap and returned as a `Value::Ref`.
/// Argument-count errors, iteration errors and allocation errors are propagated.
pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult<Value> {
    let value = args.get_zero_one_arg("set", vm.heap)?;
    let set = match value {
        None => Self::new(),
        Some(v) => Self::from_iterable(v, vm)?,
    };
    let heap_id = vm.heap.allocate(HeapData::Set(set))?;
    Ok(Value::Ref(heap_id))
}

/// Creates a set from a MontyIter, adding elements one by one.
///
/// Unlike list/tuple which can just collect into a Vec, sets need to add
/// each element individually to handle duplicates and compute hashes.
///
/// The set under construction is held in a `HeapGuard`, so if `for_next` or
/// `add` returns an error part-way through, the elements inserted so far are
/// released via `drop_with_heap` instead of being dropped as a plain local.
fn from_iterator(iter: MontyIter, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Self> {
    defer_drop_mut!(iter, vm);
    let capacity = iter.size_hint(vm.heap);
    let mut set_guard = HeapGuard::new(Self::with_capacity(capacity), vm);
    let (set, vm) = set_guard.as_parts_mut();
    while let Some(item) = iter.for_next(vm)? {
        set.add(item, vm)?;
    }
    Ok(set_guard.into_inner())
}

/// Creates a set from an iterable value.
///
/// This is a convenience method used by helper methods that need to convert
/// arbitrary iterables to sets. It uses `MontyIter` internally.
fn from_iterable(iterable: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let iter = MontyIter::new(iterable, vm)?; let set = Self::from_iterator(iter, vm)?; Ok(set) } } impl DropWithHeap for Set { fn drop_with_heap(self, heap: &mut H) { self.0.drop_with_heap(heap); } } impl DropWithHeap for SetStorage { fn drop_with_heap(self, heap: &mut H) { self.entries.drop_with_heap(heap); } } impl DropWithHeap for FrozenSet { fn drop_with_heap(self, heap: &mut H) { self.0.drop_with_heap(heap); } } impl DropWithHeap for SetEntry { fn drop_with_heap(self, heap: &mut H) { self.value.drop_with_heap(heap); } } impl PyTrait for Set { fn py_type(&self, _heap: &Heap) -> Type { Type::Set } fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option { Some(self.len()) } fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { if self.len() != other.len() { return Ok(false); } let token = vm.heap.incr_recursion_depth()?; defer_drop!(token, vm); self.0.eq(&other.0, vm) } fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool { !self.is_empty() } fn py_repr_fmt( &self, f: &mut impl Write, vm: &VM<'_, '_, impl ResourceTracker>, heap_ids: &mut AHashSet, ) -> std::fmt::Result { self.0.repr_fmt(f, vm, heap_ids, "set") } fn py_call_attr( &mut self, _self_id: HeapId, vm: &mut VM<'_, '_, impl ResourceTracker>, attr: &EitherStr, args: ArgValues, ) -> RunResult { let value = match attr.static_string() { Some(StaticStrings::Add) => { let value = args.get_one_arg("set.add", vm.heap)?; self.add(value, vm)?; Ok(Value::None) } Some(StaticStrings::Remove) => { let value = args.get_one_arg("set.remove", vm.heap)?; defer_drop!(value, vm); self.remove(value, vm)?; Ok(Value::None) } Some(StaticStrings::Discard) => { let value = args.get_one_arg("set.discard", vm.heap)?; defer_drop!(value, vm); self.discard(value, vm)?; Ok(Value::None) } Some(StaticStrings::Pop) => { args.check_zero_args("set.pop", vm.heap)?; self.pop() } 
Some(StaticStrings::Clear) => { args.check_zero_args("set.clear", vm.heap)?; self.clear(vm.heap); Ok(Value::None) } Some(StaticStrings::Copy) => { args.check_zero_args("set.copy", vm.heap)?; let copy = self.copy(vm.heap); let heap_id = vm.heap.allocate(HeapData::Set(copy))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::Update) => { let other = args.get_one_arg("set.update", vm.heap)?; self.update_from_value(other, vm)?; Ok(Value::None) } Some(StaticStrings::Union) => { let other = args.get_one_arg("set.union", vm.heap)?; let result = self.union_from_value(other, vm)?; let heap_id = vm.heap.allocate(HeapData::Set(result))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::Intersection) => { let other = args.get_one_arg("set.intersection", vm.heap)?; let result = self.intersection_from_value(other, vm)?; let heap_id = vm.heap.allocate(HeapData::Set(result))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::Difference) => { let other = args.get_one_arg("set.difference", vm.heap)?; let result = self.difference_from_value(other, vm)?; let heap_id = vm.heap.allocate(HeapData::Set(result))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::SymmetricDifference) => { let other = args.get_one_arg("set.symmetric_difference", vm.heap)?; let result = self.symmetric_difference_from_value(other, vm)?; let heap_id = vm.heap.allocate(HeapData::Set(result))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::Issubset) => { let other = args.get_one_arg("set.issubset", vm.heap)?; defer_drop!(other, vm); Ok(Value::Bool(self.issubset_from_value(other, vm)?)) } Some(StaticStrings::Issuperset) => { let other = args.get_one_arg("set.issuperset", vm.heap)?; defer_drop!(other, vm); Ok(Value::Bool(self.issuperset_from_value(other, vm)?)) } Some(StaticStrings::Isdisjoint) => { let other = args.get_one_arg("set.isdisjoint", vm.heap)?; defer_drop!(other, vm); Ok(Value::Bool(self.isdisjoint_from_value(other, vm)?)) } _ => { args.drop_with_heap(vm); return Err(ExcType::attribute_error(Type::Set, 
attr.as_str(vm.interns))); } }; value.map(CallResult::Value) } fn py_sub( &self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, crate::resource::ResourceError> { // This is called from heap.rs with two Sets // We need interns for contains check, but py_sub doesn't have it // This is a limitation - we'll need to handle this differently // For now, return None to indicate not supported via this path Ok(None) } } /// Pure set/frozenset binary operators shared by both concrete container types. #[derive(Debug, Clone, Copy)] pub(crate) enum SetBinaryOp { And, Or, Xor, Sub, } /// Helper methods for set operations with arbitrary iterables. impl Set { /// Implements operator-form set algebra, which only accepts set/frozenset operands. /// /// Unlike method forms such as `set.union(iterable)`, the binary operators /// `& | ^ -` are intentionally strict and return `None` for operands outside /// the set-like values CPython accepts here (`set`, `frozenset`, /// `dict_keys`, and `dict_items`) so the VM can raise the standard /// unsupported-operands `TypeError`. pub(crate) fn binary_op_value( &self, other: &Value, op: SetBinaryOp, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult> { let Some(other_storage) = get_storage_from_set_operand(other, vm)? else { return Ok(None); }; defer_drop!(other_storage, vm); let result = match op { SetBinaryOp::And => Self(self.0.intersection(other_storage, vm)?), SetBinaryOp::Or => Self(self.0.union(other_storage, vm)?), SetBinaryOp::Xor => Self(self.0.symmetric_difference(other_storage, vm)?), SetBinaryOp::Sub => Self(self.0.difference(other_storage, vm)?), }; Ok(Some(result)) } /// Updates this set with elements from an iterable value. 
fn update_from_value(&mut self, other: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<()> { let heap = &mut *vm.heap; // Try to get entries from a Set/FrozenSet directly let entries_opt = match &other { Value::Ref(id) => match heap.get(*id) { HeapData::Set(other_set) => Some(other_set.0.clone_entries(heap)), HeapData::FrozenSet(other_set) => Some(other_set.0.clone_entries(heap)), _ => None, }, _ => None, }; if let Some(entries) = entries_opt { other.drop_with_heap(heap); for (value, _hash) in entries { self.add(value, vm)?; } return Ok(()); } // Fall back to creating a temporary set from the iterable let temp_set = Self::from_iterable(other, vm)?; defer_drop!(temp_set, vm); self.0.update(&temp_set.0, vm)?; Ok(()) } /// Returns a new set with elements from both this set and an iterable. fn union_from_value(&self, other: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let other_storage = Self::get_storage_from_value(other, vm)?; defer_drop!(other_storage, vm); let result_storage = self.0.union(other_storage, vm)?; Ok(Self(result_storage)) } /// Returns a new set with elements common to both this set and an iterable. fn intersection_from_value(&self, other: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let other_storage = Self::get_storage_from_value(other, vm)?; defer_drop!(other_storage, vm); let result_storage = self.0.intersection(other_storage, vm)?; Ok(Self(result_storage)) } /// Returns a new set with elements in this set but not in an iterable. fn difference_from_value(&self, other: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let other_storage = Self::get_storage_from_value(other, vm)?; defer_drop!(other_storage, vm); let result_storage = self.0.difference(other_storage, vm)?; Ok(Self(result_storage)) } /// Returns a new set with elements in either set but not both. 
fn symmetric_difference_from_value(
    &self,
    other: Value,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<Self> {
    let rhs = Self::get_storage_from_value(other, vm)?;
    defer_drop!(rhs, vm);
    self.0.symmetric_difference(rhs, vm).map(Self)
}

/// Checks if this set is a subset of an iterable.
///
/// `other` is only borrowed: a clone of the value is converted into temporary
/// storage (entries are copied for set/frozenset, any other iterable is
/// collected into a set) and that storage is cleaned up after the check.
fn issubset_from_value(&self, other: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<bool> {
    let rhs = Self::get_storage_from_value(other.clone_with_heap(vm), vm)?;
    defer_drop!(rhs, vm);
    self.0.is_subset(rhs, vm)
}

/// Checks if this set is a superset of an iterable.
///
/// Uses the same borrow-and-convert approach as `issubset_from_value`.
fn issuperset_from_value(&self, other: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<bool> {
    let rhs = Self::get_storage_from_value(other.clone_with_heap(vm), vm)?;
    defer_drop!(rhs, vm);
    self.0.is_superset(rhs, vm)
}

/// Checks if this set has no elements in common with an iterable.
///
/// Uses the same borrow-and-convert approach as `issubset_from_value`.
fn isdisjoint_from_value(&self, other: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<bool> {
    let rhs = Self::get_storage_from_value(other.clone_with_heap(vm), vm)?;
    defer_drop!(rhs, vm);
    self.0.is_disjoint(rhs, vm)
}

/// Helper to get SetStorage from a Value (either directly or by conversion).
///
/// Takes ownership of `value`. For a heap `set` or `frozenset` the entries are
/// cloned into fresh storage and `value` is released; every other value is
/// handed to `from_iterable`, whose errors are propagated.
fn get_storage_from_value(value: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<SetStorage> {
    // Fast path: copy entries straight out of an existing set/frozenset.
    let cloned_entries = if let Value::Ref(id) = &value {
        match vm.heap.get(*id) {
            HeapData::Set(set) => Some(set.0.clone_entries(vm.heap)),
            HeapData::FrozenSet(set) => Some(set.0.clone_entries(vm.heap)),
            _ => None,
        }
    } else {
        None
    };

    match cloned_entries {
        Some(entries) => {
            value.drop_with_heap(vm);
            Ok(SetStorage::from_entries(entries))
        }
        // Slow path: iterate the value and collect its items into a set.
        None => Self::from_iterable(value, vm).map(|collected| collected.0),
    }
}
}

impl HeapItem for Set {
    /// Size estimate for this set, delegated to the underlying storage.
    fn py_estimate_size(&self) -> usize {
        self.0.estimate_size()
    }

    /// Pushes the heap ids this set references onto `stack`, delegated to the storage.
    fn py_dec_ref_ids(&mut self, stack: &mut Vec<HeapId>) {
        self.0.collect_dec_ref_ids(stack);
    }
}

/// Python frozenset type - immutable, unordered collection of unique hashable elements.
/// /// FrozenSets support the same set algebra operations as sets (union, intersection, /// difference, symmetric difference) but are immutable and therefore hashable. /// /// # Hashability /// Unlike mutable sets, frozensets can be used as dict keys or set elements because /// they are immutable. The hash is computed as the XOR of element hashes (order-independent). #[derive(Debug, Default)] pub(crate) struct FrozenSet(SetStorage); impl FrozenSet { /// Creates a new empty frozenset. #[must_use] pub fn new() -> Self { Self(SetStorage::new()) } /// Returns the number of elements in the frozenset. #[must_use] pub fn len(&self) -> usize { self.0.len() } /// Returns true if the frozenset is empty. #[must_use] pub fn is_empty(&self) -> bool { self.0.is_empty() } /// Returns whether this frozenset contains any heap references (`Value::Ref`). /// /// Used during allocation to determine if this container could create cycles. #[inline] #[must_use] pub fn has_refs(&self) -> bool { self.0.has_refs() } /// Returns a shallow copy of the frozenset. #[must_use] pub fn copy(&self, heap: &mut Heap) -> Self { Self(self.0.clone_with_heap(heap)) } /// Returns the internal storage. pub(crate) fn storage(&self) -> &SetStorage { &self.0 } /// Checks if the frozenset contains a value. pub fn contains(&self, value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { self.0.contains(value, vm) } /// Computes the hash of this frozenset. /// /// The hash is the XOR of all element hashes, making it order-independent. /// Checks recursion depth before recursing into element hashes. pub fn compute_hash( &self, heap: &mut Heap, interns: &Interns, ) -> Result, ResourceError> { let token = heap.incr_recursion_depth()?; defer_drop!(token, heap); let mut hash: u64 = 0; for entry in &self.0.entries { // All elements must be hashable (enforced at construction) match entry.value.py_hash(heap, interns)? 
{ Some(h) => hash ^= h, None => return Ok(None), } } Ok(Some(hash)) } /// Creates a frozenset from a Set, consuming the Set's storage. /// /// This is used when we need to convert a mutable set to an immutable frozenset /// without cloning. pub fn from_set(set: Set) -> Self { Self(set.0) } /// Creates a frozenset from the `frozenset()` constructor call. /// /// - `frozenset()` with no args returns an empty frozenset /// - `frozenset(iterable)` creates a frozenset from any iterable (list, tuple, set, dict, range, str, bytes) pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { let value = args.get_zero_one_arg("frozenset", vm.heap)?; let frozenset = match value { None => Self::new(), Some(v) => Self::from_set(Set::from_iterable(v, vm)?), }; let heap_id = vm.heap.allocate(HeapData::FrozenSet(frozenset))?; Ok(Value::Ref(heap_id)) } /// Returns a new frozenset with elements from both this and another set. pub(crate) fn union(&self, other: &SetStorage, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { Ok(Self(self.0.union(other, vm)?)) } /// Returns a new frozenset with elements common to both sets. pub(crate) fn intersection( &self, other: &SetStorage, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult { Ok(Self(self.0.intersection(other, vm)?)) } /// Returns a new frozenset with elements in this set but not in other. pub(crate) fn difference(&self, other: &SetStorage, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { Ok(Self(self.0.difference(other, vm)?)) } /// Returns a new frozenset with elements in either set but not both. 
pub(crate) fn symmetric_difference( &self, other: &SetStorage, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult { Ok(Self(self.0.symmetric_difference(other, vm)?)) } } impl PyTrait for FrozenSet { fn py_type(&self, _heap: &Heap) -> Type { Type::FrozenSet } fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option { Some(self.len()) } fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { if self.len() != other.len() { return Ok(false); } let token = vm.heap.incr_recursion_depth()?; defer_drop!(token, vm); self.0.eq(&other.0, vm) } fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool { !self.is_empty() } fn py_repr_fmt( &self, f: &mut impl Write, vm: &VM<'_, '_, impl ResourceTracker>, heap_ids: &mut AHashSet, ) -> std::fmt::Result { self.0.repr_fmt(f, vm, heap_ids, "frozenset") } fn py_call_attr( &mut self, _self_id: HeapId, vm: &mut VM<'_, '_, impl ResourceTracker>, attr: &EitherStr, args: ArgValues, ) -> RunResult { let heap = &mut *vm.heap; let interns = vm.interns; let value = match attr.static_string() { Some(StaticStrings::Copy) => { args.check_zero_args("frozenset.copy", heap)?; let copy = self.copy(heap); let heap_id = heap.allocate(HeapData::FrozenSet(copy))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::Union) => { let other = args.get_one_arg("frozenset.union", heap)?; let other_storage = Set::get_storage_from_value(other, vm)?; defer_drop!(other_storage, vm); let result = self.union(other_storage, vm)?; let heap_id = vm.heap.allocate(HeapData::FrozenSet(result))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::Intersection) => { let other = args.get_one_arg("frozenset.intersection", heap)?; let other_storage = Set::get_storage_from_value(other, vm)?; defer_drop!(other_storage, vm); let result = self.intersection(other_storage, vm)?; let heap_id = vm.heap.allocate(HeapData::FrozenSet(result))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::Difference) => { let other = 
args.get_one_arg("frozenset.difference", heap)?; let other_storage = Set::get_storage_from_value(other, vm)?; defer_drop!(other_storage, vm); let result = self.difference(other_storage, vm)?; let heap_id = vm.heap.allocate(HeapData::FrozenSet(result))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::SymmetricDifference) => { let other = args.get_one_arg("frozenset.symmetric_difference", heap)?; let other_storage = Set::get_storage_from_value(other, vm)?; defer_drop!(other_storage, vm); let result = self.symmetric_difference(other_storage, vm)?; let heap_id = vm.heap.allocate(HeapData::FrozenSet(result))?; Ok(Value::Ref(heap_id)) } Some(StaticStrings::Issubset) => { let other = args.get_one_arg("frozenset.issubset", heap)?; defer_drop!(other, vm); Ok(Value::Bool(self.issubset_from_value(other, vm)?)) } Some(StaticStrings::Issuperset) => { let other = args.get_one_arg("frozenset.issuperset", heap)?; defer_drop!(other, vm); Ok(Value::Bool(self.issuperset_from_value(other, vm)?)) } Some(StaticStrings::Isdisjoint) => { let other = args.get_one_arg("frozenset.isdisjoint", heap)?; defer_drop!(other, vm); Ok(Value::Bool(self.isdisjoint_from_value(other, vm)?)) } _ => { args.drop_with_heap(heap); return Err(ExcType::attribute_error(Type::FrozenSet, attr.as_str(interns))); } }; value.map(CallResult::Value) } fn py_sub( &self, _other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, crate::resource::ResourceError> { // Same limitation as Set - needs interns Ok(None) } } impl HeapItem for FrozenSet { fn py_estimate_size(&self) -> usize { self.0.estimate_size() } fn py_dec_ref_ids(&mut self, stack: &mut Vec) { self.0.collect_dec_ref_ids(stack); } } /// Helper methods for frozenset operations with arbitrary iterables. impl FrozenSet { /// Implements operator-form set algebra, which only accepts set/frozenset operands. 
///
/// CPython returns the type of the left operand for pure set/frozenset binary
/// operators, so this helper keeps the result as `frozenset` even when the
/// right operand is a mutable `set`. Like `set`, the accepted right-hand
/// side includes CPython's set-like dict views.
pub(crate) fn binary_op_value(
    &self,
    other: &Value,
    op: SetBinaryOp,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<Option<Self>> {
    // Operands that are not set-like yield `None`.
    let other_storage = match get_storage_from_set_operand(other, vm)? {
        Some(storage) => storage,
        None => return Ok(None),
    };
    defer_drop!(other_storage, vm);

    let lhs = &self.0;
    let combined = match op {
        SetBinaryOp::And => lhs.intersection(other_storage, vm),
        SetBinaryOp::Or => lhs.union(other_storage, vm),
        SetBinaryOp::Xor => lhs.symmetric_difference(other_storage, vm),
        SetBinaryOp::Sub => lhs.difference(other_storage, vm),
    }?;
    Ok(Some(Self(combined)))
}

/// Checks if this frozenset is a subset of an iterable.
///
/// `other` is only borrowed: a clone of the value is converted into temporary
/// storage (entries are copied for set/frozenset, any other iterable is
/// collected into a set) and that storage is cleaned up after the check.
fn issubset_from_value(&self, other: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<bool> {
    let rhs = Set::get_storage_from_value(other.clone_with_heap(vm), vm)?;
    defer_drop!(rhs, vm);
    self.0.is_subset(rhs, vm)
}

/// Checks if this frozenset is a superset of an iterable.
fn issuperset_from_value(&self, other: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { // Try to get entries from a Set/FrozenSet directly let entries_opt = match other { Value::Ref(id) => match vm.heap.get(*id) { HeapData::Set(other_set) => Some(other_set.0.clone_entries(vm.heap)), HeapData::FrozenSet(other_set) => Some(other_set.0.clone_entries(vm.heap)), _ => None, }, _ => None, }; if let Some(entries) = entries_opt { // Build temporary storage and check let other_storage = SetStorage::from_entries(entries); defer_drop!(other_storage, vm); return self.0.is_superset(other_storage, vm); } // Handle all other iterables (list, tuple, range, str, bytes, dict, etc.) let temp = Set::from_iterable(other.clone_with_heap(vm), vm)?; defer_drop!(temp, vm); self.0.is_superset(&temp.0, vm) } /// Checks if this frozenset has no elements in common with an iterable. fn isdisjoint_from_value(&self, other: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { // Try to get entries from a Set/FrozenSet directly let entries_opt = match other { Value::Ref(id) => match vm.heap.get(*id) { HeapData::Set(other_set) => Some(other_set.0.clone_entries(vm.heap)), HeapData::FrozenSet(other_set) => Some(other_set.0.clone_entries(vm.heap)), _ => None, }, _ => None, }; if let Some(entries) = entries_opt { // Build temporary storage and check let other_storage = SetStorage::from_entries(entries); defer_drop!(other_storage, vm); return self.0.is_disjoint(other_storage, vm); } // Handle all other iterables (list, tuple, range, str, bytes, dict, etc.) let temp = Set::from_iterable(other.clone_with_heap(vm), vm)?; defer_drop!(temp, vm); self.0.is_disjoint(&temp.0, vm) } } /// Returns temporary set storage only for operator-valid set operands. /// /// This is stricter than `Set::get_storage_from_value(...)`: operator forms /// only accept CPython's set-like operands (`set`, `frozenset`, `dict_keys`, /// and `dict_items`), while method forms accept any iterable. 
fn get_storage_from_set_operand( value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult> { let Value::Ref(id) = value else { return Ok(None); }; match vm.heap.get(*id) { HeapData::Set(set) => Ok(Some(SetStorage::from_entries(set.0.clone_entries(vm.heap)))), HeapData::FrozenSet(set) => Ok(Some(SetStorage::from_entries(set.0.clone_entries(vm.heap)))), // Dict views are `Copy` — matched value is not borrowed from the heap, // so `to_set` can take `&mut VM` below without conflict. HeapData::DictKeysView(view) => { let Set(storage) = view.to_set(vm)?; Ok(Some(storage)) } HeapData::DictItemsView(view) => { let Set(storage) = view.to_set(vm)?; Ok(Some(storage)) } _ => Ok(None), } } // Custom serde implementations for SetStorage, Set, and FrozenSet. // Only serialize entries; rebuild the indices hash table on deserialize. impl serde::Serialize for SetStorage { fn serialize(&self, serializer: S) -> Result { self.entries.serialize(serializer) } } impl<'de> serde::Deserialize<'de> for SetStorage { fn deserialize>(deserializer: D) -> Result { let entries: Vec = serde::Deserialize::deserialize(deserializer)?; // Rebuild the indices hash table from the entries let mut indices = HashTable::with_capacity(entries.len()); for (idx, entry) in entries.iter().enumerate() { indices.insert_unique(entry.hash, idx, |&i| entries[i].hash); } Ok(Self { indices, entries }) } } impl serde::Serialize for Set { fn serialize(&self, serializer: S) -> Result { self.0.serialize(serializer) } } impl<'de> serde::Deserialize<'de> for Set { fn deserialize>(deserializer: D) -> Result { Ok(Self(SetStorage::deserialize(deserializer)?)) } } impl serde::Serialize for FrozenSet { fn serialize(&self, serializer: S) -> Result { self.0.serialize(serializer) } } impl<'de> serde::Deserialize<'de> for FrozenSet { fn deserialize>(deserializer: D) -> Result { Ok(Self(SetStorage::deserialize(deserializer)?)) } } ================================================ FILE: crates/monty/src/types/slice.rs 
================================================ //! Python slice type implementation. //! //! Provides a slice object representing start:stop:step indices for sequence slicing. //! Each field is optional (None in Python), where None means "use the default for that field". use std::fmt::Write; use ahash::AHashSet; use crate::{ args::ArgValues, bytecode::{CallResult, VM}, defer_drop, exception_private::{ExcType, RunResult}, heap::{Heap, HeapData, HeapId, HeapItem}, intern::StaticStrings, resource::{ResourceError, ResourceTracker}, types::{PyTrait, Type}, value::{EitherStr, Value}, }; /// Python slice object representing start:stop:step indices. /// /// Each field is `Option` where `None` corresponds to Python's `None`, /// meaning "use the default value for this field based on context". /// /// When indexing a sequence of length `n`: /// - `start` defaults to 0 (or n-1 if step < 0) /// - `stop` defaults to n (or -1 sentinel meaning "before index 0" if step < 0) /// - `step` defaults to 1 /// /// The `indices(length)` method computes concrete indices from these optional values. #[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub(crate) struct Slice { pub start: Option, pub stop: Option, pub step: Option, } impl Slice { /// Creates a new slice with the given start, stop, and step values. #[must_use] pub fn new(start: Option, stop: Option, step: Option) -> Self { Self { start, stop, step } } /// Creates a slice from the `slice()` constructor call. /// /// Supports: /// - `slice(stop)` - slice with only stop (start=None, step=None) /// - `slice(start, stop)` - slice with start and stop (step=None) /// - `slice(start, stop, step)` - slice with all three components /// /// Each argument can be None to indicate "use default". 
pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult<Value> {
    let heap = &mut *vm.heap;
    let pos_args = args.into_pos_only("slice", heap)?;
    defer_drop!(pos_args, heap);

    // Map the positional arguments onto (start, stop, step) by arity; a single
    // argument is the stop bound, mirroring `slice(stop)`.
    let (start, stop, step) = match pos_args.as_slice() {
        [] => return Err(ExcType::type_error_at_least("slice", 1, 0)),
        [stop] => (None, value_to_option_i64(stop)?, None),
        [start, stop] => (value_to_option_i64(start)?, value_to_option_i64(stop)?, None),
        [start, stop, step] => (
            value_to_option_i64(start)?,
            value_to_option_i64(stop)?,
            value_to_option_i64(step)?,
        ),
        _ => return Err(ExcType::type_error_at_most("slice", 3, pos_args.len())),
    };

    let slice_id = heap.allocate(HeapData::Slice(Self::new(start, stop, step)))?;
    Ok(Value::Ref(slice_id))
}

/// Computes concrete indices for a sequence of the given length.
///
/// This implements Python's `slice.indices(length)` semantics:
/// - Handles negative indices (wrapping from the end)
/// - Clamps indices to valid range [0, length]
/// - Returns the step direction correctly for negative steps
///
/// Returns `(start, stop, step)` as concrete values ready for iteration.
/// Returns `Err(())` if step is 0 (invalid).
/// /// # Algorithm /// For positive step: /// - start defaults to 0, stop defaults to length /// - Both are clamped to [0, length] /// /// For negative step: /// - start defaults to length-1, stop defaults to -1 (before beginning) /// - start is clamped to [-1, length-1], stop to [-1, length-1] pub fn indices(&self, length: usize) -> Result<(usize, usize, i64), ()> { let step = self.step.unwrap_or(1); if step == 0 { return Err(()); } let len = i64::try_from(length).unwrap_or(i64::MAX); if step > 0 { // Positive step: iterate forward let default_start = 0; let default_stop = len; let start = self.start.map_or(default_start, |s| normalize_index(s, len, 0, len)); let stop = self.stop.map_or(default_stop, |s| normalize_index(s, len, 0, len)); // Convert to usize, clamping to valid range let start_usize = usize::try_from(start.max(0)).unwrap_or(0); let stop_usize = usize::try_from(stop.max(0)).unwrap_or(0).min(length); Ok((start_usize, stop_usize, step)) } else { // Negative step: iterate backward // For negative step, we need different handling let default_start = len - 1; let default_stop = -1; // Before the beginning let start = self .start .map_or(default_start, |s| normalize_index(s, len, -1, len - 1)); let stop = self.stop.map_or(default_stop, |s| normalize_index(s, len, -1, len - 1)); // The start can be at most len-1 let start_i64 = start.min(len - 1); let stop_i64 = stop; // can be -1 to mean "go all the way to beginning" // If start normalizes to < 0, it means the starting position is before index 0. // For negative step iteration, this produces an empty slice. // Return (0, 0, step) which makes the iteration condition `0 > 0` false immediately. 
if start_i64 < 0 { return Ok((0, 0, step)); } let start_usize = usize::try_from(start_i64).unwrap_or(0); // For stop, we encode it specially: if stop is -1, it means "stop before index 0" // We'll use length + 1 as a sentinel to indicate "stop was None or evaluates to before 0" let stop_usize = if stop_i64 < 0 { length + 1 // sentinel value meaning "go all the way to the beginning" } else { usize::try_from(stop_i64).unwrap_or(0) }; Ok((start_usize, stop_usize, step)) } } } /// Converts a Value to Option, treating None as None. /// /// Used for slice construction from both `slice()` builtin and `[start:stop:step]` syntax. /// Returns Ok(None) for Value::None, Ok(Some(i)) for integers/bools, /// or Err(TypeError) for other types. pub(crate) fn value_to_option_i64(value: &Value) -> RunResult> { match value { Value::None => Ok(None), Value::Int(i) => Ok(Some(*i)), Value::Bool(b) => Ok(Some(i64::from(*b))), _ => Err(ExcType::type_error_slice_indices()), } } /// Normalizes a slice index for a sequence of the given length. /// /// Handles negative indices (counting from end) and clamps to [lower, upper]. 
fn normalize_index(index: i64, length: i64, lower: i64, upper: i64) -> i64 {
    // A negative index is an offset from the end of the sequence.
    let normalized = if index < 0 { index + length } else { index };
    normalized.clamp(lower, upper)
}

impl PyTrait for Slice {
    fn py_type(&self, _heap: &Heap) -> Type {
        Type::Slice
    }

    fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option {
        // Slices don't have a length in Python
        None
    }

    /// Two slices are equal when all three components are equal.
    fn py_eq(&self, other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
        Ok(self.start == other.start && self.stop == other.stop && self.step == other.step)
    }

    fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool {
        // Slices are always truthy in Python
        true
    }

    /// Writes `slice(start, stop, step)`, printing `None` for unset components.
    fn py_repr_fmt(
        &self,
        f: &mut impl Write,
        _vm: &VM<'_, '_, impl ResourceTracker>,
        _heap_ids: &mut AHashSet,
    ) -> std::fmt::Result {
        f.write_str("slice(")?;
        format_option_i64(f, self.start)?;
        f.write_str(", ")?;
        format_option_i64(f, self.stop)?;
        f.write_str(", ")?;
        format_option_i64(f, self.step)?;
        f.write_char(')')
    }

    /// Resolves the `start`, `stop` and `step` attributes.
    ///
    /// Returns `Ok(None)` for any other attribute name.
    fn py_getattr(&self, attr: &EitherStr, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> {
        // Fast path: interned strings can be matched by ID without string comparison
        if let Some(ss) = attr.static_string() {
            return match ss {
                StaticStrings::Start => Ok(Some(CallResult::Value(option_i64_to_value(self.start)))),
                StaticStrings::Stop => Ok(Some(CallResult::Value(option_i64_to_value(self.stop)))),
                StaticStrings::Step => Ok(Some(CallResult::Value(option_i64_to_value(self.step)))),
                _ => Ok(None),
            };
        }
        // Slow path: heap-allocated strings need string comparison
        match attr.as_str(vm.interns) {
            "start" => Ok(Some(CallResult::Value(option_i64_to_value(self.start)))),
            "stop" => Ok(Some(CallResult::Value(option_i64_to_value(self.stop)))),
            "step" => Ok(Some(CallResult::Value(option_i64_to_value(self.step)))),
            _ => Ok(None),
        }
    }
}

impl HeapItem for Slice {
    fn py_estimate_size(&self) -> usize {
        std::mem::size_of::()
    }

    fn py_dec_ref_ids(&mut self, _stack: &mut Vec) {
        // Slice doesn't contain heap references, nothing to do
    }
}

/// Converts an Option to a Value (None or Int).
pub(crate) fn option_i64_to_value(opt: Option) -> Value {
    match opt {
        Some(i) => Value::Int(i),
        None => Value::None,
    }
}

/// Formats an Option for repr output (None or the integer).
fn format_option_i64(f: &mut impl Write, value: Option) -> std::fmt::Result {
    match value {
        Some(i) => write!(f, "{i}"),
        None => f.write_str("None"),
    }
}

================================================
FILE: crates/monty/src/types/str.rs
================================================
/// Python string type, wrapping a Rust `String`.
///
/// This type provides Python string semantics. Currently supports basic
/// operations like length and equality comparison.
use std::{borrow::Cow, fmt};
use std::{cmp::Ordering, fmt::Write};

use ahash::AHashSet;
use smallvec::smallvec;

use super::{Bytes, MontyIter, PyTrait};
use crate::{
    args::ArgValues,
    bytecode::{CallResult, VM},
    defer_drop, defer_drop_mut,
    exception_private::{ExcType, RunResult},
    heap::{DropWithHeap, Heap, HeapData, HeapGuard, HeapId, HeapItem},
    intern::{StaticStrings, StringId},
    resource::{ResourceError, ResourceTracker, check_repeat_size, check_replace_size},
    types::Type,
    value::{EitherStr, Value},
};

/// Python string value stored on the heap.
///
/// Wraps a Rust `String` and provides Python-compatible operations.
/// `len()` returns the number of Unicode codepoints (characters), matching Python semantics.
#[derive(Debug, Clone, PartialEq, Default, serde::Serialize, serde::Deserialize)]
pub(crate) struct Str(Box);

impl Str {
    /// Creates a new Str from a Rust String.
    #[must_use]
    pub fn new(s: String) -> Self {
        Self(s.into())
    }

    /// Returns a reference to the inner string.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Creates a string from the `str()` constructor call.
    ///
    /// - `str()` with no args returns an empty string
    /// - `str(x)` converts x to its string representation using `py_str`
    pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
        let value = args.get_zero_one_arg("str", vm.heap)?;
        match value {
            // No argument: reuse the interned empty string instead of allocating.
            None => Ok(Value::InternString(StaticStrings::EmptyString.into())),
            Some(v) => {
                defer_drop!(v, vm);
                let s = v.py_str(vm).into_owned();
                allocate_string(s, vm.heap)
            }
        }
    }

    /// Handles slice-based indexing for strings.
    ///
    /// Returns a new string containing the selected characters (Unicode-aware).
    /// A slice step of zero is reported through `ExcType::value_error_slice_step_zero`.
    fn getitem_slice(&self, slice: &crate::types::Slice, heap: &mut Heap) -> RunResult {
        // Slice bounds are expressed in characters, not bytes.
        let char_count = self.0.chars().count();
        let (start, stop, step) = slice
            .indices(char_count)
            .map_err(|()| ExcType::value_error_slice_step_zero())?;

        let result_str = get_str_slice(&self.0, start, stop, step);
        let heap_id = heap.allocate(HeapData::Str(Self::from(result_str)))?;
        Ok(Value::Ref(heap_id))
    }
}

impl From for Str {
    fn from(s: String) -> Self {
        Self(s.into())
    }
}

impl From<&str> for Str {
    fn from(s: &str) -> Self {
        Self(s.into())
    }
}

impl From for String {
    fn from(value: Str) -> Self {
        value.0.into_string()
    }
}

/// Allocates a string, using interned versions when possible.
///
/// Optimizations:
/// - Empty strings return the pre-interned `StaticStrings::EmptyString`
/// - Single ASCII characters return pre-interned ASCII strings
/// - Other strings are allocated on the heap
///
/// This avoids heap allocation for common cases like results from `strip()`,
/// `split()`, string iteration, etc.
pub fn allocate_string(s: String, heap: &mut Heap) -> RunResult { match s.len() { 0 => Ok(Value::InternString(StaticStrings::EmptyString.into())), 1 => { // Single byte means single ASCII character let byte = s.as_bytes()[0]; Ok(Value::InternString(StringId::from_ascii(byte))) } _ => { let heap_id = heap.allocate(HeapData::Str(Str::new(s)))?; Ok(Value::Ref(heap_id)) } } } /// Allocates a single character as a string value. /// /// ASCII characters use pre-interned strings for efficiency. /// Non-ASCII characters are allocated on the heap. /// /// This is used by string iteration and `chr()` builtin. pub fn allocate_char(c: char, heap: &mut Heap) -> Result { if c.is_ascii() { Ok(Value::InternString(StringId::from_ascii(c as u8))) } else { let heap_id = heap.allocate(HeapData::Str(Str::new(c.to_string())))?; Ok(Value::Ref(heap_id)) } } /// Gets the character at a given index in a string, handling negative indices. /// /// Returns `None` if the index is out of bounds. This uses a single-pass scan /// to avoid allocating a `Vec`. /// /// Negative indices count from the end: -1 is the last character. pub fn get_char_at_index(s: &str, index: i64) -> Option { let char_count = s.chars().count(); let len = i64::try_from(char_count).ok()?; let normalized = if index < 0 { index + len } else { index }; if normalized < 0 || normalized >= len { return None; } let idx = usize::try_from(normalized).ok()?; s.chars().nth(idx) } /// Extracts a slice of a string (Unicode-aware). /// /// Handles both positive and negative step values. For negative step, /// iterates backward from start down to (but not including) stop. /// The `stop` parameter uses a sentinel value of `len + 1` for negative /// step to indicate "go to the beginning". /// /// Note: step must be non-zero (callers should validate this via `slice.indices()`). 
pub(crate) fn get_str_slice(s: &str, start: usize, stop: usize, step: i64) -> String { let chars: Vec = s.chars().collect(); let mut result = String::new(); // try_from succeeds for non-negative step; step==0 rejected upstream by slice.indices() if let Ok(step_usize) = usize::try_from(step) { // Positive step: iterate forward let mut i = start; while i < stop && i < chars.len() { result.push(chars[i]); i += step_usize; } } else { // Negative step: iterate backward // start is the highest index, stop is the sentinel // stop > chars.len() means "go to the beginning" let step_abs = usize::try_from(-step).expect("step is negative so -step is positive"); let step_abs_i64 = i64::try_from(step_abs).expect("step magnitude fits in i64"); let mut i = i64::try_from(start).expect("start index fits in i64"); // stop > chars.len() is sentinel meaning "go to beginning", use -1 let stop_i64 = if stop > chars.len() { -1 } else { i64::try_from(stop).expect("stop bounded by chars.len() fits in i64") }; while let Ok(i_usize) = usize::try_from(i) { if i_usize >= chars.len() || i <= stop_i64 { break; } result.push(chars[i_usize]); i -= step_abs_i64; } } result } impl std::ops::Deref for Str { type Target = str; fn deref(&self) -> &Self::Target { &self.0 } } impl PyTrait for Str { fn py_type(&self, _heap: &Heap) -> Type { Type::Str } fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option { // Count Unicode characters, not bytes, to match Python semantics Some(self.0.chars().count()) } fn py_getitem(&self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let heap = &mut *vm.heap; // Check for slice first (Value::Ref pointing to HeapData::Slice) if let Value::Ref(id) = key && let HeapData::Slice(slice) = heap.get(*id) { // Clone the slice to release the borrow on heap before calling getitem_slice let slice = slice.clone(); return self.getitem_slice(&slice, heap); } // Extract integer index, accepting Int, Bool (True=1, False=0), and LongInt let index = 
key.as_index(heap, Type::Str)?; // Use single-pass indexing to avoid Vec allocation let c = get_char_at_index(&self.0, index).ok_or_else(ExcType::str_index_error)?; Ok(allocate_char(c, heap)?) } fn py_eq(&self, other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { Ok(self.0 == other.0) } fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool { !self.0.is_empty() } fn py_cmp( &self, other: &Self, _vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, ResourceError> { Ok(Some(self.0.cmp(&other.0))) } fn py_repr_fmt( &self, f: &mut impl Write, _vm: &VM<'_, '_, impl ResourceTracker>, _heap_ids: &mut AHashSet, ) -> fmt::Result { string_repr_fmt(&self.0, f) } fn py_str(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Cow<'static, str> { self.0.clone().into_string().into() } fn py_add( &self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, crate::resource::ResourceError> { let result = format!("{}{}", self.0, other.0); let id = vm.heap.allocate(HeapData::Str(result.into()))?; Ok(Some(Value::Ref(id))) } fn py_call_attr( &mut self, _self_id: HeapId, vm: &mut VM<'_, '_, impl ResourceTracker>, attr: &EitherStr, args: ArgValues, ) -> RunResult { let args_guard = HeapGuard::new(args, vm.heap); let Some(method) = attr.static_string() else { return Err(ExcType::attribute_error(Type::Str, attr.as_str(vm.interns))); }; let args = args_guard.into_inner(); call_str_method_impl(&self.0, method, args, vm).map(CallResult::Value) } } impl HeapItem for Str { fn py_estimate_size(&self) -> usize { std::mem::size_of::() + self.0.len() } fn py_dec_ref_ids(&mut self, _stack: &mut Vec) { // No-op: strings don't hold Value references } } /// Dispatches a method call on a string value by method name. /// /// This is the entry point for string method calls from the VM on interned strings. /// Converts the `StringId` to `StaticStrings` and delegates to `call_str_method_impl`. 
pub fn call_str_method(
    s: &str,
    method_id: StringId,
    args: ArgValues,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult {
    // Hold `args` in a guard while the method name is resolved.
    // NOTE(review): presumably the guard releases `args` against the heap on the
    // early-return path below; confirm against `HeapGuard`.
    let args_guard = HeapGuard::new(args, vm.heap);
    let Some(method) = StaticStrings::from_string_id(method_id) else {
        // Not a statically known name, so it cannot be a supported str method.
        return Err(ExcType::attribute_error(Type::Str, vm.interns.get_str(method_id)));
    };
    // Resolution succeeded: take the arguments back and hand them to the dispatcher.
    let args = args_guard.into_inner();
    call_str_method_impl(s, method, args, vm)
}

/// Dispatches a method call on a string value.
///
/// This is the unified implementation for string method calls, used by both:
/// - `Str::py_call_attr()` for heap-allocated strings
/// - `call_str_method()` for interned string literals from the VM
///
/// # Not Yet Implemented
///
/// The following Python string methods are not yet implemented:
///
/// - `format()` - Requires implementing the format spec mini-language (PEP 3101),
///   which is complex and involves parsing format specifications like `{:>10.2f}`.
/// - `format_map(mapping)` - Similar to `format()` but takes a mapping; depends on
///   `format()` implementation.
/// - `maketrans()` / `translate()` - Character translation tables; moderate complexity,
///   requires building and applying Unicode translation maps.
/// - `expandtabs(tabsize=8)` - Tab expansion; simple but rarely used in practice.
/// - `isprintable()` - Checks if all characters are printable; requires accurate Unicode
///   category data for the "printable" property.
fn call_str_method_impl(
    s: &str,
    method: StaticStrings,
    args: ArgValues,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult {
    // Every arm consumes `args`: zero-argument methods via `check_zero_args`,
    // the others by passing `args` on to the method implementation.
    match method {
        // Simple transformations (no arguments)
        StaticStrings::Lower => {
            args.check_zero_args("str.lower", vm.heap)?;
            str_lower(s, vm)
        }
        StaticStrings::Upper => {
            args.check_zero_args("str.upper", vm.heap)?;
            str_upper(s, vm)
        }
        StaticStrings::Capitalize => {
            args.check_zero_args("str.capitalize", vm.heap)?;
            str_capitalize(s, vm)
        }
        StaticStrings::Title => {
            args.check_zero_args("str.title", vm.heap)?;
            str_title(s, vm)
        }
        StaticStrings::Swapcase => {
            args.check_zero_args("str.swapcase", vm.heap)?;
            str_swapcase(s, vm)
        }
        StaticStrings::Casefold => {
            args.check_zero_args("str.casefold", vm.heap)?;
            str_casefold(s, vm)
        }
        // Predicate methods (no arguments, return bool)
        StaticStrings::Isalpha => {
            args.check_zero_args("str.isalpha", vm.heap)?;
            Ok(Value::Bool(str_isalpha(s)))
        }
        StaticStrings::Isdigit => {
            args.check_zero_args("str.isdigit", vm.heap)?;
            Ok(Value::Bool(str_isdigit(s)))
        }
        StaticStrings::Isalnum => {
            args.check_zero_args("str.isalnum", vm.heap)?;
            Ok(Value::Bool(str_isalnum(s)))
        }
        StaticStrings::Isnumeric => {
            args.check_zero_args("str.isnumeric", vm.heap)?;
            Ok(Value::Bool(str_isnumeric(s)))
        }
        StaticStrings::Isspace => {
            args.check_zero_args("str.isspace", vm.heap)?;
            Ok(Value::Bool(str_isspace(s)))
        }
        StaticStrings::Islower => {
            args.check_zero_args("str.islower", vm.heap)?;
            Ok(Value::Bool(str_islower(s)))
        }
        StaticStrings::Isupper => {
            args.check_zero_args("str.isupper", vm.heap)?;
            Ok(Value::Bool(str_isupper(s)))
        }
        StaticStrings::Isascii => {
            args.check_zero_args("str.isascii", vm.heap)?;
            Ok(Value::Bool(s.is_ascii()))
        }
        StaticStrings::Isdecimal => {
            args.check_zero_args("str.isdecimal", vm.heap)?;
            Ok(Value::Bool(str_isdecimal(s)))
        }
        // Search methods
        StaticStrings::Find => str_find(s, args, vm),
        StaticStrings::Rfind => str_rfind(s, args, vm),
        StaticStrings::Index => str_index(s, args, vm),
        StaticStrings::Rindex => str_rindex(s, args, vm),
        StaticStrings::Count => str_count(s, args, vm),
        StaticStrings::Startswith => str_startswith(s, args, vm),
        StaticStrings::Endswith => str_endswith(s, args, vm),
        // Strip/trim methods
        StaticStrings::Strip => str_strip(s, args, vm),
        StaticStrings::Lstrip => str_lstrip(s, args, vm),
        StaticStrings::Rstrip => str_rstrip(s, args, vm),
        StaticStrings::Removeprefix => str_removeprefix(s, args, vm),
        StaticStrings::Removesuffix => str_removesuffix(s, args, vm),
        // Split methods
        StaticStrings::Split => str_split(s, args, vm),
        StaticStrings::Rsplit => str_rsplit(s, args, vm),
        StaticStrings::Splitlines => str_splitlines(s, args, vm),
        StaticStrings::Partition => str_partition(s, args, vm),
        StaticStrings::Rpartition => str_rpartition(s, args, vm),
        // Replace/modify methods
        StaticStrings::Replace => str_replace(s, args, vm),
        StaticStrings::Center => str_center(s, args, vm),
        StaticStrings::Ljust => str_ljust(s, args, vm),
        StaticStrings::Rjust => str_rjust(s, args, vm),
        StaticStrings::Zfill => str_zfill(s, args, vm),
        // Additional methods
        StaticStrings::Encode => str_encode(s, args, vm),
        StaticStrings::Isidentifier => {
            args.check_zero_args("str.isidentifier", vm.heap)?;
            Ok(Value::Bool(str_isidentifier(s)))
        }
        StaticStrings::Istitle => {
            args.check_zero_args("str.istitle", vm.heap)?;
            Ok(Value::Bool(str_istitle(s)))
        }
        // Join: takes exactly one argument, the iterable of strings
        StaticStrings::Join => {
            let iterable = args.get_one_arg("str.join", vm.heap)?;
            str_join(s, iterable, vm)
        }
        // Any other interned name is not a str method: drop the unused
        // arguments against the heap before reporting the attribute error.
        _ => {
            args.drop_with_heap(vm.heap);
            Err(ExcType::attribute_error(Type::Str, method.into()))
        }
    }
}

/// Implements Python's `str.join(iterable)` method.
///
/// Joins elements of the iterable with the separator string, returning
/// a new heap-allocated string. Each element must be a string.
///
/// # Arguments
/// * `separator` - The separator string (e.g., "," for comma-separated)
/// * `iterable` - The iterable containing string elements to join
/// * `vm` - The VM, providing the heap (allocation, element lookup) and the
///   interns table (resolving interned strings)
///
/// # Errors
/// Returns `TypeError` if the argument is not iterable or if any element is not a string.
fn str_join(separator: &str, iterable: Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    // Create MontyIter from the iterable, with join-specific error message
    let Ok(iter) = MontyIter::new(iterable, vm) else {
        return Err(ExcType::type_error_join_not_iterable());
    };
    defer_drop_mut!(iter, vm);

    // Build result string, tracking index for error messages
    let mut result = String::new();
    let mut index = 0usize;

    while let Some(item) = iter.for_next(vm)? {
        defer_drop!(item, vm);

        // The separator goes between elements, so skip it before the first one.
        if index > 0 {
            result.push_str(separator);
        }

        // Check item is a string and extract its content
        match item {
            Value::InternString(id) => {
                result.push_str(vm.interns.get_str(*id));
            }
            Value::Ref(heap_id) => {
                if let HeapData::Str(s) = vm.heap.get(*heap_id) {
                    result.push_str(s.as_str());
                } else {
                    // A heap object that is not a string: report its position and type.
                    let t = item.py_type(vm.heap);
                    return Err(ExcType::type_error_join_item(index, t));
                }
            }
            _ => {
                let t = item.py_type(vm.heap);
                return Err(ExcType::type_error_join_item(index, t));
            }
        }
        index += 1;
    }

    // Allocate result (uses interned empty string if result is empty)
    allocate_string(result, vm.heap)
}

/// Writes a Python repr() string for a given string slice to a formatter.
///
/// Chooses between single and double quotes based on the string content:
/// - Uses double quotes if the string contains single quotes but not double quotes
/// - Uses single quotes by default, escaping any contained single quotes
///
/// Common escape sequences (backslash, newline, tab, carriage return) are always escaped.
pub fn string_repr_fmt(s: &str, f: &mut impl Write) -> fmt::Result { // Check if the string contains single quotes but not double quotes if s.contains('\'') && !s.contains('"') { // Use double quotes if string contains only single quotes f.write_char('"')?; for c in s.chars() { match c { '\\' => f.write_str("\\\\")?, '\n' => f.write_str("\\n")?, '\t' => f.write_str("\\t")?, '\r' => f.write_str("\\r")?, _ => f.write_char(c)?, } } f.write_char('"') } else { // Use single quotes by default, escape any single quotes in the string f.write_char('\'')?; for c in s.chars() { match c { '\\' => f.write_str("\\\\")?, '\n' => f.write_str("\\n")?, '\t' => f.write_str("\\t")?, '\r' => f.write_str("\\r")?, '\'' => f.write_str("\\'")?, _ => f.write_char(c)?, } } f.write_char('\'') } } /// Formatter for a Python repr() string. #[derive(Debug)] pub struct StringRepr<'a>(pub &'a str); impl fmt::Display for StringRepr<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { string_repr_fmt(self.0, f) } } // ============================================================================= // Simple transformations (no arguments) // ============================================================================= /// Implements Python's `str.lower()` method. fn str_lower(s: &str, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { allocate_string(s.to_lowercase(), vm.heap) } /// Implements Python's `str.upper()` method. fn str_upper(s: &str, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { allocate_string(s.to_uppercase(), vm.heap) } /// Implements Python's `str.capitalize()` method. /// /// Returns a copy of the string with its first character capitalized and the rest lowercased. 
fn str_capitalize(s: &str, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    let mut chars = s.chars();
    let result = match chars.next() {
        None => String::new(),
        Some(first) => {
            // Uppercasing one char may expand to several (e.g. 'ß' -> "SS").
            let mut result = first.to_uppercase().to_string();
            for c in chars {
                result.extend(c.to_lowercase());
            }
            result
        }
    };
    allocate_string(result, vm.heap)
}

/// Implements Python's `str.title()` method.
///
/// Returns a titlecased version of the string where words start with an uppercase
/// character and the remaining characters are lowercase.
///
/// A character starts a new word when the character before it is not *cased*.
/// Uncased letters (for example CJK ideographs) therefore do not suppress
/// capitalization of a following letter, matching CPython (`"中a".title() == "中A"`).
fn str_title(s: &str, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    // Cased = lowercase, uppercase, or a titlecase letter. Titlecase letters are
    // neither `is_lowercase` nor `is_uppercase`, but lowercasing changes them.
    let is_cased = |c: char| c.is_lowercase() || c.is_uppercase() || c.to_lowercase().next() != Some(c);

    let mut result = String::with_capacity(s.len());
    let mut prev_is_cased = false;

    for c in s.chars() {
        if prev_is_cased {
            // Inside a word: force lowercase.
            result.extend(c.to_lowercase());
        } else {
            // Start of a word (or an uncased character, for which this is a no-op).
            result.extend(c.to_uppercase());
        }
        prev_is_cased = is_cased(c);
    }

    allocate_string(result, vm.heap)
}

/// Implements Python's `str.swapcase()` method.
///
/// Returns a copy of the string with uppercase characters converted to lowercase and vice versa.
fn str_swapcase(s: &str, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    let mut result = String::with_capacity(s.len());
    for c in s.chars() {
        if c.is_uppercase() {
            result.extend(c.to_lowercase());
        } else if c.is_lowercase() {
            result.extend(c.to_uppercase());
        } else {
            // Uncased characters are copied unchanged.
            result.push(c);
        }
    }
    allocate_string(result, vm.heap)
}

/// Implements Python's `str.casefold()` method.
///
/// Returns a casefolded copy of the string. Casefolding is similar to lowercasing
/// but more aggressive because it is intended for caseless string matching.
fn str_casefold(s: &str, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { // Rust's to_lowercase() is equivalent to Unicode casefolding for most purposes allocate_string(s.to_lowercase(), vm.heap) } // ============================================================================= // Predicate methods (no arguments, return bool) // ============================================================================= /// Implements Python's `str.isalpha()` method. /// /// Returns True if all characters in the string are alphabetic and there is at least one character. fn str_isalpha(s: &str) -> bool { !s.is_empty() && s.chars().all(char::is_alphabetic) } /// Implements Python's `str.isdigit()` method. /// /// Returns True if all characters in the string are digits and there is at least one character. /// In Python, digits include decimal digits (Nd) plus characters with Numeric_Type=Digit /// (superscripts, subscripts, circled digits, etc.). fn str_isdigit(s: &str) -> bool { !s.is_empty() && s.chars().all(is_unicode_digit) } /// Implements Python's `str.isalnum()` method. /// /// Returns True if all characters in the string are alphanumeric and there is at least one character. fn str_isalnum(s: &str) -> bool { !s.is_empty() && s.chars().all(char::is_alphanumeric) } /// Implements Python's `str.isnumeric()` method. /// /// Returns True if all characters in the string are numeric and there is at least one character. /// In Python, numeric includes decimal digits (Nd), letter numerals (Nl), and other numerals (No). /// Rust's `char::is_numeric()` checks for all of these categories. fn str_isnumeric(s: &str) -> bool { !s.is_empty() && s.chars().all(char::is_numeric) } /// Implements Python's `str.isspace()` method. /// /// Returns True if all characters in the string are whitespace and there is at least one character. fn str_isspace(s: &str) -> bool { !s.is_empty() && s.chars().all(char::is_whitespace) } /// Implements Python's `str.islower()` method. 
///
/// Returns True if all cased characters in the string are lowercase and there is at least one cased character.
fn str_islower(s: &str) -> bool {
    let mut saw_lowercase = false;
    // `all` stops at the first uppercase character, which settles the answer.
    let no_uppercase = s.chars().all(|ch| {
        saw_lowercase |= ch.is_lowercase();
        !ch.is_uppercase()
    });
    no_uppercase && saw_lowercase
}

/// Implements Python's `str.isupper()` method.
///
/// Returns True if all cased characters in the string are uppercase and there is at least one cased character.
fn str_isupper(s: &str) -> bool {
    let mut saw_uppercase = false;
    // `all` stops at the first lowercase character, which settles the answer.
    let no_lowercase = s.chars().all(|ch| {
        saw_uppercase |= ch.is_uppercase();
        !ch.is_lowercase()
    });
    no_lowercase && saw_uppercase
}

/// Implements Python's `str.isdecimal()` method.
///
/// Returns True if all characters in the string are decimal characters and there is at least one character.
/// Decimal characters are those in Unicode category Nd (Decimal_Number) - digits that can be used
/// to form numbers in base 10.
fn str_isdecimal(s: &str) -> bool {
    // The empty string has no decimal characters at all.
    if s.is_empty() {
        return false;
    }
    s.chars().all(is_unicode_decimal)
}

/// Checks if a character is a Unicode decimal digit (Nd category).
///
/// This covers decimal digit ranges from various scripts including ASCII, Arabic-Indic,
/// Devanagari, Bengali, Thai, Fullwidth, and many others.
fn is_unicode_decimal(c: char) -> bool { let cp = c as u32; matches!( cp, // Basic Latin (ASCII digits) 0x0030..=0x0039 // Arabic-Indic digits | 0x0660..=0x0669 // Extended Arabic-Indic digits | 0x06F0..=0x06F9 // NKo digits | 0x07C0..=0x07C9 // Devanagari digits | 0x0966..=0x096F // Bengali digits | 0x09E6..=0x09EF // Gurmukhi digits | 0x0A66..=0x0A6F // Gujarati digits | 0x0AE6..=0x0AEF // Oriya digits | 0x0B66..=0x0B6F // Tamil digits | 0x0BE6..=0x0BEF // Telugu digits | 0x0C66..=0x0C6F // Kannada digits | 0x0CE6..=0x0CEF // Malayalam digits | 0x0D66..=0x0D6F // Sinhala Lith digits | 0x0DE6..=0x0DEF // Thai digits | 0x0E50..=0x0E59 // Lao digits | 0x0ED0..=0x0ED9 // Tibetan digits | 0x0F20..=0x0F29 // Myanmar digits | 0x1040..=0x1049 // Myanmar Shan digits | 0x1090..=0x1099 // Khmer digits | 0x17E0..=0x17E9 // Mongolian digits | 0x1810..=0x1819 // Limbu digits | 0x1946..=0x194F // New Tai Lue digits | 0x19D0..=0x19D9 // Tai Tham Hora digits | 0x1A80..=0x1A89 // Tai Tham Tham digits | 0x1A90..=0x1A99 // Balinese digits | 0x1B50..=0x1B59 // Sundanese digits | 0x1BB0..=0x1BB9 // Lepcha digits | 0x1C40..=0x1C49 // Ol Chiki digits | 0x1C50..=0x1C59 // Vai digits | 0xA620..=0xA629 // Saurashtra digits | 0xA8D0..=0xA8D9 // Kayah Li digits | 0xA900..=0xA909 // Javanese digits | 0xA9D0..=0xA9D9 // Myanmar Tai Laing digits | 0xA9F0..=0xA9F9 // Cham digits | 0xAA50..=0xAA59 // Meetei Mayek digits | 0xABF0..=0xABF9 // Fullwidth digits | 0xFF10..=0xFF19 // Osmanya digits | 0x104A0..=0x104A9 // Hanifi Rohingya digits | 0x10D30..=0x10D39 // Brahmi digits | 0x11066..=0x1106F // Sora Sompeng digits | 0x110F0..=0x110F9 // Chakma digits | 0x11136..=0x1113F // Sharada digits | 0x111D0..=0x111D9 // Khudawadi digits | 0x112F0..=0x112F9 // Newa digits | 0x11450..=0x11459 // Tirhuta digits | 0x114D0..=0x114D9 // Modi digits | 0x11650..=0x11659 // Takri digits | 0x116C0..=0x116C9 // Ahom digits | 0x11730..=0x11739 // Warang Citi digits | 0x118E0..=0x118E9 // Dives Akuru digits | 
0x11950..=0x11959 // Bhaiksuki digits | 0x11C50..=0x11C59 // Masaram Gondi digits | 0x11D50..=0x11D59 // Gunjala Gondi digits | 0x11DA0..=0x11DA9 // Adlam digits | 0x1E950..=0x1E959 // Segmented digits | 0x1FBF0..=0x1FBF9 ) } /// Checks if a character is a Unicode digit (isdigit). /// /// This includes decimal digits (Nd) plus characters with Numeric_Type=Digit /// such as superscripts, subscripts, and circled digits. fn is_unicode_digit(c: char) -> bool { // First check if it's a decimal digit if is_unicode_decimal(c) { return true; } let cp = c as u32; matches!( cp, // Superscripts (², ³) 0x00B2..=0x00B3 // Superscript 1 | 0x00B9 // Superscript digits 0, 4-9 | 0x2070 | 0x2074..=0x2079 // Subscript digits 0-9 | 0x2080..=0x2089 // Circled digits 1-9 | 0x2460..=0x2468 // Circled digit 0 | 0x24EA // Circled digits 10-20 | 0x2469..=0x2473 // Parenthesized digits 1-9 | 0x2474..=0x247C // Period digits 1-9 | 0x2488..=0x2490 // Double circled digits 1-10 | 0x24F5..=0x24FE // Dingbat circled sans-serif digits 1-10 | 0x2780..=0x2789 // Dingbat negative circled digits 1-10 | 0x278A..=0x2793 // Dingbat circled sans-serif digits 1-10 | 0x24FF // Fullwidth digit zero (already in decimal, but include for completeness) // | 0xFF10..=0xFF19 // Already covered by is_unicode_decimal ) } // ============================================================================= // Search methods // ============================================================================= /// Implements Python's `str.find(sub, start?, end?)` method. /// /// Returns the lowest index in the string where substring sub is found within /// the slice s[start:end]. Returns -1 if sub is not found. 
fn str_find(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sub, start, end) = parse_search_args("str.find", s, args, vm)?; let slice = slice_string(s, start, end); let result = match slice.find(&sub) { Some(pos) => { // Convert byte offset to char offset, then add start offset let char_pos = slice[..pos].chars().count(); i64::try_from(start + char_pos).unwrap_or(i64::MAX) } None => -1, }; Ok(Value::Int(result)) } /// Implements Python's `str.rfind(sub, start?, end?)` method. /// /// Returns the highest index in the string where substring sub is found within /// the slice s[start:end]. Returns -1 if sub is not found. fn str_rfind(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sub, start, end) = parse_search_args("str.rfind", s, args, vm)?; let slice = slice_string(s, start, end); let result = match slice.rfind(&sub) { Some(pos) => { // Convert byte offset to char offset, then add start offset let char_pos = slice[..pos].chars().count(); i64::try_from(start + char_pos).unwrap_or(i64::MAX) } None => -1, }; Ok(Value::Int(result)) } /// Implements Python's `str.index(sub, start?, end?)` method. /// /// Like find(), but raises ValueError when the substring is not found. fn str_index(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sub, start, end) = parse_search_args("str.index", s, args, vm)?; let slice = slice_string(s, start, end); match slice.find(&sub) { Some(pos) => { let char_pos = slice[..pos].chars().count(); let result = i64::try_from(start + char_pos).unwrap_or(i64::MAX); Ok(Value::Int(result)) } None => Err(ExcType::value_error_substring_not_found()), } } /// Implements Python's `str.rindex(sub, start?, end?)` method. /// /// Like rfind(), but raises ValueError when the substring is not found. 
fn str_rindex(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (sub, start, end) = parse_search_args("str.rindex", s, args, vm)?;
    // An inverted range never matches in CPython, not even for an empty needle.
    if start > end {
        return Err(ExcType::value_error_substring_not_found());
    }
    let slice = slice_string(s, start, end);

    match slice.rfind(&sub) {
        Some(pos) => {
            // Convert byte offset to char offset, then add the start offset.
            let char_pos = slice[..pos].chars().count();
            let result = i64::try_from(start + char_pos).unwrap_or(i64::MAX);
            Ok(Value::Int(result))
        }
        None => Err(ExcType::value_error_substring_not_found()),
    }
}

/// Implements Python's `str.count(sub, start?, end?)` method.
///
/// Returns the number of non-overlapping occurrences of substring sub in
/// the string s[start:end].
fn str_count(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (sub, start, end) = parse_search_args("str.count", s, args, vm)?;
    // An inverted range contains nothing, so even the empty string occurs
    // zero times (`"abc".count("", 2, 1) == 0`).
    if start > end {
        return Ok(Value::Int(0));
    }
    let slice = slice_string(s, start, end);

    let count = if sub.is_empty() {
        // Empty string matches between every character, plus start and end
        slice.chars().count() + 1
    } else {
        slice.matches(sub.as_str()).count()
    };

    let result = i64::try_from(count).unwrap_or(i64::MAX);
    Ok(Value::Int(result))
}

/// Implements Python's `str.startswith(prefix, start?, end?)` method.
///
/// Returns True if the string starts with the prefix, otherwise returns False.
/// The prefix argument can be a string or a tuple of strings.
fn str_startswith(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (prefixes, start, end) = parse_prefix_suffix_args("str.startswith", s, args, vm)?;
    // An inverted range matches nothing, not even the empty prefix.
    if start > end {
        return Ok(Value::Bool(false));
    }
    let slice = slice_string(s, start, end);

    let result = prefixes.iter().any(|prefix| slice.starts_with(prefix.as_str()));
    Ok(Value::Bool(result))
}

/// Implements Python's `str.endswith(suffix, start?, end?)` method.
///
/// Returns True if the string ends with the suffix, otherwise returns False.
/// The suffix argument can be a string or a tuple of strings.
fn str_endswith(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (suffixes, start, end) = parse_prefix_suffix_args("str.endswith", s, args, vm)?;
    // An inverted range matches nothing, not even the empty suffix.
    if start > end {
        return Ok(Value::Bool(false));
    }
    let slice = slice_string(s, start, end);

    let result = suffixes.iter().any(|suffix| slice.ends_with(suffix.as_str()));
    Ok(Value::Bool(result))
}

/// Parses arguments for search methods (find, rfind, index, rindex, count).
///
/// Accepts one to three positional arguments: the substring, then optional
/// `start` and `end` indices where `None` selects the default.
///
/// Returns (substring, start, end) where start and end are character indices
/// normalized into the range `[0, len]`.
fn parse_search_args(
    method: &str,
    s: &str,
    args: ArgValues,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<(String, usize, usize)> {
    let pos = args.into_pos_only(method, vm.heap)?;
    defer_drop!(pos, vm);

    let str_len = s.chars().count();

    // Validate arity first, then extract in argument order so that type errors
    // are reported for the leftmost offending argument.
    let (sub_value, start_value, end_value) = match pos.as_slice() {
        [sub] => (sub, None, None),
        [sub, start] => (sub, Some(start), None),
        [sub, start, end] => (sub, Some(start), Some(end)),
        [] => return Err(ExcType::type_error_at_least(method, 1, 0)),
        _ => return Err(ExcType::type_error_at_most(method, 3, pos.len())),
    };

    let sub = extract_string_arg(sub_value, vm)?;
    let start = match start_value {
        Some(value) => optional_index(value, 0, str_len, vm)?,
        None => 0,
    };
    let end = match end_value {
        Some(value) => optional_index(value, str_len, str_len, vm)?,
        None => str_len,
    };
    Ok((sub, start, end))
}

/// Parses arguments for startswith/endswith methods.
///
/// Returns (prefixes/suffixes as Vec, start, end) where start and end are character indices.
/// The first argument can be either a string or a tuple of strings.
fn parse_prefix_suffix_args( method: &str, s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<(Vec, usize, usize)> { let pos = args.into_pos_only(method, vm.heap)?; defer_drop!(pos, vm); let str_len = s.chars().count(); match pos.as_slice() { [prefix_value] => { let prefixes = extract_str_or_tuple_of_str(prefix_value, vm)?; Ok((prefixes, 0, str_len)) } [prefix_value, start_value] => { let prefixes = extract_str_or_tuple_of_str(prefix_value, vm)?; let start = optional_index(start_value, 0, str_len, vm)?; Ok((prefixes, start, str_len)) } [prefix_value, start_value, end_value] => { let prefixes = extract_str_or_tuple_of_str(prefix_value, vm)?; let start = optional_index(start_value, 0, str_len, vm)?; let end = optional_index(end_value, str_len, str_len, vm)?; Ok((prefixes, start, end)) } [] => Err(ExcType::type_error_at_least(method, 1, 0)), _ => Err(ExcType::type_error_at_most(method, 3, pos.len())), } } /// Extracts a string or tuple of strings from a Value. /// /// Returns a Vec of strings - a single-element Vec if given a string, /// or multiple elements if given a tuple of strings. fn extract_str_or_tuple_of_str(value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { match value { Value::InternString(id) => Ok(vec![vm.interns.get_str(*id).to_owned()]), Value::Ref(heap_id) => match vm.heap.get(*heap_id) { HeapData::Str(s) => Ok(vec![s.as_str().to_owned()]), HeapData::Tuple(tuple) => { // Inline string extraction to avoid borrow conflict — vm.heap is // already borrowed immutably to access the tuple's items. 
let items = tuple.as_slice(); let mut strings = Vec::with_capacity(items.len()); for item in items { match item { Value::InternString(id) => { strings.push(vm.interns.get_str(*id).to_owned()); } Value::Ref(hid) => { if let HeapData::Str(s) = vm.heap.get(*hid) { strings.push(s.as_str().to_owned()); } else { return Err(ExcType::type_error("expected str or tuple of str")); } } _ => return Err(ExcType::type_error("expected str or tuple of str")), } } Ok(strings) } _ => Err(ExcType::type_error("expected str or tuple of str")), }, _ => Err(ExcType::type_error("expected str or tuple of str")), } } /// Extracts a string from a Value, returning an error if not a string. fn extract_string_arg(value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { match value { Value::InternString(id) => Ok(vm.interns.get_str(*id).to_owned()), Value::Ref(heap_id) => { if let HeapData::Str(s) = vm.heap.get(*heap_id) { Ok(s.as_str().to_owned()) } else { Err(ExcType::type_error("expected str")) } } _ => Err(ExcType::type_error("expected str")), } } /// Extracts an integer from a Value, returning an error if not an integer. fn extract_int_arg(value: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { match value { Value::Int(i) => Ok(*i), Value::Ref(heap_id) => { if let HeapData::LongInt(li) = vm.heap.get(*heap_id) { // Try to convert to i64 li.to_i64().ok_or_else(|| ExcType::type_error("integer too large")) } else { Err(ExcType::type_error("expected int")) } } _ => Err(ExcType::type_error("expected int")), } } /// Normalizes a Python-style index to a valid index in range [0, len]. 
fn normalize_index(index: i64, len: usize) -> usize { if index < 0 { // Safe cast: we've checked index is negative, so -index is positive // For very large negative numbers that don't fit in usize, saturate to usize::MAX let abs_index = usize::try_from(-index).unwrap_or(usize::MAX); len.saturating_sub(abs_index) } else { // Safe cast: we've checked index is non-negative // For values > usize::MAX, saturate to len usize::try_from(index).unwrap_or(len).min(len) } } /// Extracts an optional index from a `Value`, treating `None` as `default`. /// /// Used by argument parsers where `None` means "use the default index" and /// any other value is interpreted as an integer and normalized against `str_len`. fn optional_index( value: &Value, default: usize, str_len: usize, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult { if matches!(value, Value::None) { Ok(default) } else { Ok(normalize_index(extract_int_arg(value, vm)?, str_len)) } } /// Returns a substring of s from character index start to end. fn slice_string(s: &str, start: usize, end: usize) -> &str { if start >= end { return ""; } let mut start_byte = s.len(); let mut end_byte = s.len(); for (char_idx, (byte_idx, _)) in s.char_indices().enumerate() { if char_idx == start { start_byte = byte_idx; } if char_idx == end { end_byte = byte_idx; break; } } &s[start_byte..end_byte] } // ============================================================================= // Strip/trim methods // ============================================================================= /// Implements Python's `str.strip(chars?)` method. /// /// Returns a copy of the string with leading and trailing characters removed. /// If chars is not specified, whitespace characters are removed. 
fn str_strip(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let chars = parse_strip_arg("str.strip", args, vm)?;
    let result = match &chars {
        // `chars` is a set of characters to strip, not a prefix/suffix.
        Some(c) => s.trim_matches(|ch: char| c.contains(ch)).to_owned(),
        None => s.trim().to_owned(),
    };
    allocate_string(result, vm.heap)
}

/// Implements Python's `str.lstrip(chars?)` method.
///
/// Returns a copy of the string with leading characters removed.
fn str_lstrip(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let chars = parse_strip_arg("str.lstrip", args, vm)?;
    let result = match &chars {
        Some(c) => s.trim_start_matches(|ch: char| c.contains(ch)).to_owned(),
        None => s.trim_start().to_owned(),
    };
    allocate_string(result, vm.heap)
}

/// Implements Python's `str.rstrip(chars?)` method.
///
/// Returns a copy of the string with trailing characters removed.
fn str_rstrip(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let chars = parse_strip_arg("str.rstrip", args, vm)?;
    let result = match &chars {
        Some(c) => s.trim_end_matches(|ch: char| c.contains(ch)).to_owned(),
        None => s.trim_end().to_owned(),
    };
    allocate_string(result, vm.heap)
}

/// Parses the optional chars argument for strip methods.
///
/// Accepts None as a value meaning "use default whitespace stripping".
/// Returns `Some(chars)` only when an explicit string was supplied.
fn parse_strip_arg(
    method: &str,
    args: ArgValues,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<Option<String>> {
    let value = args.get_zero_one_arg(method, vm.heap)?;
    match value {
        None => Ok(None),
        Some(Value::None) => Ok(None), // Explicit None means default whitespace
        Some(v) => {
            defer_drop!(v, vm);
            let result = extract_string_arg(v, vm)?;
            Ok(Some(result))
        }
    }
}

/// Implements Python's `str.removeprefix(prefix)` method.
///
/// If the string starts with the prefix string, return string[len(prefix):].
/// Otherwise, return a copy of the original string.
fn str_removeprefix(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let prefix_value = args.get_one_arg("str.removeprefix", vm.heap)?;
    defer_drop!(prefix_value, vm);
    let prefix = extract_string_arg(prefix_value, vm)?;

    // `strip_prefix` yields `None` when `s` does not start with the prefix,
    // in which case the string is returned unchanged.
    let result = s.strip_prefix(prefix.as_str()).unwrap_or(s).to_owned();
    allocate_string(result, vm.heap)
}

/// Implements Python's `str.removesuffix(suffix)` method.
///
/// If the string ends with the suffix string, return string[:-len(suffix)].
/// Otherwise, return a copy of the original string.
fn str_removesuffix(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let suffix_value = args.get_one_arg("str.removesuffix", vm.heap)?;
    defer_drop!(suffix_value, vm);
    let suffix = extract_string_arg(suffix_value, vm)?;

    // `strip_suffix` yields `None` when `s` does not end with the suffix,
    // in which case the string is returned unchanged.
    let result = s.strip_suffix(suffix.as_str()).unwrap_or(s).to_owned();
    allocate_string(result, vm.heap)
}

// =============================================================================
// Split methods
// =============================================================================

/// Implements Python's `str.split(sep?, maxsplit?)` method.
///
/// Returns a list of the words in the string, using sep as the delimiter string.
fn str_split(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sep, maxsplit) = parse_split_args("str.split", args, vm)?; let parts: Vec<&str> = match &sep { Some(sep) => { // Empty separator raises ValueError if sep.is_empty() { return Err(ExcType::value_error_empty_separator()); } if maxsplit < 0 { s.split(sep.as_str()).collect() } else { // Safe cast: we've checked maxsplit >= 0 let max = usize::try_from(maxsplit).unwrap_or(usize::MAX); s.splitn(max.saturating_add(1), sep.as_str()).collect() } } None => { // Split on whitespace, filtering empty strings if maxsplit < 0 { s.split_whitespace().collect() } else { // Safe cast: we've checked maxsplit >= 0 let max = usize::try_from(maxsplit).unwrap_or(usize::MAX); split_whitespace_n(s, max) } } }; // Convert to list of strings (using interned empty string when applicable) let mut list_items = Vec::with_capacity(parts.len()); for part in parts { vm.heap.check_time()?; list_items.push(allocate_string(part.to_owned(), vm.heap)?); } let list = crate::types::List::new(list_items); let heap_id = vm.heap.allocate(HeapData::List(list))?; Ok(Value::Ref(heap_id)) } /// Implements Python's `str.rsplit(sep?, maxsplit?)` method. /// /// Returns a list of the words in the string, using sep as the delimiter string, /// splitting from the right. 
fn str_rsplit(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (sep, maxsplit) = parse_split_args("str.rsplit", args, vm)?; let parts: Vec<&str> = match &sep { Some(sep) => { // Empty separator raises ValueError if sep.is_empty() { return Err(ExcType::value_error_empty_separator()); } if maxsplit < 0 { s.rsplit(sep.as_str()).collect::>().into_iter().rev().collect() } else { // Safe cast: we've checked maxsplit >= 0 let max = usize::try_from(maxsplit).unwrap_or(usize::MAX); let mut parts: Vec<_> = s.rsplitn(max.saturating_add(1), sep.as_str()).collect(); parts.reverse(); parts } } None => { // Split on whitespace from right if maxsplit < 0 { s.split_whitespace().collect() } else { // Safe cast: we've checked maxsplit >= 0 let max = usize::try_from(maxsplit).unwrap_or(usize::MAX); rsplit_whitespace_n(s, max) } } }; // Convert to list of strings (using interned empty string when applicable) let mut list_items = Vec::with_capacity(parts.len()); for part in parts { vm.heap.check_time()?; list_items.push(allocate_string(part.to_owned(), vm.heap)?); } let list = crate::types::List::new(list_items); let heap_id = vm.heap.allocate(HeapData::List(list))?; Ok(Value::Ref(heap_id)) } /// Parses arguments for split methods. /// /// Supports both positional and keyword arguments for sep and maxsplit. 
fn parse_split_args( method: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<(Option, i64)> { let (pos, kwargs) = args.into_parts(); let kwargs_iter = kwargs.into_iter(); defer_drop_mut!(kwargs_iter, vm); let mut pos_iter = pos; let sep_value = pos_iter.next(); defer_drop_mut!(sep_value, vm); let maxsplit_value = pos_iter.next(); defer_drop_mut!(maxsplit_value, vm); // Check no extra positional arguments if pos_iter.len() != 0 { return Err(ExcType::type_error_at_most(method, 2, 3)); } // Extract positional sep (default None) let mut has_pos_sep = sep_value.is_some(); let mut sep = if let Some(v) = sep_value.as_ref() { if matches!(v, Value::None) { None } else { Some(extract_string_arg(v, vm)?) } } else { None }; // Extract positional maxsplit (default -1) let mut has_pos_maxsplit = maxsplit_value.is_some(); let mut maxsplit = if let Some(v) = maxsplit_value.as_ref() { extract_int_arg(v, vm)? } else { -1 }; // Process kwargs for (key, value) in kwargs_iter { defer_drop!(key, vm); defer_drop!(value, vm); let Some(keyword_name) = key.as_either_str(vm.heap) else { return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(vm.interns); match key_str { "sep" => { if has_pos_sep { return Err(ExcType::type_error(format!( "{method}() got multiple values for argument 'sep'" ))); } if matches!(value, Value::None) { sep = None; } else { sep = Some(extract_string_arg(value, vm)?); } has_pos_sep = true; } "maxsplit" => { if has_pos_maxsplit { return Err(ExcType::type_error(format!( "{method}() got multiple values for argument 'maxsplit'" ))); } maxsplit = extract_int_arg(value, vm)?; has_pos_maxsplit = true; } _ => { return Err(ExcType::type_error(format!( "'{key_str}' is an invalid keyword argument for {method}()" ))); } } } Ok((sep, maxsplit)) } /// Split string on whitespace, returning at most `maxsplit + 1` parts. 
fn split_whitespace_n(s: &str, maxsplit: usize) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut remaining = s.trim_start();
    let mut count = 0;

    while !remaining.is_empty() && count < maxsplit {
        // No further whitespace: the rest is a single final word.
        let Some(end) = remaining.find(|c: char| c.is_whitespace()) else {
            break;
        };
        parts.push(&remaining[..end]);
        remaining = remaining[end..].trim_start();
        count += 1;
    }

    // Whatever is left after the last allowed split is kept verbatim,
    // including any trailing whitespace.
    if !remaining.is_empty() {
        parts.push(remaining);
    }
    parts
}

/// Split string on whitespace from the right, returning at most `maxsplit + 1` parts.
fn rsplit_whitespace_n(s: &str, maxsplit: usize) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut remaining = s.trim_end();
    let mut count = 0;

    while !remaining.is_empty() && count < maxsplit {
        // Locate the last whitespace character together with its width.
        let Some((ws_start, ws_char)) = remaining.char_indices().rev().find(|&(_, c)| c.is_whitespace()) else {
            break;
        };
        // Skip the whole whitespace character: it may be multi-byte (e.g.
        // U+3000), so stepping a single byte could land inside it and panic.
        parts.push(&remaining[ws_start + ws_char.len_utf8()..]);
        remaining = remaining[..ws_start].trim_end();
        count += 1;
    }

    // Whatever is left after the last allowed split is kept verbatim,
    // including any leading whitespace.
    if !remaining.is_empty() {
        parts.push(remaining);
    }
    // Parts were collected right-to-left.
    parts.reverse();
    parts
}

/// Implements Python's `str.splitlines(keepends?)` method.
///
/// Returns a list of the lines in the string, breaking at line boundaries.
/// Accepts keepends as either positional or keyword argument.
fn str_splitlines(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let keepends = parse_splitlines_args(args, vm)?; let mut lines = Vec::new(); let mut start = 0; let bytes = s.as_bytes(); let len = bytes.len(); while start < len { vm.heap.check_time()?; // Find the next line ending let mut end = start; let mut line_end = start; while end < len { match bytes[end] { b'\n' => { line_end = end; end += 1; break; } b'\r' => { line_end = end; end += 1; // Check for \r\n if end < len && bytes[end] == b'\n' { end += 1; } break; } _ => { end += 1; line_end = end; } } } let line = if keepends { &s[start..end] } else { &s[start..line_end] }; lines.push(allocate_string(line.to_owned(), vm.heap)?); start = end; } let list = crate::types::List::new(lines); let heap_id = vm.heap.allocate(HeapData::List(list))?; Ok(Value::Ref(heap_id)) } /// Parses arguments for splitlines method. /// /// Supports both positional and keyword arguments for keepends. fn parse_splitlines_args(args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (pos, kwargs) = args.into_parts(); let kwargs_iter = kwargs.into_iter(); defer_drop_mut!(kwargs_iter, vm); let mut pos_iter = pos; let keepends_value = pos_iter.next(); defer_drop_mut!(keepends_value, vm); // Check no extra positional arguments if pos_iter.len() != 0 { return Err(ExcType::type_error_at_most("str.splitlines", 1, 2)); } // Extract positional keepends (default false) let mut has_pos_keepends = keepends_value.is_some(); let mut keepends = if let Some(v) = keepends_value.as_ref() { value_is_truthy(v) } else { false }; // Process kwargs for (key, value) in kwargs_iter { defer_drop!(key, vm); defer_drop!(value, vm); let Some(keyword_name) = key.as_either_str(vm.heap) else { return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(vm.interns); if key_str == "keepends" { if has_pos_keepends { return Err(ExcType::type_error( "str.splitlines() got 
multiple values for argument 'keepends'", )); } keepends = value_is_truthy(value); has_pos_keepends = true; } else { return Err(ExcType::type_error(format!( "'{key_str}' is an invalid keyword argument for str.splitlines()" ))); } } Ok(keepends) } /// Checks if a value is truthy for bool conversion. fn value_is_truthy(v: &Value) -> bool { match v { Value::Bool(b) => *b, Value::Int(i) => *i != 0, Value::None => false, _ => true, // Most other values are truthy } } /// Implements Python's `str.partition(sep)` method. /// /// Splits the string at the first occurrence of sep, and returns a 3-tuple /// containing the part before the separator, the separator itself, and the part after. fn str_partition(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let sep_value = args.get_one_arg("str.partition", vm.heap)?; defer_drop!(sep_value, vm); let sep = extract_string_arg(sep_value, vm)?; if sep.is_empty() { return Err(ExcType::value_error_empty_separator()); } let (before, sep_found, after) = match s.find(&sep) { Some(pos) => (&s[..pos], &sep[..], &s[pos + sep.len()..]), None => (s, "", ""), }; let before_val = allocate_string(before.to_owned(), vm.heap)?; let sep_val = allocate_string(sep_found.to_owned(), vm.heap)?; let after_val = allocate_string(after.to_owned(), vm.heap)?; Ok(crate::types::allocate_tuple( smallvec![before_val, sep_val, after_val], vm.heap, )?) } /// Implements Python's `str.rpartition(sep)` method. /// /// Splits the string at the last occurrence of sep, and returns a 3-tuple /// containing the part before the separator, the separator itself, and the part after. 
fn str_rpartition(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let sep_value = args.get_one_arg("str.rpartition", vm.heap)?; defer_drop!(sep_value, vm); let sep = extract_string_arg(sep_value, vm)?; if sep.is_empty() { return Err(ExcType::value_error_empty_separator()); } let (before, sep_found, after) = match s.rfind(&sep) { Some(pos) => (&s[..pos], &sep[..], &s[pos + sep.len()..]), None => ("", "", s), }; let before_val = allocate_string(before.to_owned(), vm.heap)?; let sep_val = allocate_string(sep_found.to_owned(), vm.heap)?; let after_val = allocate_string(after.to_owned(), vm.heap)?; Ok(crate::types::allocate_tuple( smallvec![before_val, sep_val, after_val], vm.heap, )?) } // ============================================================================= // Replace/modify methods // ============================================================================= /// Implements Python's `str.replace(old, new, count?)` method. /// /// Returns a copy with all occurrences of substring old replaced by new. /// If count is given, only the first count occurrences are replaced. fn str_replace(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (old, new, count) = parse_replace_args("str.replace", args, vm)?; check_replace_size(s.len(), old.len(), new.len(), count, vm.heap.tracker())?; let result = if count < 0 { s.replace(&old, &new) } else { // Safe cast: we've checked count >= 0 let n = usize::try_from(count).unwrap_or(usize::MAX); s.replacen(&old, &new, n) }; allocate_string(result, vm.heap) } /// Parses arguments for the replace method. /// /// Supports both positional and keyword arguments for count (Python 3.13+). 
fn parse_replace_args( method: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<(String, String, i64)> { let (pos, kwargs) = args.into_parts(); let kwargs_iter = kwargs.into_iter(); defer_drop_mut!(kwargs_iter, vm); let mut pos_iter = pos; let Some(old_value) = pos_iter.next() else { return Err(ExcType::type_error_at_least(method, 2, 0)); }; defer_drop!(old_value, vm); let Some(new_value) = pos_iter.next() else { return Err(ExcType::type_error_at_least(method, 2, 1)); }; defer_drop!(new_value, vm); let count_value = pos_iter.next(); defer_drop_mut!(count_value, vm); // Check no extra positional arguments if pos_iter.len() != 0 { return Err(ExcType::type_error_at_most(method, 3, 4)); } let old = extract_string_arg(old_value, vm)?; let new = extract_string_arg(new_value, vm)?; let mut has_pos_count = count_value.is_some(); let mut count = if let Some(v) = count_value.as_ref() { extract_int_arg(v, vm)? } else { -1 }; // Process kwargs (Python 3.13+ allows count as keyword) for (key, value) in kwargs_iter { defer_drop!(key, vm); defer_drop!(value, vm); let Some(keyword_name) = key.as_either_str(vm.heap) else { return Err(ExcType::type_error("keywords must be strings")); }; let key_str = keyword_name.as_str(vm.interns); if key_str == "count" { if has_pos_count { return Err(ExcType::type_error(format!( "{method}() got multiple values for argument 'count'" ))); } count = extract_int_arg(value, vm)?; has_pos_count = true; } else { return Err(ExcType::type_error(format!( "'{key_str}' is an invalid keyword argument for {method}()" ))); } } Ok((old, new, count)) } /// Implements Python's `str.center(width, fillchar?)` method. /// /// Returns centered in a string of length width. Padding is done using the /// specified fill character (default is a space). 
fn str_center(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (width, fillchar) = parse_justify_args("str.center", args, vm)?;
    let len = s.chars().count();

    let result = if width <= len {
        s.to_owned()
    } else {
        check_repeat_size(width, fillchar.len_utf8(), vm.heap.tracker())?;
        let total_pad = width - len;
        // Mirror CPython's rounding: when the padding is odd, the extra cell
        // goes on the left if `width` is odd and on the right otherwise
        // (`'ab'.center(5) == '  ab '`, `'a'.center(4) == ' a  '`).
        let left_pad = total_pad / 2 + (total_pad & width & 1);
        let right_pad = total_pad - left_pad;

        let mut result = String::with_capacity(width);
        result.extend(std::iter::repeat(fillchar).take(left_pad));
        result.push_str(s);
        result.extend(std::iter::repeat(fillchar).take(right_pad));
        result
    };

    allocate_string(result, vm.heap)
}

/// Implements Python's `str.ljust(width, fillchar?)` method.
///
/// Returns left-justified in a string of length width.
fn str_ljust(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (width, fillchar) = parse_justify_args("str.ljust", args, vm)?;
    let len = s.chars().count();

    let result = if width <= len {
        s.to_owned()
    } else {
        check_repeat_size(width, fillchar.len_utf8(), vm.heap.tracker())?;
        let pad = width - len;
        let mut result = String::with_capacity(width);
        result.push_str(s);
        result.extend(std::iter::repeat(fillchar).take(pad));
        result
    };

    allocate_string(result, vm.heap)
}

/// Implements Python's `str.rjust(width, fillchar?)` method.
///
/// Returns right-justified in a string of length width.
fn str_rjust(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<Value> {
    let (width, fillchar) = parse_justify_args("str.rjust", args, vm)?;
    let len = s.chars().count();

    let result = if width <= len {
        s.to_owned()
    } else {
        check_repeat_size(width, fillchar.len_utf8(), vm.heap.tracker())?;
        let pad = width - len;
        let mut result = String::with_capacity(width);
        result.extend(std::iter::repeat(fillchar).take(pad));
        result.push_str(s);
        result
    };

    allocate_string(result, vm.heap)
}

/// Parses arguments for justify methods (center, ljust, rjust).
fn parse_justify_args(
    method: &str,
    args: ArgValues,
    vm: &mut VM<'_, '_, impl ResourceTracker>,
) -> RunResult<(usize, char)> {
    let pos = args.into_pos_only(method, vm.heap)?;
    defer_drop!(pos, vm);

    // Sort out arity up front: `width` is required, `fillchar` is optional.
    let (width_value, fillchar_value) = match pos.as_slice() {
        [width] => (width, None),
        [width, fillchar] => (width, Some(fillchar)),
        [] => return Err(ExcType::type_error_at_least(method, 1, 0)),
        _ => return Err(ExcType::type_error_at_most(method, 2, pos.len())),
    };

    // Negative widths behave like 0; widths too large for usize saturate.
    let requested = extract_int_arg(width_value, vm)?;
    let width = usize::try_from(requested.max(0)).unwrap_or(usize::MAX);

    // The fill defaults to a space and must otherwise be exactly one character.
    let fillchar = match fillchar_value {
        None => ' ',
        Some(value) => {
            let fill_str = extract_string_arg(value, vm)?;
            let mut fill_chars = fill_str.chars();
            match (fill_chars.next(), fill_chars.next()) {
                (Some(only), None) => only,
                _ => return Err(ExcType::type_error_fillchar_must_be_single_char()),
            }
        }
    };

    Ok((width, fillchar))
}

/// Implements Python's `str.zfill(width)` method.
///
/// Returns a copy of the string left filled with ASCII '0' digits to make a
/// string of length width. A sign prefix is handled correctly.
fn str_zfill(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let width_value = args.get_one_arg("str.zfill", vm.heap)?; defer_drop!(width_value, vm); let width_i64 = extract_int_arg(width_value, vm)?; // Safe cast: treat negative as 0, saturate large positive values let width = if width_i64 < 0 { 0 } else { usize::try_from(width_i64).unwrap_or(usize::MAX) }; let len = s.chars().count(); let result = if width <= len { s.to_owned() } else { // zfill always pads with ASCII '0' (1 byte) check_repeat_size(width, 1, vm.heap.tracker())?; let pad = width - len; let mut chars = s.chars(); let first = chars.next(); let mut result = String::with_capacity(width); // Handle sign prefix if matches!(first, Some('+' | '-')) { result.push(first.unwrap()); for _ in 0..pad { result.push('0'); } result.extend(chars); } else { for _ in 0..pad { result.push('0'); } result.push_str(s); } result }; allocate_string(result, vm.heap) } /// Implements Python's `str.encode(encoding='utf-8', errors='strict')` method. /// /// Returns an encoded version of the string as a bytes object. Only supports /// UTF-8 encoding (the native encoding for Rust strings). 
fn str_encode(s: &str, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let (encoding, errors) = parse_encode_args(args, vm)?; // Only UTF-8 is supported - Rust strings are always valid UTF-8 let encoding_lower = encoding.to_ascii_lowercase(); if encoding_lower != "utf-8" && encoding_lower != "utf8" { return Err(ExcType::lookup_error_unknown_encoding(&encoding)); } // For UTF-8 encoding of a valid UTF-8 string, errors mode doesn't matter // since there's nothing to handle - the string is already valid UTF-8 if errors != "strict" && errors != "ignore" && errors != "replace" && errors != "backslashreplace" { return Err(ExcType::lookup_error_unknown_error_handler(&errors)); } let bytes = s.as_bytes().to_vec(); let heap_id = vm.heap.allocate(HeapData::Bytes(Bytes::new(bytes)))?; Ok(Value::Ref(heap_id)) } /// Parses arguments for `str.encode()`. /// /// Returns (encoding, errors) with defaults "utf-8" and "strict". fn parse_encode_args(args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<(String, String)> { let (first, second) = args.get_zero_one_two_args("str.encode", vm.heap)?; let encoding = if let Some(v) = first { defer_drop!(v, vm); extract_string_arg(v, vm)? } else { "utf-8".to_owned() }; let errors = if let Some(v) = second { defer_drop!(v, vm); extract_string_arg(v, vm)? } else { "strict".to_owned() }; Ok((encoding, errors)) } /// Implements Python's `str.isidentifier()` predicate. /// /// Returns True if the string is a valid Python identifier according to /// the language definition (starts with letter or underscore, followed by /// letters, digits, or underscores). Empty strings return False. 
fn str_isidentifier(s: &str) -> bool { if s.is_empty() { return false; } let mut chars = s.chars(); // First character must be a letter (Unicode) or underscore let first = chars.next().unwrap(); if !is_xid_start(first) && first != '_' { return false; } // Remaining characters must be letters, digits (Unicode), or underscores chars.all(is_xid_continue) } /// Checks if a character is valid at the start of an identifier (XID_Start). /// /// This is a simplified implementation that covers ASCII and common Unicode letters. /// Python uses the full Unicode XID_Start property. fn is_xid_start(c: char) -> bool { c.is_alphabetic() } /// Checks if a character is valid in the continuation of an identifier (XID_Continue). /// /// This is a simplified implementation that covers ASCII and common Unicode. /// Python uses the full Unicode XID_Continue property. fn is_xid_continue(c: char) -> bool { c.is_alphanumeric() || c == '_' } /// Implements Python's `str.istitle()` predicate. /// /// Returns True if the string is titlecased: uppercase characters follow /// uncased characters and lowercase characters follow cased characters. /// Empty strings return False. fn str_istitle(s: &str) -> bool { if s.is_empty() { return false; } let mut prev_cased = false; let mut has_cased = false; for c in s.chars() { if c.is_uppercase() { // Uppercase must follow uncased if prev_cased { return false; } prev_cased = true; has_cased = true; } else if c.is_lowercase() { // Lowercase must follow cased if !prev_cased { return false; } prev_cased = true; has_cased = true; } else { // Uncased character prev_cased = false; } } has_cased } ================================================ FILE: crates/monty/src/types/tuple.rs ================================================ /// Python tuple type using `SmallVec` for inline storage of small tuples. /// /// This type provides Python tuple semantics. Tuples are immutable sequences /// that can contain any Python object. 
/// Like lists, tuples properly handle
/// reference counting for heap-allocated values.
///
/// # Optimization
/// Uses `SmallVec<[Value; 3]>` to store up to 3 elements inline without heap
/// allocation. This benefits common cases like 2-tuples from `enumerate()`,
/// `dict.items()`, and function return values.
///
/// # Implemented Methods
/// - `index(value[, start[, end]])` - Find first index of value
/// - `count(value)` - Count occurrences
///
/// All tuple methods from Python's builtins are implemented.
use std::cmp::Ordering;
use std::fmt::Write;

use ahash::AHashSet;
use smallvec::SmallVec;

/// Inline capacity for small tuples. Tuples with 3 or fewer elements avoid
/// heap allocation for the items storage.
const TUPLE_INLINE_CAPACITY: usize = 3;

/// Storage type for tuple items. Uses SmallVec to inline small tuples
/// (up to `TUPLE_INLINE_CAPACITY` elements).
pub(crate) type TupleVec = SmallVec<[Value; TUPLE_INLINE_CAPACITY]>;

use super::{
    MontyIter, PyTrait,
    list::{get_slice_items, repr_sequence_fmt},
};
use crate::{
    args::ArgValues,
    bytecode::{CallResult, VM},
    defer_drop,
    exception_private::{ExcType, RunResult},
    heap::{DropWithHeap, Heap, HeapData, HeapId, HeapItem},
    intern::StaticStrings,
    resource::{ResourceError, ResourceTracker},
    types::Type,
    value::{EitherStr, Value},
};

/// Python tuple value stored on the heap.
///
/// Uses `SmallVec<[Value; 3]>` internally to avoid separate heap allocation
/// for tuples with 3 or fewer elements. This is a significant optimization
/// since small tuples are very common (enumerate, dict items, returns, etc.).
///
/// # Reference Counting
/// When a tuple is freed, all contained heap references have their refcounts
/// decremented via `push_stack_ids`.
///
/// # GC Optimization
/// The `contains_refs` flag tracks whether the tuple contains any `Value::Ref` items.
/// This allows `collect_child_ids` and `py_dec_ref_ids` to skip iteration when the
/// tuple contains only primitive values (ints, bools, None, etc.).
#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
pub(crate) struct Tuple {
    /// The tuple's elements, in order.
    items: TupleVec,
    /// True if any item in the tuple is a `Value::Ref`. Set at creation time
    /// since tuples are immutable.
    contains_refs: bool,
}

impl Tuple {
    /// Creates a new tuple from a vector of values.
    ///
    /// Automatically computes the `contains_refs` flag by checking if any value
    /// is a `Value::Ref`. Since tuples are immutable, this flag never changes.
    ///
    /// For tuples with 3 or fewer elements, the items are stored inline in the
    /// SmallVec without additional heap allocation.
    ///
    /// Note: This does NOT increment reference counts - the caller must
    /// ensure refcounts are properly managed.
    #[must_use]
    fn new(items: TupleVec) -> Self {
        let contains_refs = items.iter().any(|v| matches!(v, Value::Ref(_)));
        Self { items, contains_refs }
    }

    /// Returns the tuple's items as a borrowed slice.
    #[must_use]
    pub fn as_slice(&self) -> &[Value] {
        &self.items
    }

    /// Returns whether the tuple contains any heap references.
    ///
    /// When false, `collect_child_ids` and `py_dec_ref_ids` can skip iteration.
    #[inline]
    #[must_use]
    pub fn contains_refs(&self) -> bool {
        self.contains_refs
    }

    /// Creates a tuple from the `tuple()` constructor call.
    ///
    /// - `tuple()` with no args returns an empty tuple (singleton)
    /// - `tuple(iterable)` creates a tuple from any iterable (list, tuple, range, str, bytes, dict)
    ///
    /// Errors from argument parsing, iteration, or allocation are propagated.
    pub fn init(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult {
        let value = args.get_zero_one_arg("tuple", vm.heap)?;
        match value {
            None => {
                // Use empty tuple singleton
                Ok(vm.heap.get_empty_tuple())
            }
            Some(v) => {
                // Drain the iterable into a fresh item vector, then allocate;
                // `allocate_tuple` still returns the singleton if it was empty.
                let items = MontyIter::new(v, vm)?.collect(vm)?;
                Ok(allocate_tuple(items, vm.heap)?)
            }
        }
    }
}

// NOTE(review): the generic arguments on the two `From` impls below appear to
// have been lost in this copy of the source (presumably `From<Tuple>` for a
// `Vec` of values and for `TupleVec`) - confirm against the repository.

/// Consumes the tuple and returns its items as a `Vec`.
impl From for Vec {
    fn from(tuple: Tuple) -> Self {
        tuple.items.into_vec()
    }
}

/// Consumes the tuple and returns its underlying item storage.
impl From for TupleVec {
    fn from(tuple: Tuple) -> Self {
        tuple.items
    }
}

/// Allocates a tuple, using the empty tuple singleton when appropriate.
///
/// This is the preferred way to allocate tuples as it provides:
/// - Empty tuple interning: `() is ()` returns `True`
/// - SmallVec optimization for small tuples (≤3 elements)
///
/// # Example Usage
/// ```ignore
/// // Empty tuple - returns singleton
/// let empty = allocate_tuple(SmallVec::new(), heap)?;
///
/// // Small tuple - stored inline in SmallVec
/// let pair = allocate_tuple(smallvec::smallvec![Value::Int(1), Value::Int(2)], heap)?;
/// ```
// NOTE(review): several signatures in this file read as bare `Heap`, `Result`
// or `Option`; their generic arguments appear to have been stripped from this
// copy of the source - confirm against the repository.
pub fn allocate_tuple(
    items: SmallVec<[Value; TUPLE_INLINE_CAPACITY]>,
    heap: &mut Heap,
) -> Result {
    if items.is_empty() {
        Ok(heap.get_empty_tuple())
    } else {
        // Allocate a new tuple (SmallVec will inline if ≤3 elements)
        let heap_id = heap.allocate(HeapData::Tuple(Tuple::new(items)))?;
        Ok(Value::Ref(heap_id))
    }
}

impl PyTrait for Tuple {
    /// Tuples always report `Type::Tuple`.
    fn py_type(&self, _heap: &Heap) -> Type {
        Type::Tuple
    }

    /// Returns the number of items in the tuple.
    fn py_len(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> Option {
        Some(self.items.len())
    }

    /// Implements `tuple[key]` for slice and integer keys.
    ///
    /// A slice key yields a new tuple built from the selected items; an integer
    /// key yields a clone of the selected item. Out-of-range integer indices
    /// return the tuple index error and a zero slice step returns a value error.
    fn py_getitem(&self, key: &Value, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
        let heap = &mut *vm.heap;
        // Check for slice first (Value::Ref pointing to HeapData::Slice)
        if let Value::Ref(id) = key
            && let HeapData::Slice(slice) = heap.get(*id)
        {
            let (start, stop, step) = slice
                .indices(self.items.len())
                .map_err(|()| ExcType::value_error_slice_step_zero())?;
            let items = get_slice_items(&self.items, start, stop, step, heap)?;
            return Ok(allocate_tuple(items.into(), heap)?);
        }
        // Extract integer index, accepting Int, Bool (True=1, False=0), and LongInt
        let index = key.as_index(heap, Type::Tuple)?;
        // Convert to usize, handling negative indices (Python-style: -1 = last element)
        let len = i64::try_from(self.items.len()).expect("tuple length exceeds i64::MAX");
        let normalized_index = if index < 0 { index + len } else { index };
        // Bounds check
        if normalized_index < 0 || normalized_index >= len {
            return Err(ExcType::tuple_index_error());
        }
        // Return clone of the item with proper refcount increment
        // Safety: normalized_index is validated to be in [0, len) above
        let idx = usize::try_from(normalized_index).expect("tuple index validated non-negative");
        Ok(self.items[idx].clone_with_heap(heap))
    }

    /// Element-wise equality.
    ///
    /// Tuples of different length are unequal without comparing any element.
    /// The recursion depth is incremented for the duration of the comparison
    /// and the time limit is checked before each element pair.
    fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result {
        if self.items.len() != other.items.len() {
            return Ok(false);
        }
        let token = vm.heap.incr_recursion_depth()?;
        defer_drop!(token, vm);
        for (i1, i2) in self.items.iter().zip(&other.items) {
            vm.heap.check_time()?;
            if !i1.py_eq(i2, vm)? {
                return Ok(false);
            }
        }
        Ok(true)
    }

    /// Lexicographic comparison for tuples.
    ///
    /// Compares element-by-element left-to-right. The first non-equal pair
    /// determines the result. If all compared elements are equal, the shorter
    /// tuple is considered less than the longer one — matching Python semantics:
    /// `(1, 2) < (1, 2, 3)` is `True`.
    ///
    /// Returns `None` if any element pair is incomparable (e.g. `int` vs `str`).
    fn py_cmp(
        &self,
        other: &Self,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
    ) -> Result, ResourceError> {
        let token = vm.heap.incr_recursion_depth()?;
        defer_drop!(token, vm);
        for (av, bv) in self.items.iter().zip(&other.items) {
            vm.heap.check_time()?;
            match av.py_cmp(bv, vm)? {
                Some(Ordering::Equal) => {}
                Some(ord) => return Ok(Some(ord)),
                None => {
                    // py_cmp returned None — the elements don't support ordering.
                    // CPython checks __eq__ first and only calls __lt__ for non-equal
                    // pairs, so equal-but-unorderable elements (e.g. None == None)
                    // should be treated as equal and not block comparison.
                    if !av.py_eq(bv, vm)? {
                        return Ok(None);
                    }
                }
            }
        }
        // All compared elements equal — shorter tuple is less
        Ok(Some(self.items.len().cmp(&other.items.len())))
    }

    /// Implements `tuple + tuple`, producing a new tuple.
    ///
    /// Items from both operands are cloned with `clone_with_heap`, `self` first.
    fn py_add(
        &self,
        other: &Self,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
    ) -> Result, crate::resource::ResourceError> {
        let heap = &mut *vm.heap;
        // Clone both tuples' contents with proper refcounting
        let mut result: TupleVec = self.items.iter().map(|obj| obj.clone_with_heap(heap)).collect();
        let other_cloned = other.items.iter().map(|obj| obj.clone_with_heap(heap));
        result.extend(other_cloned);
        Ok(Some(allocate_tuple(result, heap)?))
    }

    /// Dispatches a method call on a tuple instance.
    ///
    /// Only `index` and `count` are handled; any other attribute drops the
    /// arguments and returns an attribute error for `Type::Tuple`.
    fn py_call_attr(
        &mut self,
        _self_id: HeapId,
        vm: &mut VM<'_, '_, impl ResourceTracker>,
        attr: &EitherStr,
        args: ArgValues,
    ) -> RunResult {
        match attr.static_string() {
            Some(StaticStrings::Index) => tuple_index(self, args, vm).map(CallResult::Value),
            Some(StaticStrings::Count) => tuple_count(self, args, vm).map(CallResult::Value),
            _ => {
                // The arguments are released before the error is reported.
                args.drop_with_heap(vm);
                Err(ExcType::attribute_error(Type::Tuple, attr.as_str(vm.interns)))
            }
        }
    }

    /// A tuple is truthy exactly when it is non-empty.
    fn py_bool(&self, _vm: &VM<'_, '_, impl ResourceTracker>) -> bool {
        !self.items.is_empty()
    }

    /// Writes the repr of the tuple by delegating to `repr_sequence_fmt` with
    /// `(` and `)` as the delimiters.
    fn py_repr_fmt(
        &self,
        f: &mut impl Write,
        vm: &VM<'_, '_, impl ResourceTracker>,
        heap_ids: &mut AHashSet,
    ) -> std::fmt::Result {
        repr_sequence_fmt('(', ')', &self.items, f, vm, heap_ids)
    }
}

impl HeapItem for Tuple {
    /// Estimates the tuple's size as one fixed term plus one term per item.
    // NOTE(review): both `size_of::()` calls have lost their type argument in
    // this copy of the source - confirm the intended types.
    fn py_estimate_size(&self) -> usize {
        std::mem::size_of::() + self.items.len() * std::mem::size_of::()
    }

    /// Pushes all heap IDs contained in this tuple onto the stack.
    ///
    /// Called during garbage collection to decrement refcounts of nested values.
    /// When `ref-count-panic` is enabled, also marks all Values as Dereferenced.
fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
    // Skip iteration if no refs - GC optimization for tuples of primitives
    if !self.contains_refs {
        return;
    }
    for obj in &mut self.items {
        if let Value::Ref(id) = obj {
            stack.push(*id);
            #[cfg(feature = "ref-count-panic")]
            obj.dec_ref_forget();
        }
    }
}
} // end of `impl HeapItem for Tuple`

/// Implements Python's `tuple.index(value[, start[, end]])` method.
///
/// Returns the index of the first occurrence of value.
/// Raises ValueError if the value is not found.
///
/// `start` and `end` are normalized like slice bounds (negative values count
/// from the end and everything is clamped to the tuple length). Passing no
/// arguments or more than three raises TypeError.
fn tuple_index(tuple: &Tuple, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    let pos_args = args.into_pos_only("tuple.index", vm.heap)?;
    defer_drop!(pos_args, vm);
    let len = tuple.as_slice().len();
    let (value, start, end) = match pos_args.as_slice() {
        [] => return Err(ExcType::type_error_at_least("tuple.index", 1, 0)),
        [value] => (value, 0, len),
        [value, start_arg] => {
            let start = normalize_tuple_index(start_arg.as_int(vm.heap)?, len);
            (value, start, len)
        }
        [value, start_arg, end_arg] => {
            let start = normalize_tuple_index(start_arg.as_int(vm.heap)?, len);
            // `max(start)` keeps the range well-formed when end < start, giving an empty search.
            let end = normalize_tuple_index(end_arg.as_int(vm.heap)?, len).max(start);
            (value, start, end)
        }
        other => return Err(ExcType::type_error_at_most("tuple.index", 3, other.len())),
    };
    // Search for the value in the specified range
    for (i, item) in tuple.as_slice()[start..end].iter().enumerate() {
        if value.py_eq(item, vm)? {
            // `i` is relative to the sub-slice, so add `start` back.
            let idx = i64::try_from(start + i).expect("index exceeds i64::MAX");
            return Ok(Value::Int(idx));
        }
    }
    Err(ExcType::value_error_not_in_tuple())
}

/// Implements Python's `tuple.count(value)` method.
///
/// Returns the number of occurrences of value in the tuple.
fn tuple_count(tuple: &Tuple, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult {
    let value = args.get_one_arg("tuple.count", vm.heap)?;
    defer_drop!(value, vm);
    let mut count = 0usize;
    for item in tuple.as_slice() {
        if value.py_eq(item, vm)? {
            count += 1;
        }
    }
    let count_i64 = i64::try_from(count).expect("count exceeds i64::MAX");
    Ok(Value::Int(count_i64))
}

/// Normalizes a Python-style tuple index to a valid index in range [0, len].
///
/// Negative indices count back from `len` and clamp at 0; non-negative indices
/// clamp at `len`. The function never panics, including for `i64::MIN`.
fn normalize_tuple_index(index: i64, len: usize) -> usize {
    if index < 0 {
        // `unsigned_abs` is total: unlike `-index`, it cannot overflow for `i64::MIN`.
        let distance_from_end = usize::try_from(index.unsigned_abs()).unwrap_or(usize::MAX);
        len.saturating_sub(distance_from_end)
    } else {
        usize::try_from(index).unwrap_or(len).min(len)
    }
}

================================================
FILE: crates/monty/src/types/type.rs
================================================
use std::fmt;

use num_bigint::BigInt;

use crate::{
    args::ArgValues,
    bytecode::VM,
    defer_drop,
    exception_private::{ExcType, RunError, RunResult, SimpleException},
    heap::{DropWithHeap, Heap, HeapData},
    intern::{StaticStrings, StringId},
    resource::ResourceTracker,
    types::{
        Bytes, Dict, FrozenSet, List, LongInt, MontyIter, Path, PyTrait, Range, Set, Slice, Str, Tuple,
        bytes::bytes_fromhex, dict::dict_fromkeys, str::StringRepr,
    },
    value::Value,
};

/// Represents the Python type of a value.
///
/// This enum is used both for type checking and as a callable constructor.
/// Some variants are Python builtins accessible by name (e.g., `int`, `list`),
/// while others are internal types only available through imports or introspection
/// (e.g., `TextIOWrapper`, `PosixPath`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[expect(clippy::enum_variant_names)]
pub enum Type {
    Ellipsis,
    Type,
    NoneType,
    Bool,
    Int,
    Float,
    Range,
    Slice,
    Str,
    Bytes,
    List,
    Tuple,
    NamedTuple,
    Dict,
    DictKeys,
    DictItems,
    DictValues,
    Set,
    FrozenSet,
    Dataclass,
    Exception(ExcType),
    Function,
    BuiltinFunction,
    Cell,
    Iterator,
    /// Coroutine type for async functions and external futures.
    Coroutine,
    Module,
    /// Marker types like stdout/stderr - displays as "_io.TextIOWrapper"
    TextIOWrapper,
    /// typing module special forms (Any, Optional, Union, etc.) - displays as "typing._SpecialForm"
    SpecialForm,
    /// A filesystem path from `pathlib.Path` - displays as "PosixPath"
    Path,
    /// A property descriptor - displays as "property"
    Property,
    /// A compiled regex pattern from `re.compile()` - displays as "re.Pattern"
    RePattern,
    /// A regex match result from `re.match()` / `re.search()` etc. - displays as "re.Match"
    ReMatch,
}

/// Formats the type using the name shown in repr/type output.
impl fmt::Display for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Ellipsis => f.write_str("ellipsis"),
            Self::Type => f.write_str("type"),
            Self::NoneType => f.write_str("NoneType"),
            Self::Bool => f.write_str("bool"),
            Self::Int => f.write_str("int"),
            Self::Float => f.write_str("float"),
            Self::Range => f.write_str("range"),
            Self::Slice => f.write_str("slice"),
            Self::Str => f.write_str("str"),
            Self::Bytes => f.write_str("bytes"),
            Self::List => f.write_str("list"),
            Self::Tuple => f.write_str("tuple"),
            Self::NamedTuple => f.write_str("namedtuple"),
            Self::Dict => f.write_str("dict"),
            Self::DictKeys => f.write_str("dict_keys"),
            Self::DictItems => f.write_str("dict_items"),
            Self::DictValues => f.write_str("dict_values"),
            Self::Set => f.write_str("set"),
            Self::FrozenSet => f.write_str("frozenset"),
            Self::Dataclass => f.write_str("dataclass"),
            // Exception types delegate to the exception kind's own Display.
            Self::Exception(exc_type) => write!(f, "{exc_type}"),
            Self::Function => f.write_str("function"),
            Self::BuiltinFunction => f.write_str("builtin_function_or_method"),
            Self::Cell => f.write_str("cell"),
            Self::Iterator => f.write_str("iterator"),
            Self::Coroutine => f.write_str("coroutine"),
            Self::Module => f.write_str("module"),
            Self::TextIOWrapper => f.write_str("_io.TextIOWrapper"),
            Self::SpecialForm => f.write_str("typing._SpecialForm"),
            Self::Path => f.write_str("PosixPath"),
            Self::Property => f.write_str("property"),
            Self::RePattern => f.write_str("re.Pattern"),
            Self::ReMatch => f.write_str("re.Match"),
        }
    }
}

impl Type {
    /// Returns the Python source-level name for builtin types that can be called directly.
///
/// The result is not always the `Display` text: `Type::Iterator` prints as
/// `iterator` in repr/type output, yet the constructor a Python program
/// writes is `iter`, and that source-level spelling is what is returned here.
/// Types that cannot be named this way yield `None`.
#[must_use]
pub const fn builtin_name(self) -> Option<&'static str> {
    let name = match self {
        Self::Bool => "bool",
        Self::Int => "int",
        Self::Float => "float",
        Self::Str => "str",
        Self::Bytes => "bytes",
        Self::List => "list",
        Self::Tuple => "tuple",
        Self::Dict => "dict",
        Self::Set => "set",
        Self::FrozenSet => "frozenset",
        Self::Range => "range",
        Self::Slice => "slice",
        Self::Iterator => "iter",
        Self::Type => "type",
        Self::Property => "property",
        // Everything else has no bare builtin spelling.
        _ => return None,
    };
    Some(name)
}

/// Maps a bare Python name to the builtin type it denotes, if any.
///
/// Only genuine builtins (names usable without an import) are recognised.
/// Internal types such as `TextIOWrapper`, `PosixPath`, `NoneType` and
/// `ellipsis` are deliberately left out because they need an import or cannot
/// be written as a plain name.
///
/// This takes the place of an earlier strum `FromStr` derive, which matched
/// every variant, including internal ones that must not resolve from a name.
#[must_use]
pub fn from_builtin_name(name: &str) -> Option {
    let ty = match name {
        "bool" => Self::Bool,
        "int" => Self::Int,
        "float" => Self::Float,
        "str" => Self::Str,
        "bytes" => Self::Bytes,
        "list" => Self::List,
        "tuple" => Self::Tuple,
        "dict" => Self::Dict,
        "set" => Self::Set,
        "frozenset" => Self::FrozenSet,
        "range" => Self::Range,
        "slice" => Self::Slice,
        "iter" => Self::Iterator,
        "type" => Self::Type,
        "property" => Self::Property,
        // Not a builtin type name.
        _ => return None,
    };
    Some(ty)
}

/// Checks if a value of type `self` is an instance of `other`.
///
/// Besides exact type equality, the one subtype relationship modelled is:
/// - `bool` is a subtype of `int` (so `isinstance(True, int)` returns True)
#[must_use]
pub fn is_instance_of(self, other: Self) -> bool {
    // Either the types are identical, or this is the bool-is-an-int case.
    self == other || matches!((self, other), (Self::Bool, Self::Int))
}

/// Encodes a callable type as the `u8` operand of the `CallBuiltinType` opcode.
///
/// Constructor-callable types map to `Some(id)`; every other type maps to
/// `None`. The ids are the inverse of `callable_from_u8`.
#[must_use]
pub fn callable_to_u8(self) -> Option {
    let id = match self {
        Self::Bool => 0,
        Self::Int => 1,
        Self::Float => 2,
        Self::Str => 3,
        Self::Bytes => 4,
        Self::List => 5,
        Self::Tuple => 6,
        Self::Dict => 7,
        Self::Set => 8,
        Self::FrozenSet => 9,
        Self::Range => 10,
        Self::Slice => 11,
        Self::Iterator => 12,
        Self::Path => 13,
        // Not callable as a constructor through this opcode.
        _ => return None,
    };
    Some(id)
}

/// Decodes a `CallBuiltinType` operand back into its callable `Type`.
///
/// Valid ids map to `Some(Type)`; any other value maps to `None`. The ids are
/// the inverse of `callable_to_u8`.
#[must_use]
pub fn callable_from_u8(id: u8) -> Option {
    let ty = match id {
        0 => Self::Bool,
        1 => Self::Int,
        2 => Self::Float,
        3 => Self::Str,
        4 => Self::Bytes,
        5 => Self::List,
        6 => Self::Tuple,
        7 => Self::Dict,
        8 => Self::Set,
        9 => Self::FrozenSet,
        10 => Self::Range,
        11 => Self::Slice,
        12 => Self::Iterator,
        13 => Self::Path,
        // Unknown id.
        _ => return None,
    };
    Some(ty)
}

/// Calls this type as a constructor (e.g., `list(x)`, `int(x)`).
///
/// Dispatches to the appropriate type's init method for container types,
/// or handles primitive type conversions inline.
pub(crate) fn call(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { match self { // Container types - delegate to init methods Self::List => List::init(vm, args), Self::Tuple => Tuple::init(vm, args), Self::Dict => Dict::init(vm, args), Self::Set => Set::init(vm, args), Self::FrozenSet => FrozenSet::init(vm, args), Self::Str => Str::init(vm, args), Self::Bytes => Bytes::init(vm, args), Self::Range => Range::init(vm, args), Self::Slice => Slice::init(vm, args), Self::Iterator => MontyIter::init(vm, args), Self::Path => Path::init(vm, args), // Primitive types - inline implementation Self::Int => { let heap = &mut *vm.heap; let interns = vm.interns; let Some(v) = args.get_zero_one_arg("int", heap)? else { return Ok(Value::Int(0)); }; defer_drop!(v, heap); match v { Value::Int(i) => Ok(Value::Int(*i)), Value::Float(f) => Ok(Value::Int(f64_to_i64_truncate(*f))), Value::Bool(b) => Ok(Value::Int(i64::from(*b))), Value::InternString(string_id) => parse_int_from_str(interns.get_str(*string_id), heap), Value::Ref(heap_id) => { // Clone data to release the borrow on heap before mutation match heap.get(*heap_id) { HeapData::Str(s) => { let s = s.to_string(); parse_int_from_str(&s, heap) } HeapData::LongInt(li) => li.clone().into_value(heap).map_err(Into::into), _ => Err(ExcType::type_error_int_conversion(v.py_type(heap))), } } _ => Err(ExcType::type_error_int_conversion(v.py_type(heap))), } } Self::Float => { let heap = &mut *vm.heap; let interns = vm.interns; let Some(v) = args.get_zero_one_arg("float", heap)? 
else { return Ok(Value::Float(0.0)); }; defer_drop!(v, heap); match v { Value::Float(f) => Ok(Value::Float(*f)), Value::Int(i) => Ok(Value::Float(*i as f64)), Value::Bool(b) => Ok(Value::Float(if *b { 1.0 } else { 0.0 })), Value::InternString(string_id) => { Ok(Value::Float(parse_f64_from_str(interns.get_str(*string_id))?)) } Value::Ref(heap_id) => match heap.get(*heap_id) { HeapData::Str(s) => Ok(Value::Float(parse_f64_from_str(s.as_str())?)), _ => Err(ExcType::type_error_float_conversion(v.py_type(heap))), }, _ => Err(ExcType::type_error_float_conversion(v.py_type(heap))), } } Self::Bool => { let Some(v) = args.get_zero_one_arg("bool", vm.heap)? else { return Ok(Value::Bool(false)); }; defer_drop!(v, vm); Ok(Value::Bool(v.py_bool(vm))) } // Non-callable types - raise TypeError _ => Err(ExcType::type_error_not_callable(self)), } } } /// Truncates f64 to i64 with clamping for out-of-range values. /// /// Python's `int(float)` truncates toward zero. For values outside i64 range, /// we clamp to i64::MAX/MIN (Python would use arbitrary precision ints, which /// we don't support). fn f64_to_i64_truncate(value: f64) -> i64 { // trunc() rounds toward zero, matching Python's int(float) behavior let truncated = value.trunc(); if truncated >= i64::MAX as f64 { i64::MAX } else if truncated <= i64::MIN as f64 { i64::MIN } else { // SAFETY for clippy: truncated is guaranteed to be in (i64::MIN, i64::MAX) // after the bounds checks above, so truncation cannot overflow #[expect(clippy::cast_possible_truncation, reason = "bounds checked above")] let result = truncated as i64; result } } /// Parses a Python `float()` string argument into an `f64`. /// /// This supports: /// - Leading/trailing whitespace (e.g. `" 1.5 "`) /// - The special values `inf`, `-inf`, `infinity`, and `nan` (case-insensitive) /// /// Underscore digit separators are not currently supported. 
fn parse_f64_from_str(value: &str) -> RunResult {
    let trimmed = value.trim();
    if trimmed.is_empty() {
        // The error message quotes the original, untrimmed input.
        return Err(value_error_could_not_convert_string_to_float(value));
    }
    // The special values are matched case-insensitively.
    let lower = trimmed.to_ascii_lowercase();
    let parsed = match lower.as_str() {
        "inf" | "+inf" | "infinity" | "+infinity" => f64::INFINITY,
        "-inf" | "-infinity" => f64::NEG_INFINITY,
        "nan" | "+nan" => f64::NAN,
        "-nan" => -f64::NAN,
        // NOTE(review): the turbofish type argument is missing in this copy of
        // the source (presumably `f64`) - confirm against the repository.
        _ => trimmed
            .parse::()
            .map_err(|_| value_error_could_not_convert_string_to_float(value))?,
    };
    Ok(parsed)
}

/// Creates the `ValueError` raised by `float()` when a string cannot be parsed.
///
/// Matches CPython's message format: `could not convert string to float: '...'`.
fn value_error_could_not_convert_string_to_float(value: &str) -> RunError {
    SimpleException::new_msg(
        ExcType::ValueError,
        format!("could not convert string to float: {}", StringRepr(value)),
    )
    .into()
}

/// Returns true if every `_` in `literal` sits directly between two ASCII digits.
///
/// This is the placement rule Python applies to integer literals passed to
/// `int()`: no leading, trailing, or doubled underscores, and none next to a sign.
fn int_underscores_well_placed(literal: &str) -> bool {
    let bytes = literal.as_bytes();
    bytes.iter().enumerate().all(|(i, &b)| {
        b != b'_'
            || (i > 0
                && bytes[i - 1].is_ascii_digit()
                && bytes.get(i + 1).is_some_and(u8::is_ascii_digit))
    })
}

/// Parses a Python `int()` string argument into an `Int` or `LongInt`.
///
/// Handles whitespace stripping and `_` digit separators (accepted only as
/// single underscores between digits). Returns `Value::Int` if the value
/// fits in i64, otherwise allocates a `LongInt` on the heap. Returns `ValueError` on failure.
// NOTE(review): the `parse::()` calls below have lost their type arguments in
// this copy of the source (presumably `i64`, `i64`, `i64`, `BigInt` in order).
fn parse_int_from_str(value: &str, heap: &mut Heap) -> RunResult {
    // Try parsing as i64 first (fast path)
    if let Ok(int) = value.parse::() {
        return Ok(Value::Int(int));
    }
    let trimmed = value.trim();
    if let Ok(int) = trimmed.parse::() {
        return Ok(Value::Int(int));
    }
    // Reject misplaced separators before stripping them: once removed, "_1",
    // "1_" and "1__0" would be indistinguishable from valid literals.
    if !int_underscores_well_placed(trimmed) {
        return Err(value_error_invalid_literal_for_int(value));
    }
    // Try with underscores removed
    let normalized = trimmed.replace('_', "");
    if let Ok(int) = normalized.parse::() {
        return Ok(Value::Int(int));
    }
    // Try parsing as BigInt for values too large for i64
    if let Ok(bi) = normalized.parse::() {
        return Ok(LongInt::new(bi).into_value(heap)?);
    }
    Err(value_error_invalid_literal_for_int(value))
}

/// Creates the `ValueError` raised by `int()` when a string cannot be parsed.
///
/// Matches CPython's message format: `invalid literal for int() with base 10: '...'`.
fn value_error_invalid_literal_for_int(value: &str) -> RunError { SimpleException::new_msg( ExcType::ValueError, format!("invalid literal for int() with base 10: {}", StringRepr(value)), ) .into() } /// Dispatches a classmethod call on a type object. /// /// Handles classmethods like `dict.fromkeys()` and `bytes.fromhex()` that are /// called on the type itself rather than on an instance. pub(crate) fn call_type_method( t: Type, method_id: StringId, args: ArgValues, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result { match (t, method_id) { (Type::Dict, m) if m == StaticStrings::Fromkeys => return dict_fromkeys(args, vm), (Type::Bytes, m) if m == StaticStrings::Fromhex => { return bytes_fromhex(args, vm); } _ => {} } // Other types or unknown methods - report actual type name, not 'type' args.drop_with_heap(vm.heap); Err(ExcType::attribute_error(t, vm.interns.get_str(method_id))) } ================================================ FILE: crates/monty/src/value.rs ================================================ use std::{ borrow::Cow, cmp::Ordering, collections::hash_map::DefaultHasher, fmt::{self, Write}, hash::{Hash, Hasher}, mem::discriminant, str::FromStr, }; use ahash::AHashSet; use num_bigint::BigInt; use num_integer::Integer; use num_traits::{ToPrimitive, Zero}; use crate::{ asyncio::CallId, builtins::Builtins, bytecode::{CallResult, VM}, exception_private::{ExcType, RunError, RunResult, SimpleException}, heap::{ContainsHeap, Heap, HeapData, HeapGuard, HeapId}, heap_data::HeapDataMut, intern::{BytesId, FunctionId, Interns, LongIntId, StaticStrings, StringId}, modules::ModuleFunctions, resource::{ResourceError, ResourceTracker, check_div_size, check_lshift_size, check_pow_size, check_repeat_size}, types::{ LongInt, Property, PyTrait, Str, Type, bytes::{bytes_repr_fmt, get_byte_at_index, get_bytes_slice}, path, str::{allocate_char, get_char_at_index, get_str_slice, string_repr_fmt}, }, }; /// Primary value type representing Python objects at runtime. 
///
/// This enum uses a hybrid design: small immediate values (Int, Bool, None) are stored
/// inline, while heap-allocated values (List, Str, Dict, etc.) are stored in the arena
/// and referenced via `Ref(HeapId)`.
///
/// NOTE: `Clone` is intentionally NOT derived. Use `clone_with_heap()` for heap values
/// or `clone_immediate()` for immediate values only. Direct cloning via `.clone()` would
/// bypass reference counting and cause memory leaks.
///
/// NOTE: it's important to keep this size small to minimize memory overhead!
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub(crate) enum Value {
    // Immediate values (stored inline, no heap allocation)
    /// Placeholder that is not a real Python object: asking for its type panics, it
    /// never compares equal to anything (including itself), and it is falsy.
    Undefined,
    /// Python's `Ellipsis` singleton; always truthy.
    Ellipsis,
    /// Python's `None`; falsy and equal only to itself.
    None,
    /// A Python `bool`. Compares and multiplies/divides as the integer 0 or 1.
    Bool(bool),
    /// A Python `int` that fits in an `i64`. Arithmetic that overflows `i64` promotes
    /// the result to a heap-allocated `LongInt` instead of wrapping.
    Int(i64),
    /// A Python `float`.
    Float(f64),
    /// An interned string literal. The StringId references the string in the Interns table.
    /// To get the actual string content, use `interns.get_str(string_id)`.
    InternString(StringId),
    /// An interned bytes literal. The BytesId references the bytes in the Interns table.
    /// To get the actual bytes content, use `interns.get_bytes(bytes_id)`.
    InternBytes(BytesId),
    /// An interned long integer literal. The `LongIntId` references the `BigInt` in the Interns table.
    /// Used for integer literals exceeding i64 range. Converted to heap-allocated `LongInt` on load.
    InternLongInt(LongIntId),
    /// A builtin function or exception type
    Builtin(Builtins),
    /// A function from a module (not a global builtin).
    /// Module functions require importing a module to access (e.g., `asyncio.gather`).
    ModuleFunction(ModuleFunctions),
    /// A function defined in the module (not a closure, doesn't capture any variables)
    DefFunction(FunctionId),
    /// Reference to an external function defined on the host.
    ///
    /// The `StringId` stores the interned function name. When called, the VM yields
    /// a `FrameExit::ExternalCall` with this `StringId` so the host can look up and
    /// execute the function by name.
    ExtFunction(StringId),
    /// A marker value representing special objects like sys.stdout/stderr.
    /// These exist but have minimal functionality in the sandboxed environment.
    Marker(Marker),
    /// A property descriptor that computes its value when accessed.
    /// When retrieved via `py_getattr`, the property's getter is invoked.
    Property(Property),
    /// A pending external function call result.
    ///
    /// Created when the host calls `run_pending()` instead of `run(result)` for an
    /// external function call. The CallId correlates with the call that created it.
    /// When awaited, blocks the task until the host provides a result via `resume()`.
    ///
    /// ExternalFutures follow single-shot semantics like coroutines - awaiting an
    /// already-awaited ExternalFuture raises RuntimeError.
    ExternalFuture(CallId),

    // Heap-allocated values (stored in arena)
    /// Handle to a value stored in the heap arena; most operations on it are
    /// delegated to the `HeapData` looked up by this id.
    Ref(HeapId),

    /// Sentinel value indicating this Value was properly cleaned up via `drop_with_heap`.
    /// Only exists when `ref-count-panic` feature is enabled. Used to verify reference counting
    /// correctness - if a `Ref` variant is dropped without calling `drop_with_heap`, the
    /// Drop impl will panic.
    #[cfg(feature = "ref-count-panic")]
    Dereferenced,
}

/// Drop implementation that panics if a `Ref` variant is dropped without calling `drop_with_heap`.
/// This helps catch reference counting bugs during development/testing.
/// Only enabled when the `ref-count-panic` feature is active.
#[cfg(feature = "ref-count-panic")] impl Drop for Value { fn drop(&mut self) { if let Self::Ref(id) = self { panic!("Value::Ref({id:?}) dropped without calling drop_with_heap() - this is a reference counting bug"); } } } impl From for Value { fn from(v: bool) -> Self { Self::Bool(v) } } impl PyTrait for Value { fn py_type(&self, heap: &Heap) -> Type { match self { Self::Undefined => panic!("Cannot get type of undefined value"), Self::Ellipsis => Type::Ellipsis, Self::None => Type::NoneType, Self::Bool(_) => Type::Bool, Self::Int(_) | Self::InternLongInt(_) => Type::Int, Self::Float(_) => Type::Float, Self::InternString(_) => Type::Str, Self::InternBytes(_) => Type::Bytes, Self::Builtin(c) => c.py_type(), Self::ModuleFunction(_) => Type::BuiltinFunction, Self::DefFunction(_) | Self::ExtFunction(_) => Type::Function, Self::Marker(m) => m.py_type(), Self::Property(_) => Type::Property, Self::ExternalFuture(_) => Type::Coroutine, Self::Ref(id) => heap.get(*id).py_type(heap), #[cfg(feature = "ref-count-panic")] Self::Dereferenced => panic!("Cannot access Dereferenced object"), } } fn py_len(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Option { match self { // Count Unicode characters, not bytes, to match Python semantics Self::InternString(string_id) => Some(vm.interns.get_str(*string_id).chars().count()), Self::InternBytes(bytes_id) => Some(vm.interns.get_bytes(*bytes_id).len()), Self::Ref(id) => vm.heap.get(*id).py_len(vm), _ => None, } } fn py_eq(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> Result { let interns = vm.interns; match (self, other) { (Self::Undefined, _) => Ok(false), (_, Self::Undefined) => Ok(false), (Self::Int(v1), Self::Int(v2)) => Ok(v1 == v2), (Self::Bool(v1), Self::Bool(v2)) => Ok(v1 == v2), (Self::Bool(v1), Self::Int(v2)) => Ok(i64::from(*v1) == *v2), (Self::Int(v1), Self::Bool(v2)) => Ok(*v1 == i64::from(*v2)), (Self::Float(v1), Self::Float(v2)) => Ok(v1 == v2), (Self::Int(v1), Self::Float(v2)) => Ok((*v1 as f64) == 
*v2), (Self::Float(v1), Self::Int(v2)) => Ok(*v1 == (*v2 as f64)), (Self::Bool(v1), Self::Float(v2)) => Ok((i64::from(*v1) as f64) == *v2), (Self::Float(v1), Self::Bool(v2)) => Ok(*v1 == (i64::from(*v2) as f64)), (Self::None, Self::None) => Ok(true), // Int == LongInt comparison (Self::Int(a), Self::Ref(id)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { Ok(BigInt::from(*a) == *li.inner()) } else { Ok(false) } } // LongInt == Int comparison (Self::Ref(id), Self::Int(b)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { Ok(*li.inner() == BigInt::from(*b)) } else { Ok(false) } } // For interned interns, compare by StringId first (fast path for same interned string) (Self::InternString(s1), Self::InternString(s2)) => Ok(s1 == s2), // for strings we need to account for the fact they might be either interned or not (Self::InternString(string_id), Self::Ref(id2)) => { if let HeapData::Str(s2) = vm.heap.get(*id2) { Ok(interns.get_str(*string_id) == s2.as_str()) } else { Ok(false) } } (Self::Ref(id1), Self::InternString(string_id)) => { if let HeapData::Str(s1) = vm.heap.get(*id1) { Ok(s1.as_str() == interns.get_str(*string_id)) } else { Ok(false) } } // For interned bytes, compare by content (bytes are not deduplicated unlike interns) (Self::InternBytes(b1), Self::InternBytes(b2)) => { // Fast path: same BytesId means same content Ok(b1 == b2 || interns.get_bytes(*b1) == interns.get_bytes(*b2)) } // same for bytes (Self::InternBytes(bytes_id), Self::Ref(id2)) => { if let HeapData::Bytes(b2) = vm.heap.get(*id2) { Ok(interns.get_bytes(*bytes_id) == b2.as_slice()) } else { Ok(false) } } (Self::Ref(id1), Self::InternBytes(bytes_id)) => { if let HeapData::Bytes(b1) = vm.heap.get(*id1) { Ok(b1.as_slice() == interns.get_bytes(*bytes_id)) } else { Ok(false) } } (Self::Ref(id1), Self::Ref(id2)) => { if *id1 == *id2 { return Ok(true); } Heap::with_two(vm, *id1, *id2, |vm, left, right| left.py_eq(right, vm)) } // Builtins equality - just check the enums are equal 
(Self::Builtin(b1), Self::Builtin(b2)) => Ok(b1 == b2), // Module functions equality (Self::ModuleFunction(mf1), Self::ModuleFunction(mf2)) => Ok(mf1 == mf2), (Self::DefFunction(f1), Self::DefFunction(f2)) => Ok(f1 == f2), // Markers compare equal if they're the same variant (Self::Marker(m1), Self::Marker(m2)) => Ok(m1 == m2), // Properties compare equal if they're the same variant (Self::Property(p1), Self::Property(p2)) => Ok(p1 == p2), _ => Ok(false), } } fn py_cmp( &self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, ResourceError> { let interns = vm.interns; // py_cmp handles numbers, strings, bytes, and tuples. // Recursion depth tracking for tuples is handled in Tuple::py_cmp. match (self, other) { (Self::Int(s), Self::Int(o)) => Ok(s.partial_cmp(o)), (Self::Float(s), Self::Float(o)) => Ok(s.partial_cmp(o)), (Self::Int(s), Self::Float(o)) => Ok((*s as f64).partial_cmp(o)), (Self::Float(s), Self::Int(o)) => Ok(s.partial_cmp(&(*o as f64))), // Bool promotion: convert to Int and re-dispatch. Recursion is bounded // to at most 2 levels (Bool→Int, then Int matches directly above). 
(Self::Bool(s), _) => Self::Int(i64::from(*s)).py_cmp(other, vm), (_, Self::Bool(s)) => self.py_cmp(&Self::Int(i64::from(*s)), vm), // Int vs LongInt comparison (Self::Int(a), Self::Ref(id)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { Ok(BigInt::from(*a).partial_cmp(li.inner())) } else { Ok(None) } } // LongInt vs Int comparison (Self::Ref(id), Self::Int(b)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { Ok(li.inner().partial_cmp(&BigInt::from(*b))) } else { Ok(None) } } // Ref vs Ref comparison: handles LongInt, Str, and Tuple (Self::Ref(id1), Self::Ref(id2)) => match (vm.heap.get(*id1), vm.heap.get(*id2)) { (HeapData::LongInt(a), HeapData::LongInt(b)) => Ok(a.inner().partial_cmp(b.inner())), (HeapData::Str(a), HeapData::Str(b)) => Ok(a.as_str().partial_cmp(b.as_str())), (HeapData::Tuple(_), HeapData::Tuple(_)) => { Heap::with_two(vm, *id1, *id2, |vm, left, right| left.py_cmp(right, vm)) } _ => Ok(None), }, // Interned string comparisons (Self::InternString(s1), Self::InternString(s2)) => { Ok(interns.get_str(*s1).partial_cmp(interns.get_str(*s2))) } // Cross-type string comparisons: interned vs heap-allocated (Self::InternString(s1), Self::Ref(id2)) => { if let HeapData::Str(s2) = vm.heap.get(*id2) { Ok(interns.get_str(*s1).partial_cmp(s2.as_str())) } else { Ok(None) } } (Self::Ref(id1), Self::InternString(s2)) => { if let HeapData::Str(s1) = vm.heap.get(*id1) { Ok(s1.as_str().partial_cmp(interns.get_str(*s2))) } else { Ok(None) } } (Self::InternBytes(b1), Self::InternBytes(b2)) => { Ok(interns.get_bytes(*b1).partial_cmp(interns.get_bytes(*b2))) } _ => Ok(None), } } fn py_bool(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> bool { match self { Self::Undefined => false, Self::Ellipsis => true, Self::None => false, Self::Bool(b) => *b, Self::Int(v) => *v != 0, Self::Float(f) => *f != 0.0, // InternLongInt is always truthy (if it were zero, it would fit in i64) Self::InternLongInt(_) => true, Self::Builtin(_) | Self::ModuleFunction(_) => true, // 
Builtins are always truthy Self::DefFunction(_) | Self::ExtFunction(_) => true, // Functions are always truthy Self::Marker(_) => true, // Markers are always truthy Self::Property(_) => true, // Properties are always truthy Self::ExternalFuture(_) => true, // ExternalFutures are always truthy Self::InternString(string_id) => !vm.interns.get_str(*string_id).is_empty(), Self::InternBytes(bytes_id) => !vm.interns.get_bytes(*bytes_id).is_empty(), Self::Ref(id) => vm.heap.get(*id).py_bool(vm), #[cfg(feature = "ref-count-panic")] Self::Dereferenced => panic!("Cannot access Dereferenced object"), } } fn py_repr_fmt( &self, f: &mut impl Write, vm: &VM<'_, '_, impl ResourceTracker>, heap_ids: &mut AHashSet, ) -> std::fmt::Result { let interns = vm.interns; match self { Self::Undefined => f.write_str("Undefined"), Self::Ellipsis => f.write_str("Ellipsis"), Self::None => f.write_str("None"), Self::Bool(true) => f.write_str("True"), Self::Bool(false) => f.write_str("False"), Self::Int(v) => write!(f, "{v}"), Self::InternLongInt(long_int_id) => write!(f, "{}", interns.get_long_int(*long_int_id)), Self::Float(v) => { let s = v.to_string(); if s.contains('.') { f.write_str(&s) } else { write!(f, "{s}.0") } } Self::Builtin(b) => b.py_repr_fmt(f), Self::ModuleFunction(mf) => mf.py_repr_fmt(f, self.id()), Self::DefFunction(f_id) => interns.get_function(*f_id).py_repr_fmt(f, interns, self.id()), Self::ExtFunction(name_id) => { write!(f, "", interns.get_str(*name_id)) } Self::InternString(string_id) => string_repr_fmt(interns.get_str(*string_id), f), Self::InternBytes(bytes_id) => bytes_repr_fmt(interns.get_bytes(*bytes_id), f), Self::Marker(m) => m.py_repr_fmt(f), Self::Property(p) => write!(f, ""), Self::ExternalFuture(call_id) => write!(f, "", call_id.raw()), Self::Ref(id) => { if heap_ids.contains(id) { // Cycle detected - write type-specific placeholder following Python semantics match vm.heap.get(*id) { HeapData::List(_) => f.write_str("[...]"), HeapData::Tuple(_) => 
f.write_str("(...)"), HeapData::Dict(_) => f.write_str("{...}"), // Other types don't typically have cycles, but handle gracefully _ => f.write_str("..."), } } else { heap_ids.insert(*id); let result = vm.heap.get(*id).py_repr_fmt(f, vm, heap_ids); heap_ids.remove(id); result } } #[cfg(feature = "ref-count-panic")] Self::Dereferenced => panic!("Cannot access Dereferenced object"), } } fn py_str(&self, vm: &VM<'_, '_, impl ResourceTracker>) -> Cow<'static, str> { match self { Self::InternString(string_id) => vm.interns.get_str(*string_id).to_owned().into(), Self::Ref(id) => vm.heap.get(*id).py_str(vm), _ => self.py_repr(vm), } } fn py_add( &self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, crate::resource::ResourceError> { let interns = vm.interns; match (self, other) { // Int + Int with overflow detection (Self::Int(a), Self::Int(b)) => { if let Some(result) = a.checked_add(*b) { Ok(Some(Self::Int(result))) } else { // Overflow - promote to LongInt let li = LongInt::from(*a) + LongInt::from(*b); li.into_value(vm.heap).map(Some) } } // Int + LongInt (Self::Int(i), Self::Ref(id)) | (Self::Ref(id), Self::Int(i)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { let result = LongInt::new(li.inner() + i); result.into_value(vm.heap).map(Some) } else { Ok(None) } } (Self::Float(v1), Self::Float(v2)) => Ok(Some(Self::Float(v1 + v2))), // Int + Float and Float + Int (Self::Int(a), Self::Float(b)) => Ok(Some(Self::Float(*a as f64 + b))), (Self::Float(a), Self::Int(b)) => Ok(Some(Self::Float(a + *b as f64))), (Self::Ref(id1), Self::Ref(id2)) => { Heap::with_two(vm, *id1, *id2, |vm, left, right| left.py_add(right, vm)) } (Self::InternString(s1), Self::InternString(s2)) => { let concat = format!("{}{}", interns.get_str(*s1), interns.get_str(*s2)); Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Str(concat.into()))?))) } // for strings we need to account for the fact they might be either interned or not (Self::InternString(string_id), Self::Ref(id2)) 
=> { if let HeapData::Str(s2) = vm.heap.get(*id2) { let concat = format!("{}{}", interns.get_str(*string_id), s2.as_str()); Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Str(concat.into()))?))) } else { Ok(None) } } (Self::Ref(id1), Self::InternString(string_id)) => { if let HeapData::Str(s1) = vm.heap.get(*id1) { let concat = format!("{}{}", s1.as_str(), interns.get_str(*string_id)); Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Str(concat.into()))?))) } else { Ok(None) } } // same for bytes (Self::InternBytes(b1), Self::InternBytes(b2)) => { let bytes1 = interns.get_bytes(*b1); let bytes2 = interns.get_bytes(*b2); let mut b = Vec::with_capacity(bytes1.len() + bytes2.len()); b.extend_from_slice(bytes1); b.extend_from_slice(bytes2); Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Bytes(b.into()))?))) } (Self::InternBytes(bytes_id), Self::Ref(id2)) => { if let HeapData::Bytes(b2) = vm.heap.get(*id2) { let bytes1 = interns.get_bytes(*bytes_id); let mut b = Vec::with_capacity(bytes1.len() + b2.len()); b.extend_from_slice(bytes1); b.extend_from_slice(b2); Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Bytes(b.into()))?))) } else { Ok(None) } } (Self::Ref(id1), Self::InternBytes(bytes_id)) => { if let HeapData::Bytes(b1) = vm.heap.get(*id1) { let bytes2 = interns.get_bytes(*bytes_id); let mut b = Vec::with_capacity(b1.len() + bytes2.len()); b.extend_from_slice(b1); b.extend_from_slice(bytes2); Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Bytes(b.into()))?))) } else { Ok(None) } } _ => Ok(None), } } fn py_sub( &self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> Result, crate::resource::ResourceError> { match (self, other) { // Int - Int with overflow detection (Self::Int(a), Self::Int(b)) => { if let Some(result) = a.checked_sub(*b) { Ok(Some(Self::Int(result))) } else { // Overflow - promote to LongInt let li = LongInt::from(*a) - LongInt::from(*b); li.into_value(vm.heap).map(Some) } } // Int - LongInt (Self::Int(a), Self::Ref(id)) => { if let 
HeapData::LongInt(li) = vm.heap.get(*id) { let result = LongInt::from(*a) - LongInt::new(li.inner().clone()); result.into_value(vm.heap).map(Some) } else { Ok(None) } } // LongInt - Int (Self::Ref(id), Self::Int(b)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { let result = LongInt::new(li.inner().clone()) - LongInt::from(*b); result.into_value(vm.heap).map(Some) } else { Ok(None) } } // LongInt - LongInt (Self::Ref(id1), Self::Ref(id2)) => { Heap::with_two(vm, *id1, *id2, |vm, left, right| left.py_sub(right, vm)) } // Float - Float (Self::Float(a), Self::Float(b)) => Ok(Some(Self::Float(a - b))), // Int - Float and Float - Int (Self::Int(a), Self::Float(b)) => Ok(Some(Self::Float(*a as f64 - b))), (Self::Float(a), Self::Int(b)) => Ok(Some(Self::Float(a - *b as f64))), _ => Ok(None), } } fn py_mod(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { match (self, other) { (Self::Int(a), Self::Int(b)) => { if *b == 0 { Err(ExcType::zero_division().into()) } else if let Some(r) = a.checked_rem(*b) { // Python modulo: result has the same sign as divisor (b) let result = if r != 0 && (*a < 0) != (*b < 0) { r + *b } else { r }; Ok(Some(Self::Int(result))) } else { // Overflow - i64::MIN % -1 is 0 Ok(Some(Self::Int(0))) } } // Int % LongInt (Self::Int(a), Self::Ref(id)) => { // Clone to avoid borrow conflict with heap mutation let b_clone = if let HeapData::LongInt(li) = vm.heap.get(*id) { if li.is_zero() { return Err(ExcType::zero_division().into()); } li.inner().clone() } else { return Ok(None); }; let bi = BigInt::from(*a).mod_floor(&b_clone); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } // LongInt % Int (Self::Ref(id), Self::Int(b)) => { if *b == 0 { return Err(ExcType::zero_division().into()); } // Clone to avoid borrow conflict with heap mutation let a_clone = if let HeapData::LongInt(li) = vm.heap.get(*id) { li.inner().clone() } else { return Ok(None); }; let bi = a_clone.mod_floor(&BigInt::from(*b)); 
Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } // LongInt % LongInt (Self::Ref(id1), Self::Ref(id2)) => { Heap::with_two(vm, *id1, *id2, |vm, left, right| left.py_mod(right, vm)) } (Self::Float(v1), Self::Float(v2)) => { if *v2 == 0.0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float(v1 % v2))) } } (Self::Float(v1), Self::Int(v2)) => { if *v2 == 0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float(v1 % (*v2 as f64)))) } } (Self::Int(v1), Self::Float(v2)) => { if *v2 == 0.0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float((*v1 as f64) % v2))) } } _ => Ok(None), } } fn py_mod_eq(&self, other: &Self, right_value: i64) -> Option { match (self, other) { (Self::Int(v1), Self::Int(v2)) => { if let Some(r) = v1.checked_rem(*v2) { // Python modulo: result has same sign as divisor let result = if r != 0 && (*v1 < 0) != (*v2 < 0) { r + *v2 } else { r }; Some(result == right_value) } else { // checked_rem returns None for overflow (i64::MIN % -1) or zero division (*v2 != 0).then_some(0 == right_value) } } (Self::Float(v1), Self::Float(v2)) => Some(v1 % v2 == right_value as f64), (Self::Float(v1), Self::Int(v2)) => Some(v1 % (*v2 as f64) == right_value as f64), (Self::Int(v1), Self::Float(v2)) => Some((*v1 as f64) % v2 == right_value as f64), _ => None, } } fn py_iadd( &mut self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>, _self_id: Option, ) -> Result { let interns = vm.interns; match (&self, other) { (Self::Int(v1), Self::Int(v2)) => { if let Some(result) = v1.checked_add(*v2) { *self = Self::Int(result); } else { // Overflow - promote to LongInt let li = LongInt::from(*v1) + LongInt::from(*v2); *self = li.into_value(vm.heap)?; } Ok(true) } (Self::Float(v1), Self::Float(v2)) => { *self = Self::Float(*v1 + *v2); Ok(true) } (Self::InternString(s1), Self::InternString(s2)) => { let concat = format!("{}{}", interns.get_str(*s1), interns.get_str(*s2)); *self = 
Self::Ref(vm.heap.allocate(HeapData::Str(concat.into()))?); Ok(true) } (Self::InternString(string_id), Self::Ref(id2)) => { let result = if let HeapData::Str(s2) = vm.heap.get(*id2) { let concat = format!("{}{}", interns.get_str(*string_id), s2.as_str()); *self = Self::Ref(vm.heap.allocate(HeapData::Str(concat.into()))?); true } else { false }; Ok(result) } // same for bytes (Self::InternBytes(b1), Self::InternBytes(b2)) => { let bytes1 = interns.get_bytes(*b1); let bytes2 = interns.get_bytes(*b2); let mut b = Vec::with_capacity(bytes1.len() + bytes2.len()); b.extend_from_slice(bytes1); b.extend_from_slice(bytes2); *self = Self::Ref(vm.heap.allocate(HeapData::Bytes(b.into()))?); Ok(true) } (Self::InternBytes(bytes_id), Self::Ref(id2)) => { let result = if let HeapData::Bytes(b2) = vm.heap.get(*id2) { let bytes1 = interns.get_bytes(*bytes_id); let mut b = Vec::with_capacity(bytes1.len() + b2.len()); b.extend_from_slice(bytes1); b.extend_from_slice(b2); *self = Self::Ref(vm.heap.allocate(HeapData::Bytes(b.into()))?); true } else { false }; Ok(result) } (Self::Ref(id), Self::Ref(_)) => { Heap::with_entry_mut(vm, *id, |vm, mut data| data.py_iadd(other, vm, Some(*id))) } _ => Ok(false), } } fn py_mult(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { let interns = vm.interns; match (self, other) { // Numeric multiplication with overflow promotion to LongInt (Self::Int(a), Self::Int(b)) => { if let Some(result) = a.checked_mul(*b) { Ok(Some(Self::Int(result))) } else { // Overflow - promote to LongInt let li = LongInt::from(*a) * LongInt::from(*b); Ok(Some(li.into_value(vm.heap)?)) } } // Int * Ref (LongInt or sequence) (Self::Int(a), Self::Ref(id)) => vm.heap.mult_ref_by_i64(*id, *a), // Ref * Int (LongInt or sequence) (Self::Ref(id), Self::Int(b)) => vm.heap.mult_ref_by_i64(*id, *b), // Ref * Ref (LongInt * LongInt, sequence * LongInt, etc.) 
(Self::Ref(id1), Self::Ref(id2)) => vm.heap.mult_heap_values(*id1, *id2), (Self::Float(a), Self::Float(b)) => Ok(Some(Self::Float(a * b))), (Self::Int(a), Self::Float(b)) => Ok(Some(Self::Float(*a as f64 * b))), (Self::Float(a), Self::Int(b)) => Ok(Some(Self::Float(a * *b as f64))), // Bool numeric multiplication (True=1, False=0) (Self::Bool(a), Self::Int(b)) => { let a_int = i64::from(*a); Ok(Some(Self::Int(a_int * b))) } (Self::Int(a), Self::Bool(b)) => { let b_int = i64::from(*b); Ok(Some(Self::Int(a * b_int))) } (Self::Bool(a), Self::Float(b)) => { let a_float = if *a { 1.0 } else { 0.0 }; Ok(Some(Self::Float(a_float * b))) } (Self::Float(a), Self::Bool(b)) => { let b_float = if *b { 1.0 } else { 0.0 }; Ok(Some(Self::Float(a * b_float))) } (Self::Bool(a), Self::Bool(b)) => { let result = i64::from(*a) * i64::from(*b); Ok(Some(Self::Int(result))) } // String repetition: "ab" * 3 or 3 * "ab" (Self::InternString(s), Self::Int(n)) | (Self::Int(n), Self::InternString(s)) => { let count = i64_to_repeat_count(*n)?; let str_ref = interns.get_str(*s); check_repeat_size(str_ref.len(), count, vm.heap.tracker())?; let result = str_ref.repeat(count); Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Str(result.into()))?))) } // Bytes repetition: b"ab" * 3 or 3 * b"ab" (Self::InternBytes(b), Self::Int(n)) | (Self::Int(n), Self::InternBytes(b)) => { let count = i64_to_repeat_count(*n)?; let bytes_ref = interns.get_bytes(*b); check_repeat_size(bytes_ref.len(), count, vm.heap.tracker())?; let result: Vec = bytes_ref.repeat(count); Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Bytes(result.into()))?))) } // String repetition with LongInt: "ab" * bigint or bigint * "ab" (Self::InternString(s), Self::Ref(id)) | (Self::Ref(id), Self::InternString(s)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { let count = longint_to_repeat_count(li)?; let str_ref = interns.get_str(*s); check_repeat_size(str_ref.len(), count, vm.heap.tracker())?; let result = str_ref.repeat(count); 
Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Str(result.into()))?))) } else { Ok(None) } } // Bytes repetition with LongInt: b"ab" * bigint or bigint * b"ab" (Self::InternBytes(b), Self::Ref(id)) | (Self::Ref(id), Self::InternBytes(b)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { let count = longint_to_repeat_count(li)?; let bytes_ref = interns.get_bytes(*b); check_repeat_size(bytes_ref.len(), count, vm.heap.tracker())?; let result: Vec = bytes_ref.repeat(count); Ok(Some(Self::Ref(vm.heap.allocate(HeapData::Bytes(result.into()))?))) } else { Ok(None) } } _ => Ok(None), } } fn py_div(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { let interns = vm.interns; match (self, other) { // True division always returns float (Self::Int(a), Self::Int(b)) => { if *b == 0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float(*a as f64 / *b as f64))) } } // Int / LongInt (Self::Int(a), Self::Ref(id)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { if li.is_zero() { Err(ExcType::zero_division().into()) } else { // Convert both to f64 for division let a_f64 = *a as f64; let b_f64 = li.to_f64().unwrap_or(f64::INFINITY); Ok(Some(Self::Float(a_f64 / b_f64))) } } else { Ok(None) } } // LongInt / Int (Self::Ref(id), Self::Int(b)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { if *b == 0 { Err(ExcType::zero_division().into()) } else { // Convert both to f64 for division let a_f64 = li.to_f64().unwrap_or(f64::INFINITY); let b_f64 = *b as f64; Ok(Some(Self::Float(a_f64 / b_f64))) } } else { Ok(None) } } // LongInt / LongInt (Self::Ref(id1), Self::Ref(id2)) => match (vm.heap.get(*id1), vm.heap.get(*id2)) { (HeapData::LongInt(li1), HeapData::LongInt(li2)) => { if li2.is_zero() { Err(ExcType::zero_division().into()) } else { let a_f64 = li1.to_f64().unwrap_or(f64::INFINITY); let b_f64 = li2.to_f64().unwrap_or(f64::INFINITY); Ok(Some(Self::Float(a_f64 / b_f64))) } } _ => Ok(None), }, // LongInt / Float (Self::Ref(id), 
Self::Float(b)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { if *b == 0.0 { Err(ExcType::zero_division().into()) } else { let a_f64 = li.to_f64().unwrap_or(f64::INFINITY); Ok(Some(Self::Float(a_f64 / b))) } } else { Ok(None) } } // Float / LongInt (Self::Float(a), Self::Ref(id)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { if li.is_zero() { Err(ExcType::zero_division().into()) } else { let b_f64 = li.to_f64().unwrap_or(f64::INFINITY); Ok(Some(Self::Float(a / b_f64))) } } else { Ok(None) } } (Self::Float(a), Self::Float(b)) => { if *b == 0.0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float(a / b))) } } (Self::Int(a), Self::Float(b)) => { if *b == 0.0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float(*a as f64 / b))) } } (Self::Float(a), Self::Int(b)) => { if *b == 0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float(a / *b as f64))) } } // Bool division (True=1, False=0) (Self::Bool(a), Self::Int(b)) => { if *b == 0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float(f64::from(*a) / *b as f64))) } } (Self::Int(a), Self::Bool(b)) => { if *b { Ok(Some(Self::Float(*a as f64))) // a / 1 = a } else { Err(ExcType::zero_division().into()) } } (Self::Bool(a), Self::Float(b)) => { if *b == 0.0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float(f64::from(*a) / b))) } } (Self::Float(a), Self::Bool(b)) => { if *b { Ok(Some(Self::Float(*a))) // a / 1.0 = a } else { Err(ExcType::zero_division().into()) } } (Self::Bool(a), Self::Bool(b)) => { if *b { Ok(Some(Self::Float(f64::from(*a)))) // a / 1 = a } else { Err(ExcType::zero_division().into()) } } _ => { // Check for Path / (str or Path) - path concatenation if let Self::Ref(id) = self && matches!(vm.heap.get(*id), HeapData::Path(_)) { return path::path_div(*id, other, vm.heap, interns); } Ok(None) } } } fn py_floordiv(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { match (self, other) { // 
Floor division: int // int returns int (Self::Int(a), Self::Int(b)) => { if *b == 0 { Err(ExcType::zero_division().into()) } else if let Some((d, _)) = floor_divmod(*a, *b) { Ok(Some(Self::Int(d))) } else { // Overflow - promote to LongInt check_div_size(i64_bits(*a), vm.heap.tracker())?; let bi = BigInt::from(*a).div_floor(&BigInt::from(*b)); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } } // Int // LongInt (Self::Int(a), Self::Ref(id)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { if li.is_zero() { Err(ExcType::zero_division().into()) } else { let bi = BigInt::from(*a).div_floor(li.inner()); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } } else { Ok(None) } } // LongInt // Int (Self::Ref(id), Self::Int(b)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { if *b == 0 { Err(ExcType::zero_division().into()) } else { let bi = li.inner().div_floor(&BigInt::from(*b)); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } } else { Ok(None) } } // LongInt // LongInt (Self::Ref(id1), Self::Ref(id2)) => match (vm.heap.get(*id1), vm.heap.get(*id2)) { (HeapData::LongInt(li1), HeapData::LongInt(li2)) => { if li2.is_zero() { Err(ExcType::zero_division().into()) } else { let bi = li1.inner().div_floor(li2.inner()); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } } _ => Ok(None), }, // Float floor division returns float (Self::Float(a), Self::Float(b)) => { if *b == 0.0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float((a / b).floor()))) } } (Self::Int(a), Self::Float(b)) => { if *b == 0.0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float((*a as f64 / b).floor()))) } } (Self::Float(a), Self::Int(b)) => { if *b == 0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float((a / *b as f64).floor()))) } } // Bool floor division (True=1, False=0) (Self::Bool(a), Self::Int(b)) => { if *b == 0 { Err(ExcType::zero_division().into()) } else { let a_int = i64::from(*a); // Use same floor division logic as Int // Int let d = 
a_int / b; let r = a_int % b; let result = if r != 0 && (a_int < 0) != (*b < 0) { d - 1 } else { d }; Ok(Some(Self::Int(result))) } } (Self::Int(a), Self::Bool(b)) => { if *b { Ok(Some(Self::Int(*a))) // a // 1 = a } else { Err(ExcType::zero_division().into()) } } (Self::Bool(a), Self::Float(b)) => { if *b == 0.0 { Err(ExcType::zero_division().into()) } else { Ok(Some(Self::Float((f64::from(*a) / b).floor()))) } } (Self::Float(a), Self::Bool(b)) => { if *b { Ok(Some(Self::Float(a.floor()))) // a // 1.0 = floor(a) } else { Err(ExcType::zero_division().into()) } } (Self::Bool(a), Self::Bool(b)) => { if *b { Ok(Some(Self::Int(i64::from(*a)))) // a // 1 = a } else { Err(ExcType::zero_division().into()) } } _ => Ok(None), } } fn py_pow(&self, other: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult> { match (self, other) { (Self::Int(base), Self::Int(exp)) => { if *base == 0 && *exp < 0 { Err(ExcType::zero_negative_power()) } else if *exp >= 0 { // Positive exponent: try to return int, promote to LongInt on overflow if let Ok(exp_u32) = u32::try_from(*exp) { if let Some(result) = base.checked_pow(exp_u32) { Ok(Some(Self::Int(result))) } else { // Overflow - promote to LongInt // Check size before computing to prevent DoS check_pow_size(i64_bits(*base), u64::from(exp_u32), vm.heap.tracker())?; let bi = BigInt::from(*base).pow(exp_u32); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } } else { // exp > u32::MAX - use BigInt with modpow-style exponentiation // For very large exponents, we still need LongInt // Safety: exp >= 0 is guaranteed by the outer if condition #[expect(clippy::cast_sign_loss)] let exp_u64 = *exp as u64; // Check size before computing to prevent DoS check_pow_size(i64_bits(*base), exp_u64, vm.heap.tracker())?; let bi = bigint_pow(BigInt::from(*base), exp_u64); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } } else { // Negative exponent: return float // Use powi if exp fits in i32, otherwise use powf if let Ok(exp_i32) = 
i32::try_from(*exp) { Ok(Some(Self::Float((*base as f64).powi(exp_i32)))) } else { Ok(Some(Self::Float((*base as f64).powf(*exp as f64)))) } } } // LongInt ** Int (Self::Ref(id), Self::Int(exp)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { if li.is_zero() && *exp < 0 { Err(ExcType::zero_negative_power()) } else if *exp >= 0 { // Use BigInt pow for positive exponents if let Ok(exp_u32) = u32::try_from(*exp) { // Check size before computing to prevent DoS check_pow_size(li.bits(), u64::from(exp_u32), vm.heap.tracker())?; let bi = li.inner().pow(exp_u32); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } else { // Safety: exp >= 0 is guaranteed by the outer if condition #[expect(clippy::cast_sign_loss)] let exp_u64 = *exp as u64; // Check size before computing to prevent DoS check_pow_size(li.bits(), exp_u64, vm.heap.tracker())?; let bi = bigint_pow(li.inner().clone(), exp_u64); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } } else { // Negative exponent: return float (LongInt base becomes 0.0 for large values) if let Some(base_f64) = li.to_f64() { if let Ok(exp_i32) = i32::try_from(*exp) { Ok(Some(Self::Float(base_f64.powi(exp_i32)))) } else { Ok(Some(Self::Float(base_f64.powf(*exp as f64)))) } } else { // Base too large for f64, result approaches 0 Ok(Some(Self::Float(0.0))) } } } else { Ok(None) } } // Int ** LongInt (only small positive exponents make sense) (Self::Int(base), Self::Ref(id)) => { if let HeapData::LongInt(li) = vm.heap.get(*id) { if *base == 0 && li.is_negative() { Err(ExcType::zero_negative_power()) } else if !li.is_negative() { // For very large exponents, most results are huge or 0/1 // Check for x ** 0 = 1 first (including 0 ** 0 = 1) if li.is_zero() { Ok(Some(Self::Int(1))) } else if *base == 0 { Ok(Some(Self::Int(0))) } else if *base == 1 { Ok(Some(Self::Int(1))) } else if *base == -1 { // (-1) ** n = 1 if n is even, -1 if n is odd let is_even = (li.inner() % 2i32).is_zero(); Ok(Some(Self::Int(if is_even { 1 } else { -1 }))) } 
else if let Some(exp_u32) = li.to_u32() { // Reasonable exponent size if let Some(result) = base.checked_pow(exp_u32) { Ok(Some(Self::Int(result))) } else { // Check size before computing to prevent DoS check_pow_size(i64_bits(*base), u64::from(exp_u32), vm.heap.tracker())?; let bi = BigInt::from(*base).pow(exp_u32); Ok(Some(LongInt::new(bi).into_value(vm.heap)?)) } } else { // Exponent too large - result would be astronomically large // Python handles this, but it would take forever. Use OverflowError Err(SimpleException::new_msg(ExcType::OverflowError, "exponent too large").into()) } } else { // Negative LongInt exponent: return float if let (Some(base_f64), Some(exp_f64)) = (Some(*base as f64), li.to_f64()) { Ok(Some(Self::Float(base_f64.powf(exp_f64)))) } else { Ok(Some(Self::Float(0.0))) } } } else { Ok(None) } } (Self::Float(base), Self::Float(exp)) => { if *base == 0.0 && *exp < 0.0 { Err(ExcType::zero_negative_power()) } else { Ok(Some(Self::Float(base.powf(*exp)))) } } (Self::Int(base), Self::Float(exp)) => { if *base == 0 && *exp < 0.0 { Err(ExcType::zero_negative_power()) } else { Ok(Some(Self::Float((*base as f64).powf(*exp)))) } } (Self::Float(base), Self::Int(exp)) => { if *base == 0.0 && *exp < 0 { Err(ExcType::zero_negative_power()) } else if let Ok(exp_i32) = i32::try_from(*exp) { // Use powi if exp fits in i32 Ok(Some(Self::Float(base.powi(exp_i32)))) } else { // Fall back to powf for exponents outside i32 range Ok(Some(Self::Float(base.powf(*exp as f64)))) } } // Bool power operations (True=1, False=0) (Self::Bool(base), Self::Int(exp)) => { let base_int = i64::from(*base); if base_int == 0 && *exp < 0 { Err(ExcType::zero_negative_power()) } else if *exp >= 0 { // Positive exponent: 1**n=1, 0**n=0 (for n>0), 0**0=1 if let Ok(exp_u32) = u32::try_from(*exp) { match base_int.checked_pow(exp_u32) { Some(result) => Ok(Some(Self::Int(result))), None => Ok(Some(Self::Float((base_int as f64).powf(*exp as f64)))), } } else { Ok(Some(Self::Float((base_int 
as f64).powf(*exp as f64)))) } } else { // Negative exponent: return float (1**-n=1.0) if let Ok(exp_i32) = i32::try_from(*exp) { Ok(Some(Self::Float((base_int as f64).powi(exp_i32)))) } else { Ok(Some(Self::Float((base_int as f64).powf(*exp as f64)))) } } } (Self::Int(base), Self::Bool(exp)) => { // n ** True = n, n ** False = 1 if *exp { Ok(Some(Self::Int(*base))) } else { Ok(Some(Self::Int(1))) } } (Self::Bool(base), Self::Float(exp)) => { let base_float = f64::from(*base); if base_float == 0.0 && *exp < 0.0 { Err(ExcType::zero_negative_power()) } else { Ok(Some(Self::Float(base_float.powf(*exp)))) } } (Self::Float(base), Self::Bool(exp)) => { // base ** True = base, base ** False = 1.0 if *exp { Ok(Some(Self::Float(*base))) } else { Ok(Some(Self::Float(1.0))) } } (Self::Bool(base), Self::Bool(exp)) => { // True ** True = 1, True ** False = 1, False ** True = 0, False ** False = 1 let base_int = i64::from(*base); let exp_int = i64::from(*exp); if exp_int == 0 { Ok(Some(Self::Int(1))) // anything ** 0 = 1 } else { Ok(Some(Self::Int(base_int))) // base ** 1 = base } } _ => Ok(None), } } fn py_getitem(&self, key: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { let interns = vm.interns; match self { Self::Ref(id) => Heap::with_entry_mut(vm, *id, |vm, data| data.py_getitem(key, vm)), Self::InternString(string_id) => { // Check for slice first if let Self::Ref(key_id) = key && let HeapData::Slice(slice_obj) = vm.heap.get(*key_id) { let s = interns.get_str(*string_id); let char_count = s.chars().count(); let (start, stop, step) = slice_obj .indices(char_count) .map_err(|()| ExcType::value_error_slice_step_zero())?; let result_str = get_str_slice(s, start, stop, step); let heap_id = vm.heap.allocate(HeapData::Str(Str::from(result_str)))?; return Ok(Self::Ref(heap_id)); } // Handle interned string indexing, accepting Int and Bool let index = match key { Self::Int(i) => *i, Self::Bool(b) => i64::from(*b), _ => return 
Err(ExcType::type_error_indices(Type::Str, key.py_type(vm.heap))), }; let s = interns.get_str(*string_id); let c = get_char_at_index(s, index).ok_or_else(ExcType::str_index_error)?; Ok(allocate_char(c, vm.heap)?) } Self::InternBytes(bytes_id) => { // Check for slice first if let Self::Ref(key_id) = key && let HeapData::Slice(slice_obj) = vm.heap.get(*key_id) { let bytes = interns.get_bytes(*bytes_id); let (start, stop, step) = slice_obj .indices(bytes.len()) .map_err(|()| ExcType::value_error_slice_step_zero())?; let result_bytes = get_bytes_slice(bytes, start, stop, step); let heap_id = vm .heap .allocate(HeapData::Bytes(crate::types::Bytes::new(result_bytes)))?; return Ok(Self::Ref(heap_id)); } // Handle interned bytes indexing - returns integer byte value let index = match key { Self::Int(i) => *i, Self::Bool(b) => i64::from(*b), _ => return Err(ExcType::type_error_indices(Type::Bytes, key.py_type(vm.heap))), }; let bytes = interns.get_bytes(*bytes_id); let byte = get_byte_at_index(bytes, index).ok_or_else(ExcType::bytes_index_error)?; Ok(Self::Int(i64::from(byte))) } _ => Err(ExcType::type_error_not_sub(self.py_type(vm.heap))), } } fn py_setitem(&mut self, key: Self, value: Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult<()> { match self { Self::Ref(id) => Heap::with_entry_mut(vm, *id, |vm, mut data| data.py_setitem(key, value, vm)), _ => Err(ExcType::type_error(format!( "'{}' object does not support item assignment", self.py_type(vm.heap) ))), } } } impl Value { /// Returns a stable, unique identifier for this value. /// /// Should match Python's `id()` function conceptually. /// /// For immediate values (Int, Float, Builtins), this computes a deterministic ID /// based on the value's hash, avoiding heap allocation. This means `id(5) == id(5)` will /// return True (unlike CPython for large integers outside the interning range). /// /// Singletons (None, True, False, etc.) return IDs from a dedicated tagged range. 
/// Interned strings/bytes use their interner index for stable identity.
    /// Heap-allocated values (Ref) reuse their `HeapId` inside the heap-tagged range.
    pub fn id(&self) -> usize {
        match self {
            // Heap objects: the `HeapId` is the identity, relocated into the heap-tagged range.
            Self::Ref(id) => heap_tagged_id(*id),

            // Literal singletons occupy fixed slots of the singleton range.
            Self::Undefined => singleton_id(SingletonSlot::Undefined),
            Self::Ellipsis => singleton_id(SingletonSlot::Ellipsis),
            Self::None => singleton_id(SingletonSlot::None),
            Self::Bool(true) => singleton_id(SingletonSlot::True),
            Self::Bool(false) => singleton_id(SingletonSlot::False),

            // Interned values: the interner index, masked so it stays below its tag bit.
            Self::InternString(string_id) => {
                let index = string_id.index() & INTERN_STR_ID_MASK;
                INTERN_STR_ID_TAG | index
            }
            Self::InternBytes(bytes_id) => {
                let index = bytes_id.index() & INTERN_BYTES_ID_MASK;
                INTERN_BYTES_ID_TAG | index
            }
            Self::InternLongInt(long_int_id) => {
                let index = long_int_id.index() & INTERN_LONG_INT_ID_MASK;
                INTERN_LONG_INT_ID_TAG | index
            }

            // Immediate values: the ID is derived from the value itself, so no allocation is needed.
            Self::Int(v) => int_value_id(*v),
            Self::Float(v) => float_value_id(*v),
            Self::Builtin(c) => builtin_value_id(*c),
            Self::ModuleFunction(mf) => module_function_value_id(*mf),
            Self::DefFunction(f_id) => function_value_id(*f_id),
            Self::ExtFunction(name_id) => ext_function_value_id(*name_id),
            Self::Marker(m) => marker_value_id(*m),
            Self::Property(p) => property_value_id(*p),
            Self::ExternalFuture(call_id) => external_future_value_id(*call_id),

            #[cfg(feature = "ref-count-panic")]
            Self::Dereferenced => panic!("Cannot get id of Dereferenced object"),
        }
    }

    /// Returns the Ref ID if this value is a reference, otherwise returns None.
pub fn ref_id(&self) -> Option {
        if let Self::Ref(id) = self { Some(*id) } else { None }
    }

    /// Returns the module name if this value is a module, otherwise returns "".
    ///
    /// Used for error messages in `from module import name` when the name doesn't exist.
    pub fn module_name(&self, heap: &Heap, interns: &Interns) -> String {
        // Only a heap reference that points at a module carries a name.
        if let Self::Ref(id) = self
            && let HeapData::Module(module) = heap.get(*id)
        {
            return interns.get_str(module.name()).to_string();
        }
        "".to_string()
    }

    /// Equivalent of Python's `is` operator.
    ///
    /// Two values are identical exactly when their `id()` values are equal.
    pub fn is(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.id(), other.id());
        lhs == rhs
    }

    /// Computes the hash value for this value, used for dict keys.
    ///
    /// Returns `Ok(Some(hash))` for hashable types (immediate values and immutable heap types).
    /// Returns `Ok(None)` for unhashable types (list, dict).
    /// Returns `Err(ResourceError::Recursion)` if the recursion limit is exceeded
    /// while hashing deeply nested containers (e.g., tuples of tuples).
    ///
    /// For heap-allocated values (Ref variant), this computes the hash lazily
    /// on first use and caches it for subsequent calls.
    ///
    /// The `interns` parameter is needed for InternString/InternBytes to look up
    /// their actual content and hash it consistently with equivalent heap Str/Bytes.
pub fn py_hash( &self, heap: &mut Heap, interns: &Interns, ) -> Result, ResourceError> { // strings bytes bigints and heap allocated values have their own hashing logic match self { // Hash just the actual string or bytes content for consistency with heap Str/Bytes // hence we don't include the discriminant Self::InternString(string_id) => { let mut hasher = DefaultHasher::new(); interns.get_str(*string_id).hash(&mut hasher); return Ok(Some(hasher.finish())); } Self::InternBytes(bytes_id) => { let mut hasher = DefaultHasher::new(); interns.get_bytes(*bytes_id).hash(&mut hasher); return Ok(Some(hasher.finish())); } // Hash BigInt consistently with LongInt (using sign and bytes for large values) Self::InternLongInt(long_int_id) => { let bi = interns.get_long_int(*long_int_id); let mut hasher = DefaultHasher::new(); let (sign, bytes) = bi.to_bytes_le(); sign.hash(&mut hasher); bytes.hash(&mut hasher); return Ok(Some(hasher.finish())); } // For heap-allocated values (includes Range and Exception), compute hash lazily and cache it Self::Ref(id) => return heap.get_or_compute_hash(*id, interns), _ => {} } let mut hasher = DefaultHasher::new(); // hash based on discriminant to avoid collisions with different types discriminant(self).hash(&mut hasher); match self { // Immediate values can be hashed directly Self::Undefined | Self::Ellipsis | Self::None => {} Self::Bool(b) => b.hash(&mut hasher), Self::Int(i) => i.hash(&mut hasher), // Hash the bit representation of float for consistency Self::Float(f) => f.to_bits().hash(&mut hasher), Self::Builtin(b) => b.hash(&mut hasher), Self::ModuleFunction(mf) => mf.hash(&mut hasher), // Hash functions based on function ID Self::DefFunction(f_id) => f_id.hash(&mut hasher), Self::ExtFunction(name_id) => name_id.hash(&mut hasher), // Markers are hashable based on their discriminant (already included above) Self::Marker(m) => m.hash(&mut hasher), // Properties are hashable based on their OS function discriminant Self::Property(p) => 
p.hash(&mut hasher), // ExternalFutures are hashable based on their call ID Self::ExternalFuture(call_id) => call_id.raw().hash(&mut hasher), Self::InternString(_) | Self::InternBytes(_) | Self::InternLongInt(_) | Self::Ref(_) => { unreachable!("covered above") } #[cfg(feature = "ref-count-panic")] Self::Dereferenced => panic!("Cannot access Dereferenced object"), } Ok(Some(hasher.finish())) } /// TODO this doesn't have many tests!!! also doesn't cover bytes /// Checks if `item` is contained in `self` (the container). /// /// Implements Python's `in` operator for various container types: /// - List/Tuple: linear search with equality /// - Dict: key lookup /// - Set/FrozenSet: element lookup /// - Str: substring search pub fn py_contains(&self, item: &Self, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { match self { Self::Ref(heap_id) => Heap::with_entry_mut(vm, *heap_id, |vm, data| match data { HeapDataMut::List(list) => { for el in list.as_slice() { if item.py_eq(el, vm)? { return Ok(true); } } Ok(false) } HeapDataMut::Tuple(tuple) => { for el in tuple.as_slice() { if item.py_eq(el, vm)? 
{ return Ok(true); } } Ok(false) } HeapDataMut::Dict(dict) => dict.get(item, vm).map(|m| m.is_some()), HeapDataMut::DictKeysView(view) => Heap::with_entry_mut(vm, view.dict_id(), |vm, dict_data| { let HeapDataMut::Dict(dict) = dict_data else { panic!("dict_keys view must reference a dict"); }; dict.get(item, vm).map(|m| m.is_some()) }), HeapDataMut::DictItemsView(view) => { let Some((key, value)) = cloned_items_view_candidate(item, vm) else { return Ok(false); }; let mut key_guard = HeapGuard::new(key, vm); let (key, vm) = key_guard.as_parts_mut(); let mut value_guard = HeapGuard::new(value, vm); let (value, vm) = value_guard.as_parts_mut(); Heap::with_entry_mut(vm, view.dict_id(), |vm, dict_data| { let HeapDataMut::Dict(dict) = dict_data else { panic!("dict_items view must reference a dict"); }; match dict.get(key, vm) { Ok(Some(existing_value)) => value.py_eq(existing_value, vm).map_err(RunError::from), Ok(None) => Ok(false), Err(e) => Err(e), } }) } HeapDataMut::DictValuesView(view) => Heap::with_entry_mut(vm, view.dict_id(), |vm, dict_data| { let HeapDataMut::Dict(dict) = dict_data else { panic!("dict_values view must reference a dict"); }; for (_, value) in dict.iter() { if item.py_eq(value, vm)? 
{ return Ok(true); } } Ok(false) }), HeapDataMut::Set(set) => set.contains(item, vm), HeapDataMut::FrozenSet(fset) => fset.contains(item, vm), HeapDataMut::Str(s) => str_contains(s.as_str(), item, vm.heap, vm.interns), HeapDataMut::Range(range) => { // Range containment is O(1) - check bounds and step alignment let n = match item { Self::Int(i) => *i, Self::Bool(b) => i64::from(*b), Self::Float(f) => { // Floats are contained if they equal an integer in the range // e.g., 3.0 in range(5) is True, but 3.5 in range(5) is False if f.fract() != 0.0 { return Ok(false); } // Check if float is within i64 range and convert safely // f64 can represent integers up to 2^53 exactly let int_val = f.trunc(); if int_val < i64::MIN as f64 || int_val > i64::MAX as f64 { return Ok(false); } // Safe conversion: we've verified it's a whole number in i64 range #[expect(clippy::cast_possible_truncation)] let n = int_val as i64; n } _ => return Ok(false), }; Ok(range.contains(n)) } other => { let type_name = other.py_type(vm.heap); Err(ExcType::type_error(format!( "argument of type '{type_name}' is not iterable" ))) } }), Self::InternString(string_id) => { let container_str = vm.interns.get_str(*string_id); str_contains(container_str, item, vm.heap, vm.interns) } _ => { let type_name = self.py_type(vm.heap); Err(ExcType::type_error(format!( "argument of type '{type_name}' is not iterable" ))) } } } /// Gets an attribute from this value. /// /// Dispatches to `py_getattr` on the underlying types where appropriate. /// Accepts `EitherStr` to support both interned and heap-allocated attribute names. /// /// Returns `AttributeError` for other types or unknown attributes. pub fn py_getattr(&self, attr: &EitherStr, vm: &mut VM<'_, '_, impl ResourceTracker>) -> RunResult { match self { Self::Ref(heap_id) => { // Use with_entry_mut to get access to both data and heap without borrow conflicts. // This allows py_getattr to allocate (for computed attributes) while we hold the data. 
let opt_result = Heap::with_entry_mut(vm, *heap_id, |vm, data| data.py_getattr(attr, vm))?; if let Some(call_result) = opt_result { return Ok(call_result); } } Self::Builtin(Builtins::Type(t)) => { // Handle type object attributes like __name__ let is_dunder_name = attr.static_string().map_or_else( || attr.as_str(vm.interns) == "__name__", |ss| ss == StaticStrings::DunderName, ); if is_dunder_name { let name_str = t.to_string(); let str_id = vm.heap.allocate(HeapData::Str(Str::from(name_str)))?; return Ok(CallResult::Value(Self::Ref(str_id))); } } _ => {} } let type_name = self.py_type(vm.heap); Err(ExcType::attribute_error(type_name, attr.as_str(vm.interns))) } /// Sets an attribute on this value. /// /// Currently only Dataclass objects support attribute setting. /// Returns AttributeError for other types. /// /// Takes ownership of `value` and drops it on error. /// On success, drops the old attribute value if one existed. pub fn py_set_attr( &self, name_id: StringId, value: Self, vm: &mut VM<'_, '_, impl ResourceTracker>, ) -> RunResult<()> { let attr_name = vm.interns.get_str(name_id); if let Self::Ref(heap_id) = self { let heap_id = *heap_id; let is_dataclass = matches!(vm.heap.get(heap_id), HeapData::Dataclass(_)); if is_dataclass { let name_value = Self::InternString(name_id); Heap::with_entry_mut(vm, heap_id, |vm, data| { if let HeapDataMut::Dataclass(dc) = data { match dc.set_attr(name_value, value, vm) { Ok(old_value) => { if let Some(old) = old_value { old.drop_with_heap(vm.heap); } Ok(()) } Err(e) => Err(e), } } else { unreachable!("type changed during borrow") } }) } else { let type_name = vm.heap.get(heap_id).py_type(vm.heap); value.drop_with_heap(vm.heap); Err(ExcType::attribute_error_no_setattr(type_name, attr_name)) } } else { let type_name = self.py_type(vm.heap); value.drop_with_heap(vm.heap); Err(ExcType::attribute_error_no_setattr(type_name, attr_name)) } } /// Extracts an integer value from the Value. 
///
    /// Accepts `Int` and `LongInt` (if it fits in i64). Returns a `TypeError` for other types
    /// and an `OverflowError` if the `LongInt` value is too large.
    ///
    /// Note: The LongInt-to-i64 conversion path is defensive code. In normal execution,
    /// heap-allocated `LongInt` values always exceed i64 range because `LongInt::into_value()`
    /// automatically demotes i64-fitting values to `Value::Int`. However, this path could be
    /// reached via deserialization of crafted snapshot data.
    pub fn as_int(&self, heap: &Heap) -> RunResult {
        // The two integer representations return early; everything else shares one TypeError.
        match self {
            Self::Int(i) => return Ok(*i),
            Self::Ref(heap_id) => {
                if let HeapData::LongInt(li) = heap.get(*heap_id) {
                    return li.to_i64().ok_or_else(ExcType::overflow_shift_count);
                }
            }
            _ => {}
        }
        let msg = format!("'{}' object cannot be interpreted as an integer", self.py_type(heap));
        Err(SimpleException::new_msg(ExcType::TypeError, msg).into())
    }

    /// Extracts an index value for sequence operations.
    ///
    /// Accepts `Int`, `Bool` (True=1, False=0), and `LongInt` (if it fits in i64).
    /// Returns a `TypeError` for other types with the container type name included.
    /// Returns an `IndexError` if the `LongInt` value is too large to use as an index.
    ///
    /// Note: The LongInt-to-i64 conversion path is defensive code. In normal execution,
    /// heap-allocated `LongInt` values always exceed i64 range because `LongInt::into_value()`
    /// automatically demotes i64-fitting values to `Value::Int`. However, this path could be
    /// reached via deserialization of crafted snapshot data.
pub fn as_index(&self, heap: &Heap, container_type: Type) -> RunResult { match self { Self::Int(i) => Ok(*i), Self::Bool(b) => Ok(i64::from(*b)), Self::Ref(heap_id) => { if let HeapData::LongInt(li) = heap.get(*heap_id) { li.to_i64().ok_or_else(ExcType::index_error_int_too_large) } else { Err(ExcType::type_error_indices(container_type, self.py_type(heap))) } } _ => Err(ExcType::type_error_indices(container_type, self.py_type(heap))), } } /// Performs a binary bitwise operation on two values. /// /// Python only supports bitwise operations on integers (and bools, which coerce to int). /// Returns a `TypeError` if either operand is not an integer, bool, or LongInt. /// /// For shift operations: /// - Negative shift counts raise `ValueError` /// - Left shifts may produce LongInt results for large shifts /// - Right shifts with large counts return 0 (or -1 for negative numbers) pub fn py_bitwise( &self, other: &Self, op: BitwiseOp, heap: &mut Heap, ) -> Result { // Capture types for error messages let lhs_type = self.py_type(heap); let rhs_type = other.py_type(heap); // Extract BigInt from all numeric types let lhs_bigint = extract_bigint(self, heap); let rhs_bigint = extract_bigint(other, heap); if let (Some(l), Some(r)) = (lhs_bigint, rhs_bigint) { let result = match op { BitwiseOp::And => l & r, BitwiseOp::Or => l | r, BitwiseOp::Xor => l ^ r, BitwiseOp::LShift => { // Get shift amount as i64 for validation let shift_amount = r.to_i64(); if let Some(shift) = shift_amount { if shift < 0 { return Err(ExcType::value_error_negative_shift_count()); } // Python allows arbitrarily large left shifts - use BigInt's shift // Safety: shift >= 0 is guaranteed by the check above #[expect(clippy::cast_sign_loss)] let shift_u64 = shift as u64; // Check size before computing to prevent DoS check_lshift_size(l.bits(), shift_u64, heap.tracker())?; l << shift_u64 } else if r.sign() == num_bigint::Sign::Minus { return Err(ExcType::value_error_negative_shift_count()); } else { // Shift 
amount too large to fit in i64 - this would be astronomically large return Err(ExcType::overflow_shift_count()); } } BitwiseOp::RShift => { // Get shift amount as i64 for validation let shift_amount = r.to_i64(); if let Some(shift) = shift_amount { if shift < 0 { return Err(ExcType::value_error_negative_shift_count()); } // Safety: shift >= 0 is guaranteed by the check above #[expect(clippy::cast_sign_loss)] let shift_u64 = shift as u64; l >> shift_u64 } else if r.sign() == num_bigint::Sign::Minus { return Err(ExcType::value_error_negative_shift_count()); } else { // Shift amount too large - result is 0 or -1 depending on sign if l.sign() == num_bigint::Sign::Minus { BigInt::from(-1) } else { BigInt::from(0) } } } }; // Convert result back to Value, demoting to i64 if it fits LongInt::new(result).into_value(heap).map_err(Into::into) } else { Err(ExcType::binary_type_error(op.as_str(), lhs_type, rhs_type)) } } /// Clones an value with proper heap reference counting. /// /// For immediate values (Int, Bool, None, etc.), this performs a simple copy. /// For heap-allocated values (Ref variant), this increments the reference count /// and returns a new reference to the same heap value. /// /// Takes `ContainsHeap` to allow directly passing the `VM` in many contexts. Where /// borrow checking creates conflicts, it may be preferred to pass `&Heap` directly /// (e.g. as `vm.heap` / `self.heap` etc.). /// /// # Important /// This method MUST be used instead of the derived `Clone` implementation to ensure /// proper reference counting. Using `.clone()` directly will bypass reference counting /// and cause memory leaks or double-frees. #[must_use] pub fn clone_with_heap(&self, heap: &impl ContainsHeap) -> Self { match self { Self::Ref(id) => { heap.heap().inc_ref(*id); Self::Ref(*id) } // Immediate values can be copied without heap interaction other => other.clone_immediate(), } } /// Drops an value, decrementing its heap reference count if applicable. 
///
    /// For immediate values, this is a no-op. For heap-allocated values (Ref variant),
    /// this decrements the reference count and frees the value (and any children) when
    /// the count reaches zero. `Ref` is the only variant handled here; no other variant
    /// triggers a heap operation.
    ///
    /// Takes `ContainsHeap` to allow directly passing the `VM` in many contexts. Where
    /// borrow checking creates conflicts, it may be preferred to pass `&mut Heap` directly
    /// (e.g. as `vm.heap` / `self.heap` etc.).
    ///
    /// # Important
    /// This method MUST be called before overwriting a namespace slot or discarding
    /// a value to prevent memory leaks.
    #[cfg(not(feature = "ref-count-panic"))]
    #[inline]
    pub fn drop_with_heap(self, heap: &mut impl ContainsHeap) {
        // Only heap references own a refcount; everything else simply goes out of scope.
        if let Self::Ref(id) = self {
            heap.heap_mut().dec_ref(id);
        }
    }

    /// With `ref-count-panic` enabled, `Ref` variants are replaced with `Dereferenced` and
    /// the original is forgotten to prevent the Drop impl from panicking. Non-Ref variants
    /// are left unchanged since they don't trigger the Drop panic.
    #[cfg(feature = "ref-count-panic")]
    pub fn drop_with_heap(mut self, heap: &mut impl ContainsHeap) {
        // Swap in the `Dereferenced` marker so `self` holds no `Ref` when it goes out of scope.
        let old = std::mem::replace(&mut self, Self::Dereferenced);
        if let Self::Ref(id) = &old {
            heap.heap_mut().dec_ref(*id);
            // The refcount was released above; skip `Drop` for the original `Ref`.
            std::mem::forget(old);
        }
    }

    /// Internal helper for copying immediate values without heap interaction.
    ///
    /// This method should only be called by `clone_with_heap()` for immediate values.
    /// Attempting to clone a Ref variant will panic.
pub fn clone_immediate(&self) -> Self {
        match self {
            // Every non-`Ref` variant is rebuilt from its (copied) payload.
            Self::Undefined => Self::Undefined,
            Self::Ellipsis => Self::Ellipsis,
            Self::None => Self::None,
            Self::Bool(b) => Self::Bool(*b),
            Self::Int(v) => Self::Int(*v),
            Self::Float(v) => Self::Float(*v),
            Self::Builtin(b) => Self::Builtin(*b),
            Self::ModuleFunction(mf) => Self::ModuleFunction(*mf),
            Self::DefFunction(f) => Self::DefFunction(*f),
            Self::ExtFunction(f) => Self::ExtFunction(*f),
            Self::InternString(s) => Self::InternString(*s),
            Self::InternBytes(b) => Self::InternBytes(*b),
            Self::InternLongInt(bi) => Self::InternLongInt(*bi),
            Self::Marker(m) => Self::Marker(*m),
            Self::Property(p) => Self::Property(*p),
            Self::ExternalFuture(call_id) => Self::ExternalFuture(*call_id),
            // Copying a `Ref` here would skip the refcount increment done by `clone_with_heap`.
            Self::Ref(_) => panic!("Ref clones must go through clone_with_heap to maintain refcounts"),
            #[cfg(feature = "ref-count-panic")]
            Self::Dereferenced => panic!("Cannot copy Dereferenced object"),
        }
    }

    /// Mark as Dereferenced to prevent Drop panic
    ///
    /// This should be called from `py_dec_ref_ids` methods only
    #[cfg(feature = "ref-count-panic")]
    pub fn dec_ref_forget(&mut self) {
        // Overwrite in place, then forget the previous value so its `Drop` never runs.
        let old = std::mem::replace(self, Self::Dereferenced);
        std::mem::forget(old);
    }

    /// Pushes any contained `HeapId` onto the stack for reference counting.
    ///
    /// For `Value::Ref` variants, pushes the heap ID so the referenced object's
    /// refcount can be decremented. When `ref-count-panic` is enabled, also marks
    /// this value as `Dereferenced` to prevent Drop panics.
    pub fn py_dec_ref_ids(&mut self, stack: &mut Vec) {
        if let Self::Ref(id) = self {
            // Record the ID before the value is (optionally) overwritten below.
            stack.push(*id);
            #[cfg(feature = "ref-count-panic")]
            self.dec_ref_forget();
        }
    }

    /// Converts the value into an `EitherStr` if it is a string.
    ///
    /// Returns `Some(EitherStr)` for `InternString` values or heap `str`
    /// objects, otherwise returns `None`.
pub fn as_either_str(&self, heap: &Heap) -> Option { match self { Self::InternString(id) => Some(EitherStr::Interned(*id)), Self::Ref(heap_id) => match heap.get(*heap_id) { HeapData::Str(s) => Some(EitherStr::Heap(s.as_str().to_owned())), _ => None, }, _ => None, } } /// check if the value is a string. pub fn is_str(&self, heap: &Heap) -> bool { match self { Self::InternString(_) => true, Self::Ref(heap_id) => matches!(heap.get(*heap_id), HeapData::Str(_)), _ => false, } } } /// Interned or heap-owned string identifier. /// /// Used when a string value can come from either the intern table (for known /// static strings and keywords) or from a heap-allocated Python string object. #[derive(Debug, Clone, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)] pub(crate) enum EitherStr { /// Interned string identifier (cheap comparisons and no allocation). Interned(StringId), /// Heap-owned string extracted from a `str` object. Heap(String), } impl From for EitherStr { fn from(id: StringId) -> Self { Self::Interned(id) } } impl From for EitherStr { fn from(s: StaticStrings) -> Self { Self::Interned(s.into()) } } /// Convert String to EitherStr: use Interned for known static strings, /// otherwise use Heap for user-defined field names. impl From for EitherStr { fn from(s: String) -> Self { match StaticStrings::from_str(&s) { Ok(s) => s.into(), Err(_) => Self::Heap(s), } } } impl EitherStr { /// Returns the keyword as a str slice for error messages or comparisons. pub fn as_str<'a>(&'a self, interns: &'a Interns) -> &'a str { match self { Self::Interned(id) => interns.get_str(*id), Self::Heap(s) => s.as_str(), } } /// Checks whether this keyword matches the given interned identifier. pub fn matches(&self, target: StringId, interns: &Interns) -> bool { match self { Self::Interned(id) => *id == target, Self::Heap(s) => s == interns.get_str(target), } } /// Returns the `StringId` if this is an interned attribute. 
#[inline] pub fn string_id(&self) -> Option { match self { Self::Interned(id) => Some(*id), Self::Heap(_) => None, } } /// Returns the `StaticStrings` if this is an interned attribute from `StaticStrings`s. #[inline] pub fn static_string(&self) -> Option { match self { Self::Interned(id) => StaticStrings::from_string_id(*id), Self::Heap(_) => None, } } /// Converts this `EitherStr` into an owned `String`. /// /// For interned strings, looks up and clones the string content. /// For heap strings, returns the owned string directly. pub fn into_string(self, interns: &Interns) -> String { match self { Self::Interned(id) => interns.get_str(id).to_owned(), Self::Heap(s) => s, } } pub fn py_estimate_size(&self) -> usize { match self { Self::Interned(_) => 0, Self::Heap(s) => s.capacity(), } } } /// Bitwise operation type for `py_bitwise`. #[derive(Debug, Clone, Copy)] pub enum BitwiseOp { And, Or, Xor, LShift, RShift, } impl BitwiseOp { /// Returns the operator symbol for error messages. pub fn as_str(self) -> &'static str { match self { Self::And => "&", Self::Or => "|", Self::Xor => "^", Self::LShift => "<<", Self::RShift => ">>", } } } /// Marker values for special objects that exist but have minimal functionality. /// /// These are used for: /// - System objects like `sys.stdout` and `sys.stderr` that need to exist but don't /// provide functionality in the sandboxed environment /// - Typing constructs from the `typing` module that are imported for type hints but /// don't need runtime functionality /// /// Wraps a `StaticStrings` variant to leverage its string conversion capabilities. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub(crate) struct Marker(pub StaticStrings); impl Marker { /// Returns the Python type of this marker. /// /// System markers (stdout, stderr) are `TextIOWrapper`. /// `typing.Union` has type `type` (matching CPython). /// Other typing markers (Any, Optional, etc.) are `_SpecialForm`. 
pub(crate) fn py_type(self) -> Type { match self.0 { StaticStrings::Stdout | StaticStrings::Stderr => Type::TextIOWrapper, StaticStrings::UnionType => Type::Type, _ => Type::SpecialForm, } } /// Writes the Python repr for this marker. /// /// System markers have special repr formats ("", ""). /// `typing.Union` uses `` format (matching CPython). /// Other typing markers are prefixed with "typing." (e.g., "typing.Any"). fn py_repr_fmt(self, f: &mut impl Write) -> fmt::Result { let s: &'static str = self.0.into(); match self.0 { StaticStrings::Stdout => f.write_str("")?, StaticStrings::Stderr => f.write_str("")?, StaticStrings::UnionType => f.write_str("")?, _ => write!(f, "typing.{s}")?, } Ok(()) } } /// High-bit tag reserved for literal singletons (None, Ellipsis, booleans). const SINGLETON_ID_TAG: usize = 1usize << (usize::BITS - 1); /// High-bit tag reserved for interned string `id()` values. const INTERN_STR_ID_TAG: usize = 1usize << (usize::BITS - 2); /// High-bit tag reserved for interned bytes `id()` values to avoid colliding with any other space. const INTERN_BYTES_ID_TAG: usize = 1usize << (usize::BITS - 3); /// High-bit tag reserved for heap-backed `HeapId`s. const HEAP_ID_TAG: usize = 1usize << (usize::BITS - 4); /// Mask that keeps pointer-derived bits below the bytes tag bit. const INTERN_BYTES_ID_MASK: usize = INTERN_BYTES_ID_TAG - 1; /// Mask that keeps pointer-derived bits below the string tag bit. const INTERN_STR_ID_MASK: usize = INTERN_STR_ID_TAG - 1; /// Mask that keeps per-singleton offsets below the singleton tag bit. const SINGLETON_ID_MASK: usize = SINGLETON_ID_TAG - 1; /// Mask that keeps heap value IDs below the heap tag bit. const HEAP_ID_MASK: usize = HEAP_ID_TAG - 1; /// High-bit tag for Int value-based IDs (no heap allocation needed). const INT_ID_TAG: usize = 1usize << (usize::BITS - 5); /// High-bit tag for Float value-based IDs. const FLOAT_ID_TAG: usize = 1usize << (usize::BITS - 6); /// High-bit tag for Callable value-based IDs. 
const BUILTIN_ID_TAG: usize = 1usize << (usize::BITS - 7);
/// High-bit tag for Function value-based IDs.
const FUNCTION_ID_TAG: usize = 1usize << (usize::BITS - 8);
/// High-bit tag for External Function value-based IDs.
const EXTFUNCTION_ID_TAG: usize = 1usize << (usize::BITS - 9);
/// High-bit tag for Marker value-based IDs (stdout, stderr, etc.).
const MARKER_ID_TAG: usize = 1usize << (usize::BITS - 10);
/// High-bit tag for ExternalFuture value-based IDs.
const EXTERNAL_FUTURE_ID_TAG: usize = 1usize << (usize::BITS - 11);
/// High-bit tag for ModuleFunction value-based IDs.
const MODULE_FUNCTION_ID_TAG: usize = 1usize << (usize::BITS - 12);
/// High-bit tag for interned LongInt `id()` values.
const INTERN_LONG_INT_ID_TAG: usize = 1usize << (usize::BITS - 13);
/// High-bit tag for Property value-based IDs.
const PROPERTY_ID_TAG: usize = 1usize << (usize::BITS - 14);

/// Masks for value-based ID tags (keep bits below the tag bit).
const INT_ID_MASK: usize = INT_ID_TAG - 1;
// Each mask below follows the same rule: `TAG - 1` selects every bit
// strictly below the matching single-bit tag.
const FLOAT_ID_MASK: usize = FLOAT_ID_TAG - 1;
const BUILTIN_ID_MASK: usize = BUILTIN_ID_TAG - 1;
const FUNCTION_ID_MASK: usize = FUNCTION_ID_TAG - 1;
const EXTFUNCTION_ID_MASK: usize = EXTFUNCTION_ID_TAG - 1;
const MARKER_ID_MASK: usize = MARKER_ID_TAG - 1;
const EXTERNAL_FUTURE_ID_MASK: usize = EXTERNAL_FUTURE_ID_TAG - 1;
const MODULE_FUNCTION_ID_MASK: usize = MODULE_FUNCTION_ID_TAG - 1;
const INTERN_LONG_INT_ID_MASK: usize = INTERN_LONG_INT_ID_TAG - 1;
const PROPERTY_ID_MASK: usize = PROPERTY_ID_TAG - 1;

/// Enumerates singleton literal slots so we can issue stable `id()` values without heap allocation.
///
/// `#[repr(usize)]` with explicit discriminants fixes the number each slot
/// contributes when `singleton_id` casts it with `as usize`.
#[repr(usize)]
#[derive(Copy, Clone)]
enum SingletonSlot {
    Undefined = 0,
    Ellipsis = 1,
    None = 2,
    False = 3,
    True = 4,
}

/// Returns the fully tagged `id()` value for the requested singleton literal.
///
/// The slot number is masked to the bits below `SINGLETON_ID_TAG` and then
/// OR-ed with the tag bit itself.
#[inline]
const fn singleton_id(slot: SingletonSlot) -> usize {
    SINGLETON_ID_TAG | ((slot as usize) & SINGLETON_ID_MASK)
}

/// Computes Python-style floor division and modulo.
///
/// Python rounds the quotient toward negative infinity and gives the
/// remainder the sign of the divisor, whereas Rust's `/` and `%` truncate
/// toward zero.
///
/// Returns `None` whenever the checked division or remainder fails: a zero
/// divisor, or the `i64::MIN / -1` case whose result doesn't fit in i64.
pub(crate) fn floor_divmod(a: i64, b: i64) -> Option<(i64, i64)> {
    let (trunc_quot, trunc_rem) = (a.checked_div(b)?, a.checked_rem(b)?);
    // The truncated pair is already the floored pair when the division is
    // exact or the remainder's sign agrees with the divisor's.
    if trunc_rem == 0 || (trunc_rem < 0) == (b < 0) {
        return Some((trunc_quot, trunc_rem));
    }
    // Otherwise move one step toward negative infinity and compensate in the remainder.
    Some((trunc_quot - 1, trunc_rem + b))
}

/// Converts a heap `HeapId` into its tagged `id()` value.
///
/// The heap index is masked to the bits below `HEAP_ID_TAG` and combined with
/// the tag bit, which keeps the result apart from the other tagged spaces.
#[inline]
pub fn heap_tagged_id(heap_id: HeapId) -> usize {
    let index_bits = heap_id.index() & HEAP_ID_MASK;
    HEAP_ID_TAG | index_bits
}

/// Computes a deterministic ID for an i64 integer value.
///
/// The value is hashed with a freshly created `DefaultHasher`, masked to the
/// bits below `INT_ID_TAG`, and tagged so the result cannot equal an ID from
/// another tagged space. Distinct integers whose masked hashes coincide share an ID.
#[inline]
fn int_value_id(value: i64) -> usize {
    let mut state = DefaultHasher::new();
    value.hash(&mut state);
    // Drop any bits above the platform word size first, so the conversion
    // below cannot fail on 32-bit targets.
    let word_sized = state.finish() & (usize::MAX as u64);
    let hash_bits = usize::try_from(word_sized).expect("masked value fits in usize");
    INT_ID_TAG | (hash_bits & INT_ID_MASK)
}

/// Computes a deterministic ID for an f64 float value.
///
/// Hashes the raw bit pattern from `to_bits()`, so every float (NaN and
/// infinities included) is handled the same way, then masks and tags the
/// result with `FLOAT_ID_TAG`.
#[inline]
fn float_value_id(value: f64) -> usize {
    let mut state = DefaultHasher::new();
    // f64 does not implement `Hash`; its u64 bit pattern does.
    value.to_bits().hash(&mut state);
    // Drop any bits above the platform word size first, so the conversion
    // below cannot fail on 32-bit targets.
    let word_sized = state.finish() & (usize::MAX as u64);
    let hash_bits = usize::try_from(word_sized).expect("masked value fits in usize");
    FLOAT_ID_TAG | (hash_bits & FLOAT_ID_MASK)
}

/// Computes a deterministic ID for a builtin based on its discriminant.
#[inline] fn builtin_value_id(b: Builtins) -> usize { let mut hasher = DefaultHasher::new(); b.hash(&mut hasher); let hash_u64 = hasher.finish(); // wrapping here is fine #[expect(clippy::cast_possible_truncation)] let hash_usize = hash_u64 as usize; BUILTIN_ID_TAG | (hash_usize & BUILTIN_ID_MASK) } /// Computes a deterministic ID for a function based on its id. #[inline] fn function_value_id(f_id: FunctionId) -> usize { FUNCTION_ID_TAG | (f_id.index() & FUNCTION_ID_MASK) } /// Computes a deterministic ID for an external function based on its interned name. #[inline] fn ext_function_value_id(name_id: StringId) -> usize { EXTFUNCTION_ID_TAG | (name_id.index() & EXTFUNCTION_ID_MASK) } /// Computes a deterministic ID for a marker value based on its discriminant. #[inline] fn marker_value_id(m: Marker) -> usize { MARKER_ID_TAG | ((m.0 as usize) & MARKER_ID_MASK) } /// Computes a deterministic ID for a property value based on its discriminant. #[inline] fn property_value_id(p: Property) -> usize { let discriminant = match p { Property::Os(os_fn) => os_fn as usize, }; PROPERTY_ID_TAG | (discriminant & PROPERTY_ID_MASK) } /// Computes a deterministic ID for an external future based on its call ID. #[inline] fn external_future_value_id(call_id: CallId) -> usize { EXTERNAL_FUTURE_ID_TAG | ((call_id.raw() as usize) & EXTERNAL_FUTURE_ID_MASK) } /// Computes a deterministic ID for a module function based on its discriminant. #[inline] fn module_function_value_id(mf: ModuleFunctions) -> usize { let mut hasher = DefaultHasher::new(); mf.hash(&mut hasher); let hash_u64 = hasher.finish(); // wrapping here is fine #[expect(clippy::cast_possible_truncation)] let hash_usize = hash_u64 as usize; MODULE_FUNCTION_ID_TAG | (hash_usize & MODULE_FUNCTION_ID_MASK) } /// Converts an i64 repeat count to usize, handling negative values and overflow. /// /// Returns 0 for negative values (Python treats negative repeat counts as 0). /// Returns `OverflowError` if the value exceeds `usize::MAX`. 
#[inline]
fn i64_to_repeat_count(n: i64) -> RunResult<usize> {
    // Zero and negative counts both mean "repeat nothing".
    if n <= 0 {
        return Ok(0);
    }
    // A positive i64 can still be too large for usize on narrower targets.
    usize::try_from(n).map_err(|_| ExcType::overflow_repeat_count().into())
}

/// Converts a LongInt repeat count to usize, handling negative values and overflow.
///
/// Returns 0 for negative values (Python treats negative repeat counts as 0).
/// Returns `OverflowError` if the value exceeds `usize::MAX`.
#[inline]
fn longint_to_repeat_count(li: &LongInt) -> RunResult<usize> {
    if li.is_negative() {
        return Ok(0);
    }
    match li.to_usize() {
        Some(count) => Ok(count),
        // Non-negative but not representable as usize.
        None => Err(ExcType::overflow_repeat_count().into()),
    }
}

/// Extracts a BigInt from a Value for bitwise operations.
///
/// Returns `Some(BigInt)` for Int, Bool, and LongInt values.
/// Returns `None` for other types (Float, Str, etc.).
///
/// Bools are widened through `i64` first; a LongInt's inner value is cloned.
// NOTE(review): `Heap` is written without type arguments in this copy of the
// file; if the real type is generic its argument was lost in extraction -
// confirm against the real source.
fn extract_bigint(value: &Value, heap: &Heap) -> Option<BigInt> {
    match value {
        Value::Int(i) => Some(BigInt::from(*i)),
        Value::Bool(b) => Some(BigInt::from(i64::from(*b))),
        // A heap reference only qualifies when it points at a LongInt.
        Value::Ref(id) => match heap.get(*id) {
            HeapData::LongInt(li) => Some(li.inner().clone()),
            _ => None,
        },
        _ => None,
    }
}

/// Extracts and clones the `(key, value)` probe accepted by `dict_items.__contains__`.
///
/// CPython treats only 2-tuples as valid probes for items-view membership. Monty
/// also accepts namedtuples of length two so tuple-like runtime values behave
/// sensibly even though namedtuples are not modeled as a true tuple subclass.
fn cloned_items_view_candidate(item: &Value, heap: &impl ContainsHeap) -> Option<(Value, Value)> {
    // Anything that is not a heap reference cannot match the tuple arms below.
    let Value::Ref(heap_id) = item else {
        return None;
    };
    match heap.heap().get(*heap_id) {
        HeapData::Tuple(tuple) => {
            let items = tuple.as_slice();
            // Clone the pair only when the length check passes.
            (items.len() == 2).then(|| (items[0].clone_with_heap(heap), items[1].clone_with_heap(heap)))
        }
        HeapData::NamedTuple(namedtuple) => {
            let items = namedtuple.as_vec();
            (items.len() == 2).then(|| (items[0].clone_with_heap(heap), items[1].clone_with_heap(heap)))
        }
        _ => None,
    }
}

/// Helper for substring containment check in strings.
///
/// Called by `py_contains` when the container is a string.
/// The item must also be a string (either interned or heap-allocated).
///
/// Returns `Ok(true)`/`Ok(false)` for a string item, and a `TypeError` for
/// anything else. The message names the right-hand operand as `<string>`; in
/// this copy of the file that placeholder had been stripped, leaving `'in '`.
// NOTE(review): `Heap` is written without type arguments in this copy of the
// file; if the real type is generic its argument was lost in extraction -
// confirm against the real source.
fn str_contains(
    container_str: &str,
    item: &Value,
    heap: &mut Heap,
    interns: &Interns,
) -> RunResult<bool> {
    // Resolve the item to a string slice first so the error is built in one place.
    let needle = match item {
        Value::InternString(item_id) => Some(interns.get_str(*item_id)),
        Value::Ref(item_heap_id) => match heap.get(*item_heap_id) {
            HeapData::Str(item_str) => Some(item_str.as_str()),
            _ => None,
        },
        _ => None,
    };
    match needle {
        Some(needle) => Ok(container_str.contains(needle)),
        None => Err(ExcType::type_error("'in <string>' requires string as left operand")),
    }
}

/// Computes the number of significant bits in the magnitude of an i64.
///
/// Returns 0 for 0, otherwise floor(log2(|value|)) + 1. The sign is ignored:
/// a value and its negation give the same answer.
/// For example: 0 -> 0, 1 -> 1, -1 -> 1, 2 -> 2, 255 -> 8, 256 -> 9, i64::MIN -> 64.
fn i64_bits(value: i64) -> u64 {
    // `unsigned_abs` yields the magnitude as u64 (well-defined for i64::MIN too).
    // Zero has 64 leading zeros, so it needs no special case.
    u64::from(u64::BITS - value.unsigned_abs().leading_zeros())
}

/// Computes BigInt exponentiation for exponents larger than u32::MAX.
///
/// Uses repeated squaring for efficiency. This is needed when the exponent
/// doesn't fit in a u32, which is required by the `num-bigint` pow method.
fn bigint_pow(base: BigInt, exp: u64) -> BigInt {
    // Trivial exponents: anything to the power 0 is 1, and power 1 is the base itself.
    if exp == 0 {
        return BigInt::from(1);
    }
    if exp == 1 {
        return base;
    }

    // Square-and-multiply over the bits of `exp`, lowest bit first.
    // Invariant at the top of each iteration: answer == result * square^remaining.
    let mut result = BigInt::from(1);
    let mut square = base;
    let mut remaining = exp;
    loop {
        if remaining & 1 == 1 {
            result *= &square;
        }
        remaining >>= 1;
        if remaining == 0 {
            // No bits left: stop before squaring again. That extra square
            // would be the largest multiplication of the run and its result
            // would never be used.
            break;
        }
        square = &square * &square;
    }
    result
}

#[cfg(test)]
mod tests {
    use num_bigint::BigInt;

    use super::*;
    use crate::resource::NoLimitTracker;

    /// Creates a heap and directly allocates a LongInt with the given BigInt value.
    ///
    /// This bypasses `LongInt::into_value()` which would demote i64-fitting values.
    /// Used to test defensive code paths that handle LongInt-as-index scenarios.
    // NOTE(review): `Heap` is written without type arguments in this copy of the
    // file; if the real type is generic its argument was lost in extraction -
    // confirm against the real source.
    fn create_heap_with_longint(value: BigInt) -> (Heap, HeapId) {
        let mut heap = Heap::new(16, NoLimitTracker);
        let long_int = LongInt::new(value);
        let heap_id = heap.allocate(HeapData::LongInt(long_int)).unwrap();
        (heap, heap_id)
    }

    /// Tests that `as_index()` correctly handles a LongInt containing an i64-fitting value.
    ///
    /// This tests a defensive code path that's normally unreachable because
    /// `LongInt::into_value()` demotes i64-fitting values to `Value::Int`.
    /// However, this path could be reached via deserialization of crafted data.
    #[test]
    fn as_index_longint_fits_in_i64() {
        let (mut heap, heap_id) = create_heap_with_longint(BigInt::from(42));
        let value = Value::Ref(heap_id);
        let result = value.as_index(&heap, Type::List);
        assert_eq!(result.unwrap(), 42);
        value.drop_with_heap(&mut heap);
    }

    /// Tests that `as_index()` correctly handles a negative LongInt that fits in i64.
    #[test]
    fn as_index_longint_negative_fits_in_i64() {
        let (mut heap, heap_id) = create_heap_with_longint(BigInt::from(-100));
        let value = Value::Ref(heap_id);
        let result = value.as_index(&heap, Type::List);
        assert_eq!(result.unwrap(), -100);
        value.drop_with_heap(&mut heap);
    }

    /// Tests that `as_index()` returns IndexError for LongInt values too large for i64.
#[test] fn as_index_longint_too_large() { // 2^100 is way larger than i64::MAX let big_value = BigInt::from(2).pow(100); let (mut heap, heap_id) = create_heap_with_longint(big_value); let value = Value::Ref(heap_id); let result = value.as_index(&heap, Type::List); assert!(result.is_err()); value.drop_with_heap(&mut heap); } /// Tests that `as_int()` correctly handles a LongInt containing an i64-fitting value. /// /// Similar to `as_index`, this tests a defensive code path normally unreachable. #[test] fn as_int_longint_fits_in_i64() { let (mut heap, heap_id) = create_heap_with_longint(BigInt::from(12345)); let value = Value::Ref(heap_id); let result = value.as_int(&heap); assert_eq!(result.unwrap(), 12345); value.drop_with_heap(&mut heap); } /// Tests that `as_int()` returns an error for LongInt values too large for i64. #[test] fn as_int_longint_too_large() { let big_value = BigInt::from(2).pow(100); let (mut heap, heap_id) = create_heap_with_longint(big_value); let value = Value::Ref(heap_id); let result = value.as_int(&heap); assert!(result.is_err()); value.drop_with_heap(&mut heap); } /// Tests boundary values: i64::MAX as a LongInt. #[test] fn as_index_longint_at_i64_max() { let (mut heap, heap_id) = create_heap_with_longint(BigInt::from(i64::MAX)); let value = Value::Ref(heap_id); let result = value.as_index(&heap, Type::List); assert_eq!(result.unwrap(), i64::MAX); value.drop_with_heap(&mut heap); } /// Tests boundary values: i64::MIN as a LongInt. #[test] fn as_index_longint_at_i64_min() { let (mut heap, heap_id) = create_heap_with_longint(BigInt::from(i64::MIN)); let value = Value::Ref(heap_id); let result = value.as_index(&heap, Type::List); assert_eq!(result.unwrap(), i64::MIN); value.drop_with_heap(&mut heap); } /// Tests boundary values: i64::MAX + 1 as a LongInt (should fail). 
#[test] fn as_index_longint_just_over_i64_max() { let big_value = BigInt::from(i64::MAX) + BigInt::from(1); let (mut heap, heap_id) = create_heap_with_longint(big_value); let value = Value::Ref(heap_id); let result = value.as_index(&heap, Type::List); assert!(result.is_err()); value.drop_with_heap(&mut heap); } /// Tests boundary values: i64::MIN - 1 as a LongInt (should fail). #[test] fn as_index_longint_just_under_i64_min() { let big_value = BigInt::from(i64::MIN) - BigInt::from(1); let (mut heap, heap_id) = create_heap_with_longint(big_value); let value = Value::Ref(heap_id); let result = value.as_index(&heap, Type::List); assert!(result.is_err()); value.drop_with_heap(&mut heap); } } ================================================ FILE: crates/monty/test_cases/args__dict_get_no_args.py ================================================ x = {} x.get() # Raise=TypeError('get expected at least 1 argument, got 0') ================================================ FILE: crates/monty/test_cases/args__dict_get_too_many.py ================================================ x = {} x.get(1, 2, 3) # Raise=TypeError('get expected at most 2 arguments, got 3') ================================================ FILE: crates/monty/test_cases/args__dict_items_with_args.py ================================================ x = {} x.items(1) # Raise=TypeError('dict.items() takes no arguments (1 given)') ================================================ FILE: crates/monty/test_cases/args__dict_keys_with_args.py ================================================ x = {} x.keys(1) # Raise=TypeError('dict.keys() takes no arguments (1 given)') ================================================ FILE: crates/monty/test_cases/args__dict_pop_no_args.py ================================================ x = {} x.pop() # Raise=TypeError('pop expected at least 1 argument, got 0') ================================================ FILE: crates/monty/test_cases/args__dict_pop_too_many.py 
================================================ x = {} x.pop(1, 2, 3) # Raise=TypeError('pop expected at most 2 arguments, got 3') ================================================ FILE: crates/monty/test_cases/args__dict_values_with_args.py ================================================ x = {} x.values(1) # Raise=TypeError('dict.values() takes no arguments (1 given)') ================================================ FILE: crates/monty/test_cases/args__id_too_many.py ================================================ id(1, 2) # Raise=TypeError('id() takes exactly one argument (2 given)') ================================================ FILE: crates/monty/test_cases/args__len_no_args.py ================================================ len() # Raise=TypeError('len() takes exactly one argument (0 given)') ================================================ FILE: crates/monty/test_cases/args__len_too_many.py ================================================ len(1, 2) # Raise=TypeError('len() takes exactly one argument (2 given)') ================================================ FILE: crates/monty/test_cases/args__len_type_error_int.py ================================================ len(42) """ TRACEBACK: Traceback (most recent call last): File "args__len_type_error_int.py", line 1, in len(42) ~~~~~~~ TypeError: object of type 'int' has no len() """ ================================================ FILE: crates/monty/test_cases/args__len_type_error_none.py ================================================ len(None) """ TRACEBACK: Traceback (most recent call last): File "args__len_type_error_none.py", line 1, in len(None) ~~~~~~~~~ TypeError: object of type 'NoneType' has no len() """ ================================================ FILE: crates/monty/test_cases/args__list_append_no_args.py ================================================ x = [] x.append() # Raise=TypeError('list.append() takes exactly one argument (0 given)') ================================================ 
FILE: crates/monty/test_cases/args__list_append_too_many.py ================================================ x = [] x.append(1, 2) # Raise=TypeError('list.append() takes exactly one argument (2 given)') ================================================ FILE: crates/monty/test_cases/args__list_insert_too_few.py ================================================ x = [] x.insert(1) # Raise=TypeError('insert expected 2 arguments, got 1') ================================================ FILE: crates/monty/test_cases/args__list_insert_too_many.py ================================================ x = [] x.insert(1, 2, 3) # Raise=TypeError('insert expected 2 arguments, got 3') ================================================ FILE: crates/monty/test_cases/args__repr_no_args.py ================================================ repr() # Raise=TypeError('repr() takes exactly one argument (0 given)') ================================================ FILE: crates/monty/test_cases/arith__div_zero_float.py ================================================ 1.0 / 0.0 # Raise=ZeroDivisionError('division by zero') ================================================ FILE: crates/monty/test_cases/arith__div_zero_int.py ================================================ 1 / 0 # Raise=ZeroDivisionError('division by zero') ================================================ FILE: crates/monty/test_cases/arith__floordiv_zero_float.py ================================================ 1.0 // 0.0 # Raise=ZeroDivisionError('division by zero') ================================================ FILE: crates/monty/test_cases/arith__floordiv_zero_int.py ================================================ 1 // 0 # Raise=ZeroDivisionError('division by zero') ================================================ FILE: crates/monty/test_cases/arith__pow_zero_neg.py ================================================ 0**-1 # Raise=ZeroDivisionError('zero to a negative power') ================================================ FILE: 
crates/monty/test_cases/arith__pow_zero_neg_builtin.py ================================================ pow(0, -1) """ TRACEBACK: Traceback (most recent call last): File "arith__pow_zero_neg_builtin.py", line 1, in pow(0, -1) ~~~~~~~~~~ ZeroDivisionError: zero to a negative power """ ================================================ FILE: crates/monty/test_cases/assert__expr_fail.py ================================================ assert 1 == 2 # Raise=AssertionError() ================================================ FILE: crates/monty/test_cases/assert__fail.py ================================================ assert False # Raise=AssertionError() ================================================ FILE: crates/monty/test_cases/assert__fail_msg.py ================================================ assert False, 'custom message' # Raise=AssertionError('custom message') ================================================ FILE: crates/monty/test_cases/assert__fn_fail.py ================================================ # fmt: off assert(0) # Raise=AssertionError() ================================================ FILE: crates/monty/test_cases/assert__ops.py ================================================ # Tests for assert statements that pass (failure cases are in separate files) # === Basic assert === assert True, 'basic assert True' # === Assert with expression === assert 1 == 1, 'assert equality expression' # === Assert with function call style (assert is statement, not function) === # fmt: off assert(123) # fmt: on ================================================ FILE: crates/monty/test_cases/async__asyncio_run.py ================================================ import asyncio # === Basic asyncio.run === async def simple(): return 42 result = asyncio.run(simple()) assert result == 42, f'basic asyncio.run failed: {result}' # === With arguments === async def add(a, b): return a + b result = asyncio.run(add(10, 20)) assert result == 30, f'asyncio.run with args failed: 
{result}' # === Nested awaits inside the coroutine === async def inner(): return 'hello' async def outer(): val = await inner() return val + ' world' result = asyncio.run(outer()) assert result == 'hello world', f'nested awaits failed: {result}' # === asyncio.gather inside asyncio.run === async def double(x): return x * 2 async def run_gather(): results = await asyncio.gather(double(1), double(2), double(3)) return results result = asyncio.run(run_gather()) assert result == [2, 4, 6], f'gather inside run failed: {result}' ================================================ FILE: crates/monty/test_cases/async__basic.py ================================================ # run-async # Basic async function that returns a value async def foo(): return 123 result = await foo() # pyright: ignore assert result == 123, 'async function should return awaited value' ================================================ FILE: crates/monty/test_cases/async__closure.py ================================================ # run-async # Async function capturing variables from enclosing scope def make_adder(n): async def adder(x): return x + n return adder add_five = make_adder(5) result = await add_five(10) # pyright: ignore assert result == 15, 'async closure should capture variables' ================================================ FILE: crates/monty/test_cases/async__double_await_coroutine.py ================================================ # run-async async def foo(): return 1 coro = foo() await coro # pyright: ignore await coro # pyright: ignore """ TRACEBACK: Traceback (most recent call last): File "async__double_await_coroutine.py", line 8, in await coro # pyright: ignore ~~~~~~~~~~ RuntimeError: cannot reuse already awaited coroutine """ ================================================ FILE: crates/monty/test_cases/async__exception.py ================================================ # run-async # Test that exceptions in async functions propagate correctly async def raises_error(): raise 
ValueError('async error') await raises_error() # pyright: ignore # Raise=ValueError('async error') ================================================ FILE: crates/monty/test_cases/async__ext_call.py ================================================ # call-external # run-async # Test async external function calls (coroutines) # === Basic async external call === result = await async_call(42) # pyright: ignore assert result == 42, 'async_call should return awaited value' # === Async call with string === s = await async_call('hello') # pyright: ignore assert s == 'hello', 'async_call should work with strings' # === Async call with list === lst = await async_call([1, 2, 3]) # pyright: ignore assert lst == [1, 2, 3], 'async_call should work with lists' # === Multiple async calls === a = await async_call(10) # pyright: ignore b = await async_call(20) # pyright: ignore assert a + b == 30, 'multiple async calls should work' # === Gather multiple external async calls === import asyncio results = await asyncio.gather(async_call(1), async_call(2), async_call(3)) # pyright: ignore assert results == [1, 2, 3], 'gather should collect external async results in order' # === Gather with mixed external calls === results = await asyncio.gather(async_call('a'), async_call('b')) # pyright: ignore assert results == ['a', 'b'], 'gather should work with string returns' # === Gather mixing coroutines and external futures === async def add(a, b): return a + b async def multiply(a, b): return a * b # Mix: coroutine first, external future second results = await asyncio.gather(add(1, 2), async_call(10)) # pyright: ignore assert results == [3, 10], 'gather should work with coroutine then external future' # Mix: external future first, coroutine second results = await asyncio.gather(async_call(20), multiply(3, 4)) # pyright: ignore assert results == [20, 12], 'gather should work with external future then coroutine' # Mix: multiple of each interleaved results = await asyncio.gather(add(5, 5), 
async_call('x'), multiply(2, 3), async_call('y')) # pyright: ignore assert results == [10, 'x', 6, 'y'], 'gather should handle interleaved coroutines and external futures' # === Coroutine with nested external awaits === async def double_external(x): val = await async_call(x) return val * 2 results = await asyncio.gather(double_external(5), async_call(100)) # pyright: ignore assert results == [10, 100], 'gather should work with coroutine that awaits external' # === Coroutine with multiple nested awaits === async def triple_add(a, b, c): x = await async_call(a) y = await async_call(b) return x + y + c results = await asyncio.gather(triple_add(1, 2, 3), async_call(50)) # pyright: ignore assert results == [6, 50], 'gather should work with coroutine with multiple external awaits' ================================================ FILE: crates/monty/test_cases/async__gather_all.py ================================================ # run-async import asyncio # === Basic gather === async def task1(): return 1 async def task2(): return 2 result = await asyncio.gather(task1(), task2()) # pyright: ignore assert result == [1, 2], 'gather should return results as a list' # === Result ordering === # Results should be in argument order, not completion order async def slow(): return 'slow' async def fast(): return 'fast' result = await asyncio.gather(slow(), fast()) # pyright: ignore assert result == ['slow', 'fast'], 'gather should preserve argument order' # === Empty gather === result = await asyncio.gather() # pyright: ignore assert result == [], 'empty gather should return empty list' # === Single coroutine === async def single(): return 42 result = await asyncio.gather(single()) # pyright: ignore assert result == [42], 'gather with single coroutine should return list with one element' # === repr of gather function === r = repr(asyncio.gather) assert r.startswith(' await 123 # pyright: ignore ~~~~~~~~~ TypeError: 'int' object can't be awaited """ 
================================================ FILE: crates/monty/test_cases/async__not_imported.py ================================================ # run-async async def foo(): return 1 await asyncio.gather(foo(), foo()) # pyright: ignore """ TRACEBACK: Traceback (most recent call last): File "async__not_imported.py", line 6, in await asyncio.gather(foo(), foo()) # pyright: ignore ~~~~~~~ NameError: name 'asyncio' is not defined. Did you forget to import 'asyncio'? """ ================================================ FILE: crates/monty/test_cases/async__recursion_depth_isolation.py ================================================ # call-external # run-async # Test that recursion depth is per-task, not global. # # With a recursion limit of 50, a gathered task that recurses 40 deep # should NOT eat into another task's budget. Without per-task depth # tracking, the second task inherits the first task's depth and hits # the limit prematurely. import asyncio async def recurse_then_call(n): """Recurse n levels deep, then make an external call at the bottom.""" if n == 0: return await async_call('done') return await recurse_then_call(n - 1) # Each task recurses 40 deep independently. # With a global depth counter, the second task would start at depth 40 # and blow the limit at depth 80 (well above the 50 limit). # With correct per-task tracking, each task sees its own depth of 40. 
results = await asyncio.gather( # pyright: ignore recurse_then_call(40), recurse_then_call(40), ) assert results == ['done', 'done'], f'both tasks should complete: {results}' ================================================ FILE: crates/monty/test_cases/async__return_types.py ================================================ # run-async # Async functions returning different types async def return_int(): return 42 async def return_str(): return 'hello' async def return_list(): return [1, 2, 3] async def return_none(): pass i = await return_int() # pyright: ignore assert i == 42, 'should return int' s = await return_str() # pyright: ignore assert s == 'hello', 'should return str' lst = await return_list() # pyright: ignore assert lst == [1, 2, 3], 'should return list' n = await return_none() # pyright: ignore assert n is None, 'should return None implicitly' ================================================ FILE: crates/monty/test_cases/async__sequential.py ================================================ # run-async # Multiple sequential awaits async def get_value(x): return x * 2 a = await get_value(1) # pyright: ignore b = await get_value(2) # pyright: ignore c = await get_value(3) # pyright: ignore assert a == 2, 'first await' assert b == 4, 'second await' assert c == 6, 'third await' assert a + b + c == 12, 'sum of sequential awaits' ================================================ FILE: crates/monty/test_cases/async__traceback.py ================================================ # run-async # Test that exceptions in async functions produce correct tracebacks async def raises_error(): raise ValueError('async error') await raises_error() # pyright: ignore """ TRACEBACK: Traceback (most recent call last): File "async__traceback.py", line 9, in await raises_error() # pyright: ignore ~~~~~~~~~~~~~~~~~~~~ File "async__traceback.py", line 6, in raises_error raise ValueError('async error') ValueError: async error """ ================================================ FILE: 
crates/monty/test_cases/async__with_args.py ================================================ # run-async # Async function with arguments async def add(a, b): return a + b result = await add(10, 20) # pyright: ignore assert result == 30, 'async function should handle arguments' # With keyword arguments result2 = await add(a=5, b=15) # pyright: ignore assert result2 == 20, 'async function should handle keyword arguments' ================================================ FILE: crates/monty/test_cases/attr__get_int_error.py ================================================ x = 5 x.foo """ TRACEBACK: Traceback (most recent call last): File "attr__get_int_error.py", line 2, in x.foo AttributeError: 'int' object has no attribute 'foo' """ ================================================ FILE: crates/monty/test_cases/attr__get_list_error.py ================================================ x = [1, 2, 3] x.foo """ TRACEBACK: Traceback (most recent call last): File "attr__get_list_error.py", line 2, in x.foo AttributeError: 'list' object has no attribute 'foo' """ ================================================ FILE: crates/monty/test_cases/attr__set_frozen_nonfield.py ================================================ # call-external # Test that setting a non-field attribute on frozen dataclass raises error point = make_point() point.z = 42 """ TRACEBACK: Traceback (most recent call last): File "attr__set_frozen_nonfield.py", line 4, in point.z = 42 ~~~~~~~ FrozenInstanceError: cannot assign to field 'z' """ ================================================ FILE: crates/monty/test_cases/attr__set_int_error.py ================================================ x = 5 x.foo = 1 """ TRACEBACK: Traceback (most recent call last): File "attr__set_int_error.py", line 2, in x.foo = 1 ~~~~~ AttributeError: 'int' object has no attribute 'foo' and no __dict__ for setting new attributes """ ================================================ FILE: crates/monty/test_cases/attr__set_list_error.py 
================================================ x = [1, 2, 3] x.foo = 1 """ TRACEBACK: Traceback (most recent call last): File "attr__set_list_error.py", line 2, in x.foo = 1 ~~~~~ AttributeError: 'list' object has no attribute 'foo' and no __dict__ for setting new attributes """ ================================================ FILE: crates/monty/test_cases/bench__kitchen_sink.py ================================================ # This test case is also used in the benchmark (benches/main.rs) # List operations my_list = [] my_list.append(1) my_list.append(2) my_list.insert(0, 0) list_len = len(my_list) list_item = my_list[1] # Dict operations my_dict = {} my_dict['a'] = 10 my_dict['b'] = 20 dict_val = my_dict['a'] popped = my_dict.pop('b') dict_len = len(my_dict) # Tuple operations my_tuple = (1, 2, 3) tuple_item = my_tuple[0] tuple_len = len(my_tuple) # String operations s = 'hello' s += ' world' str_len = len(s) # Function definition and call def add(x, y): return x + y func_result = add(3, 4) # For loop with if/elif/else total = 0 for i in range(10): if i < 3: total += 1 elif i < 6: total += 2 else: total += 3 # Boolean operators and comparisons flag = True and not False check = 1 < 2 and 3 > 2 identity = None is None not_identity = 1 is not None compare = 5 >= 5 and 5 <= 5 and 4 != 5 # Assert with message assert total > 0, 'total should be positive' # List comprehension squares = [x * x for x in range(10)] comp_sum = sum(squares) # Dict comprehension square_dict = {x: x * x for x in range(5)} dict_comp_sum = sum(square_dict.values()) # Final result result = list_len + list_item + dict_val + dict_len + tuple_item + tuple_len result += str_len + func_result + total + comp_sum + dict_comp_sum result # Return=373 ================================================ FILE: crates/monty/test_cases/bool__ops.py ================================================ # === Boolean 'and' operator === # returns first falsy value, or last value if all truthy assert (5 and 3) == 3, 
'and truthy' assert (0 and 3) == 0, 'and falsy' assert (1 and 2 and 3) == 3, 'and chained' # === Boolean 'or' operator === # returns first truthy value, or last value if all falsy assert (5 or 3) == 5, 'or truthy' assert (0 or 3) == 3, 'or falsy' assert (0 or 0 or 3) == 3, 'or chained' # === Boolean 'not' operator === assert (not 5) == False, 'not truthy' assert (not 0) == True, 'not falsy' assert (not None) == True, 'not None' # === Complex boolean expressions === assert ((1 and 2) or (3 and 0)) == 2, 'complex and/or' assert (not (0 and 1)) == True, 'not and combined' ================================================ FILE: crates/monty/test_cases/builtin__add_type_error.py ================================================ len + 1 # Raise=TypeError("unsupported operand type(s) for +: 'builtin_function_or_method' and 'int'") ================================================ FILE: crates/monty/test_cases/builtin__filter.py ================================================ assert list(filter(None, [0, 1, False, True, '', 'hello'])) == [1, True, 'hello'], 'filter None removes falsy values' assert list(filter(None, [])) == [], 'filter None on empty list' assert list(filter(None, [0, 0, 0])) == [], 'filter None removes all zeros' assert list(filter(None, [1, 2, 3])) == [1, 2, 3], 'filter None keeps truthy values' assert list(filter(None, ['', '', 'x'])) == ['x'], 'filter None keeps non-empty string' assert list(filter(abs, [-1, 0, 1])) == [-1, 1], 'filter with abs keeps non-zero' assert list(filter(abs, [0, 0, 0])) == [], 'filter with abs removes zeros' assert list(filter(abs, [-5, -3, 0, 2, 0, 4])) == [-5, -3, 2, 4], 'filter with abs mixed' assert list(filter(bool, [0, 1, '', 'x'])) == [1, 'x'], 'filter with bool' assert list(filter(bool, [False, True, 0, 1])) == [True, 1], 'filter with bool booleans and ints' assert list(filter(bool, [[], [1], (), (2,)])) == [[1], (2,)], 'filter with bool containers' # Note: len returns int, so empty containers return 0 (falsy), non-empty 
return truthy assert list(filter(len, ['', 'a', '', 'bc'])) == ['a', 'bc'], 'filter with len on strings' assert list(filter(len, [[], [1], [], [2, 3]])) == [[1], [2, 3]], 'filter with len on lists' assert list(filter(len, [(), (1,), (), (2, 3)])) == [(1,), (2, 3)], 'filter with len on tuples' assert list(filter(int, ['0', '1', '2', '0'])) == ['1', '2'], 'filter with int on string numbers' assert list(filter(int, [0.0, 1.5, 0.0, 2.3])) == [1.5, 2.3], 'filter with int on floats' assert list(filter(str, [0, 1, '', 'x'])) == [0, 1, 'x'], 'filter with str converts and checks truthiness' assert list(filter(None, [1, 2, 3])) == [1, 2, 3], 'filter list' assert list(filter(None, (0, 1, 2))) == [1, 2], 'filter tuple' assert list(filter(None, 'abc')) == ['a', 'b', 'c'], 'filter string' assert list(filter(None, 'a b')) == ['a', ' ', 'b'], 'filter string with space' assert list(filter(None, range(0, 5))) == [1, 2, 3, 4], 'filter range' assert list(filter(None, range(1, 4))) == [1, 2, 3], 'filter range all truthy' assert list(filter(None, {0, 1, 2})) == [1, 2] or list(filter(None, {0, 1, 2})) == [2, 1], 'filter set' assert list(filter(None, [])) == [], 'filter empty list' assert list(filter(None, ())) == [], 'filter empty tuple' assert list(filter(None, '')) == [], 'filter empty string' assert list(filter(None, range(0))) == [], 'filter empty range' assert list(filter(None, [[], [1], []])) == [[1]], 'filter nested lists' assert list(filter(None, [(), (1,), ()])) == [(1,)], 'filter nested tuples' # filter() with user-defined function # This should error until user-defined functions are supported def is_positive(x): return x > 0 assert list(filter(is_positive, [-1, 1])) == [1], 'filter with user-defined function keeps positives' assert list(filter(lambda x: x > 0, [-1, 1])) == [1], 'filter with lambda keeps positives' try: list(filter(4, [1, 2])) assert False, 'filter with non-callable first argument should raise TypeError' except TypeError as e: assert str(e) == "'int' object is 
not callable", 'filter with non-callable first argument raises TypeError' ================================================ FILE: crates/monty/test_cases/builtin__filter_not_iterable.py ================================================ # filter() with non-iterable second argument filter(None, 42) """ TRACEBACK: Traceback (most recent call last): File "builtin__filter_not_iterable.py", line 2, in filter(None, 42) ~~~~~~~~~~~~~~~~ TypeError: 'int' object is not iterable """ ================================================ FILE: crates/monty/test_cases/builtin__getattr.py ================================================ # Test getattr() builtin function s = slice(1, 10, 2) assert getattr(s, 'start') == 1, 'getattr(slice, "start") should return 1' assert getattr(s, 'stop') == 10, 'getattr(slice, "stop") should return 10' assert getattr(s, 'step') == 2, 'getattr(slice, "step") should return 2' assert getattr(s, 'nonexistent', 'default') == 'default', 'getattr with default should return default' assert getattr(s, 'nonexistent', None) == None, 'getattr with None default should return None' assert getattr(s, 'nonexistent', 42) == 42, 'getattr with numeric default should return number' assert getattr(s, 'start', 999) == 1, 'getattr should return actual value, not default' try: getattr(s, 'nonexistent') assert False, 'getattr should raise AttributeError for missing attribute' except AttributeError: pass try: getattr() assert False, 'getattr() with no args should raise TypeError' except TypeError as e: assert str(e) == 'getattr expected at least 2 arguments, got 0', str(e) try: getattr(kwarg=1) assert False, 'getattr() with keyword arg should raise TypeError' except TypeError as e: assert str(e) == 'getattr() takes no keyword arguments', str(e) try: getattr(s) assert False, 'getattr() with 1 arg should raise TypeError' except TypeError as e: assert str(e) == 'getattr expected at least 2 arguments, got 1', str(e) try: getattr(s, 'start', 'default', 'extra') assert False, 
'getattr() with 4 args should raise TypeError' except TypeError as e: assert str(e) == 'getattr expected at most 3 arguments, got 4', str(e) try: getattr(s, 123) assert False, 'getattr() with non-string name should raise TypeError' except TypeError as e: assert str(e) == "attribute name must be string, not 'int'", str(e) try: getattr(s, None) assert False, 'getattr() with None name should raise TypeError' except TypeError as e: assert str(e) == "attribute name must be string, not 'NoneType'", str(e) try: raise ValueError('test error') except ValueError as e: args = getattr(e, 'args') assert args == ('test error',), 'exception args should be accessible via getattr' # === Dynamic (heap-allocated) attribute name strings === # These test that getattr works with non-interned strings (e.g. from concatenation) s2 = slice(5, 15, 3) attr_name = 'sta' + 'rt' assert getattr(s2, attr_name) == 5, 'getattr with concatenated string should work' attr_name = 'st' + 'op' assert getattr(s2, attr_name) == 15, 'getattr with concatenated "stop" should work' attr_name = 'st' + 'ep' assert getattr(s2, attr_name) == 3, 'getattr with concatenated "step" should work' # Dynamic attribute name with default for missing attribute attr_name = 'non' + 'existent' assert getattr(s2, attr_name, 42) == 42, 'getattr with dynamic missing attr should return default' # Dynamic attribute name on exception try: raise TypeError('dynamic test') except TypeError as e: attr_name = 'ar' + 'gs' args = getattr(e, attr_name) assert args == ('dynamic test',), 'exception args via dynamic string should work' ================================================ FILE: crates/monty/test_cases/builtin__iter_err_unpack_int.py ================================================ iterator = 0 iter(**42) # Raise=TypeError('iter() argument after ** must be a mapping, not int') ================================================ FILE: crates/monty/test_cases/builtin__iter_funcs.py ================================================ # === 
sum() === # Basic sum operations assert sum([1, 2, 3]) == 6, 'sum of list' assert sum([1, 2, 3], 10) == 16, 'sum with start value' assert sum(()) == 0, 'sum of empty tuple' assert sum([], 5) == 5, 'sum of empty list with start' assert sum(range(5)) == 10, 'sum of range' assert sum([1.5, 2.5, 3.0], 0.0) == 7.0, 'sum of floats with float start' # Note: sum of floats without start requires py_add to support int+float # sum with different iterables assert sum({1, 2, 3}) == 6, 'sum of set' assert sum({1: 'a', 2: 'b', 3: 'c'}) == 6, 'sum of dict keys' # === any() === # Basic any operations assert any([True, False, False]) == True, 'any with one True' assert any([False, False, False]) == False, 'any with all False' assert any([]) == False, 'any of empty list' assert any([0, 0, 1]) == True, 'any with truthy int' assert any([0, '', None]) == False, 'any with all falsy' assert any(['', 'hello']) == True, 'any with non-empty string' assert any(range(0, 5)) == True, 'any of range (has non-zero)' assert any(range(0, 1)) == False, 'any of range(0,1) is False (only 0)' # === all() === # Basic all operations assert all([True, True, True]) == True, 'all with all True' assert all([True, False, True]) == False, 'all with one False' assert all([]) == True, 'all of empty list' assert all([1, 2, 3]) == True, 'all with truthy ints' assert all([1, 0, 3]) == False, 'all with zero' assert all(['a', 'b', 'c']) == True, 'all with non-empty strings' assert all(['a', '', 'c']) == False, 'all with empty string' # More edge cases with nested structures assert any([[1], [], [3]]) == True, 'any with nested lists (some non-empty)' assert all([[1], [2], [3]]) == True, 'all with non-empty nested lists' # sum with lists (list + list is supported) assert sum([[1], [2], [3]], []) == [1, 2, 3], 'sum lists with empty start' # Note: sum with tuples requires Tuple py_add which is not implemented ================================================ FILE: crates/monty/test_cases/builtin__iter_next.py 
================================================ # === iter() on various iterables === # iter() creates an iterator from an iterable # iter() on list it = iter([1, 2, 3]) assert next(it) == 1, 'iter list: first element should be 1' assert next(it) == 2, 'iter list: second element should be 2' assert next(it) == 3, 'iter list: third element should be 3' # iter() on tuple it = iter((10, 20)) assert next(it) == 10, 'iter tuple: first element should be 10' assert next(it) == 20, 'iter tuple: second element should be 20' # iter() on string it = iter('ab') assert next(it) == 'a', 'iter string: first element should be a' assert next(it) == 'b', 'iter string: second element should be b' # iter() on range it = iter(range(3)) assert next(it) == 0, 'iter range: first element should be 0' assert next(it) == 1, 'iter range: second element should be 1' assert next(it) == 2, 'iter range: third element should be 2' # iter() on dict iterates over keys d = {'x': 1, 'y': 2} it = iter(d) keys = [next(it), next(it)] assert 'x' in keys, 'iter dict: x should be in keys' assert 'y' in keys, 'iter dict: y should be in keys' # === next() with default value === # next() returns default when iterator is exhausted it = iter([42]) assert next(it) == 42, 'next: first element should be 42' assert next(it, 'done') == 'done', 'next with default: should return default when exhausted' # Check default with various types it = iter([]) assert next(it, None) is None, 'next with None default: should return None' assert next(it, 0) == 0, 'next with 0 default: should return 0' assert next(it, []) == [], 'next with empty list default: should return empty list' # === iter() on iterator returns itself === # Calling iter() on an iterator should return the same iterator original = iter([1, 2, 3]) same = iter(original) # They should iterate over the same values assert next(original) == 1, 'iter on iterator: original first should be 1' assert next(same) == 2, 'iter on iterator: same should continue from 2' assert 
next(original) == 3, 'iter on iterator: original should continue to 3' # === Multiple independent iterators === # Creating multiple iterators over the same iterable should be independent data = [1, 2, 3] it1 = iter(data) it2 = iter(data) assert next(it1) == 1, 'independent iterators: it1 first should be 1' assert next(it1) == 2, 'independent iterators: it1 second should be 2' assert next(it2) == 1, 'independent iterators: it2 first should be 1 (independent)' assert next(it1) == 3, 'independent iterators: it1 third should be 3' assert next(it2) == 2, 'independent iterators: it2 second should be 2' ================================================ FILE: crates/monty/test_cases/builtin__map.py ================================================ assert list(map(abs, [-1, 0, 1, -2])) == [1, 0, 1, 2], 'map with abs' assert list(map(abs, [0, 0, 0])) == [0, 0, 0], 'map with abs all zeros' assert list(map(str, [1, 2, 3])) == ['1', '2', '3'], 'map with str on ints' assert list(map(str, [True, False])) == ['True', 'False'], 'map with str on bools' assert list(map(int, ['1', '2', '3'])) == [1, 2, 3], 'map with int on strings' assert list(map(int, [1.1, 2.9, 3.5])) == [1, 2, 3], 'map with int on floats' assert list(map(int, [True, False, True])) == [1, 0, 1], 'map with int on bools' assert list(map(bool, [0, 1, '', 'x'])) == [False, True, False, True], 'map with bool' assert list(map(bool, [[], [1], (), (2,)])) == [False, True, False, True], 'map with bool on containers' assert list(map(len, ['', 'a', 'ab', 'abc'])) == [0, 1, 2, 3], 'map with len on strings' assert list(map(len, [[], [1], [1, 2], [1, 2, 3]])) == [0, 1, 2, 3], 'map with len on lists' assert list(map(float, [1, 2, 3])) == [1.0, 2.0, 3.0], 'map with float on ints' assert list(map(float, ['1.5', '2.5'])) == [1.5, 2.5], 'map with float on strings' assert list(map(abs, [1, -2, 3])) == [1, 2, 3], 'map on list' assert list(map(abs, (1, -2, 3))) == [1, 2, 3], 'map on tuple' assert list(map(ord, 'abc')) == [97, 98, 99], 'map 
ord on string' assert list(map(abs, range(-3, 3))) == [3, 2, 1, 0, 1, 2], 'map on range' result = list(map(abs, {-1, 0, 1})) assert sorted(result) == [0, 1, 1], 'map on set' assert list(map(abs, [])) == [], 'map on empty list' assert list(map(abs, ())) == [], 'map on empty tuple' assert list(map(abs, '')) == [], 'map on empty string' assert list(map(abs, range(0))) == [], 'map on empty range' assert list(map(list, [(1, 2), (3, 4)])) == [[1, 2], [3, 4]], 'map with list constructor' assert list(map(tuple, [[1, 2], [3, 4]])) == [(1, 2), (3, 4)], 'map with tuple constructor' assert list(map(pow, [2, 3, 4], [3, 2, 2])) == [8, 9, 16], 'map with pow and 2 iterables' assert list(map(divmod, [10, 20, 30], [3, 6, 7])) == [(3, 1), (3, 2), (4, 2)], 'map with divmod and 2 iterables' assert list(map(pow, [2, 3, 4, 5], [3, 2])) == [8, 9], 'map stops at shortest iterable' assert list(map(pow, [2, 3], [3, 2, 1, 0])) == [8, 9], 'map stops at shortest iterable (first shorter)' assert list(map(pow, [2], [3, 4, 5])) == [8], 'map with single item in shortest' def f(x): return x * 2 assert list(map(f, [1, 2, 3])) == [2, 4, 6], 'map with custom function' def raise_exception(x): raise ValueError('Intentional error') try: list(map(raise_exception, [1, 2, 3])) assert False, 'should have failed with exception' except ValueError as e: assert str(e) == 'Intentional error', 'map with function that raises exception' try: map() except TypeError as e: assert str(e) == 'map() must have at least two arguments.', 'map with no arguments' try: map(None) except TypeError as e: assert str(e) == 'map() must have at least two arguments.', 'map with only function argument' ================================================ FILE: crates/monty/test_cases/builtin__map_not_iterable.py ================================================ # map() with non-iterable second argument map(abs, 42) """ TRACEBACK: Traceback (most recent call last): File "builtin__map_not_iterable.py", line 2, in map(abs, 42) ~~~~~~~~~~~~ 
TypeError: 'int' object is not iterable """ ================================================ FILE: crates/monty/test_cases/builtin__math_funcs.py ================================================ # === abs() === # Basic abs operations assert abs(5) == 5, 'abs of positive int' assert abs(-5) == 5, 'abs of negative int' assert abs(0) == 0, 'abs of zero' assert abs(3.14) == 3.14, 'abs of positive float' assert abs(-3.14) == 3.14, 'abs of negative float' assert abs(True) == 1, 'abs of True' assert abs(False) == 0, 'abs of False' # === round() === # Basic round operations assert round(2.5) == 2, 'round 2.5 (bankers rounding)' assert round(3.5) == 4, 'round 3.5 (bankers rounding)' assert round(0.5) == 0, 'round 0.5 (bankers rounding)' assert round(-0.5) == 0, 'round -0.5 (bankers rounding)' assert round(2.4) == 2, 'round 2.4' assert round(2.6) == 3, 'round 2.6' assert round(-2.5) == -2, 'round -2.5' assert round(-1.5) == -2, 'round -1.5 (bankers rounding)' assert round(5) == 5, 'round integer' # round with ndigits assert round(3.14159, 2) == 3.14, 'round to 2 digits' assert round(3.14159, 0) == 3.0, 'round to 0 digits returns float' assert repr(round(-0.4, 0)) == '-0.0', 'round(-0.4, 0) preserves negative zero sign' assert repr(round(-0.5, 0)) == '-0.0', 'round(-0.5, 0) preserves negative zero sign' assert round(1234, -2) == 1200, 'round int to nearest 100' assert round(1250, -2) == 1200, 'round 1250 to nearest 100 (bankers)' assert round(1350, -2) == 1400, 'round 1350 to nearest 100' assert round(15, -1) == 20, 'round 15 to nearest 10 (bankers)' assert round(25, -1) == 20, 'round 25 to nearest 10 (bankers)' # round with None assert round(2.5, None) == 2, 'round with None ndigits' assert round(True, -1) == 0, 'round True with negative digits behaves like int' assert round(True, 2) == 1, 'round True with positive digits returns int' assert round(False, -3) == 0, 'round False with negative digits stays zero' # round type errors threw = False try: round(1.2, 1.5) except 
TypeError: threw = True assert threw, 'round with non-int ndigits raises TypeError' # round edge cases with extreme values assert isinstance(round(1e15), int), 'round large float returns int' assert isinstance(round(-1e15), int), 'round large negative float returns int' assert round(0.0) == 0, 'round(0.0) is zero' assert round(-0.0) == 0, 'round(-0.0) is zero' # round special float values (infinity / NaN) inf = float('inf') neg_inf = float('-inf') nan = float('nan') threw = False try: round(inf) except OverflowError: threw = True assert threw, 'round(inf) raises OverflowError' threw = False try: round(neg_inf) except OverflowError: threw = True assert threw, 'round(-inf) raises OverflowError' threw = False try: round(nan) except ValueError: threw = True assert threw, 'round(nan) raises ValueError' r = round(inf, 0) assert r == inf, 'round(inf, 0) returns inf' r = round(neg_inf, 0) assert r == neg_inf, 'round(-inf, 0) returns -inf' r = round(nan, 0) assert r != r, 'round(nan, 0) returns NaN' # round with extreme ndigits values assert round(1.23, 10**6) == 1.23, 'round with huge positive ndigits returns original float' assert round(1.23, -(10**6)) == 0.0, 'round with huge negative ndigits returns zero' assert repr(round(-1.23, -(10**6))) == '-0.0', 'round with huge negative ndigits preserves signed zero' # round with float result (ndigits specified) assert isinstance(round(1.5, 1), float), 'round with ndigits returns float' assert round(1.25, 1) == 1.2, 'round 1.25 to 1 decimal (bankers rounding)' assert round(1.35, 1) == 1.4, 'round 1.35 to 1 decimal' # === divmod() === # Basic divmod operations assert divmod(17, 5) == (3, 2), 'divmod 17, 5' assert divmod(10, 3) == (3, 1), 'divmod 10, 3' assert divmod(9, 3) == (3, 0), 'divmod 9, 3' assert divmod(-10, 3) == (-4, 2), 'divmod -10, 3 (floor division)' assert divmod(10, -3) == (-4, -2), 'divmod 10, -3' assert divmod(-10, -3) == (3, -1), 'divmod -10, -3' # divmod with floats r = divmod(7.5, 2.5) assert r[0] == 3.0 and 
r[1] == 0.0, 'divmod floats' assert divmod(True, 2) == (0, 1), 'divmod accepts bool numerator' assert divmod(5, True) == (5, 0), 'divmod accepts bool denominator' # === pow() === # Basic pow operations assert pow(2, 3) == 8, 'pow 2^3' assert pow(2, 0) == 1, 'pow x^0' assert pow(5, 1) == 5, 'pow x^1' assert pow(2, 10) == 1024, 'pow 2^10' # pow with negative exponent assert pow(2, -1) == 0.5, 'pow with negative exp' assert pow(4, -2) == 0.0625, 'pow 4^-2' # pow with floats assert pow(2.0, 3.0) == 8.0, 'pow with floats' assert pow(4.0, 0.5) == 2.0, 'pow float sqrt' # Three-argument pow (modular exponentiation) assert pow(2, 10, 1000) == 24, 'pow modular 2^10 % 1000' assert pow(3, 4, 5) == 1, 'pow modular 3^4 % 5' assert pow(7, 256, 13) == 9, 'pow modular large exp' # Modular exponentiation edge cases assert pow(2, 0, 5) == 1, 'pow x^0 mod n' assert pow(0, 5, 3) == 0, 'pow 0^n mod m' assert pow(True, 2) == 1, 'pow handles bool base' assert pow(2, True) == 2, 'pow handles bool exponent' assert pow(True, True) == 1, 'pow handles bool base and exponent' assert pow(True, -1) == 1.0, 'pow bool negative exponent works like int' threw = False try: pow(0, -1) except ZeroDivisionError: threw = True assert threw, 'pow(0, negative) raises ZeroDivisionError' threw = False try: pow(0.0, -1) except ZeroDivisionError: threw = True assert threw, 'pow(0.0, negative) raises ZeroDivisionError' ================================================ FILE: crates/monty/test_cases/builtin__more_iter_funcs.py ================================================ # === min() === # Basic min operations assert min([1, 2, 3]) == 1, 'min of list' assert min([3, 1, 2]) == 1, 'min of unsorted list' assert min([5]) == 5, 'min of single element' assert min(1, 2, 3) == 1, 'min of multiple args' assert min(3, 1, 2) == 1, 'min of unsorted args' assert min(-5, -10, -1) == -10, 'min of negatives' # min with strings assert min(['b', 'a', 'c']) == 'a', 'min of string list' assert min('b', 'a', 'c') == 'a', 'min of 
string args' # min with floats assert min([1.5, 0.5, 2.5]) == 0.5, 'min of floats' assert min(1.5, 0.5) == 0.5, 'min float args' # === max() === # Basic max operations assert max([1, 2, 3]) == 3, 'max of list' assert max([3, 1, 2]) == 3, 'max of unsorted list' assert max([5]) == 5, 'max of single element' assert max(1, 2, 3) == 3, 'max of multiple args' assert max(3, 1, 2) == 3, 'max of unsorted args' assert max(-5, -10, -1) == -1, 'max of negatives' # max with strings assert max(['b', 'a', 'c']) == 'c', 'max of string list' assert max('b', 'a', 'c') == 'c', 'max of string args' # max with floats assert max([1.5, 0.5, 2.5]) == 2.5, 'max of floats' assert max(1.5, 2.5) == 2.5, 'max float args' # max with keyword arguments assert max([3, -1, 2, -4], key=abs) == -4, 'max key=abs' assert max(['a', 'bbb', 'cc'], key=len) == 'bbb', 'max key=len' assert max(['a', 'bbb', 'cc'], key=lambda s: len(s)) == 'bbb', 'max key=lambda simple callable' assert max('a', 'bbb', 'cc', key=len) == 'bbb', 'max multiple args key=len' assert max([1, 2, 3], key=None) == 3, 'max key=None same as no key' assert max([], default='fallback') == 'fallback', 'max default for empty iterable' assert max([], key=len, default='fallback') == 'fallback', 'max key+default for empty iterable' # min with keyword arguments assert min([3, -1, 2, -4], key=abs) == -1, 'min key=abs' assert min(['a', 'bbb', 'cc'], key=len) == 'a', 'min key=len' assert min(['a', 'bbb', 'cc'], key=lambda s: len(s)) == 'a', 'min key=lambda simple callable' assert min('a', 'bbb', 'cc', key=len) == 'a', 'min multiple args key=len' assert min([1, 2, 3], key=None) == 1, 'min key=None same as no key' assert min([], default='fallback') == 'fallback', 'min default for empty iterable' assert min([], key=len, default='fallback') == 'fallback', 'min key+default for empty iterable' # max/min with tuple-producing key functions ranked_items = [ {'downloads': 10, 'likes': 1}, {'downloads': 10, 'likes': 5}, {'downloads': 20, 'likes': 0}, ] assert 
max(ranked_items, key=lambda item: (item.get('downloads', 0), item.get('likes', 0))) == { 'downloads': 20, 'likes': 0, }, 'max key=lambda tuple ranking' tie_items = [ {'downloads': 10, 'likes': 5, 'name': 'first'}, {'downloads': 10, 'likes': 5, 'name': 'second'}, ] assert max(tie_items, key=lambda item: (item['downloads'], item['likes']))['name'] == 'first', ( 'max returns first maximal item on ties' ) assert min(tie_items, key=lambda item: (item['downloads'], item['likes']))['name'] == 'first', ( 'min returns first minimal item on ties' ) try: max([1], nope=1) assert False, 'invalid max keyword should raise TypeError' except TypeError as e: assert e.args == ("max() got an unexpected keyword argument 'nope'",), 'max invalid keyword error matches CPython' try: min([1], nope=1) assert False, 'invalid min keyword should raise TypeError' except TypeError as e: assert e.args == ("min() got an unexpected keyword argument 'nope'",), 'min invalid keyword error matches CPython' try: max(key=int) assert False, 'max with only kwargs should raise TypeError' except TypeError as e: assert e.args == ('max expected at least 1 argument, got 0',), 'max kwargs-only arity error matches CPython' try: min(default=None, key=int) assert False, 'min with only kwargs should raise TypeError' except TypeError as e: assert e.args == ('min expected at least 1 argument, got 0',), 'min kwargs-only arity error matches CPython' try: max(nope=1) assert False, 'max with only unexpected kwargs should still raise the zero-arg TypeError' except TypeError as e: assert e.args == ('max expected at least 1 argument, got 0',), ( 'max zero-arg error takes precedence over kwargs validation' ) try: min(nope=1) assert False, 'min with only unexpected kwargs should still raise the zero-arg TypeError' except TypeError as e: assert e.args == ('min expected at least 1 argument, got 0',), ( 'min zero-arg error takes precedence over kwargs validation' ) try: max(key=int, nope=1) assert False, 'max with mixed kwargs 
and no positional args should still raise the zero-arg TypeError' except TypeError as e: assert e.args == ('max expected at least 1 argument, got 0',), 'max zero-arg error beats mixed kwargs validation' try: max(1, 2, default=3) assert False, 'max with multiple args and default should raise TypeError' except TypeError as e: assert e.args == ('Cannot specify a default for max() with multiple positional arguments',), ( 'max multiple args default error matches CPython' ) try: min(1, 2, default=3) assert False, 'min with multiple args and default should raise TypeError' except TypeError as e: assert e.args == ('Cannot specify a default for min() with multiple positional arguments',), ( 'min multiple args default error matches CPython' ) try: max(1, key=int) assert False, 'max single non-iterable arg with key should raise TypeError' except TypeError as e: assert e.args == ("'int' object is not iterable",), 'max single arg with key still uses iterable form' try: min(1, key=int) assert False, 'min single non-iterable arg with key should raise TypeError' except TypeError as e: assert e.args == ("'int' object is not iterable",), 'min single arg with key still uses iterable form' try: max([1], key=1) assert False, 'max non-callable key should raise TypeError' except TypeError as e: assert e.args == ("'int' object is not callable",), 'max non-callable key error matches CPython' try: min([1], key=1) assert False, 'min non-callable key should raise TypeError' except TypeError as e: assert e.args == ("'int' object is not callable",), 'min non-callable key error matches CPython' try: max([]) assert False, 'max empty iterable without default should raise ValueError' except ValueError as e: assert e.args == ('max() iterable argument is empty',), 'max empty iterable error unchanged' try: min([]) assert False, 'min empty iterable without default should raise ValueError' except ValueError as e: assert e.args == ('min() iterable argument is empty',), 'min empty iterable error 
unchanged' assert max([1], default=2) == 1, 'max ignores default for non-empty iterable' assert min([1], default=2) == 1, 'min ignores default for non-empty iterable' assert max([], key=1, default='fallback') == 'fallback', 'max does not validate key for empty iterable with default' assert min([], key=1, default='fallback') == 'fallback', 'min does not validate key for empty iterable with default' try: max([1], key=abs, **{'key': len}) assert False, 'duplicate max key should raise TypeError' except TypeError as e: assert e.args == ("max() got multiple values for keyword argument 'key'",), ( 'max duplicate key error matches CPython' ) try: min([], default='x', **{'default': 'y'}) assert False, 'duplicate min default should raise TypeError' except TypeError as e: assert e.args == ("min() got multiple values for keyword argument 'default'",), ( 'min duplicate default error matches CPython' ) try: max([], **{1: 2}) assert False, 'max non-string keyword key should raise TypeError' except TypeError as e: assert e.args == ('keywords must be strings',), 'max non-string keyword key error matches CPython' try: max([1, 'a']) assert False, 'max with incomparable iterable items should raise TypeError' except TypeError as e: assert e.args == ("'>' not supported between instances of 'str' and 'int'",), ( 'max iterable comparison error matches CPython' ) try: min(1, 'a') assert False, 'min with incomparable positional args should raise TypeError' except TypeError as e: assert e.args == ("'<' not supported between instances of 'str' and 'int'",), ( 'min positional comparison error matches CPython' ) max_key_map = {10: 1, 20: 3, 30: 3, 40: 2} assert max([10, 20, 30, 40], key=lambda item: max_key_map[item]) == 20, ( 'max returns first item among repeated maximal keys' ) min_key_map = {10: 2, 20: 1, 30: 1, 40: 3} assert min([10, 20, 30, 40], key=lambda item: min_key_map[item]) == 20, ( 'min returns first item among repeated minimal keys' ) # === sorted() === # Basic sorted operations 
assert sorted([3, 1, 2]) == [1, 2, 3], 'sorted int list' assert sorted([1, 2, 3]) == [1, 2, 3], 'sorted already sorted' assert sorted([3, 2, 1]) == [1, 2, 3], 'sorted reverse order' assert sorted([]) == [], 'sorted empty list' assert sorted([5]) == [5], 'sorted single element' # sorted with strings assert sorted(['c', 'a', 'b']) == ['a', 'b', 'c'], 'sorted strings' # sorted with heap-allocated strings (from split) assert sorted('banana,apple,cherry'.split(',')) == ['apple', 'banana', 'cherry'], 'sorted split strings' # sorted with multi-char string literals (heap-allocated) assert sorted(['banana', 'apple', 'cherry']) == ['apple', 'banana', 'cherry'], 'sorted multi-char strings' # min/max with heap-allocated strings assert min('banana,apple,cherry'.split(',')) == 'apple', 'min of split strings' assert max('banana,apple,cherry'.split(',')) == 'cherry', 'max of split strings' # sorted with negative numbers assert sorted([-3, 1, -2, 2]) == [-3, -2, 1, 2], 'sorted with negatives' # sorted with tuple assert sorted((3, 1, 2)) == [1, 2, 3], 'sorted tuple returns list' # sorted preserves duplicates assert sorted([3, 1, 2, 1, 3]) == [1, 1, 2, 3, 3], 'sorted with duplicates' # sorted with range assert sorted(range(5, 0, -1)) == [1, 2, 3, 4, 5], 'sorted range' try: sorted(1, 2) assert False, 'sorted() with too many positional arguments should raise TypeError' except TypeError as e: assert e.args == ('sorted expected 1 argument, got 2',), 'sorted() positional arity error matches CPython' try: sorted([1], nope=1) assert False, 'sorted() with invalid keyword should raise TypeError' except TypeError as e: assert e.args == ("sort() got an unexpected keyword argument 'nope'",), ( 'sorted() invalid keyword error matches CPython' ) # === sorted() with reverse === assert sorted([3, 1, 2], reverse=True) == [3, 2, 1], 'sorted reverse=True' assert sorted([3, 1, 2], reverse=False) == [1, 2, 3], 'sorted reverse=False' assert sorted(['c', 'a', 'b'], reverse=True) == ['c', 'b', 'a'], 'sorted 
strings reverse' assert sorted([], reverse=True) == [], 'sorted empty reverse' assert sorted([5], reverse=True) == [5], 'sorted single reverse' assert sorted([3, 1, 2], reverse=0) == [1, 2, 3], 'sorted reverse=0 (falsy)' assert sorted([3, 1, 2], reverse=1) == [3, 2, 1], 'sorted reverse=1 (truthy)' # === sorted() with key === assert sorted([3, -1, 2, -4], key=abs) == [-1, 2, 3, -4], 'sorted key=abs' assert sorted(['banana', 'apple', 'cherry'], key=len) == ['apple', 'banana', 'cherry'], 'sorted key=len' assert sorted([3, 1, 2], key=None) == [1, 2, 3], 'sorted key=None same as no key' try: sorted([1], key=abs, **{'key': len}) assert False, 'duplicate sorted key should raise TypeError' except TypeError as e: assert e.args == ("sorted() got multiple values for keyword argument 'key'",), ( 'sorted duplicate key error matches CPython' ) def negate(x): return -x assert sorted([1, -2, 3], key=negate) == [3, 1, -2], 'sorted key=user-defined function' # === sorted() with key and reverse === assert sorted([3, -1, 2, -4], key=abs, reverse=True) == [-4, 3, 2, -1], 'sorted key=abs reverse=True' assert sorted(['banana', 'apple', 'cherry'], key=len, reverse=True) == ['banana', 'cherry', 'apple'], ( 'sorted key=len reverse=True' ) assert sorted([3, 1, 2], key=None, reverse=True) == [3, 2, 1], 'sorted key=None reverse=True' # === reversed() === # Basic reversed operations assert list(reversed([1, 2, 3])) == [3, 2, 1], 'reversed list' assert list(reversed([1])) == [1], 'reversed single element' assert list(reversed([])) == [], 'reversed empty list' # reversed tuple assert list(reversed((1, 2, 3))) == [3, 2, 1], 'reversed tuple' # reversed string assert list(reversed('abc')) == ['c', 'b', 'a'], 'reversed string' # reversed range assert list(reversed(range(1, 4))) == [3, 2, 1], 'reversed range' # === enumerate() === # Basic enumerate operations assert list(enumerate(['a', 'b', 'c'])) == [(0, 'a'), (1, 'b'), (2, 'c')], 'enumerate list' assert list(enumerate([])) == [], 'enumerate empty 
list' assert list(enumerate(['x'])) == [(0, 'x')], 'enumerate single element' # enumerate with start assert list(enumerate(['a', 'b'], 1)) == [(1, 'a'), (2, 'b')], 'enumerate with start' assert list(enumerate(['a', 'b'], 10)) == [(10, 'a'), (11, 'b')], 'enumerate with start 10' # enumerate string assert list(enumerate('ab')) == [(0, 'a'), (1, 'b')], 'enumerate string' # enumerate range assert list(enumerate(range(3))) == [(0, 0), (1, 1), (2, 2)], 'enumerate range' # === zip() === # Basic zip operations assert list(zip([1, 2], ['a', 'b'])) == [(1, 'a'), (2, 'b')], 'zip two lists' assert list(zip([1], ['a'])) == [(1, 'a')], 'zip single elements' assert list(zip([], [])) == [], 'zip empty lists' # zip truncates to shortest assert list(zip([1, 2, 3], ['a', 'b'])) == [(1, 'a'), (2, 'b')], 'zip truncates to shortest' assert list(zip([1], ['a', 'b', 'c'])) == [(1, 'a')], 'zip truncates first shorter' # zip three iterables assert list(zip([1, 2], ['a', 'b'], [True, False])) == [(1, 'a', True), (2, 'b', False)], 'zip three lists' # zip with different types assert list(zip(range(3), 'abc')) == [(0, 'a'), (1, 'b'), (2, 'c')], 'zip range and string' # zip single iterable assert list(zip([1, 2, 3])) == [(1,), (2,), (3,)], 'zip single iterable' # zip with empty assert list(zip([1, 2], [])) == [], 'zip with empty second' assert list(zip([], [1, 2])) == [], 'zip with empty first' ================================================ FILE: crates/monty/test_cases/builtin__next_stop_iteration.py ================================================ it = iter([]) next(it) """ TRACEBACK: Traceback (most recent call last): File "builtin__next_stop_iteration.py", line 2, in next(it) ~~~~~~~~ StopIteration """ ================================================ FILE: crates/monty/test_cases/builtin__print_invalid_kwarg.py ================================================ print('xxx', **{"foo'": 123}) """ TRACEBACK: Traceback (most recent call last): File "builtin__print_invalid_kwarg.py", line 1, in 
print('xxx', **{"foo'": 123}) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TypeError: print() got an unexpected keyword argument 'foo'' """ ================================================ FILE: crates/monty/test_cases/builtin__print_kwargs.py ================================================ # Tests dynamic keyword arguments for print() # === Dynamic sep via **kwargs === dynamic_sep = 's' + 'e' + 'p' result = print('left', 'right', **{dynamic_sep: '-'}) assert result is None, 'print returns None with dynamic sep' # === Dynamic end via **kwargs === dynamic_end = 'e' + 'n' + 'd' result2 = print('line', **{dynamic_end: ''}) assert result2 is None, 'print returns None with dynamic end' ================================================ FILE: crates/monty/test_cases/builtin__repr.py ================================================ # === repr of built-in functions === assert repr(len) == '', 'repr(len)' assert repr(print) == '', 'repr(print)' ================================================ FILE: crates/monty/test_cases/builtin__string_funcs.py ================================================ # === ord() === # Basic ord operations assert ord('a') == 97, 'ord lowercase a' assert ord('A') == 65, 'ord uppercase A' assert ord('0') == 48, 'ord digit 0' assert ord(' ') == 32, 'ord space' assert ord('\n') == 10, 'ord newline' assert ord('\t') == 9, 'ord tab' # Unicode characters assert ord('\u00e9') == 233, 'ord e-acute' assert ord('\u4e2d') == 20013, 'ord Chinese character' assert ord('\U0001f600') == 128512, 'ord emoji grinning face' # === chr() === # Basic chr operations assert chr(97) == 'a', 'chr 97 = a' assert chr(65) == 'A', 'chr 65 = A' assert chr(48) == '0', 'chr 48 = 0' assert chr(32) == ' ', 'chr 32 = space' assert chr(10) == '\n', 'chr 10 = newline' # Unicode characters assert chr(233) == '\u00e9', 'chr 233 = e-acute' assert chr(20013) == '\u4e2d', 'chr 20013 = Chinese char' assert chr(128512) == '\U0001f600', 'chr emoji' # Edge cases assert chr(0) == '\x00', 'chr 0 = null' assert 
chr(0x10FFFF) != '', 'chr max unicode' # Round-trip test assert chr(ord('x')) == 'x', 'ord/chr roundtrip' assert ord(chr(1000)) == 1000, 'chr/ord roundtrip' # === bin() === # Basic bin operations assert bin(0) == '0b0', 'bin 0' assert bin(1) == '0b1', 'bin 1' assert bin(2) == '0b10', 'bin 2' assert bin(5) == '0b101', 'bin 5' assert bin(255) == '0b11111111', 'bin 255' assert bin(-5) == '-0b101', 'bin negative' assert bin(True) == '0b1', 'bin True' assert bin(False) == '0b0', 'bin False' MIN_I64 = -9223372036854775807 - 1 # Smallest i64 MIN_I64_BIN = '1' + '0' * 63 MIN_I64_HEX = '8' + '0' * 15 MIN_I64_OCT = '1' + '0' * 21 assert bin(MIN_I64) == '-0b' + MIN_I64_BIN, 'bin handles i64::MIN without overflow' # === hex() === # Basic hex operations assert hex(0) == '0x0', 'hex 0' assert hex(15) == '0xf', 'hex 15' assert hex(16) == '0x10', 'hex 16' assert hex(255) == '0xff', 'hex 255' assert hex(256) == '0x100', 'hex 256' assert hex(-42) == '-0x2a', 'hex negative' assert hex(True) == '0x1', 'hex True' assert hex(False) == '0x0', 'hex False' assert hex(MIN_I64) == '-0x' + MIN_I64_HEX, 'hex handles i64::MIN without overflow' # === oct() === # Basic oct operations assert oct(0) == '0o0', 'oct 0' assert oct(7) == '0o7', 'oct 7' assert oct(8) == '0o10', 'oct 8' assert oct(64) == '0o100', 'oct 64' assert oct(-56) == '-0o70', 'oct negative' assert oct(True) == '0o1', 'oct True' assert oct(False) == '0o0', 'oct False' assert oct(MIN_I64) == '-0o' + MIN_I64_OCT, 'oct handles i64::MIN without overflow' ================================================ FILE: crates/monty/test_cases/bytes__decode_invalid_utf8.py ================================================ # Test that bytes.decode raises UnicodeDecodeError for invalid UTF-8 # UnicodeDecodeError is a subclass of ValueError, so it should be caught by both # Test it raises UnicodeDecodeError raised_decode_error = False try: b'\xff'.decode() except UnicodeDecodeError: raised_decode_error = True assert raised_decode_error, 'should raise 
UnicodeDecodeError for invalid UTF-8' # Test it can be caught by ValueError (since UnicodeDecodeError is a subclass) caught_by_value_error = False try: b'\x80\x81'.decode() except ValueError: caught_by_value_error = True assert caught_by_value_error, 'UnicodeDecodeError should be caught by except ValueError' ================================================ FILE: crates/monty/test_cases/bytes__endswith_str_error.py ================================================ # Test that bytes.endswith with str raises TypeError b'hello'.endswith('o') """ TRACEBACK: Traceback (most recent call last): File "bytes__endswith_str_error.py", line 2, in b'hello'.endswith('o') ~~~~~~~~~~~~~~~~~~~~~~ TypeError: endswith first arg must be bytes or a tuple of bytes, not str """ ================================================ FILE: crates/monty/test_cases/bytes__getitem_index_error.py ================================================ b = b'hello' b[10] """ TRACEBACK: Traceback (most recent call last): File "bytes__getitem_index_error.py", line 2, in b[10] ~~~~~ IndexError: index out of range """ ================================================ FILE: crates/monty/test_cases/bytes__index_start_gt_end.py ================================================ # Test that bytes.index with start > end doesn't panic but raises ValueError b'hello'.index(b'e', 5, 2) """ TRACEBACK: Traceback (most recent call last): File "bytes__index_start_gt_end.py", line 2, in b'hello'.index(b'e', 5, 2) ~~~~~~~~~~~~~~~~~~~~~~~~~~ ValueError: subsection not found """ ================================================ FILE: crates/monty/test_cases/bytes__methods.py ================================================ # === bytes.decode() === assert b'hello'.decode() == 'hello', 'decode default utf-8' assert b'hello'.decode('utf-8') == 'hello', 'decode explicit utf-8' assert b'hello'.decode('utf8') == 'hello', 'decode utf8 variant' assert b'hello'.decode('UTF-8') == 'hello', 'decode uppercase UTF-8' assert b''.decode() == '', 
'decode empty bytes' # Non-ASCII UTF-8 assert b'\xc3\xa9'.decode() == '\xe9', 'decode utf-8 e-acute' assert b'\xe4\xb8\xad'.decode() == '\u4e2d', 'decode utf-8 CJK character' # === bytes.count() === assert b'hello'.count(b'l') == 2, 'count single char' assert b'hello'.count(b'll') == 1, 'count subsequence' assert b'hello'.count(b'x') == 0, 'count not found' assert b'aaa'.count(b'aa') == 1, 'count non-overlapping' assert b''.count(b'x') == 0, 'count in empty bytes' assert b'hello'.count(b'') == 6, 'count empty subsequence' # count with start/end assert b'abcabc'.count(b'ab', 1) == 1, 'count with start' assert b'abcabc'.count(b'ab', 0, 3) == 1, 'count with start and end' # === bytes.find() === assert b'hello'.find(b'e') == 1, 'find single char' assert b'hello'.find(b'll') == 2, 'find subsequence' assert b'hello'.find(b'x') == -1, 'find not found' assert b'hello'.find(b'') == 0, 'find empty subsequence' assert b''.find(b'x') == -1, 'find in empty bytes' # find with start/end assert b'hello'.find(b'l', 3) == 3, 'find with start' assert b'hello'.find(b'l', 0, 2) == -1, 'find with end before match' # === bytes.index() === assert b'hello'.index(b'e') == 1, 'index single char' assert b'hello'.index(b'll') == 2, 'index subsequence' assert b'hello'.index(b'') == 0, 'index empty subsequence' # === bytes.startswith() === assert b'hello'.startswith(b'he'), 'startswith true' assert not b'hello'.startswith(b'lo'), 'startswith false' assert b'hello'.startswith(b''), 'startswith empty' assert b''.startswith(b''), 'empty startswith empty' assert not b''.startswith(b'x'), 'empty startswith non-empty' # startswith with start/end assert b'abcdef'.startswith(b'bc', 1), 'startswith with start' assert b'abcdef'.startswith(b'bc', 1, 3), 'startswith with start and end' assert not b'abcdef'.startswith(b'bc', 2), 'startswith with start past match' assert not b'abcdef'.startswith(b'abc', 0, 2), 'startswith with end before match ends' # === bytes.endswith() === assert b'hello'.endswith(b'lo'), 
'endswith true' assert not b'hello'.endswith(b'he'), 'endswith false' assert b'hello'.endswith(b''), 'endswith empty' assert b''.endswith(b''), 'empty endswith empty' assert not b''.endswith(b'x'), 'empty endswith non-empty' # endswith with start/end assert b'abcdef'.endswith(b'de', 0, 5), 'endswith with end' assert b'abcdef'.endswith(b'cd', 1, 4), 'endswith with start and end' assert not b'abcdef'.endswith(b'de', 0, 4), 'endswith before suffix' # === Edge case: start > end (should not panic, treat as empty slice) === assert b'hello'.find(b'e', 5, 2) == -1, 'find with start > end returns -1' assert b'hello'.count(b'l', 5, 2) == 0, 'count with start > end returns 0' assert not b'hello'.startswith(b'h', 5, 2), 'startswith with start > end is false' assert not b'hello'.endswith(b'o', 5, 2), 'endswith with start > end is false' # === bytes.lower() === assert b'HELLO'.lower() == b'hello', 'lower basic' assert b'Hello World'.lower() == b'hello world', 'lower mixed case' assert b'hello'.lower() == b'hello', 'lower already lowercase' assert b''.lower() == b'', 'lower empty' assert b'123ABC'.lower() == b'123abc', 'lower with digits' assert b'\x80\xff'.lower() == b'\x80\xff', 'lower non-ascii unchanged' # === bytes.upper() === assert b'hello'.upper() == b'HELLO', 'upper basic' assert b'Hello World'.upper() == b'HELLO WORLD', 'upper mixed case' assert b'HELLO'.upper() == b'HELLO', 'upper already uppercase' assert b''.upper() == b'', 'upper empty' assert b'123abc'.upper() == b'123ABC', 'upper with digits' # === bytes.capitalize() === assert b'hello'.capitalize() == b'Hello', 'capitalize basic' assert b'HELLO'.capitalize() == b'Hello', 'capitalize uppercase' assert b'hELLO wORLD'.capitalize() == b'Hello world', 'capitalize mixed' assert b''.capitalize() == b'', 'capitalize empty' assert b'123hello'.capitalize() == b'123hello', 'capitalize starting with digit' # === bytes.title() === assert b'hello world'.title() == b'Hello World', 'title basic' assert b'HELLO WORLD'.title() == 
b'Hello World', 'title uppercase' assert b"they're bill's".title() == b"They'Re Bill'S", 'title with apostrophe' assert b''.title() == b'', 'title empty' # === bytes.swapcase() === assert b'Hello World'.swapcase() == b'hELLO wORLD', 'swapcase basic' assert b'HELLO'.swapcase() == b'hello', 'swapcase uppercase' assert b'hello'.swapcase() == b'HELLO', 'swapcase lowercase' assert b''.swapcase() == b'', 'swapcase empty' # === bytes.isalpha() === assert b'hello'.isalpha(), 'isalpha all letters' assert not b'hello123'.isalpha(), 'isalpha with digits' assert not b'hello world'.isalpha(), 'isalpha with space' assert not b''.isalpha(), 'isalpha empty is false' assert b'ABC'.isalpha(), 'isalpha uppercase' # === bytes.isdigit() === assert b'123'.isdigit(), 'isdigit all digits' assert not b'123abc'.isdigit(), 'isdigit with letters' assert not b''.isdigit(), 'isdigit empty is false' # === bytes.isalnum() === assert b'hello123'.isalnum(), 'isalnum letters and digits' assert b'hello'.isalnum(), 'isalnum all letters' assert b'123'.isalnum(), 'isalnum all digits' assert not b'hello world'.isalnum(), 'isalnum with space' assert not b''.isalnum(), 'isalnum empty is false' # === bytes.isspace() === assert b' \t\n\r'.isspace(), 'isspace whitespace chars' assert not b'hello'.isspace(), 'isspace not all whitespace' assert not b''.isspace(), 'isspace empty is false' assert b' '.isspace(), 'isspace single space' # === bytes.islower() === assert b'hello'.islower(), 'islower all lowercase' assert b'hello123'.islower(), 'islower with digits' assert not b'Hello'.islower(), 'islower with uppercase' assert not b'HELLO'.islower(), 'islower all uppercase' assert not b''.islower(), 'islower empty is false' assert not b'123'.islower(), 'islower no cased chars is false' # === bytes.isupper() === assert b'HELLO'.isupper(), 'isupper all uppercase' assert b'HELLO123'.isupper(), 'isupper with digits' assert not b'Hello'.isupper(), 'isupper with lowercase' assert not b'hello'.isupper(), 'isupper all 
lowercase' assert not b''.isupper(), 'isupper empty is false' assert not b'123'.isupper(), 'isupper no cased chars is false' # === bytes.isascii() === assert b'hello'.isascii(), 'isascii all ascii' assert b''.isascii(), 'isascii empty is true' assert b'\x00\x7f'.isascii(), 'isascii boundary values' assert not b'\x80'.isascii(), 'isascii non-ascii byte' assert not b'hello\xff'.isascii(), 'isascii with non-ascii' # === bytes.istitle() === assert b'Hello World'.istitle(), 'istitle basic' assert not b'hello world'.istitle(), 'istitle lowercase' assert not b'HELLO WORLD'.istitle(), 'istitle uppercase' assert b'Hello'.istitle(), 'istitle single word' assert not b''.istitle(), 'istitle empty is false' # === bytes.rfind() === assert b'hello'.rfind(b'l') == 3, 'rfind finds last occurrence' assert b'hello'.rfind(b'x') == -1, 'rfind not found' assert b'hello'.rfind(b'') == 5, 'rfind empty at end' assert b'aaaa'.rfind(b'aa') == 2, 'rfind non-overlapping from right' assert b'hello'.rfind(b'l', 0, 3) == 2, 'rfind with range' # === bytes.rindex() === assert b'hello'.rindex(b'l') == 3, 'rindex finds last occurrence' assert b'hello'.rindex(b'') == 5, 'rindex empty at end' # === bytes.strip() === assert b' hello '.strip() == b'hello', 'strip whitespace' assert b'hello'.strip() == b'hello', 'strip no whitespace' assert b'xxxhelloxxx'.strip(b'x') == b'hello', 'strip custom chars' assert b''.strip() == b'', 'strip empty' assert b' '.strip() == b'', 'strip all whitespace' # === bytes.lstrip() === assert b' hello '.lstrip() == b'hello ', 'lstrip whitespace' assert b'xxxhello'.lstrip(b'x') == b'hello', 'lstrip custom chars' assert b''.lstrip() == b'', 'lstrip empty' # === bytes.rstrip() === assert b' hello '.rstrip() == b' hello', 'rstrip whitespace' assert b'helloxxx'.rstrip(b'x') == b'hello', 'rstrip custom chars' assert b''.rstrip() == b'', 'rstrip empty' # === bytes.removeprefix() === assert b'hello'.removeprefix(b'he') == b'llo', 'removeprefix found' assert 
b'hello'.removeprefix(b'xxx') == b'hello', 'removeprefix not found' assert b'hello'.removeprefix(b'') == b'hello', 'removeprefix empty' assert b''.removeprefix(b'x') == b'', 'removeprefix empty bytes' # === bytes.removesuffix() === assert b'hello'.removesuffix(b'lo') == b'hel', 'removesuffix found' assert b'hello'.removesuffix(b'xxx') == b'hello', 'removesuffix not found' assert b'hello'.removesuffix(b'') == b'hello', 'removesuffix empty' assert b''.removesuffix(b'x') == b'', 'removesuffix empty bytes' # === bytes.split() === assert b'a,b,c'.split(b',') == [b'a', b'b', b'c'], 'split basic' assert b'a b c'.split() == [b'a', b'b', b'c'], 'split whitespace' assert b'a b c'.split() == [b'a', b'b', b'c'], 'split multiple whitespace' assert b'a,b,c'.split(b',', 1) == [b'a', b'b,c'], 'split maxsplit' assert b''.split() == [], 'split empty bytes' assert b'hello'.split(b'x') == [b'hello'], 'split not found' # === bytes.rsplit() === assert b'a,b,c'.rsplit(b',') == [b'a', b'b', b'c'], 'rsplit basic' assert b'a,b,c'.rsplit(b',', 1) == [b'a,b', b'c'], 'rsplit maxsplit' assert b'a b c'.rsplit() == [b'a', b'b', b'c'], 'rsplit whitespace' # === bytes.splitlines() === assert b'a\nb\nc'.splitlines() == [b'a', b'b', b'c'], 'splitlines newline' assert b'a\r\nb\rc'.splitlines() == [b'a', b'b', b'c'], 'splitlines mixed' assert b'a\nb\n'.splitlines() == [b'a', b'b'], 'splitlines trailing' assert b'a\nb'.splitlines(True) == [b'a\n', b'b'], 'splitlines keepends' assert b''.splitlines() == [], 'splitlines empty' # === bytes.partition() === assert b'hello world'.partition(b' ') == (b'hello', b' ', b'world'), 'partition found' assert b'hello'.partition(b'x') == (b'hello', b'', b''), 'partition not found' assert b'hello world here'.partition(b' ') == (b'hello', b' ', b'world here'), 'partition first' # === bytes.rpartition() === assert b'hello world'.rpartition(b' ') == (b'hello', b' ', b'world'), 'rpartition found' assert b'hello'.rpartition(b'x') == (b'', b'', b'hello'), 'rpartition not 
found' assert b'hello world here'.rpartition(b' ') == (b'hello world', b' ', b'here'), 'rpartition last' # === bytes.replace() === assert b'hello'.replace(b'l', b'L') == b'heLLo', 'replace all' assert b'hello'.replace(b'l', b'L', 1) == b'heLlo', 'replace count' assert b'hello'.replace(b'x', b'y') == b'hello', 'replace not found' assert b'aaa'.replace(b'a', b'bb') == b'bbbbbb', 'replace longer' assert b'aaa'.replace(b'aa', b'b') == b'ba', 'replace non-overlapping' # === bytes.center() === assert b'hello'.center(10) == b' hello ', 'center basic' assert b'hello'.center(10, b'*') == b'**hello***', 'center fillbyte' assert b'hello'.center(3) == b'hello', 'center too short' # === bytes.ljust() === assert b'hello'.ljust(10) == b'hello ', 'ljust basic' assert b'hello'.ljust(10, b'*') == b'hello*****', 'ljust fillbyte' assert b'hello'.ljust(3) == b'hello', 'ljust too short' # === bytes.rjust() === assert b'hello'.rjust(10) == b' hello', 'rjust basic' assert b'hello'.rjust(10, b'*') == b'*****hello', 'rjust fillbyte' assert b'hello'.rjust(3) == b'hello', 'rjust too short' # === bytes.zfill() === assert b'42'.zfill(5) == b'00042', 'zfill basic' assert b'-42'.zfill(5) == b'-0042', 'zfill negative' assert b'+42'.zfill(5) == b'+0042', 'zfill positive' assert b'hello'.zfill(3) == b'hello', 'zfill too short' # === bytes.join() === assert b','.join([b'a', b'b', b'c']) == b'a,b,c', 'join list' assert b''.join([b'a', b'b']) == b'ab', 'join empty separator' assert b','.join([]) == b'', 'join empty iterable' assert b'-'.join([b'hello']) == b'hello', 'join single item' # === bytes.hex() === assert b'\xde\xad\xbe\xef'.hex() == 'deadbeef', 'hex basic' assert b''.hex() == '', 'hex empty' assert b'AB'.hex() == '4142', 'hex letters' assert b'\x00\xff'.hex() == '00ff', 'hex boundary' assert b'\xde\xad\xbe\xef'.hex(':') == 'de:ad:be:ef', 'hex with separator' assert b'\xde\xad\xbe\xef'.hex(':', 2) == 'dead:beef', 'hex with bytes_per_sep' # Test positive bytes_per_sep (partial group at start) 
# bytes.hex(sep, bytes_per_sep) grouping, continued.
# With a positive bytes_per_sep the leftover short group appears first in the output.
assert b'\x01\x02\x03\x04\x05'.hex(':', 2) == '01:0203:0405', 'hex +2 odd bytes'
assert b'\x01\x02\x03'.hex(':', 2) == '01:0203', 'hex +2 three bytes'
# Test negative bytes_per_sep (partial group at end)
assert b'\x01\x02\x03\x04\x05'.hex(':', -2) == '0102:0304:05', 'hex -2 odd bytes'
assert b'\x01\x02\x03'.hex(':', -2) == '0102:03', 'hex -2 three bytes'

# === bytes.fromhex() ===
# Upper- and lower-case digits decode to the same bytes; spaces between pairs are accepted.
assert bytes.fromhex('deadbeef') == b'\xde\xad\xbe\xef', 'fromhex basic'
assert bytes.fromhex('DEADBEEF') == b'\xde\xad\xbe\xef', 'fromhex uppercase'
assert bytes.fromhex('') == b'', 'fromhex empty'
assert bytes.fromhex('de ad be ef') == b'\xde\xad\xbe\xef', 'fromhex with spaces'
assert bytes.fromhex('4142') == b'AB', 'fromhex letters'

# === bytes.fromhex() with whitespace ===
# Whitespace is only allowed BETWEEN byte pairs, not within a pair
assert bytes.fromhex(' 01 ') == b'\x01', 'fromhex whitespace around bytes is stripped'
assert bytes.fromhex('01 23') == b'\x01\x23', 'fromhex whitespace between byte pairs'

# === bytes.fromhex() errors ===
# Pattern used by every case below: the call is followed by `assert False`.
# AssertionError is not a ValueError, so the `except ValueError` clause does not
# swallow it and the case fails when no error is raised at all.

# Odd number of hex digits (no invalid chars, just odd count)
try:
    bytes.fromhex('0')
    assert False, 'fromhex odd digits should error'
except ValueError as e:
    assert str(e) == 'fromhex() arg must contain an even number of hexadecimal digits', (
        f'fromhex odd digits message, error: {e}'
    )

# Same odd-digit input with a leading space: the expected message is unchanged.
try:
    bytes.fromhex(' 0')
    assert False, 'fromhex odd digits after whitespace should error'
except ValueError as e:
    assert str(e) == 'fromhex() arg must contain an even number of hexadecimal digits', (
        f'fromhex odd digits after whitespace message, error: {e}'
    )

# Whitespace within a byte pair is invalid (space is not a hex digit)
# In '0 1' the space sits at index 1, which is the position named in the expected message.
try:
    bytes.fromhex('0 1')
    assert False, 'fromhex whitespace within pair should error'
except ValueError as e:
    assert str(e) == 'non-hexadecimal number found in fromhex() arg at position 1', (
        f'fromhex whitespace within pair message, error: {e}'
    )

# Invalid hex character
# 'g' is also at index 1, so the expected message is identical to the case above.
try:
    bytes.fromhex('0g')
    assert False, 'fromhex invalid hex char should error'
except ValueError as e:
    assert str(e) == 'non-hexadecimal number found in fromhex() arg at position 1', (
        f'fromhex invalid hex char message, error: {e}'
    )

# === bytes.fromhex() instance access ===
# fromhex is a classmethod but should also work on instances
# The receiver's own contents are irrelevant: b'hello'.fromhex(...) returns only the decoded argument.
assert b''.fromhex('4142') == b'AB', 'fromhex on bytes instance'
assert b'hello'.fromhex('deadbeef') == b'\xde\xad\xbe\xef', 'fromhex on non-empty instance'

# === bytes.startswith/endswith with tuple of prefixes ===
# A tuple argument matches when any element matches, whatever its position in the tuple.
assert b'hello'.startswith((b'he', b'wo')), 'startswith tuple first match'
assert b'hello'.startswith((b'wo', b'he')), 'startswith tuple second match'
assert not b'hello'.startswith((b'wo', b'ab')), 'startswith tuple no match'
# (b'',) is a one-element tuple holding the empty prefix, which always matches.
assert b'hello'.startswith((b'',)), 'startswith tuple with empty bytes'
assert b'hello'.startswith((b'hello', b'world')), 'startswith tuple exact match'
assert b'hello'.endswith((b'lo', b'ld')), 'endswith tuple first match'
assert b'hello'.endswith((b'ld', b'lo')), 'endswith tuple second match'
assert not b'hello'.endswith((b'he', b'ab')), 'endswith tuple no match'
assert b'hello'.endswith((b'',)), 'endswith tuple with empty bytes'
assert b'hello'.endswith((b'hello', b'world')), 'endswith tuple exact match'
# startswith/endswith tuple with start/end
assert b'abcdef'.startswith((b'bc', b'cd'), 1), 'startswith tuple with start'
assert b'abcdef'.endswith((b'de', b'cd'), 0, 5), 'endswith tuple with end'

# === Empty-substring edge cases ===
# Edge case: start == len (boundary) - this works
# len(b'hello') is 5, so start=5 points just past the last byte; an empty needle still matches there.
assert b'hello'.find(b'', 5) == 5, 'find empty at len returns len'
assert b'hello'.count(b'', 5) == 1, 'count empty at len returns 1'
assert b'hello'.startswith(b'', 5), 'startswith empty at len is true'
assert b'hello'.endswith(b'', 5), 'endswith empty at len is true'

# TODO: These edge cases when start > len need to be fixed
# CPython returns -1/0/False for these, currently Monty doesn't handle this correctly
# assert b'hello'.find(b'', 10) == -1, 'find empty when start > len returns -1'
# assert b'hello'.count(b'', 10) == 0, 'count empty when start > len returns 0' # assert not b'hello'.startswith(b'', 10), 'startswith empty when start > len is false' # assert not b'hello'.endswith(b'', 10), 'endswith empty when start > len is false' # assert b'hello'.rfind(b'', 10) == -1, 'rfind empty when start > len returns -1' # === bytes.hex() non-ASCII separator errors === try: b'\x01\x02'.hex('\xff') assert False, 'hex with non-ASCII separator should error' except ValueError as e: # CPython uses 'sep must be ASCII.' with period msg = str(e) assert 'sep' in msg.lower() and 'ascii' in msg.lower(), f'hex non-ASCII sep message, error: {e}' # === bytes.decode() with errors argument === # Valid errors values assert b'hello'.decode('utf-8', 'strict') == 'hello', 'decode with strict errors' assert b'hello'.decode('utf-8', 'ignore') == 'hello', 'decode with ignore errors' assert b'hello'.decode('utf-8', 'replace') == 'hello', 'decode with replace errors' # TODO: errors argument type validation - CPython raises TypeError for non-string errors # This is not implemented yet # try: # b'hello'.decode('utf-8', 123) # assert False, 'decode with non-string errors should error' # except TypeError as e: # assert 'str' in str(e), f'decode errors type error should mention str, error: {e}' # === Error message for unknown classmethod === # Error message should say 'bytes' not 'type' try: bytes.nonexistent() assert False, 'should raise AttributeError' except AttributeError as e: msg = str(e) assert 'bytes' in msg, f'error should mention bytes, got: {e}' assert 'nonexistent' in msg, f'error should mention method name, got: {e}' ================================================ FILE: crates/monty/test_cases/bytes__negative_count.py ================================================ bytes(-1) """ TRACEBACK: Traceback (most recent call last): File "bytes__negative_count.py", line 1, in bytes(-1) ~~~~~~~~~ ValueError: negative count """ ================================================ 
FILE: crates/monty/test_cases/bytes__ops.py
================================================
# === Bytes length ===
assert len(b'') == 0, 'len empty'
assert len(b'hello') == 5, 'len basic'

# === Bytes repr/str ===
# str() of a bytes object gives the same text as repr(), including the b'' wrapper.
assert repr(b'hello') == "b'hello'", 'bytes repr'
assert str(b'hello') == "b'hello'", 'bytes str'

# === Various bytes repr cases ===
assert repr(b'') == "b''", 'empty bytes repr'
# A payload containing a single quote is expected to be wrapped in double quotes.
assert repr(b"it's") == 'b"it\'s"', 'single quote bytes repr'
# Control bytes, non-printable bytes and backslashes are expected in escaped form.
assert repr(b'l1\nl2') == "b'l1\\nl2'", 'newline bytes repr'
assert repr(b'col1\tcol2') == "b'col1\\tcol2'", 'tab bytes repr'
assert repr(b'\x00\xff') == "b'\\x00\\xff'", 'non-printable bytes repr'
assert repr(b'back\\slash') == "b'back\\\\slash'", 'backslash bytes repr'

# === Bytes repetition (*) ===
# Both operand orders are covered; zero and negative counts give empty bytes.
assert b'ab' * 3 == b'ababab', 'bytes mult int'
assert 3 * b'ab' == b'ababab', 'int mult bytes'
assert b'x' * 0 == b'', 'bytes mult zero'
assert b'x' * -1 == b'', 'bytes mult negative'
assert b'' * 5 == b'', 'empty bytes mult'
assert b'ab' * 1 == b'ab', 'bytes mult one'

# === Bytes indexing (getitem) ===
# Basic indexing - returns integer byte values
assert b'hello'[0] == 104, 'bytes getitem index 0 (h=104)'
assert b'hello'[1] == 101, 'bytes getitem index 1 (e=101)'
assert b'hello'[4] == 111, 'bytes getitem last index (o=111)'
# Negative indexing
assert b'hello'[-1] == 111, 'bytes getitem -1 (o=111)'
assert b'hello'[-2] == 108, 'bytes getitem -2 (l=108)'
assert b'hello'[-5] == 104, 'bytes getitem -5 (h=104)'
# Single byte
assert b'x'[0] == 120, 'bytes getitem single byte at 0'
assert b'x'[-1] == 120, 'bytes getitem single byte at -1'
# ASCII printable range
assert b' '[0] == 32, 'bytes getitem space (32)'
assert b'~'[0] == 126, 'bytes getitem tilde (126)'
# Non-printable bytes
assert b'\x00'[0] == 0, 'bytes getitem null byte'
assert b'\xff'[0] == 255, 'bytes getitem 0xff'
assert b'\n'[0] == 10, 'bytes getitem newline'
assert b'\t'[0] == 9, 'bytes getitem tab'
# Heap-allocated bytes
# The value is produced by the bytes() constructor here instead of being indexed
# directly as a literal.
b = bytes(b'abc')
assert b[0] == 97, 'heap bytes getitem 0'
assert b[1] == 98, 'heap bytes getitem 1'
assert b[-1] == 99, 'heap bytes negative getitem'
# Variable index
b = b'xyz'
i = 1
assert b[i] == 121, 'bytes getitem with variable index'
# Verify return type is int
val = b'A'[0]
assert type(val) == int, 'bytes getitem returns int'
assert val == 65, 'bytes getitem value is correct'
# Bool indices (True=1, False=0)
b = b'abc'
assert b[False] == 97, 'bytes getitem with False'
assert b[True] == 98, 'bytes getitem with True'

# === Bytes comparisons ===
assert b'abc' < b'abd', 'bytes < bytes'
assert b'abd' > b'abc', 'bytes > bytes'
assert b'abc' <= b'abc', 'bytes <= bytes equal'
assert b'abc' <= b'abd', 'bytes <= bytes less'
assert b'abd' >= b'abd', 'bytes >= bytes equal'
assert b'abd' >= b'abc', 'bytes >= bytes greater'
# Different lengths
# A proper prefix orders before the longer value; empty bytes order before everything else.
assert b'ab' < b'abc', 'shorter prefix is less'
assert b'' < b'a', 'empty bytes is less'
assert b'abc' > b'ab', 'longer bytes with same prefix is greater'
# Non-ASCII byte values
# 0xfe < 0xff only holds if bytes compare as unsigned values.
assert b'\x00' < b'\xff', 'null byte < 0xff'
assert b'\xfe' < b'\xff', '0xfe < 0xff'
# Sorting
assert sorted([b'c', b'a', b'b']) == [b'a', b'b', b'c'], 'sorted bytes list'
assert sorted([b'bb', b'a', b'ba']) == [b'a', b'ba', b'bb'], 'sorted different length bytes'

================================================
FILE: crates/monty/test_cases/bytes__startswith_str_error.py
================================================
# Test that bytes.startswith with str raises TypeError
b'hello'.startswith('h')
# NOTE(review): the expectation string below is on a single line in this dump and
# nothing follows "in" before the echoed source; presumably line breaks and the
# frame name were lost in extraction. Confirm against the upstream fixture.
""" TRACEBACK: Traceback (most recent call last): File "bytes__startswith_str_error.py", line 2, in b'hello'.startswith('h') ~~~~~~~~~~~~~~~~~~~~~~~~ TypeError: startswith first arg must be bytes or a tuple of bytes, not str """

================================================
FILE: crates/monty/test_cases/call_object.py
================================================
# Binds the builtin `len` to another name and calls it through that name.
x = len
x('abc')
# NOTE(review): the marker below is presumably read by the test harness as the
# expected value of the final expression (len('abc') is 3). Confirm.
# Return=3

================================================
FILE: crates/monty/test_cases/chain_comparison__all.py
================================================
# === Basic chain comparisons ===
# `a < b < c` means `a < b and b < c`; each case notes which link decides the result.
assert (1 < 2 < 3) == True, 'ascending chain'
assert (1 < 3 < 2) == False, 'fails at second comparison'
assert (3 < 2 < 1) == False, 'fails at first comparison'
assert (1 <= 2 <= 2) == True, 'with equality'
# Same chain used directly as the assert condition, without parentheses or `== True`.
assert 1 <= 2 <= 2, 'with equality'
assert 1 <= 2 <= 2 <= 3, 'chained with equality'

# === Mixed operators ===
assert (1 < 2 <= 2 < 3) == True, 'mixed lt and le'
assert (1 == 1 == 1) == True, 'triple equality'
# Only adjacent operands are compared (1 != 2 and 2 != 1); the outer 1 and 1 never meet.
assert (1 != 2 != 1) == True, 'not-equal chain (not transitive)'

# === Longer chains ===
assert (1 < 2 < 3 < 4 < 5) == True, '5-way ascending'
assert (1 < 2 < 3 < 2 < 5) == False, 'fails in middle'

# === With variables and expressions ===
x = 5
assert (1 < x < 10) == True, 'variable in chain'
# With x = 5 this is 0 < 2 < 5 < 6.
assert (0 < x - 3 < x < x + 1) == True, 'expressions'


# === Short-circuit evaluation ===
def test_short_circuit():
    # Each helper records its own name in `calls` when invoked, so the list shows
    # which operands were actually evaluated and in what order.
    calls = []

    def a():
        calls.append('a')
        return 1

    def b():
        calls.append('b')
        return 0  # This will make first comparison fail

    def c():
        calls.append('c')
        return 2

    # Test: first comparison fails, c() should not be called
    result = a() < b() < c()  # 1 < 0 is False, c() should not be called
    assert result == False, 'short circuit result'
    assert calls == ['a', 'b'], 'c not called due to short circuit'


test_short_circuit()


# === Single evaluation of intermediate values ===
def test_single_eval():
    # `count` is incremented on every call to middle(); count == 1 afterwards shows
    # the middle operand was evaluated once although it takes part in two comparisons.
    count = 0

    def middle():
        nonlocal count
        count += 1
        return 5

    result = 1 < middle() < 10
    assert result == True, 'chain result'
    assert count == 1, 'middle() called exactly once'


test_single_eval()

# === Identity comparisons ===
# b and c are bound to the very list object that a names, so every `is` link holds.
a = [1]
b = a
c = a
assert (a is b is c) == True, 'is chain same object'

# === Containment checks ===
# Evaluates as (1 in [1, 2]) and ([1, 2] in [[1, 2], [3]]).
assert (1 in [1, 2] in [[1, 2], [3]]) == True, 'in chain'

# === Verify no namespace pollution ===
# Note: The old implementation used _chain_cmp_N variables which would leak.
# The new stack-based implementation doesn't create any intermediate variables. # We can't easily test for namespace pollution without dir(), so we just verify # that chain comparisons work correctly (covered by tests above). ================================================ FILE: crates/monty/test_cases/closure__param_shadows_outer.py ================================================ # === Parameter shadows outer local (basic) === def outer_basic(): x = 42 def inner(x): return x + 1 return inner(10) assert outer_basic() == 11, 'inner param should shadow outer local' # === Parameter shadows outer local (multiple params) === def outer_multi(): a = 100 b = 200 def inner(a, b): return a + b return inner(1, 2) assert outer_multi() == 3, 'both params should shadow outer locals' # === Mixed: one param shadows, one captures === def outer_mixed(): x = 10 y = 20 def inner(x): return x + y return inner(5) assert outer_mixed() == 25, 'x should be param (5), y should be captured (20)' # === Parameter shadows with default value === def outer_default(): x = 99 def inner(x=7): return x return inner() assert outer_default() == 7, 'default param should shadow outer local' # === Deeply nested: param shadows grandparent local === def outer_deep(): x = 1000 def middle(): def inner(x): return x * 2 return inner(3) return middle() assert outer_deep() == 6, 'inner param should shadow grandparent local' # === Parameter used in complex expression === def outer_expr(): scale = 100 def inner(n, scale): return n * scale + 1 return inner(5, 10) assert outer_expr() == 51, 'scale param should shadow outer scale' ================================================ FILE: crates/monty/test_cases/closure__pep448.py ================================================ # Tests for PEP 448 unpacking inside closures. # These exercise the collect_*_from_expr helpers in prepare.rs which walk # expressions to find walrus-operator assignments, cell variables, and # referenced names in nested functions. 
# Closures that reference variables
# used in PEP 448 positions (dict unpack, list/tuple/set literal, call args)
# are the only way to reach these code paths.


# === Closure capturing variable used in dict unpack ===
def outer_dict():
    d1 = {'a': 1, 'b': 2}
    d2 = {'c': 3}

    def inner():
        # d1 and d2 are only read here, so both are captured from outer_dict
        return {**d1, **d2}

    return inner()


assert outer_dict() == {'a': 1, 'b': 2, 'c': 3}, 'closure: dict unpack'


# === Closure capturing variable used in list unpack ===
def outer_list():
    items = [1, 2, 3]
    extra = [4, 5]

    def inner():
        # Captured names appear only inside the starred list elements
        return [*items, *extra]

    return inner()


assert outer_list() == [1, 2, 3, 4, 5], 'closure: list unpack'


# === Closure capturing variable used in tuple unpack ===
def outer_tuple():
    a = (1, 2)
    b = (3, 4)

    def inner():
        # Captured names appear only inside the starred tuple elements
        return (*a, *b)

    return inner()


assert outer_tuple() == (1, 2, 3, 4), 'closure: tuple unpack'


# === Closure capturing variable used in set unpack ===
def outer_set():
    items = [1, 2, 3]

    def inner():
        # Captured name appears only inside the starred set element
        return {*items}

    return inner()


assert outer_set() == {1, 2, 3}, 'closure: set unpack'


# === Closure using PEP 448 in a function call (single * and **) ===
def outer_call_star():
    def f(*args, **kwargs):
        return (args, kwargs)

    args = [1, 2, 3]
    kw = {'x': 10}

    def inner():
        # f, args and kw all come from outer_call_star's scope
        return f(*args, **kw)

    return inner()


assert outer_call_star() == ((1, 2, 3), {'x': 10}), 'closure: call *args **kw'


# === Closure using multiple * and ** in a call ===
def outer_multi():
    def f(*args, **kwargs):
        return (args, kwargs)

    a = [1, 2]
    b = [3, 4]
    kw1 = {'x': 10}
    kw2 = {'y': 20}

    def inner():
        # Two positional unpacks and two keyword unpacks in a single call
        return f(*a, *b, **kw1, **kw2)

    return inner()


assert outer_multi() == ((1, 2, 3, 4), {'x': 10, 'y': 20}), 'closure: multi-star call'

# === Closure calling with keyword-only args (ArgExprs::Kwargs) ===
# The outer-function assignment `_precomp = f(a=val_a, b=val_b)` exercises
# collect_assigned_names_from_args for the Kwargs arm. The inner body exercises
# collect_cell_vars_from_args and collect_referenced_names_from_args for Kwargs.
def outer_kwargs_call():
    def f(**kwargs):
        return kwargs

    val_a = 10
    val_b = 20
    # Assignment RHS is a Kwargs call → triggers collect_assigned_names_from_args Kwargs arm
    _precomp = f(a=val_a, b=val_b)

    def inner():
        # Kwargs call in inner body → collect_cell/referenced_names Kwargs arm
        return f(a=val_a, b=val_b)

    return inner()


assert outer_kwargs_call() == {'a': 10, 'b': 20}, 'closure: keyword-only call'


# === Closure calling with positional + *star (ArgsKargs with args=Some) ===
# Exercises ArgsKargs branch where the positional-args field is non-None.
def outer_argsstar():
    def f(*args):
        return list(args)

    pos_val = 1
    items = [2, 3]
    # Assignment RHS has positional arg + star → ArgsKargs with args=Some([pos_val])
    _precomp = f(pos_val, *items)

    def inner():
        # Same call shape as the assignment above, but evaluated inside the closure
        return f(pos_val, *items)

    return inner()


assert outer_argsstar() == [1, 2, 3], 'closure: positional + *args'


# === Closure calling with named kwarg + **kw (ArgsKargs with kwargs=Some) ===
# Exercises ArgsKargs branch where the named-kwargs field is non-None.
def outer_kwargsstar():
    def f(**kwargs):
        return kwargs

    val = 1
    extra = {'b': 2}
    # Assignment RHS has named kwarg + double-star → ArgsKargs with kwargs=Some
    _precomp = f(a=val, **extra)

    def inner():
        # Same call shape as the assignment above, but evaluated inside the closure
        return f(a=val, **extra)

    return inner()


assert outer_kwargsstar() == {'a': 1, 'b': 2}, 'closure: named kwarg + **kw'

# === Closure with GeneralizedCall and Named kwarg ===
# Exercises the CallKwarg::Named arm in all three collect_*_names_from_args functions.
def outer_generalized_named(): def f(*args, **kwargs): return (args, kwargs) a = [1, 2] b = [3] key_val = 99 # Assignment RHS has GeneralizedCall with Named kwarg → collect_assigned_names Named arm _precomp = f(*a, *b, key=key_val) def inner(): # Named kwarg in GeneralizedCall → collect_cell/referenced_names Named arm return f(*a, *b, key=key_val) return inner() assert outer_generalized_named() == ((1, 2, 3), {'key': 99}), 'closure: generalized call with named kwarg' # === Closure with GeneralizedCall and plain Value arg === # Exercises the CallArg::Value path of the GeneralizedCall args loop, # covering the Value branch of the `CallArg::Value | CallArg::Unpack` OR-pattern. def outer_generalized_mixed(): def f(*args): return list(args) const = 0 items1 = [1, 2] items2 = [3, 4] # GeneralizedCall with Value(const) + Unpack(items1) + Unpack(items2) _precomp = f(const, *items1, *items2) def inner(): return f(const, *items1, *items2) return inner() assert outer_generalized_mixed() == [0, 1, 2, 3, 4], 'closure: generalized call with value + unpack args' ================================================ FILE: crates/monty/test_cases/closure__undefined_nonlocal.py ================================================ # accessing nonlocal before assignment should raise NameError def outer(): def inner(): nonlocal x return x # x not yet defined result = inner() x = 10 return result outer() # Raise=NameError("cannot access free variable 'x' where it is not associated with a value in enclosing scope") ================================================ FILE: crates/monty/test_cases/compare__mixed_types.py ================================================ # === Bool == Int equality === assert True == 1, 'True == 1' assert False == 0, 'False == 0' assert 1 == True, '1 == True' assert 0 == False, '0 == False' assert True != 2, 'True != 2' assert False != 1, 'False != 1' # === Bool == Float equality === assert True == 1.0, 'True == 1.0' assert False == 0.0, 'False == 0.0' assert 1.0 == 
True, '1.0 == True' assert 0.0 == False, '0.0 == False' assert True != 2.0, 'True != 2.0' assert 0.5 != False, '0.5 != False' # === Int == Float equality === assert 5 == 5.0, '5 == 5.0' assert 5.0 == 5, '5.0 == 5' assert 5 != 5.5, '5 != 5.5' assert 0 == 0.0, '0 == 0.0' assert -3 == -3.0, '-3 == -3.0' # === Int/Float ordering === assert 5 < 5.5, '5 < 5.5' assert 5.5 > 5, '5.5 > 5' assert 5 <= 5.0, '5 <= 5.0' assert 5.0 >= 5, '5.0 >= 5' assert 5 > 4.9, '5 > 4.9' assert 4.9 < 5, '4.9 < 5' # === Bool ordering (promotes to int) === assert True > False, 'True > False' assert False < True, 'False < True' assert True >= 1, 'True >= 1' assert False <= 0, 'False <= 0' assert True > 0, 'True > 0' assert True < 2, 'True < 2' assert True > 0.5, 'True > 0.5' assert True < 1.5, 'True < 1.5' assert False < 0.5, 'False < 0.5' assert False >= -1, 'False >= -1' # === Cross-type non-equality === assert 'hello' != 42, 'str != int' assert 42 != 'hello', 'int != str' assert b'hello' != 'hello', 'bytes != str' assert 'hello' != b'hello', 'str != bytes' assert None != 0, 'None != 0' assert 0 != None, '0 != None' assert [] != 'list', 'list != str' assert {} != 0, 'dict != int' # === LongInt cross-type comparisons === big = 2**100 big2 = 2**100 assert big == big2, 'LongInt == LongInt' assert big != 5, 'LongInt != int' assert big > 5, 'LongInt > int' assert 5 < big, 'int < LongInt' assert big >= 5, 'LongInt >= int' assert 5 <= big, 'int <= LongInt' small_big = 2**100 large_big = 2**101 assert small_big < large_big, 'LongInt < LongInt' assert large_big > small_big, 'LongInt > LongInt' assert big != 'hello', 'LongInt != str' # === Bytes ordering === assert b'abc' < b'abd', 'bytes lt' assert b'abc' <= b'abc', 'bytes le' assert b'abd' > b'abc', 'bytes gt' assert b'abc' >= b'abc', 'bytes ge' assert b'a' < b'b', 'single byte lt' assert b'' < b'a', 'empty bytes lt non-empty' # === String ordering === assert 'abc' < 'abd', 'str lt' assert 'abc' <= 'abc', 'str le' assert 'abd' > 'abc', 'str gt' assert 
'abc' >= 'abc', 'str ge' assert 'a' < 'b', 'single char lt' # === Heap-allocated string ordering (from split) === parts = 'banana,apple'.split(',') assert parts[1] < parts[0], 'heap str lt' assert parts[0] > parts[1], 'heap str gt' assert parts[0] >= parts[0], 'heap str ge self' assert parts[0] <= parts[0], 'heap str le self' # === Cross-type string ordering (interned vs heap) === heap_str = 'banana,apple'.split(',')[0] assert heap_str > 'apple', 'heap str gt interned' assert 'cherry' > heap_str, 'interned gt heap str' assert heap_str >= 'banana', 'heap str ge interned eq' assert 'banana' <= heap_str, 'interned le heap str eq' # === Containment: not in list === assert 999 not in [1, 2, 3], 'not in list' assert 0 not in [1, 2, 3], 'zero not in list' # === Containment: not in tuple === assert 'z' not in ('a', 'b', 'c'), 'not in tuple' assert 0 not in (1, 2, 3), 'zero not in tuple' # === Containment: in/not in set === assert 2 in {1, 2, 3}, 'in set' assert 99 not in {1, 2, 3}, 'not in set' # === Containment: in/not in frozenset === assert 2 in frozenset({1, 2, 3}), 'in frozenset' assert 99 not in frozenset({1, 2, 3}), 'not in frozenset' # === Containment: in/not in list (found) === assert 2 in [1, 2, 3], 'in list' assert 'b' in ['a', 'b', 'c'], 'str in list' # === Containment: in/not in tuple (found) === assert 'b' in ('a', 'b', 'c'), 'str in tuple' assert 2 in (1, 2, 3), 'int in tuple' ================================================ FILE: crates/monty/test_cases/comprehension__all.py ================================================ # === Basic list comprehension === assert [x for x in [1, 2, 3]] == [1, 2, 3], 'identity' assert [x * 2 for x in [1, 2, 3]] == [2, 4, 6], 'transform' assert [x + 1 for x in range(5)] == [1, 2, 3, 4, 5], 'range' # === With filter === assert [x for x in [1, 2, 3, 4] if x > 2] == [3, 4], 'filter' assert [x for x in [1, 2, 3, 4, 5] if x % 2 == 0] == [2, 4], 'even filter' assert [x for x in range(20) if x % 2 == 0 if x % 3 == 0] == [0, 6, 12, 
18], 'multi-filter' assert [x * 2 for x in [1, 2, 3, 4] if x > 1 if x < 4] == [4, 6], 'transform with multi-filter' # === Nested for === assert [x + y for x in [1, 2] for y in [10, 20]] == [11, 21, 12, 22], 'nested' assert [(x, y) for x in [1, 2] for y in ['a', 'b']] == [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')], 'nested tuple' assert [x * y for x in [1, 2, 3] for y in [10, 100]] == [10, 100, 20, 200, 30, 300], 'nested multiply' # === Nested with filter === assert [x + y for x in [1, 2, 3] if x > 1 for y in [10, 20] if y > 10] == [22, 23], 'nested with filters' # === Set comprehension === assert {x for x in [1, 2, 2, 3]} == {1, 2, 3}, 'set dedup' assert {x for x in [1, 2, 3] if x > 1} == {2, 3}, 'set filter' assert {x * 2 for x in [1, 2, 3]} == {2, 4, 6}, 'set transform' assert {x % 3 for x in range(10)} == {0, 1, 2}, 'set modulo' # === Dict comprehension === assert {x: x * 2 for x in [1, 2, 3]} == {1: 2, 2: 4, 3: 6}, 'dict' assert {x: x for x in [1, 2, 3] if x > 1} == {2: 2, 3: 3}, 'dict filter' assert {str(x): x for x in [1, 2, 3]} == {'1': 1, '2': 2, '3': 3}, 'dict str keys' assert {x: y for x in [1, 2] for y in [10, 20]} == {1: 20, 2: 20}, 'dict nested overwrites' # === Scope isolation === x = 'outer' result = [x for x in [1, 2, 3]] assert x == 'outer', 'loop var does not leak' y = 'before' result2 = [y * 2 for y in [1, 2]] assert y == 'before', 'loop var y does not leak' # === Access enclosing scope === multiplier = 10 assert [x * multiplier for x in [1, 2]] == [10, 20], 'closure' prefix = 'item_' assert [prefix + str(x) for x in [1, 2, 3]] == ['item_1', 'item_2', 'item_3'], 'closure string' base = [1, 2, 3] assert [x + 10 for x in base] == [11, 12, 13], 'closure list' # === Capture when iter uses same name as target === def outer_capture_same_name(): x = [1, 2, 3] def inner(): return [x for x in x] return inner() assert outer_capture_same_name() == [1, 2, 3], 'iter uses outer x' # === Empty iterables === assert [x for x in []] == [], 'empty list' assert {x for 
x in []} == set(), 'empty set' assert {x: x for x in []} == {}, 'empty dict' # === Filter removes all === assert [x for x in [1, 2, 3] if x > 10] == [], 'filter all' assert {x for x in [1, 2, 3] if x > 10} == set(), 'set filter all' assert {x: x for x in [1, 2, 3] if x > 10} == {}, 'dict filter all' # === Complex expressions === assert [x**2 for x in [1, 2, 3, 4]] == [1, 4, 9, 16], 'square' assert [len(s) for s in ['a', 'bb', 'ccc']] == [1, 2, 3], 'len' assert [[y for y in range(x)] for x in [1, 2, 3]] == [[0], [0, 1], [0, 1, 2]], 'nested comprehension' # === Nested generator referencing prior loop var === # Second generator's iter references first generator's loop variable assert [y for x in [[1, 2], [3, 4]] for y in x] == [1, 2, 3, 4], 'flatten nested lists' assert [(x, y) for x in [1, 2] for y in range(x)] == [(1, 0), (2, 0), (2, 1)], 'second iter uses first var' def outer_nested_comp(): xs = [[1, 2], [3, 4]] def inner(): return [y for x in xs for y in x] return inner() assert outer_nested_comp() == [1, 2, 3, 4], 'nested comp in closure' # === Tuple unpacking in comprehensions === pairs = [(1, 'a'), (2, 'b'), (3, 'c')] assert [x for x, y in pairs] == [1, 2, 3], 'unpack first element' assert [y for x, y in pairs] == ['a', 'b', 'c'], 'unpack second element' assert [str(x) + str(y) for x, y in [(1, 2), (3, 4)]] == ['12', '34'], 'unpack and use both' assert [(y, x) for x, y in pairs] == [('a', 1), ('b', 2), ('c', 3)], 'swap unpacked elements' # Tuple unpacking with filter assert [x for x, y in pairs if x > 1] == [2, 3], 'unpack with filter' assert [y for x, y in pairs if y != 'b'] == ['a', 'c'], 'unpack filter on second' # Triple unpacking triples = [(1, 2, 3), (4, 5, 6)] assert [a + b + c for a, b, c in triples] == [6, 15], 'triple unpack sum' assert [b for a, b, c in triples] == [2, 5], 'triple unpack middle' # Dict comprehension with unpacking d = {k: v for k, v in pairs} assert d == {1: 'a', 2: 'b', 3: 'c'}, 'dict comp with unpack' assert {v: k for k, v in 
pairs} == {'a': 1, 'b': 2, 'c': 3}, 'dict comp swap key/value' # Set comprehension with unpacking assert {x for x, y in pairs} == {1, 2, 3}, 'set comp unpack first' assert {y for x, y in pairs} == {'a', 'b', 'c'}, 'set comp unpack second' # Unpacking with dict.items() d2 = {'x': 10, 'y': 20, 'z': 30} assert [k for k, v in d2.items()] == ['x', 'y', 'z'], 'unpack dict items keys' assert [v for k, v in d2.items()] == [10, 20, 30], 'unpack dict items values' assert {v: k for k, v in d2.items()} == {10: 'x', 20: 'y', 30: 'z'}, 'dict comp invert dict' # Nested comprehension with unpacking matrix = [[(1, 2), (3, 4)], [(5, 6), (7, 8)]] assert [[a + b for a, b in row] for row in matrix] == [[3, 7], [11, 15]], 'nested comp unpack' # Scope isolation with unpacking (vars don't leak) x = 'outer_x' y = 'outer_y' result = [x + y for x, y in [(1, 2)]] assert x == 'outer_x', 'x does not leak from unpack' assert y == 'outer_y', 'y does not leak from unpack' # Unpacking in closure def outer_unpack(): items = [(1, 2), (3, 4)] def inner(): return [a * b for a, b in items] return inner() assert outer_unpack() == [2, 12], 'unpack in closure' # Capture variable used in unpacking pattern def outer_shadow_unpack(): x = 100 def inner(): # x in unpacking shadows the outer x, but we can still reference outer x in expression # Actually, the x in the comprehension shadows outer x, so this tests scope isolation pairs = [(1, 2), (3, 4)] return [x + y for x, y in pairs] return inner() assert outer_shadow_unpack() == [3, 7], 'shadow unpack in closure' # === Generator expressions (temporary: treated as list comprehensions) === # TODO: When proper generators are implemented, these should return generator objects # instead of lists. For now, generator expressions are parsed as list comprehensions. # See iter__generator_expr.py for tests, and iter__generator_expr_type.py for # a type check test (xfail=cpython since CPython has real generators). 
# Generator in list() call - works identically in both Monty and CPython
assert list(x for x in [1, 2, 3]) == [1, 2, 3], 'generator in list()'
assert tuple(x for x in [1, 2, 3]) == (1, 2, 3), 'generator in tuple()'

# Generator with condition
assert list(x for x in range(10) if x % 2 == 0) == [0, 2, 4, 6, 8], 'generator with condition'

# Nested generators
assert list(x + y for x in range(3) for y in range(2)) == [0, 1, 1, 2, 2, 3], 'nested generator'

# Generator in sum()
assert sum(x for x in range(5)) == 10, 'generator in sum()'

# Generator with unpacking
pairs = [(1, 2), (3, 4)]
assert list(a + b for a, b in pairs) == [3, 7], 'generator with unpacking'

# list of strings join
assert ''.join(str(x) for x in range(5)) == '01234', 'list of strings join'
# A bare comma-separated expression builds a tuple, so this joins a tuple of strings
a = '1', '2', '3'
assert ''.join(a) == '123', 'tuple of strings join'

# === Regression: Iterator panic with try/except inside loop ===
# Issue: https://github.com/pydantic/monty/issues/177
# Verifies that exception handling in a comprehension inside a loop doesn't
# corrupt the outer loop's iterator (causing "expected Iterator on heap" panic).
# A prior loop is needed to potentially trigger incorrect stack depth tracking.
for _ in range(1): pass for s in ['hello']: try: # Inner comprehension raises exception [int(c) for c in s] except ValueError: pass ================================================ FILE: crates/monty/test_cases/comprehension__scope.py ================================================ [x for x in range(10)] try: x assert False, "Expected NameError for 'x' after comprehension" except NameError: pass ================================================ FILE: crates/monty/test_cases/comprehension__unbound_local.py ================================================ # Test that comprehension scoping raises UnboundLocalError when a generator's iter # references a later generator's loop variable (which is local but not yet assigned) z = ['outer'] result = [x for x in [1] for y in z for z in [[2], [3]]] """ TRACEBACK: Traceback (most recent call last): File "comprehension__unbound_local.py", line 6, in result = [x for x in [1] for y in z for z in [[2], [3]]] ~ UnboundLocalError: cannot access local variable 'z' where it is not associated with a value """ ================================================ FILE: crates/monty/test_cases/dataclass__basic.py ================================================ # call-external # === Basic dataclass tests === # Get immutable dataclass from external function point = make_point() # === repr and str === assert repr(point) == 'Point(x=1, y=2)', f'point repr {point=!r}' assert str(point) == 'Point(x=1, y=2)', 'point str' # === Boolean truthiness === # Dataclasses are always truthy (like Python class instances) assert bool(point), 'dataclass bool is True' # === Hash for immutable dataclass === # Immutable (frozen) dataclasses are hashable h1 = hash(point) assert h1 != 0, 'hash is not zero' # Hash is consistent - same object hashes to same value h2 = hash(point) assert h1 == h2, 'hash is consistent' # Equal frozen dataclasses hash to same value point2 = make_point() assert hash(point) == hash(point2), 'equal dataclasses have equal hash' # Frozen 
# dataclass can be used as dict key
d = {point: 'first'}
assert d[point] == 'first', 'frozen dataclass as dict key'
assert d[point2] == 'first', 'equal frozen dataclass looks up same value'

# Frozen dataclass can be added to set
s = {point, point2}
assert len(s) == 1, 'equal frozen dataclasses deduplicated in set'

# Different field values produce different hash
alice = make_user('Alice')
bob = make_user('Bob')
assert hash(alice) != hash(bob), 'different field values have different hash'

# === Mutable dataclass ===
mut_point = make_mutable_point()
assert repr(mut_point) == 'MutablePoint(x=1, y=2)', f'mutable point repr {mut_point=!r}'

# === Dataclass with string argument ===
alice = make_user('Alice')
assert repr(alice) == "User(name='Alice', active=True)", f'user repr with string field {alice=!r}'

# === Dataclass in list (using existing variables) ===
points = [point, mut_point, alice]
assert len(points) == 3, 'dataclass list length'

# === Attribute access (get) ===
# Access fields on immutable dataclass
assert point.x == 1, 'point.x is 1'
assert point.y == 2, 'point.y is 2'

# Access fields on mutable dataclass
assert mut_point.x == 1, 'mut_point.x is 1'
assert mut_point.y == 2, 'mut_point.y is 2'

# Access fields on dataclass with string field
assert alice.name == 'Alice', 'alice.name is Alice'
assert alice.active == True, 'alice.active is True'

# === Attribute assignment (set) ===
# Modify mutable dataclass
mut_point.x = 10
assert mut_point.x == 10, 'mut_point.x updated to 10'
mut_point.y = 20
assert mut_point.y == 20, 'mut_point.y updated to 20'
assert repr(mut_point) == 'MutablePoint(x=10, y=20)', f'repr after attribute update {mut_point=!r}'

# === Set other attributes ===
# An attribute that is not one of the repr'd fields can be assigned and read back,
# and the repr checked below is unchanged by it.
mut_point.z = 30
assert mut_point.z == 30, 'mut_point.z updated to 30'
assert repr(mut_point) == 'MutablePoint(x=10, y=20)', 'repr after attribute update'

# === Nested attribute access (chained get) ===
# Create outer dataclass with inner dataclass as field
outer = make_mutable_point()
inner = make_mutable_point()
inner.x = 100
inner.y = 200
outer.x = inner

# Chained attribute get: outer.x.y
assert outer.x.x == 100, 'outer.x.x is 100'
assert outer.x.y == 200, 'outer.x.y is 200'

# === Nested attribute assignment (chained set) ===
# Modify nested field via chained access
outer.x.x = 999
assert outer.x.x == 999, 'outer.x.x updated to 999'
outer.x.y = 888
assert outer.x.y == 888, 'outer.x.y updated to 888'

# Verify inner was modified (same object)
assert inner.x == 999, 'inner.x also updated to 999'
assert inner.y == 888, 'inner.y also updated to 888'

# === Deeper nesting (3 levels) ===
level1 = make_mutable_point()
level2 = make_mutable_point()
level3 = make_mutable_point()
level3.x = 42
level2.x = level3
level1.x = level2

# 3-level chained get
assert level1.x.x.x == 42, 'level1.x.x.x is 42'

# 3-level chained set
level1.x.x.x = 7
assert level1.x.x.x == 7, 'level1.x.x.x updated to 7'
assert level3.x == 7, 'level3.x also updated to 7'

# === Empty dataclass ===
empty = make_empty()
assert repr(empty) == 'Empty()', 'empty dataclass repr'
assert str(empty) == 'Empty()', 'empty dataclass str'

# === FrozenInstanceError is subclass of AttributeError ===
# Catching AttributeError should also catch FrozenInstanceError
frozen_point = make_point()
caught = False
try:
    frozen_point.x = 10
except AttributeError:
    caught = True
assert caught, 'FrozenInstanceError caught by AttributeError'

# In the error checks below, `assert False` sits inside the `try` body on purpose:
# the AssertionError it raises is not caught by the specific `except` clause, so a
# missing exception fails the test instead of passing silently.

# === Error: accessing non-existent attribute ===
try:
    point.nonexistent
    assert False, 'should have raised AttributeError for missing attr'
except AttributeError as e:
    assert str(e) == "'Point' object has no attribute 'nonexistent'", f'wrong message: {e}'

# === Error: accessing non-existent private attribute ===
try:
    point._private
    assert False, 'should have raised AttributeError for private attr'
except AttributeError as e:
    assert str(e) == "'Point' object has no attribute '_private'", f'wrong message: {e}'

# === Error: calling a dunder that doesn't exist ===
try:
    point.__nonexistent__()
    assert False, 'should have raised AttributeError for dunder'
except AttributeError as e:
    assert str(e) == "'Point' object has no attribute '__nonexistent__'", f'wrong message: {e}'

# === Error: calling a private method that doesn't exist ===
try:
    point._private_method()
    assert False, 'should have raised AttributeError for private method'
except AttributeError as e:
    assert str(e) == "'Point' object has no attribute '_private_method'", f'wrong message: {e}'

# === Error: calling a field value (not callable) ===
try:
    point.x()
    assert False, 'should have raised TypeError for calling int field'
except TypeError as e:
    assert str(e) == "'int' object is not callable", f'wrong message: {e}'

# === Error: calling a non-existent public method ===
try:
    point.nonexistent_method()
    assert False, 'should have raised AttributeError for missing method'
except AttributeError as e:
    assert str(e) == "'Point' object has no attribute 'nonexistent_method'", f'wrong message: {e}'

# === Error: same errors on mutable dataclass ===
try:
    mut_point.nonexistent
    assert False, 'should have raised AttributeError on mutable dc'
except AttributeError as e:
    assert str(e) == "'MutablePoint' object has no attribute 'nonexistent'", f'wrong message: {e}'

try:
    mut_point.x()
    assert False, 'should have raised TypeError on mutable dc field call'
except TypeError as e:
    assert str(e) == "'int' object is not callable", f'wrong message: {e}'

# === Method calls: no args (exercises ArgValues::prepend on Empty) ===
result = point.sum()
assert result == 3, f'Point.sum() should be 3, got {result}'

# === Method calls: two positional args (exercises ArgValues::prepend on Two) ===
new_point = point.add(10, 20)
assert new_point.x == 11, f'Point.add x should be 11, got {new_point.x}'
assert new_point.y == 22, f'Point.add y should be 22, got {new_point.y}'

# === Method calls: one positional arg (exercises ArgValues::prepend on One) ===
scaled = point.scale(3)
assert scaled.x == 3, f'Point.scale x should be 3, got {scaled.x}'
assert scaled.y == 6, f'Point.scale y should be 6, got {scaled.y}' # === Method calls: returning a string === desc = point.describe('pt') assert desc == 'pt(1, 2)', f'Point.describe should be pt(1, 2), got {desc}' # === Method calls on mutable dataclass === mut_p2 = make_mutable_point() mut_sum = mut_p2.sum() assert mut_sum == 3, f'MutablePoint.sum() should be 3, got {mut_sum}' # === Method calls on User dataclass (string field) === alice2 = make_user('Alice') greeting = alice2.greeting() assert greeting == 'Hello, Alice!', f'User.greeting should be Hello, Alice!, got {greeting}' # === Method call returning dataclass - chained access === p3 = point.add(0, 0) assert p3.x == 1, f'chained method access: p3.x should be 1, got {p3.x}' assert p3.y == 2, f'chained method access: p3.y should be 2, got {p3.y}' # === Method calls with keyword-only args (exercises ArgValues::prepend on Kwargs) === desc_kw = point.describe(label='custom') assert desc_kw == 'custom(1, 2)', f'Point.describe(label=) should be custom(1, 2), got {desc_kw}' # === Error: calling non-existent method on mutable dataclass === try: mut_p2.nonexistent_method() assert False, 'should have raised AttributeError for missing method on mutable dc' except AttributeError as e: assert str(e) == "'MutablePoint' object has no attribute 'nonexistent_method'", f'wrong message: {e}' # === Error: calling non-existent method on User === try: alice2.missing() assert False, 'should have raised AttributeError for missing method on User' except AttributeError as e: assert str(e) == "'User' object has no attribute 'missing'", f'wrong message: {e}' ================================================ FILE: crates/monty/test_cases/dataclass__call_field_error.py ================================================ # call-external # Test that calling a field value (not a method) raises TypeError point = make_point() point.x() """ TRACEBACK: Traceback (most recent call last): File "dataclass__call_field_error.py", line 4, in point.x() 
~~~~~~~~~ TypeError: 'int' object is not callable """ ================================================ FILE: crates/monty/test_cases/dataclass__frozen_set_error.py ================================================ # call-external # Test that assigning to a frozen dataclass raises FrozenInstanceError point = make_point() point.x = 10 """ TRACEBACK: Traceback (most recent call last): File "dataclass__frozen_set_error.py", line 4, in point.x = 10 ~~~~~~~ FrozenInstanceError: cannot assign to field 'x' """ ================================================ FILE: crates/monty/test_cases/dataclass__get_missing_attr_error.py ================================================ # call-external # Test that accessing a non-existent attribute on a dataclass raises AttributeError point = make_point() point.z """ TRACEBACK: Traceback (most recent call last): File "dataclass__get_missing_attr_error.py", line 4, in point.z AttributeError: 'Point' object has no attribute 'z' """ ================================================ FILE: crates/monty/test_cases/dict__get_unhashable_key.py ================================================ d = {} d.get([], 'fallback') # Raise=TypeError("cannot use 'list' as a dict key (unhashable type: 'list')") ================================================ FILE: crates/monty/test_cases/dict__literal_unhashable_key.py ================================================ {'a': 1, []: 2} # Raise=TypeError("cannot use 'list' as a dict key (unhashable type: 'list')") ================================================ FILE: crates/monty/test_cases/dict__method_pop_missing_error.py ================================================ d = {'a': 1} d.pop('missing') # Raise=KeyError('missing') ================================================ FILE: crates/monty/test_cases/dict__methods.py ================================================ # === dict.clear() === d = {'a': 1, 'b': 2} d.clear() assert d == {}, 'clear empties the dict' d = {} d.clear() assert d == {}, 'clear on empty 
dict is no-op'

# === dict.copy() ===
d = {'a': 1, 'b': 2}
copy = d.copy()
assert copy == {'a': 1, 'b': 2}, 'copy creates equal dict'
assert copy is not d, 'copy creates new dict object'
d['c'] = 3
assert 'c' not in copy, 'copy is independent'

d = {}
copy = d.copy()
assert copy == {}, 'copy empty dict'

# === dict.update() ===
d = {'a': 1}
d.update({'b': 2})
assert d == {'a': 1, 'b': 2}, 'update with dict'

d = {'a': 1}
d.update({'a': 10})
assert d == {'a': 10}, 'update overwrites existing key'

d = {'a': 1}
d.update()
assert d == {'a': 1}, 'update with no args is no-op'

d = {}
d.update([('a', 1), ('b', 2)])
assert d == {'a': 1, 'b': 2}, 'update with list of tuples'

# === dict.setdefault() ===
d = {'a': 1}
result = d.setdefault('a', 10)
assert result == 1, 'setdefault returns existing value'
assert d == {'a': 1}, 'setdefault does not overwrite'

d = {'a': 1}
result = d.setdefault('b', 2)
assert result == 2, 'setdefault returns default for new key'
assert d == {'a': 1, 'b': 2}, 'setdefault inserts new key'

d = {'a': 1}
result = d.setdefault('b')
assert result is None, 'setdefault default is None'
assert d == {'a': 1, 'b': None}, 'setdefault inserts None'

# === dict.popitem() ===
d = {'a': 1, 'b': 2}
item = d.popitem()
assert item == ('b', 2), 'popitem returns last inserted item'
assert d == {'a': 1}, 'popitem removes item'

d = {'x': 10}
item = d.popitem()
assert item == ('x', 10), 'popitem on single-item dict'
assert d == {}, 'dict is now empty'

# === dict.fromkeys() ===
d = dict.fromkeys(['a', 'b', 'c'])
assert d == {'a': None, 'b': None, 'c': None}, 'fromkeys with list, default None'

d = dict.fromkeys(['a', 'b'], 0)
assert d == {'a': 0, 'b': 0}, 'fromkeys with default value'

d = dict.fromkeys([])
assert d == {}, 'fromkeys with empty iterable'

d = dict.fromkeys('abc')
assert d == {'a': None, 'b': None, 'c': None}, 'fromkeys with string iterable'

d = dict.fromkeys(range(3), 'x')
assert d == {0: 'x', 1: 'x', 2: 'x'}, 'fromkeys with range iterable'

# The default value is evaluated once, so every key maps to the very same list object.
d = dict.fromkeys((1, 2, 3), [])
assert d[1] is d[2] and d[2] is d[3], 'fromkeys shares same value object for all keys'

# Duplicate keys - collapsed into a single entry that keeps its first-occurrence position
# (every key gets the same value here, so which occurrence "wins" is not observable)
d = dict.fromkeys(['a', 'b', 'a'], 1)
assert d == {'a': 1, 'b': 1}, 'fromkeys with duplicate keys'
assert list(d.keys()) == ['a', 'b'], 'fromkeys preserves first occurrence order'

# === dict.fromkeys() instance access ===
# fromkeys is a classmethod but should also work on instances
d = {}.fromkeys(['a', 'b'])
assert d == {'a': None, 'b': None}, 'fromkeys on empty dict instance'

d = {'x': 1}.fromkeys(['a', 'b'], 0)
assert d == {'a': 0, 'b': 0}, 'fromkeys on non-empty dict instance (ignores original)'

# === dict.update() with keyword arguments ===
d = {'a': 1}
d.update(b=2)
assert d == {'a': 1, 'b': 2}, 'update with single kwarg'

d = {'a': 1}
d.update(b=2, c=3)
assert d == {'a': 1, 'b': 2, 'c': 3}, 'update with multiple kwargs'

d = {'a': 1}
d.update(a=10)
assert d == {'a': 10}, 'update kwarg overwrites existing key'

d = {}
d.update(a=1, b=2)
assert d == {'a': 1, 'b': 2}, 'update empty dict with kwargs'

# update with both positional dict and kwargs
d = {'a': 1}
d.update({'b': 2}, c=3)
assert d == {'a': 1, 'b': 2, 'c': 3}, 'update with dict and kwargs'

# kwargs overwrite positional dict values
d = {'a': 1}
d.update({'b': 2}, b=20)
assert d == {'a': 1, 'b': 20}, 'update kwargs overwrite positional dict'

# update with iterable and kwargs
d = {}
d.update([('a', 1)], b=2)
assert d == {'a': 1, 'b': 2}, 'update with iterable and kwargs'

# === Error message for unknown classmethod ===
# Error message should say 'dict' not 'type'
try:
    dict.nonexistent()
    # Only reached if the attribute lookup did not raise; the AssertionError is
    # not caught by the handler below, so it fails the fixture.
    assert False, 'should raise AttributeError'
except AttributeError as e:
    msg = str(e)
    assert 'dict' in msg, f'error should mention dict, got: {e}'
    assert 'nonexistent' in msg, f'error should mention method name, got: {e}'

# === dict.update() sequence element error ===
# Invalid sequence elements should raise ValueError
# (the handler below also accepts TypeError)
try:
    d = {}
    d.update([('a', 1), 'x', ('c', 3)])  # 'x' at index 1 is not a 2-tuple
    assert False, 'should raise ValueError'
except (ValueError, TypeError) as e:
    msg = str(e)
    # Error message should mention 'length' requirement
    assert 'length' in msg.lower(), f'error should mention length, got: {e}'
    # TODO: CPython includes element index (#N) in error message
    # assert '#1' in msg, 'error should mention element index'

================================================
FILE: crates/monty/test_cases/dict__ops.py
================================================
# Fixture for basic dict operations; each `# === ... ===` banner below
# introduces one group of assertions.

# === Dict literals ===
assert {} == {}, 'empty literal'
assert {'a': 1} == {'a': 1}, 'single item literal'
assert {'a': 1, 'b': 2} == {'a': 1, 'b': 2}, 'multiple items literal'
assert {1: 'a', 2: 'b'} == {1: 'a', 2: 'b'}, 'int keys literal'

# === Dict length ===
assert len({}) == 0, 'len empty'
assert len({'a': 1, 'b': 2, 'c': 3}) == 3, 'len multiple'

# === Dict equality ===
assert ({'a': 1, 'b': 2} == {'b': 2, 'a': 1}) == True, 'equality true (order independent)'
assert ({'a': 1} == {'a': 2}) == False, 'equality false'

# === Dict subscript get ===
d = {'name': 'Alice', 'age': 30}
assert d['name'] == 'Alice', 'subscript get str key'
assert d['age'] == 30, 'subscript get value'

d = {1: 'one', 2: 'two'}
assert d[1] == 'one', 'subscript get int key'

# === Dict subscript set ===
d = {'a': 1}
d['b'] = 2
assert d == {'a': 1, 'b': 2}, 'subscript set new key'

d = {'a': 1}
d['a'] = 99
assert d == {'a': 99}, 'subscript set existing key'

# === Dict subscript augmented assignment ===
totals = {'photo': 1}
rtype = 'photo'
likes = 2
totals[rtype] += likes
assert totals == {'photo': 3}, 'subscript += updates existing dict item'

calls = 0


# Returns the key 'photo' and counts its own invocations in the module-level
# `calls`, so the assertions below can check how often the index expression
# of `totals[key()] += 5` was evaluated.
def key():
    global calls
    calls += 1
    return 'photo'


totals = {'photo': 10}
totals[key()] += 5
assert totals == {'photo': 15}, 'subscript += stores the computed result back'
assert calls == 1, 'subscript += evaluates the index expression once'

captured_total = {'photo': 1}
captured_likes = 2


# Performs the subscript `+=` from inside a function body; both the dict and
# the increment are names defined outside the function.
def apply_captured_increment():
    captured_total['photo'] += captured_likes
apply_captured_increment()
assert captured_total == {'photo': 3}, 'subscript += works with closure-captured names'

walrus_key = None
walrus_total = {'photo': 10}
# The walrus binds `walrus_key` as a side effect of evaluating the index expression.
walrus_total[(walrus_key := 'photo')] += 4
assert walrus_key == 'photo', 'subscript += allows walrus in the index expression'
assert walrus_total == {'photo': 14}, 'subscript += with walrus index updates the selected item'

try:
    missing = {}
    missing['photo'] += 1
    # Only reached if the `+=` above did not raise; AssertionError is not
    # caught by the handler below, so it fails the fixture.
    assert False, 'subscript += on a missing dict key should raise KeyError'
except KeyError as e:
    assert e.args == ('photo',), 'subscript += missing key preserves the missing key in KeyError'

try:
    existing = {'photo': 'a'}
    existing['photo'] += 1
    assert False, 'subscript += with incompatible operand types should raise TypeError'
except TypeError as e:
    assert e.args == ('can only concatenate str (not "int") to str',), 'subscript += type error matches CPython'
    # The failed operation must leave the stored value untouched.
    assert existing == {'photo': 'a'}, 'failed subscript += does not overwrite the original dict item'

# === Dict.get() method ===
d = {'a': 1, 'b': 2}
assert d.get('a') == 1, 'get existing'
assert d.get('missing') is None, 'get missing returns None'
assert d.get('missing', 'default') == 'default', 'get missing with default'

# === Dict.pop() method ===
d = {'a': 1, 'b': 2}
assert d.pop('a') == 1, 'pop existing'
assert d == {'b': 2}, 'pop removes key'

d = {'a': 1}
assert d.pop('missing', 'default') == 'default', 'pop missing with default'

# === Dict with tuple key ===
d = {(1, 2): 'value'}
assert d[(1, 2)] == 'value', 'tuple key'

# === Dict repr ===
assert repr({}) == '{}', 'empty repr'
assert repr({'a': 1}) == "{'a': 1}", 'repr with items'

# === Dict self-reference ===
# The dict stores itself as one of its own values.
d = {}
d['self'] = d
assert d['self'] is d, 'getitem self'

# The dict is passed as the `default` argument of its own .get() call.
d = {}
assert d.get('missing', d) is d, 'get default same dict'

# === Dict unpacking (PEP 448) ===
a = {'x': 1, 'y': 2}
b = {'y': 99, 'z': 3}
assert {**a} == {'x': 1, 'y': 2}, 'single unpack'
assert {**a, **b} == {'x': 1, 'y': 99, 'z': 3}, 'double unpack, later wins'
assert {**a, 'y': 0} == {'x': 1, 'y': 0}, 'literal overrides unpacked key'
assert {'y': 0, **a} == {'y': 2, 'x': 1}, 'unpack overrides earlier literal'
assert {**a, 'z': 3} == {'x': 1, 'y': 2, 'z': 3}, 'unpack then new key'
assert {**{}} == {}, 'unpack empty dict'
assert {**a, **b, 'w': 4} == {'x': 1, 'y': 99, 'z': 3, 'w': 4}, 'complex mixed'
assert list({**a, 'z': 3}.keys()) == ['x', 'y', 'z'], 'insertion order preserved'

# === Dict unpack TypeError for non-mapping heap ref ===
# Unpacking a Ref that is NOT a dict (e.g. a list) should raise TypeError
try:
    x = {**[1, 2, 3]}
    assert False, 'expected TypeError'
except TypeError as e:
    assert str(e) == "'list' object is not a mapping", f'wrong error: {e}'

================================================
FILE: crates/monty/test_cases/dict__pop_unhashable_key.py
================================================
# note cpython behaves weirdly if the dict is empty: https://github.com/python/cpython/issues/142396
d = {1: 2}
d.pop([], 'fallback')
# Raise=TypeError("cannot use 'list' as a dict key (unhashable type: 'list')")

================================================
FILE: crates/monty/test_cases/dict__popitem_empty.py
================================================
{}.popitem()
""" TRACEBACK: Traceback (most recent call last): File "dict__popitem_empty.py", line 1, in {}.popitem() ~~~~~~~~~~~~ KeyError: 'popitem(): dictionary is empty' """

================================================
FILE: crates/monty/test_cases/dict__subscript_missing_key.py
================================================
d = {'a': 1}
d['missing']
# Raise=KeyError('missing')

================================================
FILE: crates/monty/test_cases/dict__unhashable_dict_key.py
================================================
{{'a': 1}: 'value'}
# Raise=TypeError("cannot use 'dict' as a dict key (unhashable type: 'dict')")

================================================
FILE: crates/monty/test_cases/dict__unhashable_list_key.py
================================================
{[1, 2]: 'value'}
# Raise=TypeError("cannot use 'list' as a dict key (unhashable type: 'list')")

================================================
FILE: crates/monty/test_cases/dict__unpack_type_error.py
================================================
{**42}
# Raise=TypeError("'int' object is not a mapping")

================================================
FILE: crates/monty/test_cases/dict__views.py
================================================
# Fixture for the dict view objects returned by keys()/items()/values():
# type names and repr, len/truthiness, iteration order, membership, equality,
# live updates, mutation-during-iteration errors and set-like operators.

# === Type identity and repr ===
d = {'a': 1, 'b': 2}
keys = d.keys()
items = d.items()
values = d.values()
assert type(keys).__name__ == 'dict_keys', 'dict.keys() returns a dict_keys view'
assert type(items).__name__ == 'dict_items', 'dict.items() returns a dict_items view'
assert type(values).__name__ == 'dict_values', 'dict.values() returns a dict_values view'
assert repr(keys) == "dict_keys(['a', 'b'])", 'keys repr matches CPython'
assert repr(items) == "dict_items([('a', 1), ('b', 2)])", 'items repr matches CPython'
assert repr(values) == 'dict_values([1, 2])', 'values repr matches CPython'

# === len() and truthiness ===
assert len(keys) == 2, 'keys view reports live dict length'
assert len(items) == 2, 'items view reports live dict length'
assert len(values) == 2, 'values view reports live dict length'
assert bool(keys) is True, 'non-empty keys view is truthy'
assert bool(items) is True, 'non-empty items view is truthy'
assert bool(values) is True, 'non-empty values view is truthy'
assert bool({}.keys()) is False, 'empty keys view is falsy'
assert bool({}.items()) is False, 'empty items view is falsy'
assert bool({}.values()) is False, 'empty values view is falsy'

# === Iteration order ===
assert list(keys) == ['a', 'b'], 'keys iterate in insertion order'
assert list(items) == [('a', 1), ('b', 2)], 'items iterate in insertion order'
assert list(values) == [1, 2], 'values iterate in insertion order'

# === Membership ===
assert ('a' in keys) is True, 'keys membership checks keys'
assert ('missing' in keys) is False, 'keys membership is false for absent keys'
assert (('a', 1) in items) is True, 'items membership matches existing pairs'
assert (('a', 3) in items) is False, 'items membership checks values too'
assert (('a',) in items) is False, 'items membership ignores non-2-tuples'
assert (1 in values) is True, 'values membership checks stored values'
assert (3 in values) is False, 'values membership is false for absent values'

# A 2-tuple whose first element is unhashable must raise rather than return False.
try:
    ([1], 'x') in {1: 'x'}.items()
    assert False, 'items membership should reject unhashable keys'
except TypeError as e:
    assert str(e) == "cannot use 'list' as a dict key (unhashable type: 'list')", (
        'items membership propagates key hash errors'
    )

# === Equality ===
assert keys == keys, 'keys view equals itself'
assert items == items, 'items view equals itself'
assert values == values, 'values view equals itself by identity'
assert keys == {'a', 'b'}, 'keys view compares equal to matching sets'
assert {'b', 'a'} == keys, 'set equality works when dict_keys is on the right'
assert keys == frozenset({'a', 'b'}), 'keys view compares equal to matching frozensets'
assert frozenset({'a', 'b'}) == keys, 'frozenset equality works when dict_keys is on the right'
assert keys == {'b': 0, 'a': 9}.keys(), 'keys view compares equal to another matching keys view'
assert keys != {'a'}, 'keys view equality checks the full key set'
assert keys != {'a', 'x'}, 'keys view inequality checks equal-length mismatches'
assert items == {('a', 1), ('b', 2)}, 'items view compares equal to matching sets'
assert {('b', 2), ('a', 1)} == items, 'set equality works when dict_items is on the right'
assert items == frozenset({('a', 1), ('b', 2)}), 'items view compares equal to matching frozensets'
assert frozenset({('a', 1), ('b', 2)}) == items, 'frozenset equality works when dict_items is on the right'
assert items == {'b': 2, 'a': 1}.items(), 'items view compares equal to another matching items view'
assert items != {('a', 1)}, 'items view equality checks the full item set'
assert items != {('a', 2), ('b', 2)}, 'items view equality checks values as well as keys'
assert items != {('a', 1), ('x', 9)}, 'items view inequality checks equal-length mismatches'
# Two separate values views over equal dicts still compare unequal.
assert ({'a': 1}.values() == {'a': 1}.values()) is False, 'distinct values views are never equal'

# === Live behavior after mutation ===
# The views are created before the insertion and inspected after it.
live = {'x': 10}
live_keys = live.keys()
live_items = live.items()
live_values = live.values()
live['y'] = 20
assert list(live_keys) == ['x', 'y'], 'keys view sees later insertions'
assert list(live_items) == [('x', 10), ('y', 20)], 'items view sees later insertions'
assert list(live_values) == [10, 20], 'values view sees later insertions'
assert repr(live_keys) == "dict_keys(['x', 'y'])", 'keys repr stays live after mutation'
assert len(live_values) == 2, 'values len updates after mutation'

# === Dict mutation during iteration ===
# Each case below advances an iterator once, grows the dict, then expects the
# next step to raise RuntimeError.
changing = {'a': 1, 'b': 2}
changing_iter = iter(changing.keys())
assert next(changing_iter) == 'a', 'iterator yields the first key before mutation'
changing['c'] = 3
try:
    next(changing_iter)
    assert False, 'changing dict size during keys iteration should raise'
except RuntimeError as e:
    assert str(e) == 'dictionary changed size during iteration', 'keys iteration error matches CPython'

changing = {'a': 1, 'b': 2}
changing_iter = iter(changing.items())
assert next(changing_iter) == ('a', 1), 'iterator yields the first item before mutation'
changing['c'] = 3
try:
    next(changing_iter)
    assert False, 'changing dict size during items iteration should raise'
except RuntimeError as e:
    assert str(e) == 'dictionary changed size during iteration', 'items iteration error matches CPython'

changing = {'a': 1, 'b': 2}
changing_iter = iter(changing.values())
assert next(changing_iter) == 1, 'iterator yields the first value before mutation'
changing['c'] = 3
try:
    next(changing_iter)
    assert False, 'changing dict size during values iteration should raise'
except RuntimeError as e:
    assert str(e) == 'dictionary changed size during iteration', 'values iteration error matches CPython'

# === dict_keys & iterable ===
# Note: `&`, `|`, `^` and `-` bind tighter than `==`, so each assertion below
# compares the result of the set operation with the expected set.
d = {'a': 1, 'b': 2, 'c': 3}
assert d.keys() & {'b', 'c', 'x'} == {'b', 'c'}, 'keys view intersects sets'
assert d.keys() & ('b', 'x', 'a') == {'a', 'b'}, 'keys view intersects tuples'
assert d.keys() & iter(['c', 'c', 'a']) == {'a', 'c'}, 'keys view intersects iterators'
assert type(d.keys() & {'a'}).__name__ == 'set', 'keys intersection returns a plain set'
try:
    d.keys() & 1
    assert False, 'keys intersection should reject non-iterables'
except TypeError as e:
    assert str(e) == "'int' object is not iterable", 'non-iterable rhs error matches CPython'

# === dict_keys set-like operators ===
assert d.keys() | ('c', 'd') == {'a', 'b', 'c', 'd'}, 'keys view unions arbitrary iterables'
assert d.keys() ^ ('b', 'd', 'e') == {'a', 'c', 'd', 'e'}, 'keys view symmetric difference works'
assert d.keys() - ('b', 'd') == {'a', 'c'}, 'keys view difference works'
assert d.keys() & {'b': 0, 'z': 9}.keys() == {'b'}, 'keys view intersects other keys views'
assert d.keys() | {'c': 0, 'd': 1}.keys() == {'a', 'b', 'c', 'd'}, 'keys view unions other keys views'
assert d.keys().isdisjoint(['x', 'y']) is True, 'keys isdisjoint accepts arbitrary iterables'
assert d.keys().isdisjoint(iter(['x', 'a'])) is False, 'keys isdisjoint consumes iterators'

# === dict_items set-like operators ===
items_dict = {'a': 1, 'b': 2}
assert items_dict.items() & [('a', 1), ('x', 9)] == {('a', 1)}, 'items view intersects iterables of pairs'
assert items_dict.items() | [('c', 3)] == {('a', 1), ('b', 2), ('c', 3)}, 'items view unions iterables of pairs'
assert items_dict.items() ^ [('a', 1), ('c', 3)] == {('b', 2), ('c', 3)}, 'items view symmetric difference works'
assert items_dict.items() - [('a', 1)] == {('b', 2)}, 'items view difference works'
assert items_dict.items() & {'b': 2, 'x': 9}.items() == {('b', 2)}, 'items view intersects other items views'
assert items_dict.items().isdisjoint([('x', 1)]) is True, 'items isdisjoint accepts arbitrary iterables'
assert items_dict.items().isdisjoint(iter([('a', 1)])) is False, 'items isdisjoint consumes iterators'

# === dict_values remains non-set-like ===
# Here the expected outcome is the exception itself, so the handlers just pass.
try:
    {'a': 1}.values() & [1]
    assert False, 'dict_values should not support set-like operators'
except TypeError:
    pass

try:
    {'a': 1}.values().isdisjoint([1])
    assert False, 'dict_values should not gain isdisjoint'
except AttributeError:
    pass

# === Motivating milestone example ===
me_map = {'me': 1, 'you': 2, 'merve': 3}
merve_set = {'merve', 'unknown'}
common_ids = me_map.keys() & merve_set
assert common_ids == {'merve'}, 'dict_keys & set supports the motivating use case'

================================================
FILE: crates/monty/test_cases/edge__all.py
================================================
# === Empty container lengths ===
assert (len([]), len(()), len('')) == (0, 0, 0), 'all empty lengths'

# === Large concatenations ===
lst = []
for i in range(100):
    lst += [i]
assert len(lst) == 100, 'large list concat'

s = ''
for i in range(100):
    s += 'x'
assert len(s) == 100, 'large string concat'

# === Self-concatenation ===
# `lst += lst` doubles the list each time: 1 -> 2 -> 4 elements.
lst = [1]
lst += lst
lst += lst
assert lst == [1, 1, 1, 1], 'list self concat twice'

# === Mod comparison in loop ===
# Multiples of 7 in range(100) are 0, 7, ..., 98 -> 15 values.
count = 0
for i in range(100):
    if i % 7 == 0:
        count += 1
assert count == 15, 'mod comparison chain'

================================================
FILE: crates/monty/test_cases/edge__float_int_mod.py
================================================
7.5 % 2
# Return=1.5

================================================
FILE: crates/monty/test_cases/edge__int_float_mod.py
================================================
7 % 2.5
# Return=2.0

================================================
FILE: crates/monty/test_cases/exc__args.py
================================================
# === Exception .args attribute ===
try:
    raise ValueError('test message')
except ValueError as e:
    assert e.args == ('test message',), 'args is tuple with message'
    assert e.args[0] == 'test message', 'args[0] is the message'

try:
    raise ValueError()
except ValueError as e:
    assert e.args == (), 'no-arg exception has empty args'

try:
    raise TypeError('type error')
except TypeError as e:
    assert e.args[0] == 'type error', 'works for other exception types'

================================================
FILE: crates/monty/test_cases/exc__str.py
================================================
# Raising the bare class and raising a no-argument instance are both expected
# to produce an exception whose str() is empty and whose repr() is 'ValueError()'.
# The `else` branches run only if the try body did not raise.
try:
    raise ValueError
except ValueError as e:
    assert str(e) == ''
    assert repr(e) == 'ValueError()'
else:
    raise AssertionError('should raise an error')

try:
    raise ValueError()
except ValueError as e:
    assert str(e) == ''
    assert repr(e) == 'ValueError()'
else:
    raise AssertionError('should raise an error')

================================================
FILE: crates/monty/test_cases/execute_ok__all.py
================================================
# === Basic arithmetic ===
assert 1 + 1 == 2, 'add ints'
assert 'a' + 'b' == 'ab', 'add strs'

# === Equality tests ===
assert (1 == 1) == True, 'ints equal true'
assert (1 == 2) == False, 'ints equal false'
assert ('a' == 'a') == True, 'str equal'
assert ('a' == 'b') == False, 'str not equal'
assert ([1, 2] == [1, 2]) == True, 'list equal'
assert ((1, 2) == (1, 2)) == True, 'tuple equal'
assert (b'hello' == b'hello') == True, 'bytes equal'

# === Boolean repr/str ===
assert repr(True) == 'True', 'bool true repr'
assert str(True) == 'True', 'bool true str'
assert repr(False) == 'False', 'bool false repr'
assert str(False) == 'False', 'bool false str'

# === None repr/str ===
assert repr(None) == 'None', 'none repr'
assert str(None) == 'None', 'none str'

# === Ellipsis repr/str ===
assert repr(...) == 'Ellipsis', 'ellipsis repr'
assert str(...) == 'Ellipsis', 'ellipsis str'

# === List repr/str ===
assert repr([1, 2]) == '[1, 2]', 'list repr'
assert str([1, 2]) == '[1, 2]', 'list str'

# === Discard expression result ===
a = 1
[1, 2, 3]  # this list is created and discarded
assert a == 1, 'discard list'

# === Shared list append ===
# `a` and `b` name the same list, so the append through `b` is visible via `a`.
a = [1]
b = a
b.append(2)
assert len(a) == 2, 'shared list append'

# === For loop string append ===
# Multiples of 13 in range(1000) are 0, 13, ..., 988 -> 77 values.
v = ''
for i in range(1000):
    if i % 13 == 0:
        v = v + 'x'
assert len(v) == 77, 'for loop str append assign'

v = ''
for i in range(1000):
    if i % 13 == 0:
        v += 'x'
assert len(v) == 77, 'for loop str append assign op'

================================================
FILE: crates/monty/test_cases/execute_raise__error_instance_str.py
================================================
raise ValueError('testing')
# Raise=ValueError('testing')

================================================
FILE: crates/monty/test_cases/execute_raise__error_no_args.py
================================================
raise TypeError()
# Raise=TypeError()

================================================
FILE: crates/monty/test_cases/execute_raise__error_string_arg.py
================================================
raise TypeError('hello')
# Raise=TypeError('hello')

================================================
FILE: crates/monty/test_cases/execute_raise__error_string_arg_quotes.py
================================================
raise TypeError("hello 'there'")
# Raise=TypeError("hello 'there'")

================================================
FILE: crates/monty/test_cases/execute_raise__error_type.py
================================================
raise TypeError
# Raise=TypeError()

================================================
FILE: crates/monty/test_cases/execute_raise__raise_instance_via_var.py
================================================
# raise exception instance stored in a variable
a = ValueError('instance error')
raise a
# Raise=ValueError('instance error')
================================================
FILE: crates/monty/test_cases/execute_raise__raise_list.py
================================================
raise []
# Raise=TypeError('exceptions must derive from BaseException')

================================================
FILE: crates/monty/test_cases/execute_raise__raise_number.py
================================================
raise 1 + 2
# Raise=TypeError('exceptions must derive from BaseException')

================================================
FILE: crates/monty/test_cases/execute_raise__raise_type_call_via_var.py
================================================
# raise exception type called via variable
a = ValueError
raise a('error message')
# Raise=ValueError('error message')

================================================
FILE: crates/monty/test_cases/execute_raise__raise_type_direct.py
================================================
# raise exception type directly
raise ValueError
# Raise=ValueError()

================================================
FILE: crates/monty/test_cases/execute_raise__raise_type_via_var.py
================================================
# raise exception type stored in a variable
a = ValueError
raise a
# Raise=ValueError()

================================================
FILE: crates/monty/test_cases/ext_call__arg_side_effect_bug.py
================================================
# call-external
# BUG: Side effects in arguments are duplicated when external call suspends
#
# When an external call is in one argument position and there's a side effect
# in another argument position, the side effect may be executed multiple times
# because argument evaluation is repeated during resumption.
#
# NOTE(review): add_ints is not defined in this file; presumably the test
# harness supplies it for fixtures marked `# call-external` - confirm.
call_count = 0


# Identity function that records each invocation in the module-level
# `call_count`, so the assertions can check how many times it actually ran.
def side_effect(val):
    global call_count
    call_count += 1
    return val


# Side effect before external call in args
# Expected: side_effect runs once, result is 10 + 3 = 13
call_count = 0
result = add_ints(side_effect(10), add_ints(1, 2))
assert result == 13, 'ext call after side effect'
assert call_count == 1, 'side effect should happen only once (before ext)'

================================================
FILE: crates/monty/test_cases/ext_call__augmented.py
================================================
# call-external
# External calls in augmented assignment expressions
# NOTE(review): add_ints is not defined in this file; presumably supplied by
# the harness for `# call-external` fixtures - confirm.

# += with external call
x = 10
x += add_ints(5, 5)
assert x == 20, 'ext call in augmented add'

# -= with external call
x = 100
x -= add_ints(20, 30)
assert x == 50, 'ext call in augmented sub'

# *= with external call
x = 5
x *= add_ints(2, 1)
assert x == 15, 'ext call in augmented mul'

================================================
FILE: crates/monty/test_cases/ext_call__augmented_refcount_bug.py
================================================
# call-external
# BUG: Reference counting bug with string augmented assignment and external calls

# String += with external call causes reference counting error
s = 'hello'
s += concat_strings(' ', 'world')
assert s == 'hello world', 'ext call in augmented string concat'

================================================
FILE: crates/monty/test_cases/ext_call__bare_raise_after_resume.py
================================================
# call-external
# Test bare raise after external call resumption in except handler
# This tests that current_exception is properly restored after resuming
# Note: bare raise after resumption only works when exception is bound (as e)

caught_reraised = False
try:
    try:
        raise ValueError('original error')
    except ValueError as e:
        # Make an external call, which will cause a suspend/resume
        x = add_ints(1, 2)
        # After resuming, bare raise should still work (exception restored from binding)
        raise
except ValueError as outer_e:
    caught_reraised = repr(outer_e) == "ValueError('original error')"

assert caught_reraised, 'bare raise after external call should re-raise original exception'

# === Nested handler bare raise after resumption ===
outer_nested_reraise = False
try:
    try:
        raise ValueError('outer error')
    except ValueError:
        try:
            raise KeyError('inner error')
        except KeyError:
            # The external call happens while the inner KeyError is being handled.
            _ = add_ints(1, 2)
        # NOTE(review): this bare raise is placed in the outer ValueError handler,
        # after the inner try/except has finished - inferred from the assertion
        # message below ('bare raise in outer handler'); confirm against the
        # original file's indentation.
        raise
except ValueError as reraised:
    outer_nested_reraise = repr(reraised) == "ValueError('outer error')"

assert outer_nested_reraise, 'bare raise in outer handler should re-raise original exception after nested resumption'

================================================
FILE: crates/monty/test_cases/ext_call__basic.py
================================================
# call-external
# === Basic external function tests ===
# NOTE(review): add_ints, concat_strings and return_value are not defined in
# this file; presumably supplied by the harness for `# call-external`
# fixtures - confirm.

# Simple calls
a = add_ints(10, 20)
assert a == 30, 'add_ints basic'

b = add_ints(-5, 15)
assert b == 10, 'add_ints with negative'

s = concat_strings('hello', ' world')
assert s == 'hello world', 'concat_strings basic'

x = return_value(42)
assert x == 42, 'return_value with int'

y = return_value('test')
assert y == 'test', 'return_value with str'

# === Assignment with external calls ===
result = add_ints(100, 200)
assert result == 300, 'assignment from add_ints'

name = concat_strings('foo', 'bar')
assert name == 'foobar', 'assignment from concat_strings'

# === Nested calls ===
nested = add_ints(1, add_ints(2, 3))
assert nested == 6, 'nested add_ints right'

nested2 = add_ints(add_ints(1, 2), 3)
assert nested2 == 6, 'nested add_ints left'

nested3 = add_ints(add_ints(1, 2), add_ints(3, 4))
assert nested3 == 10, 'nested add_ints both'

deep = add_ints(add_ints(add_ints(1, 2), 3), 4)
assert deep == 10, 'deeply nested add_ints'

# === Chained operations ===
chained = add_ints(1, 2) + add_ints(3, 4)
assert chained == 10, 'chained add_ints with +'

chained2 = add_ints(10, 20) - add_ints(5, 10)
assert chained2 == 15, 'chained add_ints with -'

chained3 = add_ints(2, 3) * add_ints(4, 5)
assert chained3 == 45, 'chained add_ints with *'

str_chain = concat_strings('a', 'b') + concat_strings('c', 'd')
assert str_chain == 'abcd', 'chained concat_strings'

# === External calls in assert statements ===
# Here the external call sits directly inside the assert condition instead of
# being assigned to a name first.
assert add_ints(5, 5) == 10, 'ext call in assert condition'
assert return_value(True), 'ext call returning truthy in assert'
assert concat_strings('x', 'y') == 'xy', 'concat in assert'
assert add_ints(1, add_ints(2, 3)) == 6, 'nested ext call in assert'

# === Mixed with builtins ===
length = len(concat_strings('hello', 'world'))
assert length == 10, 'len of concat result'

items = [add_ints(1, 2), add_ints(3, 4)]
assert items[0] == 3, 'ext call in list literal first'
assert items[1] == 7, 'ext call in list literal second'

# === Multiple external calls in single expression ===
# Two ext calls added together
sum_result = add_ints(1, 2) + add_ints(3, 4)
assert sum_result == 10, 'two ext calls in addition'

# Three ext calls in one expression
triple = add_ints(1, 1) + add_ints(2, 2) + add_ints(3, 3)
assert triple == 12, 'three ext calls in expression'

# Ext calls in multiplication
mul_result = add_ints(2, 3) * add_ints(1, 1)
assert mul_result == 10, 'ext calls in multiplication'

# Ext calls in subtraction
sub_result = add_ints(10, 5) - add_ints(3, 2)
assert sub_result == 10, 'ext calls in subtraction'

# Complex expression with multiple ext calls
complex_expr = (add_ints(1, 2) + add_ints(3, 4)) * add_ints(0, 2)
assert complex_expr == 20, 'complex expr with ext calls'

# String concatenation with multiple ext calls
str_result = concat_strings(return_value('a'), return_value('b')) + concat_strings('c', 'd')
assert str_result == 'abcd', 'multiple ext calls for string concat'

# Comparison with multiple ext calls
cmp_result = add_ints(5, 5) == add_ints(3, 7)
assert cmp_result == True, 'comparison of two ext call results'

# Nested ext calls in expression
nested_expr = add_ints(add_ints(1, 2), add_ints(3, 4))
assert nested_expr == 10, 'nested ext calls in expression'
================================================
FILE: crates/monty/test_cases/ext_call__boolean.py
================================================
# call-external
# External calls in boolean short-circuit expressions
# NOTE(review): return_value is not defined in this file; presumably supplied
# by the harness for `# call-external` fixtures - confirm.

# === Basic boolean operations ===
# `and` yields its first falsy operand (otherwise the last operand);
# `or` yields its first truthy operand (otherwise the last operand).
result = return_value(True) and return_value(42)
assert result == 42, 'ext call in and (both run)'

result = return_value(False) and return_value(42)
assert result == False, 'ext call in and (short circuit)'

result = return_value(0) or return_value(42)
assert result == 42, 'ext call in or (both run)'

result = return_value(99) or return_value(42)
assert result == 99, 'ext call in or (short circuit)'

# === Chained boolean with external calls ===
result = return_value(True) and return_value(True) and return_value(42)
assert result == 42, 'chained and all truthy'

result = return_value(True) and return_value(False) and return_value(42)
assert result == False, 'chained and with false in middle'

result = return_value(0) or return_value(0) or return_value(42)
assert result == 42, 'chained or all falsy except last'

result = return_value(0) or return_value(99) or return_value(42)
assert result == 99, 'chained or with truthy in middle'

# === Mixed and/or ===
# `and` binds tighter than `or`: the first expression parses as (A and B) or C.
result = return_value(True) and return_value(0) or return_value(42)
assert result == 42, 'and then or'

# ... and this one parses as A or (B and C).
result = return_value(0) or return_value(True) and return_value(42)
assert result == 42, 'or then and'

================================================
FILE: crates/monty/test_cases/ext_call__boolean_side_effect_hang.py
================================================
# call-external
# BUG: This test hangs (infinite loop) - external calls in boolean expressions
# with side effects cause incorrect behavior.
call_count = 0


# Identity function that records each invocation in the module-level
# `call_count`, so the assertions can check how many times it actually ran.
def side_effect(val):
    global call_count
    call_count += 1
    return val


# This specific pattern causes a hang in Monty
result = return_value(True) and return_value(side_effect(42))
assert result == 42, 'and: second runs when first truthy'
assert call_count == 1, 'and: side effect runs when first is truthy'

================================================
FILE: crates/monty/test_cases/ext_call__closure_bug.py
================================================
# call-external
# BUG: External calls in closures cannot access captured variables

# When an external call is inside a closure, it fails to access free variables
# Error: "cannot access free variable 'x' where it is not associated with a value"
# `adder` reads `x` from the enclosing make_adder call and passes it to the
# external add_ints.
def make_adder(x):
    def adder(y):
        return add_ints(x, y)

    return adder


add5 = make_adder(5)
result = add5(10)
assert result == 15, 'ext call in closure accessing captured var'

================================================
FILE: crates/monty/test_cases/ext_call__comparison.py
================================================
# call-external
# External calls in comparison expressions
# NOTE(review): add_ints is not defined in this file; presumably supplied by
# the harness for `# call-external` fixtures - confirm.

# External call on left side of comparison
result = add_ints(1, 2) == 3
assert result == True, 'ext call == literal'

# External call on right side
result = 3 == add_ints(1, 2)
assert result == True, 'literal == ext call'

# Both sides external calls
result = add_ints(1, 2) == add_ints(2, 1)
assert result == True, 'ext call == ext call'

# Less than
result = add_ints(1, 1) < add_ints(2, 2)
assert result == True, 'ext call < ext call'

# Greater than
result = add_ints(5, 5) > add_ints(2, 2)
assert result == True, 'ext call > ext call'

# Not equal
result = add_ints(1, 2) != add_ints(3, 4)
assert result == True, 'ext call != ext call'

================================================
FILE: crates/monty/test_cases/ext_call__deep_call_stack.py
================================================
# call-external
# External function calls in deep call stacks (function calling function).
# Tests that the outer function receives the return value correctly when # the inner function makes an external call. def depth1(n): return add_ints(n, 1) def depth2(n): return depth1(n) + 10 result = depth2(5) # depth2(5) should be: depth1(5) + 10 = 6 + 10 = 16 assert result == 16, f'ext call through 2 levels of functions {result=}' ================================================ FILE: crates/monty/test_cases/ext_call__elif.py ================================================ # call-external # === External calls in elif conditions === # External call in elif condition - true result1 = 0 if add_ints(1, 1) == 3: result1 = 1 elif add_ints(2, 2) == 4: result1 = 2 else: result1 = 3 assert result1 == 2, f'elif condition with ext call should be evaluated, {result1=}' # External call in elif condition - falls through to else result2 = 0 if add_ints(1, 1) == 3: result2 = 1 elif add_ints(2, 2) == 5: result2 = 2 else: result2 = 3 assert result2 == 3, 'else taken when all ext call conditions are false' # Multiple elif with external calls result3 = 0 if add_ints(1, 1) == 10: result3 = 1 elif add_ints(2, 2) == 10: result3 = 2 elif add_ints(3, 3) == 6: result3 = 3 elif add_ints(4, 4) == 10: result3 = 4 else: result3 = 5 assert result3 == 3, 'third elif with ext call should match' # === External calls in elif bodies === # Ext call in elif body val1 = 0 if False: val1 = 1 elif True: val1 = add_ints(10, 20) else: val1 = 3 assert val1 == 30, 'ext call in elif body' # Ext call in else body after elif chain val2 = 0 if False: val2 = 1 elif False: val2 = 2 else: val2 = add_ints(15, 25) assert val2 == 40, 'ext call in else body after elif' # Multiple ext calls in elif body val3 = 0 if False: val3 = 1 elif True: a = add_ints(5, 5) b = add_ints(10, 10) val3 = add_ints(a, b) else: val3 = 3 assert val3 == 30, 'multiple ext calls in elif body' # === Nested ext calls === # Nested ext calls in elif condition result4 = 0 if False: result4 = 1 elif add_ints(add_ints(1, 2), add_ints(3, 4)) == 10: 
result4 = 2 else: result4 = 3 assert result4 == 2, 'nested ext calls in elif condition' # === Short-circuit with ext calls === # Ext call should not be evaluated if earlier condition is true call_count = 0 def counting_add(a, b): global call_count call_count = call_count + 1 return a + b # This uses a regular function, not ext call, to verify short-circuit # but the ext calls in bodies still test suspension x = 0 if True: x = add_ints(1, 1) elif False: x = add_ints(2, 2) assert x == 2, 'if body ext call executed, elif skipped' # === Ext call in both condition and body === result5 = 0 if add_ints(1, 1) == 3: result5 = add_ints(100, 100) elif add_ints(2, 2) == 4: result5 = add_ints(50, 50) else: result5 = add_ints(25, 25) assert result5 == 100, 'ext call in both elif condition and body' # === Ext call in if body when condition is true === if_body_result = 0 if add_ints(5, 5) == 10: if_body_result = add_ints(100, 200) elif add_ints(1, 1) == 2: if_body_result = add_ints(10, 20) else: if_body_result = add_ints(1, 2) assert if_body_result == 300, 'ext call in if body when if condition is true' # === Ext calls returning values used as conditions === # return_value returns its argument, so we can use it to test boolean coercion cond_result = 0 if return_value(0): cond_result = 1 elif return_value(1): cond_result = 2 else: cond_result = 3 assert cond_result == 2, 'ext call return value used as boolean condition' # === Ext calls with string concatenation === str_result = '' if add_ints(1, 1) == 3: str_result = concat_strings('a', 'b') elif add_ints(2, 2) == 4: str_result = concat_strings('hello', ' world') else: str_result = concat_strings('x', 'y') assert str_result == 'hello world', 'ext call with string result in elif body' # === Multiple conditions with ext calls in same expression === multi_cond = 0 if add_ints(1, 1) > 5: multi_cond = 1 elif add_ints(2, 2) < add_ints(3, 3): multi_cond = 2 else: multi_cond = 3 assert multi_cond == 2, 'comparison between two ext call 
results in elif condition' # === Ext call in all three branches === all_branches = 0 val = add_ints(5, 5) if val < 5: all_branches = add_ints(1, 0) elif val < 15: all_branches = add_ints(2, 0) else: all_branches = add_ints(3, 0) assert all_branches == 2, 'ext call in elif body based on earlier ext call result' ================================================ FILE: crates/monty/test_cases/ext_call__exc.py ================================================ # call-external # External call in raise statement raise ValueError(return_value('foobar')) # Raise=ValueError('foobar') ================================================ FILE: crates/monty/test_cases/ext_call__exc_deep_stack.py ================================================ # call-external def level4(): x = 1 raise_error('RuntimeError', 'deep error') def level3(): level4() def level2(): level3() def level1(): level2() level1() """ TRACEBACK: Traceback (most recent call last): File "ext_call__exc_deep_stack.py", line 19, in level1() ~~~~~~~~ File "ext_call__exc_deep_stack.py", line 16, in level1 level2() ~~~~~~~~ File "ext_call__exc_deep_stack.py", line 12, in level2 level3() ~~~~~~~~ File "ext_call__exc_deep_stack.py", line 8, in level3 level4() ~~~~~~~~ File "ext_call__exc_deep_stack.py", line 4, in level4 raise_error('RuntimeError', 'deep error') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RuntimeError: deep error """ ================================================ FILE: crates/monty/test_cases/ext_call__exc_in_function.py ================================================ # call-external def wrapper(): raise_error('ValueError', 'from external') wrapper() """ TRACEBACK: Traceback (most recent call last): File "ext_call__exc_in_function.py", line 6, in wrapper() ~~~~~~~~~ File "ext_call__exc_in_function.py", line 3, in wrapper raise_error('ValueError', 'from external') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ValueError: from external """ ================================================ FILE: 
crates/monty/test_cases/ext_call__exc_nested_functions.py ================================================ # call-external def inner(): raise_error('TypeError', 'nested error') def middle(): inner() def outer(): middle() outer() """ TRACEBACK: Traceback (most recent call last): File "ext_call__exc_nested_functions.py", line 14, in outer() ~~~~~~~ File "ext_call__exc_nested_functions.py", line 11, in outer middle() ~~~~~~~~ File "ext_call__exc_nested_functions.py", line 7, in middle inner() ~~~~~~~ File "ext_call__exc_nested_functions.py", line 3, in inner raise_error('TypeError', 'nested error') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TypeError: nested error """ ================================================ FILE: crates/monty/test_cases/ext_call__ext_exc.py ================================================ # call-external # === External function exceptions === # Tests for exceptions raised by external functions # === Basic exception propagation === # External function raising ValueError caught_value_error = False try: result = raise_error('ValueError', 'test error') assert False, 'should not reach here' except ValueError: caught_value_error = True assert caught_value_error, 'ValueError was caught' # External function raising TypeError caught_type_error = False try: result = raise_error('TypeError', 'type error message') assert False, 'should not reach here' except TypeError: caught_type_error = True assert caught_type_error, 'TypeError was caught' # External function raising KeyError caught_key_error = False try: result = raise_error('KeyError', 'missing key') assert False, 'should not reach here' except KeyError: caught_key_error = True assert caught_key_error, 'KeyError was caught' # External function raising RuntimeError caught_runtime_error = False try: result = raise_error('RuntimeError', 'runtime error') assert False, 'should not reach here' except RuntimeError: caught_runtime_error = True assert caught_runtime_error, 'RuntimeError was caught' # === 
Exception not caught by wrong handler === # ValueError not caught by TypeError handler caught_outer = False try: try: raise_error('ValueError', 'inner error') except TypeError: assert False, 'TypeError should not catch ValueError' except ValueError: caught_outer = True assert caught_outer, 'ValueError caught by outer handler' # === Exception in expression with multiple ext calls === # First ext call raises, second should not be called try: x = raise_error('ValueError', 'first') + add_ints(1, 2) assert False, 'should not reach here' except ValueError: pass # Expected # === External exception in try body with finally === finally_ran = False try: raise_error('ValueError', 'in try') except ValueError: pass # Caught finally: finally_ran = True assert finally_ran, 'finally ran after external exception caught' # External exception propagating through finally outer_caught = False finally_ran2 = False try: try: raise_error('KeyError', 'will propagate') except ValueError: assert False, 'ValueError should not catch KeyError' finally: finally_ran2 = True except KeyError: outer_caught = True assert finally_ran2, 'finally ran before exception propagated' assert outer_caught, 'exception propagated after finally' # === Mix of normal returns and exceptions === # Normal return, then exception value1 = add_ints(10, 20) assert value1 == 30, 'first ext call returned normally' try: raise_error('ValueError', 'after success') assert False, 'should not reach here' except ValueError: pass # Expected # Exception, then normal return (after catching) caught_exc = False try: raise_error('TypeError', 'will be caught') except TypeError: caught_exc = True value2 = add_ints(5, 5) assert caught_exc, 'exception was caught' assert value2 == 10, 'ext call after caught exception returned normally' # === Exception in except handler from external function === outer_catch = False try: try: raise ValueError('inner') except ValueError: raise_error('TypeError', 'from handler') except TypeError: outer_catch = 
True assert outer_catch, 'exception from handler caught by outer' # === Exception in else block from external function === else_exc_caught = False try: try: pass # No exception except: assert False, 'should not reach except' else: raise_error('RuntimeError', 'from else') except RuntimeError: else_exc_caught = True assert else_exc_caught, 'exception from else block caught' # === Exception in finally block === # Note: exception in finally replaces any pending exception finally_exc_caught = False try: try: pass finally: raise_error('ValueError', 'from finally') except ValueError: finally_exc_caught = True assert finally_exc_caught, 'exception from finally caught' # === Nested try blocks with external exceptions === inner_handled = False outer_handled = False finally_count = 0 try: try: raise_error('ValueError', 'inner error') except ValueError: inner_handled = True raise_error('TypeError', 'from inner handler') finally: finally_count += 1 except TypeError: outer_handled = True finally: finally_count += 1 assert inner_handled, 'inner exception was handled' assert outer_handled, 'exception from inner handler was caught by outer' assert finally_count == 2, 'both finally blocks ran' ================================================ FILE: crates/monty/test_cases/ext_call__for.py ================================================ # call-external # === External calls in for loops === # Ext call in loop body total = 0 for i in range(3): total = add_ints(total, 1) assert total == 3, f'ext call accumulator in loop, {total=}' # Ext call with loop variable sum_val = 0 for i in range(4): sum_val = add_ints(sum_val, i) assert sum_val == 6, 'ext call with loop var' # Multiple ext calls per iteration result = 0 for i in range(3): result = add_ints(result, add_ints(i, i)) assert result == 6, 'nested ext calls in loop' # Building list with ext calls items = [] for i in range(3): items.append(add_ints(i, 10)) assert items[0] == 10, 'ext call list build first' assert items[1] == 11, 'ext 
call list build second' assert items[2] == 12, 'ext call list build third' # Chained ext calls in loop acc = 0 for i in range(3): acc = add_ints(acc, 1) + add_ints(0, 1) assert acc == 6, 'chained ext calls in loop body' # Nested loops with ext calls matrix_sum = 0 for i in range(2): for j in range(2): matrix_sum = add_ints(matrix_sum, add_ints(i, j)) assert matrix_sum == 4, 'ext calls in nested loops' # === More nested loop edge cases === # Nested loops building a result list results = [] for i in range(2): for j in range(2): results.append(concat_strings(return_value(str(i)), return_value(str(j)))) assert results[0] == '00', 'nested loops result list first' assert results[1] == '01', 'nested loops result list second' assert results[2] == '10', 'nested loops result list third' assert results[3] == '11', 'nested loops result list fourth' # If inside for loop with external call condition filtered = [] for i in range(3): if return_value(i) == i: filtered.append(i) assert filtered == [0, 1, 2], 'if inside for with ext condition' # If inside for loop - some iterations match results2 = [] for i in range(4): # Only append even numbers (using modulo check with add_ints) if add_ints(i, 0) % 2 == 0: results2.append(return_value(i)) assert results2 == [0, 2], 'if inside for filtering with ext' # Nested for with different ranges outer_sum = 0 for i in range(2): inner_sum = 0 for j in range(3): inner_sum = add_ints(inner_sum, add_ints(i, j)) outer_sum = add_ints(outer_sum, inner_sum) # i=0: (0+0)+(0+1)+(0+2) = 0+1+2 = 3 # i=1: (1+0)+(1+1)+(1+2) = 1+2+3 = 6 # total = 3 + 6 = 9 assert outer_sum == 9, 'nested for with accumulator' # Three levels of nested loops count = 0 for i in range(2): for j in range(2): for k in range(2): count = add_ints(count, 1) assert count == 8, 'triple nested for with ext call' # multiple ext calls in iterable ext_ints = [] for i in add_ints(1, 1), add_ints(2, 2), add_ints(3, 3): ext_ints.append(i) assert ext_ints == [2, 4, 6], 'multiple ext calls in 
iterable' # ext call iterable, get_list() returns [1, 2, 3] total = 0 for x in get_list(): total = add_ints(total, x) assert total == 6, 'ext call iterable' # string iteration with ext call in body chars = [] for c in 'abc': chars.append(return_value(c)) assert chars == ['a', 'b', 'c'], f'string iteration with ext call: {chars}' # unicode string iteration with ext call in body # Tests decr() handling of multi-byte UTF-8 characters (1-4 bytes each) unicode_chars = [] for c in 'aé中😀b': # a (1 byte), e-acute (2), chinese (3), emoji (4), b (1) unicode_chars.append(return_value(c)) assert unicode_chars == ['a', 'é', '中', '😀', 'b'], f'unicode iteration: {unicode_chars}' ================================================ FILE: crates/monty/test_cases/ext_call__fstring.py ================================================ # call-external # External calls in f-strings s = f'result is {add_ints(10, 20)}' assert s == 'result is 30', 'ext call in f-string' s = f'a={add_ints(1, 2)}, b={add_ints(3, 4)}' assert s == 'a=3, b=7', 'multiple ext calls in f-string' # Nested external call in f-string s = f'nested={add_ints(add_ints(1, 2), 3)}' assert s == 'nested=6', 'nested ext call in f-string' ================================================ FILE: crates/monty/test_cases/ext_call__if.py ================================================ # call-external # === External calls in if/else expressions === # Ternary expression with ext call in condition result = 'yes' if add_ints(1, 1) == 2 else 'no' assert result == 'yes', 'ext call in ternary condition true' result2 = 'yes' if add_ints(1, 1) == 3 else 'no' assert result2 == 'no', 'ext call in ternary condition false' # Ext call in true branch val = add_ints(10, 20) if True else 0 assert val == 30, 'ext call in ternary true branch' # Ext call in false branch val2 = 0 if False else add_ints(5, 5) assert val2 == 10, 'ext call in ternary false branch' # Ext calls in both branches val3 = add_ints(1, 2) if True else add_ints(3, 4) assert val3 == 3, 
'ext call in both branches takes true' val4 = add_ints(1, 2) if False else add_ints(3, 4) assert val4 == 7, 'ext call in both branches takes false' # === If statements with external calls === # Ext call in if condition x = 0 if add_ints(1, 1) == 2: x = 100 assert x == 100, 'ext call in if statement condition true' y = 0 if add_ints(1, 1) == 3: y = 100 assert y == 0, 'ext call in if statement condition false' # Ext call in if body z = 0 if True: z = add_ints(50, 50) assert z == 100, 'ext call in if body' # Ext call in else body w = 0 if False: w = 1 else: w = add_ints(25, 75) assert w == 100, 'ext call in else body' # Nested ext calls in condition nested = 0 if add_ints(add_ints(1, 2), add_ints(3, 4)) == 10: nested = 1 assert nested == 1, 'nested ext calls in if condition' # Chained conditions with ext calls result3 = 0 if add_ints(1, 1) == 2 and add_ints(2, 2) == 4: result3 = 1 assert result3 == 1, 'multiple ext calls in and condition' result4 = 0 if add_ints(1, 1) == 3 or add_ints(2, 2) == 4: result4 = 1 assert result4 == 1, 'multiple ext calls in or condition' # Comparison with ext call results cmp = add_ints(10, 5) > add_ints(5, 5) assert cmp == True, 'comparing two ext call results' # === Nested if statements with external calls === # Nested if with ext calls in both conditions (both true) result = 'none' if return_value(1) == 1: if return_value(2) == 2: result = 'inner' else: result = 'outer_only' else: result = 'failed' assert result == 'inner', 'nested if both conditions true' # Nested if - outer true, inner false result2 = 'none' if return_value(1) == 1: if return_value(2) == 999: result2 = 'inner' else: result2 = 'outer_only' else: result2 = 'failed' assert result2 == 'outer_only', 'nested if inner false' # Nested if - outer false result3 = 'none' if return_value(1) == 999: if return_value(2) == 2: result3 = 'inner' else: result3 = 'outer_only' else: result3 = 'xxx' assert result3 == 'xxx', 'nested if outer false' # Triple nested if - all true result4 = 0 
if return_value(1) == 1:
    if return_value(2) == 2:
        if return_value(3) == 3:
            result4 = 123
assert result4 == 123, 'triple nested if all true'

# If condition with multiple ext calls (addition)
# (1 + 2) + (3 + 4) == 10
result5 = 0
if add_ints(1, 2) + add_ints(3, 4) == 10:
    result5 = 1
assert result5 == 1, 'if condition with multiple ext calls'

# For loop inside if with external condition
# Accumulates 0 + 1 + 2 through two ext calls per iteration
total = 0
if return_value(1) == 1:
    for i in range(3):
        total = add_ints(total, return_value(i))
assert total == 3, 'for loop inside if with ext condition'

# For loop inside if - condition false
# The loop body must never run, so total2 keeps its initial value
total2 = 0
if return_value(1) == 999:
    for i in range(3):
        total2 = add_ints(total2, i)
assert total2 == 0, 'for loop inside if condition false'

================================================
FILE: crates/monty/test_cases/ext_call__if_condition.py
================================================
# call-external
# External calls in if conditions


def check_positive():
    # Ext call result compared directly in the if condition, no else branch
    if add_ints(1, 2) > 0:
        return 'positive'
    return 'not positive'


result = check_positive()
assert result == 'positive', 'ext call in if condition'


def check_with_else():
    # -5 + 3 is negative, so the else branch is the one expected to return
    if add_ints(-5, 3) > 0:
        return 'positive'
    else:
        return 'negative or zero'


result = check_with_else()
assert result == 'negative or zero', 'ext call in if condition with else'


def check_elif():
    # The ext call result is stored first, then tested by each branch in turn
    val = add_ints(5, 5)
    if val > 15:
        return 'big'
    elif val > 5:
        return 'medium'
    else:
        return 'small'


result = check_elif()
assert result == 'medium', 'ext call result used in elif chain'

================================================
FILE: crates/monty/test_cases/ext_call__in_closure.py
================================================
# call-external
# External function calls inside closures (nested functions with captured variables).
def outer_with_nested(): x = 10 def inner(): return add_ints(x, 5) return inner() assert outer_with_nested() == 15, 'ext call in nested function' ================================================ FILE: crates/monty/test_cases/ext_call__in_function.py ================================================ # call-external # === External function calls inside user-defined functions === # Basic function calling external function def add_wrapper(a, b): return add_ints(a, b) result = add_wrapper(10, 20) assert result == 30, 'basic ext call in function' # Function with multiple external calls (sequential) def multi_ext(): x = add_ints(1, 2) y = add_ints(3, 4) return add_ints(x, y) assert multi_ext() == 10, 'multiple ext calls in function' # External call in function with local variable usage def with_locals(): x = 100 y = add_ints(x, 50) z = y * 2 return z assert with_locals() == 300, 'ext call with locals' # Function returning external call result def get_sum(a, b, c): temp = add_ints(a, b) return add_ints(temp, c) assert get_sum(1, 2, 3) == 6, 'chained ext calls in function' ================================================ FILE: crates/monty/test_cases/ext_call__in_function_simple.py ================================================ # call-external def foo(): return add_ints(1, 2) result = foo() assert result == 3, 'basic ext call in function' ================================================ FILE: crates/monty/test_cases/ext_call__literals.py ================================================ # call-external # External calls in list and dict literals # External call in list literal lst = [add_ints(1, 2), add_ints(3, 4)] assert lst[0] == 3, 'ext call in list literal [0]' assert lst[1] == 7, 'ext call in list literal [1]' # External call in tuple literal tup = (add_ints(1, 1), add_ints(2, 2)) assert tup[0] == 2, 'ext call in tuple literal [0]' assert tup[1] == 4, 'ext call in tuple literal [1]' # External call in dict value d = {'a': add_ints(5, 5), 'b': add_ints(10, 10)} assert 
d['a'] == 10, 'ext call in dict value a' assert d['b'] == 20, 'ext call in dict value b' ================================================ FILE: crates/monty/test_cases/ext_call__multi_in_func.py ================================================ # call-external # Multiple external calls within user-defined functions def compute_sum(): a = add_ints(1, 2) b = add_ints(3, 4) c = add_ints(5, 6) return a + b + c result = compute_sum() assert result == 21, 'multiple sequential ext calls in func' def compute_nested(): return add_ints(add_ints(1, 2), add_ints(3, 4)) result = compute_nested() assert result == 10, 'nested ext calls in func' def outer(): def inner(): return add_ints(10, 20) return inner() + add_ints(1, 2) result = outer() assert result == 33, 'ext call in nested func plus outer ext call' ================================================ FILE: crates/monty/test_cases/ext_call__name_lookup.py ================================================ # call-external # Tests for NameLookup resolution with various value types. # Verifies that the host can inject non-function values (constants) # into the sandbox namespace via the NameLookup mechanism. 
# === Integer constant === assert CONST_INT == 42, f'CONST_INT should be 42, got {CONST_INT}' assert CONST_INT + 8 == 50, 'CONST_INT arithmetic' assert type(CONST_INT) == int, f'CONST_INT type should be int, got {type(CONST_INT)}' # === String constant === assert CONST_STR == 'hello', f'CONST_STR should be hello, got {CONST_STR}' assert CONST_STR + ' world' == 'hello world', 'CONST_STR concatenation' assert len(CONST_STR) == 5, f'CONST_STR length should be 5, got {len(CONST_STR)}' assert type(CONST_STR) == str, f'CONST_STR type should be str, got {type(CONST_STR)}' # === Float constant === assert CONST_FLOAT == 3.14, f'CONST_FLOAT should be 3.14, got {CONST_FLOAT}' assert CONST_FLOAT + 0.86 == 4.0, 'CONST_FLOAT arithmetic' assert type(CONST_FLOAT) == float, f'CONST_FLOAT type should be float, got {type(CONST_FLOAT)}' # === Boolean constant === assert CONST_BOOL == True, f'CONST_BOOL should be True, got {CONST_BOOL}' assert CONST_BOOL and True, 'CONST_BOOL in boolean expression' assert type(CONST_BOOL) == bool, f'CONST_BOOL type should be bool, got {type(CONST_BOOL)}' # === List constant === assert CONST_LIST == [1, 2, 3], f'CONST_LIST should be [1, 2, 3], got {CONST_LIST}' assert len(CONST_LIST) == 3, f'CONST_LIST length should be 3, got {len(CONST_LIST)}' assert CONST_LIST[0] == 1, 'CONST_LIST first element' assert CONST_LIST[-1] == 3, 'CONST_LIST last element' assert type(CONST_LIST) == list, f'CONST_LIST type should be list, got {type(CONST_LIST)}' # === None constant === assert CONST_NONE is None, f'CONST_NONE should be None, got {CONST_NONE}' assert type(CONST_NONE) == type(None), f'CONST_NONE type should be NoneType, got {type(CONST_NONE)}' # === Caching: same constant used twice should work === x = CONST_INT y = CONST_INT assert x == y == 42, 'cached CONST_INT should be consistent' # === Mixed: constants and external functions in the same code === result = add_ints(CONST_INT, 8) assert result == 50, f'add_ints(CONST_INT, 8) should be 50, got {result}' 
str_result = concat_strings(CONST_STR, ' world')
assert str_result == 'hello world', f'concat with CONST_STR should be hello world, got {str_result}'

# === Constants used in control flow ===
if CONST_BOOL:
    flag = 'yes'
else:
    flag = 'no'
assert flag == 'yes', f'CONST_BOOL in if should take true branch, got {flag}'

# === Constants used in loops ===
total = 0
for item in CONST_LIST:
    total = total + item
assert total == 6, f'sum of CONST_LIST should be 6, got {total}'


# === Constants in function scope ===
def use_constant():
    # CONST_INT is not assigned anywhere in this script; it is read from inside a function body
    return CONST_INT * 2


assert use_constant() == 84, f'CONST_INT in function should be 84, got {use_constant()}'

================================================
FILE: crates/monty/test_cases/ext_call__name_lookup_undefined.py
================================================
# call-external
# When NameLookup returns Undefined for an unknown name, NameError is raised.
totally_unknown_name
# Raise=NameError("name 'totally_unknown_name' is not defined")

================================================
FILE: crates/monty/test_cases/ext_call__nested_calls.py
================================================
# call-external
# External calls in nested call expressions

# Nested external calls - inner first
result = add_ints(add_ints(1, 2), 3)
assert result == 6, 'nested ext calls'

# Triple nested
result = add_ints(add_ints(add_ints(1, 1), 2), 3)
assert result == 7, 'triple nested ext calls'

# Two separate nested calls
result = add_ints(add_ints(1, 2), add_ints(3, 4))
assert result == 10, 'two nested ext calls in args'

================================================
FILE: crates/monty/test_cases/ext_call__recursion_bug.py
================================================
# call-external
# BUG: External calls in recursive functions produce wrong results
# Recursion with external calls doesn't compute the correct value


def sum_with_ext(n):
    # Base case returns 0 without making an external call
    if n <= 0:
        return 0
    # The recursive call is evaluated as an argument of the external call
    return add_ints(n, sum_with_ext(n - 1))


# sum_with_ext(3) should compute:
# add_ints(3, sum_with_ext(2))
# add_ints(3, add_ints(2, sum_with_ext(1))) # add_ints(3, add_ints(2, add_ints(1, sum_with_ext(0)))) # add_ints(3, add_ints(2, add_ints(1, 0))) # = 3 + 2 + 1 = 6 result = sum_with_ext(3) assert result == 6, 'recursive ext call: 1+2+3=6' ================================================ FILE: crates/monty/test_cases/ext_call__return.py ================================================ # call-external # External calls in return statements def direct_return(): return add_ints(10, 20) result = direct_return() assert result == 30, 'ext call as direct return value' def return_with_expression(): return add_ints(1, 2) + add_ints(3, 4) result = return_with_expression() assert result == 10, 'ext call expression as return value' def conditional_return(): if return_value(True): return add_ints(100, 200) return add_ints(1, 1) result = conditional_return() assert result == 300, 'ext call in conditional return' ================================================ FILE: crates/monty/test_cases/ext_call__side_effects.py ================================================ # call-external def log(msg): print(msg) return 1 def inner(x): print('Inner called') val = add_ints(x, 1) # External call print('Inner resumed') return val def outer(): print('Outer calling inner') # If side effects are duplicated, we'll see "Evaluating arg" twice res = inner(log('Evaluating arg')) print('Outer returned') return res print('Starting') res = outer() print(f'Result: {res}') assert res == 2 ================================================ FILE: crates/monty/test_cases/ext_call__subscript.py ================================================ # call-external # External calls in subscript operations # External call as subscript index items = [10, 20, 30] result = items[add_ints(0, 1)] assert result == 20, 'ext call as subscript index' ================================================ FILE: crates/monty/test_cases/ext_call__ternary.py ================================================ # call-external # External calls 
in ternary expressions (if/else expressions) # External call in true branch result = add_ints(1, 2) if True else add_ints(10, 20) assert result == 3, 'ext call in ternary true branch' # External call in false branch result = add_ints(1, 2) if False else add_ints(10, 20) assert result == 30, 'ext call in ternary false branch' # External call in condition result = 'yes' if return_value(True) else 'no' assert result == 'yes', 'ext call in ternary condition (true)' result = 'yes' if return_value(False) else 'no' assert result == 'no', 'ext call in ternary condition (false)' # External calls in both branches result = add_ints(1, 2) if return_value(True) else add_ints(10, 20) assert result == 3, 'ext call in condition and true branch' result = add_ints(1, 2) if return_value(False) else add_ints(10, 20) assert result == 30, 'ext call in condition and false branch' # Nested ternary with external calls result = add_ints(1, 1) if return_value(True) else (add_ints(2, 2) if return_value(False) else add_ints(3, 3)) assert result == 2, 'nested ternary with ext calls' ================================================ FILE: crates/monty/test_cases/ext_call__try.py ================================================ # call-external # === External calls in try blocks === # Basic external call in try body result = None try: result = add_ints(10, 20) except: result = -1 assert result == 30, 'ext call in try body' # Multiple external calls in try body try: a = add_ints(1, 2) b = add_ints(3, 4) c = add_ints(a, b) except: c = -1 assert c == 10, 'multiple ext calls in try body' # Nested external calls in try body try: nested = add_ints(add_ints(1, 2), add_ints(3, 4)) except: nested = -1 assert nested == 10, 'nested ext calls in try body' # === External calls in except blocks === # External call in except handler handler_result = None try: raise ValueError('error') except ValueError: handler_result = add_ints(100, 200) assert handler_result == 300, 'ext call in except handler' # Multiple 
external calls in except handler try: raise TypeError('error') except TypeError: x = add_ints(5, 5) y = add_ints(10, 10) handler_sum = add_ints(x, y) assert handler_sum == 30, 'multiple ext calls in except handler' # External call with exception variable exc_with_ext = None try: raise ValueError('test') except ValueError as e: prefix = concat_strings('caught: ', repr(e)) exc_with_ext = prefix assert exc_with_ext == "caught: ValueError('test')", 'ext call with exception variable' # === External calls in else blocks === # External call in else block else_result = None try: x = 1 # No exception except: else_result = -1 else: else_result = add_ints(50, 50) assert else_result == 100, 'ext call in else block' # Multiple external calls in else block try: pass except: else_multi = -1 else: p = add_ints(1, 2) q = add_ints(3, 4) else_multi = add_ints(p, q) assert else_multi == 10, 'multiple ext calls in else block' # === External calls in finally blocks === # External call in finally block finally_result = None try: x = 1 finally: finally_result = add_ints(25, 75) assert finally_result == 100, 'ext call in finally block' # Finally with external call after exception caught finally_after_exc = None try: raise ValueError('error') except ValueError: pass finally: finally_after_exc = add_ints(1, 99) assert finally_after_exc == 100, 'ext call in finally after caught exception' # Multiple external calls in finally try: pass finally: f1 = add_ints(10, 20) f2 = add_ints(30, 40) finally_multi = add_ints(f1, f2) assert finally_multi == 100, 'multiple ext calls in finally block' # === External calls across multiple phases === # External calls in try, except, and finally all_phases = [] try: all_phases.append(add_ints(1, 0)) # 1 raise ValueError('error') except ValueError: all_phases.append(add_ints(2, 0)) # 2 finally: all_phases.append(add_ints(3, 0)) # 3 assert all_phases == [1, 2, 3], 'ext calls in try, except, and finally' # External calls in try, else, and finally (no exception) 
no_exc_phases = [] try: no_exc_phases.append(add_ints(10, 0)) # 10 except: no_exc_phases.append(-1) else: no_exc_phases.append(add_ints(20, 0)) # 20 finally: no_exc_phases.append(add_ints(30, 0)) # 30 assert no_exc_phases == [10, 20, 30], 'ext calls in try, else, and finally' # === External calls in nested try blocks === # Nested try with external calls at each level outer_val = None inner_val = None try: outer_val = add_ints(100, 0) try: inner_val = add_ints(200, 0) raise ValueError('inner') except ValueError: inner_val = add_ints(inner_val, 50) except: outer_val = -1 assert outer_val == 100, 'ext call in outer try' assert inner_val == 250, 'ext call in inner try and handler' # === External calls in exception type expression === # (Exception type is evaluated at handler matching time) # External call producing value used after try post_try = None try: pre = add_ints(5, 5) except: pre = -1 post_try = add_ints(pre, 10) assert post_try == 20, 'ext call result used after try block' # === External call in finally with unhandled exception === # Finally should still run even when exception propagates finally_with_propagate = None try: try: finally_with_propagate = add_ints(0, 0) # Initialize raise KeyError('unhandled') except ValueError: pass # Won't catch KeyError finally: finally_with_propagate = add_ints(42, 0) # Should still run except KeyError: pass # Catch propagated exception assert finally_with_propagate == 42, 'ext call in finally should run even with unhandled exception' # === External call in except handler that then raises === handler_before_raise = None try: try: raise ValueError('original') except ValueError: handler_before_raise = add_ints(10, 0) # External call before raising raise TypeError('from handler') except TypeError: pass assert handler_before_raise == 10, 'ext call in handler before raising' # === External call in else block that then raises === else_before_raise = None try: try: pass # No exception except: pass else: else_before_raise = 
add_ints(20, 0) # External call before raising raise ValueError('from else') except ValueError: pass assert else_before_raise == 20, 'ext call in else before raising' # === External call preserves state across try/except === state_before = add_ints(1000, 0) state_after = None try: state_after = add_ints(state_before, 1) raise ValueError('test') except ValueError: state_after = add_ints(state_after, 10) finally: state_after = add_ints(state_after, 100) assert state_after == 1111, 'state preserved across try/except with ext calls' # === Multiple except handlers with external calls === which_handler = None try: raise TypeError('test') except ValueError: which_handler = add_ints(1, 0) except TypeError: which_handler = add_ints(2, 0) except KeyError: which_handler = add_ints(3, 0) assert which_handler == 2, 'ext call in correct handler with multiple handlers' # === External call in finally with pending exception (after handler raises) === finally_after_handler_raise = None try: try: raise ValueError('original') except ValueError: finally_after_handler_raise = add_ints(10, 0) # External call before raising raise TypeError('from handler') finally: # This external call should work even though there's a pending exception finally_after_handler_raise = add_ints(finally_after_handler_raise, 5) except TypeError: pass assert finally_after_handler_raise == 15, 'ext call in finally with pending exception from handler' # === External call in finally with pending exception (no matching handler) === finally_with_pending_exc = None try: try: finally_with_pending_exc = add_ints(0, 0) raise KeyError('no handler') except ValueError: pass # Won't catch KeyError finally: # This external call should work even though KeyError is pending finally_with_pending_exc = add_ints(100, 0) except KeyError: pass # Catch it here assert finally_with_pending_exc == 100, 'ext call in finally with unhandled exception pending' # === External call in finally with return (uses simple values) === # Note: 
External calls in user-defined functions are not supported, # so we test pending return with built-in operations only finally_return_result = None try: finally_return_result = 'in_try' finally: pass # finally runs but doesn't override assert finally_return_result == 'in_try', 'finally runs with pending value' # === Multiple external calls in finally with pending exception === multi_finally = None try: try: raise ValueError('test') except TypeError: pass # Won't match finally: a = add_ints(1, 2) b = add_ints(3, 4) multi_finally = add_ints(a, b) except ValueError: pass assert multi_finally == 10, 'multiple ext calls in finally with pending exception' ================================================ FILE: crates/monty/test_cases/ext_call__try_simple.py ================================================ # call-external # Test external call with exception variable exc_with_ext = None try: raise ValueError('test') except ValueError as e: prefix = concat_strings('caught: ', repr(e)) exc_with_ext = prefix assert exc_with_ext == "caught: ValueError('test')", 'ext call with exception variable' ================================================ FILE: crates/monty/test_cases/ext_call__unary.py ================================================ # call-external # External calls in unary expressions # Negation of external call result result = -add_ints(3, 4) assert result == -7, 'negation of ext call' # Not of external call result = not return_value(False) assert result == True, 'not of ext call returning False' result = not return_value(True) assert result == False, 'not of ext call returning True' ================================================ FILE: crates/monty/test_cases/frozenset__ops.py ================================================ # === Construction === fs = frozenset() assert len(fs) == 0, 'empty frozenset len' assert fs == frozenset(), 'empty frozenset equality' fs = frozenset([1, 2, 3]) assert len(fs) == 3, 'frozenset from list len' # === Copy === fs = frozenset([1, 2, 
3]) fs2 = fs.copy() assert fs == fs2, 'copy equality' # === Union === fs1 = frozenset([1, 2]) fs2 = frozenset([2, 3]) u = fs1.union(fs2) assert len(u) == 3, 'union len' # === Intersection === fs1 = frozenset([1, 2, 3]) fs2 = frozenset([2, 3, 4]) i = fs1.intersection(fs2) assert len(i) == 2, 'intersection len' # === Difference === fs1 = frozenset([1, 2, 3]) fs2 = frozenset([2, 3, 4]) d = fs1.difference(fs2) assert len(d) == 1, 'difference len' # === Symmetric Difference === fs1 = frozenset([1, 2, 3]) fs2 = frozenset([2, 3, 4]) sd = fs1.symmetric_difference(fs2) assert len(sd) == 2, 'symmetric_difference len' # === Binary operators === fs = frozenset([1, 2]) other_fs = frozenset([2, 3]) s = {2, 3} assert fs & other_fs == frozenset([2]), 'frozenset & frozenset works' assert fs | other_fs == frozenset([1, 2, 3]), 'frozenset | frozenset works' assert fs ^ other_fs == frozenset([1, 3]), 'frozenset ^ frozenset works' assert fs - other_fs == frozenset([1]), 'frozenset - frozenset works' assert fs & s == frozenset([2]), 'frozenset & set works' assert fs | s == frozenset([1, 2, 3]), 'frozenset | set works' assert fs ^ s == frozenset([1, 3]), 'frozenset ^ set works' assert fs - s == frozenset([1]), 'frozenset - set works' keys = {'a': 1, 'b': 2}.keys() items = {'a': 1, 'b': 2}.items() assert frozenset({'a'}) & keys == frozenset({'a'}), 'frozenset & dict_keys works' assert frozenset({'a'}) | keys == frozenset({'a', 'b'}), 'frozenset | dict_keys works' assert frozenset({('a', 1)}) ^ items == frozenset({('b', 2)}), 'frozenset ^ dict_items works' assert frozenset({('a', 1), ('b', 2)}) - items == frozenset(), 'frozenset - dict_items works' assert type(fs | s).__name__ == 'frozenset', 'frozenset operators keep the left operand type' try: fs & [1, 2] assert False, 'frozenset operators reject non-set rhs' except TypeError as e: assert str(e) == "unsupported operand type(s) for &: 'frozenset' and 'list'", ( 'frozenset & rhs error matches CPython' ) # === Issubset === fs1 = 
frozenset([1, 2]) fs2 = frozenset([1, 2, 3]) assert fs1.issubset(fs2) == True, 'issubset true' assert fs2.issubset(fs1) == False, 'issubset false' # === Issuperset === fs1 = frozenset([1, 2, 3]) fs2 = frozenset([1, 2]) assert fs1.issuperset(fs2) == True, 'issuperset true' assert fs2.issuperset(fs1) == False, 'issuperset false' # === Isdisjoint === fs1 = frozenset([1, 2]) fs2 = frozenset([3, 4]) fs3 = frozenset([2, 3]) assert fs1.isdisjoint(fs2) == True, 'isdisjoint true' assert fs1.isdisjoint(fs3) == False, 'isdisjoint false' # === Bool === assert bool(frozenset()) == False, 'empty frozenset is falsy' assert bool(frozenset([1])) == True, 'non-empty frozenset is truthy' # === repr === assert repr(frozenset()) == 'frozenset()', 'empty frozenset repr' # === Hashing === fs = frozenset([1, 2, 3]) h = hash(fs) assert isinstance(h, int), 'frozenset hash is int' # Same elements should have same hash fs1 = frozenset([1, 2, 3]) fs2 = frozenset([3, 2, 1]) # Different order assert hash(fs1) == hash(fs2), 'frozenset hash is order-independent' # === As dict key === d = {} fs = frozenset([1, 2]) d[fs] = 'value' assert d[fs] == 'value', 'frozenset as dict key' assert d[frozenset([2, 1])] == 'value', 'frozenset key lookup order-independent' # === Construction from various iterables === fs = frozenset('abc') assert len(fs) == 3, 'frozenset from string len' assert 'a' in fs and 'b' in fs and 'c' in fs, 'frozenset from string elements' fs = frozenset((1, 2, 3)) assert fs == frozenset({1, 2, 3}), 'frozenset from tuple' fs = frozenset(range(5)) assert fs == frozenset({0, 1, 2, 3, 4}), 'frozenset from range' fs = frozenset({1, 2, 3}) assert len(fs) == 3, 'frozenset from set' # === Containment (in / not in) === fs = frozenset({1, 2, 3}) assert 1 in fs, 'in frozenset positive' assert 4 not in fs, 'not in frozenset' assert 'x' not in frozenset({'a', 'b'}), 'not in frozenset strings' # === Iteration === result = [] for x in frozenset({1, 2, 3}): result.append(x) assert len(result) == 3, 
'frozenset iteration length' assert set(result) == {1, 2, 3}, 'frozenset iteration elements' result = [] for x in frozenset(): result.append(x) assert result == [], 'empty frozenset iteration' # === Inequality (!=) === assert frozenset({1, 2}) != frozenset({1, 3}), 'frozenset ne different' assert not (frozenset({1, 2}) != frozenset({1, 2})), 'frozenset ne same' # === Methods accepting iterables === assert frozenset({1, 2}).union([3, 4]) == frozenset({1, 2, 3, 4}), 'union with list arg' assert frozenset({1, 2, 3}).intersection([2, 3, 4]) == frozenset({2, 3}), 'intersection with list arg' assert frozenset({1, 2, 3}).difference([2]) == frozenset({1, 3}), 'difference with list arg' assert frozenset({1, 2}).symmetric_difference([2, 3]) == frozenset({1, 3}), 'symmetric_difference with list arg' assert frozenset({1}).union(range(3)) == frozenset({0, 1, 2}), 'union with range arg' assert frozenset({1}).union((2, 3)) == frozenset({1, 2, 3}), 'union with tuple arg' # === issubset/issuperset/isdisjoint with non-set iterables === fs = frozenset({1, 2, 3}) assert fs.issubset(range(10)), 'issubset with range' assert fs.issuperset([1, 2]), 'issuperset with list' assert fs.isdisjoint([4, 5, 6]), 'isdisjoint with list' assert not fs.isdisjoint([3, 4]), 'not isdisjoint with list' # === Different hashes for different frozensets === fs1 = frozenset({1, 2}) fs2 = frozenset({3, 4}) # Not guaranteed to be different, but very likely # Instead just verify they're integers and stable assert hash(fs1) == hash(frozenset({2, 1})), 'hash stable across order' assert hash(frozenset()) == hash(frozenset()), 'empty frozenset hash stable' # === Frozenset as set element === s = {frozenset({1, 2}), frozenset({3, 4})} assert len(s) == 2, 'set of frozensets' assert frozenset({1, 2}) in s, 'frozenset element lookup' # Duplicate frozenset should dedup s2 = {frozenset({1}), frozenset({1})} assert len(s2) == 1, 'duplicate frozensets dedup in set' ================================================ FILE: 
crates/monty/test_cases/fstring__all.py ================================================ # === Basic f-strings === assert f'hello' == 'hello', 'basic f-string' assert f'' == '', 'empty f-string' # === Simple interpolation === x = 'world' assert f'hello {x}' == 'hello world', 'simple interpolation' # multiple interpolations a = 1 b = 2 assert f'{a} + {b} = {a + b}' == '1 + 2 = 3', 'multiple interpolations' # expression in f-string assert f'{1 + 2 + 3}' == '6', 'expression' # === Value types === # list value x = [1, 2, 3] assert f'list: {x}' == 'list: [1, 2, 3]', 'list value' # bool value x = True assert f'value: {x}' == 'value: True', 'bool value' # int value assert f'{42}' == '42', 'int value' # float value assert f'{3.14}' == '3.14', 'float value' # None value assert f'{None}' == 'None', 'None value' # === Conversion flags (!s, !r, !a) === # conversion !s (str) assert f'{42!s}' == '42', 'conversion !s' # conversion !r (repr) assert f'{"hello"!r}' == "'hello'", 'conversion !r' # conversion !r on int (should be same as str for int) assert f'{42!r}' == '42', 'conversion !r on int' # conversion !r on list assert f'{[1, 2]!r}' == '[1, 2]', 'conversion !r on list' # conversion !s on string (no quotes) assert f'{"hello"!s}' == 'hello', 'conversion !s on string' # conversion !a (ascii) - escapes non-ASCII characters assert f'{"café"!a}' == "'caf\\xe9'", 'conversion !a' assert f'{"hello"!a}' == "'hello'", 'conversion !a ascii only' assert f'{"日本"!a}' == "'\\u65e5\\u672c'", 'conversion !a unicode' # === String padding and alignment === # format spec: width (left-aligned by default for strings) assert f'{"hi":10}' == 'hi ', 'format width' # format spec: left align assert f'{"hi":<10}' == 'hi ', 'format left align' # format spec: right align assert f'{"hi":>10}' == ' hi', 'format right align' # format spec: center align assert f'{"hi":^10}' == ' hi ', 'format center align' # center align with odd padding assert f'{"zip":^6}' == ' zip ', 'format center align odd' # format 
# spec: fill character
# NOTE: this span is the interior of fstring__all.py. Statements are laid out one
# per line again (flattened, everything after the first '#' was comment text),
# and expected strings carry their full padding: a width in the format spec pads
# the result to that many characters, so e.g. f'{42:4d}' is '  42', not ' 42'.
assert f'{"hi":*>10}' == '********hi', 'format fill right'
assert f'{"hi":_<10}' == 'hi________', 'format fill left'
assert f'{"hi":*^10}' == '****hi****', 'format fill center'

# string truncation with precision
assert f'{"xylophone":.5}' == 'xylop', 'string truncation'
# precision truncates to 5 chars first, then width 10 left-pads with 5 spaces
assert f'{"xylophone":10.5}' == 'xylop     ', 'string truncation with width'

# === Integer formatting ===
# basic integer
assert f'{42}' == '42', 'basic integer'

# integer with :d type
assert f'{42:d}' == '42', 'integer :d'

# integer padding (numbers are right-aligned by default)
assert f'{42:4d}' == '  42', 'integer padding'
assert f'{42:04d}' == '0042', 'integer zero padding'

# integer with sign
assert f'{42:+d}' == '+42', 'integer positive sign'
assert f'{42: d}' == ' 42', 'integer space for positive'
assert f'{-42:+d}' == '-42', 'integer negative with sign'
assert f'{-42: d}' == '-42', 'integer negative space'

# sign-aware padding ('=' puts the fill between the sign and the digits)
assert f'{-23:=5d}' == '-  23', 'sign-aware padding'

# === Float formatting ===
# basic float
assert f'{3.14159}' == '3.14159', 'basic float'

# float with :f type
assert f'{3.141592653589793:f}' == '3.141593', 'float :f'

# float precision
assert f'{3.141592653589793:.2f}' == '3.14', 'float precision'
assert f'{3.141592653589793:.4f}' == '3.1416', 'float precision 4'

# float width and precision
assert f'{3.141592653589793:06.2f}' == '003.14', 'float zero pad with precision'
# '3.14' is 4 chars, so width 10 right-aligns it behind 6 spaces
assert f'{3.141592653589793:10.2f}' == '      3.14', 'float width with precision'

# float with sign
assert f'{3.14:+.2f}' == '+3.14', 'float positive sign'
assert f'{-3.14:+.2f}' == '-3.14', 'float negative with sign'
assert f'{3.14:-.2f}' == '3.14', 'float explicit minus sign'
assert f'{-3.14:-.2f}' == '-3.14', 'float explicit minus sign negative'

# exponential notation
assert f'{1234.5678:e}' == '1.234568e+03', 'exponential lowercase'
assert f'{1234.5678:E}' == '1.234568E+03', 'exponential uppercase'
assert f'{1234.5678:.2e}' == '1.23e+03', 'exponential with precision'
assert f'{0.00012345:.2e}' == '1.23e-04', 'exponential small number'

# general format (g/G) - uses exponential for very large/small numbers
assert f'{1.5:g}' == '1.5', 'general format simple'
assert f'{1.500:g}' == '1.5', 'general format strips trailing zeros'
assert f'{1234567890:g}' == '1.23457e+09', 'general format large number'

# percentage
assert f'{0.25:%}' == '25.000000%', 'percentage default precision'
assert f'{0.25:.1%}' == '25.0%', 'percentage with precision'
assert f'{0.125:.0%}' == '12%', 'percentage zero precision'

# === Nested format specs ===
width = 10
# strings are left-aligned by default: 'hi' followed by 8 spaces
assert f'{"hi":{width}}' == 'hi        ', 'nested format spec width'

# nested alignment and width
align = '^'
# 'test' centered in 10 columns: 3 spaces on each side
assert f'{"test":{align}{width}}' == '   test   ', 'nested align and width'

# nested precision
prec = 3
assert f'{"xylophone":.{prec}}' == 'xyl', 'nested precision'


# === f-string in function ===
def greet(name):
    return f'Hello, {name}!'


assert greet('World') == 'Hello, World!', 'f-string in function'


# function returning formatted value
def format_num(n, w):
    return f'{n:>{w}}'


# 'x' right-aligned in 5 columns: 4 spaces then 'x'
assert format_num('x', 5) == '    x', 'f-string with params'

# === Escaping ===
# double braces to escape
assert f'{{}}' == '{}', 'escaped braces'
assert f'{{x}}' == '{x}', 'escaped braces with content'
assert f'{{{42}}}' == '{42}', 'value inside escaped braces'

# === Complex expressions ===
# TODO: method call on literal - parser doesn't support this yet
# assert f'{"hello".upper()}' == 'HELLO', 'method call on literal'

# TODO: method call on variable - str.upper() not implemented yet
# s = 'hello'
# assert f'{s.upper()}' == 'HELLO', 'method call on variable'

# subscript in f-string
lst = [10, 20, 30]
assert f'{lst[1]}' == '20', 'subscript'

# dict lookup
d = {'a': 1, 'b': 2}
assert f'{d["a"]}' == '1', 'dict lookup'

# TODO: conditional expression - parser doesn't support IfExp yet
# x = 5
# assert f'{x if x > 0 else -x}' == '5', 'conditional positive'
# x = -5
# assert f'{-x if x < 0 else x}' == '5', 'conditional negative'

# === String concatenation ===
name = 'world'
# regular string +
f-string (implicit concatenation) assert f'hello {name}' == 'hello world', 'str concat with fstring' # === Empty interpolation expression === # (this should be a syntax error, but test current behavior) # assert f'{}' would be syntax error # === Whitespace in format spec === # no extra whitespace handling needed, width handles it assert f'{"x":5}' == 'x ', 'single char width' # === Unicode character counting in padding === x = 'café' assert f'{x:_<10}' == 'café______' assert f'{x:_>10}' == '______café' assert f'{x:_^10}' == '___café___' assert f'{x:_^11}' == '___café____' assert f'{x:é<10}' == 'cafééééééé' assert f'{x:é>10}' == 'éééééécafé' assert f'{x:é^10}' == 'ééécaféééé' assert f'{x:é^11}' == 'ééécafééééé' # === Conversion flag with type spec === # conversion flag produces string, so 's' format should work assert f'{42!r:s}' == '42', 'conversion with type spec' # === Zero-padding with negative numbers === # zero-padding should use sign-aware alignment x = -42 assert f'{x:05d}' == '-0042', 'zero pad negative' # === Debug/self-documenting expressions (=) === a = 42 assert f'{a=}' == 'a=42', 'basic debug expression' assert f'{a = }' == 'a = 42', 'debug with spaces' name = 'test' assert f'{name=}' == "name='test'", 'debug uses repr for strings' assert f'{name = }' == "name = 'test'", 'debug uses repr for strings' assert f'{name=!s}' == 'name=test', 'debug with !s conversion' assert f'{name=!r}' == "name='test'", 'debug with !r conversion' assert f'{1+1=}' == '1+1=2', 'debug with expression' ================================================ FILE: crates/monty/test_cases/fstring__error_eq_align_on_str.py ================================================ # '=' alignment on string raises ValueError f'{"hello":=10}' # Raise=ValueError("'=' alignment not allowed in string format specifier") ================================================ FILE: crates/monty/test_cases/fstring__error_float_f_on_str.py ================================================ # float format specifier 
':f' on string raises ValueError f'{"hello":f}' # Raise=ValueError("Unknown format code 'f' for object of type 'str'") ================================================ FILE: crates/monty/test_cases/fstring__error_int_d_on_float.py ================================================ # integer format specifier ':d' on float raises ValueError f'{3.14:d}' # Raise=ValueError("Unknown format code 'd' for object of type 'float'") ================================================ FILE: crates/monty/test_cases/fstring__error_int_d_on_str.py ================================================ # integer format specifier ':d' on string raises ValueError f'{"hello":d}' # Raise=ValueError("Unknown format code 'd' for object of type 'str'") ================================================ FILE: crates/monty/test_cases/fstring__error_invalid_spec.py ================================================ # xfail=cpython # invalid format specifier with trailing characters (detected at parse time) f'{1:10xyz}' # Raise=SyntaxError("Invalid format specifier '10xyz'") ================================================ FILE: crates/monty/test_cases/fstring__error_invalid_spec_dynamic.py ================================================ # invalid format specifier with dynamic spec spec = 'xyz' f'{1:{spec}}' # Raise=ValueError("Invalid format specifier 'xyz' for object of type 'int'") ================================================ FILE: crates/monty/test_cases/fstring__error_invalid_spec_str.py ================================================ # xfail=cpython # invalid format specifier for string (detected at parse time) f'{"hello":abc}' # Raise=SyntaxError("Invalid format specifier 'abc'") ================================================ FILE: crates/monty/test_cases/fstring__error_str_s_on_int.py ================================================ # string format specifier ':s' on integer raises ValueError f'{42:s}' # Raise=ValueError("Unknown format code 's' for object of type 'int'") 
================================================ FILE: crates/monty/test_cases/function__call_duplicate_kwargs.py ================================================ def f(**kwargs): pass f(**{'x': 1}, **{'x': 2}) # Raise=TypeError("f() got multiple values for keyword argument 'x'") ================================================ FILE: crates/monty/test_cases/function__call_unpack.py ================================================ def f(*args, **kwargs): return args, kwargs # === Multiple *args === assert f(*[1, 2], *[3, 4]) == ((1, 2, 3, 4), {}), 'multiple star args' assert f(0, *[1, 2], 3) == ((0, 1, 2, 3), {}), 'positional after star args' assert f(*[], *[1]) == ((1,), {}), 'unpack empty then non-empty' # === Multiple **kwargs === assert f(**{'a': 1}, **{'b': 2}) == ((), {'a': 1, 'b': 2}), 'multiple star-star kwargs' assert f(**{'a': 1}, b=2) == ((), {'a': 1, 'b': 2}), 'named after star-star' assert f(key='before', **{'a': 1}) == ((), {'key': 'before', 'a': 1}), 'named before star-star' # === Mixed === assert f(1, *[2, 3], **{'x': 4}) == ((1, 2, 3), {'x': 4}), 'mixed star and star-star' # === Builtin callable with GeneralizedCall (Callable::Builtin path) === # max(*[1,2], *[3,4]) exercises the Callable::Builtin branch in compile_call GeneralizedCall result = max(*[1, 2], *[3, 4]) assert result == 4, 'builtin max with multiple *args' result = min(*[5, 3], *[7, 1]) assert result == 1, 'builtin min with multiple *args' # Builtin type and exception constructors should keep their public names in # **kwargs merge errors, not fall back to ''. 
try: list(**1) assert False, 'list with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('list() argument after ** must be a mapping, not int',), ( 'builtin type non-mapping **kwargs error keeps type name' ) try: ValueError(**1) assert False, 'ValueError with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('ValueError() argument after ** must be a mapping, not int',), ( 'builtin exception non-mapping **kwargs error keeps exception name' ) try: list(a=1, **{'a': 2}) assert False, 'list with duplicate **kwargs should raise TypeError' except TypeError as e: assert e.args == ("list() got multiple values for keyword argument 'a'",), ( 'builtin type duplicate **kwargs error keeps type name' ) # Builtin type constructors should also keep their public names in # non-mapping **kwargs errors so compiler call metadata matches CPython. try: bool(**1) assert False, 'bool with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('bool() argument after ** must be a mapping, not int',), ( 'bool non-mapping **kwargs error keeps builtin type name' ) try: int(**1) assert False, 'int with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('int() argument after ** must be a mapping, not int',), ( 'int non-mapping **kwargs error keeps builtin type name' ) try: float(**1) assert False, 'float with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('float() argument after ** must be a mapping, not int',), ( 'float non-mapping **kwargs error keeps builtin type name' ) try: str(**1) assert False, 'str with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('str() argument after ** must be a mapping, not int',), ( 'str non-mapping **kwargs error keeps builtin type name' ) try: bytes(**1) assert False, 'bytes with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == 
('bytes() argument after ** must be a mapping, not int',), ( 'bytes non-mapping **kwargs error keeps builtin type name' ) try: tuple(**1) assert False, 'tuple with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('tuple() argument after ** must be a mapping, not int',), ( 'tuple non-mapping **kwargs error keeps builtin type name' ) try: dict(**1) assert False, 'dict with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('dict() argument after ** must be a mapping, not int',), ( 'dict non-mapping **kwargs error keeps builtin type name' ) try: set(**1) assert False, 'set with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('set() argument after ** must be a mapping, not int',), ( 'set non-mapping **kwargs error keeps builtin type name' ) try: frozenset(**1) assert False, 'frozenset with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('frozenset() argument after ** must be a mapping, not int',), ( 'frozenset non-mapping **kwargs error keeps builtin type name' ) try: range(**1) assert False, 'range with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('range() argument after ** must be a mapping, not int',), ( 'range non-mapping **kwargs error keeps builtin type name' ) try: slice(**1) assert False, 'slice with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('slice() argument after ** must be a mapping, not int',), ( 'slice non-mapping **kwargs error keeps builtin type name' ) try: type(**1) assert False, 'type with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('type() argument after ** must be a mapping, not int',), ( 'type non-mapping **kwargs error keeps builtin type name' ) try: property(**1) assert False, 'property with non-mapping **arg should raise TypeError' except TypeError as e: assert e.args == ('property() argument 
after ** must be a mapping, not int',), ( 'property non-mapping **kwargs error keeps builtin type name' ) try: ValueError(a=1, **{'a': 2}) assert False, 'ValueError with duplicate **kwargs should raise TypeError' except TypeError as e: assert e.args == ("ValueError() got multiple values for keyword argument 'a'",), ( 'builtin exception duplicate **kwargs error keeps exception name' ) # === Expression-based callable with GeneralizedCall (compile_call_args path) === # funcs[0](*[1,2], *[3,4]) exercises the GeneralizedCall branch in compile_call_args funcs = [f] result = funcs[0](*[1, 2], *[3, 4]) assert result == ((1, 2, 3, 4), {}), 'subscript call with multiple *args' result = funcs[0](**{'a': 1}, **{'b': 2}) assert result == ((), {'a': 1, 'b': 2}), 'subscript call with multiple **kwargs' # === Named kwarg in GeneralizedCall (compile_generalized_call_body Named path) === # f(*[1,2], *[3], x=5): two *unpacks → GeneralizedCall; x=5 is a Named kwarg. # This exercises the CallKwarg::Named arm in compile_generalized_call_body. 
result = f(*[1, 2], *[3], x=5) assert result == ((1, 2, 3), {'x': 5}), 'named kwarg in multi-star GeneralizedCall' result = funcs[0](*[1, 2], *[3], x=5) assert result == ((1, 2, 3), {'x': 5}), 'subscript call: named kwarg in GeneralizedCall' ================================================ FILE: crates/monty/test_cases/function__defaults.py ================================================ # Tests for default parameter values in function definitions # === Basic default values === def f_basic(a, b=10): return a + b assert f_basic(1) == 11, 'default used' assert f_basic(1, 2) == 3, 'default overridden' assert f_basic(5) == 15, 'default used again' # === Multiple defaults === def f_multi(a=1, b=2): return a + b assert f_multi() == 3, 'both defaults' assert f_multi(10) == 12, 'first provided' assert f_multi(10, 20) == 30, 'both provided' # === Mixed required and default === def f_mixed(a, b, c=3, d=4): return a + b + c + d assert f_mixed(1, 2) == 10, 'required only' assert f_mixed(1, 2, 30) == 37, 'one default overridden' assert f_mixed(1, 2, 30, 40) == 73, 'all provided' # === Default with keyword args === def f_kw(a, b=10): return a + b assert f_kw(1, b=20) == 21, 'keyword override' assert f_kw(a=5) == 15, 'keyword required, default used' assert f_kw(a=5, b=3) == 8, 'both keywords' # === Default expressions evaluated at definition === # Test that default is evaluated once at definition time def value_maker(): return 42 def f_eval(x=value_maker()): return x # value_maker was called once at function definition time assert f_eval() == 42, 'first call uses cached default' assert f_eval() == 42, 'second call uses same default' # === Mutable default (Python gotcha - shared across calls) === def f_mutable(lst=[]): lst.append(1) return lst first_result = f_mutable() assert first_result == [1], 'first call' second_result = f_mutable() assert second_result == [1, 1], 'second call appends to same list' assert first_result is second_result, 'same list object' # === Multiple 
functions with separate defaults === def f_sep1(x=[]): x.append('a') return x def f_sep2(x=[]): x.append('b') return x r1 = f_sep1() r2 = f_sep2() assert r1 == ['a'], 'f_sep1 default' assert r2 == ['b'], 'f_sep2 default' assert r1 is not r2, 'separate default lists' # === Default referencing earlier param (not supported, different test) === # === Closure with defaults === def make_adder(n): def add(x, y=n): return x + y return add add5 = make_adder(5) assert add5(10) == 15, 'closure default from enclosing scope' assert add5(10, 3) == 13, 'closure default overridden' add10 = make_adder(10) assert add10(1) == 11, 'different closure, different captured default' # Verify the two closures have independent defaults assert add5(1) == 6, 'add5 still uses 5' # === Keyword-only defaults interleaved === def kwonly_mix(*, head=1, mid, tail=3): return head, mid, tail assert kwonly_mix(mid=2) == (1, 2, 3), 'kw-only defaults applied per parameter' assert kwonly_mix(head=5, mid=7) == (5, 7, 3), 'kw-only default overridden independently' ================================================ FILE: crates/monty/test_cases/function__err_duplicate_arg.py ================================================ # Test: same argument passed both positionally and by keyword def f(a, b, c): return a + b + c f(1, 2, b=3) # Raise=TypeError("f() got multiple values for argument 'b'") ================================================ FILE: crates/monty/test_cases/function__err_duplicate_first_arg.py ================================================ # Test: first argument passed both positionally and by keyword def f(a, b): return a + b f(1, a=2) # Raise=TypeError("f() got multiple values for argument 'a'") ================================================ FILE: crates/monty/test_cases/function__err_duplicate_kwarg_cleanup.py ================================================ # Test that heap values are properly cleaned up when duplicate kwarg error occurs def f(a, b): return a # The list [1, 2, 3] should be 
cleaned up when the error occurs # because 'a' is passed both positionally and by keyword f([1, 2, 3], a=[4, 5]) # Raise=TypeError("f() got multiple values for argument 'a'") ================================================ FILE: crates/monty/test_cases/function__err_kwonly_as_positional.py ================================================ # Test: keyword-only argument passed positionally def f(*, a, b): return a + b f(1, 2) # Raise=TypeError('f() takes 0 positional arguments but 2 were given') ================================================ FILE: crates/monty/test_cases/function__err_missing_all_posonly.py ================================================ # Test: missing all positional-only arguments def f(a, b, /): return a + b f() # Raise=TypeError("f() missing 2 required positional arguments: 'a' and 'b'") ================================================ FILE: crates/monty/test_cases/function__err_missing_heap_cleanup.py ================================================ # Test that heap values are properly cleaned up when missing required arg error occurs def f(a, b, c): return a # The list [1, 2, 3] should be cleaned up when the error occurs # because 'c' is missing f([1, 2, 3], [4, 5]) # Raise=TypeError("f() missing 1 required positional argument: 'c'") ================================================ FILE: crates/monty/test_cases/function__err_missing_kwonly.py ================================================ # Test: missing required keyword-only argument def f(a, *, b): return a + b f(1) # Raise=TypeError("f() missing 1 required keyword-only argument: 'b'") ================================================ FILE: crates/monty/test_cases/function__err_missing_posonly_with_kwarg.py ================================================ # Test: missing positional-only when keyword is provided for other param def f(a, /, b): return a + b f(b=2) # Raise=TypeError("f() missing 1 required positional argument: 'a'") ================================================ FILE: 
crates/monty/test_cases/function__err_missing_with_posonly.py ================================================ # Test: missing required argument with positional-only params def f(a, b, /, c): return a + b + c f(1, 2) # Raise=TypeError("f() missing 1 required positional argument: 'c'") ================================================ FILE: crates/monty/test_cases/function__err_posonly_as_kwarg.py ================================================ # Test: positional-only parameter passed as keyword argument def f(a, b, /, c): return a + b + c f(1, b=2, c=3) # Raise=TypeError("f() got some positional-only arguments passed as keyword arguments: 'b'") ================================================ FILE: crates/monty/test_cases/function__err_posonly_first_as_kwarg.py ================================================ # Test: first positional-only parameter passed as keyword argument def f(a, /, b): return a + b f(a=1, b=2) # Raise=TypeError("f() got some positional-only arguments passed as keyword arguments: 'a'") ================================================ FILE: crates/monty/test_cases/function__err_too_many_posonly.py ================================================ # Test: too many positional arguments with positional-only params def f(a, b, /): return a + b f(1, 2, 3) # Raise=TypeError('f() takes 2 positional arguments but 3 were given') ================================================ FILE: crates/monty/test_cases/function__err_too_many_with_kwonly.py ================================================ # Test: too many positional arguments with keyword-only params def f(a, *, b): return a + b f(1, 2, b=3) # Raise=TypeError('f() takes 1 positional argument but 2 positional arguments (and 1 keyword-only argument) were given') ================================================ FILE: crates/monty/test_cases/function__err_unexpected_kwarg.py ================================================ # Test: unexpected keyword argument def f(a, b): return a + b f(1, 2, c=3) # 
Raise=TypeError("f() got an unexpected keyword argument 'c'") ================================================ FILE: crates/monty/test_cases/function__err_unexpected_kwarg_cleanup.py ================================================ # Test that heap values are properly cleaned up when unexpected kwarg error occurs def f(a, b): return a # The list [1, 2, 3] should be cleaned up when the error occurs # because 'c' is an unexpected keyword argument f([1, 2, 3], [4, 5], c=[6, 7]) # Raise=TypeError("f() got an unexpected keyword argument 'c'") ================================================ FILE: crates/monty/test_cases/function__err_unexpected_kwarg_quote.py ================================================ def f(a): pass f(1, **{"foo'": 2}) """ TRACEBACK: Traceback (most recent call last): File "function__err_unexpected_kwarg_quote.py", line 5, in <module> f(1, **{"foo'": 2}) ~~~~~~~~~~~~~~~~~~~ TypeError: f() got an unexpected keyword argument 'foo'' """ ================================================ FILE: crates/monty/test_cases/function__err_unexpected_kwarg_simple.py ================================================ # Test: unexpected keyword argument on single-param function def f(a): return a f(1, b=2) # Raise=TypeError("f() got an unexpected keyword argument 'b'") ================================================ FILE: crates/monty/test_cases/function__err_unpack_duplicate_arg.py ================================================ def f(a, b): return a + b f(a=1, **{'a': 2}) # Raise=TypeError("f() got multiple values for keyword argument 'a'") ================================================ FILE: crates/monty/test_cases/function__err_unpack_duplicate_heap.py ================================================ # Test that heap values are cleaned up when duplicate kwargs error occurs def f(a, b): return a # The list value in the dict should be cleaned up when the error occurs f(a=[1, 2, 3], **{'a': [4, 5, 6]}) # Raise=TypeError("f() got multiple values for keyword argument 
'a'") ================================================ FILE: crates/monty/test_cases/function__err_unpack_int.py ================================================ def f(a, b): return a + b f(1, **42) # Raise=TypeError('f() argument after ** must be a mapping, not int') ================================================ FILE: crates/monty/test_cases/function__err_unpack_nonstring_key.py ================================================ def foo(a): return a foo(**{1: 'value'}) # Raise=TypeError('keywords must be strings') ================================================ FILE: crates/monty/test_cases/function__err_unpack_not_mapping.py ================================================ def f(a, b): return a + b f(1, **[2]) # Raise=TypeError('f() argument after ** must be a mapping, not list') ================================================ FILE: crates/monty/test_cases/function__kwargs_unpacking.py ================================================ # === Basic **kwargs unpacking === def greet(name, greeting): return f'{greeting}, {name}!' 
opts = {'greeting': 'Hi'} assert greet('Alice', **opts) == 'Hi, Alice!', 'basic **kwargs unpacking' # === Dict literal unpacking === assert greet('Charlie', **{'greeting': 'Hey'}) == 'Hey, Charlie!', 'dict literal unpacking' # === Multiple kwargs in unpacked dict === def format_msg(msg, prefix, suffix): return f'{prefix}{msg}{suffix}' assert format_msg('test', **{'prefix': '[', 'suffix': ']'}) == '[test]', 'multiple kwargs unpacking' # === Combining regular kwargs with **kwargs === assert format_msg('hello', prefix='> ', **{'suffix': '!'}) == '> hello!', 'regular kwargs with **kwargs' # === **kwargs with positional args === def add_all(a, b, c): return a + b + c assert add_all(1, 2, **{'c': 3}) == 6, '**kwargs with positional args' assert add_all(1, **{'b': 2, 'c': 3}) == 6, '**kwargs providing multiple args' # === Variable dict unpacking === settings = {'prefix': '>>> ', 'suffix': ' <<<'} assert format_msg('output', **settings) == '>>> output <<<', 'variable dict unpacking' # === Unpacking with keyword-only args === def kwonly_func(a, *, b, c): return a + b + c assert kwonly_func(1, **{'b': 2, 'c': 3}) == 6, '**kwargs with keyword-only args' # === Empty dict unpacking with all args provided === def simple(x, y): return x + y assert simple(1, 2, **{}) == 3, 'empty dict unpacking' # === All kwargs from unpacking === def all_kwargs(a, b, c): return a * 100 + b * 10 + c assert all_kwargs(**{'a': 1, 'b': 2, 'c': 3}) == 123, 'all args from **kwargs' assert all_kwargs(**{'c': 7, 'a': 4, 'b': 5}) == 457, 'all args from **kwargs different order' # === Dynamic **kwargs keys === def kwonly_echo(*, keyword): return keyword key_name = 'k' + 'e' + 'y' + 'w' + 'o' + 'r' + 'd' assert kwonly_echo(**{key_name: 'dynamic'}) == 'dynamic', 'runtime string key matches kw-only param' # ============================================================ # *args unpacking tests (function calls) # ============================================================ # === *args with zero args === def 
no_args(): return 'ok' assert no_args(*[]) == 'ok', '*args with empty list' assert no_args(*()) == 'ok', '*args with empty tuple' # === *args with one arg === def one_arg(x): return x * 2 assert one_arg(*[5]) == 10, '*args with one item list' assert one_arg(*(7,)) == 14, '*args with one item tuple' # === *args with two args === def two_args(a, b): return a + b assert two_args(*[1, 2]) == 3, '*args with two item list' assert two_args(*(3, 4)) == 7, '*args with two item tuple' # === *args with three+ args === def many_args(a, b, c, d): return a + b + c + d assert many_args(*[1, 2, 3, 4]) == 10, '*args with four items' assert many_args(*(10, 20, 30, 40)) == 100, '*args with tuple four items' # === Mixed positional and *args === assert two_args(1, *[2]) == 3, 'pos + *args' assert many_args(1, 2, *[3, 4]) == 10, 'two pos + *args' # === *args with heap-allocated values === def list_arg(lst): return len(lst) my_list = [1, 2, 3] assert list_arg(*[my_list]) == 3, '*args with list value' # ============================================================ # Combined *args and **kwargs (function calls) # ============================================================ # === *args and **kwargs together === def mixed_func(a, b, c): return f'{a}-{b}-{c}' assert mixed_func(*[1], **{'b': 2, 'c': 3}) == '1-2-3', '*args and **kwargs' assert mixed_func(*[1, 2], **{'c': 3}) == '1-2-3', 'two *args and **kwargs' # === *args tuple with **kwargs === args_tuple = (10, 20) kwargs_dict = {'c': 30} assert many_args(*args_tuple, **kwargs_dict, d=40) == 100, '*args tuple + **kwargs + regular kwarg' # === Empty *args with **kwargs === assert mixed_func(*[], **{'a': 'x', 'b': 'y', 'c': 'z'}) == 'x-y-z', 'empty *args with **kwargs' # === *args with empty **kwargs === assert two_args(*[5, 6], **{}) == 11, '*args with empty **kwargs' # === All combinations: pos, *args, kwargs, **kwargs === def full_func(a, b, c, d): return a * 1000 + b * 100 + c * 10 + d assert full_func(1, *[2], c=3, **{'d': 4}) == 1234, 
'pos + *args + kwarg + **kwargs' # === *args with heap values and **kwargs === def heap_func(lst, dct): return len(lst) + len(dct) list_val = [1, 2, 3] dict_val = {'a': 1} assert heap_func(*[list_val], **{'dct': dict_val}) == 4, '*args and **kwargs with heap values' # === Both *args and **kwargs empty === assert no_args(*[], **{}) == 'ok', 'empty *args and empty **kwargs' ================================================ FILE: crates/monty/test_cases/function__ops.py ================================================ # === Basic function calls === def f_no_args(): return 1 assert f_no_args() == 1, 'no args' def f_one_arg(x): return x assert f_one_arg(42) == 42, 'one arg' def add(a, b): return a + b assert add(1, 2) == 3, 'two args' def sum3(a, b, c): return a + b + c assert sum3(1, 2, 3) == 6, 'three args' # === Local variables === def f_local(): x = 42 return x assert f_local() == 42, 'local var' def f_local_from_arg(x): y = x + 1 return y assert f_local_from_arg(10) == 11, 'local var from arg' def f_local_list(): items = [1, 2, 3] return items assert f_local_list() == [1, 2, 3], 'local var list' def f_local_modify_list(): items = [1, 2] items.append(3) return items assert f_local_modify_list() == [1, 2, 3], 'local var modify list' def f_local_multiple(): a = 1 b = 2 c = 3 return a + b + c assert f_local_multiple() == 6, 'local var multiple' def f_local_reassign(): x = 1 x = 2 x = 3 return x assert f_local_reassign() == 3, 'local var reassign' # === Nested functions === def nested_basic(): def bar(): return 1 return bar() + 1 assert nested_basic() == 2, 'nested basic' def nested_deep(): def level2(): def level3(): return 42 return level3() return level2() assert nested_deep() == 42, 'nested deep' def nested_multiple_calls(): def inner(): return 10 return inner() + inner() + inner() assert nested_multiple_calls() == 30, 'nested multiple calls' def nested_two_inner(): def add(): return 1 def sub(): return 2 return add() + sub() assert nested_two_inner() == 3, 'nested 
two inner' def nested_with_args(x): def inner(y): return y + y return inner(x) + 1 assert nested_with_args(5) == 11, 'nested with args' # === Function equality === def eq_test(): return 1 def eq_test2(): return 1 # Same function is equal to itself assert eq_test == eq_test, 'function equals itself' assert not (eq_test != eq_test), 'function not-not-equals itself' # Different functions are not equal (even with same body) assert not (eq_test == eq_test2), 'different functions not equal' assert eq_test != eq_test2, 'different functions are not equal' # Function assigned to variable is still equal f_alias = eq_test assert f_alias == eq_test, 'function alias equals original' assert eq_test == f_alias, 'original equals function alias' # === Builtin equality === # Same builtin is equal to itself assert len == len, 'builtin equals itself' assert print == print, 'print equals itself' assert not (len != len), 'builtin not-not-equals itself' # Builtin identity (is) assert print is print, 'print is print' assert len is len, 'len is len' assert not (len is print), 'len is not print' # Different builtins are not equal assert not (len == print), 'different builtins not equal' assert len != print, 'different builtins are not equal' # Builtin assigned to variable is still equal len_alias = len assert len_alias == len, 'builtin alias equals original' assert len_alias is len, 'builtin alias is original' # === Exception type equality === # Note: Using == instead of 'is' to explicitly test the __eq__ implementation assert ValueError == ValueError, 'exc type equals itself' assert TypeError == TypeError, 'exc type equals itself 2' assert not (ValueError != ValueError), 'exc type not-not-equals itself' assert not (ValueError == TypeError), 'different exc types not equal' assert ValueError != TypeError, 'different exc types are not equal' exc_alias = ValueError assert exc_alias == ValueError, 'exc type alias equals original' # === Closure equality === def make_adder(n): def adder(x): 
return x + n return adder add1 = make_adder(1) add2 = make_adder(2) add1_again = make_adder(1) # Same closure instance equals itself assert add1 == add1, 'closure equals itself' assert not (add1 != add1), 'closure not-not-equals itself' # Different closure instances are not equal (even with same captured value) assert not (add1 == add1_again), 'different closure instances not equal' assert add1 != add1_again, 'different closure instances are not equal' # Different closure instances with different captured values assert not (add1 == add2), 'closures with diff captured values not equal' assert add1 != add2, 'closures with diff captured values are not equal' # === Cross-type inequality === def cross_test(): return 1 assert not (cross_test == len), 'function not equal to builtin' assert not (len == cross_test), 'builtin not equal to function' assert not (cross_test == ValueError), 'function not equal to exc type' assert not (ValueError == cross_test), 'exc type not equal to function' assert not (len == ValueError), 'builtin not equal to exc type' assert not (ValueError == len), 'exc type not equal to builtin' # Callables not equal to other types assert not (len == 1), 'builtin not equal to int' assert not (len == 'len'), 'builtin not equal to string' assert not (cross_test == None), 'function not equal to None' assert not (ValueError == None), 'exc type not equal to None' # === Parameter shadowing global variables === # Function parameters should shadow global variables with the same name x = 5 def shadow_single(x): return x + 1 # When called with 10, param x=10 should be used, not global x=5 assert shadow_single(10) == 11, 'param shadows global - single param' y = 3 def shadow_multiple(x, y): return x + y # When called with (20, 30), params should be used, not globals x=5, y=3 assert shadow_multiple(20, 30) == 50, 'param shadows global - multiple params' def shadow_uses_global_too(x): # x is param, y is global return x + y # x=100 (param), y=3 (global), so 100 + 3 = 
103 assert shadow_uses_global_too(100) == 103, 'param shadows but can still access other globals' def shadow_with_default(x=99): return x + 1 # When called with argument, param shadows global assert shadow_with_default(10) == 11, 'param with default shadows global' # When called without argument, default is used (not global) assert shadow_with_default() == 100, 'param default used, not global' # Global is still accessible outside the function assert x == 5, 'global still accessible after function that shadows it' assert y == 3, 'other global still accessible' # Verify global can still be used as argument def double(x): return x * 2 assert double(x) == 10, 'global used as argument, param shadows inside' ================================================ FILE: crates/monty/test_cases/function__return_none.py ================================================ # === Bare return statement === # Test functions with bare return (no value) def early_exit(): return assert early_exit() is None, 'bare return returns None' def conditional_early_exit(x): if x < 0: return return x * 2 assert conditional_early_exit(-5) is None, 'conditional early return' assert conditional_early_exit(5) == 10, 'conditional normal return' def multiple_bare_returns(x): if x == 0: return if x == 1: return return x assert multiple_bare_returns(0) is None, 'first bare return' assert multiple_bare_returns(1) is None, 'second bare return' assert multiple_bare_returns(2) == 2, 'fall through to value return' def nested_bare_return(): def inner(): return return inner() assert nested_bare_return() is None, 'nested bare return' ================================================ FILE: crates/monty/test_cases/function__signatures.py ================================================ # === Basic functions === def simple(a, b, c): return a + b + c assert simple(1, 2, 3) == 6, 'simple function' assert simple(10, 20, 30) == 60, 'simple function with larger values' # === Positional-only parameters === def pos_only(a, b, /, 
c): return a + b + c assert pos_only(1, 2, 3) == 6, 'positional-only params' assert pos_only(5, 5, 5) == 15, 'positional-only all same' assert pos_only(5, 5, c=5) == 15, 'positional-only all same' # === All positional-only === def all_pos_only(a, b, c, /): return a + b + c assert all_pos_only(1, 2, 3) == 6, 'all positional-only' # === Multiple parameter groups === def multi_group(a, /, b, c): return f'a={a} b={b} c={c}' assert multi_group(1, 2, 3) == 'a=1 b=2 c=3', 'mixed positional-only and regular' assert multi_group(1, b=2, c=3) == 'a=1 b=2 c=3', 'mixed positional-only and regular' assert multi_group(1, c=3, b=2) == 'a=1 b=2 c=3', 'mixed positional-only and regular' # === Call-site *args unpacking === def collect_all(*values): return values source_tuple = (1, 2, 3) assert collect_all(*source_tuple) == (1, 2, 3), 'tuple unpacked with *args' source_list = [4, 5] assert collect_all(0, *source_list) == (0, 4, 5), 'positional args followed by *args' ================================================ FILE: crates/monty/test_cases/function__too_few_args_all.py ================================================ def f(a, b, c): return a + b + c f() # Raise=TypeError("f() missing 3 required positional arguments: 'a', 'b', and 'c'") ================================================ FILE: crates/monty/test_cases/function__too_few_args_one.py ================================================ def f(x): return x f() # Raise=TypeError("f() missing 1 required positional argument: 'x'") ================================================ FILE: crates/monty/test_cases/function__too_few_args_two.py ================================================ def f(a, b): return a + b f(1) # Raise=TypeError("f() missing 1 required positional argument: 'b'") ================================================ FILE: crates/monty/test_cases/function__too_many_args_one.py ================================================ def f(x): return x f(1, 2) # Raise=TypeError('f() takes 1 positional argument but 2 were 
given') ================================================ FILE: crates/monty/test_cases/function__too_many_args_two.py ================================================ def f(a, b): return a + b f(1, 2, 3) # Raise=TypeError('f() takes 2 positional arguments but 3 were given') ================================================ FILE: crates/monty/test_cases/function__too_many_args_zero.py ================================================ def f(): return 1 f(42) # Raise=TypeError('f() takes 0 positional arguments but 1 was given') ================================================ FILE: crates/monty/test_cases/global__error_assigned_before.py ================================================ def f(): x = 1 global x # type: ignore[reportAssignmentBeforeGlobalDeclaration] f() # Raise=SyntaxError("name 'x' is assigned to before global declaration") ================================================ FILE: crates/monty/test_cases/global__ops.py ================================================ # === Basic global read/write === x1 = 42 def read_explicit(): global x1 return x1 assert read_explicit() == 42, 'explicit global read' x2 = 1 def write_explicit(): global x2 x2 = 2 write_explicit() assert x2 == 2, 'explicit global write' x3 = 42 def read_implicit(): return x3 # no local x3, reads global assert read_implicit() == 42, 'implicit global read' # === Multiple functions sharing global === counter1 = 0 def inc(): global counter1 counter1 = counter1 + 1 def get_counter(): return counter1 inc() inc() assert get_counter() == 2, 'multiple functions sharing global' # === Mutating global containers (no 'global' needed) === data1 = {'a': 1} def add_dict_entry(): data1['b'] = 2 add_dict_entry() assert data1 == {'a': 1, 'b': 2}, 'mutate global dict' items1 = [1, 2] def append_list_item(): items1.append(3) append_list_item() assert items1 == [1, 2, 3], 'mutate global list append' items2 = ['a', 'c'] def insert_list_item(): items2.insert(1, 'b') insert_list_item() assert items2 == ['a', 'b', 
'c'], 'mutate global list insert' items3 = [] def build_list(): items3.append(1) items3.append(2) items3.append(3) build_list() assert items3 == [1, 2, 3], 'mutate global list multiple' # === Reassigning global containers (requires 'global') === items4 = [1, 2] def replace_list(): global items4 items4 = [3, 4, 5] replace_list() assert items4 == [3, 4, 5], 'reassign global list' # === Nested functions with global === x4 = 1 def outer_global(): def inner(): global x4 x4 = 10 inner() outer_global() assert x4 == 10, 'nested inner global write' x5 = 42 def outer_read(): def inner(): return x5 # reads global return inner() assert outer_read() == 42, 'nested inner global read' # === Shadowing === x6 = 10 def shadow_local(): x6 = 20 # creates local (shadows global) return x6 assert shadow_local() == 20, 'local shadows global' x7 = 10 def shadow_unchanged(): x7 = 99 # local return x7 assert shadow_unchanged() == 99, 'shadowing returns local' assert x7 == 10, 'global unchanged after shadowing' ================================================ FILE: crates/monty/test_cases/hash__dict_unhashable.py ================================================ hash({}) # Raise=TypeError("unhashable type: 'dict'") ================================================ FILE: crates/monty/test_cases/hash__list_unhashable.py ================================================ hash([1, 2, 3]) # Raise=TypeError("unhashable type: 'list'") ================================================ FILE: crates/monty/test_cases/hash__ops.py ================================================ # === Hash returns int type === assert isinstance(hash(42), int), 'hash returns int type' assert isinstance(hash('hello'), int), 'hash of str returns int' assert isinstance(hash((1, 2, 3)), int), 'hash of tuple returns int' assert isinstance(hash(3.14), int), 'hash of float returns int' # === Hash consistency for same values === assert hash(42) == hash(42), 'int hash consistent' assert hash(-1) == hash(-1), 'negative int hash 
consistent' assert hash(0) == hash(0), 'zero hash consistent' assert hash('hello') == hash('hello'), 'str hash consistent' assert hash('') == hash(''), 'empty str hash consistent' assert hash(b'hello') == hash(b'hello'), 'bytes hash consistent' assert hash(b'') == hash(b''), 'empty bytes hash consistent' assert hash(None) == hash(None), 'None hash consistent' assert hash(True) == hash(True), 'True hash consistent' assert hash(False) == hash(False), 'False hash consistent' assert hash((1, 2, 3)) == hash((1, 2, 3)), 'tuple hash consistent' assert hash(()) == hash(()), 'empty tuple hash consistent' assert hash((1,)) == hash((1,)), 'single element tuple hash consistent' assert hash(3.14) == hash(3.14), 'float hash consistent' assert hash(0.0) == hash(0.0), 'zero float hash consistent' assert hash(-0.0) == hash(-0.0), 'negative zero float hash consistent' assert hash(...) == hash(...), 'ellipsis hash consistent' # === Range hash consistency === assert hash(range(10)) == hash(range(10)), 'range hash consistent' assert hash(range(0)) == hash(range(0)), 'empty range hash consistent' assert hash(range(1, 10)) == hash(range(1, 10)), 'range with start hash consistent' assert hash(range(1, 10, 2)) == hash(range(1, 10, 2)), 'range with step hash consistent' assert hash(range(-5, 5)) == hash(range(-5, 5)), 'negative start range hash consistent' # === Different range values should hash differently === assert hash(range(10)) != hash(range(11)), 'different range stop hashes differently' assert hash(range(10)) != hash(range(1, 10)), 'range with different start hashes differently' assert hash(range(10)) != hash(range(0, 10, 2)), 'range with step hashes differently' assert hash(range(1, 10, 2)) != hash(range(1, 10, 3)), 'different steps hash differently' # === Different values should hash differently === assert hash(1) != hash(2), 'different ints hash differently' assert hash('a') != hash('b'), 'different strs hash differently' assert hash(b'a') != hash(b'b'), 'different bytes hash 
differently' assert hash((1, 2)) != hash((1, 3)), 'different tuples hash differently' assert hash((1, 2)) != hash((2, 1)), 'tuple order matters for hash' assert hash(True) != hash(False), 'True and False hash differently' assert hash(3.14) != hash(2.71), 'different floats hash differently' # === Type differentiation for clearly different types === assert hash(()) != hash(''), 'empty tuple and empty str hash differently' assert hash('1') != hash(1), 'str "1" and int 1 hash differently' assert hash(b'1') != hash(1), 'bytes b"1" and int 1 hash differently' # === Nested tuple hashing === assert hash((1, (2, 3))) == hash((1, (2, 3))), 'nested tuple hash consistent' assert hash((1, (2, 3))) != hash((1, (2, 4))), 'nested tuples with different inner values hash differently' assert hash(((1, 2), (3, 4))) == hash(((1, 2), (3, 4))), 'tuple of tuples hash consistent' # === String/bytes content equality across representations === # Interned strings and heap strings with same content should hash the same s1 = 'test' s2 = 'te' + 'st' assert hash(s1) == hash(s2), 'concatenated string hashes same as literal' b1 = b'test' b2 = b'te' + b'st' assert hash(b1) == hash(b2), 'concatenated bytes hashes same as literal' # === Function hashing === def f(): pass def g(): pass assert hash(f) == hash(f), 'function hash consistent' assert hash(g) == hash(g), 'different function hash consistent' assert hash(f) != hash(g), 'different functions hash differently' # === Builtin function hashing === assert hash(len) == hash(len), 'builtin hash consistent' assert hash(print) == hash(print), 'print builtin hash consistent' assert hash(len) != hash(print), 'different builtins hash differently' # === Builtin type hashing === assert hash(int) == hash(int), 'int type hash consistent' assert hash(str) == hash(str), 'str type hash consistent' assert hash(int) != hash(str), 'different types hash differently' assert hash(int) != hash(float), 'int and float types hash differently' # === Exception type hashing 
=== assert hash(ValueError) == hash(ValueError), 'exception type hash consistent' assert hash(TypeError) == hash(TypeError), 'TypeError hash consistent' assert hash(ValueError) != hash(TypeError), 'different exception types hash differently' # === Dict key behavior with hashes === # Verify that hash consistency works with dict lookups d = {} d[42] = 'int' d['hello'] = 'str' d[(1, 2)] = 'tuple' d[range(5)] = 'range' d[3.14] = 'float' d[None] = 'none' assert d[42] == 'int', 'int dict key works' assert d['hello'] == 'str', 'str dict key works' assert d[(1, 2)] == 'tuple', 'tuple dict key works' assert d[range(5)] == 'range', 'range dict key works' assert d[3.14] == 'float', 'float dict key works' assert d[None] == 'none', 'None dict key works' # === Multiple ranges as dict keys === rd = {} rd[range(5)] = 'a' rd[range(10)] = 'b' rd[range(1, 5)] = 'c' rd[range(0, 5, 2)] = 'd' assert rd[range(5)] == 'a', 'range(5) key retrieval' assert rd[range(10)] == 'b', 'range(10) key retrieval' assert rd[range(1, 5)] == 'c', 'range(1,5) key retrieval' assert rd[range(0, 5, 2)] == 'd', 'range with step key retrieval' assert len(rd) == 4, 'all ranges stored as distinct keys' # === Functions as dict keys === def key_fn(): pass fd = {} fd[key_fn] = 'func_value' assert fd[key_fn] == 'func_value', 'function as dict key works' # === Builtins as dict keys === bd = {} bd[len] = 'len_value' bd[print] = 'print_value' assert bd[len] == 'len_value', 'builtin len as dict key' assert bd[print] == 'print_value', 'builtin print as dict key' assert len(bd) == 2, 'different builtins are distinct keys' # === Types as dict keys === td = {} td[int] = 'int_type' td[str] = 'str_type' td[ValueError] = 'value_error' assert td[int] == 'int_type', 'int type as dict key' assert td[str] == 'str_type', 'str type as dict key' assert td[ValueError] == 'value_error', 'exception type as dict key' ================================================ FILE: crates/monty/test_cases/id__bytes_literals_distinct.py 
================================================ # xfail=cpython id(b'test') == id(b'test') # Return=False ================================================ FILE: crates/monty/test_cases/id__int_copy_distinct.py ================================================ # value-based identity: same value = same id x = 100 y = x id(x) == id(y) # Return=True ================================================ FILE: crates/monty/test_cases/id__is_number_is_number.py ================================================ # value-based identity: same value = same identity 1 is 1 # Return=True ================================================ FILE: crates/monty/test_cases/id__non_overlapping_lifetimes_distinct_types.py ================================================ # xfail=cpython # === Heap types may have the same id if lifetimes do not overlap === # See https://docs.python.org/3/library/functions.html#id # In CPython, objects of different types happen to be allocated in different # memory locations, but this is not guaranteed by the language spec assert id([]) == id([]), 'empty list may have same id' assert id([]) == id({}), 'empty list may have same id as empty dict' assert id([]) == id((1,)), 'empty list may have same id as tuple' assert id((1, 2)) == id((1, 2)), 'non-empty tuple may have same id' assert id([1, 2]) == id([1, 2]), 'non-empty list may have same id' ================================================ FILE: crates/monty/test_cases/id__non_overlapping_lifetimes_same_types.py ================================================ # === Heap types may have the same id if lifetimes do not overlap === # See https://docs.python.org/3/library/functions.html#id assert id([]) == id([]), 'empty list may have same id' assert id({}) == id({}), 'empty dict may have same id' assert id((1, 2)) == id((1, 2)), 'non-empty tuple may have same id' assert id([1, 2]) == id([1, 2]), 'non-empty list may have same id' ================================================ FILE: 
crates/monty/test_cases/id__ops.py ================================================ # === id() returns int type === assert isinstance(id(None), int), 'id returns int type' assert isinstance(id([]), int), 'id of list returns int' assert isinstance(id('hello'), int), 'id of str returns int' assert isinstance(id(42), int), 'id of int returns int' # === Identity operator (is) === assert (True is True) == True, 'is True' assert (False is False) == True, 'is False' assert (None is None) == True, 'is None' assert (... is ...) == True, 'is Ellipsis' # === Identity operator (is not) === assert (True is not True) == False, 'is not True' assert (True is not False) == True, 'is not False' # === Singleton identity === assert id(None) == id(None), 'None singleton' assert id(True) == id(True), 'True singleton' assert id(False) == id(False), 'False singleton' assert id(...) == id(...), 'Ellipsis singleton' # bool and int are distinct assert id(True) != id(1), 'True is not 1' assert id(False) != id(0), 'False is not 0' # distinct singletons assert id(None) != id(True), 'None is not True' assert id(None) != id(False), 'None is not False' assert id(None) != id(...), 'None is not Ellipsis' # === Integer identity === assert id(10) != id(20), 'different ints distinct' # === Float identity === assert id(1.0) != id(2.0), 'different floats distinct' # === List assignment shares identity === lst = [1, 2] ref = lst assert id(lst) == id(ref), 'list assignment shared' assert lst is ref, 'list is same' # === Variable identity is stable === lst = [1, 2] assert id(lst) == id(lst), 'var id stable' # === List mutation preserves identity === a = [1, 2] b = a b.append(3) assert a is b, 'list mutate preserves identity' # === Mixed types have distinct ids === assert id(1) != id('1'), 'int vs str distinct' # === Tuple singleton is guaranteed to have a unique id === assert id([]) != id(()), 'list vs tuple singleton distinct' assert id({}) != id(()), 'dict vs tuple singleton distinct' assert id(1) != 
id(()), 'int vs tuple singleton distinct' # === Multiple refs share id === x = [1, 2] y = x z = y assert id(x) == id(y), 'multiple refs share id xy' assert id(y) == id(z), 'multiple refs share id yz' # === String assignment shares identity === s = 'hello' r = s assert id(s) == id(r), 'str assignment shared' # === Bytes assignment shares identity === b = b'hello' r = b assert id(b) == id(r), 'bytes assignment shared' # === Tuple assignment shares identity === t = (1, 2) r = t assert id(t) == id(r), 'tuple assignment shared' # === Boolean is tests === assert (True is True) == True, 'bool is test' assert (False is False) == True, 'bool is test 2' # === Array is test === a = [1, 2] b = a assert (a is b) == True, 'array is test' assert (a is [1, 2]) == False, 'array is new literal' # === None is tests === x = None assert (x is None) == True, 'var is None' assert (1 is None) == False, 'int is not None' ================================================ FILE: crates/monty/test_cases/id__str_literals_same.py ================================================ # With string interning, identical literals have the same id id('hello') == id('hello') # Return=True ================================================ FILE: crates/monty/test_cases/if__elif_else.py ================================================ # === Basic elif chains === # if branch taken x = 0 if True: x = 1 elif True: x = 2 assert x == 1, 'if branch should be taken when condition is True' # elif branch taken (first elif) y = 0 if False: y = 1 elif True: y = 2 elif True: y = 3 assert y == 2, 'first elif should be taken when if is False and elif is True' # second elif taken z = 0 if False: z = 1 elif False: z = 2 elif True: z = 3 assert z == 3, 'second elif should be taken' # else branch taken after if a = 0 if False: a = 1 else: a = 2 assert a == 2, 'else should be taken when if is False' # else branch taken after elif chain b = 0 if False: b = 1 elif False: b = 2 elif False: b = 3 else: b = 4 assert b == 4, 'else 
should be taken when all conditions are False' # === Value-based conditions === val = 5 c = 0 if val < 3: c = 1 elif val < 6: c = 2 elif val < 9: c = 3 else: c = 4 assert c == 2, 'elif condition val < 6 should match for val=5' val2 = 10 d = 0 if val2 < 3: d = 1 elif val2 < 6: d = 2 elif val2 < 9: d = 3 else: d = 4 assert d == 4, 'else should be taken for val2=10' # === Multiple statements in branches === e = 0 f = 0 if False: e = 1 f = 1 elif True: e = 2 f = 2 else: e = 3 f = 3 assert e == 2, 'first statement in elif executed' assert f == 2, 'second statement in elif executed' # === Nested if inside elif === g = 0 if False: g = 1 elif True: if True: g = 100 else: g = 200 else: g = 3 assert g == 100, 'nested if inside elif should work' # nested if in else h = 0 if False: h = 1 elif False: h = 2 else: if True: h = 300 else: h = 400 assert h == 300, 'nested if inside else should work' # === Short-circuit evaluation === # elif condition not evaluated if earlier branch taken called = False def set_called(): global called called = True return True i = 0 if True: i = 1 elif set_called(): i = 2 assert i == 1, 'if branch taken' assert called == False, 'elif condition should not be evaluated when if branch is taken' # reset and test elif evaluation called = False j = 0 if False: j = 1 elif set_called(): j = 2 assert j == 2, 'elif branch taken' assert called == True, 'elif condition should be evaluated when if condition is False' # === Empty body handling (pass) === k = 0 if False: pass elif True: k = 1 else: pass assert k == 1, 'elif body executes after if with pass' # === Boolean expression conditions === and_result = 0 if False and True: and_result = 1 elif True and True: and_result = 2 else: and_result = 3 assert and_result == 2, 'elif with and condition' or_result = 0 if False or False: or_result = 1 elif False or True: or_result = 2 else: or_result = 3 assert or_result == 2, 'elif with or condition' # === Multiple conditions with and === n = 5 o = 0 if n > 1 and n < 3: 
o = 1 elif n > 3 and n < 7: o = 2 else: o = 3 assert o == 2, 'elif with multiple and conditions' # === Variable assignment in conditions (walrus operator style via temp var) === # Test value propagation through elif chain p = 0 temp = 10 if temp > 20: p = 1 elif temp > 5: p = 2 elif temp > 0: p = 3 else: p = 4 assert p == 2, 'second condition matches temp=10' ================================================ FILE: crates/monty/test_cases/if__raise_elif.py ================================================ if False: pass elif True: raise ValueError('in elif body') """ TRACEBACK: Traceback (most recent call last): File "if__raise_elif.py", line 4, in <module> raise ValueError('in elif body') ValueError: in elif body """ ================================================ FILE: crates/monty/test_cases/if__raise_else.py ================================================ if False: pass elif False: pass else: raise ValueError('in else body') """ TRACEBACK: Traceback (most recent call last): File "if__raise_else.py", line 6, in <module> raise ValueError('in else body') ValueError: in else body """ ================================================ FILE: crates/monty/test_cases/if__raise_if.py ================================================ if True: raise ValueError('in if body') """ TRACEBACK: Traceback (most recent call last): File "if__raise_if.py", line 2, in <module> raise ValueError('in if body') ValueError: in if body """ ================================================ FILE: crates/monty/test_cases/if__raise_in_elif_condition.py ================================================ def fail(): raise ValueError('elif condition failed') if False: x = 1 elif fail(): x = 2 """ TRACEBACK: Traceback (most recent call last): File "if__raise_in_elif_condition.py", line 7, in <module> elif fail(): ~~~~~~ File "if__raise_in_elif_condition.py", line 2, in fail raise ValueError('elif condition failed') ValueError: elif condition failed """ ================================================ FILE: 
crates/monty/test_cases/if__raise_in_if_condition.py ================================================ def fail(): raise ValueError('condition failed') if fail(): x = 1 """ TRACEBACK: Traceback (most recent call last): File "if__raise_in_if_condition.py", line 5, in <module> if fail(): ~~~~~~ File "if__raise_in_if_condition.py", line 2, in fail raise ValueError('condition failed') ValueError: condition failed """ ================================================ FILE: crates/monty/test_cases/if_else_expr__all.py ================================================ # === Basic if/else === assert (1 if True else 2) == 1, 'true condition' assert (1 if False else 2) == 2, 'false condition' # === Truthy/falsy values === assert ('yes' if 1 else 'no') == 'yes', 'truthy int' assert ('yes' if 0 else 'no') == 'no', 'falsy int' assert ('yes' if 'a' else 'no') == 'yes', 'truthy str' assert ('yes' if '' else 'no') == 'no', 'falsy str' assert ('yes' if [1] else 'no') == 'yes', 'truthy list' assert ('yes' if [] else 'no') == 'no', 'falsy list' assert ('yes' if None else 'no') == 'no', 'None is falsy' # === Variables and comparisons === x = 5 assert (x if x > 0 else -x) == 5, 'positive x' x = -3 assert (x if x > 0 else -x) == 3, 'negative x - abs' # === Nested if/else === a = 1 b = 2 c = 3 assert ((a if a > b else b) if True else c) == 2, 'nested - outer true' assert ((a if a > b else b) if False else c) == 3, 'nested - outer false' assert (a if True else (b if True else c)) == 1, 'nested in else - not evaluated' # === Complex expressions === assert (1 + 2 if True else 3 + 4) == 3, 'arithmetic in body' assert (1 + 2 if False else 3 + 4) == 7, 'arithmetic in orelse' # === With heap values (strings, lists) === s1 = 'hello' s2 = 'world' assert (s1 if True else s2) == 'hello', 'string true branch' assert (s1 if False else s2) == 'world', 'string false branch' l1 = [1, 2] l2 = [3, 4] result = l1 if True else l2 assert result == [1, 2], 'list true branch' result = l1 if False else l2 assert result == 
[3, 4], 'list false branch' # === In f-strings === val = 10 assert f'{val if val > 5 else 0}' == '10', 'fstring with true branch' val = 3 assert f'{val if val > 5 else 0}' == '0', 'fstring with false branch' assert f'value: {1 if True else 2}' == 'value: 1', 'fstring with prefix' assert f'{"yes" if 1 else "no"}' == 'yes', 'fstring with string result' # === F-string with format spec === x = 42 assert f'{x if True else 0:05d}' == '00042', 'fstring format spec with if/else' ================================================ FILE: crates/monty/test_cases/import__error_cannot_import.py ================================================ from sys import nonexistent """ TRACEBACK: Traceback (most recent call last): File "import__error_cannot_import.py", line 1, in <module> from sys import nonexistent ImportError: cannot import name 'nonexistent' from 'sys' (unknown location) """ ================================================ FILE: crates/monty/test_cases/import__error_module_not_found.py ================================================ import nonexistent_module """ TRACEBACK: Traceback (most recent call last): File "import__error_module_not_found.py", line 1, in <module> import nonexistent_module ModuleNotFoundError: No module named 'nonexistent_module' """ ================================================ FILE: crates/monty/test_cases/import__local_scope.py ================================================ # Tests that import inside functions binds to local scope, not global # === Import statement inside function === def test_import_local(): import sys return sys.platform # Call to verify import works inside function result = test_import_local() assert isinstance(result, str), 'sys.platform should be a string' # Verify sys is NOT in global scope after function call try: sys assert False, 'sys should not be in global scope' except NameError: pass # Expected: sys is local to the function # === From import inside function === def test_from_import_local(): from typing import Any return Any 
any_result = test_from_import_local() assert repr(any_result) == 'typing.Any', 'should return typing.Any' # Verify Any is NOT in global scope after function call try: Any assert False, 'Any should not be in global scope' except NameError: pass # Expected: Any is local to the function # === Aliased import inside function === def test_aliased_import_local(): import sys as system return system.platform alias_result = test_aliased_import_local() assert isinstance(alias_result, str), 'system.platform should be a string' # Verify system is NOT in global scope try: system assert False, 'system should not be in global scope' except NameError: pass # Expected: system is local to the function # === Global import remains accessible === import sys as global_sys assert isinstance(global_sys.platform, str), 'global import should work' def use_global_import(): # This should access the global sys, not create a new local return global_sys.platform assert use_global_import() == global_sys.platform, 'function should access global import' ================================================ FILE: crates/monty/test_cases/import__os.py ================================================ # call-external # Tests for os module import and os.getenv() import os # === os.getenv() with existing variable === assert os.getenv('VIRTUAL_HOME') == '/virtual/home', 'getenv returns existing value' assert os.getenv('VIRTUAL_USER') == 'testuser', 'getenv returns user value' assert os.getenv('VIRTUAL_EMPTY') == '', 'getenv returns empty string value' # === os.getenv() with missing variable === assert os.getenv('NONEXISTENT') is None, 'getenv returns None for missing var' assert os.getenv('ALSO_MISSING') is None, 'getenv returns None for other missing var' # === os.getenv() with default value === assert os.getenv('NONEXISTENT', 'fallback') == 'fallback', 'getenv uses default when missing' assert os.getenv('ALSO_MISSING', '') == '', 'getenv uses empty string default' assert os.getenv('MISSING', None) is None, 
'getenv with explicit None default' # === os.getenv() existing var ignores default === assert os.getenv('VIRTUAL_HOME', 'ignored') == '/virtual/home', 'existing var ignores default' assert os.getenv('VIRTUAL_USER', 'other') == 'testuser', 'existing user ignores default' # === os.getenv() with empty string existing var === assert os.getenv('VIRTUAL_EMPTY', 'not_used') == '', 'empty string var ignores default' ================================================ FILE: crates/monty/test_cases/import__relative_error.py ================================================ from .foo import bar """ TRACEBACK: Traceback (most recent call last): File "import__relative_error.py", line 1, in <module> from .foo import bar ImportError: attempted relative import with no known parent package """ ================================================ FILE: crates/monty/test_cases/import__relative_no_module_error.py ================================================ from . import foo """ TRACEBACK: Traceback (most recent call last): File "import__relative_no_module_error.py", line 1, in <module> from . 
import foo ImportError: attempted relative import with no known parent package """ ================================================ FILE: crates/monty/test_cases/import__runtime_error_when_executed.py ================================================ # Verify that ModuleNotFoundError is raised when an unknown module import is actually executed # (not guarded by TYPE_CHECKING) condition = True if condition: import nonexistent_at_runtime """ TRACEBACK: Traceback (most recent call last): File "import__runtime_error_when_executed.py", line 6, in <module> import nonexistent_at_runtime ModuleNotFoundError: No module named 'nonexistent_at_runtime' """ ================================================ FILE: crates/monty/test_cases/import__star_error.py ================================================ # xfail=cpython from sys import * """ TRACEBACK: Traceback (most recent call last): File "import__star_error.py", line 2, in <module> from sys import * ~~~~~~~~~~~~~~~~~ NotImplementedError: Wildcard imports (`from ... 
import *`) are not supported """ ================================================ FILE: crates/monty/test_cases/import__sys.py ================================================ # Tests for sys module import import sys # === sys.version === # Check that version is a non-empty string (exact value differs between interpreters) assert isinstance(sys.version, str), 'version should be a string' assert len(sys.version) > 0, 'version should be non-empty' # === sys.version_info === # Test index access returns integers for first 3 elements assert isinstance(sys.version_info[0], int), 'major version should be int' assert isinstance(sys.version_info[1], int), 'minor version should be int' assert isinstance(sys.version_info[2], int), 'micro version should be int' assert isinstance(sys.version_info[3], str), 'releaselevel should be str' assert isinstance(sys.version_info[4], int), 'serial should be int' # Test negative indexing assert sys.version_info[-1] == sys.version_info[4], 'negative index -1 should equal index 4' assert sys.version_info[-2] == sys.version_info[3], 'negative index -2 should equal index 3' assert sys.version_info[-5] == sys.version_info[0], 'negative index -5 should equal index 0' # Test named attribute access matches index access assert sys.version_info.major == sys.version_info[0], 'major attr should equal index 0' assert sys.version_info.minor == sys.version_info[1], 'minor attr should equal index 1' assert sys.version_info.micro == sys.version_info[2], 'micro attr should equal index 2' assert sys.version_info.releaselevel == sys.version_info[3], 'releaselevel attr should equal index 3' assert sys.version_info.serial == sys.version_info[4], 'serial attr should equal index 4' # Test len assert len(sys.version_info) == 5, 'version_info should have 5 elements' # Test tuple equality (works after fixing NamedTuple equality) v = sys.version_info assert (v[0], v[1]) == (v.major, v.minor), 'tuple of indices should equal tuple of attrs' assert v.major == v[0], 
'major attr should equal index 0' assert v.minor == v[1], 'minor attr should equal index 1' # === sys.platform === # Check that platform is a non-empty string (exact value differs between interpreters) assert isinstance(sys.platform, str), 'platform should be a string' assert len(sys.platform) > 0, 'platform should be non-empty' # === sys.stdout and sys.stderr === # These should exist - we test by accessing them (will fail if not present) stdout = sys.stdout stderr = sys.stderr ================================================ FILE: crates/monty/test_cases/import__sys_monty.py ================================================ # xfail=cpython # Tests for Monty-specific sys module values import sys # === sys.version === assert sys.version == '3.14.0 (Monty)', f'version should be 3.14.0 (Monty), got {sys.version!r}' # === sys.version_info exact values === assert sys.version_info[0] == 3, 'major version should be 3' assert sys.version_info[1] == 14, 'minor version should be 14' assert sys.version_info[2] == 0, 'micro version should be 0' assert sys.version_info[3] == 'final', 'releaselevel should be final' assert sys.version_info[4] == 0, 'serial should be 0' # === sys.version_info named attributes === assert sys.version_info.major == 3, 'major attr should be 3' assert sys.version_info.minor == 14, 'minor attr should be 14' assert sys.version_info.micro == 0, 'micro attr should be 0' assert sys.version_info.releaselevel == 'final', 'releaselevel attr should be final' assert sys.version_info.serial == 0, 'serial attr should be 0' # === sys.version_info tuple equality === # This works because NamedTuple equality compares only by elements, not type_name assert sys.version_info == (3, 14, 0, 'final', 0), 'version_info should equal tuple' # === sys.platform === assert sys.platform == 'monty', f'platform should be monty, got {sys.platform!r}' ================================================ FILE: crates/monty/test_cases/import__type_checking_guard.py 
================================================ # === TYPE_CHECKING guard === # Imports inside TYPE_CHECKING blocks should not raise errors at runtime # because TYPE_CHECKING is False at runtime, so the import is never executed. from typing import TYPE_CHECKING if TYPE_CHECKING: import also_nonexistent from nonexistent_module import something # Verify TYPE_CHECKING is False at runtime (as expected) assert TYPE_CHECKING is False, 'TYPE_CHECKING should be False at runtime' # === Function using TYPE_CHECKING for conditional import === def get_type_checking_value(): if TYPE_CHECKING: from another_fake_module import FakeType return 'success' result = get_type_checking_value() assert result == 'success', 'function with TYPE_CHECKING guard should execute' # === Nested TYPE_CHECKING blocks === if TYPE_CHECKING: if True: from deeply_nested_fake import DeepFake # === TYPE_CHECKING in else branch (should not be executed either) === x = True if x: pass else: if TYPE_CHECKING: from unreachable_module import Unreachable assert True, 'all TYPE_CHECKING guards work correctly' ================================================ FILE: crates/monty/test_cases/import__typing.py ================================================ # === Typing markers via from import === from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union # These are now assigned to Marker values (not silently ignored) # Test repr() to verify they have the correct string representation assert repr(Any) == 'typing.Any', f'Any repr should be Any, got {Any!r}' assert repr(Optional) == 'typing.Optional', f'Optional repr should be Optional, got {Optional!r}' assert repr(Union) == "", f'Union repr should be , got {Union!r}' assert repr(List) == 'typing.List', f'List repr should be List, got {List!r}' assert repr(Dict) == 'typing.Dict', f'Dict repr should be Dict, got {Dict!r}' assert repr(Tuple) == 'typing.Tuple', f'Tuple repr should be Tuple, got {Tuple!r}' assert repr(Set) == 'typing.Set', f'Set repr should 
be Set, got {Set!r}' assert repr(Callable) == 'typing.Callable', f'Callable repr should be Callable, got {Callable!r}' # === Typing markers via module import === import typing assert repr(typing.Any) == 'typing.Any' assert repr(typing.Optional) == 'typing.Optional' assert repr(typing.Union) == "" # === Aliased imports === from typing import Any as AnyType assert repr(AnyType) == 'typing.Any' ================================================ FILE: crates/monty/test_cases/import__typing_type_ignore.py ================================================ from typing import TYPE_CHECKING assert TYPE_CHECKING == False, 'TYPE_CHECKING should be False' assert not TYPE_CHECKING, 'TYPE_CHECKING should be falsy' ================================================ FILE: crates/monty/test_cases/int__bigint.py ================================================ # Tests for BigInt (arbitrary precision integer) support # === Setup constants === MAX_I64 = 9223372036854775807 # i64::MAX MIN_I64 = -MAX_I64 - 1 # i64::MIN (compute to avoid type checker overflow) # === BigInt literals === # Monty supports parsing integer literals larger than i64 LITERAL_BIG = 10000000000000000000000000000000000000000 assert LITERAL_BIG == 10**40, 'bigint literal equals computed value' assert str(LITERAL_BIG) == '10000000000000000000000000000000000000000', 'bigint literal str' assert type(LITERAL_BIG) == int, 'bigint literal type is int' # Negative bigint literal (via unary negation) NEG_BIG_LITERAL = -10000000000000000000000000000000000000000 assert NEG_BIG_LITERAL == -(10**40), 'negative bigint literal' assert str(NEG_BIG_LITERAL) == '-10000000000000000000000000000000000000000', 'negative bigint literal str' # === BigInt literal arithmetic === # bigint_literal * int assert 10000000000000000000000000000000000000000 * 2 == 2 * 10**40, 'bigint literal * int' assert 2 * 10000000000000000000000000000000000000000 == 2 * 10**40, 'int * bigint literal' # bigint_literal / int (true division) assert 
10000000000000000000000000000000000000000 / 2 == 10**40 / 2, 'bigint literal / int' assert 10000000000000000000000000000000000000000 / 10000000000000000000000000000000000000000 == 1.0, ( 'bigint literal / bigint literal' ) # bigint_literal // int (floor division) assert 10000000000000000000000000000000000000000 // 3 == 10**40 // 3, 'bigint literal // int' assert 10000000000000000000000000000000000000000 // 10000000000000000000000000000000000000000 == 1, ( 'bigint literal // bigint literal' ) # bigint_literal % int (modulo) assert 10000000000000000000000000000000000000000 % 7 == 10**40 % 7, 'bigint literal % int' assert 10000000000000000000000000000000000000001 % 10000000000000000000000000000000000000000 == 1, ( 'bigint literal % bigint literal' ) # bigint_literal + int assert 10000000000000000000000000000000000000000 + 1 == 10**40 + 1, 'bigint literal + int' assert 1 + 10000000000000000000000000000000000000000 == 10**40 + 1, 'int + bigint literal' # bigint_literal - int assert 10000000000000000000000000000000000000000 - 1 == 10**40 - 1, 'bigint literal - int' assert 10000000000000000000000000000000000000001 - 10000000000000000000000000000000000000000 == 1, ( 'bigint literal - bigint literal' ) # bigint_literal ** int assert 10000000000000000000**2 == 10**38, 'bigint literal ** 2' # === int() parsing of big integers === assert int('10000000000000000000000000000000000000000') == 10**40, 'int() parses bigint string' assert int('-10000000000000000000000000000000000000000') == -(10**40), 'int() parses negative bigint string' assert int('99999999999999999999999999999999999999999999999999') == 10**50 - 1, 'int() parses very large bigint string' # === BigInt literal comparisons === assert 10000000000000000000000000000000000000000 > 9999999999999999999999999999999999999999, ( 'bigint literal > bigint literal' ) assert 10000000000000000000000000000000000000000 >= 10000000000000000000000000000000000000000, ( 'bigint literal >= bigint literal' ) assert 
9999999999999999999999999999999999999999 < 10000000000000000000000000000000000000000, ( 'bigint literal < bigint literal' ) assert 10000000000000000000000000000000000000000 <= 10000000000000000000000000000000000000000, ( 'bigint literal <= bigint literal' ) assert 10000000000000000000000000000000000000000 == 10000000000000000000000000000000000000000, ( 'bigint literal == bigint literal' ) assert 10000000000000000000000000000000000000000 != 10000000000000000000000000000000000000001, ( 'bigint literal != bigint literal' ) # bigint literal vs int comparisons assert 10000000000000000000000000000000000000000 > 1, 'bigint literal > int' assert 1 < 10000000000000000000000000000000000000000, 'int < bigint literal' # === BigInt literal bool conversion === assert bool(10000000000000000000000000000000000000000), 'bigint literal is truthy' assert bool(-10000000000000000000000000000000000000000), 'negative bigint literal is truthy' # === BigInt literal hash consistency === # Same literal value should have same hash h1 = hash(10000000000000000000000000000000000000000) h2 = hash(10000000000000000000000000000000000000000) assert h1 == h2, 'same bigint literal has same hash' # Computed equal value should have same hash h3 = hash(10**40) assert h1 == h3, 'bigint literal hash equals computed hash' # === BigInt literal bitwise operations === assert 10000000000000000000000000000000000000000 & 0xFF == (10**40) & 0xFF, 'bigint literal & int' assert 10000000000000000000000000000000000000000 | 1 == (10**40) | 1, 'bigint literal | int' assert 10000000000000000000000000000000000000000 ^ 10000000000000000000000000000000000000000 == 0, ( 'bigint literal ^ bigint literal' ) assert 10000000000000000000000000000000000000000 >> 10 == (10**40) >> 10, 'bigint literal >> int' assert 10000000000000000000000000000000000000000 << 10 == (10**40) << 10, 'bigint literal << int' # === Non-decimal BigInt literals === # Large hex literal (2^64) big_hex = 0x10000000000000000 assert big_hex == 2**64, 'large hex 
literal' bigger_hex = 0x10000000000000000123 assert bigger_hex == 75557863725914323419427, f'large hex literal {bigger_hex}' # Large binary literal (2^65) big_bin = 0b100000000000000000000000000000000000000000000000000000000000000000 assert big_bin == 2**65, 'large binary literal' # Large octal literal big_oct = 0o10000000000000000000000 assert big_oct == 8**22, 'large octal literal' # Underscores in large non-decimal big_hex_underscore = 0x1_0000_0000_0000_0000 assert big_hex_underscore == 2**64, 'large hex with underscores' # === BigInt literal in collections === d = {10000000000000000000000000000000000000000: 'value'} assert d[10000000000000000000000000000000000000000] == 'value', 'bigint literal as dict key' assert d[10**40] == 'value', 'computed bigint finds literal key' lst = [10000000000000000000000000000000000000000, 20000000000000000000000000000000000000000] assert lst[0] == 10**40, 'bigint literal in list' assert lst[1] == 2 * 10**40, 'bigint literal in list index 1' # === BigInt literal repr/str === assert repr(10000000000000000000000000000000000000000) == '10000000000000000000000000000000000000000', ( 'bigint literal repr' ) assert str(10000000000000000000000000000000000000000) == '10000000000000000000000000000000000000000', ( 'bigint literal str' ) # === Overflow promotion === bigger = MAX_I64 + 1 assert bigger == MAX_I64 + 1, 'add overflow promotes to bigint' assert bigger - 1 == MAX_I64, 'sub back to i64' # === Subtraction overflow === smaller = MIN_I64 - 1 assert smaller == MIN_I64 - 1, 'sub overflow promotes to bigint' assert smaller + 1 == MIN_I64, 'add back to i64' # === Multiplication overflow === mul_result = MAX_I64 * 2 expected_mul = MAX_I64 + MAX_I64 assert mul_result == expected_mul, 'mul overflow' trillion = 1000000000000 trillion_squared = trillion * trillion assert trillion_squared == 1000000000000 * 1000000000000, 'large mul' # === Power overflow === pow_2_63 = 2**63 assert pow_2_63 == MAX_I64 + 1, 'pow creates bigint at boundary' 
pow_2_64 = 2**64 assert pow_2_64 == pow_2_63 * 2, 'pow overflow' pow_2_100 = 2**100 assert pow_2_100 > pow_2_64, 'large pow is greater' # === Negative overflow === neg_bigger = -MAX_I64 - 2 assert neg_bigger == MIN_I64 - 1, 'negative bigint' # === Type is still int === assert type(bigger) == int, 'bigint type is int' assert type(pow_2_100) == int, 'large pow type is int' # === Mixed operations === add_result = bigger + 100 assert add_result == MAX_I64 + 101, 'bigint + int' add_result2 = 100 + bigger assert add_result2 == MAX_I64 + 101, 'int + bigint' sub_result = bigger - 100 assert sub_result == MAX_I64 - 99, 'bigint - int' sub_result2 = 100 - bigger expected_sub = -(MAX_I64 - 99) assert sub_result2 == expected_sub, 'int - bigint' mul_result2 = bigger * 2 expected_mul2 = (MAX_I64 + 1) * 2 assert mul_result2 == expected_mul2, 'bigint * int' mul_result3 = 2 * bigger assert mul_result3 == expected_mul2, 'int * bigint' # === BigInt with BigInt operations === big_a = 2**100 big_b = 2**100 big_sum = big_a + big_b assert big_sum == 2**101, 'bigint + bigint' big_diff = big_a - big_b assert big_diff == 0, 'bigint - bigint' big_prod = big_a * big_b assert big_prod == 2**200, 'bigint * bigint' # === Comparisons === assert bigger > MAX_I64, 'bigint > int' assert MAX_I64 < bigger, 'int < bigint' assert bigger >= MAX_I64, 'bigint >= int' assert MAX_I64 <= bigger, 'int <= bigint' cmp_result = bigger == MAX_I64 + 1 assert cmp_result, 'bigint == computed int' cmp_result2 = bigger == MAX_I64 assert not cmp_result2, 'bigint != int' # === BigInt comparisons === assert big_a == big_b, 'bigint == bigint' cmp_lt = big_a < big_b assert not cmp_lt, 'bigint not < equal bigint' big_double = big_a * 2 assert big_double > big_b, 'larger bigint > smaller bigint' # === Hash consistency === # When a BigInt demotes to i64 range, its hash must match the equivalent int hash # This is critical for dict key lookups to work correctly # Test hash equality for values that fit in i64 computed_42 = (big_a 
- big_a) + 42 # Goes through BigInt arithmetic, demotes to 42 assert hash(computed_42) == hash(42), 'hash of computed int must match literal int' assert hash(bigger - 1) == hash(MAX_I64), 'hash of demoted bigint must match MAX_I64' assert hash(smaller + 1) == hash(MIN_I64), 'hash of demoted bigint must match MIN_I64' # Test that hash(0) is consistent across computation paths zero_via_bigint = big_a - big_a assert hash(zero_via_bigint) == hash(0), 'hash of bigint zero must match int zero' # Test dict key lookup works when inserting with int and looking up with computed bigint d = {42: 'a'} assert d[42] == 'a', 'int as key' assert d[computed_42] == 'a', 'lookup with computed bigint finds int key' # Test dict key lookup works when inserting with bigint and looking up with int d2 = {computed_42: 'value'} assert d2[42] == 'value', 'lookup with int finds bigint key' # Large bigints (outside i64 range) as dict keys d[bigger] = 'b' assert d[bigger] == 'b', 'bigint as key' d[big_a] = 'c' assert d[big_a] == 'c', 'large bigint as key' # Verify large bigints with same value hash the same big_copy = 2**100 assert hash(big_a) == hash(big_copy), 'equal large bigints must hash the same' # Verify large bigints can be used interchangeably as dict keys d3 = {big_a: 'original'} assert d3[big_copy] == 'original', 'lookup with equal large bigint works' # === Unary neg overflow === # Use 0 - MIN_I64 instead of -MIN_I64 to avoid type checker overflow neg_min = 0 - MIN_I64 assert neg_min == MAX_I64 + 1, 'neg i64::MIN promotes' # Note: ~bigger (bitwise not) tests skipped - Monty parser doesn't support ~ yet # === Floor division === fd_result = bigger // 2 fd_expected = (MAX_I64 + 1) // 2 assert fd_result == fd_expected, 'bigint // int' pow_2_50 = 2**50 fd_result2 = pow_2_100 // pow_2_50 assert fd_result2 == 2**50, 'bigint // bigint' fd_result3 = 100 // bigger assert fd_result3 == 0, 'int // bigint (small / large)' neg_bigger = -bigger fd_neg_result = neg_bigger // 3 fd_neg_expected = 
(-(MAX_I64 + 1)) // 3 assert fd_neg_result == fd_neg_expected, 'negative bigint floordiv' # === Modulo === mod_result = bigger % 1000 mod_expected = (MAX_I64 + 1) % 1000 assert mod_result == mod_expected, 'bigint % int' mod_result2 = 100 % bigger assert mod_result2 == 100, 'int % bigint' mod_result3 = pow_2_100 % (pow_2_50 + 1) assert mod_result3 == 1, 'bigint % bigint' # === Builtin functions === abs_neg = abs(-bigger) assert abs_neg == bigger, 'abs of negative bigint' abs_pos = abs(bigger) assert abs_pos == bigger, 'abs of positive bigint' abs_min = abs(MIN_I64) assert abs_min == MAX_I64 + 1, 'abs of i64::MIN' pow_result = pow(2, 100) assert pow_result == pow_2_100, 'pow builtin' pow_bigger_2 = bigger * bigger pow_result2 = pow(bigger, 2) assert pow_result2 == pow_bigger_2, 'pow with bigint base' dm = divmod(bigger, 1000) dm_quot = dm[0] dm_rem = dm[1] expected_quot = bigger // 1000 expected_rem = bigger % 1000 assert dm_quot == expected_quot, 'divmod quotient with bigint' assert dm_rem == expected_rem, 'divmod remainder with bigint' dm2 = divmod(pow_2_100, pow_2_50) assert dm2[0] == pow_2_50, 'divmod bigint by bigint quotient' assert dm2[1] == 0, 'divmod bigint by bigint remainder' hex_result = hex(bigger) assert hex_result == '0x8000000000000000', 'hex of bigint' hex_neg = hex(-bigger) assert hex_neg == '-0x8000000000000000', 'hex of negative bigint' bin_result = bin(bigger) assert bin_result == '0b1000000000000000000000000000000000000000000000000000000000000000', 'bin of bigint' bin_neg = bin(-bigger) assert bin_neg == '-0b1000000000000000000000000000000000000000000000000000000000000000', 'bin of negative bigint' oct_result = oct(bigger) assert oct_result == '0o1000000000000000000000', 'oct of bigint' oct_neg = oct(-bigger) assert oct_neg == '-0o1000000000000000000000', 'oct of negative bigint' # === Repr and str === repr_result = repr(bigger) str_result = str(bigger) expected_repr = str(MAX_I64 + 1) assert repr_result == expected_repr, 'repr of bigint' assert 
str_result == expected_repr, 'str of bigint' # === Bool conversion === assert bool(bigger), 'bigint is truthy' assert bool(-bigger), 'negative bigint is truthy' # === Demote back to i64 === demote_result = bigger - bigger assert demote_result == 0, 'bigint - bigint can demote to i64' demote_result2 = bigger - 1 assert demote_result2 == MAX_I64, 'bigint - 1 demotes to i64::MAX' # === Bug 1: 0 ** 0 with LongInt exponent === big = 2**100 assert 0**big == 0, '0 ** large_positive should be 0' assert 1**big == 1, '1 ** large_positive should be 1' # Edge case: 0 ** 0 where 0 is a LongInt zero_big = big - big # LongInt zero (actually demotes to int, so test with computed zero) assert 0**zero_big == 1, '0 ** 0 (computed zero) should be 1' assert 5**zero_big == 1, '5 ** 0 (computed zero) should be 1' # === Bug 2: Modulo with negative divisor === assert 5 % -3 == -1, '5 % -3 should be -1' assert -5 % 3 == 1, '-5 % 3 should be 1' assert -5 % -3 == -2, '-5 % -3 should be -2' assert 7 % -4 == -1, '7 % -4 should be -1' # === Bug 3: += overflow === x = MAX_I64 x += 1 assert x == MAX_I64 + 1, 'i64::MAX += 1 should promote to LongInt' y = MIN_I64 y += -1 assert y == MIN_I64 - 1, 'i64::MIN += -1 should promote to LongInt' # === Bug 4: LongInt * sequence === big = 2**100 assert 'a' * 0 == '', 'str * 0' assert [1] * 0 == [], 'list * 0' # Sequence * LongInt (where LongInt is heap-allocated) # Note: CPython doesn't support seq * huge_negative_longint (OverflowError) # Test with positive LongInt - should raise OverflowError for repeat count too large # But we can test heap-allocated LongInt by using a value that demotes big_then_small = big - big + 3 # Results in 3 (goes through LongInt arithmetic) assert 'ab' * big_then_small == 'ababab', 'str * LongInt that demotes to small value' # === Bug 5: True division with LongInt === big = 2**100 assert big / 2 == 2.0**99, 'bigint / int' # 1 / 2**100 is a very small positive number, not exactly 0.0 tiny = 1 / big assert tiny > 0.0 and tiny < 
1e-29, 'int / huge_bigint approaches 0' assert big / big == 1.0, 'bigint / bigint same value' assert big / 2.0 == 2.0**99, 'bigint / float' tiny_f = 1.0 / big assert tiny_f > 0.0 and tiny_f < 1e-29, 'float / huge_bigint approaches 0' # === Bug 6: Bitwise with LongInt === big = 2**100 assert big & 0xFF == 0, '2**100 & 0xFF' assert big | 1 == big + 1, '2**100 | 1' assert big ^ big == 0, 'bigint ^ same bigint' assert big >> 50 == 2**50, '2**100 >> 50' assert 1 << 100 == big, '1 << 100' assert (big + 0xFF) & 0xFF == 0xFF, 'bigint with low bits & mask' # === Large result operations (should succeed with NoLimitTracker) === # These are large but allowed since test runner uses NoLimitTracker x = 2**100000 # ~12.5KB - well under any reasonable limit assert x > 0, '2 ** 100000 should succeed' y = 1 << 100000 assert y > 0, '1 << 100000 should succeed' # Edge cases (constant-size results) - always succeed assert 0**10000000 == 0, '0 ** huge = 0' assert 1**10000000 == 1, '1 ** huge = 1' assert (-1) ** 10000000 == 1, '(-1) ** huge_even = 1' assert (-1) ** 10000001 == -1, '(-1) ** huge_odd = -1' assert 0 << 10000000 == 0, '0 << huge = 0' # === LongInt in range() === # Note: Monty raises OverflowError immediately for range(10**100), while CPython # only raises when iterating or calling len(). We accept this difference for safety. 
big = 2**100 small_via_big = big - big + 5 # LongInt that demotes to 5 r = range(small_via_big) assert list(r) == [0, 1, 2, 3, 4], 'range with LongInt stop' r2 = range(small_via_big, small_via_big + 3) assert list(r2) == [5, 6, 7], 'range with LongInt start/stop' r3 = range(0, 10, big - big + 2) assert list(r3) == [0, 2, 4, 6, 8], 'range with LongInt step' # === Integer computed via LongInt arithmetic === # These values go through BigInt arithmetic but demote to regular Int via into_value() idx = big - big + 1 # Results in Value::Int(1) after demotion assert [10, 20, 30][idx] == 20, 'list indexing with BigInt-computed int' assert (10, 20, 30)[idx] == 20, 'tuple indexing with BigInt-computed int' assert 'abc'[idx] == 'b', 'string indexing with BigInt-computed int' assert b'abc'[idx] == ord('b'), 'bytes indexing with BigInt-computed int' assert range(10)[idx] == 1, 'range indexing with BigInt-computed int' # Negative index computed via LongInt arithmetic neg_idx = big - big - 1 # Results in Value::Int(-1) after demotion assert [10, 20, 30][neg_idx] == 30, 'list indexing with negative BigInt-computed int' assert (10, 20, 30)[neg_idx] == 30, 'tuple indexing with negative BigInt-computed int' assert 'abc'[neg_idx] == 'c', 'string indexing with negative BigInt-computed int' assert b'abc'[neg_idx] == ord('c'), 'bytes indexing with negative BigInt-computed int' assert range(10)[neg_idx] == 9, 'range indexing with negative BigInt-computed int' # List assignment with LongInt index lst = [1, 2, 3] lst[idx] = 42 assert lst == [1, 42, 3], 'list assignment with BigInt-computed index' lst[neg_idx] = 99 assert lst == [1, 42, 99], 'list assignment with negative BigInt-computed index' # === String/bytes * LongInt === count = big - big + 3 assert 'ab' * count == 'ababab', 'string * LongInt' assert count * 'ab' == 'ababab', 'LongInt * string' assert b'ab' * count == b'ababab', 'bytes * LongInt' assert count * b'ab' == b'ababab', 'LongInt * bytes' # Negative LongInt repeat neg = big - 
big - 2 assert 'ab' * neg == '', 'string * negative LongInt' assert b'ab' * neg == b'', 'bytes * negative LongInt' # Zero LongInt repeat zero = big - big assert 'ab' * zero == '', 'string * zero LongInt' assert b'ab' * zero == b'', 'bytes * zero LongInt' ================================================ FILE: crates/monty/test_cases/int__bigint_errors.py ================================================ # Tests for error cases in BigInt-related builtins and operations # All error messages must match CPython exactly # Uses 'in str(e)' checks since Monty's str(e) includes the type name # === Setup constants === MAX_I64 = 9223372036854775807 BIGINT = MAX_I64 + 1 # Force BigInt creation # === hex() errors === try: hex('str') assert False, 'hex(str) should raise TypeError' except TypeError as e: assert "'str' object cannot be interpreted as an integer" in str(e), f'hex str error: {e}' try: hex(1.5) assert False, 'hex(float) should raise TypeError' except TypeError as e: assert "'float' object cannot be interpreted as an integer" in str(e), f'hex float error: {e}' try: hex([]) assert False, 'hex(list) should raise TypeError' except TypeError as e: assert "'list' object cannot be interpreted as an integer" in str(e), f'hex list error: {e}' # === bin() errors === try: bin('str') assert False, 'bin(str) should raise TypeError' except TypeError as e: assert "'str' object cannot be interpreted as an integer" in str(e), f'bin str error: {e}' try: bin(1.5) assert False, 'bin(float) should raise TypeError' except TypeError as e: assert "'float' object cannot be interpreted as an integer" in str(e), f'bin float error: {e}' try: bin({}) assert False, 'bin(dict) should raise TypeError' except TypeError as e: assert "'dict' object cannot be interpreted as an integer" in str(e), f'bin dict error: {e}' # === oct() errors === try: oct('str') assert False, 'oct(str) should raise TypeError' except TypeError as e: assert "'str' object cannot be interpreted as an integer" in str(e), f'oct 
str error: {e}' try: oct(1.5) assert False, 'oct(float) should raise TypeError' except TypeError as e: assert "'float' object cannot be interpreted as an integer" in str(e), f'oct float error: {e}' try: oct((1, 2)) assert False, 'oct(tuple) should raise TypeError' except TypeError as e: assert "'tuple' object cannot be interpreted as an integer" in str(e), f'oct tuple error: {e}' # === divmod() division by zero === try: divmod(10, 0) assert False, 'divmod(int, 0) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'divmod int/0 error: {e}' try: divmod(BIGINT, 0) assert False, 'divmod(bigint, 0) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'divmod bigint/0 error: {e}' try: divmod(10, BIGINT - BIGINT) # BigInt zero assert False, 'divmod(int, bigint_zero) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'divmod int/bigint_zero error: {e}' try: divmod(BIGINT, BIGINT - BIGINT) # BigInt / BigInt zero assert False, 'divmod(bigint, bigint_zero) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'divmod bigint/bigint_zero error: {e}' try: divmod(10.0, 0.0) assert False, 'divmod(float, 0.0) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'divmod float/0.0 error: {e}' try: divmod(10, 0.0) assert False, 'divmod(int, 0.0) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'divmod int/0.0 error: {e}' try: divmod(10.0, 0) assert False, 'divmod(float, 0) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'divmod float/0 error: {e}' # === divmod() type errors === try: divmod('a', 5) assert False, 'divmod(str, int) should raise TypeError' except TypeError as e: assert "unsupported operand type(s) for divmod(): 'str' and 'int'" in 
str(e), f'divmod str/int error: {e}' try: divmod(5, 'a') assert False, 'divmod(int, str) should raise TypeError' except TypeError as e: assert "unsupported operand type(s) for divmod(): 'int' and 'str'" in str(e), f'divmod int/str error: {e}' try: divmod([], 5) assert False, 'divmod(list, int) should raise TypeError' except TypeError as e: assert "unsupported operand type(s) for divmod(): 'list' and 'int'" in str(e), f'divmod list/int error: {e}' try: divmod(BIGINT, 'a') assert False, 'divmod(bigint, str) should raise TypeError' except TypeError as e: assert "unsupported operand type(s) for divmod(): 'int' and 'str'" in str(e), f'divmod bigint/str error: {e}' # === pow() zero to negative power === try: pow(0.0, -1) assert False, 'pow(0.0, -1) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'zero to a negative power' in str(e), f'pow 0.0/-1 error: {e}' try: pow(0, -1.0) assert False, 'pow(0, -1.0) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'zero to a negative power' in str(e), f'pow 0/-1.0 error: {e}' try: pow(0.0, -2.0) assert False, 'pow(0.0, -2.0) should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'zero to a negative power' in str(e), f'pow 0.0/-2.0 error: {e}' # === pow() with modulo errors === try: pow(2, 10, 0) assert False, 'pow(2, 10, 0) should raise ValueError' except ValueError as e: assert 'pow() 3rd argument cannot be 0' in str(e), f'pow mod=0 error: {e}' # Note: pow(2, -1, 5) computes modular inverse in Python 3.8+, not an error # But pow(2, -1, 4) raises an error because 2 is not invertible mod 4 try: pow(2, -1, 4) # gcd(2, 4) != 1, no inverse exists assert False, 'pow(2, -1, 4) should raise ValueError' except ValueError as e: # CPython: "base is not invertible for the given modulus" # Monty: "pow() 2nd argument cannot be negative when 3rd argument specified" # Accept either message since Monty doesn't support modular inverse yet assert 'not invertible' in str(e) or 'cannot be 
negative' in str(e), f'pow non-invertible error: {e}' try: pow(2.0, 2, 5) assert False, 'pow(float, int, int) should raise TypeError' except TypeError as e: assert 'pow() 3rd argument not allowed unless all arguments are integers' in str(e), f'pow float mod error: {e}' try: pow(2, 2.0, 5) assert False, 'pow(int, float, int) should raise TypeError' except TypeError as e: assert 'pow() 3rd argument not allowed unless all arguments are integers' in str(e), f'pow float exp mod error: {e}' try: pow(2, 2, 5.0) assert False, 'pow(int, int, float) should raise TypeError' except TypeError as e: assert 'pow() 3rd argument not allowed unless all arguments are integers' in str(e), f'pow float mod2 error: {e}' # === abs() type errors === try: abs('str') assert False, 'abs(str) should raise TypeError' except TypeError as e: assert "bad operand type for abs(): 'str'" in str(e), f'abs str error: {e}' try: abs([]) assert False, 'abs(list) should raise TypeError' except TypeError as e: assert "bad operand type for abs(): 'list'" in str(e), f'abs list error: {e}' try: abs({}) assert False, 'abs(dict) should raise TypeError' except TypeError as e: assert "bad operand type for abs(): 'dict'" in str(e), f'abs dict error: {e}' # === pow() type errors (** operator) === try: 5 ** 'x' assert False, '5 ** str should raise TypeError' except TypeError as e: assert "unsupported operand type(s) for ** or pow(): 'int' and 'str'" in str(e), f'int ** str error: {e}' try: 'x' ** 5 assert False, 'str ** int should raise TypeError' except TypeError as e: assert "unsupported operand type(s) for ** or pow(): 'str' and 'int'" in str(e), f'str ** int error: {e}' try: BIGINT ** 'x' assert False, 'bigint ** str should raise TypeError' except TypeError as e: assert "unsupported operand type(s) for ** or pow(): 'int' and 'str'" in str(e), f'bigint ** str error: {e}' # === Division by zero with BigInt === try: BIGINT // 0 assert False, 'bigint // 0 should raise ZeroDivisionError' except ZeroDivisionError as e: 
assert 'division by zero' in str(e), f'bigint floordiv error: {e}' try: BIGINT % 0 assert False, 'bigint % 0 should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'bigint mod error: {e}' try: 10 // (BIGINT - BIGINT) # int // BigInt zero assert False, 'int // bigint_zero should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'int floordiv bigint_zero error: {e}' try: 10 % (BIGINT - BIGINT) # int % BigInt zero assert False, 'int % bigint_zero should raise ZeroDivisionError' except ZeroDivisionError as e: assert 'division by zero' in str(e), f'int mod bigint_zero error: {e}' ================================================ FILE: crates/monty/test_cases/int__ops.py ================================================ # === Integer addition === assert 1 + 2 == 3, 'basic add' assert 5 + 0 == 5, 'add zero' assert 0 + 5 == 5, 'zero add' # === Integer subtraction === assert 5 - 3 == 2, 'basic sub' assert 5 - 0 == 5, 'sub zero' # === Mixed int/float addition === assert 3 + 4.0 == 7.0, 'int add float' assert 4.0 + 3 == 7.0, 'float add int' assert -2 + 3.5 == 1.5, 'neg int add float' assert 0 + 2.5 == 2.5, 'zero add float' assert 2.5 + 0 == 2.5, 'float add zero' # === Mixed int/float subtraction === assert 5 - 2.5 == 2.5, 'int sub float' assert 5.5 - 2 == 3.5, 'float sub int' assert -3 - 1.5 == -4.5, 'neg int sub float' assert 1.5 - (-2) == 3.5, 'float sub neg int' # === Float subtraction === assert 5.5 - 2.5 == 3.0, 'float sub float' assert 0.0 - 1.5 == -1.5, 'zero sub float' # === Integer modulo === assert 10 % 3 == 1, 'basic mod' assert 3 % 10 == 3, 'mod larger divisor' assert 9 % 3 == 0, 'mod zero result' # === Augmented assignment (+=) === x = 5 x += 3 assert x == 8, 'basic iadd' # === Integer repr/str === assert repr(42) == '42', 'int repr' assert str(42) == '42', 'int str' # === Float repr/str === assert repr(2.5) == '2.5', 'float repr' assert str(2.5) == '2.5', 'float str' # === 
Integer multiplication === assert 3 * 4 == 12, 'basic int mult' assert 5 * 0 == 0, 'mult by zero' assert 0 * 5 == 0, 'zero mult' assert -3 * 4 == -12, 'negative mult' assert 3 * -4 == -12, 'mult negative' assert -3 * -4 == 12, 'neg mult neg' # === Float multiplication === assert 3.0 * 4.0 == 12.0, 'float mult' assert 2.5 * 2.0 == 5.0, 'float mult 2' # === Mixed int/float multiplication === assert 3 * 4.0 == 12.0, 'int mult float' assert 4.0 * 3 == 12.0, 'float mult int' # === True division (always returns float) === assert 6 / 2 == 3.0, 'int div exact' assert 7 / 2 == 3.5, 'int div remainder' assert 1 / 4 == 0.25, 'int div fraction' assert 6.0 / 2.0 == 3.0, 'float div' assert 7 / 2.0 == 3.5, 'int div float' assert 7.0 / 2 == 3.5, 'float div int' assert -7 / 2 == -3.5, 'neg div' # === Floor division === assert 7 // 2 == 3, 'int floor div' assert 6 // 2 == 3, 'int floor div exact' assert -7 // 2 == -4, 'neg floor div rounds down' assert 7 // -2 == -4, 'floor div neg rounds down' assert -7 // -2 == 3, 'neg floor div neg' assert 7.0 // 2.0 == 3.0, 'float floor div' assert 7 // 2.0 == 3.0, 'int floor div float' assert 7.0 // 2 == 3.0, 'float floor div int' assert -7.0 // 2.0 == -4.0, 'neg float floor div' # === Power (exponentiation) === assert 2**3 == 8, 'int pow' assert 2**10 == 1024, 'int pow large' assert 2**0 == 1, 'pow zero' assert (-2) ** 3 == -8, 'neg base pow' assert (-2) ** 2 == 4, 'neg base even pow' assert 2**-1 == 0.5, 'pow neg returns float' assert 2**-2 == 0.25, 'pow neg 2' assert 4.0**2.0 == 16.0, 'float pow' assert 4**0.5 == 2.0, 'sqrt via pow' assert 8 ** (1 / 3) == 2.0, 'cube root via pow' assert 2.0**3 == 8.0, 'float pow int' # === Augmented assignment operators === # *= x = 5 x *= 3 assert x == 15, 'imult' # /= x = 10 x /= 4 assert x == 2.5, 'idiv' # //= x = 10 x //= 3 assert x == 3, 'ifloordiv' # **= x = 2 x **= 4 assert x == 16, 'ipow' # -= x = 10 x -= 3 assert x == 7, 'isub' # %= x = 10 x %= 3 assert x == 1, 'imod' # === Bool arithmetic (True=1, 
False=0) === # Bool multiplication assert True * 3 == 3, 'bool mult int' assert False * 5 == 0, 'false mult int' assert 3 * True == 3, 'int mult bool' assert 3 * False == 0, 'int mult false' assert True * True == 1, 'bool mult bool' assert True * False == 0, 'bool mult false' assert True * 2.5 == 2.5, 'bool mult float' assert 2.5 * True == 2.5, 'float mult bool' # Bool division assert True / 2 == 0.5, 'bool div int' assert False / 2 == 0.0, 'false div int' assert 4 / True == 4.0, 'int div bool' assert True / True == 1.0, 'bool div bool' assert True / 2.0 == 0.5, 'bool div float' assert 4.0 / True == 4.0, 'float div bool' # Bool floor division assert True // 2 == 0, 'bool floordiv int' assert False // 2 == 0, 'false floordiv int' assert 5 // True == 5, 'int floordiv bool' assert True // True == 1, 'bool floordiv bool' assert True // 2.0 == 0.0, 'bool floordiv float' assert 5.5 // True == 5.0, 'float floordiv bool' # Bool power assert True**3 == 1, 'bool pow int' assert False**3 == 0, 'false pow int' assert 2**True == 2, 'int pow bool true' assert 2**False == 1, 'int pow bool false' assert True**True == 1, 'bool pow bool' assert False**False == 1, 'false pow false' assert True**2.0 == 1.0, 'bool pow float' assert 2.0**True == 2.0, 'float pow bool true' assert 2.0**False == 1.0, 'float pow bool false' # === Unary positive (no-op for numbers, converts bools to int) === assert +5 == 5, 'unary pos int' assert +(-3) == -3, 'unary pos neg int' assert +0 == 0, 'unary pos zero' assert +3.14 == 3.14, 'unary pos float' assert +(-2.5) == -2.5, 'unary pos neg float' assert +0.0 == 0.0, 'unary pos zero float' assert +True == 1, 'unary pos true' assert +False == 0, 'unary pos false' # Verify +bool returns int type, not bool assert type(+True) == int, 'unary pos true returns int type' assert type(+False) == int, 'unary pos false returns int type' # === Unary negative === assert -5 == -5, 'unary neg int' assert -(-3) == 3, 'unary neg neg int' assert -0 == 0, 'unary neg zero' assert 
-3.14 == -3.14, 'unary neg float' assert -(-2.5) == 2.5, 'unary neg neg float' assert -True == -1, 'unary neg true' assert repr(-True) == '-1', 'unary neg true repr' assert -False == 0, 'unary neg false' assert repr(-False) == '0', 'unary neg false repr' # === Unary invert (bitwise NOT) === assert ~0 == -1, 'unary invert zero' assert ~1 == -2, 'unary invert one' assert ~(-1) == 0, 'unary invert neg one' assert ~True == -2, 'unary invert true' assert repr(~True) == '-2', 'unary invert true repr' assert ~False == -1, 'unary invert false' assert repr(~False) == '-1', 'unary invert false repr' assert int('123') == 123, 'int conversion from string' assert int(' 123 ') == 123, 'int conversion from string trim' assert int('1_234 ') == 1234, 'int conversion from string' try: int('abc') except ValueError as e: assert str(e) == "invalid literal for int() with base 10: 'abc'", f'got err: {e}' else: raise AssertionError('int conversion from string should fail') try: int(' ') except ValueError as e: assert str(e) == "invalid literal for int() with base 10: ' '", f'got err: {e}' else: raise AssertionError('int conversion from string should fail') try: int('a\tbc') except ValueError as e: assert str(e) == "invalid literal for int() with base 10: 'a\\tbc'", f'got err: {e}' else: raise AssertionError('int conversion from string should fail') ================================================ FILE: crates/monty/test_cases/int__overflow_division.py ================================================ # === i64::MIN // -1 overflow === INT_MIN = -(2**63) INT_MAX = 2**63 - 1 assert INT_MIN // -1 == 9223372036854775808, 'INT_MIN // -1' assert INT_MIN % -1 == 0, 'INT_MIN % -1' q, r = divmod(INT_MIN, -1) assert q == 9223372036854775808, 'divmod(INT_MIN, -1) quot' assert r == 0, 'divmod(INT_MIN, -1) rem' # === augmented assignment === x = INT_MIN x //= -1 assert x == 9223372036854775808, 'INT_MIN //= -1' x = INT_MIN x %= -1 assert x == 0, 'INT_MIN %= -1' # === i64 boundary values === assert 
INT_MIN // 1 == INT_MIN, 'INT_MIN // 1' assert INT_MIN // -2 == 4611686018427387904, 'INT_MIN // -2' assert INT_MIN % 1 == 0, 'INT_MIN % 1' assert INT_MIN % -2 == 0, 'INT_MIN % -2' assert INT_MIN % 3 == 1, 'INT_MIN % 3' assert INT_MAX // -1 == -INT_MAX, 'INT_MAX // -1' assert INT_MAX % -1 == 0, 'INT_MAX % -1' assert INT_MAX // 2 == 4611686018427387903, 'INT_MAX // 2' assert INT_MAX % 2 == 1, 'INT_MAX % 2' # === boundary divisors === assert INT_MIN // INT_MIN == 1, 'INT_MIN // INT_MIN' assert INT_MIN // INT_MAX == -2, 'INT_MIN // INT_MAX' assert INT_MAX // INT_MIN == -1, 'INT_MAX // INT_MIN' assert INT_MIN % INT_MIN == 0, 'INT_MIN % INT_MIN' assert INT_MAX % INT_MAX == 0, 'INT_MAX % INT_MAX' # === sign combinations === assert -7 // 2 == -4, '-7 // 2' assert 7 // -2 == -4, '7 // -2' assert -7 % 2 == 1, '-7 % 2' assert 7 % -2 == -1, '7 % -2' q, r = divmod(-7, 2) assert q == -4, 'divmod(-7, 2) quot' assert r == 1, 'divmod(-7, 2) rem' q, r = divmod(7, -2) assert q == -4, 'divmod(7, -2) quot' assert r == -1, 'divmod(7, -2) rem' # === divmod at boundaries === q, r = divmod(INT_MIN, 2) assert q == -4611686018427387904, 'divmod(INT_MIN, 2) quot' assert r == 0, 'divmod(INT_MIN, 2) rem' q, r = divmod(INT_MAX, -1) assert q == -INT_MAX, 'divmod(INT_MAX, -1) quot' assert r == 0, 'divmod(INT_MAX, -1) rem' q, r = divmod(INT_MIN, INT_MAX) assert q == -2, 'divmod(INT_MIN, INT_MAX) quot' assert r == INT_MAX - 1, 'divmod(INT_MIN, INT_MAX) rem' # === divmod invariant: q * b + r == a === q, r = divmod(INT_MIN, -1) assert q * -1 + r == INT_MIN, 'divmod(INT_MIN, -1) invariant' q, r = divmod(INT_MIN, 3) assert q * 3 + r == INT_MIN, 'divmod(INT_MIN, 3) invariant' assert q == -3074457345618258603, 'divmod(INT_MIN, 3) quot' assert r == 1, 'divmod(INT_MIN, 3) rem' # === CompareModEq patterns === x = INT_MIN assert x % -1 == 0, 'INT_MIN % -1 == 0' assert x % 2 == 0, 'INT_MIN % 2 == 0' assert x % 3 == 1, 'INT_MIN % 3 == 1' x = INT_MAX assert x % -1 == 0, 'INT_MAX % -1 == 0' assert x % 2 == 1, 
'INT_MAX % 2 == 1' ================================================ FILE: crates/monty/test_cases/is_variant__all.py ================================================ # Tests that values of different types are returned correctly # Also tests identity operators with singletons # === Boolean values === assert repr(False) == 'False', 'False repr' assert repr(True) == 'True', 'True repr' # === None value === assert repr(None) == 'None', 'None repr' # === Ellipsis value === assert repr(...) == 'Ellipsis', 'Ellipsis repr' # === Ellipsis identity === assert (... is ...) == True, 'ellipsis is ellipsis' assert (None is ...) == False, 'none is not ellipsis' # === Type checks against None === assert (False is None) == False, 'False is not None' assert (True is None) == False, 'True is not None' assert (None is None) == True, 'None is None' assert (42 is None) == False, 'int is not None' assert (3.14 is None) == False, 'float is not None' assert ([1, 2] is None) == False, 'list is not None' assert ('hello' is None) == False, 'str is not None' assert ((1, 2) is None) == False, 'tuple is not None' # === Type checks against Ellipsis === assert (False is ...) == False, 'False is not Ellipsis' assert (True is ...) == False, 'True is not Ellipsis' assert (None is ...) == False, 'None is not Ellipsis' assert (42 is ...) == False, 'int is not Ellipsis' assert (3.14 is ...) == False, 'float is not Ellipsis' assert ([1, 2] is ...) == False, 'list is not Ellipsis' assert ('hello' is ...) == False, 'str is not Ellipsis' assert ((1, 2) is ...) 
== False, 'tuple is not Ellipsis' ================================================ FILE: crates/monty/test_cases/isinstance__arg2_list_error.py ================================================ isinstance(1, [int, str]) # Raise=TypeError('isinstance() arg 2 must be a type, a tuple of types, or a union') ================================================ FILE: crates/monty/test_cases/isinstance__arg2_type_error.py ================================================ isinstance(1, 'int') # Raise=TypeError('isinstance() arg 2 must be a type, a tuple of types, or a union') ================================================ FILE: crates/monty/test_cases/iter__dict_mutation.py ================================================ d = {'a': 1, 'b': 2} for k in d: d['c'] = 3 # Raise=RuntimeError('dictionary changed size during iteration') ================================================ FILE: crates/monty/test_cases/iter__for.py ================================================ # === List iteration === result = [] for x in [1, 2, 3]: result.append(x) assert result == [1, 2, 3], 'iterate over list' # list with mixed types result = [] for x in [1, 'a', True]: result.append(x) assert result == [1, 'a', True], 'iterate over mixed list' # empty list result = [] for x in []: result.append(x) assert result == [], 'iterate over empty list' # nested list items result = [] for x in [[1, 2], [3, 4]]: result.append(x) assert result == [[1, 2], [3, 4]], 'iterate over nested lists' # === Tuple iteration === result = [] for x in (1, 2, 3): result.append(x) assert result == [1, 2, 3], 'iterate over tuple' # empty tuple result = [] for x in (): result.append(x) assert result == [], 'iterate over empty tuple' # tuple with mixed types result = [] for x in (1, 'b', False): result.append(x) assert result == [1, 'b', False], 'iterate over mixed tuple' # === Dict iteration (yields keys) === result = [] for k in {'a': 1, 'b': 2, 'c': 3}: result.append(k) assert result == ['a', 'b', 'c'], 'iterate over dict 
yields keys' # empty dict result = [] for k in {}: result.append(k) assert result == [], 'iterate over empty dict' # dict preserves insertion order result = [] d = {'z': 1, 'a': 2, 'm': 3} for k in d: result.append(k) assert result == ['z', 'a', 'm'], 'dict iteration preserves insertion order' # === String iteration (yields chars) === result = [] for c in 'abc': result.append(c) assert result == ['a', 'b', 'c'], 'iterate over string yields chars' # empty string result = [] for c in '': result.append(c) assert result == [], 'iterate over empty string' # string with punctuation result = [] for c in 'hi!': result.append(c) assert result == ['h', 'i', '!'], 'iterate over string with punctuation' # string with unicode (multi-byte UTF-8 characters) result = [] for c in 'héllo': result.append(c) assert result == ['h', 'é', 'l', 'l', 'o'], 'iterate over string with accented char' # string with CJK characters result = [] for c in '日本': result.append(c) assert result == ['日', '本'], 'iterate over string with CJK chars' # string with emoji result = [] for c in 'a🎉b': result.append(c) assert result == ['a', '🎉', 'b'], 'iterate over string with emoji' # heap string s = 'xyz' s = s + '!' 
# Force heap allocation
# (the comment above describes the preceding `s = s + '!'` statement; the loop
# below iterates that concatenated string)
result = []
for c in s:
    result.append(c)
assert result == ['x', 'y', 'z', '!'], 'iterate over heap string'

# === Bytes iteration (yields ints) ===
result = []
for b in b'abc':
    result.append(b)
assert result == [97, 98, 99], 'iterate over bytes yields ints'

# empty bytes
result = []
for b in b'':
    result.append(b)
assert result == [], 'iterate over empty bytes'

# bytes with various values
result = []
for b in b'\x00\x01\xff':
    result.append(b)
assert result == [0, 1, 255], 'iterate over bytes with special values'

# === Range iteration (existing functionality) ===
result = []
for i in range(3):
    result.append(i)
assert result == [0, 1, 2], 'iterate over range'

# range with step
result = []
for i in range(0, 6, 2):
    result.append(i)
assert result == [0, 2, 4], 'iterate over range with step'

# === Nested iteration ===
result = []
for outer in [[1, 2], [3, 4]]:
    for inner in outer:
        result.append(inner)
assert result == [1, 2, 3, 4], 'nested for loops'

# iterate over string within list
result = []
for s in ['ab', 'cd']:
    for c in s:
        result.append(c)
assert result == ['a', 'b', 'c', 'd'], 'nested string iteration'

# === Using loop variable after loop ===
# The loop bodies are intentionally empty: only the final binding of the
# loop variable is checked.
for x in [1, 2, 3]:
    pass
assert x == 3, 'loop variable persists after loop'

for y in 'abc':
    pass
assert y == 'c', 'string loop variable persists'

# === List mutation during iteration ===
# Python allows list mutation during iteration (unlike dict).
# The iterator checks current length on each iteration.
# appending during iteration - new items are seen
result = []
lst = [1, 2, 3]
for x in lst:
    result.append(x)
    if x == 2:
        # grow the list while it is being iterated; the loop must still reach the new item
        lst.append(4)
assert result == [1, 2, 3, 4], 'appending to list during iteration sees new items'
assert lst == [1, 2, 3, 4], 'list was modified'

# appending multiple items
result = []
lst = [1]
for x in lst:
    result.append(x)
    if x < 5:
        # each visited item below 5 appends its successor, so the loop stops after 5
        lst.append(x + 1)
assert result == [1, 2, 3, 4, 5], 'can grow list dynamically during iteration'

# === Modifying via copy pattern ===
# Iterate over a copy so that appending to `original` does not affect the loop.
original = [1, 2, 3]
copy = list(original)
for x in copy:
    if x == 2:
        original.append(4)
assert original == [1, 2, 3, 4], 'modifying list via copy pattern'

# === Sum pattern ===
total = 0
for n in [1, 2, 3, 4, 5]:
    total = total + n
assert total == 15, 'sum pattern with list'

# === Early break simulation via flag ===
# (break not implemented, using flag pattern)
found = False
for x in [1, 2, 3, 4, 5]:
    if not found and x == 3:
        found = True
assert found == True, 'find pattern with flag'

# === Accumulator patterns ===
# count items
count = 0
for _ in ['a', 'b', 'c']:
    count = count + 1
assert count == 3, 'count items'

# concatenate strings
result = ''
for s in ['a', 'b', 'c']:
    result = result + s
assert result == 'abc', 'concatenate strings'

# === Dict key-value access pattern ===
d = {'x': 10, 'y': 20}
total = 0
for k in d:
    total = total + d[k]
assert total == 30, 'dict key-value access in loop'

# === Dict mutation during iteration ===
# Python allows modifying existing key values during iteration (no size change).
# It also allows pop + add that keeps size the same (iterator sees new keys).
# modifying existing values is allowed d = {'a': 1, 'b': 2, 'c': 3} for k in d: d[k] = d[k] * 10 assert d == {'a': 10, 'b': 20, 'c': 30}, 'modify dict values during iteration' # pop + add keeping same size is allowed, iterator sees new keys d = {'a': 1, 'b': 2, 'c': 3} result = [] for k in d: result.append(k) if k == 'a': d.pop('b') d['x'] = 4 # size unchanged assert result == ['a', 'c', 'x'], 'dict pop+add same size sees new keys' assert d == {'a': 1, 'c': 3, 'x': 4}, 'dict was modified correctly' ================================================ FILE: crates/monty/test_cases/iter__for_loop_unpacking.py ================================================ # === Basic for loop === result = [] for i in range(5): result.append(i) assert result == [0, 1, 2, 3, 4], 'basic for loop' # === Tuple unpacking in for loop === pairs = [(1, 2), (3, 4), (5, 6)] sums = [] for a, b in pairs: sums.append(a + b) assert sums == [3, 7, 11], 'for loop with pair unpacking' # === Triple unpacking === triples = [(1, 2, 3), (4, 5, 6)] products = [] for a, b, c in triples: products.append(a * b * c) assert products == [6, 120], 'for loop with triple unpacking' # === Nested tuple unpacking === nested = [((1, 2), 3), ((4, 5), 6)] results = [] for (a, b), c in nested: results.append(a + b + c) assert results == [6, 15], 'for loop with nested unpacking' # === Deep nested unpacking === deep = [((1, 2), (3, 4)), ((5, 6), (7, 8))] sums = [] for (a, b), (c, d) in deep: sums.append(a + b + c + d) assert sums == [10, 26], 'for loop with deep nested unpacking' # === Mixed depth unpacking === mixed = [(1, (2, 3)), (4, (5, 6))] results = [] for a, (b, c) in mixed: results.append(a + b + c) assert results == [6, 15], 'for loop with mixed depth unpacking' # === Unpacking with else clause === pairs = [(1, 2), (3, 4)] total = 0 for a, b in pairs: total += a + b else: total += 100 assert total == 110, 'for loop unpacking with else clause' # === Enumerate with unpacking === items = ['a', 'b', 'c'] result = [] for 
i, val in enumerate(items): result.append((i, val)) assert result == [(0, 'a'), (1, 'b'), (2, 'c')], 'enumerate with unpacking' # === Dict items unpacking === d = {'x': 1, 'y': 2} keys = [] vals = [] for k, v in d.items(): keys.append(k) vals.append(v) assert sorted(keys) == ['x', 'y'], 'dict items unpacking keys' assert sorted(vals) == [1, 2], 'dict items unpacking values' ================================================ FILE: crates/monty/test_cases/iter__generator_expr.py ================================================ # === Basic generator expression === result = list(x * 2 for x in range(5)) assert result == [0, 2, 4, 6, 8], 'basic generator expression' # === With condition === result = list(x for x in range(10) if x % 2 == 0) assert result == [0, 2, 4, 6, 8], 'generator with condition' # === Nested generators === result = list(x + y for x in range(3) for y in range(2)) assert result == [0, 1, 1, 2, 2, 3], 'nested generator' # === Generator in function call === result = sum(x for x in range(5)) assert result == 10, 'generator in sum()' # === Generator with unpacking === pairs = [(1, 2), (3, 4)] result = list(a + b for a, b in pairs) assert result == [3, 7], 'generator with unpacking' ================================================ FILE: crates/monty/test_cases/iter__generator_expr_type.py ================================================ # xfail=cpython # TODO: When proper generators are implemented, this test should be removed. # Currently generator expressions return lists in Monty, not generator objects. # This test verifies the temporary behavior until generators are properly implemented. 
gen_result = (x * 2 for x in range(5)) assert type(gen_result) == list, 'generator expr currently returns list' assert gen_result == [0, 2, 4, 6, 8], 'generator expr value' ================================================ FILE: crates/monty/test_cases/iter__not_iterable.py ================================================ for x in 42: pass # Raise=TypeError("'int' object is not iterable") ================================================ FILE: crates/monty/test_cases/lambda__all.py ================================================ # === Basic lambda === # no-arg lambda f = lambda: 42 assert f() == 42, 'no-arg lambda' # single arg lambda f = lambda x: x + 1 assert f(5) == 6, 'single arg lambda' # === Multiple arguments === f = lambda x, y: x + y assert f(2, 3) == 5, 'multi-arg lambda' f = lambda x, y, z: x * y + z assert f(2, 3, 4) == 10, 'three-arg lambda' # === Default arguments === f = lambda x, y=10: x + y assert f(5) == 15, 'lambda with default' assert f(5, 3) == 8, 'lambda override default' f = lambda x=1, y=2: x * y assert f() == 2, 'lambda all defaults' assert f(3) == 6, 'lambda override first default' assert f(3, 4) == 12, 'lambda override all defaults' # === Lambda as expression (immediate call) === assert (lambda x: x * 2)(5) == 10, 'immediate call' assert (lambda: 'hello')() == 'hello', 'immediate call no args' assert (lambda x, y: x - y)(10, 3) == 7, 'immediate call multi args' # === Lambda in data structures === funcs = [lambda x: x + 1, lambda x: x * 2, lambda x: x**2] assert funcs[0](3) == 4, 'lambda in list - add' assert funcs[1](3) == 6, 'lambda in list - mul' assert funcs[2](3) == 9, 'lambda in list - pow' # === Lambda assigned and called later === square = lambda x: x * x double = lambda x: x + x assert square(4) == 16, 'lambda assigned square' assert double(4) == 8, 'lambda assigned double' # === Lambda with operations === f = lambda x: x > 5 assert f(6) is True, 'lambda comparison gt' assert f(4) is False, 'lambda comparison not gt' f = lambda x: 
x if x > 0 else -x assert f(5) == 5, 'lambda ternary positive' assert f(-5) == 5, 'lambda ternary negative' # === Closures === def make_adder(n): return lambda x: x + n add5 = make_adder(5) add10 = make_adder(10) assert add5(3) == 8, 'closure capture add5' assert add10(3) == 13, 'closure capture add10' def make_multiplier(factor): return lambda x: x * factor times3 = make_multiplier(3) assert times3(4) == 12, 'closure capture multiplier' # === Nested lambdas === f = lambda x: lambda y: x + y add_to_5 = f(5) assert add_to_5(3) == 8, 'nested lambda' f = lambda x: lambda y: lambda z: x + y + z assert f(1)(2)(3) == 6, 'triple nested lambda' # === Lambda in list comprehension === squared = [f(x) for x in [1, 2, 3, 4] for f in [lambda n: n * n]] # Note: this tests lambda in comprehension context, though due to late binding # all items use the same lambda # === Lambda returns another lambda === compose = lambda f: lambda g: lambda x: f(g(x)) inc = lambda x: x + 1 double = lambda x: x * 2 inc_then_double = compose(double)(inc) assert inc_then_double(3) == 8, 'lambda composition' # double(inc(3)) = double(4) = 8 # === Lambda repr === f = lambda: None r = repr(f) assert '<lambda>' in r, 'lambda repr contains <lambda>' assert 'function' in r, 'lambda repr contains function' # === Lambda with *args === f = lambda *args: sum(args) assert f() == 0, 'lambda varargs empty' assert f(1) == 1, 'lambda varargs one' assert f(1, 2, 3) == 6, 'lambda varargs multiple' # === Lambda with keyword arguments === f = lambda x, *, y: x + y assert f(1, y=2) == 3, 'lambda keyword only' f = lambda **kwargs: len(kwargs) assert f() == 0, 'lambda kwargs empty' assert f(a=1, b=2) == 2, 'lambda kwargs multiple' # === Mixed parameters === f = lambda a, b=2, *args, c, d=4, **kwargs: (a, b, args, c, d, len(kwargs)) result = f(1, 2, 3, 4, c=10, d=20, e=30, f=40) assert result == (1, 2, (3, 4), 10, 20, 2), 'lambda mixed params' # === Unpacking in immediate lambda calls === xs = [1, 2, 3] assert (lambda *a: a)(*xs) == (1, 2, 
3), 'lambda with *args unpacking' assert (lambda **k: k)(**{'a': 1}) == {'a': 1}, 'lambda with **kwargs unpacking' assert (lambda *a, **k: (a, k))(1, 2, x=3) == ((1, 2), {'x': 3}), 'lambda mixed unpack' # === Lambda parameter shadowing === # Inner lambda shadows outer variable - outer should not capture it def make_shadowing_lambda(): x = 10 # inner lambda has param x, so outer lambda should NOT capture x from make_shadowing_lambda return lambda: (lambda x: x + 1) outer_fn = make_shadowing_lambda() inner_fn = outer_fn() assert inner_fn(5) == 6, 'inner lambda takes x as param' def test_inner_lambda_capture(): y = 5 # outer lambda binds y as param, inner lambda captures from outer lambda, not test_inner_lambda_capture g = lambda y: (lambda: y) return g(7)() assert test_inner_lambda_capture() == 7, 'inner lambda captures outer lambda param' ================================================ FILE: crates/monty/test_cases/list__extend_not_iterable.py ================================================ # Regression test: list.extend() with a non-iterable should still raise TypeError # with the correct message. This verifies that the list_extend opcode helper in # collections.rs (for [*expr] literals) and the list.extend() method in # types/list.rs are separate code paths that do not interfere with each other. 
a = [] a.extend(1) # Raise=TypeError("'int' object is not iterable") ================================================ FILE: crates/monty/test_cases/list__getitem_out_of_bounds.py ================================================ a = [1, 2, 3] a[10] # Raise=IndexError('list index out of range') ================================================ FILE: crates/monty/test_cases/list__index_not_found.py ================================================ [1, 2, 3].index(4) """ TRACEBACK: Traceback (most recent call last): File "list__index_not_found.py", line 1, in <module> [1, 2, 3].index(4) ~~~~~~~~~~~~~~~~~~ ValueError: list.index(x): x not in list """ ================================================ FILE: crates/monty/test_cases/list__index_start_gt_end.py ================================================ # Test that list.index with start > end doesn't panic but raises ValueError [1, 2, 3].index(1, 5, 2) """ TRACEBACK: Traceback (most recent call last): File "list__index_start_gt_end.py", line 2, in <module> [1, 2, 3].index(1, 5, 2) ~~~~~~~~~~~~~~~~~~~~~~~~ ValueError: list.index(x): x not in list """ ================================================ FILE: crates/monty/test_cases/list__ops.py ================================================ # === List concatenation (+) === assert [1, 2] + [3, 4] == [1, 2, 3, 4], 'basic concat' assert [] + [1, 2] == [1, 2], 'empty left concat' assert [1, 2] + [] == [1, 2], 'empty right concat' assert [] + [] == [], 'empty both concat' assert [1] + [2] + [3] + [4] == [1, 2, 3, 4], 'multiple concat' assert [[1]] + [[2]] == [[1], [2]], 'nested concat' # === Augmented assignment (+=) === lst = [1, 2] lst += [3, 4] assert lst == [1, 2, 3, 4], 'basic iadd' lst = [1] alias = lst lst += [2] assert lst is alias, 'list += preserves identity' assert alias == [1, 2], 'list += mutates through aliases' lst = [1, 2, 3] index = 1 lst[index] += 5 assert lst == [1, 7, 3], 'subscript += updates the selected list item' try: lst = [1] lst[5] += 1 assert False, 'subscript += past 
the end of a list should raise IndexError' except IndexError as e: assert e.args == ('list index out of range',), 'subscript += list index error matches normal setitem' lst = [1] lst += [] assert lst == [1], 'iadd empty' lst = [1] lst += [2] lst += [3] assert lst == [1, 2, 3], 'multiple iadd' lst = [1, 2] lst += lst assert lst == [1, 2, 1, 2], 'iadd self' # === List length === assert len([]) == 0, 'len empty' assert len([1, 2, 3]) == 3, 'len basic' lst = [1] lst.append(2) assert len(lst) == 2, 'len after append' # === List indexing === a = [] a.append('value') assert a[0] == 'value', 'getitem basic' a = [1, 2, 3] assert a[0 - 1] == 3, 'getitem negative index' assert a[-1] == 3, 'getitem -1' assert a[-2] == 2, 'getitem -2' # === List repr/str === assert repr([]) == '[]', 'empty list repr' assert str([]) == '[]', 'empty list str' assert repr([1, 2, 3]) == '[1, 2, 3]', 'list repr' assert str([1, 2, 3]) == '[1, 2, 3]', 'list str' # === List repetition (*) === assert [1, 2] * 3 == [1, 2, 1, 2, 1, 2], 'list mult int' assert 3 * [1, 2] == [1, 2, 1, 2, 1, 2], 'int mult list' assert [1] * 0 == [], 'list mult zero' assert [1] * -1 == [], 'list mult negative' assert [] * 5 == [], 'empty list mult' assert [1, 2] * 1 == [1, 2], 'list mult one' assert [[1]] * 2 == [[1], [1]], 'nested list mult' # === List repetition augmented assignment (*=) === lst = [1, 2] lst *= 2 assert lst == [1, 2, 1, 2], 'list imult' lst = [1] lst *= 0 assert lst == [], 'list imult zero' # === list() constructor === assert list() == [], 'list() empty' assert list([1, 2, 3]) == [1, 2, 3], 'list from list' assert list((1, 2, 3)) == [1, 2, 3], 'list from tuple' assert list(range(3)) == [0, 1, 2], 'list from range' assert list('abc') == ['a', 'b', 'c'], 'list from string' assert list(b'abc') == [97, 98, 99], 'list from bytes' assert list({'a': 1, 'b': 2}) == ['a', 'b'], 'list from dict yields keys' # non-ASCII strings (multi-byte UTF-8) assert list('héllo') == ['h', 'é', 'l', 'l', 'o'], 'list from string with 
accented char' assert list('日本') == ['日', '本'], 'list from string with CJK chars' assert list('a🎉b') == ['a', '🎉', 'b'], 'list from string with emoji' # === list.append() === lst = [] lst.append(1) assert lst == [1], 'append to empty' lst.append(2) assert lst == [1, 2], 'append to non-empty' lst.append(lst) # append self creates cycle assert len(lst) == 3, 'append self increases length' # === list.insert() === # Basic insert at various positions lst = [1, 2, 3] lst.insert(0, 'a') assert lst == ['a', 1, 2, 3], 'insert at beginning' lst = [1, 2, 3] lst.insert(1, 'a') assert lst == [1, 'a', 2, 3], 'insert in middle' lst = [1, 2, 3] lst.insert(3, 'a') assert lst == [1, 2, 3, 'a'], 'insert at end' # Insert beyond length appends lst = [1, 2, 3] lst.insert(100, 'a') assert lst == [1, 2, 3, 'a'], 'insert beyond length appends' # Insert with negative index lst = [1, 2, 3] lst.insert(-1, 'a') assert lst == [1, 2, 'a', 3], 'insert at -1 (before last)' lst = [1, 2, 3] lst.insert(-2, 'a') assert lst == [1, 'a', 2, 3], 'insert at -2' lst = [1, 2, 3] lst.insert(-100, 'a') assert lst == ['a', 1, 2, 3], 'insert very negative clamps to 0' # === list.pop() === lst = [1, 2, 3] assert lst.pop() == 3, 'pop without argument returns last' assert lst == [1, 2], 'pop removes last element' lst = [1, 2, 3] assert lst.pop(0) == 1, 'pop(0) returns first' assert lst == [2, 3], 'pop(0) removes first element' lst = [1, 2, 3] assert lst.pop(1) == 2, 'pop(1) returns middle' assert lst == [1, 3], 'pop(1) removes middle element' lst = [1, 2, 3] assert lst.pop(-1) == 3, 'pop(-1) returns last' assert lst == [1, 2], 'pop(-1) removes last element' lst = [1, 2, 3] assert lst.pop(-2) == 2, 'pop(-2) returns second to last' assert lst == [1, 3], 'pop(-2) removes second to last element' # === list.remove() === lst = [1, 2, 3, 2] lst.remove(2) assert lst == [1, 3, 2], 'remove removes first occurrence' lst = ['a', 'b', 'c'] lst.remove('b') assert lst == ['a', 'c'], 'remove string element' # === list.clear() === 
lst = [1, 2, 3] lst.clear() assert lst == [], 'clear empties the list' lst = [] lst.clear() assert lst == [], 'clear on empty list is no-op' # === list.copy() === lst = [1, 2, 3] copy = lst.copy() assert copy == [1, 2, 3], 'copy creates equal list' assert copy is not lst, 'copy creates new list object' lst.append(4) assert copy == [1, 2, 3], 'copy is independent' # === list.extend() === lst = [1, 2] lst.extend([3, 4]) assert lst == [1, 2, 3, 4], 'extend with list' lst = [1] lst.extend((2, 3)) assert lst == [1, 2, 3], 'extend with tuple' lst = [1] lst.extend(range(2, 5)) assert lst == [1, 2, 3, 4], 'extend with range' lst = [1] lst.extend('ab') assert lst == [1, 'a', 'b'], 'extend with string' lst = [] lst.extend([]) assert lst == [], 'extend empty with empty' # === list.index() === lst = [1, 2, 3, 2] assert lst.index(2) == 1, 'index finds first occurrence' assert lst.index(3) == 2, 'index finds element' assert lst.index(2, 2) == 3, 'index with start' assert lst.index(2, 1, 4) == 1, 'index with start and end' # === list.count() === lst = [1, 2, 2, 3, 2] assert lst.count(2) == 3, 'count multiple occurrences' assert lst.count(1) == 1, 'count single occurrence' assert lst.count(4) == 0, 'count zero occurrences' assert [].count(1) == 0, 'count on empty list' # === list.reverse() === lst = [1, 2, 3] lst.reverse() assert lst == [3, 2, 1], 'reverse modifies in place' lst = [1] lst.reverse() assert lst == [1], 'reverse single element' lst = [] lst.reverse() assert lst == [], 'reverse empty list' # === list.sort() === lst = [3, 1, 2] lst.sort() assert lst == [1, 2, 3], 'sort integers' lst = ['b', 'c', 'a'] lst.sort() assert lst == ['a', 'b', 'c'], 'sort strings' lst = [3, 1, 2] lst.sort(reverse=True) assert lst == [3, 2, 1], 'sort with reverse=True' lst = [] lst.sort() assert lst == [], 'sort empty list' lst = [1] lst.sort() assert lst == [1], 'sort single element' # === list.sort(key=...) 
=== lst = ['banana', 'apple', 'cherry'] lst.sort(key=len) assert lst == ['apple', 'banana', 'cherry'], 'sort by len' lst = [[1, 2, 3], [4], [5, 6]] lst.sort(key=len) assert lst == [[4], [5, 6], [1, 2, 3]], 'sort nested lists by len' lst = [[1, 2, 3], [4], [5, 6]] lst.sort(key=len, reverse=True) assert lst == [[1, 2, 3], [5, 6], [4]], 'sort by len reverse' lst = [-3, 1, -2, 4] lst.sort(key=abs) assert lst == [1, -2, -3, 4], 'sort by abs' # key=None is same as no key lst = [3, 1, 2] lst.sort(key=None) assert lst == [1, 2, 3], 'sort with key=None' lst = [3, 1, 2] lst.sort(key=None, reverse=True) assert lst == [3, 2, 1], 'sort with key=None reverse' # Empty list with key lst = [] lst.sort(key=len) assert lst == [], 'sort empty list with key' # key=int for string-to-int conversion lst = ['-3', '1', '-2', '4'] lst.sort(key=int) assert lst == ['-3', '-2', '1', '4'], 'sort strings by int value' lst = ['10', '2', '1', '100'] lst.sort(key=int) assert lst == ['1', '2', '10', '100'], 'sort numeric strings by int value' lst = ['10', '2', '1', '100'] lst.sort(key=int, reverse=True) assert lst == ['100', '10', '2', '1'], 'sort numeric strings by int reverse' # user-defined key function def last_char(s): return s[-1] lst = ['cherry', 'banana', 'apple'] lst.sort(key=last_char) assert lst == ['banana', 'apple', 'cherry'], 'sort by last char' # key function might raise exception lst = [''] try: lst.sort(key=last_char) except IndexError: pass # expected since last_char('') raises IndexError # === List assignment (setitem) === # Basic assignment lst = [1, 2, 3] lst[0] = 10 assert lst == [10, 2, 3], 'setitem at index 0' lst = [1, 2, 3] lst[1] = 20 assert lst == [1, 20, 3], 'setitem at index 1' lst = [1, 2, 3] lst[2] = 30 assert lst == [1, 2, 30], 'setitem at last index' # Negative index assignment lst = [1, 2, 3] lst[-1] = 100 assert lst == [1, 2, 100], 'setitem at -1' lst = [1, 2, 3] lst[-2] = 200 assert lst == [1, 200, 3], 'setitem at -2' lst = [1, 2, 3] lst[-3] = 300 assert lst == 
[300, 2, 3], 'setitem at -3' # Assigning different types lst = [1, 2, 3] lst[0] = 'hello' assert lst == ['hello', 2, 3], 'setitem string value' lst = [1, 2, 3] lst[1] = [4, 5] assert lst == [1, [4, 5], 3], 'setitem list value' lst = [1, 2, 3] lst[0] = None assert lst == [None, 2, 3], 'setitem None value' # Multiple assignments lst = [0, 0, 0] lst[0] = 1 lst[1] = 2 lst[2] = 3 assert lst == [1, 2, 3], 'multiple setitem' # Assignment preserves other elements lst = ['a', 'b', 'c', 'd'] lst[1] = 'B' assert lst[0] == 'a', 'setitem preserves element 0' assert lst[1] == 'B', 'setitem changes element 1' assert lst[2] == 'c', 'setitem preserves element 2' assert lst[3] == 'd', 'setitem preserves element 3' # === Bool indices === # Python allows True/False as indices (True=1, False=0) lst = ['a', 'b', 'c'] assert lst[False] == 'a', 'getitem with False' assert lst[True] == 'b', 'getitem with True' lst = ['x', 'y', 'z'] lst[False] = 'X' assert lst == ['X', 'y', 'z'], 'setitem with False' lst = ['x', 'y', 'z'] lst[True] = 'Y' assert lst == ['x', 'Y', 'z'], 'setitem with True' # === Nested list equality === # same-length lists with matching nested elements assert [[1, 2], [3, 4]] == [[1, 2], [3, 4]], 'nested list eq' # same-length but different nested elements (exercises py_eq early return) assert [[1, 2], [3, 4]] != [[1, 2], [3, 5]], 'nested list ne same length' assert [[]] != [[1]], 'nested empty vs non-empty' # deeper nesting assert [[[1]]] == [[[1]]], 'deep nested list eq' assert [[[1]]] != [[[2]]], 'deep nested list ne' # mixed nesting depths assert [[1], 2] == [[1], 2], 'mixed nesting eq' assert [[1], 2] != [[1], 3], 'mixed nesting ne' # === Nested list repr === assert repr([[1, 2], [3, 4]]) == '[[1, 2], [3, 4]]', 'nested list repr' assert repr([[]]) == '[[]]', 'list containing empty list repr' assert repr([[1], [2, 3]]) == '[[1], [2, 3]]', 'nested varied len repr' # === list.remove() with nested elements === x = [1, 2] lst = [x, [3, 4], x] lst.remove([1, 2]) assert lst == 
[[3, 4], [1, 2]], 'remove nested list element' lst = [1, [2, 3], 4] lst.remove([2, 3]) assert lst == [1, 4], 'remove nested from mixed' # === list.index() with nested elements === lst = [[3], [1, 2], [4]] assert lst.index([1, 2]) == 1, 'index with nested list' lst = [[1], [2], [1]] assert lst.index([1]) == 0, 'index nested finds first' # === list.count() with nested elements === lst = [[1, 2], [3], [1, 2], 4, [1, 2]] assert lst.count([1, 2]) == 3, 'count nested list elements' assert lst.count([3]) == 1, 'count single nested occurrence' assert lst.count([99]) == 0, 'count nested not found' assert [].count([1]) == 0, 'count on empty list' # === Nested list containment === assert [1, 2] in [[1, 2], [3, 4]], 'nested list in' assert [5, 6] not in [[1, 2], [3, 4]], 'nested list not in' assert [] in [[], [1]], 'empty list in list of lists' # === List unpacking (PEP 448) === a = [1, 2] b = [3, 4] assert [*a] == [1, 2], 'single list unpack' assert [*a, *b] == [1, 2, 3, 4], 'double list unpack' assert [0, *a, 5] == [0, 1, 2, 5], 'mixed list unpack' assert [*[]] == [], 'unpack empty list' assert [*(1, 2)] == [1, 2], 'unpack tuple into list' assert [*'abc'] == ['a', 'b', 'c'], 'unpack string into list' assert [*{'x': 1, 'y': 2}] == ['x', 'y'], 'unpack dict keys into list' # Heap-allocated set: covers the HeapData::Set arm in list_extend assert sorted([*{1, 2, 3}]) == [1, 2, 3], 'unpack set into list' # Heap-allocated Str (result of concat, not interned): covers HeapData::Str in list_extend hs = 'hel' + 'lo' assert [*hs] == ['h', 'e', 'l', 'l', 'o'], 'unpack heap string into list' # Non-iterable heap-allocated Ref (closure) hits the inner `_` arm in list_extend. # A plain top-level function is Value::DefFunction (not a Ref), so a closure is # required to reach the Value::Ref(_) branch (HeapData that is not List/Tuple/Set/Dict/Str). 
def _make_list_unpack_closure(): _sentinel = 1 def _inner(): return _sentinel return _inner _list_unpack_closure = _make_list_unpack_closure() try: _x = [*_list_unpack_closure] assert False, 'expected TypeError for non-iterable heap closure in list unpack' except TypeError: pass ================================================ FILE: crates/monty/test_cases/list__pop_empty.py ================================================ [].pop() """ TRACEBACK: Traceback (most recent call last): File "list__pop_empty.py", line 1, in <module> [].pop() ~~~~~~~~ IndexError: pop from empty list """ ================================================ FILE: crates/monty/test_cases/list__pop_out_of_range.py ================================================ [1, 2, 3].pop(10) """ TRACEBACK: Traceback (most recent call last): File "list__pop_out_of_range.py", line 1, in <module> [1, 2, 3].pop(10) ~~~~~~~~~~~~~~~~~ IndexError: pop index out of range """ ================================================ FILE: crates/monty/test_cases/list__pop_type_error.py ================================================ [].pop('not an int') """ TRACEBACK: Traceback (most recent call last): File "list__pop_type_error.py", line 1, in <module> [].pop('not an int') ~~~~~~~~~~~~~~~~~~~~ TypeError: 'str' object cannot be interpreted as an integer """ ================================================ FILE: crates/monty/test_cases/list__remove_not_found.py ================================================ [1, 2, 3].remove(4) """ TRACEBACK: Traceback (most recent call last): File "list__remove_not_found.py", line 1, in <module> [1, 2, 3].remove(4) ~~~~~~~~~~~~~~~~~~~ ValueError: list.remove(x): x not in list """ ================================================ FILE: crates/monty/test_cases/list__setitem_dict_index.py ================================================ # Test using a dict as a list setitem index (should raise TypeError) # This covers the code path where a non-LongInt Ref type is used as an index lst = [1, 2, 3] d = {'key': 'value'} lst[d] = 42 
""" TRACEBACK: Traceback (most recent call last): File "list__setitem_dict_index.py", line 5, in <module> lst[d] = 42 ~~~~~~ TypeError: list indices must be integers or slices, not dict """ ================================================ FILE: crates/monty/test_cases/list__setitem_huge_int_index.py ================================================ # Test using a huge LongInt as a list setitem index (should raise IndexError) # This covers the code path where a LongInt exceeds i64 range lst = [1, 2, 3] huge = 2**100 lst[huge] = 42 """ TRACEBACK: Traceback (most recent call last): File "list__setitem_huge_int_index.py", line 5, in <module> lst[huge] = 42 ~~~~~~~~~ IndexError: cannot fit 'int' into an index-sized integer """ ================================================ FILE: crates/monty/test_cases/list__setitem_index_error.py ================================================ lst = [1, 2, 3] lst[10] = 'value' """ TRACEBACK: Traceback (most recent call last): File "list__setitem_index_error.py", line 2, in <module> lst[10] = 'value' ~~~~~~~ IndexError: list assignment index out of range """ ================================================ FILE: crates/monty/test_cases/list__setitem_type_error.py ================================================ lst = [1, 2, 3] lst['key'] = 'value' """ TRACEBACK: Traceback (most recent call last): File "list__setitem_type_error.py", line 2, in <module> lst['key'] = 'value' ~~~~~~~~~~ TypeError: list indices must be integers or slices, not str """ ================================================ FILE: crates/monty/test_cases/list__unpack_type_error.py ================================================ [*42] # Raise=TypeError('Value after * must be an iterable, not int') ================================================ FILE: crates/monty/test_cases/longint__index_error.py ================================================ big = 10**100 [1, 2, 3][big] # Raise=IndexError("cannot fit 'int' into an index-sized integer") ================================================ FILE: 
crates/monty/test_cases/longint__repeat_error.py ================================================ big = 10**100 'abc' * big # Raise=OverflowError("cannot fit 'int' into an index-sized integer") ================================================ FILE: crates/monty/test_cases/loop__break_continue.py ================================================ # === Basic break === result = [] for x in [1, 2, 3, 4, 5]: if x == 3: break result.append(x) assert result == [1, 2], 'break exits loop early' # === Break skips else === flag = 0 for x in [1, 2, 3]: if x == 2: break else: flag = 1 assert flag == 0, 'break skips else clause' # === No break runs else === flag = 0 for x in [1, 2, 3]: pass else: flag = 1 assert flag == 1, 'completing loop runs else clause' # === Basic continue === result = [] for x in [1, 2, 3, 4, 5]: if x % 2 == 0: continue result.append(x) assert result == [1, 3, 5], 'continue skips iteration' # === Continue with else === flag = 0 for x in [1, 2, 3]: if x == 2: continue else: flag = 1 assert flag == 1, 'continue does not skip else clause' # === Nested loops - break inner === result = [] for i in [1, 2, 3]: for j in ['a', 'b', 'c']: if j == 'b': break result.append((i, j)) assert result == [(1, 'a'), (2, 'a'), (3, 'a')], 'break only affects inner loop' # === Nested loops - continue inner === result = [] for i in [1, 2]: for j in ['a', 'b', 'c']: if j == 'b': continue result.append((i, j)) assert result == [(1, 'a'), (1, 'c'), (2, 'a'), (2, 'c')], 'continue only affects inner loop' # === Break in nested with else on inner === result = [] for i in [1, 2]: for j in [10, 20, 30]: if j == 20: break result.append(j) else: result.append('inner-else') assert result == [10, 10], 'break skips inner else' # === No break in inner runs inner else === result = [] for i in [1, 2]: for j in [10, 20]: result.append(j) else: result.append('inner-else') assert result == [10, 20, 'inner-else', 10, 20, 'inner-else'], 'no break runs inner else' # === Continue does not affect else 
===
result = []
for x in [1, 2, 3]:
    if x == 2:
        continue
    result.append(x)
else:
    # for/else: this clause belongs to the loop, not to the `if` above.
    result.append('else')
assert result == [1, 3, 'else'], 'continue does not prevent else'

# === Empty loop with else ===
flag = 0
for x in []:
    flag = 1
else:
    flag = 2
assert flag == 2, 'empty loop runs else'

# === Break on first iteration ===
result = []
for x in [1, 2, 3]:
    result.append('before')
    break
    result.append('after')  # unreachable
assert result == ['before'], 'break on first iteration'


# === Double break (unreachable second break) ===
# The second `break` directly follows the first inside the loop body, so it can
# never execute; the function must still fall through to `return value`.
def double_break(value):
    for i in range(0, 1):
        break
        break
    return value


assert double_break('hello') == 'hello', 'double break returns value correctly'
assert double_break(42) == 42, 'double break works with int'


# === Two breaks in different branches (both reachable) ===
# Copies items into the result until the first negative item (records
# 'negative') or the first item above 100 (records 'too big'), then stops.
def two_breaks(items):
    result = []
    for x in items:
        if x < 0:
            result.append('negative')
            break
        if x > 100:
            result.append('too big')
            break
        result.append(x)
    return result


assert two_breaks([1, 2, 3]) == [1, 2, 3], 'no break taken'
assert two_breaks([1, -1, 3]) == [1, 'negative'], 'first break taken'
assert two_breaks([1, 200, 3]) == [1, 'too big'], 'second break taken'
assert two_breaks([-5]) == ['negative'], 'negative on first item'
assert two_breaks([999]) == ['too big'], 'too big on first item'


# === Double continue (unreachable second continue) ===
# The second `continue` can never execute; every item is still appended once.
def double_continue(items):
    out = []
    for x in items:
        out.append(x)
        continue
        continue
    return out


assert double_continue([1, 2, 3]) == [1, 2, 3], 'double continue keeps normal loop output'
assert double_continue([]) == [], 'double continue handles empty input'

# === Continue on every iteration ===
result = []
for x in [1, 2, 3]:
    result.append(x)
    continue
    result.append('after')  # unreachable
assert result == [1, 2, 3], 'continue on every iteration'
================================================ FILE: crates/monty/test_cases/loop__break_finally.py ================================================
# === Break in try/finally must run finally ===
result = []
for x in [1, 2, 3]:
    try:
        result.append('before')
        break
        result.append('after')  # unreachable
    finally:
        result.append('finally')
assert result == ['before', 'finally'], f'break in try/finally should run finally: {result}'

# === Break in nested try/finally runs both finally blocks ===
result = []
for x in [1, 2, 3]:
    try:
        try:
            result.append('inner-try')
            break
        finally:
            result.append('inner-finally')
    finally:
        result.append('outer-finally')
assert result == ['inner-try', 'inner-finally', 'outer-finally'], f'nested finally blocks: {result}'

# === Break in try/except/finally runs finally ===
result = []
for x in [1, 2, 3]:
    try:
        result.append('try')
        break
    except ValueError:
        result.append('except')
    finally:
        result.append('finally')
assert result == ['try', 'finally'], f'break in try/except/finally: {result}'

# === Break inside except handler with finally ===
result = []
for x in [1, 2, 3]:
    try:
        raise ValueError('test')
    except ValueError:
        result.append('except')
        break
    finally:
        result.append('finally')
assert result == ['except', 'finally'], f'break in except with finally: {result}'

# === Break does not run finally if not in try ===
result = []
for x in [1, 2, 3]:
    result.append('body')
    break
assert result == ['body'], f'break without finally: {result}'

# === Break with multiple loops and finally ===
# The `try` wraps the whole inner loop, so the outer `finally` runs once per
# outer iteration, after 'after-inner' has been recorded.
result = []
for i in [1, 2]:
    try:
        for j in [10, 20, 30]:
            if j == 20:
                break  # This break should not trigger outer finally
            result.append(j)
        result.append('after-inner')
    finally:
        result.append('outer-finally')
assert result == [10, 'after-inner', 'outer-finally', 10, 'after-inner', 'outer-finally'], (
    f'inner break with outer finally: {result}'
)
================================================ FILE: crates/monty/test_cases/loop__break_in_function_error.py ================================================
def foo():
    break
foo()
""" TRACEBACK: Traceback (most recent call last): File "loop__break_in_function_error.py", line 2 break ~~~~~ SyntaxError: 'break' outside loop """
================================================ FILE: crates/monty/test_cases/loop__break_in_if_error.py ================================================
x = True
if x:
    break
""" TRACEBACK: Traceback (most recent call last): File "loop__break_in_if_error.py", line 3 break ~~~~~ SyntaxError: 'break' outside loop """
================================================ FILE: crates/monty/test_cases/loop__break_nested_except_clears.py ================================================
# When breaking from nested except handlers, ALL exception states must be cleared.
# After the loop completes via break, execution should continue normally.


# Test 1: break from depth 2 should reach code after loop
# The `break` executes while both the ValueError handler and the TypeError
# handler are still active.
def test_break():
    for i in range(1):
        try:
            raise ValueError('outer')
        except:
            try:
                raise TypeError('inner')
            except:
                break  # Should clear BOTH exceptions
    return 'ok'


assert test_break() == 'ok', 'break from nested except should reach return'


# Test 2: break from depth 3 should also work
def test_break_depth3():
    for i in range(1):
        try:
            raise ValueError('level1')
        except:
            try:
                raise TypeError('level2')
            except:
                try:
                    raise RuntimeError('level3')
                except:
                    break  # Should clear ALL THREE exceptions
    return 'deep'


assert test_break_depth3() == 'deep', 'break from 3-deep except should reach return'


# Test 3: verify exception stack is empty after break
# NOTE: the assertion below only checks that the statement after the loop ran;
# it does not inspect any exception state directly.
def test_empty_stack():
    result = []
    for i in range(1):
        try:
            raise ValueError('outer')
        except:
            try:
                raise TypeError('inner')
            except:
                result.append('breaking')
                break
    result.append('after')
    return result


assert test_empty_stack() == ['breaking', 'after'], 'should execute code after break'
================================================ FILE: crates/monty/test_cases/loop__break_outside_error.py ================================================
break
""" TRACEBACK: Traceback (most recent call last): File "loop__break_outside_error.py", line 1 break ~~~~~ SyntaxError: 'break' outside loop """
================================================ FILE: crates/monty/test_cases/loop__continue_finally.py ================================================
# === Continue in try/finally must run finally ===
result = []
for x in [1, 2, 3]:
    try:
        result.append(x)
        if x == 2:
            continue
        result.append('after-continue')
    finally:
        result.append('finally')
assert result == [1, 'after-continue', 'finally', 2, 'finally', 3, 'after-continue', 'finally'], (
    f'continue in try/finally should run finally: {result}'
)

# === Continue in nested try/finally runs both finally blocks ===
result = []
for x in [1, 2]:
    try:
        try:
            result.append(x)
            continue
        finally:
            result.append('inner-finally')
    finally:
        result.append('outer-finally')
assert result == [1, 'inner-finally', 'outer-finally', 2, 'inner-finally', 'outer-finally'], (
    f'nested finally with continue: {result}'
)

# === Continue in try/except/finally runs finally ===
result = []
for x in [1, 2, 3]:
    try:
        result.append(x)
        if x == 2:
            continue
    except ValueError:
        result.append('except')
    finally:
        result.append('finally')
assert result == [1, 'finally', 2, 'finally', 3, 'finally'], f'continue in try/except/finally: {result}'

# === Continue inside except handler with finally ===
# 'after' sits in the loop body after the try statement, so it is skipped only
# on the iteration where the handler executes `continue`.
result = []
for x in [1, 2, 3]:
    try:
        if x == 2:
            raise ValueError('test')
        result.append(x)
    except ValueError:
        result.append('except')
        continue
    finally:
        result.append('finally')
    result.append('after')
assert result == [1, 'finally', 'after', 'except', 'finally', 3, 'finally', 'after'], (
    f'continue in except with finally: {result}'
)

# === Continue does not run finally if not in try ===
result = []
for x in [1, 2, 3]:
    result.append(x)
    continue
    result.append('unreachable')
assert result == [1, 2, 3], f'continue without finally: {result}'

# === Continue with multiple loops and finally ===
result = []
for i in [1, 2]:
    try:
        for j in [10, 20, 30]:
            if j == 20:
                continue  # This continue should not trigger outer finally
            result.append(j)
        result.append('after-inner')
    finally:
        result.append('outer-finally')
assert result == [10, 30, 'after-inner', 'outer-finally', 10, 30, 'after-inner', 'outer-finally'], (
    f'inner continue with outer finally: {result}'
)
================================================ FILE: crates/monty/test_cases/loop__continue_in_function_error.py ================================================
def foo():
    continue
foo()
""" TRACEBACK: Traceback (most recent call last): File "loop__continue_in_function_error.py", line 2 continue ~~~~~~~~ SyntaxError: 'continue' not properly in loop """
================================================ FILE: crates/monty/test_cases/loop__continue_in_if_error.py ================================================
x = True
if x:
    continue
""" TRACEBACK: Traceback (most recent call last): File "loop__continue_in_if_error.py", line 3 continue ~~~~~~~~ SyntaxError: 'continue' not properly in loop """
================================================ FILE: crates/monty/test_cases/loop__continue_nested_except_clears.py ================================================
# When continuing from nested except handlers, ALL exception states must be cleared.
# After the loop completes its iterations, execution should continue normally.
# Test 1: continue from depth 2 should process all iterations def test_continue(): results = [] for i in range(3): try: raise ValueError('outer') except: try: raise TypeError('inner') except: results.append(i) continue # Should clear BOTH exceptions return results assert test_continue() == [0, 1, 2], 'continue from nested except should process all iterations' # Test 2: continue from depth 3 should also work def test_continue_depth3(): results = [] for i in range(2): try: raise ValueError('level1') except: try: raise TypeError('level2') except: try: raise RuntimeError('level3') except: results.append(i) continue # Should clear ALL THREE exceptions return results assert test_continue_depth3() == [0, 1], 'continue from 3-deep except should work' # Test 3: continue runs else clause since loop completes normally def test_continue_with_else(): results = [] for i in range(2): try: raise ValueError('outer') except: try: raise TypeError('inner') except: results.append(i) continue else: results.append('else') return results assert test_continue_with_else() == [0, 1, 'else'], 'continue should allow else to run' ================================================ FILE: crates/monty/test_cases/loop__continue_outside_error.py ================================================ continue """ TRACEBACK: Traceback (most recent call last): File "loop__continue_outside_error.py", line 1 continue ~~~~~~~~ SyntaxError: 'continue' not properly in loop """ ================================================ FILE: crates/monty/test_cases/math__acos_domain_error.py ================================================ import math math.acos(2) """ TRACEBACK: Traceback (most recent call last): File "math__acos_domain_error.py", line 3, in math.acos(2) ~~~~~~~~~~~~ ValueError: expected a number in range from -1 up to 1, got 2.0 """ ================================================ FILE: crates/monty/test_cases/math__acosh_domain_error.py ================================================ import math 
math.acosh(0.5) """ TRACEBACK: Traceback (most recent call last): File "math__acosh_domain_error.py", line 3, in math.acosh(0.5) ~~~~~~~~~~~~~~~ ValueError: expected argument value not less than 1, got 0.5 """ ================================================ FILE: crates/monty/test_cases/math__asin_domain_error.py ================================================ import math math.asin(2) """ TRACEBACK: Traceback (most recent call last): File "math__asin_domain_error.py", line 3, in math.asin(2) ~~~~~~~~~~~~ ValueError: expected a number in range from -1 up to 1, got 2.0 """ ================================================ FILE: crates/monty/test_cases/math__atanh_domain_error.py ================================================ import math math.atanh(1) """ TRACEBACK: Traceback (most recent call last): File "math__atanh_domain_error.py", line 3, in math.atanh(1) ~~~~~~~~~~~~~ ValueError: expected a number between -1 and 1, got 1.0 """ ================================================ FILE: crates/monty/test_cases/math__cos_inf_error.py ================================================ import math math.cos(math.inf) """ TRACEBACK: Traceback (most recent call last): File "math__cos_inf_error.py", line 3, in math.cos(math.inf) ~~~~~~~~~~~~~~~~~~ ValueError: expected a finite input, got inf """ ================================================ FILE: crates/monty/test_cases/math__cosh_overflow_error.py ================================================ import math math.cosh(1000) """ TRACEBACK: Traceback (most recent call last): File "math__cosh_overflow_error.py", line 3, in math.cosh(1000) ~~~~~~~~~~~~~~~ OverflowError: math range error """ ================================================ FILE: crates/monty/test_cases/math__exp_overflow_error.py ================================================ import math math.exp(1000) """ TRACEBACK: Traceback (most recent call last): File "math__exp_overflow_error.py", line 3, in math.exp(1000) ~~~~~~~~~~~~~~ OverflowError: math range 
error """ ================================================ FILE: crates/monty/test_cases/math__factorial_float_error.py ================================================ import math math.factorial(1.5) """ TRACEBACK: Traceback (most recent call last): File "math__factorial_float_error.py", line 3, in math.factorial(1.5) ~~~~~~~~~~~~~~~~~~~ TypeError: 'float' object cannot be interpreted as an integer """ ================================================ FILE: crates/monty/test_cases/math__factorial_negative_error.py ================================================ import math math.factorial(-1) """ TRACEBACK: Traceback (most recent call last): File "math__factorial_negative_error.py", line 3, in math.factorial(-1) ~~~~~~~~~~~~~~~~~~ ValueError: factorial() not defined for negative values """ ================================================ FILE: crates/monty/test_cases/math__floor_inf_error.py ================================================ import math math.floor(float('inf')) """ TRACEBACK: Traceback (most recent call last): File "math__floor_inf_error.py", line 3, in math.floor(float('inf')) ~~~~~~~~~~~~~~~~~~~~~~~~ OverflowError: cannot convert float infinity to integer """ ================================================ FILE: crates/monty/test_cases/math__floor_nan_error.py ================================================ import math math.floor(float('nan')) """ TRACEBACK: Traceback (most recent call last): File "math__floor_nan_error.py", line 3, in math.floor(float('nan')) ~~~~~~~~~~~~~~~~~~~~~~~~ ValueError: cannot convert float NaN to integer """ ================================================ FILE: crates/monty/test_cases/math__floor_str_error.py ================================================ import math math.floor('x') """ TRACEBACK: Traceback (most recent call last): File "math__floor_str_error.py", line 3, in math.floor('x') ~~~~~~~~~~~~~~~ TypeError: must be real number, not str """ ================================================ FILE: 
crates/monty/test_cases/math__fmod_inf_error.py ================================================ import math math.fmod(math.inf, 3) """ TRACEBACK: Traceback (most recent call last): File "math__fmod_inf_error.py", line 3, in math.fmod(math.inf, 3) ~~~~~~~~~~~~~~~~~~~~~~ ValueError: math domain error """ ================================================ FILE: crates/monty/test_cases/math__gamma_neg_int_error.py ================================================ import math math.gamma(0) """ TRACEBACK: Traceback (most recent call last): File "math__gamma_neg_int_error.py", line 3, in math.gamma(0) ~~~~~~~~~~~~~ ValueError: expected a noninteger or positive integer, got 0.0 """ ================================================ FILE: crates/monty/test_cases/math__gcd_float_error.py ================================================ import math math.gcd(1.5, 2) """ TRACEBACK: Traceback (most recent call last): File "math__gcd_float_error.py", line 3, in math.gcd(1.5, 2) ~~~~~~~~~~~~~~~~ TypeError: 'float' object cannot be interpreted as an integer """ ================================================ FILE: crates/monty/test_cases/math__isqrt_negative_error.py ================================================ import math math.isqrt(-1) """ TRACEBACK: Traceback (most recent call last): File "math__isqrt_negative_error.py", line 3, in math.isqrt(-1) ~~~~~~~~~~~~~~ ValueError: isqrt() argument must be nonnegative """ ================================================ FILE: crates/monty/test_cases/math__ldexp_overflow_error.py ================================================ import math math.ldexp(1.0, 1075) """ TRACEBACK: Traceback (most recent call last): File "math__ldexp_overflow_error.py", line 3, in math.ldexp(1.0, 1075) ~~~~~~~~~~~~~~~~~~~~~ OverflowError: math range error """ ================================================ FILE: crates/monty/test_cases/math__log1p_domain_error.py ================================================ import math math.log1p(-2) """ TRACEBACK: 
Traceback (most recent call last): File "math__log1p_domain_error.py", line 3, in math.log1p(-2) ~~~~~~~~~~~~~~ ValueError: expected argument value > -1, got -2.0 """ ================================================ FILE: crates/monty/test_cases/math__log_base1_error.py ================================================ import math math.log(10, 1) """ TRACEBACK: Traceback (most recent call last): File "math__log_base1_error.py", line 3, in math.log(10, 1) ~~~~~~~~~~~~~~~ ZeroDivisionError: division by zero """ ================================================ FILE: crates/monty/test_cases/math__log_zero_error.py ================================================ import math math.log(0) """ TRACEBACK: Traceback (most recent call last): File "math__log_zero_error.py", line 3, in math.log(0) ~~~~~~~~~~~ ValueError: expected a positive input """ ================================================ FILE: crates/monty/test_cases/math__module.py ================================================ import math # === Constants === assert math.pi == 3.141592653589793, 'math.pi value' assert math.e == 2.718281828459045, 'math.e value' assert math.tau == 6.283185307179586, 'math.tau value' assert math.inf == float('inf'), 'math.inf is infinity' assert math.nan != math.nan, 'math.nan is NaN (not equal to itself)' assert math.isinf(math.inf), 'math.inf is recognized by isinf' assert math.isnan(math.nan), 'math.nan is recognized by isnan' # === math.floor() === assert math.floor(2.3) == 2, 'floor(2.3)' assert math.floor(-2.3) == -3, 'floor(-2.3)' assert math.floor(2.0) == 2, 'floor(2.0)' assert math.floor(5) == 5, 'floor(int)' assert math.floor(True) == 1, 'floor(True)' assert math.floor(False) == 0, 'floor(False)' assert math.floor(-0.5) == -1, 'floor(-0.5)' assert math.floor(0.9) == 0, 'floor(0.9)' assert math.floor(1e18) == 1000000000000000000, 'floor(1e18)' threw = False try: math.floor(float('inf')) except OverflowError: threw = True assert threw, 'floor(inf) raises OverflowError' threw 
= False try: math.floor(float('nan')) except ValueError: threw = True assert threw, 'floor(nan) raises ValueError' threw = False try: math.floor('x') except TypeError: threw = True assert threw, 'floor(str) raises TypeError' # === math.ceil() === assert math.ceil(2.3) == 3, 'ceil(2.3)' assert math.ceil(-2.3) == -2, 'ceil(-2.3)' assert math.ceil(2.0) == 2, 'ceil(2.0)' assert math.ceil(5) == 5, 'ceil(int)' assert math.ceil(True) == 1, 'ceil(True)' assert math.ceil(False) == 0, 'ceil(False)' assert math.ceil(0.1) == 1, 'ceil(0.1)' assert math.ceil(-0.1) == 0, 'ceil(-0.1)' threw = False try: math.ceil(float('inf')) except OverflowError: threw = True assert threw, 'ceil(inf) raises OverflowError' threw = False try: math.ceil(float('nan')) except ValueError: threw = True assert threw, 'ceil(nan) raises ValueError' threw = False try: math.ceil('x') except TypeError: threw = True assert threw, 'ceil(str) raises TypeError' # === math.trunc() === assert math.trunc(2.7) == 2, 'trunc(2.7)' assert math.trunc(-2.7) == -2, 'trunc(-2.7)' assert math.trunc(2.0) == 2, 'trunc(2.0)' assert math.trunc(5) == 5, 'trunc(int)' assert math.trunc(True) == 1, 'trunc(True)' assert math.trunc(False) == 0, 'trunc(False)' threw = False try: math.trunc(float('inf')) except OverflowError: threw = True assert threw, 'trunc(inf) raises OverflowError' threw = False try: math.trunc(float('nan')) except ValueError: threw = True assert threw, 'trunc(nan) raises ValueError' threw = False try: math.trunc('x') except TypeError: threw = True assert threw, 'trunc(str) raises TypeError' # === math.sqrt() === assert math.sqrt(4) == 2.0, 'sqrt(4)' assert math.sqrt(2) == 1.4142135623730951, 'sqrt(2)' assert math.sqrt(0) == 0.0, 'sqrt(0)' assert math.sqrt(1) == 1.0, 'sqrt(1)' assert math.sqrt(0.25) == 0.5, 'sqrt(0.25)' assert isinstance(math.sqrt(4), float), 'sqrt returns float' assert math.sqrt(True) == 1.0, 'sqrt(True)' assert math.sqrt(False) == 0.0, 'sqrt(False)' assert math.sqrt(float('inf')) == float('inf'), 
'sqrt(inf) returns inf' assert math.isnan(math.sqrt(float('nan'))), 'sqrt(nan) returns nan' threw = False try: math.sqrt(-1) except ValueError: threw = True assert threw, 'sqrt(-1) raises ValueError' threw = False try: math.sqrt('x') except TypeError: threw = True assert threw, 'sqrt(str) raises TypeError' # === math.isqrt() === assert math.isqrt(0) == 0, 'isqrt(0)' assert math.isqrt(1) == 1, 'isqrt(1)' assert math.isqrt(4) == 2, 'isqrt(4)' assert math.isqrt(10) == 3, 'isqrt(10)' assert math.isqrt(99) == 9, 'isqrt(99)' assert math.isqrt(100) == 10, 'isqrt(100)' assert math.isqrt(True) == 1, 'isqrt(True)' threw = False try: math.isqrt(-1) except ValueError: threw = True assert threw, 'isqrt(-1) raises ValueError' threw = False try: math.isqrt(4.0) except TypeError: threw = True assert threw, 'isqrt(float) raises TypeError' # === math.cbrt() === assert math.cbrt(0) == 0.0, 'cbrt(0)' assert math.cbrt(8) == 2.0, 'cbrt(8)' assert math.cbrt(-8) == -2.0, 'cbrt(-8)' assert math.cbrt(1) == 1.0, 'cbrt(1)' assert math.cbrt(64) == 4.0, 'cbrt(64)' assert math.cbrt(float('inf')) == float('inf'), 'cbrt(inf)' assert math.cbrt(float('-inf')) == float('-inf'), 'cbrt(-inf)' assert math.isnan(math.cbrt(float('nan'))), 'cbrt(nan) is nan' threw = False try: math.cbrt('x') except TypeError: threw = True assert threw, 'cbrt(str) raises TypeError' # === math.pow() === assert math.pow(2, 3) == 8.0, 'pow(2, 3)' assert math.pow(2.0, 0.5) == math.sqrt(2), 'pow(2, 0.5)' assert math.pow(0, 0) == 1.0, 'pow(0, 0)' assert isinstance(math.pow(2, 3), float), 'pow returns float' assert math.pow(2, -1) == 0.5, 'pow(2, -1)' assert math.pow(float('inf'), 0) == 1.0, 'pow(inf, 0)' assert math.pow(float('nan'), 0) == 1.0, 'pow(nan, 0)' assert math.pow(1, float('inf')) == 1.0, 'pow(1, inf)' assert math.pow(1, float('nan')) == 1.0, 'pow(1, nan)' threw = False try: math.pow(0, -1) except ValueError: threw = True assert threw, 'pow(0, -1) raises ValueError' threw = False try: math.pow(-1, 0.5) except 
ValueError: threw = True assert threw, 'pow(-1, 0.5) raises ValueError' threw = False try: math.pow(2, 1024) except OverflowError: threw = True assert threw, 'pow(2, 1024) raises OverflowError' threw = False try: math.pow('x', 2) except TypeError: threw = True assert threw, 'pow(str, int) raises TypeError' # === math.exp() === assert math.exp(0) == 1.0, 'exp(0)' assert math.exp(1) == math.e, 'exp(1)' assert math.exp(float('-inf')) == 0.0, 'exp(-inf)' assert math.exp(float('inf')) == float('inf'), 'exp(inf)' assert math.isnan(math.exp(float('nan'))), 'exp(nan) is nan' threw = False try: math.exp(1000) except OverflowError: threw = True assert threw, 'exp(1000) raises OverflowError' threw = False try: math.exp('x') except TypeError: threw = True assert threw, 'exp(str) raises TypeError' # === math.exp2() === assert math.exp2(0) == 1.0, 'exp2(0)' assert math.exp2(3) == 8.0, 'exp2(3)' assert math.exp2(10) == 1024.0, 'exp2(10)' assert math.exp2(float('-inf')) == 0.0, 'exp2(-inf)' assert math.exp2(float('inf')) == float('inf'), 'exp2(inf)' assert math.isnan(math.exp2(float('nan'))), 'exp2(nan) is nan' threw = False try: math.exp2(1024) except OverflowError: threw = True assert threw, 'exp2(1024) raises OverflowError' threw = False try: math.exp2('x') except TypeError: threw = True assert threw, 'exp2(str) raises TypeError' # === math.expm1() === assert math.expm1(0) == 0.0, 'expm1(0)' assert math.isclose(math.expm1(1), math.e - 1), 'expm1(1)' assert math.expm1(1e-15) != 0.0, 'expm1(1e-15) is precise' assert math.expm1(float('-inf')) == -1.0, 'expm1(-inf)' assert math.expm1(float('inf')) == float('inf'), 'expm1(inf)' assert math.isnan(math.expm1(float('nan'))), 'expm1(nan) is nan' threw = False try: math.expm1(1000) except OverflowError: threw = True assert threw, 'expm1(1000) raises OverflowError' threw = False try: math.expm1('x') except TypeError: threw = True assert threw, 'expm1(str) raises TypeError' # === math.fabs() === assert math.fabs(-5) == 5.0, 'fabs(-5)' 
assert math.fabs(5) == 5.0, 'fabs(5)' assert math.fabs(-3.14) == 3.14, 'fabs(-3.14)' assert math.fabs(0) == 0.0, 'fabs(0)' assert isinstance(math.fabs(-5), float), 'fabs returns float' assert isinstance(math.fabs(0), float), 'fabs(0) returns float' assert math.fabs(True) == 1.0, 'fabs(True)' assert math.fabs(False) == 0.0, 'fabs(False)' assert math.fabs(float('inf')) == float('inf'), 'fabs(inf)' assert math.fabs(float('-inf')) == float('inf'), 'fabs(-inf)' assert math.isnan(math.fabs(float('nan'))), 'fabs(nan) returns nan' threw = False try: math.fabs('x') except TypeError: threw = True assert threw, 'fabs(str) raises TypeError' # === math.isnan() === assert math.isnan(float('nan')) == True, 'isnan(nan)' assert math.isnan(1.0) == False, 'isnan(1.0)' assert math.isnan(0.0) == False, 'isnan(0.0)' assert math.isnan(float('inf')) == False, 'isnan(inf)' assert math.isnan(0) == False, 'isnan(int)' assert math.isnan(True) == False, 'isnan(True)' assert math.isnan(False) == False, 'isnan(False)' threw = False try: math.isnan('x') except TypeError: threw = True assert threw, 'isnan(str) raises TypeError' # === math.isinf() === assert math.isinf(float('inf')) == True, 'isinf(inf)' assert math.isinf(float('-inf')) == True, 'isinf(-inf)' assert math.isinf(1.0) == False, 'isinf(1.0)' assert math.isinf(float('nan')) == False, 'isinf(nan)' assert math.isinf(0) == False, 'isinf(int)' assert math.isinf(True) == False, 'isinf(True)' assert math.isinf(False) == False, 'isinf(False)' threw = False try: math.isinf('x') except TypeError: threw = True assert threw, 'isinf(str) raises TypeError' # === math.isfinite() === assert math.isfinite(1.0) == True, 'isfinite(1.0)' assert math.isfinite(0) == True, 'isfinite(0)' assert math.isfinite(float('inf')) == False, 'isfinite(inf)' assert math.isfinite(float('-inf')) == False, 'isfinite(-inf)' assert math.isfinite(float('nan')) == False, 'isfinite(nan)' assert math.isfinite(True) == True, 'isfinite(True)' assert math.isfinite(False) == True, 
'isfinite(False)' threw = False try: math.isfinite('x') except TypeError: threw = True assert threw, 'isfinite(str) raises TypeError' # === math.copysign() === assert math.copysign(1.0, -0.0) == -1.0, 'copysign(1.0, -0.0)' assert math.copysign(-1.0, 1.0) == 1.0, 'copysign(-1.0, 1.0)' assert math.copysign(5, -3) == -5.0, 'copysign(5, -3)' assert isinstance(math.copysign(5, -3), float), 'copysign returns float' assert math.copysign(float('inf'), -1.0) == float('-inf'), 'copysign(inf, -1.0)' assert math.copysign(0.0, -1.0) == -0.0, 'copysign(0.0, -1.0)' assert math.isnan(math.copysign(float('nan'), -1.0)), 'copysign(nan, -1.0) is nan' assert math.copysign(True, -1) == -1.0, 'copysign(True, -1)' threw = False try: math.copysign('x', 1) except TypeError: threw = True assert threw, 'copysign(str, int) raises TypeError' # === math.isclose() === assert math.isclose(1.0, 1.0) == True, 'isclose equal' assert math.isclose(1.0, 1.0000000001) == True, 'isclose very close' assert math.isclose(1.0, 1.1) == False, 'isclose not close' assert math.isclose(0.0, 0.0) == True, 'isclose zeros' assert math.isclose(-0.0, 0.0) == True, 'isclose neg zero and zero' assert math.isclose(float('inf'), float('inf')) == True, 'isclose(inf, inf)' assert math.isclose(float('inf'), 1e308) == False, 'isclose(inf, large) is False' assert math.isclose(float('nan'), float('nan')) == False, 'isclose(nan, nan) is False' assert math.isclose(1e-15, 0.0) == False, 'isclose(1e-15, 0.0) is False with default abs_tol' assert math.isclose(0.0, 1e-15) == False, 'isclose(0.0, 1e-15) is False with default abs_tol' threw = False try: math.isclose('x', 1) except TypeError: threw = True assert threw, 'isclose(str, int) raises TypeError' # === math.log() === assert math.log(1) == 0.0, 'log(1)' assert math.log(math.e) == 1.0, 'log(e)' assert math.log(100, 10) == 2.0, 'log(100, 10)' assert math.log(1, 10) == 0.0, 'log(1, 10)' assert math.log(True) == 0.0, 'log(True)' assert math.log(float('inf')) == float('inf'), 
'log(inf) returns inf' assert math.isnan(math.log(float('nan'))), 'log(nan) returns nan' assert math.isnan(math.log(float('nan'), 2)), 'log(nan, 2) returns nan' assert math.log(float('inf'), 2) == float('inf'), 'log(inf, 2) returns inf' threw = False try: math.log(0) except ValueError: threw = True assert threw, 'log(0) raises ValueError' threw = False try: math.log(-1) except ValueError: threw = True assert threw, 'log(-1) raises ValueError' threw = False try: math.log(10, 1) except ZeroDivisionError: threw = True assert threw, 'log(10, 1) raises ZeroDivisionError' threw = False try: math.log(10, 0) except ValueError: threw = True assert threw, 'log(10, 0) raises ValueError' threw = False try: math.log(10, -1) except ValueError: threw = True assert threw, 'log(10, -1) raises ValueError' threw = False try: math.log('x') except TypeError: threw = True assert threw, 'log(str) raises TypeError' # === math.log2() === assert math.log2(1) == 0.0, 'log2(1)' assert math.log2(8) == 3.0, 'log2(8)' assert math.log2(1024) == 10.0, 'log2(1024)' assert math.log2(True) == 0.0, 'log2(True)' assert math.log2(float('inf')) == float('inf'), 'log2(inf) returns inf' assert math.isnan(math.log2(float('nan'))), 'log2(nan) returns nan' threw = False try: math.log2(0) except ValueError: threw = True assert threw, 'log2(0) raises ValueError' threw = False try: math.log2(-1) except ValueError: threw = True assert threw, 'log2(-1) raises ValueError' threw = False try: math.log2('x') except TypeError: threw = True assert threw, 'log2(str) raises TypeError' # === math.log10() === assert math.log10(1) == 0.0, 'log10(1)' assert math.log10(1000) == 3.0, 'log10(1000)' assert math.log10(100) == 2.0, 'log10(100)' assert math.log10(True) == 0.0, 'log10(True)' assert math.log10(float('inf')) == float('inf'), 'log10(inf) returns inf' assert math.isnan(math.log10(float('nan'))), 'log10(nan) returns nan' threw = False try: math.log10(0) except ValueError: threw = True assert threw, 'log10(0) raises 
ValueError' threw = False try: math.log10(-1) except ValueError: threw = True assert threw, 'log10(-1) raises ValueError' threw = False try: math.log10('x') except TypeError: threw = True assert threw, 'log10(str) raises TypeError' # === math.log1p() === assert math.log1p(0) == 0.0, 'log1p(0)' assert math.isclose(math.log1p(math.e - 1), 1.0), 'log1p(e-1)' assert math.log1p(float('inf')) == float('inf'), 'log1p(inf)' assert math.isnan(math.log1p(float('nan'))), 'log1p(nan) is nan' threw = False try: math.log1p(-1) except ValueError: threw = True assert threw, 'log1p(-1) raises ValueError' threw = False try: math.log1p(-2) except ValueError: threw = True assert threw, 'log1p(-2) raises ValueError' threw = False try: math.log1p('x') except TypeError: threw = True assert threw, 'log1p(str) raises TypeError' # === math.factorial() === assert math.factorial(0) == 1, 'factorial(0)' assert math.factorial(1) == 1, 'factorial(1)' assert math.factorial(5) == 120, 'factorial(5)' assert math.factorial(10) == 3628800, 'factorial(10)' assert math.factorial(20) == 2432902008176640000, 'factorial(20)' assert math.factorial(True) == 1, 'factorial(True)' assert math.factorial(False) == 1, 'factorial(False)' threw = False try: math.factorial(-1) except ValueError: threw = True assert threw, 'factorial(-1) raises ValueError' threw = False try: math.factorial(1.5) except TypeError: threw = True assert threw, 'factorial(1.5) raises TypeError' threw = False try: math.factorial('x') except TypeError: threw = True assert threw, 'factorial(str) raises TypeError' # === math.gcd() === assert math.gcd(12, 8) == 4, 'gcd(12, 8)' assert math.gcd(0, 5) == 5, 'gcd(0, 5)' assert math.gcd(5, 0) == 5, 'gcd(5, 0)' assert math.gcd(0, 0) == 0, 'gcd(0, 0)' assert math.gcd(-12, 8) == 4, 'gcd(-12, 8)' assert math.gcd(12, -8) == 4, 'gcd(12, -8)' assert math.gcd(-12, -8) == 4, 'gcd(-12, -8)' assert math.gcd(7, 13) == 1, 'gcd(7, 13) coprime' assert math.gcd(True, 2) == 1, 'gcd(True, 2)' assert math.gcd(False, 
5) == 5, 'gcd(False, 5)' threw = False try: math.gcd(1.5, 2) except TypeError: threw = True assert threw, 'gcd(float, int) raises TypeError' threw = False try: math.gcd(2, 1.5) except TypeError: threw = True assert threw, 'gcd(int, float) raises TypeError' # === math.lcm() === assert math.lcm(4, 6) == 12, 'lcm(4, 6)' assert math.lcm(0, 5) == 0, 'lcm(0, 5)' assert math.lcm(5, 0) == 0, 'lcm(5, 0)' assert math.lcm(0, 0) == 0, 'lcm(0, 0)' assert math.lcm(3, 7) == 21, 'lcm(3, 7) coprime' assert math.lcm(6, 6) == 6, 'lcm(6, 6) equal' assert math.lcm(-4, 6) == 12, 'lcm(-4, 6) negative' assert math.lcm(-4, -6) == 12, 'lcm(-4, -6) both negative' assert math.lcm(True, 2) == 2, 'lcm(True, 2)' assert math.lcm(False, 5) == 0, 'lcm(False, 5)' threw = False try: math.lcm(1.5, 2) except TypeError: threw = True assert threw, 'lcm(float, int) raises TypeError' threw = False try: math.lcm(2, 1.5) except TypeError: threw = True assert threw, 'lcm(int, float) raises TypeError' # === math.comb() === assert math.comb(5, 2) == 10, 'comb(5, 2)' assert math.comb(10, 0) == 1, 'comb(10, 0)' assert math.comb(10, 10) == 1, 'comb(10, 10)' assert math.comb(0, 0) == 1, 'comb(0, 0)' assert math.comb(5, 6) == 0, 'comb(5, 6) k > n' threw = False try: math.comb(5, -1) except ValueError: threw = True assert threw, 'comb(5, -1) raises ValueError' threw = False try: math.comb(-1, 2) except ValueError: threw = True assert threw, 'comb(-1, 2) raises ValueError' threw = False try: math.comb(5.0, 2) except TypeError: threw = True assert threw, 'comb(float, int) raises TypeError' # === math.perm() === assert math.perm(5, 2) == 20, 'perm(5, 2)' assert math.perm(5, 0) == 1, 'perm(5, 0)' assert math.perm(5, 5) == 120, 'perm(5, 5)' assert math.perm(5, 6) == 0, 'perm(5, 6) k > n' threw = False try: math.perm(5, -1) except ValueError: threw = True assert threw, 'perm(5, -1) raises ValueError' threw = False try: math.perm(-1, 2) except ValueError: threw = True assert threw, 'perm(-1, 2) raises ValueError' threw = 
False  # right-hand side of the `threw = ` assignment that ends the previous line
try:
    math.perm(5.0, 2)
except TypeError:
    threw = True
assert threw, 'perm(float, int) raises TypeError'

# === math.copysign() (already above) ===
# === math.isclose() (already above) ===

# Error checks in this section all share one pattern: reset `threw`, make the
# call inside `try`, set `threw` in the matching `except`, then assert on it.
# Any other exception type propagates and fails the script.

# === math.degrees() ===
assert math.degrees(0) == 0.0, 'degrees(0)'
assert math.degrees(math.pi) == 180.0, 'degrees(pi)'
assert math.degrees(math.tau) == 360.0, 'degrees(tau)'
# bool arguments must behave exactly like the ints 0/1
assert math.degrees(True) == math.degrees(1), 'degrees(True)'
# infinities and NaN pass straight through the conversion
assert math.degrees(float('inf')) == float('inf'), 'degrees(inf)'
assert math.degrees(float('-inf')) == float('-inf'), 'degrees(-inf)'
assert math.isnan(math.degrees(float('nan'))), 'degrees(nan) is nan'
threw = False
try:
    math.degrees('x')
except TypeError:
    threw = True
assert threw, 'degrees(str) raises TypeError'

# === math.radians() ===
assert math.radians(0) == 0.0, 'radians(0)'
assert math.radians(180) == math.pi, 'radians(180)'
assert math.radians(360) == math.tau, 'radians(360)'
assert math.radians(True) == math.radians(1), 'radians(True)'
assert math.radians(float('inf')) == float('inf'), 'radians(inf)'
assert math.radians(float('-inf')) == float('-inf'), 'radians(-inf)'
assert math.isnan(math.radians(float('nan'))), 'radians(nan) is nan'
threw = False
try:
    math.radians('x')
except TypeError:
    threw = True
assert threw, 'radians(str) raises TypeError'

# === math.sin() ===
assert math.sin(0) == 0.0, 'sin(0)'
assert math.sin(math.pi / 2) == 1.0, 'sin(pi/2)'
# abs() makes the tolerance two-sided: math.pi is only an approximation of pi,
# so sin(math.pi) is a tiny non-zero value, and a bare `< 1e-15` would also
# accept any negative result (e.g. -1.0) as "near zero".
assert abs(math.sin(math.pi)) < 1e-15, 'sin(pi) near zero'
assert math.isnan(math.sin(float('nan'))), 'sin(nan) is nan'
# sin/cos/tan: NaN propagates, but an infinite argument is a ValueError
threw = False
try:
    math.sin(float('inf'))
except ValueError:
    threw = True
assert threw, 'sin(inf) raises ValueError'
threw = False
try:
    math.sin(float('-inf'))
except ValueError:
    threw = True
assert threw, 'sin(-inf) raises ValueError'
threw = False
try:
    math.sin('x')
except TypeError:
    threw = True
assert threw, 'sin(str) raises TypeError'

# === math.cos() ===
assert math.cos(0) == 1.0, 'cos(0)'
assert abs(math.cos(math.pi / 2)) < 1e-15, 'cos(pi/2) near zero'
assert math.cos(math.pi) == -1.0, 'cos(pi)'
assert math.isnan(math.cos(float('nan'))), 'cos(nan) is nan'
threw = False
try:
    math.cos(float('inf'))
except ValueError:
    threw = True
assert threw, 'cos(inf) raises ValueError'
threw = False
try:
    math.cos(float('-inf'))
except ValueError:
    threw = True
assert threw, 'cos(-inf) raises ValueError'
threw = False
try:
    math.cos('x')
except TypeError:
    threw = True
assert threw, 'cos(str) raises TypeError'

# === math.tan() ===
assert math.tan(0) == 0.0, 'tan(0)'
assert abs(math.tan(math.pi / 4) - 1.0) < 1e-15, 'tan(pi/4) near 1'
assert math.isnan(math.tan(float('nan'))), 'tan(nan) is nan'
threw = False
try:
    math.tan(float('inf'))
except ValueError:
    threw = True
assert threw, 'tan(inf) raises ValueError'
threw = False
try:
    math.tan(float('-inf'))
except ValueError:
    threw = True
assert threw, 'tan(-inf) raises ValueError'
threw = False
try:
    math.tan('x')
except TypeError:
    threw = True
assert threw, 'tan(str) raises TypeError'

# === math.asin() ===
assert math.asin(0) == 0.0, 'asin(0)'
assert math.asin(1) == math.pi / 2, 'asin(1)'
assert math.asin(-1) == -math.pi / 2, 'asin(-1)'
assert math.isnan(math.asin(float('nan'))), 'asin(nan) is nan'
# arguments outside [-1, 1] are a domain error
threw = False
try:
    math.asin(2)
except ValueError:
    threw = True
assert threw, 'asin(2) raises ValueError'
threw = False
try:
    math.asin(-2)
except ValueError:
    threw = True
assert threw, 'asin(-2) raises ValueError'
threw = False
try:
    math.asin('x')
except TypeError:
    threw = True
assert threw, 'asin(str) raises TypeError'

# === math.acos() ===
assert math.acos(1) == 0.0, 'acos(1)'
assert math.acos(0) == math.pi / 2, 'acos(0)'
assert math.acos(-1) == math.pi, 'acos(-1)'
assert math.isnan(math.acos(float('nan'))), 'acos(nan) is nan'
threw = False
try:
    math.acos(2)
except ValueError:
    threw = True
assert threw, 'acos(2) raises ValueError'
threw = False
try:
    math.acos(-2)
except ValueError:
    threw = True
assert threw, 'acos(-2) raises ValueError'
# the assert for this last check is the first statement after this span
threw = False
try:
    math.acos('x')
except TypeError:
    threw = True
# Verdict for the acos('x') call made just before this span.
assert threw, 'acos(str) raises TypeError'

# In the error checks below the flag is assigned on both paths of the `try`:
# False when the call returns normally, True when the expected exception fires.

# === math.atan() ===
assert math.atan(0) == 0.0, 'atan(0)'
assert math.atan(1) == math.pi / 4, 'atan(1)'
assert math.atan(float('inf')) == math.pi / 2, 'atan(inf)'
assert math.atan(float('-inf')) == -math.pi / 2, 'atan(-inf)'
assert math.isnan(math.atan(float('nan'))), 'atan(nan) is nan'
try:
    math.atan('x')
    threw = False
except TypeError:
    threw = True
assert threw, 'atan(str) raises TypeError'

# === math.atan2() ===
assert math.atan2(0, 1) == 0.0, 'atan2(0, 1)'
assert math.atan2(1, 0) == math.pi / 2, 'atan2(1, 0)'
assert math.atan2(0, -1) == math.pi, 'atan2(0, -1)'
assert math.atan2(0, 0) == 0.0, 'atan2(0, 0)'
assert math.atan2(-1, 0) == -math.pi / 2, 'atan2(-1, 0)'
assert math.isclose(math.atan2(float('inf'), float('inf')), math.pi / 4), 'atan2(inf, inf)'
assert math.isnan(math.atan2(float('nan'), 1)), 'atan2(nan, 1) is nan'
assert math.isnan(math.atan2(1, float('nan'))), 'atan2(1, nan) is nan'
try:
    math.atan2('x', 1)
    threw = False
except TypeError:
    threw = True
assert threw, 'atan2(str, int) raises TypeError'

# === math.sinh() ===
assert math.sinh(0) == 0.0, 'sinh(0)'
assert math.isclose(math.sinh(1), 1.1752011936438014), 'sinh(1)'
assert math.sinh(float('inf')) == float('inf'), 'sinh(inf)'
assert math.sinh(float('-inf')) == float('-inf'), 'sinh(-inf)'
assert math.isnan(math.sinh(float('nan'))), 'sinh(nan) is nan'
# a large finite argument must overflow rather than return inf
try:
    math.sinh(1000)
    threw = False
except OverflowError:
    threw = True
assert threw, 'sinh(1000) raises OverflowError'
try:
    math.sinh('x')
    threw = False
except TypeError:
    threw = True
assert threw, 'sinh(str) raises TypeError'

# === math.cosh() ===
assert math.cosh(0) == 1.0, 'cosh(0)'
assert math.isclose(math.cosh(1), 1.5430806348152437), 'cosh(1)'
assert math.cosh(float('inf')) == float('inf'), 'cosh(inf)'
assert math.cosh(float('-inf')) == float('inf'), 'cosh(-inf)'
assert math.isnan(math.cosh(float('nan'))), 'cosh(nan) is nan'
try:
    math.cosh(1000)
    threw = False
except OverflowError:
    threw = True
assert threw, 'cosh(1000) raises OverflowError'
try:
    math.cosh('x')
    threw = False
except TypeError:
    threw = True
assert threw, 'cosh(str) raises TypeError'

# === math.tanh() ===
assert math.tanh(0) == 0.0, 'tanh(0)'
assert math.tanh(float('inf')) == 1.0, 'tanh(inf)'
assert math.tanh(float('-inf')) == -1.0, 'tanh(-inf)'
assert math.tanh(1) == 0.7615941559557649, 'tanh(1)'
assert math.isnan(math.tanh(float('nan'))), 'tanh(nan) is nan'
try:
    math.tanh('x')
    threw = False
except TypeError:
    threw = True
assert threw, 'tanh(str) raises TypeError'

# === math.asinh() ===
assert math.asinh(0) == 0.0, 'asinh(0)'
assert math.isclose(math.asinh(1), 0.881373587019543), 'asinh(1)'
assert math.asinh(float('inf')) == float('inf'), 'asinh(inf)'
assert math.asinh(float('-inf')) == float('-inf'), 'asinh(-inf)'
assert math.isnan(math.asinh(float('nan'))), 'asinh(nan) is nan'
try:
    math.asinh('x')
    threw = False
except TypeError:
    threw = True
assert threw, 'asinh(str) raises TypeError'

# === math.acosh() ===
assert math.acosh(1) == 0.0, 'acosh(1)'
assert math.isclose(math.acosh(2), 1.3169578969248166), 'acosh(2)'
assert math.acosh(float('inf')) == float('inf'), 'acosh(inf)'
assert math.isnan(math.acosh(float('nan'))), 'acosh(nan) is nan'
# arguments below 1 are a domain error
try:
    math.acosh(0.5)
    threw = False
except ValueError:
    threw = True
assert threw, 'acosh(0.5) raises ValueError'
try:
    math.acosh('x')
    threw = False
except TypeError:
    threw = True
assert threw, 'acosh(str) raises TypeError'

# === math.atanh() ===
assert math.atanh(0) == 0.0, 'atanh(0)'
assert math.isclose(math.atanh(0.5), 0.5493061443340549), 'atanh(0.5)'
assert math.isnan(math.atanh(float('nan'))), 'atanh(nan) is nan'
# the endpoints +1 and -1 are themselves a domain error
try:
    math.atanh(1)
    threw = False
except ValueError:
    threw = True
assert threw, 'atanh(1) raises ValueError'
try:
    math.atanh(-1)
    threw = False
except ValueError:
    threw = True
assert threw, 'atanh(-1) raises ValueError'
try:
    math.atanh('x')
    threw = False
except TypeError:
    threw = True
assert threw, 'atanh(str) raises TypeError'

# === math.fmod() ===
assert math.fmod(10, 3) == 1.0, 'fmod(10, 3)' assert math.fmod(-10, 3) == -1.0, 'fmod(-10, 3)' assert math.fmod(10.5, 3) == 1.5, 'fmod(10.5, 3)' assert math.fmod(3, float('inf')) == 3.0, 'fmod(3, inf)' assert math.isnan(math.fmod(float('nan'), 3)), 'fmod(nan, 3) is nan' assert math.isnan(math.fmod(3, float('nan'))), 'fmod(3, nan) is nan' assert math.isnan(math.fmod(float('nan'), float('nan'))), 'fmod(nan, nan) is nan' threw = False try: math.fmod(10, 0) except ValueError: threw = True assert threw, 'fmod(10, 0) raises ValueError' threw = False try: math.fmod(float('inf'), 3) except ValueError: threw = True assert threw, 'fmod(inf, 3) raises ValueError' threw = False try: math.fmod('x', 3) except TypeError: threw = True assert threw, 'fmod(str, int) raises TypeError' # === math.remainder() === assert math.remainder(10, 3) == 1.0, 'remainder(10, 3)' assert math.remainder(10, 4) == 2.0, 'remainder(10, 4)' assert math.remainder(-10, 3) == -1.0, 'remainder(-10, 3)' assert math.remainder(10.5, 3) == -1.5, 'remainder(10.5, 3)' assert math.remainder(3, float('inf')) == 3.0, 'remainder(3, inf)' assert math.isnan(math.remainder(float('nan'), 3)), 'remainder(nan, 3) is nan' assert math.isnan(math.remainder(3, float('nan'))), 'remainder(3, nan) is nan' threw = False try: math.remainder(10, 0) except ValueError: threw = True assert threw, 'remainder(10, 0) raises ValueError' threw = False try: math.remainder(float('inf'), 3) except ValueError: threw = True assert threw, 'remainder(inf, 3) raises ValueError' threw = False try: math.remainder('x', 3) except TypeError: threw = True assert threw, 'remainder(str, int) raises TypeError' # === math.modf() === r = math.modf(3.5) assert r == (0.5, 3.0), 'modf(3.5)' r = math.modf(-3.5) assert r == (-0.5, -3.0), 'modf(-3.5)' r = math.modf(0.0) assert r == (0.0, 0.0), 'modf(0.0)' r = math.modf(float('inf')) assert r == (0.0, float('inf')), 'modf(inf)' r = math.modf(float('-inf')) # modf(-inf) returns (-0.0, -inf), verify both parts 
including sign of fractional assert str(r[0]) == '-0.0', 'modf(-inf) fractional part is -0.0' assert r[1] == float('-inf'), 'modf(-inf) integer part is -inf' r_nan = math.modf(float('nan')) assert math.isnan(r_nan[0]) and math.isnan(r_nan[1]), 'modf(nan) both parts are nan' threw = False try: math.modf('x') except TypeError: threw = True assert threw, 'modf(str) raises TypeError' # === math.frexp() === r = math.frexp(0.0) assert r == (0.0, 0), 'frexp(0.0)' r = math.frexp(3.5) assert r == (0.875, 2), 'frexp(3.5)' r = math.frexp(1.0) assert r == (0.5, 1), 'frexp(1.0)' r = math.frexp(-1.0) assert r == (-0.5, 1), 'frexp(-1.0)' r = math.frexp(float('inf')) assert r == (float('inf'), 0), 'frexp(inf)' r = math.frexp(float('-inf')) assert r == (float('-inf'), 0), 'frexp(-inf)' r_nan = math.frexp(float('nan')) assert math.isnan(r_nan[0]) and r_nan[1] == 0, 'frexp(nan)' threw = False try: math.frexp('x') except TypeError: threw = True assert threw, 'frexp(str) raises TypeError' # === math.ldexp() === assert math.ldexp(0.875, 2) == 3.5, 'ldexp(0.875, 2)' assert math.ldexp(1.0, 0) == 1.0, 'ldexp(1.0, 0)' assert math.ldexp(0.5, 1) == 1.0, 'ldexp(0.5, 1)' assert math.ldexp(1.0, -1075) == 0.0, 'ldexp(1.0, -1075) underflows to 0' assert math.ldexp(float('inf'), 1) == float('inf'), 'ldexp(inf, 1)' assert math.isnan(math.ldexp(float('nan'), 1)), 'ldexp(nan, 1) is nan' assert math.ldexp(0.0, 1000) == 0.0, 'ldexp(0.0, 1000)' threw = False try: math.ldexp(1.0, 1075) except OverflowError: threw = True assert threw, 'ldexp(1.0, 1075) raises OverflowError' threw = False try: math.ldexp(0.5, 1025) except OverflowError: threw = True assert threw, 'ldexp(0.5, 1025) raises OverflowError' threw = False try: math.ldexp('x', 1) except TypeError: threw = True assert threw, 'ldexp(str, int) raises TypeError' # === math.gamma() === assert math.gamma(1) == 1.0, 'gamma(1)' assert math.gamma(5) == 24.0, 'gamma(5)' assert math.isclose(math.gamma(0.5), math.sqrt(math.pi)), 'gamma(0.5)' assert 
math.gamma(float('inf')) == float('inf'), 'gamma(inf)' assert math.isnan(math.gamma(float('nan'))), 'gamma(nan) is nan' threw = False try: math.gamma(0) except ValueError: threw = True assert threw, 'gamma(0) raises ValueError' threw = False try: math.gamma(-1) except ValueError: threw = True assert threw, 'gamma(-1) raises ValueError' threw = False try: math.gamma(float('-inf')) except ValueError: threw = True assert threw, 'gamma(-inf) raises ValueError' threw = False try: math.gamma(172) except OverflowError: threw = True assert threw, 'gamma(172) raises OverflowError' threw = False try: math.gamma('x') except TypeError: threw = True assert threw, 'gamma(str) raises TypeError' # === math.lgamma() === assert math.lgamma(1) == 0.0, 'lgamma(1)' assert math.isclose(math.lgamma(5), math.log(24)), 'lgamma(5)' assert math.lgamma(float('inf')) == float('inf'), 'lgamma(inf)' assert math.isnan(math.lgamma(float('nan'))), 'lgamma(nan) is nan' assert math.isclose(math.lgamma(-0.5), 1.265512123484645), 'lgamma(-0.5)' threw = False try: math.lgamma(0) except ValueError: threw = True assert threw, 'lgamma(0) raises ValueError' threw = False try: math.lgamma(-2) except ValueError: threw = True assert threw, 'lgamma(-2) raises ValueError' threw = False try: math.lgamma('x') except TypeError: threw = True assert threw, 'lgamma(str) raises TypeError' # === math.erf() === assert math.erf(0) == 0.0, 'erf(0)' assert math.isclose(math.erf(1), 0.8427007929497148, rel_tol=1e-15), 'erf(1)' assert math.isclose(math.erf(-1), -0.8427007929497148, rel_tol=1e-15), 'erf(-1)' assert math.erf(float('inf')) == 1.0, 'erf(inf)' assert math.erf(float('-inf')) == -1.0, 'erf(-inf)' assert math.isnan(math.erf(float('nan'))), 'erf(nan) is nan' threw = False try: math.erf('x') except TypeError: threw = True assert threw, 'erf(str) raises TypeError' # === math.erfc() === assert math.erfc(0) == 1.0, 'erfc(0)' assert math.isclose(math.erfc(1), 1.0 - math.erf(1)), 'erfc(1)' assert math.erfc(float('inf')) == 
0.0, 'erfc(inf)' assert math.erfc(float('-inf')) == 2.0, 'erfc(-inf)' assert math.isnan(math.erfc(float('nan'))), 'erfc(nan) is nan' threw = False try: math.erfc('x') except TypeError: threw = True assert threw, 'erfc(str) raises TypeError' # === math.nextafter() === r = math.nextafter(1.0, 2.0) assert r > 1.0, 'nextafter(1.0, 2.0) > 1.0' assert r == 1.0000000000000002, 'nextafter(1.0, 2.0) value' r = math.nextafter(1.0, 0.0) assert r < 1.0, 'nextafter(1.0, 0.0) < 1.0' assert math.nextafter(0.0, 1.0) == 5e-324, 'nextafter(0.0, 1.0) smallest positive' assert math.nextafter(0.0, -1.0) == -5e-324, 'nextafter(0.0, -1.0) smallest negative' assert math.isnan(math.nextafter(float('nan'), 1.0)), 'nextafter(nan, 1.0) is nan' assert math.isnan(math.nextafter(1.0, float('nan'))), 'nextafter(1.0, nan) is nan' assert math.nextafter(float('inf'), float('inf')) == float('inf'), 'nextafter(inf, inf)' assert math.nextafter(1.0, 1.0) == 1.0, 'nextafter(1.0, 1.0) equal inputs' threw = False try: math.nextafter('x', 1.0) except TypeError: threw = True assert threw, 'nextafter(str, float) raises TypeError' # === math.ulp() === assert math.ulp(1.0) == 2.220446049250313e-16, 'ulp(1.0)' assert math.ulp(-1.0) == 2.220446049250313e-16, 'ulp(-1.0) same as ulp(1.0)' assert math.ulp(0.0) == 5e-324, 'ulp(0.0) is smallest subnormal' assert math.isinf(math.ulp(float('inf'))), 'ulp(inf) is inf' assert math.isnan(math.ulp(float('nan'))), 'ulp(nan) is nan' assert math.ulp(5e-324) == 5e-324, 'ulp(smallest subnormal)' threw = False try: math.ulp('x') except TypeError: threw = True assert threw, 'ulp(str) raises TypeError' # === Additional edge cases for coverage === # --- frexp subnormal numbers --- r = math.frexp(5e-324) assert r == (0.5, -1073), 'frexp(5e-324) subnormal' # --- ldexp large negative exponent (underflow to zero) --- assert math.ldexp(1.0, -2000) == 0.0, 'ldexp(1.0, -2000) underflows to 0' # --- fmod NaN propagation edge cases --- assert math.isnan(math.fmod(float('inf'), 
float('nan'))), 'fmod(inf, nan) propagates nan' assert math.isnan(math.fmod(float('nan'), 0)), 'fmod(nan, 0) propagates nan' # --- gamma negative non-integer (reflection formula) --- assert math.isclose(math.gamma(-0.5), -3.544907701811032), 'gamma(-0.5)' assert math.isclose(math.gamma(-1.5), 2.3632718012073544), 'gamma(-1.5)' # --- lgamma(-inf) returns inf --- assert math.lgamma(float('-inf')) == float('inf'), 'lgamma(-inf) returns inf' # --- lgamma overflow for extremely large input --- threw = False try: math.lgamma(1e308) except OverflowError: threw = True assert threw, 'lgamma(1e308) raises OverflowError' # --- lgamma negative non-integer (reflection formula) --- assert math.isclose(math.lgamma(-0.5), 1.265512123484645), 'lgamma(-0.5) reflection' # ========================================================== # Tests for bug fixes and CPython behavior alignment # ========================================================== # === floor/ceil/trunc with large floats (LongInt promotion) === large_floor = math.floor(1e300) assert large_floor > 0, 'floor(1e300) should be positive' assert ( large_floor == 1000000000000000052504760255204420248704468581108159154915854115511802457988908195786371375080447864043704443832883878176942523235360430575644792184786706982848387200926575803737830233794788090059368953234970799945081119038967640880074652742780142494579258788820056842838115669472196386865459400540160 ), 'floor(1e300) matches CPython' large_ceil = math.ceil(-1e300) assert large_ceil < 0, 'ceil(-1e300) should be negative' assert ( large_ceil == -1000000000000000052504760255204420248704468581108159154915854115511802457988908195786371375080447864043704443832883878176942523235360430575644792184786706982848387200926575803737830233794788090059368953234970799945081119038967640880074652742780142494579258788820056842838115669472196386865459400540160 ), 'ceil(-1e300) matches CPython' large_trunc = math.trunc(1e300) assert large_trunc == math.floor(1e300), 'trunc(1e300) matches 
floor(1e300) for positive' large_trunc_neg = math.trunc(-1e300) assert large_trunc_neg == math.ceil(-1e300), 'trunc(-1e300) matches ceil(-1e300) for negative' # floor/ceil should still work normally for values within i64 range assert math.floor(1e18) == 1000000000000000000, 'floor(1e18) within i64 range' assert math.floor(2.7) == 2, 'floor(2.7) basic case' assert math.ceil(-2.7) == -2, 'ceil(-2.7) basic case' # === ldexp with large exponent but small x === assert math.ldexp(5e-324, 1075) == 2.0, 'ldexp(5e-324, 1075) should be 2.0' assert math.ldexp(0.5, 1024) == 8.98846567431158e307, 'ldexp(0.5, 1024) large but finite' # === modf(-0.0) sign preservation === frac, integer = math.modf(-0.0) # Both parts should be -0.0 assert str(frac) == '-0.0', 'modf(-0.0) fractional part is -0.0' assert str(integer) == '-0.0', 'modf(-0.0) integer part is -0.0' # === erfc accuracy for large x === erfc_6 = math.erfc(6) assert erfc_6 > 0, 'erfc(6) should be positive, not zero' assert math.isclose(erfc_6, 2.1519736712498913e-17, rel_tol=1e-12), 'erfc(6) matches CPython' erfc_neg6 = math.erfc(-6) assert erfc_neg6 == 2.0, 'erfc(-6) is exactly 2.0' assert math.erfc(0) == 1.0, 'erfc(0) is 1.0' # === variadic gcd === assert math.gcd() == 0, 'gcd() with no args returns 0' assert math.gcd(12) == 12, 'gcd(12) single arg returns abs(12)' assert math.gcd(-12) == 12, 'gcd(-12) single arg returns abs(-12)' assert math.gcd(12, 8) == 4, 'gcd(12, 8) two args' assert math.gcd(12, 8, 6) == 2, 'gcd(12, 8, 6) three args' # === variadic lcm === assert math.lcm() == 1, 'lcm() with no args returns 1' assert math.lcm(12) == 12, 'lcm(12) single arg returns abs(12)' assert math.lcm(-12) == 12, 'lcm(-12) single negative arg returns abs(-12)' assert math.lcm(4, 6) == 12, 'lcm(4, 6) two args' assert math.lcm(4, 6, 10) == 60, 'lcm(4, 6, 10) three args' assert math.lcm(0, 5) == 0, 'lcm(0, 5) returns 0 if any arg is 0' # === perm with optional k === assert math.perm(5) == 120, 'perm(5) defaults k to n (= 5!)' assert 
math.perm(5, 2) == 20, 'perm(5, 2) with explicit k' assert math.perm(0) == 1, 'perm(0) is 1' # === isclose with rel_tol/abs_tol kwargs === assert math.isclose(1.0, 1.1, rel_tol=0.2) == True, 'isclose with rel_tol=0.2' assert math.isclose(1.0, 1.1, abs_tol=0.2) == True, 'isclose with abs_tol=0.2' assert math.isclose(1.0, 1.1) == False, 'isclose with defaults (not close)' assert math.isclose(1.0, 1.0 + 1e-10) == True, 'isclose with defaults (close)' # isclose negative tolerance raises ValueError threw = False try: math.isclose(1.0, 1.0, rel_tol=-0.1) except ValueError: threw = True assert threw, 'isclose with negative rel_tol raises ValueError' threw = False try: math.isclose(1.0, 1.0, abs_tol=-0.1) except ValueError: threw = True assert threw, 'isclose with negative abs_tol raises ValueError' # isclose unknown kwarg raises TypeError threw = False try: math.isclose(1.0, 1.0, foo=0.1) except TypeError: threw = True assert threw, 'isclose with unknown kwarg raises TypeError' # === ldexp sign preservation === assert str(math.ldexp(-0.0, 1000)) == '-0.0', 'ldexp(-0.0, n) preserves sign' assert math.ldexp(float('-inf'), 1) == float('-inf'), 'ldexp(-inf, 1) returns -inf' # === frexp(-0.0) sign preservation === m, e = math.frexp(-0.0) assert str(m) == '-0.0', 'frexp(-0.0) mantissa preserves sign' assert e == 0, 'frexp(-0.0) exponent is 0' # === comb with GCD reduction (values that would overflow intermediate without it) === assert math.comb(62, 31) == 465428353255261088, 'comb(62, 31) with GCD reduction' assert math.comb(61, 30) == 232714176627630544, 'comb(61, 30) with GCD reduction' # === isclose arg count errors === threw = False try: math.isclose() except TypeError: threw = True assert threw, 'isclose with 0 args raises TypeError' threw = False try: math.isclose(1.0) except TypeError: threw = True assert threw, 'isclose with 1 arg raises TypeError' threw = False try: math.isclose(1.0, 2.0, 3.0) except TypeError: threw = True assert threw, 'isclose with 3 positional args 
raises TypeError' # === perm(-1) single-arg error message === threw = False try: math.perm(-1) except ValueError: threw = True assert threw, 'perm(-1) single-arg raises ValueError' # === gcd/lcm with i64::MIN-like values (u64 promotion) === # gcd(-9223372036854775808, 0) should return 9223372036854775808 (exceeds i64::MAX) big_gcd = math.gcd(-9223372036854775808, 0) assert big_gcd == 9223372036854775808, 'gcd(i64::MIN, 0) promotes to LongInt' # === isqrt large values (Newton's method refinement) === # Values near i64::MAX where f64 sqrt loses precision assert math.isqrt(9223372036854775807) == 3037000499, 'isqrt(i64::MAX)' assert math.isqrt(9223372030926249001) == 3037000499, 'isqrt(3037000499^2)' assert math.isqrt(9223372030926249000) == 3037000498, 'isqrt(3037000499^2 - 1)' # === erf/erfc range coverage === # Small x (|x| < 0.84375): exercises PP/QQ polynomial assert math.erf(0.1) == 0.1124629160182849, 'erf(0.1) small-x range' assert math.erf(0.5) == 0.5204998778130465, 'erf(0.5) small-x range' # Medium x (1.25 ≤ |x| < 28): exercises erfc_inner path assert math.erf(2.0) == 0.9953222650189527, 'erf(2.0) medium-x range' assert math.erf(5.0) == 0.9999999999984626, 'erf(5.0) large-x range' # erfc in range 3 (1.25 ≤ |x| < 2.857): exercises RA/SA coefficients erfc_2 = math.erfc(2.0) assert math.isclose(erfc_2, 0.004677734981047266, rel_tol=1e-12), 'erfc(2.0) range 3' ================================================ FILE: crates/monty/test_cases/math__pow_domain_error.py ================================================ import math math.pow(-1, 0.5) """ TRACEBACK: Traceback (most recent call last): File "math__pow_domain_error.py", line 3, in <module> math.pow(-1, 0.5) ~~~~~~~~~~~~~~~~~ ValueError: math domain error """ ================================================ FILE: crates/monty/test_cases/math__sin_inf_error.py ================================================ import math math.sin(math.inf) """ TRACEBACK: Traceback (most recent call last): File "math__sin_inf_error.py",
line 3, in <module> math.sin(math.inf) ~~~~~~~~~~~~~~~~~~ ValueError: expected a finite input, got inf """ ================================================ FILE: crates/monty/test_cases/math__sqrt_negative_error.py ================================================ import math math.sqrt(-1) """ TRACEBACK: Traceback (most recent call last): File "math__sqrt_negative_error.py", line 3, in <module> math.sqrt(-1) ~~~~~~~~~~~~~ ValueError: expected a nonnegative input, got -1.0 """ ================================================ FILE: crates/monty/test_cases/math__tan_inf_error.py ================================================ import math math.tan(math.inf) """ TRACEBACK: Traceback (most recent call last): File "math__tan_inf_error.py", line 3, in <module> math.tan(math.inf) ~~~~~~~~~~~~~~~~~~ ValueError: expected a finite input, got inf """ ================================================ FILE: crates/monty/test_cases/math__trunc_str_error.py ================================================ import math math.trunc('x') """ TRACEBACK: Traceback (most recent call last): File "math__trunc_str_error.py", line 3, in <module> math.trunc('x') ~~~~~~~~~~~~~~~ TypeError: type str doesn't define __trunc__ method """ ================================================ FILE: crates/monty/test_cases/method__args_kwargs_unpacking.py ================================================ # Tests for method calls with *args unpacking # === Basic *args unpacking === items = ['a', 'b', 'c'] result = '-'.join(*[items]) assert result == 'a-b-c', f'join with *args: {result}' parts = ['hello', 'world'] result = ' '.join(*[parts]) assert result == 'hello world', f'join with *args list: {result}' # === Empty *args unpacking === result = '-'.join(*[[]]) assert result == '', f'join with empty *args: {result}' empty = [] result = '-'.join(*[empty]) assert result == '', f'join with empty list via *args: {result}' # === *args with tuple unpacking === values = ('x', 'y', 'z') result = '|'.join(*[list(values)]) assert result == 'x|y|z', f'join
with tuple *args: {result}' # === String methods with *args === s = 'hello world' args = ('o', 'O') result = s.replace(*args) assert result == 'hellO wOrld', f'replace with *args: {result}' # Count with *args count_args = ('l',) result = s.count(*count_args) assert result == 3, f'count with *args: {result}' # === List methods with *args === my_list = [1, 2, 3] append_args = [4] my_list.append(*append_args) assert my_list == [1, 2, 3, 4], f'append with *args: {my_list}' my_list = [1, 2, 3] extend_args = [[4, 5]] my_list.extend(*extend_args) assert my_list == [1, 2, 3, 4, 5], f'extend with *args: {my_list}' my_list = [1, 2, 3] insert_args = (1, 'x') my_list.insert(*insert_args) assert my_list == [1, 'x', 2, 3], f'insert with *args: {my_list}' # === Dict methods with *args === d = {'a': 1, 'b': 2} get_args = ('a',) result = d.get(*get_args) assert result == 1, f'dict.get with *args: {result}' get_args_default = ('missing', 'default') result = d.get(*get_args_default) assert result == 'default', f'dict.get with *args and default: {result}' # === Mixed positional and *args === my_list = [1, 2, 3] extra_args = ('y',) my_list.insert(0, *extra_args) assert my_list == ['y', 1, 2, 3], f'insert with pos and *args: {my_list}' # === setdefault with *args === d = {'a': 1} args = ('b', 2) result = d.setdefault(*args) assert result == 2, f'setdefault with *args: {result}' assert d == {'a': 1, 'b': 2}, f'dict after setdefault: {d}' # === pop with *args === d = {'a': 1, 'b': 2} pop_args = ('a',) result = d.pop(*pop_args) assert result == 1, f'pop with *args: {result}' assert d == {'b': 2}, f'dict after pop: {d}' pop_args_default = ('missing', 'default') result = d.pop(*pop_args_default) assert result == 'default', f'pop with *args and default: {result}' # === String split with *args === s = 'a,b,c,d' split_args = (',',) result = s.split(*split_args) assert result == ['a', 'b', 'c', 'd'], f'split with *args: {result}' split_args_maxsplit = (',', 2) result = 
s.split(*split_args_maxsplit) assert result == ['a', 'b', 'c,d'], f'split with *args maxsplit: {result}' # === String startswith/endswith with *args === s = 'hello' startswith_args = (('hel', 'hey'),) result = s.startswith(*startswith_args) assert result == True, f'startswith with *args tuple: {result}' endswith_args = ('lo',) result = s.endswith(*endswith_args) assert result == True, f'endswith with *args: {result}' # === List index with *args === my_list = [1, 2, 3, 2, 4] index_args = (2,) result = my_list.index(*index_args) assert result == 1, f'index with *args: {result}' index_args_start = (2, 2) result = my_list.index(*index_args_start) assert result == 3, f'index with *args and start: {result}' # === String find with *args === s = 'hello hello' find_args = ('hello',) result = s.find(*find_args) assert result == 0, f'find with *args: {result}' find_args_start = ('hello', 1) result = s.find(*find_args_start) assert result == 6, f'find with *args and start: {result}' # ============================================================ # **kwargs unpacking tests # ============================================================ # === Basic **kwargs unpacking with dict.update === d = {'a': 1} opts = {'b': 2, 'c': 3} d.update(**opts) assert d == {'a': 1, 'b': 2, 'c': 3}, f'update with **kwargs: {d}' # === Empty **kwargs unpacking === d = {'a': 1} empty_opts = {} d.update(**empty_opts) assert d == {'a': 1}, f'update with empty **kwargs: {d}' # === **kwargs with string keys === d = {} str_opts = {'key1': 'value1', 'key2': 'value2'} d.update(**str_opts) assert d == {'key1': 'value1', 'key2': 'value2'}, f'update with string **kwargs: {d}' # === **kwargs with heap-allocated values === d = {} list_val = [1, 2, 3] dict_val = {'nested': True} heap_opts = {'list': list_val, 'dict': dict_val} d.update(**heap_opts) assert d['list'] == [1, 2, 3], f'update with list value: {d}' assert d['dict'] == {'nested': True}, f'update with dict value: {d}' # === Multiple **kwargs updates === d = 
{'a': 1} opts1 = {'b': 2} opts2 = {'c': 3} d.update(**opts1) d.update(**opts2) assert d == {'a': 1, 'b': 2, 'c': 3}, f'multiple updates with **kwargs: {d}' # === **kwargs overwriting existing keys === d = {'a': 1, 'b': 2} override_opts = {'b': 'new', 'c': 3} d.update(**override_opts) assert d == {'a': 1, 'b': 'new', 'c': 3}, f'update overwriting with **kwargs: {d}' # === Mixed *args and **kwargs with dict.update === # dict.update can take a dict positionally AND **kwargs d = {'a': 1} pos_update = {'b': 2} kw_update = {'c': 3} d.update(pos_update, **kw_update) assert d == {'a': 1, 'b': 2, 'c': 3}, f'update with pos and **kwargs: {d}' # === *args tuple unpacking combined with method === d = {'a': 1} args_tuple = ({'x': 10},) d.update(*args_tuple) assert d == {'a': 1, 'x': 10}, f'update with *args tuple: {d}' # === Combined *args and **kwargs === d = {} pos_dict = {'a': 1} kw_opts = {'b': 2} d.update(*[pos_dict], **kw_opts) assert d == {'a': 1, 'b': 2}, f'update with *args and **kwargs: {d}' # === Regular kwargs combined with **kwargs === # This tests the code path where we have both explicit keyword args and **kwargs unpacking d = {} extra_opts = {'c': 3} d.update(a=1, b=2, **extra_opts) assert d == {'a': 1, 'b': 2, 'c': 3}, f'update with regular kwargs and **kwargs: {d}' # === Regular kwargs only (no **kwargs) with method call === d = {} d.update(x=10, y=20) assert d == {'x': 10, 'y': 20}, f'update with regular kwargs only: {d}' # === Mixed positional, regular kwargs, and **kwargs === d = {'existing': 0} pos_update = {'a': 1} extra = {'d': 4} d.update(pos_update, b=2, c=3, **extra) assert d == {'existing': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4}, f'update with pos, kwargs, **kwargs: {d}' # === Empty **kwargs with regular kwargs === d = {} empty_extra = {} d.update(x=1, **empty_extra) assert d == {'x': 1}, f'update with kwargs and empty **kwargs: {d}' # === **kwargs with different keys from regular kwargs === d = {} extra = {'b': 'from_dict'} d.update(a='original', 
**extra) assert d == {'a': 'original', 'b': 'from_dict'}, f'update with different kwargs: {d}' # ============================================================ # PEP 448 generalized method calls (multiple * or **) # ============================================================ # === Multiple **kwargs in method call === d = {} d.update(**{'a': 1}, **{'b': 2}) assert d == {'a': 1, 'b': 2}, f'update with multiple **kwargs: {d}' d = {'x': 0} d.update(**{'a': 1}, **{'b': 2}, **{'c': 3}) assert d == {'x': 0, 'a': 1, 'b': 2, 'c': 3}, f'update with three **kwargs: {d}' # Mixed named kwargs and multiple **kwargs d = {} d.update(a=1, **{'b': 2}, **{'c': 3}) assert d == {'a': 1, 'b': 2, 'c': 3}, f'update named + multiple **kwargs: {d}' # === Positional args mixed with *unpack in method GeneralizedCall === # insert(*[0], 1): positional 1 comes AFTER the *unpack → GeneralizedCall. # This exercises CallArg::Unpack (the *[0]) and CallArg::Value (the 1) # in the compile_method_call GeneralizedCall branch. my_list = [2, 3] my_list.insert(*[0], 1) assert my_list == [1, 2, 3], 'insert: star index then positional value' my_list2 = ['a', 'b', 'c', 'd'] my_list2.insert(*[1], 'x') assert my_list2 == ['a', 'x', 'b', 'c', 'd'], 'insert: star index then positional string' # === *args + multiple **kwargs in method GeneralizedCall === # d.update(*[...], **{...}, **{...}): two **unpacks → GeneralizedCall (not ArgsKargs). # The *unpack in args covers CallArg::Unpack; the two **unpacks mean has_kwargs=True, # covering the kwargs dict-builder block in the compile_method_call GeneralizedCall branch. 
d = {} d.update(*[{}], **{'a': 1}, **{'b': 2}) assert d == {'a': 1, 'b': 2}, 'update: star args + two star-star kwargs' ================================================ FILE: crates/monty/test_cases/name_error__unbound_local_func.py ================================================ # Test that accessing a variable before assignment in a function raises UnboundLocalError # (In function scope, Python pre-scans for assignments so it knows x is local) def foo(): print(x) x = 1 foo() """ TRACEBACK: Traceback (most recent call last): File "name_error__unbound_local_func.py", line 8, in <module> foo() ~~~~~ File "name_error__unbound_local_func.py", line 4, in foo print(x) ~ UnboundLocalError: cannot access local variable 'x' where it is not associated with a value """ ================================================ FILE: crates/monty/test_cases/name_error__unbound_local_module.py ================================================ # Test that accessing a variable before assignment at module level raises NameError # (Unlike function scope, module level doesn't pre-scan for assignments) print(x) x = 1 """ TRACEBACK: Traceback (most recent call last): File "name_error__unbound_local_module.py", line 3, in <module> print(x) ~ NameError: name 'x' is not defined """ ================================================ FILE: crates/monty/test_cases/name_error__undefined_call_chained.py ================================================ x = aaa_func() + bbb_func() """ TRACEBACK: Traceback (most recent call last): File "name_error__undefined_call_chained.py", line 1, in <module> x = aaa_func() + bbb_func() ~~~~~~~~ NameError: name 'aaa_func' is not defined """ ================================================ FILE: crates/monty/test_cases/name_error__undefined_call_in_expr.py ================================================ x = 1 + missing_func() """ TRACEBACK: Traceback (most recent call last): File "name_error__undefined_call_in_expr.py", line 1, in <module> x = 1 + missing_func() ~~~~~~~~~~~~ NameError: name 'missing_func'
is not defined """ ================================================ FILE: crates/monty/test_cases/name_error__undefined_call_in_function.py ================================================ def outer(): return missing_func() outer() """ TRACEBACK: Traceback (most recent call last): File "name_error__undefined_call_in_function.py", line 5, in <module> outer() ~~~~~~~ File "name_error__undefined_call_in_function.py", line 2, in outer return missing_func() ~~~~~~~~~~~~ NameError: name 'missing_func' is not defined """ ================================================ FILE: crates/monty/test_cases/name_error__undefined_call_with_args.py ================================================ missing_func(1, 2, 3) """ TRACEBACK: Traceback (most recent call last): File "name_error__undefined_call_with_args.py", line 1, in <module> missing_func(1, 2, 3) ~~~~~~~~~~~~ NameError: name 'missing_func' is not defined """ ================================================ FILE: crates/monty/test_cases/name_error__undefined_global.py ================================================ # Test that accessing an undefined global name raises NameError unknown_func() """ TRACEBACK: Traceback (most recent call last): File "name_error__undefined_global.py", line 2, in <module> unknown_func() ~~~~~~~~~~~~ NameError: name 'unknown_func' is not defined """ ================================================ FILE: crates/monty/test_cases/namedtuple__missing_attr.py ================================================ # Test AttributeError message for missing attribute on named tuple import sys sys.version_info.foobar """ TRACEBACK: Traceback (most recent call last): File "namedtuple__missing_attr.py", line 4, in <module> sys.version_info.foobar AttributeError: 'sys.version_info' object has no attribute 'foobar' """ ================================================ FILE: crates/monty/test_cases/namedtuple__ops.py ================================================ import sys vi = sys.version_info # === Equality: same object === assert vi == vi, 
'namedtuple equals itself' # === Equality: two references === vi2 = sys.version_info assert vi == vi2, 'two refs to same namedtuple are equal' # === Equality: namedtuple == equivalent tuple === t = (vi.major, vi.minor, vi.micro, vi.releaselevel, vi.serial) assert vi == t, 'namedtuple equals equivalent tuple' assert t == vi, 'equivalent tuple equals namedtuple' # === Inequality: wrong length === assert vi != (3,), 'namedtuple not equal to wrong-length tuple' assert (3,) != vi, 'wrong-length tuple not equal to namedtuple' # === Inequality: different values === assert vi != (0, 0, 0, 'final', 0), 'namedtuple not equal to different values' # === Inequality: non-tuple types === assert vi != 42, 'namedtuple not equal to int' assert vi != 'hello', 'namedtuple not equal to str' assert vi != None, 'namedtuple not equal to None' assert vi != [3, 14], 'namedtuple not equal to list' # === repr === r = repr(vi) assert r.startswith('sys.version_info(major='), f'namedtuple repr starts with type name, {r!r}' assert ', minor=' in r, f'namedtuple repr has minor field, {r!r}' assert r.endswith(')'), f'namedtuple repr ends with paren, {r!r}' ================================================ FILE: crates/monty/test_cases/nonlocal__error_module_level.py ================================================ # nonlocal at module level is a syntax error nonlocal x # type: ignore # Raise=SyntaxError('nonlocal declaration not allowed at module level') ================================================ FILE: crates/monty/test_cases/nonlocal__ops.py ================================================ # === Basic nonlocal read/write === def read_outer(): x = 10 def inner(): return x # reads from outer scope return inner() assert read_outer() == 10, 'nonlocal read' def write_outer(): x = 10 def inner(): nonlocal x x = 20 inner() return x assert write_outer() == 20, 'nonlocal write' # === Classic counter pattern === def make_counter(): count = 0 def increment(): nonlocal count count = count + 1 return count 
return increment counter2 = make_counter() assert counter2() == 1, 'counter first call' assert counter2() == 2, 'counter second call' assert counter2() == 3, 'counter third call' # === Implicit capture (read without nonlocal) === def implicit_capture(): a = 10 b = 20 def inner(): return a + b # reads both from outer return inner() assert implicit_capture() == 30, 'implicit capture multiple vars' # === Pass-through nesting === def pass_through(): x = 1 def middle(): nonlocal x x = x + 10 def inner(): nonlocal x x = x + 100 return x r1 = inner() # returns 111 r2 = x # x is now 111 return r1 + r2 # 222 return middle() assert pass_through() == 222, 'nonlocal pass through' # === Deep nesting (3 levels) === def deep_nesting(): x = 1 def level2(): nonlocal x x = x + 10 def level3(): nonlocal x x = x + 100 return x return level3() return level2() assert deep_nesting() == 111, 'deep nesting' # === Deep nesting (4 levels) === def deep_pass_through(): val = 1 def level1(): nonlocal val val = val + 1 def level2(): nonlocal val val = val + 10 def level3(): nonlocal val val = val + 100 return val return level3() return level2() result = level1() # val: 1 -> 2 -> 12 -> 112 return (result, val) assert deep_pass_through() == (112, 112), 'deep pass through 4 levels' # === Multiple independent cells === def multiple_cells(): a = 1 b = 10 c = 100 def modify_a(): nonlocal a a = a + 1 return a def modify_b(): nonlocal b b = b + 10 return b def modify_c(): nonlocal c c = c + 100 return c def read_all(): return a + b + c r1 = modify_b() # b = 20 r2 = modify_a() # a = 2 r3 = modify_c() # c = 200 r4 = read_all() # 2 + 20 + 200 = 222 return (r1, r2, r3, r4) assert multiple_cells() == (20, 2, 200, 222), 'multiple independent cells' # === Shared cell (getter/setter pattern) === def shared_cell(): x = 0 def getter(): return x def setter(v): nonlocal x x = v return (getter, setter) pair = shared_cell() getter = pair[0] setter = pair[1] assert getter() == 0, 'shared cell initial' setter(42) 
assert getter() == 42, 'shared cell after setter' # === Shared multiple vars === def shared_multiple_vars(): x = 0 y = 0 def add_to_x(n): nonlocal x x = x + n return x def add_to_y(n): nonlocal y y = y + n return y def swap(): nonlocal x, y tmp = x x = y y = tmp return (x, y) def get_both(): return (x, y) return (add_to_x, add_to_y, swap, get_both) ops = shared_multiple_vars() add_x = ops[0] add_y = ops[1] swap = ops[2] get = ops[3] add_x(5) # x=5 add_y(10) # y=10 add_x(3) # x=8 swap() # x=10, y=8 assert get() == (10, 8), 'shared multiple vars with swap' # === Local and captured === def local_and_captured(): x = 1 def inner(): nonlocal x x = x + x return x before = x # 1 middle = inner() # 2 after = x # 2 final = inner() # 4 return (before, middle, after, final, x) assert local_and_captured() == (1, 2, 2, 4, 4), 'local and captured' # === Mixing global and nonlocal === g1 = 100 def global_and_nonlocal(): x = 1 def inner(): global g1 nonlocal x g1 = g1 + 1 x = x + 10 return g1 + x return inner() assert global_and_nonlocal() == 112, 'global and nonlocal together' # === Closure with global and nonlocal === g2 = 1000 def make_closure_global(): x = 1 def closure(): global g2 nonlocal x result = g2 + x g2 = g2 + 1 x = x + 10 return result return closure c = make_closure_global() r1 = c() # returns 1001 r2 = c() # returns 1012 r3 = c() # returns 1023 assert (r1, r2, r3, g2) == (1001, 1012, 1023, 1003), 'closure with global and nonlocal' # === Closure creates closure === def outer_factory(): outer_val = 10 def inner_factory(): nonlocal outer_val inner_val = outer_val def innermost(): nonlocal inner_val inner_val = inner_val + 1 return inner_val outer_val = outer_val + 100 return innermost return inner_factory factory = outer_factory() closure1 = factory() # inner_val=10, outer_val->110 closure2 = factory() # inner_val=110, outer_val->210 r1 = closure1() # 11 r2 = closure1() # 12 r3 = closure2() # 111 r4 = closure1() # 13 assert (r1, r2, r3, r4) == (11, 12, 111, 13), 
'closure creates closure' # === Augmented assignment with nonlocal === def augmented_assign(): x = 10 def inner(): nonlocal x x += 5 inner() return x assert augmented_assign() == 15, 'augmented assign nonlocal' # === Cell contains closure === def cell_contains_closure(): y = 100 def inner(): return y x = inner # x holds closure, x is also a cell var def get_x(): nonlocal x return x f = get_x() return f() assert cell_contains_closure() == 100, 'cell contains closure' ================================================ FILE: crates/monty/test_cases/os__environ.py ================================================ # call-external # Tests for os.environ property import os # === os.environ property === # os.environ returns a dict-like object env = os.environ # === os.environ key access === assert env['VIRTUAL_HOME'] == '/virtual/home', 'environ key access VIRTUAL_HOME' assert os.environ['VIRTUAL_HOME'] == '/virtual/home', 'environ key access VIRTUAL_HOME' assert os.environ['VIRTUAL_USER'] == 'testuser', 'environ key access VIRTUAL_USER' assert os.environ['VIRTUAL_EMPTY'] == '', 'environ key access VIRTUAL_EMPTY' # === os.environ get method === assert env.get('VIRTUAL_HOME') == '/virtual/home', 'environ.get existing key' assert os.environ.get('VIRTUAL_HOME') == '/virtual/home', 'environ.get existing key' assert os.environ.get('VIRTUAL_USER') == 'testuser', 'environ.get existing user' assert os.environ.get('NONEXISTENT_VAR_12345') is None, 'environ.get missing returns None' assert os.environ.get('NONEXISTENT_VAR_12345', 'default') == 'default', 'environ.get with default' # === os.environ length === assert len(env) == 3, 'environ has 3 virtual entries' # === os.environ membership test === assert 'VIRTUAL_HOME' in env, 'VIRTUAL_HOME in environ' assert 'VIRTUAL_HOME' in os.environ, 'VIRTUAL_HOME in environ' assert 'VIRTUAL_USER' in env, 'VIRTUAL_USER in environ' assert 'NONEXISTENT_VAR_12345' not in env, 'nonexistent not in environ' assert 'NONEXISTENT_VAR_12345' not in 
os.environ, 'nonexistent not in environ' # === os.environ keys/values/items === keys = list(os.environ.keys()) assert 'VIRTUAL_HOME' in keys, 'VIRTUAL_HOME in keys' assert 'VIRTUAL_USER' in keys, 'VIRTUAL_USER in keys' values = list(os.environ.values()) assert '/virtual/home' in values, '/virtual/home in values' assert 'testuser' in values, 'testuser in values' ================================================ FILE: crates/monty/test_cases/os__getenv_key_list_error.py ================================================ # call-external import os os.getenv([1, 2, 3]) # Raise=TypeError('str expected, not list') ================================================ FILE: crates/monty/test_cases/os__getenv_key_type_error.py ================================================ # call-external import os os.getenv(123) # Raise=TypeError('str expected, not int') ================================================ FILE: crates/monty/test_cases/parse_error__complex.py ================================================ # xfail=cpython 1 + 2j # Raise=NotImplementedError('The monty syntax parser does not yet support complex constants') ================================================ FILE: crates/monty/test_cases/pathlib__import.py ================================================ import pathlib # Verify that pathlib.Path can be called as an attribute p = pathlib.Path('a.txt') assert p.name == 'a.txt' # Verify that it still works when imported directly from pathlib import Path p2 = Path('b.txt') assert p2.name == 'b.txt' ================================================ FILE: crates/monty/test_cases/pathlib__os.py ================================================ # call-external from pathlib import Path # === exists() === assert Path('/virtual/file.txt').exists() == True, 'file exists' assert Path('/virtual/subdir').exists() == True, 'dir exists' assert Path('/virtual/subdir/deep').exists() == True, 'nested dir exists' assert Path('/nonexistent').exists() == False, 'nonexistent path' assert 
Path('/nonexistent/file.txt').exists() == False, 'nonexistent nested path' # === is_file() === assert Path('/virtual/file.txt').is_file() == True, 'is_file true for file' assert Path('/virtual/subdir').is_file() == False, 'is_file false for dir' assert Path('/nonexistent').is_file() == False, 'is_file false for nonexistent' # === is_dir() === assert Path('/virtual/subdir').is_dir() == True, 'is_dir true for dir' assert Path('/virtual/file.txt').is_dir() == False, 'is_dir false for file' assert Path('/nonexistent').is_dir() == False, 'is_dir false for nonexistent' # === is_symlink() === assert Path('/virtual/file.txt').is_symlink() == False, 'is_symlink false (no symlinks in vfs)' assert Path('/nonexistent').is_symlink() == False, 'is_symlink false for nonexistent' # === read_text() === assert Path('/virtual/file.txt').read_text() == 'hello world\n', 'read_text basic' assert Path('/virtual/empty.txt').read_text() == '', 'read_text empty file' assert Path('/virtual/subdir/nested.txt').read_text() == 'nested content', 'read_text nested' assert Path('/virtual/subdir/deep/file.txt').read_text() == 'deep', 'read_text deep nested' # === read_bytes() === assert Path('/virtual/data.bin').read_bytes() == b'\x00\x01\x02\x03', 'read_bytes binary' assert Path('/virtual/empty.txt').read_bytes() == b'', 'read_bytes empty' assert Path('/virtual/file.txt').read_bytes() == b'hello world\n', 'read_bytes text file' # === stat() basic === st = Path('/virtual/file.txt').stat() assert st.st_size == 12, 'stat size (len of "hello world\\n")' # 0o644 permissions + regular file type bits (0o100000) assert st.st_mode & 0o777 == 0o644, 'stat mode permissions' # Verify it's a regular file using raw mode bits # S_IFREG = 0o100000, so check that file type bits match assert st.st_mode & 0o170000 == 0o100000, 'stat is regular file' # === stat() directory === st_dir = Path('/virtual/subdir').stat() # S_IFDIR = 0o040000, so check that file type bits match assert st_dir.st_mode & 0o170000 == 0o040000, 
'stat is directory' assert st_dir.st_mode & 0o777 == 0o755, 'stat dir mode permissions' # === stat() index access === st2 = Path('/virtual/file.txt').stat() assert st2[6] == 12, 'stat index access for st_size' assert st2[0] & 0o777 == 0o644, 'stat index access for st_mode' # === iterdir() === entries = list(Path('/virtual').iterdir()) assert len(entries) == 5, 'iterdir returns correct count' # iterdir() should return Path objects, not strings first_entry = entries[0] assert isinstance(first_entry, Path), f'iterdir should return Path objects, got {type(first_entry)}' # Path objects should have .name attribute names = [e.name for e in entries] assert 'file.txt' in names, 'iterdir contains file.txt' assert 'subdir' in names, 'iterdir contains subdir' assert 'data.bin' in names, 'iterdir contains data.bin' # Path objects should have .parent attribute assert entries[0].parent == Path('/virtual'), 'iterdir entry parent is correct' # === iterdir() nested === nested_entries = list(Path('/virtual/subdir').iterdir()) assert len(nested_entries) == 2, 'iterdir nested count' nested_names = [e.name for e in nested_entries] assert 'nested.txt' in nested_names, 'iterdir nested contains nested.txt' assert 'deep' in nested_names, 'iterdir nested contains deep' # === iterdir() entries can be used for further operations === # Find the nested.txt entry and read it for entry in nested_entries: if entry.name == 'nested.txt': assert entry.read_text() == 'nested content', 'iterdir entry can be read' # === resolve() === p = Path('/virtual/file.txt').resolve() assert str(p) == '/virtual/file.txt', 'resolve absolute path unchanged' # === absolute() === p2 = Path('/virtual/subdir').absolute() assert str(p2) == '/virtual/subdir', 'absolute path unchanged' # === path concatenation with OS calls === base = Path('/virtual') full = base / 'subdir' / 'nested.txt' assert full.read_text() == 'nested content', 'path concat then read' assert full.exists() == True, 'path concat then exists' # === 
write_text() === Path('/virtual/new_file.txt').write_text('created by write_text') assert Path('/virtual/new_file.txt').read_text() == 'created by write_text', 'write_text creates file' # Overwrite existing file Path('/virtual/file.txt').write_text('overwritten') assert Path('/virtual/file.txt').read_text() == 'overwritten', 'write_text overwrites' # === write_bytes() === Path('/virtual/binary.dat').write_bytes(b'\xff\xfe\xfd') assert Path('/virtual/binary.dat').read_bytes() == b'\xff\xfe\xfd', 'write_bytes creates file' # === mkdir() === Path('/virtual/new_dir').mkdir() assert Path('/virtual/new_dir').is_dir() == True, 'mkdir creates directory' # mkdir with parents Path('/virtual/a/b/c').mkdir(parents=True) assert Path('/virtual/a/b/c').is_dir() == True, 'mkdir parents creates nested' # mkdir with exist_ok Path('/virtual/new_dir').mkdir(exist_ok=True) # Should not raise # === unlink() === Path('/virtual/to_delete.txt').write_text('delete me') assert Path('/virtual/to_delete.txt').exists() == True, 'file exists before unlink' Path('/virtual/to_delete.txt').unlink() assert Path('/virtual/to_delete.txt').exists() == False, 'unlink removes file' # === rmdir() === Path('/virtual/empty_dir').mkdir() assert Path('/virtual/empty_dir').is_dir() == True, 'dir exists before rmdir' Path('/virtual/empty_dir').rmdir() assert Path('/virtual/empty_dir').exists() == False, 'rmdir removes directory' # === rename() === Path('/virtual/old_name.txt').write_text('rename test') Path('/virtual/old_name.txt').rename(Path('/virtual/new_name.txt')) assert Path('/virtual/old_name.txt').exists() == False, 'rename removes old path' assert Path('/virtual/new_name.txt').read_text() == 'rename test', 'rename creates new path' ================================================ FILE: crates/monty/test_cases/pathlib__os_read_error.py ================================================ # call-external from pathlib import Path Path('/nonexistent').read_text() """ TRACEBACK: Traceback (most recent call 
last): File "pathlib__os_read_error.py", line 4, in <module> Path('/nonexistent').read_text() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FileNotFoundError: [Errno 2] No such file or directory: '/nonexistent' """ ================================================ FILE: crates/monty/test_cases/pathlib__pure.py ================================================ # === Path constructor === from pathlib import Path p = Path('/usr/local/bin/python') assert str(p) == '/usr/local/bin/python', 'Path str should match input' # Constructor with multiple arguments assert str(Path('folder', 'file.txt')) == 'folder/file.txt', 'Path with two args joins' assert str(Path('/usr', 'local', 'bin')) == '/usr/local/bin', 'Path with three args joins' assert str(Path('start', '/absolute', 'end')) == '/absolute/end', 'absolute in middle replaces' # Constructor with no arguments assert str(Path()) == '.', 'Path() returns current dir' # === name property === assert p.name == 'python', 'name should be final component' assert Path('/usr/local/bin/').name == 'bin', 'name should handle trailing slash' assert Path('/').name == '', 'root path should have empty name' assert Path('file.txt').name == 'file.txt', 'relative path name' # === parent property === assert str(p.parent) == '/usr/local/bin', 'parent should remove last component' assert str(Path('/usr').parent) == '/', 'parent of first-level should be root' assert str(Path('/').parent) == '/', 'parent of root is root' assert str(Path('file.txt').parent) == '.', 'parent of relative without dir is .' 
# === stem property === assert Path('/path/file.tar.gz').stem == 'file.tar', 'stem removes last extension' assert Path('/path/file.txt').stem == 'file', 'stem removes single extension' assert Path('/path/.bashrc').stem == '.bashrc', 'stem preserves hidden files' assert Path('/path/file').stem == 'file', 'stem without extension' # === suffix property === assert Path('/path/file.tar.gz').suffix == '.gz', 'suffix is last extension' assert Path('/path/file.txt').suffix == '.txt', 'suffix with single extension' assert Path('/path/.bashrc').suffix == '', 'hidden file has no suffix' assert Path('/path/file').suffix == '', 'no extension means empty suffix' # === suffixes property === assert Path('/path/file.tar.gz').suffixes == ['.tar', '.gz'], 'suffixes list' assert Path('/path/file.txt').suffixes == ['.txt'], 'single suffix as list' assert Path('/path/.bashrc').suffixes == [], 'hidden file has no suffixes' # === parts property === assert Path('/usr/local/bin').parts == ('/', 'usr', 'local', 'bin'), 'absolute path parts' assert Path('usr/local').parts == ('usr', 'local'), 'relative path parts' assert Path('/').parts == ('/',), 'root path parts' # === is_absolute method === assert Path('/usr/bin').is_absolute() == True, 'absolute path' assert Path('usr/bin').is_absolute() == False, 'relative path not absolute' assert Path('').is_absolute() == False, 'empty path not absolute' # === joinpath method === assert str(Path('/usr').joinpath('local')) == '/usr/local', 'joinpath with one arg' assert str(Path('/usr').joinpath('local', 'bin')) == '/usr/local/bin', 'joinpath with two args' assert str(Path('/usr').joinpath('/etc')) == '/etc', 'joinpath with absolute replaces' assert str(Path('.').joinpath('file')) == 'file', 'joinpath from dot' # === with_name method === assert str(Path('/path/file.txt').with_name('other.py')) == '/path/other.py', 'with_name replaces name' assert str(Path('file.txt').with_name('other.py')) == 'other.py', 'with_name on relative' # === with_suffix method 
=== assert str(Path('/path/file.txt').with_suffix('.py')) == '/path/file.py', 'with_suffix replaces' assert str(Path('/path/file.txt').with_suffix('')) == '/path/file', 'with_suffix removes' assert str(Path('/path/file').with_suffix('.txt')) == '/path/file.txt', 'with_suffix adds' # === / operator === assert str(Path('/usr') / 'local') == '/usr/local', '/ operator joins' assert str(Path('/usr') / 'local' / 'bin') == '/usr/local/bin', '/ operator chains' # === as_posix method === assert Path('/usr/bin').as_posix() == '/usr/bin', 'as_posix returns string' # === __fspath__ method (os.PathLike protocol) === assert Path('/usr/bin').__fspath__() == '/usr/bin', '__fspath__ returns string' # === repr === r = repr(Path('/usr/bin')) assert r == "PosixPath('/usr/bin')", f'repr should be PosixPath, got {r}' ================================================ FILE: crates/monty/test_cases/pyobject__cycle_dict_self.py ================================================ # Test that returning a cyclic dict doesn't crash (MontyObject cycle detection) d = {} d['self'] = d d # Return={'self': {...}} ================================================ FILE: crates/monty/test_cases/pyobject__cycle_list_dict.py ================================================ # Test composite cycle: list containing dict containing original list c = [] e = {'list': c} c.append(e) c # Return=[{'list': [...]}] ================================================ FILE: crates/monty/test_cases/pyobject__cycle_list_self.py ================================================ # Test that returning a cyclic list doesn't crash (MontyObject cycle detection) a = [] a.append(a) a # Return=[[...]] ================================================ FILE: crates/monty/test_cases/pyobject__cycle_multiple_refs.py ================================================ # Test multiple references to the same cyclic object f = [] f.append(f) g = [f, f] g # Return=[[[...]], [[...]]] ================================================ FILE: 
crates/monty/test_cases/range__error_no_args.py ================================================ range() # Raise=TypeError('range expected at least 1 argument, got 0') ================================================ FILE: crates/monty/test_cases/range__error_step_zero.py ================================================ range(0, 10, 0) # Raise=ValueError('range() arg 3 must not be zero') ================================================ FILE: crates/monty/test_cases/range__error_too_many_args.py ================================================ range(1, 2, 3, 4) # Raise=TypeError('range expected at most 3 arguments, got 4') ================================================ FILE: crates/monty/test_cases/range__getitem_index_error.py ================================================ r = range(5) r[10] """ TRACEBACK: Traceback (most recent call last): File "range__getitem_index_error.py", line 2, in <module> r[10] ~~~~~ IndexError: range object index out of range """ ================================================ FILE: crates/monty/test_cases/range__ops.py ================================================ # === range() with one argument (stop) === assert list(range(0)) == [], 'range(0) is empty' assert list(range(1)) == [0], 'range(1) is [0]' assert list(range(5)) == [0, 1, 2, 3, 4], 'range(5) is [0, 1, 2, 3, 4]' assert list(range(-3)) == [], 'range negative stop is empty' # === range() with two arguments (start, stop) === assert list(range(0, 3)) == [0, 1, 2], 'range(0, 3)' assert list(range(1, 5)) == [1, 2, 3, 4], 'range(1, 5)' assert list(range(5, 10)) == [5, 6, 7, 8, 9], 'range(5, 10)' assert list(range(3, 3)) == [], 'range equal start stop is empty' assert list(range(5, 3)) == [], 'range start > stop is empty' assert list(range(-5, -2)) == [-5, -4, -3], 'range negative to negative' assert list(range(-3, 2)) == [-3, -2, -1, 0, 1], 'range negative to positive' # === range() with three arguments (start, stop, step) === assert list(range(0, 10, 2)) == [0, 2, 4, 6, 8], 'range 
step 2' assert list(range(1, 10, 3)) == [1, 4, 7], 'range step 3' assert list(range(0, 10, 5)) == [0, 5], 'range step 5' assert list(range(0, 10, 10)) == [0], 'range step equals diff' assert list(range(0, 10, 20)) == [0], 'range step > diff' # === range() with negative step === assert list(range(10, 0, -1)) == [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], 'range step -1' assert list(range(10, 0, -2)) == [10, 8, 6, 4, 2], 'range step -2' assert list(range(5, 0, -1)) == [5, 4, 3, 2, 1], 'range 5 to 0 step -1' assert list(range(0, 5, -1)) == [], 'range start < stop with negative step is empty' assert list(range(-1, -5, -1)) == [-1, -2, -3, -4], 'range negative with negative step' # === tuple(range()) conversions === assert tuple(range(3)) == (0, 1, 2), 'tuple(range(3))' assert tuple(range(1, 4)) == (1, 2, 3), 'tuple(range(1, 4))' assert tuple(range(0, 6, 2)) == (0, 2, 4), 'tuple(range(0, 6, 2))' # === range in for loops === total = 0 for i in range(5): total = total + i assert total == 10, 'for loop with range(5)' total2 = 0 for i in range(1, 4): total2 = total2 + i assert total2 == 6, 'for loop with range(1, 4)' total3 = 0 for i in range(0, 10, 2): total3 = total3 + i assert total3 == 20, 'for loop with range step 2' # count down countdown = [] for i in range(3, 0, -1): countdown.append(i) assert countdown == [3, 2, 1], 'for loop countdown' # === range repr === assert repr(range(5)) == 'range(0, 5)', 'repr range one arg' assert repr(range(1, 5)) == 'range(1, 5)', 'repr range two args' assert repr(range(1, 5, 2)) == 'range(1, 5, 2)', 'repr range three args' assert repr(range(0, 10, 1)) == 'range(0, 10)', 'repr range step 1 omitted' assert repr(range(5, 0, -1)) == 'range(5, 0, -1)', 'repr range negative step' # === range type === assert type(range(5)) == range, 'type of range' assert type(range(1, 5)) == range, 'type of range two args' assert type(range(1, 5, 2)) == range, 'type of range three args' # === range equality === assert range(5) == range(5), 'range equality same' assert 
range(0, 5) == range(5), 'range(0, 5) == range(5)' assert range(1, 5) == range(1, 5), 'range equality two args' assert range(1, 5, 2) == range(1, 5, 2), 'range equality three args' assert range(5) != range(6), 'range inequality' assert range(1, 5) != range(2, 5), 'range inequality start differs' assert range(1, 5, 1) != range(1, 5, 2), 'range inequality step differs' # === range bool (truthiness) === assert bool(range(5)) == True, 'non-empty range is truthy' assert bool(range(1, 5)) == True, 'range(1, 5) is truthy' assert bool(range(0)) == False, 'empty range(0) is falsy' assert bool(range(5, 5)) == False, 'empty range equal start stop is falsy' assert bool(range(5, 0)) == False, 'empty range start > stop is falsy' assert bool(range(5, 0, -1)) == True, 'range countdown is truthy' assert bool(range(0, 5, -1)) == False, 'empty range wrong direction is falsy' # === range isinstance === assert isinstance(range(5), range), 'isinstance range' # === len(range()) === assert len(range(5)) == 5, 'len(range(5))' assert len(range(0)) == 0, 'len(range(0))' assert len(range(1, 5)) == 4, 'len(range(1, 5))' assert len(range(0, 10, 2)) == 5, 'len(range step 2)' assert len(range(10, 0, -1)) == 10, 'len(range negative step)' assert len(range(0, 10, 3)) == 4, 'len(range step 3)' # === range equality by sequence (not parameters) === assert range(0, 3, 2) == range(0, 4, 2), 'ranges with same sequence [0,2] are equal' assert range(0, 5, 2) == range(0, 6, 2), 'range(0,5,2) == range(0,6,2) both [0,2,4]' assert range(5, 0, -2) == range(5, -1, -2), 'negative step same sequence' assert range(0) == range(0), 'empty ranges equal' assert range(5, 5) == range(10, 10), 'different empty ranges equal' assert range(0, 0) == range(5, 5), 'empty ranges with different params equal' # === Range indexing (getitem) === # Basic indexing for range(stop) r = range(5) assert r[0] == 0, 'range(5)[0]' assert r[1] == 1, 'range(5)[1]' assert r[4] == 4, 'range(5)[4]' # Negative indexing assert r[-1] == 4, 
'range(5)[-1]' assert r[-2] == 3, 'range(5)[-2]' assert r[-5] == 0, 'range(5)[-5]' # Range with start r = range(10, 15) assert r[0] == 10, 'range(10, 15)[0]' assert r[1] == 11, 'range(10, 15)[1]' assert r[4] == 14, 'range(10, 15)[4]' assert r[-1] == 14, 'range(10, 15)[-1]' assert r[-5] == 10, 'range(10, 15)[-5]' # Range with step r = range(0, 10, 2) assert r[0] == 0, 'range(0, 10, 2)[0]' assert r[1] == 2, 'range(0, 10, 2)[1]' assert r[2] == 4, 'range(0, 10, 2)[2]' assert r[3] == 6, 'range(0, 10, 2)[3]' assert r[4] == 8, 'range(0, 10, 2)[4]' assert r[-1] == 8, 'range(0, 10, 2)[-1]' assert r[-2] == 6, 'range(0, 10, 2)[-2]' # Range with step 3 r = range(1, 10, 3) assert r[0] == 1, 'range(1, 10, 3)[0]' assert r[1] == 4, 'range(1, 10, 3)[1]' assert r[2] == 7, 'range(1, 10, 3)[2]' assert r[-1] == 7, 'range(1, 10, 3)[-1]' # Range with negative step r = range(10, 0, -1) assert r[0] == 10, 'range(10, 0, -1)[0]' assert r[1] == 9, 'range(10, 0, -1)[1]' assert r[9] == 1, 'range(10, 0, -1)[9]' assert r[-1] == 1, 'range(10, 0, -1)[-1]' assert r[-10] == 10, 'range(10, 0, -1)[-10]' # Range with negative step and larger step r = range(10, 0, -2) assert r[0] == 10, 'range(10, 0, -2)[0]' assert r[1] == 8, 'range(10, 0, -2)[1]' assert r[2] == 6, 'range(10, 0, -2)[2]' assert r[3] == 4, 'range(10, 0, -2)[3]' assert r[4] == 2, 'range(10, 0, -2)[4]' assert r[-1] == 2, 'range(10, 0, -2)[-1]' # Range starting from negative r = range(-5, 0) assert r[0] == -5, 'range(-5, 0)[0]' assert r[2] == -3, 'range(-5, 0)[2]' assert r[-1] == -1, 'range(-5, 0)[-1]' # Single element range r = range(42, 43) assert r[0] == 42, 'single element range[0]' assert r[-1] == 42, 'single element range[-1]' # Variable index r = range(100) i = 50 assert r[i] == 50, 'range getitem with variable index' # Bool indices (True=1, False=0) r = range(10, 15) assert r[False] == 10, 'range getitem with False' assert r[True] == 11, 'range getitem with True' # === Range containment ('in' operator) === # Basic containment assert 0 
in range(5), '0 in range(5)' assert 4 in range(5), '4 in range(5)' assert 5 not in range(5), '5 not in range(5)' assert -1 not in range(5), '-1 not in range(5)' # Range with start assert 10 in range(10, 15), '10 in range(10, 15)' assert 14 in range(10, 15), '14 in range(10, 15)' assert 15 not in range(10, 15), '15 not in range(10, 15)' assert 9 not in range(10, 15), '9 not in range(10, 15)' # Range with step assert 0 in range(0, 10, 2), '0 in range(0, 10, 2)' assert 2 in range(0, 10, 2), '2 in range(0, 10, 2)' assert 8 in range(0, 10, 2), '8 in range(0, 10, 2)' assert 3 not in range(0, 10, 2), '3 not in range(0, 10, 2)' assert 10 not in range(0, 10, 2), '10 not in range(0, 10, 2)' # Range with negative step assert 10 in range(10, 0, -1), '10 in countdown' assert 1 in range(10, 0, -1), '1 in countdown' assert 0 not in range(10, 0, -1), '0 not in countdown' assert 11 not in range(10, 0, -1), '11 not in countdown' # Negative step with step > 1 assert 10 in range(10, 0, -2), '10 in range(10, 0, -2)' assert 8 in range(10, 0, -2), '8 in range(10, 0, -2)' assert 9 not in range(10, 0, -2), '9 not in range(10, 0, -2)' # Negative ranges assert -3 in range(-5, 0), '-3 in range(-5, 0)' assert 0 not in range(-5, 0), '0 not in range(-5, 0)' # Empty ranges assert 5 not in range(0), '5 not in empty range' assert 0 not in range(5, 5), '0 not in empty equal range' # Non-int types return False (no TypeError) assert 'a' not in range(5), 'string not in range' # Float containment (floats equal to integers are contained) assert 3.0 in range(5), '3.0 in range(5)' assert 0.0 in range(5), '0.0 in range(5)' assert 4.0 in range(5), '4.0 in range(5)' assert 3.5 not in range(5), '3.5 not in range(5)' assert 5.0 not in range(5), '5.0 not in range(5)' assert 2.0 in range(0, 10, 2), '2.0 in even range' assert 3.0 not in range(0, 10, 2), '3.0 not in even range' assert -1.0 not in range(5), '-1.0 not in range(5)' # Bool as container element (True=1, False=0 for comparison) assert True in range(5), 
'True in range(5)' assert False in range(5), 'False in range(5)' assert True not in range(0), 'True not in empty range' ================================================ FILE: crates/monty/test_cases/re__basic.py ================================================ # Tests for the re (regular expression) module - basic functionality import re # === Constant === assert re.NOFLAG == 0, 're.NOFLAG == 0' assert re.I == re.IGNORECASE == 2, 're.I == re.IGNORECASE == 2' assert re.M == re.MULTILINE == 8, 're.M == re.MULTILINE == 8' assert re.S == re.DOTALL == 16, 're.S == re.DOTALL == 16' # === re.search() basic === m = re.search('hello', 'say hello world') assert m is not None, 're.search finds a match' assert m.group() == 'hello', 're.search group(0) returns matched text' assert m.group(0) == 'hello', 're.search group(0) explicit returns matched text' assert m.start() == 4, 're.search start() returns start position' assert m.end() == 9, 're.search end() returns end position' assert m.span() == (4, 9), 're.search span() returns (start, end) tuple' # === re.search() with no match === m = re.search('xyz', 'hello world') assert m is None, 're.search returns None when no match' # === re.search() with error === try: re.search('(', 'test') assert False, 're.search with invalid pattern should raise error' except re.PatternError as e: # The error message may vary based on the regex engine, but it should not be empty assert len(str(e)) > 0, 're.search with invalid pattern raises PatternError with message' # === re.match() === m = re.match('hello', 'hello world') assert m is not None, 're.match matches at start' assert m.group() == 'hello', 're.match group returns matched text' m = re.match('world', 'hello world') assert m is None, 're.match does not match in the middle' # === re.fullmatch() === m = re.fullmatch('hello', 'hello') assert m is not None, 're.fullmatch matches exact string' assert m.group() == 'hello', 're.fullmatch group returns full match' m = re.fullmatch('hello', 'hello 
world') assert m is None, 're.fullmatch does not match partial string' # === re.findall() with no groups === result = re.findall(r'\d+', 'a1 b22 c333') assert result == ['1', '22', '333'], 'findall without groups returns list of matches' # === re.findall() with no match === result = re.findall(r'\d+', 'no numbers') assert result == [], 'findall with no match returns empty list' # === re.sub() === result = re.sub(r'\d+', 'X', 'a1 b2 c3') assert result == 'aX bX cX', 're.sub replaces all matches' # === re.sub() with count === result = re.sub(r'\d+', 'X', 'a1 b2 c3', 1) assert result == 'aX b2 c3', 're.sub with count=1 replaces only first' result = re.sub(r'\d+', 'X', 'a1 b2 c3', 2) assert result == 'aX bX c3', 're.sub with count=2 replaces first two' # === re.compile() === pattern = re.compile(r'\d+') m = pattern.search('abc 123 def') assert m is not None, 'compiled pattern search finds match' assert m.group() == '123', 'compiled pattern match returns correct group' m = pattern.match('123 abc') assert m is not None, 'compiled pattern match at start' assert m.group() == '123', 'compiled pattern match group' m = pattern.match('abc 123') assert m is None, 'compiled pattern match does not match in middle' # === compiled pattern fullmatch === pattern = re.compile(r'\d+') m = pattern.fullmatch('123') assert m is not None, 'compiled pattern fullmatch on exact string' assert m.group() == '123', 'compiled pattern fullmatch group' m = pattern.fullmatch('123abc') assert m is None, 'compiled pattern fullmatch rejects partial match' # === compiled pattern findall === pattern = re.compile(r'\d+') result = pattern.findall('a1 b2 c3') assert result == ['1', '2', '3'], 'compiled pattern findall' # === compiled pattern sub === pattern = re.compile(r'\d+') result = pattern.sub('X', 'a1 b2 c3') assert result == 'aX bX cX', 'compiled pattern sub' result = pattern.sub('X', 'a1 b2 c3', 1) assert result == 'aX b2 c3', 'compiled pattern sub with count' # === Flags: IGNORECASE === pattern = 
re.compile(r'hello', re.IGNORECASE) m = pattern.search('Hello World') assert m is not None, 'IGNORECASE flag works' assert m.group() == 'Hello', 'IGNORECASE matches case-insensitively' # === Flags: DOTALL === pattern = re.compile(r'a.b', re.DOTALL) m = pattern.search('a\nb') assert m is not None, 'DOTALL flag allows dot to match newline' assert m.group() == 'a\nb', 'DOTALL matches newline with dot' # === Flags: MULTILINE === pattern = re.compile(r'^\w+', re.MULTILINE) result = pattern.findall('hello\nworld') assert result == ['hello', 'world'], 'MULTILINE allows ^ to match at line boundaries' # === Pattern attributes === pattern = re.compile(r'\d+', re.IGNORECASE) assert pattern.pattern == r'\d+', '.pattern returns the pattern string' # CPython flags include re.UNICODE (32) by default, so we check flags & 2 instead assert pattern.flags & re.IGNORECASE, '.flags includes IGNORECASE' # === Pattern repr === p = re.compile(r'\d+') assert repr(p) == r"re.compile('\\d+')", 'Pattern repr without flags' p = re.compile(r'\d+', re.IGNORECASE) assert repr(p) == r"re.compile('\\d+', re.IGNORECASE)", 'Pattern repr with IGNORECASE' # === Flag constants === assert re.IGNORECASE == 2, 'IGNORECASE flag value' assert re.MULTILINE == 8, 'MULTILINE flag value' assert re.DOTALL == 16, 'DOTALL flag value' # === Combined flags === pattern = re.compile(r'^hello', re.IGNORECASE | re.MULTILINE) result = pattern.findall('Hello\nhello\nHELLO') assert result == ['Hello', 'hello', 'HELLO'], 'Combined IGNORECASE | MULTILINE flags' # === More MULTILINE tests === # Without MULTILINE, ^ matches only start of string pattern = re.compile(r'^\w+') result = pattern.findall('line1\nline2\nline3') assert result == ['line1'], 'Without MULTILINE, ^ matches only start of string' # With MULTILINE, ^ matches each line start pattern = re.compile(r'^\w+', re.MULTILINE) result = pattern.findall('line1\nline2\nline3') assert result == ['line1', 'line2', 'line3'], 'With MULTILINE, ^ matches each line start' # 
Without MULTILINE, $ matches only end of string pattern = re.compile(r'\w+$') result = pattern.findall('line1\nline2\nline3') assert result == ['line3'], 'Without MULTILINE, $ matches only end of string' # With MULTILINE, $ matches each line end pattern = re.compile(r'\w+$', re.MULTILINE) result = pattern.findall('line1\nline2\nline3') assert result == ['line1', 'line2', 'line3'], 'With MULTILINE, $ matches each line end' # === More DOTALL tests === # Without DOTALL, . does not match newline pattern = re.compile(r'a.b') m = pattern.search('a\nb') assert m is None, 'Without DOTALL, . does not match newline' # With DOTALL, . matches newline pattern = re.compile(r'a.b', re.DOTALL) m = pattern.search('a\nb') assert m is not None, 'With DOTALL, . matches newline' assert m.group() == 'a\nb', 'DOTALL allows . to match newline' # DOTALL with multiple newlines pattern = re.compile(r'start.*end', re.DOTALL) m = pattern.search('start\nline1\nline2\nend') assert m is not None, 'DOTALL .* matches multiple newlines' assert m.group() == 'start\nline1\nline2\nend', 'DOTALL .* captures everything including newlines' # === Pattern repr with multiple flags (I, M, D order) === p = re.compile(r'test', re.IGNORECASE) assert repr(p) == r"re.compile('test', re.IGNORECASE)", 'Pattern repr with I flag' p = re.compile(r'test', re.MULTILINE) assert repr(p) == r"re.compile('test', re.MULTILINE)", 'Pattern repr with M flag' p = re.compile(r'test', re.DOTALL) assert repr(p) == r"re.compile('test', re.DOTALL)", 'Pattern repr with D flag' p = re.compile(r'test', re.IGNORECASE | re.MULTILINE) assert repr(p) == r"re.compile('test', re.IGNORECASE|re.MULTILINE)", 'Pattern repr with I|M flags' p = re.compile(r'test', re.IGNORECASE | re.DOTALL) assert repr(p) == r"re.compile('test', re.IGNORECASE|re.DOTALL)", 'Pattern repr with I|D flags' p = re.compile(r'test', re.MULTILINE | re.DOTALL) assert repr(p) == r"re.compile('test', re.MULTILINE|re.DOTALL)", 'Pattern repr with M|D flags' p = 
re.compile(r'test', re.IGNORECASE | re.MULTILINE | re.DOTALL) assert repr(p) == r"re.compile('test', re.IGNORECASE|re.MULTILINE|re.DOTALL)", 'Pattern repr with I|M|D flags' # === Combined IGNORECASE and DOTALL === pattern = re.compile(r'Hello.*World', re.IGNORECASE | re.DOTALL) m = pattern.search('HELLO\nmiddle\nWORLD') assert m is not None, 'Combined IGNORECASE|DOTALL finds match' assert m.group() == 'HELLO\nmiddle\nWORLD', 'IGNORECASE|DOTALL matches case-insensitively across newlines' # === Combined MULTILINE and DOTALL === pattern = re.compile(r'^a.*b$', re.MULTILINE | re.DOTALL) result = pattern.findall('a\nb\nc\nb') assert result == ['a\nb\nc\nb'], 'Combined MULTILINE|DOTALL with ^ and $ and .' # === All three flags combined === pattern = re.compile(r'^Hello.*World$', re.IGNORECASE | re.MULTILINE | re.DOTALL) m = pattern.search('first\nHELLO\nsome\nlines\nWORLD\nlast') assert m is not None, 'All three flags combined finds match' assert m.group() == 'HELLO\nsome\nlines\nWORLD', 'I|M|D flags work together' # === Empty pattern === m = re.search(r'', 'abc') assert m is not None, 'search with empty pattern finds match' assert m.start() == 0 and m.end() == 0, 'empty pattern matches at start of string' # === Zero-length matches === m = re.search(r'a*', 'bc') assert m is not None, 'search with zero-length match finds match' assert m.group() == '', 'zero-length match returns empty string' # === Object identity of compiled patterns === p1 = re.compile(r'\d+') p2 = re.compile(r'\d+') assert p1 == p2, 'separately compiled patterns with same pattern are equal' match1 = p1.search('123') match2 = p2.search('123') assert match1 != match2, 'matches from different pattern objects are distinct' # === re.sub() error: missing pattern === try: re.sub() assert False, 're.sub() with no args should raise TypeError' except TypeError as e: assert 'pattern' in str(e).lower(), 're.sub missing pattern error mentions pattern' # === re.sub() error: missing repl === try: re.sub(r'\d+') assert 
False, 're.sub(pattern) should raise TypeError' except TypeError as e: assert 'repl' in str(e).lower(), 're.sub missing repl error mentions repl' # === re.sub() error: missing string === try: re.sub(r'\d+', 'X') assert False, 're.sub(pattern, repl) should raise TypeError' except TypeError as e: assert 'string' in str(e).lower(), 're.sub missing string error mentions string' # === re.sub() error: count is not an integer === try: re.sub(r'\d+', 'X', 'a1b2', 1.5) assert False, 're.sub with float count should raise TypeError' except TypeError as e: assert "'float' object cannot be interpreted as an integer" in str(e), 're.sub float count error' try: re.sub(r'\d+', 'X', 'a1b2', 'one') assert False, 're.sub with string count should raise TypeError' except TypeError as e: assert "'str' object cannot be interpreted as an integer" in str(e), 're.sub string count error' # === Pattern.sub() error: missing repl === pattern = re.compile(r'\d+') try: pattern.sub() assert False, 'Pattern.sub() with no args should raise TypeError' except TypeError as e: assert 'repl' in str(e).lower(), 'Pattern.sub missing repl error mentions repl' # === Pattern.sub() error: missing string === try: pattern.sub('X') assert False, 'Pattern.sub(repl) should raise TypeError' except TypeError as e: assert 'string' in str(e).lower(), 'Pattern.sub missing string error mentions string' # === re.sub() with count=0 (replace all) === result = re.sub(r'\d', 'X', '1a2b3c', 0) assert result == 'XaXbXc', 're.sub with count=0 replaces all' # === re.sub() empty replacement === result = re.sub(r'\d+', '', 'a1 b2 c3') assert result == 'a b c', 're.sub with empty replacement removes matches' # === Pattern.sub() edge case: empty match === pattern = re.compile(r'a*') result = pattern.sub('X', 'bac') # Note: this might be a zero-width match behavior that's different assert 'X' in result, 'Pattern.sub handles zero-width matches' # === re.compile() error: invalid pattern === try: re.compile('(unclosed') assert False, 
're.compile with invalid pattern should raise PatternError' except re.PatternError as e: assert len(str(e)) > 0, 're.compile invalid pattern raises PatternError' # === re.search() error: pattern is not a string === try: re.search(123, 'hello') assert False, 're.search with int pattern should raise TypeError' except TypeError as e: assert 'string' in str(e).lower(), 're.search non-string pattern error' # === re.search() error: string is not a string === try: re.search(r'\d+', 123) assert False, 're.search with int string should raise TypeError' except TypeError as e: assert 'string' in str(e).lower(), 're.search non-string string error' # === re.match() error: pattern is not a string === try: re.match(None, 'hello') assert False, 're.match with None pattern should raise TypeError' except TypeError as e: assert 'string' in str(e).lower(), 're.match None pattern error' # === re.fullmatch() error: string is not a string === try: re.fullmatch(r'\d+', None) assert False, 're.fullmatch with None string should raise TypeError' except TypeError as e: assert 'string' in str(e).lower(), 're.fullmatch None string error' # === Object basic === assert bool(re.compile(r'\d+')) assert bool(re.search(r'\w+', 'hello')) assert isinstance(re.compile(r'\d+'), re.Pattern), 're.compile returns re.Pattern instance' assert isinstance(re.search(r'\w+', 'hello'), re.Match), 're.search returns re.Match instance' assert str(type(re.compile(r'\d+'))) == "", 'type of compiled pattern is re.Pattern' assert str(type(re.search(r'\w+', 'hello'))) == "", 'type of search match is re.Match' # === fullmatch with alternation === # fullmatch must try all alternatives to find a full-string match, # not just pick the first alternative that matches somewhere m = re.fullmatch('a|ab', 'ab') assert m is not None, 'fullmatch with alternation finds full match' assert m.group() == 'ab', 'fullmatch alternation matches full-string alternative' m = re.fullmatch('ab|a', 'ab') assert m is not None, 'fullmatch when 
full-match alternative is first' assert m.group() == 'ab', 'fullmatch returns correct match when first alt matches' m = re.fullmatch('cat|category', 'category') assert m is not None, 'fullmatch alternation picks full-string alternative' assert m.group() == 'category', 'fullmatch alternation returns correct match' m = re.fullmatch('x|ab|a', 'ab') assert m is not None, 'fullmatch with three alternatives' assert m.group() == 'ab', 'fullmatch picks correct alternative from three' # compiled pattern fullmatch with alternation p = re.compile('a|ab') m = p.fullmatch('ab') assert m is not None, 'compiled fullmatch with alternation finds full match' assert m.group() == 'ab', 'compiled fullmatch alternation matches correctly' # fullmatch with alternation and groups m = re.fullmatch('(a)|(ab)', 'ab') assert m is not None, 'fullmatch alternation with groups' assert m.group(0) == 'ab', 'fullmatch alternation groups: group(0) is full match' assert m.group(1) is None, 'fullmatch alternation groups: group(1) did not match' assert m.group(2) == 'ab', 'fullmatch alternation groups: group(2) matched' # fullmatch with quantifiers m = re.fullmatch('a+|b+', 'aaa') assert m is not None, 'fullmatch a+|b+ on aaa' assert m.group() == 'aaa', 'fullmatch a+|b+ returns full match' # fullmatch with .* (greedy) m = re.fullmatch('.*', 'anything') assert m is not None, 'fullmatch .* matches anything' assert m.group() == 'anything', 'fullmatch .* returns full string' # fullmatch on empty string with empty pattern m = re.fullmatch('', '') assert m is not None, 'fullmatch empty pattern on empty string' assert m.group() == '', 'fullmatch empty returns empty' # fullmatch should not match partial strings even with alternation m = re.fullmatch('a|ab', 'abc') assert m is None, 'fullmatch rejects when no alternative spans full string' # fullmatch with MULTILINE should still require full-string match p = re.compile('hello', re.MULTILINE) m = p.fullmatch('hello') assert m is not None, 'fullmatch MULTILINE on 
single line' assert m.group() == 'hello', 'fullmatch MULTILINE returns correct match' m = p.fullmatch('hello\nworld') assert m is None, 'fullmatch MULTILINE rejects multi-line input' # fullmatch with alternation and flags combined p = re.compile('(a+)|(b+)', re.MULTILINE) m = p.fullmatch('bbb') assert m is not None, 'fullmatch groups with MULTILINE flag' assert m.group(0) == 'bbb', 'fullmatch groups MULTILINE: group(0) correct' assert m.group(1) is None, 'fullmatch groups MULTILINE: group(1) did not match' assert m.group(2) == 'bbb', 'fullmatch groups MULTILINE: group(2) matched' # === Literal $ in replacement === result = re.sub(r'\d+', '$', 'a1b2') assert result == 'a$b$', 'literal $ in replacement is preserved' result = re.sub(r'\d+', '$1', 'a1b2') assert result == 'a$1b$1', 'literal $1 in replacement is preserved (not backreference)' result = re.sub(r'\d+', '$$', 'a1b2') assert result == 'a$$b$$', 'literal $$ in replacement is preserved' # compiled pattern with $ in replacement p = re.compile(r'\d+') result = p.sub('$', 'a1b2') assert result == 'a$b$', 'compiled pattern: literal $ in replacement is preserved' result = re.sub(r'\d+', '$$$', 'a1b2') assert result == 'a$$$b$$$', 'triple $ in replacement preserved' # plain replacement with no special chars result = re.sub(r'\d+', 'NUM', 'a1 b2') assert result == 'aNUM bNUM', 'plain replacement without special chars' # === Negative count in re.sub === result = re.sub(r'\d+', 'X', 'a1 b2 c3', -1) assert result == 'a1 b2 c3', 're.sub with negative count returns string unchanged' result = re.sub(r'\d+', 'X', 'a1 b2 c3', -100) assert result == 'a1 b2 c3', 're.sub with large negative count returns string unchanged' result = re.sub(r'\d+', 'X', 'a1 b2 c3', -999) assert result == 'a1 b2 c3', 're.sub with very large negative count returns string unchanged' # compiled pattern with negative count p = re.compile(r'\d+') result = p.sub('X', 'a1 b2 c3', -1) assert result == 'a1 b2 c3', 'compiled pattern: negative count returns 
string unchanged' result = p.sub('X', 'a1 b2 c3', -100) assert result == 'a1 b2 c3', 'compiled pattern: large negative count returns string unchanged' # negative count with empty string result = re.sub(r'\d+', 'X', '', -1) assert result == '', 're.sub negative count on empty string' # === re.sub with count boundary values === result = re.sub(r'\d+', 'X', 'a1 b2 c3', 0) assert result == 'aX bX cX', 're.sub count=0 replaces all (explicit)' result = re.sub(r'\d+', 'X', 'a1 b2 c3', 1) assert result == 'aX b2 c3', 're.sub count=1 replaces first only' result = re.sub(r'\d+', 'X', 'a1 b2 c3', 3) assert result == 'aX bX cX', 're.sub count=3 replaces all three' result = re.sub(r'\d+', 'X', 'a1 b2 c3', 100) assert result == 'aX bX cX', 're.sub count exceeding matches replaces all' # === Pattern.sub() error: too many arguments === p = re.compile(r'\d+') try: p.sub('X', 'a1b2', 0, 'extra') assert False, 'Pattern.sub with 4 args should raise TypeError' except TypeError as e: assert 'at most 3' in str(e), 'Pattern.sub too many args error' # === Flags on module-level functions === # re.search with flags m = re.search(r'hello', 'HELLO WORLD', re.IGNORECASE) assert m is not None, 're.search with IGNORECASE flag' assert m.group() == 'HELLO', 're.search IGNORECASE matches case-insensitively' m = re.search(r'hello', 'HELLO WORLD') assert m is None, 're.search without flags is case-sensitive' # re.match with flags m = re.match(r'hello', 'HELLO WORLD', re.IGNORECASE) assert m is not None, 're.match with IGNORECASE flag' assert m.group() == 'HELLO', 're.match IGNORECASE matches case-insensitively' # re.fullmatch with flags m = re.fullmatch(r'hello', 'HELLO', re.IGNORECASE) assert m is not None, 're.fullmatch with IGNORECASE flag' assert m.group() == 'HELLO', 're.fullmatch IGNORECASE matches case-insensitively' # re.findall with flags result = re.findall(r'hello', 'Hello HELLO hello', re.IGNORECASE) assert result == ['Hello', 'HELLO', 'hello'], 're.findall with IGNORECASE flag' # re.sub 
with flags (5th positional arg) result = re.sub(r'hello', 'X', 'Hello HELLO hello', 0, re.IGNORECASE) assert result == 'X X X', 're.sub with flags as 5th arg' # re.search with DOTALL flag m = re.search(r'a.b', 'a\nb', re.DOTALL) assert m is not None, 're.search with DOTALL flag' assert m.group() == 'a\nb', 're.search DOTALL matches across newlines' # re.findall with MULTILINE result = re.findall(r'^\w+', 'hello\nworld\nfoo', re.MULTILINE) assert result == ['hello', 'world', 'foo'], 're.findall with MULTILINE flag' # re.search with combined flags m = re.search(r'hello.*world', 'HELLO\nWORLD', re.IGNORECASE | re.DOTALL) assert m is not None, 're.search with IGNORECASE | DOTALL' assert m.group() == 'HELLO\nWORLD', 're.search combined flags work' # === re.ASCII flag === assert re.ASCII == 256, 're.ASCII flag value' assert re.A == re.ASCII, 're.A is alias for re.ASCII' # re.ASCII flag is accepted (doesn't error) p = re.compile(r'\w+', re.ASCII) m = p.search('cafe') assert m is not None, 'ASCII mode matches ASCII word chars' assert m.group() == 'cafe', 'ASCII mode returns correct match' # re.ASCII can be combined with other flags p = re.compile(r'hello', re.ASCII | re.IGNORECASE) m = p.search('HELLO') assert m is not None, 'ASCII | IGNORECASE combined' assert m.group() == 'HELLO', 'ASCII | IGNORECASE matches correctly' # Pattern repr with re.ASCII flag p = re.compile(r'\w+', re.ASCII) assert repr(p) == r"re.compile('\\w+', re.ASCII)", 'Pattern repr with ASCII flag' p = re.compile(r'\w+', re.ASCII | re.IGNORECASE) assert repr(p) == r"re.compile('\\w+', re.IGNORECASE|re.ASCII)", 'Pattern repr with ASCII|IGNORECASE flags' # re.ASCII on module-level functions m = re.search(r'\w+', 'cafe', re.ASCII) assert m is not None, 're.search with re.ASCII flag' assert m.group() == 'cafe', 're.search re.ASCII returns correct match' m = re.match(r'\w+', 'cafe', re.A) assert m is not None, 're.match with re.A alias' assert m.group() == 'cafe', 're.match re.A returns correct match' m = 
re.fullmatch(r'\w+', 'cafe', re.ASCII) assert m is not None, 're.fullmatch with re.ASCII flag' assert m.group() == 'cafe', 're.fullmatch re.ASCII returns correct match' result = re.findall(r'\w+', 'a b c', re.ASCII) assert result == ['a', 'b', 'c'], 're.findall with re.ASCII flag' # === match with alternation (anchored) === # re.match('b|ab', 'ab') must try alternation at position 0 m = re.match(r'b|ab', 'ab') assert m is not None, 're.match with alternation at start' assert m.group() == 'ab', 're.match alternation: second alt matches at pos 0' # re.match with alternation: first alt doesn't start at pos 0 m = re.match(r'world|hello', 'hello world') assert m is not None, 're.match alternation: finds match starting at pos 0' assert m.group() == 'hello', 're.match alternation: correct alternative matches' # compiled pattern match with alternation p = re.compile(r'b|ab') m = p.match('ab') assert m is not None, 'Pattern.match with alternation' assert m.group() == 'ab', 'Pattern.match alternation: second alt matches at pos 0' # match with alternation where shorter alt matches at pos 0 m = re.match(r'a|ab', 'ab') assert m is not None, 're.match alternation: shorter alt at pos 0' assert m.group() == 'a', 're.match alternation: leftmost match wins (like CPython)' # match with alternation + flags m = re.match(r'B|AB', 'ab', re.IGNORECASE) assert m is not None, 're.match alternation with IGNORECASE flag' assert m.group() == 'ab', 're.match alternation IGNORECASE: second alt matches at pos 0' # compiled match with alternation + flags p = re.compile(r'B|AB', re.IGNORECASE) m = p.match('ab') assert m is not None, 'Pattern.match alternation with IGNORECASE flag' assert m.group() == 'ab', 'Pattern.match alternation IGNORECASE matches correctly' # === \g numeric backreference in replacement === result = re.sub(r'(\w+)\s+(\w+)', r'\g<2> \g<1>', 'hello world') assert result == 'world hello', r'\g numeric backreference swaps groups' result = re.sub(r'(\w+)\s+(\w+)', r'\g<0>', 'hello 
world') assert result == 'hello world', r'\g<0> is the full match' result = re.sub(r'(\w+)', r'\g<1>!', 'hello world') assert result == 'hello! world!', r'\g<1> with suffix' # \g with multiple replacements in one string result = re.sub(r'(\w+)\s+(\w+)\s+(\w+)', r'\g<3>-\g<2>-\g<1>', 'a b c') assert result == 'c-b-a', r'\g multiple groups reversed' # \g mixed with \1 style backrefs result = re.sub(r'(\w+)\s+(\w+)', r'\1-\g<2>', 'hello world') assert result == 'hello-world', r'\1 and \g<2> mixed in replacement' # \g mixed with literal $ result = re.sub(r'(\w+)', r'$\g<1>$', 'hi') assert result == '$hi$', r'\g<1> with literal $ signs' # === \g named backreference in replacement === result = re.sub(r'(?P\w+)\s+(?P\w+)', r'\g \g', 'hello world') assert result == 'world hello', r'\g named backreference swaps groups' # \g on compiled pattern p = re.compile(r'(?P\w+)') result = p.sub(r'[\g]', 'hello world') assert result == '[hello] [world]', r'compiled pattern \g backreference' # \g mixed with \g result = re.sub(r'(?P\w+)\s+(\w+)', r'\g-\g<2>', 'hello world') assert result == 'hello-world', r'\g and \g mixed' # === \g combined with other replacement features === result = re.sub(r'(\w+)', r'[\g<1>]', 'hi') assert result == '[hi]', r'\g<1> with surrounding literal brackets' # compiled pattern with \g p = re.compile(r'(\w+)\s+(\w+)') result = p.sub(r'\g<2>-\g<1>', 'hello world') assert result == 'world-hello', r'compiled pattern \g backreference' # === Bool as int in re functions === # bool as flags (True=1, False=0) m = re.search(r'hello', 'HELLO', False) assert m is None, 'search flags=False (0) is case-sensitive' m = re.match(r'hello', 'HELLO', False) assert m is None, 'match flags=False is case-sensitive' m = re.fullmatch(r'hello', 'HELLO', False) assert m is None, 'fullmatch flags=False is case-sensitive' result = re.findall(r'hello', 'HELLO hello', False) assert result == ['hello'], 'findall flags=False is case-sensitive' p = re.compile(r'hello', False) assert p.flags 
& re.IGNORECASE == 0, 'compile with flags=False has no IGNORECASE' p = re.compile(r'hello', True) assert p.flags & 1 != 0, 'compile with flags=True stores 1' # bool as count in re.sub (True=1 replacement, False=0=all) result = re.sub(r'\d', 'X', '123', True) assert result == 'X23', 'count=True replaces only first match' result = re.sub(r'\d', 'X', '123', False) assert result == 'XXX', 'count=False (0) replaces all matches' # bool as count in Pattern.sub p = re.compile(r'\d') result = p.sub('X', '123', True) assert result == 'X23', 'Pattern.sub count=True replaces only first' result = p.sub('X', '123', False) assert result == 'XXX', 'Pattern.sub count=False replaces all' # === re.error alias (same as re.PatternError) === assert re.error is re.PatternError, 're.error is alias for re.PatternError' try: re.compile('(unclosed') assert False, 'should raise' except re.error as e: assert len(str(e)) > 0, 're.error catches PatternError' # === re.escape() === assert re.escape('hello') == 'hello', 're.escape leaves alphanumeric unchanged' assert re.escape('hello world!') == 'hello\\ world!', 're.escape escapes space but not !' 
assert re.escape('a.b+c*d?e') == 'a\\.b\\+c\\*d\\?e', 're.escape escapes regex metacharacters' assert re.escape('') == '', 're.escape on empty string' assert re.escape('[test]') == '\\[test\\]', 're.escape escapes brackets' assert re.escape('price: $10') == 'price:\\ \\$10', 're.escape escapes space and dollar' assert re.escape('a_b') == 'a_b', 're.escape preserves underscores' # re.escape result works as a literal pattern text = 'price is $10.00 (USD)' escaped = re.escape('$10.00') m = re.search(escaped, text) assert m is not None, 'escaped pattern matches literally' assert m.group() == '$10.00', 'escaped pattern matches the exact string' # === re.sub() with keyword arguments === result = re.sub(r'\d+', 'X', 'a1 b2 c3', count=1) assert result == 'aX b2 c3', 're.sub with count kwarg' result = re.sub(r'hello', 'X', 'Hello HELLO hello', count=0, flags=re.IGNORECASE) assert result == 'X X X', 're.sub with flags kwarg' # Pattern.sub with count kwarg p = re.compile(r'\d+') result = p.sub('X', 'a1 b2 c3', count=1) assert result == 'aX b2 c3', 'Pattern.sub with count kwarg' # === re.split() === result = re.split(r'\s+', 'hello world foo') assert result == ['hello', 'world', 'foo'], 're.split basic' result = re.split(r'[,;]', 'a,b;c') assert result == ['a', 'b', 'c'], 're.split on multiple delimiters' result = re.split(r'\s+', 'hello world foo', maxsplit=1) assert result == ['hello', 'world foo'], 're.split with maxsplit=1' result = re.split(r'\s+', 'hello') assert result == ['hello'], 're.split with no matches' result = re.split(r'\s+', '') assert result == [''], 're.split on empty string' # Pattern.split p = re.compile(r'[,;]') result = p.split('a,b;c') assert result == ['a', 'b', 'c'], 'Pattern.split basic' result = p.split('a,b;c', maxsplit=1) assert result == ['a', 'b;c'], 'Pattern.split with maxsplit kwarg' # === re.finditer() === matches = list(re.finditer(r'\d+', 'a1 b22 c333')) assert len(matches) == 3, 'finditer returns 3 matches' assert matches[0].group() == 
'1', 'finditer match 0' assert matches[1].group() == '22', 'finditer match 1' assert matches[2].group() == '333', 'finditer match 2' # finditer with no matches matches = list(re.finditer(r'\d+', 'no numbers')) assert len(matches) == 0, 'finditer with no matches returns empty' # finditer iteration groups = [m.group() for m in re.finditer(r'\w+', 'hello world')] assert groups == ['hello', 'world'], 'finditer in list comprehension' # Pattern.finditer p = re.compile(r'\d+') matches = list(p.finditer('a1 b22')) assert len(matches) == 2, 'Pattern.finditer returns 2 matches' assert matches[0].group() == '1', 'Pattern.finditer match 0' assert matches[1].group() == '22', 'Pattern.finditer match 1' # finditer with capture groups matches = list(re.finditer(r'(\w+)=(\w+)', 'a=1 b=2')) assert len(matches) == 2, 'finditer with groups returns 2 matches' assert matches[0].group(1) == 'a', 'finditer group 1 of match 0' assert matches[0].group(2) == '1', 'finditer group 2 of match 0' assert matches[1].group(1) == 'b', 'finditer group 1 of match 1' ================================================ FILE: crates/monty/test_cases/re__grouping.py ================================================ # Tests for the re (regular expression) module - capture groups and grouping import re # === Capture groups === m = re.search(r'(\w+)@(\w+)', 'user@host') assert m is not None, 're.search with groups finds a match' assert m.group(0) == 'user@host', 'group(0) is the full match' assert m.group(1) == 'user', 'group(1) is first capture' assert m.group(2) == 'host', 'group(2) is second capture' assert m.groups() == ('user', 'host'), 'groups() returns tuple of captures' # === group start/end/span with capture groups === m = re.search(r'(\w+)@(\w+)', 'email: user@host here') assert m is not None, 'search with groups finds match' assert m.start(0) == 7, 'start(0) is full match start' assert m.end(0) == 16, 'end(0) is full match end' assert m.start(1) == 7, 'start(1) is group 1 start' assert m.end(1) == 11, 
'end(1) is group 1 end' assert m.span(1) == (7, 11), 'span(1) is group 1 span' assert m.start(2) == 12, 'start(2) is group 2 start' assert m.end(2) == 16, 'end(2) is group 2 end' assert m.span(2) == (12, 16), 'span(2) is group 2 span' # === re.findall() with one group === result = re.findall(r'(\d+)', 'a1 b22 c333') assert result == ['1', '22', '333'], 'findall with one group returns list of group strings' # === re.findall() with multiple groups === result = re.findall(r'(\w+)=(\w+)', 'a=1 b=2') assert result == [('a', '1'), ('b', '2')], 'findall with multiple groups returns list of tuples' # === No groups: groups() returns empty tuple === m = re.search(r'\d+', '42') assert m is not None, 'search with no groups finds match' assert m.groups() == (), 'groups() with no capture groups returns empty tuple' # === Backreferences === m = re.search(r'(\w+)\s+\1', 'hello hello') assert m is not None, 'backreference finds repeated word' assert m.group(0) == 'hello hello', 'backreference full match' assert m.group(1) == 'hello', 'backreference group' # === Invalid group index === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.group(2) assert False, 'Accessing invalid group index should raise IndexError' except IndexError as e: assert str(e) == 'no such group' try: m.group('foo') assert False, 'Accessing group with non-integer index should raise IndexError' except IndexError as e: assert str(e) == 'no such group' # === re.sub() replacement with backreferences === result = re.sub(r'(\w+)=(\w+)', r'\2=\1', 'a=1 b=2') assert result == '1=a 2=b', 're.sub with backreferences swaps groups' # === Negative group index === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.group(-1) assert False, 'Negative group index should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'negative group raises IndexError with "no such group" message' # === Out-of-range group index === m 
= re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.group(999) assert False, 'Out-of-range group index should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'out-of-range group raises IndexError with "no such group" message' # === Non-integer group argument: float === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.group(1.5) assert False, 'Float group argument should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'float group arg raises IndexError' # === Non-integer group argument: string === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.group('1') assert False, 'String group argument should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'string group arg raises IndexError' # === Non-integer group argument: None === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.group(None) assert False, 'None group argument should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'None group arg raises IndexError' # === Negative group index for start() === m = re.search(r'(\w+)@(\w+)', 'user@host') assert m is not None, 'search with groups finds match' try: m.start(-1) assert False, 'Negative start index should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'negative start raises IndexError with "no such group" message' # === Out-of-range group index for start() === m = re.search(r'(\w+)@(\w+)', 'user@host') assert m is not None, 'search with groups finds match' try: m.start(999) assert False, 'Out-of-range start index should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'out-of-range start raises IndexError' # === Non-integer argument for start() === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds 
match' try: m.start(1.5) assert False, 'Float start argument should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'float start arg raises IndexError' # === Negative group index for end() === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.end(-2) assert False, 'Negative end index should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'negative end raises IndexError with "no such group" message' # === Out-of-range group index for end() === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.end(100) assert False, 'Out-of-range end index should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'out-of-range end raises IndexError' # === Non-integer argument for end() === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.end('0') assert False, 'String end argument should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'string end arg raises IndexError' # === Negative group index for span() === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.span(-1) assert False, 'Negative span index should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'negative span raises IndexError with "no such group" message' # === Out-of-range group index for span() === m = re.search(r'(\w+)@(\w+)', 'user@host') assert m is not None, 'search with groups finds match' try: m.span(5) assert False, 'Out-of-range span index should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'out-of-range span raises IndexError' # === Non-integer argument for span() === m = re.search(r'(\w+)', 'hello') assert m is not None, 'search with group finds match' try: m.span(None) assert False, 'None span argument should raise IndexError' except IndexError as e: assert str(e) == 
'no such group', 'None span arg raises IndexError' # === Accessing unmatched optional group returns None === # Optional groups that don't match return None instead of raising an error m = re.search(r'(\w+)?@(\w+)', '@host') assert m is not None, 'search with optional group finds match' assert m.group(1) is None, 'unmatched optional group returns None' assert m.start(1) == -1, 'start of unmatched optional group returns -1' assert m.end(1) == -1, 'end of unmatched optional group returns -1' assert m.span(1) == (-1, -1), 'span of unmatched optional group returns (-1, -1)' # === Named group access with m.group('name') === m = re.search(r'(?P<first>\w+)\s+(?P<second>\w+)', 'hello world') assert m is not None, 'named group search finds match' assert m.group('first') == 'hello', "group('first') returns first named group" assert m.group('second') == 'world', "group('second') returns second named group" assert m.group(1) == 'hello', 'named group is also accessible by index' assert m.group(2) == 'world', 'named group is also accessible by index' assert m.group(0) == 'hello world', 'group(0) still returns full match' # Named group with invalid name try: m.group('nonexistent') assert False, 'non-existent named group should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'non-existent named group error message' # === m.group() with multiple arguments === m = re.search(r'(\w+)\s+(\w+)\s+(\w+)', 'a b c') assert m is not None, 'multi-group search finds match' result = m.group(1, 2) assert result == ('a', 'b'), 'group(1, 2) returns tuple of two groups' result = m.group(1, 2, 3) assert result == ('a', 'b', 'c'), 'group(1, 2, 3) returns tuple of three groups' result = m.group(0, 1) assert result == ('a b c', 'a'), 'group(0, 1) includes full match' # === m.groupdict() === m = re.search(r'(?P<first>\w+)\s+(?P<second>\w+)', 'hello world') assert m is not None, 'named group search for groupdict' d = m.groupdict() assert d == {'first': 'hello', 'second': 'world'}, 'groupdict returns correct 
dict' # groupdict with no named groups m = re.search(r'(\w+)\s+(\w+)', 'hello world') assert m is not None, 'unnamed group search for groupdict' d = m.groupdict() assert d == {}, 'groupdict with no named groups returns empty dict' # groupdict with unmatched optional named group m = re.search(r'(?P<first>\w+)?@(?P<second>\w+)', '@host') assert m is not None, 'optional named group search for groupdict' d = m.groupdict() assert d == {'first': None, 'second': 'host'}, 'groupdict includes unmatched named groups as None' ================================================ FILE: crates/monty/test_cases/re__match.py ================================================ # Tests for the re (regular expression) module - Match object import re # === Match .string attribute === m = re.search('hello', 'say hello') assert m is not None, 'search finds match for .string test' assert m.string == 'say hello', '.string returns the input string' # === Match truthiness === m = re.search(r'\d+', '123') assert m, 'Match objects are truthy' # === Match repr === m = re.search(r'\d+', 'abc 42 def') assert repr(m) == "<re.Match object; span=(4, 6), match='42'>", 'Match repr' # === Object basic === assert bool(re.search(r'\w+', 'hello')) assert isinstance(re.search(r'\w+', 'hello'), re.Match), 're.search returns re.Match instance' assert str(type(re.search(r'\w+', 'hello'))) == "<class 're.Match'>", 'type of search match is re.Match' # === Match equality - Match objects are not comparable === m1 = re.search(r'\w+', 'hello') m2 = re.search(r'\w+', 'hello') assert (m1 == m2) == False, 'different Match objects are not equal' assert m1 != m2, 'Match objects with same content are not equal' # === Match methods are reusable on same object === m = re.search(r'(\w+)@(\w+)', 'user@host') assert m is not None, 'search finds match' assert m.group(0) == 'user@host', 'first call to group(0) works' assert m.group(0) == 'user@host', 'second call to group(0) works' assert m.group(1) == 'user', 'call to group(1) works' assert m.start(1) == 0, 'start(1) works' assert m.end(1) == 4, 'end(1) 
works' assert m.span(0) == (0, 9), 'span(0) works' # === .string attribute is accessible multiple times === m = re.search(r'hello', 'say hello world') assert m is not None, 'search finds match' assert m.string == 'say hello world', 'first access to .string works' assert m.string == 'say hello world', 'second access to .string works' # === Match object with empty string === m = re.search(r'', 'hello') assert m is not None, 'empty pattern matches' assert m.string == 'hello', '.string returns input for empty match' assert m.group(0) == '', 'empty match group(0) is empty string' # === Match object from match() function === m = re.match(r'(\w+)', 'hello world') assert m is not None, 're.match finds match' assert m.group(0) == 'hello', 'match() returns correct match' assert m.start(0) == 0, 'match starts at position 0' assert m.string == 'hello world', '.string returns full input' # === Match object from fullmatch() function === m = re.fullmatch(r'\w+', 'hello') assert m is not None, 're.fullmatch finds exact match' assert m.group(0) == 'hello', 'fullmatch returns correct match' assert m.start(0) == 0, 'fullmatch starts at position 0' assert m.end(0) == 5, 'fullmatch ends at correct position' # === Match repr basic format === m = re.search(r'\d+', 'abc 42 def') assert repr(m) == "<re.Match object; span=(4, 6), match='42'>", 'Match repr basic format' m = re.search(r'\w+', 'hello') assert repr(m) == "<re.Match object; span=(0, 5), match='hello'>", 'Match repr at start' m = re.search(r'', 'hello') assert repr(m) == "<re.Match object; span=(0, 0), match=''>", 'Match repr empty match' # === Match repr with special characters === p = re.compile(r'a.b', re.DOTALL) m = p.search('a\nb') assert m is not None, 'DOTALL match for repr test' r = repr(m) assert r == "<re.Match object; span=(0, 3), match='a\\nb'>", 'Match repr escapes newline' m = re.search(r'a.b', 'a\tb') assert m is not None, 'tab match for repr test' r = repr(m) assert r == "<re.Match object; span=(0, 3), match='a\\tb'>", 'Match repr escapes tab' # backslash in matched text m = re.search(r'a.b', 'a\\b') assert m is not None, 'backslash match for repr test' r = repr(m) assert r == "<re.Match object; span=(0, 3), match='a\\\\b'>", 'Match repr escapes backslash' # carriage 
return in matched text p = re.compile(r'a.b', re.DOTALL) m = p.search('a\rb') assert m is not None, 'carriage return match for repr test' r = repr(m) assert r == "<re.Match object; span=(0, 3), match='a\\rb'>", 'Match repr escapes carriage return' # single quote in matched text — repr switches to double quotes m = re.search(r'a.b', "a'b") assert m is not None, 'single quote match for repr test' r = repr(m) assert r == '<re.Match object; span=(0, 3), match="a\'b">', 'Match repr handles single quote' # double quote in matched text — repr uses single quotes m = re.search(r'a.b', 'a"b') assert m is not None, 'double quote match for repr test' r = repr(m) assert r == "<re.Match object; span=(0, 3), match='a\"b'>", 'Match repr handles double quote' # === Pattern repr === p = re.compile('hello') assert repr(p) == "re.compile('hello')", 'Pattern repr simple string' p = re.compile(r'\n\t') assert repr(p) == "re.compile('\\\\n\\\\t')", 'Pattern repr with escape sequences in pattern' # === Bool as group index === m = re.search(r'(\w+)\s+(\w+)', 'hello world') assert m is not None, 'search for bool group test' assert m.group(True) == 'hello', 'group(True) is group(1)' assert m.group(False) == 'hello world', 'group(False) is group(0)' assert m.start(True) == 0, 'start(True) is start(1)' assert m.end(True) == 5, 'end(True) is end(1)' assert m.span(True) == (0, 5), 'span(True) is span(1)' assert m.span(False) == (0, 11), 'span(False) is span(0)' # === m[N] subscript access === m = re.search(r'(\w+)\s+(\w+)', 'hello world') assert m is not None, 'search for subscript test' assert m[0] == 'hello world', 'm[0] is full match' assert m[1] == 'hello', 'm[1] is first group' assert m[2] == 'world', 'm[2] is second group' # subscript with named groups m = re.search(r'(?P<first>\w+)\s+(?P<second>\w+)', 'hello world') assert m is not None, 'search for named subscript test' assert m['first'] == 'hello', "m['first'] accesses named group" assert m['second'] == 'world', "m['second'] accesses named group" assert m[1] == 'hello', 'm[1] also works with named groups' # subscript with invalid index try: m[99] assert False, 'out-of-range 
subscript should raise IndexError' except IndexError as e: assert str(e) == 'no such group', 'subscript IndexError message' ================================================ FILE: crates/monty/test_cases/recursion__deep_drop.py ================================================ # Test that dropping deeply nested containers doesn't crash (stack overflow). # Heap::dec_ref recurses in Rust when freeing child references, so deeply # nested containers can overflow the Rust call stack during cleanup. # CPython handles this fine (its dealloc uses an iterative trashcan mechanism). # Once fixed (iterative dec_ref), this should work without crashing. # === Deep list drop === x = [1] for _ in range(10000): x = [x] x = None # triggers recursive dec_ref chain assert True, 'survived deep list drop' # === Deep tuple drop === y = (1,) for _ in range(10000): y = (y,) y = None # triggers recursive dec_ref chain assert True, 'survived deep tuple drop' # === Deep dict drop === z = {'a': 1} for _ in range(10000): z = {'a': z} z = None # triggers recursive dec_ref chain assert True, 'survived deep dict drop' ================================================ FILE: crates/monty/test_cases/recursion__deep_eq.py ================================================ # Test that deeply nested lists don't crash during equality comparison # Monty raises RecursionError at depth limit, CPython handles in C code a = [] b = [] for _ in range(30): # Use lower depth that works within unified recursion limit a = [a] b = [b] # Should not crash result = a == b assert isinstance(result, bool), 'comparison should return a bool' assert result == True, 'structurally equal nested lists should be equal' # Test non-equal nested lists c = [] for _ in range(30): c = [c] c = [1] # Make the innermost different for _ in range(29): c = [c] result2 = a == c assert result2 == False, 'structurally different nested lists should not be equal' ================================================ FILE: 
crates/monty/test_cases/recursion__deep_hash.py ================================================ # Test that hashing deeply nested containers raises RecursionError instead # of crashing with a Rust stack overflow. # === Deep tuple hash === x = (1,) for _ in range(10000): x = (x,) try: h = hash(x) assert isinstance(h, int), 'hash should return an int' except RecursionError: pass # acceptable if depth guard triggers # === Deep frozenset hash === y = frozenset({1}) for _ in range(10000): y = frozenset({y}) try: h = hash(y) assert isinstance(h, int), 'hash should return an int' except RecursionError: pass # acceptable if depth guard triggers # === Deep tuple as dict key (triggers hash) === z = (1,) for _ in range(10000): z = (z,) d = {} try: d[z] = 'value' except RecursionError: pass # acceptable if depth guard triggers # === Deep tuple as set element (triggers hash) === w = (1,) for _ in range(10000): w = (w,) s = set() try: s.add(w) except RecursionError: pass # acceptable if depth guard triggers ================================================ FILE: crates/monty/test_cases/recursion__deep_repr.py ================================================ # Test that deeply nested lists don't crash during repr() # Monty truncates with "..." at depth limit, CPython handles in C code x = [] for _ in range(200): x = [x] # Should not crash - either returns full repr or truncated with "..." result = repr(x) assert isinstance(result, str), 'repr should return a string' assert result.startswith('['), 'repr should start with [' # Either full repr or truncated assert result.endswith(']') or '...' in result, 'repr should end with ] or contain ...' 
================================================ FILE: crates/monty/test_cases/recursion__function_depth.py ================================================ # Test that recursive function calls hit the recursion limit # This uses Python function call recursion which both CPython and Monty limit def recurse(n): if n == 0: return 0 return recurse(n - 1) + 1 # This should raise RecursionError in both interpreters recurse(2000) # Raise=RecursionError('maximum recursion depth exceeded') ================================================ FILE: crates/monty/test_cases/refcount__cycle_mutual_reference.py ================================================ # Mutual reference cycle: a contains b, b contains a # This creates a cycle where: # - a has refcount 2 (variable 'a' + being inside b) # - b has refcount 2 (variable 'b' + being inside a) # Without cycle detection, when both variables go out of scope: # - a's refcount drops to 1 (still in b) # - b's refcount drops to 1 (still in a) # - Neither reaches 0, neither is freed (memory leak) # # NOTE: We return len(b) instead of b because repr(b) would cause infinite # recursion / stack overflow (a separate bug - Python handles this by printing [...] # for cyclic references) a = [] b = [] a.append(b) b.append(a) len(b) # ref-counts={'a': 2, 'b': 2} ================================================ FILE: crates/monty/test_cases/refcount__cycle_self_reference.py ================================================ # Self-referential list: a contains itself # This creates a cycle where a's refcount is 2 (variable + self-reference) # Without cycle detection, when 'a' goes out of scope, refcount drops to 1 # but the object is never freed (memory leak) # # NOTE: We return len(a) instead of a because repr(a) would cause infinite # recursion / stack overflow (a separate bug - Python handles this by printing [...] 
# for cyclic references) a = [] a.append(a) len(a) # ref-counts={'a': 2} ================================================ FILE: crates/monty/test_cases/refcount__dict_basic.py ================================================ k = 'key' v = [1, 2] d = {k: v} d # ref-counts={'v': 2, 'd': 2} ================================================ FILE: crates/monty/test_cases/refcount__dict_get.py ================================================ v = [1, 2] d = {0: v} x = d[0] x # ref-counts={'v': 4, 'd': 1, 'x': 4} ================================================ FILE: crates/monty/test_cases/refcount__dict_keys_and.py ================================================ inner = ('x',) wrapped = (inner,) empty = [] rhs = [wrapped, empty] try: {}.keys() & rhs assert False, 'dict_keys intersection should reject unhashable iterable items' except TypeError as e: assert str(e) == "cannot use 'list' as a set element (unhashable type: 'list')", ( 'dict_keys intersection should surface the recoverable set-element hash error' ) # ref-counts={'inner': 2, 'wrapped': 2, 'empty': 2, 'rhs': 1} ================================================ FILE: crates/monty/test_cases/refcount__dict_overwrite.py ================================================ v1 = [1] v2 = [2] d = {0: v1} d[0] = v2 d # ref-counts={'v1': 1, 'v2': 2, 'd': 2} ================================================ FILE: crates/monty/test_cases/refcount__gather_cleanup.py ================================================ # Test that GatherFuture and coroutines are properly cleaned up after gather completes. # The strict matching check will fail if the GatherFuture leaks (heap_count > unique_refs). 
import asyncio async def task1(): return 1 async def task2(): return 2 result = await asyncio.gather(task1(), task2()) # pyright: ignore result # ref-counts={'result': 2, 'asyncio': 1} ================================================ FILE: crates/monty/test_cases/refcount__gather_exception.py ================================================ # Test that GatherFuture and coroutines are properly cleaned up when a task raises. # When one task fails, sibling tasks should be cancelled and all resources freed. import asyncio async def task_ok(): return 1 async def task_fail(): raise ValueError('task failed') try: result = await asyncio.gather(task_ok(), task_fail()) # pyright: ignore except ValueError: pass # ref-counts={'asyncio': 1} ================================================ FILE: crates/monty/test_cases/refcount__gather_nested_cancel.py ================================================ # Test that nested GatherFuture is properly cleaned up when outer task is cancelled. # When one task in an outer gather fails, sibling tasks (including those with inner gathers) # should be cancelled and all GatherFutures properly cleaned up. 
import asyncio async def inner_task(): return 1 async def task_with_inner_gather(): # This inner gather should be cancelled when the outer gather fails result = await asyncio.gather(inner_task(), inner_task()) return result async def task_fail(): raise ValueError('outer task failed') try: result = await asyncio.gather(task_with_inner_gather(), task_fail()) # pyright: ignore except ValueError: pass # ref-counts={'asyncio': 1} ================================================ FILE: crates/monty/test_cases/refcount__immediate_skipped.py ================================================ x = 42 y = [1, 2] y # ref-counts={'y': 2} ================================================ FILE: crates/monty/test_cases/refcount__keyword_only_kwarg_arity_errors.py ================================================ # Tests cleanup when keyword-only parsers reject calls before consuming kwargs. # # Shared kwarg parsing helpers are safe only if callers guard owned kwargs before # any early arity/type errors. These handled exceptions should not leak the # heap-backed kwarg values. sorted_key = ['sorted-key'] sort_key = ['list-sort-key'] items = [3, 2, 1] try: sorted(key=sorted_key) assert False, 'sorted() with no positional args should raise TypeError' except TypeError as e: assert e.args == ('sorted expected 1 argument, got 0',), 'sorted() arity error should match CPython' try: items.sort(1, key=sort_key) assert False, 'list.sort() should reject positional args before consuming kwargs' except TypeError: pass # The handled exceptions above must not retain references to the kwarg payloads. 
# ref-counts={'sorted_key': 1, 'sort_key': 1, 'items': 1} ================================================ FILE: crates/monty/test_cases/refcount__kwargs_unpacking.py ================================================ # Tests reference counting correctness for **kwargs unpacking def receive_kwargs(a, b, c): return a # === Heap-allocated values in unpacked dict === # All heap objects must be directly referenced by variables for strict matching list_a = [1, 2, 3] list_c = [4, 5] kwargs_dict = {'a': list_a, 'b': 'hello', 'c': list_c} result = receive_kwargs(**kwargs_dict) assert result == [1, 2, 3], 'received list via **kwargs' assert result is list_a, 'should be same object' # Second call to verify dict reuse works result2 = receive_kwargs(**kwargs_dict) assert result2 is list_a, 'second call returns same object' # list_a: 5 refs (list_a var, kwargs_dict['a'], result, result2, final expr) # list_c: 2 refs (list_c var, kwargs_dict['c']) # kwargs_dict: 1 ref # result: 5 refs (same object as list_a) # result2: 5 refs (same object as list_a) result2 # ref-counts={'list_a': 5, 'list_c': 2, 'kwargs_dict': 1, 'result': 5, 'result2': 5} ================================================ FILE: crates/monty/test_cases/refcount__list_append_multiple.py ================================================ item = [1] lst = [] lst.append(item) lst.append(item) lst # ref-counts={'item': 3, 'lst': 2} ================================================ FILE: crates/monty/test_cases/refcount__list_append_ref.py ================================================ item = [1] lst = [] lst.append(item) lst # ref-counts={'item': 2, 'lst': 2} ================================================ FILE: crates/monty/test_cases/refcount__list_concat.py ================================================ a = [1] b = [2] c = a + b c # ref-counts={'a': 1, 'b': 1, 'c': 2} ================================================ FILE: crates/monty/test_cases/refcount__list_getitem.py 
================================================ item = [1, 2] lst = [item] x = lst[0] x # ref-counts={'item': 4, 'lst': 1, 'x': 4} ================================================ FILE: crates/monty/test_cases/refcount__list_iadd.py ================================================ a = [1] b = [2] a += b a # ref-counts={'a': 2, 'b': 1} ================================================ FILE: crates/monty/test_cases/refcount__min_max_key_error_paths.py ================================================ # Tests reference counting when min()/max() key functions raise on the first item. # # Both the iterable form and the multiple-argument form must guard the initial # candidate before calling the user-provided key function. Otherwise the first # winner leaks when key evaluation raises before the comparison loop starts. def raising_key(value): raise ValueError('boom') item_iter = ['iter'] item_multi = ['multi'] other_multi = ['other'] try: max([item_iter], key=raising_key) assert False, 'max(iterable, key=raising_key) should raise ValueError' except ValueError as e: assert e.args == ('boom',), 'max iterable key error should propagate unchanged' try: min(item_multi, other_multi, key=raising_key) assert False, 'min(arg1, arg2, key=raising_key) should raise ValueError' except ValueError as e: assert e.args == ('boom',), 'min multi-arg key error should propagate unchanged' # The temporary argument container for max() and the current winner slots in # both builtin code paths must be released after the handled exception. 
# ref-counts={'item_iter': 1, 'item_multi': 1, 'other_multi': 1} ================================================ FILE: crates/monty/test_cases/refcount__nested_list.py ================================================ inner = [1, 2] outer = [inner, inner] outer # ref-counts={'inner': 3, 'outer': 2} ================================================ FILE: crates/monty/test_cases/refcount__re_pattern_sub_error_paths.py ================================================ # Tests reference counting on Pattern.sub error paths. # # The positional arg iterator and extra args must be properly dropped even # when Pattern.sub raises due to too many args or a bad count type. # These paths previously leaked because pos.next().is_some() consumed a # Value without dropping it. import re # Use lists as heap-allocated values we can track repl_list = ['replacement'] input_list = ['the input'] p = re.compile('hello') # Exercise error path: too many positional arguments try: p.sub('repl', 'string', 0, 'extra') except TypeError: pass # Exercise error path: bad count type try: p.sub('repl', 'string', 'bad') except TypeError: pass # Exercise negative count path (early return) result = p.sub('repl', 'hello', -1) assert result == 'hello', 'negative count returns input unchanged' # repl_list: 1 (variable) # input_list: 1 (variable) # p: 1 (variable) # re: 1 (module) # result: 2 (variable + final expression) result # ref-counts={'repl_list': 1, 'input_list': 1, 'p': 1, 're': 1, 'result': 2} ================================================ FILE: crates/monty/test_cases/refcount__re_search_match.py ================================================ # Tests reference counting for re.search, re.match, and re.fullmatch. # # Verifies that Match objects, Pattern objects, and intermediate strings # are correctly reference-counted through normal usage paths. # All heap objects must be directly referenced by variables for strict matching. 
import re # Compile a pattern and run search — both pattern and match stay alive p = re.compile(r'(\w+)') m = p.search('hello world') assert m is not None, 'search finds match' group_str = m.group(0) assert group_str == 'hello', 'group(0) returns matched text' # Run fullmatch — exercises the compiled_fullmatch regex path m2 = p.fullmatch('hello') assert m2 is not None, 'fullmatch finds match' full_str = m2.group(0) assert full_str == 'hello', 'fullmatch group(0) returns matched text' # findall returns a list — keep individual elements in variables # so strict matching passes (all heap objects must be reachable) results = p.findall('a b c') assert results == ['a', 'b', 'c'], 'findall returns list of matches' r0 = results[0] r1 = results[1] r2 = results[2] # p: 1, m: 1, group_str: 1, m2: 1, full_str: 1 # results: 1, r0: 2 (var + list), r1: 2 (var + list), r2: 3 (var + list + final expr) # re: 1 r2 # ref-counts={'p': 1, 'm': 1, 'group_str': 1, 'm2': 1, 'full_str': 1, 'results': 1, 'r0': 2, 'r1': 2, 'r2': 3, 're': 1} ================================================ FILE: crates/monty/test_cases/refcount__re_sub_error_paths.py ================================================ # Tests reference counting on re.sub error paths. # # The positional arg iterator and extra args must be properly dropped even # when re.sub raises due to too many args or a bad count type. # These paths previously leaked because pos.next().is_some() consumed a # Value without dropping it, and the pos iterator itself was unguarded. import re # Use lists as heap-allocated values that we can track through error paths. # String literals may be interned and won't show up in heap ref counts.
repl_list = ['replacement'] input_list = ['the input'] # Exercise error path: bad count type with heap-allocated args in scope try: re.sub('pattern', 'repl', 'input', 'bad') except TypeError: pass # Exercise negative count path (early return, no regex compilation) result = re.sub('pattern', 'repl', 'hello', -1) assert result == 'hello', 'negative count returns input unchanged' # All lists should still be alive and reachable # repl_list: 1 (variable) # input_list: 1 (variable) # re: 1 (module) # result: 2 (variable + final expression) result # ref-counts={'repl_list': 1, 'input_list': 1, 're': 1, 'result': 2} ================================================ FILE: crates/monty/test_cases/refcount__shared_reference.py ================================================ x = [1, 2, 3] y = x x # ref-counts={'x': 3, 'y': 3} ================================================ FILE: crates/monty/test_cases/refcount__single_list.py ================================================ x = [1, 2, 3] x # ref-counts={'x': 2} ================================================ FILE: crates/monty/test_cases/repr__cycle_detection.py ================================================ # Test cycle detection in repr for self-referential structures # Section 1: List self-reference a = [] a.append(a) assert repr(a) == '[[...]]', 'list self-reference' # Section 2: Dict self-reference d = {} d['self'] = d assert repr(d) == "{'self': {...}}", 'dict self-reference' # Section 3: Composite - list containing dict containing original list c = [] e = {'list': c} c.append(e) assert repr(c) == "[{'list': [...]}]", 'list containing dict cycle' assert repr(e) == "{'list': [{...}]}", 'dict containing list cycle' # Section 4: Multiple references to same cyclic object f = [] f.append(f) g = [f, f] assert repr(g) == '[[[...]], [[...]]]', 'multiple refs to cyclic list' ================================================ FILE: crates/monty/test_cases/set__ops.py ================================================ # === 
Construction === s = set() assert len(s) == 0, 'empty set len' assert s == set(), 'empty set equality' s = set([1, 2, 3]) assert len(s) == 3, 'set from list len' # === Basic Methods === s = set() s.add(1) s.add(2) s.add(1) # duplicate assert len(s) == 2, 'add with duplicate' # === Discard and Remove === s = set([1, 2, 3]) s.discard(2) assert len(s) == 2, 'discard existing' s.discard(99) # should not raise assert len(s) == 2, 'discard non-existing' # === Pop === s = set([1]) v = s.pop() assert v == 1, 'pop returns element' assert len(s) == 0, 'pop removes element' # === Clear === s = set([1, 2, 3]) s.clear() assert len(s) == 0, 'clear' # === Copy === s = set([1, 2, 3]) s2 = s.copy() assert s == s2, 'copy equality' s.add(4) assert s != s2, 'copy is independent' # === Update === s = set([1, 2]) s.update([2, 3, 4]) assert len(s) == 4, 'update with list' # === Union === s1 = set([1, 2]) s2 = set([2, 3]) u = s1.union(s2) assert len(u) == 3, 'union len' # === Intersection === s1 = set([1, 2, 3]) s2 = set([2, 3, 4]) i = s1.intersection(s2) assert len(i) == 2, 'intersection len' # === Difference === s1 = set([1, 2, 3]) s2 = set([2, 3, 4]) d = s1.difference(s2) assert len(d) == 1, 'difference len' # === Symmetric Difference === s1 = set([1, 2, 3]) s2 = set([2, 3, 4]) sd = s1.symmetric_difference(s2) assert len(sd) == 2, 'symmetric_difference len' # === Binary operators === s = {1, 2} t = {2, 3} fs = frozenset([2, 3]) assert s & t == {2}, 'set & set works' assert s | t == {1, 2, 3}, 'set | set works' assert s ^ t == {1, 3}, 'set ^ set works' assert s - t == {1}, 'set - set works' assert s & fs == {2}, 'set & frozenset works' assert s | fs == {1, 2, 3}, 'set | frozenset works' assert s ^ fs == {1, 3}, 'set ^ frozenset works' assert s - fs == {1}, 'set - frozenset works' keys = {'a': 1, 'b': 2}.keys() items = {'a': 1, 'b': 2}.items() assert {'a'} & keys == {'a'}, 'set & dict_keys works' assert {'a'} | keys == {'a', 'b'}, 'set | dict_keys works' assert {('a', 1)} ^ items == 
{('b', 2)}, 'set ^ dict_items works' assert {('a', 1), ('b', 2)} - items == set(), 'set - dict_items works' assert type(s & fs).__name__ == 'set', 'set operators keep the left operand type' try: s & [1, 2] assert False, 'set operators reject non-set rhs' except TypeError as e: assert str(e) == "unsupported operand type(s) for &: 'set' and 'list'", 'set & rhs error matches CPython' try: s | [1, 2] assert False, 'set union operator rejects non-set rhs' except TypeError as e: assert str(e) == "unsupported operand type(s) for |: 'set' and 'list'", 'set | rhs error matches CPython' try: s ^ [1, 2] assert False, 'set xor operator rejects non-set rhs' except TypeError as e: assert str(e) == "unsupported operand type(s) for ^: 'set' and 'list'", 'set ^ rhs error matches CPython' try: s - [1, 2] assert False, 'set subtraction operator rejects non-set rhs' except TypeError as e: assert str(e) == "unsupported operand type(s) for -: 'set' and 'list'", 'set - rhs error matches CPython' # === Issubset === s1 = set([1, 2]) s2 = set([1, 2, 3]) assert s1.issubset(s2) == True, 'issubset true' assert s2.issubset(s1) == False, 'issubset false' # === Issuperset === s1 = set([1, 2, 3]) s2 = set([1, 2]) assert s1.issuperset(s2) == True, 'issuperset true' assert s2.issuperset(s1) == False, 'issuperset false' # === Isdisjoint === s1 = set([1, 2]) s2 = set([3, 4]) s3 = set([2, 3]) assert s1.isdisjoint(s2) == True, 'isdisjoint true' assert s1.isdisjoint(s3) == False, 'isdisjoint false' # === Bool === assert bool(set()) == False, 'empty set is falsy' assert bool(set([1])) == True, 'non-empty set is truthy' # === repr === assert repr(set()) == 'set()', 'empty set repr' # === Set literals === s = {1, 2, 3} assert len(s) == 3, 'set literal len' s = {1, 1, 2, 2, 3} assert len(s) == 3, 'set literal deduplication' # Set literal with expressions x = 5 s = {x, x + 1, x + 2} assert len(s) == 3, 'set literal with expressions' # === Set unpacking (PEP 448) === a = [1, 2] b = [3, 4] assert {*a} == {1, 
2}, 'single set unpack from list' assert {*a, *b} == {1, 2, 3, 4}, 'double set unpack' assert {0, *a, 5} == {0, 1, 2, 5}, 'mixed set unpack' assert {*[]} == set(), 'unpack empty into set' assert {*(1, 2)} == {1, 2}, 'unpack tuple into set' assert {*{'a': 1, 'b': 2}} == {'a', 'b'}, 'unpack dict keys into set' assert {*'aab'} == {'a', 'b'}, 'unpack string into set' # Heap-allocated set: covers the HeapData::Set arm in set_extend inner_set = {1, 2, 3} assert {*inner_set} == {1, 2, 3}, 'unpack set into set' # Heap-allocated Str (result of concat, not interned): covers HeapData::Str in set_extend hs = 'hel' + 'lo' assert {*hs} == {'h', 'e', 'l', 'o'}, 'unpack heap string into set' # Non-iterable heap-allocated Ref (closure) hits the inner `_` arm in set_extend. # A plain top-level function is Value::DefFunction (not a Ref), so a closure is # required to reach the Value::Ref(_) branch (HeapData that is not List/Tuple/Set/Dict/Str). def _make_set_unpack_closure(): _sentinel = 1 def _inner(): return _sentinel return _inner _set_unpack_closure = _make_set_unpack_closure() try: _x = {*_set_unpack_closure} assert False, 'expected TypeError for non-iterable heap closure in set unpack' except TypeError: pass ================================================ FILE: crates/monty/test_cases/set__review_bugs.py ================================================ # Tests for review issues # === frozenset repr for non-empty sets === # frozenset repr should show "frozenset({...})" not just "{...}" fs_repr = repr(frozenset({1, 2})) assert fs_repr == 'frozenset({1, 2})' or fs_repr == 'frozenset({2, 1})', 'frozenset repr should include type name' assert repr(frozenset()) == 'frozenset()', 'empty frozenset repr' # set repr should NOT have type prefix s_repr = repr({1, 2}) assert s_repr == '{1, 2}' or s_repr == '{2, 1}', 'set repr should not have prefix' # === issubset with range (non-Ref iterable) === # These should work, not raise TypeError s = {1, 2, 3} assert s.issubset(range(10)), 
'issubset should accept range' assert s.issuperset(range(1, 3)), 'issuperset should accept range' assert s.isdisjoint(range(10, 20)), 'isdisjoint should accept range' # === set construction with nested heap objects === # This tests ref counting - if refs are dropped before incrementing, this will fail t = (1, 2) s = set([t]) assert len(s) == 1, 'set should have one element' assert repr(s) == '{(1, 2)}', 'set repr should not have prefix' # More complex case - the list is temporary and will be dropped s2 = set([(3, 4)]) assert len(s2) == 1, 'set from temp list should have one element' assert repr(s2) == '{(3, 4)}', 'set repr should not have prefix' # frozenset with nested objects fs = frozenset([(5, 6)]) assert len(fs) == 1, 'frozenset from temp list should have one element' assert repr(fs) == 'frozenset({(5, 6)})', 'frozenset repr should not have prefix' ================================================ FILE: crates/monty/test_cases/set__unpack_type_error.py ================================================ {*42} # Raise=TypeError("'int' object is not iterable") ================================================ FILE: crates/monty/test_cases/slice__invalid_indices.py ================================================ [1, 2, 3]['a':'b'] # Raise=TypeError('slice indices must be integers or None or have an __index__ method') ================================================ FILE: crates/monty/test_cases/slice__kwargs.py ================================================ slice(stop=5) """ TRACEBACK: Traceback (most recent call last): File "slice__kwargs.py", line 1, in slice(stop=5) ~~~~~~~~~~~~~ TypeError: slice() takes no keyword arguments """ ================================================ FILE: crates/monty/test_cases/slice__no_args.py ================================================ slice() """ TRACEBACK: Traceback (most recent call last): File "slice__no_args.py", line 1, in slice() ~~~~~~~ TypeError: slice expected at least 1 argument, got 0 """ 
================================================ FILE: crates/monty/test_cases/slice__ops.py ================================================ # === Basic list slicing === lst = [0, 1, 2, 3, 4, 5] assert lst[1:4] == [1, 2, 3], 'basic list slice' assert lst[:3] == [0, 1, 2], 'list slice from start' assert lst[3:] == [3, 4, 5], 'list slice to end' assert lst[:] == [0, 1, 2, 3, 4, 5], 'list full slice' # === Negative indices === assert lst[-3:] == [3, 4, 5], 'list slice negative start' assert lst[:-2] == [0, 1, 2, 3], 'list slice negative stop' assert lst[-4:-1] == [2, 3, 4], 'list slice both negative' # === Step === assert lst[::2] == [0, 2, 4], 'list slice with step' assert lst[1::2] == [1, 3, 5], 'list slice with start and step' assert lst[::-1] == [5, 4, 3, 2, 1, 0], 'list reverse slice' assert lst[4:1:-1] == [4, 3, 2], 'list negative step with bounds' assert lst[::3] == [0, 3], 'list slice step of 3' # === Out of bounds (clamped) === assert lst[10:20] == [], 'list out of bounds high' assert lst[-100:2] == [0, 1], 'list out of bounds low' assert lst[2:100] == [2, 3, 4, 5], 'list stop beyond length' # === Empty results === assert lst[3:1] == [], 'list empty slice start > stop' assert lst[3:3] == [], 'list empty slice start == stop' # === String slicing === s = 'hello' assert s[1:4] == 'ell', 'string slice basic' assert s[:3] == 'hel', 'string slice from start' assert s[3:] == 'lo', 'string slice to end' assert s[:] == 'hello', 'string full slice' assert s[::-1] == 'olleh', 'string reverse' assert s[::2] == 'hlo', 'string slice with step' # === Unicode string slicing === u = 'cafe' assert u[1:3] == 'af', 'unicode slice basic' assert u[::-1] == 'efac', 'unicode reverse' # === Tuple slicing === t = (0, 1, 2, 3, 4) assert t[1:4] == (1, 2, 3), 'tuple slice basic' assert t[::-1] == (4, 3, 2, 1, 0), 'tuple reverse' assert t[::2] == (0, 2, 4), 'tuple slice with step' # === Bytes slicing === b = b'\x00\x01\x02\x03\x04' assert b[1:4] == b'\x01\x02\x03', 'bytes slice basic' 
assert b[::-1] == b'\x04\x03\x02\x01\x00', 'bytes reverse' assert b[::2] == b'\x00\x02\x04', 'bytes slice with step' # === Range slicing === r = range(10) assert r[2:5] == range(2, 5), 'range slice basic' assert r[::2] == range(0, 10, 2), 'range slice with step' r2 = range(0, 10, 2) assert r2[1:4] == range(2, 8, 2), 'stepped range slice' # === slice() builtin === s1 = slice(3) assert s1.start is None, 'slice stop only - start is None' assert s1.stop == 3, 'slice stop only - stop is 3' assert s1.step is None, 'slice stop only - step is None' s2 = slice(1, 4) assert s2.start == 1, 'slice start stop - start is 1' assert s2.stop == 4, 'slice start stop - stop is 4' assert s2.step is None, 'slice start stop - step is None' s3 = slice(1, 10, 2) assert s3.start == 1, 'slice full - start is 1' assert s3.stop == 10, 'slice full - stop is 10' assert s3.step == 2, 'slice full - step is 2' # === Using slice objects === sl = slice(1, 4) assert lst[sl] == [1, 2, 3], 'slice object for list' assert s[sl] == 'ell', 'slice object for string' assert t[sl] == (1, 2, 3), 'slice object for tuple' # === slice repr and str === assert repr(slice(3)) == 'slice(None, 3, None)', 'slice repr stop only' assert repr(slice(1, 4)) == 'slice(1, 4, None)', 'slice repr start stop' assert repr(slice(1, 10, 2)) == 'slice(1, 10, 2)', 'slice repr full' assert str(slice(1, 4)) == 'slice(1, 4, None)', 'slice str same as repr' # === Edge case: negative step with None bounds === assert lst[::-2] == [5, 3, 1], 'list negative step no bounds' assert s[::-2] == 'olh', 'string negative step no bounds' # === Edge case: step larger than length === assert lst[::10] == [0], 'step larger than length' # === Empty sequence slicing === empty_list = [] assert empty_list[:] == [], 'empty list full slice' assert empty_list[1:4] == [], 'empty list any slice' assert empty_list[::-1] == [], 'empty list reverse' empty_str = '' assert empty_str[:] == '', 'empty string full slice' assert empty_str[1:4] == '', 'empty string any 
slice' # === Boolean truthiness of slice === assert slice(1, 2), 'slice is truthy' assert slice(None), 'slice with None stop is truthy' # === Slice equality === assert slice(1, 2) == slice(1, 2), 'slice equality same values' assert not (slice(1, 2) == slice(1, 3)), 'slice inequality different stop' assert slice(None) == slice(None), 'slice equality both None' assert slice(1, 2, 3) == slice(1, 2, 3), 'slice equality with step' assert not (slice(1, 2, 3) == slice(1, 2, 4)), 'slice inequality different step' # === Slice with bool indices === assert [0, 1, 2, 3][True:] == [1, 2, 3], 'slice with True start' assert [0, 1, 2, 3][:True] == [0], 'slice with True stop' assert [0, 1, 2, 3][::True] == [0, 1, 2, 3], 'slice with True step' assert [0, 1, 2, 3][False:] == [0, 1, 2, 3], 'slice with False start' assert [0, 1, 2, 3][:False] == [], 'slice with False stop' # === Range slicing edge cases === assert range(0)[1:2] == range(0, 0), 'empty range slicing' assert range(5)[::-1] == range(4, -1, -1), 'range reverse slice' assert list(range(5)[::-1]) == [4, 3, 2, 1, 0], 'range reverse slice iteration' # === Negative step with out-of-bounds start === lst5 = [0, 1, 2, 3, 4] assert lst5[-10::-1] == [], 'far negative start with negative step should be empty' assert lst5[-6::-1] == [], 'just out of bounds negative start' assert lst5[-5::-1] == [0], 'exactly at first element' assert lst5[-4::-1] == [1, 0], 'second element backwards' # Range slicing with out-of-bounds negative start assert list(range(5)[-10::-1]) == [], 'range far negative start' assert list(range(5)[-6::-1]) == [], 'range just out of bounds' assert list(range(5)[-5::-1]) == [0], 'range exactly at first' # String slicing with out-of-bounds negative start assert 'hello'[-10::-1] == '', 'string far negative start empty' assert 'hello'[-5::-1] == 'h', 'string exactly at first' # Tuple slicing with out-of-bounds negative start assert (0, 1, 2, 3, 4)[-10::-1] == (), 'tuple far negative start empty' assert (0, 1, 2, 3, 
4)[-5::-1] == (0,), 'tuple exactly at first' ================================================ FILE: crates/monty/test_cases/slice__step_zero.py ================================================ [1, 2, 3][::0] """ TRACEBACK: Traceback (most recent call last): File "slice__step_zero.py", line 1, in [1, 2, 3][::0] ~~~~~~~~~~~~~~ ValueError: slice step cannot be zero """ ================================================ FILE: crates/monty/test_cases/slice__step_zero_bytes.py ================================================ b'hello'[::0] """ TRACEBACK: Traceback (most recent call last): File "slice__step_zero_bytes.py", line 1, in b'hello'[::0] ~~~~~~~~~~~~~ ValueError: slice step cannot be zero """ ================================================ FILE: crates/monty/test_cases/slice__step_zero_range.py ================================================ range(5)[::0] """ TRACEBACK: Traceback (most recent call last): File "slice__step_zero_range.py", line 1, in range(5)[::0] ~~~~~~~~~~~~~ ValueError: slice step cannot be zero """ ================================================ FILE: crates/monty/test_cases/slice__step_zero_str.py ================================================ 'hello'[::0] """ TRACEBACK: Traceback (most recent call last): File "slice__step_zero_str.py", line 1, in 'hello'[::0] ~~~~~~~~~~~~ ValueError: slice step cannot be zero """ ================================================ FILE: crates/monty/test_cases/slice__step_zero_tuple.py ================================================ (1, 2, 3)[::0] """ TRACEBACK: Traceback (most recent call last): File "slice__step_zero_tuple.py", line 1, in (1, 2, 3)[::0] ~~~~~~~~~~~~~~ ValueError: slice step cannot be zero """ ================================================ FILE: crates/monty/test_cases/slice__too_many_args.py ================================================ slice(1, 2, 3, 4) """ TRACEBACK: Traceback (most recent call last): File "slice__too_many_args.py", line 1, in slice(1, 2, 3, 4) ~~~~~~~~~~~~~~~~~ 
TypeError: slice expected at most 3 arguments, got 4 """ ================================================ FILE: crates/monty/test_cases/str__getitem_index_error.py ================================================ s = 'hello' s[10] """ TRACEBACK: Traceback (most recent call last): File "str__getitem_index_error.py", line 2, in s[10] ~~~~~ IndexError: string index out of range """ ================================================ FILE: crates/monty/test_cases/str__index_not_found.py ================================================ 'hello'.index('x') """ TRACEBACK: Traceback (most recent call last): File "str__index_not_found.py", line 1, in 'hello'.index('x') ~~~~~~~~~~~~~~~~~~ ValueError: substring not found """ ================================================ FILE: crates/monty/test_cases/str__join_no_args.py ================================================ ','.join() """ TRACEBACK: Traceback (most recent call last): File "str__join_no_args.py", line 1, in ','.join() ~~~~~~~~~~ TypeError: str.join() takes exactly one argument (0 given) """ ================================================ FILE: crates/monty/test_cases/str__join_non_string.py ================================================ ','.join([1, 2]) """ TRACEBACK: Traceback (most recent call last): File "str__join_non_string.py", line 1, in ','.join([1, 2]) ~~~~~~~~~~~~~~~~ TypeError: sequence item 0: expected str instance, int found """ ================================================ FILE: crates/monty/test_cases/str__join_not_iterable.py ================================================ ','.join(123) """ TRACEBACK: Traceback (most recent call last): File "str__join_not_iterable.py", line 1, in ','.join(123) ~~~~~~~~~~~~~ TypeError: can only join an iterable """ ================================================ FILE: crates/monty/test_cases/str__join_too_many_args.py ================================================ ','.join(['a'], ['b']) """ TRACEBACK: Traceback (most recent call last): File 
"str__join_too_many_args.py", line 1, in ','.join(['a'], ['b']) ~~~~~~~~~~~~~~~~~~~~~~ TypeError: str.join() takes exactly one argument (2 given) """ ================================================ FILE: crates/monty/test_cases/str__methods.py ================================================ # === Phase 1: Simple transformations === # lower() assert 'HELLO'.lower() == 'hello', 'lower basic' assert 'Hello World'.lower() == 'hello world', 'lower mixed' assert 'hello'.lower() == 'hello', 'lower already lower' assert ''.lower() == '', 'lower empty' assert '123'.lower() == '123', 'lower numbers unchanged' # upper() assert 'hello'.upper() == 'HELLO', 'upper basic' assert 'Hello World'.upper() == 'HELLO WORLD', 'upper mixed' assert 'HELLO'.upper() == 'HELLO', 'upper already upper' assert ''.upper() == '', 'upper empty' assert '123'.upper() == '123', 'upper numbers unchanged' # capitalize() assert 'hello'.capitalize() == 'Hello', 'capitalize basic' assert 'HELLO'.capitalize() == 'Hello', 'capitalize all upper' assert 'hELLO wORLD'.capitalize() == 'Hello world', 'capitalize mixed' assert ''.capitalize() == '', 'capitalize empty' assert '123abc'.capitalize() == '123abc', 'capitalize number start' # title() assert 'hello world'.title() == 'Hello World', 'title basic' assert 'HELLO WORLD'.title() == 'Hello World', 'title all upper' assert "they're".title() == "They'Re", 'title apostrophe' assert ''.title() == '', 'title empty' assert '123 abc'.title() == '123 Abc', 'title number start' # swapcase() assert 'Hello World'.swapcase() == 'hELLO wORLD', 'swapcase basic' assert 'HELLO'.swapcase() == 'hello', 'swapcase all upper' assert 'hello'.swapcase() == 'HELLO', 'swapcase all lower' assert ''.swapcase() == '', 'swapcase empty' # casefold() assert 'Hello'.casefold() == 'hello', 'casefold basic' assert 'HELLO'.casefold() == 'hello', 'casefold all upper' assert ''.casefold() == '', 'casefold empty' # === Phase 2: Predicate methods === # isalpha() assert 'hello'.isalpha() == True, 
'isalpha basic' assert 'Hello'.isalpha() == True, 'isalpha mixed case' assert ''.isalpha() == False, 'isalpha empty' assert 'hello123'.isalpha() == False, 'isalpha with digits' assert 'hello world'.isalpha() == False, 'isalpha with space' # isdigit() assert '123'.isdigit() == True, 'isdigit basic' assert ''.isdigit() == False, 'isdigit empty' assert '123abc'.isdigit() == False, 'isdigit with letters' assert '12 34'.isdigit() == False, 'isdigit with space' # isalnum() assert 'hello123'.isalnum() == True, 'isalnum basic' assert 'hello'.isalnum() == True, 'isalnum letters only' assert '123'.isalnum() == True, 'isalnum digits only' assert ''.isalnum() == False, 'isalnum empty' assert 'hello 123'.isalnum() == False, 'isalnum with space' # isnumeric() assert '123'.isnumeric() == True, 'isnumeric basic' assert ''.isnumeric() == False, 'isnumeric empty' assert '123abc'.isnumeric() == False, 'isnumeric with letters' # isspace() assert ' '.isspace() == True, 'isspace spaces' assert '\t\n'.isspace() == True, 'isspace tabs and newlines' assert ''.isspace() == False, 'isspace empty' assert ' a '.isspace() == False, 'isspace with letter' # islower() assert 'hello'.islower() == True, 'islower basic' assert 'Hello'.islower() == False, 'islower mixed' assert ''.islower() == False, 'islower empty' assert '123'.islower() == False, 'islower numbers only' assert 'hello123'.islower() == True, 'islower with numbers' # isupper() assert 'HELLO'.isupper() == True, 'isupper basic' assert 'Hello'.isupper() == False, 'isupper mixed' assert ''.isupper() == False, 'isupper empty' assert '123'.isupper() == False, 'isupper numbers only' assert 'HELLO123'.isupper() == True, 'isupper with numbers' # isascii() assert 'hello'.isascii() == True, 'isascii basic' assert ''.isascii() == True, 'isascii empty' assert '\x00\x7f'.isascii() == True, 'isascii boundary' # isdecimal() assert '123'.isdecimal() == True, 'isdecimal basic' assert ''.isdecimal() == False, 'isdecimal empty' assert '123abc'.isdecimal() 
== False, 'isdecimal with letters' # === Phase 3: Search methods === # find() assert 'hello'.find('l') == 2, 'find basic' assert 'hello'.find('ll') == 2, 'find substring' assert 'hello'.find('x') == -1, 'find not found' assert 'hello'.find('') == 0, 'find empty string' assert 'hello'.find('l', 3) == 3, 'find with start' assert 'hello'.find('l', 0, 3) == 2, 'find with start and end' # rfind() assert 'hello'.rfind('l') == 3, 'rfind basic' assert 'hello'.rfind('x') == -1, 'rfind not found' assert 'hello'.rfind('l', 0, 3) == 2, 'rfind with end' # index() assert 'hello'.index('l') == 2, 'index basic' assert 'hello'.index('ll') == 2, 'index substring' # rindex() assert 'hello'.rindex('l') == 3, 'rindex basic' # count() assert 'hello'.count('l') == 2, 'count basic' assert 'hello'.count('ll') == 1, 'count substring' assert 'hello'.count('x') == 0, 'count not found' assert 'hello'.count('') == 6, 'count empty string' assert 'aaa'.count('a') == 3, 'count repeated' # startswith() assert 'hello'.startswith('he') == True, 'startswith basic' assert 'hello'.startswith('lo') == False, 'startswith false' assert 'hello'.startswith('') == True, 'startswith empty' assert 'hello'.startswith('ell', 1) == True, 'startswith with start' # endswith() assert 'hello'.endswith('lo') == True, 'endswith basic' assert 'hello'.endswith('he') == False, 'endswith false' assert 'hello'.endswith('') == True, 'endswith empty' assert 'hello'.endswith('ell', 0, 4) == True, 'endswith with end' # === Phase 4: Strip/trim methods === # strip() assert ' hello '.strip() == 'hello', 'strip whitespace' assert 'xxhelloxx'.strip('x') == 'hello', 'strip chars' assert 'hello'.strip() == 'hello', 'strip nothing' assert ''.strip() == '', 'strip empty' assert ' '.strip() == '', 'strip only whitespace' # lstrip() assert ' hello '.lstrip() == 'hello ', 'lstrip whitespace' assert 'xxhello'.lstrip('x') == 'hello', 'lstrip chars' assert 'hello'.lstrip() == 'hello', 'lstrip nothing' # rstrip() assert ' hello '.rstrip() == ' 
hello', 'rstrip whitespace' assert 'helloxx'.rstrip('x') == 'hello', 'rstrip chars' assert 'hello'.rstrip() == 'hello', 'rstrip nothing' # removeprefix() assert 'hello world'.removeprefix('hello ') == 'world', 'removeprefix basic' assert 'hello world'.removeprefix('world') == 'hello world', 'removeprefix not found' assert 'hello'.removeprefix('') == 'hello', 'removeprefix empty' # removesuffix() assert 'hello world'.removesuffix(' world') == 'hello', 'removesuffix basic' assert 'hello world'.removesuffix('hello') == 'hello world', 'removesuffix not found' assert 'hello'.removesuffix('') == 'hello', 'removesuffix empty' # === Phase 5: Split methods === # split() assert 'a b c'.split() == ['a', 'b', 'c'], 'split whitespace' assert 'a,b,c'.split(',') == ['a', 'b', 'c'], 'split comma' assert 'a,b,c'.split(',', 1) == ['a', 'b,c'], 'split maxsplit' assert ' a b '.split() == ['a', 'b'], 'split multiple spaces' assert 'hello'.split('x') == ['hello'], 'split not found' # rsplit() assert 'a b c'.rsplit() == ['a', 'b', 'c'], 'rsplit whitespace' assert 'a,b,c'.rsplit(',') == ['a', 'b', 'c'], 'rsplit comma' assert 'a,b,c'.rsplit(',', 1) == ['a,b', 'c'], 'rsplit maxsplit' # splitlines() assert 'a\nb\nc'.splitlines() == ['a', 'b', 'c'], 'splitlines basic' assert 'a\nb\nc'.splitlines(True) == ['a\n', 'b\n', 'c'], 'splitlines keepends' assert 'a\r\nb'.splitlines() == ['a', 'b'], 'splitlines crlf' assert ''.splitlines() == [], 'splitlines empty' # partition() assert 'hello world'.partition(' ') == ('hello', ' ', 'world'), 'partition basic' assert 'hello'.partition('x') == ('hello', '', ''), 'partition not found' assert 'hello world test'.partition(' ') == ('hello', ' ', 'world test'), 'partition first' # rpartition() assert 'hello world'.rpartition(' ') == ('hello', ' ', 'world'), 'rpartition basic' assert 'hello'.rpartition('x') == ('', '', 'hello'), 'rpartition not found' assert 'hello world test'.rpartition(' ') == ('hello world', ' ', 'test'), 'rpartition last' # === Phase 6: 
Replace/modify methods === # replace() assert 'hello'.replace('l', 'L') == 'heLLo', 'replace basic' assert 'hello'.replace('l', 'L', 1) == 'heLlo', 'replace count' assert 'hello'.replace('x', 'y') == 'hello', 'replace not found' assert 'aaa'.replace('a', 'b') == 'bbb', 'replace all' assert ''.replace('a', 'b') == '', 'replace empty' # center() assert 'hi'.center(6) == ' hi ', 'center basic' assert 'hi'.center(6, '-') == '--hi--', 'center fillchar' assert 'hi'.center(2) == 'hi', 'center no padding' assert 'hi'.center(1) == 'hi', 'center smaller' # ljust() assert 'hi'.ljust(6) == 'hi ', 'ljust basic' assert 'hi'.ljust(6, '-') == 'hi----', 'ljust fillchar' assert 'hi'.ljust(2) == 'hi', 'ljust no padding' # rjust() assert 'hi'.rjust(6) == ' hi', 'rjust basic' assert 'hi'.rjust(6, '-') == '----hi', 'rjust fillchar' assert 'hi'.rjust(2) == 'hi', 'rjust no padding' # zfill() assert '42'.zfill(5) == '00042', 'zfill basic' assert '-42'.zfill(5) == '-0042', 'zfill negative' assert '+42'.zfill(5) == '+0042', 'zfill positive' assert '42'.zfill(2) == '42', 'zfill no padding' assert ''.zfill(3) == '000', 'zfill empty' # === Phase 7: Additional tests for Python compatibility === # startswith/endswith with tuple assert 'hello'.startswith(('he', 'lo')) == True, 'startswith tuple first match' assert 'hello'.startswith(('lo', 'he')) == True, 'startswith tuple second match' assert 'hello'.startswith(('x', 'y')) == False, 'startswith tuple no match' assert 'hello'.endswith(('he', 'lo')) == True, 'endswith tuple first match' assert 'hello'.endswith(('lo', 'he')) == True, 'endswith tuple second match' assert 'hello'.endswith(('x', 'y')) == False, 'endswith tuple no match' assert 'hello'.startswith(('ell',), 1) == True, 'startswith tuple with start' # find/rfind/index/rindex/count with None as start/end assert 'hello'.find('l', None) == 2, 'find with None start' assert 'hello'.find('l', None, None) == 2, 'find with None start and end' assert 'hello'.find('l', 0, None) == 2, 'find with 
None end' assert 'hello'.rfind('l', None, None) == 3, 'rfind with None start and end' assert 'hello'.count('l', None, None) == 2, 'count with None start and end' assert 'hello'.startswith('he', None) == True, 'startswith with None start' assert 'hello'.endswith('lo', None, None) == True, 'endswith with None start and end' # strip with None assert ' hello '.strip(None) == 'hello', 'strip None same as no arg' assert ' hello '.lstrip(None) == 'hello ', 'lstrip None same as no arg' assert ' hello '.rstrip(None) == ' hello', 'rstrip None same as no arg' # === Phase 8: Keyword argument tests === # split with keyword args assert 'a,b,c'.split(sep=',') == ['a', 'b', 'c'], 'split sep kwarg' assert 'a,b,c'.split(',', maxsplit=1) == ['a', 'b,c'], 'split maxsplit kwarg' assert 'a,b,c'.split(sep=',', maxsplit=1) == ['a', 'b,c'], 'split both kwargs' # rsplit with keyword args assert 'a,b,c'.rsplit(sep=',') == ['a', 'b', 'c'], 'rsplit sep kwarg' assert 'a,b,c'.rsplit(',', maxsplit=1) == ['a,b', 'c'], 'rsplit maxsplit kwarg' assert 'a,b,c'.rsplit(sep=',', maxsplit=1) == ['a,b', 'c'], 'rsplit both kwargs' # splitlines with keyword args assert 'a\nb\nc'.splitlines(keepends=True) == ['a\n', 'b\n', 'c'], 'splitlines keepends kwarg' assert 'a\nb\nc'.splitlines(keepends=False) == ['a', 'b', 'c'], 'splitlines keepends=False' # replace with keyword args assert 'aaa'.replace('a', 'b', count=2) == 'bba', 'replace count kwarg' # === Phase 9: Additional methods === # encode() assert 'hello'.encode() == b'hello', 'encode default' assert 'hello'.encode('utf-8') == b'hello', 'encode utf-8' assert 'hello'.encode('utf8') == b'hello', 'encode utf8 alias' assert 'hello'.encode('UTF-8') == b'hello', 'encode UTF-8 case insensitive' assert ''.encode() == b'', 'encode empty' assert 'hello'.encode('utf-8', 'strict') == b'hello', 'encode with errors' # isidentifier() assert 'hello'.isidentifier() == True, 'isidentifier basic' assert '_hello'.isidentifier() == True, 'isidentifier underscore' assert 
'__init__'.isidentifier() == True, 'isidentifier dunder' assert 'hello123'.isidentifier() == True, 'isidentifier with digits' assert ''.isidentifier() == False, 'isidentifier empty' assert '123hello'.isidentifier() == False, 'isidentifier digit start' assert 'hello world'.isidentifier() == False, 'isidentifier with space' assert 'hello-world'.isidentifier() == False, 'isidentifier with dash' assert 'class'.isidentifier() == True, 'isidentifier keyword' # isidentifier doesn't check keywords # istitle() assert 'Hello World'.istitle() == True, 'istitle basic' assert 'Hello'.istitle() == True, 'istitle single word' assert 'HELLO'.istitle() == False, 'istitle all upper' assert 'hello'.istitle() == False, 'istitle all lower' assert ''.istitle() == False, 'istitle empty' assert 'Hello world'.istitle() == False, 'istitle lowercase word' assert '123'.istitle() == False, 'istitle numbers only' assert 'Hello 123 World'.istitle() == True, 'istitle with numbers' assert "They'Re".istitle() == True, 'istitle apostrophe' # === Phase 10: Unicode support for is* methods === # isdecimal with Unicode decimal digits assert '٠١٢٣٤٥٦٧٨٩'.isdecimal() == True, 'isdecimal Arabic-Indic' assert '0123456789'.isdecimal() == True, 'isdecimal Fullwidth' assert '०१२३४५६७८९'.isdecimal() == True, 'isdecimal Devanagari' assert '²'.isdecimal() == False, 'isdecimal superscript not decimal' assert '½'.isdecimal() == False, 'isdecimal fraction not decimal' # isdigit with superscripts and subscripts assert '²³'.isdigit() == True, 'isdigit superscripts' assert '₀₁₂₃₄₅₆₇₈₉'.isdigit() == True, 'isdigit subscripts' assert '0123456789'.isdigit() == True, 'isdigit ASCII' assert '٠١٢٣٤٥٦٧٨٩'.isdigit() == True, 'isdigit Arabic-Indic' assert '½'.isdigit() == False, 'isdigit fraction not digit' # isnumeric with fractions and other numerics assert '½'.isnumeric() == True, 'isnumeric fraction' assert '²'.isnumeric() == True, 'isnumeric superscript' assert '٠١٢٣٤٥٦٧٨٩'.isnumeric() == True, 'isnumeric Arabic-Indic' 
assert '0123456789'.isnumeric() == True, 'isnumeric ASCII' ================================================ FILE: crates/monty/test_cases/str__ops.py ================================================ # === String concatenation (+) === assert 'hello' + ' ' + 'world' == 'hello world', 'basic concat' assert '' + 'test' == 'test', 'empty left concat' assert 'test' + '' == 'test', 'empty right concat' assert '' + '' == '', 'empty both concat' assert 'a' + 'b' + 'c' + 'd' == 'abcd', 'multiple concat' # === Augmented assignment (+=) === s = 'hello' s += ' world' assert s == 'hello world', 'basic iadd' s = 'test' s += '' assert s == 'test', 'iadd empty' s = 'a' s += 'b' s += 'c' assert s == 'abc', 'multiple iadd' s = 'ab' s += s assert s == 'abab', 'iadd self' # === String length === assert len('') == 0, 'len empty' assert len('a') == 1, 'len single' assert len('hello') == 5, 'len basic' assert len('hello world') == 11, 'len with space' assert len('caf\xe9') == 4, 'len unicode' # === String repr/str === assert repr('') == "''", 'empty string repr' assert str('') == '', 'empty string str' assert repr('hello') == "'hello'", 'string repr' assert str('hello') == 'hello', 'string str' assert repr('hello "world"') == '\'hello "world"\'', 'string with quotes repr' assert str('hello "world"') == 'hello "world"', 'string with quotes str' # === String repetition (*) === assert 'ab' * 3 == 'ababab', 'str mult int' assert 3 * 'ab' == 'ababab', 'int mult str' assert 'x' * 0 == '', 'str mult zero' assert 'x' * -1 == '', 'str mult negative' assert '' * 5 == '', 'empty str mult' assert 'a' * 1 == 'a', 'str mult one' # === String repetition augmented assignment (*=) === s = 'ab' s *= 3 assert s == 'ababab', 'str imult' s = 'x' s *= 0 assert s == '', 'str imult zero' # === String join method === # Basic join on literals assert ','.join(['a', 'b', 'c']) == 'a,b,c', 'join list with comma' assert ''.join(['a', 'b', 'c']) == 'abc', 'join with empty separator' assert '-'.join([]) == '', 'join 
empty list' assert ','.join(['only']) == 'only', 'join single element' # Join with different iterables assert ' '.join(('hello', 'world')) == 'hello world', 'join tuple' # Join with string iterable (iterates over characters) assert ','.join('abc') == 'a,b,c', 'join string iterable' # Join with variable separator sep = '-' assert sep.join(['a', 'b']) == 'a-b', 'join with variable separator' # Heap-allocated string separator s = str('.') assert s.join(['a', 'b']) == 'a.b', 'join with heap string' # Mixed string types in iterable (interned and heap) mixed = ['hello', str('world')] assert ' '.join(mixed) == 'hello world', 'join with mixed string types' # === String indexing (getitem) === # Basic indexing assert 'hello'[0] == 'h', 'getitem index 0' assert 'hello'[1] == 'e', 'getitem index 1' assert 'hello'[4] == 'o', 'getitem last index' # Negative indexing assert 'hello'[-1] == 'o', 'getitem -1' assert 'hello'[-2] == 'l', 'getitem -2' assert 'hello'[-5] == 'h', 'getitem -5' # Single character strings assert 'a'[0] == 'a', 'getitem single char at 0' assert 'a'[-1] == 'a', 'getitem single char at -1' # Unicode strings s = 'café' assert s[0] == 'c', 'unicode getitem 0' assert s[1] == 'a', 'unicode getitem 1' assert s[2] == 'f', 'unicode getitem 2' assert s[3] == 'é', 'unicode getitem 3 (accented)' assert s[-1] == 'é', 'unicode getitem -1' # Multi-byte unicode (CJK characters) s = '日本語' assert s[0] == '日', 'cjk getitem 0' assert s[1] == '本', 'cjk getitem 1' assert s[2] == '語', 'cjk getitem 2' assert s[-1] == '語', 'cjk getitem -1' # Emoji (multi-byte UTF-8) s = 'a🎉b' assert s[0] == 'a', 'emoji string getitem 0' assert s[1] == '🎉', 'emoji string getitem 1 (emoji)' assert s[2] == 'b', 'emoji string getitem 2' # Heap-allocated strings s = str('hello') assert s[0] == 'h', 'heap string getitem' assert s[-1] == 'o', 'heap string negative getitem' # Variable index s = 'abc' i = 1 assert s[i] == 'b', 'getitem with variable index' # Bool indices (True=1, False=0) s = 'abc' assert 
s[False] == 'a', 'str getitem with False' assert s[True] == 'b', 'str getitem with True' # === Sorting and comparisons === assert 'a' < 'b', 'str < str' assert 'b' > 'a', 'str > str' assert 'a' <= 'a', 'str <= str equal' assert 'a' <= 'b', 'str <= str less' assert 'b' >= 'b', 'str >= str equal' assert 'b' >= 'a', 'str >= str greater' assert not ('b' < 'a'), 'str not < str' assert not ('a' > 'b'), 'str not > str' # Different lengths assert 'a' < 'aa', 'shorter prefix is less' assert 'ab' < 'b', 'first char decides' assert '' < 'a', 'empty string is less' assert 'abc' > 'ab', 'longer string with same prefix is greater' # Non-ASCII comparisons (by Unicode code point) assert 'café' < 'cafë', 'non-ascii comparison (é < ë)' assert 'z' < 'é', 'ascii < non-ascii (z < é)' assert '日' < '本', 'CJK comparison by code point' assert '😀' < '😁', 'emoji comparison by code point' # Sorting assert sorted('cba') == ['a', 'b', 'c'], 'sorted string' assert sorted(['b', 'c', 'a']) == ['a', 'b', 'c'], 'sorted list of strings' assert sorted(['café', 'cafë', 'cafa']) == ['cafa', 'café', 'cafë'], 'sorted non-ascii strings' assert sorted(['bb', 'a', 'ba']) == ['a', 'ba', 'bb'], 'sorted different length strings' ================================================ FILE: crates/monty/test_cases/str__partition_empty.py ================================================ 'hello'.partition('') """ TRACEBACK: Traceback (most recent call last): File "str__partition_empty.py", line 1, in <module> 'hello'.partition('') ~~~~~~~~~~~~~~~~~~~~~ ValueError: empty separator """ ================================================ FILE: crates/monty/test_cases/str__rsplit_empty_sep.py ================================================ 'hello'.rsplit('') """ TRACEBACK: Traceback (most recent call last): File "str__rsplit_empty_sep.py", line 1, in <module> 'hello'.rsplit('') ~~~~~~~~~~~~~~~~~~ ValueError: empty separator """ ================================================ FILE: crates/monty/test_cases/str__split_empty_sep.py 
================================================ 'hello'.split('') """ TRACEBACK: Traceback (most recent call last): File "str__split_empty_sep.py", line 1, in <module> 'hello'.split('') ~~~~~~~~~~~~~~~~~ ValueError: empty separator """ ================================================ FILE: crates/monty/test_cases/sys__types.py ================================================ # Tests for sys module types import sys # === Verify type() returns _io.TextIOWrapper for stdout/stderr === assert str(type(sys.stdout)) == "<class '_io.TextIOWrapper'>", 'type(stdout) is _io.TextIOWrapper' assert str(type(sys.stderr)) == "<class '_io.TextIOWrapper'>", 'type(stderr) is _io.TextIOWrapper' ================================================ FILE: crates/monty/test_cases/traceback__division_error.py ================================================ def foo(): 1 / 0 def bar(): foo() def baz(): bar() baz() """ TRACEBACK: Traceback (most recent call last): File "traceback__division_error.py", line 13, in <module> baz() ~~~~~ File "traceback__division_error.py", line 10, in baz bar() ~~~~~ File "traceback__division_error.py", line 6, in bar foo() ~~~~~ File "traceback__division_error.py", line 2, in foo 1 / 0 ~~~~~ ZeroDivisionError: division by zero """ ================================================ FILE: crates/monty/test_cases/traceback__index_error.py ================================================ def foo(): a = [] a[1] foo() """ TRACEBACK: Traceback (most recent call last): File "traceback__index_error.py", line 6, in <module> foo() ~~~~~ File "traceback__index_error.py", line 3, in foo a[1] ~~~~ IndexError: list index out of range """ ================================================ FILE: crates/monty/test_cases/traceback__insert_as_int.py ================================================ a = [] a.insert({1: 2}, 2) """ TRACEBACK: Traceback (most recent call last): File "traceback__insert_as_int.py", line 2, in <module> a.insert({1: 2}, 2) ~~~~~~~~~~~~~~~~~~~ TypeError: 'dict' object cannot be interpreted as an integer """ 
================================================ FILE: crates/monty/test_cases/traceback__nested_call.py ================================================ def foo(): raise ValueError('xxx') def bar(): foo() def baz(): bar() baz() """ TRACEBACK: Traceback (most recent call last): File "traceback__nested_call.py", line 13, in <module> baz() ~~~~~ File "traceback__nested_call.py", line 10, in baz bar() ~~~~~ File "traceback__nested_call.py", line 6, in bar foo() ~~~~~ File "traceback__nested_call.py", line 2, in foo raise ValueError('xxx') ValueError: xxx """ ================================================ FILE: crates/monty/test_cases/traceback__nonlocal_module_scope.py ================================================ # nonlocal at module level is a syntax error nonlocal x # type: ignore """ TRACEBACK: Traceback (most recent call last): File "traceback__nonlocal_module_scope.py", line 2 nonlocal x # type: ignore ~~~~~~~~~~ SyntaxError: nonlocal declaration not allowed at module level """ ================================================ FILE: crates/monty/test_cases/traceback__nonlocal_unbound.py ================================================ def outer(): def inner(): nonlocal x return x inner() x = 1 outer() """ TRACEBACK: Traceback (most recent call last): File "traceback__nonlocal_unbound.py", line 10, in <module> outer() ~~~~~~~ File "traceback__nonlocal_unbound.py", line 6, in outer inner() ~~~~~~~ File "traceback__nonlocal_unbound.py", line 4, in inner return x ~ NameError: cannot access free variable 'x' where it is not associated with a value in enclosing scope """ ================================================ FILE: crates/monty/test_cases/traceback__range_as_int.py ================================================ range([1]) """ TRACEBACK: Traceback (most recent call last): File "traceback__range_as_int.py", line 1, in <module> range([1]) ~~~~~~~~~~ TypeError: 'list' object cannot be interpreted as an integer """ ================================================ FILE: 
crates/monty/test_cases/traceback__recursion_error.py ================================================ def recurse(): recurse() recurse() """ TRACEBACK: Traceback (most recent call last): File "traceback__recursion_error.py", line 5, in <module> recurse() ~~~~~~~~~ File "traceback__recursion_error.py", line 2, in recurse recurse() ~~~~~~~~~ File "traceback__recursion_error.py", line 2, in recurse recurse() ~~~~~~~~~ File "traceback__recursion_error.py", line 2, in recurse recurse() ~~~~~~~~~ [Previous line repeated 47 more times] RecursionError: maximum recursion depth exceeded """ ================================================ FILE: crates/monty/test_cases/traceback__set_mutation.py ================================================ s = {1, 2} for x in s: s.add(3) """ TRACEBACK: Traceback (most recent call last): File "traceback__set_mutation.py", line 2, in <module> for x in s: ~ RuntimeError: Set changed size during iteration """ ================================================ FILE: crates/monty/test_cases/traceback__undefined_attr_call.py ================================================ def foo(): snap.method() foo() """ TRACEBACK: Traceback (most recent call last): File "traceback__undefined_attr_call.py", line 5, in <module> foo() ~~~~~ File "traceback__undefined_attr_call.py", line 2, in foo snap.method() ~~~~ NameError: name 'snap' is not defined """ ================================================ FILE: crates/monty/test_cases/traceback__undefined_call.py ================================================ def foo(): snap(1) foo() """ TRACEBACK: Traceback (most recent call last): File "traceback__undefined_call.py", line 5, in <module> foo() ~~~~~ File "traceback__undefined_call.py", line 2, in foo snap(1) ~~~~ NameError: name 'snap' is not defined """ ================================================ FILE: crates/monty/test_cases/traceback__undefined_raise.py ================================================ def foo(): raise snap foo() """ TRACEBACK: Traceback (most recent call last): File 
"traceback__undefined_raise.py", line 5, in <module> foo() ~~~~~ File "traceback__undefined_raise.py", line 2, in foo raise snap ~~~~ NameError: name 'snap' is not defined """ ================================================ FILE: crates/monty/test_cases/try_except__all.py ================================================ # === Basic exception catching === caught = False try: raise ValueError('test') except ValueError: caught = True assert caught, 'should catch ValueError' # === Exception variable binding === msg = None try: raise TypeError('the message') except TypeError as e: msg = repr(e) # repr(e) returns "TypeError('the message')" - confirms we caught the right exception assert msg == "TypeError('the message')", 'should capture exception' # === Multiple handlers - first match wins === which = None try: raise TypeError('type error') except ValueError: which = 'value' except TypeError: which = 'type' except: which = 'bare' assert which == 'type', 'first matching handler should be used' # === Bare except catches all === caught_bare = False try: raise KeyError('key') except: caught_bare = True assert caught_bare, 'bare except should catch all' # === Else block runs when no exception === else_ran = False try: x = 1 except: pass else: else_ran = True assert else_ran, 'else should run when no exception' # === Else block does not run when exception occurs === else_ran_with_exc = True try: raise ValueError() except ValueError: pass else: else_ran_with_exc = False assert else_ran_with_exc, 'else should not run when exception occurs' # === Finally always runs after try === finally_ran = False try: x = 1 finally: finally_ran = True assert finally_ran, 'finally should run after try' # === Finally runs after exception caught === finally_after_catch = False try: raise ValueError() except ValueError: pass finally: finally_after_catch = True assert finally_after_catch, 'finally should run after exception caught' # === Bare raise re-raises current exception === caught_reraised = False 
try: try: raise ValueError('original') except ValueError: raise # bare raise except ValueError as e: caught_reraised = repr(e) == "ValueError('original')" assert caught_reraised, 'bare raise should re-raise original exception' # === Nested try/except === outer_caught = False inner_caught = False try: try: raise ValueError('inner') except ValueError: inner_caught = True raise TypeError('outer') except TypeError: outer_caught = True assert inner_caught and outer_caught, 'nested exceptions should work' # === Exception base class matches all === caught_by_base = False try: raise KeyError('key') except Exception: caught_by_base = True assert caught_by_base, 'Exception should catch all exception types' # === Tuple of exception types === caught_tuple = False try: raise TypeError('type') except (ValueError, TypeError): caught_tuple = True assert caught_tuple, 'tuple of types should match' # === Return in try with finally === def try_return_finally(): try: return 1 finally: pass assert try_return_finally() == 1, 'return in try should work with finally' # === Return in finally overrides try return === def finally_return_overrides(): try: return 1 finally: return 2 # type: ignore[returnInFinally] assert finally_return_overrides() == 2, 'finally return should override try return' # === Exception in handler propagates === handler_exc_propagated = False try: try: raise ValueError() except ValueError: raise TypeError('from handler') except TypeError as e: handler_exc_propagated = repr(e) == "TypeError('from handler')" assert handler_exc_propagated, 'exception in handler should propagate' # === Return in finally overrides exception from handler === def finally_return_overrides_handler_exc(): try: raise TypeError('Error') finally: return 'finally wins handler' # type: ignore assert finally_return_overrides_handler_exc() == 'finally wins handler', ( 'return in finally should override exception from handler' ) def finally_return_overrides_handler_exc2(): try: try: raise 
ValueError('inner') except ValueError: raise TypeError('handler failure') finally: return 'finally wins handler' # type: ignore assert finally_return_overrides_handler_exc2() == 'finally wins handler', ( 'return in finally should override exception from handler' ) # === Return in finally overrides exception from else === def finally_return_overrides_else_exc(): try: try: pass except ValueError: pass else: raise RuntimeError('else failure') finally: return 'finally wins else' # type: ignore assert finally_return_overrides_else_exc() == 'finally wins else', ( 'return in finally should override exception from else block' ) # === Exception variable is cleared after handler === # After except handler, the exception variable is deleted (Python 3 behavior) e_cleared = False try: try: raise ValueError('test') except ValueError as e: pass # e should be undefined here in Python 3, accessing it raises NameError _ = e # This should raise NameError except NameError: e_cleared = True assert e_cleared, 'exception variable should be deleted after handler' # === Unhandled exception propagates === unhandled_propagated = False try: try: raise KeyError('unhandled') except ValueError: pass # KeyError doesn't match, should propagate except KeyError as e: unhandled_propagated = repr(e) == "KeyError('unhandled')" assert unhandled_propagated, 'unhandled exception should propagate to outer try' # === Finally runs before unhandled exception propagates === finally_before_propagate = False try: try: raise KeyError('propagate') except ValueError: pass finally: finally_before_propagate = True except KeyError: pass assert finally_before_propagate, 'finally should run before exception propagates' # === Exception in finally replaces original exception === finally_exc_wins = False try: try: raise ValueError('original') finally: raise TypeError('from finally') except TypeError as e: finally_exc_wins = repr(e) == "TypeError('from finally')" except ValueError: finally_exc_wins = False # Should not 
reach here assert finally_exc_wins, 'exception in finally should replace original' # === Exception in else propagates === else_exc_propagated = False try: try: pass # No exception in try except: pass else: raise ValueError('from else') except ValueError as e: else_exc_propagated = repr(e) == "ValueError('from else')" assert else_exc_propagated, 'exception in else should propagate' # === Finally runs after exception in else === finally_after_else_exc = False try: try: pass except: pass else: raise ValueError('else error') finally: finally_after_else_exc = True except ValueError: pass assert finally_after_else_exc, 'finally should run after exception in else' # === Exception hierarchy: LookupError === # LookupError should catch KeyError caught_key_by_lookup = False try: raise KeyError('key') except LookupError: caught_key_by_lookup = True assert caught_key_by_lookup, 'LookupError should catch KeyError' # LookupError should catch IndexError caught_index_by_lookup = False try: raise IndexError('index') except LookupError: caught_index_by_lookup = True assert caught_index_by_lookup, 'LookupError should catch IndexError' # LookupError should NOT catch ValueError caught_value_by_lookup = False try: try: raise ValueError('value') except LookupError: caught_value_by_lookup = True except ValueError: pass assert not caught_value_by_lookup, 'LookupError should NOT catch ValueError' # === Exception hierarchy: ArithmeticError === # ArithmeticError should catch ZeroDivisionError caught_zero_by_arith = False try: raise ZeroDivisionError('zero') except ArithmeticError: caught_zero_by_arith = True assert caught_zero_by_arith, 'ArithmeticError should catch ZeroDivisionError' # ArithmeticError should catch OverflowError caught_overflow_by_arith = False try: raise OverflowError('overflow') except ArithmeticError: caught_overflow_by_arith = True assert caught_overflow_by_arith, 'ArithmeticError should catch OverflowError' # === Exception hierarchy: RuntimeError === # RuntimeError should 
catch NotImplementedError caught_notimpl_by_runtime = False try: raise NotImplementedError('not impl') except RuntimeError: caught_notimpl_by_runtime = True assert caught_notimpl_by_runtime, 'RuntimeError should catch NotImplementedError' # RuntimeError should catch RecursionError caught_recursion_by_runtime = False try: raise RecursionError('recursion') except RuntimeError: caught_recursion_by_runtime = True assert caught_recursion_by_runtime, 'RuntimeError should catch RecursionError' # === Exception hierarchy in tuple === # Tuple containing base class should catch derived caught_by_tuple_base = False try: raise KeyError('key') except (ValueError, LookupError): caught_by_tuple_base = True assert caught_by_tuple_base, 'tuple with LookupError should catch KeyError' # === isinstance with exception hierarchy === try: raise KeyError('key') except KeyError as e: assert isinstance(e, KeyError), 'exception should be instance of KeyError' assert isinstance(e, LookupError), 'KeyError should be instance of LookupError' assert isinstance(e, Exception), 'KeyError should be instance of Exception' assert not isinstance(e, ArithmeticError), 'KeyError should not be ArithmeticError' try: raise ZeroDivisionError('zero') except ZeroDivisionError as e: assert isinstance(e, ZeroDivisionError), 'exception should be instance of ZeroDivisionError' assert isinstance(e, ArithmeticError), 'ZeroDivisionError should be instance of ArithmeticError' assert isinstance(e, Exception), 'ZeroDivisionError should be instance of Exception' assert not isinstance(e, LookupError), 'ZeroDivisionError should not be LookupError' # === Multiple handlers where none match === # Exception should propagate when no handler matches multi_no_match_propagated = False try: try: raise MemoryError('out of memory') except ValueError: pass except TypeError: pass except KeyError: pass except MemoryError as e: multi_no_match_propagated = repr(e) == "MemoryError('out of memory')" assert multi_no_match_propagated, 'exception 
should propagate when no handler matches' # === BaseException hierarchy === # BaseException should catch all exceptions including Exception subclasses caught_value_by_base = False try: raise ValueError('value') except BaseException: caught_value_by_base = True assert caught_value_by_base, 'BaseException should catch ValueError' caught_key_by_base = False try: raise KeyError('key') except BaseException: caught_key_by_base = True assert caught_key_by_base, 'BaseException should catch KeyError' caught_type_by_base = False try: raise TypeError('type') except BaseException: caught_type_by_base = True assert caught_type_by_base, 'BaseException should catch TypeError' # BaseException catches KeyboardInterrupt caught_keyboard_by_base = False try: raise KeyboardInterrupt() except BaseException: caught_keyboard_by_base = True assert caught_keyboard_by_base, 'BaseException should catch KeyboardInterrupt' # BaseException catches SystemExit caught_sysexit_by_base = False try: raise SystemExit() except BaseException: caught_sysexit_by_base = True assert caught_sysexit_by_base, 'BaseException should catch SystemExit' # === Exception does NOT catch BaseException direct subclasses === # Exception should NOT catch KeyboardInterrupt caught_keyboard_by_exc = False try: try: raise KeyboardInterrupt() except Exception: caught_keyboard_by_exc = True except BaseException: pass assert not caught_keyboard_by_exc, 'Exception should NOT catch KeyboardInterrupt' # Exception should NOT catch SystemExit caught_sysexit_by_exc = False try: try: raise SystemExit() except Exception: caught_sysexit_by_exc = True except BaseException: pass assert not caught_sysexit_by_exc, 'Exception should NOT catch SystemExit' # But Exception SHOULD catch regular exceptions caught_value_by_exc = False try: raise ValueError('test') except Exception: caught_value_by_exc = True assert caught_value_by_exc, 'Exception should catch ValueError' # === isinstance with BaseException === try: raise ValueError('test') except 
ValueError as e: assert isinstance(e, BaseException), 'ValueError should be instance of BaseException' try: raise KeyboardInterrupt() except KeyboardInterrupt as e: assert isinstance(e, BaseException), 'KeyboardInterrupt should be instance of BaseException' assert not isinstance(e, Exception), 'KeyboardInterrupt should NOT be instance of Exception' try: raise SystemExit() except SystemExit as e: assert isinstance(e, BaseException), 'SystemExit should be instance of BaseException' assert not isinstance(e, Exception), 'SystemExit should NOT be instance of Exception' # === Tuple containing BaseException === caught_by_tuple_with_base = False try: raise KeyboardInterrupt() except (ValueError, BaseException): caught_by_tuple_with_base = True assert caught_by_tuple_with_base, 'tuple with BaseException should catch KeyboardInterrupt' ================================================ FILE: crates/monty/test_cases/try_except__bare_raise_no_context.py ================================================ raise # Raise=RuntimeError('No active exception to reraise') ================================================ FILE: crates/monty/test_cases/try_except__invalid_type.py ================================================ try: raise ValueError('test') except 123: pass # Raise=TypeError('catching classes that do not inherit from BaseException is not allowed') ================================================ FILE: crates/monty/test_cases/tuple__getitem_out_of_bounds.py ================================================ a = (1, 2) a[5] # Raise=IndexError('tuple index out of range') ================================================ FILE: crates/monty/test_cases/tuple__index_not_found.py ================================================ (1, 2, 3).index(4) """ TRACEBACK: Traceback (most recent call last): File "tuple__index_not_found.py", line 1, in <module> (1, 2, 3).index(4) ~~~~~~~~~~~~~~~~~~ ValueError: tuple.index(x): x not in tuple """ ================================================ FILE: 
crates/monty/test_cases/tuple__index_start_gt_end.py ================================================ # Test that tuple.index with start > end doesn't panic but raises ValueError (1, 2, 3).index(1, 5, 2) """ TRACEBACK: Traceback (most recent call last): File "tuple__index_start_gt_end.py", line 2, in <module> (1, 2, 3).index(1, 5, 2) ~~~~~~~~~~~~~~~~~~~~~~~~ ValueError: tuple.index(x): x not in tuple """ ================================================ FILE: crates/monty/test_cases/tuple__methods.py ================================================ # === tuple.index() === t = (1, 2, 3, 2) assert t.index(2) == 1, 'index finds first occurrence' assert t.index(3) == 2, 'index finds element' assert t.index(2, 2) == 3, 'index with start' assert t.index(2, 1, 4) == 1, 'index with start and end' t = ('a', 'b', 'c') assert t.index('b') == 1, 'index string in tuple' # === tuple.count() === t = (1, 2, 2, 3, 2) assert t.count(2) == 3, 'count multiple occurrences' assert t.count(1) == 1, 'count single occurrence' assert t.count(4) == 0, 'count zero occurrences' assert ().count(1) == 0, 'count on empty tuple' t = ('a', 'b', 'a') assert t.count('a') == 2, 'count strings' ================================================ FILE: crates/monty/test_cases/tuple__ops.py ================================================ # === Empty tuple identity (singleton optimization) === # In Python, () is () is always True because empty tuples are interned assert () is (), 'empty tuple identity' assert tuple() is (), 'tuple() is empty tuple' assert tuple() is tuple(), 'tuple() identity' a = () b = () assert a is b, 'empty tuple vars are same object' # Empty tuple from operations assert (1,)[1:] is (), 'slice to empty is singleton' assert (1, 2) * 0 is (), 'mult by 0 is empty singleton' # === Tuple length === assert len(()) == 0, 'len empty' assert len((1,)) == 1, 'len single' assert len((1, 2, 3)) == 3, 'len basic' # === Tuple indexing === a = (1, 2, 3) assert a[1] == 2, 'getitem basic' a = ('a', 'b', 'c') 
assert a[0 - 2] == 'b', 'getitem negative' assert a[-1] == 'c', 'getitem -1' # === Nested tuples === assert ((1, 2), (3, 4)) == ((1, 2), (3, 4)), 'nested tuple' # === Tuple repr/str === assert repr((1, 2)) == '(1, 2)', 'tuple repr' assert str((1, 2)) == '(1, 2)', 'tuple str' # === Tuple concatenation (+) === assert (1, 2) + (3, 4) == (1, 2, 3, 4), 'tuple add basic' assert () + (1, 2) == (1, 2), 'empty add tuple' assert (1, 2) + () == (1, 2), 'tuple add empty' assert () + () == (), 'empty add empty' assert ('a', 'b') + ('c',) == ('a', 'b', 'c'), 'tuple add strings' assert ((1, 2),) + ((3, 4),) == ((1, 2), (3, 4)), 'tuple add nested' # === Tuple repetition (*) === assert (1, 2) * 3 == (1, 2, 1, 2, 1, 2), 'tuple mult int' assert 3 * (1, 2) == (1, 2, 1, 2, 1, 2), 'int mult tuple' assert (1,) * 0 == (), 'tuple mult zero' assert (1,) * -1 == (), 'tuple mult negative' assert () * 5 == (), 'empty tuple mult' assert (1, 2) * 1 == (1, 2), 'tuple mult one' # === Tuple augmented assignment edge cases === t = ([1],) try: t[0] += [2] assert False, 'tuple item augmented assignment should fail' except TypeError as e: assert e.args == ("'tuple' object does not support item assignment",), 'tuple += error matches CPython' assert t == ([1, 2],), 'inner list mutation happens before tuple store fails' # === tuple() constructor === assert tuple() == (), 'tuple() empty' assert tuple([1, 2, 3]) == (1, 2, 3), 'tuple from list' assert tuple((1, 2, 3)) == (1, 2, 3), 'tuple from tuple' assert tuple(range(3)) == (0, 1, 2), 'tuple from range' assert tuple('abc') == ('a', 'b', 'c'), 'tuple from string' assert tuple(b'abc') == (97, 98, 99), 'tuple from bytes' assert tuple({'a': 1, 'b': 2}) == ('a', 'b'), 'tuple from dict yields keys' # non-ASCII strings (multi-byte UTF-8) assert tuple('héllo') == ('h', 'é', 'l', 'l', 'o'), 'tuple from string with accented char' assert tuple('日本') == ('日', '本'), 'tuple from string with CJK chars' assert tuple('a🎉b') == ('a', '🎉', 'b'), 'tuple from string with 
emoji' # === Tuple unpacking (PEP 448) === a = (1, 2) b = (3, 4) assert (*a,) == (1, 2), 'single tuple unpack' assert (*a, *b) == (1, 2, 3, 4), 'double tuple unpack' assert (0, *a, 5) == (0, 1, 2, 5), 'mixed tuple unpack' assert (*(),) == (), 'unpack empty tuple' assert (*[1, 2],) == (1, 2), 'unpack list into tuple' # === Tuple comparison (<, >, <=, >=) === assert (1, 2) < (1, 3), 'lt second element differs' assert (1,) < (2,), 'lt single element' assert () < (1,), 'lt empty vs non-empty' assert (1, 2) < (1, 2, 3), 'lt shorter tuple' assert not (1, 2) < (1, 2), 'not lt when equal' assert not (1, 3) < (1, 2), 'not lt when greater' assert (1, 3) > (1, 2), 'gt second element' assert (2,) > (1,), 'gt single element' assert (1,) > (), 'gt non-empty vs empty' assert (1, 2, 3) > (1, 2), 'gt longer tuple' assert not (1, 2) > (1, 2), 'not gt when equal' assert (1, 2) <= (1, 2), 'le when equal' assert (1, 2) <= (1, 3), 'le when less' assert not (1, 3) <= (1, 2), 'not le when greater' assert (1, 2) >= (1, 2), 'ge when equal' assert (1, 3) >= (1, 2), 'ge when greater' assert not (1, 2) >= (1, 3), 'not ge when less' # === Tuple comparison with sorted() === assert sorted([(2, 'b'), (1, 'a')]) == [(1, 'a'), (2, 'b')], 'sorted tuples' assert sorted([(1, 'b'), (1, 'a')]) == [(1, 'a'), (1, 'b')], 'sorted tuples second element' assert sorted([(3,), (1,), (2,)]) == [(1,), (2,), (3,)], 'sorted single-element tuples' # === Nested tuple comparison === assert ((1, 2), 3) < ((1, 3), 2), 'nested tuple comparison' assert (1, (2, 3)) < (1, (2, 4)), 'nested tuple inner comparison' # === Equal-but-unorderable elements (None, lists, dicts) === # CPython checks __eq__ first; equal elements skip ordering comparison assert not (1, None) < (1, None), 'equal None elements not lt' assert (1, None) <= (1, None), 'equal None elements le' assert (1, None) >= (1, None), 'equal None elements ge' assert not (1, None) > (1, None), 'equal None elements not gt' assert (1, None) < (2, None), 'first element 
resolves before None' assert (1, [1, 2]) <= (1, [1, 2]), 'equal list elements le' # === Mixed types in tuple comparison === assert (1,) < (2.0,), 'int vs float in tuple' assert (1.0,) < (2,), 'float vs int in tuple' assert (True,) < (2,), 'bool vs int in tuple' assert (False,) < (True,), 'False vs True in tuple' assert (1, 'a') < (1, 'b'), 'string comparison in tuple' assert ('a', 1) < ('b', 1), 'string first element in tuple' # === Empty and equal tuples === assert not () < (), 'empty tuples not lt' assert () <= (), 'empty tuples le' assert () >= (), 'empty tuples ge' assert not () > (), 'empty tuples not gt' ================================================ FILE: crates/monty/test_cases/tuple__unpack_type_error.py ================================================ (*42,) # Raise=TypeError('Value after * must be an iterable, not int') ================================================ FILE: crates/monty/test_cases/type__builtin_attr_error.py ================================================ x = len x.nonexistent """ TRACEBACK: Traceback (most recent call last): File "type__builtin_attr_error.py", line 2, in x.nonexistent AttributeError: 'builtin_function_or_method' object has no attribute 'nonexistent' """ ================================================ FILE: crates/monty/test_cases/type__bytes_negative.py ================================================ bytes(-1) # Raise=ValueError('negative count') ================================================ FILE: crates/monty/test_cases/type__cell_not_builtin.py ================================================ print(cell) """ TRACEBACK: Traceback (most recent call last): File "type__cell_not_builtin.py", line 1, in print(cell) ~~~~ NameError: name 'cell' is not defined """ ================================================ FILE: crates/monty/test_cases/type__exception_attr_error.py ================================================ # Regression test for: "fmt() called on disabled variant" panic # Type::Exception must be displayable 
in error messages. e = ValueError('test') e.nonexistent """ TRACEBACK: Traceback (most recent call last): File "type__exception_attr_error.py", line 4, in e.nonexistent AttributeError: 'ValueError' object has no attribute 'nonexistent' """ ================================================ FILE: crates/monty/test_cases/type__float_conversion_error.py ================================================ float([1, 2]) # Raise=TypeError("float() argument must be a string or a real number, not 'list'") ================================================ FILE: crates/monty/test_cases/type__float_repr_both_quotes.py ================================================ float("it's \"nice\"") """ TRACEBACK: Traceback (most recent call last): File "type__float_repr_both_quotes.py", line 1, in float("it's \"nice\"") ~~~~~~~~~~~~~~~~~~~~~~ ValueError: could not convert string to float: 'it\'s "nice"' """ ================================================ FILE: crates/monty/test_cases/type__float_repr_newline.py ================================================ float("a\nb") """ TRACEBACK: Traceback (most recent call last): File "type__float_repr_newline.py", line 1, in float("a\nb") ~~~~~~~~~~~~~ ValueError: could not convert string to float: 'a\nb' """ ================================================ FILE: crates/monty/test_cases/type__float_repr_single_quote.py ================================================ float("it's") """ TRACEBACK: Traceback (most recent call last): File "type__float_repr_single_quote.py", line 1, in float("it's") ~~~~~~~~~~~~~ ValueError: could not convert string to float: "it's" """ ================================================ FILE: crates/monty/test_cases/type__int_conversion_error.py ================================================ int([1, 2]) # Raise=TypeError("int() argument must be a string, a bytes-like object or a real number, not 'list'") ================================================ FILE: crates/monty/test_cases/type__list_not_iterable.py 
================================================ list(123) # Raise=TypeError("'int' object is not iterable") ================================================ FILE: crates/monty/test_cases/type__non_builtin_name_error.py ================================================ print(TextIOWrapper) """ TRACEBACK: Traceback (most recent call last): File "type__non_builtin_name_error.py", line 1, in print(TextIOWrapper) ~~~~~~~~~~~~~ NameError: name 'TextIOWrapper' is not defined """ ================================================ FILE: crates/monty/test_cases/type__ops.py ================================================ # === type() function === assert type(1) == int, 'type(int) returns int' assert type(1.5) == float, 'type(float) returns float' assert type(True) == bool, 'type(bool) returns bool' assert type('hello') == str, 'type(str) returns str' assert type([1, 2]) == list, 'type(list) returns list' assert type((1, 2)) == tuple, 'type(tuple) returns tuple' assert type({1: 2}) == dict, 'type(dict) returns dict' assert type(b'hi') == bytes, 'type(bytes) returns bytes' assert type(None) == type(None), 'type(None) is consistent' # === type() inequality === assert type(1) != str, 'int type != str' assert type([]) != tuple, 'list type != tuple' assert type({}) != list, 'dict type != list' assert type(1) != float, 'int type != float' # === type repr === assert repr(int) == "", 'int type repr' assert repr(float) == "", 'float type repr' assert repr(bool) == "", 'bool type repr' assert repr(str) == "", 'str type repr' assert repr(list) == "", 'list type repr' assert repr(tuple) == "", 'tuple type repr' assert repr(dict) == "", 'dict type repr' assert repr(bytes) == "", 'bytes type repr' # === type identity === assert int is int, 'int is int' assert str is str, 'str is str' assert list is list, 'list is list' assert type(1) is int, 'type(1) is int' assert type('') is str, 'type str is str' assert type([]) is list, 'type([]) is list' # === list() constructor === assert list() == 
[], 'list() empty' assert list([1, 2, 3]) == [1, 2, 3], 'list(list) copy' assert list((1, 2, 3)) == [1, 2, 3], 'list(tuple) convert' assert list(range(3)) == [0, 1, 2], 'list(range) convert' assert list('abc') == ['a', 'b', 'c'], 'list(str) split chars' assert list('') == [], 'list empty str' # list copy is independent orig = [1, 2, 3] copy = list(orig) copy.append(4) assert orig == [1, 2, 3], 'list copy is independent' assert copy == [1, 2, 3, 4], 'list copy modified' # === tuple() constructor === assert tuple() == (), 'tuple() empty' assert tuple([1, 2, 3]) == (1, 2, 3), 'tuple(list) convert' assert tuple((1, 2)) == (1, 2), 'tuple(tuple) copy' assert tuple(range(3)) == (0, 1, 2), 'tuple(range) convert' assert tuple('ab') == ('a', 'b'), 'tuple(str) split chars' assert tuple('') == (), 'tuple empty str' # === dict() constructor === assert dict() == {}, 'dict() empty' assert dict({1: 2}) == {1: 2}, 'dict(dict) copy' assert dict({'a': 1, 'b': 2}) == {'a': 1, 'b': 2}, 'dict(dict) multiple keys' # dict copy is independent orig_dict = {1: 2} copy_dict = dict(orig_dict) copy_dict[3] = 4 assert orig_dict == {1: 2}, 'dict copy is independent' assert copy_dict == {1: 2, 3: 4}, 'dict copy modified' assert dict([('a', 1), ('b', 2)]) == {'a': 1, 'b': 2}, 'dict(list of tuples)' assert dict((('a', 1), ('b', 2))) == {'a': 1, 'b': 2}, 'dict(tuple of tuples)' headers = ['a', 'b'] row_data = [1, 2] assert dict(zip(headers, row_data)) == {'a': 1, 'b': 2}, 'dict(zip(list, list))' assert dict(zip(['a', 'b'], [1])) == {'a': 1}, 'dict(zip()) truncates to shortest iterable' assert dict(a=1, b=2) == {'a': 1, 'b': 2}, 'dict keyword arguments' assert dict([('a', 1)], b=2) == {'a': 1, 'b': 2}, 'dict positional iterable plus kwargs' assert dict([('a', 1)], a=2) == {'a': 2}, 'dict kwargs overwrite positional iterable values' # === str() constructor === assert str() == '', 'str() empty' assert str(123) == '123', 'str(int)' assert str(-42) == '-42', 'str(negative int)' assert str(0) == '0', 
'str(zero)' assert str(1.5) == '1.5', 'str(float)' assert str(True) == 'True', 'str(bool True)' assert str(False) == 'False', 'str(bool False)' assert str(None) == 'None', 'str(None)' assert str([1, 2]) == '[1, 2]', 'str(list)' assert str((1, 2)) == '(1, 2)', 'str(tuple)' assert str({1: 2}) == '{1: 2}', 'str(dict)' assert str('hello') == 'hello', 'str(str)' assert str(b'hi') == "b'hi'", 'str(bytes)' # === bytes() constructor === assert bytes() == b'', 'bytes() empty' assert bytes(3) == b'\x00\x00\x00', 'bytes(int) zero-filled' assert bytes(0) == b'', 'bytes(0) empty' assert bytes(b'hi') == b'hi', 'bytes(bytes) copy' # === int() constructor === assert int() == 0, 'int() default' assert int(42) == 42, 'int(int)' assert int(-5) == -5, 'int(negative int)' assert int(3.7) == 3, 'int(float) truncates down' assert int(-3.7) == -3, 'int(negative float) truncates toward zero' assert int(3.0) == 3, 'int(whole float)' assert int(True) == 1, 'int(True)' assert int(False) == 0, 'int(False)' # int() with extreme float values (should clamp to i64 range in Monty) # Note: Python uses arbitrary precision; Monty clamps to i64 assert isinstance(int(1e18), int), 'int(large float) returns int' assert isinstance(int(-1e18), int), 'int(large negative float) returns int' assert int(0.0) == 0, 'int(0.0) is zero' assert int(-0.0) == 0, 'int(-0.0) is zero' assert int(0.9) == 0, 'int(0.9) truncates to 0' assert int(-0.9) == 0, 'int(-0.9) truncates to 0' # === float() constructor === assert float() == 0.0, 'float() default' assert float(42) == 42.0, 'float(int)' assert float(-5) == -5.0, 'float(negative int)' assert float(3.14) == 3.14, 'float(float)' assert float(True) == 1.0, 'float(True)' assert float(False) == 0.0, 'float(False)' # === bool() constructor === assert bool() == False, 'bool() default' assert bool(0) == False, 'bool(0)' assert bool(1) == True, 'bool(1)' assert bool(-1) == True, 'bool(-1)' assert bool(0.0) == False, 'bool(0.0)' assert bool(1.5) == True, 'bool(1.5)' assert 
bool('') == False, 'bool empty str' assert bool('x') == True, 'bool non-empty str' assert bool([]) == False, 'bool empty list' assert bool([1]) == True, 'bool non-empty list' assert bool(()) == False, 'bool empty tuple' assert bool((1,)) == True, 'bool non-empty tuple' assert bool({}) == False, 'bool empty dict' assert bool({1: 2}) == True, 'bool non-empty dict' assert bool(None) == False, 'bool(None)' # === isinstance() === assert isinstance(1, int), 'isinstance int' assert isinstance(1.5, float), 'isinstance float' assert isinstance(True, bool), 'isinstance bool' assert isinstance('hello', str), 'isinstance str' assert isinstance([1, 2], list), 'isinstance list' assert isinstance((1, 2), tuple), 'isinstance tuple' assert isinstance({1: 2}, dict), 'isinstance dict' assert isinstance(b'hi', bytes), 'isinstance bytes' # isinstance negative cases assert not isinstance(1, str), 'isinstance int not str' assert not isinstance('x', int), 'isinstance str not int' assert not isinstance([], dict), 'isinstance list not dict' # isinstance with tuple of types assert isinstance(1, (int, str)), 'isinstance tuple match first' assert isinstance('x', (int, str)), 'isinstance tuple match second' assert not isinstance([], (int, str)), 'isinstance tuple no match' assert isinstance(1, (str, float, int)), 'isinstance tuple match third' # bool is subtype of int assert isinstance(True, int), 'bool is instance of int' assert isinstance(False, int), 'False is instance of int' assert isinstance(True, (int, str)), 'bool matches int in tuple' # isinstance with exception types err = ValueError('test') assert isinstance(err, ValueError), 'isinstance exception' assert isinstance(err, Exception), 'isinstance exception base type' assert not isinstance(err, TypeError), 'isinstance exception wrong type' assert isinstance(err, (ValueError, TypeError)), 'isinstance exception tuple' # isinstance with nested tuples assert isinstance('a', (int, (str, bytes))), 'isinstance nested tuple match' assert 
isinstance(1, ((str, float), int)), 'isinstance deeply nested' assert not isinstance([], (int, (str, bytes))), 'isinstance nested tuple no match' # NoneType capitalization assert repr(type(None)) == "", 'NoneType capitalized' # === type().__name__ === assert type(42).__name__ == 'int', 'int type name' assert type('hello').__name__ == 'str', 'str type name' assert type(True).__name__ == 'bool', 'bool type name' assert type(None).__name__ == 'NoneType', 'NoneType name' assert type([1, 2]).__name__ == 'list', 'list type name' assert type({'a': 1}).__name__ == 'dict', 'dict type name' # type().__name__ for exceptions try: raise ValueError('test') except ValueError as e: assert type(e).__name__ == 'ValueError', 'exception type name' ================================================ FILE: crates/monty/test_cases/type__shadow_exc.py ================================================ # Builtin exception type 'ValueError' can be shadowed by assignment ValueError = 'not an exception' assert ValueError == 'not an exception', 'ValueError shadowed' ================================================ FILE: crates/monty/test_cases/type__shadow_int.py ================================================ # Builtin type name 'int' can be shadowed by assignment int = 42 assert int == 42, 'int shadowed by assignment' # for loop variable shadows builtin result = [] for int in range(3): result.append(int) assert result == [0, 1, 2], 'int works as for loop variable' ================================================ FILE: crates/monty/test_cases/type__shadow_len.py ================================================ # Builtin function 'len' can be shadowed by assignment len = 'shadowed' assert len == 'shadowed', 'len shadowed by assignment' ================================================ FILE: crates/monty/test_cases/type__tuple_not_iterable.py ================================================ tuple(123) # Raise=TypeError("'int' object is not iterable") ================================================ 
FILE: crates/monty/test_cases/type_error__int_add_list.py ================================================ 2 + [1] # Raise=TypeError("unsupported operand type(s) for +: 'int' and 'list'") ================================================ FILE: crates/monty/test_cases/type_error__int_div_str.py ================================================ 5 / 'x' # Raise=TypeError("unsupported operand type(s) for /: 'int' and 'str'") ================================================ FILE: crates/monty/test_cases/type_error__int_floordiv_str.py ================================================ 5 // 'x' # Raise=TypeError("unsupported operand type(s) for //: 'int' and 'str'") ================================================ FILE: crates/monty/test_cases/type_error__int_iadd_str.py ================================================ x = 5 x += 'a' # Raise=TypeError("unsupported operand type(s) for +=: 'int' and 'str'") ================================================ FILE: crates/monty/test_cases/type_error__int_mod_str.py ================================================ 5 % 'x' # Raise=TypeError("unsupported operand type(s) for %: 'int' and 'str'") ================================================ FILE: crates/monty/test_cases/type_error__int_pow_str.py ================================================ 5 ** 'x' # Raise=TypeError("unsupported operand type(s) for ** or pow(): 'int' and 'str'") ================================================ FILE: crates/monty/test_cases/type_error__int_sub_str.py ================================================ 5 - 'x' # Raise=TypeError("unsupported operand type(s) for -: 'int' and 'str'") ================================================ FILE: crates/monty/test_cases/type_error__list_add_int.py ================================================ [1, 2] + 3 # Raise=TypeError('can only concatenate list (not "int") to list') ================================================ FILE: crates/monty/test_cases/type_error__list_add_str.py 
================================================ [1] + 'x' # Raise=TypeError('can only concatenate list (not "str") to list') ================================================ FILE: crates/monty/test_cases/type_error__list_iadd_int.py ================================================ # xfail=monty # Monty's list += only supports other lists, not arbitrary iterables. # CPython's list += calls extend() which requires an iterable. x = [1] x += 2 # Raise=TypeError("'int' object is not iterable") ================================================ FILE: crates/monty/test_cases/type_error__str_add_int.py ================================================ 'hello' + 1 # Raise=TypeError('can only concatenate str (not "int") to str') ================================================ FILE: crates/monty/test_cases/type_error__str_iadd_int.py ================================================ x = 'hello' x += 1 # Raise=TypeError('can only concatenate str (not "int") to str') ================================================ FILE: crates/monty/test_cases/type_error__unary_invert_str.py ================================================ # bitwise NOT on string should raise TypeError ~'hello' # Raise=TypeError("bad operand type for unary ~: 'str'") ================================================ FILE: crates/monty/test_cases/type_error__unary_minus_str.py ================================================ # unary minus on heap-allocated string should raise TypeError # str() creates a heap-allocated string, triggering ref count check -str(42) # Raise=TypeError("bad operand type for unary -: 'str'") ================================================ FILE: crates/monty/test_cases/type_error__unary_neg_str.py ================================================ # unary minus on string should raise TypeError -'hello' # Raise=TypeError("bad operand type for unary -: 'str'") ================================================ FILE: crates/monty/test_cases/type_error__unary_plus_str.py 
================================================ # unary plus on heap-allocated string should raise TypeError # str() creates a heap-allocated string, triggering ref count check +str(42) # Raise=TypeError("bad operand type for unary +: 'str'") ================================================ FILE: crates/monty/test_cases/typing__types.py ================================================ # Tests for typing module type() behavior # # CPython's typing module uses various internal types for different constructs. # Monty simplifies this by using typing._SpecialForm for all typing markers. # Where CPython also uses _SpecialForm, we use == for exact match. # Where CPython uses different internal types, we accept both representations. import typing # === Types that match between CPython and Monty === assert repr(type(typing.Optional)) == "", 'type(Optional)' assert repr(type(typing.ClassVar)) == "", 'type(ClassVar)' assert repr(type(typing.Final)) == "", 'type(Final)' assert repr(type(typing.Union)) == "", 'type(Union)' # === Types that differ between CPython and Monty === # CPython uses specialized internal types; Monty uses _SpecialForm for all assert repr(type(typing.Any)) in ("", ""), 'type(Any)' assert repr(type(typing.Callable)) in ("", ""), ( 'type(Callable)' ) # === Verify TYPE_CHECKING is False === assert typing.TYPE_CHECKING is False, 'TYPE_CHECKING should be False at runtime' ================================================ FILE: crates/monty/test_cases/unpack__nested.py ================================================ # Test nested tuple unpacking # === Basic nested unpacking === data = ((1, 2), 'x') (a, b), c = data assert a == 1, 'nested unpack first inner' assert b == 2, 'nested unpack second inner' assert c == 'x', 'nested unpack outer' # === Deeply nested === ((a, b), (c, d)) = ((1, 2), (3, 4)) assert a == 1, 'deep nested first' assert b == 2, 'deep nested second' assert c == 3, 'deep nested third' assert d == 4, 'deep nested fourth' # === Mixed depths === 
(a, (b, c)) = (1, (2, 3)) assert a == 1, 'mixed depth outer' assert b == 2, 'mixed depth inner first' assert c == 3, 'mixed depth inner second' # === Three levels deep === (a, (b, (c, d))) = (1, (2, (3, 4))) assert a == 1, 'three level outer' assert b == 2, 'three level mid' assert c == 3, 'three level inner first' assert d == 4, 'three level inner second' # === In for loops === items = [((1, 2), 'a'), ((3, 4), 'b')] sums = [] letters = [] for (a, b), c in items: sums.append(a + b) letters.append(c) assert sums == [3, 7], 'for loop nested unpack sums' assert letters == ['a', 'b'], 'for loop nested unpack letters' # === In comprehensions === items = [((1, 2), 'a'), ((3, 4), 'b')] result = [a + b for (a, b), c in items] assert result == [3, 7], 'comprehension nested unpack' # === Deep nested in comprehension === items = [((1, 2), (3, 4)), ((5, 6), (7, 8))] result = [a + b + c + d for (a, b), (c, d) in items] assert result == [10, 26], 'comprehension deep nested unpack' ================================================ FILE: crates/monty/test_cases/unpack__non_sequence.py ================================================ a, b = 42 """ TRACEBACK: Traceback (most recent call last): File "unpack__non_sequence.py", line 1, in a, b = 42 ~~~~ TypeError: cannot unpack non-iterable int object """ ================================================ FILE: crates/monty/test_cases/unpack__not_enough.py ================================================ a, b, c = (1, 2) """ TRACEBACK: Traceback (most recent call last): File "unpack__not_enough.py", line 1, in a, b, c = (1, 2) ~~~~~~~ ValueError: not enough values to unpack (expected 3, got 2) """ ================================================ FILE: crates/monty/test_cases/unpack__ops.py ================================================ # === Basic tuple unpacking === a, b = (1, 2) assert a == 1, 'first element of tuple' assert b == 2, 'second element of tuple' # === Unpacking without parentheses === x, y = 10, 20 assert x == 10, 'first 
element without parens' assert y == 20, 'second element without parens' # === Three element unpacking === a, b, c = (1, 2, 3) assert a == 1, 'three elements: first' assert b == 2, 'three elements: second' assert c == 3, 'three elements: third' # === Unpacking from function return === def returns_pair(): return 42, 37 x, y = returns_pair() assert x == 42, 'function return first' assert y == 37, 'function return second' def returns_triple(): return 'a', 'b', 'c' p, q, r = returns_triple() assert p == 'a', 'function return triple first' assert q == 'b', 'function return triple second' assert r == 'c', 'function return triple third' # === Unpacking list === a, b = [100, 200] assert a == 100, 'list unpack first' assert b == 200, 'list unpack second' a, b, c, d = [1, 2, 3, 4] assert a == 1, 'four element list first' assert d == 4, 'four element list fourth' # === Unpacking string === a, b = 'xy' assert a == 'x', 'string unpack first char' assert b == 'y', 'string unpack second char' p, q, r = 'abc' assert p == 'a', 'three char string first' assert q == 'b', 'three char string second' assert r == 'c', 'three char string third' # === Unpacking with different value types === a, b = (True, False) assert a is True, 'bool tuple first' assert b is False, 'bool tuple second' a, b = (1.5, 2.5) assert a == 1.5, 'float tuple first' assert b == 2.5, 'float tuple second' a, b = (None, 42) assert a is None, 'mixed tuple None' assert b == 42, 'mixed tuple int' # === Unpacking with nested containers === a, b = ([1, 2], [3, 4]) assert a == [1, 2], 'nested list first' assert b == [3, 4], 'nested list second' a, b = ((1, 2), (3, 4)) assert a == (1, 2), 'nested tuple first' assert b == (3, 4), 'nested tuple second' # === Reassignment via unpacking === x = 1 y = 2 x, y = y, x assert x == 2, 'swap first' assert y == 1, 'swap second' # === Single element tuple (edge case) === # Note: (x,) = (1,) is valid Python (a,) = (42,) assert a == 42, 'single element tuple unpack' (a,) = [99] assert a == 
99, 'single element list unpack' (a,) = 'z' assert a == 'z', 'single char string unpack' # === Star unpacking (extended unpacking) === # Star at end first, *rest = [1, 2, 3, 4, 5] assert first == 1, 'star at end: first' assert rest == [2, 3, 4, 5], 'star at end: rest' # Star at start *init, last = [1, 2, 3, 4, 5] assert init == [1, 2, 3, 4], 'star at start: init' assert last == 5, 'star at start: last' # Star in middle first, *middle, last = [1, 2, 3, 4, 5] assert first == 1, 'star in middle: first' assert middle == [2, 3, 4], 'star in middle: middle' assert last == 5, 'star in middle: last' # Empty rest (minimum values) first, *rest, last = [1, 2] assert first == 1, 'empty rest: first' assert rest == [], 'empty rest: rest is empty list' assert last == 2, 'empty rest: last' # From tuple a, *b = (10, 20, 30) assert a == 10, 'star from tuple: a' assert b == [20, 30], 'star from tuple: b is list' # From string first, *mid, last = 'abcde' assert first == 'a', 'star from string: first' assert mid == ['b', 'c', 'd'], 'star from string: mid' assert last == 'e', 'star from string: last' # With more targets before star a, b, c, *rest = [1, 2, 3, 4, 5, 6] assert a == 1, 'multiple before star: a' assert b == 2, 'multiple before star: b' assert c == 3, 'multiple before star: c' assert rest == [4, 5, 6], 'multiple before star: rest' # With more targets after star *init, x, y, z = [1, 2, 3, 4, 5, 6] assert init == [1, 2, 3], 'multiple after star: init' assert x == 4, 'multiple after star: x' assert y == 5, 'multiple after star: y' assert z == 6, 'multiple after star: z' # Star captures all but one head, *tail = [1] assert head == 1, 'single item: head' assert tail == [], 'single item: tail is empty' # Star with bracket syntax [a, *b, c] = [1, 2, 3, 4] assert a == 1, 'bracket syntax: a' assert b == [2, 3], 'bracket syntax: b' assert c == 4, 'bracket syntax: c' ================================================ FILE: crates/monty/test_cases/unpack__star_not_enough.py 
================================================ a, *b, c = [1] """ TRACEBACK: Traceback (most recent call last): File "unpack__star_not_enough.py", line 1, in a, *b, c = [1] ~~~~~~~~ ValueError: not enough values to unpack (expected at least 2, got 1) """ ================================================ FILE: crates/monty/test_cases/unpack__too_many.py ================================================ a, b = (1, 2, 3, 4, 5) """ TRACEBACK: Traceback (most recent call last): File "unpack__too_many.py", line 1, in a, b = (1, 2, 3, 4, 5) ~~~~ ValueError: too many values to unpack (expected 2, got 5) """ ================================================ FILE: crates/monty/test_cases/version__cpython.py ================================================ import sys v = sys.version_info assert (v[0], v[1]) == (3, 14), f'Expected Python 3.14, got ({v[0]}, {v[1]})' ================================================ FILE: crates/monty/test_cases/walrus__all.py ================================================ # === Basic walrus operator === # Simple assignment expression assert (x := 5) == 5, 'walrus returns value' assert x == 5, 'walrus assigns to variable' # Walrus in parentheses y = (z := 10) assert y == 10, 'walrus value can be assigned' assert z == 10, 'walrus target is assigned' # simple if x = None answer = 'unset' if y := x: answer = f'x is {y}' assert answer == 'unset' x = 123 if y := x: answer = f'x is {y}' assert answer == 'x is 123' x = 0 if y := x: answer = f'x is {y}' else: answer = 'x is unset' assert answer == 'x is unset' # === Walrus in if conditions === if (a := 3) > 0: assert a == 3, 'walrus in if test' else: assert False, 'should not reach else' # With falsy value if b := 0: assert False, 'should not reach truthy branch' else: assert b == 0, 'walrus assigns even when falsy' # === Walrus in while loops === counter = 0 result = [] while (n := counter) < 3: result.append(n) counter += 1 assert result == [0, 1, 2], 'walrus in while condition' assert n == 3, 'walrus 
value persists after while' # === Nested walrus === # Inner walrus assigned first, then outer assert (outer := (inner := 7) + 1) == 8, 'nested walrus returns correct value' assert inner == 7, 'inner walrus assigned' assert outer == 8, 'outer walrus assigned' # === Walrus in list literals === items = [(v := 1), v + 1, v + 2] assert items == [1, 2, 3], 'walrus in list literal' assert v == 1, 'walrus variable accessible after list' # === Walrus in ternary expressions === result = (t := 5) if True else 0 assert result == 5, 'walrus in ternary truthy branch' assert t == 5, 'walrus assigned in ternary' result2 = 0 if False else (f := 6) assert result2 == 6, 'walrus in ternary falsy branch' assert f == 6, 'walrus assigned in falsy branch' # === Walrus in dict/set literals === d = {(k := 'key'): (val := 42)} assert d == {'key': 42}, 'walrus in dict literal' assert k == 'key', 'walrus key assigned' assert val == 42, 'walrus value assigned' s = {(s1 := 1), (s2 := 2)} assert s == {1, 2}, 'walrus in set literal' assert s1 == 1, 'walrus in set element 1' assert s2 == 2, 'walrus in set element 2' # === Walrus in subscript expressions === arr = [10, 20, 30] value = arr[(idx := 1)] assert value == 20, 'walrus in subscript index' assert idx == 1, 'walrus index assigned' # === Walrus in function calls === def identity(x): return x result = identity((arg := 99)) assert result == 99, 'walrus in function argument' assert arg == 99, 'walrus arg assigned' # === Walrus with comparison operators === assert (cmp := 10) > 5, 'walrus in comparison' assert cmp == 10, 'walrus assigned in comparison' # === Walrus in chained comparisons === # Note: Chained comparisons like `0 < (mid := 5) < 10` are not yet supported # Testing a simpler comparison chain mid = (chain := 5) assert 0 < chain and chain < 10, 'walrus result used in comparison chain' assert mid == 5, 'walrus assigned correctly' # === Walrus in boolean expressions === # Short-circuit with and result = (first := 1) and (second := 2) 
assert result == 2, 'walrus in and expression' assert first == 1, 'first walrus assigned' assert second == 2, 'second walrus assigned (and evaluated)' # Short-circuit with or (second not evaluated) result = (or_first := 1) or (or_skip := 999) assert result == 1, 'walrus in or expression (short-circuit)' assert or_first == 1, 'or first walrus assigned' # === Walrus with operations === assert (op := 3) + 2 == 5, 'walrus with addition' assert op == 3, 'walrus assigned before operation' # === Walrus in f-strings === msg = f'{(fvar := "hello")} world' assert msg == 'hello world', 'walrus in f-string' assert fvar == 'hello', 'walrus assigned in f-string' # === Walrus with global === global_var = None def set_global(): global global_var return (global_var := 'set') result = set_global() assert result == 'set', 'walrus with global returns value' assert global_var == 'set', 'global var assigned via walrus' # === Walrus creates local in function scope === def func_scope(): if local := 42: pass return local assert func_scope() == 42, 'walrus creates local in function' # === Walrus in list comprehension element (leaks to enclosing scope) === # Per PEP 572, walrus in comprehension assigns to enclosing scope # Note: walrus in comprehension iterable is not allowed, but in element/condition it is result = [(leak := x) for x in range(3)] assert result == [0, 1, 2], 'walrus in comprehension element' assert leak == 2, 'walrus in comprehension leaks to enclosing scope' # === Walrus in comprehension condition === result = [x for x in range(5) if (limit := 3) and x < limit] assert result == [0, 1, 2], 'walrus in comprehension condition' assert limit == 3, 'walrus from comprehension condition accessible' # === Multiple walrus in same expression === result = (m1 := 1) + (m2 := 2) + (m3 := 3) assert result == 6, 'multiple walrus in expression' assert m1 == 1, 'first multi-walrus' assert m2 == 2, 'second multi-walrus' assert m3 == 3, 'third multi-walrus' # === Walrus in tuple === tup = ((t1 
:= 'a'), (t2 := 'b')) assert tup == ('a', 'b'), 'walrus in tuple' assert t1 == 'a', 'first tuple walrus' assert t2 == 'b', 'second tuple walrus' ================================================ FILE: crates/monty/test_cases/while__all.py ================================================ # === Basic while loop === i = 0 result = [] while i < 3: result.append(i) i += 1 assert result == [0, 1, 2], 'basic while loop' # === While with break === i = 0 result = [] while i < 10: if i == 3: break result.append(i) i += 1 assert result == [0, 1, 2], 'while with break' # === While with continue === i = 0 result = [] while i < 5: i += 1 if i % 2 == 0: continue result.append(i) assert result == [1, 3, 5], 'while with continue' # === While with else (no break - else runs) === i = 0 flag = 0 while i < 3: i += 1 else: flag = 1 assert flag == 1, 'while else runs when no break' # === While with else (with break - else skipped) === i = 0 flag = 0 while i < 10: i += 1 if i == 2: break else: flag = 1 assert flag == 0, 'while else skipped on break' # === while True with break === i = 0 result = [] while True: result.append(i) i += 1 if i >= 3: break assert result == [0, 1, 2], 'while True with break' # === while False (never executes) === flag = 0 while False: flag = 1 assert flag == 0, 'while False never executes' # === while False with else (else runs immediately) === flag = 0 while False: flag = 1 else: flag = 2 assert flag == 2, 'while False runs else immediately' # === Nested while loops === i = 0 result = [] while i < 2: j = 0 while j < 2: result.append((i, j)) j += 1 i += 1 assert result == [(0, 0), (0, 1), (1, 0), (1, 1)], 'nested while loops' # === Nested while with break inner === i = 0 result = [] while i < 3: j = 0 while j < 3: if j == 1: break result.append((i, j)) j += 1 i += 1 assert result == [(0, 0), (1, 0), (2, 0)], 'nested while break inner only' # === For inside while === i = 0 result = [] while i < 2: for j in ['a', 'b']: result.append((i, j)) i += 1 assert result == 
[(0, 'a'), (0, 'b'), (1, 'a'), (1, 'b')], 'for inside while' # === While inside for === result = [] for i in [0, 1]: j = 0 while j < 2: result.append((i, j)) j += 1 assert result == [(0, 0), (0, 1), (1, 0), (1, 1)], 'while inside for' # === Complex condition with and === i = 0 j = 10 result = [] while i < 5 and j > 5: result.append((i, j)) i += 1 j -= 1 assert result == [(0, 10), (1, 9), (2, 8), (3, 7), (4, 6)], 'while with and condition' # === Complex condition with or === i = 5 count = 0 while i < 3 or count < 2: count += 1 i += 1 assert count == 2, 'while with or condition' # === While with function call condition === def check(n): return n < 3 i = 0 result = [] while check(i): result.append(i) i += 1 assert result == [0, 1, 2], 'while with function call condition' # === Continue does not skip else === i = 0 flag = 0 while i < 3: i += 1 if i == 2: continue else: flag = 1 assert flag == 1, 'continue does not skip else' # === Nested while - break outer via flag === i = 0 result = [] done = False while i < 3 and not done: j = 0 while j < 3: if i == 1 and j == 1: done = True break result.append((i, j)) j += 1 i += 1 assert result == [(0, 0), (0, 1), (0, 2), (1, 0)], 'nested while with flag break' # === While with negative condition === i = 5 result = [] while not i == 3: result.append(i) i -= 1 assert result == [5, 4], 'while with not condition' # === Nested while with inner else === i = 0 result = [] while i < 2: j = 0 while j < 2: result.append(j) j += 1 else: result.append('inner-else') i += 1 assert result == [0, 1, 'inner-else', 0, 1, 'inner-else'], 'nested while with inner else' # === Break in nested while skips inner else === i = 0 result = [] while i < 2: j = 0 while j < 3: if j == 1: break result.append(j) j += 1 else: result.append('inner-else') i += 1 assert result == [0, 0], 'break skips inner else only' ================================================ FILE: crates/monty/tests/asyncio.rs ================================================ //! 
Tests for async edge cases around ResolveFutures::resume behavior. //! //! These tests verify the behavior of the async execution model, specifically around //! resolving external futures incrementally via `ResolveFutures::resume()`. use monty::{ ExcType, ExtFunctionResult, MontyException, MontyObject, MontyRun, NameLookupResult, NoLimitTracker, PrintWriter, ResolveFutures, RunProgress, }; /// Helper to create a MontyRun for async external function tests. /// /// Sets up an async function that calls two async external functions (`foo` and `bar`) /// via asyncio.gather and returns their sum. fn create_gather_two_runner() -> MontyRun { let code = r" import asyncio async def main(): a, b = await asyncio.gather(foo(), bar()) return a + b await main() "; MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap() } /// Helper to create a MontyRun for async external function tests with three functions. fn create_gather_three_runner() -> MontyRun { let code = r" import asyncio async def main(): a, b, c = await asyncio.gather(foo(), bar(), baz()) return a + b + c await main() "; MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap() } /// Resolves consecutive `NameLookup` yields by providing a `Function` object for each name. fn resolve_name_lookups( mut progress: RunProgress, ) -> Result, monty::MontyException> { while let RunProgress::NameLookup(lookup) = progress { let name = lookup.name.clone(); progress = lookup.resume( NameLookupResult::Value(MontyObject::Function { name, docstring: None }), PrintWriter::Stdout, )?; } Ok(progress) } /// Helper to drive execution through external calls until we get ResolveFutures. /// /// Returns (pending_call_ids, state, collected_call_ids) where collected_call_ids /// are the call_ids from all the FunctionCalls we processed with resume_pending(). 
fn drive_to_resolve_futures(mut progress: RunProgress) -> (ResolveFutures, Vec) { let mut collected_call_ids = Vec::new(); loop { match progress { RunProgress::NameLookup(lookup) => { let name = lookup.name.clone(); progress = lookup .resume( NameLookupResult::Value(MontyObject::Function { name, docstring: None }), PrintWriter::Stdout, ) .unwrap(); } RunProgress::FunctionCall(call) => { collected_call_ids.push(call.call_id); progress = call.resume_pending(PrintWriter::Stdout).unwrap(); } RunProgress::ResolveFutures(state) => { return (state, collected_call_ids); } RunProgress::Complete(_) => { panic!("unexpected Complete before ResolveFutures"); } RunProgress::OsCall(call) => { panic!("unexpected OsCall: {:?}", call.function); } } } } // === Test: Resume with all call_ids at once === #[test] fn resume_with_all_call_ids() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); assert_eq!(call_ids.len(), 2, "should have 2 pending calls"); // Resume with all results at once let results = vec![ (call_ids[0], ExtFunctionResult::Return(MontyObject::Int(10))), (call_ids[1], ExtFunctionResult::Return(MontyObject::Int(32))), ]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let result = progress.into_complete().expect("should complete"); assert_eq!(result, MontyObject::Int(42)); } // === Test: Resume with partial results === #[test] fn resume_with_partial_results() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); // Resume with only the first result let results = vec![(call_ids[0], ExtFunctionResult::Return(MontyObject::Int(10)))]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); // Should still need more futures resolved let state = 
progress.into_resolve_futures().expect("should still need futures"); // Resume with the second result let results = vec![(call_ids[1], ExtFunctionResult::Return(MontyObject::Int(32)))]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let result = progress.into_complete().expect("should complete"); assert_eq!(result, MontyObject::Int(42)); } // === Test: Resume with unknown call_id === #[test] fn resume_with_unknown_call_id() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, _call_ids) = drive_to_resolve_futures(progress); // Resume with an unknown call_id let results = vec![(9999, ExtFunctionResult::Return(MontyObject::Int(10)))]; let result = state.resume(results, PrintWriter::Stdout); assert!(result.is_err(), "should error on unknown call_id"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::RuntimeError); let msg = exc.message().unwrap(); assert!( msg.contains("unknown call_id 9999"), "error should mention unknown call_id, got: {msg}" ); } // === Test: Resume with empty results === #[test] fn resume_with_empty_results() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); // Resume with empty results - should still be blocked let results: Vec<(u32, ExtFunctionResult)> = vec![]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); // Should still need futures resolved let state = progress.into_resolve_futures().expect("should still need futures"); // Now resolve everything let results = vec![ (call_ids[0], ExtFunctionResult::Return(MontyObject::Int(10))), (call_ids[1], ExtFunctionResult::Return(MontyObject::Int(32))), ]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let result = progress.into_complete().expect("should complete"); assert_eq!(result, MontyObject::Int(42)); 
} // === Test: Resume with error result === #[test] fn resume_with_error_result() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); // Resume with one success and one error let results = vec![ (call_ids[0], ExtFunctionResult::Return(MontyObject::Int(10))), ( call_ids[1], ExtFunctionResult::Error(MontyException::new(ExcType::ValueError, Some("test error".to_string()))), ), ]; let result = state.resume(results, PrintWriter::Stdout); // Should propagate the error assert!(result.is_err(), "should propagate error"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::ValueError); assert_eq!(exc.message(), Some("test error")); } // === Test: Resume with three functions, reversed order === #[test] fn resume_with_reversed_order() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); // Resume with results in reverse order - should still work let results = vec![ (call_ids[1], ExtFunctionResult::Return(MontyObject::Int(32))), // bar() = 32 (call_ids[0], ExtFunctionResult::Return(MontyObject::Int(10))), // foo() = 10 ]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let result = progress.into_complete().expect("should complete"); assert_eq!(result, MontyObject::Int(42)); } // === Test: Three-way gather with incremental resolution === #[test] fn three_way_gather_incremental() { let runner = create_gather_three_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); assert_eq!(call_ids.len(), 3, "should have 3 pending calls"); // Resolve one at a time let results = vec![(call_ids[0], ExtFunctionResult::Return(MontyObject::Int(100)))]; let progress = state.resume(results, 
PrintWriter::Stdout).unwrap(); let state = progress.into_resolve_futures().expect("need more"); let results = vec![(call_ids[1], ExtFunctionResult::Return(MontyObject::Int(200)))]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let state = progress.into_resolve_futures().expect("need more"); let results = vec![(call_ids[2], ExtFunctionResult::Return(MontyObject::Int(300)))]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let result = progress.into_complete().expect("should complete"); assert_eq!(result, MontyObject::Int(600)); } // === Test: Duplicate call_id in results (should be fine - second is ignored) === #[test] fn resume_with_duplicate_call_id() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); // Include duplicate - second value should be ignored let results = vec![ (call_ids[0], ExtFunctionResult::Return(MontyObject::Int(10))), (call_ids[0], ExtFunctionResult::Return(MontyObject::Int(99))), // duplicate - ignored! 
(call_ids[1], ExtFunctionResult::Return(MontyObject::Int(32))), ]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let result = progress.into_complete().expect("should complete"); assert_eq!(result, MontyObject::Int(42)); } // === Test: gather_error_propagated_as_exception === #[test] fn gather_error_propagated_as_exception() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); // Both fail with errors let results = vec![ ( call_ids[0], ExtFunctionResult::Error(MontyException::new(ExcType::ValueError, Some("foo error".to_string()))), ), ( call_ids[1], ExtFunctionResult::Error(MontyException::new( ExcType::RuntimeError, Some("bar error".to_string()), )), ), ]; let result = state.resume(results, PrintWriter::Stdout); // One of the errors should propagate (implementation may choose either) assert!(result.is_err(), "should propagate an error"); } // === Test: Sequential awaits - second fails === fn create_sequential_awaits_runner() -> MontyRun { let code = r" import asyncio async def main(): a = await foo() b = await bar() return a + b await main() "; MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap() } #[test] fn sequential_awaits_second_fails() { let runner = create_sequential_awaits_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let progress = resolve_name_lookups(progress).unwrap(); // First external call (foo) let RunProgress::FunctionCall(call) = progress else { panic!("expected FunctionCall for foo"); }; let foo_call_id = call.call_id; let progress = call.resume_pending(PrintWriter::Stdout).unwrap(); // Should yield for resolution let state = progress.into_resolve_futures().expect("should need foo resolved"); assert_eq!(state.pending_call_ids(), vec![foo_call_id]); // Resolve foo successfully let results = vec![(foo_call_id, 
ExtFunctionResult::Return(MontyObject::Int(10)))]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let progress = resolve_name_lookups(progress).unwrap(); // Second external call (bar) let RunProgress::FunctionCall(call) = progress else { panic!("expected FunctionCall for bar"); }; let bar_call_id = call.call_id; let progress = call.resume_pending(PrintWriter::Stdout).unwrap(); // Should yield for resolution let state = progress.into_resolve_futures().expect("should need bar resolved"); assert_eq!(state.pending_call_ids(), vec![bar_call_id]); // Fail bar with an exception let results = vec![( bar_call_id, ExtFunctionResult::Error(MontyException::new(ExcType::ValueError, Some("bar failed".to_string()))), )]; let result = state.resume(results, PrintWriter::Stdout); assert!(result.is_err(), "should propagate bar's error"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::ValueError); assert_eq!(exc.message(), Some("bar failed")); } // === Test: Sequential awaits - first fails === #[test] fn sequential_awaits_first_fails() { let runner = create_sequential_awaits_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let progress = resolve_name_lookups(progress).unwrap(); // First external call (foo) let RunProgress::FunctionCall(call) = progress else { panic!("expected FunctionCall for foo"); }; let foo_call_id = call.call_id; let progress = call.resume_pending(PrintWriter::Stdout).unwrap(); let state = progress.into_resolve_futures().expect("should need foo resolved"); // Fail foo with an exception - bar should never be called let results = vec![( foo_call_id, ExtFunctionResult::Error(MontyException::new( ExcType::RuntimeError, Some("foo failed early".to_string()), )), )]; let result = state.resume(results, PrintWriter::Stdout); assert!(result.is_err(), "should propagate foo's error"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::RuntimeError); assert_eq!(exc.message(), 
Some("foo failed early")); } // === Test: Gather - first external fails before second is resolved === #[test] fn gather_first_external_fails_immediately() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); assert_eq!(call_ids.len(), 2, "should have 2 calls"); // Resolve first call with error, second with success let results = vec![( call_ids[0], ExtFunctionResult::Error(MontyException::new(ExcType::ValueError, Some("foo failed".to_string()))), )]; let result = state.resume(results, PrintWriter::Stdout); // Error should propagate immediately (no need to resolve second) assert!(result.is_err(), "should propagate foo's error immediately"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::ValueError); assert_eq!(exc.message(), Some("foo failed")); } // === Test: Gather - second external fails === #[test] fn gather_second_external_fails() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); // Resolve second call with error let results = vec![( call_ids[1], ExtFunctionResult::Error(MontyException::new( ExcType::RuntimeError, Some("bar failed".to_string()), )), )]; let result = state.resume(results, PrintWriter::Stdout); assert!(result.is_err(), "should propagate bar's error"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::RuntimeError); assert_eq!(exc.message(), Some("bar failed")); } // === Test: Both gather tasks fail === #[test] fn gather_both_fail() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); let results = vec![ ( call_ids[0], ExtFunctionResult::Error(MontyException::new(ExcType::ValueError, Some("foo failed".to_string()))), 
), ( call_ids[1], ExtFunctionResult::Error(MontyException::new( ExcType::RuntimeError, Some("bar failed".to_string()), )), ), ]; let result = state.resume(results, PrintWriter::Stdout); assert!(result.is_err(), "should propagate one of the errors"); } // === Test: Three-way gather, partial error === #[test] fn three_way_gather_partial_error() { let runner = create_gather_three_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); // First and third succeed, second fails let results = vec![ (call_ids[0], ExtFunctionResult::Return(MontyObject::Int(100))), ( call_ids[1], ExtFunctionResult::Error(MontyException::new( ExcType::TypeError, Some("bar type error".to_string()), )), ), (call_ids[2], ExtFunctionResult::Return(MontyObject::Int(300))), ]; let result = state.resume(results, PrintWriter::Stdout); assert!(result.is_err(), "should propagate bar's error"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::TypeError); } // === Test: Incremental resolution with error on second round === #[test] fn incremental_resolution_error_on_second_round() { let runner = create_gather_two_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); // First resolve one successfully let results = vec![(call_ids[0], ExtFunctionResult::Return(MontyObject::Int(100)))]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let state = progress.into_resolve_futures().expect("need more"); // Then fail the second let results = vec![( call_ids[1], ExtFunctionResult::Error(MontyException::new( ExcType::ValueError, Some("delayed failure".to_string()), )), )]; let result = state.resume(results, PrintWriter::Stdout); assert!(result.is_err(), "should propagate delayed error"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::ValueError); 
assert_eq!(exc.message(), Some("delayed failure")); } // === Test: Gather with all at once, mixed success/failure === #[test] fn gather_three_all_at_once_mixed() { let runner = create_gather_three_runner(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let (state, call_ids) = drive_to_resolve_futures(progress); let results = vec![ (call_ids[0], ExtFunctionResult::Return(MontyObject::Int(100))), (call_ids[1], ExtFunctionResult::Return(MontyObject::Int(200))), ]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); let state = progress.into_resolve_futures().expect("need more"); let results = vec![( call_ids[2], ExtFunctionResult::Error(MontyException::new( ExcType::RuntimeError, Some("baz failed".to_string()), )), )]; let result = state.resume(results, PrintWriter::Stdout); assert!(result.is_err(), "should propagate baz error"); } // === Tests: Nested gather with task switching === // // These tests target a pair of bugs in task switching during incremental resolution: // - Correct value pushing when restoring from a resolved task (Bug 1) // - Correct waiter context detection for current task (Bug 2) /// Helper to drive execution, collecting function calls and resolving them async, /// until we reach ResolveFutures. Returns the snapshot and a vec of /// (call_id, function_name) pairs for all external calls made. 
///
/// # Panics
///
/// Panics if execution yields `Complete` or `OsCall` before `ResolveFutures`.
fn drive_collecting_calls(
    mut progress: RunProgress<NoLimitTracker>,
) -> (ResolveFutures<NoLimitTracker>, Vec<(u32, String)>) {
    let mut collected = Vec::new();
    loop {
        match progress {
            // Name lookups are answered with a `Function` object of the same name.
            RunProgress::NameLookup(lookup) => {
                let name = lookup.name.clone();
                progress = lookup
                    .resume(
                        NameLookupResult::Value(MontyObject::Function { name, docstring: None }),
                        PrintWriter::Stdout,
                    )
                    .unwrap();
            }
            // Record the call, then continue with it left pending.
            RunProgress::FunctionCall(call) => {
                collected.push((call.call_id, call.function_name.clone()));
                progress = call.resume_pending(PrintWriter::Stdout).unwrap();
            }
            RunProgress::ResolveFutures(state) => {
                return (state, collected);
            }
            RunProgress::Complete(_) => {
                panic!("unexpected Complete before ResolveFutures");
            }
            RunProgress::OsCall(call) => {
                panic!("unexpected OsCall: {:?}", call.function);
            }
        }
    }
}

/// Tests nested gathers where spawned tasks do sequential external await then inner gather.
///
/// Pattern:
/// - Outer gather spawns 3 coroutine tasks
/// - Each coroutine does `await get_lat_lng(city)` then `await asyncio.gather(get_temp(city), get_desc(city))`
/// - All external functions are resolved via async futures
///
/// This exercises both Bug 1 (resolved value not pushed to restored task stack) and
/// Bug 2 (current task's gather result pushed to wrong location).
#[test] fn nested_gather_with_spawned_tasks_and_external_futures() { let code = r" import asyncio async def process(city): coords = await get_lat_lng(city) temp, desc = await asyncio.gather(get_temp(city), get_desc(city)) return coords + temp + desc async def main(): results = await asyncio.gather( process('a'), process('b'), process('c'), ) return results[0] + results[1] + results[2] await main() "; let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); // Drive until all initial external calls are made and we need to resolve futures let (state, calls) = drive_collecting_calls(progress); // The 3 spawned tasks each call get_lat_lng first, so we expect 3 get_lat_lng calls assert_eq!(calls.len(), 3, "should have 3 initial get_lat_lng calls"); for (_, name) in &calls { assert_eq!(name, "get_lat_lng", "initial calls should all be get_lat_lng"); } // Resolve all 3 get_lat_lng calls: each returns 100 let results: Vec<(u32, ExtFunctionResult)> = calls .iter() .map(|(id, _)| (*id, ExtFunctionResult::Return(MontyObject::Int(100)))) .collect(); let progress = state.resume(results, PrintWriter::Stdout).unwrap(); // After resolving get_lat_lng, each task proceeds to the inner gather which // calls get_temp and get_desc. Drive those calls. 
let (state, calls) = drive_collecting_calls(progress); // Each of 3 tasks calls get_temp + get_desc = 6 calls total assert_eq!(calls.len(), 6, "should have 6 inner gather calls (3 tasks * 2 each)"); let temp_calls: Vec<_> = calls.iter().filter(|(_, n)| n == "get_temp").collect(); let desc_calls: Vec<_> = calls.iter().filter(|(_, n)| n == "get_desc").collect(); assert_eq!(temp_calls.len(), 3, "should have 3 get_temp calls"); assert_eq!(desc_calls.len(), 3, "should have 3 get_desc calls"); // Resolve all inner calls: get_temp returns 10, get_desc returns 1 let results: Vec<(u32, ExtFunctionResult)> = calls .iter() .map(|(id, name)| { let val = if name == "get_temp" { 10 } else { 1 }; (*id, ExtFunctionResult::Return(MontyObject::Int(val))) }) .collect(); let progress = state.resume(results, PrintWriter::Stdout).unwrap(); // Each task returns coords(100) + temp(10) + desc(1) = 111 // main returns 111 + 111 + 111 = 333 let result = progress.into_complete().expect("should complete"); assert_eq!(result, MontyObject::Int(333)); } /// Tests nested gathers with incremental resolution (one task at a time). /// /// Same pattern as above but resolves futures in multiple rounds to ensure /// task switching between partially-resolved states works correctly. 
#[test] fn nested_gather_incremental_resolution() { let code = r" import asyncio async def process(x): a = await step1(x) b, c = await asyncio.gather(step2(x), step3(x)) return a + b + c async def main(): r1, r2 = await asyncio.gather(process('x'), process('y')) return r1 + r2 await main() "; let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); // Drive to get the initial step1 calls let (state, calls) = drive_collecting_calls(progress); assert_eq!(calls.len(), 2, "should have 2 step1 calls"); // Resolve only the FIRST step1 call let results = vec![(calls[0].0, ExtFunctionResult::Return(MontyObject::Int(100)))]; let progress = state.resume(results, PrintWriter::Stdout).unwrap(); // First task proceeds to inner gather (step2 + step3), second task still blocked let (state, new_calls) = drive_collecting_calls(progress); // We should see step2 and step3 for the first task assert_eq!(new_calls.len(), 2, "should have 2 inner calls from first task"); // Now resolve the second step1 call AND the first task's inner calls let mut results: Vec<(u32, ExtFunctionResult)> = vec![ // Second task's step1 (calls[1].0, ExtFunctionResult::Return(MontyObject::Int(200))), ]; // First task's inner calls for (id, name) in &new_calls { let val = if name == "step2" { 10 } else { 1 }; results.push((*id, ExtFunctionResult::Return(MontyObject::Int(val)))); } let progress = state.resume(results, PrintWriter::Stdout).unwrap(); // Second task now proceeds to inner gather let (state, final_calls) = drive_collecting_calls(progress); assert_eq!(final_calls.len(), 2, "should have 2 inner calls from second task"); // Resolve second task's inner calls let results: Vec<(u32, ExtFunctionResult)> = final_calls .iter() .map(|(id, name)| { let val = if name == "step2" { 20 } else { 2 }; (*id, ExtFunctionResult::Return(MontyObject::Int(val))) }) .collect(); let progress = state.resume(results, 
PrintWriter::Stdout).unwrap(); // First task: 100 + 10 + 1 = 111 // Second task: 200 + 20 + 2 = 222 // Total: 111 + 222 = 333 let result = progress.into_complete().expect("should complete"); assert_eq!(result, MontyObject::Int(333)); } ================================================ FILE: crates/monty/tests/binary_serde.rs ================================================ //! Tests for binary serialization and deserialization of `MontyRun` and `RunProgress`. //! //! These tests verify that execution state can be serialized with postcard for: //! - Caching parsed code to avoid re-parsing //! - Snapshotting execution state for external function calls use monty::{MontyObject, MontyRun, NameLookupResult, NoLimitTracker, PrintWriter, RunProgress}; /// Resolves consecutive `NameLookup` yields by providing a `Function` object for each name. fn resolve_name_lookups( mut progress: RunProgress, ) -> Result, monty::MontyException> { while let RunProgress::NameLookup(lookup) = progress { let name = lookup.name.clone(); progress = lookup.resume( NameLookupResult::Value(MontyObject::Function { name, docstring: None }), PrintWriter::Stdout, )?; } Ok(progress) } // === MontyRun dump/load Tests === #[test] fn monty_run_dump_load_simple() { // Create a runner, dump it, load it, and verify it produces the same result let runner = MontyRun::new("1 + 2".to_owned(), "test.py", vec![]).unwrap(); let bytes = runner.dump().unwrap(); let loaded = MontyRun::load(&bytes).unwrap(); let result = loaded.run_no_limits(vec![]).unwrap(); assert_eq!(result, MontyObject::Int(3)); } #[test] fn monty_run_dump_load_with_inputs() { // Test that input names are preserved across dump/load let runner = MontyRun::new("x + y * 2".to_owned(), "test.py", vec!["x".to_owned(), "y".to_owned()]).unwrap(); let bytes = runner.dump().unwrap(); let loaded = MontyRun::load(&bytes).unwrap(); let result = loaded .run_no_limits(vec![MontyObject::Int(10), MontyObject::Int(5)]) .unwrap(); assert_eq!(result, 
MontyObject::Int(20)); } #[test] fn monty_run_dump_load_preserves_code() { // Verify the code string is preserved let code = "def foo(x):\n return x * 2\nfoo(21)".to_owned(); let runner = MontyRun::new(code.clone(), "test.py", vec![]).unwrap(); let bytes = runner.dump().unwrap(); let loaded = MontyRun::load(&bytes).unwrap(); assert_eq!(loaded.code(), code); let result = loaded.run_no_limits(vec![]).unwrap(); assert_eq!(result, MontyObject::Int(42)); } #[test] fn monty_run_dump_load_complex_code() { // Test with more complex code including functions, loops, conditionals let code = r" def fib(n): if n <= 1: return n return fib(n - 1) + fib(n - 2) result = [] for i in range(10): result.append(fib(i)) result " .to_owned(); let runner = MontyRun::new(code, "test.py", vec![]).unwrap(); let bytes = runner.dump().unwrap(); let loaded = MontyRun::load(&bytes).unwrap(); let result = loaded.run_no_limits(vec![]).unwrap(); // First 10 Fibonacci numbers: 0, 1, 1, 2, 3, 5, 8, 13, 21, 34 let expected = MontyObject::List(vec![ MontyObject::Int(0), MontyObject::Int(1), MontyObject::Int(1), MontyObject::Int(2), MontyObject::Int(3), MontyObject::Int(5), MontyObject::Int(8), MontyObject::Int(13), MontyObject::Int(21), MontyObject::Int(34), ]); assert_eq!(result, expected); } #[test] fn monty_run_dump_load_multiple_runs() { // A loaded runner can be run multiple times let runner = MontyRun::new("x * 2".to_owned(), "test.py", vec!["x".to_owned()]).unwrap(); let bytes = runner.dump().unwrap(); let loaded = MontyRun::load(&bytes).unwrap(); assert_eq!( loaded.run_no_limits(vec![MontyObject::Int(5)]).unwrap(), MontyObject::Int(10) ); assert_eq!( loaded.run_no_limits(vec![MontyObject::Int(21)]).unwrap(), MontyObject::Int(42) ); } // === RunProgress dump/load Tests === #[test] fn run_progress_dump_load_roundtrip() { // Start execution with an external function, dump at the call, load and resume let runner = MontyRun::new("ext_fn(42) + 1".to_owned(), "test.py", vec![]).unwrap(); let progress = 
runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); // First resolve the NameLookup for ext_fn let progress = resolve_name_lookups(progress).unwrap(); // Dump the progress at the external call let bytes = progress.dump().unwrap(); // Load it back let loaded: RunProgress = RunProgress::load(&bytes).unwrap(); // Should still be at the external function call let call = loaded.into_function_call().expect("should be at function call"); assert_eq!(call.function_name, "ext_fn"); assert_eq!(call.args, vec![MontyObject::Int(42)]); // Resume execution with a return value let result = call.resume(MontyObject::Int(100), PrintWriter::Stdout).unwrap(); assert_eq!(result.into_complete().unwrap(), MontyObject::Int(101)); // 100 + 1 } #[test] fn run_progress_dump_load_multiple_calls() { // Test multiple external calls with dump/load between each let runner = MontyRun::new("x = ext_fn(1); y = ext_fn(2); x + y".to_owned(), "test.py", vec![]).unwrap(); // First call - resolve NameLookup for ext_fn first let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let progress = resolve_name_lookups(progress).unwrap(); let bytes = progress.dump().unwrap(); let loaded: RunProgress = RunProgress::load(&bytes).unwrap(); let call = loaded.into_function_call().unwrap(); assert_eq!(call.function_name, "ext_fn"); assert_eq!(call.args, vec![MontyObject::Int(1)]); // Resume first call let progress = call.resume(MontyObject::Int(10), PrintWriter::Stdout).unwrap(); // Resolve any NameLookup for the second ext_fn reference let progress = resolve_name_lookups(progress).unwrap(); // Dump/load at second call let bytes = progress.dump().unwrap(); let loaded: RunProgress = RunProgress::load(&bytes).unwrap(); let call = loaded.into_function_call().unwrap(); assert_eq!(call.function_name, "ext_fn"); assert_eq!(call.args, vec![MontyObject::Int(2)]); // Resume second call to completion let result = call.resume(MontyObject::Int(20), PrintWriter::Stdout).unwrap(); 
assert_eq!(result.into_complete().unwrap(), MontyObject::Int(30)); // 10 + 20 } #[test] fn run_progress_complete_roundtrip() { // When execution completes, we can still dump/load the Complete variant let runner = MontyRun::new("1 + 2".to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let bytes = progress.dump().unwrap(); let loaded: RunProgress = RunProgress::load(&bytes).unwrap(); assert_eq!(loaded.into_complete().unwrap(), MontyObject::Int(3)); } ================================================ FILE: crates/monty/tests/bytecode_limits.rs ================================================ //! Tests for bytecode operand overflow limits. //! //! These tests verify that the bytecode compiler handles cases where operands //! exceed the u8/u16 limits of the bytecode encoding: //! //! - Local variable slots: Use wide instructions (u16), so up to 65535 locals work //! - Function call arguments: Limited to 255 (u8 operand) - returns SyntaxError if exceeded //! - Keyword argument counts: Limited to 255 (u8 operand) - returns SyntaxError if exceeded use std::fmt::Write; use monty::{ExcType, MontyRun}; /// Generates Python code with N local variables in a function. /// /// Creates: `def f(): v0=0; v1=1; ...; v{n-1}={n-1}; return v{n-1}` fn generate_many_locals(count: usize) -> String { let mut code = String::from("def f():\n"); for i in 0..count { writeln!(code, " v{i} = {i}").unwrap(); } writeln!(code, " return v{}", count - 1).unwrap(); code.push_str("f()"); code } /// Generates Python code calling a function with N positional arguments. 
/// Generates Python code with a function that has `count` parameters and a call
/// passing one positional argument per parameter.
///
/// Creates: `def f(p0, p1, ..., p{n-1}): return p{n-1}\nf(0, 1, ..., n-1)`
///
/// # Panics
/// Panics if `count` is zero: the generated function returns its last parameter, so
/// at least one parameter must exist.
fn generate_many_parameters(count: usize) -> String {
    // Guard explicitly: `count - 1` below would otherwise underflow.
    assert!(count > 0, "generate_many_parameters requires at least one parameter");

    // Both comma-separated lists have the same shape, so build them the same way.
    let params = (0..count).map(|i| format!("p{i}")).collect::<Vec<_>>().join(", ");
    let call_args = (0..count).map(|i| i.to_string()).collect::<Vec<_>>().join(", ");

    format!("def f({params}):\n return p{}\nf({call_args})", count - 1)
}
fn assert_syntax_error(result: Result, expected_msg: &str) { let err = result.expect_err("expected SyntaxError"); assert_eq!( err.exc_type(), ExcType::SyntaxError, "expected SyntaxError, got {:?}: {:?}", err.exc_type(), err.message() ); let msg = err.message().expect("SyntaxError should have message"); assert!( msg.contains(expected_msg), "expected message containing '{expected_msg}', got: {msg}" ); } mod local_variable_limits { use super::*; #[test] fn locals_under_u8_limit_succeeds() { // 255 locals should work with u8 slots (0-254) let code = generate_many_locals(255); let result = MontyRun::new(code, "test.py", vec![]); assert!(result.is_ok(), "255 locals should compile successfully"); let run = result.unwrap(); let result = run.run_no_limits(vec![]); assert!(result.is_ok(), "255 locals should run successfully"); } #[test] fn locals_at_u8_boundary_succeeds() { // 256 locals (slots 0-255) - uses wide instructions for slot 255+ let code = generate_many_locals(256); let result = MontyRun::new(code, "test.py", vec![]); assert!( result.is_ok(), "256 locals should compile successfully (wide instructions)" ); let run = result.unwrap(); let result = run.run_no_limits(vec![]); assert!(result.is_ok(), "256 locals should run successfully"); } #[test] fn locals_exceeding_u8_uses_wide_instructions() { // 257 locals requires LoadLocalW/StoreLocalW for slot 256 let code = generate_many_locals(257); let result = MontyRun::new(code, "test.py", vec![]); assert!(result.is_ok(), "257 locals should compile (using wide instructions)"); let run = result.unwrap(); let result = run.run_no_limits(vec![]); assert!(result.is_ok(), "257 locals should run correctly with wide instructions"); } #[test] fn locals_well_over_u8_limit() { // 300 locals - well into wide instruction territory let code = generate_many_locals(300); let result = MontyRun::new(code, "test.py", vec![]); assert!(result.is_ok(), "300 locals should compile successfully"); let run = result.unwrap(); let result = 
/// Positional-argument count limits at call sites: 255 arguments compile and run,
/// while 256 or more are rejected with a `SyntaxError`.
mod function_argument_limits {
    use super::*;

    #[test]
    fn positional_args_under_u8_limit_succeeds() {
        // 255 positional arguments are accepted.
        let source = generate_many_positional_args(255);
        let compiled = MontyRun::new(source, "test.py", vec![]);
        assert!(compiled.is_ok(), "255 positional args should compile successfully");

        let outcome = compiled.unwrap().run_no_limits(vec![]);
        assert!(outcome.is_ok(), "255 positional args should run successfully");
    }

    #[test]
    fn positional_args_at_u8_boundary_returns_syntax_error() {
        // 256 positional arguments is one past the limit and must fail at compile time.
        let compiled = MontyRun::new(generate_many_positional_args(256), "test.py", vec![]);
        assert_syntax_error(compiled, "more than 255 positional arguments");
    }

    #[test]
    fn positional_args_exceeding_u8_limit_returns_syntax_error() {
        // 257 positional arguments is clearly beyond the limit.
        let compiled = MontyRun::new(generate_many_positional_args(257), "test.py", vec![]);
        assert_syntax_error(compiled, "more than 255 positional arguments");
    }
}
/// Parameter-count limits: the generated fixture both defines `f` with N parameters and
/// calls it with N positional arguments, so the call site is what trips the limit.
mod function_parameter_limits {
    use super::*;

    #[test]
    fn parameters_under_u8_limit_succeeds() {
        // 255 parameters work for both the definition and the call.
        let source = generate_many_parameters(255);
        let compiled = MontyRun::new(source, "test.py", vec![]);
        assert!(compiled.is_ok(), "255 parameters should compile successfully");

        let outcome = compiled.unwrap().run_no_limits(vec![]);
        assert!(outcome.is_ok(), "255 parameters should run successfully");
    }

    #[test]
    fn parameters_at_u8_boundary_returns_syntax_error_for_call() {
        // With 256 parameters the definition is fine (locals use wide instructions),
        // but the matching call passes 256 positional arguments, which is over the limit.
        let compiled = MontyRun::new(generate_many_parameters(256), "test.py", vec![]);
        assert_syntax_error(compiled, "more than 255 positional arguments");
    }

    #[test]
    fn parameters_exceeding_u8_limit_returns_syntax_error_for_call() {
        // 257 parameters: same failure, the call site has too many arguments.
        let compiled = MontyRun::new(generate_many_parameters(257), "test.py", vec![]);
        assert_syntax_error(compiled, "more than 255 positional arguments");
    }
}
/// Test configuration parsed from directive comments.
///
/// `parse_fixture` fills this in from the fixture's `#` comment lines (any line that
/// starts with `#`, not only the first one):
/// - a line beginning `xfail=` (e.g. `# xfail=monty,cpython`) sets the xfail flags
/// - a line beginning `call-external` sets `iter_mode`
/// - a line beginning `run-async` sets `async_mode`
///
/// With no directives every flag is `false` (the derived `Default`).
///
/// ## Xfail Semantics (Strict)
/// - `xfail=monty` - Test is expected to fail on Monty; if it passes, that's an error
/// - `xfail=cpython` - Test is expected to fail on CPython; if it passes, that's an error
/// - `xfail=monty,cpython` - Expected to fail on both interpreters
#[derive(Debug, Clone, Default)]
#[expect(clippy::struct_excessive_bools)]
struct TestConfig {
    /// When true, test is expected to fail on Monty (strict xfail).
    xfail_monty: bool,
    /// When true, test is expected to fail on CPython (strict xfail).
    xfail_cpython: bool,
    /// When true, the fixture is run with external function support (set by the
    /// `call-external` directive).
    // NOTE(review): the previous wording was "use MontyRun with external function support
    // instead of MontyRun", which looks like a stale rename — confirm which runner path
    // this flag selects.
    iter_mode: bool,
    /// When true, wrap code in async context for CPython execution.
    /// Used for tests with top-level await which Monty supports but CPython doesn't.
    async_mode: bool,
}
RefCounts(#[cfg_attr(not(feature = "ref-count-return"), expect(dead_code))] AHashMap), /// Expect exception with full traceback comparison. /// The expected traceback string should match exactly between Monty and CPython. Traceback(String), /// Expect successful execution without raising an exception (no return value check). /// Used for tests that rely on asserts or just verify code runs. NoException, } impl Expectation { /// Returns the expected value string fn expected_value(&self) -> &str { match self { Self::Raise(s) | Self::ReturnStr(s) | Self::Return(s) | Self::ReturnType(s) | Self::Traceback(s) => s, Self::RefCounts(_) | Self::NoException => "", } } } /// Parse a Python fixture file into code, expected outcome, and test configuration. /// /// The file may optionally contain a `# xfail=monty,cpython` comment to specify /// which interpreters the test is expected to fail on. If not present, defaults to /// running on both and expecting success. /// /// The file may have an expectation comment as the LAST line: /// - `# Raise=ExceptionType('message')` - Exception (parse-time or runtime) /// - `# Return.str=value` - Check py_str() output /// - `# Return=value` - Check py_repr() output /// - `# Return.type=typename` - Check py_type() output /// - `# ref-counts={'var': count, ...}` - Check ref counts of named heap variables /// /// Or a traceback expectation as a triple-quoted string at the end (uses actual test filename): /// ```text /// """TRACEBACK: /// Traceback (most recent call last): /// File "my_test.py", line 4, in /// foo() /// ValueError: message /// """ /// ``` /// /// If no expectation comment is present, the test just verifies the code runs without exception. 
fn parse_fixture(content: &str) -> (String, Expectation, TestConfig) { let lines: Vec<&str> = content.lines().collect(); assert!(!lines.is_empty(), "Empty fixture file"); // comment lines with leading # and spaces stripped let comment_lines = lines .iter() .filter(|line| line.starts_with('#')) .map(|line| line.trim_start_matches('#').trim()) .collect::>(); let mut config = TestConfig { iter_mode: comment_lines.iter().any(|line| line.starts_with("call-external")), async_mode: comment_lines.iter().any(|line| line.starts_with("run-async")), ..Default::default() }; // Check for "xfail=" directive if let Some(&xfail_line) = comment_lines.iter().find(|line| line.starts_with("xfail=")) { // Parse until whitespace or end of line let xfail_end = xfail_line.find(|c: char| c.is_whitespace()).unwrap_or(xfail_line.len()); let xfail_str = &xfail_line[..xfail_end]; config.xfail_monty = xfail_str.contains("monty"); config.xfail_cpython = xfail_str.contains("cpython"); } // Check for TRACEBACK expectation (triple-quoted string at end of file) // Format: """TRACEBACK:\n...\n""" if let Some((code, traceback)) = parse_traceback_expectation(content) { return (code, Expectation::Traceback(traceback), config); } // Get the last line and check if it's an expectation comment let last_line = lines.last().unwrap(); // Parse expectation from comment line if present // Note: Check more specific patterns first (Return.str, Return.type, ref-counts) before general Return let (expectation, code_lines) = if let Some(expected) = last_line.strip_prefix("# ref-counts=") { ( Expectation::RefCounts(parse_ref_counts(expected)), &lines[..lines.len() - 1], ) } else if let Some(expected) = last_line.strip_prefix("# Return.str=") { (Expectation::ReturnStr(expected.to_string()), &lines[..lines.len() - 1]) } else if let Some(expected) = last_line.strip_prefix("# Return.type=") { (Expectation::ReturnType(expected.to_string()), &lines[..lines.len() - 1]) } else if let Some(expected) = last_line.strip_prefix("# 
Return=") { (Expectation::Return(expected.to_string()), &lines[..lines.len() - 1]) } else if let Some(expected) = last_line.strip_prefix("# Raise=") { (Expectation::Raise(expected.to_string()), &lines[..lines.len() - 1]) } else { // No expectation comment - just run and check it doesn't raise (Expectation::NoException, &lines[..]) }; // Code is everything except the directive comment (and expectation comment if present) let code = code_lines.join("\n"); (code, expectation, config) } /// Parses a TRACEBACK expectation from the end of a fixture file. /// /// Looks for a triple-quoted string starting with `"""TRACEBACK:` at the end of the file. /// Returns `Some((code, expected_traceback))` if found, `None` otherwise. /// /// The traceback string should contain the full expected output including the /// "Traceback (most recent call last):" header and the exception line. fn parse_traceback_expectation(content: &str) -> Option<(String, String)> { // Format: """\nTRACEBACK:\n...\n""" const MARKER: &str = "\"\"\"\nTRACEBACK:\n"; // Find the TRACEBACK marker let marker_pos = content.find(MARKER)?; // Extract the code before the marker let code_part = &content[..marker_pos]; let lines: Vec<&str> = code_part.lines().collect(); let code = lines.join("\n").trim_end().to_string(); // Extract the traceback content between the markers let after_marker = &content[marker_pos + MARKER.len()..]; // Find the closing triple quotes (preceded by newline) let end_pos = after_marker.find("\n\"\"\"")?; let traceback_content = &after_marker[..end_pos]; Some((code, traceback_content.to_string())) } /// Parses the ref-counts format: {'var': count, 'var2': count2} /// /// Supports both single and double quotes for variable names. 
/// Example: {'x': 2, 'y': 1} or {"x": 2, "y": 1} fn parse_ref_counts(s: &str) -> AHashMap { let mut counts = AHashMap::new(); let trimmed = s.trim().trim_start_matches('{').trim_end_matches('}'); for pair in trimmed.split(',') { let pair = pair.trim(); if pair.is_empty() { continue; } let parts: Vec<&str> = pair.split(':').collect(); assert!( parts.len() == 2, "Invalid ref-counts pair format: {pair}. Expected 'name': count" ); let name = parts[0].trim().trim_matches('\'').trim_matches('"'); let count: usize = parts[1] .trim() .parse() .unwrap_or_else(|_| panic!("Invalid ref count value: {}", parts[1])); counts.insert(name.to_string(), count); } counts } /// Python implementations of external functions for running iter mode tests in CPython. /// /// These implementations mirror the behavior of `dispatch_external_call` so that /// iter mode tests produce identical results in both Monty and CPython. /// /// This is loaded from `scripts/iter_test_methods.py` which is also imported by /// `scripts/run_traceback.py` to ensure consistency. const ITER_EXT_FUNCTIONS_PYTHON: &str = include_str!("../../../scripts/iter_test_methods.py"); /// Pre-imports Python modules that can cause race conditions during parallel test execution. /// /// Python's import machinery isn't fully thread-safe during module initialization. /// When multiple tests try to import modules like `typing` or `dataclasses` simultaneously, /// one thread may see a partially initialized module, causing `AttributeError`. /// /// This function must be called once before any parallel test execution to ensure /// all relevant modules are fully initialized. fn ensure_python_modules_imported() { static INIT: OnceLock<()> = OnceLock::new(); INIT.get_or_init(|| { Python::attach(|py| { // Import modules that are used by iter_test_methods.py and can cause race conditions. // The order matters: import dependencies first. 
/// Result from dispatching an external function call.
///
/// Distinguishes between synchronous calls (return immediately) and
/// asynchronous calls (return a future that needs later resolution).
///
/// Produced by `dispatch_external_call`, which returns `Async` only for the
/// `async_call` test function and `Sync` for every other known function.
enum DispatchResult {
    /// Synchronous result - pass directly to `state.run()`.
    /// Holds either a return value or an exception converted into `ExtFunctionResult`.
    Sync(ExtFunctionResult),
    /// Asynchronous call - use `state.run_pending()` and resolve later.
    /// Contains the value to resolve the future with.
    Async(MontyObject),
}
fn dispatch_external_call(name: &str, args: Vec) -> DispatchResult { match name { "add_ints" => { assert!(args.len() == 2, "add_ints requires 2 arguments"); let a = i64::try_from(&args[0]).expect("add_ints: first arg must be int"); let b = i64::try_from(&args[1]).expect("add_ints: second arg must be int"); DispatchResult::Sync(MontyObject::Int(a + b).into()) } "concat_strings" => { assert!(args.len() == 2, "concat_strings requires 2 arguments"); let a = String::try_from(&args[0]).expect("concat_strings: first arg must be str"); let b = String::try_from(&args[1]).expect("concat_strings: second arg must be str"); DispatchResult::Sync(MontyObject::String(a + &b).into()) } "return_value" => { assert!(args.len() == 1, "return_value requires 1 argument"); DispatchResult::Sync(args.into_iter().next().unwrap().into()) } "get_list" => { assert!(args.is_empty(), "get_list requires no arguments"); DispatchResult::Sync( MontyObject::List(vec![MontyObject::Int(1), MontyObject::Int(2), MontyObject::Int(3)]).into(), ) } "raise_error" => { // raise_error(exc_type: str, message: str) -> raises exception assert!(args.len() == 2, "raise_error requires 2 arguments"); let exc_type_str = String::try_from(&args[0]).expect("raise_error: first arg must be str"); let message = String::try_from(&args[1]).expect("raise_error: second arg must be str"); let exc_type = match exc_type_str.as_str() { "ValueError" => ExcType::ValueError, "TypeError" => ExcType::TypeError, "KeyError" => ExcType::KeyError, "RuntimeError" => ExcType::RuntimeError, _ => panic!("raise_error: unsupported exception type: {exc_type_str}"), }; DispatchResult::Sync(MontyException::new(exc_type, Some(message)).into()) } "make_point" => { assert!(args.is_empty(), "make_point requires no arguments"); // Return an immutable Point(x=1, y=2) dataclass DispatchResult::Sync( MontyObject::Dataclass { name: "Point".to_string(), type_id: 0, // Test fixture has no real Python type field_names: vec!["x".to_string(), "y".to_string()], 
attrs: vec![ (MontyObject::String("x".to_string()), MontyObject::Int(1)), (MontyObject::String("y".to_string()), MontyObject::Int(2)), ] .into(), frozen: true, } .into(), ) } "make_mutable_point" => { assert!(args.is_empty(), "make_mutable_point requires no arguments"); // Return a mutable Point(x=1, y=2) dataclass DispatchResult::Sync( MontyObject::Dataclass { name: "MutablePoint".to_string(), type_id: 0, // Test fixture has no real Python type field_names: vec!["x".to_string(), "y".to_string()], attrs: vec![ (MontyObject::String("x".to_string()), MontyObject::Int(1)), (MontyObject::String("y".to_string()), MontyObject::Int(2)), ] .into(), frozen: false, } .into(), ) } "make_user" => { assert!(args.len() == 1, "make_user requires 1 argument"); let name = String::try_from(&args[0]).expect("make_user: first arg must be str"); // Return an immutable User(name=name, active=True) dataclass DispatchResult::Sync( MontyObject::Dataclass { name: "User".to_string(), type_id: 0, // Test fixture has no real Python type field_names: vec!["name".to_string(), "active".to_string()], attrs: vec![ (MontyObject::String("name".to_string()), MontyObject::String(name)), (MontyObject::String("active".to_string()), MontyObject::Bool(true)), ] .into(), frozen: true, } .into(), ) } "make_empty" => { assert!(args.is_empty(), "make_empty requires no arguments"); // Return an immutable empty dataclass with no fields DispatchResult::Sync( MontyObject::Dataclass { name: "Empty".to_string(), type_id: 0, // Test fixture has no real Python type field_names: vec![], attrs: vec![].into(), frozen: true, } .into(), ) } "async_call" => { // async_call(x) -> coroutine that returns x // This is an async function - use run_pending() and resolve later assert!(args.len() == 1, "async_call requires 1 argument"); DispatchResult::Async(args.into_iter().next().unwrap()) } _ => panic!("Unknown external function: {name}"), } } /// Dispatches a dataclass method call to the appropriate test implementation. 
/// /// The first argument is always the dataclass instance (`self`). Known methods /// are implemented to mirror the Python dataclass methods in `iter_test_methods.py`. /// Unknown methods return `AttributeError`. fn dispatch_method_call( method_name: &str, args: &[MontyObject], kwargs: &[(MontyObject, MontyObject)], ) -> ExtFunctionResult { let class_name = match args.first() { Some(MontyObject::Dataclass { name, .. }) => name.as_str(), _ => "", }; match (class_name, method_name) { // Point.sum(self) -> int ("Point" | "MutablePoint", "sum") => { let (x, y) = extract_point_fields(&args[0]); MontyObject::Int(x + y).into() } // Point.add(self, dx, dy) -> Point ("Point", "add") => { assert!(args.len() == 3, "Point.add requires self, dx, dy"); let (x, y) = extract_point_fields(&args[0]); let dx = i64::try_from(&args[1]).expect("dx must be int"); let dy = i64::try_from(&args[2]).expect("dy must be int"); MontyObject::Dataclass { name: "Point".to_string(), type_id: 0, field_names: vec!["x".to_string(), "y".to_string()], attrs: vec![ (MontyObject::String("x".to_string()), MontyObject::Int(x + dx)), (MontyObject::String("y".to_string()), MontyObject::Int(y + dy)), ] .into(), frozen: true, } .into() } // Point.scale(self, factor) -> Point ("Point", "scale") => { assert!(args.len() == 2, "Point.scale requires self, factor"); let (x, y) = extract_point_fields(&args[0]); let factor = i64::try_from(&args[1]).expect("factor must be int"); MontyObject::Dataclass { name: "Point".to_string(), type_id: 0, field_names: vec!["x".to_string(), "y".to_string()], attrs: vec![ (MontyObject::String("x".to_string()), MontyObject::Int(x * factor)), (MontyObject::String("y".to_string()), MontyObject::Int(y * factor)), ] .into(), frozen: true, } .into() } // Point.describe(self, label='point') -> str ("Point", "describe") => { let (x, y) = extract_point_fields(&args[0]); // Check positional arg first, then kwargs, then default let label = if args.len() > 1 { 
String::try_from(&args[1]).expect("label must be str") } else if let Some(kw_label) = get_kwarg_str(kwargs, "label") { kw_label } else { "point".to_string() }; MontyObject::String(format!("{label}({x}, {y})")).into() } // MutablePoint.shift(self, dx, dy) -> None (mutates in-place via host) // Note: In the test runner, we can't actually mutate the dataclass in-place // since the host doesn't have direct heap access. Return None as the method // would in Python (the mutation happens inside Python's method body). // For test coverage purposes, we just return None. ("MutablePoint", "shift") => MontyObject::None.into(), // User.greeting(self) -> str ("User", "greeting") => { let name = extract_user_name(&args[0]); MontyObject::String(format!("Hello, {name}!")).into() } // Unknown method — return AttributeError _ => { let message = format!("'{class_name}' object has no attribute '{method_name}'"); MontyException::new(ExcType::AttributeError, Some(message)).into() } } } /// Extracts (x, y) fields from a Point or MutablePoint `MontyObject::Dataclass`. fn extract_point_fields(obj: &MontyObject) -> (i64, i64) { match obj { MontyObject::Dataclass { attrs, .. } => { let mut x = 0i64; let mut y = 0i64; for (key, value) in attrs { if let MontyObject::String(k) = key { match k.as_str() { "x" => x = i64::try_from(value).expect("x must be int"), "y" => y = i64::try_from(value).expect("y must be int"), _ => {} } } } (x, y) } other => panic!("Expected Dataclass, got {other:?}"), } } /// Extracts a string kwarg value by key name. fn get_kwarg_str(kwargs: &[(MontyObject, MontyObject)], name: &str) -> Option { for (key, value) in kwargs { if let MontyObject::String(key_str) = key && key_str == name { return Some(String::try_from(value).expect("kwarg value must be str")); } } None } /// Extracts the `name` field from a User `MontyObject::Dataclass`. fn extract_user_name(obj: &MontyObject) -> String { match obj { MontyObject::Dataclass { attrs, .. 
} => { for (key, value) in attrs { if let MontyObject::String(k) = key && k == "name" { return String::try_from(value).expect("name must be str"); } } panic!("User dataclass has no 'name' field"); } other => panic!("Expected Dataclass, got {other:?}"), } } // ============================================================================= // Virtual Filesystem for OS Call Tests // ============================================================================= /// Virtual file entry for OS call tests (static VFS). struct StaticVirtualFile { content: &'static [u8], mode: i64, } /// Virtual file entry (owned, for unified VFS lookups). struct VirtualFile { content: Vec, mode: i64, } /// Virtual filesystem modification time (arbitrary fixed timestamp). const VFS_MTIME: f64 = 1_700_000_000.0; /// Virtual filesystem for testing Path methods. /// /// Structure: /// ```text /// /virtual/ /// ├── file.txt (file, 644, "hello world\n") /// ├── data.bin (file, 644, b"\x00\x01\x02\x03") /// ├── empty.txt (file, 644, "") /// ├── subdir/ /// │ ├── nested.txt (file, 644, "nested content") /// │ └── deep/ /// │ └── file.txt (file, 644, "deep") /// └── readonly.txt (file, 444, "readonly") /// /// /nonexistent (does not exist) /// ``` fn get_static_virtual_file(path: &str) -> Option { match path { "/virtual/file.txt" => Some(StaticVirtualFile { content: b"hello world\n", mode: 0o644, }), "/virtual/data.bin" => Some(StaticVirtualFile { content: b"\x00\x01\x02\x03", mode: 0o644, }), "/virtual/empty.txt" => Some(StaticVirtualFile { content: b"", mode: 0o644, }), "/virtual/subdir/nested.txt" => Some(StaticVirtualFile { content: b"nested content", mode: 0o644, }), "/virtual/subdir/deep/file.txt" => Some(StaticVirtualFile { content: b"deep", mode: 0o644, }), "/virtual/readonly.txt" => Some(StaticVirtualFile { content: b"readonly", mode: 0o444, }), _ => None, } } /// Gets a virtual file, checking the mutable layer first, then falling back to static. 
fn get_virtual_file(path: &str) -> Option { // Check mutable layer first let mutable_result = MUTABLE_VFS.with(|vfs| { let vfs = vfs.borrow(); // Check if deleted if vfs.deleted_files.contains(path) { return Some(None); } // Check if exists in mutable layer if let Some((content, mode)) = vfs.files.get(path) { return Some(Some(VirtualFile { content: content.clone(), mode: *mode, })); } None }); match mutable_result { Some(Some(file)) => Some(file), Some(None) => None, // File was deleted None => { // Fall back to static VFS get_static_virtual_file(path).map(|f| VirtualFile { content: f.content.to_vec(), mode: f.mode, }) } } } // ============================================================================= // Mutable VFS Layer (Thread-Local Storage for Write Operations) // ============================================================================= /// Mutable state for the virtual filesystem, supporting write operations. /// /// This layer sits on top of the static VFS and allows tests to create, modify, and /// delete files and directories. The state is thread-local so tests don't interfere /// with each other. #[derive(Default)] struct MutableVfs { /// Files created or modified during test execution. files: HashMap, i64)>, // path -> (content, mode) /// Directories created during test execution. dirs: HashSet, /// Files deleted during test execution (shadows static VFS entries). deleted_files: HashSet, /// Directories deleted during test execution. deleted_dirs: HashSet, } thread_local! { /// Thread-local mutable VFS state. static MUTABLE_VFS: RefCell = RefCell::new(MutableVfs::default()); } /// Resets the mutable VFS state for a new test. fn reset_mutable_vfs() { MUTABLE_VFS.with(|vfs| { *vfs.borrow_mut() = MutableVfs::default(); }); } /// Check if the given path is a directory in the virtual filesystem. 
fn is_virtual_dir(path: &str) -> bool { // Check mutable layer first let result = MUTABLE_VFS.with(|vfs| { let vfs = vfs.borrow(); if vfs.deleted_dirs.contains(path) { return Some(false); } if vfs.dirs.contains(path) { return Some(true); } None }); if let Some(is_dir) = result { return is_dir; } // Fall back to static VFS matches!(path, "/virtual" | "/virtual/subdir" | "/virtual/subdir/deep") } /// Get directory entries for a virtual directory. fn get_virtual_dir_entries(path: &str) -> Option> { // First check if the directory exists if !is_virtual_dir(path) { return None; } // Get static entries (if any) let static_entries: Vec<&'static str> = match path { "/virtual" => vec![ "/virtual/file.txt", "/virtual/data.bin", "/virtual/empty.txt", "/virtual/subdir", "/virtual/readonly.txt", ], "/virtual/subdir" => vec!["/virtual/subdir/nested.txt", "/virtual/subdir/deep"], "/virtual/subdir/deep" => vec!["/virtual/subdir/deep/file.txt"], _ => vec![], }; // Combine with mutable layer MUTABLE_VFS.with(|vfs| { let vfs = vfs.borrow(); let mut entries: HashSet = static_entries .iter() .filter(|e| { let s: &str = e; !vfs.deleted_files.contains(s) && !vfs.deleted_dirs.contains(s) }) .map(|e| (*e).to_owned()) .collect(); // Add mutable files and dirs in this directory let prefix = if path.ends_with('/') { path.to_owned() } else { format!("{path}/") }; for file_path in vfs.files.keys() { if file_path.starts_with(&prefix) { // Only include direct children (not nested) let rest = &file_path[prefix.len()..]; if !rest.contains('/') { entries.insert(file_path.clone()); } } } for dir_path in &vfs.dirs { if dir_path.starts_with(&prefix) { let rest = &dir_path[prefix.len()..]; if !rest.contains('/') { entries.insert(dir_path.clone()); } } } Some(entries.into_iter().collect()) }) } /// Helper to get a boolean kwarg by name. 
fn get_kwarg_bool(kwargs: &[(MontyObject, MontyObject)], name: &str) -> bool { for (key, value) in kwargs { if let MontyObject::String(key_str) = key && key_str == name { return matches!(value, MontyObject::Bool(true)); } } false } /// Dispatches an OS function call using the virtual filesystem. /// /// Returns an `ExternalResult` to pass back to the Monty interpreter. /// Raises `FileNotFoundError` for missing files/directories. #[expect(clippy::cast_possible_wrap)] // Virtual file sizes are tiny, no wrap possible fn dispatch_os_call( function: OsFunction, args: &[MontyObject], kwargs: &[(MontyObject, MontyObject)], ) -> ExtFunctionResult { // Handle GetEnviron first as it takes no path argument if function == OsFunction::GetEnviron { // Return the virtual environment as a dict let env_dict = vec![ ( MontyObject::String("VIRTUAL_HOME".to_owned()), MontyObject::String("/virtual/home".to_owned()), ), ( MontyObject::String("VIRTUAL_USER".to_owned()), MontyObject::String("testuser".to_owned()), ), ( MontyObject::String("VIRTUAL_EMPTY".to_owned()), MontyObject::String(String::new()), ), ]; return MontyObject::Dict(env_dict.into()).into(); } // Extract path from MontyObject::Path (or String for backwards compatibility) let path = match &args[0] { MontyObject::Path(p) => p.clone(), MontyObject::String(s) => s.clone(), other => panic!("OS call: first arg must be path, got {other:?}"), }; match function { OsFunction::GetEnviron => unreachable!("handled above"), OsFunction::Exists => { let exists = get_virtual_file(&path).is_some() || is_virtual_dir(&path); MontyObject::Bool(exists).into() } OsFunction::IsFile => { let is_file = get_virtual_file(&path).is_some(); MontyObject::Bool(is_file).into() } OsFunction::IsDir => { let is_dir = is_virtual_dir(&path); MontyObject::Bool(is_dir).into() } OsFunction::IsSymlink => { // Virtual filesystem doesn't have symlinks MontyObject::Bool(false).into() } OsFunction::ReadText => { if let Some(file) = get_virtual_file(&path) { match 
std::str::from_utf8(&file.content) { Ok(text) => MontyObject::String(text.to_owned()).into(), Err(_) => MontyException::new( ExcType::UnicodeDecodeError, Some("'utf-8' codec can't decode bytes".to_owned()), ) .into(), } } else { MontyException::new( ExcType::FileNotFoundError, Some(format!("[Errno 2] No such file or directory: '{path}'")), ) .into() } } OsFunction::ReadBytes => { if let Some(file) = get_virtual_file(&path) { MontyObject::Bytes(file.content).into() } else { MontyException::new( ExcType::FileNotFoundError, Some(format!("[Errno 2] No such file or directory: '{path}'")), ) .into() } } OsFunction::Stat => { if let Some(file) = get_virtual_file(&path) { file_stat(file.mode, file.content.len() as i64, VFS_MTIME).into() } else if is_virtual_dir(&path) { dir_stat(0o755, VFS_MTIME).into() } else { MontyException::new( ExcType::FileNotFoundError, Some(format!("[Errno 2] No such file or directory: '{path}'")), ) .into() } } OsFunction::Iterdir => { if let Some(entries) = get_virtual_dir_entries(&path) { // Return Path objects, not strings let list: Vec = entries.into_iter().map(MontyObject::Path).collect(); MontyObject::List(list).into() } else { MontyException::new( ExcType::FileNotFoundError, Some(format!("[Errno 2] No such file or directory: '{path}'")), ) .into() } } OsFunction::Resolve | OsFunction::Absolute => { // For virtual paths, return as-is (they're already absolute) MontyObject::String(path).into() } OsFunction::Getenv => { // Virtual environment for testing os.getenv() // args[0] is key, args[1] is default (may be None) let key = String::try_from(&args[0]).expect("getenv: first arg must be key string"); let default = &args[1]; // Provide a few test environment variables let value = match key.as_str() { "VIRTUAL_HOME" => Some("/virtual/home"), "VIRTUAL_USER" => Some("testuser"), "VIRTUAL_EMPTY" => Some(""), _ => None, }; if let Some(v) = value { MontyObject::String(v.to_owned()).into() } else if matches!(default, MontyObject::None) { 
MontyObject::None.into() } else { // Return the default value default.clone().into() } } OsFunction::WriteText => { // args[0] is path, args[1] is text content let text = String::try_from(&args[1]).expect("write_text: second arg must be string"); MUTABLE_VFS.with(|vfs| { let mut vfs = vfs.borrow_mut(); vfs.files.insert(path.clone(), (text.into_bytes(), 0o644)); vfs.deleted_files.remove(&path); }); // write_text returns the number of bytes written let byte_count = MUTABLE_VFS.with(|vfs| vfs.borrow().files.get(&path).map_or(0, |(c, _)| c.len())); MontyObject::Int(byte_count as i64).into() } OsFunction::WriteBytes => { // args[0] is path, args[1] is bytes content let bytes = match &args[1] { MontyObject::Bytes(b) => b.clone(), other => panic!("write_bytes: second arg must be bytes, got {other:?}"), }; let byte_count = bytes.len(); MUTABLE_VFS.with(|vfs| { let mut vfs = vfs.borrow_mut(); vfs.files.insert(path.clone(), (bytes, 0o644)); vfs.deleted_files.remove(&path); }); // write_bytes returns the number of bytes written MontyObject::Int(byte_count as i64).into() } OsFunction::Mkdir => { // Check for parents and exist_ok in kwargs (e.g., mkdir(parents=True, exist_ok=True)) let parents = get_kwarg_bool(kwargs, "parents"); let exist_ok = get_kwarg_bool(kwargs, "exist_ok"); // Check if already exists if is_virtual_dir(&path) { if exist_ok { return MontyObject::None.into(); } return MontyException::new(ExcType::OSError, Some(format!("[Errno 17] File exists: '{path}'"))).into(); } // Check parent directory let parent = std::path::Path::new(&path) .parent() .map(|p| p.to_string_lossy().to_string()) .unwrap_or_default(); if !parent.is_empty() && !is_virtual_dir(&parent) { if parents { // Create parent directories recursively create_parent_dirs(&parent); } else { return MontyException::new( ExcType::FileNotFoundError, Some(format!("[Errno 2] No such file or directory: '{path}'")), ) .into(); } } MUTABLE_VFS.with(|vfs| { let mut vfs = vfs.borrow_mut(); 
vfs.deleted_dirs.remove(&path); vfs.dirs.insert(path); }); MontyObject::None.into() } OsFunction::Unlink => { // args[0] is path if get_virtual_file(&path).is_some() { MUTABLE_VFS.with(|vfs| { let mut vfs = vfs.borrow_mut(); vfs.files.remove(&path); vfs.deleted_files.insert(path); }); MontyObject::None.into() } else { MontyException::new( ExcType::FileNotFoundError, Some(format!("[Errno 2] No such file or directory: '{path}'")), ) .into() } } OsFunction::Rmdir => { // args[0] is path if is_virtual_dir(&path) { MUTABLE_VFS.with(|vfs| { let mut vfs = vfs.borrow_mut(); vfs.dirs.remove(&path); vfs.deleted_dirs.insert(path); }); MontyObject::None.into() } else { MontyException::new( ExcType::FileNotFoundError, Some(format!("[Errno 2] No such file or directory: '{path}'")), ) .into() } } OsFunction::Rename => { // args[0] is src path, args[1] is dest path let dest = match &args[1] { MontyObject::Path(p) => p.clone(), MontyObject::String(s) => s.clone(), other => panic!("rename: second arg must be path, got {other:?}"), }; if let Some(file) = get_virtual_file(&path) { MUTABLE_VFS.with(|vfs| { let mut vfs = vfs.borrow_mut(); // Remove from old location vfs.files.remove(&path); vfs.deleted_files.insert(path); // Add to new location vfs.files.insert(dest, (file.content, file.mode)); }); MontyObject::None.into() } else if is_virtual_dir(&path) { MUTABLE_VFS.with(|vfs| { let mut vfs = vfs.borrow_mut(); vfs.dirs.remove(&path); vfs.deleted_dirs.insert(path); vfs.dirs.insert(dest); }); MontyObject::None.into() } else { MontyException::new( ExcType::FileNotFoundError, Some(format!("[Errno 2] No such file or directory: '{path}'")), ) .into() } } } } /// Helper to create parent directories recursively. 
fn create_parent_dirs(path: &str) { if is_virtual_dir(path) { return; } // Create parent first if let Some(parent) = std::path::Path::new(path).parent() { let parent_str = parent.to_string_lossy().to_string(); if !parent_str.is_empty() { create_parent_dirs(&parent_str); } } // Create this directory MUTABLE_VFS.with(|vfs| { let mut vfs = vfs.borrow_mut(); vfs.dirs.insert(path.to_owned()); }); } /// Represents a test failure with details about expected vs actual values. #[derive(Debug)] struct TestFailure { test_name: String, kind: String, expected: String, actual: String, } impl std::fmt::Display for TestFailure { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!( f, "[{}] {} mismatch\ngot {:?}\ndiff:", self.test_name, self.kind, self.actual )?; for change in TextDiff::from_lines(&self.expected, &self.actual).iter_all_changes() { write!(f, "{}{}", change.tag(), change)?; } Ok(()) } } /// Try to run a test, returning Ok(()) on success or Err with failure details. /// /// This function executes Python code via the MontyRun and validates the result /// against the expected outcome specified in the fixture. fn try_run_test(path: &Path, code: &str, expectation: &Expectation) -> Result<(), TestFailure> { let test_name = path.strip_prefix("test_cases/").unwrap_or(path).display().to_string(); // Reset the mutable VFS for each test reset_mutable_vfs(); // Handle ref-count-return tests separately since they need run_ref_counts() #[cfg(feature = "ref-count-return")] if let Expectation::RefCounts(expected) = expectation { match MontyRun::new(code.to_owned(), &test_name, vec![]) { Ok(ex) => { let result = ex.run_ref_counts(vec![]); match result { Ok(monty::RefCountOutput { counts, unique_refs, heap_count, .. 
}) => { // Strict matching: verify all heap objects are accounted for by variables if unique_refs != heap_count { return Err(TestFailure { test_name, kind: "Strict matching".to_string(), expected: format!("{heap_count} heap objects"), actual: format!("{unique_refs} referenced by variables, counts: {counts:?}"), }); } if &counts != expected { return Err(TestFailure { test_name, kind: "ref-counts".to_string(), expected: format!("{expected:?}"), actual: format!("{counts:?}"), }); } return Ok(()); } Err(e) => { return Err(TestFailure { test_name, kind: "Runtime".to_string(), expected: "success".to_string(), actual: e.to_string(), }); } } } Err(parse_err) => { return Err(TestFailure { test_name, kind: "Parse".to_string(), expected: "success".to_string(), actual: parse_err.to_string(), }); } } } match MontyRun::new(code.to_owned(), &test_name, vec![]) { Ok(ex) => { let limits = ResourceLimits::new().max_recursion_depth(Some(TEST_RECURSION_LIMIT)); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); match result { Ok(obj) => match expectation { Expectation::ReturnStr(expected) => { let output = obj.to_string(); if output != *expected { return Err(TestFailure { test_name, kind: "str()".to_string(), expected: expected.clone(), actual: output, }); } } Expectation::Return(expected) => { let output = obj.py_repr(); if output != *expected { return Err(TestFailure { test_name, kind: "py_repr()".to_string(), expected: expected.clone(), actual: output, }); } } Expectation::ReturnType(expected) => { let output = obj.type_name(); if output != expected { return Err(TestFailure { test_name, kind: "type_name()".to_string(), expected: expected.clone(), actual: output.to_string(), }); } } #[cfg(not(feature = "ref-count-return"))] Expectation::RefCounts(_) => { // Skip ref-count tests when feature is disabled } Expectation::NoException => { // Success - code ran without exception as expected } Expectation::Raise(expected) | Expectation::Traceback(expected) => { 
return Err(TestFailure { test_name, kind: "Exception".to_string(), expected: expected.clone(), actual: "no exception raised".to_string(), }); } #[cfg(feature = "ref-count-return")] Expectation::RefCounts(_) => unreachable!(), }, Err(e) => { if let Expectation::Raise(expected) = expectation { let output = e.py_repr(); if output != *expected { return Err(TestFailure { test_name, kind: "Exception".to_string(), expected: expected.clone(), actual: output, }); } } else if let Expectation::Traceback(expected) = expectation { let output = e.to_string(); if output != *expected { return Err(TestFailure { test_name, kind: "Traceback".to_string(), expected: expected.clone(), actual: output, }); } } else { return Err(TestFailure { test_name, kind: "Unexpected error".to_string(), expected: "success".to_string(), actual: e.to_string(), }); } } } } Err(parse_err) => { if let Expectation::Raise(expected) = expectation { let output = parse_err.py_repr(); if output != *expected { return Err(TestFailure { test_name, kind: "Parse error".to_string(), expected: expected.clone(), actual: output, }); } } else if let Expectation::Traceback(expected) = expectation { let output = parse_err.to_string(); if output != *expected { return Err(TestFailure { test_name, kind: "Traceback".to_string(), expected: expected.clone(), actual: output, }); } } else { return Err(TestFailure { test_name, kind: "Unexpected parse error".to_string(), expected: "success".to_string(), actual: parse_err.to_string(), }); } } } Ok(()) } /// Try to run a test using MontyRun with external function support. /// /// This function handles tests marked with `# call-external` directive by using the /// iterative executor API and providing implementations for predefined external functions. 
fn try_run_iter_test(path: &Path, code: &str, expectation: &Expectation) -> Result<(), TestFailure> { let test_name = path.strip_prefix("test_cases/").unwrap_or(path).display().to_string(); // Reset the mutable VFS for each test reset_mutable_vfs(); // Ref-counting tests not supported in iter mode #[cfg(feature = "ref-count-return")] if matches!(expectation, Expectation::RefCounts(_)) { return Err(TestFailure { test_name, kind: "Configuration".to_string(), expected: "non-refcount test".to_string(), actual: "ref-counts tests are not supported in iter mode".to_string(), }); } let exec = match MontyRun::new(code.to_owned(), &test_name, vec![]) { Ok(e) => e, Err(parse_err) => { if let Expectation::Raise(expected) = expectation { let output = parse_err.py_repr(); if output != *expected { return Err(TestFailure { test_name, kind: "Parse error".to_string(), expected: expected.clone(), actual: output, }); } return Ok(()); } else if let Expectation::Traceback(expected) = expectation { let output = parse_err.to_string(); if output != *expected { return Err(TestFailure { test_name, kind: "Traceback".to_string(), expected: expected.clone(), actual: output, }); } return Ok(()); } return Err(TestFailure { test_name, kind: "Unexpected parse error".to_string(), expected: "success".to_string(), actual: parse_err.to_string(), }); } }; // Run execution loop, handling external function calls until complete let result = run_iter_loop(exec); match result { Ok(obj) => match expectation { Expectation::ReturnStr(expected) => { let output = obj.to_string(); if output != *expected { return Err(TestFailure { test_name, kind: "str()".to_string(), expected: expected.clone(), actual: output, }); } } Expectation::Return(expected) => { let output = obj.py_repr(); if output != *expected { return Err(TestFailure { test_name, kind: "py_repr()".to_string(), expected: expected.clone(), actual: output, }); } } Expectation::ReturnType(expected) => { let output = obj.type_name(); if output != expected { 
return Err(TestFailure { test_name, kind: "type_name()".to_string(), expected: expected.clone(), actual: output.to_string(), }); } } #[cfg(not(feature = "ref-count-return"))] Expectation::RefCounts(_) => {} Expectation::NoException => {} Expectation::Raise(expected) | Expectation::Traceback(expected) => { return Err(TestFailure { test_name, kind: "Exception".to_string(), expected: expected.clone(), actual: "no exception raised".to_string(), }); } #[cfg(feature = "ref-count-return")] Expectation::RefCounts(_) => unreachable!(), }, Err(e) => { if let Expectation::Raise(expected) = expectation { let output = e.py_repr(); if output != *expected { return Err(TestFailure { test_name, kind: "Exception".to_string(), expected: expected.clone(), actual: output, }); } } else if let Expectation::Traceback(expected) = expectation { let output = e.to_string(); if output != *expected { return Err(TestFailure { test_name, kind: "Traceback".to_string(), expected: expected.clone(), actual: output, }); } } else { return Err(TestFailure { test_name, kind: "Unexpected error".to_string(), expected: "success".to_string(), actual: e.to_string(), }); } } } Ok(()) } /// Execute the iter loop, dispatching external function calls until complete. /// /// When `ref-count-panic` feature is NOT enabled, this function also tests /// serialization round-trips by dumping and loading the execution state at /// each external function call boundary. 
/// /// Supports both synchronous and asynchronous external functions: /// - Sync functions: result is passed immediately via `state.run()` /// - Async functions: `state.run_pending()` creates a future, resolved via `ResolveFutures` fn run_iter_loop(exec: MontyRun) -> Result { let limits = ResourceLimits::new().max_recursion_depth(Some(TEST_RECURSION_LIMIT)); let mut progress = exec.start(vec![], LimitedTracker::new(limits), PrintWriter::Stdout)?; // Track pending async calls: (call_id, result_value) let mut pending_results: Vec<(u32, MontyObject)> = Vec::new(); loop { // Test serialization round-trip at each step (skip when ref-count-panic is enabled // since the old RunProgress would panic on drop without proper cleanup) #[cfg(not(feature = "ref-count-panic"))] { let bytes = progress.dump().expect("failed to dump RunProgress"); progress = RunProgress::load(&bytes).expect("failed to load RunProgress"); } match progress { RunProgress::Complete(result) => return Ok(result), RunProgress::FunctionCall(call) => { // Method calls on dataclasses are dispatched to the host. // Dispatch known methods; return AttributeError for unknown ones. 
if call.method_call { let result = dispatch_method_call(&call.function_name, &call.args, &call.kwargs); progress = call.resume(result, PrintWriter::Stdout)?; continue; } let dispatch_result = dispatch_external_call(&call.function_name, call.args.clone()); match dispatch_result { DispatchResult::Sync(return_value) => { progress = call.resume(return_value, PrintWriter::Stdout)?; } DispatchResult::Async(result_value) => { // Store the result for later resolution pending_results.push((call.call_id, result_value)); // Continue execution with a pending future progress = call.resume_pending(PrintWriter::Stdout)?; } } } RunProgress::ResolveFutures(state) => { // Resolve all pending futures that we have results for let results: Vec<(u32, ExtFunctionResult)> = state .pending_call_ids() .iter() .filter_map(|p| { pending_results.iter().position(|(id, _)| id == p).map(|idx| { let (call_id, value) = pending_results.remove(idx); (call_id, ExtFunctionResult::Return(value)) }) }) .collect(); assert!( !results.is_empty(), "ResolveFutures: no results available for pending calls: {:?}", state.pending_call_ids().iter().collect::>() ); progress = state.resume(results, PrintWriter::Stdout)?; } RunProgress::NameLookup(lookup) => { let result = match lookup.name.as_str() { // External functions — resolved as callable Function objects "add_ints" | "concat_strings" | "return_value" | "get_list" | "raise_error" | "make_point" | "make_mutable_point" | "make_user" | "make_empty" | "async_call" => { NameLookupResult::Value(MontyObject::Function { name: lookup.name.clone(), docstring: None, }) } // Non-function constants — resolved as plain values "CONST_INT" => NameLookupResult::Value(MontyObject::Int(42)), "CONST_STR" => NameLookupResult::Value(MontyObject::String("hello".to_string())), #[expect(clippy::approx_constant, reason = "3.14 is the intended test value")] "CONST_FLOAT" => NameLookupResult::Value(MontyObject::Float(3.14)), "CONST_BOOL" => 
NameLookupResult::Value(MontyObject::Bool(true)), "CONST_LIST" => NameLookupResult::Value(MontyObject::List(vec![ MontyObject::Int(1), MontyObject::Int(2), MontyObject::Int(3), ])), "CONST_NONE" => NameLookupResult::Value(MontyObject::None), // Unknown names → NameError _ => NameLookupResult::Undefined, }; progress = lookup.resume(result, PrintWriter::Stdout)?; } RunProgress::OsCall(call) => { let result = dispatch_os_call(call.function, &call.args, &call.kwargs); progress = call.resume(result, PrintWriter::Stdout)?; } } } } /// Split Python code into statements and a final expression to evaluate. /// /// For Return expectations, the last non-empty line is the expression to evaluate. /// For Raise/NoException, the entire code is statements (returns None for expression). /// /// Returns (statements_code, optional_final_expression). fn split_code_for_module(code: &str, need_return_value: bool) -> (String, Option) { let lines: Vec<&str> = code.lines().collect(); // Find the last non-empty line let last_idx = lines .iter() .rposition(|line| !line.trim().is_empty()) .expect("Empty code"); if need_return_value { let last_line = lines[last_idx].trim(); // Check if the last line is a statement (can't be evaluated as an expression) // Matches both `assert expr` and `assert(expr)` forms if last_line.starts_with("assert ") || last_line.starts_with("assert(") { // All code is statements, no expression to evaluate (lines[..=last_idx].join("\n"), None) } else { // Everything except last line is statements, last line is the expression let statements = lines[..last_idx].join("\n"); let expr = last_line.to_string(); (statements, Some(expr)) } } else { // All code is statements (for exception tests or NoException) (lines[..=last_idx].join("\n"), None) } } /// Wraps code in an async context for CPython execution. /// /// Monty supports top-level `await`, but CPython does not. 
This function transforms code /// like: /// /// ```python /// async def foo(): /// return 1 /// result = await foo() /// ``` /// /// Into: /// /// ```python /// import asyncio /// async def __test_main(): /// async def foo(): /// return 1 /// result = await foo() /// return result # if need_return_value /// __test_result__ = asyncio.run(__test_main()) /// ``` fn wrap_code_for_async(code: &str, need_return_value: bool) -> (String, Option) { let lines: Vec<&str> = code.lines().collect(); // Find the last non-empty, non-comment line let last_idx = lines .iter() .rposition(|line| { let trimmed = line.trim(); !trimmed.is_empty() && !trimmed.starts_with('#') }) .expect("Empty code"); // Indent all code by 4 spaces for the function body let indented: String = lines .iter() .map(|line| { if line.is_empty() { String::new() } else { format!(" {line}") } }) .collect::>() .join("\n"); let return_stmt = if need_return_value { // The last non-empty, non-comment line is the expression to return let last_line = lines[last_idx].trim(); format!("\n return {last_line}") } else { String::new() }; let wrapped = format!( "import asyncio\nasync def __test_main():\n{indented}{return_stmt}\n__test_result__ = asyncio.run(__test_main())" ); if need_return_value { (wrapped, Some("__test_result__".to_string())) } else { (wrapped, None) } } /// Run the traceback script to get CPython's traceback output for a test file. /// /// This imports scripts/run_traceback.py via pyo3 and calls `run_file_and_get_traceback()` /// which executes the file via runpy.run_path() to ensure full traceback information /// (including caret lines) is preserved. /// /// When `iter_mode` is true, external function implementations are injected into the /// file's globals before execution. /// /// When `async_mode` is true, code is wrapped in an async context before execution. 
fn run_traceback_script(path: &Path, iter_mode: bool, async_mode: bool) -> String { Python::attach(|py| { let run_traceback = import_run_traceback(py); // Get absolute path for the test file let abs_path = path.canonicalize().expect("Failed to get absolute path"); let path_str = abs_path.to_str().expect("Invalid UTF-8 in path"); // Call run_file_and_get_traceback with the recursion limit, iter_mode, and async_mode flags let result = run_traceback .call_method1( "run_file_and_get_traceback", (path_str, TEST_RECURSION_LIMIT, iter_mode, async_mode), ) .expect("Failed to call run_file_and_get_traceback"); // Handle None return (no exception raised) if result.is_none() { String::new() } else { result .extract() .expect("Failed to extract string from return value of run_file_and_get_traceback") } }) } fn format_traceback(py: Python<'_>, exc: &PyErr) -> String { let run_traceback = import_run_traceback(py); let exc_value = exc.value(py); let return_value = run_traceback .call_method1("format_full_traceback", (exc_value,)) .expect("Failed to call format_full_traceback"); return_value .extract() .expect("failed to extract string from return value of format_full_traceback") } /// Import the run_traceback module fn import_run_traceback(py: Python<'_>) -> Bound<'_, PyModule> { // Add scripts directory to sys.path (tests run from crates/monty/) let sys = py.import("sys").expect("Failed to import sys"); let sys_path = sys.getattr("path").expect("Failed to get sys.path"); sys_path .call_method1("insert", (0, "../../scripts")) .expect("Failed to add scripts to sys.path"); // Import the run_traceback module py.import("run_traceback").expect("Failed to import run_traceback") } /// Result from CPython execution - either a value to compare, or an early return. 
enum CpythonResult { /// Value to compare against expectation Value(String), /// No value to compare (NoException test succeeded) NoValue, /// Test failed with this error Failed(TestFailure), } /// Try to run a test through CPython, returning Ok(()) on success or Err with failure details. /// /// This function executes the same Python code via CPython (using pyo3) and /// compares the result with the expected value. This ensures Monty behaves /// identically to CPython. /// /// Code is executed at module level (not wrapped in a function) so that /// `global` keyword semantics work correctly. /// /// RefCounts tests are skipped as they're Monty-specific. /// Traceback tests use scripts/run_traceback.py for reliable caret line support. fn try_run_cpython_test( path: &Path, code: &str, expectation: &Expectation, iter_mode: bool, async_mode: bool, ) -> Result<(), TestFailure> { // Ensure Python modules are imported before parallel tests access them. // This prevents race conditions during module initialization. 
ensure_python_modules_imported(); // Skip RefCounts tests - only relevant for Monty if matches!(expectation, Expectation::RefCounts(_)) { return Ok(()); } let test_name = path.strip_prefix("test_cases/").unwrap_or(path).display().to_string(); // Traceback tests use the external script for reliable caret line support if let Expectation::Traceback(expected) = expectation { let result = run_traceback_script(path, iter_mode, async_mode); if result != *expected { return Err(TestFailure { test_name, kind: "CPython traceback".to_string(), expected: expected.clone(), actual: result, }); } return Ok(()); } let need_return_value = matches!( expectation, Expectation::Return(_) | Expectation::ReturnStr(_) | Expectation::ReturnType(_) ); // Use async wrapper for tests with top-level await let (statements, maybe_expr) = if async_mode { wrap_code_for_async(code, need_return_value) } else { split_code_for_module(code, need_return_value) }; let result: CpythonResult = Python::attach(|py| { // Execute statements at module level let globals = PyDict::new(py); // For iter mode tests, inject external function implementations into globals if iter_mode { let ext_funcs_cstr = CString::new(ITER_EXT_FUNCTIONS_PYTHON).expect("Invalid C string in ext funcs"); py.run(&ext_funcs_cstr, Some(&globals), None) .expect("Failed to define external functions for iter mode"); } // Run the statements let statements_cstr = CString::new(statements.as_str()).expect("Invalid C string in statements"); let stmt_result = py.run(&statements_cstr, Some(&globals), None); // Handle exception during statement execution if let Err(e) = stmt_result { if matches!(expectation, Expectation::NoException) { return CpythonResult::Failed(TestFailure { test_name: test_name.clone(), kind: "CPython unexpected exception".to_string(), expected: "no exception".to_string(), actual: format_traceback(py, &e), }); } if matches!(expectation, Expectation::Raise(_)) { return CpythonResult::Value(format_cpython_exception(py, &e)); } 
return CpythonResult::Failed(TestFailure { test_name: test_name.clone(), kind: "CPython unexpected exception".to_string(), expected: "success".to_string(), actual: format_traceback(py, &e), }); } // If we have an expression to evaluate, evaluate it if let Some(expr) = maybe_expr { let expr_cstr = CString::new(expr.as_str()).expect("Invalid C string in expr"); match py.eval(&expr_cstr, Some(&globals), None) { Ok(result) => { // Code returned successfully - format based on expectation type match expectation { Expectation::Return(_) => CpythonResult::Value(result.repr().unwrap().to_string()), Expectation::ReturnStr(_) => CpythonResult::Value(result.str().unwrap().to_string()), Expectation::ReturnType(_) => { CpythonResult::Value(result.get_type().name().unwrap().to_string()) } Expectation::Raise(expected) => CpythonResult::Failed(TestFailure { test_name: test_name.clone(), kind: "CPython exception".to_string(), expected: expected.clone(), actual: "no exception raised".to_string(), }), // Traceback tests are handled by run_traceback_script above Expectation::Traceback(_) | Expectation::NoException | Expectation::RefCounts(_) => { unreachable!() } } } Err(e) => { // Expression raised an exception if matches!(expectation, Expectation::NoException) { return CpythonResult::Failed(TestFailure { test_name: test_name.clone(), kind: "CPython unexpected exception".to_string(), expected: "no exception".to_string(), actual: format_traceback(py, &e), }); } if matches!(expectation, Expectation::Raise(_)) { return CpythonResult::Value(format_cpython_exception(py, &e)); } // Traceback tests are handled by run_traceback_script above CpythonResult::Failed(TestFailure { test_name: test_name.clone(), kind: "CPython unexpected exception".to_string(), expected: "success".to_string(), actual: format_traceback(py, &e), }) } } } else { // No expression to evaluate // Traceback tests are handled by run_traceback_script above if let Expectation::Raise(expected) = expectation { return 
CpythonResult::Failed(TestFailure { test_name: test_name.clone(), kind: "CPython exception".to_string(), expected: expected.clone(), actual: "no exception raised".to_string(), }); } CpythonResult::NoValue // NoException expectation - success } }); match result { CpythonResult::Value(actual) => { let expected = expectation.expected_value(); if actual != expected { return Err(TestFailure { test_name, kind: "CPython result".to_string(), expected: expected.to_string(), actual, }); } Ok(()) } CpythonResult::NoValue => Ok(()), CpythonResult::Failed(failure) => Err(failure), } } /// Format a CPython exception into the expected format. fn format_cpython_exception(py: Python<'_>, e: &PyErr) -> String { let exc_type = e.get_type(py).name().unwrap(); let exc_message: String = e .value(py) .getattr("args") .and_then(|args| args.get_item(0)) .and_then(|item| item.extract()) .unwrap_or_default(); if exc_message.is_empty() { format!("{exc_type}()") } else if exc_message.contains('\'') { // Use double quotes when message contains single quotes (like Python's repr) format!("{exc_type}(\"{exc_message}\")") } else { // Use single quotes (default Python repr format) format!("{exc_type}('{exc_message}')") } } /// Timeout duration for Monty tests. /// /// Tests that exceed this duration are considered to be hanging (infinite loop) /// and will fail with a timeout error. const TEST_TIMEOUT: Duration = Duration::from_secs(2); /// Result from running a test with a timeout. enum TimeoutResult { /// The closure completed successfully. Ok(T), /// The closure panicked with the given message. Panicked(String), /// The timeout was exceeded. TimedOut, } /// Runs a closure with a timeout, returning an error if it exceeds the duration or panics. /// /// Spawns the closure in a separate thread and waits for the result with a timeout. 
/// Distinguishes between three cases: /// - Success: the closure returned normally /// - Panic: the closure panicked (detected via channel disconnect + catch_unwind) /// - Timeout: the timeout was exceeded (possible infinite loop) /// /// Note that if a timeout occurs, the spawned thread will continue running in the /// background (Rust doesn't support killing threads), but the test will fail immediately. fn run_with_timeout(timeout: Duration, f: F) -> TimeoutResult where F: FnOnce() -> T + Send + 'static, T: Send + 'static, { let (tx, rx) = mpsc::channel(); thread::spawn(move || { // Catch panics so we can report them properly instead of as timeouts let result = panic::catch_unwind(AssertUnwindSafe(f)); match result { Ok(value) => { let _ = tx.send(Ok(value)); } Err(panic_payload) => { // Extract panic message from the payload let msg = if let Some(s) = panic_payload.downcast_ref::<&str>() { (*s).to_string() } else if let Some(s) = panic_payload.downcast_ref::() { s.clone() } else { "unknown panic".to_string() }; let _ = tx.send(Err(msg)); } } }); match rx.recv_timeout(timeout) { Ok(Ok(value)) => TimeoutResult::Ok(value), Ok(Err(panic_msg)) => TimeoutResult::Panicked(panic_msg), Err(RecvTimeoutError::Timeout) => TimeoutResult::TimedOut, // Disconnected without sending means something went very wrong Err(RecvTimeoutError::Disconnected) => { TimeoutResult::Panicked("thread terminated without sending result".to_string()) } } } /// Test function that runs each fixture through Monty. /// /// Handles xfail with strict semantics: if a test is marked `xfail=monty`, it must fail. /// If an xfail test passes unexpectedly, that's an error. 
fn run_test_cases_monty(path: &Path) -> Result<(), Box> { let content = fs::read_to_string(path)?; let (code, expectation, config) = parse_fixture(&content); let test_name = path.strip_prefix("test_cases/").unwrap_or(path).display().to_string(); // Move data into the closure since it needs 'static lifetime let path_owned = path.to_owned(); let iter_mode = config.iter_mode; let result = run_with_timeout(TEST_TIMEOUT, move || { if iter_mode { try_run_iter_test(&path_owned, &code, &expectation) } else { try_run_test(&path_owned, &code, &expectation) } }); // Handle timeout/panic errors from the test thread let result = match result { TimeoutResult::Ok(inner_result) => inner_result, TimeoutResult::Panicked(panic_msg) => Err(TestFailure { test_name: test_name.clone(), kind: "Panic".to_string(), expected: "no panic".to_string(), actual: format!("test panicked: {panic_msg}"), }), TimeoutResult::TimedOut => Err(TestFailure { test_name: test_name.clone(), kind: "Timeout".to_string(), expected: format!("completion within {TEST_TIMEOUT:?}"), actual: format!("test timed out after {TEST_TIMEOUT:?} (possible infinite loop)"), }), }; if config.xfail_monty { // Strict xfail: test must fail; if it passed, xfail should be removed assert!( result.is_err(), "[{test_name}] Test marked xfail=monty passed unexpectedly. Remove xfail if the test is now fixed." ); } else if let Err(failure) = result { panic!("{failure}"); } Ok(()) } /// Test function that runs each fixture through CPython. /// /// Handles xfail with strict semantics: if a test is marked `xfail=cpython`, it must fail. /// If an xfail test passes unexpectedly, that's an error. 
fn run_test_cases_cpython(path: &Path) -> Result<(), Box> { let content = fs::read_to_string(path)?; let (code, expectation, config) = parse_fixture(&content); let test_name = path.strip_prefix("test_cases/").unwrap_or(path).display().to_string(); let result = try_run_cpython_test(path, &code, &expectation, config.iter_mode, config.async_mode); if config.xfail_cpython { // Strict xfail: test must fail; if it passed, xfail should be removed assert!( result.is_err(), "[{test_name}] Test marked xfail=cpython passed unexpectedly. Remove xfail if the test is now fixed." ); } else if let Err(failure) = result { panic!("{failure}"); } Ok(()) } // Generate tests for all fixture files using datatest-stable harness macro datatest_stable::harness!( run_test_cases_monty, "test_cases", r"^.*\.py$", run_test_cases_cpython, "test_cases", r"^.*\.py$", ); ================================================ FILE: crates/monty/tests/inputs.rs ================================================ //! Tests for passing input values to the executor. //! //! These tests verify that `MontyObject` inputs are correctly converted to `Object` //! and can be used in Python code execution. 
use indexmap::IndexMap;
use monty::{ExcType, MontyObject, MontyRun};

// === Immediate Value Tests ===
// Each test declares a single input `x`, runs a tiny script, and checks the returned object.

#[test]
fn input_int() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::Int(42)]).unwrap();
    assert_eq!(result, MontyObject::Int(42));
}

#[test]
fn input_int_arithmetic() {
    let ex = MontyRun::new("x + 1".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::Int(41)]).unwrap();
    assert_eq!(result, MontyObject::Int(42));
}

#[test]
fn input_bool_true() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::Bool(true)]).unwrap();
    assert_eq!(result, MontyObject::Bool(true));
}

#[test]
fn input_bool_false() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::Bool(false)]).unwrap();
    assert_eq!(result, MontyObject::Bool(false));
}

#[test]
fn input_float() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::Float(2.5)]).unwrap();
    assert_eq!(result, MontyObject::Float(2.5));
}

#[test]
fn input_none() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::None]).unwrap();
    assert_eq!(result, MontyObject::None);
}

#[test]
fn input_ellipsis() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::Ellipsis]).unwrap();
    assert_eq!(result, MontyObject::Ellipsis);
}

// === Heap-Allocated Value Tests ===

#[test]
fn input_string() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::String("hello".to_string())])
        .unwrap();
    assert_eq!(result, MontyObject::String("hello".to_string()));
}

#[test]
fn input_string_concat() {
    let ex = MontyRun::new("x + ' world'".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::String("hello".to_string())])
        .unwrap();
    assert_eq!(result, MontyObject::String("hello world".to_string()));
}

#[test]
fn input_bytes() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::Bytes(vec![1, 2, 3])]).unwrap();
    assert_eq!(result, MontyObject::Bytes(vec![1, 2, 3]));
}

#[test]
fn input_list() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::List(vec![MontyObject::Int(1), MontyObject::Int(2)])])
        .unwrap();
    assert_eq!(
        result,
        MontyObject::List(vec![MontyObject::Int(1), MontyObject::Int(2)])
    );
}

#[test]
fn input_list_append() {
    // The script mutates the input list and then returns it
    let ex = MontyRun::new("x.append(3)\nx".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::List(vec![MontyObject::Int(1), MontyObject::Int(2)])])
        .unwrap();
    assert_eq!(
        result,
        MontyObject::List(vec![MontyObject::Int(1), MontyObject::Int(2), MontyObject::Int(3)])
    );
}

#[test]
fn input_tuple() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::Tuple(vec![
            MontyObject::Int(1),
            MontyObject::String("two".to_string()),
        ])])
        .unwrap();
    assert_eq!(
        result,
        MontyObject::Tuple(vec![MontyObject::Int(1), MontyObject::String("two".to_string())])
    );
}

#[test]
fn input_dict() {
    let mut map = IndexMap::new();
    map.insert(MontyObject::String("a".to_string()), MontyObject::Int(1));
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::dict(map)]).unwrap();
    // Build expected map for comparison
    let mut expected = IndexMap::new();
    expected.insert(MontyObject::String("a".to_string()), MontyObject::Int(1));
    assert_eq!(result, MontyObject::Dict(expected.into()));
}

#[test]
fn input_dict_get() {
    let mut map = IndexMap::new();
    map.insert(MontyObject::String("key".to_string()), MontyObject::Int(42));
    let ex = MontyRun::new("x['key']".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::dict(map)]).unwrap();
    assert_eq!(result, MontyObject::Int(42));
}

// === Multiple Inputs ===
// Input values are passed in the same order as the declared input names.

#[test]
fn multiple_inputs_two() {
    let ex = MontyRun::new("x + y".to_owned(), "test.py", vec!["x".to_owned(), "y".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::Int(10), MontyObject::Int(32)])
        .unwrap();
    assert_eq!(result, MontyObject::Int(42));
}

#[test]
fn multiple_inputs_three() {
    let ex = MontyRun::new(
        "x + y + z".to_owned(),
        "test.py",
        vec!["x".to_owned(), "y".to_owned(), "z".to_owned()],
    )
    .unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::Int(10), MontyObject::Int(20), MontyObject::Int(12)])
        .unwrap();
    assert_eq!(result, MontyObject::Int(42));
}

#[test]
fn multiple_inputs_mixed_types() {
    // Create a list from two inputs
    let ex = MontyRun::new("[x, y]".to_owned(), "test.py", vec!["x".to_owned(), "y".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::Int(1), MontyObject::String("two".to_string())])
        .unwrap();
    assert_eq!(
        result,
        MontyObject::List(vec![MontyObject::Int(1), MontyObject::String("two".to_string())])
    );
}

// === Edge Cases ===

#[test]
fn no_inputs() {
    let ex = MontyRun::new("42".to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    assert_eq!(result, MontyObject::Int(42));
}

#[test]
fn nested_list() {
    let ex = MontyRun::new("x[0][1]".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::List(vec![MontyObject::List(vec![
            MontyObject::Int(1),
            MontyObject::Int(2),
        ])])])
        .unwrap();
    assert_eq!(result, MontyObject::Int(2));
}

#[test]
fn empty_list_input() {
    let ex = MontyRun::new("len(x)".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::List(vec![])]).unwrap();
    assert_eq!(result, MontyObject::Int(0));
}

#[test]
fn empty_string_input() {
    let ex = MontyRun::new("len(x)".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::String(String::new())]).unwrap();
    assert_eq!(result, MontyObject::Int(0));
}

// === Exception Input Tests ===

#[test]
fn input_exception() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::Exception {
            exc_type: ExcType::ValueError,
            arg: Some("test message".to_string()),
        }])
        .unwrap();
    assert_eq!(
        result,
        MontyObject::Exception {
            exc_type: ExcType::ValueError,
            arg: Some("test message".to_string()),
        }
    );
}

#[test]
fn input_exception_no_arg() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::Exception {
            exc_type: ExcType::TypeError,
            arg: None,
        }])
        .unwrap();
    assert_eq!(
        result,
        MontyObject::Exception {
            exc_type: ExcType::TypeError,
            arg: None,
        }
    );
}

#[test]
fn input_exception_in_list() {
    let ex = MontyRun::new("x[0]".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex
        .run_no_limits(vec![MontyObject::List(vec![MontyObject::Exception {
            exc_type: ExcType::KeyError,
            arg: Some("key".to_string()),
        }])])
        .unwrap();
    assert_eq!(
        result,
        MontyObject::Exception {
            exc_type: ExcType::KeyError,
            arg: Some("key".to_string()),
        }
    );
}

#[test]
fn input_exception_raise() {
    // Test that an exception passed as input can be raised
    let ex = MontyRun::new("raise x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::Exception {
        exc_type: ExcType::ValueError,
        arg: Some("input error".to_string()),
    }]);
    let exc = result.unwrap_err();
    assert_eq!(exc.exc_type(), ExcType::ValueError);
    assert_eq!(exc.message(), Some("input error"));
}

// === Invalid Input Tests ===

#[test]
fn invalid_input_repr() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    let result = ex.run_no_limits(vec![MontyObject::Repr("some repr".to_string())]);
    assert!(result.is_err(), "Repr should not be a valid input");
}

#[test]
fn invalid_input_repr_nested_in_list() {
    let ex = MontyRun::new("x".to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    // Repr nested inside a list should still be invalid
    let result = ex.run_no_limits(vec![MontyObject::List(vec![MontyObject::Repr(
        "nested repr".to_string(),
    )])]);
    assert!(result.is_err(), "Repr nested in list should be invalid");
}

// === Function Parameter Shadowing Tests ===
// These tests verify that function parameters properly shadow script inputs with the same name.
// NOTE(review): the multi-line Python literals below appear collapsed onto a single line in
// this copy; confirm their line breaks and indentation against the repository.

#[test]
fn function_param_shadows_input() {
    // Function parameter `x` should shadow the script input `x`
    let code = " def foo(x): return x + 1 foo(x * 2) ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    // x=5 (input), foo(x * 2) = foo(10), inside foo x=10 (param), returns 11
    let result = ex.run_no_limits(vec![MontyObject::Int(5)]).unwrap();
    assert_eq!(result, MontyObject::Int(11));
}

#[test]
fn function_param_shadows_input_multiple_params() {
    // Multiple function parameters should all shadow their corresponding inputs
    let code = " def add(x, y): return x + y add(x * 10, y * 100) ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec!["x".to_owned(), "y".to_owned()]).unwrap();
    // x=2, y=3 (inputs), add(20, 300), inside add x=20, y=300, returns 320
    let result = ex
        .run_no_limits(vec![MontyObject::Int(2), MontyObject::Int(3)])
        .unwrap();
    assert_eq!(result, MontyObject::Int(320));
}

#[test]
fn function_param_shadows_input_but_global_accessible() {
    // Function parameter shadows input, but other inputs are still accessible as globals
    let code = " def foo(x): return x + y foo(100) ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec!["x".to_owned(), "y".to_owned()]).unwrap();
    // x=5, y=3 (inputs), foo(100), inside foo x=100 (param), y=3 (global), returns 103
    let result = ex
        .run_no_limits(vec![MontyObject::Int(5), MontyObject::Int(3)])
        .unwrap();
    assert_eq!(result, MontyObject::Int(103));
}

#[test]
fn function_param_shadows_input_accessible_outside() {
    // Script input should still be accessible outside the function that shadows it
    let code = " def double(x): return x * 2 double(10) + x ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    // x=5 (input), double(10) = 20, then 20 + x (global) = 20 + 5 = 25
    let result = ex.run_no_limits(vec![MontyObject::Int(5)]).unwrap();
    assert_eq!(result, MontyObject::Int(25));
}

#[test]
fn function_param_with_default_shadows_input() {
    // Function parameter with default should shadow input when called with argument
    let code = " def foo(x=100): return x + 1 foo(x * 2) ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    // x=5 (input), foo(10), inside foo x=10 (param), returns 11
    let result = ex.run_no_limits(vec![MontyObject::Int(5)]).unwrap();
    assert_eq!(result, MontyObject::Int(11));
}

#[test]
fn function_uses_input_as_argument() {
    // Input can be passed as argument, and param shadows inside function
    let code = " def double(x): return x * 2 double(x) ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    // x=7 (input), double(7), inside double x=7 (param from arg), returns 14
    let result = ex.run_no_limits(vec![MontyObject::Int(7)]).unwrap();
    assert_eq!(result, MontyObject::Int(14));
}

#[test]
fn function_doesnt_uses_input_as_argument() {
    // The input `x` is declared but never read: the call passes a literal instead
    let code = " def double(x): return x * 2 double(2) ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec!["x".to_owned()]).unwrap();
    // x=7 (input) is unused; double(2) binds x=2 (param) inside double, returns 4
    let result = ex.run_no_limits(vec![MontyObject::Int(7)]).unwrap();
    assert_eq!(result, MontyObject::Int(4));
}
================================================
FILE: crates/monty/tests/json_serde.rs
================================================
//! Tests for JSON serialization and deserialization of `MontyObject`.
//!
//! `MontyObject` uses derived serde with externally tagged enum format.
//! This means each data-carrying variant is wrapped in an object with the variant name as key,
//! while unit variants (e.g. `None`, `Ellipsis`) serialize as a bare string.

use monty::{ExcType, MontyObject, MontyRun};

// === JSON Serialization Tests ===

#[test]
fn json_output_primitives() {
    // Data-carrying primitives are wrapped in their variant names; `None` is a bare string
    assert_eq!(serde_json::to_string(&MontyObject::Int(42)).unwrap(), r#"{"Int":42}"#);
    assert_eq!(
        serde_json::to_string(&MontyObject::Float(1.5)).unwrap(),
        r#"{"Float":1.5}"#
    );
    assert_eq!(
        serde_json::to_string(&MontyObject::String("hi".into())).unwrap(),
        r#"{"String":"hi"}"#
    );
    assert_eq!(
        serde_json::to_string(&MontyObject::Bool(true)).unwrap(),
        r#"{"Bool":true}"#
    );
    assert_eq!(serde_json::to_string(&MontyObject::None).unwrap(), r#""None""#);
}

#[test]
fn json_output_list() {
    let ex = MontyRun::new("[1, 'two', 3.0]".to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    assert_eq!(
        serde_json::to_string(&result).unwrap(),
        r#"{"List":[{"Int":1},{"String":"two"},{"Float":3.0}]}"#
    );
}

#[test]
fn json_output_dict() {
    // Dicts serialize as an array of `[key, value]` pairs
    let ex = MontyRun::new("{'a': 1, 'b': 2}".to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    assert_eq!(
        serde_json::to_string(&result).unwrap(),
        r#"{"Dict":[[{"String":"a"},{"Int":1}],[{"String":"b"},{"Int":2}]]}"#
    );
}

#[test]
fn json_output_tuple() {
    let ex = MontyRun::new("(1, 'two')".to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    assert_eq!(
        serde_json::to_string(&result).unwrap(),
        r#"{"Tuple":[{"Int":1},{"String":"two"}]}"#
    );
}

#[test]
fn json_output_bytes() {
    // Bytes serialize as an array of integer byte values (104 = 'h', 105 = 'i')
    let ex = MontyRun::new("b'hi'".to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    assert_eq!(serde_json::to_string(&result).unwrap(), r#"{"Bytes":[104,105]}"#);
}

#[test]
fn json_output_ellipsis() {
    let ex = MontyRun::new("...".to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    assert_eq!(serde_json::to_string(&result).unwrap(), r#""Ellipsis""#);
}

#[test]
fn json_output_exception() {
    let obj = MontyObject::Exception {
        exc_type: ExcType::ValueError,
        arg: Some("test".to_string()),
    };
    assert_eq!(
        serde_json::to_string(&obj).unwrap(),
        r#"{"Exception":{"exc_type":"ValueError","arg":"test"}}"#
    );
}

#[test]
fn json_output_repr() {
    // NOTE(review): the empty literal may be an artifact of this copy (angle-bracketed text
    // appears to be missing elsewhere in it); confirm against the repository.
    let obj = MontyObject::Repr("".to_string());
    assert_eq!(serde_json::to_string(&obj).unwrap(), r#"{"Repr":""}"#);
}

#[test]
fn json_output_cycle_list() {
    // Test JSON serialization of cyclic list
    let ex = MontyRun::new("a = []; a.append(a); a".to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    // The cyclic reference becomes MontyObject::Cycle
    assert_eq!(
        serde_json::to_string(&result).unwrap(),
        r#"{"List":[{"Cycle":[1,"[...]"]}]}"#
    );
}

#[test]
fn json_output_cycle_dict() {
    // Test JSON serialization of cyclic dict
    let ex = MontyRun::new("d = {}; d['self'] = d; d".to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    assert_eq!(
        serde_json::to_string(&result).unwrap(),
        r#"{"Dict":[[{"String":"self"},{"Cycle":[1,"{...}"]}]]}"#
    );
}

// === JSON Deserialization Tests ===

#[test]
fn json_deserialize_primitives() {
    // Deserialize tagged format
    let int: MontyObject = serde_json::from_str(r#"{"Int":42}"#).unwrap();
    let float: MontyObject = serde_json::from_str(r#"{"Float":2.5}"#).unwrap();
    let string: MontyObject = serde_json::from_str(r#"{"String":"hello"}"#).unwrap();
    let bool_val: MontyObject = serde_json::from_str(r#"{"Bool":true}"#).unwrap();
    let null: MontyObject = serde_json::from_str(r#""None""#).unwrap();

    assert_eq!(int, MontyObject::Int(42));
    assert_eq!(float, MontyObject::Float(2.5));
    assert_eq!(string, MontyObject::String("hello".to_string()));
    assert_eq!(bool_val, MontyObject::Bool(true));
    assert_eq!(null, MontyObject::None);
}

#[test]
fn json_deserialize_list() {
    let list: MontyObject = serde_json::from_str(r#"{"List":[{"Int":1},{"String":"two"},{"Float":3.0}]}"#).unwrap();
    assert_eq!(
        list,
        MontyObject::List(vec![
            MontyObject::Int(1),
            MontyObject::String("two".to_string()),
            MontyObject::Float(3.0)
        ])
    );
}

#[test]
fn json_deserialize_dict() {
    let dict: MontyObject =
        serde_json::from_str(r#"{"Dict":[[{"String":"a"},{"Int":1}],[{"String":"b"},{"Int":2}]]}"#).unwrap();
    // Check the pairs positionally so the test also pins their order
    if let MontyObject::Dict(pairs) = dict {
        let pairs_vec: Vec<_> = pairs.into_iter().collect();
        assert_eq!(pairs_vec.len(), 2);
        assert_eq!(
            pairs_vec[0],
            (MontyObject::String("a".to_string()), MontyObject::Int(1))
        );
        assert_eq!(
            pairs_vec[1],
            (MontyObject::String("b".to_string()), MontyObject::Int(2))
        );
    } else {
        panic!("expected Dict");
    }
}

// === Round-trip Tests ===

#[test]
fn json_roundtrip() {
    // Values round-trip through JSON correctly
    let ex = MontyRun::new(
        "{'items': [1, 'two', None], 'flag': True}".to_owned(),
        "test.py",
        vec![],
    )
    .unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    let json = serde_json::to_string(&result).unwrap();
    let parsed: MontyObject = serde_json::from_str(&json).unwrap();
    assert_eq!(result, parsed);
}

#[test]
fn json_roundtrip_empty() {
    // Empty structures round-trip correctly
    let list: MontyObject = serde_json::from_str(r#"{"List":[]}"#).unwrap();
    let dict: MontyObject = serde_json::from_str(r#"{"Dict":[]}"#).unwrap();
    assert_eq!(serde_json::to_string(&list).unwrap(), r#"{"List":[]}"#);
    assert_eq!(serde_json::to_string(&dict).unwrap(), r#"{"Dict":[]}"#);
}

// === Cycle Equality Tests ===

#[test]
fn cycle_equality_same_id() {
    // Multiple references to the same cyclic object should produce equal Cycle values
    let ex = MontyRun::new("a = []; a.append(a); [a, a]".to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    if let MontyObject::List(outer) = &result {
        assert_eq!(outer.len(), 2, "outer list should have 2 elements");
        if let (MontyObject::List(inner1), MontyObject::List(inner2)) = (&outer[0], &outer[1]) {
            assert_eq!(inner1.len(), 1);
            assert_eq!(inner2.len(), 1);
            assert_eq!(inner1[0], inner2[0], "cycles referencing same object should be equal");
            assert!(matches!(&inner1[0], MontyObject::Cycle(..)));
        } else {
            panic!("expected inner lists");
        }
    } else {
        panic!("expected outer list");
    }
}

#[test]
fn cycle_equality_different_ids() {
    // Two separate cyclic objects should produce unequal Cycle values
    let ex = MontyRun::new(
        "a = []; a.append(a); b = []; b.append(b); [a, b]".to_owned(),
        "test.py",
        vec![],
    )
    .unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    if let MontyObject::List(outer) = &result {
        assert_eq!(outer.len(), 2, "outer list should have 2 elements");
        if let (MontyObject::List(inner1), MontyObject::List(inner2)) = (&outer[0], &outer[1]) {
            assert_eq!(inner1.len(), 1);
            assert_eq!(inner2.len(), 1);
            assert_ne!(
                inner1[0], inner2[0],
                "cycles referencing different objects should not be equal"
            );
            // The inequality must come from the ID, since the placeholder text is identical
            if let (MontyObject::Cycle(id1, ph1), MontyObject::Cycle(id2, ph2)) = (&inner1[0], &inner2[0]) {
                assert_ne!(id1, id2, "heap IDs should differ");
                assert_eq!(ph1, ph2, "placeholders should match (both are lists)");
                assert_eq!(*ph1, "[...]");
            } else {
                panic!("expected Cycle variants");
            }
        } else {
            panic!("expected inner lists");
        }
    } else {
        panic!("expected outer list");
    }
}

================================================
FILE: crates/monty/tests/main.rs
================================================
use monty::{MontyObject, MontyRun};

/// Test we can reuse exec without borrow checker issues.
#[test] fn repeat_exec() { let ex = MontyRun::new("1 + 2".to_owned(), "test.py", vec![]).unwrap(); let r = ex.run_no_limits(vec![]).unwrap(); let int_value: i64 = r.as_ref().try_into().unwrap(); assert_eq!(int_value, 3); let r = ex.run_no_limits(vec![]).unwrap(); let int_value: i64 = r.as_ref().try_into().unwrap(); assert_eq!(int_value, 3); } #[test] fn test_get_interned_string() { let ex = MontyRun::new("'foobar'".to_owned(), "test.py", vec![]).unwrap(); let r = ex.run_no_limits(vec![]).unwrap(); let int_value: String = r.as_ref().try_into().unwrap(); assert_eq!(int_value, "foobar"); let r = ex.run_no_limits(vec![]).unwrap(); let int_value: String = r.as_ref().try_into().unwrap(); assert_eq!(int_value, "foobar"); } /// Test that calling a method on a dataclass in standard execution mode /// (without iter/external function support) returns a NotImplementedError. /// This exercises the `FrameExit::MethodCall` path in `frame_exit_to_object`. #[test] fn dataclass_method_call_in_standard_mode_errors() { let point = MontyObject::Dataclass { name: "Point".to_string(), type_id: 0, field_names: vec!["x".to_string(), "y".to_string()], attrs: vec![ (MontyObject::String("x".to_string()), MontyObject::Int(1)), (MontyObject::String("y".to_string()), MontyObject::Int(2)), ] .into(), frozen: true, }; let ex = MontyRun::new("point.sum()".to_owned(), "test.py", vec!["point".to_string()]).unwrap(); let err = ex.run_no_limits(vec![point]).unwrap_err(); let msg = err.to_string(); assert!( msg.contains("Method call 'sum' not implemented with standard execution"), "Expected NotImplementedError for method call, got: {msg}" ); } /// Test that subscript augmented matrix multiplication reports the dedicated /// unsupported-operation compile error. /// /// CPython supports `@=` syntax, so the comparative Python test-case suite /// cannot cover Monty's current compile-time rejection of this operator. Keep /// this as a Rust-side regression test until matrix multiplication support /// exists. 
#[test]
fn subscript_augassign_matmul_reports_not_supported() {
    let source = "d = {'x': 1}\nd['x'] @= 2".to_owned();
    // Compilation itself is expected to fail, so the error comes from `MontyRun::new`.
    let rendered = MontyRun::new(source, "test.py", vec![]).unwrap_err().to_string();
    assert_eq!(
        rendered,
        "Traceback (most recent call last):\n File \"test.py\", line 2\n d['x'] @= 2\n ~~~~~~\nSyntaxError: matrix multiplication augmented assignment (@=) is not yet supported"
    );
}

================================================
FILE: crates/monty/tests/math_module.rs
================================================
use monty::{MontyObject, MontyRun};

/// Compiles `code` with no inputs, runs it without limits, and returns the resulting object.
/// Panics if compilation or execution fails.
fn run_expr(code: &str) -> MontyObject {
    MontyRun::new(code.to_owned(), "test.py", vec![])
        .unwrap()
        .run_no_limits(vec![])
        .unwrap()
}

/// Compiles `code` with no inputs, runs it, and returns the rendered execution error.
/// Panics if compilation fails or if execution succeeds.
fn run_expect_error(code: &str) -> String {
    MontyRun::new(code.to_owned(), "test.py", vec![])
        .unwrap()
        .run_no_limits(vec![])
        .unwrap_err()
        .to_string()
}

// ==========================
// Overflow tests (i64-specific)
// ==========================

/// 21! (51090942171709440000) does not fit in an i64, so `math.factorial(21)`
/// is expected to raise OverflowError.
#[test]
fn factorial_i64_overflow() {
    let error_text = run_expect_error("import math\nmath.factorial(21)");
    assert!(
        error_text.contains("OverflowError"),
        "Expected OverflowError for factorial(21), got: {error_text}"
    );
}

/// `math.comb(66, 33)` is 7219428434016265740, which still fits in an i64;
/// the exact value must be returned.
#[test]
fn comb_large_but_fits_i64() {
    let output = run_expr("import math\nmath.comb(66, 33)");
    let value: i64 = (&output).try_into().unwrap();
    assert_eq!(value, 7_219_428_434_016_265_740);
}

/// `math.comb(68, 34)` overflows i64 even with GCD reduction
/// (68C34 = 28048800420600 * ... > i64::MAX).
#[test]
fn comb_i64_overflow() {
    let error_text = run_expect_error("import math\nmath.comb(68, 34)");
    assert!(
        error_text.contains("OverflowError"),
        "Expected OverflowError for comb(68, 34), got: {error_text}"
    );
}

/// `math.perm(21, 21)` equals 21!, which does not fit in an i64, so OverflowError is expected.
#[test]
fn perm_i64_overflow() {
    let error_text = run_expect_error("import math\nmath.perm(21, 21)");
    assert!(
        error_text.contains("OverflowError"),
        "Expected OverflowError for perm(21, 21), got: {error_text}"
    );
}

// ==========================
// ldexp negative exponent loop
// ==========================

/// `math.ldexp(1.0, -1050)` must produce a tiny but strictly positive float:
/// the exponent lies in the subnormal range between -1074 and -1022.
#[test]
fn ldexp_large_negative_exponent_loop() {
    let output = run_expr("import math\nmath.ldexp(1.0, -1050)");
    let f: f64 = (&output).try_into().unwrap();
    // The value is a very small subnormal, but it must not collapse to zero
    assert!(f > 0.0, "ldexp(1.0, -1050) should be positive, got: {f}");
    assert!(f < 1e-300, "ldexp(1.0, -1050) should be tiny, got: {f}");
}

/// `math.ldexp(1.0, -1074)` must equal 5e-324, the smallest positive subnormal f64.
#[test]
fn ldexp_minimum_subnormal() {
    let output = run_expr("import math\nmath.ldexp(1.0, -1074)");
    let f: f64 = (&output).try_into().unwrap();
    // Bit-for-bit comparison: the expected value is an exact IEEE 754 subnormal
    let actual_bits = f.to_bits();
    let expected_bits = 5e-324_f64.to_bits();
    assert_eq!(actual_bits, expected_bits, "ldexp(1.0, -1074) should equal 5e-324");
}

// ==========================
// isqrt Newton's method refinement
// ==========================

/// `math.isqrt` with values near i64::MAX where f64 sqrt loses precision,
/// triggering the Newton's method refinement and overshoot correction.
#[test] fn isqrt_large_values_newton_refinement() { // i64::MAX = 9223372036854775807 // isqrt(i64::MAX) = 3037000499 (3037000499^2 = 9223372030926249001 <= i64::MAX) let result = run_expr("import math\nmath.isqrt(9223372036854775807)"); let v: i64 = (&result).try_into().unwrap(); assert_eq!(v, 3_037_000_499); // 3037000499^2 = 9223372030926249001 (perfect square) let result = run_expr("import math\nmath.isqrt(9223372030926249001)"); let v: i64 = (&result).try_into().unwrap(); assert_eq!(v, 3_037_000_499); // 3037000499^2 - 1: the initial f64 estimate overshoots by 1, // triggering both the delta==0 break and the overshoot correction loop. let result = run_expr("import math\nmath.isqrt(9223372030926249000)"); let v: i64 = (&result).try_into().unwrap(); assert_eq!(v, 3_037_000_498); } ================================================ FILE: crates/monty/tests/name_lookup.rs ================================================ //! Tests for `NameLookup` — the mechanism by which the host resolves undefined names //! during iterative execution. //! //! When the VM encounters an undefined global (or unassigned local at module scope), //! it yields `RunProgress::NameLookup` so the host can provide a value or signal //! that the name is truly undefined. These tests exercise that API directly: //! //! - Resolving names to various types (functions, ints, strings, lists, booleans) //! - Returning `NameLookupResult::Undefined` to trigger `NameError` //! - Caching: a resolved name should not yield another `NameLookup` //! - Multiple distinct names each get their own lookup //! - Builtins bypass the `NameLookup` mechanism entirely use monty::{MontyObject, MontyRun, NameLookupResult, NoLimitTracker, PrintWriter, RunProgress}; /// Helper: drives execution through consecutive `NameLookup` yields, /// resolving each by calling `resolver(name)`. 
fn resolve_lookups_with( mut progress: RunProgress, resolver: impl Fn(&str) -> NameLookupResult, ) -> Result, monty::MontyException> { while let RunProgress::NameLookup(lookup) = progress { let result = resolver(&lookup.name); progress = lookup.resume(result, PrintWriter::Stdout)?; } Ok(progress) } /// Helper: resolves all `NameLookup` yields as `Function` objects (the common case /// for external function calls). fn resolve_as_functions( progress: RunProgress, ) -> Result, monty::MontyException> { resolve_lookups_with(progress, |name| { NameLookupResult::Value(MontyObject::Function { name: name.to_string(), docstring: None, }) }) } // --------------------------------------------------------------------------- // Resolving to different types // --------------------------------------------------------------------------- /// NameLookup resolved as a Function → code can call it and use the result. #[test] fn resolve_as_function_and_call() { let runner = MontyRun::new("x = ext(10); x + 1".to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); // Resolve NameLookup for 'ext' as a function let progress = resolve_as_functions(progress).unwrap(); // Should now be at a FunctionCall for ext(10) let call = progress.into_function_call().expect("expected FunctionCall"); assert_eq!(call.function_name, "ext"); assert_eq!(call.args, vec![MontyObject::Int(10)]); // Resume with 42 → code evaluates 42 + 1 = 43 let result = call.resume(MontyObject::Int(42), PrintWriter::Stdout).unwrap(); assert_eq!(result.into_complete().unwrap(), MontyObject::Int(43)); } /// NameLookup resolved as an integer constant — no function call involved. 
#[test]
fn resolve_as_int() {
    let runner = MontyRun::new("PI + 1".to_owned(), "test.py", vec![]).unwrap();
    let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap();

    let lookup = progress.into_name_lookup().unwrap();
    assert_eq!(lookup.name, "PI");

    let result = lookup.resume(MontyObject::Int(3), PrintWriter::Stdout).unwrap();
    assert_eq!(result.into_complete().unwrap(), MontyObject::Int(4));
}

/// NameLookup resolved as a string value.
#[test]
fn resolve_as_string() {
    let runner = MontyRun::new("GREETING + '!'".to_owned(), "test.py", vec![]).unwrap();
    let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap();

    let lookup = progress.into_name_lookup().unwrap();
    assert_eq!(lookup.name, "GREETING");

    let result = lookup
        .resume(MontyObject::String("hello".to_string()), PrintWriter::Stdout)
        .unwrap();
    assert_eq!(
        result.into_complete().unwrap(),
        MontyObject::String("hello!".to_string())
    );
}

/// NameLookup resolved as a boolean.
#[test]
fn resolve_as_bool() {
    let runner = MontyRun::new("not FLAG".to_owned(), "test.py", vec![]).unwrap();
    let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap();

    let lookup = progress.into_name_lookup().unwrap();
    assert_eq!(lookup.name, "FLAG");

    let result = lookup.resume(MontyObject::Bool(true), PrintWriter::Stdout).unwrap();
    assert_eq!(result.into_complete().unwrap(), MontyObject::Bool(false));
}

/// NameLookup resolved as a list.
#[test] fn resolve_as_list() { let runner = MontyRun::new("len(ITEMS)".to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let lookup = progress.into_name_lookup().unwrap(); assert_eq!(lookup.name, "ITEMS"); let items = MontyObject::List(vec![MontyObject::Int(10), MontyObject::Int(20), MontyObject::Int(30)]); let result = lookup.resume(items, PrintWriter::Stdout).unwrap(); assert_eq!(result.into_complete().unwrap(), MontyObject::Int(3)); } /// NameLookup resolved as a float. #[test] fn resolve_as_float() { let runner = MontyRun::new("TAU + 0.5".to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let lookup = progress.into_name_lookup().unwrap(); assert_eq!(lookup.name, "TAU"); let result = lookup.resume(MontyObject::Float(6.0), PrintWriter::Stdout).unwrap(); assert_eq!(result.into_complete().unwrap(), MontyObject::Float(6.5)); } // --------------------------------------------------------------------------- // Undefined → NameError // --------------------------------------------------------------------------- /// Returning `NameLookupResult::Undefined` causes `NameError` at global scope. #[test] fn undefined_raises_name_error() { let runner = MontyRun::new("unknown_thing".to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let lookup = progress.into_name_lookup().unwrap(); assert_eq!(lookup.name, "unknown_thing"); let err = lookup .resume(NameLookupResult::Undefined, PrintWriter::Stdout) .unwrap_err(); let msg = err.to_string(); assert!( msg.contains("NameError: name 'unknown_thing' is not defined"), "Expected NameError, got: {msg}" ); } /// In non-iterative mode (`run_no_limits`), undefined globals automatically raise `NameError` /// without yielding to the host. 
#[test]
fn standard_mode_raises_name_error() {
    let runner = MontyRun::new("unknown_fn(42)".to_owned(), "test.py", vec![]).unwrap();
    let err = runner.run_no_limits(vec![]).unwrap_err();
    let msg = err.to_string();
    assert!(
        msg.contains("NameError: name 'unknown_fn' is not defined"),
        "Expected NameError, got: {msg}"
    );
}

/// Undefined inside a function that does NOT assign the name locally should
/// still raise `NameError` (not `UnboundLocalError`), since the name lookup
/// falls through to the global scope.
#[test]
fn undefined_in_function_raises_name_error() {
    // `missing` is not assigned inside `f()`, so Python treats it as a global lookup
    let code = "def f():\n return missing\nf()".to_owned();
    let runner = MontyRun::new(code, "test.py", vec![]).unwrap();
    let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap();

    let lookup = progress.into_name_lookup().unwrap();
    assert_eq!(lookup.name, "missing");

    let err = lookup
        .resume(NameLookupResult::Undefined, PrintWriter::Stdout)
        .unwrap_err();
    let msg = err.to_string();
    assert!(
        msg.contains("NameError: name 'missing' is not defined"),
        "Expected NameError, got: {msg}"
    );
}

// ---------------------------------------------------------------------------
// Caching
// ---------------------------------------------------------------------------

/// Function calls in call context bypass `NameLookup` entirely — they go
/// directly to `FunctionCall` via `LoadGlobalCallable` + `ExtFunction`.
#[test] fn resolved_name_is_cached() { let code = "a = ext(1); b = ext(2); a + b".to_owned(); let runner = MontyRun::new(code, "test.py", vec![]).unwrap(); let mut progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let mut call_count = 0; loop { match progress { RunProgress::FunctionCall(call) => { assert_eq!(call.function_name, "ext"); call_count += 1; let val: i64 = (&call.args[0]).try_into().unwrap(); progress = call.resume(MontyObject::Int(val * 10), PrintWriter::Stdout).unwrap(); } RunProgress::Complete(result) => { // ext(1) -> 10, ext(2) -> 20 → 30 assert_eq!(result, MontyObject::Int(30)); break; } other => panic!("unexpected progress: {other:?}"), } } assert_eq!(call_count, 2, "should get FunctionCall for each ext() call"); } /// A non-function constant resolved once is also cached. #[test] fn resolved_constant_is_cached() { // Use the same constant twice — should only yield one NameLookup let code = "X + X".to_owned(); let runner = MontyRun::new(code, "test.py", vec![]).unwrap(); let mut progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let mut lookup_count = 0; loop { match progress { RunProgress::NameLookup(lookup) => { assert_eq!(lookup.name, "X"); lookup_count += 1; progress = lookup.resume(MontyObject::Int(21), PrintWriter::Stdout).unwrap(); } RunProgress::Complete(result) => { assert_eq!(result, MontyObject::Int(42)); break; } other => panic!("unexpected progress: {other:?}"), } } assert_eq!(lookup_count, 1, "constant should be cached after first lookup"); } // --------------------------------------------------------------------------- // Multiple names // --------------------------------------------------------------------------- /// Different undefined names in call context each yield `FunctionCall` directly /// (via `LoadGlobalCallable`), not `NameLookup`. 
#[test] fn multiple_names_each_looked_up() { let code = "a = foo(1); b = bar(2); a + b".to_owned(); let runner = MontyRun::new(code, "test.py", vec![]).unwrap(); let mut progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let mut called_names = Vec::new(); loop { match progress { RunProgress::FunctionCall(call) => { called_names.push(call.function_name.clone()); let val: i64 = (&call.args[0]).try_into().unwrap(); progress = call.resume(MontyObject::Int(val * 100), PrintWriter::Stdout).unwrap(); } RunProgress::Complete(result) => { // foo(1) -> 100, bar(2) -> 200 → 300 assert_eq!(result, MontyObject::Int(300)); break; } other => panic!("unexpected progress: {other:?}"), } } assert_eq!(called_names, vec!["foo", "bar"]); } /// Mix of function calls and constant name lookups in the same execution. /// `ext` in call context goes directly to `FunctionCall` (no `NameLookup`). /// `OFFSET` in non-call context yields `NameLookup`. #[test] fn mixed_function_and_constant_lookups() { let code = "ext(OFFSET)".to_owned(); let runner = MontyRun::new(code, "test.py", vec![]).unwrap(); let mut progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); let mut looked_up_names = Vec::new(); loop { match progress { RunProgress::NameLookup(lookup) => { let name = lookup.name.clone(); looked_up_names.push(name.clone()); let value = match name.as_str() { "OFFSET" => MontyObject::Int(100), _ => panic!("unexpected name lookup: {name}"), }; progress = lookup.resume(value, PrintWriter::Stdout).unwrap(); } RunProgress::FunctionCall(call) => { // ext goes directly to FunctionCall via LoadGlobalCallable assert_eq!(call.function_name, "ext"); assert_eq!(call.args, vec![MontyObject::Int(100)]); progress = call.resume(MontyObject::Int(999), PrintWriter::Stdout).unwrap(); } RunProgress::Complete(result) => { assert_eq!(result, MontyObject::Int(999)); break; } other => panic!("unexpected progress: {other:?}"), } } // Only 'OFFSET' yields NameLookup; 
'ext' bypasses it via LoadGlobalCallable assert_eq!(looked_up_names, vec!["OFFSET"]); } // --------------------------------------------------------------------------- // Builtins bypass NameLookup // --------------------------------------------------------------------------- /// Known builtins like `len` and `range` do NOT trigger `NameLookup`. #[test] fn builtins_do_not_trigger_lookup() { let runner = MontyRun::new("len([1, 2, 3])".to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); assert_eq!(progress.into_complete().unwrap(), MontyObject::Int(3)); } /// `range` is a builtin — should complete without any NameLookup. #[test] fn range_builtin_no_lookup() { let runner = MontyRun::new("list(range(3))".to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); assert_eq!( progress.into_complete().unwrap(), MontyObject::List(vec![MontyObject::Int(0), MontyObject::Int(1), MontyObject::Int(2)]) ); } // --------------------------------------------------------------------------- // Function passed as input (no NameLookup) // --------------------------------------------------------------------------- /// A function passed as an input is already in the namespace — calling it should /// yield a `FunctionCall` directly without any `NameLookup`. 
#[test] fn input_function_no_lookup() { let runner = MontyRun::new("my_fn(10)".to_owned(), "test.py", vec!["my_fn".to_string()]).unwrap(); let progress = runner .start( vec![MontyObject::Function { name: "my_fn".to_string(), docstring: None, }], NoLimitTracker, PrintWriter::Stdout, ) .unwrap(); // Should go straight to FunctionCall — no NameLookup let call = progress .into_function_call() .expect("expected FunctionCall, not NameLookup"); assert_eq!(call.function_name, "my_fn"); assert_eq!(call.args, vec![MontyObject::Int(10)]); let result = call.resume(MontyObject::Int(99), PrintWriter::Stdout).unwrap(); assert_eq!(result.into_complete().unwrap(), MontyObject::Int(99)); } /// A function input assigned to a new variable and called via the alias should /// still yield a `FunctionCall` without any `NameLookup`. #[test] fn input_function_reassigned_then_called() { let runner = MontyRun::new( "alias = my_fn; alias(5)".to_owned(), "test.py", vec!["my_fn".to_string()], ) .unwrap(); let progress = runner .start( vec![MontyObject::Function { name: "my_fn".to_string(), docstring: None, }], NoLimitTracker, PrintWriter::Stdout, ) .unwrap(); // No NameLookup — my_fn is an input, alias is a local assignment let call = progress .into_function_call() .expect("expected FunctionCall, not NameLookup"); assert_eq!(call.function_name, "my_fn"); assert_eq!(call.args, vec![MontyObject::Int(5)]); let result = call.resume(MontyObject::Int(50), PrintWriter::Stdout).unwrap(); assert_eq!(result.into_complete().unwrap(), MontyObject::Int(50)); } /// A function input used alongside a name-looked-up constant: the function should /// not trigger NameLookup but the constant should. 
#[test] fn input_function_with_looked_up_arg() { let runner = MontyRun::new("my_fn(OFFSET)".to_owned(), "test.py", vec!["my_fn".to_string()]).unwrap(); let mut progress = runner .start( vec![MontyObject::Function { name: "my_fn".to_string(), docstring: None, }], NoLimitTracker, PrintWriter::Stdout, ) .unwrap(); // OFFSET is undefined — should yield NameLookup (my_fn should NOT) let lookup = match progress { RunProgress::NameLookup(l) => l, other => panic!("expected NameLookup for 'OFFSET', got {other:?}"), }; assert_eq!(lookup.name, "OFFSET"); progress = lookup.resume(MontyObject::Int(42), PrintWriter::Stdout).unwrap(); // Now should be at FunctionCall for my_fn(42) let call = progress.into_function_call().expect("expected FunctionCall"); assert_eq!(call.function_name, "my_fn"); assert_eq!(call.args, vec![MontyObject::Int(42)]); let result = call.resume(MontyObject::Int(100), PrintWriter::Stdout).unwrap(); assert_eq!(result.into_complete().unwrap(), MontyObject::Int(100)); } /// When a NameLookup resolves to a Function whose name differs from the variable /// name (i.e., the function's `__name__` is not interned), the VM stores it as /// `HeapData::ExtFunction(String)`. Calling it should yield a `FunctionCall` with /// the function's actual name, not the variable name. #[test] fn resolve_function_with_non_interned_name() { // `x = foobar` triggers NameLookup for 'foobar', we resolve it as a function // named 'not_foobar'. Then `x()` calls the function. 
let code = "x = foobar; x()".to_owned(); let runner = MontyRun::new(code, "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); // First: NameLookup for 'foobar' let lookup = progress.into_name_lookup().unwrap(); assert_eq!(lookup.name, "foobar"); // Resolve with a function whose name is NOT 'foobar' — it won't be interned let progress = lookup .resume( NameLookupResult::Value(MontyObject::Function { name: "not_foobar".to_string(), docstring: None, }), PrintWriter::Stdout, ) .unwrap(); // The VM calls x() which is HeapData::ExtFunction("not_foobar") → FunctionCall let call = progress .into_function_call() .expect("expected FunctionCall for 'not_foobar'"); assert_eq!(call.function_name, "not_foobar"); assert!(call.args.is_empty()); assert!(call.kwargs.is_empty()); // Resume with a return value let result = call.resume(MontyObject::Int(42), PrintWriter::Stdout).unwrap(); assert_eq!(result.into_complete().unwrap(), MontyObject::Int(42)); } ================================================ FILE: crates/monty/tests/os_tests.rs ================================================ //! Tests for OS function calls. //! //! Verifies that Path filesystem methods and os module functions yield //! `RunProgress::OsCall` with the correct `OsFunction` variant and arguments, //! and that return values are correctly used by Python code. use monty::{MontyObject, MontyRun, NoLimitTracker, OsFunction, PrintWriter, RunProgress, file_stat}; /// Helper to run code and extract the OsCall progress. /// /// Runs the provided Python code and asserts that it yields an `OsCall`. /// Returns the `OsFunction` and positional arguments from the call. /// The state is resumed with a mock result to properly clean up ref counts. 
fn run_to_oscall(code: &str) -> (OsFunction, Vec) { let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); match progress { RunProgress::OsCall(call) => { // Resume with a mock result appropriate for the function type. let mock_result = match call.function { OsFunction::Exists | OsFunction::IsFile | OsFunction::IsDir | OsFunction::IsSymlink => { MontyObject::Bool(true) } OsFunction::ReadText | OsFunction::Resolve | OsFunction::Absolute => { MontyObject::String("mock".to_owned()) } OsFunction::ReadBytes => MontyObject::Bytes(vec![]), OsFunction::Stat => MontyObject::None, OsFunction::Iterdir => MontyObject::List(vec![]), OsFunction::WriteText | OsFunction::WriteBytes | OsFunction::Mkdir | OsFunction::Unlink | OsFunction::Rmdir | OsFunction::Rename => MontyObject::None, OsFunction::Getenv => MontyObject::String("mock_env_value".to_owned()), OsFunction::GetEnviron => MontyObject::Dict(vec![].into()), }; let function = call.function; let args = call.args.clone(); let _ = call.resume(mock_result, PrintWriter::Stdout); (function, args) } _ => panic!("expected OsCall, got {progress:?}"), } } /// Helper to run code, provide an OS call result, and get the final value. 
fn run_oscall_with_result(code: &str, mock_result: MontyObject) -> (OsFunction, Vec, MontyObject) { let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let progress = runner.start(vec![], NoLimitTracker, PrintWriter::Stdout).unwrap(); match progress { RunProgress::OsCall(call) => { let function = call.function; let args = call.args.clone(); let resumed = call.resume(mock_result, PrintWriter::Stdout).unwrap(); let final_result = resumed.into_complete().expect("expected Complete after resume"); (function, args, final_result) } _ => panic!("expected OsCall, got {progress:?}"), } } // ============================================================================= // Verify each OsFunction variant yields correctly // ============================================================================= #[test] fn path_exists() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/tmp/test.txt').exists()"); assert_eq!(func, OsFunction::Exists); assert_eq!(args, vec![MontyObject::Path("/tmp/test.txt".to_owned())]); } #[test] fn path_is_file() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/tmp/test.txt').is_file()"); assert_eq!(func, OsFunction::IsFile); assert_eq!(args, vec![MontyObject::Path("/tmp/test.txt".to_owned())]); } #[test] fn path_is_dir() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/tmp').is_dir()"); assert_eq!(func, OsFunction::IsDir); assert_eq!(args, vec![MontyObject::Path("/tmp".to_owned())]); } #[test] fn path_is_symlink() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/tmp/link').is_symlink()"); assert_eq!(func, OsFunction::IsSymlink); assert_eq!(args, vec![MontyObject::Path("/tmp/link".to_owned())]); } #[test] fn path_read_text() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/tmp/file.txt').read_text()"); assert_eq!(func, OsFunction::ReadText); assert_eq!(args, vec![MontyObject::Path("/tmp/file.txt".to_owned())]); } #[test] fn 
path_read_bytes() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/tmp/file.bin').read_bytes()"); assert_eq!(func, OsFunction::ReadBytes); assert_eq!(args, vec![MontyObject::Path("/tmp/file.bin".to_owned())]); } #[test] fn path_stat() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/tmp/file.txt').stat()"); assert_eq!(func, OsFunction::Stat); assert_eq!(args, vec![MontyObject::Path("/tmp/file.txt".to_owned())]); } #[test] fn path_iterdir() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/tmp').iterdir()"); assert_eq!(func, OsFunction::Iterdir); assert_eq!(args, vec![MontyObject::Path("/tmp".to_owned())]); } #[test] fn path_resolve() { let (func, args) = run_to_oscall("from pathlib import Path; Path('./relative').resolve()"); assert_eq!(func, OsFunction::Resolve); assert_eq!(args, vec![MontyObject::Path("./relative".to_owned())]); } #[test] fn path_absolute() { let (func, args) = run_to_oscall("from pathlib import Path; Path('./relative').absolute()"); assert_eq!(func, OsFunction::Absolute); assert_eq!(args, vec![MontyObject::Path("./relative".to_owned())]); } // ============================================================================= // Path argument handling (spaces, unicode, concatenation) // ============================================================================= #[test] fn path_with_spaces() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/path/with spaces/file.txt').exists()"); assert_eq!(func, OsFunction::Exists); assert_eq!(args[0], MontyObject::Path("/path/with spaces/file.txt".to_owned())); } #[test] fn path_with_unicode() { let (func, args) = run_to_oscall("from pathlib import Path; Path('/путь/文件.txt').exists()"); assert_eq!(func, OsFunction::Exists); assert_eq!(args[0], MontyObject::Path("/путь/文件.txt".to_owned())); } #[test] fn path_concatenation_yields_correct_path() { let (func, args) = run_to_oscall( r" from pathlib import Path base = Path('/home') full = base 
/ 'user' / 'file.txt' full.exists() ", ); assert_eq!(func, OsFunction::Exists); assert_eq!(args[0], MontyObject::Path("/home/user/file.txt".to_owned())); } // ============================================================================= // Round-trip tests: OS call result used by Python code // ============================================================================= #[test] fn exists_result_used_in_conditional() { let code = r" from pathlib import Path 'found' if Path('/tmp/test.txt').exists() else 'missing' "; let (func, _, result) = run_oscall_with_result(code, MontyObject::Bool(true)); assert_eq!(func, OsFunction::Exists); assert_eq!(result, MontyObject::String("found".to_owned())); // Also test false case let (_, _, result) = run_oscall_with_result(code, MontyObject::Bool(false)); assert_eq!(result, MontyObject::String("missing".to_owned())); } #[test] fn read_text_result_concatenated() { let code = r" from pathlib import Path 'Content: ' + Path('/tmp/hello.txt').read_text() "; let (func, _, result) = run_oscall_with_result(code, MontyObject::String("Hello!".to_owned())); assert_eq!(func, OsFunction::ReadText); assert_eq!(result, MontyObject::String("Content: Hello!".to_owned())); } #[test] fn read_bytes_result_used() { let code = r" from pathlib import Path data = Path('/tmp/file.bin').read_bytes() data[0] "; let (func, _, result) = run_oscall_with_result(code, MontyObject::Bytes(vec![0x42, 0x43, 0x44])); assert_eq!(func, OsFunction::ReadBytes); assert_eq!(result, MontyObject::Int(0x42)); } #[test] fn iterdir_result_iterated() { let code = r" from pathlib import Path entries = Path('/tmp').iterdir() len(entries) "; // Return a list of path strings (simulating directory entries) let mock_entries = MontyObject::List(vec![ MontyObject::String("/tmp/file1.txt".to_owned()), MontyObject::String("/tmp/file2.txt".to_owned()), MontyObject::String("/tmp/subdir".to_owned()), ]); let (func, args, result) = run_oscall_with_result(code, mock_entries); assert_eq!(func, 
OsFunction::Iterdir); assert_eq!(args[0], MontyObject::Path("/tmp".to_owned())); assert_eq!(result, MontyObject::Int(3)); } #[test] fn iterdir_result_indexed() { let code = r" from pathlib import Path entries = Path('/home/user').iterdir() entries[0] "; let mock_entries = MontyObject::List(vec![ MontyObject::String("/home/user/documents".to_owned()), MontyObject::String("/home/user/downloads".to_owned()), ]); let (func, args, result) = run_oscall_with_result(code, mock_entries); assert_eq!(func, OsFunction::Iterdir); assert_eq!(args[0], MontyObject::Path("/home/user".to_owned())); assert_eq!(result, MontyObject::String("/home/user/documents".to_owned())); } #[test] fn stat_result_st_size() { let code = r" from pathlib import Path info = Path('/tmp/file.txt').stat() info.st_size "; let (func, args, result) = run_oscall_with_result(code, file_stat(0o644, 1024, 0.0)); assert_eq!(func, OsFunction::Stat); assert_eq!(args[0], MontyObject::Path("/tmp/file.txt".to_owned())); assert_eq!(result, MontyObject::Int(1024)); } #[test] fn stat_result_st_mode() { let code = r" from pathlib import Path info = Path('/tmp/file.txt').stat() info.st_mode "; // 0o755 = rwxr-xr-x (file_stat adds 0o100_000 for regular file type) let (func, args, result) = run_oscall_with_result(code, file_stat(0o755, 0, 0.0)); assert_eq!(func, OsFunction::Stat); assert_eq!(args[0], MontyObject::Path("/tmp/file.txt".to_owned())); assert_eq!(result, MontyObject::Int(0o100_755)); } #[test] fn stat_result_multiple_fields() { let code = r" from pathlib import Path info = Path('/var/log/syslog').stat() (info.st_size, info.st_mode) "; // 0o644 = rw-r--r-- (file_stat adds 0o100_000 for regular file type) let (func, args, result) = run_oscall_with_result(code, file_stat(0o644, 4096, 0.0)); assert_eq!(func, OsFunction::Stat); assert_eq!(args[0], MontyObject::Path("/var/log/syslog".to_owned())); assert_eq!( result, MontyObject::Tuple(vec![MontyObject::Int(4096), MontyObject::Int(0o100_644)]) ); } #[test] fn 
stat_result_index_access() { // stat_result also supports index access like a tuple let code = r" from pathlib import Path info = Path('/tmp/file.txt').stat() info[6] # st_size is at index 6 "; let (func, args, result) = run_oscall_with_result(code, file_stat(0o644, 2048, 0.0)); assert_eq!(func, OsFunction::Stat); assert_eq!(args[0], MontyObject::Path("/tmp/file.txt".to_owned())); assert_eq!(result, MontyObject::Int(2048)); } // ============================================================================= // os.getenv tests // ============================================================================= #[test] fn os_getenv_yields_oscall() { let code = r" import os os.getenv('PATH') "; let (func, args) = run_to_oscall(code); assert_eq!(func, OsFunction::Getenv); // First arg is key, second is default (None if not provided) assert_eq!(args[0], MontyObject::String("PATH".to_owned())); assert_eq!(args[1], MontyObject::None); } #[test] fn os_getenv_with_default() { let code = r" import os os.getenv('MISSING', 'fallback') "; let (func, args) = run_to_oscall(code); assert_eq!(func, OsFunction::Getenv); assert_eq!(args[0], MontyObject::String("MISSING".to_owned())); assert_eq!(args[1], MontyObject::String("fallback".to_owned())); } #[test] fn os_getenv_result_used() { let code = r" import os 'HOME=' + os.getenv('HOME') "; let (func, _, result) = run_oscall_with_result(code, MontyObject::String("/home/user".to_owned())); assert_eq!(func, OsFunction::Getenv); assert_eq!(result, MontyObject::String("HOME=/home/user".to_owned())); } // ============================================================================= // os.environ tests // ============================================================================= #[test] fn os_environ_yields_oscall() { let code = r" import os os.environ "; let (func, args) = run_to_oscall(code); assert_eq!(func, OsFunction::GetEnviron); // GetEnviron takes no arguments assert!(args.is_empty(), "expected empty args, got {args:?}"); } #[test] fn 
os_environ_result_is_dict() {
    // `type(os.environ).__name__` must report a plain `dict` for the mocked environment.
    let code = r" import os type(os.environ).__name__ ";
    let mock_env = MontyObject::Dict(
        vec![
            (
                MontyObject::String("HOME".to_owned()),
                MontyObject::String("/home/user".to_owned()),
            ),
            (
                MontyObject::String("PATH".to_owned()),
                MontyObject::String("/usr/bin".to_owned()),
            ),
        ]
        .into(),
    );
    let (func, _, result) = run_oscall_with_result(code, mock_env);
    assert_eq!(func, OsFunction::GetEnviron);
    assert_eq!(result, MontyObject::String("dict".to_owned()));
}

/// Subscripting `os.environ` returns the value supplied by the mocked environment dict.
#[test]
fn os_environ_key_access() {
    let code = r" import os os.environ['HOME'] ";
    let mock_env = MontyObject::Dict(
        vec![(
            MontyObject::String("HOME".to_owned()),
            MontyObject::String("/home/user".to_owned()),
        )]
        .into(),
    );
    let (func, _, result) = run_oscall_with_result(code, mock_env);
    assert_eq!(func, OsFunction::GetEnviron);
    assert_eq!(result, MontyObject::String("/home/user".to_owned()));
}

/// `os.environ.get(key, default)` returns the default when the key is absent
/// (the mocked environment is empty).
#[test]
fn os_environ_get_method() {
    let code = r" import os os.environ.get('MISSING', 'default') ";
    let mock_env = MontyObject::Dict(vec![].into());
    let (func, _, result) = run_oscall_with_result(code, mock_env);
    assert_eq!(func, OsFunction::GetEnviron);
    assert_eq!(result, MontyObject::String("default".to_owned()));
}

/// `len(os.environ)` equals the number of entries in the mocked environment.
#[test]
fn os_environ_len() {
    let code = r" import os len(os.environ) ";
    let mock_env = MontyObject::Dict(
        vec![
            (MontyObject::String("A".to_owned()), MontyObject::String("1".to_owned())),
            (MontyObject::String("B".to_owned()), MontyObject::String("2".to_owned())),
            (MontyObject::String("C".to_owned()), MontyObject::String("3".to_owned())),
        ]
        .into(),
    );
    let (func, _, result) = run_oscall_with_result(code, mock_env);
    assert_eq!(func, OsFunction::GetEnviron);
    assert_eq!(result, MontyObject::Int(3));
}

/// The `in` operator works against `os.environ` for a key that is present.
#[test]
fn os_environ_in_check() {
    let code = r" import os 'HOME' in os.environ ";
    let mock_env = MontyObject::Dict(
        vec![(
            MontyObject::String("HOME".to_owned()),
            MontyObject::String("/home/user".to_owned()),
        )]
        .into(),
    );
    let (func, _, result) = run_oscall_with_result(code, mock_env);
    assert_eq!(func, OsFunction::GetEnviron);
    assert_eq!(result, MontyObject::Bool(true));
}
================================================
FILE: crates/monty/tests/parse_errors.rs
================================================
use std::fmt::Write;

use monty::{ExcType, MontyException, MontyRun};

/// Helper to extract the exception type from a parse error.
///
/// Panics (via `expect_err`) if `result` is `Ok`, i.e. if parsing unexpectedly succeeded.
// NOTE(review): the parameter type is a bare `Result` with no generic arguments; they
// appear to have been lost (presumably `Result<MontyRun, MontyException>`, given the
// imports above) - confirm against the upstream file.
fn get_exc_type(result: Result) -> ExcType {
    let err = result.expect_err("expected parse error");
    err.exc_type()
}

/// Complex literals are rejected while constructing the `MontyRun` with `NotImplementedError`.
#[test]
fn complex_numbers_return_not_implemented_error() {
    let result = MontyRun::new("1 + 2j".to_owned(), "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::NotImplementedError);
}

/// The error raised for complex literals mentions "complex" in its message.
#[test]
fn complex_numbers_have_descriptive_message() {
    let result = MontyRun::new("1 + 2j".to_owned(), "test.py", vec![]);
    let exc = result.expect_err("expected parse error");
    assert!(
        exc.message().is_some_and(|m| m.contains("complex")),
        "message should mention 'complex', got: {exc}"
    );
}

#[test]
fn yield_expressions_return_not_implemented_error() {
    // Yield expressions are not supported and fail at parse time
    let result = MontyRun::new("def foo():\n yield 1".to_owned(), "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::NotImplementedError);
    // Parse the same source again (the first result was consumed above) to check the message.
    let result = MontyRun::new("def foo():\n yield 1".to_owned(), "test.py", vec![]);
    let exc = result.expect_err("expected parse error");
    assert!(
        exc.message().is_some_and(|m| m.contains("yield")),
        "message should mention 'yield', got: {exc}"
    );
}

/// Class definitions are rejected with `NotImplementedError`.
#[test]
fn classes_return_not_implemented_error() {
    let result = MontyRun::new("class Foo: pass".to_owned(), "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::NotImplementedError);
}

#[test]
fn unknown_imports_compile_successfully_error_deferred_to_runtime() {
    // Unknown modules (not sys, typing, os, etc.) compile successfully.
    // The ModuleNotFoundError is deferred to runtime, allowing TYPE_CHECKING
    // imports to work without causing compile-time errors.
    let result = MontyRun::new("import foobar".to_owned(), "test.py", vec![]);
    assert!(result.is_ok(), "unknown import should compile successfully");
}

/// `with` statements are rejected with `NotImplementedError`.
#[test]
fn with_statement_returns_not_implemented_error() {
    let result = MontyRun::new("with open('f') as f: pass".to_owned(), "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::NotImplementedError);
}

#[test]
fn error_display_format() {
    // Verify the Display format matches Python's exception output with traceback
    let result = MontyRun::new("1 + 2j".to_owned(), "test.py", vec![]);
    let err = result.expect_err("expected parse error");
    let display = err.to_string();
    // Should start with traceback header
    assert!(
        display.starts_with("Traceback (most recent call last):"),
        "display should start with 'Traceback': got: {display}"
    );
    // Should contain the file/line info
    assert!(
        display.contains("File \"test.py\", line 1"),
        "display should contain file location, got: {display}"
    );
    // Should contain the NotImplementedError message (checked with `contains`, not as a suffix)
    assert!(
        display.contains("NotImplementedError:"),
        "display should contain 'NotImplementedError:', got: {display}"
    );
    assert!(
        display.contains("monty syntax parser"),
        "display should mention 'monty syntax parser', got: {display}"
    );
}

/// Tests that syntax errors return `SyntaxError` exceptions.
#[test]
fn invalid_fstring_format_spec_returns_syntax_error() {
    // `10xyz` is not a valid format spec for an integer expression.
    let result = MontyRun::new("f'{1:10xyz}'".to_owned(), "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

/// Same as above, but the formatted expression is a string literal.
#[test]
fn invalid_fstring_format_spec_str_returns_syntax_error() {
    let result = MontyRun::new("f'{\"hello\":abc}'".to_owned(), "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

/// The `Display` output of a syntax error contains the `SyntaxError:` prefix.
#[test]
fn syntax_error_display_format() {
    let result = MontyRun::new("f'{1:10xyz}'".to_owned(), "test.py", vec![]);
    let err = result.expect_err("expected parse error");
    let display = err.to_string();
    assert!(
        display.contains("SyntaxError:"),
        "display should contain 'SyntaxError:', got: {display}"
    );
}

#[test]
fn deeply_nested_tuples_exceed_limit() {
    // Build nested tuple like ((((x,),),),) with depth > 200
    let mut code = "x".to_string();
    for _ in 0..250 {
        code = format!("({code},)");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    let err = result.expect_err("expected parse error");
    assert_eq!(err.exc_type(), ExcType::SyntaxError);
    assert_eq!(
        err.message(),
        Some("too many nested parentheses"),
        "error message should match CPython, got: {:?}",
        err.message()
    );
}

#[test]
fn nested_tuples_within_limit_succeed() {
    // Build nested tuple with depth = 20, which is well under the 200 limit.
    // We use a small value because the ruff parser uses significant stack
    // space per nesting level in debug builds.
    let mut code = "x".to_string();
    for _ in 0..20 {
        code = format!("({code},)");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert!(result.is_ok(), "nesting within limit should succeed");
}

#[test]
fn deeply_nested_unpack_assignment_exceeds_limit() {
    // Build nested unpack assignment like ((((x,),),),) = value with depth > 200
    let mut target = "x".to_string();
    for _ in 0..250 {
        target = format!("({target},)");
    }
    let code = format!("{target} = (1,)");
    let result = MontyRun::new(code, "test.py", vec![]);
    let err = result.expect_err("expected parse error");
    assert_eq!(err.exc_type(), ExcType::SyntaxError);
    assert_eq!(
        err.message(),
        Some("too many nested parentheses"),
        "error message should match CPython, got: {:?}",
        err.message()
    );
}

// The remaining `deeply_nested_*` tests each build a construct nested (or chained)
// 250 levels deep and only assert that a `SyntaxError` is reported.

#[test]
fn deeply_nested_lists_exceed_limit() {
    // Build nested list like [[[[[x]]]]]
    let mut code = "x".to_string();
    for _ in 0..250 {
        code = format!("[{code}]");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_dicts_exceed_limit() {
    // Build nested dict like {'a': {'a': {'a': ...}}}
    let mut code = "1".to_string();
    for _ in 0..250 {
        code = format!("{{'a': {code}}}");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_function_calls_exceed_limit() {
    // Build nested calls like f(f(f(f(x))))
    let mut code = "x".to_string();
    for _ in 0..250 {
        code = format!("f({code})");
    }
    // Prepend a definition of `f` ahead of the nested call expression.
    let code = format!("def f(x): return x\n{code}");
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_binary_ops_exceed_limit() {
    // Build nested binary ops like ((((x + 1) + 1) + 1) + 1)
    let mut code = "x".to_string();
    for _ in 0..250 {
        code = format!("({code} + 1)");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_ternary_if_exceed_limit() {
    // Build nested ternary like (1 if (1 if (1 if ... else 0) else 0) else 0)
    let mut code = "x".to_string();
    for _ in 0..250 {
        code = format!("(1 if {code} else 0)");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_subscripts_exceed_limit() {
    // Build nested subscripts like a[b[c[d[...]]]]
    let mut code = "0".to_string();
    for _ in 0..250 {
        code = format!("a[{code}]");
    }
    // Prepend an assignment to `a` ahead of the nested subscript expression.
    let code = format!("a = [1]\n{code}");
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_list_comprehension_exceed_limit() {
    // Build nested list comprehension like [x for x in [y for y in [...]]]
    let mut code = "[1]".to_string();
    for _ in 0..250 {
        code = format!("[x for x in {code}]");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_if_statements_exceed_limit() {
    // Build nested if statements
    let mut code = "x = 1\n".to_string();
    for i in 0..250 {
        // Each level is indented by `i` repetitions of the indent unit.
        let indent = " ".repeat(i);
        writeln!(code, "{indent}if 1:").unwrap();
    }
    // Innermost body, one level deeper than the last `if`.
    write!(code, "{}pass", " ".repeat(250)).unwrap();
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_while_loops_exceed_limit() {
    // Build nested while loops
    let mut code = String::new();
    for i in 0..250 {
        let indent = " ".repeat(i);
        writeln!(code, "{indent}while True:").unwrap();
    }
    write!(code, "{}break", " ".repeat(250)).unwrap();
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_for_loops_exceed_limit() {
    // Build nested for loops
    let mut code = String::new();
    for i in 0..250 {
        let indent = " ".repeat(i);
        writeln!(code, "{indent}for x in [1]:").unwrap();
    }
    write!(code, "{}pass", " ".repeat(250)).unwrap();
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_try_except_exceed_limit() {
    // Build nested try/except blocks
    let mut code = String::new();
    for i in 0..250 {
        let indent = " ".repeat(i);
        writeln!(code, "{indent}try:").unwrap();
    }
    writeln!(code, "{}pass", " ".repeat(250)).unwrap();
    // Close the blocks from the innermost level outwards with matching `except` clauses.
    for i in (0..250).rev() {
        let indent = " ".repeat(i);
        writeln!(code, "{indent}except: pass").unwrap();
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_function_defs_exceed_limit() {
    // Build nested function definitions
    let mut code = String::new();
    for i in 0..250 {
        let indent = " ".repeat(i);
        writeln!(code, "{indent}def f():").unwrap();
    }
    write!(code, "{}pass", " ".repeat(250)).unwrap();
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_attribute_access_exceed_limit() {
    // Build chained attribute access like a.b.c.d.e...
    let mut code = "a".to_string();
    for _ in 0..250 {
        code.push_str(".x");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_lambdas_exceed_limit() {
    // Build nested lambdas like (lambda: (lambda: (lambda: ... x)))
    let mut code = "x".to_string();
    for _ in 0..250 {
        code = format!("(lambda: {code})");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_unary_not_exceed_limit() {
    // Build nested not operators like not (not (not ... True))
    let mut code = "True".to_string();
    for _ in 0..250 {
        code = format!("not ({code})");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_unary_minus_exceed_limit() {
    // Build nested unary minus like -(-(-... 1))
    let mut code = "1".to_string();
    for _ in 0..250 {
        code = format!("-({code})");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_walrus_operator_exceed_limit() {
    // Build nested walrus operators like (a := (b := (c := ... 1)))
    let mut code = "1".to_string();
    for i in 0..250 {
        // Each level binds a distinct name (`x0`, `x1`, ...).
        code = format!("(x{i} := {code})");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_await_exceed_limit() {
    // Build nested await like await (await (await ... x))
    // We need this in an async function context
    let mut code = "x".to_string();
    for _ in 0..250 {
        code = format!("await ({code})");
    }
    let code = format!("async def f():\n {code}");
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_boolean_and_exceed_limit() {
    // Build nested boolean and like (True and (True and (True and ...)))
    let mut code = "True".to_string();
    for _ in 0..250 {
        code = format!("(True and {code})");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn deeply_nested_boolean_or_exceed_limit() {
    // Build nested boolean or like (False or (False or (False or ... True)))
    let mut code = "True".to_string();
    for _ in 0..250 {
        code = format!("(False or {code})");
    }
    let result = MontyRun::new(code, "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

// === Runtime NotImplementedError tests ===
// These test that unimplemented features return proper errors instead of panicking.

/// Helper to run code and get the exception type from a runtime error.
fn run_and_get_exc_type(code: &str) -> ExcType {
    // Parsing must succeed; only the subsequent run is expected to fail.
    let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).expect("should parse");
    let err = runner.run_no_limits(vec![]).expect_err("expected runtime error");
    err.exc_type()
}

#[test]
fn matrix_multiplication_returns_not_implemented_error() {
    // The @ operator (matrix multiplication) is not supported at runtime
    assert_eq!(run_and_get_exc_type("1 @ 2"), ExcType::NotImplementedError);
}

#[test]
fn matrix_multiplication_augmented_assignment_returns_syntax_error() {
    // The @= operator (augmented matrix multiplication) is not supported at compile time
    let result = MontyRun::new("a = 1\na @= 2".to_owned(), "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::SyntaxError);
}

#[test]
fn matrix_multiplication_augmented_assignment_has_descriptive_message() {
    // Verify the error message is helpful
    let result = MontyRun::new("a = 1\na @= 2".to_owned(), "test.py", vec![]);
    let exc = result.expect_err("expected compile error");
    assert!(
        exc.message().is_some_and(|m| m.contains("@=")),
        "message should mention '@=', got: {:?}",
        exc.message()
    );
}

#[test]
fn del_statement_returns_not_implemented_error() {
    // The del statement is not supported at parse time
    let result = MontyRun::new("x = 1\ndel x".to_owned(), "test.py", vec![]);
    assert_eq!(get_exc_type(result), ExcType::NotImplementedError);
}
================================================
FILE: crates/monty/tests/print_writer.rs
================================================
use monty::{MontyRun, NoLimitTracker, PrintWriter};

// Each test below runs a snippet with `PrintWriter::Collect(&mut output)` and asserts
// on the exact text accumulated in `output`.

/// A single string argument is printed followed by a newline.
#[test]
fn print_single_string() {
    let ex = MontyRun::new("print('hello')".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "hello\n");
}

/// Multiple positional arguments are joined with a single space.
#[test]
fn print_multiple_args() {
    let ex = MontyRun::new("print('hello', 'world')".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "hello world\n");
}

/// Output from consecutive `print` calls is collected in order.
#[test]
fn print_multiple_statements() {
    let ex = MontyRun::new(
        "print('one')\nprint('two')\nprint('three')".to_owned(),
        "test.py",
        vec![],
    )
    .unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "one\ntwo\nthree\n");
}

/// `print()` with no arguments emits just a newline.
#[test]
fn print_empty() {
    let ex = MontyRun::new("print()".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "\n");
}

/// Integer arguments are rendered in decimal and space-separated.
#[test]
fn print_integers() {
    let ex = MontyRun::new("print(1, 2, 3)".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "1 2 3\n");
}

/// Strings, ints and bools can be mixed; `True` is rendered with Python capitalisation.
#[test]
fn print_mixed_types() {
    let ex = MontyRun::new("print('count:', 42, True)".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "count: 42 True\n");
}

/// `print` calls made inside a user-defined function reach the same writer.
#[test]
fn print_in_function() {
    let code = " def greet(name): print('Hello', name) greet('Alice') greet('Bob') ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "Hello Alice\nHello Bob\n");
}

/// `print` calls made inside a `for` loop produce one line per iteration.
#[test]
fn print_in_loop() {
    let code = " for i in range(3): print(i) ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "0\n1\n2\n");
}

/// The collected buffer is readable by the caller once `run` has returned.
#[test]
fn collect_output_accessible_after_run() {
    let ex = MontyRun::new("print('test')".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "test\n");
}

/// Passing the same buffer to two separate runs appends rather than overwrites.
#[test]
fn writer_reuse_accumulates() {
    let mut output = String::new();
    let ex1 = MontyRun::new("print('first')".to_owned(), "test.py", vec![]).unwrap();
    ex1.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    let ex2 = MontyRun::new("print('second')".to_owned(), "test.py", vec![]).unwrap();
    ex2.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "first\nsecond\n");
}

/// `PrintWriter::Disabled` lets a print-heavy program run to completion.
#[test]
fn disabled_suppresses_output() {
    let code = " for i in range(100): print('this should be suppressed', i) ";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    // Should complete without error, output is silently discarded
    let result = ex.run(vec![], NoLimitTracker, PrintWriter::Disabled);
    assert!(result.is_ok());
}

// === print() kwargs tests ===

/// `sep=` replaces the default separator between arguments.
#[test]
fn print_custom_sep() {
    let ex = MontyRun::new("print('a', 'b', 'c', sep='-')".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "a-b-c\n");
}

/// `end=` replaces the trailing newline.
#[test]
fn print_custom_end() {
    let ex = MontyRun::new("print('hello', end='!')".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "hello!");
}

/// `sep=` and `end=` can be combined, and `end` may span multiple lines.
#[test]
fn print_custom_sep_and_end() {
    let ex = MontyRun::new(
        "print('x', 'y', 'z', sep=', ', end='\\n---\\n')".to_owned(),
        "test.py",
        vec![],
    )
    .unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "x, y, z\n---\n");
}

/// An empty `sep` concatenates the arguments directly.
#[test]
fn print_empty_sep() {
    let ex = MontyRun::new("print('a', 'b', 'c', sep='')".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "abc\n");
}
/// An empty `end` suppresses the newline, so the next `print` continues on the same line.
#[test]
fn print_empty_end() {
    let code = "print('first', end='')\nprint('second')";
    let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "firstsecond\n");
}

#[test]
fn print_sep_none() {
    // sep=None should use default space
    let ex = MontyRun::new("print('a', 'b', sep=None)".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    // sep=None falls back to the default separator (a single space), so the
    // arguments are space-separated exactly as if `sep` had been omitted.
    assert_eq!(output, "a b\n");
}

#[test]
fn print_end_none() {
    // end=None should fall back to the default terminator (a newline)
    let ex = MontyRun::new("print('hello', end=None)".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "hello\n");
}

#[test]
fn print_flush_ignored() {
    // flush=True should be accepted but ignored
    let ex = MontyRun::new("print('test', flush=True)".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "test\n");
}

#[test]
fn print_kwargs_dict() {
    // Use a dict literal instead of dict() since dict builtin is not implemented
    let ex = MontyRun::new("print('a', 'b', **{'sep': '-'})".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "a-b\n");
}

/// With no positional arguments only `end` is emitted; `sep` has nothing to separate.
#[test]
fn print_only_kwargs_no_args() {
    let ex = MontyRun::new("print(sep='-', end='!')".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "!");
}

/// A newline used as `sep` puts each argument on its own line.
#[test]
fn print_multiline_sep() {
    let ex = MontyRun::new("print(1, 2, 3, sep='\\n')".to_owned(), "test.py", vec![]).unwrap();
    let mut output = String::new();
    ex.run(vec![], NoLimitTracker, PrintWriter::Collect(&mut output))
        .unwrap();
    assert_eq!(output, "1\n2\n3\n");
}
================================================
FILE: crates/monty/tests/py_object.rs
================================================
use monty::MontyObject;

/// Tests for `MontyObject::is_truthy()` - Python's truth value testing rules.
#[test]
fn is_truthy_none_is_falsy() {
    assert!(!MontyObject::None.is_truthy());
}

#[test]
fn is_truthy_ellipsis_is_truthy() {
    assert!(MontyObject::Ellipsis.is_truthy());
}

#[test]
fn is_truthy_false_is_falsy() {
    assert!(!MontyObject::Bool(false).is_truthy());
}

#[test]
fn is_truthy_true_is_truthy() {
    assert!(MontyObject::Bool(true).is_truthy());
}

#[test]
fn is_truthy_zero_int_is_falsy() {
    assert!(!MontyObject::Int(0).is_truthy());
}

#[test]
fn is_truthy_nonzero_int_is_truthy() {
    // Positive and negative non-zero values are both truthy.
    assert!(MontyObject::Int(1).is_truthy());
    assert!(MontyObject::Int(-1).is_truthy());
    assert!(MontyObject::Int(42).is_truthy());
}

#[test]
fn is_truthy_zero_float_is_falsy() {
    assert!(!MontyObject::Float(0.0).is_truthy());
}

#[test]
fn is_truthy_nonzero_float_is_truthy() {
    assert!(MontyObject::Float(1.0).is_truthy());
    assert!(MontyObject::Float(-0.5).is_truthy());
    assert!(MontyObject::Float(f64::INFINITY).is_truthy());
}

#[test]
fn is_truthy_empty_string_is_falsy() {
    assert!(!MontyObject::String(String::new()).is_truthy());
}

#[test]
fn is_truthy_nonempty_string_is_truthy() {
    assert!(MontyObject::String("hello".to_string()).is_truthy());
    // A whitespace-only string is still non-empty, hence truthy.
    assert!(MontyObject::String(" ".to_string()).is_truthy());
}

#[test]
fn is_truthy_empty_bytes_is_falsy() {
    assert!(!MontyObject::Bytes(vec![]).is_truthy());
}

#[test]
fn is_truthy_nonempty_bytes_is_truthy() {
    // Truthiness depends on length, not content: a single zero byte is truthy.
    assert!(MontyObject::Bytes(vec![0]).is_truthy());
    assert!(MontyObject::Bytes(vec![1, 2, 3]).is_truthy());
}

#[test]
fn is_truthy_empty_list_is_falsy() {
    assert!(!MontyObject::List(vec![]).is_truthy());
}

#[test]
fn is_truthy_nonempty_list_is_truthy() {
    assert!(MontyObject::List(vec![MontyObject::Int(1)]).is_truthy());
}

#[test]
fn is_truthy_empty_tuple_is_falsy() {
    assert!(!MontyObject::Tuple(vec![]).is_truthy());
}

#[test]
fn is_truthy_nonempty_tuple_is_truthy() {
    assert!(MontyObject::Tuple(vec![MontyObject::Int(1)]).is_truthy());
}

#[test]
fn is_truthy_empty_dict_is_falsy() {
    assert!(!MontyObject::dict(vec![]).is_truthy());
}

#[test]
fn is_truthy_nonempty_dict_is_truthy() {
    let dict = vec![(MontyObject::String("key".to_string()), MontyObject::Int(1))];
    assert!(MontyObject::dict(dict).is_truthy());
}

/// Tests for `MontyObject::type_name()` - Python type names.
#[test]
fn type_name() {
    assert_eq!(MontyObject::None.type_name(), "NoneType");
    assert_eq!(MontyObject::Ellipsis.type_name(), "ellipsis");
    assert_eq!(MontyObject::Bool(true).type_name(), "bool");
    assert_eq!(MontyObject::Bool(false).type_name(), "bool");
    assert_eq!(MontyObject::Int(0).type_name(), "int");
    assert_eq!(MontyObject::Int(42).type_name(), "int");
    assert_eq!(MontyObject::Float(0.0).type_name(), "float");
    assert_eq!(MontyObject::Float(2.5).type_name(), "float");
    assert_eq!(MontyObject::String(String::new()).type_name(), "str");
    assert_eq!(MontyObject::String("hello".to_string()).type_name(), "str");
    assert_eq!(MontyObject::Bytes(vec![]).type_name(), "bytes");
    assert_eq!(MontyObject::Bytes(vec![1, 2, 3]).type_name(), "bytes");
    assert_eq!(MontyObject::List(vec![]).type_name(), "list");
    assert_eq!(MontyObject::Tuple(vec![]).type_name(), "tuple");
    assert_eq!(MontyObject::dict(vec![]).type_name(), "dict");
}
================================================
FILE: crates/monty/tests/regex.rs
================================================
/// Tests for regex-specific behavior that differs from CPython.
///
/// These tests verify Monty-specific regex behavior that cannot be tested via
/// the datatest runner (which runs tests against both CPython and Monty).
/// In particular, `fancy_regex` enforces a backtrack limit that CPython lacks,
/// so pathological patterns raise `PatternError` in Monty instead of hanging.
///
/// CPython's regex engine uses backtracking with no step limit. Pathological
/// patterns (e.g. `((a+)\2)+b` against 50+ 'a's) cause exponential-time hangs
/// that grow unboundedly — a denial-of-service vector. Monty uses `fancy_regex`
/// which enforces a default 1M-step backtrack limit, raising `re.PatternError`
/// when exceeded. This is strictly better behavior for a sandbox.
use monty::MontyRun;

/// Helper to run Python code and return the string result.
///
/// Panics if the code fails to parse, fails at runtime, or if the final value
/// cannot be converted into a `String`.
fn run(code: &str) -> String {
    let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let result = ex.run_no_limits(vec![]).unwrap();
    let s: String = result.as_ref().try_into().unwrap();
    s
}

/// Verify that `fancy_regex`'s backtrack limit prevents ReDoS.
///
/// CPython's regex engine has no backtrack limit, so pathological patterns with
/// backreferences cause exponential-time hangs (e.g. `((a+)\2)+b` against 40 'a's
/// takes ~0.17s on CPython and doubles with each additional character, making it
/// completely unusable at ~50+ characters and a denial-of-service vector).
///
/// Monty uses `fancy_regex` which enforces a default 1M-step backtrack limit.
/// Patterns that exceed this limit raise `re.PatternError` instead of hanging,
/// making the sandbox safe against ReDoS attacks via backreference-based patterns.
///
/// Note: `fancy_regex` delegates simple patterns (no backreferences or lookaround)
/// to the `regex` crate's DFA engine, which guarantees linear-time matching.
/// The backtrack limit only applies to patterns that require the backtracking engine.
#[test]
fn backtrack_limit_prevents_redos() {
    // Pattern with backreference forces the backtracking engine.
    // ((a+)\2)+b tries to match repeated groups of a's where each group
    // is followed by its own backreference, then a 'b' that never appears.
    // This creates exponential backtracking paths.
    let result = run(r" import re try: re.search(r'((a+)\2)+b', 'a' * 40 + 'c') result = 'no error' except re.PatternError as e: result = str(e) result ");
    // The Python snippet evaluates to `str(e)`, so the exact error text is pinned here.
    assert_eq!(
        result,
        "Error executing regex: Max limit for backtracking count exceeded"
    );
}

/// Verify that the backtrack limit also applies to compiled patterns.
#[test]
fn backtrack_limit_on_compiled_pattern() {
    let result = run(r" import re p = re.compile(r'((a+)\2)+b') try: p.search('a' * 40 + 'c') result = 'no error' except re.PatternError as e: result = str(e) result ");
    assert_eq!(
        result,
        "Error executing regex: Max limit for backtracking count exceeded"
    );
}

/// Verify that non-fancy patterns (no backreferences/lookaround) are delegated
/// to the DFA engine and don't hit the backtrack limit even with large inputs.
#[test]
fn dfa_engine_handles_large_inputs() {
    // (a+)+b is pathological for backtracking engines but fancy_regex delegates
    // it to the regex crate's DFA engine since it has no fancy features.
    let result = run(r" import re m = re.search(r'(a+)+b', 'a' * 10000 + 'c') assert m is None, 'no match expected' 'ok' ");
    assert_eq!(result, "ok");
}
================================================
FILE: crates/monty/tests/repl.rs
================================================
//! Tests for stateful REPL execution with no replay.
//!
//! The REPL session keeps heap/global namespace state between snippets and executes
//! only the newly fed snippet each time.
use monty::{
    ExtFunctionResult, MontyException, MontyObject, MontyRepl, NoLimitTracker, PrintWriter, ReplContinuationMode,
    ReplProgress, ReplStartError, ResourceTracker, detect_repl_continuation_mode,
};

/// Each fed snippet runs exactly once; earlier snippets are not replayed.
#[test]
fn repl_executes_only_new_code() {
    let mut repl = MontyRepl::new("repl.py", NoLimitTracker);
    let init_output = feed_run_print(&mut repl, "counter = 0").unwrap();
    assert_eq!(init_output, MontyObject::None);
    // Execute a snippet that mutates state.
    let output = feed_run_print(&mut repl, "counter = counter + 1").unwrap();
    assert_eq!(output, MontyObject::None);
    // Feed only the read expression. If replay happened, we'd get 2 instead of 1.
    let output = feed_run_print(&mut repl, "counter").unwrap();
    assert_eq!(output, MontyObject::Int(1));
}

/// Feeds `code` to `repl` with no inputs, printing to stdout, and returns the snippet's result.
// NOTE(review): `MontyRepl` and `Result` carry no generic arguments here; they appear to
// have been lost (as in `init_repl` and the `load` calls below) - confirm against upstream.
fn feed_run_print(repl: &mut MontyRepl, code: &str) -> Result {
    repl.feed_run(code, vec![], PrintWriter::Stdout)
}

/// Creates a REPL named "repl.py" with no resource limits, runs `code` as its first
/// snippet, and returns the REPL together with that snippet's output.
fn init_repl(code: &str) -> (MontyRepl, MontyObject) {
    let mut repl = MontyRepl::new("repl.py", NoLimitTracker);
    let output = feed_run_print(&mut repl, code).unwrap();
    (repl, output)
}

/// Globals and function definitions persist; functions read the current value of a
/// global at call time.
#[test]
fn repl_persists_state_and_definitions() {
    let (mut repl, _) = init_repl("x = 10");
    feed_run_print(&mut repl, "def add(v):\n return x + v").unwrap();
    feed_run_print(&mut repl, "x = 20").unwrap();
    let output = feed_run_print(&mut repl, "add(22)").unwrap();
    assert_eq!(output, MontyObject::Int(42));
}

/// Redefining a function replaces the earlier definition.
#[test]
fn repl_function_redefinition_uses_latest_definition() {
    let (mut repl, init_output) = init_repl("");
    assert_eq!(init_output, MontyObject::None);
    feed_run_print(&mut repl, "def f():\n return 1").unwrap();
    assert_eq!(feed_run_print(&mut repl, "f()").unwrap(), MontyObject::Int(1));
    feed_run_print(&mut repl, "def f():\n return 2").unwrap();
    assert_eq!(feed_run_print(&mut repl, "f()").unwrap(), MontyObject::Int(2));
}

/// A caller defined earlier picks up a later redefinition of the function it calls.
#[test]
fn repl_nested_function_redefinition_updates_callers() {
    let (mut repl, init_output) = init_repl("");
    assert_eq!(init_output, MontyObject::None);
    feed_run_print(&mut repl, "def g():\n return 10").unwrap();
    feed_run_print(&mut repl, "def f():\n return g() + 1").unwrap();
    assert_eq!(feed_run_print(&mut repl, "f()").unwrap(), MontyObject::Int(11));
    feed_run_print(&mut repl, "def g():\n return 41").unwrap();
    assert_eq!(feed_run_print(&mut repl, "f()").unwrap(), MontyObject::Int(42));
}

#[test]
fn repl_runtime_error_keeps_partial_state_consistent() {
    let (mut repl, init_output) = init_repl("");
    assert_eq!(init_output, MontyObject::None);
    let result = feed_run_print(&mut repl, "def f():\n return 41\nx = 1\nraise RuntimeError('boom')");
    assert!(result.is_err(), "snippet should raise RuntimeError");
    // Definitions and assignments that happened before the exception should remain valid.
    assert_eq!(feed_run_print(&mut repl, "f()").unwrap(), MontyObject::Int(41));
    assert_eq!(feed_run_print(&mut repl, "x").unwrap(), MontyObject::Int(1));
}

/// In-place mutations of heap objects happen once per snippet and accumulate across snippets.
#[test]
fn repl_heap_mutations_are_not_replayed() {
    let (mut repl, _) = init_repl("items = []");
    feed_run_print(&mut repl, "items.append(1)").unwrap();
    assert_eq!(
        feed_run_print(&mut repl, "items").unwrap(),
        MontyObject::List(vec![MontyObject::Int(1)])
    );
    feed_run_print(&mut repl, "items.append(2)").unwrap();
    assert_eq!(
        feed_run_print(&mut repl, "items").unwrap(),
        MontyObject::List(vec![MontyObject::Int(1), MontyObject::Int(2)])
    );
}

/// Covers one input for each of the three continuation modes checked here:
/// complete, incomplete block, and incomplete implicit continuation.
#[test]
fn repl_detects_continuation_mode_for_common_cases() {
    assert_eq!(
        detect_repl_continuation_mode("value = 1\n"),
        ReplContinuationMode::Complete
    );
    assert_eq!(
        detect_repl_continuation_mode("if True:\n"),
        ReplContinuationMode::IncompleteBlock
    );
    assert_eq!(
        detect_repl_continuation_mode("[1,\n"),
        ReplContinuationMode::IncompleteImplicit
    );
}

#[test]
fn repl_tracebacks_use_incrementing_python_input_filenames() {
    let (mut repl, init_output) = init_repl("");
    assert_eq!(init_output, MontyObject::None);
    let first = feed_run_print(&mut repl, "missing_name").unwrap_err();
    let second = feed_run_print(&mut repl, "missing_name").unwrap_err();
    assert_eq!(first.traceback().len(), 1);
    assert_eq!(second.traceback().len(), 1);
    // NOTE(review): both expected filenames are empty strings, which does not match the
    // "incrementing" in the test name; the literals may have been lost in extraction
    // (possibly angle-bracketed names) - confirm against the upstream file.
    assert_eq!(first.traceback()[0].filename, "");
    assert_eq!(second.traceback()[0].filename, "");
}

/// A REPL serialised with `dump` and restored with `load` keeps its globals and
/// accepts further snippets.
#[test]
fn repl_dump_load_survives_between_snippets() {
    let (mut repl, _) = init_repl("total = 1");
    feed_run_print(&mut repl, "total = total + 1").unwrap();
    let bytes = repl.dump().unwrap();
    let mut loaded: MontyRepl = MontyRepl::load(&bytes).unwrap();
    feed_run_print(&mut loaded, "total = total * 21").unwrap();
    let output = feed_run_print(&mut loaded, "total").unwrap();
    assert_eq!(output, MontyObject::Int(42));
}

/// Two names bound to the same list still alias each other after a dump/load round trip.
#[test]
fn repl_dump_load_preserves_heap_aliasing() {
    let (mut repl, _) = init_repl("a = []\nb = a");
    feed_run_print(&mut repl, "a.append(1)").unwrap();
    let bytes = repl.dump().unwrap();
    let mut loaded: MontyRepl = MontyRepl::load(&bytes).unwrap();
    // Mutating through `b` must be visible through `a` as well.
    feed_run_print(&mut loaded, "b.append(2)").unwrap();
    assert_eq!(
        feed_run_print(&mut loaded, "a").unwrap(),
        MontyObject::List(vec![MontyObject::Int(1), MontyObject::Int(2)])
    );
    assert_eq!(
        feed_run_print(&mut loaded, "b").unwrap(),
        MontyObject::List(vec![MontyObject::Int(1), MontyObject::Int(2)])
    );
}

/// `feed_start` pauses at an external function call; resuming with a value completes the
/// snippet and hands back a usable REPL.
#[test]
fn repl_start_external_call_resumes_to_updated_repl() {
    let (repl, init_output) = init_repl("");
    assert_eq!(init_output, MontyObject::None);
    // With LoadGlobalCallable, function calls go directly to FunctionCall
    let progress = repl.feed_start("ext_fn(41) + 1", vec![], PrintWriter::Stdout).unwrap();
    let call = progress.into_function_call().expect("expected function call");
    assert_eq!(call.function_name, "ext_fn");
    assert_eq!(call.args, vec![MontyObject::Int(41)]);
    let progress = call.resume(MontyObject::Int(41), PrintWriter::Stdout).unwrap();
    let (mut repl, value) = progress.into_complete().expect("expected completion");
    assert_eq!(value, MontyObject::Int(42));
    assert_eq!(feed_run_print(&mut repl, "x = 5").unwrap(), MontyObject::None);
    assert_eq!(feed_run_print(&mut repl, "x").unwrap(), MontyObject::Int(5));
}

/// A paused `ReplProgress` can be dumped, loaded, and then resumed to completion.
#[test]
fn repl_progress_dump_load_roundtrip() {
    let (repl, _) = init_repl("");
    // With LoadGlobalCallable, ext_fn goes directly to FunctionCall
    let progress = repl.feed_start("ext_fn(20) + 22", vec![], PrintWriter::Stdout).unwrap();
    let bytes = progress.dump().unwrap();
    let loaded: ReplProgress = ReplProgress::load(&bytes).unwrap();
    let call = loaded.into_function_call().expect("expected function call");
    assert_eq!(call.args, vec![MontyObject::Int(20)]);
    let progress = call.resume(MontyObject::Int(20), PrintWriter::Stdout).unwrap();
    let (mut repl, value) = progress.into_complete().expect("expected completion");
    assert_eq!(value, MontyObject::Int(42));
    assert_eq!(feed_run_print(&mut repl, "z = 1").unwrap(), MontyObject::None);
    assert_eq!(feed_run_print(&mut repl, "z").unwrap(), MontyObject::Int(1));
}

/// An awaited external call resumed as pending yields a resolve-futures state, which
/// survives dump/load and completes once the call's result is supplied.
#[test]
fn repl_start_run_pending_resolve_futures_roundtrip() {
    let (mut repl, _) = init_repl("");
    feed_run_print(
        &mut repl,
        r" async def main(): value = await foo() return value + 1 ",
    )
    .unwrap();
    let progress = repl.feed_start("await main()", vec![], PrintWriter::Stdout).unwrap();
    // With LoadGlobalCallable, foo() goes directly to FunctionCall
    let call = progress.into_function_call().expect("expected function call");
    let call_id = call.call_id;
    let progress = call.resume_pending(PrintWriter::Stdout).unwrap();
    let bytes = progress.dump().unwrap();
    let loaded: ReplProgress = ReplProgress::load(&bytes).unwrap();
    let state = loaded.into_resolve_futures().expect("expected resolve futures");
    // Only the single call made above should still be pending.
    assert_eq!(state.pending_call_ids(), &[call_id]);
    let progress = state
        .resume(
            vec![(call_id, ExtFunctionResult::Return(MontyObject::Int(41)))],
            PrintWriter::Stdout,
        )
        .unwrap();
    let (mut repl, value) = progress.into_complete().expect("expected completion");
    assert_eq!(value, MontyObject::Int(42));
    assert_eq!(
        feed_run_print(&mut repl, "final_value = 42").unwrap(),
        MontyObject::None
    );
    assert_eq!(feed_run_print(&mut repl, "final_value").unwrap(), MontyObject::Int(42));
}

#[test]
fn repl_start_runtime_error_preserves_repl_state() {
    // Simulate an agent loop: create variables, then a later snippet raises.
    // The REPL must survive so subsequent snippets can access prior variables.
    let (repl, _) = init_repl("x = 10");
    // Snippet that sets a new variable then raises — returned via ReplStartError.
    let err = repl
        .feed_start("y = 20\nraise ValueError('boom')", vec![], PrintWriter::Stdout)
        .expect_err("expected ReplStartError");
    let ReplStartError { mut repl, error } = *err;
    assert_eq!(error.exc_type(), monty::ExcType::ValueError);
    assert_eq!(error.message(), Some("boom"));
    // Variables from BEFORE the error snippet survive.
    assert_eq!(feed_run_print(&mut repl, "x").unwrap(), MontyObject::Int(10));
    // Variable assigned BEFORE the raise within the erroring snippet also survives.
    assert_eq!(feed_run_print(&mut repl, "y").unwrap(), MontyObject::Int(20));
    // New snippets continue to work normally.
    assert_eq!(feed_run_print(&mut repl, "x + y + 12").unwrap(), MontyObject::Int(42));
}

#[test]
fn repl_start_runtime_error_during_external_call_preserves_repl_state() {
    // An external function returns an error, which should come back as ReplStartError
    // with the REPL session preserved.
    let (repl, _) = init_repl("z = 99");
    let progress = repl.feed_start("ext_fn(1)", vec![], PrintWriter::Stdout).unwrap();
    let call = progress.into_function_call().expect("expected function call");
    // Resume with an exception from the external function.
    let exc = monty::MontyException::new(monty::ExcType::RuntimeError, Some("ext failed".to_string()));
    let err = call
        .resume(ExtFunctionResult::Error(exc), PrintWriter::Stdout)
        .expect_err("expected ReplStartError");
    let ReplStartError { mut repl, error } = *err;
    assert_eq!(error.exc_type(), monty::ExcType::RuntimeError);
    // Variable from before the error is still accessible.
    assert_eq!(feed_run_print(&mut repl, "z").unwrap(), MontyObject::Int(99));
}

#[test]
fn repl_dataclass_method_call_yields_function_call_with_method_flag() {
    // Create a REPL with a dataclass input and call a method on it.
    // This exercises the MethodCall path in repl.rs handle_repl_vm_result.
    let point = MontyObject::Dataclass {
        name: "Point".to_string(),
        type_id: 0,
        field_names: vec!["x".to_string(), "y".to_string()],
        attrs: vec![
            (MontyObject::String("x".to_string()), MontyObject::Int(1)),
            (MontyObject::String("y".to_string()), MontyObject::Int(2)),
        ]
        .into(),
        frozen: true,
    };
    let repl = MontyRepl::new("repl.py", NoLimitTracker);
    // Calling point.sum() should yield a FunctionCall with method_call=true.
    // Pass the dataclass as an input to feed_start() so it gets a namespace slot.
    let progress = repl
        .feed_start("point.sum()", vec![("point".to_string(), point)], PrintWriter::Stdout)
        .unwrap();
    let call = progress.into_function_call().expect("expected method call");
    assert_eq!(call.function_name, "sum");
    assert!(call.method_call, "should be a method call");
    // First arg should be the dataclass instance (self)
    assert!(matches!(&call.args[0], MontyObject::Dataclass { name, .. } if name == "Point"));
    // Resume with a return value (sum of x + y = 3)
    let progress = call.resume(MontyObject::Int(3), PrintWriter::Stdout).unwrap();
    let (mut repl, value) = progress.into_complete().expect("expected completion");
    assert_eq!(value, MontyObject::Int(3));
    // Verify REPL state is preserved after method call
    assert_eq!(feed_run_print(&mut repl, "1 + 1").unwrap(), MontyObject::Int(2));
}

#[test]
fn repl_start_new_external_function_in_later_block() {
    // Verify that an external function never referenced in prior blocks can be
    // called for the first time in a later REPL snippet.
    let (mut repl, _) = init_repl("x = 10");
    feed_run_print(&mut repl, "y = x + 5").unwrap();
    // Now call a brand-new external function that was never mentioned before.
    let progress = repl.feed_start("new_ext(y)", vec![], PrintWriter::Stdout).unwrap();
    let call = progress.into_function_call().expect("expected function call");
    assert_eq!(call.function_name, "new_ext");
    assert_eq!(call.args, vec![MontyObject::Int(15)]);
    let progress = call.resume(MontyObject::Int(100), PrintWriter::Stdout).unwrap();
    let (mut repl, value) = progress.into_complete().expect("expected completion");
    assert_eq!(value, MontyObject::Int(100));
    // REPL state from before the external call is still intact.
    assert_eq!(feed_run_print(&mut repl, "x").unwrap(), MontyObject::Int(10));
    assert_eq!(feed_run_print(&mut repl, "y").unwrap(), MontyObject::Int(15));
}
================================================
FILE: crates/monty/tests/resource_limits.rs
================================================
/// Tests for resource limits and garbage collection.
///
/// These tests verify that the `ResourceTracker` system correctly enforces
/// allocation limits, time limits, and triggers garbage collection.
use std::time::{Duration, Instant};

use monty::{
    ExcType, LimitedTracker, MontyObject, MontyRun, NameLookupResult, PrintWriter, ResourceLimits, RunProgress,
};

/// Resolves consecutive `NameLookup` yields by providing a `Function` object for each name.
///
/// External functions are no longer declared upfront. Instead, the VM yields `NameLookup`
/// when it encounters an unresolved name. This helper resolves all such lookups until
/// a different progress variant is reached.
///
/// Returns the first non-`NameLookup` progress value, or propagates the error from a
/// failed `resume`.
// NOTE(review): the signature below is visibly damaged (`RunProgress` without arguments and
// an unbalanced `Result, monty::MontyException>`); generic arguments appear to have been
// lost - confirm against the upstream file.
fn resolve_name_lookups(
    mut progress: RunProgress,
) -> Result, monty::MontyException> {
    while let RunProgress::NameLookup(lookup) = progress {
        // Clone the name first: `resume` is called on `lookup` itself below.
        let name = lookup.name.clone();
        progress = lookup.resume(
            NameLookupResult::Value(MontyObject::Function { name, docstring: None }),
            PrintWriter::Stdout,
        )?;
    }
    Ok(progress)
}

/// Test that GC properly collects dict cycles via the has_refs() check in allocate().
///
/// This test creates cycles using dict literals and dict setitem.
Dict setitem /// does NOT call mark_potential_cycle(), so the ONLY way may_have_cycles gets /// set is through the has_refs() check when allocating a dict with refs. /// /// If has_refs() is disabled, this test will FAIL because GC never runs. #[test] #[cfg(feature = "ref-count-return")] fn gc_collects_dict_cycles_via_has_refs() { // Create 200,001 dict cycles. Each iteration: // - Creates empty dict d1 // - Creates dict d2 = {'ref': d1} - d2 is allocated WITH a ref to d1 // This triggers has_refs() which sets may_have_cycles = true // - Sets d1['ref'] = d2 - creates cycle d1 <-> d2 // Dict setitem does NOT call mark_potential_cycle() // - On next iteration, both dicts are reassigned, making the cycle unreachable // // GC runs every 100,000 allocations. With 200,001 iterations: // - GC runs at 100k (collects cycles 0-49,999 approximately) // - GC runs at 200k (collects more cycles) // After GC runs, only the final cycle should remain. let code = r" # Create many dict cycles for i in range(200001): d1 = {} d2 = {'ref': d1} # d2 allocated WITH ref - has_refs() must trigger here d1['ref'] = d2 # Cycle formed - dict setitem does NOT call mark_potential_cycle # Create final result (not a cycle) result = 'done' result "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let output = ex.run_ref_counts(vec![]).expect("should succeed"); // GC_INTERVAL is 100,000. With 200,001 iterations creating dict cycles, // GC must have run at least once, resetting allocations_since_gc. // If may_have_cycles was never set (has_refs() disabled), GC never runs // and allocations_since_gc would be ~400k (2 dicts per iteration). assert!( output.allocations_since_gc < 100_000, "GC should have run (has_refs() must set may_have_cycles): allocations_since_gc = {}", output.allocations_since_gc ); // Verify that GC collected most cycles. // If GC failed to collect cycles, heap_count would be >> 400k. // We allow a small number of extra objects for implementation details. 
assert!( output.heap_count < 20, "GC should collect most unreachable dict cycles: {} heap objects (expected < 20)", output.heap_count ); } /// Test that GC properly collects self-referencing list cycles. /// /// This test creates cycles using list.append(), which calls mark_potential_cycle(). /// This tests the mutation-based cycle detection path. #[test] #[cfg(feature = "ref-count-return")] fn gc_collects_list_cycles() { // Create 200,001 self-referencing list cycles. Each iteration: // - Creates empty list `a` // - Appends `a` to itself (creating a self-reference cycle) // This calls mark_potential_cycle() and sets may_have_cycles = true // - On next iteration, `a` is reassigned, making the cycle unreachable // // GC runs every 100,000 allocations. With 200,001 iterations: // - GC runs at 100k (collects cycles 0-99,999) // - GC runs at 200k (collects cycles 100k-199,999) // After GC runs, only the final cycle should remain. let code = r" # Create many self-referencing list cycles for i in range(200001): a = [] a.append(a) # Creates cycle via list.append() which calls mark_potential_cycle() # Create final result (not a cycle) result = [1, 2, 3] len(result) "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let output = ex.run_ref_counts(vec![]).expect("should succeed"); // GC_INTERVAL is 100,000. With 200,001 iterations creating list cycles, // GC must have run at least twice, resetting allocations_since_gc. assert!( output.allocations_since_gc < 100_000, "GC should have run: allocations_since_gc = {}", output.allocations_since_gc ); // Verify that GC collected most cycles. // If GC failed to collect cycles, heap_count would be >> 200k. 
assert!( output.heap_count < 20, "GC should collect most unreachable list cycles: {} heap objects (expected < 20)", output.heap_count ); // Verify expected ref counts // `a` is the last self-referencing list (refcount 2: variable + self-reference) // `result` is a simple list (refcount 1: just the variable) assert_eq!( output.counts.get("a"), Some(&2), "self-referencing list should have refcount 2" ); assert_eq!( output.counts.get("result"), Some(&1), "result list should have refcount 1" ); } /// Test that allocation limits return an error. #[test] fn allocation_limit_exceeded() { // Use multi-character strings to ensure heap allocation (single ASCII chars are interned) let code = r" result = [] for i in range(100, 115): result.append(str(i)) result "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_allocations(4); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); // Should fail due to allocation limit assert!(result.is_err(), "should exceed allocation limit"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("allocation limit exceeded")), "expected allocation limit error, got: {exc}" ); } #[test] fn allocation_limit_not_exceeded() { // Single-digit strings are interned (no allocation), so this uses minimal heap let code = r" result = [] for i in range(9): result.append(str(i)) result "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Allocations: list (1) + range (1) + iterator (1) = 3 // Note: str(0)...str(8) are single ASCII chars, so they use pre-interned strings let limits = ResourceLimits::new().max_allocations(5); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); // Should succeed assert!(result.is_ok(), "should not exceed allocation limit"); } #[test] fn time_limit_exceeded() { // Create a long-running loop using for + range (while isn't 
implemented yet) // Use a very large range to ensure it runs long enough to hit the time limit let code = r" x = 0 for i in range(100000000): x = x + 1 x "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Set a short time limit let limits = ResourceLimits::new().max_duration(Duration::from_millis(50)); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); // Should fail due to time limit assert!(result.is_err(), "should exceed time limit"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::TimeoutError); assert!( exc.message().is_some_and(|m| m.contains("time limit exceeded")), "expected time limit error, got: {exc}" ); } #[test] fn time_limit_not_exceeded() { // Simple code that runs quickly let code = "x = 1 + 2\nx"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Set a generous time limit let limits = ResourceLimits::new().max_duration(Duration::from_secs(5)); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); // Should succeed assert!(result.is_ok(), "should not exceed time limit"); } /// Test that memory limits return an error. 
#[test] fn memory_limit_exceeded() { // Create code that builds up memory using lists // Each iteration creates a new list that gets appended let code = r" result = [] for i in range(100): result.append([1, 2, 3, 4, 5]) result "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Set a very low memory limit (100 bytes) to trigger on nested list allocation let limits = ResourceLimits::new().max_memory(100); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); // Should fail due to memory limit assert!(result.is_err(), "should exceed memory limit"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } #[test] fn combined_limits() { // Test multiple limits together let code = "x = 1 + 2\nx"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new() .max_allocations(1000) .max_duration(Duration::from_secs(5)) .max_memory(1024 * 1024); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "should succeed with generous limits"); } #[test] fn run_without_limits_succeeds() { // Verify that run() still works (no limits) let code = r" result = [] for i in range(100): result.append(str(i)) len(result) "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Standard run should succeed let result = ex.run_no_limits(vec![]); assert!(result.is_ok(), "standard run should succeed"); } #[test] fn gc_interval_triggers_collection() { // This test verifies that GC can run without crashing // We can't easily verify that GC actually collected anything without // adding more introspection, but we can verify it runs let code = r" result = [] for i in range(100): temp = [1, 2, 3] result.append(i) len(result) "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // 
Set GC to run every 10 allocations let limits = ResourceLimits::new().gc_interval(10); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "should succeed with GC enabled"); } #[test] #[cfg_attr( feature = "ref-count-panic", ignore = "resource exhaustion doesn't guarantee heap state consistency" )] fn executor_iter_resource_limit_on_resume() { // Test that resource limits are enforced across function calls // First function call succeeds, but resumed execution exceeds limit // f-string to create multi-char strings (not interned) let code = "foo(1)\nx = []\nfor i in range(10):\n x.append(f'x{i}')\nlen(x)"; let run = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // First function call should succeed with generous limit let limits = ResourceLimits::new().max_allocations(5); let progress = run .start(vec![], LimitedTracker::new(limits), PrintWriter::Stdout) .unwrap(); let call = resolve_name_lookups(progress) .unwrap() .into_function_call() .expect("function call"); assert_eq!(call.function_name, "foo"); assert_eq!(call.args, vec![MontyObject::Int(1)]); // Resume - should fail due to allocation limit during the for loop let result = call.resume(MontyObject::None, PrintWriter::Stdout); assert!(result.is_err(), "should exceed allocation limit on resume"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("allocation limit exceeded")), "expected allocation limit error, got: {exc}" ); } #[test] #[cfg_attr( feature = "ref-count-panic", ignore = "resource exhaustion doesn't guarantee heap state consistency" )] fn executor_iter_resource_limit_before_function_call() { // Test that resource limits are enforced before first function call // f-string to create multi-char strings (not interned) let code = "x = []\nfor i in range(10):\n x.append(f'x{i}')\nfoo(len(x))\n42"; let run = MontyRun::new(code.to_owned(), "test.py", 
vec![]).unwrap(); // Should fail before reaching the function call let limits = ResourceLimits::new().max_allocations(3); let result = run.start(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "should exceed allocation limit before function call"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("allocation limit exceeded")), "expected allocation limit error, got: {exc}" ); } #[test] #[cfg_attr( feature = "ref-count-panic", ignore = "resource exhaustion doesn't guarantee heap state consistency" )] fn char_f_string_not_allocated() { // Single character f-string interned not not allocated let code = "x = []\nfor i in range(10):\n x.append(f'{i}')"; let run = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_allocations(4); run.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout) .unwrap(); } #[test] fn executor_iter_resource_limit_multiple_function_calls() { // Test resource limits across multiple function calls let code = "foo(1)\nbar(2)\nbaz(3)\n4"; let run = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Very tight allocation limit - should still work for simple function calls let limits = ResourceLimits::new().max_allocations(100); let progress = run .start(vec![], LimitedTracker::new(limits), PrintWriter::Stdout) .unwrap(); let call = resolve_name_lookups(progress) .unwrap() .into_function_call() .expect("first call"); assert_eq!(call.function_name, "foo"); assert_eq!(call.args, vec![MontyObject::Int(1)]); let progress = call.resume(MontyObject::None, PrintWriter::Stdout).unwrap(); let call = resolve_name_lookups(progress) .unwrap() .into_function_call() .expect("second call"); assert_eq!(call.function_name, "bar"); assert_eq!(call.args, vec![MontyObject::Int(2)]); let progress = call.resume(MontyObject::None, PrintWriter::Stdout).unwrap(); let call = 
resolve_name_lookups(progress) .unwrap() .into_function_call() .expect("third call"); assert_eq!(call.function_name, "baz"); assert_eq!(call.args, vec![MontyObject::Int(3)]); let result = call .resume(MontyObject::None, PrintWriter::Stdout) .unwrap() .into_complete() .expect("complete"); assert_eq!(result, MontyObject::Int(4)); } /// Test that deep recursion triggers memory limit due to namespace tracking. /// /// Function call namespaces (local variables) are tracked by ResourceTracker. /// Each recursive call creates a new namespace, which should count against /// the memory limit. #[test] #[cfg_attr( feature = "ref-count-panic", ignore = "resource exhaustion doesn't guarantee heap state consistency" )] fn recursion_respects_memory_limit() { // Recursive function that creates stack frames with local variables let code = r" def recurse(n): x = 1 if n > 0: return recurse(n - 1) return 0 recurse(1000) "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Very tight memory limit - should fail due to namespace memory // Each frame needs at least namespace_size * size_of::() bytes let limits = ResourceLimits::new().max_memory(1000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "should exceed memory limit from recursion"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that recursion depth limit returns an error. 
#[test] #[cfg_attr( feature = "ref-count-panic", ignore = "resource exhaustion doesn't guarantee heap state consistency" )] fn recursion_depth_limit_exceeded() { let code = r" def recurse(n): if n > 0: return recurse(n - 1) return 0 recurse(100) "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Set recursion limit to 10 let limits = ResourceLimits::new().max_recursion_depth(Some(10)); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "should exceed recursion depth limit"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::RecursionError); assert!( exc.message() .is_some_and(|m| m.contains("maximum recursion depth exceeded")), "expected recursion depth error, got: {exc}" ); } #[test] fn recursion_depth_limit_not_exceeded() { let code = r" def recurse(n): if n > 0: return recurse(n - 1) return 0 recurse(5) "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Set recursion limit to 10 - should succeed with 5 levels let limits = ResourceLimits::new().max_recursion_depth(Some(10)); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "should not exceed recursion depth limit"); } // === BigInt large result pre-check tests === // These tests verify that operations that would produce very large BigInt results // are rejected before the computation begins, preventing DoS attacks. /// Test that large pow operations are rejected by memory limits. 
#[test] fn bigint_pow_memory_limit() { // 2 ** 10_000_000 would produce ~1.25MB result let code = "2 ** 10000000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Set a 1MB memory limit - should fail before computing let limits = ResourceLimits::new().max_memory(1_000_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "large pow should exceed memory limit"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that pow with huge exponents is rejected even when the size estimate overflows u64. /// /// This catches a bug where `estimate_pow_bytes` returned `None` on u64 overflow, /// and the `if let Some(estimated)` pattern silently skipped the check. #[test] fn pow_overflowing_estimate_rejected() { // base ~63 bits, exp ~62 bits: estimated result bits = 63 * 3962939411543162624 overflows u64 let code = "-7234189268083315611 ** 3962939411543162624"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(1_000_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "pow with overflowing estimate should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that pow with a large base and moderate exponent is rejected by memory limits. /// /// `-7234408281351689115 ** 65327` has a 63-bit base, so the result is ~63*65327 ≈ 4M bits ≈ 514KB. /// With a 100KB memory limit the pre-check should reject this before computing. 
#[test]
fn pow_large_base_moderate_exp_rejected() {
    // 100KB cap against a result estimated at several hundred KB.
    let budget = ResourceLimits::new().max_memory(100_000);
    let runner = MontyRun::new("-7234408281351689115 ** 65327".to_owned(), "test.py", vec![]).unwrap();

    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    let Err(exc) = outcome else {
        panic!("large pow should exceed memory limit");
    };
    assert_eq!(exc.exc_type(), ExcType::MemoryError);
    assert!(
        matches!(exc.message(), Some(text) if text.contains("memory limit exceeded")),
        "expected memory limit error, got: {exc}"
    );
}

/// Test that the 4× safety multiplier for pow intermediate allocations catches
/// cases where the final result fits but repeated-squaring intermediates don't.
///
/// `2 ** 500000`: final result = 2 * 500000 bits = 125KB. Without multiplier this
/// passes a 200KB limit. With 4× multiplier: 500KB > 200KB → rejected.
#[test]
fn pow_intermediate_allocation_multiplier() {
    // The 125KB final value would fit under 200KB; only the 4× estimate (500KB) does not.
    let budget = ResourceLimits::new().max_memory(200_000);
    let runner = MontyRun::new("2 ** 500000".to_owned(), "test.py", vec![]).unwrap();

    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    let Err(exc) = outcome else {
        panic!("pow should be rejected due to intermediate allocation overhead");
    };
    assert_eq!(exc.exc_type(), ExcType::MemoryError);
    // 2 bits * 500000 = 125KB final, × 4 = 500072 bytes (includes base memory offset)
    let expected_message = Some("memory limit exceeded: 500072 bytes > 200000 bytes");
    assert_eq!(exc.message(), expected_message);
}

/// Test that pow still succeeds when the 4× estimate is within the limit.
///
/// `2 ** 100000`: final result = 2 * 100000 bits ≈ 25KB. With 4× multiplier: ~100KB.
/// A 1MB limit should comfortably allow this.
#[test]
fn pow_within_limit_with_multiplier() {
    let budget = ResourceLimits::new().max_memory(1_000_000);
    let runner = MontyRun::new("x = 2 ** 100000\nx > 0".to_owned(), "test.py", vec![]).unwrap();

    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    let Ok(value) = outcome else {
        panic!("pow with 4× estimate under limit should succeed");
    };
    assert_eq!(value, MontyObject::Bool(true));
}

/// Test the exact fuzzer OOM pattern: right-associative chained exponentiation.
///
/// `3 ** 3661666` is the first sub-expression of the fuzzer input
/// `1666**3**366**3**3661666`. Since `**` is right-associative, `3**3661666`
/// is computed first. Base 3 has 2 bits, so: 2 * 3661666 = 7323332 bits ≈ 915KB.
/// With 4× multiplier: 3660KB > 1MB fuzz limit → rejected.
#[test]
fn pow_fuzzer_oom_chained_exponentiation() {
    // Only the innermost sub-expression of the fuzzer input is run here,
    // under the same 1MB cap the fuzzer uses.
    let budget = ResourceLimits::new().max_memory(1_024 * 1_024);
    let runner = MontyRun::new("3 ** 3661666".to_owned(), "test.py", vec![]).unwrap();

    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    let Err(exc) = outcome else {
        panic!("fuzzer OOM pattern should be rejected by 4× multiplier");
    };
    assert_eq!(exc.exc_type(), ExcType::MemoryError);
    // 2 bits * 3661666 = 915KB final, × 4 = 3661740 bytes
    let expected_message = Some("memory limit exceeded: 3661740 bytes > 1048576 bytes");
    assert_eq!(exc.message(), expected_message);
}

/// Test the full fuzzer input that originally caused OOM.
///
/// The input `1666**3**366**3**3661666` should be rejected before any large
/// intermediate allocation occurs.
#[test] fn pow_fuzzer_oom_full_input() { let code = "1666**3**366**3**3661666"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(1_024 * 1_024); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "full fuzzer OOM input should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); // 3**3661666 is evaluated first (right-associative). Base 3 = 2 bits, // so estimate = 2 * 3661666 bits = 915KB. With 4× multiplier: 3661740 bytes > 1MB. assert_eq!( exc.message(), Some("memory limit exceeded: 3661740 bytes > 1048576 bytes") ); } /// Test that large left shift operations are rejected by memory limits. #[test] fn bigint_lshift_memory_limit() { // 1 << 10_000_000 would produce ~1.25MB result let code = "1 << 10000000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Set a 1MB memory limit - should fail before computing let limits = ResourceLimits::new().max_memory(1_000_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "large lshift should exceed memory limit"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that large multiplication operations are rejected by memory limits. 
#[test] fn bigint_mult_memory_limit() { // (2**4_000_000) * (2**4_000_000) would produce ~1MB result let code = "big = 2 ** 4000000\nbig * big"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Set a 1MB memory limit - should fail before computing the multiplication let limits = ResourceLimits::new().max_memory(1_000_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "large mult should exceed memory limit"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that small BigInt operations succeed within memory limits. #[test] fn bigint_small_operations_within_limit() { // 2 ** 1000 produces ~125 bytes - well under limit let code = "x = 2 ** 1000\ny = 1 << 1000\nz = x * 2\nx > 0 and y > 0 and z > 0"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Set a 1MB memory limit - should succeed let limits = ResourceLimits::new().max_memory(1_000_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "small BigInt operations should succeed within limit"); let val = result.unwrap(); assert_eq!(val, MontyObject::Bool(true)); } /// Test that edge cases (0, 1, -1) with huge exponents succeed even with limits. /// These produce constant-size results regardless of exponent. 
#[test] fn bigint_edge_cases_always_succeed() { // Test each edge case individually to minimize other allocations // These edge cases produce constant-size results regardless of exponent: // - 0 ** huge = 0 // - 1 ** huge = 1 // - (-1) ** huge = 1 or -1 // - 0 << huge = 0 // 1MB limit would reject 2**10000000 (~1.25MB) but allows edge cases let limits = ResourceLimits::new().max_memory(1_000_000); // 0 ** huge = 0 let code = "0 ** 10000000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let result = ex.run(vec![], LimitedTracker::new(limits.clone()), PrintWriter::Stdout); assert!(result.is_ok(), "0 ** huge should succeed"); assert_eq!(result.unwrap(), MontyObject::Int(0)); // 1 ** huge = 1 let code = "1 ** 10000000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let result = ex.run(vec![], LimitedTracker::new(limits.clone()), PrintWriter::Stdout); assert!(result.is_ok(), "1 ** huge should succeed"); assert_eq!(result.unwrap(), MontyObject::Int(1)); // (-1) ** huge_even = 1 let code = "(-1) ** 10000000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let result = ex.run(vec![], LimitedTracker::new(limits.clone()), PrintWriter::Stdout); assert!(result.is_ok(), "(-1) ** huge_even should succeed"); assert_eq!(result.unwrap(), MontyObject::Int(1)); // (-1) ** huge_odd = -1 let code = "(-1) ** 10000001"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let result = ex.run(vec![], LimitedTracker::new(limits.clone()), PrintWriter::Stdout); assert!(result.is_ok(), "(-1) ** huge_odd should succeed"); assert_eq!(result.unwrap(), MontyObject::Int(-1)); // 0 << huge = 0 let code = "0 << 10000000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "0 << huge should succeed"); assert_eq!(result.unwrap(), MontyObject::Int(0)); } /// Test that pow() builtin also respects 
memory limits. #[test] fn bigint_builtin_pow_memory_limit() { let code = "pow(2, 10000000)"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(1_000_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "builtin pow should respect memory limit"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); } /// Test that large BigInt operations are rejected BEFORE allocation via check_large_result. /// /// The pre-allocation size check estimates result size and rejects operations that would /// exceed the memory limit before any memory is actually consumed. #[test] fn bigint_rejected_before_allocation() { // 2**1000000: base 2 has 2 bits, so estimate = 2 * 1000000 bits = 250KB // With 4× safety multiplier for intermediate allocations = 1000KB // Set limit to 100KB - the pre-check should reject before allocating let code = "2 ** 1000000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); // 100KB limit let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "should be rejected before allocation"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert_eq!( exc.message(), Some("memory limit exceeded: 1000072 bytes > 100000 bytes") ); } // === String/Bytes large result pre-check tests === // These tests verify that string/bytes multiplication operations that would produce // very large results are rejected before the computation begins. /// Test that large string multiplication is rejected before allocation. 
#[test] fn string_mult_memory_limit() { // 'x' * 1000000 = 1MB string let code = "'x' * 1000000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); // 100KB limit let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "large string mult should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that large bytes multiplication is rejected before allocation. #[test] fn bytes_mult_memory_limit() { // b'x' * 1000000 = 1MB bytes let code = "b'x' * 1000000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); // 100KB limit let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "large bytes mult should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that small string multiplication works within limits. #[test] fn string_mult_within_limit() { // 'abc' * 100 = 300 bytes, well within 100KB limit let code = "'abc' * 100 == 'abc' * 100"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "small string mult should succeed"); assert_eq!(result.unwrap(), MontyObject::Bool(true)); } /// Test that small bytes multiplication works within limits. 
#[test] fn bytes_mult_within_limit() { // b'abc' * 100 = 300 bytes, well within 100KB limit let code = "b'abc' * 100 == b'abc' * 100"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "small bytes mult should succeed"); assert_eq!(result.unwrap(), MontyObject::Bool(true)); } /// Test that string multiplication is rejected before allocation via check_large_result. #[test] fn string_mult_rejected_before_allocation() { // 'x' * 200000 = 200KB string // Set limit to 100KB - the pre-check should reject before allocating let code = "'x' * 200000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); // 100KB limit let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "should be rejected before allocation"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); // The exact size may include some overhead, but should be around 200KB assert!( exc.message() .is_some_and(|m| m.contains("memory limit exceeded") && m.contains("> 100000 bytes")), "expected memory limit error with ~200KB size, got: {:?}", exc.message() ); } /// Test that large list multiplication is rejected before allocation. 
#[test] fn list_mult_memory_limit() { // [1] * 10000 = 10,000 Values = ~160KB (at 16 bytes per Value) let code = "[1] * 10000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); // 100KB limit let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "large list mult should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that large tuple multiplication is rejected before allocation. #[test] fn tuple_mult_memory_limit() { // (1,) * 10000 = 10,000 Values = ~160KB (at 16 bytes per Value) let code = "(1,) * 10000"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); // 100KB limit let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "large tuple mult should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that small list multiplication works within limits. #[test] fn list_mult_within_limit() { // [1, 2, 3] * 20 = 60 Values, well within 100KB limit let code = "[1, 2, 3] * 20 == [1, 2, 3] * 20"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "small list mult should succeed"); assert_eq!(result.unwrap(), MontyObject::Bool(true)); } /// Test that `int * bytes` (int on left) is also rejected by the pre-check. 
///
/// This catches a bug where interned bytes/strings bypassed the `mult_sequence`
/// pre-check because `py_mult` handled `InternBytes * Int` inline without
/// checking resource limits.
#[test]
fn int_times_bytes_memory_limit() {
    // Int on the left-hand side: 1000000 * b'x' = 1MB.
    let runner = MontyRun::new("1000000 * b'x'".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000); // 100KB limit
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "int * bytes should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
    assert!(
        err.message().is_some_and(|msg| msg.contains("memory limit exceeded")),
        "expected memory limit error, got: {err}"
    );
}

/// `int * str` (int on left) must also be rejected by the pre-check.
#[test]
fn int_times_string_memory_limit() {
    // Int on the left-hand side: 1000000 * 'x' = 1MB.
    let runner = MontyRun::new("1000000 * 'x'".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000); // 100KB limit
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "int * str should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
    assert!(
        err.message().is_some_and(|msg| msg.contains("memory limit exceeded")),
        "expected memory limit error, got: {err}"
    );
}

/// `bigint * bytes` (LongInt on left) must be rejected by the pre-check.
#[test]
fn longint_times_bytes_memory_limit() {
    // i64::MAX + 1 = 9223372036854775808, which is a LongInt but fits in usize on 64-bit.
    // Multiplied by a 1-byte bytes literal, this would be ~9.2 exabytes.
    let source = "9223372036854775808 * b'x'";
    let runner = MontyRun::new(source.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "bigint * bytes should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
    assert!(
        err.message().is_some_and(|msg| msg.contains("memory limit exceeded")),
        "expected memory limit error, got: {err}"
    );
}

/// `bigint * str` (LongInt on left) must be rejected by the pre-check.
#[test]
fn longint_times_string_memory_limit() {
    // i64::MAX + 1 = 9223372036854775808, which is a LongInt but fits in usize on 64-bit.
    let source = "9223372036854775808 * 'x'";
    let runner = MontyRun::new(source.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "bigint * str should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
    assert!(
        err.message().is_some_and(|msg| msg.contains("memory limit exceeded")),
        "expected memory limit error, got: {err}"
    );
}

/// Small tuple multiplication must still succeed under the same limit.
#[test]
fn tuple_mult_within_limit() {
    // (1, 2, 3) * 20 = 60 Values, well within the 100KB limit.
    let source = "(1, 2, 3) * 20 == (1, 2, 3) * 20";
    let runner = MontyRun::new(source.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_ok(), "small tuple mult should succeed");
    assert_eq!(outcome.unwrap(), MontyObject::Bool(true));
}

// === Timeout enforcement in builtin iteration loops ===
// These tests verify that `max_duration_secs` is enforced inside Rust-side loops
// within builtin functions.
Previously, builtins like sum(), sorted(), min(), max() // ran Rust loops entirely within a single bytecode instruction, bypassing the VM's // per-instruction timeout check. The fix adds `heap.check_time()` calls inside // `MontyIter::for_next()` and other non-iterator loops. /// Helper: runs code with a short time limit and asserts it produces a TimeoutError promptly. fn assert_timeout_in_builtin(code: &str, label: &str) { let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_duration(Duration::from_millis(100)); let start = std::time::Instant::now(); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); let elapsed = start.elapsed(); assert!(result.is_err(), "{label}: should exceed time limit"); let exc = result.unwrap_err(); assert_eq!( exc.exc_type(), ExcType::TimeoutError, "{label}: expected TimeoutError, got: {exc}" ); assert!( elapsed < Duration::from_secs(2), "{label}: should terminate promptly, took {elapsed:?}" ); } /// Test that `sum(range(huge))` respects the time limit. /// /// `sum()` iterates via `for_next()` which now calls `heap.check_time()`. #[test] fn timeout_in_sum_builtin() { assert_timeout_in_builtin("sum(range(10**18))", "sum(range(10**18))"); } /// Test that `list(range(huge))` respects the time limit. /// /// The `list()` constructor collects via `MontyIter::collect()` -> `for_next()`. #[test] fn timeout_in_list_constructor() { assert_timeout_in_builtin("list(range(10**18))", "list(range(10**18))"); } /// Test that `sorted(range(huge))` respects the time limit. /// /// `sorted()` first collects items via `for_next()`, then sorts. The collection /// phase alone should trigger the timeout for very large ranges. #[test] fn timeout_in_sorted_builtin() { assert_timeout_in_builtin("sorted(range(10**18))", "sorted(range(10**18))"); } /// Test that `min(range(huge))` respects the time limit. /// /// `min()` with a single iterable argument iterates via `for_next()`. 
#[test]
fn timeout_in_min_builtin() {
    assert_timeout_in_builtin("min(range(10**18))", "min(range(10**18))");
}

/// `max(range(huge))` must respect the time limit.
///
/// `max()` with a single iterable argument iterates via `for_next()`.
#[test]
fn timeout_in_max_builtin() {
    assert_timeout_in_builtin("max(range(10**18))", "max(range(10**18))");
}

/// `all(range(huge))` must respect the time limit.
///
/// `all()` iterates via `for_next()` and only short-circuits on falsy values.
/// `range(1, 10**18)` produces only truthy values so it keeps iterating.
#[test]
fn timeout_in_all_builtin() {
    assert_timeout_in_builtin("all(range(1, 10**18))", "all(range(1, 10**18))");
}

/// `enumerate(range(huge))` iteration must respect the time limit.
///
/// `enumerate()` creates tuples on each iteration via `for_next()`.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_in_any_builtin() {
    // Despite the test name, `any()` is not called here: `any(range(10**18))`
    // returns True after checking 0 (falsy) and 1 (truthy), so it can never run
    // long enough to time out. The `for_next()` timeout is exercised through
    // `enumerate()` instead.
    assert_timeout_in_builtin("list(enumerate(range(10**18)))", "enumerate(range(10**18))");
}

/// `tuple(range(huge))` must respect the time limit.
///
/// The `tuple()` constructor collects via `MontyIter::collect()` -> `for_next()`.
#[test]
fn timeout_in_tuple_constructor() {
    assert_timeout_in_builtin("tuple(range(10**18))", "tuple(range(10**18))");
}

/// Test that `' '.join(...)` iteration respects the time limit.
///
/// `str.join()` collects items from the iterable via `for_next()`.
#[test]
fn timeout_in_str_join() {
    assert_timeout_in_builtin("' '.join(str(i) for i in range(10**18))", "str.join with generator");
}

/// The insertion sort inner loop in `sorted()` must respect the time limit.
///
/// Uses reverse-sorted data to trigger worst-case O(n^2) insertion sort behavior.
/// The sort comparison loop has an explicit `heap.check_time()` call.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_in_sorted_comparison_loop() {
    // Build a reverse-sorted list, then sort it. Each Python statement sits on
    // its own line of the raw string.
    let code = r"
x = list(range(10**6, 0, -1))
sorted(x)
";
    assert_timeout_in_builtin(code, "sorted(reversed list)");
}

/// `[1, 2, 3] * 10_000_000` (list repetition) must respect the time limit.
///
/// The `mult_sequence()` copy loop now calls `heap.check_time()` on each
/// repetition to prevent large sequence multiplications from bypassing timeout.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_in_list_repetition() {
    assert_timeout_in_builtin("[1, 2, 3] * 10_000_000", "list repetition");
}

/// `(1, 2, 3) * 10_000_000` (tuple repetition) must respect the time limit.
///
/// Same as list repetition but for tuples — both paths in `mult_sequence()`
/// now check the time limit.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_in_tuple_repetition() {
    assert_timeout_in_builtin("(1, 2, 3) * 10_000_000", "tuple repetition");
}

/// Test that comparing two large equal lists respects the time limit.
///
/// `List::py_eq()` iterates element-wise comparing pairs. With large equal lists,
/// it must compare every element before returning True.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_in_list_equality() {
    // One Python statement per line: build two equal lists, then compare them.
    let code = r"
a = list(range(10_000_000))
b = list(range(10_000_000))
a == b
";
    assert_timeout_in_builtin(code, "list equality");
}

/// Comparing two large equal dicts must respect the time limit.
///
/// `Dict::py_eq()` iterates all entries checking keys and values. With large equal
/// dicts, it must check every entry before returning True.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_in_dict_equality() {
    let code = r"
a = {i: i for i in range(10_000_000)}
b = {i: i for i in range(10_000_000)}
a == b
";
    assert_timeout_in_builtin(code, "dict equality");
}

/// `str.splitlines()` on a large string must respect the time limit.
///
/// `str_splitlines()` scans the entire string for line endings in a while loop
/// that now calls `heap.check_time()` on each iteration.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_in_str_splitlines() {
    // `\n` stays a two-character escape inside the Rust raw string; it is the
    // Python source that turns it into a newline.
    let code = r"
s = 'a\n' * 5_000_000
s.splitlines()
";
    assert_timeout_in_builtin(code, "str.splitlines()");
}

/// `bytes.splitlines()` on large bytes must respect the time limit.
///
/// `bytes_splitlines()` scans bytes for line endings and now checks the time limit.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_in_bytes_splitlines() {
    let code = r"
s = b'a\n' * 5_000_000
s.splitlines()
";
    assert_timeout_in_builtin(code, "bytes.splitlines()");
}

// === Timeout truncation in repr ===
// These tests verify that `repr()` on large containers respects the time limit
// and terminates promptly instead of hanging indefinitely.
The repr methods // (`repr_sequence_fmt`, `Dict::py_repr_fmt`, `SetInner::repr_fmt`) call // `heap.check_time()` on each iteration and write `...[timeout]` when the // time limit is exceeded, returning normally instead of propagating an error. // // Each test uses the external function "interrupt" pattern: the large object is // built with NO time limit, then execution pauses at `interrupt()`. A short time // limit is set before resuming, so only the `repr()` call is timed. /// Helper: builds a large object without time limit, then runs `repr()` on it /// with a short time limit and asserts it produces a TimeoutError promptly. /// /// The code must call `interrupt()` between object construction and `repr()`. fn assert_repr_timeout(code: &str, label: &str) { let run = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); // Phase 1: build the large object with no time limit let limits = ResourceLimits::new(); let progress = run .start(vec![], LimitedTracker::new(limits), PrintWriter::Stdout) .unwrap(); let mut call = resolve_name_lookups(progress) .unwrap() .into_function_call() .expect("interrupt call"); assert_eq!(call.function_name, "interrupt"); // Phase 2: set a short time limit and resume — repr() should timeout call.tracker_mut().set_max_duration(Duration::from_millis(10)); let start = Instant::now(); let result = call.resume(MontyObject::None, PrintWriter::Stdout); let elapsed = start.elapsed(); let exc = result.unwrap_err(); assert_eq!( exc.exc_type(), ExcType::TimeoutError, "{label}: expected TimeoutError, got: {exc}" ); let msg = exc.message().unwrap(); assert!(msg.starts_with("time limit exceeded:")); assert!(msg.ends_with("ms > 10ms")); assert!( elapsed < Duration::from_millis(200), "{label}: should terminate promptly, took {elapsed:?}" ); } /// Test that `repr(large_list)` respects the time limit. /// /// Uses a list of 100K short strings so that repr formatting is slow enough /// to trigger the timeout. 
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_truncation_in_list_repr() {
    // One Python statement per line: build, pause at interrupt(), then repr().
    let code = r"
x = ['abcdefghij'] * 100_000
interrupt()
repr(x)
";
    assert_repr_timeout(code, "list repr");
}

/// `repr(large_dict)` must respect the time limit.
///
/// Uses a dict with 100K entries where values are short strings,
/// making repr formatting slow enough to trigger the timeout.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_truncation_in_dict_repr() {
    let code = r"
x = {i: 'abcdefghij' for i in range(100_000)}
interrupt()
repr(x)
";
    assert_repr_timeout(code, "dict repr");
}

/// `repr(large_set)` must respect the time limit.
///
/// Uses a set of 100K unique strings so that repr formatting is slow enough
/// to trigger the timeout.
#[test]
#[cfg_attr(
    feature = "ref-count-panic",
    ignore = "resource exhaustion doesn't guarantee heap state consistency"
)]
fn timeout_truncation_in_set_repr() {
    let code = r"
x = {str(i) for i in range(100_000)}
interrupt()
repr(x)
";
    assert_repr_timeout(code, "set repr");
}

/// Test that `str.replace` with amplification is rejected before allocation.
///
/// `'a' * 1000` is 1KB (within limit), but replacing each 'a' with a 1KB string
/// produces a 1MB result. The pre-check should reject this before `String::replace()`
/// allocates the result on the Rust heap.
#[test] fn str_replace_amplification_memory_limit() { let code = r" s = 'a' * 1000 s.replace('a', 'b' * 1000) "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(500_000); // 500KB limit let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "str.replace amplification should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that small `str.replace` works within limits. #[test] fn str_replace_within_limit() { let code = "'hello world'.replace('world', 'rust') == 'hello rust'"; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "small str.replace should succeed"); assert_eq!(result.unwrap(), MontyObject::Bool(true)); } /// Test that `bytes.replace` with amplification is rejected before allocation. #[test] fn bytes_replace_amplification_memory_limit() { let code = r" s = b'a' * 1000 s.replace(b'a', b'b' * 1000) "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(500_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "bytes.replace amplification should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that `str.replace` with empty pattern amplification is rejected. /// /// Empty pattern inserts `new` before each char and after the last, so /// result size = input_len * (new_len + 1). 
#[test]
fn str_replace_empty_pattern_memory_limit() {
    // One Python statement per line of the raw string.
    let code = r"
s = 'a' * 500
s.replace('', 'x' * 1000)
";
    let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(200_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(
        outcome.is_err(),
        "str.replace with empty pattern amplification should be rejected"
    );
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// `str.ljust` with huge width must be rejected before allocation.
///
/// Without the pre-check, `String::with_capacity(width)` would allocate
/// directly on the Rust heap, bypassing the memory tracker entirely.
#[test]
fn str_ljust_memory_limit() {
    let runner = MontyRun::new("'x'.ljust(2000000)".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "str.ljust with huge width should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
    assert!(
        err.message().is_some_and(|msg| msg.contains("memory limit exceeded")),
        "expected memory limit error, got: {err}"
    );
}

/// `str.rjust` with huge width must be rejected before allocation.
#[test]
fn str_rjust_memory_limit() {
    let runner = MontyRun::new("'x'.rjust(2000000)".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "str.rjust with huge width should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// Test that `str.center` with huge width is rejected before allocation.
#[test]
fn str_center_memory_limit() {
    let runner = MontyRun::new("'x'.center(2000000)".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "str.center with huge width should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// `str.zfill` with huge width must be rejected before allocation.
#[test]
fn str_zfill_memory_limit() {
    let runner = MontyRun::new("'42'.zfill(2000000)".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "str.zfill with huge width should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// Small padding operations must still succeed within limits.
#[test]
fn str_padding_within_limit() {
    // `'hi'.ljust(10)` is 'hi' followed by 8 spaces (10 characters in total);
    // the expected literal must carry exactly that padding for the comparison
    // to evaluate to True.
    let source = "'hi'.ljust(10) == 'hi        '";
    let runner = MontyRun::new(source.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_ok(), "small padding should succeed");
    assert_eq!(outcome.unwrap(), MontyObject::Bool(true));
}

/// `bytes.ljust` with huge width must be rejected before allocation.
#[test]
fn bytes_ljust_memory_limit() {
    let runner = MontyRun::new("b'x'.ljust(2000000)".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "bytes.ljust with huge width should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// Test that `bytes.rjust` with huge width is rejected before allocation.
#[test]
fn bytes_rjust_memory_limit() {
    let runner = MontyRun::new("b'x'.rjust(2000000)".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "bytes.rjust with huge width should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// `bytes.center` with huge width must be rejected before allocation.
#[test]
fn bytes_center_memory_limit() {
    let runner = MontyRun::new("b'x'.center(2000000)".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "bytes.center with huge width should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// `bytes.zfill` with huge width must be rejected before allocation.
#[test]
fn bytes_zfill_memory_limit() {
    let runner = MontyRun::new("b'42'.zfill(2000000)".to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "bytes.zfill with huge width should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// Test that f-string formatting with huge width is rejected before allocation.
#[test] fn fstring_dynamic_width_memory_limit() { // Dynamic format spec via f-string nesting: {w} produces a runtime-parsed spec let code = "w = 2000000\nf\"{'x':>{w}}\""; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(100_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "f-string with huge dynamic width should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); } // === re.sub() memory tracking tests === // These tests verify that the single-pass replacement loop in `re.sub()` tracks // the running output size and bails out when the resource limit is exceeded. /// Test that `re.sub` with every-char pattern amplification is rejected. /// /// Pattern 'a' matches every character in 'aaa...'. Each replacement expands /// 1 byte → 1000 bytes, so the output grows to ~1MB which exceeds the 500KB limit. /// The inline loop catches this after a few hundred matches. #[test] fn re_sub_amplification_memory_limit() { let code = r" import re s = 'a' * 1000 re.sub('a', 'b' * 1000, s) "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(500_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "re.sub amplification should be rejected"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::MemoryError); assert!( exc.message().is_some_and(|m| m.contains("memory limit exceeded")), "expected memory limit error, got: {exc}" ); } /// Test that `re.sub` with empty pattern amplification is rejected. /// /// Empty pattern matches N+1 times for N-char input (between and around every /// character). Each match inserts 1000 bytes, so 501 matches × 1000 ≈ 500KB /// which exceeds the 200KB limit. 
#[test]
fn re_sub_empty_pattern_amplification_memory_limit() {
    // One Python statement per line of the raw string.
    let code = r"
import re
s = 'a' * 500
re.sub('', 'x' * 1000, s)
";
    let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(200_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(
        outcome.is_err(),
        "re.sub with empty pattern amplification should be rejected"
    );
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// `pattern.sub` (compiled pattern method) must also be rejected.
#[test]
fn re_pattern_sub_amplification_memory_limit() {
    let code = r"
import re
p = re.compile('a')
s = 'a' * 1000
p.sub('b' * 1000, s)
";
    let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(500_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "pattern.sub amplification should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

/// Test that `re.sub` raises `re.PatternError` when the regex engine hits its backtracking limit.
///
/// The pattern `(a+)+\1b` forces `fancy_regex` into its backtracking VM (due to the
/// backreference `\1`). With enough `a`s followed by a non-matching character, the
/// exponential blowup exceeds the engine's backtracking step limit (~1M steps).
#[test] fn re_sub_backtracking_limit_raises_pattern_error() { let code = r" import re re.sub('(a+)+\\1b', 'X', 'a' * 30 + 'c') "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(500_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_err(), "backtracking limit should raise an error"); let exc = result.unwrap_err(); assert_eq!(exc.exc_type(), ExcType::RePatternError); assert!( exc.message().is_some_and(|m| m.contains("backtrack")), "expected backtracking error, got: {exc}" ); } // --- Selective patterns: few matches in large text stay within limits --- /// Test that a selective pattern on large text passes. /// /// The pattern `xxx` only matches 3 times (at positions 0, 3, 6 in the 9-char prefix), /// so the result is ~10000 - 9 + 300 = 10291 bytes — well within the 500KB limit. #[test] fn re_sub_selective_pattern_passes() { // 'xxx' repeated 3 times at the start, rest is 'a's let code = r" import re s = 'xxx' * 3 + 'a' * 9991 result = re.sub('xxx', 'y' * 100, s) len(result) == 9991 + 3 * 100 "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(500_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!( result.is_ok(), "selective pattern with few matches should pass: {result:?}" ); assert_eq!(result.unwrap(), MontyObject::Bool(true)); } /// Test that a digit-matching pattern on mostly-text input passes. /// /// Pattern `\d+` matches only the 10-digit number, so the result is /// 990 + 200 = 1190 bytes — well within the 150KB limit. 
#[test]
fn re_sub_digit_pattern_passes() {
    // One Python statement per line of the raw string.
    let code = r"
import re
s = 'a' * 990 + '1234567890'
result = re.sub('\d+', 'X' * 200, s)
len(result) == 990 + 200
";
    let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(150_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_ok(), "digit pattern on mostly-text should pass: {outcome:?}");
    assert_eq!(outcome.unwrap(), MontyObject::Bool(true));
}

/// Every-char amplification must still be rejected even with a generic pattern.
///
/// Pattern `.` matches every character (10000 matches), each expanding 1 → 1000 bytes.
/// The inline loop catches this after a few hundred matches once the running output
/// size exceeds the 500KB limit.
#[test]
fn re_sub_every_char_amplification_rejected() {
    let code = r"
import re
s = 'a' * 10000
re.sub('.', 'b' * 1000, s)
";
    let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(500_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_err(), "every-char pattern amplification should be rejected");
    let err = outcome.unwrap_err();
    assert_eq!(err.exc_type(), ExcType::MemoryError);
}

// --- General re.sub tests ---

/// Small `re.sub` must still succeed within limits.
#[test]
fn re_sub_within_limit() {
    let code = r"
import re
re.sub('world', 'rust', 'hello world') == 'hello rust'
";
    let runner = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap();
    let budget = ResourceLimits::new().max_memory(100_000);
    let outcome = runner.run(vec![], LimitedTracker::new(budget), PrintWriter::Stdout);

    assert!(outcome.is_ok(), "small re.sub should succeed");
    assert_eq!(outcome.unwrap(), MontyObject::Bool(true));
}

/// Test that `re.sub` with count parameter limits replacements correctly.
/// /// `count=5` caps replacements to 5, so the result is /// 995 unchanged bytes + 5 × 100 replacement bytes = 1495 bytes. #[test] fn re_sub_with_count_within_limit() { let code = r" import re re.sub('a', 'b' * 100, 'a' * 1000, count=5) == 'b' * 500 + 'a' * 995 "; let ex = MontyRun::new(code.to_owned(), "test.py", vec![]).unwrap(); let limits = ResourceLimits::new().max_memory(500_000); let result = ex.run(vec![], LimitedTracker::new(limits), PrintWriter::Stdout); assert!(result.is_ok(), "re.sub with small count should succeed"); assert_eq!(result.unwrap(), MontyObject::Bool(true)); } ================================================ FILE: crates/monty/tests/try_from.rs ================================================ use monty::MontyRun; /// Tests for successful TryFrom conversions from Python values to Rust types. /// /// These tests validate that the `TryFrom` implementations on `MontyObject` correctly /// convert Python objects to their corresponding Rust types when the conversion /// is valid (e.g., Python int to Rust i64, Python str to Rust String). 
#[test] fn try_from_ok_int_to_i64() { let ex = MontyRun::new("42".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let value: i64 = (&result).try_into().expect("conversion should succeed"); assert_eq!(value, 42); } #[test] fn try_from_ok_zero_to_i64() { let ex = MontyRun::new("0".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let value: i64 = (&result).try_into().expect("conversion should succeed"); assert_eq!(value, 0); } #[test] #[expect(clippy::float_cmp)] fn try_from_ok_float_to_f64() { let ex = MontyRun::new("2.5".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let value: f64 = (&result).try_into().expect("conversion should succeed"); assert_eq!(value, 2.5); } #[test] #[expect(clippy::float_cmp)] fn try_from_ok_int_to_f64() { let ex = MontyRun::new("42".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let value: f64 = (&result).try_into().expect("conversion should succeed"); assert_eq!(value, 42.0); } #[test] fn try_from_ok_string_to_string() { let ex = MontyRun::new("'hello'".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let value: String = (&result).try_into().expect("conversion should succeed"); assert_eq!(value, "hello".to_string()); } #[test] fn try_from_ok_empty_string_to_string() { let ex = MontyRun::new("''".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let value: String = (&result).try_into().expect("conversion should succeed"); assert_eq!(value, String::new()); } #[test] fn try_from_ok_multiline_string_to_string() { let ex = MontyRun::new("'hello\\nworld'".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let value: String = (&result).try_into().expect("conversion should succeed"); assert_eq!(value, "hello\nworld".to_string()); } #[test] fn 
try_from_ok_bool_true_to_bool() { let ex = MontyRun::new("True".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let value: bool = (&result).try_into().expect("conversion should succeed"); assert!(value); } #[test] fn try_from_ok_bool_false_to_bool() { let ex = MontyRun::new("False".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let value: bool = (&result).try_into().expect("conversion should succeed"); assert!(!value); } /// Tests for failed TryFrom conversions from Python values to Rust types. /// /// These tests validate that the `TryFrom` implementations correctly reject /// invalid conversions with appropriate error messages (e.g., trying to convert /// a Python str to a Rust i64). #[test] fn try_from_err_string_to_i64() { let ex = MontyRun::new("'hello'".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected int, got str"); } #[test] fn try_from_err_float_to_i64() { let ex = MontyRun::new("2.5".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected int, got float"); } #[test] fn try_from_err_none_to_i64() { let ex = MontyRun::new("None".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected int, got NoneType"); } #[test] fn try_from_err_list_to_i64() { let ex = MontyRun::new("[1, 2, 3]".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected int, got list"); } #[test] fn 
try_from_err_int_to_string() { let ex = MontyRun::new("42".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected str, got int"); } #[test] fn try_from_err_none_to_string() { let ex = MontyRun::new("None".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected str, got NoneType"); } #[test] fn try_from_err_list_to_string() { let ex = MontyRun::new("[1, 2]".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected str, got list"); } #[test] fn try_from_err_int_to_bool() { let ex = MontyRun::new("1".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected bool, got int"); } #[test] fn try_from_err_string_to_bool() { let ex = MontyRun::new("'true'".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected bool, got str"); } #[test] fn try_from_err_none_to_bool() { let ex = MontyRun::new("None".to_owned(), "test.py", vec![]).unwrap(); let result = ex.run_no_limits(vec![]).unwrap(); let err = TryInto::::try_into(&result).expect_err("conversion should fail"); assert_eq!(err.to_string(), "expected bool, got NoneType"); } ================================================ FILE: crates/monty-cli/Cargo.toml ================================================ [package] name = "monty-cli" version = { workspace = true } license = { workspace = true } 
rust-version = { workspace = true }
edition = { workspace = true }
authors = { workspace = true }
description = { workspace = true }
keywords = { workspace = true }
categories = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }

[[bin]]
name = "monty"
path = "src/main.rs"

[dependencies]
clap = { version = "4", features = ["derive"] }
monty = { path = "../monty" }
monty_type_checking = { path = "../monty-type-checking" }
rustyline = "15"

[lints]
workspace = true

================================================
FILE: crates/monty-cli/src/main.rs
================================================
use std::{
    fmt, fs,
    process::ExitCode,
    time::{Duration, Instant},
};

use clap::Parser;
use monty::{
    LimitedTracker, MontyObject, MontyRepl, MontyRun, NameLookupResult, NoLimitTracker, PrintWriter,
    ReplContinuationMode, ResourceLimits, ResourceTracker, RunProgress, detect_repl_continuation_mode,
};
use rustyline::{DefaultEditor, error::ReadlineError};
// rustfmt is disabled for the import below because formatting failed on
// https://github.com/pydantic/monty/pull/75, where CI and local runs wanted the
// imports ordered differently.
// TODO: re-enable soon!
#[rustfmt::skip]
use monty_type_checking::{SourceFile, type_check};

/// ANSI escape code for dim/gray text.
const DIM: &str = "\x1b[2m";
/// ANSI escape code for bold red text (errors).
const BOLD_RED: &str = "\x1b[1m\x1b[31m";
/// ANSI escape code for bold green text (success, headings).
const BOLD_GREEN: &str = "\x1b[1m\x1b[32m";
/// ANSI escape code for bold cyan text (commands, prompts).
const BOLD_CYAN: &str = "\x1b[1m\x1b[36m";
/// ANSI escape code to reset all text styling.
const RESET: &str = "\x1b[0m";
/// Arrow glyph used as the REPL prompt and as the separator between the
/// elapsed time and the result in script-mode output.
const ARROW: &str = "❯";

/// Monty — a sandboxed Python interpreter written in Rust.
/// /// - `monty` starts an empty interactive REPL /// - `monty ` runs the file in script mode /// - `monty -c ` executes `` as a Python program /// - `monty -i` starts an empty interactive REPL /// - `monty -i ` seeds the REPL with file contents #[derive(Parser)] #[command(version)] struct Cli { /// Start interactive REPL mode. #[arg(short = 'i', long = "interactive")] interactive: bool, /// Run the type checker before executing. #[arg(short = 't', long = "type-check")] type_check: bool, /// Execute a Python program passed as a string (like `python -c`). #[arg(short = 'c')] command: Option, /// Python file to execute. file: Option, /// Maximum number of heap allocations before execution is terminated. #[arg(long)] max_allocations: Option, /// Maximum execution time in seconds (e.g. `0.5` for 500ms). #[arg(long)] max_duration: Option, /// Maximum heap memory (e.g. `1024`, `512KB`, `10MB`, `1GB`). #[arg(long, value_parser = parse_memory_size)] max_memory: Option, /// Run garbage collection every N allocations. #[arg(long)] gc_interval: Option, /// Maximum call-stack depth (defaults to 1000 when any limit is set). #[arg(long)] max_recursion_depth: Option, } impl Cli { /// Builds `ResourceLimits` from the parsed CLI arguments. /// /// Returns `None` when no resource flags were provided, which lets the /// caller fall back to `NoLimitTracker` for zero-overhead execution. 
fn resource_limits(&self) -> Option { if self.max_allocations.is_none() && self.max_duration.is_none() && self.max_memory.is_none() && self.gc_interval.is_none() && self.max_recursion_depth.is_none() { return None; } let mut limits = ResourceLimits::new(); if let Some(n) = self.max_allocations { limits = limits.max_allocations(n); } if let Some(secs) = self.max_duration { limits = limits.max_duration(Duration::from_secs_f64(secs)); } if let Some(bytes) = self.max_memory { limits = limits.max_memory(bytes); } if let Some(interval) = self.gc_interval { limits = limits.gc_interval(interval); } if let Some(depth) = self.max_recursion_depth { limits = limits.max_recursion_depth(Some(depth)); } Some(limits) } } const EXT_FUNCTIONS: bool = false; fn main() -> ExitCode { let cli = Cli::parse(); let type_check_enabled = cli.type_check; let limits = cli.resource_limits(); if let Some(cmd) = cli.command { if cli.file.is_some() { eprintln!("{BOLD_RED}error{RESET}: cannot specify both -c and a file"); return ExitCode::FAILURE; } return if cli.interactive { dispatch_repl("", &cmd, limits) } else { dispatch_script("", cmd, type_check_enabled, limits) }; } if let Some(file_path) = cli.file.as_deref() { let code = match read_file(file_path) { Ok(code) => code, Err(err) => { eprintln!("{BOLD_RED}error{RESET}: {err}"); return ExitCode::FAILURE; } }; return if cli.interactive { dispatch_repl(file_path, &code, limits) } else { dispatch_script(file_path, code, type_check_enabled, limits) }; } dispatch_repl("repl.py", "", limits) } /// Dispatches script execution with either `LimitedTracker` or `NoLimitTracker`. /// /// This top-level branch avoids threading generics through the entire call chain /// while still keeping the zero-overhead `NoLimitTracker` path when no limits are set. 
fn dispatch_script(
    file_path: &str,
    code: String,
    type_check_enabled: bool,
    limits: Option<ResourceLimits>,
) -> ExitCode {
    // Each arm instantiates `run_script` with a different concrete tracker type.
    match limits {
        Some(limits) => run_script(file_path, code, type_check_enabled, LimitedTracker::new(limits)),
        None => run_script(file_path, code, type_check_enabled, NoLimitTracker),
    }
}

/// Dispatches REPL startup with either `LimitedTracker` or `NoLimitTracker`.
///
/// `code` is the initial snippet used to seed the REPL (may be empty).
fn dispatch_repl(file_path: &str, code: &str, limits: Option<ResourceLimits>) -> ExitCode {
    match limits {
        Some(limits) => run_repl(file_path, code, LimitedTracker::new(limits)),
        None => run_repl(file_path, code, NoLimitTracker),
    }
}

/// Executes a Python file in one-shot CLI mode.
///
/// This path keeps the existing CLI behavior: run type-checking for visibility,
/// compile the file as a full module, and execute it either through direct
/// execution or through the suspendable progress loop when external functions
/// are enabled.
///
/// Returns `ExitCode::SUCCESS` for successful execution and
/// `ExitCode::FAILURE` for parse/runtime failures. A failed type check is
/// reported on stderr but does not stop execution or affect the exit code.
fn run_script(file_path: &str, code: String, type_check_enabled: bool, tracker: impl ResourceTracker) -> ExitCode {
    if type_check_enabled {
        let start = Instant::now();
        // Type checking is informational only: the outcome is printed with its
        // elapsed time and execution continues in both cases. An `Err` from
        // `type_check` itself panics via `unwrap`.
        if let Some(failure) = type_check(&SourceFile::new(&code, file_path), None).unwrap() {
            let elapsed = start.elapsed();
            eprintln!(
                "{DIM}{}{RESET} {BOLD_CYAN}{ARROW}{RESET} {BOLD_RED}type check failed{RESET}:\n{failure}",
                FormattedDuration(elapsed)
            );
        } else {
            let elapsed = start.elapsed();
            eprintln!(
                "{DIM}{}{RESET} {BOLD_CYAN}{ARROW}{RESET} {BOLD_GREEN}type check passed{RESET}",
                FormattedDuration(elapsed)
            );
        }
    }

    // The CLI declares no input variables and therefore passes no input values.
    let input_names = vec![];
    let inputs = vec![];

    let runner = match MontyRun::new(code, file_path, input_names) {
        Ok(ex) => ex,
        Err(err) => {
            eprintln!("{BOLD_RED}error{RESET}:\n{err}");
            return ExitCode::FAILURE;
        }
    };

    if EXT_FUNCTIONS {
        // Suspendable path: start execution, then let `run_until_complete`
        // resolve each suspension until a final value or an error is produced.
        let start = Instant::now();
        let progress = match runner.start(inputs, tracker, PrintWriter::Stdout) {
            Ok(p) => p,
            Err(err) => {
                let elapsed = start.elapsed();
                eprintln!(
                    "{DIM}{}{RESET} {BOLD_CYAN}{ARROW}{RESET} {BOLD_RED}error{RESET}: {err}",
                    FormattedDuration(elapsed)
                );
                return ExitCode::FAILURE;
            }
        };

        match run_until_complete(progress) {
            Ok(value) => {
                let elapsed = start.elapsed();
                eprintln!(
                    "{DIM}{}{RESET} {BOLD_CYAN}{ARROW}{RESET} {value}",
                    FormattedDuration(elapsed)
                );
                ExitCode::SUCCESS
            }
            Err(err) => {
                let elapsed = start.elapsed();
                eprintln!(
                    "{DIM}{}{RESET} {BOLD_CYAN}{ARROW}{RESET} {BOLD_RED}error{RESET}: {err}",
                    FormattedDuration(elapsed)
                );
                ExitCode::FAILURE
            }
        }
    } else {
        // Direct path: run to completion in one call.
        let start = Instant::now();
        let value = match runner.run(inputs, tracker, PrintWriter::Stdout) {
            Ok(p) => p,
            Err(err) => {
                let elapsed = start.elapsed();
                eprintln!(
                    "{DIM}{}{RESET} {BOLD_CYAN}{ARROW}{RESET} {BOLD_RED}error{RESET}: {err}",
                    FormattedDuration(elapsed)
                );
                return ExitCode::FAILURE;
            }
        };
        let elapsed = start.elapsed();
        // The timing line and final value go to stderr; the script's own `print`
        // output goes through `PrintWriter::Stdout`.
        eprintln!(
            "{DIM}{}{RESET} {BOLD_CYAN}{ARROW}{RESET} {value}",
            FormattedDuration(elapsed)
        );
        ExitCode::SUCCESS
    }
}

/// Starts an interactive line-by-line REPL session.
///
/// Initializes `MontyRepl` once and incrementally feeds entered snippets without
/// replaying previous snippets, which matches the intended stateful REPL model.
/// Multiline input follows CPython-style prompts:
/// - `❯ ` for a new statement
/// - `… ` for continuation lines
///
/// If `code` is non-empty it is executed as the first snippet before the prompt
/// is shown.
///
/// Returns `ExitCode::SUCCESS` on EOF or `exit`, and `ExitCode::FAILURE` on
/// initialization or I/O errors.
fn run_repl(file_path: &str, code: &str, tracker: impl ResourceTracker) -> ExitCode {
    let mut repl = MontyRepl::new(file_path, tracker);

    if !code.is_empty() {
        execute_repl_snippet(&mut repl, code);
    }

    eprintln!("Monty v{} REPL. Type `exit` to exit.", env!("CARGO_PKG_VERSION"));

    let mut rl = match DefaultEditor::new() {
        Ok(rl) => rl,
        Err(err) => {
            eprintln!("{BOLD_RED}error{RESET} initializing editor: {err}");
            return ExitCode::FAILURE;
        }
    };

    // Lines collected so far for the statement being entered, each terminated by '\n'.
    let mut pending_snippet = String::new();
    // `Complete` means we are at the start of a new statement; any other value
    // means the previous lines are still waiting for more input.
    let mut continuation_mode = ReplContinuationMode::Complete;

    loop {
        let prompt = if continuation_mode == ReplContinuationMode::Complete {
            format!("{BOLD_CYAN}{ARROW}{RESET} ")
        } else {
            "… ".to_owned()
        };

        let line = match rl.readline(&prompt) {
            Ok(line) => line,
            Err(ReadlineError::Eof) => return ExitCode::SUCCESS,
            Err(ReadlineError::Interrupted) => {
                // Ctrl-C: discard pending input and start fresh
                pending_snippet.clear();
                continuation_mode = ReplContinuationMode::Complete;
                continue;
            }
            Err(err) => {
                eprintln!("{BOLD_RED}error{RESET} reading input: {err}");
                return ExitCode::FAILURE;
            }
        };

        let snippet = line.trim_end();
        // Blank lines and the `exit` command are only special at the start of a
        // statement; inside a continuation they are treated as input.
        if continuation_mode == ReplContinuationMode::Complete && snippet.is_empty() {
            continue;
        }
        if continuation_mode == ReplContinuationMode::Complete && snippet == "exit" {
            return ExitCode::SUCCESS;
        }

        pending_snippet.push_str(snippet);
        pending_snippet.push('\n');

        // A blank line while in `IncompleteBlock` mode terminates the block and runs it.
        if continuation_mode == ReplContinuationMode::IncompleteBlock && snippet.is_empty() {
            let _ = rl.add_history_entry(pending_snippet.trim_end());
            execute_repl_snippet(&mut repl, &pending_snippet);
            pending_snippet.clear();
            continuation_mode = ReplContinuationMode::Complete;
            continue;
        }

        let detected_mode = detect_repl_continuation_mode(&pending_snippet);
        match detected_mode {
            ReplContinuationMode::Complete => {
                // Once a block has been opened, a syntactically complete snippet is
                // not executed yet; only the blank-line check above ends the block.
                if continuation_mode == ReplContinuationMode::IncompleteBlock {
                    continue;
                }
                let _ = rl.add_history_entry(pending_snippet.trim_end());
                execute_repl_snippet(&mut repl, &pending_snippet);
                pending_snippet.clear();
                continuation_mode = ReplContinuationMode::Complete;
            }
            ReplContinuationMode::IncompleteBlock => continuation_mode = ReplContinuationMode::IncompleteBlock,
            ReplContinuationMode::IncompleteImplicit => {
                // Block mode is sticky: an implicit continuation inside a block must
                // not downgrade it, otherwise the blank-line terminator would be lost.
                if continuation_mode != ReplContinuationMode::IncompleteBlock {
                    continuation_mode = ReplContinuationMode::IncompleteImplicit;
                }
            }
        }
    }
}

/// Executes one collected REPL snippet, printing the result or error.
///
/// A result equal to `MontyObject::None` is not echoed; any other value is
/// printed to stdout. Errors are printed to stderr and do not end the session.
// NOTE(review): `MontyRepl::new` is called with an `impl ResourceTracker`, so
// `MontyRepl` is presumably generic over the tracker type and a type argument
// appears to be missing from this signature — confirm against upstream.
fn execute_repl_snippet(repl: &mut MontyRepl, snippet: &str) {
    match repl.feed_run(snippet, vec![], PrintWriter::Stdout) {
        Ok(output) => {
            if output != MontyObject::None {
                println!("{output}");
            }
        }
        Err(err) => {
            eprintln!("{BOLD_RED}error{RESET}: {err}");
        }
    }
}

/// Drives suspendable execution until completion.
///
/// This repeatedly resumes `RunProgress` values by resolving supported
/// external calls and returns the final value when execution reaches
/// `RunProgress::Complete`.
///
/// Returns an error string for unsupported suspend points (OS calls or async
/// futures) or invalid external-function dispatch.
fn run_until_complete(mut progress: RunProgress) -> Result { loop { match progress { RunProgress::Complete(value) => return Ok(value), RunProgress::FunctionCall(call) => { let return_value = resolve_external_call(&call.function_name, &call.args)?; progress = call .resume(return_value, PrintWriter::Stdout) .map_err(|err| format!("{err}"))?; } RunProgress::ResolveFutures(state) => { return Err(format!( "async futures not supported in CLI: {:?}", state.pending_call_ids() )); } RunProgress::NameLookup(lookup) => { let result = if lookup.name == "add_ints" { NameLookupResult::Value(MontyObject::Function { name: "add_ints".to_string(), docstring: None, }) } else { NameLookupResult::Undefined }; progress = lookup .resume(result, PrintWriter::Stdout) .map_err(|err| format!("{err}"))?; } RunProgress::OsCall(call) => { return Err(format!( "OS calls not supported in CLI: {:?}({:?})", call.function, call.args )); } } } } /// Resolves supported CLI external function calls. /// /// The CLI currently supports only `add_ints(int, int)`, which makes it /// possible to exercise the suspend/resume path in a deterministic way. /// /// Returns a runtime-like error string for unknown function names, wrong arity, /// or incorrect argument types. fn resolve_external_call(function_name: &str, args: &[MontyObject]) -> Result { if function_name != "add_ints" { return Err(format!("unknown external function: {function_name}({args:?})")); } if args.len() != 2 { return Err(format!("add_ints requires exactly 2 arguments, got {}", args.len())); } if let (MontyObject::Int(a), MontyObject::Int(b)) = (&args[0], &args[1]) { Ok(MontyObject::Int(a + b)) } else { Err(format!("add_ints requires integer arguments, got {args:?}")) } } /// Reads a Python source file from disk, returning its contents as a string. /// /// Returns an error message if the path doesn't exist, isn't a file, or can't be read. 
fn read_file(file_path: &str) -> Result<String, String> {
    // Stat first so that directories and other non-files get a clear message
    // instead of a platform-specific read error.
    let metadata = fs::metadata(file_path).map_err(|err| format!("reading {file_path}: {err}"))?;
    if !metadata.is_file() {
        return Err(format!("{file_path} is not a file"));
    }

    // Include the path here too, so both failure modes identify the file.
    fs::read_to_string(file_path).map_err(|err| format!("reading {file_path}: {err}"))
}

/// Wrapper around `Duration` that formats with 5 significant digits and an auto-selected unit.
///
/// - `< 1ms` → microseconds, e.g. `123.45μs`
/// - `1ms..1s` → milliseconds, e.g. `12.345ms`
/// - `≥ 1s` → seconds, e.g. `1.2345s`
///
/// The goal is a compact, human-readable duration string that stays consistent in width
/// regardless of whether execution took microseconds or seconds.
struct FormattedDuration(Duration);

impl fmt::Display for FormattedDuration {
    /// Writes the duration scaled to μs, ms or s, with the number of decimals
    /// chosen by `sig_digits_after_decimal` for the scaled value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let duration = self.0;
        let total_secs = duration.as_secs_f64();

        if total_secs < 1e-3 {
            // Microseconds
            let us = total_secs * 1e6;
            let decimals = sig_digits_after_decimal(us);
            write!(f, "{us:.decimals$}μs")
        } else if total_secs < 1.0 {
            // Milliseconds
            let ms = total_secs * 1e3;
            let decimals = sig_digits_after_decimal(ms);
            write!(f, "{ms:.decimals$}ms")
        } else {
            // Seconds
            let decimals = sig_digits_after_decimal(total_secs);
            write!(f, "{total_secs:.decimals$}s")
        }
    }
}

/// Calculates how many decimal places to show for 5 significant digits.
///
/// Counts the number of digits before the decimal point, then returns `5 - that count`
/// (clamped to 0). For example, `12.345` has 2 digits before the decimal → 3 after = 5 total.
fn sig_digits_after_decimal(value: f64) -> usize {
    // Values below 1 are counted as having one leading digit (the `0`).
    let before = if value < 1.0 {
        1
    } else {
        // value is always positive and < 1e6 in practice, so log10 fits in a u32
        #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
        let digits = (value.log10().floor() as u32) + 1;
        digits as usize
    };
    5usize.saturating_sub(before)
}

/// Parses a memory size string with optional unit suffix.
///
/// Accepts plain byte counts (`1024`) or values with a case-insensitive suffix:
/// `KB` (kilobytes), `MB` (megabytes), `GB` (gigabytes). Units are powers of
/// 1024. Any mix of upper and lower case is accepted (`kb`, `Kb`, `kB`, `KB`),
/// and whitespace around the value or between number and unit is ignored.
/// The numeric part must be a valid `usize`.
///
/// Returns an error string if the numeric part does not parse or if the
/// resulting byte count overflows `usize`.
///
/// # Examples
///
/// - `"512"` → 512
/// - `"512KB"` → 524_288
/// - `"10Mb"` → 10_485_760
/// - `"1GB"` → 1_073_741_824
fn parse_memory_size(s: &str) -> Result<usize, String> {
    let s = s.trim();
    // ASCII upper-casing keeps the byte length and leaves digits untouched, so the
    // suffix can be matched once in a single canonical spelling.
    let upper = s.to_ascii_uppercase();
    let (num_str, multiplier): (&str, usize) = if let Some(n) = upper.strip_suffix("GB") {
        (n.trim(), 1024 * 1024 * 1024)
    } else if let Some(n) = upper.strip_suffix("MB") {
        (n.trim(), 1024 * 1024)
    } else if let Some(n) = upper.strip_suffix("KB") {
        (n.trim(), 1024)
    } else {
        (upper.as_str(), 1)
    };
    // Error messages quote the user's original spelling, not the upper-cased copy.
    let value: usize = num_str.parse().map_err(|e| format!("invalid memory size '{s}': {e}"))?;
    value
        .checked_mul(multiplier)
        .ok_or_else(|| format!("memory size '{s}' overflows"))
}

================================================
FILE: crates/monty-js/.cargo/config.toml
================================================
[target.x86_64-pc-windows-msvc]
rustflags = ["-C", "target-feature=+crt-static"]

================================================
FILE: crates/monty-js/.gitignore
================================================
# Created by https://www.toptal.com/developers/gitignore/api/node
# Edit at https://www.toptal.com/developers/gitignore?templates=node

### Node ###
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data
pids
*.pid *.seed *.pid.lock # Directory for instrumented libs generated by jscoverage/JSCover lib-cov # Coverage directory used by tools like istanbul coverage *.lcov # nyc test coverage .nyc_output # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) .grunt # Bower dependency directory (https://bower.io/) bower_components # node-waf configuration .lock-wscript # Compiled binary addons (https://nodejs.org/api/addons.html) build/Release # Dependency directories node_modules/ jspm_packages/ # TypeScript v1 declaration files typings/ # TypeScript cache *.tsbuildinfo # Optional npm cache directory .npm # Optional eslint cache .eslintcache # Microbundle cache .rpt2_cache/ .rts2_cache_cjs/ .rts2_cache_es/ .rts2_cache_umd/ # Optional REPL history .node_repl_history # Output of 'npm pack' *.tgz # Yarn Integrity file .yarn-integrity # dotenv environment variables file .env .env.test # parcel-bundler cache (https://parceljs.org/) .cache # Next.js build output .next # Nuxt.js build / generate output .nuxt dist # Gatsby files .cache/ # Comment in the public line in if your project uses Gatsby and not Next.js # https://nextjs.org/blog/next-9-1#public-directory-support # public # vuepress build output .vuepress/dist # Serverless directories .serverless/ # FuseBox cache .fusebox/ # DynamoDB Local files .dynamodb/ # TernJS port file .tern-port # Stores VSCode versions used for testing VSCode extensions .vscode-test # End of https://www.toptal.com/developers/gitignore/api/node #Added by cargo /target Cargo.lock *.node .pnp.* .yarn/* !.yarn/patches !.yarn/plugins !.yarn/releases !.yarn/sdks !.yarn/versions /npm # napi generated js files browser.js index.js index.d.ts monty.wasi.cjs monty.wasi-browser.js wasi-worker.mjs wasi-worker-browser.mjs # tsc output (source is wrapper.ts) wrapper.js wrapper.d.ts wrapper.d.ts.map wrapper.js.map .claude/settings.local.json plans prompts monty.wasm32-wasi.wasm ================================================ FILE: 
crates/monty-js/.prettierignore ================================================ target .yarn index.js package-template.wasi-browser.js package-template.wasi.cjs wasi-worker-browser.mjs wasi-worker.mjs .yarnrc.yml ================================================ FILE: crates/monty-js/Cargo.toml ================================================ [package] name = "monty-js" description = "TypeScript/JavaScript bindings for the Monty sandboxed Python interpreter" readme = "README.md" version = { workspace = true } rust-version = { workspace = true } # edition = { workspace = true } edition = "2021" authors = { workspace = true } license = { workspace = true } keywords = { workspace = true } categories = { workspace = true } homepage = { workspace = true } repository = { workspace = true } [lib] crate-type = ["cdylib"] [dependencies] monty = { path = "../monty" } monty_type_checking = { path = "../monty-type-checking" } napi = { version = "3.0.0", default-features = false, features = ["napi6", "compat-mode"] } napi-derive = "3.0.0" num-bigint = { workspace = true } serde = { workspace = true } postcard = { workspace = true } [build-dependencies] napi-build = "2" [lints] workspace = true ================================================ FILE: crates/monty-js/README.md ================================================ # @pydantic/monty JavaScript/TypeScript bindings for the Monty sandboxed Python interpreter. 
## Installation ```bash npm install @pydantic/monty ``` ## Basic Usage ```ts import { Monty } from '@pydantic/monty' // Create interpreter and run code const m = new Monty('1 + 2') const result = m.run() // returns 3 ``` ## Input Variables ```ts const m = new Monty('x + y', { inputs: ['x', 'y'] }) const result = m.run({ inputs: { x: 10, y: 20 } }) // returns 30 ``` ## External Functions For synchronous external functions, pass them directly to `run()`: ```ts const m = new Monty('add(2, 3)') const result = m.run({ externalFunctions: { add: (a: number, b: number) => a + b, }, }) // returns 5 ``` For async external functions, use `runMontyAsync()`: ```ts import { Monty, runMontyAsync } from '@pydantic/monty' const m = new Monty('fetch_data(url)', { inputs: ['url'], }) const result = await runMontyAsync(m, { inputs: { url: 'https://example.com' }, externalFunctions: { fetch_data: async (url: string) => { const response = await fetch(url) return response.text() }, }, }) ``` ## Iterative Execution For fine-grained control over external function calls, use `start()` and `resume()`: ```ts const m = new Monty('a() + b()') let progress = m.start() while (progress instanceof MontySnapshot) { console.log(`Calling: ${progress.functionName}`) console.log(`Args: ${progress.args}`) // Provide the return value and resume progress = progress.resume({ returnValue: 10 }) } // progress is now MontyComplete console.log(progress.output) // 20 ``` ## Error Handling ```ts import { Monty, MontySyntaxError, MontyRuntimeError, MontyTypingError } from '@pydantic/monty' try { const m = new Monty('1 / 0') m.run() } catch (error) { if (error instanceof MontySyntaxError) { console.log('Syntax error:', error.message) } else if (error instanceof MontyRuntimeError) { console.log('Runtime error:', error.message) console.log('Traceback:', error.traceback()) } else if (error instanceof MontyTypingError) { console.log('Type error:', error.displayDiagnostics()) } } ``` ## Type Checking ```ts const m = new 
Monty('"hello" + 1') try { m.typeCheck() } catch (error) { if (error instanceof MontyTypingError) { console.log(error.displayDiagnostics('concise')) } } // Or enable during construction const m2 = new Monty('1 + 1', { typeCheck: true }) ``` ## Resource Limits ```ts const m = new Monty('1 + 1') const result = m.run({ limits: { maxAllocations: 10000, maxDurationSecs: 5, maxMemory: 1024 * 1024, // 1MB maxRecursionDepth: 100, }, }) ``` ## Serialization ```ts // Save parsed code to avoid re-parsing const m = new Monty('complex_code()') const data = m.dump() // Later, restore without re-parsing const m2 = Monty.load(data) const result = m2.run() // Snapshots can also be serialized const snapshot = m.start() if (snapshot instanceof MontySnapshot) { const snapshotData = snapshot.dump() // Later, restore and resume const restored = MontySnapshot.load(snapshotData) const result = restored.resume({ returnValue: 42 }) } ``` ## API Reference ### `Monty` Class - `constructor(code: string, options?: MontyOptions)` - Parse Python code - `run(options?: RunOptions)` - Execute and return the result - `start(options?: StartOptions)` - Start iterative execution - `typeCheck(prefixCode?: string)` - Perform static type checking - `dump()` - Serialize to binary format - `Monty.load(data)` - Deserialize from binary format - `scriptName` - The script name (default: `'main.py'`) - `inputs` - Declared input variable names ### `MontyOptions` - `scriptName?: string` - Name used in tracebacks (default: `'main.py'`) - `inputs?: string[]` - Input variable names - `typeCheck?: boolean` - Enable type checking on construction - `typeCheckPrefixCode?: string` - Code to prepend for type checking ### `RunOptions` - `inputs?: object` - Input variable values - `limits?: ResourceLimits` - Resource limits - `externalFunctions?: object` - External function callbacks ### `ResourceLimits` - `maxAllocations?: number` - Maximum heap allocations - `maxDurationSecs?: number` - Maximum execution time in seconds - 
`maxMemory?: number` - Maximum heap memory in bytes - `gcInterval?: number` - Run GC every N allocations - `maxRecursionDepth?: number` - Maximum call stack depth (default: 1000) ### `MontySnapshot` Class Returned by `start()` when execution pauses at an external function call. - `scriptName` - The script being executed - `functionName` - The external function being called - `args` - Positional arguments - `kwargs` - Keyword arguments - `resume(options: ResumeOptions)` - Resume with return value or exception - `dump()` / `MontySnapshot.load(data)` - Serialization ### `MontyComplete` Class Returned by `start()` or `resume()` when execution completes. - `output` - The final result value ### Error Classes - `MontyError` - Base class for all Monty errors - `MontySyntaxError` - Syntax/parsing errors - `MontyRuntimeError` - Runtime exceptions (with `traceback()`) - `MontyTypingError` - Type checking errors (with `displayDiagnostics()`) ================================================ FILE: crates/monty-js/__test__/async.spec.ts ================================================ import test from 'ava' import { Monty, MontyRuntimeError, runMontyAsync } from '../wrapper' // ============================================================================= // Basic async external function tests // ============================================================================= test('runMontyAsync with sync external function', async (t) => { const m = new Monty('get_value()') const result = await runMontyAsync(m, { externalFunctions: { get_value: () => 42, }, }) t.is(result, 42) }) test('runMontyAsync with async external function', async (t) => { const m = new Monty('fetch_data()') const result = await runMontyAsync(m, { externalFunctions: { fetch_data: async () => { // Simulate async operation await new Promise((resolve) => setTimeout(resolve, 10)) return 'async result' }, }, }) t.is(result, 'async result') }) test('runMontyAsync with multiple async calls', async (t) => { const m = 
new Monty( ` a = fetch_a() b = fetch_b() a + b `, {}, ) const result = await runMontyAsync(m, { externalFunctions: { fetch_a: async () => { await new Promise((resolve) => setTimeout(resolve, 5)) return 10 }, fetch_b: async () => { await new Promise((resolve) => setTimeout(resolve, 5)) return 20 }, }, }) t.is(result, 30) }) test('runMontyAsync with inputs', async (t) => { const m = new Monty('multiply(x)', { inputs: ['x'] }) const result = await runMontyAsync(m, { inputs: { x: 5 }, externalFunctions: { multiply: async (n: number) => n * 2, }, }) t.is(result, 10) }) test('runMontyAsync with args and kwargs', async (t) => { const m = new Monty('process(1, 2, name="test")') const result = await runMontyAsync(m, { externalFunctions: { process: async (a: number, b: number, kwargs: { name: string }) => { return `${kwargs.name}: ${a + b}` }, }, }) t.is(result, 'test: 3') }) // ============================================================================= // Error handling tests // ============================================================================= test('runMontyAsync sync function throws exception', async (t) => { const m = new Monty('fail_sync()') class ValueError extends Error { override name = 'ValueError' } const error = await t.throwsAsync( runMontyAsync(m, { externalFunctions: { fail_sync: () => { throw new ValueError('sync error') }, }, }), ) t.true(error instanceof MontyRuntimeError) }) test('runMontyAsync async function throws exception', async (t) => { const m = new Monty('fail_async()') class ValueError extends Error { override name = 'ValueError' } const error = await t.throwsAsync( runMontyAsync(m, { externalFunctions: { fail_async: async () => { await new Promise((resolve) => setTimeout(resolve, 5)) throw new ValueError('async error') }, }, }), ) t.true(error instanceof MontyRuntimeError) }) test('runMontyAsync exception caught in try/except', async (t) => { const m = new Monty( ` try: might_fail() except ValueError: result = 'caught' result `, {}, ) 
class ValueError extends Error { override name = 'ValueError' } const result = await runMontyAsync(m, { externalFunctions: { might_fail: async () => { throw new ValueError('expected error') }, }, }) t.is(result, 'caught') }) test('runMontyAsync missing external function raises NameError', async (t) => { const m = new Monty('missing_func()') const error = await t.throwsAsync(runMontyAsync(m, { externalFunctions: {} })) t.true(error instanceof MontyRuntimeError) t.true(error!.message.includes('NameError')) }) test('runMontyAsync missing function caught in try/except', async (t) => { const m = new Monty( ` try: missing() except NameError: result = 'caught' result `, ) const result = await runMontyAsync(m, { externalFunctions: {} }) t.is(result, 'caught') }) // ============================================================================= // Complex type tests // ============================================================================= test('runMontyAsync returns complex types', async (t) => { const m = new Monty('get_data()') const result = await runMontyAsync(m, { externalFunctions: { get_data: async () => { return [1, 2, { key: 'value' }] }, }, }) t.true(Array.isArray(result)) t.is(result[0], 1) t.is(result[1], 2) t.true(result[2] instanceof Map) t.is(result[2].get('key'), 'value') }) test('runMontyAsync with list input', async (t) => { const m = new Monty('sum_list(items)', { inputs: ['items'] }) const result = await runMontyAsync(m, { inputs: { items: [1, 2, 3, 4, 5] }, externalFunctions: { sum_list: async (items: number[]) => { return items.reduce((a, b) => a + b, 0) }, }, }) t.is(result, 15) }) // ============================================================================= // Mixed sync/async tests // ============================================================================= test('runMontyAsync mixed sync and async functions', async (t) => { const m = new Monty( ` sync_result = sync_func() async_result = async_func() sync_result + async_result `, {}, ) 
const result = await runMontyAsync(m, { externalFunctions: { sync_func: () => 100, async_func: async () => { await new Promise((resolve) => setTimeout(resolve, 5)) return 200 }, }, }) t.is(result, 300) }) test('runMontyAsync chained async calls', async (t) => { const m = new Monty( ` first = get_first() second = process(first) finalize(second) `, {}, ) const result = await runMontyAsync(m, { externalFunctions: { get_first: async () => 'hello', process: async (s: string) => s.toUpperCase(), finalize: async (s: string) => `${s}!`, }, }) t.is(result, 'HELLO!') }) // ============================================================================= // No external functions tests // ============================================================================= test('runMontyAsync without external functions', async (t) => { const m = new Monty('1 + 2') const result = await runMontyAsync(m, {}) t.is(result, 3) }) test('runMontyAsync pure computation', async (t) => { const m = new Monty( ` def factorial(n): if n <= 1: return 1 return n * factorial(n - 1) factorial(5) `, ) const result = await runMontyAsync(m) t.is(result, 120) }) // ============================================================================= // printCallback tests // ============================================================================= test('runMontyAsync with printCallback', async (t) => { const m = new Monty('print("hello from async")') const output: string[] = [] const result = await runMontyAsync(m, { printCallback: (stream, text) => { t.is(stream, 'stdout') output.push(text) }, }) t.is(result, null) t.deepEqual(output, ['hello from async', '\n']) }) test('runMontyAsync printCallback with external functions', async (t) => { const m = new Monty('x = get_value()\nprint(f"got {x}")\nx', { externalFunctions: ['get_value'], }) const output: string[] = [] const result = await runMontyAsync(m, { externalFunctions: { get_value: () => 42, }, printCallback: (stream, text) => { t.is(stream, 'stdout') 
output.push(text) }, }) t.is(result, 42) t.deepEqual(output, ['got 42', '\n']) }) test('runMontyAsync printCallback with multiple prints', async (t) => { const m = new Monty('print("a")\nprint("b")\nprint("c")') const output: string[] = [] await runMontyAsync(m, { printCallback: (_stream, text) => { output.push(text) }, }) t.deepEqual(output, ['a', '\n', 'b', '\n', 'c', '\n']) }) ================================================ FILE: crates/monty-js/__test__/basic.spec.ts ================================================ import test from 'ava' import { Monty, MontySyntaxError } from '../wrapper' // ============================================================================= // Constructor tests // ============================================================================= test('Monty constructor with default options', (t) => { const m = new Monty('1 + 2') t.is(m.scriptName, 'main.py') t.deepEqual(m.inputs, []) }) test('Monty constructor with custom script name', (t) => { const m = new Monty('1 + 2', { scriptName: 'test.py' }) t.is(m.scriptName, 'test.py') }) test('Monty constructor with inputs', (t) => { const m = new Monty('x + y', { inputs: ['x', 'y'] }) t.deepEqual(m.inputs, ['x', 'y']) }) test('Monty constructor with syntax error', (t) => { const error = t.throws(() => new Monty('def'), { instanceOf: MontySyntaxError }) t.true(error?.message.includes('SyntaxError')) }) // ============================================================================= // repr() tests // ============================================================================= test('Monty repr() no inputs', (t) => { const m = new Monty('1 + 1') const repr = m.repr() t.true(repr.includes('Monty')) t.true(repr.includes('main.py')) }) test('Monty repr() with inputs', (t) => { const m = new Monty('x', { inputs: ['x', 'y'] }) const repr = m.repr() t.true(repr.includes('Monty')) t.true(repr.includes('inputs')) }) test('Monty repr() with inputs and external call', (t) => { const m = new 
Monty('foo(x)', { inputs: ['x'] }) const repr = m.repr() t.true(repr.includes('inputs')) }) // ============================================================================= // Simple expression tests // ============================================================================= test('simple expression', (t) => { const m = new Monty('1 + 2') t.is(m.run(), 3) }) test('arithmetic', (t) => { const m = new Monty('10 * 5 - 3') t.is(m.run(), 47) }) test('string concatenation', (t) => { const m = new Monty('"hello" + " " + "world"') t.is(m.run(), 'hello world') }) // ============================================================================= // Multiple runs tests // ============================================================================= test('multiple runs same instance', (t) => { const m = new Monty('x * 2', { inputs: ['x'] }) t.is(m.run({ inputs: { x: 5 } }), 10) t.is(m.run({ inputs: { x: 10 } }), 20) t.is(m.run({ inputs: { x: -3 } }), -6) }) test('run multiple times no inputs', (t) => { const m = new Monty('1 + 2') t.is(m.run(), 3) t.is(m.run(), 3) t.is(m.run(), 3) }) // ============================================================================= // Multiline code tests // ============================================================================= test('multiline code', (t) => { const code = ` x = 1 y = 2 x + y ` const m = new Monty(code) t.is(m.run(), 3) }) test('function definition and call', (t) => { const code = ` def add(a, b): return a + b add(3, 4) ` const m = new Monty(code) t.is(m.run(), 7) }) ================================================ FILE: crates/monty-js/__test__/exceptions.spec.ts ================================================ import test from 'ava' import type { ErrorConstructor } from 'ava' import { Monty, MontyError, MontySyntaxError, MontyRuntimeError, MontyTypingError } from '../wrapper' // Helper for asserting MontyRuntimeError, private constructor requires the awkward cast via any // but it works fine at runtime export const 
isRuntimeError = { instanceOf: MontyRuntimeError as any as ErrorConstructor } // ============================================================================= // MontyRuntimeError tests // ============================================================================= test('zero division error', (t) => { const m = new Monty('1 / 0') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'ZeroDivisionError: division by zero') }) test('value error', (t) => { const m = new Monty('raise ValueError("bad value")') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'ValueError: bad value') }) test('type error', (t) => { const m = new Monty("'string' + 1") const error = t.throws(() => m.run(), isRuntimeError) t.true(error.message.includes('TypeError')) }) test('index error', (t) => { const m = new Monty('[1, 2, 3][10]') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'IndexError: list index out of range') }) test('key error', (t) => { const m = new Monty('{"a": 1}["b"]') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'KeyError: b') }) test('attribute error', (t) => { const m = new Monty('raise AttributeError("no such attr")') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'AttributeError: no such attr') }) test('name error', (t) => { const m = new Monty('undefined_variable') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, "NameError: name 'undefined_variable' is not defined") }) test('assertion error', (t) => { const m = new Monty('assert False') const error = t.throws(() => m.run(), isRuntimeError) t.true(error.message.includes('AssertionError')) }) test('assertion error with message', (t) => { const m = new Monty('assert False, "custom message"') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'AssertionError: custom message') }) test('runtime error', (t) => { const m = new Monty('raise 
RuntimeError("runtime error")') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'RuntimeError: runtime error') }) test('not implemented error', (t) => { const m = new Monty('raise NotImplementedError("not implemented")') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'NotImplementedError: not implemented') }) // ============================================================================= // OS call errors (no OS callback support in JS bindings) // ============================================================================= test('os.environ via run() raises NotImplementedError', (t) => { const m = new Monty('import os\nx = os.environ') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.exception.typeName, 'NotImplementedError') t.is(error.exception.message, "OS function 'os.environ' not implemented with standard execution") }) test('os.getenv via run() raises NotImplementedError', (t) => { const m = new Monty("import os\nx = os.getenv('HOME')") const error = t.throws(() => m.run(), isRuntimeError) t.is(error.exception.typeName, 'NotImplementedError') t.is(error.exception.message, "OS function 'os.getenv' not implemented with standard execution") }) // ============================================================================= // MontySyntaxError tests // ============================================================================= test('syntax error on init', (t) => { const error = t.throws(() => new Monty('def'), { instanceOf: MontySyntaxError }) t.true(error.message.includes('SyntaxError')) }) test('syntax error unclosed paren', (t) => { const error = t.throws(() => new Monty('print(1'), { instanceOf: MontySyntaxError }) t.true(error.message.includes('SyntaxError')) }) test('syntax error invalid syntax', (t) => { const error = t.throws(() => new Monty('x = = 1'), { instanceOf: MontySyntaxError }) t.true(error.message.includes('SyntaxError')) }) // 
============================================================================= // Catching with base class tests // ============================================================================= test('catch with base class', (t) => { const m = new Monty('1 / 0') try { m.run() t.fail('Should have thrown') } catch (e) { t.true(e instanceof MontyError) } }) test('catch syntax error with base class', (t) => { try { new Monty('def') } catch (e) { t.true(e instanceof MontyError) } }) // ============================================================================= // Exception handling within Monty tests // ============================================================================= test('raise caught exception', (t) => { const code = ` try: 1 / 0 except ZeroDivisionError as e: result = 'caught' result ` const m = new Monty(code) t.is(m.run(), 'caught') }) test('exception in function', (t) => { const code = ` def fail(): raise ValueError('from function') fail() ` const m = new Monty(code) const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'ValueError: from function') }) // ============================================================================= // Display and str methods tests // ============================================================================= test('display traceback', (t) => { const m = new Monty('1 / 0') const error = t.throws(() => m.run(), isRuntimeError) const display = error.display('traceback') t.true(display.includes('Traceback (most recent call last):')) t.true(display.includes('ZeroDivisionError')) }) test('display type msg', (t) => { const m = new Monty('raise ValueError("test message")') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.display('type-msg'), 'ValueError: test message') }) test('runtime display', (t) => { const m = new Monty('raise ValueError("test message")') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.display('msg'), 'test message') t.is(error.display('type-msg'), 
'ValueError: test message') const traceback = error.display('traceback') t.true(traceback.includes('Traceback (most recent call last):')) t.true( traceback.includes("raise ValueError('test message')") || traceback.includes('raise ValueError("test message")'), ) t.true(traceback.includes('ValueError: test message')) }) test('str returns type msg', (t) => { const m = new Monty('raise ValueError("test message")') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, 'ValueError: test message') }) test('syntax error display', (t) => { const error = t.throws(() => new Monty('def'), { instanceOf: MontySyntaxError }) t.true(error.display().includes('Expected an identifier')) t.true(error.display('type-msg').includes('SyntaxError')) }) // ============================================================================= // Traceback tests // ============================================================================= test('traceback frames', (t) => { const code = `def inner(): raise ValueError('error') def outer(): inner() outer() ` const m = new Monty(code) const error = t.throws(() => m.run(), isRuntimeError) const display = error.display('traceback') t.true(display.includes('Traceback (most recent call last):')) t.true(display.includes('outer()')) t.true(display.includes('inner()')) t.true(display.includes('ValueError: error')) }) // ============================================================================= // MontyError base class tests // ============================================================================= test('MontyError extends Error', (t) => { const err = new MontyError('ValueError', 'test message') t.true(err instanceof Error) t.true(err instanceof MontyError) t.is(err.name, 'MontyError') }) test('MontyError constructor and properties', (t) => { const err = new MontyError('ValueError', 'test message') t.deepEqual(err.exception, { typeName: 'ValueError', message: 'test message' }) t.is(err.message, 'ValueError: test message') }) 
test('MontyError display()', (t) => {
  // Render a hand-built base error in each message-style format and compare
  // every rendering with the exact text implied by the constructor arguments.
  const baseError = new MontyError('ValueError', 'test message')
  const messageOnly = baseError.display('msg')
  t.is(messageOnly, 'test message')
  const typeAndMessage = baseError.display('type-msg')
  t.is(typeAndMessage, 'ValueError: test message')
})

test('MontyError with empty message', (t) => {
  // With an empty message, the 'type-msg' rendering is asserted to be the
  // bare type name.
  const emptyMessageError = new MontyError('TypeError', '')
  const rendered = emptyMessageError.display('type-msg')
  t.is(rendered, 'TypeError')
})

// =============================================================================
// MontySyntaxError class tests
// =============================================================================

test('MontySyntaxError extends MontyError and Error', (t) => {
  const syntaxError = new MontySyntaxError('invalid syntax')
  // Check the class chain from the most general ancestor to the class itself.
  for (const ancestor of [Error, MontyError, MontySyntaxError]) {
    t.true(syntaxError instanceof ancestor)
  }
  t.is(syntaxError.name, 'MontySyntaxError')
})

test('MontySyntaxError constructor and properties', (t) => {
  // Only the message is passed in; the test asserts that the exception info
  // reports the type name 'SyntaxError' alongside it.
  const syntaxError = new MontySyntaxError('invalid syntax')
  const expectedInfo = { typeName: 'SyntaxError', message: 'invalid syntax' }
  t.deepEqual(syntaxError.exception, expectedInfo)
  t.is(syntaxError.message, 'SyntaxError: invalid syntax')
})

test('MontySyntaxError display()', (t) => {
  const syntaxError = new MontySyntaxError('unexpected token')
  // display() with no format argument is asserted to produce the same text
  // as the explicit 'msg' format.
  const defaultRendering = syntaxError.display()
  t.is(defaultRendering, 'unexpected token')
  const messageOnly = syntaxError.display('msg')
  t.is(messageOnly, 'unexpected token')
  const typeAndMessage = syntaxError.display('type-msg')
  t.is(typeAndMessage, 'SyntaxError: unexpected token')
})

// =============================================================================
// MontyRuntimeError class tests
// =============================================================================

test('MontyRuntimeError display()', (t) => {
  // Trigger a real runtime failure (division by zero) and inspect the error
  // that run() throws.
  const program = new Monty('1 / 0')
  const runtimeError = t.throws(() => program.run(), isRuntimeError)
  t.true(runtimeError instanceof MontyError)
  t.true(runtimeError instanceof Error)
  t.is(runtimeError.message, 'ZeroDivisionError: division by zero')

  // display() with no format argument is asserted to equal the 'traceback'
  // rendering.
  const tracebackText = runtimeError.display('traceback')
  t.is(runtimeError.display(), tracebackText)
  t.true(tracebackText.includes('Traceback (most recent call last):'))

  const typeAndMessage = runtimeError.display('type-msg')
  t.is(typeAndMessage, 'ZeroDivisionError: division by zero')
  const messageOnly = runtimeError.display('msg')
  t.is(messageOnly, 'division by zero')
})
test('MontyRuntimeError can be caught with instanceof', (t) => { const m = new Monty('1 / 0') try { m.run() t.fail('Should have thrown') } catch (e) { t.true(e instanceof MontyRuntimeError) t.true(e instanceof MontyError) t.true(e instanceof Error) } }) // ============================================================================= // MontyTypingError class tests // ============================================================================= test('MontyTypingError extends MontyError and Error', (t) => { const err = new MontyTypingError('type mismatch') t.true(err instanceof Error) t.true(err instanceof MontyError) t.true(err instanceof MontyTypingError) t.is(err.name, 'MontyTypingError') }) test('MontyTypingError is thrown on type check failure', (t) => { const code = ` x: int = "not an int" ` const error = t.throws(() => new Monty(code, { typeCheck: true }), { instanceOf: MontyTypingError }) t.true(error instanceof MontyError) t.true(error instanceof Error) }) // ============================================================================= // Error catching hierarchy tests // ============================================================================= test('MontyError catches all Monty exceptions', (t) => { // Syntax error try { new Monty('def') } catch (e) { t.true(e instanceof MontyError) } // Runtime error try { new Monty('1 / 0').run() } catch (e) { t.true(e instanceof MontyError) } // Type error try { new Monty('x: int = "str"', { typeCheck: true }) } catch (e) { t.true(e instanceof MontyError) } }) test('can distinguish error types with instanceof', (t) => { // Test syntax error try { new Monty('def') } catch (e) { t.true(e instanceof MontySyntaxError) t.false(e instanceof MontyRuntimeError) t.false(e instanceof MontyTypingError) } // Test runtime error try { new Monty('1 / 0').run() } catch (e) { t.true(e instanceof MontyRuntimeError) t.false(e instanceof MontySyntaxError) t.false(e instanceof MontyTypingError) } // Test type error try { new Monty('x: 
int = "str"', { typeCheck: true }) } catch (e) { t.true(e instanceof MontyTypingError) t.false(e instanceof MontySyntaxError) t.false(e instanceof MontyRuntimeError) } }) // ============================================================================= // Exception info accessors tests // ============================================================================= test('exception getter returns correct info for runtime error', (t) => { const m = new Monty('raise ValueError("test")') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.exception.typeName, 'ValueError') t.is(error.exception.message, 'test') }) test('exception getter returns correct info for syntax error', (t) => { const error = t.throws(() => new Monty('def'), { instanceOf: MontySyntaxError }) t.is(error.exception.typeName, 'SyntaxError') }) // ============================================================================= // Polymorphic display() tests // ============================================================================= test('display() works polymorphically on MontyTypingError', (t) => { try { new Monty('x: int = "str"', { typeCheck: true }) t.fail('Should have thrown') } catch (e) { t.true(e instanceof MontyError) const msg = (e as MontyError).display('msg') t.true(msg.length > 0) const typeMsg = (e as MontyError).display('type-msg') t.true(typeMsg.startsWith('TypeError:')) } }) ================================================ FILE: crates/monty-js/__test__/external.spec.ts ================================================ import test from 'ava' import { Monty } from '../wrapper' import { isRuntimeError } from './exceptions.spec' // ============================================================================= // Basic external function tests // ============================================================================= test('external function no args', (t) => { const m = new Monty('noop()') const noop = (...args: unknown[]) => { t.deepEqual(args, []) return 'called' } const 
result = m.run({ externalFunctions: { noop } }) t.is(result, 'called') }) test('external function positional args', (t) => { const m = new Monty('func(1, 2, 3)') const func = (...args: unknown[]) => { t.deepEqual(args, [1, 2, 3]) return 'ok' } t.is(m.run({ externalFunctions: { func } }), 'ok') }) test('external function kwargs only', (t) => { const m = new Monty('func(a=1, b="two")') const func = (...args: unknown[]) => { // kwargs are passed as the last argument as an object t.deepEqual(args, [{ a: 1, b: 'two' }]) return 'ok' } t.is(m.run({ externalFunctions: { func } }), 'ok') }) test('external function mixed args kwargs', (t) => { const m = new Monty('func(1, 2, x="hello", y=True)') const func = (...args: unknown[]) => { // positional args followed by kwargs object t.deepEqual(args, [1, 2, { x: 'hello', y: true }]) return 'ok' } t.is(m.run({ externalFunctions: { func } }), 'ok') }) test('external function complex types', (t) => { const m = new Monty('func([1, 2], {"key": "value"})') const func = (...args: unknown[]) => { t.deepEqual(args[0], [1, 2]) // Dicts are returned as Maps t.true(args[1] instanceof Map) t.is((args[1] as Map).get('key'), 'value') return 'ok' } t.is(m.run({ externalFunctions: { func } }), 'ok') }) test('external function returns none', (t) => { const m = new Monty('do_nothing()') const do_nothing = () => { // returns undefined which becomes None } t.is(m.run({ externalFunctions: { do_nothing } }), null) }) test('external function returns complex type', (t) => { const m = new Monty('get_data()') const get_data = () => { return { a: [1, 2, 3], b: { nested: true } } } const result = m.run({ externalFunctions: { get_data } }) // Plain objects become Maps t.true(result instanceof Map) t.deepEqual(result.get('a'), [1, 2, 3]) const nested = result.get('b') t.true(nested instanceof Map) t.is(nested.get('nested'), true) }) // ============================================================================= // Multiple external functions tests // 
============================================================================= test('multiple external functions', (t) => { const m = new Monty('add(1, 2) + mul(3, 4)') const add = (a: number, b: number) => { t.is(a, 1) t.is(b, 2) return a + b } const mul = (a: number, b: number) => { t.is(a, 3) t.is(b, 4) return a * b } const result = m.run({ externalFunctions: { add, mul } }) t.is(result, 15) // 3 + 12 }) test('external function called multiple times', (t) => { const m = new Monty('counter() + counter() + counter()') let callCount = 0 const counter = () => { callCount += 1 return callCount } const result = m.run({ externalFunctions: { counter } }) t.is(result, 6) // 1 + 2 + 3 t.is(callCount, 3) }) test('external function with input', (t) => { const m = new Monty('process(x)', { inputs: ['x'] }) const process = (x: number) => { t.is(x, 5) return x * 10 } t.is(m.run({ inputs: { x: 5 }, externalFunctions: { process } }), 50) }) // ============================================================================= // Error handling tests // ============================================================================= test('undeclared external function raises name error', (t) => { const m = new Monty('missing()') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, "NameError: name 'missing' is not defined") }) test('undeclared function raises name error', (t) => { const m = new Monty('unknown_func()') const error = t.throws(() => m.run(), isRuntimeError) t.is(error.message, "NameError: name 'unknown_func' is not defined") }) test('external function raises exception', (t) => { const m = new Monty('fail()') const fail = () => { const error = new Error('intentional error') error.name = 'ValueError' throw error } const error = t.throws(() => m.run({ externalFunctions: { fail } }), isRuntimeError) t.true(error.message.includes('ValueError')) t.true(error.message.includes('intentional error')) }) test('external function wrong name raises name error', (t) => { 
// When 'foo' is called but only 'bar' is provided at runtime, foo is a NameError // because no externalFunctions are declared in the constructor const m = new Monty('foo()') const bar = () => 1 const error = t.throws(() => m.run({ externalFunctions: { bar } }), isRuntimeError) t.is(error.message, "NameError: name 'foo' is not defined") }) test('external function exception caught by try except', (t) => { const code = ` try: fail() except ValueError: caught = True caught ` const m = new Monty(code) const fail = () => { const error = new Error('caught error') error.name = 'ValueError' throw error } t.is(m.run({ externalFunctions: { fail } }), true) }) test('external function exception type preserved', (t) => { const m = new Monty('fail()') const fail = () => { const error = new Error('type error message') error.name = 'TypeError' throw error } const error = t.throws(() => m.run({ externalFunctions: { fail } }), isRuntimeError) t.true(error.message.includes('TypeError')) t.true(error.message.includes('type error message')) }) // ============================================================================= // Exception hierarchy tests // ============================================================================= const exceptionTypes = [ 'ZeroDivisionError', 'OverflowError', 'ArithmeticError', 'NotImplementedError', 'RecursionError', 'RuntimeError', 'KeyError', 'IndexError', 'LookupError', 'ValueError', 'TypeError', 'AttributeError', 'NameError', 'AssertionError', ] for (const exceptionType of exceptionTypes) { test(`external function exception hierarchy - ${exceptionType}`, (t) => { const m = new Monty('fail()') const fail = () => { const error = new Error('test message') error.name = exceptionType throw error } const error = t.throws(() => m.run({ externalFunctions: { fail } }), isRuntimeError) t.true(error.message.includes(exceptionType)) }) } // ============================================================================= // Exception caught by parent tests // 
============================================================================= const parentChildPairs: Array<[string, string]> = [ ['ZeroDivisionError', 'ArithmeticError'], ['OverflowError', 'ArithmeticError'], ['NotImplementedError', 'RuntimeError'], ['RecursionError', 'RuntimeError'], ['KeyError', 'LookupError'], ['IndexError', 'LookupError'], ] for (const [childType, parentType] of parentChildPairs) { test(`external function exception caught by parent - ${childType} caught by ${parentType}`, (t) => { const code = ` try: fail() except ${parentType}: caught = 'parent' except ${childType}: caught = 'child' caught ` const m = new Monty(code) const fail = () => { const error = new Error('test') error.name = childType throw error } // Child exception should be caught by parent handler (which comes first) t.is(m.run({ externalFunctions: { fail } }), 'parent') }) } // ============================================================================= // Exception in various contexts // ============================================================================= test('external function exception in expression', (t) => { const m = new Monty('1 + fail() + 2') const fail = () => { const error = new Error('mid-expression error') error.name = 'RuntimeError' throw error } const error = t.throws(() => m.run({ externalFunctions: { fail } }), isRuntimeError) t.true(error.message.includes('RuntimeError')) t.true(error.message.includes('mid-expression error')) }) test('external function exception after successful call', (t) => { const code = ` a = success() b = fail() a + b ` const m = new Monty(code) const success = () => 10 const fail = () => { const error = new Error('second call fails') error.name = 'ValueError' throw error } const error = t.throws(() => m.run({ externalFunctions: { success, fail } }), isRuntimeError) t.true(error.message.includes('ValueError')) t.true(error.message.includes('second call fails')) }) test('external function exception with finally', (t) => { const code 
= ` finally_ran = False try: fail() except ValueError: pass finally: finally_ran = True finally_ran ` const m = new Monty(code) const fail = () => { const error = new Error('error') error.name = 'ValueError' throw error } t.is(m.run({ externalFunctions: { fail } }), true) }) ================================================ FILE: crates/monty-js/__test__/inputs.spec.ts ================================================ import test from 'ava' import { Monty } from '../wrapper' // ============================================================================= // Single input tests // ============================================================================= test('single input', (t) => { const m = new Monty('x', { inputs: ['x'] }) t.is(m.run({ inputs: { x: 42 } }), 42) }) test('multiple inputs', (t) => { const m = new Monty('x + y + z', { inputs: ['x', 'y', 'z'] }) t.is(m.run({ inputs: { x: 1, y: 2, z: 3 } }), 6) }) test('input used in expression', (t) => { const m = new Monty('x * 2 + y', { inputs: ['x', 'y'] }) t.is(m.run({ inputs: { x: 5, y: 3 } }), 13) }) test('input string', (t) => { const m = new Monty('greeting + " " + name', { inputs: ['greeting', 'name'] }) t.is(m.run({ inputs: { greeting: 'Hello', name: 'World' } }), 'Hello World') }) test('input list', (t) => { const m = new Monty('data[0] + data[1]', { inputs: ['data'] }) t.is(m.run({ inputs: { data: [10, 20] } }), 30) }) test('input dict', (t) => { const m = new Monty('config["a"] * config["b"]', { inputs: ['config'] }) t.is(m.run({ inputs: { config: { a: 3, b: 4 } } }), 12) }) // ============================================================================= // Missing input tests // ============================================================================= test('missing input raises', (t) => { const m = new Monty('x + y', { inputs: ['x', 'y'] }) const error = t.throws(() => m.run({ inputs: { x: 1 } })) t.true(error?.message.includes('Missing required input')) }) test('all inputs missing raises', (t) => { 
const m = new Monty('x', { inputs: ['x'] }) const error = t.throws(() => m.run()) t.true(error?.message.includes('Missing required input')) }) test('no inputs declared but provided raises', (t) => { const m = new Monty('1 + 1') const error = t.throws(() => m.run({ inputs: { x: 1 } })) t.true(error?.message.includes('No input variables declared')) }) // ============================================================================= // Input order tests // ============================================================================= test('inputs order independent', (t) => { const m = new Monty('a - b', { inputs: ['a', 'b'] }) // Dict order shouldn't matter t.is(m.run({ inputs: { b: 3, a: 10 } }), 7) }) // ============================================================================= // Function parameter shadowing tests // ============================================================================= test('function param shadows input', (t) => { const code = ` def foo(x): return x + 1 foo(x * 2) ` const m = new Monty(code, { inputs: ['x'] }) // x=5, so foo(x * 2) = foo(10), and inside foo, x is 10 (not 5), so returns 11 t.is(m.run({ inputs: { x: 5 } }), 11) }) test('function param shadows input multiple params', (t) => { const code = ` def add(x, y): return x + y add(x * 10, y * 100) ` const m = new Monty(code, { inputs: ['x', 'y'] }) // x=2, y=3, so add(20, 300) should return 320 t.is(m.run({ inputs: { x: 2, y: 3 } }), 320) }) test('input accessible outside shadowing function', (t) => { const code = ` def double(x): return x * 2 result = double(10) + x result ` const m = new Monty(code, { inputs: ['x'] }) // double(10) = 20, x (input) = 5, so result = 25 t.is(m.run({ inputs: { x: 5 } }), 25) }) test('function param shadows input with default', (t) => { const code = ` def foo(x=100): return x + 1 foo(x * 2) ` const m = new Monty(code, { inputs: ['x'] }) // x=5, foo(10), inside foo x=10 (not 5 or 100), returns 11 t.is(m.run({ inputs: { x: 5 } }), 11) }) test('function 
uses input directly', (t) => { const code = ` def foo(y): return x + y foo(10) ` const m = new Monty(code, { inputs: ['x'] }) // x=5 (input), foo(10) with y=10, returns x + y = 5 + 10 = 15 t.is(m.run({ inputs: { x: 5 } }), 15) }) // ============================================================================= // Complex input types tests // ============================================================================= test('complex input types', (t) => { const m = new Monty('len(items)', { inputs: ['items'] }) t.is(m.run({ inputs: { items: [1, 2, 3, 4, 5] } }), 5) }) ================================================ FILE: crates/monty-js/__test__/limits.spec.ts ================================================ import test from 'ava' import { Monty, MontyRuntimeError, type ResourceLimits } from '../wrapper' // ============================================================================= // ResourceLimits construction tests // ============================================================================= test('resource limits custom', (t) => { const limits: ResourceLimits = { maxAllocations: 100, maxDurationSecs: 5.0, maxMemory: 1024, gcInterval: 10, maxRecursionDepth: 500, } // Just verify the object is valid and can be passed const m = new Monty('1 + 1') t.is(m.run({ limits }), 2) }) test('run with limits', (t) => { const m = new Monty('1 + 1') const limits: ResourceLimits = { maxDurationSecs: 5.0 } t.is(m.run({ limits }), 2) }) // ============================================================================= // Recursion limit tests // ============================================================================= test('recursion limit', (t) => { const code = ` def recurse(n): if n <= 0: return 0 return 1 + recurse(n - 1) recurse(10) ` const m = new Monty(code) const limits: ResourceLimits = { maxRecursionDepth: 5 } const error = t.throws(() => m.run({ limits }), { instanceOf: MontyRuntimeError }) t.true(error.message.includes('RecursionError')) }) test('recursion limit 
ok', (t) => { const code = ` def recurse(n): if n <= 0: return 0 return 1 + recurse(n - 1) recurse(5) ` const m = new Monty(code) const limits: ResourceLimits = { maxRecursionDepth: 100 } t.is(m.run({ limits }), 5) }) // ============================================================================= // Allocation limit tests // ============================================================================= test('allocation limit', (t) => { // Use a more aggressive allocation pattern const code = ` result = [] for i in range(10000): result.append([i]) len(result) ` const m = new Monty(code) const limits: ResourceLimits = { maxAllocations: 5 } const error = t.throws(() => m.run({ limits }), { instanceOf: MontyRuntimeError }) t.true(error.message.includes('MemoryError')) }) // ============================================================================= // Memory limit tests // ============================================================================= test('memory limit', (t) => { const code = ` result = [] for i in range(1000): result.append('x' * 100) len(result) ` const m = new Monty(code) const limits: ResourceLimits = { maxMemory: 100 } const error = t.throws(() => m.run({ limits }), { instanceOf: MontyRuntimeError }) t.true(error.message.includes('MemoryError')) }) // ============================================================================= // Limits with inputs tests // ============================================================================= test('limits with inputs', (t) => { const m = new Monty('x * 2', { inputs: ['x'] }) const limits: ResourceLimits = { maxDurationSecs: 5.0 } t.is(m.run({ inputs: { x: 21 }, limits }), 42) }) // ============================================================================= // Large operation limits tests // ============================================================================= test('pow memory limit', (t) => { const m = new Monty('2 ** 10000000') const limits: ResourceLimits = { maxMemory: 1_000_000 } const 
error = t.throws(() => m.run({ limits }), { instanceOf: MontyRuntimeError }) t.true(error.message.includes('MemoryError')) }) test('lshift memory limit', (t) => { const m = new Monty('1 << 10000000') const limits: ResourceLimits = { maxMemory: 1_000_000 } const error = t.throws(() => m.run({ limits }), { instanceOf: MontyRuntimeError }) t.true(error.message.includes('MemoryError')) }) test('mult memory limit', (t) => { const code = ` big = 2 ** 4000000 result = big * big ` const m = new Monty(code) const limits: ResourceLimits = { maxMemory: 1_000_000 } const error = t.throws(() => m.run({ limits }), { instanceOf: MontyRuntimeError }) t.true(error.message.includes('MemoryError')) }) test('small operations within limit', (t) => { const m = new Monty('2 ** 1000') const limits: ResourceLimits = { maxMemory: 1_000_000 } const result = m.run({ limits }) t.true(typeof result === 'bigint' || typeof result === 'number') }) // ============================================================================= // Time limit tests // ============================================================================= test('time limit', (t) => { // Use recursion instead of while loop const code = ` def infinite(n): return infinite(n + 1) infinite(0) ` const m = new Monty(code) const limits: ResourceLimits = { maxDurationSecs: 0.1 } const error = t.throws(() => m.run({ limits })) // May hit time limit or recursion limit t.true( error?.message.includes('TimeoutError') || error?.message.includes('timed out') || error?.message.includes('RecursionError'), ) }) ================================================ FILE: crates/monty-js/__test__/package.json ================================================ { "type": "module" } ================================================ FILE: crates/monty-js/__test__/print.spec.ts ================================================ import type { ExecutionContext } from 'ava' import test from 'ava' import { Monty, type ResourceLimits, MontySnapshot, MontyComplete } 
from '../wrapper'

// =============================================================================
// Print tests
// =============================================================================

/**
 * Builds a print callback that records every chunk of text it receives.
 *
 * The callback asserts that each chunk arrives on the 'stdout' stream and
 * appends the text to `output`, so a test can join the chunks and compare the
 * result with the expected printed text.
 */
function makePrintCollector(t: ExecutionContext) {
  const output: string[] = []
  const callback = (stream: string, text: string) => {
    t.is(stream, 'stdout')
    output.push(text)
  }
  return { callback, output }
}

test('basic', (t) => {
  const m = new Monty('print("hello")')
  const { output, callback } = makePrintCollector(t)
  m.run({ printCallback: callback })
  t.is(output.join(''), 'hello\n')
})

test('multiple', (t) => {
  const m = new Monty('print("hello")\nprint("world")')
  const { output, callback } = makePrintCollector(t)
  m.run({ printCallback: callback })
  t.is(output.join(''), 'hello\nworld\n')
})

test('with values', (t) => {
  const m = new Monty('print("The answer is", 42)')
  const { output, callback } = makePrintCollector(t)
  m.run({ printCallback: callback })
  t.is(output.join(''), 'The answer is 42\n')
})

// Exercises the `sep` keyword of print(); the title previously read 'with step'.
test('with sep', (t) => {
  const m = new Monty('print(1, 2, 3, sep="-")')
  const { output, callback } = makePrintCollector(t)
  m.run({ printCallback: callback })
  t.is(output.join(''), '1-2-3\n')
})

test('with end', (t) => {
  const m = new Monty('print("hello", end="!")')
  const { output, callback } = makePrintCollector(t)
  m.run({ printCallback: callback })
  t.is(output.join(''), 'hello!')
})

test('returns none', (t) => {
  const m = new Monty('result = print("hello")')
  const { callback } = makePrintCollector(t)
  const result = m.run({ printCallback: callback })
  t.is(result, null)
})

test('empty', (t) => {
  const m = new Monty('print()')
  const { output, callback } = makePrintCollector(t)
  m.run({ printCallback: callback })
  t.is(output.join(''), '\n')
})

test('with limits', (t) => {
  const m = new Monty('print("with limits")')
  const { output, callback } = makePrintCollector(t)
  const limits: ResourceLimits = {
    maxDurationSecs: 5.0,
  }
  m.run({ printCallback: callback, limits })
  t.is(output.join(''), 'with limits\n')
})

test('with inputs', (t) => {
  const m = new Monty('print("Input value is", x)', { inputs: ['x'] })
  const { output, callback } = makePrintCollector(t)
  m.run({ inputs: { x: 99 }, printCallback: callback })
  t.is(output.join(''), 'Input value is 99\n')
})

test('print in loop', (t) => {
  const code = `
for i in range(3):
    print("Count", i)
`
  const m = new Monty(code)
  const { output, callback } = makePrintCollector(t)
  m.run({ printCallback: callback })
  t.is(output.join(''), 'Count 0\nCount 1\nCount 2\n')
})

test('print mixed types', (t) => {
  const m = new Monty('print("Value:", 3.14, True, None, [1, 2, 3])')
  const { output, callback } = makePrintCollector(t)
  m.run({ printCallback: callback })
  t.is(output.join(''), 'Value: 3.14 True None [1, 2, 3]\n')
})

/**
 * Builds a print callback that always throws `error`.
 *
 * The callback still asserts that it was invoked for the 'stdout' stream
 * before throwing; the printed text itself is ignored.
 */
function makeErrorCallback(error: Error, t: ExecutionContext) {
  const callback = (stream: string, _text: string) => {
    t.is(stream, 'stdout')
    throw error
  }
  return { callback }
}

test('raises error', (t) => {
  const m = new Monty('print("This will error")')
  const error = new Error('Custom print error')
  const { callback } = makeErrorCallback(error, t)
  const thrown = t.throws(() => {
    m.run({ printCallback: callback })
  })
  // the error is slightly different with WASI, it doesn't include "Error: ",
  // hence the optional non-capturing group
  t.regex(thrown?.message, /Exception: (?:Error: )?Custom print error/)
})

test('raises in function', (t) => {
  const code = `
def greet(name):
    print(f"Hello, {name}!")
greet("Alice")
`
  const m = new Monty(code)
  const error = new Error('Print error in function')
  const { callback } = makeErrorCallback(error, t)
  const thrown = t.throws(() => {
    m.run({ printCallback: callback })
  })
  // the error is slightly different with WASI, it doesn't include "Error: "
  t.regex(thrown?.message, /Exception: (?:Error: )?Print error in function/)
})

test('raises in nested function', (t) => {
  const code = `
def outer():
    def inner():
        print("Inside inner function")
    inner()
outer()
`
  const m = new Monty(code)
  const error = new Error('Print error in nested function')
  const { callback } = makeErrorCallback(error, t)
  const thrown = t.throws(() => {
    m.run({ printCallback: callback })
  })
  // the error is slightly different with WASI, it doesn't include "Error: "
  t.regex(thrown?.message, /Exception: (?:Error: )?Print error in nested function/)
})

test('raises in loop', (t) => {
  const code = `
for i in range(3):
    print(f"Count: {i}")
`
  const m = new Monty(code)
  const error = new Error('Print error in loop')
  const { callback } = makeErrorCallback(error, t)
  const thrown = t.throws(() => {
    m.run({ printCallback: callback })
  })
  // the error is slightly different with WASI, it doesn't include "Error: "
  t.regex(thrown?.message, /Exception: (?:Error: )?Print error in loop/)
})

test('with snapshot', (t) => {
  const m = new Monty('print("snapshot")')
  const { output, callback } = makePrintCollector(t)
  const result = m.start({
    printCallback: callback,
  })
  t.true(result instanceof MontyComplete)
  t.is((result as MontyComplete).output, null)
  t.is(output.join(''), 'snapshot\n')
})

test('with snapshot resume', (t) => {
  const code = `
print("hello")
print(func())
`
  const m = new Monty(code)
  const { output, callback } = makePrintCollector(t)
  const progress = m.start({
    printCallback: callback,
  })
  t.true(progress instanceof MontySnapshot)
  const snapshot = progress as MontySnapshot
  const result = snapshot.resume({
    returnValue: 'world',
  })
  t.true(result instanceof MontyComplete)
  t.is((result as MontyComplete).output, null)
  t.is(output.join(''), 'hello\nworld\n')
})

test('with snapshot dump load', (t) => {
  const m = new Monty('print(func())')
  const { output, callback } = makePrintCollector(t)
  const progress = m.start({
    printCallback: callback,
  })
  t.true(progress instanceof MontySnapshot)
  const snapshot = progress as MontySnapshot
  const data = snapshot.dump()
  // The print callback is passed again when the snapshot is loaded
  const progress2 = MontySnapshot.load(data, {
    printCallback: callback,
  })
  const result =
progress2.resume({ returnValue: 42, }) t.true(result instanceof MontyComplete) t.is((result as MontyComplete).output, null) t.is(output.join(''), '42\n') }) ================================================ FILE: crates/monty-js/__test__/repl.spec.ts ================================================ import test from 'ava' import { MontyRepl } from '../wrapper' test('feed preserves state without replay', (t) => { const repl = new MontyRepl() repl.feed('counter = 0') t.is(repl.feed('counter = counter + 1'), null) t.is(repl.feed('counter'), 1) t.is(repl.feed('counter = counter + 1'), null) t.is(repl.feed('counter'), 2) }) test('constructor accepts scriptName option', (t) => { const repl = new MontyRepl({ scriptName: 'test.py' }) t.is(repl.scriptName, 'test.py') }) test('default scriptName is main.py', (t) => { const repl = new MontyRepl() t.is(repl.scriptName, 'main.py') }) test('repl dump/load roundtrip', (t) => { const repl = new MontyRepl() repl.feed('x = 40') t.is(repl.feed('x = x + 1'), null) const serialized = repl.dump() const loaded = MontyRepl.load(serialized) t.is(loaded.feed('x + 1'), 42) }) ================================================ FILE: crates/monty-js/__test__/serialize.spec.ts ================================================ import test from 'ava' import { Monty, MontySnapshot, MontyNameLookup, MontyComplete, type ResourceLimits } from '../wrapper' import { Buffer } from 'node:buffer' // ============================================================================= // Monty dump/load tests // ============================================================================= test('monty dump load roundtrip', (t) => { const m = new Monty('x + 1', { inputs: ['x'] }) const data = m.dump() t.true(data instanceof Buffer) t.true(data.length > 0) const m2 = Monty.load(data) t.is(m2.run({ inputs: { x: 41 } }), 42) }) test('monty dump load preserves script name', (t) => { const m = new Monty('1', { scriptName: 'custom.py' }) const data = m.dump() const m2 = 
Monty.load(data) t.is(m2.scriptName, 'custom.py') }) test('monty dump load preserves inputs', (t) => { const m = new Monty('x + y', { inputs: ['x', 'y'] }) const data = m.dump() const m2 = Monty.load(data) t.deepEqual(m2.inputs, ['x', 'y']) t.is(m2.run({ inputs: { x: 1, y: 2 } }), 3) }) test('monty dump load preserves code execution', (t) => { const m = new Monty('func()') const data = m.dump() const m2 = Monty.load(data) const progress = m2.start() t.true(progress instanceof MontySnapshot) t.is((progress as MontySnapshot).functionName, 'func') }) test('monty dump produces same result on multiple calls', (t) => { const m = new Monty('1 + 2') const bytes1 = m.dump() const bytes2 = m.dump() t.deepEqual(bytes1, bytes2) }) test('monty dump load various outputs', (t) => { const testCases: Array<[string, unknown]> = [ ['1 + 1', 2], ['"hello"', 'hello'], ['[1, 2, 3]', [1, 2, 3]], ['True', true], ['None', null], ] for (const [code, expected] of testCases) { const m = new Monty(code) const data = m.dump() const m2 = Monty.load(data) t.deepEqual(m2.run(), expected) } }) // ============================================================================= // MontySnapshot dump/load tests // ============================================================================= test('snapshot dump load roundtrip', (t) => { const m = new Monty('func(1, 2)') const progress = m.start() t.true(progress instanceof MontySnapshot) const snapshot = progress as MontySnapshot t.is(snapshot.functionName, 'func') t.deepEqual(snapshot.args, [1, 2]) t.deepEqual(snapshot.kwargs, {}) const data = snapshot.dump() t.true(data instanceof Buffer) t.true(data.length > 0) const snapshot2 = MontySnapshot.load(data) t.is(snapshot2.functionName, 'func') t.deepEqual(snapshot2.args, [1, 2]) t.deepEqual(snapshot2.kwargs, {}) const result = snapshot2.resume({ returnValue: 100 }) t.true(result instanceof MontyComplete) t.is((result as MontyComplete).output, 100) }) test('snapshot dump load preserves script name', (t) => 
{ const m = new Monty('func()', { scriptName: 'test.py' }) const progress = m.start() t.true(progress instanceof MontySnapshot) const data = (progress as MontySnapshot).dump() const progress2 = MontySnapshot.load(data) t.is(progress2.scriptName, 'test.py') }) test('snapshot dump load with kwargs', (t) => { const m = new Monty('func(a=1, b="hello")') const progress = m.start() t.true(progress instanceof MontySnapshot) const data = (progress as MontySnapshot).dump() const progress2 = MontySnapshot.load(data) t.is(progress2.functionName, 'func') t.deepEqual(progress2.args, []) t.deepEqual(progress2.kwargs, { a: 1, b: 'hello' }) }) test('snapshot dump after resume fails', (t) => { const m = new Monty('func()') const snapshot = m.start() as MontySnapshot snapshot.resume({ returnValue: 1 }) const error = t.throws(() => snapshot.dump()) t.true(error?.message.includes('already been resumed')) }) test('snapshot dump load multiple calls', (t) => { const m = new Monty('a() + b()') // First call: a() let progress: MontySnapshot | MontyNameLookup | MontyComplete = m.start() t.true(progress instanceof MontySnapshot) let snapshot = progress as MontySnapshot t.is(snapshot.functionName, 'a') // Dump and load the state const data = snapshot.dump() snapshot = MontySnapshot.load(data) // Resume with first return value — triggers b() progress = snapshot.resume({ returnValue: 10 }) t.true(progress instanceof MontySnapshot) let snapshot2 = progress as MontySnapshot t.is(snapshot2.functionName, 'b') // Dump and load again const data2 = snapshot2.dump() snapshot2 = MontySnapshot.load(data2) // Resume with second return value const result = snapshot2.resume({ returnValue: 5 }) t.true(result instanceof MontyComplete) t.is((result as MontyComplete).output, 15) }) test('snapshot dump load with limits', (t) => { const m = new Monty('func()') const limits: ResourceLimits = { maxAllocations: 1000 } const progress = m.start({ limits }) t.true(progress instanceof MontySnapshot) const data = 
(progress as MontySnapshot).dump() const progress2 = MontySnapshot.load(data) const result = progress2.resume({ returnValue: 99 }) t.true(result instanceof MontyComplete) t.is((result as MontyComplete).output, 99) }) // ============================================================================= // MontyNameLookup dump/load tests // ============================================================================= test('name lookup dump load roundtrip', (t) => { const m = new Monty('x = foo; x') const lookup = m.start() t.true(lookup instanceof MontyNameLookup) const data = (lookup as MontyNameLookup).dump() t.true(data instanceof Buffer) t.true(data.length > 0) const lookup2 = MontyNameLookup.load(data) t.is(lookup2.variableName, 'foo') t.is(lookup2.scriptName, 'main.py') const result = lookup2.resume({ value: 42 }) t.true(result instanceof MontyComplete) t.is((result as MontyComplete).output, 42) }) test('name lookup dump after resume fails', (t) => { const m = new Monty('x = foo; x') const lookup = m.start() as MontyNameLookup lookup.resume({ value: 42 }) const error = t.throws(() => lookup.dump()) t.true(error?.message.includes('already been resumed')) }) ================================================ FILE: crates/monty-js/__test__/start.spec.ts ================================================ import test from 'ava' import { Monty, MontySnapshot, MontyNameLookup, MontyComplete, MontyRuntimeError, type ResourceLimits, type ResumeOptions, } from '../wrapper' // ============================================================================= // start() returns MontyComplete tests // ============================================================================= test('start no external functions returns complete', (t) => { const m = new Monty('1 + 2') const result = m.start() t.true(result instanceof MontyComplete) t.is((result as MontyComplete).output, 3) }) test('start returns complete for various types', (t) => { const testCases: Array<[string, unknown]> = [ ['1', 1], 
['"hello"', 'hello'], ['[1, 2, 3]', [1, 2, 3]], ['None', null], ['True', true], ] for (const [code, expected] of testCases) { const m = new Monty(code) const result = m.start() t.true(result instanceof MontyComplete) t.deepEqual((result as MontyComplete).output, expected) } }) // ============================================================================= // start() returns MontySnapshot tests (callable names go through FunctionCall) // ============================================================================= test('start with external function returns progress', (t) => { const m = new Monty('func()') const result = m.start() t.true(result instanceof MontySnapshot) const snapshot = result as MontySnapshot t.is(snapshot.scriptName, 'main.py') t.is(snapshot.functionName, 'func') t.deepEqual(snapshot.args, []) t.deepEqual(snapshot.kwargs, {}) }) test('start custom script name', (t) => { const m = new Monty('func()', { scriptName: 'custom.py' }) const result = m.start() t.true(result instanceof MontySnapshot) t.is((result as MontySnapshot).scriptName, 'custom.py') }) test('start progress with args', (t) => { const m = new Monty('func(1, 2, 3)') const progress = m.start() t.true(progress instanceof MontySnapshot) const snapshot = progress as MontySnapshot t.is(snapshot.functionName, 'func') t.deepEqual(snapshot.args, [1, 2, 3]) t.deepEqual(snapshot.kwargs, {}) }) test('start progress with kwargs', (t) => { const m = new Monty('func(a=1, b="two")') const progress = m.start() t.true(progress instanceof MontySnapshot) const snapshot = progress as MontySnapshot t.is(snapshot.functionName, 'func') t.deepEqual(snapshot.args, []) t.deepEqual(snapshot.kwargs, { a: 1, b: 'two' }) }) test('start progress with mixed args kwargs', (t) => { const m = new Monty('func(1, 2, x="hello", y=True)') const progress = m.start() t.true(progress instanceof MontySnapshot) const snapshot = progress as MontySnapshot t.is(snapshot.functionName, 'func') t.deepEqual(snapshot.args, [1, 2]) 
t.deepEqual(snapshot.kwargs, { x: 'hello', y: true }) }) // ============================================================================= // start() returns MontyNameLookup tests (non-callable name resolution) // ============================================================================= test('start with unknown name returns name lookup', (t) => { const m = new Monty('x = foo; x') const result = m.start() t.true(result instanceof MontyNameLookup) const lookup = result as MontyNameLookup t.is(lookup.scriptName, 'main.py') t.is(lookup.variableName, 'foo') }) test('name lookup resume with value completes', (t) => { const m = new Monty('x = foo; x') const result = m.start() t.true(result instanceof MontyNameLookup) const lookup = result as MontyNameLookup t.is(lookup.variableName, 'foo') const complete = lookup.resume({ value: 42 }) t.true(complete instanceof MontyComplete) t.is((complete as MontyComplete).output, 42) }) test('name lookup resume without value raises NameError', (t) => { const m = new Monty('x = foo; x') const result = m.start() t.true(result instanceof MontyNameLookup) const lookup = result as MontyNameLookup const error = t.throws(() => lookup.resume(), { instanceOf: MontyRuntimeError, }) t.true(error.message.includes('NameError')) t.true(error.message.includes('foo')) }) test('name lookup custom script name', (t) => { const m = new Monty('x = foo; x', { scriptName: 'custom.py' }) const result = m.start() t.true(result instanceof MontyNameLookup) t.is((result as MontyNameLookup).scriptName, 'custom.py') }) test('name lookup resume cannot be called twice', (t) => { const m = new Monty('x = foo; x') const lookup = m.start() as MontyNameLookup // First resume succeeds lookup.resume({ value: 42 }) // Second resume should fail const error = t.throws(() => lookup.resume({ value: 99 })) t.true(error?.message.includes('already')) }) test('name lookup resolves to function, then function call yields snapshot', (t) => { // Assign an external function to x via 
// name lookup, then call x()
  const m = new Monty('x = foobar; x()')
  const lookup = m.start()
  t.true(lookup instanceof MontyNameLookup)
  t.is((lookup as MontyNameLookup).variableName, 'foobar')

  // Provide a function — JS functions convert to MontyObject::Function
  function notFoobar(): unknown {
    return 42
  }
  const snapshot = (lookup as MontyNameLookup).resume({ value: notFoobar })
  t.true(snapshot instanceof MontySnapshot)
  // Function name comes from the JS function's name, not the variable
  t.is((snapshot as MontySnapshot).functionName, 'notFoobar')

  const result = (snapshot as MontySnapshot).resume({ returnValue: 99 })
  t.true(result instanceof MontyComplete)
  t.is((result as MontyComplete).output, 99)
})

// =============================================================================
// resume() tests
// =============================================================================

test('progress resume returns complete', (t) => {
  const m = new Monty('func()')
  const progress = m.start()
  t.true(progress instanceof MontySnapshot)
  const snapshot = progress as MontySnapshot
  t.is(snapshot.functionName, 'func')
  t.deepEqual(snapshot.args, [])
  t.deepEqual(snapshot.kwargs, {})

  const result = snapshot.resume({ returnValue: 42 })
  t.true(result instanceof MontyComplete)
  t.is((result as MontyComplete).output, 42)
})

test('resume with none', (t) => {
  const m = new Monty('func()')
  const snapshot = m.start() as MontySnapshot
  const result = snapshot.resume({ returnValue: null })
  t.true(result instanceof MontyComplete)
  t.is((result as MontyComplete).output, null)
})

test('resume complex return value', (t) => {
  const m = new Monty('func()')
  const snapshot = m.start() as MontySnapshot
  const complexValue = { a: [1, 2, 3], b: { nested: true } }
  const result = snapshot.resume({ returnValue: complexValue })
  t.true(result instanceof MontyComplete)
  // JS objects become Maps in Python (and come back as Maps).
  // Map is generic, so the casts spell out its type arguments; the keys
  // read below are strings.
  const output = (result as MontyComplete).output as Map<string, unknown>
  t.true(output instanceof Map)
  t.deepEqual(output.get('a'), [1, 2, 3])
  const nestedMap = output.get('b') as Map<string, unknown>
  t.true(nestedMap instanceof Map)
  t.is(nestedMap.get('nested'), true)
})

// =============================================================================
// Multiple external function calls tests
// =============================================================================

test('multiple external calls', (t) => {
  const m = new Monty('a() + b()')

  // First call
  let progress: MontySnapshot | MontyNameLookup | MontyComplete = m.start()
  t.true(progress instanceof MontySnapshot)
  t.is((progress as MontySnapshot).functionName, 'a')

  // Resume with first return value
  progress = (progress as MontySnapshot).resume({ returnValue: 10 })
  t.true(progress instanceof MontySnapshot)
  t.is((progress as MontySnapshot).functionName, 'b')

  // Resume with second return value
  const result = (progress as MontySnapshot).resume({ returnValue: 5 })
  t.true(result instanceof MontyComplete)
  t.is((result as MontyComplete).output, 15)
})

test('chain of external calls', (t) => {
  const m = new Monty('c() + c() + c()')

  let callCount = 0
  let progress: MontySnapshot | MontyNameLookup | MontyComplete = m.start()

  // Each pause is answered with the running call count, so the three calls
  // return 1, 2 and 3.
  while (progress instanceof MontySnapshot) {
    t.is(progress.functionName, 'c')
    callCount += 1
    progress = progress.resume({ returnValue: callCount })
  }

  t.true(progress instanceof MontyComplete)
  t.is((progress as MontyComplete).output, 6) // 1 + 2 + 3
  t.is(callCount, 3)
})

// =============================================================================
// start() with options tests
// =============================================================================

test('start with inputs', (t) => {
  const m = new Monty('process(x)', { inputs: ['x'] })
  const progress = m.start({ inputs: { x: 100 } })
  t.true(progress instanceof MontySnapshot)
  const snapshot = progress as MontySnapshot
  t.is(snapshot.functionName, 'process')
  t.deepEqual(snapshot.args, [100])
})

test('start with limits', (t) => {
  const m = new Monty('1 + 2')
const limits: ResourceLimits = { maxAllocations: 1000 } const result = m.start({ limits }) t.true(result instanceof MontyComplete) t.is((result as MontyComplete).output, 3) }) // ============================================================================= // resume() cannot be called twice tests // ============================================================================= test('resume cannot be called twice', (t) => { const m = new Monty('func()') const progress = m.start() t.true(progress instanceof MontySnapshot) const snapshot = progress as MontySnapshot // First resume succeeds snapshot.resume({ returnValue: 1 }) // Second resume should fail const error = t.throws(() => snapshot.resume({ returnValue: 2 })) t.true(error?.message.includes('already')) }) // ============================================================================= // resume() with exception tests // ============================================================================= test('resume with exception caught', (t) => { const code = ` try: result = external_func() except ValueError: caught = True caught ` const m = new Monty(code) const progress = m.start() t.true(progress instanceof MontySnapshot) const snapshot = progress as MontySnapshot // Resume with an exception using keyword argument const result = snapshot.resume({ exception: { type: 'ValueError', message: 'test error' } }) t.true(result instanceof MontyComplete) t.is((result as MontyComplete).output, true) }) test('resume exception propagates uncaught', (t) => { const m = new Monty('external_func()') const progress = m.start() t.true(progress instanceof MontySnapshot) const snapshot = progress as MontySnapshot // Resume with an exception that won't be caught - wrapped in MontyRuntimeError const error = t.throws(() => snapshot.resume({ exception: { type: 'ValueError', message: 'uncaught error' } }), { instanceOf: MontyRuntimeError, }) t.true(error.message.includes('ValueError')) t.true(error.message.includes('uncaught error')) }) 
test('resume exception in nested try', (t) => { const code = ` outer_caught = False finally_ran = False try: try: external_func() except TypeError: pass # Won't catch ValueError finally: finally_ran = True except ValueError: outer_caught = True (outer_caught, finally_ran) ` const m = new Monty(code) const progress = m.start() t.true(progress instanceof MontySnapshot) const snapshot = progress as MontySnapshot const result = snapshot.resume({ exception: { type: 'ValueError', message: 'propagates to outer' } }) t.true(result instanceof MontyComplete) const output = (result as MontyComplete).output t.true(Array.isArray(output)) t.is(output[0], true) // outer_caught t.is(output[1], true) // finally_ran }) // ============================================================================= // Invalid resume args tests // ============================================================================= test('invalid resume args', (t) => { const m = new Monty('func()') const progress = m.start() t.true(progress instanceof MontySnapshot) const snapshot = progress as MontySnapshot // Neither provided const error = t.throws(() => snapshot.resume({} as ResumeOptions)) t.true(error?.message.includes('returnValue or exception')) }) // ============================================================================= // Monty instance reuse tests // ============================================================================= test('start can reuse monty instance', (t) => { const m = new Monty('func(x)', { inputs: ['x'] }) // First run const progress1 = m.start({ inputs: { x: 1 } }) t.true(progress1 instanceof MontySnapshot) t.deepEqual((progress1 as MontySnapshot).args, [1]) const result1 = (progress1 as MontySnapshot).resume({ returnValue: 10 }) t.true(result1 instanceof MontyComplete) t.is((result1 as MontyComplete).output, 10) // Second run with different input const progress2 = m.start({ inputs: { x: 2 } }) t.true(progress2 instanceof MontySnapshot) t.deepEqual((progress2 as 
MontySnapshot).args, [2]) const result2 = (progress2 as MontySnapshot).resume({ returnValue: 20 }) t.true(result2 instanceof MontyComplete) t.is((result2 as MontyComplete).output, 20) }) // ============================================================================= // OS call handling in start() tests // ============================================================================= test('os.environ via start() throws NotImplementedError instead of panicking', (t) => { const m = new Monty('import os\nx = os.environ') const error = t.throws(() => m.start(), { instanceOf: MontyRuntimeError }) t.is(error.exception.typeName, 'NotImplementedError') t.is(error.exception.message, "OS function 'os.environ' not implemented") }) test('os.getenv via start() throws NotImplementedError instead of panicking', (t) => { const m = new Monty("import os\nx = os.getenv('HOME')") const error = t.throws(() => m.start(), { instanceOf: MontyRuntimeError }) t.is(error.exception.typeName, 'NotImplementedError') t.is(error.exception.message, "OS function 'os.getenv' not implemented") }) // ============================================================================= // repr() tests // ============================================================================= test('name lookup repr', (t) => { const m = new Monty('x = foo; x') const progress = m.start() t.true(progress instanceof MontyNameLookup) const repr = (progress as MontyNameLookup).repr() t.true(repr.includes('MontyNameLookup')) t.true(repr.includes('foo')) }) test('progress repr', (t) => { const m = new Monty('func(1, x=2)') const progress = m.start() t.true(progress instanceof MontySnapshot) const repr = (progress as MontySnapshot).repr() t.true(repr.includes('MontySnapshot')) t.true(repr.includes('func')) }) test('complete repr', (t) => { const m = new Monty('42') const result = m.start() t.true(result instanceof MontyComplete) const repr = (result as MontyComplete).repr() t.true(repr.includes('MontyComplete')) }) 
================================================ FILE: crates/monty-js/__test__/type_check.spec.ts ================================================ import test from 'ava' import { Monty, MontyTypingError } from '../wrapper' // ============================================================================= // typeCheck() tests // ============================================================================= test('type check no errors', (t) => { const m = new Monty('x = 1') t.notThrows(() => m.typeCheck()) }) test('type check with errors', (t) => { const m = new Monty('"hello" + 1') const error = t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) t.true(error.message.includes('unsupported-operator')) }) test('type check function return type', (t) => { const code = ` def foo() -> int: return "not an int" ` const m = new Monty(code) const error = t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) t.true(error.message.includes('invalid-return-type')) }) test('type check undefined variable', (t) => { const m = new Monty('print(undefined_var)') const error = t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) t.true(error.message.includes('unresolved-reference')) }) test('type check valid function', (t) => { const code = ` def add(a: int, b: int) -> int: return a + b add(1, 2) ` const m = new Monty(code) t.notThrows(() => m.typeCheck()) }) test('type check with prefix code', (t) => { const m = new Monty('result = x + 1') // Without prefix, x is undefined t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) // With prefix declaring x as a variable, it should pass t.notThrows(() => m.typeCheck('x = 0')) }) // ============================================================================= // Constructor type_check parameter tests // ============================================================================= test('constructor type check default false', (t) => { // This should NOT raise during construction (typeCheck=false is 
default) const m = new Monty('"hello" + 1') // But we can still call typeCheck() manually later t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) }) test('constructor type check explicit true', (t) => { t.throws(() => new Monty('"hello" + 1', { typeCheck: true }), { instanceOf: MontyTypingError }) }) test('constructor type check explicit false', (t) => { // This should NOT raise during construction const m = new Monty('"hello" + 1', { typeCheck: false }) // But we can still call typeCheck() manually later t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) }) test('constructor default allows run with inputs', (t) => { // Code with undefined variable - type checking would fail const m = new Monty('x + 1', { inputs: ['x'] }) // But runtime works fine with the input provided const result = m.run({ inputs: { x: 5 } }) t.is(result, 6) }) test('constructor type check prefix code', (t) => { // Without prefix, this would fail type checking (x is undefined) // Use assignment to define x, not just type annotation t.notThrows(() => new Monty('result = x + 1', { typeCheck: true, typeCheckPrefixCode: 'x = 0' })) }) test('constructor type check prefix code with external function', (t) => { // Define fetch as a function that takes a string and returns a string const prefix = ` def fetch(url: str) -> str: return '' ` t.notThrows( () => new Monty('result = fetch("https://example.com")', { typeCheck: true, typeCheckPrefixCode: prefix, }), ) }) test('constructor type check prefix code invalid', (t) => { // Prefix defines x as str, but code tries to use it with int addition t.throws( () => new Monty('result: int = x + 1', { typeCheck: true, typeCheckPrefixCode: 'x = "hello"', }), { instanceOf: MontyTypingError }, ) }) // ============================================================================= // MontyTypingError tests // ============================================================================= test('monty typing error is monty error subclass', (t) => 
{ const m = new Monty('"hello" + 1') const error = t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) t.true(error instanceof Error) }) test('monty typing error displayDiagnostics', (t) => { const m = new Monty('"hello" + 1') const error = t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) // displayDiagnostics() returns rich diagnostics, display('msg') returns the raw message t.is(error.message, `TypeError: ${error.display('msg')}`) }) test('monty typing error displayDiagnostics concise format', (t) => { const m = new Monty('"hello" + 1') const error = t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) const concise = error.displayDiagnostics('concise') t.true(concise.includes('error[unsupported-operator]')) }) test('monty typing error inherits base display formats', (t) => { const m = new Monty('"hello" + 1') const error = t.throws(() => m.typeCheck(), { instanceOf: MontyTypingError }) t.is(error.display('msg'), error.exception.message) t.true(error.display('type-msg').startsWith('TypeError:')) }) ================================================ FILE: crates/monty-js/__test__/types.spec.ts ================================================ import test from 'ava' import { Monty } from '../wrapper' import { Buffer } from 'node:buffer' // ============================================================================= // None tests // ============================================================================= test('none input', (t) => { const m = new Monty('x is None', { inputs: ['x'] }) t.is(m.run({ inputs: { x: null } }), true) }) test('none output', (t) => { const m = new Monty('None') t.is(m.run(), null) }) // ============================================================================= // Bool tests // ============================================================================= test('bool true', (t) => { const m = new Monty('x', { inputs: ['x'] }) const result = m.run({ inputs: { x: true } }) t.is(result, true) }) test('bool 
false', (t) => { const m = new Monty('x', { inputs: ['x'] }) const result = m.run({ inputs: { x: false } }) t.is(result, false) }) // ============================================================================= // Number tests // ============================================================================= test('int', (t) => { const m = new Monty('x', { inputs: ['x'] }) t.is(m.run({ inputs: { x: 42 } }), 42) t.is(m.run({ inputs: { x: -100 } }), -100) t.is(m.run({ inputs: { x: 0 } }), 0) }) test('float', (t) => { const m = new Monty('x', { inputs: ['x'] }) t.is(m.run({ inputs: { x: 3.14 } }), 3.14) t.is(m.run({ inputs: { x: -2.5 } }), -2.5) t.is(m.run({ inputs: { x: 0.0 } }), 0.0) }) // ============================================================================= // String tests // ============================================================================= test('string', (t) => { const m = new Monty('x', { inputs: ['x'] }) t.is(m.run({ inputs: { x: 'hello' } }), 'hello') t.is(m.run({ inputs: { x: '' } }), '') t.is(m.run({ inputs: { x: 'unicode: éè' } }), 'unicode: éè') }) // ============================================================================= // Bytes tests // ============================================================================= test('bytes', (t) => { const m = new Monty('x', { inputs: ['x'] }) const result = m.run({ inputs: { x: Buffer.from('hello') } }) t.true(Buffer.isBuffer(result)) t.deepEqual([...result], [104, 101, 108, 108, 111]) }) test('bytes empty', (t) => { const m = new Monty('x', { inputs: ['x'] }) const result = m.run({ inputs: { x: Buffer.from([]) } }) t.true(Buffer.isBuffer(result)) t.deepEqual([...result], []) }) test('bytes result', (t) => { const m = new Monty('b"hello"') const result = m.run() t.true(Buffer.isBuffer(result)) t.deepEqual([...result], [104, 101, 108, 108, 111]) }) // ============================================================================= // List tests // 
============================================================================= test('list', (t) => { const m = new Monty('x', { inputs: ['x'] }) t.deepEqual(m.run({ inputs: { x: [1, 2, 3] } }), [1, 2, 3]) t.deepEqual(m.run({ inputs: { x: [] } }), []) t.deepEqual(m.run({ inputs: { x: ['a', 'b'] } }), ['a', 'b']) }) test('list output', (t) => { const m = new Monty('[1, 2, 3]') t.deepEqual(m.run(), [1, 2, 3]) }) // ============================================================================= // Tuple tests // ============================================================================= test('tuple', (t) => { const m = new Monty('(1, 2, 3)') const result = m.run() // Tuples are returned as arrays with a __tuple__ marker property t.true(Array.isArray(result)) t.deepEqual([...result], [1, 2, 3]) t.is(result.__tuple__, true) }) test('tuple empty', (t) => { const m = new Monty('()') const result = m.run() t.true(Array.isArray(result)) t.deepEqual([...result], []) t.is(result.__tuple__, true) }) // ============================================================================= // Dict tests // ============================================================================= test('dict', (t) => { const m = new Monty('{"a": 1, "b": 2}') const result = m.run() // Dicts are returned as native JS Map (preserves key types and insertion order) t.true(result instanceof Map) t.is(result.get('a'), 1) t.is(result.get('b'), 2) t.is(result.size, 2) }) test('dict empty', (t) => { const m = new Monty('{}') const result = m.run() t.true(result instanceof Map) t.is(result.size, 0) }) // ============================================================================= // Set tests // ============================================================================= test('set', (t) => { const m = new Monty('{1, 2, 3}') const result = m.run() t.deepEqual(result, new Set([1, 2, 3])) }) test('set empty', (t) => { const m = new Monty('set()') const result = m.run() t.deepEqual(result, new Set()) }) // 
============================================================================= // Frozenset tests // ============================================================================= test('frozenset', (t) => { const m = new Monty('frozenset([1, 2, 3])') const result = m.run() // FrozenSet is returned as a native JS Set (no frozen equivalent in JS) t.true(result instanceof Set) t.deepEqual(result, new Set([1, 2, 3])) }) test('frozenset empty', (t) => { const m = new Monty('frozenset()') const result = m.run() t.deepEqual(result, new Set()) }) // ============================================================================= // Ellipsis tests // ============================================================================= test('ellipsis input', (t) => { // In JS we represent ellipsis as an object with __monty_type__: 'Ellipsis' const m = new Monty('x is ...', { inputs: ['x'] }) t.is(m.run({ inputs: { x: { __monty_type__: 'Ellipsis' } } }), true) }) test('ellipsis output', (t) => { const m = new Monty('...') const result = m.run() t.deepEqual(result, { __monty_type__: 'Ellipsis' }) }) // ============================================================================= // Nested collection tests // ============================================================================= test('nested list', (t) => { const m = new Monty('x', { inputs: ['x'] }) const nested = [ [1, 2], [3, [4, 5]], ] t.deepEqual(m.run({ inputs: { x: nested } }), [ [1, 2], [3, [4, 5]], ]) }) test('nested dict', (t) => { const m = new Monty('{"list": [1, 2], "nested": {"a": 1}}') const result = m.run() // Dicts are returned as native JS Map t.true(result instanceof Map) t.deepEqual(result.get('list'), [1, 2]) const nested = result.get('nested') t.true(nested instanceof Map) t.is(nested.get('a'), 1) }) test('mixed nested', (t) => { const m = new Monty('{"list": [1, 2], "tuple": (3, 4), "nested": {"set": {5, 6}}}') const result = m.run() t.true(result instanceof Map) t.deepEqual(result.get('list'), [1, 2]) const 
tuple = result.get('tuple') t.true(Array.isArray(tuple)) t.is(tuple.__tuple__, true) t.deepEqual([...tuple], [3, 4]) const nested = result.get('nested') t.true(nested instanceof Map) t.true(nested.get('set') instanceof Set) }) test('nested set in list', (t) => { const m = new Monty('[{1, 2}, {3, 4}]') const result = m.run() t.true(Array.isArray(result)) t.is(result.length, 2) t.true(result[0] instanceof Set) t.true(result[1] instanceof Set) t.deepEqual(result[0], new Set([1, 2])) t.deepEqual(result[1], new Set([3, 4])) }) test('nested bytes in dict', (t) => { const m = new Monty('{"data": b"abc"}') const result = m.run() t.true(result instanceof Map) const data = result.get('data') t.true(Buffer.isBuffer(data)) t.deepEqual([...data], [97, 98, 99]) }) test('tuple containing set', (t) => { const m = new Monty('({1, 2}, "hello")') const result = m.run() t.true(Array.isArray(result)) t.is(result.__tuple__, true) t.true(result[0] instanceof Set) t.deepEqual(result[0], new Set([1, 2])) t.is(result[1], 'hello') }) // ============================================================================= // BigInt tests // ============================================================================= test('bigint input', (t) => { const big = 2n ** 100n const m = new Monty('x', { inputs: ['x'] }) const result = m.run({ inputs: { x: big } }) t.is(result, big) }) test('bigint output', (t) => { const m = new Monty('2**100') const result = m.run() t.is(result, 2n ** 100n) }) test('bigint negative input', (t) => { const bigNeg = -(2n ** 100n) const m = new Monty('x', { inputs: ['x'] }) const result = m.run({ inputs: { x: bigNeg } }) t.is(result, bigNeg) }) test('int overflow to bigint', (t) => { const maxI64 = 9223372036854775807n const m = new Monty('x + 1', { inputs: ['x'] }) const result = m.run({ inputs: { x: maxI64 } }) t.is(result, maxI64 + 1n) }) test('bigint arithmetic', (t) => { const big = 2n ** 100n const m = new Monty('x * 2 + y', { inputs: ['x', 'y'] }) const result = m.run({ 
inputs: { x: big, y: big } })
  t.is(result, big * 2n + big)
})

test('bigint comparison', (t) => {
  const big = 2n ** 100n
  const m = new Monty('x > y', { inputs: ['x', 'y'] })
  t.is(m.run({ inputs: { x: big, y: 42 } }), true)
  t.is(m.run({ inputs: { x: 42, y: big } }), false)
})

test('bigint in collection', (t) => {
  const big = 2n ** 100n
  const m = new Monty('x', { inputs: ['x'] })
  const result = m.run({ inputs: { x: [big, 42, big * 2n] } })
  t.deepEqual(result, [big, 42, big * 2n])
})

================================================
FILE: crates/monty-js/build.rs
================================================
use std::{env, fs, path::Path, process::Command};

/// Build script that sets up napi bindings and syncs the package.json version
/// with the Cargo workspace version.
///
/// Cargo sets `CARGO_PKG_VERSION` in the environment when executing build scripts,
/// so we use that as the single source of truth. If package.json has a different
/// version, we update it in place.
fn main() {
    // Re-run when package.json changes so we can re-check the version.
    println!("cargo:rerun-if-changed=package.json");

    sync_package_json_version();

    napi_build::setup();
}

/// Read the Cargo package version and update package.json if the version differs.
///
/// Uses the runtime `CARGO_PKG_VERSION` env var (not `env!()`) so that the build
/// script picks up version changes without needing to be recompiled.
///
/// When the version line is rewritten, `npm install --package-lock-only` is run
/// afterwards so that package-lock.json follows the new version.
///
/// # Panics
///
/// Panics if `CARGO_PKG_VERSION` is unset, if package.json cannot be read or
/// written, or if npm cannot be started or exits unsuccessfully.
fn sync_package_json_version() {
    let cargo_version = env::var("CARGO_PKG_VERSION").expect("CARGO_PKG_VERSION not set");
    let package_json_path = Path::new("package.json");
    let contents = fs::read_to_string(package_json_path).expect("failed to read package.json");

    // Replace the top-level "version" field. We match lines starting with
    // `  "version":` which is the standard prettier-formatted location.
    let expected = format!("  \"version\": \"{cargo_version}\",");
    // `lines()` drops the line terminators, so every line is re-emitted with a
    // plain `\n` below.
    let mut result = String::with_capacity(contents.len());
    let mut changed = false;
    for line in contents.lines() {
        // Only match the top-level "version" field (exactly 2-space indent),
        // not nested ones like scripts.version (4-space indent). The trailing
        // colon keeps longer keys such as "versions" from matching.
        if !changed && line.starts_with("  \"version\":") {
            // version unchanged, exit early
            if line == expected {
                return;
            }
            result.push_str(&expected);
            changed = true;
        } else {
            result.push_str(line);
        }
        result.push('\n');
    }

    // No top-level version line was found: leave the file alone.
    if !changed {
        return;
    }

    eprintln!("Updating package.json version to {cargo_version}");
    fs::write(package_json_path, &result).expect("failed to write package.json");

    // Sync package-lock.json to match the updated version.
    //
    // Build scripts are compiled for the host, so `cfg!(windows)` describes the
    // machine npm runs on. There the launcher is `npm.cmd`, which is not found
    // when spawning plain `npm`.
    let npm = if cfg!(windows) { "npm.cmd" } else { "npm" };
    let status = Command::new(npm)
        .args(["install", "--package-lock-only"])
        .status()
        .expect("failed to run npm");
    assert!(status.success(), "npm install --package-lock-only failed");
}

================================================
FILE: crates/monty-js/index-header.d.ts
================================================
// index-header.d.ts - header will be written into index.d.ts on build
type JsMontyObject = any

================================================
FILE: crates/monty-js/package.json
================================================
{
  "name": "@pydantic/monty",
  "version": "0.0.8",
  "type": "module",
  "description": "Sandboxed Python interpreter for JavaScript/TypeScript",
  "main": "wrapper.js",
  "types": "wrapper.d.ts",
  "exports": {
    ".": {
      "types": "./wrapper.d.ts",
      "default": "./wrapper.js"
    }
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/pydantic/monty"
  },
  "license": "MIT",
  "browser": "browser.js",
  "files": [
    "index.d.ts",
    "index.js",
    "wrapper.js",
    "wrapper.d.ts",
    "browser.js"
  ],
  "napi": {
    "binaryName": "monty",
    "targets": [
      "x86_64-pc-windows-msvc",
      "x86_64-apple-darwin",
      "x86_64-unknown-linux-gnu",
      "aarch64-apple-darwin",
      "aarch64-unknown-linux-gnu",
      "wasm32-wasip1-threads"
    ],
    "dtsHeaderFile": "./index-header.d.ts"
}, "engines": { "node": ">= 6.14.2 < 7 || >= 8.11.2 < 9 || >= 9.11.0 < 10 || >= 10.0.0" }, "publishConfig": { "registry": "https://registry.npmjs.org/", "access": "public" }, "scripts": { "artifacts": "napi artifacts", "bench": "node --import @oxc-node/core/register benchmark/bench.ts", "build": "run-s build:napi build:ts", "build:debug": "run-s build:napi:debug build:ts", "build:napi": "napi build --platform --release --esm", "build:napi:debug": "napi build --platform --esm", "build:ts": "tsc", "create-npm-dirs": "napi create-npm-dirs", "format": "run-p format:prettier format:rs format:toml", "format:prettier": "prettier . -w", "format:toml": "taplo format", "format:rs": "cargo fmt", "lint": "oxlint .", "prepublishOnly": "napi prepublish -t npm", "test": "ava", "smoke-test": "bash scripts/smoke-test.sh", "preversion": "napi build --platform && git add .", "version": "napi version" }, "devDependencies": { "@emnapi/core": "^1.5.0", "@emnapi/runtime": "^1.5.0", "@napi-rs/cli": "^3.2.0", "@oxc-node/core": "^0.0.35", "@taplo/cli": "^0.7.0", "@tybys/wasm-util": "^0.10.0", "@types/node": "^25.0.9", "ava": "^6.4.1", "chalk": "^5.6.2", "npm-run-all2": "^8.0.4", "oxlint": "^1.14.0", "prettier": "^3.6.2", "tinybench": "^6.0.0", "typescript": "^5.9.2" }, "ava": { "extensions": { "ts": "module" }, "timeout": "2m", "workerThreads": false, "nodeArguments": [ "--import", "@oxc-node/core/register" ] }, "prettier": { "printWidth": 120, "semi": false, "trailingComma": "all", "singleQuote": true, "arrowParens": "always" } } ================================================ FILE: crates/monty-js/scripts/smoke-test.sh ================================================ #!/bin/bash set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" cd "$ROOT_DIR" echo "=== Building package ===" npm run build # Detect current platform NODE_FILE=$(ls monty.*.node 2>/dev/null | head -1) if [ -z "$NODE_FILE" ]; then echo "Error: No .node file found after build" exit 1 fi # Extract platform from filename (e.g., monty.darwin-arm64.node -> darwin-arm64) PLATFORM=$(echo "$NODE_FILE" | sed 's/monty\.\(.*\)\.node/\1/') echo "Detected platform: $PLATFORM" echo "=== Setting up platform packages ===" npm run create-npm-dirs # Copy binary to platform package directory (simulates napi artifacts) PLATFORM_DIR="npm/$PLATFORM" if [ ! -d "$PLATFORM_DIR" ]; then echo "Error: Platform directory $PLATFORM_DIR not found" exit 1 fi cp "$NODE_FILE" "$PLATFORM_DIR/" # Add optionalDependencies to main package.json (without publishing) npx napi prepublish -t npm --skip-optional-publish echo "=== Creating platform package tgz ===" cd "$PLATFORM_DIR" PLATFORM_TGZ=$(npm pack 2>/dev/null) mv "$PLATFORM_TGZ" "$ROOT_DIR/" cd "$ROOT_DIR" echo "Created: $PLATFORM_TGZ" echo "=== Creating main package tgz ===" MAIN_TGZ=$(npm pack 2>/dev/null) echo "Created: $MAIN_TGZ" echo "=== Installing in smoke-test ===" cd "$ROOT_DIR/smoke-test" rm -rf node_modules package-lock.json # Install platform package first, then main package npm install "../$PLATFORM_TGZ" --force npm install "../$MAIN_TGZ" --force echo "=== Type checking ===" npm run type-check echo "=== Running smoke tests ===" npm test echo "=== Cleaning up ===" cd "$ROOT_DIR" rm -f "$MAIN_TGZ" "$PLATFORM_TGZ" rm -rf npm/ # Remove optionalDependencies added by napi prepublish (keeps other package.json changes) npm pkg delete optionalDependencies 2>/dev/null || true echo "=== Smoke test passed! 
==="

================================================
FILE: crates/monty-js/smoke-test/.gitignore
================================================
node_modules/
*.tgz
package-lock.json

================================================
FILE: crates/monty-js/smoke-test/package.json
================================================
{
  "name": "monty-smoke-test",
  "version": "1.0.0",
  "type": "module",
  "private": true,
  "scripts": {
    "type-check": "tsc --noEmit",
    "test": "node --experimental-strip-types test.ts"
  },
  "devDependencies": {
    "typescript": "^5.9.2"
  },
  "dependencies": {
    "@pydantic/monty": "file:../pydantic-monty-1.0.0.tgz",
    "@pydantic/monty-darwin-arm64": "file:../pydantic-monty-darwin-arm64-1.0.0.tgz"
  }
}

================================================
FILE: crates/monty-js/smoke-test/test.ts
================================================
import { Monty, MontySyntaxError, MontyRuntimeError, MontySnapshot, MontyComplete } from '@pydantic/monty'

let passed = 0
let failed = 0

/**
 * Records one check: logs PASS/FAIL with `message` and bumps the matching
 * counter. Never throws, so a failed check does not stop the script.
 */
function assert(condition: boolean, message: string): void {
  if (!condition) {
    console.error(`FAIL: ${message}`)
    failed++
  } else {
    console.log(`PASS: ${message}`)
    passed++
  }
}

/**
 * Records a pass when `fn` throws an instance of `errorClass`, and a failure
 * when it throws something else or does not throw at all.
 *
 * `T` only ties `errorClass` to an error constructor for the type checker; the
 * runtime check is the `instanceof` below.
 */
function assertThrows<T extends Error>(
  fn: () => void,
  errorClass: new (...args: never[]) => T,
  message: string,
): void {
  try {
    fn()
    console.error(`FAIL: ${message} - no error thrown`)
    failed++
  } catch (e) {
    if (e instanceof errorClass) {
      console.log(`PASS: ${message}`)
      passed++
    } else {
      console.error(`FAIL: ${message} - wrong error type: ${(e as Error).constructor.name}`)
      failed++
    }
  }
}

console.log('=== Basic Execution ===')
const m1 = new Monty('1 + 2')
assert(m1.run() === 3, 'basic arithmetic')

const m2 = new Monty('10 * 5 - 3')
assert(m2.run() === 47, 'complex arithmetic')

const m3 = new Monty('"hello" + " " + "world"')
assert(m3.run() === 'hello world', 'string concatenation')

console.log('\n=== Constructor Options ===')
const m4 = new Monty('x + y', { inputs: ['x', 'y'] })
assert(m4.inputs.length === 2, 'inputs array populated')
assert(m4.inputs[0] === 'x', 'first input correct')

// External functions are no longer declared in the constructor - they are resolved at runtime via start/resume

const m6 = new Monty('1', { scriptName: 'custom.py' })
assert(m6.scriptName === 'custom.py', 'custom script name')

console.log('\n=== Inputs ===')
const m7 = new Monty('x * 2', { inputs: ['x'] })
assert(m7.run({ inputs: { x: 5 } }) === 10, 'single input')
assert(m7.run({ inputs: { x: -3 } }) === -6, 'negative input')

const m8 = new Monty('a + b + c', { inputs: ['a', 'b', 'c'] })
assert(m8.run({ inputs: { a: 1, b: 2, c: 3 } }) === 6, 'multiple inputs')

console.log('\n=== Error Handling ===')
assertThrows(() => new Monty('def'), MontySyntaxError, 'syntax error throws MontySyntaxError')
assertThrows(() => new Monty('1/0').run(), MontyRuntimeError, 'division by zero throws MontyRuntimeError')
assertThrows(
  () => new Monty('raise ValueError("test")').run(),
  MontyRuntimeError,
  'raise statement throws MontyRuntimeError',
)

console.log('\n=== Error Properties ===')
try {
  new Monty('raise ValueError("custom message")').run()
  // Reaching this line means nothing was thrown, so none of the property
  // checks below would run. Record that as a failure instead of passing
  // silently. `assert` only logs and counts, so it cannot land in the catch.
  assert(false, 'raise ValueError throws so error properties can be checked')
} catch (e) {
  if (e instanceof MontyRuntimeError) {
    assert(e.exception.typeName === 'ValueError', 'exception typeName correct')
    assert(e.exception.message === 'custom message', 'exception message correct')
    assert(e.display('msg') === 'custom message', 'display msg format')
    assert(e.display('type-msg') === 'ValueError: custom message', 'display type-msg format')
    const frames = e.traceback()
    assert(Array.isArray(frames), 'traceback returns array')
  } else {
    // A different error type would otherwise skip every check above.
    assert(false, 'error properties checked on a MontyRuntimeError')
  }
}

console.log('\n=== External Functions (start/resume) ===')
const m9 = new Monty('foo(42)')
const result9 = m9.start()
assert(result9 instanceof MontySnapshot, 'start returns MontySnapshot')
// Narrow the type for the property accesses below; abort if it is not a snapshot.
if (!(result9 instanceof MontySnapshot)) throw new Error('Expected MontySnapshot')
assert(result9.functionName === 'foo', 'snapshot has correct function name')
assert(result9.args[0] === 42, 'snapshot has correct args')
assert(Object.keys(result9.kwargs).length
=== 0, 'snapshot has empty kwargs') const complete1 = result9.resume({ returnValue: 'result' }) assert(complete1 instanceof MontyComplete, 'resume returns MontyComplete') if (!(complete1 instanceof MontyComplete)) throw new Error('Expected MontyComplete') assert(complete1.output === 'result', 'complete has correct output') console.log('\n=== External Functions with kwargs ===') const m10 = new Monty('bar(1, 2, x=3, y=4)') const result10 = m10.start() if (!(result10 instanceof MontySnapshot)) throw new Error('Expected MontySnapshot') assert(result10.args[0] === 1, 'positional arg 1') assert(result10.args[1] === 2, 'positional arg 2') assert(result10.kwargs['x'] === 3, 'kwarg x') assert(result10.kwargs['y'] === 4, 'kwarg y') result10.resume({ returnValue: null }) console.log('\n=== Multiple External Calls ===') const m11 = new Monty('a = get_a()\nb = get_b()\na + b') let state: MontySnapshot | MontyComplete = m11.start() assert(state instanceof MontySnapshot, 'first call returns snapshot') assert((state as MontySnapshot).functionName === 'get_a', 'first function is get_a') state = (state as MontySnapshot).resume({ returnValue: 10 }) assert(state instanceof MontySnapshot, 'second call returns snapshot') assert((state as MontySnapshot).functionName === 'get_b', 'second function is get_b') state = (state as MontySnapshot).resume({ returnValue: 20 }) assert(state instanceof MontyComplete, 'final state is complete') assert((state as MontyComplete).output === 30, 'result is sum of external returns') console.log('\n=== Serialization ===') const m12 = new Monty('x + 1', { inputs: ['x'] }) const dumped = m12.dump() assert(dumped instanceof Buffer, 'dump returns Buffer') assert(dumped.length > 0, 'dump is not empty') const loaded = Monty.load(dumped) assert(loaded.run({ inputs: { x: 10 } }) === 11, 'loaded instance works') console.log('\n=== Snapshot Serialization ===') const m13 = new Monty('ext(x) + 1', { inputs: ['x'] }) const snap = m13.start({ inputs: { x: 5 } }) as 
MontySnapshot const snapDumped = snap.dump() assert(snapDumped instanceof Buffer, 'snapshot dump returns Buffer') const snapLoaded = MontySnapshot.load(snapDumped) assert(snapLoaded.functionName === 'ext', 'loaded snapshot has function name') assert(snapLoaded.args[0] === 5, 'loaded snapshot has args') const finalResult = snapLoaded.resume({ returnValue: 100 }) as MontyComplete assert(finalResult.output === 101, 'resumed loaded snapshot works') console.log('\n=== repr() ===') const m14 = new Monty('1 + 1') const repr = m14.repr() assert(typeof repr === 'string', 'repr returns string') assert(repr.includes('Monty'), 'repr contains Monty') console.log('\n=== Summary ===') console.log(`Passed: ${passed}`) console.log(`Failed: ${failed}`) if (failed > 0) { process.exit(1) } console.log('\nAll smoke tests passed!') ================================================ FILE: crates/monty-js/smoke-test/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2022", "module": "NodeNext", "moduleResolution": "NodeNext", "strict": true, "noEmit": true, "skipLibCheck": true }, "include": ["test.ts"] } ================================================ FILE: crates/monty-js/src/convert.rs ================================================ //! Type conversion between Monty's `MontyObject` and JavaScript values via napi-rs. //! //! This module provides bidirectional conversion using native napi-rs APIs: //! - `monty_to_js`: Convert Monty's `MontyObject` to a JavaScript value //! - `js_to_monty`: Convert a JavaScript value to Monty's `MontyObject` //! //! ## Type Mappings //! //! ### Native JS types (bidirectional): //! - `MontyObject::None` ↔ `null` //! - `MontyObject::Bool` ↔ `boolean` //! - `MontyObject::Int` ↔ `number` (if within safe integer range) or `BigInt` //! - `MontyObject::BigInt` ↔ `BigInt` //! - `MontyObject::Float` ↔ `number` (including `NaN`, `Infinity`, `-Infinity`) //! - `MontyObject::String` ↔ `string` //! 
- `MontyObject::Bytes` ↔ `Buffer` (Node.js) //! - `MontyObject::List` ↔ `Array` //! - `MontyObject::Dict` ↔ `Map` (preserves key types and insertion order) //! - `MontyObject::Set` ↔ `Set` //! - `MontyObject::FrozenSet` ↔ `Set` (JS has no frozen set) //! //! ### Marked JS types (with `__monty_type__` property): //! - `MontyObject::Ellipsis` → `{ __monty_type__: 'Ellipsis' }` //! - `MontyObject::Tuple` → `Array` with `__tuple__: true` //! - `MontyObject::Exception` → `{ __monty_type__: 'Exception', excType, message }` //! - `MontyObject::Type` → `{ __monty_type__: 'Type', value }` //! - `MontyObject::BuiltinFunction` → `{ __monty_type__: 'BuiltinFunction', value }` //! - `MontyObject::Dataclass` → `{ __monty_type__: 'Dataclass', name, fields, ... }` //! - `MontyObject::Repr` → plain `string` //! - `MontyObject::Cycle` → placeholder `string` use std::collections::HashMap; use monty::{DictPairs, ExcType, MontyObject}; use napi::bindgen_prelude::*; use num_bigint::BigInt as NumBigInt; /// JavaScript safe integer range: -(2^53) to 2^53. const JS_SAFE_INT_MIN: i64 = -(1_i64 << 53); const JS_SAFE_INT_MAX: i64 = 1_i64 << 53; /// Wrapper for returning an unknown JS value from napi functions. /// /// This allows `monty_to_js` to return dynamically typed JS values. pub struct JsMontyObject<'env>(pub(crate) Unknown<'env>); impl JsMontyObject<'_> { /// Returns the raw napi value for use in low-level operations. pub fn raw(&self) -> sys::napi_value { self.0.raw() } } impl ToNapiValue for JsMontyObject<'_> { unsafe fn to_napi_value(env: sys::napi_env, val: Self) -> Result { Unknown::to_napi_value(env, val.0) } } /// Converts Monty's `MontyObject` to a JavaScript value using native napi-rs APIs. 
/// /// This function creates native JS types where possible: /// - Numbers use JS `number` or `BigInt` depending on size /// - Dicts use native JS `Map` (preserves key types and insertion order) /// - Sets use native JS `Set` /// - Bytes use Node.js `Buffer` /// - Tuples use arrays with a `__tuple__` marker property /// /// Types that don't have direct JS equivalents get marker properties to preserve /// type information for round-tripping. pub fn monty_to_js<'e>(obj: &MontyObject, env: &'e Env) -> Result> { let unknown = match obj { MontyObject::None => create_js_null(env)?, MontyObject::Ellipsis => create_js_ellipsis(env)?, MontyObject::Bool(b) => create_js_bool(*b, env)?, MontyObject::Int(i) => create_js_int(*i, env)?, MontyObject::BigInt(bi) => create_js_bigint(bi, env)?, MontyObject::Float(f) => env.create_double(*f)?.into_unknown(env)?, MontyObject::String(s) => env.create_string(s)?.into_unknown(env)?, MontyObject::Bytes(bytes) => create_js_buffer(bytes, env)?, MontyObject::List(items) => create_js_array(items, env)?.into_unknown(env)?, MontyObject::Tuple(items) => create_js_tuple(items, env)?, // NamedTuple is converted to a tuple (loses named access in JS) MontyObject::NamedTuple { values, .. 
} => create_js_tuple(values, env)?, MontyObject::Dict(pairs) => create_js_map(pairs, env)?, MontyObject::Set(items) | MontyObject::FrozenSet(items) => create_js_set(items, env)?, MontyObject::Exception { exc_type, arg } => create_js_exception(*exc_type, arg.as_deref(), env)?, MontyObject::Type(t) => create_js_type_marker(&t.to_string(), env)?, MontyObject::BuiltinFunction(f) => create_js_builtin_function_marker(&f.to_string(), env)?, MontyObject::Dataclass { name, type_id, field_names, attrs, frozen, } => create_js_dataclass(name, *type_id, field_names, attrs, *frozen, env)?, MontyObject::Path(p) => env.create_string(p)?.into_unknown(env)?, MontyObject::Repr(s) | MontyObject::Cycle(_, s) => env.create_string(s)?.into_unknown(env)?, // Function objects are internal to the name lookup protocol and should not normally // appear as final output values. If they do, represent as a string with the function name. MontyObject::Function { name, .. } => env.create_string(name)?.into_unknown(env)?, }; Ok(JsMontyObject(unknown)) } /// Creates a JS null value. fn create_js_null(env: &Env) -> Result> { // Use raw napi to create null let mut result = std::ptr::null_mut(); // SAFETY: [DH] - all arguments are valid and result is valid on success unsafe { let status = sys::napi_get_null(env.raw(), &raw mut result); if status != sys::Status::napi_ok { return Err(Error::from_reason("Failed to create null")); } Ok(Unknown::from_raw_unchecked(env.raw(), result)) } } /// Creates a JS boolean value. fn create_js_bool(b: bool, env: &Env) -> Result> { let mut result = std::ptr::null_mut(); // SAFETY: [DH] - all arguments are valid and result is valid on success unsafe { let status = sys::napi_get_boolean(env.raw(), b, &raw mut result); if status != sys::Status::napi_ok { return Err(Error::from_reason("Failed to create boolean")); } Ok(Unknown::from_raw_unchecked(env.raw(), result)) } } /// Creates a JS number or BigInt depending on whether the value fits in JS safe integer range. 
fn create_js_int(i: i64, env: &Env) -> Result> { if (JS_SAFE_INT_MIN..=JS_SAFE_INT_MAX).contains(&i) { env.create_int64(i)?.into_unknown(env) } else { // Use BigInt for large integers BigInt::from(i).into_unknown(env) } } /// Creates a native JS BigInt from an arbitrary-precision integer. /// /// For integers that fit in i64, uses direct creation. For larger integers, /// calls the global `BigInt()` constructor with the decimal string representation. fn create_js_bigint<'e>(bi: &NumBigInt, env: &'e Env) -> Result> { // Try to fit in i64 first for efficiency if let Ok(i) = i64::try_from(bi) { return BigInt::from(i).into_unknown(env); } // For larger integers, call global BigInt(string) let global = env.get_global()?; let bigint_constructor: Function = global.get_named_property("BigInt")?; let result = bigint_constructor.call(bi.to_string())?; result.into_unknown(env) } /// Creates a Node.js Buffer from bytes. fn create_js_buffer<'e>(bytes: &[u8], env: &'e Env) -> Result> { let buffer = BufferSlice::from_data(env, bytes.to_vec())?; buffer.into_unknown(env) } /// Creates a native JS Array from Monty list items, recursively converting each element. fn create_js_array<'e>(items: &[MontyObject], env: &'e Env) -> Result> { let mut arr = env.create_array(items.len().try_into().expect("array size overflows u32"))?; for (i, item) in items.iter().enumerate() { let js_item = monty_to_js(item, env)?; arr.set(i.try_into().expect("overflow on array index"), js_item)?; } Ok(arr) } /// Creates a tuple representation as a JS array with a `__tuple__` marker property. /// /// This allows distinguishing tuples from lists in JavaScript while still allowing /// array-like access to tuple elements. fn create_js_tuple<'e>(items: &[MontyObject], env: &'e Env) -> Result> { let mut arr = create_js_array(items, env)?; arr.set_named_property("__tuple__", true)?; arr.into_unknown(env) } /// Creates a native JS `Map` from Monty dict pairs, recursively converting keys and values. 
///
/// Using `Map` instead of plain objects preserves:
/// - Non-string key types (numbers, booleans, etc.)
/// - Insertion order
/// - Proper equality semantics for keys
fn create_js_map<'e>(pairs: &DictPairs, env: &'e Env) -> Result<Unknown<'e>> {
    let global = env.get_global()?;
    let map_constructor: Function<()> = global.get_named_property("Map")?;
    let map: Object<'e> = map_constructor.new_instance(())?.coerce_to_object()?;
    // Looked up once and reused for every entry.
    let set_method: Unknown = map.get_named_property("set")?;

    for (k, v) in pairs {
        let js_key = monty_to_js(k, env)?;
        let js_value = monty_to_js(v, env)?;
        // Call map.set(key, value) using raw napi to pass two separate arguments
        call_method_2_args(env.raw(), map.raw(), set_method.raw(), js_key.0.raw(), js_value.0.raw())?;
    }

    map.into_unknown(env)
}

/// Calls a JS method with 2 arguments using raw napi.
///
/// This is needed because napi-rs's `Function::apply` with tuple args doesn't work correctly
/// for methods expecting two separate arguments.
///
/// The method's return value is discarded. Any status other than `napi_ok` is
/// reported as an error with reason `"Failed to call method"`.
fn call_method_2_args(
    env: sys::napi_env,
    this: sys::napi_value,
    method: sys::napi_value,
    arg1: sys::napi_value,
    arg2: sys::napi_value,
) -> Result<()> {
    let args = [arg1, arg2];
    let mut result = std::ptr::null_mut();
    // SAFETY: [DH] - all arguments are valid and result is valid on success
    unsafe {
        // The argument count is taken from `args` so the two cannot drift apart.
        let status = sys::napi_call_function(env, this, method, args.len(), args.as_ptr(), &raw mut result);
        if status != sys::Status::napi_ok {
            return Err(Error::from_reason("Failed to call method"));
        }
    }
    Ok(())
}

/// Creates a native JS Set from Monty set items.
fn create_js_set<'e>(items: &[MontyObject], env: &'e Env) -> Result> { let global = env.get_global()?; let set_constructor: Function<()> = global.get_named_property("Set")?; let set: Object<'e> = set_constructor.new_instance(())?.coerce_to_object()?; let add_method: Function = set.get_named_property("add")?; for item in items { let js_item = monty_to_js(item, env)?; add_method.apply(set, js_item.0)?; } set.into_unknown(env) } /// Creates a JS object representing Ellipsis: `{ __monty_type__: 'Ellipsis' }`. fn create_js_ellipsis(env: &Env) -> Result> { let mut obj = Object::new(env)?; obj.set_named_property("__monty_type__", "Ellipsis")?; obj.into_unknown(env) } /// Creates a JS object representing an exception. fn create_js_exception<'e>(exc_type: ExcType, arg: Option<&str>, env: &'e Env) -> Result> { let mut obj = Object::new(env)?; obj.set_named_property("__monty_type__", "Exception")?; obj.set_named_property("excType", exc_type.to_string())?; obj.set_named_property("message", arg.unwrap_or(""))?; obj.into_unknown(env) } /// Creates a JS object representing a Type: `{ __monty_type__: 'Type', value: '...' }`. fn create_js_type_marker<'e>(type_str: &str, env: &'e Env) -> Result> { let mut obj = Object::new(env)?; obj.set_named_property("__monty_type__", "Type")?; obj.set_named_property("value", type_str)?; obj.into_unknown(env) } /// Creates a JS object representing a builtin function. fn create_js_builtin_function_marker<'e>(func_str: &str, env: &'e Env) -> Result> { let mut obj = Object::new(env)?; obj.set_named_property("__monty_type__", "BuiltinFunction")?; obj.set_named_property("value", func_str)?; obj.into_unknown(env) } /// Creates a JS object representing a dataclass instance. 
fn create_js_dataclass<'e>( name: &str, type_id: u64, field_names: &[String], attrs: &DictPairs, frozen: bool, env: &'e Env, ) -> Result> { let mut obj = Object::new(env)?; obj.set_named_property("__monty_type__", "Dataclass")?; obj.set_named_property("name", name)?; // type_id as BigInt since it may exceed JS safe integer range let type_id_bigint = BigInt::from(type_id); obj.set_named_property("typeId", type_id_bigint)?; // field_names as array let mut field_names_arr = env.create_array(field_names.len().try_into().expect("field_names size overflows u32"))?; for (i, field_name) in field_names.iter().enumerate() { field_names_arr.set( i.try_into().expect("overflow on field_names index"), env.create_string(field_name)?, )?; } obj.set_named_property("fieldNames", field_names_arr)?; // Build attrs as a nested object mapping field names to values let attrs_map: HashMap<&str, &MontyObject> = attrs .into_iter() .filter_map(|(k, v)| { if let MontyObject::String(key) = k { Some((key.as_str(), v)) } else { None } }) .collect(); let mut fields_obj = Object::new(env)?; for field_name in field_names { if let Some(value) = attrs_map.get(field_name.as_str()) { let js_value = monty_to_js(value, env)?; fields_obj.set_named_property(field_name.as_str(), js_value)?; } } obj.set_named_property("fields", fields_obj)?; obj.set_named_property("frozen", frozen)?; obj.into_unknown(env) } // ============================================================================= // JS to Monty conversion // ============================================================================= /// Converts a JavaScript value to Monty's `MontyObject`. 
/// /// This function handles native JS types and marked objects: /// - `null` → `None` /// - `boolean` → `Bool` /// - `number` → `Int` (if integer) or `Float` /// - `bigint` → `Int` (if fits in i64) or `BigInt` /// - `string` → `String` /// - `Buffer`/`Uint8Array` → `Bytes` /// - `Array` with `__tuple__` → `Tuple` /// - `Array` → `List` /// - `Map` → `Dict` /// - `Set` → `Set` /// - `Object` with `__monty_type__` → corresponding Monty type /// - `Object` → `Dict` (string keys only) pub fn js_to_monty(value: Unknown<'_>, env: Env) -> Result { let value_type = value.get_type()?; match value_type { ValueType::Null | ValueType::Undefined => Ok(MontyObject::None), ValueType::Boolean => { let b: bool = value.coerce_to_bool()?; Ok(MontyObject::Bool(b)) } ValueType::Number => { let n: f64 = value.coerce_to_number()?.get_double()?; // Check if the number is actually an integer (no fractional part) // and fits within i64 range if n.fract() == 0.0 && n >= i64::MIN as f64 && n <= i64::MAX as f64 { #[expect( clippy::cast_possible_truncation, reason = "Checked above that n is integer and within i64 range" )] return Ok(MontyObject::Int(n as i64)); } Ok(MontyObject::Float(n)) } ValueType::BigInt => { let bigint: BigInt = BigInt::from_unknown(value)?; // BigInt has public fields: sign_bit (bool) and words (Vec) // Convert words (u64 array) to num-bigint::BigInt // Each word is a 64-bit limb, little-endian order if bigint.words.is_empty() { return Ok(MontyObject::Int(0)); } let mut bi = NumBigInt::from(0u64); for (i, &word) in bigint.words.iter().enumerate() { let limb = NumBigInt::from(word); bi += limb << (64 * i); } if bigint.sign_bit { bi = -bi; } // Try to fit in i64 if let Ok(i) = i64::try_from(&bi) { Ok(MontyObject::Int(i)) } else { Ok(MontyObject::BigInt(bi)) } } ValueType::String => { let s: String = value.coerce_to_string()?.into_utf8()?.into_owned()?; Ok(MontyObject::String(s)) } ValueType::Object => { let obj: Object = value.coerce_to_object()?; // Check if it's a 
Buffer (Uint8Array) if obj.is_buffer()? { let buffer: BufferSlice = BufferSlice::from_unknown(value)?; return Ok(MontyObject::Bytes(buffer.to_vec())); } // Check if it's a Map if is_js_map(&obj, env)? { return js_map_to_monty(obj, env); } // Check if it's a Set if is_js_set(&obj, env)? { return js_set_to_monty(obj, env); } // Check if it's an Array if obj.is_array()? { return js_array_to_monty(obj, env); } // Check for __monty_type__ marker if let Some(monty_type) = get_string_property(&obj, "__monty_type__")? { return js_marked_object_to_monty(&obj, &monty_type, env); } // Plain object → Dict (with string keys) js_object_to_monty_dict(obj, env) } ValueType::Function => { // JS functions are converted to MontyObject::Function for external function resolution. // The function's `name` property is used as the Monty function name. let func_obj: Object = value.coerce_to_object()?; let name: String = func_obj .get_named_property::("name") .unwrap_or_else(|_| "".to_string()); Ok(MontyObject::Function { name, docstring: None }) } ValueType::Symbol | ValueType::External => { // These JS types don't have Monty equivalents Err(Error::from_reason(format!( "Cannot convert JS {value_type:?} to Monty value" ))) } // Unknown is not a real JS type, it's a napi-rs placeholder ValueType::Unknown => Err(Error::from_reason("Unknown JS value type")), } } /// Checks if a JS object is an instance of Set. fn is_js_set(obj: &Object, env: Env) -> Result { let global = env.get_global()?; let set_constructor: Function<()> = global.get_named_property("Set")?; obj.instanceof(set_constructor) } /// Checks if a JS object is an instance of Map. fn is_js_map(obj: &Object, env: Env) -> Result { let global = env.get_global()?; let map_constructor: Function<()> = global.get_named_property("Map")?; obj.instanceof(map_constructor) } /// Converts a JS Map to `MontyObject::Dict`. 
fn js_map_to_monty(map: Object, env: Env) -> Result { // Get the entries iterator let entries_method: Function<()> = map.get_named_property("entries")?; let iterator: Object = entries_method.apply(map, ())?.coerce_to_object()?; let mut pairs = Vec::new(); loop { let next_method: Function<()> = iterator.get_named_property("next")?; let result: Object = next_method.apply(iterator, ())?.coerce_to_object()?; let done: bool = result.get_named_property::("done")?; if done { break; } // value is [key, value] array let entry: Object = result.get_named_property::("value")?.coerce_to_object()?; let key: Unknown = entry.get_element(0)?; let value: Unknown = entry.get_element(1)?; let monty_key = js_to_monty(key, env)?; let monty_value = js_to_monty(value, env)?; pairs.push((monty_key, monty_value)); } Ok(MontyObject::dict(pairs)) } /// Converts a JS Set to `MontyObject::Set`. fn js_set_to_monty(set: Object, env: Env) -> Result { // Get the values iterator let values_method: Function<()> = set.get_named_property("values")?; let iterator: Object = values_method.apply(set, ())?.coerce_to_object()?; let mut items = Vec::new(); loop { let next_method: Function<()> = iterator.get_named_property("next")?; let result: Object = next_method.apply(iterator, ())?.coerce_to_object()?; let done: bool = result.get_named_property::("done")?; if done { break; } let value: Unknown = result.get_named_property("value")?; items.push(js_to_monty(value, env)?); } Ok(MontyObject::Set(items)) } /// Converts a JS Array to `MontyObject::List` or `MontyObject::Tuple`. 
fn js_array_to_monty(arr: Object, env: Env) -> Result { let is_tuple: bool = arr.get_named_property::>("__tuple__")?.unwrap_or(false); let length: u32 = arr.get_named_property("length")?; let mut items = Vec::with_capacity(length as usize); for i in 0..length { let element: Unknown = arr.get_element(i)?; items.push(js_to_monty(element, env)?); } if is_tuple { Ok(MontyObject::Tuple(items)) } else { Ok(MontyObject::List(items)) } } /// Converts a JS object with `__monty_type__` marker to the appropriate `MontyObject`. fn js_marked_object_to_monty(obj: &Object, monty_type: &str, env: Env) -> Result { match monty_type { "Ellipsis" => Ok(MontyObject::Ellipsis), "Exception" => { let exc_type_str: String = obj.get_named_property("excType")?; let message: String = obj.get_named_property("message")?; let exc_type: ExcType = exc_type_str .parse() .map_err(|_| Error::from_reason(format!("Unknown exception type: {exc_type_str}")))?; let arg = if message.is_empty() { None } else { Some(message) }; Ok(MontyObject::Exception { exc_type, arg }) } "Type" => { // Type objects can't be fully round-tripped; return as Repr let value: String = obj.get_named_property("value")?; Ok(MontyObject::Repr(format!(""))) } "BuiltinFunction" => { // BuiltinFunction objects can't be fully round-tripped; return as Repr let value: String = obj.get_named_property("value")?; Ok(MontyObject::Repr(format!(""))) } "Dataclass" => { let name: String = obj.get_named_property("name")?; // type_id is BigInt - access its public fields let type_id_bigint: BigInt = obj.get_named_property("typeId")?; let type_id = if type_id_bigint.words.is_empty() { 0u64 } else if type_id_bigint.sign_bit { return Err(Error::from_reason("Dataclass typeId cannot be negative")); } else { type_id_bigint.words[0] }; // field_names let field_names_arr: Array = obj.get_named_property("fieldNames")?; let field_names_len = field_names_arr.len(); let mut field_names = Vec::with_capacity(field_names_len as usize); for i in 
0..field_names_len { let name: String = field_names_arr.get::(i)?.unwrap_or_default(); field_names.push(name); } // fields object let fields_obj: Object = obj.get_named_property("fields")?; let mut attrs_vec = Vec::new(); for field_name in &field_names { if let Some(value) = fields_obj.get_named_property::>(field_name.as_str())? { let monty_value = js_to_monty(value, env)?; attrs_vec.push((MontyObject::String(field_name.clone()), monty_value)); } } let attrs = DictPairs::from(attrs_vec); let frozen: bool = obj.get_named_property("frozen")?; Ok(MontyObject::Dataclass { name, type_id, field_names, attrs, frozen, }) } _ => { // Unknown marker type, treat as dict js_object_to_monty_dict(*obj, env) } } } /// Converts a plain JS object to `MontyObject::Dict`. /// /// This is a fallback for plain objects (not Map instances). Since JS object keys /// are always strings, all keys in the resulting Dict will be strings. /// For full key type preservation, use JS `Map` instead. fn js_object_to_monty_dict(obj: Object, env: Env) -> Result { let keys = obj.get_property_names()?; // Get length by accessing the "length" property let length: u32 = keys.get_named_property("length")?; let mut pairs = Vec::with_capacity(length as usize); for i in 0..length { let key: Unknown = keys.get_element(i)?; let key_str: String = key.coerce_to_string()?.into_utf8()?.into_owned()?; let value: Unknown = obj.get_named_property(&key_str)?; let monty_value = js_to_monty(value, env)?; pairs.push((MontyObject::String(key_str), monty_value)); } Ok(MontyObject::dict(pairs)) } /// Helper to get an optional string property from a JS object. fn get_string_property(obj: &Object, name: &str) -> Result> { let has_property = obj.has_named_property(name)?; if !has_property { return Ok(None); } let value: Unknown = obj.get_named_property(name)?; if value.get_type()? 
== ValueType::String { let s: String = value.coerce_to_string()?.into_utf8()?.into_owned()?; Ok(Some(s)) } else { Ok(None) } } ================================================ FILE: crates/monty-js/src/exceptions.rs ================================================ //! Exception types for the Monty TypeScript/JavaScript bindings. //! //! This module provides thin napi wrappers around Monty's internal exceptions. //! The JavaScript wrapper layer (`wrapper.js`) is responsible for converting //! these into proper JS `Error` subclasses (`MontySyntaxError`, `MontyRuntimeError`). //! //! It is done this way because `napi` has no way to create JS `Error` subclasses from //! Rust. //! //! ## Architecture //! //! - `JsMontyException`: Thin wrapper around `monty::MontyException`. The JS wrapper //! checks `exception.typeName` to distinguish syntax errors from runtime errors. //! - `MontyTypingError`: Wraps `TypeCheckingDiagnostics` for static type checking errors. //! This is separate because type errors come from static analysis, not Python execution. use std::fmt; use monty::StackFrame; use monty_type_checking::TypeCheckingDiagnostics; use napi::bindgen_prelude::*; use napi_derive::napi; use serde::{Deserialize, Serialize}; // ============================================================================= // JsMontyException - Thin wrapper around core MontyException // ============================================================================= /// Wrapper around core `MontyException` for napi bindings. /// /// This is a thin newtype wrapper that exposes the necessary getters for the /// JavaScript wrapper to construct appropriate error types (`MontySyntaxError` /// or `MontyRuntimeError`) based on the exception type. 
#[napi(js_name = "MontyException")] pub struct JsMontyException(monty::MontyException); impl fmt::Display for JsMontyException { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.0) } } #[napi] impl JsMontyException { /// Returns information about the inner Python exception. /// /// The `typeName` field can be used to distinguish syntax errors (`"SyntaxError"`) /// from runtime errors (e.g., `"ValueError"`, `"TypeError"`). #[napi(getter)] #[must_use] pub fn exception(&self) -> ExceptionInfo { ExceptionInfo { type_name: self.0.exc_type().to_string(), message: self.0.message().unwrap_or_default().to_string(), } } /// Returns the error message. #[napi(getter)] #[must_use] pub fn message(&self) -> String { self.0.message().unwrap_or_default().to_string() } /// Returns the Monty traceback as an array of Frame objects. /// /// For syntax errors, this will be an empty array. /// For runtime errors, this contains the stack frames where the error occurred. #[napi] pub fn traceback(&self) -> Vec { self.0.traceback().iter().map(Frame::from_stack_frame).collect() } /// Returns formatted exception string. /// /// @param format - Output format: /// - 'traceback' - Full traceback (default) /// - 'type-msg' - 'ExceptionType: message' format /// - 'msg' - just the message #[napi] pub fn display(&self, format: Option) -> Result { let format = format.as_deref().unwrap_or("traceback"); match format { "traceback" => Ok(self.0.to_string()), "type-msg" => { let type_name = self.0.exc_type().to_string(); let message = self.0.message().unwrap_or_default(); if message.is_empty() { Ok(type_name) } else { Ok(format!("{type_name}: {message}")) } } "msg" => Ok(self.0.message().unwrap_or_default().to_string()), _ => Err(Error::from_reason(format!( "Invalid display format: '{format}'. Expected 'traceback', 'type-msg', or 'msg'" ))), } } /// Returns a string representation of the error. 
#[napi(js_name = "toString")] #[must_use] pub fn to_js_string(&self) -> String { self.to_string() } } impl JsMontyException { /// Creates a new JsMontyException from a core MontyException. #[must_use] pub fn new(exc: monty::MontyException) -> Self { Self(exc) } } // ============================================================================= // MontyTypingError - Raised when type checking finds errors // ============================================================================= /// Raised when type checking finds errors in the code. /// /// This exception is raised when static type analysis detects type errors. /// Use `display()` to render diagnostics in various formats. #[napi] pub struct MontyTypingError { /// The type checking failure containing diagnostic information. failure: TypeCheckingDiagnostics, /// Cached string representation. cached_string: String, } impl fmt::Display for MontyTypingError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.cached_string) } } #[napi] impl MontyTypingError { /// Returns information about the inner exception. #[napi(getter)] #[must_use] pub fn exception(&self) -> ExceptionInfo { ExceptionInfo { type_name: "TypeError".to_string(), message: self.cached_string.clone(), } } /// Returns the error message. #[napi(getter)] #[must_use] pub fn message(&self) -> String { self.cached_string.clone() } /// Renders the type error diagnostics with the specified format and color. /// /// @param format - Output format. One of: /// - 'full' - Full diagnostic output (default) /// - 'concise' - Concise output /// - 'azure' - Azure DevOps format /// - 'json' - JSON format /// - 'jsonlines' - JSON Lines format /// - 'rdjson' - RDJson format /// - 'pylint' - Pylint format /// - 'gitlab' - GitLab CI format /// - 'github' - GitHub Actions format /// @param color - Whether to include ANSI color codes. 
Default: false #[napi] pub fn display(&self, format: Option, color: Option) -> Result { let format = format.as_deref().unwrap_or("full"); let color = color.unwrap_or(false); self.failure .clone() .color(color) .format_from_str(format) .map_err(Error::from_reason) .map(|f| f.to_string()) } /// Returns a string representation of the error. #[napi(js_name = "toString")] #[must_use] pub fn to_js_string(&self) -> String { self.to_string() } } impl MontyTypingError { /// Creates a MontyTypingError from a TypeCheckingDiagnostics. #[must_use] pub fn from_failure(failure: TypeCheckingDiagnostics) -> Self { let cached_string = failure.to_string(); Self { failure, cached_string } } } // ============================================================================= // Helper types // ============================================================================= /// Information about the inner Python exception. /// /// This provides structured access to the exception type and message /// for programmatic error handling. #[napi(object)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ExceptionInfo { /// The exception type name (e.g., "ValueError", "TypeError", "SyntaxError"). pub type_name: String, /// The exception message. pub message: String, } /// A single frame in a Monty traceback. /// /// Contains all the information needed to display a traceback line: /// the file location, function name, and optional source code preview. #[napi(object)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Frame { /// The filename where the code is located. pub filename: String, /// Line number (1-based). pub line: u32, /// Column number (1-based). pub column: u32, /// End line number (1-based). pub end_line: u32, /// End column number (1-based). pub end_column: u32, /// The name of the function, or null for module-level code. pub function_name: Option, /// The source code line for preview in the traceback. 
pub source_line: Option, } impl Frame { /// Creates a `Frame` from Monty's `StackFrame`. #[must_use] pub fn from_stack_frame(frame: &StackFrame) -> Self { Self { filename: frame.filename.clone(), line: u32::from(frame.start.line), column: u32::from(frame.start.column), end_line: u32::from(frame.end.line), end_column: u32::from(frame.end.column), function_name: frame.frame_name.clone(), source_line: frame.preview_line.clone(), } } } /// Converts a javascript error into a MontyException. pub fn exc_js_to_monty(js_err: napi::Error) -> ::monty::MontyException { let exc = js_err_to_exc_type(js_err.status); let arg = js_err.reason.clone(); ::monty::MontyException::new(exc, Some(arg)) } fn js_err_to_exc_type(exc: napi::Status) -> ::monty::ExcType { use ::monty::ExcType; match exc { napi::Status::Ok => ExcType::Exception, // Should never happen napi::Status::InvalidArg => ExcType::TypeError, napi::Status::ObjectExpected | napi::Status::StringExpected | napi::Status::NameExpected | napi::Status::FunctionExpected | napi::Status::NumberExpected | napi::Status::BooleanExpected | napi::Status::ArrayExpected | napi::Status::BigintExpected | napi::Status::DateExpected | napi::Status::ArrayBufferExpected | napi::Status::DetachableArraybufferExpected | napi::Status::HandleScopeMismatch | napi::Status::CallbackScopeMismatch => ExcType::ValueError, napi::Status::GenericFailure => ExcType::Exception, napi::Status::Cancelled => ExcType::KeyboardInterrupt, napi::Status::QueueFull | napi::Status::Closing | napi::Status::WouldDeadlock | napi::Status::NoExternalBuffersAllowed | napi::Status::PendingException | napi::Status::EscapeCalledTwice => ExcType::RuntimeError, napi::Status::Unknown => ExcType::Exception, } } ================================================ FILE: crates/monty-js/src/lib.rs ================================================ // napi macros generate code that triggers some clippy lints #![allow(clippy::needless_pass_by_value, clippy::trivially_copy_pass_by_ref)] //! 
Node.js/TypeScript bindings for the Monty sandboxed Python interpreter. //! //! This module provides a JavaScript/TypeScript interface to Monty via napi-rs, //! allowing execution of sandboxed Python code from Node.js with configurable //! inputs, resource limits, and external function callbacks. //! //! ## Quick Start //! //! ```typescript //! import { Monty } from 'monty'; //! //! // Simple execution //! const m = new Monty('1 + 2'); //! const result = m.run(); // returns 3 //! //! // With inputs //! const m2 = new Monty('x + y', { inputs: ['x', 'y'] }); //! const result2 = m2.run({ inputs: { x: 10, y: 20 } }); // returns 30 //! //! // Iterative execution with external functions //! const m3 = new Monty('external_func()'); //! let progress = m3.start(); //! if (progress instanceof MontySnapshot) { //! progress = progress.resume({ returnValue: 42 }); //! } //! ``` mod convert; mod exceptions; mod limits; mod monty_cls; pub use exceptions::{ExceptionInfo, Frame, JsMontyException, MontyTypingError}; pub use limits::JsResourceLimits; pub use monty_cls::{ ExceptionInput, Monty, MontyComplete, MontyNameLookup, MontyOptions, MontyRepl, MontySnapshot, NameLookupLoadOptions, NameLookupResumeOptions, ResumeOptions, RunOptions, SnapshotLoadOptions, StartOptions, }; ================================================ FILE: crates/monty-js/src/limits.rs ================================================ //! Resource limits handling for the Monty TypeScript/JavaScript bindings. //! //! Provides utilities to extract and apply resource limits from JavaScript objects, //! including time limits, memory limits, and recursion depth. use std::time::Duration; use monty::{ResourceLimits, DEFAULT_MAX_RECURSION_DEPTH}; use napi_derive::napi; /// Resource limits configuration from JavaScript. /// /// All limits are optional. Omit a key to disable that limit. 
#[napi(object, js_name = "ResourceLimits")] #[derive(Debug, Clone, Copy, Default)] pub struct JsResourceLimits { /// Maximum number of heap allocations allowed. pub max_allocations: Option, /// Maximum execution time in seconds. pub max_duration_secs: Option, /// Maximum heap memory in bytes. pub max_memory: Option, /// Run garbage collection every N allocations. pub gc_interval: Option, /// Maximum function call stack depth (default: 1000). pub max_recursion_depth: Option, } impl From for ResourceLimits { fn from(js_limits: JsResourceLimits) -> Self { let max_recursion_depth = js_limits .max_recursion_depth .map(|v| v as usize) .or(Some(DEFAULT_MAX_RECURSION_DEPTH)); let mut limits = Self::new().max_recursion_depth(max_recursion_depth); if let Some(max) = js_limits.max_allocations { limits = limits.max_allocations(max as usize); } if let Some(secs) = js_limits.max_duration_secs { limits = limits.max_duration(Duration::from_secs_f64(secs)); } if let Some(max) = js_limits.max_memory { limits = limits.max_memory(max as usize); } if let Some(interval) = js_limits.gc_interval { limits = limits.gc_interval(interval as usize); } limits } } ================================================ FILE: crates/monty-js/src/monty_cls.rs ================================================ //! The main `Monty` class and iterative execution support for the TypeScript/JavaScript bindings. //! //! Provides a sandboxed Python interpreter that can be configured with inputs //! and resource limits. External functions are provided at runtime via //! `RunOptions` or `StartOptions`. Supports both immediate execution //! via `run()` and iterative execution via `start()`/`resume()`. //! //! ## Quick Start //! //! ```typescript //! import { Monty } from 'monty'; //! //! // Simple execution //! const m = new Monty('1 + 2'); //! const result = m.run(); // returns 3 //! //! // With inputs //! const m2 = new Monty('x + y', { inputs: ['x', 'y'] }); //! 
const result2 = m2.run({ inputs: { x: 10, y: 20 } }); // returns 30
//! ```
//!
//! ## Iterative Execution
//!
//! ```text
//! Monty.start() -> MontySnapshot | MontyNameLookup | MontyComplete
//!                       |               |
//!                       v               v
//! MontySnapshot.resume() / MontyNameLookup.resume()
//!                  -> MontySnapshot | MontyNameLookup | MontyComplete
//!                       |               |
//!                       v               v
//!                  (repeat until complete)
//! ```
//!
//! ```typescript
//! const m = new Monty('result = external_func(1, 2)');
//!
//! let progress = m.start();
//! while (progress instanceof MontySnapshot) {
//!   console.log(`Calling ${progress.functionName} with args:`, progress.args);
//!   progress = progress.resume({ returnValue: 42 });
//! }
//! console.log('Final result:', progress.output);
//! ```

use std::borrow::Cow;

use monty::{
    ExcType, ExtFunctionResult, FunctionCall, LimitedTracker, MontyException, MontyObject, MontyRepl as CoreMontyRepl,
    MontyRun, NameLookup, NameLookupResult, NoLimitTracker, OsCall, PrintWriter, PrintWriterCallback, ResourceTracker,
    RunProgress,
};
use monty_type_checking::{type_check, SourceFile};
use napi::bindgen_prelude::*;
use napi_derive::napi;

use crate::{
    convert::{js_to_monty, monty_to_js, JsMontyObject},
    exceptions::{exc_js_to_monty, JsMontyException, MontyTypingError},
    limits::JsResourceLimits,
};

// NOTE(review): generic arguments (e.g. on `Option`, `Vec`, `Result`) appear to have been
// stripped from this text by extraction; signatures below are left exactly as found.

// =============================================================================
// Monty - Main interpreter class
// =============================================================================

/// A sandboxed Python interpreter instance.
///
/// Parses and compiles Python code on initialization, then can be run
/// multiple times with different input values. This separates the parsing
/// cost from execution, making repeated runs more efficient.
#[napi]
pub struct Monty {
    /// The compiled code runner, ready to execute.
    runner: MontyRun,
    /// The artificial name of the python code "file".
    script_name: String,
    /// Names of input variables expected by the code.
    input_names: Vec,
}

/// Options for creating a new Monty instance.
#[napi(object)]
#[derive(Default)]
pub struct MontyOptions {
    /// Name used in tracebacks and error messages. Default: 'main.py'
    pub script_name: Option,
    /// List of input variable names available in the code.
    pub inputs: Option>,
    /// Whether to perform type checking on the code. Default: false
    pub type_check: Option,
    /// Optional code to prepend before type checking.
    pub type_check_prefix_code: Option,
}

/// Options for running code.
#[napi(object)]
#[derive(Default)]
pub struct RunOptions<'env> {
    /// Dict of input variable values, looked up by the input names declared at creation.
    pub inputs: Option>,
    /// Resource limits configuration.
    pub limits: Option,
    /// Optional print callback function.
    pub print_callback: Option>,
    /// Dict of external function callbacks.
    /// Keys are function names, values are callable functions.
    pub external_functions: Option>,
}

/// Options for starting execution.
#[napi(object)]
#[derive(Default)]
pub struct StartOptions<'env> {
    /// Dict of input variable values.
    pub inputs: Option>,
    /// Resource limits configuration.
    pub limits: Option,
    /// Optional print callback function.
    pub print_callback: Option>,
}

#[napi]
impl Monty {
    /// Creates a new Monty interpreter by parsing the given code.
    ///
    /// Returns either a Monty instance, a MontyException (for syntax errors), or a MontyTypingError.
    /// The wrapper should check the result type and throw the appropriate error.
    ///
    /// Type checking, when enabled, runs before the code is parsed, so a typing
    /// error is reported without constructing the runner.
    ///
    /// @param code - Python code to execute
    /// @param options - Configuration options
    /// @returns Monty instance on success, or error object on failure
    #[napi]
    pub fn create(
        code: String,
        options: Option,
    ) -> Result> {
        let ResolvedMontyOptions {
            script_name,
            input_names,
            do_type_check,
            type_check_prefix_code,
        } = resolve_monty_options(options);

        // Perform type checking if requested
        if do_type_check {
            if let Some(error) = run_type_check_result(&code, &script_name, type_check_prefix_code.as_deref())? {
                return Ok(Either3::C(error));
            }
        }

        // Create the runner (parses the code)
        // `input_names` is cloned because the original is stored on the instance below.
        let runner = match MontyRun::new(code, &script_name, input_names.clone()) {
            Ok(r) => r,
            Err(exc) => return Ok(Either3::B(JsMontyException::new(exc))),
        };

        Ok(Either3::A(Self {
            runner,
            script_name,
            input_names,
        }))
    }

    /// Performs static type checking on the code.
    ///
    /// Returns either nothing (success) or a MontyTypingError.
    ///
    /// @param prefixCode - Optional code to prepend before type checking
    /// @returns null on success, or MontyTypingError on failure
    #[napi]
    pub fn type_check(&self, prefix_code: Option) -> Result> {
        run_type_check_result(self.runner.code(), &self.script_name, prefix_code.as_deref())
    }

    /// Executes the code and returns the result, or an exception object if execution fails.
    ///
    /// If runtime `externalFunctions` are provided, the start/resume loop is used
    /// to dispatch external function calls and name lookups. Otherwise, code is
    /// executed directly.
    ///
    /// Print output goes to the `printCallback` when one is supplied, otherwise to stdout.
    ///
    /// @param options - Execution options (inputs, limits, externalFunctions)
    /// @returns The result of the last expression, or a MontyException if execution fails
    #[napi]
    pub fn run<'env>(
        &self,
        env: &'env Env,
        options: Option>,
    ) -> Result, JsMontyException>> {
        let options = options.unwrap_or_default();
        let input_values = self.extract_input_values(options.inputs, *env)?;
        let external_functions = options.external_functions;

        // Declared without a value so that, when a callback is supplied, the writer can
        // hold `&mut print_cb` beyond the `match` arm that initialises it.
        let mut print_cb;
        let print_writer = match &options.print_callback {
            Some(func) => {
                print_cb = CallbackStringPrint::new_js(env, func)?;
                PrintWriter::Callback(&mut print_cb)
            }
            None => PrintWriter::Stdout,
        };

        // If we have runtime external functions, use the start/resume loop
        // to handle both FunctionCall and NameLookup dispatching
        if external_functions.is_some() {
            return self.run_with_external_functions(
                env,
                input_values,
                options.limits,
                external_functions,
                print_writer,
            );
        }

        // The two branches differ only in the tracker type handed to the runner.
        let result = if let Some(limits) = options.limits {
            let tracker = LimitedTracker::new(limits.into());
            self.runner.run(input_values, tracker, print_writer)
        } else {
            let tracker = NoLimitTracker;
            self.runner.run(input_values, tracker, print_writer)
        };

        match result {
            Ok(value) => Ok(Either::A(monty_to_js(&value, env)?)),
            Err(exc) => Ok(Either::B(JsMontyException::new(exc))),
        }
    }

    /// Internal helper to run code with external function callbacks.
    ///
    /// Handles both `FunctionCall` and `NameLookup` dispatch in a loop.
    /// For `NameLookup`, checks the runtime external functions map: if the name
    /// is found, resolves it as a `Function`; otherwise returns `Undefined`.
    ///
    /// `ResolveFutures` progress is rejected with a napi error, and `OsCall` progress
    /// is reported back as a `NotImplementedError` exception object.
    fn run_with_external_functions<'env>(
        &self,
        env: &'env Env,
        input_values: Vec,
        limits: Option,
        external_functions: Option>,
        mut print_output: PrintWriter<'_>,
    ) -> Result, JsMontyException>> {
        let runner = self.runner.clone();

        // Helper macro to handle the execution loop for both tracker types
        macro_rules! run_loop {
            ($tracker:expr) => {{
                let progress = runner.start(input_values, $tracker, print_output.reborrow());
                let mut progress = match progress {
                    Ok(p) => p,
                    Err(exc) => return Ok(Either::B(JsMontyException::new(exc))),
                };

                loop {
                    match progress {
                        RunProgress::Complete(result) => {
                            return Ok(Either::A(monty_to_js(&result, env)?));
                        }
                        RunProgress::FunctionCall(call) => {
                            let return_value = call_external_function(
                                env,
                                external_functions.as_ref(),
                                &call.function_name,
                                &call.args,
                                &call.kwargs,
                            )?;

                            progress = match call.resume(return_value, print_output.reborrow()) {
                                Ok(p) => p,
                                Err(exc) => return Ok(Either::B(JsMontyException::new(exc))),
                            };
                        }
                        RunProgress::NameLookup(lookup) => {
                            let result = resolve_name_lookup(external_functions.as_ref(), &lookup.name)?;

                            progress = match lookup.resume(result, print_output.reborrow()) {
                                Ok(p) => p,
                                Err(exc) => return Ok(Either::B(JsMontyException::new(exc))),
                            };
                        }
                        RunProgress::ResolveFutures(_) => {
                            return Err(Error::from_reason(
                                "Async futures are not supported in synchronous run(). Use start() for async execution.",
                            ));
                        }
                        RunProgress::OsCall(OsCall { function, .. }) => {
                            return Ok(Either::B(JsMontyException::new(MontyException::new(
                                ExcType::NotImplementedError,
                                Some(format!("OS function '{function}' not implemented")),
                            ))));
                        }
                    }
                }
            }};
        }

        if let Some(limits) = limits {
            let tracker = LimitedTracker::new(limits.into());
            run_loop!(tracker)
        } else {
            run_loop!(NoLimitTracker)
        }
    }

    /// Starts execution and returns a snapshot (paused at external call or name lookup),
    /// completion, or error.
    ///
    /// This method enables iterative execution where code pauses at external function
    /// calls or name lookups, allowing the host to provide return values before resuming.
    ///
    /// @param options - Execution options (inputs, limits)
    /// @returns MontySnapshot if paused at function call, MontyNameLookup if paused at
    ///   name lookup, MontyComplete if done, or MontyException if failed
    #[napi]
    pub fn start<'env>(
        &self,
        env: &'env Env,
        options: Option>,
    ) -> Result> {
        let options = options.unwrap_or_default();
        let input_values = self.extract_input_values(options.inputs, *env)?;

        // Clone the runner since start() consumes it - allows reuse of the parsed code
        let runner = self.runner.clone();

        // Build print writer and capture the callback ref for the snapshot
        let mut print_cb;
        let print_writer = match &options.print_callback {
            Some(func) => {
                print_cb = CallbackStringPrint::new_js(env, func)?;
                PrintWriter::Callback(&mut print_cb)
            }
            None => PrintWriter::Stdout,
        };
        // A separate reference to the same callback is handed to `progress_to_result`
        // together with the progress value.
        let print_callback_ref = options.print_callback.as_ref().map(Function::create_ref).transpose()?;

        // Start execution with appropriate tracker
        if let Some(limits) = options.limits {
            let tracker = LimitedTracker::new(limits.into());
            let progress = match runner.start(input_values, tracker, print_writer) {
                Ok(p) => p,
                Err(exc) => return Ok(Either4::D(JsMontyException::new(exc))),
            };
            Ok(progress_to_result(progress, print_callback_ref, self.script_name()))
        } else {
            let tracker = NoLimitTracker;
            let progress = match runner.start(input_values, tracker, print_writer) {
                Ok(p) => p,
                Err(exc) => return Ok(Either4::D(JsMontyException::new(exc))),
            };
            Ok(progress_to_result(progress, print_callback_ref, self.script_name()))
        }
    }

    /// Serializes the Monty instance to a binary format.
    ///
    /// The serialized data can be stored and later restored with `Monty.load()`.
    /// This allows caching parsed code to avoid re-parsing on subsequent runs.
    ///
    /// The runner, script name and input names are encoded with `postcard`.
    ///
    /// @returns Buffer containing the serialized Monty instance
    #[napi]
    pub fn dump(&self) -> Result {
        let serialized = SerializedMonty {
            runner: self.runner.clone(),
            script_name: self.script_name.clone(),
            input_names: self.input_names.clone(),
        };
        let bytes =
            postcard::to_allocvec(&serialized).map_err(|e| Error::from_reason(format!("Serialization failed: {e}")))?;
        Ok(Buffer::from(bytes))
    }

    /// Deserializes a Monty instance from binary format.
    ///
    /// @param data - The serialized Monty data from `dump()`
    /// @returns A new Monty instance
    #[napi(factory)]
    pub fn load(data: Buffer) -> Result {
        let serialized: SerializedMonty =
            postcard::from_bytes(&data).map_err(|e| Error::from_reason(format!("Deserialization failed: {e}")))?;

        Ok(Self {
            runner: serialized.runner,
            script_name: serialized.script_name,
            input_names: serialized.input_names,
        })
    }

    /// Returns the script name.
    #[napi(getter)]
    pub fn script_name(&self) -> String {
        self.script_name.clone()
    }

    /// Returns the input variable names.
    #[napi(getter)]
    pub fn inputs(&self) -> Vec {
        self.input_names.clone()
    }

    /// Returns a string representation of the Monty instance.
    ///
    /// The output names the number of source lines and the script name; the input
    /// names are appended only when at least one was declared.
    #[napi]
    pub fn repr(&self) -> String {
        use std::fmt::Write;
        let lines = self.runner.code().lines().count();
        let mut s = format!(
            "Monty(<{} line{} of code>, scriptName='{}'",
            lines,
            if lines == 1 { "" } else { "s" },
            self.script_name
        );
        if !self.input_names.is_empty() {
            write!(s, ", inputs={:?}", self.input_names).unwrap();
        }
        s.push(')');
        s
    }

    /// Extracts input values from the JS Object in the order they were declared.
    fn extract_input_values(&self, inputs: Option>, env: Env) -> Result> {
        extract_input_values_in_order(&self.input_names, inputs, env)
    }
}

/// Performs type checking on the code and returns the error object if there are type errors.
///
/// Returns `None` if type checking passes, or `Some(MontyTypingError)` if there are errors.
///
/// When `prefix_code` is given it is joined to `code` with a single newline before
/// checking; otherwise `code` is borrowed as-is.
fn run_type_check_result(code: &str, script_name: &str, prefix_code: Option<&str>) -> Result> {
    let source_code: Cow = if let Some(prefix_code) = prefix_code {
        format!("{prefix_code}\n{code}").into()
    } else {
        code.into()
    };

    let source_file = SourceFile::new(&source_code, script_name);
    // A failure of the checker itself becomes a napi error; diagnostics become `Some(..)`.
    let result =
        type_check(&source_file, None).map_err(|e| Error::from_reason(format!("Type checking failed: {e}")))?;

    Ok(result.map(MontyTypingError::from_failure))
}

// =============================================================================
// MontyRepl - Incremental no-replay REPL session
// =============================================================================

/// REPL state holder for napi interoperability.
///
/// `napi` classes cannot be generic, so this enum stores REPL sessions for both
/// resource tracker variants.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
enum EitherRepl {
    NoLimit(CoreMontyRepl),
    Limited(CoreMontyRepl),
}

/// Options for creating a new `MontyRepl` instance.
///
/// Controls the script name shown in tracebacks and optional resource limits
/// that apply to all subsequent `feed()` calls.
#[napi(object)]
#[derive(Default)]
pub struct MontyReplOptions {
    /// Name used in tracebacks and error messages. Default: 'main.py'
    pub script_name: Option,
    /// Resource limits configuration applied to all snippet executions.
    pub limits: Option,
}

/// Stateful no-replay REPL session.
///
/// Create with `new MontyRepl()` then call `feed()` to execute snippets
/// incrementally against persistent heap and namespace state.
#[napi] pub struct MontyRepl { repl: EitherRepl, script_name: String, } #[napi] impl MontyRepl { /// Creates an empty REPL session ready to receive snippets via `feed()`. /// /// No code is parsed or executed at construction time — all execution /// is driven through `feed()`. /// /// @param options - Optional configuration (scriptName, limits) #[napi(constructor)] #[must_use] pub fn new(options: Option) -> Self { let options = options.unwrap_or_default(); let script_name = options.script_name.unwrap_or_else(|| "main.py".to_string()); let repl = if let Some(limits) = options.limits { let tracker = LimitedTracker::new(limits.into()); EitherRepl::Limited(CoreMontyRepl::new(&script_name, tracker)) } else { EitherRepl::NoLimit(CoreMontyRepl::new(&script_name, NoLimitTracker)) }; Self { repl, script_name } } /// Returns the script name for this REPL session. #[napi(getter)] #[must_use] pub fn script_name(&self) -> String { self.script_name.clone() } /// Executes one incremental snippet against persistent REPL state. #[napi] pub fn feed<'env>( &mut self, env: &'env Env, code: String, ) -> Result, JsMontyException>> { let output = match &mut self.repl { EitherRepl::NoLimit(repl) => repl.feed_run(&code, vec![], PrintWriter::Stdout), EitherRepl::Limited(repl) => repl.feed_run(&code, vec![], PrintWriter::Stdout), }; match output { Ok(value) => Ok(Either::A(monty_to_js(&value, env)?)), Err(exc) => Ok(Either::B(JsMontyException::new(exc))), } } /// Serializes this REPL session to bytes. #[napi] pub fn dump(&self) -> Result { let serialized = SerializedRepl { repl: &self.repl, script_name: &self.script_name, }; let bytes = postcard::to_allocvec(&serialized).map_err(|e| Error::from_reason(format!("Serialization failed: {e}")))?; Ok(Buffer::from(bytes)) } /// Restores a REPL session from bytes produced by `dump()`. 
#[napi(factory)] pub fn load(data: Buffer) -> Result { let serialized: SerializedReplOwned = postcard::from_bytes(&data).map_err(|e| Error::from_reason(format!("Deserialization failed: {e}")))?; Ok(Self { repl: serialized.repl, script_name: serialized.script_name, }) } /// Returns a string representation of the REPL session. #[napi] #[must_use] pub fn repr(&self) -> String { format!("MontyRepl(scriptName='{}')", self.script_name) } } /// Fully resolved creation options shared by `Monty` and `MontyRepl`. /// /// This keeps parsing/type-checking defaults consistent across non-REPL and /// REPL entry points. struct ResolvedMontyOptions { script_name: String, input_names: Vec, do_type_check: bool, type_check_prefix_code: Option, } /// Normalizes optional JS-facing creation options into concrete defaults. fn resolve_monty_options(options: Option) -> ResolvedMontyOptions { let options = options.unwrap_or(MontyOptions { script_name: None, inputs: None, type_check: None, type_check_prefix_code: None, }); ResolvedMontyOptions { script_name: options.script_name.unwrap_or_else(|| "main.py".to_string()), input_names: options.inputs.unwrap_or_default(), do_type_check: options.type_check.unwrap_or(false), type_check_prefix_code: options.type_check_prefix_code, } } /// Extracts input values in declaration order from a JS object. /// /// This helper is shared by regular `Monty` execution and direct REPL creation /// so both paths perform identical input validation. fn extract_input_values_in_order( input_names: &[String], inputs: Option>, env: Env, ) -> Result> { if input_names.is_empty() { if inputs.is_some() { return Err(Error::from_reason( "No input variables declared but inputs object was provided", )); } return Ok(vec![]); } let Some(inputs) = inputs else { return Err(Error::from_reason(format!("Missing required inputs: {input_names:?}"))); }; input_names .iter() .map(|name| { if !inputs.has_named_property(name)? 
{ return Err(Error::from_reason(format!("Missing required input: '{name}'"))); } let value: Unknown = inputs.get_named_property(name)?; js_to_monty(value, env) }) .collect() } // ============================================================================= // EitherSnapshot - Internal enum to handle generic resource tracker types // ============================================================================= /// Runtime execution snapshot, holds a `FunctionCall` for either resource tracker variant /// since napi structs can't be generic. /// /// Used internally by `MontySnapshot` to store execution state. /// The `Done` variant indicates the snapshot has been consumed. #[derive(Debug, serde::Serialize, serde::Deserialize)] enum EitherSnapshot { NoLimit(FunctionCall), Limited(FunctionCall), /// Sentinel indicating the snapshot has been consumed via `resume()`. Done, } // ============================================================================= // MontySnapshot - Paused execution at an external function call // ============================================================================= /// Represents paused execution waiting for an external function call return value. /// /// Contains information about the pending external function call and allows /// resuming execution with the return value or an exception. #[napi] pub struct MontySnapshot { /// The execution state that can be resumed. snapshot: EitherSnapshot, /// Name of the script being executed. script_name: String, /// The name of the external function being called. function_name: String, /// The positional arguments passed to the function (stored as MontyObject for serialization). args: Vec, /// The keyword arguments passed to the function (stored as MontyObject pairs for serialization). kwargs: Vec<(MontyObject, MontyObject)>, /// Optional print callback function. print_callback: Option, } /// Options for resuming execution. 
#[napi(object)] pub struct ResumeOptions<'env> { /// The value to return from the external function call. pub return_value: Option>, /// An exception to raise in the interpreter. /// Format: { type: string, message: string } pub exception: Option, } /// Input for raising an exception during resume. #[napi(object)] pub struct ExceptionInput { /// The exception type name (e.g., "ValueError"). pub r#type: String, /// The exception message. pub message: String, } /// Options for loading a serialized snapshot. #[napi(object)] pub struct SnapshotLoadOptions<'env> { /// Optional print callback function. pub print_callback: Option>, // Future: could add dataclass-like registry support } #[napi] impl MontySnapshot { /// Returns the name of the script being executed. #[napi(getter)] pub fn script_name(&self) -> String { self.script_name.clone() } /// Returns the name of the external function being called. #[napi(getter)] pub fn function_name(&self) -> String { self.function_name.clone() } /// Returns the positional arguments passed to the external function. #[napi(getter)] pub fn args<'env>(&self, env: &'env Env) -> Result>> { self.args.iter().map(|obj| monty_to_js(obj, env)).collect() } /// Returns the keyword arguments passed to the external function as an object. #[napi(getter)] pub fn kwargs<'env>(&self, env: &'env Env) -> Result> { let mut obj = Object::new(env)?; for (k, v) in &self.kwargs { // Keys should be strings let key = match k { MontyObject::String(s) => s.clone(), _ => format!("{k:?}"), }; let js_value = monty_to_js(v, env)?; obj.set_named_property(&key, js_value)?; } Ok(obj) } /// Resumes execution with either a return value or an exception. /// /// Exactly one of `returnValue` or `exception` must be provided. 
/// /// @param options - Object with either `returnValue` or `exception` /// @returns MontySnapshot if paused at function call, MontyNameLookup if paused at /// name lookup, MontyComplete if done, or MontyException if failed #[napi] pub fn resume<'env>( &mut self, env: &'env Env, options: ResumeOptions<'env>, ) -> Result> { // Validate that exactly one of returnValue or exception is provided let external_result = match (options.return_value, options.exception) { (Some(value), None) => { let monty_value = js_to_monty(value, *env)?; ExtFunctionResult::Return(monty_value) } (None, Some(exc)) => { let monty_exc = MontyException::new(string_to_exc_type(&exc.r#type)?, Some(exc.message)); ExtFunctionResult::Error(monty_exc) } (Some(_), Some(_)) => { return Err(Error::from_reason( "resume() accepts either returnValue or exception, not both", )); } (None, None) => { return Err(Error::from_reason("resume() requires either returnValue or exception")); } }; // Take the snapshot, replacing with Done let snapshot = std::mem::replace(&mut self.snapshot, EitherSnapshot::Done); // Take the print callback // This is necessary to move out of `&mut self` to please the borrow checker. // Unless the entire snapshot generator is refactored we have to do this. 
let print_callback = std::mem::take(&mut self.print_callback); // Build print writer from the callback ref let mut print_cb; let print_writer = match &print_callback { Some(func) => { print_cb = CallbackStringPrint::new_js_ref(env, func)?; PrintWriter::Callback(&mut print_cb) } None => PrintWriter::Stdout, }; // Resume execution based on the snapshot type match snapshot { EitherSnapshot::NoLimit(call) => { let progress = match call.resume(external_result, print_writer) { Ok(p) => p, Err(exc) => return Ok(Either4::D(JsMontyException::new(exc))), }; Ok(progress_to_result(progress, print_callback, self.script_name.clone())) } EitherSnapshot::Limited(call) => { let progress = match call.resume(external_result, print_writer) { Ok(p) => p, Err(exc) => return Ok(Either4::D(JsMontyException::new(exc))), }; Ok(progress_to_result(progress, print_callback, self.script_name.clone())) } EitherSnapshot::Done => Err(Error::from_reason("Snapshot has already been resumed")), } } /// Serializes the MontySnapshot to a binary format. /// /// The serialized data can be stored and later restored with `MontySnapshot.load()`. /// This allows suspending execution and resuming later, potentially in a different process. /// /// @returns Buffer containing the serialized snapshot #[napi] pub fn dump(&self) -> Result { if matches!(self.snapshot, EitherSnapshot::Done) { return Err(Error::from_reason("Cannot dump snapshot that has already been resumed")); } let serialized = SerializedSnapshot { snapshot: &self.snapshot, script_name: &self.script_name, function_name: &self.function_name, args: &self.args, kwargs: &self.kwargs, }; let bytes = postcard::to_allocvec(&serialized).map_err(|e| Error::from_reason(format!("Serialization failed: {e}")))?; Ok(Buffer::from(bytes)) } /// Deserializes a MontySnapshot from binary format. 
/// /// @param data - The serialized snapshot data from `dump()` /// @param options - Optional load options (reserved for future use) /// @returns A new MontySnapshot instance #[napi(factory)] pub fn load(data: Buffer, options: Option) -> Result { let serialized: SerializedSnapshotOwned = postcard::from_bytes(&data).map_err(|e| Error::from_reason(format!("Deserialization failed: {e}")))?; Ok(Self { snapshot: serialized.snapshot, script_name: serialized.script_name, function_name: serialized.function_name, args: serialized.args, kwargs: serialized.kwargs, print_callback: options .as_ref() .and_then(|t| t.print_callback.as_ref()) .map(Function::create_ref) .transpose()?, }) } /// Returns a string representation of the MontySnapshot. #[napi] pub fn repr(&self) -> String { format!( "MontySnapshot(scriptName='{}', functionName='{}', args={:?}, kwargs={:?})", self.script_name, self.function_name, self.args, self.kwargs ) } } // ============================================================================= // MontyComplete - Completed execution // ============================================================================= /// Represents completed execution with a final output value. /// /// The output value is stored as a `MontyObject` internally and converted to JS on access. #[napi] pub struct MontyComplete { /// The final output value from the executed code. output_value: MontyObject, } #[napi] impl MontyComplete { /// Returns the final output value from the executed code. #[napi(getter)] pub fn output<'env>(&self, env: &'env Env) -> Result> { monty_to_js(&self.output_value, env) } /// Returns a string representation of the MontyComplete. 
#[napi]
#[must_use]
pub fn repr(&self) -> String {
    format!("MontyComplete(output={:?})", self.output_value)
}
}

// =============================================================================
// EitherLookupSnapshot - Internal enum for NameLookup tracker variants
// =============================================================================

/// Runtime execution snapshot, holds a `NameLookup` for either resource tracker variant
/// since napi structs can't be generic.
///
/// The `Done` variant indicates the snapshot has been consumed.
// NOTE(review): the tracker type names below were reconstructed from the variant
// names; confirm them against the `monty` imports at the top of this file.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
enum EitherLookupSnapshot {
    NoLimit(NameLookup<NoLimitTracker>),
    Limited(NameLookup<LimitedTracker>),
    /// Sentinel indicating the snapshot has been consumed via `resume()`.
    Done,
}

/// Trait to convert a typed `NameLookup` into `EitherLookupSnapshot`.
///
/// The trait is generic over the tracker so that each tracker type can select
/// its own enum variant; without the parameter the two impls below would overlap.
trait FromLookupSnapshot<T: ResourceTracker> {
    /// Wraps a name-lookup snapshot.
    fn from_lookup(lookup: NameLookup<T>) -> Self;
}

impl FromLookupSnapshot<NoLimitTracker> for EitherLookupSnapshot {
    fn from_lookup(lookup: NameLookup<NoLimitTracker>) -> Self {
        Self::NoLimit(lookup)
    }
}

impl FromLookupSnapshot<LimitedTracker> for EitherLookupSnapshot {
    fn from_lookup(lookup: NameLookup<LimitedTracker>) -> Self {
        Self::Limited(lookup)
    }
}

// =============================================================================
// MontyNameLookup - Paused execution at a name lookup
// =============================================================================

/// Represents paused execution waiting for a name to be resolved.
///
/// The host should check if the variable name corresponds to a known value
/// (e.g., an external function). Call `resume()` with the value to continue
/// execution, or call `resume()` with no value to raise `NameError`.
#[napi]
pub struct MontyNameLookup {
    /// The execution state that can be resumed.
    snapshot: EitherLookupSnapshot,
    /// Name of the script being executed.
    script_name: String,
    /// The name of the variable being looked up.
    variable_name: String,
    /// Optional print callback function.
    print_callback: Option<JsPrintCallbackRef>,
}

/// Options for resuming execution from a name lookup.
///
/// If `value` is provided, the name resolves to that value and execution continues.
/// If `value` is omitted or undefined, the VM raises a `NameError`.
#[napi(object)]
pub struct NameLookupResumeOptions<'env> {
    /// The value to provide for the name.
    pub value: Option<Unknown<'env>>,
}

/// Options for loading a serialized name lookup snapshot.
#[napi(object)]
pub struct NameLookupLoadOptions<'env> {
    /// Optional print callback function.
    pub print_callback: Option<JsPrintCallback<'env>>,
}

#[napi]
impl MontyNameLookup {
    /// Returns the name of the script being executed.
    #[napi(getter)]
    pub fn script_name(&self) -> String {
        self.script_name.clone()
    }

    /// Returns the name of the variable being looked up.
    #[napi(getter)]
    pub fn variable_name(&self) -> String {
        self.variable_name.clone()
    }

    /// Resumes execution after resolving the name lookup.
    ///
    /// If `value` is provided, the name resolves to that value and execution continues.
    /// If `value` is omitted or undefined, the VM raises a `NameError`.
    ///
    /// Returns an error if this lookup has already been resumed, if `value` cannot be
    /// converted by `js_to_monty`, or if the print callback cannot be re-borrowed.
    ///
    /// @param options - Optional object with `value` to resolve the name to
    /// @returns MontySnapshot if paused at function call, MontyNameLookup if paused at
    ///   another name lookup, MontyComplete if done, or MontyException if failed
    #[napi]
    pub fn resume<'env>(
        &mut self,
        env: &'env Env,
        options: Option<NameLookupResumeOptions<'env>>,
    ) -> Result<Either4<MontySnapshot, MontyNameLookup, MontyComplete, JsMontyException>> {
        // Convert the value first: a conversion failure returns before the
        // snapshot is touched, so the lookup can still be resumed afterwards.
        let lookup_result = match options.and_then(|opts| opts.value) {
            Some(value) => {
                let monty_value = js_to_monty(value, *env)?;
                NameLookupResult::Value(monty_value)
            }
            None => NameLookupResult::Undefined,
        };

        // Take the snapshot, replacing with Done
        let snapshot = std::mem::replace(&mut self.snapshot, EitherLookupSnapshot::Done);

        // Take the print callback (leaves `None` behind); it is handed on to
        // whichever object `progress_to_result` builds next.
        let print_callback = std::mem::take(&mut self.print_callback);

        // Build print writer from the callback ref.
        // NOTE(review): if re-borrowing fails here, the snapshot has already been
        // replaced with `Done` and the callback dropped.
        let mut print_cb;
        let print_writer = match &print_callback {
            Some(func) => {
                print_cb = CallbackStringPrint::new_js_ref(env, func)?;
                PrintWriter::Callback(&mut print_cb)
            }
            None => PrintWriter::Stdout,
        };

        // Both live arms are identical apart from the tracker type carried by `lookup`.
        match snapshot {
            EitherLookupSnapshot::NoLimit(lookup) => {
                let progress = match lookup.resume(lookup_result, print_writer) {
                    Ok(p) => p,
                    Err(exc) => return Ok(Either4::D(JsMontyException::new(exc))),
                };
                Ok(progress_to_result(progress, print_callback, self.script_name.clone()))
            }
            EitherLookupSnapshot::Limited(lookup) => {
                let progress = match lookup.resume(lookup_result, print_writer) {
                    Ok(p) => p,
                    Err(exc) => return Ok(Either4::D(JsMontyException::new(exc))),
                };
                Ok(progress_to_result(progress, print_callback, self.script_name.clone()))
            }
            EitherLookupSnapshot::Done => Err(Error::from_reason("Name lookup has already been resumed")),
        }
    }

    /// Serializes the MontyNameLookup to a binary format.
    ///
    /// The serialized data can be stored and later restored with `MontyNameLookup.load()`.
    /// The print callback is not part of the serialized data. Returns an error if the
    /// lookup has already been resumed or if serialization fails.
    ///
    /// @returns Buffer containing the serialized name lookup snapshot
    #[napi]
    pub fn dump(&self) -> Result<Buffer> {
        if matches!(self.snapshot, EitherLookupSnapshot::Done) {
            return Err(Error::from_reason(
                "Cannot dump name lookup that has already been resumed",
            ));
        }

        let serialized = SerializedNameLookup {
            snapshot: &self.snapshot,
            script_name: &self.script_name,
            variable_name: &self.variable_name,
        };

        let bytes =
            postcard::to_allocvec(&serialized).map_err(|e| Error::from_reason(format!("Serialization failed: {e}")))?;
        Ok(Buffer::from(bytes))
    }

    /// Deserializes a MontyNameLookup from binary format.
    ///
    /// @param data - The serialized data from `dump()`
    /// @param options - Optional load options
    /// @returns A new MontyNameLookup instance
    #[napi(factory)]
    pub fn load(data: Buffer, options: Option<NameLookupLoadOptions>) -> Result<Self> {
        let serialized: SerializedNameLookupOwned =
            postcard::from_bytes(&data).map_err(|e| Error::from_reason(format!("Deserialization failed: {e}")))?;

        Ok(Self {
            snapshot: serialized.snapshot,
            script_name: serialized.script_name,
            variable_name: serialized.variable_name,
            // The callback is supplied again by the caller and stored as a reference.
            print_callback: options
                .as_ref()
                .and_then(|t| t.print_callback.as_ref())
                .map(Function::create_ref)
                .transpose()?,
        })
    }

    /// Returns a string representation of the MontyNameLookup.
    #[napi]
    pub fn repr(&self) -> String {
        format!(
            "MontyNameLookup(scriptName='{}', variableName='{}')",
            self.script_name, self.variable_name
        )
    }
}

// Function type for JS callback used in `CallbackStringPrint`.
type JsPrintCallback<'env> = Function<'env, FnArgs<(&'static str, String)>, ()>;
type JsPrintCallbackRef = FunctionRef<FnArgs<(&'static str, String)>, ()>;

/// A `PrintWriter` implementation that calls a JavaScript callback for each print output.
///
/// This structure internally holds a `napi::Function`.
pub struct CallbackStringPrint<'env>(JsPrintCallback<'env>);

impl<'env> CallbackStringPrint<'env> {
    /// Creates a new `CallbackStringPrint` from a `JsFunction`.
    pub fn new_js(env: &'env Env, func: &JsPrintCallback<'env>) -> napi::Result<Self> {
        Ok(Self(func.create_ref()?.borrow_back(env)?))
    }

    /// Creates a new printer from a function reference.
    ///
    /// This will re-borrow the function reference for use in printing.
    pub fn new_js_ref(env: &'env Env, func: &JsPrintCallbackRef) -> napi::Result<Self> {
        Ok(Self(func.borrow_back(env)?))
    }
}

impl PrintWriterCallback for CallbackStringPrint<'_> {
    /// Passes `output` to the JS callback as `("stdout", text)`; a JS-side error is
    /// converted with `exc_js_to_monty`.
    fn stdout_write(&mut self, output: Cow<'_, str>) -> std::result::Result<(), MontyException> {
        self.0
            .call(("stdout", output.as_ref().to_owned()).into())
            .map_err(exc_js_to_monty)?;
        Ok(())
    }

    /// Passes the single character `end` to the JS callback as a one-character string.
    fn stdout_push(&mut self, end: char) -> std::result::Result<(), MontyException> {
        self.0
            .call(("stdout", end.to_string()).into())
            .map_err(exc_js_to_monty)?;
        Ok(())
    }
}

// =============================================================================
// Helper functions for progress conversion
// =============================================================================

/// Converts a `RunProgress` to either a `MontySnapshot`, `MontyNameLookup`,
/// `MontyComplete`, or `JsMontyException`.
///
/// `NameLookup` events are surfaced to the host as `MontyNameLookup` instances,
/// allowing the host to decide how to resolve each name (or let the VM raise `NameError`).
///
/// For progress types that are not yet supported in the JS bindings (`ResolveFutures`, `OsCall`),
/// returns a `JsMontyException` with `NotImplementedError` instead of panicking, matching
/// the Python bindings behavior.
fn progress_to_result<T>(
    progress: RunProgress<T>,
    print_callback: Option<JsPrintCallbackRef>,
    script_name: String,
) -> Either4<MontySnapshot, MontyNameLookup, MontyComplete, JsMontyException>
where
    T: ResourceTracker + serde::Serialize + serde::de::DeserializeOwned,
    EitherSnapshot: FromSnapshot<T>,
    EitherLookupSnapshot: FromLookupSnapshot<T>,
{
    match progress {
        RunProgress::Complete(result) => Either4::C(MontyComplete { output_value: result }),
        RunProgress::FunctionCall(call) => {
            // Copy the call details out before `call` is moved into the snapshot,
            // so they are available as plain fields on `MontySnapshot`.
            let function_name = call.function_name.clone();
            let args = call.args.clone();
            let kwargs = call.kwargs.clone();
            Either4::A(MontySnapshot {
                snapshot: EitherSnapshot::from_snapshot(call),
                script_name,
                function_name,
                args,
                kwargs,
                print_callback,
            })
        }
        RunProgress::NameLookup(lookup) => {
            // Same pattern: keep the name, then move `lookup` into the wrapper enum.
            let variable_name = lookup.name.clone();
            Either4::B(MontyNameLookup {
                snapshot: EitherLookupSnapshot::from_lookup(lookup),
                script_name,
                variable_name,
                print_callback,
            })
        }
        // The remaining progress kinds are reported as exceptions rather than panicking.
        RunProgress::ResolveFutures(_) => Either4::D(JsMontyException::new(MontyException::new(
            ExcType::NotImplementedError,
            Some("Async futures (ResolveFutures) are not yet supported in the JS bindings".to_owned()),
        ))),
        RunProgress::OsCall(OsCall { function, .. }) => Either4::D(JsMontyException::new(MontyException::new(
            ExcType::NotImplementedError,
            Some(format!("OS function '{function}' not implemented")),
        ))),
    }
}

/// Trait to convert a typed `FunctionCall` into `EitherSnapshot`.
///
/// Generic over the tracker so each tracker type maps to its own enum variant;
/// without the parameter the two impls below would be overlapping impls.
trait FromSnapshot<T: ResourceTracker> {
    /// Wraps a function-call snapshot.
    fn from_snapshot(call: FunctionCall<T>) -> Self;
}

// NOTE(review): tracker type names reconstructed from the `NoLimit`/`Limited`
// variant names; confirm against the `monty` imports at the top of this file.
impl FromSnapshot<NoLimitTracker> for EitherSnapshot {
    fn from_snapshot(call: FunctionCall<NoLimitTracker>) -> Self {
        Self::NoLimit(call)
    }
}

impl FromSnapshot<LimitedTracker> for EitherSnapshot {
    fn from_snapshot(call: FunctionCall<LimitedTracker>) -> Self {
        Self::Limited(call)
    }
}

/// Converts a string exception type to `ExcType`.
fn string_to_exc_type(type_name: &str) -> Result { type_name .parse() .map_err(|_| Error::from_reason(format!("Invalid exception type: '{type_name}'"))) } // ============================================================================= // Serialization types // ============================================================================= /// Serialization wrapper for `Monty` that includes all fields needed for reconstruction. #[derive(serde::Serialize, serde::Deserialize)] struct SerializedMonty { runner: MontyRun, script_name: String, input_names: Vec, } /// Serialization wrapper for `MontyRepl` using borrowed references. #[derive(serde::Serialize)] struct SerializedRepl<'a> { repl: &'a EitherRepl, script_name: &'a str, } /// Owned version of `SerializedRepl` for deserialization. #[derive(serde::Deserialize)] struct SerializedReplOwned { repl: EitherRepl, script_name: String, } /// Serialization wrapper for `MontySnapshot` using borrowed references. #[derive(serde::Serialize)] struct SerializedSnapshot<'a> { snapshot: &'a EitherSnapshot, script_name: &'a str, function_name: &'a str, args: &'a [MontyObject], kwargs: &'a [(MontyObject, MontyObject)], } /// Owned version of `SerializedSnapshot` for deserialization. #[derive(serde::Deserialize)] struct SerializedSnapshotOwned { snapshot: EitherSnapshot, script_name: String, function_name: String, args: Vec, kwargs: Vec<(MontyObject, MontyObject)>, } /// Serialization wrapper for `MontyNameLookup` using borrowed references. #[derive(serde::Serialize)] struct SerializedNameLookup<'a> { snapshot: &'a EitherLookupSnapshot, script_name: &'a str, variable_name: &'a str, } /// Owned version of `SerializedNameLookup` for deserialization. 
#[derive(serde::Deserialize)] struct SerializedNameLookupOwned { snapshot: EitherLookupSnapshot, script_name: String, variable_name: String, } // ============================================================================= // External function support // ============================================================================= /// Calls a JavaScript external function and returns the result. /// /// Converts args/kwargs from Monty format, calls the JS function, /// and converts the result back to Monty format (or an exception). fn call_external_function( env: &Env, external_functions: Option<&Object<'_>>, function_name: &str, args: &[MontyObject], kwargs: &[(MontyObject, MontyObject)], ) -> Result { // Get the external functions dict, or error if not provided let functions = external_functions.ok_or_else(|| { Error::from_reason(format!( "External function '{function_name}' called but no externalFunctions provided" )) })?; // Look up the function by name if !functions.has_named_property(function_name)? 
{ // Return a NameError exception — matches Python's behavior for undefined names let exc = MontyException::new( ExcType::NameError, Some(format!("name '{function_name}' is not defined")), ); return Ok(ExtFunctionResult::Error(exc)); } let callable: Unknown = functions.get_named_property(function_name)?; // Convert positional arguments to JS let mut js_args: Vec = Vec::with_capacity(args.len() + 1); for arg in args { js_args.push(monty_to_js(arg, env)?.raw()); } // If we have kwargs, add them as a final object argument if !kwargs.is_empty() { let mut kwargs_obj = Object::new(env)?; for (key, value) in kwargs { let key_str = match key { MontyObject::String(s) => s.clone(), _ => format!("{key:?}"), }; kwargs_obj.set_named_property(&key_str, monty_to_js(value, env)?)?; } js_args.push(kwargs_obj.raw()); } // Get undefined for the 'this' argument let mut undefined_raw = std::ptr::null_mut(); // SAFETY: [DH] - all arguments are valid and result is valid on success unsafe { sys::napi_get_undefined(env.raw(), &raw mut undefined_raw); } // Call the function using raw napi let mut result_raw = std::ptr::null_mut(); // SAFETY: [DH] - all arguments are valid and result is valid on success let status = unsafe { sys::napi_call_function( env.raw(), undefined_raw, // this = undefined callable.raw(), js_args.len(), js_args.as_ptr(), &raw mut result_raw, ) }; if status != sys::Status::napi_ok { // An error occurred - get the pending exception let mut is_exception = false; // SAFETY: [DH] - all arguments are valid unsafe { sys::napi_is_exception_pending(env.raw(), &raw mut is_exception) }; if is_exception { let mut exception_raw = std::ptr::null_mut(); // SAFETY: [DH] - all arguments are valid and exception_raw is valid on success let status = unsafe { sys::napi_get_and_clear_last_exception(env.raw(), &raw mut exception_raw) }; if status != sys::Status::napi_ok { // Failed to get the exception - return a generic error let exc = MontyException::new( ExcType::RuntimeError, 
Some("External function call failed and exception could not be retrieved".to_string()), ); return Ok(ExtFunctionResult::Error(exc)); } let exception_obj = Object::from_raw(env.raw(), exception_raw); let exc = extract_js_exception(exception_obj); return Ok(ExtFunctionResult::Error(exc)); } // Generic error let exc = MontyException::new(ExcType::RuntimeError, Some("External function call failed".to_string())); return Ok(ExtFunctionResult::Error(exc)); } // Convert the result back to Monty format // SAFETY: [DH] - result_raw is valid on success let result = unsafe { Unknown::from_raw_unchecked(env.raw(), result_raw) }; let monty_result = js_to_monty(result, *env)?; Ok(ExtFunctionResult::Return(monty_result)) } /// Extracts exception info from a JS exception object. fn extract_js_exception(exception_obj: Object<'_>) -> MontyException { // Try to get the 'name' property (e.g., "ValueError") let name: std::result::Result = exception_obj.get_named_property("name"); // Try to get the 'message' property let message: std::result::Result = exception_obj.get_named_property("message"); let exc_type = name .ok() .and_then(|n| string_to_exc_type(&n).ok()) .unwrap_or(ExcType::RuntimeError); let msg = message.ok(); MontyException::new(exc_type, msg) } /// Resolves a name lookup against the runtime external functions map. /// /// If the name exists as a property on the external functions object, returns /// `NameLookupResult::Value` with a `Function` object. Otherwise returns /// `NameLookupResult::Undefined` so the VM raises `NameError`. fn resolve_name_lookup(external_functions: Option<&Object<'_>>, name: &str) -> Result { if let Some(functions) = external_functions { if functions.has_named_property(name)? { return Ok(NameLookupResult::Value(MontyObject::Function { name: name.to_string(), docstring: None, // TODO, can we do better? 
})); } } Ok(NameLookupResult::Undefined) } ================================================ FILE: crates/monty-js/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2020", "module": "ESNext", "moduleResolution": "Bundler", "strict": true, "declaration": true, "declarationMap": true, "sourceMap": true, "esModuleInterop": true, "skipLibCheck": true, "noUnusedLocals": true, "noUnusedParameters": true }, "include": ["wrapper.ts"], "exclude": ["node_modules", "__test__", "benchmark"] } ================================================ FILE: crates/monty-js/wrapper.ts ================================================ // Custom error classes that extend Error for proper JavaScript error handling. // These wrap the native Rust classes to provide instanceof support. import type { ExceptionInfo, ExceptionInput, Frame, JsMontyObject, MontyOptions, NameLookupLoadOptions, NameLookupResumeOptions, ResourceLimits, ResumeOptions, RunOptions, SnapshotLoadOptions, StartOptions, } from './index.js' import { Monty as NativeMonty, MontyRepl as NativeMontyRepl, MontySnapshot as NativeMontySnapshot, MontyNameLookup as NativeMontyNameLookup, MontyComplete as NativeMontyComplete, MontyException as NativeMontyException, MontyTypingError as NativeMontyTypingError, } from './index.js' export type { MontyOptions, RunOptions, ResourceLimits, Frame, ExceptionInfo, StartOptions, ResumeOptions, ExceptionInput, SnapshotLoadOptions, NameLookupResumeOptions, NameLookupLoadOptions, JsMontyObject, } /** * Alias for ResourceLimits (deprecated name). */ export type JsResourceLimits = ResourceLimits /** * Base class for all Monty interpreter errors. * * This is the parent class for `MontySyntaxError`, `MontyRuntimeError`, and `MontyTypingError`. * Catching `MontyError` will catch any exception raised by Monty. 
*/ export class MontyError extends Error { protected _typeName: string protected _message: string constructor(typeName: string, message: string) { super(message ? `${typeName}: ${message}` : typeName) this.name = 'MontyError' this._typeName = typeName this._message = message // Maintains proper stack trace for where our error was thrown (only available on V8) if (Error.captureStackTrace) { Error.captureStackTrace(this, MontyError) } } /** * Returns information about the inner Python exception. */ get exception(): ExceptionInfo { return { typeName: this._typeName, message: this._message, } } /** * Returns formatted exception string. * @param format - 'type-msg' for 'ExceptionType: message', 'msg' for just the message */ display(format: 'type-msg' | 'msg' = 'msg'): string { switch (format) { case 'msg': return this._message case 'type-msg': return this._message ? `${this._typeName}: ${this._message}` : this._typeName default: throw new Error(`Invalid display format: '${format}'. Expected 'type-msg' or 'msg'`) } } } /** * Raised when Python code has syntax errors or cannot be parsed by Monty. * * The inner exception is always a `SyntaxError`. Use `display()` to get * formatted error output. */ export class MontySyntaxError extends MontyError { private _native: NativeMontyException | null constructor(messageOrNative: string | NativeMontyException) { if (typeof messageOrNative === 'string') { super('SyntaxError', messageOrNative) this._native = null } else { const exc = messageOrNative.exception super('SyntaxError', exc.message) this._native = messageOrNative } this.name = 'MontySyntaxError' if (Error.captureStackTrace) { Error.captureStackTrace(this, MontySyntaxError) } } /** * Returns formatted exception string. 
* @param format - 'type-msg' for 'SyntaxError: message', 'msg' for just the message */ override display(format: 'type-msg' | 'msg' = 'msg'): string { if (this._native && typeof this._native.display === 'function') { return this._native.display(format) } return super.display(format) } } /** * Raised when Monty code fails during execution. * * Provides access to the traceback frames where the error occurred via `traceback()`, * and formatted output via `display()`. */ export class MontyRuntimeError extends MontyError { private _native: NativeMontyException | null private _tracebackString: string | null private _frames: Frame[] | null constructor( nativeOrTypeName: NativeMontyException | string, message?: string, tracebackString?: string, frames?: Frame[], ) { if (typeof nativeOrTypeName === 'string') { // Legacy constructor: (typeName, message, tracebackString, frames) super(nativeOrTypeName, message!) this._native = null this._tracebackString = tracebackString ?? null this._frames = frames ?? null } else { // New constructor: (nativeException) const exc = nativeOrTypeName.exception super(exc.typeName, exc.message) this._native = nativeOrTypeName this._tracebackString = null this._frames = null } this.name = 'MontyRuntimeError' if (Error.captureStackTrace) { Error.captureStackTrace(this, MontyRuntimeError) } } /** * Returns the Monty traceback as an array of Frame objects. */ traceback(): Frame[] { if (this._native) { return this._native.traceback() } return this._frames || [] } /** * Returns formatted exception string. * @param format - 'traceback' for full traceback, 'type-msg' for 'ExceptionType: message', 'msg' for just the message */ display(format: 'traceback' | 'type-msg' | 'msg' = 'traceback'): string { if (this._native && typeof this._native.display === 'function') { return this._native.display(format) } // Fallback for legacy constructor switch (format) { case 'traceback': return this._tracebackString || this.message case 'type-msg': return this._message ? 
`${this._typeName}: ${this._message}` : this._typeName case 'msg': return this._message default: throw new Error(`Invalid display format: '${format}'. Expected 'traceback', 'type-msg', or 'msg'`) } } } export type TypingDisplayFormat = | 'full' | 'concise' | 'azure' | 'json' | 'jsonlines' | 'rdjson' | 'pylint' | 'gitlab' | 'github' /** * Raised when type checking finds errors in the code. * * This exception is raised when static type analysis detects type errors. * Use `displayDiagnostics()` to render rich diagnostics in various formats for tooling integration. * Use `display()` (inherited) for simple 'type-msg' or 'msg' formats. */ export class MontyTypingError extends MontyError { private _native: NativeMontyTypingError | null constructor(messageOrNative: string | NativeMontyTypingError, nativeError: NativeMontyTypingError | null = null) { if (typeof messageOrNative === 'string') { super('TypeError', messageOrNative) this._native = nativeError } else { const exc = messageOrNative.exception super('TypeError', exc.message) this._native = messageOrNative } this.name = 'MontyTypingError' if (Error.captureStackTrace) { Error.captureStackTrace(this, MontyTypingError) } } /** * Renders rich type error diagnostics for tooling integration. * * @param format - Output format (default: 'full') * @param color - Include ANSI color codes (default: false) */ displayDiagnostics(format: TypingDisplayFormat = 'full', color: boolean = false): string { if (this._native && typeof this._native.display === 'function') { return this._native.display(format, color) } return this._message } } /** * Wrapped Monty class that throws proper Error subclasses. */ export class Monty { private _native: NativeMonty /** * Creates a new Monty interpreter by parsing the given code. 
* * @param code - Python code to execute * @param options - Configuration options * @throws {MontySyntaxError} If the code has syntax errors * @throws {MontyTypingError} If type checking is enabled and finds errors */ constructor(code: string, options?: MontyOptions) { const result = NativeMonty.create(code, options) if (result instanceof NativeMontyException) { // Check typeName to distinguish syntax errors from other exceptions if (result.exception.typeName === 'SyntaxError') { throw new MontySyntaxError(result) } throw new MontyRuntimeError(result) } if (result instanceof NativeMontyTypingError) { throw new MontyTypingError(result) } this._native = result } /** * Performs static type checking on the code. * * @param prefixCode - Optional code to prepend before type checking * @throws {MontyTypingError} If type checking finds errors */ typeCheck(prefixCode?: string): void { const result = this._native.typeCheck(prefixCode) if (result instanceof NativeMontyTypingError) { throw new MontyTypingError(result) } } /** * Executes the code and returns the result. * * @param options - Execution options (inputs, limits) * @returns The result of the last expression * @throws {MontyRuntimeError} If the code raises an exception */ run(options?: RunOptions): JsMontyObject { const result = this._native.run(options) if (result instanceof NativeMontyException) { throw new MontyRuntimeError(result) } return result } /** * Starts execution and returns a snapshot (paused at external call or name lookup) or completion. * * @param options - Execution options (inputs, limits) * @returns MontySnapshot if paused at function call, MontyNameLookup if paused at * name lookup, MontyComplete if done * @throws {MontyRuntimeError} If the code raises an exception */ start(options?: StartOptions): MontySnapshot | MontyNameLookup | MontyComplete { const result = this._native.start(options) return wrapStartResult(result) } /** * Serializes the Monty instance to a binary format. 
*/ dump(): Buffer { return this._native.dump() } /** * Deserializes a Monty instance from binary format. */ static load(data: Buffer): Monty { const instance = Object.create(Monty.prototype) as Monty instance._native = NativeMonty.load(data) return instance } /** Returns the script name. */ get scriptName(): string { return this._native.scriptName } /** Returns the input variable names. */ get inputs(): string[] { return this._native.inputs } /** Returns a string representation of the Monty instance. */ repr(): string { return this._native.repr() } } /** Options for creating a new MontyRepl instance. */ export interface MontyReplOptions { /** Name used in tracebacks and error messages. Default: 'main.py' */ scriptName?: string /** Resource limits applied to all snippet executions. */ limits?: ResourceLimits } /** * Incremental no-replay REPL session. * * Create with `new MontyRepl()` then call `feed()` to execute snippets * incrementally against persistent state. */ export class MontyRepl { private _native: NativeMontyRepl /** * Creates an empty REPL session ready to receive snippets via `feed()`. * * @param options - Optional configuration (scriptName, limits) */ constructor(options?: MontyReplOptions) { this._native = new NativeMontyRepl(options) } /** Returns the script name for this REPL session. */ get scriptName(): string { return this._native.scriptName } /** * Executes one incremental snippet. * * @param code - Snippet code to execute * @returns Snippet output * @throws {MontyRuntimeError} If execution raises an exception */ feed(code: string): JsMontyObject { const result = this._native.feed(code) if (result instanceof NativeMontyException) { throw new MontyRuntimeError(result) } return result } /** Serializes the REPL session to bytes. */ dump(): Buffer { return this._native.dump() } /** Restores a REPL session from bytes. 
*/ static load(data: Buffer): MontyRepl { const native = NativeMontyRepl.load(data) const repl = Object.create(MontyRepl.prototype) as MontyRepl ;(repl as any)._native = native return repl } /** Returns a string representation of the REPL session. */ repr(): string { return this._native.repr() } } /** * Helper to wrap native start/resume results, throwing errors as needed. */ function wrapStartResult( result: NativeMontySnapshot | NativeMontyNameLookup | NativeMontyComplete | NativeMontyException, ): MontySnapshot | MontyNameLookup | MontyComplete { if (result instanceof NativeMontyException) { throw new MontyRuntimeError(result) } // Check MontyNameLookup before MontySnapshot — napi `Either4` may cause // false positives with `instanceof` if checked in the wrong order. if (result instanceof NativeMontyNameLookup) { return new MontyNameLookup(result) } if (result instanceof NativeMontySnapshot) { return new MontySnapshot(result) } if (result instanceof NativeMontyComplete) { return new MontyComplete(result) } throw new Error(`Unexpected result type from native binding: ${result}`) } /** * Represents paused execution waiting for an external function call return value. * * Contains information about the pending external function call and allows * resuming execution with the return value or an exception. */ export class MontySnapshot { private _native: NativeMontySnapshot constructor(nativeSnapshot: NativeMontySnapshot) { this._native = nativeSnapshot } /** Returns the name of the script being executed. */ get scriptName(): string { return this._native.scriptName } /** Returns the name of the external function being called. */ get functionName(): string { return this._native.functionName } /** Returns the positional arguments passed to the external function. */ get args(): JsMontyObject[] { return this._native.args } /** Returns the keyword arguments passed to the external function as an object. 
*/ get kwargs(): Record { return this._native.kwargs as Record } /** * Resumes execution with either a return value or an exception. * * @param options - Object with either `returnValue` or `exception` * @returns MontySnapshot if paused at function call, MontyNameLookup if paused at * name lookup, MontyComplete if done * @throws {MontyRuntimeError} If the code raises an exception */ resume(options: ResumeOptions): MontySnapshot | MontyNameLookup | MontyComplete { const result = this._native.resume(options) return wrapStartResult(result) } /** * Serializes the MontySnapshot to a binary format. */ dump(): Buffer { return this._native.dump() } /** * Deserializes a MontySnapshot from binary format. */ static load(data: Buffer, options?: SnapshotLoadOptions): MontySnapshot { const nativeSnapshot = NativeMontySnapshot.load(data, options) return new MontySnapshot(nativeSnapshot) } /** Returns a string representation of the MontySnapshot. */ repr(): string { return this._native.repr() } } /** * Represents paused execution waiting for a name to be resolved. * * The host should check if the variable name corresponds to a known value * (e.g., an external function). Call `resume()` with the value to continue * execution, or call `resume()` with no value to raise `NameError`. */ export class MontyNameLookup { private _native: NativeMontyNameLookup constructor(nativeNameLookup: NativeMontyNameLookup) { this._native = nativeNameLookup } /** Returns the name of the script being executed. */ get scriptName(): string { return this._native.scriptName } /** Returns the name of the variable being looked up. */ get variableName(): string { return this._native.variableName } /** * Resumes execution after resolving the name lookup. * * If `value` is provided, the name resolves to that value and execution continues. * If `value` is omitted/undefined, the VM raises a `NameError`. 
* * @param options - Optional object with `value` to resolve the name to * @returns MontySnapshot if paused at function call, MontyNameLookup if paused at * another name lookup, MontyComplete if done * @throws {MontyRuntimeError} If the code raises an exception */ resume(options?: NameLookupResumeOptions): MontySnapshot | MontyNameLookup | MontyComplete { const result = this._native.resume(options) return wrapStartResult(result) } /** * Serializes the MontyNameLookup to a binary format. */ dump(): Buffer { return this._native.dump() } /** * Deserializes a MontyNameLookup from binary format. */ static load(data: Buffer, options?: NameLookupLoadOptions): MontyNameLookup { const nativeLookup = NativeMontyNameLookup.load(data, options) return new MontyNameLookup(nativeLookup) } /** Returns a string representation of the MontyNameLookup. */ repr(): string { return this._native.repr() } } /** * Represents completed execution with a final output value. */ export class MontyComplete { private _native: NativeMontyComplete constructor(nativeComplete: NativeMontyComplete) { this._native = nativeComplete } /** Returns the final output value from the executed code. */ get output(): JsMontyObject { return this._native.output } /** Returns a string representation of the MontyComplete. */ repr(): string { return this._native.repr() } } /** * Options for `runMontyAsync`. */ export interface RunMontyAsyncOptions { /** Input values for the script. */ inputs?: Record /** External function implementations (sync or async). */ externalFunctions?: Record unknown> /** Resource limits. */ limits?: ResourceLimits /** Callback invoked on each print() call. The first argument is the stream name (always "stdout"), the second is the printed text. */ printCallback?: (stream: string, text: string) => void } /** * Runs a Monty script with async external function support. * * This function handles both synchronous and asynchronous external functions. 
export async function runMontyAsync(
  montyRunner: Monty,
  options: RunMontyAsyncOptions = {},
): Promise<MontyComplete['output']> {
  const { inputs, externalFunctions = {}, limits, printCallback } = options

  let progress: MontySnapshot | MontyNameLookup | MontyComplete = montyRunner.start({
    inputs,
    limits,
    printCallback,
  })

  while (!(progress instanceof MontyComplete)) {
    if (progress instanceof MontyNameLookup) {
      // Name lookup — a known external function is handed back as the value;
      // resuming with no value lets the interpreter raise NameError.
      const extFunction = externalFunctions[progress.variableName]
      progress = extFunction ? progress.resume({ value: extFunction }) : progress.resume()
      continue
    }

    // MontySnapshot — the script is paused on an external function call.
    const snapshot = progress
    const funcName = snapshot.functionName
    const extFunction = externalFunctions[funcName]

    if (!extFunction) {
      // Function not found — this shouldn't normally happen since the name
      // lookup would already have failed, but handle it defensively.
      progress = snapshot.resume({
        exception: {
          type: 'NameError',
          message: `name '${funcName}' is not defined`,
        },
      })
      continue
    }

    // Only the host callback runs inside the try block. `resume()` is kept
    // outside so that an error thrown by the interpreter while resuming is
    // not mistaken for a callback failure (which would resume the same
    // snapshot a second time and mask the original error).
    let result
    try {
      result = extFunction(...snapshot.args, snapshot.kwargs)
      // Await thenables so async external functions are supported.
      if (result && typeof (result as PromiseLike<unknown>).then === 'function') {
        result = await result
      }
    } catch (error) {
      // Convert whatever was thrown into a Monty exception. Optional chaining
      // keeps `throw null` / `throw undefined` from crashing this handler.
      const thrown = error as Partial<Error> | null | undefined
      progress = snapshot.resume({
        exception: {
          type: thrown?.name || 'RuntimeError',
          message: thrown?.message || String(error),
        },
      })
      continue
    }

    progress = snapshot.resume({ returnValue: result })
  }

  return progress.output
}
## Installation ```bash pip install pydantic-monty ``` ## Usage ### Basic Expression Evaluation ```python import pydantic_monty # Simple code with no inputs m = pydantic_monty.Monty('1 + 2') print(m.run()) #> 3 ``` ### Using Input Variables ```python import pydantic_monty # Create with code that uses input variables m = pydantic_monty.Monty('x * y', inputs=['x', 'y']) # Run multiple times with different inputs print(m.run(inputs={'x': 2, 'y': 3})) #> 6 print(m.run(inputs={'x': 10, 'y': 5})) #> 50 ``` ### Resource Limits ```python import pydantic_monty m = pydantic_monty.Monty('x + y', inputs=['x', 'y']) # With resource limits limits = pydantic_monty.ResourceLimits(max_duration_secs=1.0) result = m.run(inputs={'x': 1, 'y': 2}, limits=limits) assert result == 3 ``` ### External Functions ```python import pydantic_monty # Code that calls an external function m = pydantic_monty.Monty('double(x)', inputs=['x']) # Provide the external function implementation at runtime result = m.run(inputs={'x': 5}, external_functions={'double': lambda x: x * 2}) print(result) #> 10 ``` ### Iterative Execution with External Functions Use `start()` and `resume()` to handle external function calls iteratively, giving you control over each call: ```python import pydantic_monty code = """ data = fetch(url) len(data) """ m = pydantic_monty.Monty(code, inputs=['url']) # Start execution - pauses when fetch() is called result = m.start(inputs={'url': 'https://example.com'}) print(type(result)) #> print(result.function_name) # fetch #> fetch print(result.args) #> ('https://example.com',) # Perform the actual fetch, then resume with the result result = result.resume(return_value='hello world') print(type(result)) #> print(result.output) #> 11 ``` ### Serialization Both `Monty` and `FunctionSnapshot` can be serialized to bytes and restored later. 
// Build script entry point for the `pydantic-monty` crate.
//
// Its only job is to delegate to `pyo3_build_config`; per the linked PyO3
// guide this makes PyO3's Python-version `cfg` flags available to this crate.
fn main() {
    // see https://pyo3.rs/main/building-and-distribution/multiple-python-versions.html
    pyo3_build_config::use_pyo3_cfgs();
}
def main():
    """Run every Monty test case once and print a one-line summary.

    Each `*.py` file under `../monty/test_cases` is compiled with type checking
    enabled; when that raises `MontyTypingError` the file is recompiled without
    type checking and counted. The compiled code is then executed with print
    output discarded. Any other `MontyError` is ignored, while an unexpected
    exception is re-raised as `RuntimeError` naming the offending file.
    """
    cases_dir = Path(__file__).parent.parent / 'monty' / 'test_cases'
    run = run_success = type_errors = 0

    def discard_output(_, __):
        # Print output is irrelevant for profiling, so drop it.
        return None

    start = time.perf_counter()
    for py_file in cases_dir.glob('*.py'):
        code = py_file.read_text(encoding='utf-8')
        try:
            # Parsing + type checking; fall back to an unchecked compile
            # because many test cases intentionally contain type errors.
            try:
                m = pydantic_monty.Monty(code, type_check=True)
            except pydantic_monty.MontyTypingError:
                m = pydantic_monty.Monty(code)
                type_errors += 1

            # Execution
            run += 1
            m.run(print_callback=discard_output)
            run_success += 1
        except pydantic_monty.MontyError:
            # Syntax errors and runtime errors raised by the script are expected.
            continue
        except Exception as e:
            raise RuntimeError(f'Error running {py_file.name}: {e}') from e

    elapsed = time.perf_counter() - start
    print(f'Executed {run} test cases in {elapsed:.2f} seconds, {run_success} succeeded, {type_errors} had type errors')
classifiers = [ "Development Status :: 3 - Alpha", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", "License :: OSI Approved :: MIT License", "Operating System :: Unix", "Operating System :: POSIX :: Linux", "Environment :: MacOS X", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Internet", "Programming Language :: Python :: Implementation", ] dynamic = ["license", "version"] [project.urls] Homepage = "https://github.com/pydantic/monty" Source = "https://github.com/pydantic/monty" [tool.maturin] python-source = "python" module-name = "pydantic_monty._monty" features = ["pyo3/extension-module"] [dependency-groups] dev = [ "anyio>=4.0", "black>=25.12.0", "dirty-equals>=0.11", "inline-snapshot>=0.31.1", "pytest>=9.0.2", "pytest-examples>=0.0.14", "pytest-pretty>=1.3.0", ] [tool.pytest.ini_options] anyio_mode = "auto" xfail_strict = true filterwarnings = ["error"] ================================================ FILE: crates/monty-python/python/pydantic_monty/__init__.py ================================================ from __future__ import annotations from typing import TYPE_CHECKING, Any, Callable, Literal, TypedDict, TypeVar, cast if TYPE_CHECKING: from collections.abc import Awaitable from types import EllipsisType from ._monty import ( Frame, FunctionSnapshot, FutureSnapshot, Monty, MontyComplete, MontyError, MontyRepl, MontyRuntimeError, MontySyntaxError, MontyTypingError, NameLookupSnapshot, __version__, load_repl_snapshot, load_snapshot, ) from .os_access import AbstractFile, AbstractOS, CallbackFile, MemoryFile, OSAccess, 
async def run_monty_async(
    monty_runner: Monty,
    *,
    inputs: dict[str, Any] | None = None,
    external_functions: dict[str, Callable[..., Any]] | None = None,
    limits: ResourceLimits | None = None,
    print_callback: Callable[[Literal['stdout'], str], None] | None = None,
    os: AbstractOS | None = None,
) -> Any:
    """Execute a Monty script to completion, servicing external calls along the way.

    Execution is started with `monty_runner.start(...)` via `_run_in_pool`, and
    the resulting progress object is handed to `_dispatch_loop`, which keeps
    resuming it until the script finishes.

    Args:
        monty_runner: The Monty runner to use.
        inputs: A dictionary of inputs to use.
        external_functions: A dictionary of external functions to use, can be
            sync or async. `None` is treated as an empty dictionary.
        limits: The resource limits to use.
        print_callback: A callback to use for printing.
        os: Optional OS access handler for filesystem operations
            (e.g., OSAccess instance).

    Returns:
        The output of the Monty script.
    """
    from functools import partial

    start_call = partial(
        monty_runner.start,
        inputs=inputs,
        limits=limits,
        print_callback=print_callback,
    )
    first_progress = await _run_in_pool(start_call)
    output = await _dispatch_loop(first_progress, external_functions or {}, os)
    return output
async def _dispatch_loop(
    progress: FunctionSnapshot | NameLookupSnapshot | FutureSnapshot | MontyComplete,
    external_functions: dict[str, Callable[..., Any]],
    os: AbstractOS | None,
) -> Any:
    """Drive the snapshot/resume loop for both Monty and MontyRepl.

    Handles external function calls (sync and async), OS calls, dataclass
    method calls, name lookups, and future resolution.

    Args:
        progress: The progress object returned by `start()` / `feed_start()`.
        external_functions: Host callables keyed by the name used in the script.
        os: Handler for OS function calls, or `None` if OS access is unavailable
            (OS calls are then answered with `NotImplementedError`).

    Returns:
        The `output` of the final `MontyComplete`.

    Raises:
        RuntimeError: If a `FutureSnapshot` is waiting on call ids for which
            this loop has no running task.
        Exception: Anything raised by `resume()` propagates unchanged.
    """
    import asyncio
    import inspect
    from functools import partial

    # call_id -> task awaiting the result of an async external call
    tasks: dict[int, asyncio.Task[tuple[int, ExternalResult]]] = {}

    async def call_and_resume(
        snapshot: FunctionSnapshot, func: Callable[..., Any], args: tuple[Any, ...]
    ) -> FunctionSnapshot | NameLookupSnapshot | FutureSnapshot | MontyComplete:
        """Call `func` for `snapshot` and resume with its value, exception or pending future."""
        try:
            result = func(*args, **snapshot.kwargs)
        except Exception as exc:
            return await _run_in_pool(partial(snapshot.resume, exception=exc))

        # Any awaitable (coroutine, Task, Future, ...) is resolved in the
        # background; `_run_external_function` accepts an arbitrary awaitable.
        if inspect.isawaitable(result):
            call_id = snapshot.call_id
            tasks[call_id] = asyncio.create_task(_run_external_function(call_id, result))
            return await _run_in_pool(partial(snapshot.resume, future=...))
        return await _run_in_pool(partial(snapshot.resume, return_value=result))

    try:
        while True:
            if isinstance(progress, MontyComplete):
                return progress.output
            elif isinstance(progress, FunctionSnapshot):
                # Handle OS function calls (e.g., Path.read_text, Path.exists)
                if progress.is_os_function:
                    # When is_os_function is True, function_name is always an OsFunction
                    os_func_name = cast(OsFunction, progress.function_name)
                    if os is None:
                        e = NotImplementedError(
                            f'OS function {progress.function_name} called but no os handler provided'
                        )
                        progress = await _run_in_pool(partial(progress.resume, exception=e))
                    else:
                        try:
                            result = os(os_func_name, progress.args, progress.kwargs)
                        except Exception as exc:
                            progress = await _run_in_pool(partial(progress.resume, exception=exc))
                        else:
                            progress = await _run_in_pool(partial(progress.resume, return_value=result))
                # Handle dataclass method calls (first arg is the instance)
                elif progress.is_method_call:
                    self_obj = progress.args[0]
                    method = getattr(self_obj, progress.function_name)
                    progress = await call_and_resume(progress, method, progress.args[1:])
                # Handle external function calls
                elif ext_function := external_functions.get(progress.function_name):
                    progress = await call_and_resume(progress, ext_function, progress.args)
                else:
                    e = LookupError(f"Unable to find '{progress.function_name}' in external functions dict")
                    progress = await _run_in_pool(partial(progress.resume, exception=e))
            elif isinstance(progress, NameLookupSnapshot):
                ext_function = external_functions.get(progress.variable_name)
                if ext_function is not None:
                    progress = await _run_in_pool(partial(progress.resume, value=ext_function))
                else:
                    # Resuming without a value raises NameError inside the interpreter.
                    progress = await _run_in_pool(progress.resume)
            else:
                assert isinstance(progress, FutureSnapshot), f'Unexpected progress type {progress!r}'

                pending_ids = progress.pending_call_ids
                current_tasks = [tasks[call_id] for call_id in pending_ids if call_id in tasks]
                if not current_tasks:
                    # asyncio.wait() rejects an empty collection with an opaque
                    # ValueError; fail with a message that names the cause.
                    raise RuntimeError(
                        f'FutureSnapshot is waiting on call ids {pending_ids!r} but no matching tasks are running'
                    )

                done, _ = await asyncio.wait(current_tasks, return_when=asyncio.FIRST_COMPLETED)

                results: dict[int, ExternalResult] = {}
                for task in done:
                    call_id, result = task.result()
                    results[call_id] = result
                    tasks.pop(call_id)

                progress = await _run_in_pool(partial(progress.resume, results))
    finally:
        if tasks:
            for task in tasks.values():
                task.cancel()
            # return_exceptions=True waits for every task to finish cancelling
            # and collects their CancelledErrors, so a cancellation of this
            # coroutine itself is no longer swallowed during cleanup.
            await asyncio.gather(*tasks.values(), return_exceptions=True)
@final
class Monty:
    """
    A sandboxed Python interpreter instance.

    Parses and compiles Python code on initialization, then can be run multiple times
    with different input values. This separates the parsing cost from execution,
    making repeated runs more efficient.
    """

    def __new__(
        cls,
        code: str,
        *,
        script_name: str = 'main.py',
        inputs: list[str] | None = None,
        type_check: bool = False,
        type_check_stubs: str | None = None,
        dataclass_registry: list[type] | None = None,
    ) -> Self:
        """
        Create a new Monty interpreter by parsing the given code.

        Arguments:
            code: Python code to execute
            script_name: Name used in tracebacks and error messages
            inputs: List of input variable names available in the code
            type_check: Whether to perform type checking on the code (default: False)
            type_check_stubs: Optional code to prepend before type checking,
                e.g. with input variable declarations or external function signatures
            dataclass_registry: Optional list of dataclass types to register for proper
                isinstance() support on output, see `register_dataclass()`.

        Raises:
            MontySyntaxError: If the code cannot be parsed
            MontyTypingError: If type_check is True and type errors are found
        """

    def type_check(self, prefix_code: str | None = None) -> None:
        """
        Perform static type checking on the code.

        Analyzes the code for type errors without executing it. This uses
        a subset of Python's type system supported by Monty.

        Arguments:
            prefix_code: Optional code to prepend before type checking,
                e.g. with input variable declarations or external function signatures.

        Raises:
            MontyTypingError: If type errors are found. Use `.display(format, color)`
                on the exception to render the diagnostics in different formats.
            RuntimeError: If the type checking infrastructure fails internally.
        """

    def run(
        self,
        *,
        inputs: dict[str, Any] | None = None,
        limits: ResourceLimits | None = None,
        external_functions: dict[str, Callable[..., Any]] | None = None,
        print_callback: Callable[[Literal['stdout'], str], None] | None = None,
        # NOTE(review): `MontyRepl.feed_run` annotates its `os` callback with a third
        # `dict[str, Any]` kwargs argument — confirm which signature is correct.
        os: Callable[[OsFunction, tuple[Any, ...]], Any] | None = None,
    ) -> Any:
        """
        Execute the code and return the result.

        The GIL is released allowing parallel execution.

        Arguments:
            inputs: Dict of input variable values (must match names from __init__)
            limits: Optional resource limits configuration
            external_functions: Dict of external function callbacks
            print_callback: Optional callback for print output
            os: Optional callback for OS calls.
                Called with (function_name, args) where function_name is like 'Path.exists'
                and args is a tuple of arguments. Must return the appropriate value for the
                OS function (e.g., bool for exists(), stat_result for stat()).

        Returns:
            The result of the last expression in the code

        Raises:
            MontyRuntimeError: If the code raises an exception during execution
        """

    def start(
        self,
        *,
        inputs: dict[str, Any] | None = None,
        limits: ResourceLimits | None = None,
        print_callback: Callable[[Literal['stdout'], str], None] | None = None,
    ) -> FunctionSnapshot | NameLookupSnapshot | FutureSnapshot | MontyComplete:
        """
        Start the code execution and return a progress object, or completion.

        This allows you to iteratively run code and pause/resume whenever an external
        function is called.

        The GIL is released allowing parallel execution.

        Arguments:
            inputs: Dict of input variable values (must match names from __init__)
            limits: Optional resource limits configuration
            print_callback: Optional callback for print output

        Returns:
            FunctionSnapshot if an external function call is pending,
            NameLookupSnapshot if a name lookup is pending,
            FutureSnapshot if futures need to be resolved,
            MontyComplete if execution finished without external calls.

        Raises:
            MontyRuntimeError: If the code raises an exception during execution
        """

    def dump(self) -> bytes:
        """
        Serialize the Monty instance to a binary format.

        The serialized data can be stored and later restored with `Monty.load()`.
        This allows caching parsed code to avoid re-parsing on subsequent runs.

        Returns:
            Bytes containing the serialized Monty instance.

        Raises:
            ValueError: If serialization fails.
        """

    @staticmethod
    def load(
        data: bytes,
        *,
        dataclass_registry: list[type] | None = None,
    ) -> Monty:
        """
        Deserialize a Monty instance from binary format.

        Arguments:
            data: The serialized Monty data from `dump()`
            dataclass_registry: Optional list of dataclass types to register for proper
                isinstance() support on output, see `register_dataclass()`.

        Returns:
            A new Monty instance.

        Raises:
            ValueError: If deserialization fails.
        """

    def register_dataclass(self, cls: type) -> None:
        """
        Register a dataclass type for proper isinstance() support on output.

        When a dataclass passes through Monty and is returned, it normally becomes
        an `UnknownDataclass`. By registering the original type, we can use it to
        instantiate a real instance of that dataclass.

        Arguments:
            cls: The dataclass type to register.

        Raises:
            TypeError: If the argument is not a dataclass type.
        """

    def __repr__(self) -> str: ...
""" def feed_start( self, code: str, *, inputs: dict[str, Any] | None = None, print_callback: Callable[[Literal['stdout'], str], None] | None = None, ) -> FunctionSnapshot | NameLookupSnapshot | FutureSnapshot | MontyComplete: """ Start executing an incremental snippet, yielding snapshots for external calls. Unlike `feed_run()`, which handles external function dispatch internally, `feed_start()` returns a snapshot object whenever the code needs an external function call, OS call, name lookup, or future resolution. The caller provides the result via `snapshot.resume(...)`, which returns the next snapshot or `MontyComplete`. This enables the same iterative start/resume pattern used by `Monty.start()`, including support for async external functions via `FutureSnapshot`. On completion or error, the REPL state is automatically restored. """ def dump(self) -> bytes: """Serialize the REPL session to bytes.""" @staticmethod def load( data: bytes, *, dataclass_registry: list[type] | None = None, ) -> MontyRepl: """Restore a REPL session from bytes.""" @final class FunctionSnapshot: """ Represents a paused execution waiting for an external function call return value. Contains information about the pending external function call and allows resuming execution with the return value. """ @property def script_name(self) -> str: """The name of the script being executed.""" @property def is_os_function(self) -> bool: """Whether this snapshot is for an OS function call (e.g., Path.stat).""" @property def is_method_call(self) -> bool: """Whether this snapshot is for a dataclass method call (first arg is `self`).""" @property def function_name(self) -> str | OsFunction: """The name of the function being called (external function or OS function like 'Path.stat'). Will be a `OsFunction` if `is_os_function` is `True`. 
""" @property def args(self) -> tuple[Any, ...]: """The positional arguments passed to the external function.""" @property def kwargs(self) -> dict[str, Any]: """The keyword arguments passed to the external function.""" @property def call_id(self) -> int: """The unique identifier for this external function call.""" @overload def resume(self, *, return_value: Any) -> FunctionSnapshot | NameLookupSnapshot | FutureSnapshot | MontyComplete: """Resume execution with a return value from the external function. `resume` may only be called once on each FunctionSnapshot instance. The GIL is released allowing parallel execution. Arguments: return_value: The value to return from the external function call. exception: An exception to raise in the Monty interpreter. future: A future to await in the Monty interpreter. Returns: FunctionSnapshot if another external function call is pending, FutureSnapshot if another name lookup is pending, FutureSnapshot if futures need to be resolved, MontyComplete if execution finished. Raises: TypeError: If both arguments are provided. RuntimeError: If execution has already completed. MontyRuntimeError: If the code raises an exception during execution """ @overload def resume( self, *, exception: BaseException ) -> FunctionSnapshot | NameLookupSnapshot | FutureSnapshot | MontyComplete: """Resume execution by raising the exception in the Monty interpreter. See docstring for the first overload for more information. """ @overload def resume(self, *, future: EllipsisType) -> FunctionSnapshot | NameLookupSnapshot | FutureSnapshot | MontyComplete: """Resume execution by returning a pending future. No result is provided, we simply resume execution stating that a future is pending. See docstring for the first overload for more information. """ def dump(self) -> bytes: """ Serialize the FunctionSnapshot instance to a binary format. The serialized data can be restored with `load_snapshot()` or `load_repl_snapshot()`. 
@final
class NameLookupSnapshot:
    """
    Represents a paused execution waiting for a name lookup to be resolved.

    Exposes the name of the variable being looked up and allows resuming
    execution with the value that name resolves to, if any.
    """

    @property
    def script_name(self) -> str:
        """The name of the script being executed."""

    @property
    def variable_name(self) -> str:
        """The name of the variable being looked up."""

    def resume(
        self,
        *,
        value: Any | None = None,
    ) -> FunctionSnapshot | NameLookupSnapshot | FutureSnapshot | MontyComplete:
        """Resume execution with the value from a name lookup, if any.

        If no `value` is passed, a `NameError` is raised.

        `resume` may only be called once on each NameLookupSnapshot instance.

        The GIL is released allowing parallel execution.

        Arguments:
            value: The value from the name lookup, if any.

        Returns:
            FunctionSnapshot if an external function call is pending,
            NameLookupSnapshot if another name lookup is pending,
            FutureSnapshot if futures need to be resolved,
            MontyComplete if execution finished.

        Raises:
            RuntimeError: If execution has already completed.
            MontyRuntimeError: If the code raises an exception during execution
        """

    def dump(self) -> bytes:
        """
        Serialize the NameLookupSnapshot instance to a binary format.

        The serialized data can be restored with `load_snapshot()` or `load_repl_snapshot()`.
        This allows suspending execution and resuming later, potentially in a different process.

        Note: The `print_callback` is not serialized and must be re-provided when
        loading (e.g. via the `print_callback` argument of `load_snapshot()`) if
        print output is needed.

        Returns:
            Bytes containing the serialized NameLookupSnapshot instance.

        Raises:
            ValueError: If serialization fails.
            RuntimeError: If the progress has already been resumed.
        """

    def __repr__(self) -> str: ...
Note: The `print_callback` is not serialized and must be re-provided via `set_print_callback()` after loading if print output is needed. Returns: Bytes containing the serialized FutureSnapshot instance. Raises: ValueError: If serialization fails. RuntimeError: If the progress has already been resumed. """ def __repr__(self) -> str: ... @final class MontyComplete: """The result of a completed code execution.""" @property def output(self) -> Any: """The final output value from the executed code.""" def __repr__(self) -> str: ... class MontyError(Exception): """Base exception for all Monty interpreter errors. Catching `MontyError` will catch syntax, runtime, and typing errors from Monty. This exception is raised internally by Monty and cannot be constructed directly. """ def exception(self) -> BaseException: """Returns the inner exception as a Python exception object.""" def __str__(self) -> str: """Returns the exception message.""" @final class MontySyntaxError(MontyError): """Raised when Python code has syntax errors or cannot be parsed by Monty. Inherits exception(), __str__() from MontyError. """ def display(self, format: Literal['type-msg', 'msg'] = 'msg') -> str: """Returns formatted exception string. Args: format: 'type-msg' - 'ExceptionType: message' format 'msg' - just the message """ @final class MontyTypingError(MontyError): """Raised when type checking finds errors in the code. This exception is raised when static type analysis detects type errors before execution. Use `.display(format, color)` to render the diagnostics in different formats. Inherits exception(), __str__() from MontyError. Cannot be constructed directly from Python. """ def display( self, format: Literal[ 'full', 'concise', 'azure', 'json', 'jsonlines', 'rdjson', 'pylint', 'gitlab', 'github' ] = 'full', color: bool = False, ) -> str: """Renders the type error diagnostics with the specified format and color. Args: format: Output format for the diagnostics. Defaults to 'full'. 
color: Whether to include ANSI color codes. Defaults to False. """ @final class MontyRuntimeError(MontyError): """Raised when Monty code fails during execution. Inherits exception(), __str__() from MontyError. Additionally provides traceback() and display() methods. """ def traceback(self) -> list[Frame]: """Returns the Monty traceback as a list of Frame objects.""" def display(self, format: Literal['traceback', 'type-msg', 'msg'] = 'traceback') -> str: """Returns formatted exception string. Args: format: 'traceback' - full traceback with exception 'type-msg' - 'ExceptionType: message' format 'msg' - just the message """ @final class Frame: """A single frame in a Monty traceback.""" @property def filename(self) -> str: """The filename where the code is located.""" @property def line(self) -> int: """Line number (1-based).""" @property def column(self) -> int: """Column number (1-based).""" @property def end_line(self) -> int: """End line number (1-based).""" @property def end_column(self) -> int: """End column number (1-based).""" @property def function_name(self) -> str | None: """The name of the function, or None for module-level code.""" @property def source_line(self) -> str | None: """The source code line for preview in the traceback.""" def dict(self) -> dict[str, int | str | None]: """dict of attributes.""" def load_snapshot( data: bytes, *, print_callback: Callable[[Literal['stdout'], str], None] | None = None, dataclass_registry: list[type] | None = None, ) -> FunctionSnapshot | NameLookupSnapshot | FutureSnapshot: """Load a non-REPL snapshot from serialized bytes. Auto-detects the snapshot type (FunctionSnapshot, NameLookupSnapshot, or FutureSnapshot) from the serialized data. Arguments: data: Serialized snapshot bytes from `.dump()` print_callback: Optional callback for print output dataclass_registry: Optional list of dataclass types to register Returns: The deserialized snapshot, ready to be resumed. 
class StatResult(NamedTuple):
    """Equivalent to os.stat_result.

    A ten-field named tuple laid out in the same order as `os.stat_result`.
    Use the `file_stat()` / `dir_stat()` constructors rather than building
    the tuple by hand so the file-type bits in `st_mode` are always present.
    """

    @classmethod
    def file_stat(cls, size: int, mode: int = 0o644, mtime: float | None = None) -> Self:
        """Creates a stat_result namedtuple for a regular file.

        Use this when responding to Path.stat() OS calls.

        Args:
            size: File size in bytes
            mode: File permissions as octal (e.g., 0o644) or full mode with file type.
                Permission-only values may include the setuid/setgid/sticky bits
                (e.g., 0o4755); the regular-file type is added whenever no file
                type bits are present.
            mtime: Modification time as Unix timestamp, defaults to Now.

        Returns:
            A namedtuple with stat_result fields
        """
        import stat
        import time

        # Only add the regular-file type when the caller supplied no file type at
        # all. Testing the S_IFMT bits (rather than `mode < 0o1000`) keeps modes
        # such as 0o4755 from being left without a file type.
        if stat.S_IFMT(mode) == 0:
            mode |= stat.S_IFREG
        mtime = time.time() if mtime is None else mtime
        # Fields: mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime
        return cls(mode, 0, 0, 1, 0, 0, size, mtime, mtime, mtime)

    @classmethod
    def dir_stat(cls, mode: int = 0o755, mtime: float | None = None) -> Self:
        """Creates a stat_result namedtuple for a directory.

        Use this when responding to Path.stat() OS calls on directories.

        Args:
            mode: Directory permissions as octal (e.g., 0o755) or full mode with file type.
                Permission-only values may include the setuid/setgid/sticky bits
                (e.g., 0o1777); the directory type is added whenever no file
                type bits are present.
            mtime: Modification time as Unix timestamp, defaults to Now.

        Returns:
            A namedtuple with stat_result fields
        """
        import stat
        import time

        # Same rule as file_stat(): add the directory type only when no file
        # type bits were provided.
        if stat.S_IFMT(mode) == 0:
            mode |= stat.S_IFDIR
        mtime = time.time() if mtime is None else mtime
        # Directories report two links and a nominal 4096-byte size.
        return cls(mode, 0, 0, 2, 0, 0, 4096, mtime, mtime, mtime)

    st_mode: int
    """protection bits"""

    st_ino: int
    """inode"""

    st_dev: int
    """device"""

    st_nlink: int
    """number of hard links"""

    st_uid: int
    """user ID of owner"""

    st_gid: int
    """group ID of owner"""

    st_size: int
    """total size, in bytes"""

    st_atime: float
    """time of last access"""

    st_mtime: float
    """time of last modification"""

    st_ctime: float
    """time of last change"""
You typically don't need to override this method. Args: function_name: The Path method being called (e.g., 'Path.exists'). args: The arguments passed to the method. kwargs: The keyword arguments passed to the method. Returns: The result of the filesystem operation. """ kwargs = kwargs or {} match function_name: case 'Path.exists': return self.path_exists(*args) case 'Path.is_file': return self.path_is_file(*args) case 'Path.is_dir': return self.path_is_dir(*args) case 'Path.is_symlink': return self.path_is_symlink(*args) case 'Path.read_text': return self.path_read_text(*args) case 'Path.read_bytes': return self.path_read_bytes(*args) case 'Path.write_text': return self.path_write_text(*args) case 'Path.write_bytes': return self.path_write_bytes(*args) case 'Path.mkdir': assert len(kwargs) <= 2, f'Unexpected keyword arguments: {kwargs}' parents = kwargs.get('parents', False) exist_ok = kwargs.get('exist_ok', False) return self.path_mkdir(*args, parents=parents, exist_ok=exist_ok) case 'Path.unlink': return self.path_unlink(*args) case 'Path.rmdir': return self.path_rmdir(*args) case 'Path.iterdir': return self.path_iterdir(*args) case 'Path.stat': return self.path_stat(*args) case 'Path.rename': return self.path_rename(*args) case 'Path.resolve': return self.path_resolve(*args) case 'Path.absolute': return self.path_absolute(*args) case 'os.getenv': return self.getenv(*args) case 'os.environ': return self.get_environ() @abstractmethod def path_exists(self, path: PurePosixPath) -> bool: """Check if a path exists. Args: path: The path to check. Returns: True if the path exists, False otherwise. """ raise NotImplementedError @abstractmethod def path_is_file(self, path: PurePosixPath) -> bool: """Check if a path is a regular file. Args: path: The path to check. Returns: True if the path is a regular file, False otherwise. """ raise NotImplementedError @abstractmethod def path_is_dir(self, path: PurePosixPath) -> bool: """Check if a path is a directory. 
Args: path: The path to check. Returns: True if the path is a directory, False otherwise. """ raise NotImplementedError @abstractmethod def path_is_symlink(self, path: PurePosixPath) -> bool: """Check if a path is a symbolic link. Args: path: The path to check. Returns: True if the path is a symbolic link, False otherwise. """ raise NotImplementedError @abstractmethod def path_read_text(self, path: PurePosixPath) -> str: """Read the contents of a file as text. Args: path: The path to the file. Returns: The file contents as a string. Raises: FileNotFoundError: If the file does not exist. IsADirectoryError: If the path is a directory. """ raise NotImplementedError @abstractmethod def path_read_bytes(self, path: PurePosixPath) -> bytes: """Read the contents of a file as bytes. Args: path: The path to the file. Returns: The file contents as bytes. Raises: FileNotFoundError: If the file does not exist. IsADirectoryError: If the path is a directory. """ raise NotImplementedError @abstractmethod def path_write_text(self, path: PurePosixPath, data: str) -> int: """Write text data to a file. Args: path: The path to the file. data: The text content to write. Returns: The number of characters written. Raises: FileNotFoundError: If the parent directory does not exist. IsADirectoryError: If the path is a directory. """ raise NotImplementedError @abstractmethod def path_write_bytes(self, path: PurePosixPath, data: bytes) -> int: """Write binary data to a file. Args: path: The path to the file. data: The binary content to write. Returns: The number of bytes written. Raises: FileNotFoundError: If the parent directory does not exist. IsADirectoryError: If the path is a directory. """ raise NotImplementedError @abstractmethod def path_mkdir(self, path: PurePosixPath, parents: bool, exist_ok: bool) -> None: """Create a directory. Args: path: The path of the directory to create. parents: If True, create parent directories as needed. 
exist_ok: If True, don't raise an error if the directory exists. Raises: FileNotFoundError: If parents is False and parent directory doesn't exist. FileExistsError: If exist_ok is False and the directory already exists. """ raise NotImplementedError @abstractmethod def path_unlink(self, path: PurePosixPath) -> None: """Remove a file. Args: path: The path to the file to remove. Raises: FileNotFoundError: If the file does not exist. IsADirectoryError: If the path is a directory. """ raise NotImplementedError @abstractmethod def path_rmdir(self, path: PurePosixPath) -> None: """Remove an empty directory. Args: path: The path to the directory to remove. Raises: FileNotFoundError: If the directory does not exist. NotADirectoryError: If the path is not a directory. OSError: If the directory is not empty. """ raise NotImplementedError @abstractmethod def path_iterdir(self, path: PurePosixPath) -> list[PurePosixPath]: """List the contents of a directory. Args: path: The path to the directory. Returns: A list of full paths (as PurePosixPath) for entries in the directory. Raises: FileNotFoundError: If the directory does not exist. NotADirectoryError: If the path is not a directory. """ raise NotImplementedError @abstractmethod def path_stat(self, path: PurePosixPath) -> StatResult: """Get file status information. Use file_stat(), dir_stat(), or symlink_stat() helpers to create the return value. Args: path: The path to stat. Returns: A StatResult with file metadata. Raises: FileNotFoundError: If the path does not exist. """ raise NotImplementedError @abstractmethod def path_rename(self, path: PurePosixPath, target: PurePosixPath) -> None: """Rename a file or directory. Args: path: The current path. target: The new path. Raises: FileNotFoundError: If the source path does not exist. FileExistsError: If the target already exists (platform-dependent). 
""" raise NotImplementedError @abstractmethod def path_resolve(self, path: PurePosixPath) -> str: """Resolve a path to an absolute path, resolving any symlinks. Args: path: The path to resolve. Returns: The resolved absolute path with symlinks resolved. """ raise NotImplementedError @abstractmethod def path_absolute(self, path: PurePosixPath) -> str: """Convert a path to an absolute path without resolving symlinks. Args: path: The path to convert. Returns: The absolute path. """ raise NotImplementedError @abstractmethod def getenv(self, key: str, default: str | None = None) -> str | None: """Get an environment variable value. Args: key: The name of the environment variable. default: The value to return if the environment variable is not set. Returns: The value of the environment variable, or `default` if not set. """ raise NotImplementedError @abstractmethod def get_environ(self) -> dict[str, str]: """Get the entire environment as a dictionary. Returns: A dictionary containing all environment variables. """ raise NotImplementedError class AbstractFile(Protocol): """Protocol defining the interface for files used with OSAccess. This protocol allows custom file implementations to be used with OSAccess. The built-in implementations are: - `MemoryFile`: Stores content in memory (recommended for sandboxed execution) - `CallbackFile`: Delegates to custom callbacks (use with caution - see its docstring) Security Note: Custom implementations of this protocol run in the host Python environment. The `read_content()` and `write_content()` methods can execute arbitrary code, including accessing the real filesystem. Only use implementations you trust. For sandboxed execution where Monty code should not access real files, use `MemoryFile` which stores all content in memory. Attributes: path: The virtual path of the file within the OSAccess filesystem. name: The filename (basename) extracted from path. permissions: Unix-style permission bits (e.g., 0o644). 
class MemoryFile:
    """A virtual file held entirely in Python memory, for use with OSAccess.

    Prefer this file type for sandboxed Monty execution: nothing in this class
    touches the real filesystem. Reads return whatever is currently stored in
    `content`, and writes replace it.

    Example::

        from pydantic_monty import Monty, OSAccess, MemoryFile

        fs = OSAccess(
            [
                MemoryFile('/config.json', '{"debug": true}'),
                MemoryFile('/data.bin', b'\\x00\\x01\\x02'),
            ]
        )

        result = Monty('''
            from pathlib import Path
            Path('/config.json').read_text()
        ''').run(os=fs)
        # result == '{"debug": true}'

    Attributes:
        path: The virtual path of the file within the OSAccess filesystem.
        name: The filename (basename) extracted from path.
        content: The file content (str for text, bytes for binary).
        permissions: Unix-style permission bits (default: 0o644).
        deleted: Whether the file has been marked as deleted.
    """

    path: PurePosixPath
    name: str
    content: str | bytes
    permissions: int = 0o644
    deleted: bool

    def __init__(self, path: str | PurePosixPath, content: str | bytes, *, permissions: int = 0o644) -> None:
        """Create an in-memory virtual file.

        Args:
            path: The virtual path for this file in the OSAccess filesystem.
            content: The initial file content (str for text, bytes for binary).
            permissions: Unix-style permission bits (default: 0o644).
        """
        virtual_path = PurePosixPath(path)
        self.path, self.name = virtual_path, virtual_path.name
        self.content = content
        self.permissions = permissions
        # A freshly created file always starts out live.
        self.deleted = False

    def read_content(self) -> str | bytes:
        """Return the stored content."""
        return self.content

    def write_content(self, content: str | bytes) -> None:
        """Replace the stored content with `content`."""
        self.content = content

    def delete(self) -> None:
        """Flag the file as deleted (the content itself is left untouched)."""
        self.deleted = True

    def __repr__(self) -> str:
        # The content is elided; only its kind (text vs. binary) is shown.
        if isinstance(self.content, str):
            placeholder = "'...'"
        else:
            placeholder = "b'...'"
        return f'MemoryFile(path={self.path}, content={placeholder}, permissions={self.permissions})'
class OSAccess(AbstractOS):
    """In-memory virtual filesystem for sandboxed Monty execution.

    OSAccess provides a complete virtual filesystem that Monty code can interact
    with via `pathlib.Path` methods. Files exist only in memory (when using
    `MemoryFile`) and cannot access the real filesystem.

    Security Model:
        When using `MemoryFile` objects, OSAccess is fully sandboxed:

        - Monty code can only access files explicitly registered with OSAccess
        - Path traversal (e.g., `../../etc/passwd`) cannot escape to real files
        - All file content is stored in Python memory, not on disk
        - Environment variables are isolated to the provided `environ` dict

        However, if `CallbackFile` is used, the callbacks run in the host
        environment and CAN access real resources. See `CallbackFile` docstring.

    Attributes:
        files: List of AbstractFile objects registered with this filesystem.
        environ: Dictionary of environment variables accessible via os.getenv().
    """

    files: list[AbstractFile]
    environ: dict[str, str]
    _tree: Tree

    def __init__(
        self,
        files: Sequence[AbstractFile] | None = None,
        environ: dict[str, str] | None = None,
        *,
        root_dir: str | PurePosixPath = '/',
    ):
        """Create a virtual filesystem with the given files.

        Args:
            files: Files to register in the virtual filesystem. Use `MemoryFile` for
                sandboxed in-memory files, or `CallbackFile` for custom logic (with
                security caveats - see its docstring).
            environ: Environment variables accessible to Monty code via os.getenv().
                Isolated from the real environment.
            root_dir: Base directory for normalizing relative file paths. Relative paths
                in files will be prefixed with this. Default is '/'.

        Raises:
            AssertionError: If root_dir is not an absolute path.
            ValueError: If a file path conflicts with another file (e.g., trying to
                create a file inside another file's path, or placing a file where a
                directory holding other files already exists).
        """
        self.files = list(files) if files else []
        self.environ = environ or {}
        # Initialize tree with root directory - / is always present
        self._tree = {'/': {}}
        root_dir = PurePosixPath(root_dir)
        assert root_dir.is_absolute(), f'Root directory must be absolute, got {root_dir}'

        for file in self.files:
            if not file.path.is_absolute():
                file.path = root_dir / file.path

            subtree = self._tree
            *dir_parts, name = file.path.parts
            for part in dir_parts:
                entry = subtree.setdefault(part, {})
                if _is_dir(entry):
                    subtree = entry
                else:
                    raise ValueError(f'Cannot put file {file} within sub-directory of file {entry}')

            # Refuse to overwrite a directory: doing so would silently orphan every
            # file already registered underneath it.
            if _is_dir(subtree.get(name)):
                raise ValueError(f'Cannot put file {file} at {str(file.path)!r}: a directory already exists there')
            subtree[name] = file

    def __repr__(self) -> str:
        return f'OSAccess(files={self.files}, environ={self.environ})'

    def path_exists(self, path: PurePosixPath) -> bool:
        """Return True if `path` names a file or directory in the virtual tree."""
        return self._get_entry(path) is not None

    def path_is_file(self, path: PurePosixPath) -> bool:
        """Return True if `path` names a file."""
        return _is_file(self._get_entry(path))

    def path_is_dir(self, path: PurePosixPath) -> bool:
        """Return True if `path` names a directory."""
        return _is_dir(self._get_entry(path))

    def path_is_symlink(self, path: PurePosixPath) -> bool:
        """Always False: this filesystem has no symlinks."""
        return False

    def path_read_text(self, path: PurePosixPath) -> str:
        """Return the file content as str, decoding bytes content if necessary."""
        content = self._get_file(path).read_content()
        return content if isinstance(content, str) else content.decode()

    def path_read_bytes(self, path: PurePosixPath) -> bytes:
        """Return the file content as bytes, encoding str content if necessary."""
        content = self._get_file(path).read_content()
        return content if isinstance(content, bytes) else content.encode()

    def path_write_text(self, path: PurePosixPath, data: str) -> int:
        """Write text to `path` and return the number of characters written."""
        self._write_file(path, data)
        return len(data)

    def path_write_bytes(self, path: PurePosixPath, data: bytes) -> int:
        """Write bytes to `path` and return the number of bytes written."""
        self._write_file(path, data)
        return len(data)

    def _write_file(self, path: PurePosixPath, data: bytes | str) -> None:
        """Overwrite an existing file, or create a new `MemoryFile` in an existing directory.

        Raises:
            IsADirectoryError: If `path` is a directory.
            FileNotFoundError: If the parent directory does not exist.
        """
        entry = self._get_entry(path)
        if _is_file(entry):
            entry.write_content(data)
            return
        elif _is_dir(entry):
            raise IsADirectoryError(f'[Errno 21] Is a directory: {str(path)!r}')

        # write a new file if the parent directory exists
        parent_entry = self._parent_entry(path)
        if _is_dir(parent_entry):
            file_path = PurePosixPath(path)
            parent_entry[file_path.name] = new_file = MemoryFile(file_path, data)
            self.files.append(new_file)
        else:
            raise FileNotFoundError(f'[Errno 2] No such file or directory: {str(path)!r}')

    def path_mkdir(self, path: PurePosixPath, parents: bool, exist_ok: bool) -> None:
        """Create a directory, optionally creating missing parents.

        Raises:
            FileExistsError: If `path` is a file, or a directory and `exist_ok` is False.
            NotADirectoryError: If an ancestor of `path` is a file.
            FileNotFoundError: If the parent is missing and `parents` is False.
        """
        entry = self._get_entry(path)
        if _is_file(entry):
            raise FileExistsError(f'[Errno 17] File exists: {str(path)!r}')
        elif _is_dir(entry):
            if exist_ok:
                return
            else:
                raise FileExistsError(f'[Errno 17] File exists: {str(path)!r}')

        parent_entry = self._parent_entry(path)
        if _is_dir(parent_entry):
            parent_entry[PurePosixPath(path).name] = {}
            return
        elif _is_file(parent_entry):
            raise NotADirectoryError(f'[Errno 20] Not a directory: {str(path)!r}')
        elif parents:
            # Walk down from the top of the tree, creating each missing component.
            subtree = self._tree
            for part in PurePosixPath(path).parts:
                entry = subtree.setdefault(part, {})
                if _is_dir(entry):
                    subtree = entry
                else:
                    raise NotADirectoryError(f'[Errno 20] Not a directory: {str(path)!r}')
        else:
            raise FileNotFoundError(f'[Errno 2] No such file or directory: {str(path)!r}')

    def path_unlink(self, path: PurePosixPath) -> None:
        """Mark the file at `path` as deleted and remove it from its directory."""
        file = self._get_file(path)
        file.delete()
        # remove from parent
        parent_dir = self._parent_entry(path)
        assert _is_dir(parent_dir), f'Expected parent of a file to always be a directory, got {parent_dir}'
        # Key off the requested path, not `file.name`: the tree is indexed by path
        # components, and a file object's own `name` is not guaranteed to match.
        del parent_dir[PurePosixPath(path).name]

    def path_rmdir(self, path: PurePosixPath) -> None:
        """Remove the empty directory at `path`; raises OSError if it has entries."""
        directory = self._get_dir(path)
        if directory:
            raise OSError(f'[Errno 39] Directory not empty: {str(path)!r}')
        # remove from parent
        parent_dir = self._parent_entry(path)
        assert _is_dir(parent_dir), f'Expected parent of a file to always be a directory, got {parent_dir}'
        del parent_dir[PurePosixPath(path).name]

    def path_iterdir(self, path: PurePosixPath) -> list[PurePosixPath]:
        """Return the full paths of the entries directly inside the directory `path`."""
        # Return full paths as PurePosixPath objects (will be converted to MontyObject::Path)
        dir_path = PurePosixPath(path)
        return [dir_path / name for name in self._get_dir(path)]

    def path_stat(self, path: PurePosixPath) -> StatResult:
        """Return a StatResult for `path`; file sizes are measured in encoded bytes."""
        entry = self._get_entry_exists(path)
        if _is_file(entry):
            content = entry.read_content()
            size = len(content) if isinstance(content, bytes) else len(content.encode())
            return StatResult.file_stat(size=size, mode=entry.permissions)
        else:
            return StatResult.dir_stat()

    def path_rename(self, path: PurePosixPath, target: PurePosixPath) -> None:
        """Move the file or directory at `path` to `target`.

        An existing target file is marked deleted and replaced; an existing target
        directory is replaced only if it is empty. Renaming a path onto itself is a
        no-op. After the move, the `path` (and for files, `name`) attributes of the
        affected file objects are updated to their new location.

        Raises:
            FileNotFoundError: If `path` or the parent of `target` does not exist.
            IsADirectoryError: If `path` is a file and `target` is a directory.
            NotADirectoryError: If `path` is a directory and `target` is a file.
            OSError: If `target` is a non-empty directory, or if a directory would be
                moved into itself.
        """
        src_path = PurePosixPath(path)
        target_path = PurePosixPath(target)

        src_entry = self._get_entry(path)
        if src_entry is None:
            raise FileNotFoundError(f'[Errno 2] No such file or directory: {str(path)!r} -> {str(target)!r}')

        parent_dir = self._parent_entry(path)
        assert _is_dir(parent_dir), f'Expected parent of a file to always be a directory, got {parent_dir}'

        target_parent = self._parent_entry(target)
        if not _is_dir(target_parent):
            raise FileNotFoundError(f'[Errno 2] No such file or directory: {str(path)!r} -> {str(target)!r}')

        target_entry = self._get_entry(target)
        if target_entry is src_entry:
            # Source and target are the same entry: nothing to do. Without this
            # guard a file would be marked deleted and a non-empty directory would
            # be rejected as "not empty".
            return

        if _is_file(src_entry):
            if _is_dir(target_entry):
                raise IsADirectoryError(f'[Errno 21] Is a directory: {str(path)!r} -> {str(target)!r}')
            if _is_file(target_entry):
                # need to mark the target as deleted as it'll be overwritten
                target_entry.delete()

            # remove it from the old directory
            del parent_dir[src_path.name]
            # and put it in the new directory
            target_parent[target_path.name] = src_entry
            # Keep the file object in sync with its new location so later
            # operations (unlink, another rename, directory renames) see a
            # consistent path and name.
            src_entry.path = target_path
            src_entry.name = target_path.name
        else:
            assert _is_dir(src_entry), 'src path must be a directory here'
            if _is_file(target_entry):
                raise NotADirectoryError(f'[Errno 20] Not a directory: {str(path)!r} -> {str(target)!r}')
            elif _is_dir(target_entry) and target_entry:
                raise OSError(f'[Errno 66] Directory not empty: {str(path)!r} -> {str(target)!r}')
            if src_path in target_path.parents:
                # Moving a directory into its own subtree would make the tree
                # cyclic (and the path update below would recurse forever).
                raise OSError(f'[Errno 22] Invalid argument: {str(path)!r} -> {str(target)!r}')

            # remove it from the old directory
            del parent_dir[src_path.name]
            # and put it in the new directory
            target_parent[target_path.name] = src_entry
            # Update paths for all files in the renamed directory
            self._update_paths_recursive(src_entry, src_path, target_path)

    def path_resolve(self, path: PurePosixPath) -> str:
        """Return the absolute form of `path`; identical to `path_absolute`."""
        # No symlinks in OSAccess, so resolve is the same as absolute
        return self.path_absolute(path)

    def path_absolute(self, path: PurePosixPath) -> str:
        """Return `path` as an absolute path string, anchoring relative paths at '/'."""
        p = PurePosixPath(path)
        if p.is_absolute():
            return str(p)
        # In this virtual filesystem, we treat '/' as the working directory
        return str(PurePosixPath('/') / p)

    def getenv(self, key: str, default: str | None = None) -> str | None:
        """Look up `key` in the isolated `environ` dict, falling back to `default`."""
        return self.environ.get(key, default)

    def get_environ(self) -> dict[str, str]:
        """Return the `environ` dict itself (not a copy)."""
        return self.environ

    def _get_entry(self, path: PurePosixPath) -> Tree | AbstractFile | None:
        """Return the tree entry at `path`, or None if any component is missing.

        A path with no components (e.g. `PurePosixPath('.')`) matches nothing and
        yields None rather than raising.
        """
        parts = PurePosixPath(path).parts
        if not parts:
            # `*dir_parts, name = ()` would raise ValueError; an empty path simply
            # does not name any entry.
            return None

        subtree = self._tree
        *dir_parts, name = parts
        for part in dir_parts:
            entry = subtree.get(part)
            if _is_dir(entry):
                subtree = entry
            else:
                return None
        return subtree.get(name)

    def _get_entry_exists(self, path: PurePosixPath) -> Tree | AbstractFile:
        """Like `_get_entry`, but raise FileNotFoundError when nothing is there."""
        entry = self._get_entry(path)
        if entry is None:
            raise FileNotFoundError(f'[Errno 2] No such file or directory: {str(path)!r}')
        else:
            return entry

    def _get_file(self, path: PurePosixPath) -> AbstractFile:
        """Return the file at `path`; raise IsADirectoryError if it is a directory."""
        entry = self._get_entry_exists(path)
        if _is_file(entry):
            return entry
        else:
            raise IsADirectoryError(f'[Errno 21] Is a directory: {str(path)!r}')

    def _get_dir(self, path: PurePosixPath) -> Tree:
        """Return the directory at `path`; raise NotADirectoryError if it is a file."""
        entry = self._get_entry_exists(path)
        if _is_dir(entry):
            return entry
        else:
            raise NotADirectoryError(f'[Errno 20] Not a directory: {str(path)!r}')

    def _parent_entry(self, path: PurePosixPath) -> Tree | AbstractFile | None:
        """Return the tree entry for the parent of `path` (None if it does not exist)."""
        return self._get_entry(PurePosixPath(path).parent)

    def _update_paths_recursive(self, tree: Tree, old_prefix: PurePosixPath, new_prefix: PurePosixPath) -> None:
        """Update path attributes for all files in a tree after directory rename.

        When a directory is renamed, the internal tree structure is moved but
        AbstractFile objects still have their old paths. This method recursively
        updates all file paths by replacing old_prefix with new_prefix.
        """
        for entry in tree.values():
            if _is_file(entry):
                # Replace old prefix with new prefix in file path
                relative = entry.path.relative_to(old_prefix)
                entry.path = new_prefix / relative
            elif _is_dir(entry):
                self._update_paths_recursive(entry, old_prefix, new_prefix)
pub fn py_to_monty(obj: &Bound<'_, PyAny>, dc_registry: &DcRegistry) -> PyResult { if obj.is_none() { Ok(MontyObject::None) } else if let Ok(bool) = obj.cast::() { // Check bool BEFORE int since bool is a subclass of int in Python Ok(MontyObject::Bool(bool.is_true())) } else if let Ok(int) = obj.cast::() { // Try i64 first (fast path), fall back to BigInt for large values if let Ok(i) = int.extract::() { Ok(MontyObject::Int(i)) } else { // Extract as BigInt for values that don't fit in i64 let bi: BigInt = int.extract()?; Ok(MontyObject::BigInt(bi)) } } else if let Ok(float) = obj.cast::() { Ok(MontyObject::Float(float.extract()?)) } else if let Ok(string) = obj.cast::() { Ok(MontyObject::String(string.extract()?)) } else if let Ok(bytes) = obj.cast::() { Ok(MontyObject::Bytes(bytes.extract()?)) } else if let Ok(list) = obj.cast::() { let items: PyResult> = list.iter().map(|item| py_to_monty(&item, dc_registry)).collect(); Ok(MontyObject::List(items?)) } else if let Ok(tuple) = obj.cast::() { // Check for namedtuple BEFORE treating as regular tuple // Namedtuples have a `_fields` attribute with field names if let Ok(fields) = obj.getattr("_fields") && let Ok(fields_tuple) = fields.cast::() { let py_type = obj.get_type(); // Get the simple class name (e.g., "stat_result") let simple_name = py_type.name()?.to_string(); // Get the module (e.g., "os" or "__main__") let module: String = py_type.getattr("__module__")?.extract()?; // Construct full type name: "os.stat_result" // Skip module prefix if it's a Python built-in module let type_name = if module.starts_with('_') || module == "builtins" { simple_name } else { format!("{module}.{simple_name}") }; // Extract field names as strings let field_names: PyResult> = fields_tuple.iter().map(|f| f.extract::()).collect(); // Extract values let values: PyResult> = tuple.iter().map(|item| py_to_monty(&item, dc_registry)).collect(); return Ok(MontyObject::NamedTuple { type_name, field_names: field_names?, values: values?, }); } 
// Regular tuple let items: PyResult> = tuple.iter().map(|item| py_to_monty(&item, dc_registry)).collect(); Ok(MontyObject::Tuple(items?)) } else if let Ok(dict) = obj.cast::() { // in theory we could provide a way of passing the iterator direct to the internal MontyObject construct // it's probably not worth it right now Ok(MontyObject::dict( dict.iter() .map(|(k, v)| Ok((py_to_monty(&k, dc_registry)?, py_to_monty(&v, dc_registry)?))) .collect::>>()?, )) } else if let Ok(set) = obj.cast::() { let items: PyResult> = set.iter().map(|item| py_to_monty(&item, dc_registry)).collect(); Ok(MontyObject::Set(items?)) } else if let Ok(frozenset) = obj.cast::() { let items: PyResult> = frozenset.iter().map(|item| py_to_monty(&item, dc_registry)).collect(); Ok(MontyObject::FrozenSet(items?)) } else if obj.is(obj.py().Ellipsis()) { Ok(MontyObject::Ellipsis) } else if let Ok(exc) = obj.cast::() { Ok(exc_to_monty_object(exc)) } else if is_dataclass(obj) { // Auto-register the dataclass type so it can be reconstructed on output dc_registry.insert(&obj.get_type())?; dataclass_to_monty(obj, dc_registry) } else if obj.is_instance(get_pure_posix_path(obj.py())?)? { // Handle pathlib.PurePosixPath and thereby pathlib.PosixPath objects let path_str: String = obj.str()?.extract()?; Ok(MontyObject::Path(path_str)) } else if obj.is_callable() { // Callable check is last since many Python types (classes, etc.) are technically callable, // and we want to match more specific types first (e.g. dataclasses). 
let name = get_name(obj); let docstring = get_docstring(obj); Ok(MontyObject::Function { name, docstring }) } else if let Ok(name) = obj.get_type().qualname() { let msg = match obj.get_type().module() { Ok(module) => format!("Cannot convert {module}.{name} to Monty value"), Err(_) => format!("Cannot convert {name} to Monty value"), }; Err(PyTypeError::new_err(msg)) } else { Err(PyTypeError::new_err("Cannot convert unknown type to Monty value")) } } /// Converts Monty's `MontyObject` to a native Python object, using the dataclass registry. /// /// When a dataclass is converted and its class name is found in the registry, /// an instance of the original Python type is created (so `isinstance()` works). /// Otherwise, falls back to `PyMontyDataclass`. pub fn monty_to_py(py: Python<'_>, obj: &MontyObject, dc_registry: &DcRegistry) -> PyResult> { match obj { MontyObject::None => Ok(py.None()), MontyObject::Ellipsis => Ok(py.Ellipsis()), MontyObject::Bool(b) => Ok(PyBool::new(py, *b).to_owned().into_any().unbind()), MontyObject::Int(i) => Ok(i.into_pyobject(py)?.clone().into_any().unbind()), MontyObject::BigInt(bi) => Ok(bi.into_pyobject(py)?.clone().into_any().unbind()), MontyObject::Float(f) => Ok(f.into_pyobject(py)?.clone().into_any().unbind()), MontyObject::String(s) => Ok(PyString::new(py, s).into_any().unbind()), MontyObject::Bytes(b) => Ok(PyBytes::new(py, b).into_any().unbind()), MontyObject::List(items) => { let py_items: PyResult>> = items.iter().map(|item| monty_to_py(py, item, dc_registry)).collect(); Ok(PyList::new(py, py_items?)?.into_any().unbind()) } MontyObject::Tuple(items) => { let py_items: PyResult>> = items.iter().map(|item| monty_to_py(py, item, dc_registry)).collect(); Ok(PyTuple::new(py, py_items?)?.into_any().unbind()) } // NamedTuple - create a proper Python namedtuple using collections.namedtuple MontyObject::NamedTuple { type_name, field_names, values, } => { // Extract module and simple name from full type_name // e.g., "os.stat_result" -> 
module="os", simple_name="stat_result" let (module, simple_name) = if let Some(idx) = type_name.rfind('.') { (&type_name[..idx], &type_name[idx + 1..]) } else { ("", type_name.as_str()) }; // Create a namedtuple type with the module set for round-trip support // collections.namedtuple(typename, field_names, module=module) let namedtuple_fn = get_namedtuple(py)?; let py_field_names = PyList::new(py, field_names)?; let nt_type = if module.is_empty() { namedtuple_fn.call1((simple_name, py_field_names))? } else { let kwargs = PyDict::new(py); kwargs.set_item("module", module)?; namedtuple_fn.call((simple_name, py_field_names), Some(&kwargs))? }; // Convert values and instantiate using _make() which accepts an iterable // note `_make` might start with an underscore, but it's a public documented method // https://docs.python.org/3/library/collections.html#collections.somenamedtuple._make let py_values: PyResult>> = values.iter().map(|item| monty_to_py(py, item, dc_registry)).collect(); let instance = nt_type.call_method1("_make", (py_values?,))?; Ok(instance.into_any().unbind()) } MontyObject::Dict(map) => { let dict = PyDict::new(py); for (k, v) in map { dict.set_item(monty_to_py(py, k, dc_registry)?, monty_to_py(py, v, dc_registry)?)?; } Ok(dict.into_any().unbind()) } MontyObject::Set(items) => { let set = PySet::empty(py)?; for item in items { set.add(monty_to_py(py, item, dc_registry)?)?; } Ok(set.into_any().unbind()) } MontyObject::FrozenSet(items) => { let py_items: PyResult>> = items.iter().map(|item| monty_to_py(py, item, dc_registry)).collect(); Ok(PyFrozenSet::new(py, &py_items?)?.into_any().unbind()) } // Return the exception instance as a value (not raised) MontyObject::Exception { exc_type, arg } => { let exc = exc_monty_to_py(py, MontyException::new(*exc_type, arg.clone())); Ok(exc.into_value(py).into_any()) } // Return Python's built-in type object MontyObject::Type(t) => import_builtins(py)?.getattr(py, t.to_string()), MontyObject::BuiltinFunction(f) => 
import_builtins(py)?.getattr(py, f.to_string()), // Dataclass - use registry to reconstruct original type if available MontyObject::Dataclass { name, type_id, field_names, attrs, frozen, } => dataclass_to_py(py, name, *type_id, field_names, attrs, *frozen, dc_registry), // Path - convert to Python pathlib.Path MontyObject::Path(p) => { let pure_posix_path = get_pure_posix_path(py)?; let path_obj = pure_posix_path.call1((p,))?; Ok(path_obj.into_any().unbind()) } // Output-only types - convert to string representation MontyObject::Repr(s) => Ok(PyString::new(py, s).into_any().unbind()), MontyObject::Cycle(_, placeholder) => Ok(PyString::new(py, placeholder).into_any().unbind()), // Function objects are internal to the name lookup protocol and should not normally // appear as final output values. If they do, represent as a string with the function name. MontyObject::Function { name, .. } => Ok(PyString::new(py, name).into_any().unbind()), } } pub fn import_builtins(py: Python<'_>) -> PyResult<&Py> { static BUILTINS: PyOnceLock> = PyOnceLock::new(); BUILTINS.get_or_try_init(py, || py.import("builtins").map(Bound::unbind)) } /// Cached import of `collections.namedtuple` function. fn get_namedtuple(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { static NAMEDTUPLE: PyOnceLock> = PyOnceLock::new(); NAMEDTUPLE.import(py, "collections", "namedtuple") } /// Cached import of `pathlib.PurePosixPath` class. 
fn get_pure_posix_path(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { static PUREPOSIX: PyOnceLock> = PyOnceLock::new(); PUREPOSIX.import(py, "pathlib", "PurePosixPath") } pub fn get_name(f: &Bound<'_, PyAny>) -> String { f.getattr(intern!(f.py(), "__name__")) .and_then(|n| n.extract::()) .unwrap_or_else(|_| "".to_string()) } /// get the `__doc__` attribute from a (hopefully) function pub fn get_docstring(f: &Bound<'_, PyAny>) -> Option { f.getattr(intern!(f.py(), "__doc__")) .and_then(|d| d.extract::()) .ok() } ================================================ FILE: crates/monty-python/src/dataclass.rs ================================================ //! Dataclass conversion between Python and Monty. //! //! This module handles: //! - Converting Python dataclass instances to `MontyObject::Dataclass` //! - Converting `MontyObject::Dataclass` back to Python via `PyUnknownDataclass` //! - `PyUnknownDataclass`: A Python class that mimics dataclass behavior use std::{ collections::hash_map::DefaultHasher, hash::{Hash, Hasher}, }; use ::monty::{DictPairs, MontyObject}; use pyo3::{ Bound, exceptions::{PyAttributeError, PyTypeError}, intern, prelude::*, sync::PyOnceLock, types::{PyDict, PyList, PyString, PyType}, }; use crate::convert::{monty_to_py, py_to_monty}; /// Checks if a Python object is a dataclass instance (not a type). /// /// Copied from pydantic's `is_dataclass` logic. pub fn is_dataclass(value: &Bound<'_, PyAny>) -> bool { value .hasattr(intern!(value.py(), "__dataclass_fields__")) .unwrap_or(false) && !value.is_instance_of::() } /// Converts a Python dataclass instance to `MontyObject::Dataclass`. /// /// Extracts field names in definition order (for repr) and all field values as attrs. /// The `type_id` is set to `id(type(dc))` in Python, allowing registry lookups by type identity. /// The `dc_registry` is threaded through to `py_to_monty` so that nested dataclasses /// in field values are also auto-registered. 
pub fn dataclass_to_monty(value: &Bound<'_, PyAny>, dc_registry: &DcRegistry) -> PyResult { let py = value.py(); let dc_type = value.get_type(); let name: String = dc_type.getattr(intern!(py, "__name__"))?.extract()?; // Get type_id from id(type(dc)) for registry lookups let type_id = dc_type.as_ptr() as u64; let fields_dict = value .getattr(intern!(py, "__dataclass_fields__"))? .cast_into::()?; let frozen = value .getattr(intern!(py, "__dataclass_params__"))? .getattr(intern!(py, "frozen"))? .extract::()?; let field_type_marker = get_field_marker(py)?; // Collect field names and attrs let mut field_names = Vec::new(); let mut attrs = Vec::new(); for (field_name_obj, field) in fields_dict.iter() { let field_type = field.getattr(intern!(py, "_field_type"))?; if field_type.is(field_type_marker) { let field_name_str = field_name_obj.cast::()?.to_str()?.to_string(); // we don't include private fields in the dataclass serialized for monty if field_name_str.starts_with('_') { continue; } let field_value = value.getattr(field_name_obj.cast::()?)?; let field_name_monty = py_to_monty(&field_name_obj, dc_registry)?; let field_value_monty = py_to_monty(&field_value, dc_registry)?; field_names.push(field_name_str); attrs.push((field_name_monty, field_value_monty)); } } Ok(MontyObject::Dataclass { name, type_id, field_names, attrs: attrs.into(), frozen, }) } /// Converts a `MontyObject::Dataclass` to a Python object. /// /// If the `type_id` is found in the dc_registry, creates an instance of the original /// Python dataclass type (so `isinstance(result, OriginalClass)` works). /// Otherwise, falls back to creating a `PyUnknownDataclass`. pub fn dataclass_to_py( py: Python<'_>, name: &str, type_id: u64, field_names: &[String], attrs: &DictPairs, frozen: bool, dc_registry: &DcRegistry, ) -> PyResult> { // Try to use the original type from the dc_registry (keyed by type_id) if let Some(original_type_py) = dc_registry.get(py, type_id)? 
{ let original_type = original_type_py.bind(py).cast::()?; // Build kwargs dict from field names and values let kwargs = PyDict::new(py); for (key, value) in attrs { // Skip non-string keys if let MontyObject::String(s) = key { // Only include declared fields in constructor kwargs let key_str = s.as_str(); if field_names.iter().any(|f| f.as_str() == key_str) { kwargs.set_item(key_str, monty_to_py(py, value, dc_registry)?)?; } } } // Call the dataclass constructor with kwargs original_type.call((), Some(&kwargs)).map(Bound::unbind) } else { // Fall back to PyUnknownDataclass let dc = PyUnknownDataclass::new(py, name.to_string(), field_names.to_vec(), attrs, frozen, dc_registry)?; Ok(Py::new(py, dc)?.into_any()) } } /// Maps Python dataclass type identity (pointer address as `u64`) to the original /// Python type object (`Py`). /// /// This registry enables round-trip reconstruction of dataclass types: when a /// dataclass passes through Monty, its type is stored here so that on output, /// `isinstance(result, OriginalClass)` works correctly. /// /// Wraps a `Py` so that `clone_ref` produces a shared handle to the same /// underlying dict — all clones see the same data without needing `Arc`. /// The GIL already serializes access, making additional locking unnecessary. #[derive(Debug)] pub struct DcRegistry { registry: Py, } impl DcRegistry { /// Creates a new empty registry. pub fn new(py: Python<'_>) -> Self { Self { registry: PyDict::new(py).unbind(), } } /// Creates a `DcRegistry` from an optional Python list of dataclass types. /// /// Each type in the list is registered by its pointer identity, matching the key /// format used by `dataclass_to_monty`. pub fn from_list(py: Python<'_>, dataclass_registry: Option<&Bound<'_, PyList>>) -> PyResult { let slf = Self::new(py); if let Some(registry_list) = dataclass_registry { for cls in registry_list { slf.insert(&cls)?; } } Ok(slf) } /// Creates a shared handle to this registry (cheap Python refcount bump). 
/// /// The clone points to the **same** underlying Python dict, so insertions /// through any handle are visible to all others. pub fn clone_ref(&self, py: Python<'_>) -> Self { Self { registry: self.registry.clone_ref(py), } } /// Registers a Python type in the dataclass registry, keyed by pointer identity. /// /// This is idempotent — calling it multiple times with the same type is safe and /// simply overwrites the existing entry. The key is the raw pointer address of the /// type object, matching what `dataclass_to_monty` stores as `type_id` in /// `MontyObject::Dataclass`. This allows `dataclass_to_py` to look up the original /// Python class when reconstructing output values. pub fn insert(&self, obj: &Bound<'_, T>) -> PyResult<()> { let py = obj.py(); let type_id = obj.as_ptr() as u64; self.registry.bind(py).set_item(type_id, obj.as_any()) } /// Looks up an original Python type by its pointer identity. pub fn get(&self, py: Python<'_>, type_id: u64) -> PyResult>> { Ok(self.registry.bind(py).get_item(type_id)?.map(Bound::unbind)) } } /// Python class that mimics dataclass behavior for `MontyObject::Dataclass`. /// /// Supports: /// - Attribute access (`__getattr__`, `__setattr__`) /// - String representation (`__repr__`, `__str__`) /// - Equality comparison (`__eq__`) /// - Hashing for frozen instances (`__hash__`) /// - `dataclasses` module compatibility (`__dataclass_fields__`) #[pyclass(name = "UnknownDataclass")] pub struct PyUnknownDataclass { /// Class name (e.g., "Point", "User") name: String, /// Declared field names in definition order (for repr) field_names: Vec, /// All attributes (fields + any extra attrs) attrs: Py, /// Whether this instance is frozen (immutable) frozen: bool, } #[pymethods] impl PyUnknownDataclass { /// Returns a dict mapping field names to Field objects. /// /// This enables compatibility with `dataclasses.is_dataclass()`, `dataclasses.fields()`, /// `dataclasses.asdict()`, etc. 
#[getter] fn __dataclass_fields__(&self, py: Python<'_>) -> PyResult> { let field_marker = get_field_marker(py)?; let missing = get_missing(py)?; let field_class = get_field_class(py)?; let attrs = self.attrs.bind(py); let fields_dict = PyDict::new(py); for field_name in &self.field_names { // Get the field value's type for the type annotation let field_type = if let Some(value) = attrs.get_item(field_name)? { value.get_type().into_any() } else { py.None().into_bound(py).get_type().into_any() }; // Create a Field object with the required attributes let field_obj = if cfg!(Py_3_14) { // Field(default, default_factory, init, repr, hash, compare, metadata, kw_only, doc) // doc is now in 3.14 // https://github.com/python/cpython/blob/3.14/Lib/dataclasses.py#L294 field_class.call1(( missing, // default missing, // default_factory true, // init true, // repr py.None(), // hash (None means use compare value) true, // compare py.None(), // metadata false, // kw_only py.None(), // doc ))? } else { // https://github.com/python/cpython/blob/3.13/Lib/dataclasses.py#L288 // Field(default, default_factory, init, repr, hash, compare, metadata, kw_only) field_class.call1(( missing, // default missing, // default_factory true, // init true, // repr py.None(), // hash (None means use compare value) true, // compare py.None(), // metadata false, // kw_only ))? }; // Set name and type (these are set after construction in real dataclasses) field_obj.setattr("name", field_name)?; field_obj.setattr("type", field_type)?; field_obj.setattr("_field_type", field_marker)?; fields_dict.set_item(field_name, field_obj)?; } Ok(fields_dict.unbind()) } /// Returns a `_DataclassParams` object with dataclass configuration. /// /// This enables compatibility with code that checks `obj.__dataclass_params__.frozen`, etc. 
#[getter] fn __dataclass_params__(&self, py: Python<'_>) -> PyResult> { let params_class = get_dataclass_params_class(py)?; let params = if cfg!(Py_3_12) { // https://github.com/python/cpython/blob/3.12/Lib/dataclasses.py#L373 // _DataclassParams(init, repr, eq, order, unsafe_hash, frozen, match_args, kw_only, slots, weakref_slot) params_class.call1(( true, // init true, // repr true, // eq false, // order false, // unsafe_hash self.frozen, // frozen true, // match_args false, // kw_only false, // slots false, // weakref_slot ))? } else { // https://github.com/python/cpython/blob/3.11/Lib/dataclasses.py#L346 // _DataclassParams(init, repr, eq, order, unsafe_hash, frozen) params_class.call1(( true, // init true, // repr true, // eq false, // order false, // unsafe_hash self.frozen, // frozen ))? }; Ok(params.unbind()) } /// Get an attribute value. fn __getattr__(&self, py: Python<'_>, name: &str) -> PyResult> { let attrs = self.attrs.bind(py); match attrs.get_item(name)? { Some(value) => Ok(value.unbind()), None => Err(PyAttributeError::new_err(format!( "'UnknownDataclass' object has no attribute '{name}'", ))), } } /// Set an attribute value. /// /// Raises `FrozenInstanceError` (subclass of `AttributeError`) for frozen dataclasses. fn __setattr__(&self, py: Python<'_>, name: &str, value: Py) -> PyResult<()> { if self.frozen { let frozen_error = get_frozen_instance_error(py)?; let msg = format!("cannot assign to field '{name}'"); return Err(PyErr::from_value(frozen_error.call1((msg,))?)); } let attrs = self.attrs.bind(py); attrs.set_item(name, value)?; Ok(()) } /// String representation: ClassName(field1=value1, field2=value2, ...) fn __repr__(&self, py: Python<'_>) -> PyResult { let attrs = self.attrs.bind(py); let mut parts = Vec::new(); for field_name in &self.field_names { if let Some(value) = attrs.get_item(field_name)? 
{ let value_repr: String = value.repr()?.extract()?; parts.push(format!("{field_name}={value_repr}")); } } Ok(format!("", self.name, parts.join(", "))) } /// Equality comparison. fn __eq__(&self, py: Python<'_>, other: &Bound<'_, PyAny>) -> PyResult { // Check if other is also a PyUnknownDataclass if let Ok(other_dc) = other.extract::>() { if self.name != other_dc.name { return Ok(false); } let self_attrs = self.attrs.bind(py); let other_attrs = other_dc.attrs.bind(py); // Compare all attrs self_attrs.eq(other_attrs) } else { Ok(false) } } /// Hash (only for frozen dataclasses). fn __hash__(&self, py: Python<'_>) -> PyResult { if !self.frozen { return Err(PyTypeError::new_err("unhashable type: 'UnknownDataclass'")); } let mut hasher = DefaultHasher::new(); let attrs = self.attrs.bind(py); for field_name in &self.field_names { field_name.hash(&mut hasher); if let Some(value) = attrs.get_item(field_name)? { let value_hash: isize = value.hash()?; value_hash.hash(&mut hasher); } } // Python's hash returns a signed integer; reinterpret bits for large values let hash_u64 = hasher.finish(); #[cfg(target_pointer_width = "64")] let hash_isize = isize::from_ne_bytes(hash_u64.to_ne_bytes()); #[cfg(not(target_pointer_width = "64"))] let hash_isize = { // On 32-bit: truncate to lower 32 bits, then reinterpret as i32 -> isize let hash_u32 = hash_u64 as u32; i32::from_ne_bytes(hash_u32.to_ne_bytes()) as isize }; Ok(hash_isize) } } impl PyUnknownDataclass { /// Creates a new `PyUnknownDataclass` from `MontyObject` fields. pub fn new<'a>( py: Python<'_>, name: String, field_names: Vec, attrs: impl IntoIterator, frozen: bool, dc_registry: &DcRegistry, ) -> PyResult { let dict = PyDict::new(py); for (k, v) in attrs { dict.set_item(monty_to_py(py, k, dc_registry)?, monty_to_py(py, v, dc_registry)?)?; } Ok(Self { name, field_names, attrs: dict.unbind(), frozen, }) } } /// Cached import of `dataclasses._FIELD` marker. 
/// /// Used to match the logic from `dataclasses.fields()`: /// `tuple(f for f in fields.values() if f._field_type is _FIELD)` fn get_field_marker(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { static DC_FIELD_MARKER: PyOnceLock> = PyOnceLock::new(); DC_FIELD_MARKER.import(py, "dataclasses", "_FIELD") } /// Cached import of `dataclasses.MISSING` sentinel. fn get_missing(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { static DC_MISSING: PyOnceLock> = PyOnceLock::new(); DC_MISSING.import(py, "dataclasses", "MISSING") } /// Cached import of `dataclasses.Field` class. fn get_field_class(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { static DC_FIELD_CLASS: PyOnceLock> = PyOnceLock::new(); DC_FIELD_CLASS.import(py, "dataclasses", "Field") } /// Cached import of `dataclasses._DataclassParams` class. fn get_dataclass_params_class(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { static DC_PARAMS_CLASS: PyOnceLock> = PyOnceLock::new(); DC_PARAMS_CLASS.import(py, "dataclasses", "_DataclassParams") } /// Cached import of `dataclasses.FrozenInstanceError` exception class. pub fn get_frozen_instance_error(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { static DC_FROZEN_ERROR: PyOnceLock> = PyOnceLock::new(); DC_FROZEN_ERROR.import(py, "dataclasses", "FrozenInstanceError") } ================================================ FILE: crates/monty-python/src/exceptions.rs ================================================ //! Custom exception types for the Monty Python interpreter. //! //! Provides a hierarchy of exception types that wrap Monty's internal exceptions, //! preserving traceback information and allowing Python code to distinguish //! between syntax errors, runtime errors, and type checking errors from Monty-executed code. //! //! ## Exception Hierarchy //! //! ```text //! MontyError(Exception) # Base class for all Monty exceptions //! ├── MontySyntaxError # Raised when syntax is invalid or Monty can't parse the code //! 
├── MontyRuntimeError # Raised when code fails during execution //! └── MontyTypingError # Raised when type checking finds errors in the code //! ``` use ::monty::{ExcType, MontyException, StackFrame}; use monty_type_checking::TypeCheckingDiagnostics; use pyo3::{ PyClassInitializer, PyTypeCheck, exceptions::{self}, prelude::*, sync::PyOnceLock, types::{PyDict, PyList, PyString}, }; use crate::dataclass::get_frozen_instance_error; /// Base exception for all Monty interpreter errors. /// /// This is the parent class for both `MontySyntaxError` and `MontyRuntimeError`. /// Catching `MontyError` will catch any exception raised by Monty. #[pyclass(extends=exceptions::PyException, module="pydantic_monty", subclass, skip_from_py_object)] #[derive(Clone)] pub struct MontyError { /// The underlying Monty exception. exc: MontyException, } impl MontyError { /// Converts a Monty exception to a `PyErr`. /// /// For `SyntaxError` exceptions, creates a `MontySyntaxError`. /// For all other exceptions, creates a `MontyRuntimeError` with all the exception /// information preserved, including the traceback frames and display string. #[must_use] pub fn new_err(py: Python<'_>, exc: MontyException) -> PyErr { // Syntax errors get their own exception type if exc.exc_type() == ExcType::SyntaxError { MontySyntaxError::new_err(py, exc) } else { MontyRuntimeError::new_err(py, exc) } } } impl MontyError { /// Creates a new `MontyError` wrapping a `MontyException`. #[must_use] pub fn new(exc: MontyException) -> Self { Self { exc } } /// Returns the exception type. fn exc_type(&self) -> ExcType { self.exc.exc_type() } /// Returns the exception message, if any. fn message(&self) -> Option<&str> { self.exc.message() } } #[pymethods] impl MontyError { /// Returns the inner exception as a Python exception object. /// /// This recreates a native Python exception (e.g., `ValueError`, `TypeError`) /// from the stored exception type and message. 
fn exception(&self, py: Python<'_>) -> Py<PyAny> {
        // The stored exception is cloned because the conversion consumes it
        let py_err = exc_monty_to_py(py, self.exc.clone());
        py_err.into_value(py).into_any()
    }

    /// Returns the exception message, or an empty string when there is none.
    fn __str__(&self) -> String {
        self.message().unwrap_or_default().to_string()
    }

    /// Returns `MontyError(<type>: <message>)`, or `MontyError(<type>)` when there is no message.
    fn __repr__(&self) -> String {
        let exc_type_name = self.exc_type();
        if let Some(msg) = self.message() {
            format!("MontyError({exc_type_name}: {msg})")
        } else {
            format!("MontyError({exc_type_name})")
        }
    }
}

/// Raised when Python code has syntax errors or cannot be parsed by Monty.
///
/// Inherits from `MontyError`. The inner exception is always a `SyntaxError`.
#[pyclass(extends=MontyError, module="pydantic_monty", skip_from_py_object)]
#[derive(Clone)]
pub struct MontySyntaxError;

impl MontySyntaxError {
    /// Creates a `PyErr` holding a new `MontySyntaxError` that wraps the given exception.
    ///
    /// If the Python object cannot be allocated, the allocation error is returned instead.
    #[must_use]
    pub fn new_err(py: Python<'_>, exc: MontyException) -> PyErr {
        let base_error = MontyError::new(exc);
        let init = PyClassInitializer::from(base_error).add_subclass(Self);
        match Py::new(py, init) {
            Ok(err) => PyErr::from_value(err.into_bound(py).into_any()),
            Err(e) => e,
        }
    }
}

#[pymethods]
impl MontySyntaxError {
    /// Returns formatted exception string.
    ///
    /// Args:
    ///     format: 'type-msg' - 'ExceptionType: message' format
    ///             'msg' - just the message
    ///
    /// Raises `ValueError` for any other `format`.
    #[pyo3(signature = (format = "msg"))]
    #[expect(clippy::needless_pass_by_value, reason = "required by macro")]
    fn display(slf: PyRef<'_, Self>, format: &str) -> PyResult<String> {
        let parent = slf.as_super();
        match format {
            "msg" => Ok(parent.message().unwrap_or_default().to_string()),
            "type-msg" => Ok(parent.exc.summary()),
            _ => Err(exceptions::PyValueError::new_err(format!(
                "Invalid display format: '{format}'. Expected 'type-msg', or 'msg'"
            ))),
        }
    }

    /// Returns the exception message, or an empty string when there is none.
    #[expect(clippy::needless_pass_by_value, reason = "required by macro")]
    fn __str__(slf: PyRef<'_, Self>) -> String {
        slf.as_super().message().unwrap_or_default().to_string()
    }

    /// Returns `MontySyntaxError(<message>)`, or `MontySyntaxError()` when there is no message.
    #[expect(clippy::needless_pass_by_value, reason = "required by macro")]
    fn __repr__(slf: PyRef<'_, Self>) -> String {
        let parent = slf.as_super();
        if let Some(msg) = parent.message() {
            format!("MontySyntaxError({msg})")
        } else {
            "MontySyntaxError()".to_string()
        }
    }
}

/// Raised when type checking finds errors in the code.
///
/// Inherits from `MontyError`. This exception is raised when static type
/// analysis detects type errors. Stores the `TypeCheckingDiagnostics` so diagnostics
/// can be re-rendered with different format/color settings via `display()`.
#[pyclass(extends=MontyError, module="pydantic_monty")]
pub struct MontyTypingError {
    failure: TypeCheckingDiagnostics,
}

impl MontyTypingError {
    /// Creates a `MontyTypingError` from `TypeCheckingDiagnostics`.
    ///
    /// If the Python object cannot be allocated, the allocation error is returned instead.
    #[must_use]
    pub fn new_err(py: Python<'_>, failure: TypeCheckingDiagnostics) -> PyErr {
        // we need a MontyException to create the base, but it shouldn't be visible anywhere
        let base = MontyError::new(MontyException::new(ExcType::TypeError, None));
        let init = PyClassInitializer::from(base).add_subclass(Self { failure });
        match Py::new(py, init) {
            Ok(err) => PyErr::from_value(err.into_bound(py).into_any()),
            Err(e) => e,
        }
    }
}

#[pymethods]
impl MontyTypingError {
    /// Renders the type error diagnostics with the specified format and color.
    ///
    /// Args:
    ///     format: Output format
    ///     color: Whether to include ANSI color codes in the output.
#[pyo3(signature = (format = "full", color = false))]
    fn display(&self, format: &str, color: bool) -> PyResult<String> {
        // Work on a clone so the stored diagnostics keep their original settings;
        // an unrecognised `format` is surfaced as `ValueError`.
        self.failure
            .clone()
            .color(color)
            .format_from_str(format)
            .map_err(exceptions::PyValueError::new_err)
            .map(|f| f.to_string())
    }

    /// Returns the diagnostics rendered with their stored settings.
    fn __str__(&self) -> String {
        self.failure.to_string()
    }

    /// Returns `MontyTypingError(<diagnostics>)`.
    fn __repr__(&self) -> String {
        format!("MontyTypingError({})", self.failure)
    }
}

/// Raised when Monty code fails during execution.
///
/// Inherits from `MontyError`. Additionally provides `traceback()` to access
/// the Monty stack frames where the error occurred.
#[pyclass(extends=MontyError, module="pydantic_monty")]
pub struct MontyRuntimeError {
    /// The traceback frames where the error occurred (pre-converted to Python objects).
    frames: Vec<Py<PyFrame>>,
}

impl MontyRuntimeError {
    /// Creates a new `MontyRuntimeError` from the given exception data.
    ///
    /// If a frame or the exception object cannot be allocated, that allocation error is
    /// returned instead.
    #[must_use]
    pub fn new_err(py: Python<'_>, exc: MontyException) -> PyErr {
        // Convert stack frames to PyFrame objects
        let frames_result: PyResult<Vec<Py<PyFrame>>> = exc
            .traceback()
            .iter()
            .map(|f| Py::new(py, PyFrame::from_stack_frame(f)))
            .collect();
        let frames = match frames_result {
            Ok(frames) => frames,
            Err(e) => return e,
        };

        let base_error = MontyError::new(exc);
        // Create the MontyRuntimeError with proper initialization
        let runtime_error = Self { frames };
        let init = PyClassInitializer::from(base_error).add_subclass(runtime_error);
        match Py::new(py, init) {
            Ok(err) => PyErr::from_value(err.into_bound(py).into_any()),
            Err(e) => e,
        }
    }
}

#[pymethods]
impl MontyRuntimeError {
    /// Returns the Monty traceback as a list of Frame objects.
    ///
    /// A failure to build the list is raised as a Python exception rather than panicking.
    fn traceback(&self, py: Python<'_>) -> PyResult<Py<PyList>> {
        Ok(PyList::new(py, &self.frames)?.unbind())
    }

    /// Returns formatted exception string.
    ///
    /// Overrides the base class to provide the full traceback when format='traceback'.
#[pyo3(signature = (format = "traceback"))]
    #[expect(clippy::needless_pass_by_value, reason = "required by macro")]
    fn display(slf: PyRef<'_, Self>, format: &str) -> PyResult<String> {
        match format {
            "traceback" => Ok(slf.as_super().exc.to_string()),
            "type-msg" => Ok(slf.as_super().exc.summary()),
            "msg" => Ok(slf.as_super().message().unwrap_or_default().to_string()),
            _ => Err(exceptions::PyValueError::new_err(format!(
                "Invalid display format: '{format}'. Expected 'traceback', 'type-msg', or 'msg'"
            ))),
        }
    }

    /// Returns `<type>: <message>`, or just `<type>` when the message is missing or empty.
    #[expect(clippy::needless_pass_by_value, reason = "required by macro")]
    fn __str__(slf: PyRef<'_, Self>) -> String {
        let parent = slf.as_super();
        let exc_type_name = parent.exc_type();
        if let Some(msg) = parent.message()
            && !msg.is_empty()
        {
            return format!("{exc_type_name}: {msg}");
        }
        format!("{exc_type_name}")
    }

    /// Returns `MontyRuntimeError(<type>: <message>)`, or `MontyRuntimeError(<type>)` when the
    /// message is missing or empty.
    #[expect(clippy::needless_pass_by_value, reason = "required by macro")]
    fn __repr__(slf: PyRef<'_, Self>) -> String {
        let parent = slf.as_super();
        let exc_type_name = parent.exc_type();
        if let Some(msg) = parent.message()
            && !msg.is_empty()
        {
            return format!("MontyRuntimeError({exc_type_name}: {msg})");
        }
        format!("MontyRuntimeError({exc_type_name})")
    }
}

/// A single frame in a Monty traceback.
///
/// Contains all the information needed to display a traceback line:
/// the file location, function name, and optional source code preview.
#[pyclass(name = "Frame", module = "pydantic_monty", frozen, skip_from_py_object)]
#[derive(Debug, Clone)]
pub struct PyFrame {
    /// The filename where the code is located.
    #[pyo3(get)]
    pub filename: String,
    /// Line number (1-based).
    #[pyo3(get)]
    pub line: u16,
    /// Column number (1-based).
    #[pyo3(get)]
    pub column: u16,
    /// End line number (1-based).
    #[pyo3(get)]
    pub end_line: u16,
    /// End column number (1-based).
    #[pyo3(get)]
    pub end_column: u16,
    /// The name of the function, or None for module-level code.
    #[pyo3(get)]
    pub function_name: Option<String>,
    /// The source code line for preview in the traceback.
    #[pyo3(get)]
    pub source_line: Option<String>,
}

#[pymethods]
impl PyFrame {
    /// Returns the frame's fields as a Python dict.
    ///
    /// A failure to populate the dict is raised as a Python exception rather than panicking.
    fn dict(&self, py: Python<'_>) -> PyResult<Py<PyDict>> {
        let dict = PyDict::new(py);
        dict.set_item("filename", &self.filename)?;
        dict.set_item("line", self.line)?;
        dict.set_item("column", self.column)?;
        dict.set_item("end_line", self.end_line)?;
        dict.set_item("end_column", self.end_column)?;
        // `None` fields are stored as Python `None`
        dict.set_item("function_name", self.function_name.as_deref())?;
        dict.set_item("source_line", self.source_line.as_deref())?;
        Ok(dict.unbind())
    }

    /// Returns `Frame(filename=..., line=..., column=..., function_name=...)`.
    fn __repr__(&self) -> String {
        // NOTE(review): the fallback literal for a missing function name is empty in this copy
        // of the file; it may have been an angle-bracketed placeholder lost in extraction.
        let func = self.function_name.as_deref().unwrap_or("");
        format!(
            "Frame(filename='{}', line={}, column={}, function_name='{}')",
            self.filename, self.line, self.column, func
        )
    }
}

impl PyFrame {
    /// Creates a `PyFrame` from Monty's `StackFrame`.
    #[must_use]
    pub fn from_stack_frame(frame: &StackFrame) -> Self {
        Self {
            filename: frame.filename.clone(),
            line: frame.start.line,
            column: frame.start.column,
            end_line: frame.end.line,
            end_column: frame.end.column,
            function_name: frame.frame_name.clone(),
            source_line: frame.preview_line.clone(),
        }
    }
}

/// Converts Monty's `MontyException` to the matching Python exception value.
///
/// Creates an appropriate Python exception type with the message.
/// The traceback information is included in the exception message
/// since PyO3 doesn't provide direct traceback manipulation.
pub fn exc_monty_to_py(py: Python<'_>, exc: MontyException) -> PyErr { let exc_type = exc.exc_type(); let msg = exc.into_message().unwrap_or_default(); match exc_type { ExcType::Exception => exceptions::PyException::new_err(msg), ExcType::BaseException => exceptions::PyBaseException::new_err(msg), ExcType::SystemExit => exceptions::PySystemExit::new_err(msg), ExcType::KeyboardInterrupt => exceptions::PyKeyboardInterrupt::new_err(msg), ExcType::ArithmeticError => exceptions::PyArithmeticError::new_err(msg), ExcType::OverflowError => exceptions::PyOverflowError::new_err(msg), ExcType::ZeroDivisionError => exceptions::PyZeroDivisionError::new_err(msg), ExcType::LookupError => exceptions::PyLookupError::new_err(msg), ExcType::IndexError => exceptions::PyIndexError::new_err(msg), ExcType::KeyError => exceptions::PyKeyError::new_err(msg), ExcType::RuntimeError => exceptions::PyRuntimeError::new_err(msg), ExcType::NotImplementedError => exceptions::PyNotImplementedError::new_err(msg), ExcType::RecursionError => exceptions::PyRecursionError::new_err(msg), ExcType::AssertionError => exceptions::PyAssertionError::new_err(msg), ExcType::AttributeError => exceptions::PyAttributeError::new_err(msg), ExcType::FrozenInstanceError => { if let Ok(exc_cls) = get_frozen_instance_error(py) && let Ok(exc_instance) = exc_cls.call1((PyString::new(py, &msg),)) { return PyErr::from_value(exc_instance); } // if creating the right exception fails, fallback to AttributeError which it's a subclass of exceptions::PyAttributeError::new_err(msg) } ExcType::MemoryError => exceptions::PyMemoryError::new_err(msg), ExcType::NameError => exceptions::PyNameError::new_err(msg), ExcType::UnboundLocalError => exceptions::PyUnboundLocalError::new_err(msg), ExcType::StopIteration => exceptions::PyStopIteration::new_err(msg), ExcType::SyntaxError => exceptions::PySyntaxError::new_err(msg), ExcType::TimeoutError => exceptions::PyTimeoutError::new_err(msg), ExcType::TypeError => 
exceptions::PyTypeError::new_err(msg), ExcType::ValueError => exceptions::PyValueError::new_err(msg), ExcType::UnicodeDecodeError => exceptions::PyUnicodeDecodeError::new_err(msg), ExcType::ImportError => exceptions::PyImportError::new_err(msg), ExcType::ModuleNotFoundError => exceptions::PyModuleNotFoundError::new_err(msg), ExcType::OSError => exceptions::PyOSError::new_err(msg), ExcType::FileNotFoundError => exceptions::PyFileNotFoundError::new_err(msg), ExcType::FileExistsError => exceptions::PyFileExistsError::new_err(msg), ExcType::IsADirectoryError => exceptions::PyIsADirectoryError::new_err(msg), ExcType::NotADirectoryError => exceptions::PyNotADirectoryError::new_err(msg), ExcType::RePatternError => { if let Ok(re_pattern_error) = get_re_pattern_error(py) && let Ok(exc_instance) = re_pattern_error.call1((PyString::new(py, &msg),)) { PyErr::from_value(exc_instance) } else { exceptions::PyRuntimeError::new_err(msg) } } } } /// Converts a python exception to monty. /// /// Used when resuming execution with an exception from Python. pub fn exc_py_to_monty(py: Python<'_>, py_err: &PyErr) -> MontyException { let exc = py_err.value(py); let exc_type = py_err_to_exc_type(exc); let arg = exc.str().ok().map(|s| s.to_string_lossy().into_owned()); MontyException::new(exc_type, arg) } /// Converts a Python exception to Monty's `MontyObject::Exception`. pub fn exc_to_monty_object(exc: &Bound<'_, exceptions::PyBaseException>) -> ::monty::MontyObject { let exc_type = py_err_to_exc_type(exc); let arg = exc.str().ok().map(|s| s.to_string_lossy().into_owned()); ::monty::MontyObject::Exception { exc_type, arg } } /// Maps a Python exception type to Monty's `ExcType` enum. /// /// NOTE: order matters here as some exceptions are subclasses of others! /// In general we group exceptions by their type hierarchy to improve performance. 
fn py_err_to_exc_type(exc: &Bound<'_, exceptions::PyBaseException>) -> ExcType { // Exception hierarchy if exceptions::PyException::type_check(exc) { // put the most commonly used exceptions first if exceptions::PyTypeError::type_check(exc) { ExcType::TypeError // ValueError hierarchy (check UnicodeDecodeError first as it's a subclass) } else if exceptions::PyValueError::type_check(exc) { if exceptions::PyUnicodeDecodeError::type_check(exc) { ExcType::UnicodeDecodeError } else { ExcType::ValueError } } else if exceptions::PyAssertionError::type_check(exc) { ExcType::AssertionError } else if exceptions::PySyntaxError::type_check(exc) { ExcType::SyntaxError // LookupError hierarchy } else if exceptions::PyLookupError::type_check(exc) { if exceptions::PyKeyError::type_check(exc) { ExcType::KeyError } else if exceptions::PyIndexError::type_check(exc) { ExcType::IndexError } else { ExcType::LookupError } // ArithmeticError hierarchy } else if exceptions::PyArithmeticError::type_check(exc) { if exceptions::PyZeroDivisionError::type_check(exc) { ExcType::ZeroDivisionError } else if exceptions::PyOverflowError::type_check(exc) { ExcType::OverflowError } else { ExcType::ArithmeticError } // RuntimeError hierarchy } else if exceptions::PyRuntimeError::type_check(exc) { if exceptions::PyNotImplementedError::type_check(exc) { ExcType::NotImplementedError } else if exceptions::PyRecursionError::type_check(exc) { ExcType::RecursionError } else { ExcType::RuntimeError } // AttributeError hierarchy } else if exceptions::PyAttributeError::type_check(exc) { if is_frozen_instance_error(exc) { ExcType::FrozenInstanceError } else { ExcType::AttributeError } // NameError hierarchy (check UnboundLocalError first as it's a subclass) } else if exceptions::PyNameError::type_check(exc) { if exceptions::PyUnboundLocalError::type_check(exc) { ExcType::UnboundLocalError } else { ExcType::NameError } // OSError hierarchy (check specific subclasses first) } else if 
exceptions::PyOSError::type_check(exc) { if exceptions::PyFileNotFoundError::type_check(exc) { ExcType::FileNotFoundError } else if exceptions::PyFileExistsError::type_check(exc) { ExcType::FileExistsError } else if exceptions::PyIsADirectoryError::type_check(exc) { ExcType::IsADirectoryError } else if exceptions::PyNotADirectoryError::type_check(exc) { ExcType::NotADirectoryError } else { ExcType::OSError } // other standalone exception types } else if exceptions::PyTimeoutError::type_check(exc) { ExcType::TimeoutError } else if exceptions::PyMemoryError::type_check(exc) { ExcType::MemoryError } else { ExcType::Exception } // BaseException direct subclasses } else if exceptions::PySystemExit::type_check(exc) { ExcType::SystemExit } else if exceptions::PyKeyboardInterrupt::type_check(exc) { ExcType::KeyboardInterrupt // Catch-all for BaseException } else { ExcType::BaseException } } /// Checks if an exception is an instance of `dataclasses.FrozenInstanceError`. /// /// Since `FrozenInstanceError` is not a built-in PyO3 exception type, we need to /// check using Python's isinstance against the imported class. fn is_frozen_instance_error(exc: &Bound<'_, exceptions::PyBaseException>) -> bool { if let Ok(frozen_error_cls) = get_frozen_instance_error(exc.py()) { exc.is_instance(frozen_error_cls).unwrap_or(false) } else { false } } fn get_re_pattern_error(py: Python<'_>) -> PyResult<&Bound<'_, PyAny>> { static RE_PATTERN_ERROR: PyOnceLock> = PyOnceLock::new(); if cfg!(Py_3_13) { RE_PATTERN_ERROR.import(py, "re", "PatternError") } else { RE_PATTERN_ERROR.import(py, "re", "error") } } ================================================ FILE: crates/monty-python/src/external.rs ================================================ //! External function callback support. //! //! Allows Python code running in Monty to call back to host Python functions. //! External functions are registered by name and called when Monty execution //! reaches a call to that function. 
use ::monty::{ExtFunctionResult, MontyObject}; use pyo3::{ exceptions::PyRuntimeError, prelude::*, types::{PyDict, PyTuple}, }; use crate::{ convert::{monty_to_py, py_to_monty}, dataclass::DcRegistry, exceptions::exc_py_to_monty, }; /// Dispatches a dataclass method call back to the original Python object. /// /// When Monty encounters a call like `dc.my_method(args)`, the VM pauses with a /// `FrameExit::MethodCall` containing the method name (e.g. `"my_method"`) /// and the dataclass instance as the first arg. This function: /// 1. Converts the first arg (dataclass `self`) back to a Python object /// 2. Calls `getattr(self_obj, method_name)(*remaining_args, **kwargs)` /// 3. Converts the result back to Monty format pub fn dispatch_method_call( py: Python<'_>, function_name: &str, args: &[MontyObject], kwargs: &[(MontyObject, MontyObject)], dc_registry: &DcRegistry, ) -> ExtFunctionResult { match dispatch_method_call_inner(py, function_name, args, kwargs, dc_registry) { Ok(result) => ExtFunctionResult::Return(result), Err(err) => ExtFunctionResult::Error(exc_py_to_monty(py, &err)), } } /// Inner implementation of method dispatch that returns `PyResult` for error handling. fn dispatch_method_call_inner( py: Python<'_>, function_name: &str, args: &[MontyObject], kwargs: &[(MontyObject, MontyObject)], dc_registry: &DcRegistry, ) -> PyResult { // First arg is the dataclass self let mut args_iter = args.iter(); let self_obj = args_iter .next() .ok_or_else(|| PyRuntimeError::new_err("Method call missing self argument"))?; let py_self = monty_to_py(py, self_obj, dc_registry)?; // Get the method from the object let method = py_self.bind(py).getattr(function_name)?; let result = if args.len() == 1 && kwargs.is_empty() { method.call0()? 
} else { // Convert remaining positional arguments let remaining_args: PyResult>> = args_iter.map(|arg| monty_to_py(py, arg, dc_registry)).collect(); let py_args_tuple = PyTuple::new(py, remaining_args?)?; // Call the method let py_kwargs = if kwargs.is_empty() { None } else { // Convert keyword arguments let py_kwargs = PyDict::new(py); for (key, value) in kwargs { let py_key = monty_to_py(py, key, dc_registry)?; let py_value = monty_to_py(py, value, dc_registry)?; py_kwargs.set_item(py_key, py_value)?; } Some(py_kwargs) }; method.call(&py_args_tuple, py_kwargs.as_ref())? }; py_to_monty(&result, dc_registry) } /// Registry that maps external function names to Python callables. /// /// Passed to the execution loop and used to dispatch calls when Monty /// execution pauses at an external function. The `dc_registry` is a /// GIL-protected `PyDict` wrapper, so auto-registration of dataclass types /// encountered in return values is transparent to callers. pub struct ExternalFunctionRegistry<'a, 'py> { py: Python<'py>, functions: &'py Bound<'py, PyDict>, dc_registry: &'a DcRegistry, } impl<'a, 'py> ExternalFunctionRegistry<'a, 'py> { /// Creates a new registry from a Python dict of `name -> callable`. pub fn new(py: Python<'py>, functions: &'py Bound<'py, PyDict>, dc_registry: &'a DcRegistry) -> Self { Self { py, functions, dc_registry, } } /// Calls an external function by name with Monty arguments. /// /// Converts args/kwargs from Monty format, calls the Python callable /// with unpacked `*args, **kwargs`, and converts the result back to Monty format. /// /// If the Python function raises an exception, it's converted to a Monty /// exception that will be raised inside Monty execution. 
pub fn call( &self, function_name: &str, args: &[MontyObject], kwargs: &[(MontyObject, MontyObject)], ) -> ExtFunctionResult { match self.call_inner(function_name, args, kwargs) { Ok(Some(result)) => ExtFunctionResult::Return(result), Ok(None) => ExtFunctionResult::NotFound(function_name.to_owned()), Err(err) => ExtFunctionResult::Error(exc_py_to_monty(self.py, &err)), } } /// Inner implementation that returns `PyResult` for error handling. fn call_inner( &self, function_name: &str, args: &[MontyObject], kwargs: &[(MontyObject, MontyObject)], ) -> PyResult> { // Look up the callable let Some(callable) = self.functions.get_item(function_name)? else { return Ok(None); }; // Convert positional arguments to Python objects let py_args: PyResult>> = args .iter() .map(|arg| monty_to_py(self.py, arg, self.dc_registry)) .collect(); let py_args_tuple = PyTuple::new(self.py, py_args?)?; // Convert keyword arguments to Python dict let py_kwargs = PyDict::new(self.py); for (key, value) in kwargs { // Keys in kwargs should be strings let py_key = monty_to_py(self.py, key, self.dc_registry)?; let py_value = monty_to_py(self.py, value, self.dc_registry)?; py_kwargs.set_item(py_key, py_value)?; } // Call the function with unpacked *args, **kwargs let result = if py_kwargs.is_empty() { callable.call1(&py_args_tuple)? } else { callable.call(&py_args_tuple, Some(&py_kwargs))? }; // Convert result back to Monty format py_to_monty(&result, self.dc_registry).map(Some) } } ================================================ FILE: crates/monty-python/src/lib.rs ================================================ //! Python bindings for the Monty sandboxed Python interpreter. //! //! This module provides a Python interface to Monty, allowing execution of //! sandboxed Python code with configurable resource limits and external //! function callbacks. 
mod convert; mod dataclass; mod exceptions; mod external; mod limits; mod monty_cls; mod repl; mod serialization; use std::sync::OnceLock; // Use `::monty` to refer to the external crate (not the pymodule) pub use exceptions::{MontyError, MontyRuntimeError, MontySyntaxError, MontyTypingError, PyFrame}; pub use monty_cls::{PyFunctionSnapshot, PyFutureSnapshot, PyMonty, PyMontyComplete, PyNameLookupSnapshot}; use pyo3::prelude::*; pub use repl::PyMontyRepl; /// Copied from `get_pydantic_core_version` in pydantic fn get_version() -> &'static str { static VERSION: OnceLock = OnceLock::new(); VERSION.get_or_init(|| { let version = env!("CARGO_PKG_VERSION"); // cargo uses "1.0-alpha1" etc. while python uses "1.0.0a1", this is not full compatibility, // but it's good enough for now // see https://docs.rs/semver/1.0.9/semver/struct.Version.html#method.parse for rust spec // see https://peps.python.org/pep-0440/ for python spec // it seems the dot after "alpha/beta" e.g. "-alpha.1" is not necessary, hence why this works version.replace("-alpha", "a").replace("-beta", "b") }) } /// Monty - A sandboxed Python interpreter written in Rust. 
#[pymodule]
mod _monty {
    use pyo3::prelude::*;

    // Each `#[pymodule_export]` item below is re-exported from the parent module;
    // the `Py*` wrapper types are renamed to drop the `Py` prefix.
    #[pymodule_export]
    use super::MontyError;
    #[pymodule_export]
    use super::MontyRuntimeError;
    #[pymodule_export]
    use super::MontySyntaxError;
    #[pymodule_export]
    use super::MontyTypingError;
    #[pymodule_export]
    use super::PyFrame as Frame;
    #[pymodule_export]
    use super::PyFunctionSnapshot as FunctionSnapshot;
    #[pymodule_export]
    use super::PyFutureSnapshot as FutureSnapshot;
    #[pymodule_export]
    use super::PyMonty as Monty;
    #[pymodule_export]
    use super::PyMontyComplete as MontyComplete;
    #[pymodule_export]
    use super::PyMontyRepl as MontyRepl;
    #[pymodule_export]
    use super::PyNameLookupSnapshot as NameLookupSnapshot;
    // Not exported: only used by `init` below.
    use super::get_version;
    #[pymodule_export]
    use super::serialization::load_repl_snapshot;
    #[pymodule_export]
    use super::serialization::load_snapshot;

    /// Module initializer: adds the `__version__` attribute.
    #[pymodule_init]
    fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
        m.add("__version__", get_version())?;
        Ok(())
    }
}

================================================
FILE: crates/monty-python/src/limits.rs
================================================
//! Python wrapper for Monty's `ResourceLimits`.
//!
//! Provides a TypedDict interface to configure resource limits for code execution,
//! including time limits, memory limits, and recursion depth.

use std::{
    sync::atomic::{AtomicU16, Ordering},
    time::Duration,
};

use monty::{DEFAULT_MAX_RECURSION_DEPTH, ResourceError, ResourceTracker};
use pyo3::{prelude::*, types::PyDict};

use crate::exceptions::exc_py_to_monty;

/// Extracts resource limits from a Python dict.
///
/// The dict should have the following optional keys:
/// - `max_allocations`: Maximum number of heap allocations allowed (int)
/// - `max_duration_secs`: Maximum execution time in seconds (float)
/// - `max_memory`: Maximum heap memory in bytes (int)
/// - `gc_interval`: Run garbage collection every N allocations (int)
/// - `max_recursion_depth`: Maximum function call stack depth (int, default: 1000)
///
/// If a key is missing or set to `None`, that limit is not applied
/// (except `max_recursion_depth` which defaults to 1000).
///
/// Raises `TypeError` if a value is present but has the wrong type.
///
/// Raises `ValueError` if `max_duration_secs` is negative, not finite, or too
/// large to be represented as a `Duration`.
pub fn extract_limits(dict: &Bound<'_, PyDict>) -> PyResult<monty::ResourceLimits> {
    let max_allocations = extract_optional_usize(dict, "max_allocations")?;
    let max_duration_secs = extract_optional_f64(dict, "max_duration_secs")?;
    let max_memory = extract_optional_usize(dict, "max_memory")?;
    let gc_interval = extract_optional_usize(dict, "gc_interval")?;
    // Unlike the other limits, recursion depth always has a value.
    let max_recursion_depth =
        extract_optional_usize(dict, "max_recursion_depth")?.or(Some(DEFAULT_MAX_RECURSION_DEPTH));

    let mut limits = monty::ResourceLimits::new().max_recursion_depth(max_recursion_depth);

    if let Some(max) = max_allocations {
        limits = limits.max_allocations(max);
    }
    if let Some(secs) = max_duration_secs {
        // `Duration::from_secs_f64` panics on negative, NaN or overflowing input. The
        // value comes straight from the caller's dict, so use the fallible constructor
        // and surface bad input as a regular Python exception instead.
        let duration = Duration::try_from_secs_f64(secs).map_err(|err| {
            pyo3::exceptions::PyValueError::new_err(format!("Invalid max_duration_secs {secs}: {err}"))
        })?;
        limits = limits.max_duration(duration);
    }
    if let Some(max) = max_memory {
        limits = limits.max_memory(max);
    }
    if let Some(interval) = gc_interval {
        limits = limits.gc_interval(interval);
    }

    Ok(limits)
}

/// Extracts an optional usize from a dict, raising `TypeError` if the value has the wrong type.
///
/// A missing key and an explicit `None` are both reported as `Ok(None)`.
fn extract_optional_usize(dict: &Bound<'_, PyDict>, key: &str) -> PyResult<Option<usize>> {
    match dict.get_item(key)? {
        None => Ok(None),
        Some(value) if value.is_none() => Ok(None),
        Some(value) => Ok(Some(value.extract()?)),
    }
}

/// Extracts an optional f64 from a dict, raising `TypeError` if the value has the wrong type.
fn extract_optional_f64(dict: &Bound<'_, PyDict>, key: &str) -> PyResult> { match dict.get_item(key)? { None => Ok(None), Some(value) if value.is_none() => Ok(None), Some(value) => Ok(Some(value.extract()?)), } } /// How often to check Python signals (every N calls to `check_time`). /// /// This balances responsiveness to Ctrl+C against performance overhead. /// With ~1000 checks, signal handling adds negligible overhead while still /// responding to interrupts within a reasonable timeframe. const SIGNAL_CHECK_INTERVAL: u16 = 1000; /// A resource tracker that wraps another ResourceTracker and periodically checks Python signals. /// /// This allows Ctrl+C and other Python signals to interrupt long-running code /// executed through the monty interpreter. Signals are checked every /// `SIGNAL_CHECK_INTERVAL` calls to `check_time` (at statement boundaries). #[derive(Debug, serde::Serialize, serde::Deserialize)] pub struct PySignalTracker { inner: T, /// Counter for check_time calls, used to rate-limit signal checks. /// /// Uses `AtomicU16` for interior mutability so `check_time` can take `&self` /// (required by the `ResourceTracker` trait) while remaining `Sync` for PyO3. check_counter: AtomicU16, } impl PySignalTracker { /// Creates a new signal-checking tracker wrapping the given tracker. 
pub fn new(inner: T) -> Self { Self { inner, check_counter: AtomicU16::new(0), } } fn check_python_signals(&self) -> Result<(), ResourceError> { // Periodically check Python signals let count = self.check_counter.fetch_add(1, Ordering::Relaxed).wrapping_add(1); if count.is_multiple_of(SIGNAL_CHECK_INTERVAL) { Python::attach(|py| { py.check_signals() .map_err(|e| ResourceError::Exception(exc_py_to_monty(py, &e))) })?; } Ok(()) } } impl ResourceTracker for PySignalTracker { fn on_allocate(&mut self, get_size: impl FnOnce() -> usize) -> Result<(), ResourceError> { self.inner.on_allocate(get_size) } fn on_free(&mut self, get_size: impl FnOnce() -> usize) { self.inner.on_free(get_size); } fn check_time(&self) -> Result<(), ResourceError> { // First check inner tracker's time limit self.inner.check_time()?; // then periodically check for Python signals self.check_python_signals() } fn check_recursion_depth(&self, current_depth: usize) -> Result<(), ResourceError> { self.inner.check_recursion_depth(current_depth) } fn check_large_result(&self, estimated_bytes: usize) -> Result<(), ResourceError> { self.inner.check_large_result(estimated_bytes) } } ================================================ FILE: crates/monty-python/src/monty_cls.rs ================================================ use std::{ borrow::Cow, fmt::Write, sync::{Mutex, PoisonError}, }; // Use `::monty` to refer to the external crate (not the pymodule) use ::monty::{ ExtFunctionResult, FunctionCall, LimitedTracker, MontyException, MontyObject, MontyRun, NameLookupResult, NoLimitTracker, OsCall, PrintWriter, PrintWriterCallback, ReplFunctionCall, ReplNameLookup, ReplOsCall, ReplProgress, ReplResolveFutures, ReplStartError, ResolveFutures, ResourceTracker, RunProgress, }; use monty::{ExcType, NameLookup}; use monty_type_checking::{SourceFile, type_check}; use pyo3::{ IntoPyObjectExt, exceptions::{PyKeyError, PyRuntimeError, PyTypeError, PyValueError}, intern, prelude::*, types::{PyBytes, PyDict, PyList, 
PyTuple, PyType}, }; use send_wrapper::SendWrapper; use crate::{ convert::{get_docstring, monty_to_py, py_to_monty}, dataclass::DcRegistry, exceptions::{MontyError, MontyTypingError, exc_py_to_monty}, external::{ExternalFunctionRegistry, dispatch_method_call}, limits::{PySignalTracker, extract_limits}, repl::{EitherRepl, FromCoreRepl, PyMontyRepl}, }; /// A sandboxed Python interpreter instance. /// /// Parses and compiles Python code on initialization, then can be run /// multiple times with different input values. This separates the parsing /// cost from execution, making repeated runs more efficient. #[pyclass(name = "Monty", module = "pydantic_monty")] #[derive(Debug)] pub struct PyMonty { /// The compiled code snapshot, ready to execute. runner: MontyRun, /// The artificial name of the python code "file" script_name: String, /// Names of input variables expected by the code. input_names: Vec, /// Registry of dataclass types for reconstructing original types on output. /// /// Maps type pointer identity (`u64`) to the original Python type, allowing /// `isinstance(result, OriginalClass)` to work correctly after round-tripping through Monty. dc_registry: DcRegistry, } #[pymethods] impl PyMonty { /// Creates a new Monty interpreter by parsing the given code. /// /// # Arguments /// * `code` - Python code to execute /// * `inputs` - List of input variable names available in the code /// * `type_check` - Whether to perform type checking on the code /// * `type_check_stubs` - Prefix code to be executed before type checking /// * `dataclass_registry` - Registry of dataclass types for reconstructing original types on output. 
#[new] #[pyo3(signature = (code, *, script_name="main.py", inputs=None, type_check=false, type_check_stubs=None, dataclass_registry=None))] fn new( py: Python<'_>, code: String, script_name: &str, inputs: Option<&Bound<'_, PyList>>, type_check: bool, type_check_stubs: Option<&str>, dataclass_registry: Option<&Bound<'_, PyList>>, ) -> PyResult { let input_names = list_str(inputs, "inputs")?; if type_check { py_type_check(py, &code, script_name, type_check_stubs)?; } // Create the snapshot (parses the code) let runner = MontyRun::new(code, script_name, input_names.clone()).map_err(|e| MontyError::new_err(py, e))?; Ok(Self { runner, script_name: script_name.to_string(), input_names, dc_registry: DcRegistry::from_list(py, dataclass_registry)?, }) } /// Registers a dataclass type for proper isinstance() support on output. /// /// When a dataclass passes through Monty and is returned, it becomes a `MontyDataclass`. /// By registering the original type, `isinstance(result, OriginalClass)` will return `True`. /// /// # Arguments /// * `cls` - The dataclass type to register /// /// # Raises /// * `TypeError` if the argument is not a dataclass type fn register_dataclass(&self, cls: &Bound<'_, PyType>) -> PyResult<()> { self.dc_registry.insert(cls) } /// Performs static type checking on the code. /// /// Analyzes the code for type errors without executing it. This uses /// a subset of Python's type system supported by Monty. /// /// # Args /// * `prefix_code` - Optional prefix to prepend to the code before type checking, /// e.g. with inputs and external function signatures /// /// # Raises /// * `RuntimeError` if type checking infrastructure fails /// * `MontyTypingError` if type errors are found #[pyo3(signature = (prefix_code=None))] fn type_check(&self, py: Python<'_>, prefix_code: Option<&str>) -> PyResult<()> { py_type_check(py, self.runner.code(), &self.script_name, prefix_code) } /// Executes the code and returns the result. 
/// /// # Returns /// The result of the last expression in the code /// /// # Raises /// Various Python exceptions matching what the code would raise #[pyo3(signature = (*, inputs=None, limits=None, external_functions=None, print_callback=None, os=None))] fn run( &self, py: Python<'_>, inputs: Option<&Bound<'_, PyDict>>, limits: Option<&Bound<'_, PyDict>>, external_functions: Option<&Bound<'_, PyDict>>, print_callback: Option<&Bound<'_, PyAny>>, os: Option<&Bound<'_, PyAny>>, ) -> PyResult> { // Clone the Arc handle — all clones share the same underlying registry, // so auto-registrations during execution are visible to all users. let input_values = self.extract_input_values(inputs, &self.dc_registry)?; if let Some(os_callback) = os && !os_callback.is_callable() { let msg = format!("TypeError: '{}' object is not callable", os_callback.get_type().name()?); return Err(PyTypeError::new_err(msg)); } // Build print writer let mut print_cb; let print_writer = match print_callback { Some(cb) => { print_cb = CallbackStringPrint::new(cb); PrintWriter::Callback(&mut print_cb) } None => PrintWriter::Stdout, }; // Run with appropriate tracker type (must branch due to different generic types) if let Some(limits) = limits { let tracker = PySignalTracker::new(LimitedTracker::new(extract_limits(limits)?)); self.run_impl(py, input_values, tracker, external_functions, os, print_writer) } else { let tracker = PySignalTracker::new(NoLimitTracker); self.run_impl(py, input_values, tracker, external_functions, os, print_writer) } } #[pyo3(signature = (*, inputs=None, limits=None, print_callback=None))] fn start<'py>( &self, py: Python<'py>, inputs: Option<&Bound<'py, PyDict>>, limits: Option<&Bound<'py, PyDict>>, print_callback: Option>, ) -> PyResult> { // Clone the Arc handle — shares the same underlying registry let dc_registry = self.dc_registry.clone_ref(py); let input_values = self.extract_input_values(inputs, &dc_registry)?; // Build print writer - CallbackStringPrint is Send so 
GIL can be released let mut print_cb; let print_writer = match &print_callback { Some(cb) => { print_cb = CallbackStringPrint::new(cb); PrintWriter::Callback(&mut print_cb) } None => PrintWriter::Stdout, }; let runner = self.runner.clone(); let print_writer = SendWrapper::new(print_writer); // Helper macro to start execution with GIL released macro_rules! start_impl { ($tracker:expr) => {{ py.detach(|| runner.start(input_values, $tracker, print_writer.take())) .map_err(|e| MontyError::new_err(py, e))? }}; } // Branch on limits (different generic types) let progress = if let Some(limits) = limits { let tracker = PySignalTracker::new(LimitedTracker::new(extract_limits(limits)?)); EitherProgress::Limited(start_impl!(tracker)) } else { let tracker = PySignalTracker::new(NoLimitTracker); EitherProgress::NoLimit(start_impl!(tracker)) }; progress.progress_or_complete( py, self.script_name.clone(), print_callback.map(Bound::unbind), dc_registry, ) } /// Serializes the Monty instance to a binary format. /// /// The serialized data can be stored and later restored with `Monty.load()`. /// This allows caching parsed code to avoid re-parsing on subsequent runs. /// /// # Returns /// Bytes containing the serialized Monty instance. /// /// # Raises /// `ValueError` if serialization fails. fn dump<'py>(&self, py: Python<'py>) -> PyResult> { let serialized = SerializedMonty { runner: self.runner.clone(), script_name: self.script_name.clone(), input_names: self.input_names.clone(), }; let bytes = postcard::to_allocvec(&serialized).map_err(|e| PyValueError::new_err(e.to_string()))?; Ok(PyBytes::new(py, &bytes)) } /// Deserializes a Monty instance from binary format. /// /// # Arguments /// * `data` - The serialized Monty data from `dump()` /// * `dataclass_registry` - Optional list of dataclasses to register /// /// # Returns /// A new Monty instance. /// /// # Raises /// `ValueError` if deserialization fails. 
#[staticmethod]
    #[pyo3(signature = (data, *, dataclass_registry=None))]
    fn load(
        py: Python<'_>,
        data: &Bound<'_, PyBytes>,
        dataclass_registry: Option<&Bound<'_, PyList>>,
    ) -> PyResult<Self> {
        let bytes = data.as_bytes();
        let serialized: SerializedMonty =
            postcard::from_bytes(bytes).map_err(|e| PyValueError::new_err(e.to_string()))?;

        // The registry is rebuilt from the argument; it is not read from `data`.
        Ok(Self {
            runner: serialized.runner,
            script_name: serialized.script_name,
            input_names: serialized.input_names,
            dc_registry: DcRegistry::from_list(py, dataclass_registry)?,
        })
    }

    /// Summarises the instance as its line count, script name and, when any are
    /// declared, its input names.
    fn __repr__(&self) -> String {
        let lines = self.runner.code().lines().count();
        let mut s = format!(
            "Monty(<{} line{} of code>, script_name='{}'",
            lines,
            if lines == 1 { "" } else { "s" },
            self.script_name
        );
        if !self.input_names.is_empty() {
            write!(s, ", inputs={:?}", self.input_names).unwrap();
        }
        s.push(')');
        s
    }
}

/// Type checks `code`, optionally together with a stubs file.
///
/// Returns `RuntimeError` when the checker itself fails and `MontyTypingError`
/// when it reports a diagnostic.
fn py_type_check(py: Python<'_>, code: &str, script_name: &str, type_stubs: Option<&str>) -> PyResult<()> {
    // The stubs, when given, are presented to the checker as `type_stubs.pyi`.
    let type_stubs = type_stubs.map(|type_stubs| SourceFile::new(type_stubs, "type_stubs.pyi"));

    let opt_diagnostics =
        type_check(&SourceFile::new(code, script_name), type_stubs.as_ref()).map_err(PyRuntimeError::new_err)?;

    if let Some(diagnostic) = opt_diagnostics {
        Err(MontyTypingError::new_err(py, diagnostic))
    } else {
        Ok(())
    }
}

impl PyMonty {
    /// Extracts input values from a Python dict in the order they were declared.
    ///
    /// Validates that all required inputs are provided. Any dataclass inputs are
    /// automatically registered in `dc_registry` via `py_to_monty` so they can be
    /// properly reconstructed on output.
fn extract_input_values(
        &self,
        inputs: Option<&Bound<'_, PyDict>>,
        dc_registry: &DcRegistry,
    ) -> PyResult> {
        // No declared inputs: passing any dict (even an empty one) is a TypeError
        if self.input_names.is_empty() {
            if inputs.is_some() {
                return Err(PyTypeError::new_err(
                    "No input variables declared but inputs dict was provided",
                ));
            }
            return Ok(vec![]);
        }

        // Inputs were declared but no dict was supplied at all
        let Some(inputs) = inputs else {
            return Err(PyTypeError::new_err(format!(
                "Missing required inputs: {:?}",
                self.input_names
            )));
        };

        // Extract values in declaration order; a missing key is a KeyError
        self.input_names
            .iter()
            .map(|name| {
                let value = inputs
                    .get_item(name)?
                    .ok_or_else(|| PyKeyError::new_err(format!("Missing required input: '{name}'")))?;
                py_to_monty(&value, dc_registry)
            })
            .collect::>()
    }

    /// Runs code with a generic resource tracker, releasing the GIL during execution.
    ///
    /// Takes `&self` rather than `&mut self` so that `run()` can remain `&self`
    /// (required for concurrent thread access in PyO3); the runner is cloned when
    /// the iterative path needs to consume it.
    ///
    /// Two paths are used:
    /// * when there are no external functions, no `os` callback and no dataclass
    ///   inputs, the code is executed in one shot with `runner.run`;
    /// * otherwise execution is driven step by step via `runner.start` / `resume`,
    ///   servicing function calls, name lookups and OS calls from the host.
    ///
    /// # Errors
    /// * `MontyError` for errors returned by the interpreter
    /// * `RuntimeError` if an external function is called without `external_functions`,
    ///   or if the script tries to resolve async futures
    fn run_impl(
        &self,
        py: Python<'_>,
        input_values: Vec,
        tracker: impl ResourceTracker + Send,
        external_functions: Option<&Bound<'_, PyDict>>,
        os: Option<&Bound<'_, PyAny>>,
        print_output: PrintWriter<'_>,
    ) -> PyResult> {
        // wrap print_output in SendWrapper so that it can be accessed inside the py.detach calls despite
        // no `Send` bound - py.detach() is overly restrictive to prevent `Bound` types going inside
        let mut print_output = SendWrapper::new(print_output);

        // Check if any inputs contain dataclasses (including nested in containers) —
        // if so, we need the iterative path because method calls could happen lazily
        // and need to be dispatched to the host.
        let has_dataclass_inputs = || input_values.iter().any(contains_dataclass);

        if external_functions.is_none() && os.is_none() && !has_dataclass_inputs() {
            // Fast path: nothing can yield to the host, run to completion in one call
            return match py.detach(|| self.runner.run(input_values, tracker, print_output.reborrow())) {
                Ok(v) => monty_to_py(py, &v, &self.dc_registry),
                Err(err) => Err(MontyError::new_err(py, err)),
            };
        }

        // Clone the runner since start() consumes it - allows reuse of the parsed code
        let runner = self.runner.clone();
        let mut progress = py
            .detach(|| runner.start(input_values, tracker, print_output.reborrow()))
            .map_err(|e| MontyError::new_err(py, e))?;

        // Drive the interpreter until it reports completion, answering each yield
        loop {
            match progress {
                RunProgress::Complete(result) => return monty_to_py(py, &result, &self.dc_registry),
                RunProgress::FunctionCall(call) => {
                    // Dataclass method calls have method_call=true and the first arg is the instance
                    let return_value = if call.method_call {
                        dispatch_method_call(py, &call.function_name, &call.args, &call.kwargs, &self.dc_registry)
                    } else if let Some(ext_fns) = external_functions {
                        let registry = ExternalFunctionRegistry::new(py, ext_fns, &self.dc_registry);
                        registry.call(&call.function_name, &call.args, &call.kwargs)
                    } else {
                        return Err(PyRuntimeError::new_err(format!(
                            "External function '{}' called but no external_functions provided",
                            call.function_name
                        )));
                    };

                    progress = py
                        .detach(|| call.resume(return_value, print_output.reborrow()))
                        .map_err(|e| MontyError::new_err(py, e))?;
                }
                RunProgress::NameLookup(lookup) => {
                    // A name resolves only if it is a key of `external_functions`; it is then
                    // exposed to the script as a function object carrying the callable's docstring
                    let result = if let Some(ext_fns) = external_functions
                        && let Some(value) = ext_fns.get_item(&lookup.name)?
                    {
                        NameLookupResult::Value(MontyObject::Function {
                            name: lookup.name.clone(),
                            docstring: get_docstring(&value),
                        })
                    } else {
                        NameLookupResult::Undefined
                    };

                    progress = py
                        .detach(|| lookup.resume(result, print_output.reborrow()))
                        .map_err(|e| MontyError::new_err(py, e))?;
                }
                RunProgress::ResolveFutures(_) => {
                    return Err(PyRuntimeError::new_err("async futures not supported with `Monty.run`"));
                }
                RunProgress::OsCall(call) => {
                    let result: ExtFunctionResult = if let Some(os_callback) = os {
                        // Convert args to Python
                        let py_args: Vec> = call
                            .args
                            .iter()
                            .map(|arg| monty_to_py(py, arg, &self.dc_registry))
                            .collect::>()?;
                        let py_args_tuple = PyTuple::new(py, py_args)?;

                        // Convert kwargs to Python dict
                        let py_kwargs = PyDict::new(py);
                        for (k, v) in &call.kwargs {
                            py_kwargs.set_item(
                                monty_to_py(py, k, &self.dc_registry)?,
                                monty_to_py(py, v, &self.dc_registry)?,
                            )?;
                        }

                        // call the os callback, if an exception is raised, return it to monty
                        match os_callback.call1((call.function.to_string(), py_args_tuple, py_kwargs)) {
                            Ok(result) => py_to_monty(&result, &self.dc_registry)?.into(),
                            Err(err) => exc_py_to_monty(py, &err).into(),
                        }
                    } else {
                        // No callback supplied: the script sees a NotImplementedError
                        MontyException::new(
                            ExcType::NotImplementedError,
                            Some(format!("OS function '{}' not implemented", call.function)),
                        )
                        .into()
                    };

                    progress = py
                        .detach(|| call.resume(result, print_output.reborrow()))
                        .map_err(|e| MontyError::new_err(py, e))?;
                }
            }
        }
    }
}

/// pyclass doesn't support generic types, hence hard coding the generics
#[derive(Debug)]
pub(crate) enum EitherProgress {
    NoLimit(RunProgress>),
    Limited(RunProgress>),
    /// REPL progress with back-reference to the owning `PyMontyRepl` for auto-restore.
    ReplNoLimit(ReplProgress>, Py),
    /// REPL progress with back-reference to the owning `PyMontyRepl` for auto-restore.
    ReplLimited(ReplProgress>, Py),
}

impl EitherProgress {
    /// Converts progress into the appropriate Python object:
    /// function snapshot, name lookup snapshot, future snapshot, or complete.
pub(crate) fn progress_or_complete(
        self,
        py: Python<'_>,
        script_name: String,
        print_callback: Option>,
        dc_registry: DcRegistry,
    ) -> PyResult> {
        // Run variants go through `run_progress_to_py`; REPL variants additionally
        // hand their owner over to `repl_progress_to_py`.
        match self {
            Self::NoLimit(p) => run_progress_to_py(py, p, script_name, print_callback, dc_registry),
            Self::Limited(p) => run_progress_to_py(py, p, script_name, print_callback, dc_registry),
            Self::ReplNoLimit(p, owner) => repl_progress_to_py(py, p, script_name, print_callback, dc_registry, owner),
            Self::ReplLimited(p, owner) => repl_progress_to_py(py, p, script_name, print_callback, dc_registry, owner),
        }
    }
}

/// Converts a `RunProgress` into the appropriate Python snapshot type.
///
/// `Complete` becomes a `PyMontyComplete`; function and OS calls become a
/// `PyFunctionSnapshot`; pending futures become a `PyFutureSnapshot`; name lookups
/// become a `PyNameLookupSnapshot`. `script_name`, `print_callback` and `dc_registry`
/// are moved into whichever snapshot is created.
fn run_progress_to_py(
    py: Python<'_>,
    progress: RunProgress,
    script_name: String,
    print_callback: Option>,
    dc_registry: DcRegistry,
) -> PyResult>
where
    EitherFunctionSnapshot: FromFunctionCall + FromOsCall,
    EitherLookupSnapshot: FromNameLookup,
    EitherFutureSnapshot: FromResolveFutures,
{
    match progress {
        RunProgress::Complete(result) => PyMontyComplete::create(py, &result, &dc_registry),
        RunProgress::FunctionCall(call) => {
            PyFunctionSnapshot::function_call(py, call, script_name, print_callback, dc_registry)
        }
        RunProgress::OsCall(call) => PyFunctionSnapshot::os_call(py, call, script_name, print_callback, dc_registry),
        RunProgress::ResolveFutures(state) => {
            PyFutureSnapshot::new_py_any(py, state, script_name, print_callback, dc_registry)
        }
        RunProgress::NameLookup(lookup) => {
            PyNameLookupSnapshot::new_py_any(py, lookup, script_name, print_callback, dc_registry)
        }
    }
}

/// Converts a `ReplProgress` into the appropriate Python snapshot type.
///
/// On completion, restores the REPL state into `repl_owner` before returning `MontyComplete`.
/// The `repl_owner` is propagated into snapshot enum variants so the chain can continue.
fn repl_progress_to_py(
    py: Python<'_>,
    progress: ReplProgress,
    script_name: String,
    print_callback: Option>,
    dc_registry: DcRegistry,
    repl_owner: Py,
) -> PyResult>
where
    EitherFunctionSnapshot: FromReplFunctionCall + FromReplOsCall,
    EitherLookupSnapshot: FromReplNameLookup,
    EitherFutureSnapshot: FromReplResolveFutures,
    EitherRepl: FromCoreRepl,
{
    match progress {
        ReplProgress::Complete { repl, value } => {
            // Execution finished: give the REPL back to its owner before building the result
            repl_owner.get().put_repl(EitherRepl::from_core(repl));
            PyMontyComplete::create(py, &value, &dc_registry)
        }
        ReplProgress::FunctionCall(call) => {
            PyFunctionSnapshot::repl_function_call(py, call, script_name, print_callback, dc_registry, repl_owner)
        }
        ReplProgress::OsCall(call) => {
            PyFunctionSnapshot::repl_os_call(py, call, script_name, print_callback, dc_registry, repl_owner)
        }
        ReplProgress::NameLookup(lookup) => {
            // Copy the name out first: `lookup` is moved into the snapshot below
            let variable_name = lookup.name.clone();
            PyNameLookupSnapshot::repl_name_lookup(
                py,
                lookup,
                script_name,
                print_callback,
                dc_registry,
                repl_owner,
                variable_name,
            )
        }
        ReplProgress::ResolveFutures(state) => {
            PyFutureSnapshot::repl_resolve_futures(py, state, script_name, print_callback, dc_registry, repl_owner)
        }
    }
}

/// Runtime execution snapshot, holds either a `FunctionCall` or `OsCall` for both
/// resource tracker variants since pyclass structs can't be generic.
///
/// Also holds REPL variants (`ReplFunctionCall`, `ReplOsCall`) for `MontyRepl.feed_start()`.
/// REPL variants carry a `Py` back-reference so the REPL can be auto-restored
/// on completion or error.
///
/// Used internally by `PyFunctionSnapshot` to store execution state. Both `FunctionCall`
/// and `OsCall` have the same `resume()` signature, so we dispatch to the appropriate
/// inner type based on the variant.
///
/// The `Done` variant indicates the snapshot has been consumed.
///
/// Serde: REPL variants serialize as their non-REPL counterparts (stripping the owner).
/// Deserialization always produces non-REPL variants.
#[derive(Debug)]
pub(crate) enum EitherFunctionSnapshot {
    // Run variants (from Monty.start())
    NoLimitFn(FunctionCall>),
    NoLimitOs(OsCall>),
    LimitedFn(FunctionCall>),
    LimitedOs(OsCall>),
    // REPL variants (from MontyRepl.feed_start()) — carry the REPL owner
    ReplNoLimitFn(ReplFunctionCall>, Py),
    ReplNoLimitOs(ReplOsCall>, Py),
    ReplLimitedFn(ReplFunctionCall>, Py),
    ReplLimitedOs(ReplOsCall>, Py),
    /// Sentinel indicating the snapshot has been consumed via `resume()`.
    Done,
}

/// Helper trait for wrapping `FunctionCall` into `EitherFunctionSnapshot`.
///
/// One impl exists per resource tracker variant, so generic code can pick the
/// matching enum variant without knowing the concrete tracker.
trait FromFunctionCall {
    /// Wraps a function call into the appropriate variant.
    fn from_fn(call: FunctionCall) -> Self;
}

// Produces the `NoLimitFn` variant
impl FromFunctionCall> for EitherFunctionSnapshot {
    fn from_fn(call: FunctionCall>) -> Self {
        Self::NoLimitFn(call)
    }
}

// Produces the `LimitedFn` variant
impl FromFunctionCall> for EitherFunctionSnapshot {
    fn from_fn(call: FunctionCall>) -> Self {
        Self::LimitedFn(call)
    }
}

/// Helper trait for wrapping `OsCall` into `EitherFunctionSnapshot`.
trait FromOsCall {
    /// Wraps an OS call into the appropriate variant.
    fn from_os(call: OsCall) -> Self;
}

// Produces the `NoLimitOs` variant
impl FromOsCall> for EitherFunctionSnapshot {
    fn from_os(call: OsCall>) -> Self {
        Self::NoLimitOs(call)
    }
}

// Produces the `LimitedOs` variant
impl FromOsCall> for EitherFunctionSnapshot {
    fn from_os(call: OsCall>) -> Self {
        Self::LimitedOs(call)
    }
}

/// Helper trait for wrapping `ReplFunctionCall` into `EitherFunctionSnapshot`.
trait FromReplFunctionCall {
    /// Wraps a REPL function call into the appropriate variant.
    fn from_repl_fn(call: ReplFunctionCall, owner: Py) -> Self;
}

// Produces the `ReplNoLimitFn` variant, keeping the REPL owner alongside the call
impl FromReplFunctionCall> for EitherFunctionSnapshot {
    fn from_repl_fn(call: ReplFunctionCall>, owner: Py) -> Self {
        Self::ReplNoLimitFn(call, owner)
    }
}

// Produces the `ReplLimitedFn` variant, keeping the REPL owner alongside the call
impl FromReplFunctionCall> for EitherFunctionSnapshot {
    fn from_repl_fn(call: ReplFunctionCall>, owner: Py) -> Self {
        Self::ReplLimitedFn(call, owner)
    }
}

/// Helper trait for wrapping `ReplOsCall` into `EitherFunctionSnapshot`.
trait FromReplOsCall {
    /// Wraps a REPL OS call into the appropriate variant.
    fn from_repl_os(call: ReplOsCall, owner: Py) -> Self;
}

// Produces the `ReplNoLimitOs` variant, keeping the REPL owner alongside the call
impl FromReplOsCall> for EitherFunctionSnapshot {
    fn from_repl_os(call: ReplOsCall>, owner: Py) -> Self {
        Self::ReplNoLimitOs(call, owner)
    }
}

// Produces the `ReplLimitedOs` variant, keeping the REPL owner alongside the call
impl FromReplOsCall> for EitherFunctionSnapshot {
    fn from_repl_os(call: ReplOsCall>, owner: Py) -> Self {
        Self::ReplLimitedOs(call, owner)
    }
}

/// Snapshot generated during execution when monty yields to the host for a function call.
///
/// The execution state lives behind a `Mutex` so `resume()` can take it out through
/// `&self`; the remaining public fields are read-only copies exposed to Python.
#[pyclass(name = "FunctionSnapshot", module = "pydantic_monty")]
#[derive(Debug)]
pub struct PyFunctionSnapshot {
    snapshot: Mutex,
    print_callback: Option>,
    dc_registry: DcRegistry,

    /// Name of the script being executed
    #[pyo3(get)]
    pub script_name: String,

    /// Whether this call refers to an OS function
    #[pyo3(get)]
    pub is_os_function: bool,

    /// Whether this call is a dataclass method call (first arg is `self`)
    #[pyo3(get)]
    pub is_method_call: bool,

    /// The name of the function being called.
    #[pyo3(get)]
    pub function_name: String,

    /// The positional arguments passed to the function.
    #[pyo3(get)]
    pub args: Py,

    /// The keyword arguments passed to the function (key, value pairs).
    #[pyo3(get)]
    pub kwargs: Py,

    /// The unique identifier for this call
    #[pyo3(get)]
    pub call_id: u32,
}

impl PyFunctionSnapshot {
    /// Creates a `PyFunctionSnapshot` for an external function call.
    ///
    /// Extracts display fields from the `FunctionCall` before moving it into
    /// `EitherFunctionSnapshot` via `FromFunctionCall::from_fn`.
fn function_call(
        py: Python<'_>,
        call: FunctionCall,
        script_name: String,
        print_callback: Option>,
        dc_registry: DcRegistry,
    ) -> PyResult>
    where
        EitherFunctionSnapshot: FromFunctionCall,
    {
        // Copy out everything Python needs to see before `call` is moved into the snapshot
        let function_name = call.function_name.clone();
        let call_id = call.call_id;
        let method_call = call.method_call;

        // Conversion errors for positional args are deferred until `items?` below
        let items: PyResult>> = call
            .args
            .iter()
            .map(|item| monty_to_py(py, item, &dc_registry))
            .collect();

        let dict = PyDict::new(py);
        for (k, v) in &call.kwargs {
            dict.set_item(monty_to_py(py, k, &dc_registry)?, monty_to_py(py, v, &dc_registry)?)?;
        }

        let slf = Self {
            snapshot: Mutex::new(EitherFunctionSnapshot::from_fn(call)),
            print_callback,
            script_name,
            is_os_function: false,
            is_method_call: method_call,
            function_name,
            args: PyTuple::new(py, items?)?.unbind(),
            kwargs: dict.unbind(),
            call_id,
            dc_registry,
        };
        slf.into_bound_py_any(py)
    }

    /// Creates a `PyFunctionSnapshot` for an OS-level call.
    ///
    /// Extracts display fields from the `OsCall` before moving it into
    /// `EitherFunctionSnapshot` via `FromOsCall::from_os`. The resulting snapshot
    /// always has `is_os_function = true` and `is_method_call = false`.
    fn os_call(
        py: Python<'_>,
        call: OsCall,
        script_name: String,
        print_callback: Option>,
        dc_registry: DcRegistry,
    ) -> PyResult>
    where
        EitherFunctionSnapshot: FromOsCall,
    {
        // The OS function is rendered to its string form for display in Python
        let function_name = call.function.to_string();
        let call_id = call.call_id;

        // Conversion errors for positional args are deferred until `items?` below
        let items: PyResult>> = call
            .args
            .iter()
            .map(|item| monty_to_py(py, item, &dc_registry))
            .collect();

        let dict = PyDict::new(py);
        for (k, v) in &call.kwargs {
            dict.set_item(monty_to_py(py, k, &dc_registry)?, monty_to_py(py, v, &dc_registry)?)?;
        }

        let slf = Self {
            snapshot: Mutex::new(EitherFunctionSnapshot::from_os(call)),
            print_callback,
            script_name,
            is_os_function: true,
            is_method_call: false,
            function_name,
            args: PyTuple::new(py, items?)?.unbind(),
            kwargs: dict.unbind(),
            call_id,
            dc_registry,
        };
        slf.into_bound_py_any(py)
    }

    /// Creates a `PyFunctionSnapshot` for a REPL external function call.
fn repl_function_call(
        py: Python<'_>,
        call: ReplFunctionCall,
        script_name: String,
        print_callback: Option>,
        dc_registry: DcRegistry,
        repl_owner: Py,
    ) -> PyResult>
    where
        EitherFunctionSnapshot: FromReplFunctionCall,
    {
        // Copy out everything Python needs to see before `call` is moved into the snapshot
        let function_name = call.function_name.clone();
        let call_id = call.call_id;
        let method_call = call.method_call;

        // Conversion errors for positional args are deferred until `items?` below
        let items: PyResult>> = call
            .args
            .iter()
            .map(|item| monty_to_py(py, item, &dc_registry))
            .collect();

        let dict = PyDict::new(py);
        for (k, v) in &call.kwargs {
            dict.set_item(monty_to_py(py, k, &dc_registry)?, monty_to_py(py, v, &dc_registry)?)?;
        }

        let slf = Self {
            // The REPL owner travels with the call so a later `resume()` can reach it
            snapshot: Mutex::new(EitherFunctionSnapshot::from_repl_fn(call, repl_owner)),
            print_callback,
            script_name,
            is_os_function: false,
            is_method_call: method_call,
            function_name,
            args: PyTuple::new(py, items?)?.unbind(),
            kwargs: dict.unbind(),
            call_id,
            dc_registry,
        };
        slf.into_bound_py_any(py)
    }

    /// Creates a `PyFunctionSnapshot` for a REPL OS-level call.
    ///
    /// Mirrors `os_call`, but wraps the call with `FromReplOsCall::from_repl_os` so the
    /// snapshot also carries `repl_owner`.
    fn repl_os_call(
        py: Python<'_>,
        call: ReplOsCall,
        script_name: String,
        print_callback: Option>,
        dc_registry: DcRegistry,
        repl_owner: Py,
    ) -> PyResult>
    where
        EitherFunctionSnapshot: FromReplOsCall,
    {
        // The OS function is rendered to its string form for display in Python
        let function_name = call.function.to_string();
        let call_id = call.call_id;

        // Conversion errors for positional args are deferred until `items?` below
        let items: PyResult>> = call
            .args
            .iter()
            .map(|item| monty_to_py(py, item, &dc_registry))
            .collect();

        let dict = PyDict::new(py);
        for (k, v) in &call.kwargs {
            dict.set_item(monty_to_py(py, k, &dc_registry)?, monty_to_py(py, v, &dc_registry)?)?;
        }

        let slf = Self {
            snapshot: Mutex::new(EitherFunctionSnapshot::from_repl_os(call, repl_owner)),
            print_callback,
            script_name,
            is_os_function: true,
            is_method_call: false,
            function_name,
            args: PyTuple::new(py, items?)?.unbind(),
            kwargs: dict.unbind(),
            call_id,
            dc_registry,
        };
        slf.into_bound_py_any(py)
    }

    /// Constructs a `PyFunctionSnapshot` from deserialized parts.
    ///
    /// Used by `load_snapshot` and `load_repl_snapshot` to reconstruct snapshot objects.
#[expect(clippy::too_many_arguments)]
    pub(crate) fn from_deserialized(
        py: Python<'_>,
        snapshot: EitherFunctionSnapshot,
        print_callback: Option>,
        dc_registry: DcRegistry,
        script_name: String,
        is_os_function: bool,
        is_method_call: bool,
        function_name: String,
        args: Py,
        kwargs: Py,
        call_id: u32,
    ) -> PyResult> {
        let slf = Self {
            snapshot: Mutex::new(snapshot),
            print_callback,
            dc_registry,
            script_name,
            is_os_function,
            is_method_call,
            function_name,
            args,
            kwargs,
            call_id,
        };
        slf.into_bound_py_any(py)
    }
}

#[pymethods]
impl PyFunctionSnapshot {
    /// Resumes execution with either a return value, exception or future.
    ///
    /// Exactly one of `return_value`, `exception` or `future` must be provided as a keyword argument.
    ///
    /// The arguments are validated and converted *before* the stored execution state is
    /// taken, so a call that fails with a `TypeError` (or a conversion error) leaves the
    /// snapshot intact and it can be resumed again with corrected arguments. For REPL
    /// snapshots this also avoids dropping the REPL state held inside the snapshot.
    ///
    /// # Raises
    /// * `TypeError` if both arguments are provided, or neither
    /// * `RuntimeError` if the snapshot has already been resumed
    #[pyo3(signature = (**kwargs))]
    pub fn resume<'py>(&self, py: Python<'py>, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult> {
        const ARGS_ERROR: &str = "resume() accepts either return_value or exception, not both";

        // The guard is held until the end of this function, as before
        let mut guard = self
            .snapshot
            .lock()
            .map_err(|_| PyRuntimeError::new_err("Snapshot is currently being resumed by another thread"))?;

        // Validate first: nothing below this point may fail before the state is taken,
        // otherwise an argument error would irrecoverably consume the snapshot
        let Some(kwargs) = kwargs else {
            return Err(PyTypeError::new_err(ARGS_ERROR));
        };
        let external_result = extract_external_result(py, kwargs, ARGS_ERROR, &self.dc_registry, self.call_id)?;

        // Arguments are good - now mark the snapshot as consumed and take its state
        let snapshot = std::mem::replace(&mut *guard, EitherFunctionSnapshot::Done);

        // Build print writer before detaching - clone_ref needs py token
        let mut print_cb;
        let print_writer = match &self.print_callback {
            Some(cb) => {
                print_cb = CallbackStringPrint::from_py(cb.clone_ref(py));
                PrintWriter::Callback(&mut print_cb)
            }
            None => PrintWriter::Stdout,
        };
        // wrap print_writer in SendWrapper so that it can be accessed inside the py.detach calls despite
        // no `Send` bound - py.detach() is overly restrictive to prevent `Bound` types going inside
        let mut print_writer = SendWrapper::new(print_writer);

        // Run variants map interpreter errors straight to `MontyError`; REPL variants
        // go through `restore_repl_from_repl_start_error` with their owner
        let progress = match snapshot {
            EitherFunctionSnapshot::NoLimitFn(call) => {
                let result = py.detach(|| call.resume(external_result, print_writer.reborrow()));
                EitherProgress::NoLimit(result.map_err(|e| MontyError::new_err(py, e))?)
            }
            EitherFunctionSnapshot::NoLimitOs(call) => {
                let result = py.detach(|| call.resume(external_result, print_writer.reborrow()));
                EitherProgress::NoLimit(result.map_err(|e| MontyError::new_err(py, e))?)
            }
            EitherFunctionSnapshot::LimitedFn(call) => {
                let result = py.detach(|| call.resume(external_result, print_writer.reborrow()));
                EitherProgress::Limited(result.map_err(|e| MontyError::new_err(py, e))?)
            }
            EitherFunctionSnapshot::LimitedOs(call) => {
                let result = py.detach(|| call.resume(external_result, print_writer.reborrow()));
                EitherProgress::Limited(result.map_err(|e| MontyError::new_err(py, e))?)
            }
            EitherFunctionSnapshot::ReplNoLimitFn(call, owner) => {
                let result = py
                    .detach(|| call.resume(external_result, print_writer.reborrow()))
                    .map_err(|e| restore_repl_from_repl_start_error(py, &owner, *e))?;
                EitherProgress::ReplNoLimit(result, owner)
            }
            EitherFunctionSnapshot::ReplNoLimitOs(call, owner) => {
                let result = py
                    .detach(|| call.resume(external_result, print_writer.reborrow()))
                    .map_err(|e| restore_repl_from_repl_start_error(py, &owner, *e))?;
                EitherProgress::ReplNoLimit(result, owner)
            }
            EitherFunctionSnapshot::ReplLimitedFn(call, owner) => {
                let result = py
                    .detach(|| call.resume(external_result, print_writer.reborrow()))
                    .map_err(|e| restore_repl_from_repl_start_error(py, &owner, *e))?;
                EitherProgress::ReplLimited(result, owner)
            }
            EitherFunctionSnapshot::ReplLimitedOs(call, owner) => {
                let result = py
                    .detach(|| call.resume(external_result, print_writer.reborrow()))
                    .map_err(|e| restore_repl_from_repl_start_error(py, &owner, *e))?;
                EitherProgress::ReplLimited(result, owner)
            }
            EitherFunctionSnapshot::Done => return Err(PyRuntimeError::new_err("Progress already resumed")),
        };

        // The next snapshot/complete object gets its own handles to the registry and callback
        let dc_registry = self.dc_registry.clone_ref(py);
        progress.progress_or_complete(
            py,
            self.script_name.clone(),
            self.print_callback.as_ref().map(|cb| cb.clone_ref(py)),
            dc_registry,
        )
    }

    /// Serializes the FunctionSnapshot instance to a binary format.
    ///
    /// The serialized data can be stored and later restored with `load_snapshot()`
    /// or `load_repl_snapshot()`. REPL snapshots automatically include the REPL state.
    ///
    /// Note: The `print_callback` is not serialized and must be re-provided when loading.
    ///
    /// # Returns
    /// Bytes containing the serialized FunctionSnapshot instance.
    ///
    /// # Raises
    /// `ValueError` if serialization fails.
    /// `RuntimeError` if the progress has already been resumed.
    fn dump<'py>(&self, py: Python<'py>) -> PyResult> {
        let bytes = crate::serialization::dump_function_snapshot(
            py,
            &self.snapshot,
            &self.script_name,
            self.is_os_function,
            self.is_method_call,
            &self.function_name,
            &self.args,
            &self.kwargs,
            self.call_id,
            &self.dc_registry,
        )?;
        Ok(PyBytes::new(py, &bytes))
    }

    /// Builds the Python `repr()` from the script name, function name and the Python
    /// `repr()` of the stored `args` and `kwargs`.
    fn __repr__(&self, py: Python<'_>) -> PyResult {
        Ok(format!(
            "FunctionSnapshot(script_name='{}', function_name='{}', args={}, kwargs={})",
            self.script_name,
            self.function_name,
            self.args.bind(py).repr()?,
            self.kwargs.bind(py).repr()?
        ))
    }
}

/// Runtime execution snapshot, holds a `NameLookup` for both
/// resource tracker variants since pyclass structs can't be generic.
///
/// Also holds REPL variants with `Py` for `MontyRepl.feed_start()`.
///
/// The `Done` variant indicates the snapshot has been consumed.
#[derive(Debug)]
pub(crate) enum EitherLookupSnapshot {
    NoLimit(NameLookup>),
    Limited(NameLookup>),
    ReplNoLimit(ReplNameLookup>, Py),
    ReplLimited(ReplNameLookup>, Py),
    /// Sentinel indicating the snapshot has been consumed via `resume()`.
    Done,
}

/// Helper trait for wrapping `NameLookup` into `EitherLookupSnapshot`.
trait FromNameLookup {
    /// Wraps a name lookup into the appropriate variant.
fn from_name_lookup(lookup: NameLookup) -> Self;
}

// Produces the `NoLimit` variant
impl FromNameLookup> for EitherLookupSnapshot {
    fn from_name_lookup(lookup: NameLookup>) -> Self {
        Self::NoLimit(lookup)
    }
}

// Produces the `Limited` variant
impl FromNameLookup> for EitherLookupSnapshot {
    fn from_name_lookup(lookup: NameLookup>) -> Self {
        Self::Limited(lookup)
    }
}

/// Helper trait for wrapping `ReplNameLookup` into `EitherLookupSnapshot`.
trait FromReplNameLookup {
    /// Wraps a REPL name lookup into the appropriate variant.
    fn from_repl_name_lookup(lookup: ReplNameLookup, owner: Py) -> Self;
}

// Produces the `ReplNoLimit` variant, keeping the REPL owner alongside the lookup
impl FromReplNameLookup> for EitherLookupSnapshot {
    fn from_repl_name_lookup(lookup: ReplNameLookup>, owner: Py) -> Self {
        Self::ReplNoLimit(lookup, owner)
    }
}

// Produces the `ReplLimited` variant, keeping the REPL owner alongside the lookup
impl FromReplNameLookup> for EitherLookupSnapshot {
    fn from_repl_name_lookup(lookup: ReplNameLookup>, owner: Py) -> Self {
        Self::ReplLimited(lookup, owner)
    }
}

/// Snapshot generated during execution when monty yields to the host for a name lookup.
#[pyclass(name = "NameLookupSnapshot", module = "pydantic_monty")]
#[derive(Debug)]
pub struct PyNameLookupSnapshot {
    snapshot: Mutex,
    print_callback: Option>,
    dc_registry: DcRegistry,

    /// Name of the script being executed
    #[pyo3(get)]
    pub script_name: String,

    /// Name of the variable being looked up
    #[pyo3(get)]
    pub variable_name: String,
}

impl PyNameLookupSnapshot {
    /// Creates a `PyNameLookupSnapshot` for a name lookup.
    ///
    /// Copies the looked-up name out of the `NameLookup` before moving it into
    /// `EitherLookupSnapshot` via `FromNameLookup::from_name_lookup`.
    fn new_py_any(
        py: Python<'_>,
        lookup: NameLookup,
        script_name: String,
        print_callback: Option>,
        dc_registry: DcRegistry,
    ) -> PyResult>
    where
        EitherLookupSnapshot: FromNameLookup,
    {
        let variable_name = lookup.name.clone();
        let slf = Self {
            snapshot: Mutex::new(EitherLookupSnapshot::from_name_lookup(lookup)),
            print_callback,
            dc_registry,
            script_name,
            variable_name,
        };
        slf.into_bound_py_any(py)
    }

    /// Creates a `PyNameLookupSnapshot` for a REPL name lookup.
fn repl_name_lookup(
        py: Python<'_>,
        lookup: ReplNameLookup,
        script_name: String,
        print_callback: Option>,
        dc_registry: DcRegistry,
        repl_owner: Py,
        variable_name: String,
    ) -> PyResult>
    where
        EitherLookupSnapshot: FromReplNameLookup,
    {
        let slf = Self {
            snapshot: Mutex::new(EitherLookupSnapshot::from_repl_name_lookup(lookup, repl_owner)),
            print_callback,
            dc_registry,
            script_name,
            variable_name,
        };
        slf.into_bound_py_any(py)
    }

    /// Constructs a `PyNameLookupSnapshot` from deserialized parts.
    pub(crate) fn from_deserialized(
        py: Python<'_>,
        snapshot: EitherLookupSnapshot,
        print_callback: Option>,
        dc_registry: DcRegistry,
        script_name: String,
        variable_name: String,
    ) -> PyResult> {
        let slf = Self {
            snapshot: Mutex::new(snapshot),
            print_callback,
            dc_registry,
            script_name,
            variable_name,
        };
        slf.into_bound_py_any(py)
    }
}

#[pymethods]
impl PyNameLookupSnapshot {
    /// Resumes execution with either a value or undefined.
    ///
    /// If a `value` keyword argument is present it is converted with `py_to_monty` and
    /// used as the lookup result; otherwise the name is reported as undefined.
    ///
    /// The value is converted *before* the stored execution state is taken, so a
    /// conversion failure leaves the snapshot intact (and, for REPL snapshots, does not
    /// drop the REPL state held inside it).
    ///
    /// # Raises
    /// * `RuntimeError` if the snapshot has already been resumed
    #[pyo3(signature = (**kwargs))]
    pub fn resume<'py>(&self, py: Python<'py>, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult> {
        // The guard is held until the end of this function, as before
        let mut guard = self
            .snapshot
            .lock()
            .map_err(|_| PyRuntimeError::new_err("Snapshot is currently being resumed by another thread"))?;

        // Convert first: a failure here must not consume the snapshot
        let lookup_result = if let Some(kwargs) = kwargs
            && let Some(value) = kwargs.get_item(intern!(py, "value"))?
        {
            NameLookupResult::Value(py_to_monty(&value, &self.dc_registry)?)
        } else {
            NameLookupResult::Undefined
        };

        // Input is good - now mark the snapshot as consumed and take its state
        let snapshot = std::mem::replace(&mut *guard, EitherLookupSnapshot::Done);

        // Build print writer before detaching - clone_ref needs py token
        let mut print_cb;
        let print_writer = match &self.print_callback {
            Some(cb) => {
                print_cb = CallbackStringPrint::from_py(cb.clone_ref(py));
                PrintWriter::Callback(&mut print_cb)
            }
            None => PrintWriter::Stdout,
        };
        let mut print_writer = SendWrapper::new(print_writer);

        // Run variants map interpreter errors straight to `MontyError`; REPL variants
        // go through `restore_repl_from_repl_start_error` with their owner
        let progress = match snapshot {
            EitherLookupSnapshot::NoLimit(snapshot) => {
                let result = py.detach(|| snapshot.resume(lookup_result, print_writer.reborrow()));
                EitherProgress::NoLimit(result.map_err(|e| MontyError::new_err(py, e))?)
            }
            EitherLookupSnapshot::Limited(snapshot) => {
                let result = py.detach(|| snapshot.resume(lookup_result, print_writer.reborrow()));
                EitherProgress::Limited(result.map_err(|e| MontyError::new_err(py, e))?)
            }
            EitherLookupSnapshot::ReplNoLimit(snapshot, owner) => {
                let result = py
                    .detach(|| snapshot.resume(lookup_result, print_writer.reborrow()))
                    .map_err(|e| restore_repl_from_repl_start_error(py, &owner, *e))?;
                EitherProgress::ReplNoLimit(result, owner)
            }
            EitherLookupSnapshot::ReplLimited(snapshot, owner) => {
                let result = py
                    .detach(|| snapshot.resume(lookup_result, print_writer.reborrow()))
                    .map_err(|e| restore_repl_from_repl_start_error(py, &owner, *e))?;
                EitherProgress::ReplLimited(result, owner)
            }
            EitherLookupSnapshot::Done => return Err(PyRuntimeError::new_err("Progress already resumed")),
        };

        // The next snapshot/complete object gets its own handles to the registry and callback
        let dc_registry = self.dc_registry.clone_ref(py);
        progress.progress_or_complete(
            py,
            self.script_name.clone(),
            self.print_callback.as_ref().map(|cb| cb.clone_ref(py)),
            dc_registry,
        )
    }

    /// Serializes the NameLookupSnapshot instance to a binary format.
    ///
    /// The serialized data can be stored and later restored with `load_snapshot()`
    /// or `load_repl_snapshot()`. REPL snapshots automatically include the REPL state.
    ///
    /// Note: The `print_callback` is not serialized and must be re-provided when loading.
    ///
    /// # Returns
    /// Bytes containing the serialized NameLookupSnapshot instance.
    ///
    /// # Raises
    /// `ValueError` if serialization fails.
    /// `RuntimeError` if the progress has already been resumed.
    fn dump<'py>(&self, py: Python<'py>) -> PyResult> {
        let bytes =
            crate::serialization::dump_lookup_snapshot(&self.snapshot, &self.script_name, &self.variable_name)?;
        Ok(PyBytes::new(py, &bytes))
    }

    /// Builds the Python `repr()` from the script name and the (debug-quoted) variable name.
    fn __repr__(&self) -> String {
        format!(
            "NameLookupSnapshot(script_name='{}', variable_name={:?})",
            self.script_name, self.variable_name
        )
    }
}

/// Holds a `ResolveFutures` for either resource tracker variant.
///
/// Also holds REPL variants with `Py` for `MontyRepl.feed_start()`.
///
/// Used internally by `PyFutureSnapshot` to store execution state when
/// awaiting resolution of pending async external calls.
#[derive(Debug)]
pub(crate) enum EitherFutureSnapshot {
    NoLimit(ResolveFutures>),
    Limited(ResolveFutures>),
    ReplNoLimit(ReplResolveFutures>, Py),
    ReplLimited(ReplResolveFutures>, Py),
    /// Sentinel indicating the snapshot has been consumed via `resume()`.
    Done,
}

/// Helper trait for wrapping `ResolveFutures` into `EitherFutureSnapshot`.
trait FromResolveFutures {
    /// Wraps a resolve-futures state into the appropriate variant.
    fn from_resolve_futures(state: ResolveFutures) -> Self;
}

// Produces the `NoLimit` variant
impl FromResolveFutures> for EitherFutureSnapshot {
    fn from_resolve_futures(state: ResolveFutures>) -> Self {
        Self::NoLimit(state)
    }
}

// Produces the `Limited` variant
impl FromResolveFutures> for EitherFutureSnapshot {
    fn from_resolve_futures(state: ResolveFutures>) -> Self {
        Self::Limited(state)
    }
}

/// Helper trait for wrapping `ReplResolveFutures` into `EitherFutureSnapshot`.
trait FromReplResolveFutures {
    /// Wraps a REPL resolve-futures state into the appropriate variant.
fn from_repl_resolve_futures(state: ReplResolveFutures, owner: Py) -> Self;
}

// Produces the `ReplNoLimit` variant, keeping the REPL owner alongside the state
impl FromReplResolveFutures> for EitherFutureSnapshot {
    fn from_repl_resolve_futures(
        state: ReplResolveFutures>,
        owner: Py,
    ) -> Self {
        Self::ReplNoLimit(state, owner)
    }
}

// Produces the `ReplLimited` variant, keeping the REPL owner alongside the state
impl FromReplResolveFutures> for EitherFutureSnapshot {
    fn from_repl_resolve_futures(
        state: ReplResolveFutures>,
        owner: Py,
    ) -> Self {
        Self::ReplLimited(state, owner)
    }
}

/// Snapshot generated during execution when monty yields to the host to resolve a future.
///
/// Works for both `Monty.start()` and `MontyRepl.feed_start()`.
#[pyclass(name = "FutureSnapshot", module = "pydantic_monty", frozen)]
#[derive(Debug)]
pub struct PyFutureSnapshot {
    snapshot: Mutex,
    print_callback: Option>,
    dc_registry: DcRegistry,

    /// Name of the script being executed
    #[pyo3(get)]
    pub script_name: String,
}

impl PyFutureSnapshot {
    /// Creates a `PyFutureSnapshot` for a (non-REPL) resolve-futures state, wrapping it
    /// via `FromResolveFutures::from_resolve_futures`.
    fn new_py_any(
        py: Python<'_>,
        state: ResolveFutures,
        script_name: String,
        print_callback: Option>,
        dc_registry: DcRegistry,
    ) -> PyResult>
    where
        EitherFutureSnapshot: FromResolveFutures,
    {
        let slf = Self {
            snapshot: Mutex::new(EitherFutureSnapshot::from_resolve_futures(state)),
            print_callback,
            dc_registry,
            script_name,
        };
        slf.into_bound_py_any(py)
    }

    /// Constructs a `PyFutureSnapshot` from deserialized parts.
    ///
    /// Used by `load_snapshot` and `load_repl_snapshot` to reconstruct snapshot objects.
    pub(crate) fn from_deserialized(
        py: Python<'_>,
        snapshot: EitherFutureSnapshot,
        print_callback: Option>,
        dc_registry: DcRegistry,
        script_name: String,
    ) -> PyResult> {
        let slf = Self {
            snapshot: Mutex::new(snapshot),
            print_callback,
            dc_registry,
            script_name,
        };
        slf.into_bound_py_any(py)
    }

    /// Creates a `PyFutureSnapshot` for a REPL resolve-futures state.
fn repl_resolve_futures(
        py: Python<'_>,
        state: ReplResolveFutures,
        script_name: String,
        print_callback: Option>,
        dc_registry: DcRegistry,
        repl_owner: Py,
    ) -> PyResult>
    where
        EitherFutureSnapshot: FromReplResolveFutures,
    {
        let slf = Self {
            snapshot: Mutex::new(EitherFutureSnapshot::from_repl_resolve_futures(state, repl_owner)),
            print_callback,
            dc_registry,
            script_name,
        };
        slf.into_bound_py_any(py)
    }
}

#[pymethods]
impl PyFutureSnapshot {
    /// Resumes execution with results for one or more futures.
    ///
    /// `results` maps a call id to a dict holding that call's outcome; each entry is
    /// parsed with `extract_external_result`.
    ///
    /// All entries are parsed *before* the stored execution state is taken, so a
    /// malformed `results` dict leaves the snapshot intact and it can be resumed again
    /// (and, for REPL snapshots, the REPL state held inside it is not dropped).
    ///
    /// # Raises
    /// * `RuntimeError` if the snapshot has already been resumed
    #[pyo3(signature = (results))]
    pub fn resume<'py>(&self, py: Python<'py>, results: &Bound<'_, PyDict>) -> PyResult> {
        const ARGS_ERROR: &str = "results values must be a dict with either 'return_value' or 'exception', not both";

        // The guard is held until the end of this function, as before
        let mut guard = self
            .snapshot
            .lock()
            .map_err(|_| PyRuntimeError::new_err("Snapshot is currently being resumed by another thread"))?;

        // Parse first: a bad key or value must not consume the snapshot
        let external_results = results
            .iter()
            .map(|(key, value)| {
                let call_id = key.extract::()?;
                let dict = value.cast::()?;
                let value = extract_external_result(py, dict, ARGS_ERROR, &self.dc_registry, call_id)?;
                Ok((call_id, value))
            })
            .collect::>>()?;

        // Input is good - now mark the snapshot as consumed and take its state
        let snapshot = std::mem::replace(&mut *guard, EitherFutureSnapshot::Done);

        // Build print writer before detaching - clone_ref needs py token
        let mut print_cb;
        let print_writer = match &self.print_callback {
            Some(cb) => {
                print_cb = CallbackStringPrint::from_py(cb.clone_ref(py));
                PrintWriter::Callback(&mut print_cb)
            }
            None => PrintWriter::Stdout,
        };
        let mut print_writer = SendWrapper::new(print_writer);

        // Run variants map interpreter errors straight to `MontyError`; REPL variants
        // go through `restore_repl_from_repl_start_error` with their owner
        let progress = match snapshot {
            EitherFutureSnapshot::NoLimit(snapshot) => {
                let result = py.detach(|| snapshot.resume(external_results, print_writer.reborrow()));
                EitherProgress::NoLimit(result.map_err(|e| MontyError::new_err(py, e))?)
            }
            EitherFutureSnapshot::Limited(snapshot) => {
                let result = py.detach(|| snapshot.resume(external_results, print_writer.reborrow()));
                EitherProgress::Limited(result.map_err(|e| MontyError::new_err(py, e))?)
            }
            EitherFutureSnapshot::ReplNoLimit(snapshot, owner) => {
                let result = py
                    .detach(|| snapshot.resume(external_results, print_writer.reborrow()))
                    .map_err(|e| restore_repl_from_repl_start_error(py, &owner, *e))?;
                EitherProgress::ReplNoLimit(result, owner)
            }
            EitherFutureSnapshot::ReplLimited(snapshot, owner) => {
                let result = py
                    .detach(|| snapshot.resume(external_results, print_writer.reborrow()))
                    .map_err(|e| restore_repl_from_repl_start_error(py, &owner, *e))?;
                EitherProgress::ReplLimited(result, owner)
            }
            EitherFutureSnapshot::Done => return Err(PyRuntimeError::new_err("Progress already resumed")),
        };

        // The next snapshot/complete object gets its own handles to the registry and callback
        let dc_registry = self.dc_registry.clone_ref(py);
        progress.progress_or_complete(
            py,
            self.script_name.clone(),
            self.print_callback.as_ref().map(|cb| cb.clone_ref(py)),
            dc_registry,
        )
    }

    /// Returns the pending call IDs associated with the FutureSnapshot instance.
    ///
    /// A poisoned mutex is tolerated here: the inner value is read regardless.
    ///
    /// # Returns
    /// A Python list of pending call IDs.
    ///
    /// # Raises
    /// `RuntimeError` if the snapshot has already been resumed.
    #[getter]
    fn pending_call_ids<'py>(&self, py: Python<'py>) -> PyResult> {
        let snapshot = self.snapshot.lock().unwrap_or_else(PoisonError::into_inner);
        match &*snapshot {
            EitherFutureSnapshot::NoLimit(snapshot) => PyList::new(py, snapshot.pending_call_ids()),
            EitherFutureSnapshot::Limited(snapshot) => PyList::new(py, snapshot.pending_call_ids()),
            EitherFutureSnapshot::ReplNoLimit(snapshot, _) => PyList::new(py, snapshot.pending_call_ids()),
            EitherFutureSnapshot::ReplLimited(snapshot, _) => PyList::new(py, snapshot.pending_call_ids()),
            EitherFutureSnapshot::Done => Err(PyRuntimeError::new_err("FutureSnapshot already resumed")),
        }
    }

    /// Serializes the FutureSnapshot instance to a binary format.
    ///
    /// The serialized data can be stored and later restored with `load_snapshot()`
    /// or `load_repl_snapshot()`. REPL snapshots automatically include the REPL state.
    ///
    /// Note: The `print_callback` is not serialized and must be re-provided when loading.
/// /// # Returns /// Bytes containing the serialized FutureSnapshot instance. /// /// # Raises /// `ValueError` if serialization fails. /// `RuntimeError` if the progress has already been resumed. fn dump<'py>(&self, py: Python<'py>) -> PyResult> { let bytes = crate::serialization::dump_future_snapshot(&self.snapshot, &self.script_name)?; Ok(PyBytes::new(py, &bytes)) } fn __repr__(&self) -> String { let snapshot = self.snapshot.lock().unwrap_or_else(PoisonError::into_inner); let pending_call_ids = match &*snapshot { EitherFutureSnapshot::NoLimit(s) => s.pending_call_ids(), EitherFutureSnapshot::Limited(s) => s.pending_call_ids(), EitherFutureSnapshot::ReplNoLimit(s, _) => s.pending_call_ids(), EitherFutureSnapshot::ReplLimited(s, _) => s.pending_call_ids(), EitherFutureSnapshot::Done => &[], }; format!( "FutureSnapshot(script_name='{}', pending_call_ids={pending_call_ids:?})", self.script_name, ) } } #[pyclass(name = "MontyComplete", module = "pydantic_monty", frozen)] pub struct PyMontyComplete { #[pyo3(get)] pub output: Py, // TODO we might want to add stats on execution here like time, allocations, etc. } impl PyMontyComplete { fn create<'py>(py: Python<'py>, output: &MontyObject, dc_registry: &DcRegistry) -> PyResult> { let output = monty_to_py(py, output, dc_registry)?; let slf = Self { output }; slf.into_bound_py_any(py) } } #[pymethods] impl PyMontyComplete { fn __repr__(&self, py: Python<'_>) -> PyResult { Ok(format!("MontyComplete(output={})", self.output.bind(py).repr()?)) } } fn list_str(arg: Option<&Bound<'_, PyList>>, name: &str) -> PyResult> { if let Some(names) = arg { names .iter() .map(|item| item.extract::()) .collect::>>() .map_err(|e| PyTypeError::new_err(format!("{name}: {e}"))) } else { Ok(vec![]) } } /// A `PrintWriter` implementation that calls a Python callback for each print output. /// /// This struct holds a GIL-independent `Py` reference to the callback, /// allowing it to be used across GIL release boundaries. 
The GIL is re-acquired /// briefly for each callback invocation. #[derive(Debug)] pub(crate) struct CallbackStringPrint(Py); impl CallbackStringPrint { /// Creates a new `CallbackStringPrint` from a borrowed Python callback. fn new(callback: &Bound<'_, PyAny>) -> Self { Self(callback.clone().unbind()) } /// Creates a new `CallbackStringPrint` from an owned `Py`. pub(crate) fn from_py(callback: Py) -> Self { Self(callback) } } impl PrintWriterCallback for CallbackStringPrint { fn stdout_write(&mut self, output: Cow<'_, str>) -> Result<(), MontyException> { Python::attach(|py| { self.0.bind(py).call1(("stdout", output.as_ref()))?; Ok::<_, PyErr>(()) }) .map_err(|e| Python::attach(|py| exc_py_to_monty(py, &e))) } fn stdout_push(&mut self, end: char) -> Result<(), MontyException> { Python::attach(|py| { self.0.bind(py).call1(("stdout", end.to_string()))?; Ok::<_, PyErr>(()) }) .map_err(|e| Python::attach(|py| exc_py_to_monty(py, &e))) } } /// Recursively checks whether a `MontyObject` contains a dataclass, including /// inside containers like `List`, `Tuple`, and `Dict`. /// /// This is used to decide whether to take the iterative execution path: dataclass /// method calls need host dispatch, so if any input (even nested) is a dataclass /// we must use the iterative runner rather than the non-iterative `run()`. fn contains_dataclass(obj: &MontyObject) -> bool { match obj { MontyObject::Dataclass { .. } => true, MontyObject::List(items) | MontyObject::Tuple(items) => items.iter().any(contains_dataclass), MontyObject::Dict(pairs) => pairs .into_iter() .any(|(k, v)| contains_dataclass(k) || contains_dataclass(v)), _ => false, } } /// Serialization wrapper for `PyMonty` that includes all fields needed for reconstruction. #[derive(serde::Serialize, serde::Deserialize)] struct SerializedMonty { runner: MontyRun, script_name: String, input_names: Vec, } /// Extract an external result (object or exception) from a dictionary. 
///
/// The dict must hold exactly one key: `return_value`, `exception`, or
/// `future` (whose value must be `...`). The result is converted into an
/// `ExtFunctionResult`.
///
/// Any dataclass return values are automatically registered in the `dc_registry` via `py_to_monty`
/// so they can be properly reconstructed on output.
///
/// The `call_id` is required for `future` results to track the pending call.
fn extract_external_result(
    py: Python<'_>,
    dict: &Bound<'_, PyDict>,
    error_msg: &'static str,
    dc_registry: &DcRegistry,
    call_id: u32,
) -> PyResult {
    // Anything other than a single entry is rejected outright.
    if dict.len() != 1 {
        return Err(PyTypeError::new_err(error_msg));
    }

    // Plain return value.
    if let Some(return_value) = dict.get_item(intern!(py, "return_value"))? {
        return Ok(py_to_monty(&return_value, dc_registry)?.into());
    }

    // Exception instance to raise inside the running code.
    if let Some(exception) = dict.get_item(intern!(py, "exception"))? {
        let py_err = PyErr::from_value(exception.into_any());
        return Ok(exc_py_to_monty(py, &py_err).into());
    }

    // The only remaining accepted key is `future`; any other key is an error.
    let Some(marker) = dict.get_item(intern!(py, "future"))? else {
        return Err(PyTypeError::new_err(error_msg));
    };

    // A failing comparison is treated the same as "not Ellipsis".
    if marker.eq(py.Ellipsis()).unwrap_or_default() {
        Ok(ExtFunctionResult::Future(call_id))
    } else {
        Err(PyTypeError::new_err(
            "Value for the 'future' key must be Ellipsis (...)",
        ))
    }
}

/// Extracts the REPL from a `ReplStartError`, restores it into the owner,
/// and returns the Python exception.
fn restore_repl_from_repl_start_error( py: Python<'_>, repl_owner: &Py, err: ReplStartError, ) -> PyErr where EitherRepl: FromCoreRepl, { repl_owner.get().put_repl(EitherRepl::from_core(err.repl)); MontyError::new_err(py, err.error) } ================================================ FILE: crates/monty-python/src/repl.rs ================================================ use std::sync::{Mutex, PoisonError}; // Use `::monty` to refer to the external crate (not the pymodule) use ::monty::{ ExtFunctionResult, LimitedTracker, MontyException, MontyObject, MontyRepl as CoreMontyRepl, NameLookupResult, NoLimitTracker, PrintWriter, ReplProgress, ReplStartError, ResourceTracker, }; use monty::ExcType; use pyo3::{ exceptions::{PyRuntimeError, PyValueError}, prelude::*, types::{PyBytes, PyDict, PyList, PyTuple}, }; use send_wrapper::SendWrapper; use crate::{ convert::{get_docstring, monty_to_py, py_to_monty}, dataclass::DcRegistry, exceptions::{MontyError, exc_py_to_monty}, external::{ExternalFunctionRegistry, dispatch_method_call}, limits::{PySignalTracker, extract_limits}, monty_cls::CallbackStringPrint, }; /// Runtime REPL session holder for pyclass interoperability. /// /// PyO3 classes cannot be generic, so this enum stores REPL sessions for both /// resource tracker variants. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub(crate) enum EitherRepl { NoLimit(CoreMontyRepl>), Limited(CoreMontyRepl>), } /// Stateful no-replay REPL session. /// /// Create with `MontyRepl()` then call `feed_run()` to execute snippets /// incrementally against persistent heap and namespace state. /// /// Uses `Mutex` for the inner REPL because `CoreMontyRepl` contains a `Heap` /// with `Cell` (not `Sync`), and PyO3 requires `Send + Sync` for all /// pyclass types. The mutex also prevents concurrent `feed_run()` calls. 
#[pyclass(name = "MontyRepl", module = "pydantic_monty", frozen)] #[derive(Debug)] pub struct PyMontyRepl { repl: Mutex>, dc_registry: DcRegistry, /// Name of the script being executed. #[pyo3(get)] pub script_name: String, } #[pymethods] impl PyMontyRepl { /// Creates an empty REPL session ready to receive snippets via `feed_run()`. /// /// No code is parsed or executed at construction time — all execution /// is driven through `feed_run()`. #[new] #[pyo3(signature = (*, script_name="main.py", limits=None, dataclass_registry=None))] fn new( py: Python<'_>, script_name: &str, limits: Option<&Bound<'_, PyDict>>, dataclass_registry: Option<&Bound<'_, PyList>>, ) -> PyResult { let dc_registry = DcRegistry::from_list(py, dataclass_registry)?; let script_name = script_name.to_string(); let repl = if let Some(limits) = limits { let tracker = PySignalTracker::new(LimitedTracker::new(extract_limits(limits)?)); EitherRepl::Limited(CoreMontyRepl::new(&script_name, tracker)) } else { let tracker = PySignalTracker::new(NoLimitTracker); EitherRepl::NoLimit(CoreMontyRepl::new(&script_name, tracker)) }; Ok(Self { repl: Mutex::new(Some(repl)), dc_registry, script_name, }) } /// Registers a dataclass type for proper isinstance() support on output. fn register_dataclass(&self, cls: &Bound<'_, pyo3::types::PyType>) -> PyResult<()> { self.dc_registry.insert(cls) } /// Feeds and executes a single incremental REPL snippet. /// /// The snippet is compiled against existing session state and executed once /// without replaying previously fed snippets. /// /// When `external_functions` is provided, external function calls and name /// lookups are dispatched to the provided callables — matching the behavior /// of `Monty.run(external_functions=...)`. 
#[pyo3(signature = (code, *, inputs=None, external_functions=None, print_callback=None, os=None))]
fn feed_run<'py>(
    &self,
    py: Python<'py>,
    code: &str,
    inputs: Option<&Bound<'_, PyDict>>,
    external_functions: Option<&Bound<'_, PyDict>>,
    print_callback: Option>,
    os: Option<&Bound<'_, PyAny>>,
) -> PyResult> {
    // Convert inputs first so a bad input fails before any REPL state is touched.
    let input_values = extract_repl_inputs(inputs, &self.dc_registry)?;

    // `print_cb` is declared outside the match so the writer can borrow it
    // for the rest of the function.
    let mut print_cb;
    let mut print_writer = match print_callback {
        Some(cb) => {
            print_cb = CallbackStringPrint::from_py(cb);
            PrintWriter::Callback(&mut print_cb)
        }
        None => PrintWriter::Stdout,
    };

    // Any host dispatch (external functions or OS callback) goes through the
    // iterative start/resume loop instead of the direct `feed_run` below.
    if external_functions.is_some() || os.is_some() {
        return self.feed_run_with_externals(py, code, input_values, external_functions, os, print_writer);
    }

    // Two distinct "busy" states produce the same error message:
    // the mutex is held by another caller (`try_lock` fails) ...
    let mut guard = self
        .repl
        .try_lock()
        .map_err(|_| PyRuntimeError::new_err("REPL session is currently executing another snippet"))?;
    // ... or the slot is `None` because the REPL was taken out via `take_repl`
    // and has not been put back yet.
    let repl = guard
        .as_mut()
        .ok_or_else(|| PyRuntimeError::new_err("REPL session is currently executing another snippet"))?;

    // Run the snippet in place; the REPL stays inside the mutex on this path.
    let output = match repl {
        EitherRepl::NoLimit(repl) => repl.feed_run(code, input_values, print_writer.reborrow()),
        EitherRepl::Limited(repl) => repl.feed_run(code, input_values, print_writer.reborrow()),
    }
    .map_err(|e| MontyError::new_err(py, e))?;

    Ok(monty_to_py(py, &output, &self.dc_registry)?.into_bound(py))
}

/// Starts executing an incremental snippet, yielding snapshots for external calls.
///
/// Unlike `feed_run()`, which handles external function dispatch internally via a loop,
/// `feed_start()` returns a snapshot object whenever the code needs an external function
/// call, OS call, name lookup, or future resolution. The caller then provides the result
/// via `snapshot.resume(...)`, which returns the next snapshot or `MontyComplete`.
///
/// This enables the same iterative start/resume pattern used by `Monty.start()`,
/// including support for async external functions via `FutureSnapshot`.
#[pyo3(signature = (code, *, inputs=None, print_callback=None))]
fn feed_start<'py>(
    slf: &Bound<'py, Self>,
    py: Python<'py>,
    code: &str,
    inputs: Option<&Bound<'_, PyDict>>,
    print_callback: Option>,
) -> PyResult> {
    let this = slf.get();
    // Convert inputs before taking the REPL, so a conversion error leaves the session untouched.
    let input_values = extract_repl_inputs(inputs, &this.dc_registry)?;

    // The callback is cloned for the writer; the original `print_callback` is
    // passed on to `progress_or_complete` below.
    let mut print_cb;
    let print_writer = match &print_callback {
        Some(cb) => {
            print_cb = CallbackStringPrint::from_py(cb.clone_ref(py));
            PrintWriter::Callback(&mut print_cb)
        }
        None => PrintWriter::Stdout,
    };
    let mut print_output = SendWrapper::new(print_writer);

    // From here on the mutex slot is `None` until the REPL is put back.
    let repl = this.take_repl()?;
    // The owner handle travels with the progress value.
    let repl_owner: Py = slf.clone().unbind();
    let code_owned = code.to_owned();
    let inputs_owned = input_values;
    let dc_registry = this.dc_registry.clone_ref(py);
    let script_name = this.script_name.clone();

    match repl {
        EitherRepl::NoLimit(repl) => {
            // A start error carries the REPL back; it is restored into `this` before the error is raised.
            let progress = py
                .detach(|| repl.feed_start(&code_owned, inputs_owned, print_output.reborrow()))
                .map_err(|e| this.restore_repl_from_start_error(py, *e))?;
            let either = crate::monty_cls::EitherProgress::ReplNoLimit(progress, repl_owner);
            either.progress_or_complete(py, script_name, print_callback, dc_registry)
        }
        EitherRepl::Limited(repl) => {
            let progress = py
                .detach(|| repl.feed_start(&code_owned, inputs_owned, print_output.reborrow()))
                .map_err(|e| this.restore_repl_from_start_error(py, *e))?;
            let either = crate::monty_cls::EitherProgress::ReplLimited(progress, repl_owner);
            either.progress_or_complete(py, script_name, print_callback, dc_registry)
        }
    }
}

/// Serializes this REPL session to bytes.
fn dump<'py>(&self, py: Python<'py>) -> PyResult> {
    // Borrowing wire struct: serializes the REPL in place without cloning it.
    #[derive(serde::Serialize)]
    struct SerializedRepl<'a> {
        repl: &'a EitherRepl,
        script_name: &'a str,
    }

    // A poisoned lock is recovered rather than reported.
    let guard = self.repl.lock().unwrap_or_else(PoisonError::into_inner);
    // `None` means the REPL is currently taken out of the mutex (see `take_repl`),
    // so there is nothing to serialize.
    let repl = guard
        .as_ref()
        .ok_or_else(|| PyRuntimeError::new_err("REPL session is currently executing another snippet"))?;

    let serialized = SerializedRepl {
        repl,
        script_name: &self.script_name,
    };
    let bytes = postcard::to_allocvec(&serialized).map_err(|e| PyValueError::new_err(e.to_string()))?;
    Ok(PyBytes::new(py, &bytes))
}

/// Restores a REPL session from `dump()` bytes.
///
/// The dataclass registry is not part of the serialized data; it is rebuilt
/// from the optional `dataclass_registry` argument.
#[staticmethod]
#[pyo3(signature = (data, *, dataclass_registry=None))]
fn load(
    py: Python<'_>,
    data: &Bound<'_, PyBytes>,
    dataclass_registry: Option<&Bound<'_, PyList>>,
) -> PyResult {
    // Owned counterpart of the struct written by `dump()`; field order must match.
    #[derive(serde::Deserialize)]
    struct SerializedReplOwned {
        repl: EitherRepl,
        script_name: String,
    }

    let serialized: SerializedReplOwned =
        postcard::from_bytes(data.as_bytes()).map_err(|e| PyValueError::new_err(e.to_string()))?;

    Ok(Self {
        repl: Mutex::new(Some(serialized.repl)),
        dc_registry: DcRegistry::from_list(py, dataclass_registry)?,
        script_name: serialized.script_name,
    })
}

/// Short debug representation showing only the script name.
fn __repr__(&self) -> String {
    format!("MontyRepl(script_name='{}')", self.script_name)
}
}

impl PyMontyRepl {
    /// Executes a REPL snippet with external function and OS call support.
    ///
    /// Uses the iterative `feed_start` / resume loop to handle external function
    /// calls and name lookups, matching the same dispatch logic as `Monty.run()`.
    ///
    /// `feed_start` consumes the REPL, so we temporarily take it out of the mutex
    /// (leaving `None`) and restore it on both success and error paths.
fn feed_run_with_externals<'py>( &self, py: Python<'py>, code: &str, input_values: Vec<(String, MontyObject)>, external_functions: Option<&Bound<'_, PyDict>>, os: Option<&Bound<'_, PyAny>>, mut print_writer: PrintWriter<'_>, ) -> PyResult> { let mut print_output = SendWrapper::new(&mut print_writer); let repl = self.take_repl()?; let result = match repl { EitherRepl::NoLimit(repl) => { self.feed_start_loop(py, repl, code, input_values, external_functions, os, &mut print_output) } EitherRepl::Limited(repl) => { self.feed_start_loop(py, repl, code, input_values, external_functions, os, &mut print_output) } }; // On error, the REPL is already restored inside `restore_repl_from_start_error`. match result { Ok((output, restored_repl)) => { self.put_repl(restored_repl); Ok(monty_to_py(py, &output, &self.dc_registry)?.into_bound(py)) } Err(err) => Err(err), } } /// Runs the feed_start / resume loop for a specific resource tracker type. /// /// Returns the output value and the restored REPL enum variant, or a Python error. 
#[expect(clippy::too_many_arguments)]
fn feed_start_loop(
    &self,
    py: Python<'_>,
    repl: CoreMontyRepl,
    code: &str,
    input_values: Vec<(String, MontyObject)>,
    external_functions: Option<&Bound<'_, PyDict>>,
    os: Option<&Bound<'_, PyAny>>,
    print_output: &mut SendWrapper<&mut PrintWriter<'_>>,
) -> PyResult<(MontyObject, EitherRepl)>
where
    EitherRepl: FromCoreRepl,
{
    let code_owned = code.to_owned();

    // `feed_start` consumes the REPL. On a start error the REPL comes back inside
    // the error and is restored into `self` before the Python exception is returned.
    let mut progress = py
        .detach(|| repl.feed_start(&code_owned, input_values, print_output.reborrow()))
        .map_err(|e| self.restore_repl_from_start_error(py, *e))?;

    // Each iteration answers one request from the running snippet and resumes it,
    // until the snippet completes or an unsupported request is hit.
    loop {
        match progress {
            ReplProgress::Complete { repl, value } => {
                // The caller (`feed_run_with_externals`) puts the returned REPL back.
                return Ok((value, EitherRepl::from_core(repl)));
            }
            ReplProgress::FunctionCall(call) => {
                // Dataclass method calls are dispatched on the host object;
                // everything else must come from `external_functions`.
                let return_value = if call.method_call {
                    dispatch_method_call(py, &call.function_name, &call.args, &call.kwargs, &self.dc_registry)
                } else if let Some(ext_fns) = external_functions {
                    let registry = ExternalFunctionRegistry::new(py, ext_fns, &self.dc_registry);
                    registry.call(&call.function_name, &call.args, &call.kwargs)
                } else {
                    // Build the message before `into_repl()` consumes `call`.
                    let msg = format!(
                        "External function '{}' called but no external_functions provided",
                        call.function_name
                    );
                    // Recover the REPL from the pending call so the session stays usable.
                    self.put_repl(EitherRepl::from_core(call.into_repl()));
                    return Err(PyRuntimeError::new_err(msg));
                };

                progress = py
                    .detach(|| call.resume(return_value, print_output.reborrow()))
                    .map_err(|e| self.restore_repl_from_start_error(py, *e))?;
            }
            ReplProgress::NameLookup(lookup) => {
                // A name present in `external_functions` resolves to a function object
                // carrying the callable's docstring; anything else is undefined.
                // NOTE(review): the `?` on `get_item` returns early without calling
                // `put_repl`. Presumably `lookup` owns the REPL like the other pending
                // states, in which case the mutex slot would stay `None` — confirm.
                let result = if let Some(ext_fns) = external_functions
                    && let Some(value) = ext_fns.get_item(&lookup.name)?
                {
                    NameLookupResult::Value(MontyObject::Function {
                        name: lookup.name.clone(),
                        docstring: get_docstring(&value),
                    })
                } else {
                    NameLookupResult::Undefined
                };

                progress = py
                    .detach(|| lookup.resume(result, print_output.reborrow()))
                    .map_err(|e| self.restore_repl_from_start_error(py, *e))?;
            }
            ReplProgress::OsCall(call) => {
                // NOTE(review): every `?` inside this branch (argument conversion, tuple
                // construction, `set_item`, result conversion) returns early without
                // calling `put_repl`, unlike the explicit error paths in the
                // `FunctionCall` and `ResolveFutures` branches. Presumably `call` owns
                // the REPL here too, so such an error would leave the session stuck
                // reporting "currently executing another snippet" — confirm.
                let result: ExtFunctionResult = if let Some(os_callback) = os {
                    // Convert positional and keyword arguments to Python objects.
                    let py_args: Vec> = call
                        .args
                        .iter()
                        .map(|arg| monty_to_py(py, arg, &self.dc_registry))
                        .collect::>()?;
                    let py_args_tuple = PyTuple::new(py, py_args)?;

                    let py_kwargs = PyDict::new(py);
                    for (k, v) in &call.kwargs {
                        py_kwargs.set_item(
                            monty_to_py(py, k, &self.dc_registry)?,
                            monty_to_py(py, v, &self.dc_registry)?,
                        )?;
                    }

                    // The callback receives (function name, args tuple, kwargs dict).
                    // An exception it raises is forwarded as the call's result rather
                    // than returned from this function.
                    match os_callback.call1((call.function.to_string(), py_args_tuple, py_kwargs)) {
                        Ok(result) => py_to_monty(&result, &self.dc_registry)?.into(),
                        Err(err) => exc_py_to_monty(py, &err).into(),
                    }
                } else {
                    // No OS callback supplied: the call resolves to a `NotImplementedError`.
                    MontyException::new(
                        ExcType::NotImplementedError,
                        Some(format!("OS function '{}' not implemented", call.function)),
                    )
                    .into()
                };

                progress = py
                    .detach(|| call.resume(result, print_output.reborrow()))
                    .map_err(|e| self.restore_repl_from_start_error(py, *e))?;
            }
            ReplProgress::ResolveFutures(state) => {
                // Futures cannot be resolved in this synchronous loop; recover the
                // REPL and report the limitation.
                self.put_repl(EitherRepl::from_core(state.into_repl()));
                return Err(PyRuntimeError::new_err(
                    "async futures not supported with `MontyRepl.feed_run`",
                ));
            }
        }
    }
}

/// Takes the REPL out of the mutex for `feed_start` (which consumes self),
/// leaving `None` until the REPL is restored via `put_repl`.
///
/// Fails with `RuntimeError` both when the mutex is currently held and when
/// the REPL has already been taken.
pub(crate) fn take_repl(&self) -> PyResult {
    // `try_lock` never blocks: contention is reported as an error instead.
    let mut guard = self
        .repl
        .try_lock()
        .map_err(|_| PyRuntimeError::new_err("REPL session is currently executing another snippet"))?;
    guard
        .take()
        .ok_or_else(|| PyRuntimeError::new_err("REPL session is currently executing another snippet"))
}

/// Creates an empty REPL owner for snapshot deserialization.
/// /// The REPL mutex starts as `None` — the real REPL state lives inside the /// deserialized snapshot and will be restored via `put_repl` when the /// snapshot is resumed to completion. pub(crate) fn empty_owner(script_name: String, dc_registry: DcRegistry) -> Self { Self { repl: Mutex::new(None), dc_registry, script_name, } } /// Restores a REPL into the mutex after `feed_start` completes successfully. pub(crate) fn put_repl(&self, repl: EitherRepl) { let mut guard = self.repl.lock().unwrap_or_else(PoisonError::into_inner); *guard = Some(repl); } /// Extracts the REPL from a `ReplStartError`, restores it into `self.repl`, /// and returns the Python exception. fn restore_repl_from_start_error(&self, py: Python<'_>, err: ReplStartError) -> PyErr where EitherRepl: FromCoreRepl, { self.put_repl(EitherRepl::from_core(err.repl)); MontyError::new_err(py, err.error) } } /// Converts a Python dict of `{name: value}` pairs into the `Vec<(String, MontyObject)>` /// format expected by the core REPL's `feed_run` and `feed_start`. fn extract_repl_inputs( inputs: Option<&Bound<'_, PyDict>>, dc_registry: &DcRegistry, ) -> PyResult> { let Some(inputs) = inputs else { return Ok(vec![]); }; inputs .iter() .map(|(key, value)| { let name = key.extract::()?; let obj = py_to_monty(&value, dc_registry)?; Ok((name, obj)) }) .collect::>() } /// Helper trait to convert a typed `CoreMontyRepl` back into the /// type-erased `EitherRepl` enum. pub(crate) trait FromCoreRepl { /// Wraps a core REPL into the appropriate `EitherRepl` variant. fn from_core(repl: CoreMontyRepl) -> Self; } impl FromCoreRepl> for EitherRepl { fn from_core(repl: CoreMontyRepl>) -> Self { Self::NoLimit(repl) } } impl FromCoreRepl> for EitherRepl { fn from_core(repl: CoreMontyRepl>) -> Self { Self::Limited(repl) } } ================================================ FILE: crates/monty-python/src/serialization.rs ================================================ //! 
Unified snapshot serialization with versioning and integrity checks. //! //! All snapshot `dump()` calls produce a wire format: //! //! ```text //! [version: u16 LE] [sha256: 32 bytes] [postcard payload] //! ``` //! //! Two module-level `#[pyfunction]`s — `load_snapshot` and `load_repl_snapshot` — //! handle deserialization without requiring callers to know the snapshot type. use std::sync::{Mutex, PoisonError}; use ::monty::{ FunctionCall, LimitedTracker, MontyObject, NameLookup, NoLimitTracker, OsCall, ReplFunctionCall, ReplNameLookup, ReplOsCall, ReplResolveFutures, ResolveFutures, }; use pyo3::{ exceptions::{PyRuntimeError, PyValueError}, prelude::*, types::{PyBytes, PyDict, PyList, PyTuple}, }; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use crate::{ convert::{monty_to_py, py_to_monty}, dataclass::DcRegistry, limits::PySignalTracker, monty_cls::{ EitherFunctionSnapshot, EitherFutureSnapshot, EitherLookupSnapshot, PyFunctionSnapshot, PyFutureSnapshot, PyNameLookupSnapshot, }, repl::PyMontyRepl, }; /// Current serialization format version. Incremented on breaking wire-format changes. const SERIALIZATION_VERSION: u16 = 1; /// Size of the wire-format header: 2 bytes version + 32 bytes SHA-256 hash. const HEADER_SIZE: usize = 2 + 32; // --------------------------------------------------------------------------- // Wire-format helpers // --------------------------------------------------------------------------- /// Serializes a value with a version header and SHA-256 integrity hash. 
/// /// Layout: `[version: u16 LE] [sha256(payload): 32 bytes] [postcard payload]` fn serialize_with_header(value: &impl Serialize) -> Result, postcard::Error> { let payload = postcard::to_allocvec(value)?; let hash = Sha256::digest(&payload); let mut buf = Vec::with_capacity(HEADER_SIZE + payload.len()); buf.extend_from_slice(&SERIALIZATION_VERSION.to_le_bytes()); buf.extend_from_slice(&hash); buf.extend_from_slice(&payload); Ok(buf) } /// Deserializes bytes produced by `serialize_with_header`, checking version and integrity. fn deserialize_with_header<'de, T: Deserialize<'de>>(bytes: &'de [u8]) -> PyResult { if bytes.len() < HEADER_SIZE { return Err(PyValueError::new_err( "Serialized data is too short to contain a valid header", )); } let version = u16::from_le_bytes([bytes[0], bytes[1]]); if version != SERIALIZATION_VERSION { return Err(PyValueError::new_err(format!( "Serialized data version {version} is not compatible with current version {SERIALIZATION_VERSION}" ))); } let stored_hash = &bytes[2..HEADER_SIZE]; let payload = &bytes[HEADER_SIZE..]; let computed_hash = Sha256::digest(payload); if computed_hash.as_slice() != stored_hash { return Err(PyValueError::new_err("Serialized data integrity check failed")); } postcard::from_bytes(payload).map_err(|e| PyValueError::new_err(e.to_string())) } // --------------------------------------------------------------------------- // Tagged wrapper enums // --------------------------------------------------------------------------- /// Non-REPL snapshot: tagged union over all snapshot types. /// /// Postcard's enum tagging handles type discrimination, so `load_snapshot` /// doesn't need to know the snapshot type upfront. /// /// Uses `Serde*Snapshot` types for snapshot fields — these are the wire-format /// representations without `Py` references. #[derive(Serialize, Deserialize)] pub(crate) enum SerializedSnapshot { /// External function or OS call. 
Function { snapshot: SerdeFunctionSnapshot, script_name: String, is_os_function: bool, is_method_call: bool, function_name: String, args: Vec, kwargs: Vec<(MontyObject, MontyObject)>, call_id: u32, }, /// Name lookup. NameLookup { snapshot: SerdeLookupSnapshot, script_name: String, variable_name: String, }, /// Future resolution. Future { snapshot: SerdeFutureSnapshot, script_name: String, }, } /// REPL snapshot: includes the REPL state alongside the execution snapshot. /// /// On deserialization, the REPL state is reconstructed into a fresh `PyMontyRepl` /// and the snapshot is rewired to reference it. /// /// Uses `SerdeFunctionSnapshot` (etc.) directly so REPL call variants are preserved /// in the wire format — unlike `EitherFunctionSnapshot::Deserialize` which maps /// REPL variants to `Done`. #[derive(Serialize, Deserialize)] pub(crate) enum SerializedReplSnapshot { /// External function or OS call with REPL state. /// /// The REPL state is embedded inside the snapshot's `Repl*` variant — no /// separate `repl` field is needed. Function { snapshot: SerdeFunctionSnapshot, script_name: String, is_os_function: bool, is_method_call: bool, function_name: String, args: Vec, kwargs: Vec<(MontyObject, MontyObject)>, call_id: u32, }, /// Name lookup with REPL state. NameLookup { snapshot: SerdeLookupSnapshot, script_name: String, variable_name: String, }, /// Future resolution with REPL state. Future { snapshot: SerdeFutureSnapshot, script_name: String, }, } // --------------------------------------------------------------------------- // Serde helpers for Either*Snapshot types // --------------------------------------------------------------------------- /// Wire-format representation of `EitherFunctionSnapshot` without `Py`. /// /// REPL variants preserve the inner call data for round-tripping through /// `load_repl_snapshot`. Non-REPL variants pass through directly. 
#[derive(Serialize, Deserialize)] pub(crate) enum SerdeFunctionSnapshot { NoLimitFn(FunctionCall>), NoLimitOs(OsCall>), LimitedFn(FunctionCall>), LimitedOs(OsCall>), ReplNoLimitFn(ReplFunctionCall>), ReplNoLimitOs(ReplOsCall>), ReplLimitedFn(ReplFunctionCall>), ReplLimitedOs(ReplOsCall>), Done, } /// Borrowing version of `SerdeFunctionSnapshot` for zero-copy serialization. #[derive(Serialize)] enum SerdeFunctionSnapshotRef<'a> { NoLimitFn(&'a FunctionCall>), NoLimitOs(&'a OsCall>), LimitedFn(&'a FunctionCall>), LimitedOs(&'a OsCall>), ReplNoLimitFn(&'a ReplFunctionCall>), ReplNoLimitOs(&'a ReplOsCall>), ReplLimitedFn(&'a ReplFunctionCall>), ReplLimitedOs(&'a ReplOsCall>), Done, } impl SerdeFunctionSnapshot { /// Converts into `EitherFunctionSnapshot` for the non-REPL path. /// /// Returns an error if this contains a REPL variant — use `into_either_with_repl` /// for REPL snapshots instead. fn into_either(self) -> PyResult { match self { Self::NoLimitFn(c) => Ok(EitherFunctionSnapshot::NoLimitFn(c)), Self::NoLimitOs(c) => Ok(EitherFunctionSnapshot::NoLimitOs(c)), Self::LimitedFn(c) => Ok(EitherFunctionSnapshot::LimitedFn(c)), Self::LimitedOs(c) => Ok(EitherFunctionSnapshot::LimitedOs(c)), Self::ReplNoLimitFn(_) | Self::ReplNoLimitOs(_) | Self::ReplLimitedFn(_) | Self::ReplLimitedOs(_) => Err( PyValueError::new_err("Cannot load a REPL snapshot with load_snapshot, use load_repl_snapshot instead"), ), Self::Done => Ok(EitherFunctionSnapshot::Done), } } /// Converts into `EitherFunctionSnapshot` with a REPL owner attached. /// /// REPL variants are wired to the given `Py`. /// Non-REPL variants pass through unchanged. 
fn into_either_with_repl(self, owner: Py) -> EitherFunctionSnapshot { match self { Self::NoLimitFn(c) => EitherFunctionSnapshot::NoLimitFn(c), Self::NoLimitOs(c) => EitherFunctionSnapshot::NoLimitOs(c), Self::LimitedFn(c) => EitherFunctionSnapshot::LimitedFn(c), Self::LimitedOs(c) => EitherFunctionSnapshot::LimitedOs(c), Self::ReplNoLimitFn(c) => EitherFunctionSnapshot::ReplNoLimitFn(c, owner), Self::ReplNoLimitOs(c) => EitherFunctionSnapshot::ReplNoLimitOs(c, owner), Self::ReplLimitedFn(c) => EitherFunctionSnapshot::ReplLimitedFn(c, owner), Self::ReplLimitedOs(c) => EitherFunctionSnapshot::ReplLimitedOs(c, owner), Self::Done => EitherFunctionSnapshot::Done, } } } impl EitherFunctionSnapshot { /// Borrows self as a `SerdeFunctionSnapshotRef` for serialization. fn as_serde_ref(&self) -> SerdeFunctionSnapshotRef<'_> { match self { Self::NoLimitFn(c) => SerdeFunctionSnapshotRef::NoLimitFn(c), Self::NoLimitOs(c) => SerdeFunctionSnapshotRef::NoLimitOs(c), Self::LimitedFn(c) => SerdeFunctionSnapshotRef::LimitedFn(c), Self::LimitedOs(c) => SerdeFunctionSnapshotRef::LimitedOs(c), Self::ReplNoLimitFn(c, _) => SerdeFunctionSnapshotRef::ReplNoLimitFn(c), Self::ReplNoLimitOs(c, _) => SerdeFunctionSnapshotRef::ReplNoLimitOs(c), Self::ReplLimitedFn(c, _) => SerdeFunctionSnapshotRef::ReplLimitedFn(c), Self::ReplLimitedOs(c, _) => SerdeFunctionSnapshotRef::ReplLimitedOs(c), Self::Done => SerdeFunctionSnapshotRef::Done, } } } /// Wire-format representation of `EitherLookupSnapshot` without `Py`. #[derive(Serialize, Deserialize)] pub(crate) enum SerdeLookupSnapshot { NoLimit(NameLookup>), Limited(NameLookup>), ReplNoLimit(ReplNameLookup>), ReplLimited(ReplNameLookup>), Done, } /// Borrowing version of `SerdeLookupSnapshot` for zero-copy serialization. 
#[derive(Serialize)] enum SerdeLookupSnapshotRef<'a> { NoLimit(&'a NameLookup>), Limited(&'a NameLookup>), ReplNoLimit(&'a ReplNameLookup>), ReplLimited(&'a ReplNameLookup>), Done, } impl SerdeLookupSnapshot { /// Converts into `EitherLookupSnapshot` for the non-REPL path. fn into_either(self) -> PyResult { match self { Self::NoLimit(l) => Ok(EitherLookupSnapshot::NoLimit(l)), Self::Limited(l) => Ok(EitherLookupSnapshot::Limited(l)), Self::ReplNoLimit(_) | Self::ReplLimited(_) => Err(PyValueError::new_err( "Cannot load a REPL snapshot with load_snapshot, use load_repl_snapshot instead", )), Self::Done => Ok(EitherLookupSnapshot::Done), } } /// Converts into `EitherLookupSnapshot` with a REPL owner attached. fn into_either_with_repl(self, owner: Py) -> EitherLookupSnapshot { match self { Self::NoLimit(l) => EitherLookupSnapshot::NoLimit(l), Self::Limited(l) => EitherLookupSnapshot::Limited(l), Self::ReplNoLimit(l) => EitherLookupSnapshot::ReplNoLimit(l, owner), Self::ReplLimited(l) => EitherLookupSnapshot::ReplLimited(l, owner), Self::Done => EitherLookupSnapshot::Done, } } } impl EitherLookupSnapshot { /// Borrows self as a `SerdeLookupSnapshotRef` for serialization. fn as_serde_ref(&self) -> SerdeLookupSnapshotRef<'_> { match self { Self::NoLimit(l) => SerdeLookupSnapshotRef::NoLimit(l), Self::Limited(l) => SerdeLookupSnapshotRef::Limited(l), Self::ReplNoLimit(l, _) => SerdeLookupSnapshotRef::ReplNoLimit(l), Self::ReplLimited(l, _) => SerdeLookupSnapshotRef::ReplLimited(l), Self::Done => SerdeLookupSnapshotRef::Done, } } } /// Wire-format representation of `EitherFutureSnapshot` without `Py`. #[derive(Serialize, Deserialize)] pub(crate) enum SerdeFutureSnapshot { NoLimit(ResolveFutures>), Limited(ResolveFutures>), ReplNoLimit(ReplResolveFutures>), ReplLimited(ReplResolveFutures>), Done, } /// Borrowing version of `SerdeFutureSnapshot` for zero-copy serialization. 
#[derive(Serialize)] enum SerdeFutureSnapshotRef<'a> { NoLimit(&'a ResolveFutures>), Limited(&'a ResolveFutures>), ReplNoLimit(&'a ReplResolveFutures>), ReplLimited(&'a ReplResolveFutures>), Done, } impl SerdeFutureSnapshot { /// Converts into `EitherFutureSnapshot` for the non-REPL path. fn into_either(self) -> PyResult { match self { Self::NoLimit(s) => Ok(EitherFutureSnapshot::NoLimit(s)), Self::Limited(s) => Ok(EitherFutureSnapshot::Limited(s)), Self::ReplNoLimit(_) | Self::ReplLimited(_) => Err(PyValueError::new_err( "Cannot load a REPL snapshot with load_snapshot, use load_repl_snapshot instead", )), Self::Done => Ok(EitherFutureSnapshot::Done), } } /// Converts into `EitherFutureSnapshot` with a REPL owner attached. fn into_either_with_repl(self, owner: Py) -> EitherFutureSnapshot { match self { Self::NoLimit(s) => EitherFutureSnapshot::NoLimit(s), Self::Limited(s) => EitherFutureSnapshot::Limited(s), Self::ReplNoLimit(s) => EitherFutureSnapshot::ReplNoLimit(s, owner), Self::ReplLimited(s) => EitherFutureSnapshot::ReplLimited(s, owner), Self::Done => EitherFutureSnapshot::Done, } } } impl EitherFutureSnapshot { /// Borrows self as a `SerdeFutureSnapshotRef` for serialization. fn as_serde_ref(&self) -> SerdeFutureSnapshotRef<'_> { match self { Self::NoLimit(s) => SerdeFutureSnapshotRef::NoLimit(s), Self::Limited(s) => SerdeFutureSnapshotRef::Limited(s), Self::ReplNoLimit(s, _) => SerdeFutureSnapshotRef::ReplNoLimit(s), Self::ReplLimited(s, _) => SerdeFutureSnapshotRef::ReplLimited(s), Self::Done => SerdeFutureSnapshotRef::Done, } } } // --------------------------------------------------------------------------- // dump helpers (called from #[pymethods] on each snapshot type) // --------------------------------------------------------------------------- /// Checks that a function snapshot hasn't been consumed, then serializes it. /// /// For REPL variants, extracts the REPL state and produces `SerializedReplSnapshot`. 
/// For non-REPL variants, produces `SerializedSnapshot`.
///
/// The snapshot itself is serialized by reference (via `as_serde_ref`); only
/// `args` and `kwargs` are converted into new `MontyObject` values first.
///
/// # Errors
///
/// - `PyRuntimeError` if the snapshot is already `Done` (it has been resumed).
/// - Whatever `convert_args_to_monty` / `convert_kwargs_to_monty` return when a
///   Python value cannot be converted.
/// - `PyValueError` carrying the serializer's message if
///   `serialize_with_header` fails.
#[expect(clippy::too_many_arguments)]
pub(crate) fn dump_function_snapshot(
    py: Python<'_>,
    snapshot_mutex: &Mutex,
    script_name: &str,
    is_os_function: bool,
    is_method_call: bool,
    function_name: &str,
    args: &Py,
    kwargs: &Py,
    call_id: u32,
    dc_registry: &DcRegistry,
) -> PyResult> {
    // A poisoned mutex is not treated as fatal: `PoisonError::into_inner`
    // hands back the guard anyway. The guard stays alive until the end of the
    // function because `serde_ref` below borrows from it.
    let snapshot = snapshot_mutex.lock().unwrap_or_else(PoisonError::into_inner);
    // `Done` marks a snapshot whose state is no longer available to dump.
    if matches!(&*snapshot, EitherFunctionSnapshot::Done) {
        return Err(PyRuntimeError::new_err(
            "Cannot dump progress that has already been resumed",
        ));
    }
    // Python-side call arguments are stored in the dump as Monty values.
    let args_monty = convert_args_to_monty(py, args, dc_registry)?;
    let kwargs_monty = convert_kwargs_to_monty(py, kwargs, dc_registry)?;
    let serde_ref = snapshot.as_serde_ref();
    // Both branches carry the same fields; they differ only in which wrapper
    // enum (REPL or non-REPL) is serialized.
    if snapshot.is_repl() {
        let serialized = SerializedReplSnapshotRef::Function {
            snapshot: serde_ref,
            script_name,
            is_os_function,
            is_method_call,
            function_name,
            args: &args_monty,
            kwargs: &kwargs_monty,
            call_id,
        };
        serialize_with_header(&serialized).map_err(|e| PyValueError::new_err(e.to_string()))
    } else {
        let serialized = SerializedSnapshotRef::Function {
            snapshot: serde_ref,
            script_name,
            is_os_function,
            is_method_call,
            function_name,
            args: &args_monty,
            kwargs: &kwargs_monty,
            call_id,
        };
        serialize_with_header(&serialized).map_err(|e| PyValueError::new_err(e.to_string()))
    }
}

/// Checks that a lookup snapshot hasn't been consumed, then serializes it.
pub(crate) fn dump_lookup_snapshot(
    snapshot_mutex: &Mutex,
    script_name: &str,
    variable_name: &str,
) -> PyResult> {
    // A poisoned mutex is not treated as fatal: `PoisonError::into_inner`
    // hands back the guard anyway. The guard stays alive until the end of the
    // function because `serde_ref` below borrows from it.
    let snapshot = snapshot_mutex.lock().unwrap_or_else(PoisonError::into_inner);
    // `Done` marks a snapshot whose state is no longer available to dump.
    if matches!(&*snapshot, EitherLookupSnapshot::Done) {
        return Err(PyRuntimeError::new_err(
            "Cannot dump progress that has already been resumed",
        ));
    }
    let serde_ref = snapshot.as_serde_ref();
    // Same fields in both branches; only the wrapper enum (REPL or non-REPL)
    // differs. Serializer failures surface as `PyValueError`.
    if snapshot.is_repl() {
        let serialized = SerializedReplSnapshotRef::NameLookup {
            snapshot: serde_ref,
            script_name,
            variable_name,
        };
        serialize_with_header(&serialized).map_err(|e| PyValueError::new_err(e.to_string()))
    } else {
        let serialized = SerializedSnapshotRef::NameLookup {
            snapshot: serde_ref,
            script_name,
            variable_name,
        };
        serialize_with_header(&serialized).map_err(|e| PyValueError::new_err(e.to_string()))
    }
}

/// Checks that a future snapshot hasn't been consumed, then serializes it.
///
/// # Errors
///
/// - `PyRuntimeError` if the snapshot is already `Done` (it has been resumed).
/// - `PyValueError` carrying the serializer's message if
///   `serialize_with_header` fails.
pub(crate) fn dump_future_snapshot(
    snapshot_mutex: &Mutex,
    script_name: &str,
) -> PyResult> {
    // Recover the guard even if the mutex was poisoned; it is held until the
    // function returns because `serde_ref` borrows from it.
    let snapshot = snapshot_mutex.lock().unwrap_or_else(PoisonError::into_inner);
    if matches!(&*snapshot, EitherFutureSnapshot::Done) {
        return Err(PyRuntimeError::new_err(
            "Cannot dump progress that has already been resumed",
        ));
    }
    let serde_ref = snapshot.as_serde_ref();
    // Same fields in both branches; only the wrapper enum differs.
    if snapshot.is_repl() {
        let serialized = SerializedReplSnapshotRef::Future {
            snapshot: serde_ref,
            script_name,
        };
        serialize_with_header(&serialized).map_err(|e| PyValueError::new_err(e.to_string()))
    } else {
        let serialized = SerializedSnapshotRef::Future {
            snapshot: serde_ref,
            script_name,
        };
        serialize_with_header(&serialized).map_err(|e| PyValueError::new_err(e.to_string()))
    }
}

// ---------------------------------------------------------------------------
// Borrowing serialization refs (avoid cloning large snapshot data)
// ---------------------------------------------------------------------------

/// Borrowing version of `SerializedSnapshot` for zero-copy serialization.
#[derive(Serialize)]
enum SerializedSnapshotRef<'a> {
    // Built by `dump_function_snapshot` when the snapshot is not a REPL one.
    Function {
        snapshot: SerdeFunctionSnapshotRef<'a>,
        script_name: &'a str,
        is_os_function: bool,
        is_method_call: bool,
        function_name: &'a str,
        // Call arguments, already converted from Python to Monty values.
        args: &'a [MontyObject],
        // Keyword arguments as (key, value) pairs of Monty values.
        kwargs: &'a [(MontyObject, MontyObject)],
        call_id: u32,
    },
    // Built by `dump_lookup_snapshot` when the snapshot is not a REPL one.
    NameLookup {
        snapshot: SerdeLookupSnapshotRef<'a>,
        script_name: &'a str,
        variable_name: &'a str,
    },
    // Built by `dump_future_snapshot` when the snapshot is not a REPL one.
    Future {
        snapshot: SerdeFutureSnapshotRef<'a>,
        script_name: &'a str,
    },
}

/// Borrowing version of `SerializedReplSnapshot` for zero-copy serialization.
///
/// Has the same variants and fields as `SerializedSnapshotRef`; the dump
/// helpers pick this enum when the snapshot's `is_repl()` returns `true`.
#[derive(Serialize)]
enum SerializedReplSnapshotRef<'a> {
    Function {
        snapshot: SerdeFunctionSnapshotRef<'a>,
        script_name: &'a str,
        is_os_function: bool,
        is_method_call: bool,
        function_name: &'a str,
        // Call arguments, already converted from Python to Monty values.
        args: &'a [MontyObject],
        // Keyword arguments as (key, value) pairs of Monty values.
        kwargs: &'a [(MontyObject, MontyObject)],
        call_id: u32,
    },
    NameLookup {
        snapshot: SerdeLookupSnapshotRef<'a>,
        script_name: &'a str,
        variable_name: &'a str,
    },
    Future {
        snapshot: SerdeFutureSnapshotRef<'a>,
        script_name: &'a str,
    },
}

// ---------------------------------------------------------------------------
// Module-level load functions
// ---------------------------------------------------------------------------

/// Loads a non-REPL snapshot from bytes.
///
/// Returns `FunctionSnapshot | NameLookupSnapshot | FutureSnapshot` depending
/// on what was serialized. Callers no longer need to know the snapshot type upfront.
#[pyfunction]
// `data` is positional; `print_callback` and `dataclass_registry` are
// keyword-only and default to `None`.
#[pyo3(signature = (data, *, print_callback=None, dataclass_registry=None))]
pub(crate) fn load_snapshot<'py>(
    py: Python<'py>,
    data: &Bound<'_, PyBytes>,
    print_callback: Option>,
    dataclass_registry: Option<&Bound<'_, PyList>>,
) -> PyResult> {
    let bytes = data.as_bytes();
    // Any decoding failure from `deserialize_with_header` is returned as-is.
    let serialized: SerializedSnapshot = deserialize_with_header(bytes)?;
    let dc_registry = DcRegistry::from_list(py, dataclass_registry)?;

    // Each arm rebuilds the matching Python snapshot object from the decoded
    // fields.
    match serialized {
        SerializedSnapshot::Function {
            snapshot,
            script_name,
            is_os_function,
            is_method_call,
            function_name,
            args,
            kwargs,
            call_id,
        } => {
            // NOTE(review): presumably rejects REPL variants like the lookup
            // and future `into_either` do — the function-snapshot version is
            // defined elsewhere; confirm.
            let either = snapshot.into_either()?;
            // Stored Monty values are turned back into a Python tuple / dict.
            let py_args = monty_objects_to_py_tuple(py, &args, &dc_registry)?;
            let py_kwargs = monty_pairs_to_py_dict(py, &kwargs, &dc_registry)?;
            PyFunctionSnapshot::from_deserialized(
                py,
                either,
                print_callback,
                dc_registry,
                script_name,
                is_os_function,
                is_method_call,
                function_name,
                py_args,
                py_kwargs,
                call_id,
            )
        }
        SerializedSnapshot::NameLookup {
            snapshot,
            script_name,
            variable_name,
        } => {
            // Fails with `PyValueError` if the payload is a REPL variant.
            let either = snapshot.into_either()?;
            PyNameLookupSnapshot::from_deserialized(py, either, print_callback, dc_registry, script_name, variable_name)
        }
        SerializedSnapshot::Future { snapshot, script_name } => {
            // Fails with `PyValueError` if the payload is a REPL variant.
            let either = snapshot.into_either()?;
            PyFutureSnapshot::from_deserialized(py, either, print_callback, dc_registry, script_name)
        }
    }
}

/// Loads a REPL snapshot from bytes, returning `(snapshot, MontyRepl)`.
///
/// The REPL state is reconstructed into a fresh `PyMontyRepl` and the snapshot's
/// REPL variant is rewired to point to it.
#[pyfunction]
// `data` is positional; `print_callback` and `dataclass_registry` are
// keyword-only and default to `None`.
#[pyo3(signature = (data, *, print_callback=None, dataclass_registry=None))]
pub(crate) fn load_repl_snapshot<'py>(
    py: Python<'py>,
    data: &Bound<'_, PyBytes>,
    print_callback: Option>,
    dataclass_registry: Option<&Bound<'_, PyList>>,
) -> PyResult<(Bound<'py, PyAny>, Py)> {
    let bytes = data.as_bytes();
    // Any decoding failure from `deserialize_with_header` is returned as-is.
    let serialized: SerializedReplSnapshot = deserialize_with_header(bytes)?;
    let dc_registry = DcRegistry::from_list(py, dataclass_registry)?;

    // Every arm follows the same steps: create an empty REPL owner, give the
    // snapshot its own reference to it (`clone_ref`), build the Python
    // snapshot object, and return that object together with the owner.
    match serialized {
        SerializedReplSnapshot::Function {
            snapshot,
            script_name,
            is_os_function,
            is_method_call,
            function_name,
            args,
            kwargs,
            call_id,
        } => {
            let repl_py = create_empty_py_repl(py, &script_name, &dc_registry)?;
            let either = snapshot.into_either_with_repl(repl_py.clone_ref(py));
            // Stored Monty values are turned back into a Python tuple / dict.
            let py_args = monty_objects_to_py_tuple(py, &args, &dc_registry)?;
            let py_kwargs = monty_pairs_to_py_dict(py, &kwargs, &dc_registry)?;
            let snap = PyFunctionSnapshot::from_deserialized(
                py,
                either,
                print_callback,
                dc_registry,
                script_name,
                is_os_function,
                is_method_call,
                function_name,
                py_args,
                py_kwargs,
                call_id,
            )?;
            Ok((snap, repl_py))
        }
        SerializedReplSnapshot::NameLookup {
            snapshot,
            script_name,
            variable_name,
        } => {
            let repl_py = create_empty_py_repl(py, &script_name, &dc_registry)?;
            // Infallible here: non-REPL variants are passed through and the
            // owner reference is simply dropped for them.
            let either = snapshot.into_either_with_repl(repl_py.clone_ref(py));
            let snap = PyNameLookupSnapshot::from_deserialized(
                py,
                either,
                print_callback,
                dc_registry,
                script_name,
                variable_name,
            )?;
            Ok((snap, repl_py))
        }
        SerializedReplSnapshot::Future { snapshot, script_name } => {
            let repl_py = create_empty_py_repl(py, &script_name, &dc_registry)?;
            // Infallible here: non-REPL variants are passed through and the
            // owner reference is simply dropped for them.
            let either = snapshot.into_either_with_repl(repl_py.clone_ref(py));
            let snap = PyFutureSnapshot::from_deserialized(py, either, print_callback, dc_registry, script_name)?;
            Ok((snap, repl_py))
        }
    }
}

/// Creates an empty `Py` for use as a REPL owner reference.
///
/// The REPL starts with `None` inside — the real REPL state lives inside the
/// snapshot and will be restored via `put_repl` when the snapshot completes.
///
/// The owner gets its own copy of `script_name` and its own reference to
/// `dc_registry`, so neither argument is consumed.
fn create_empty_py_repl(py: Python<'_>, script_name: &str, dc_registry: &DcRegistry) -> PyResult> {
    let repl_obj = PyMontyRepl::empty_owner(script_name.to_owned(), dc_registry.clone_ref(py));
    // Wrap the Rust value in a Python object handle.
    Py::new(py, repl_obj)
}

// ---------------------------------------------------------------------------
// Conversion helpers
// ---------------------------------------------------------------------------

/// Converts a `Py` of Python args to `Vec`.
///
/// Each item yielded by iterating `args` is converted with `py_to_monty`;
/// the first conversion error aborts the whole conversion and is returned.
fn convert_args_to_monty(py: Python<'_>, args: &Py, dc_registry: &DcRegistry) -> PyResult> {
    args.bind(py)
        .iter()
        .map(|item| py_to_monty(&item, dc_registry))
        .collect()
}

/// Converts a `Py` of Python kwargs to `Vec<(MontyObject, MontyObject)>`.
///
/// Both the key and the value of every entry are converted with
/// `py_to_monty`; the first conversion error aborts the whole conversion and
/// is returned.
fn convert_kwargs_to_monty(
    py: Python<'_>,
    kwargs: &Py,
    dc_registry: &DcRegistry,
) -> PyResult> {
    kwargs
        .bind(py)
        .iter()
        .map(|(k, v)| Ok((py_to_monty(&k, dc_registry)?, py_to_monty(&v, dc_registry)?)))
        .collect()
}

/// Converts `&[MontyObject]` to a Python tuple.
///
/// Items are converted with `monty_to_py` in slice order. An error from any
/// item, or from building the tuple, is returned to the caller.
fn monty_objects_to_py_tuple(
    py: Python<'_>,
    objects: &[MontyObject],
    dc_registry: &DcRegistry,
) -> PyResult> {
    // Convert everything first so a failing item prevents tuple creation.
    let items: Vec> = objects
        .iter()
        .map(|item| monty_to_py(py, item, dc_registry))
        .collect::>()?;
    // `unbind` detaches the tuple from the `py` token so it can be returned
    // as an owned handle.
    Ok(PyTuple::new(py, items)?.unbind())
}

/// Converts `&[(MontyObject, MontyObject)]` to a Python dict.
fn monty_pairs_to_py_dict( py: Python<'_>, pairs: &[(MontyObject, MontyObject)], dc_registry: &DcRegistry, ) -> PyResult> { let dict = PyDict::new(py); for (k, v) in pairs { dict.set_item(monty_to_py(py, k, dc_registry)?, monty_to_py(py, v, dc_registry)?)?; } Ok(dict.unbind()) } // --------------------------------------------------------------------------- // Trait extensions on Either*Snapshot for REPL detection and state extraction // --------------------------------------------------------------------------- impl EitherFunctionSnapshot { /// Returns `true` if this snapshot is from a REPL `feed_start()` call. pub(crate) fn is_repl(&self) -> bool { matches!( self, Self::ReplNoLimitFn(..) | Self::ReplNoLimitOs(..) | Self::ReplLimitedFn(..) | Self::ReplLimitedOs(..) ) } } impl EitherLookupSnapshot { /// Returns `true` if this snapshot is from a REPL `feed_start()` call. pub(crate) fn is_repl(&self) -> bool { matches!(self, Self::ReplNoLimit(..) | Self::ReplLimited(..)) } } impl EitherFutureSnapshot { /// Returns `true` if this snapshot is from a REPL `feed_start()` call. pub(crate) fn is_repl(&self) -> bool { matches!(self, Self::ReplNoLimit(..) | Self::ReplLimited(..)) } } ================================================ FILE: crates/monty-python/tests/test_async.py ================================================ import asyncio import pytest from dirty_equals import IsList from inline_snapshot import snapshot import pydantic_monty from pydantic_monty import run_monty_async, run_repl_async def test_async(): code = 'await foobar(1, 2)' m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('foobar') assert progress.args == snapshot((1, 2)) call_id = progress.call_id progress = progress.resume(future=...) 
assert isinstance(progress, pydantic_monty.FutureSnapshot) assert progress.pending_call_ids == snapshot([call_id]) progress = progress.resume({call_id: {'return_value': 3}}) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot(3) def test_asyncio_gather(): code = """ import asyncio await asyncio.gather(foo(1), bar(2)) """ m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('foo') assert progress.args == snapshot((1,)) foo_call_ids = progress.call_id progress = progress.resume(future=...) assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('bar') assert progress.args == snapshot((2,)) bar_call_ids = progress.call_id progress = progress.resume(future=...) assert isinstance(progress, pydantic_monty.FutureSnapshot) dump_progress = progress.dump() assert progress.pending_call_ids == IsList(foo_call_ids, bar_call_ids, check_order=False) progress = progress.resume({foo_call_ids: {'return_value': 3}, bar_call_ids: {'return_value': 4}}) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot([3, 4]) progress2 = pydantic_monty.load_snapshot(dump_progress) assert isinstance(progress2, pydantic_monty.FutureSnapshot) assert progress2.pending_call_ids == IsList(foo_call_ids, bar_call_ids, check_order=False) progress = progress2.resume({bar_call_ids: {'return_value': 14}, foo_call_ids: {'return_value': 13}}) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot([13, 14]) progress3 = pydantic_monty.load_snapshot(dump_progress) assert isinstance(progress3, pydantic_monty.FutureSnapshot) progress = progress3.resume({bar_call_ids: {'return_value': 14}, foo_call_ids: {'future': ...}}) assert isinstance(progress, pydantic_monty.FutureSnapshot) assert progress.pending_call_ids == [foo_call_ids] progress = 
progress.resume({foo_call_ids: {'return_value': 144}}) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot([144, 14]) # === Tests for run_monty_async === async def test_run_monty_async_sync_function(): """Test run_monty_async with a basic sync external function.""" m = pydantic_monty.Monty('get_value()') def get_value(): return 42 result = await run_monty_async(m, external_functions={'get_value': get_value}) assert result == snapshot(42) async def test_run_monty_async_async_function(): """Test run_monty_async with a basic async external function.""" m = pydantic_monty.Monty('await fetch_data()') async def fetch_data(): await asyncio.sleep(0.001) return 'async result' result = await run_monty_async(m, external_functions={'fetch_data': fetch_data}) assert result == snapshot('async result') async def test_run_monty_async_function_not_found(): """Test that missing external function raises wrapped error.""" m = pydantic_monty.Monty('missing_func()') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: await run_monty_async(m, external_functions={}) inner = exc_info.value.exception() assert isinstance(inner, LookupError) assert inner.args[0] == snapshot("Unable to find 'missing_func' in external functions dict") async def test_run_monty_async_sync_exception(): """Test that sync function exceptions propagate correctly.""" m = pydantic_monty.Monty('fail()') def fail(): raise ValueError('sync error') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: await run_monty_async(m, external_functions={'fail': fail}) inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert inner.args[0] == snapshot('sync error') async def test_run_monty_async_async_exception(): """Test that async function exceptions propagate correctly.""" m = pydantic_monty.Monty('await async_fail()') async def async_fail(): await asyncio.sleep(0.001) raise RuntimeError('async error') with 
pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: await run_monty_async(m, external_functions={'async_fail': async_fail}) inner = exc_info.value.exception() assert isinstance(inner, RuntimeError) assert inner.args[0] == snapshot('async error') async def test_run_monty_async_exception_caught(): """Test that exceptions caught in try/except don't propagate.""" code = """ try: fail() except ValueError: caught = True caught """ m = pydantic_monty.Monty(code) def fail(): raise ValueError('caught error') result = await run_monty_async(m, external_functions={'fail': fail}) assert result == snapshot(True) async def test_run_monty_async_multiple_async_functions(): """Test asyncio.gather with multiple async functions.""" code = """ import asyncio await asyncio.gather(fetch_a(), fetch_b()) """ m = pydantic_monty.Monty(code) async def fetch_a(): await asyncio.sleep(0.01) return 'a' async def fetch_b(): await asyncio.sleep(0.005) return 'b' result = await run_monty_async(m, external_functions={'fetch_a': fetch_a, 'fetch_b': fetch_b}) assert result == snapshot(['a', 'b']) async def test_run_monty_async_mixed_sync_async(): """Test mix of sync and async external functions.""" code = """ sync_val = sync_func() async_val = await async_func() sync_val + async_val """ m = pydantic_monty.Monty(code) def sync_func(): return 10 async def async_func(): await asyncio.sleep(0.001) return 5 result = await run_monty_async(m, external_functions={'sync_func': sync_func, 'async_func': async_func}) assert result == snapshot(15) async def test_run_monty_async_with_inputs(): """Test run_monty_async with inputs parameter.""" m = pydantic_monty.Monty('process(x, y)', inputs=['x', 'y']) def process(a: int, b: int) -> int: return a * b result = await run_monty_async(m, inputs={'x': 6, 'y': 7}, external_functions={'process': process}) assert result == snapshot(42) async def test_run_monty_async_with_print_callback(): """Test run_monty_async with print_callback parameter.""" output: 
list[tuple[str, str]] = [] def callback(stream: str, text: str) -> None: output.append((stream, text)) m = pydantic_monty.Monty('print("hello from async")') result = await run_monty_async(m, print_callback=callback) assert result is None assert output == snapshot([('stdout', 'hello from async'), ('stdout', '\n')]) async def test_run_monty_async_function_returning_none(): """Test async function that returns None.""" m = pydantic_monty.Monty('do_nothing()') def do_nothing(): return None result = await run_monty_async(m, external_functions={'do_nothing': do_nothing}) assert result is None async def test_run_monty_async_no_external_calls(): """Test run_monty_async when code has no external calls.""" m = pydantic_monty.Monty('1 + 2 + 3') result = await run_monty_async(m) assert result == snapshot(6) # === Tests for run_monty_async with os parameter === async def test_run_monty_async_with_os(): """run_monty_async can use OSAccess for file operations.""" from pydantic_monty import MemoryFile, OSAccess fs = OSAccess([MemoryFile('/test.txt', content='hello world')]) m = pydantic_monty.Monty( """ from pathlib import Path Path('/test.txt').read_text() """, ) result = await run_monty_async(m, os=fs) assert result == snapshot('hello world') async def test_run_monty_async_os_with_external_functions(): """run_monty_async can combine OSAccess with external functions.""" from pydantic_monty import MemoryFile, OSAccess fs = OSAccess([MemoryFile('/data.txt', content='test data')]) async def process(text: str) -> str: return text.upper() m = pydantic_monty.Monty( """ from pathlib import Path content = Path('/data.txt').read_text() await process(content) """, ) result = await run_monty_async( m, external_functions={'process': process}, os=fs, ) assert result == snapshot('TEST DATA') async def test_run_monty_async_os_file_not_found(): """run_monty_async propagates OS errors correctly.""" from pydantic_monty import OSAccess fs = OSAccess() m = pydantic_monty.Monty( """ from pathlib 
import Path Path('/missing.txt').read_text() """, ) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: await run_monty_async(m, os=fs) assert str(exc_info.value) == snapshot("FileNotFoundError: [Errno 2] No such file or directory: '/missing.txt'") async def test_run_monty_async_os_not_provided(): """run_monty_async raises error when OS function called without os handler.""" m = pydantic_monty.Monty( """ from pathlib import Path Path('/test.txt').exists() """, ) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: await run_monty_async(m) inner = exc_info.value.exception() assert isinstance(inner, RuntimeError) assert 'OS function' in inner.args[0] assert 'no os handler provided' in inner.args[0] async def test_run_monty_async_nested_gather_with_external_functions(): """Test nested asyncio.gather with spawned tasks and external async functions. https://github.com/pydantic/monty/pull/174 Reproduces the pattern from stack_overflow.py: outer gather spawns 3 coroutine tasks, each doing a sequential await then an inner gather with 2 external futures. 
""" code = """\ import asyncio async def get_city_weather(city_name: str): coords = await get_lat_lng(location_description=city_name) lat, lng = coords['lat'], coords['lng'] temp_task = get_temp(lat=lat, lng=lng) desc_task = get_weather_description(lat=lat, lng=lng) temp, desc = await asyncio.gather(temp_task, desc_task) return { 'city': city_name, 'temp': temp, 'description': desc } async def main(): cities = ['London', 'Paris', 'Tokyo'] results = await asyncio.gather(*(get_city_weather(city) for city in cities)) return results await main() """ m = pydantic_monty.Monty(code) city_coords = { 'London': {'lat': 51.5, 'lng': -0.1}, 'Paris': {'lat': 48.9, 'lng': 2.3}, 'Tokyo': {'lat': 35.7, 'lng': 139.7}, } city_temps = { (51.5, -0.1): 15.0, (48.9, 2.3): 18.0, (35.7, 139.7): 22.0, } city_descs = { (51.5, -0.1): 'Cloudy', (48.9, 2.3): 'Sunny', (35.7, 139.7): 'Humid', } async def get_lat_lng(location_description: str): return city_coords[location_description] async def get_temp(lat: float, lng: float): return city_temps[(lat, lng)] async def get_weather_description(lat: float, lng: float): return city_descs[(lat, lng)] result = await run_monty_async( m, external_functions={ 'get_lat_lng': get_lat_lng, 'get_temp': get_temp, 'get_weather_description': get_weather_description, }, ) assert result == snapshot( [ {'city': 'London', 'temp': 15.0, 'description': 'Cloudy'}, {'city': 'Paris', 'temp': 18.0, 'description': 'Sunny'}, {'city': 'Tokyo', 'temp': 22.0, 'description': 'Humid'}, ] ) async def test_run_monty_async_os_write_and_read(): """run_monty_async supports both reading and writing files.""" from pydantic_monty import MemoryFile, OSAccess fs = OSAccess([MemoryFile('/file.txt', content='original')]) m = pydantic_monty.Monty( """ from pathlib import Path p = Path('/file.txt') p.write_text('updated') p.read_text() """, ) result = await run_monty_async(m, os=fs) assert result == snapshot('updated') # === Tests for MontyRepl.feed_start() with async patterns === def 
test_repl_feed_start_async_gather(): """MontyRepl.feed_start supports asyncio.gather with multiple futures.""" code = """ import asyncio await asyncio.gather(foo(1), bar(2)) """ repl = pydantic_monty.MontyRepl() progress = repl.feed_start(code) assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('foo') foo_call_id = progress.call_id progress = progress.resume(future=...) assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('bar') bar_call_id = progress.call_id progress = progress.resume(future=...) assert isinstance(progress, pydantic_monty.FutureSnapshot) from dirty_equals import IsList assert progress.pending_call_ids == IsList(foo_call_id, bar_call_id, check_order=False) progress = progress.resume({foo_call_id: {'return_value': 3}, bar_call_id: {'return_value': 4}}) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot([3, 4]) # REPL should still be usable after async completion assert repl.feed_run('1 + 1') == snapshot(2) def test_repl_feed_start_async_state_persistence(): """MontyRepl.feed_start async: REPL state persists across async snippets.""" repl = pydantic_monty.MontyRepl() repl.feed_run('x = 10') progress = repl.feed_start('result = await fetch(x)') assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('fetch') assert progress.args == snapshot((10,)) call_id = progress.call_id progress = progress.resume(future=...) 
assert isinstance(progress, pydantic_monty.FutureSnapshot) progress = progress.resume({call_id: {'return_value': 'fetched'}}) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output is None # assignment, not expression assert repl.feed_run('result') == snapshot('fetched') assert repl.feed_run('x') == snapshot(10) # === Tests for run_repl_async === async def test_run_repl_async_sync_function(): """run_repl_async with a basic sync external function.""" repl = pydantic_monty.MontyRepl() def get_value(): return 42 result = await run_repl_async(repl, 'get_value()', external_functions={'get_value': get_value}) assert result == snapshot(42) async def test_run_repl_async_async_function(): """run_repl_async with a basic async external function.""" repl = pydantic_monty.MontyRepl() async def fetch_data(): await asyncio.sleep(0.001) return 'async result' result = await run_repl_async(repl, 'await fetch_data()', external_functions={'fetch_data': fetch_data}) assert result == snapshot('async result') async def test_run_repl_async_state_persists(): """REPL state persists across multiple run_repl_async calls.""" repl = pydantic_monty.MontyRepl() def double(x: int) -> int: return x * 2 ext = {'double': double} await run_repl_async(repl, 'x = 10', external_functions=ext) await run_repl_async(repl, 'y = double(x)', external_functions=ext) result = await run_repl_async(repl, 'y', external_functions=ext) assert result == snapshot(20) async def test_run_repl_async_async_state_persists(): """REPL state persists across async calls with await.""" repl = pydantic_monty.MontyRepl() async def fetch(key: str) -> str: return f'value_{key}' ext = {'fetch': fetch} await run_repl_async(repl, "a = await fetch('one')", external_functions=ext) await run_repl_async(repl, "b = await fetch('two')", external_functions=ext) result = await run_repl_async(repl, 'a + b', external_functions=ext) assert result == snapshot('value_onevalue_two') async def test_run_repl_async_gather(): 
"""run_repl_async handles asyncio.gather with multiple futures.""" repl = pydantic_monty.MontyRepl() async def fetch_a(): await asyncio.sleep(0.01) return 'a' async def fetch_b(): await asyncio.sleep(0.005) return 'b' code = """\ import asyncio await asyncio.gather(fetch_a(), fetch_b()) """ result = await run_repl_async(repl, code, external_functions={'fetch_a': fetch_a, 'fetch_b': fetch_b}) assert result == snapshot(['a', 'b']) async def test_run_repl_async_function_not_found(): """run_repl_async raises error for missing external function.""" repl = pydantic_monty.MontyRepl() with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: await run_repl_async(repl, 'missing_func()', external_functions={}) inner = exc_info.value.exception() assert isinstance(inner, LookupError) assert inner.args[0] == snapshot("Unable to find 'missing_func' in external functions dict") async def test_run_repl_async_error_preserves_state(): """REPL state is preserved after an error in run_repl_async.""" repl = pydantic_monty.MontyRepl() await run_repl_async(repl, 'x = 42') def fail(): raise ValueError('oops') with pytest.raises(pydantic_monty.MontyRuntimeError): await run_repl_async(repl, 'fail()', external_functions={'fail': fail}) result = await run_repl_async(repl, 'x') assert result == snapshot(42) async def test_run_repl_async_with_inputs(): """run_repl_async supports inputs parameter.""" repl = pydantic_monty.MontyRepl() def add(a: int, b: int) -> int: return a + b result = await run_repl_async(repl, 'add(x, y)', inputs={'x': 3, 'y': 4}, external_functions={'add': add}) assert result == snapshot(7) async def test_run_repl_async_with_print_callback(): """run_repl_async supports print_callback parameter.""" repl = pydantic_monty.MontyRepl() output: list[str] = [] def callback(stream: str, text: str) -> None: output.append(text) await run_repl_async(repl, 'print("hello from repl")', print_callback=callback) assert output == snapshot(['hello from repl', '\n']) async def 
test_run_repl_async_with_os(): """run_repl_async supports OS access.""" from pydantic_monty import MemoryFile, OSAccess repl = pydantic_monty.MontyRepl() fs = OSAccess([MemoryFile('/test.txt', content='repl content')]) code = """\ from pathlib import Path Path('/test.txt').read_text() """ result = await run_repl_async(repl, code, os=fs) assert result == snapshot('repl content') async def test_run_repl_async_mixed_sync_async(): """run_repl_async handles mix of sync and async functions.""" repl = pydantic_monty.MontyRepl() def sync_func(): return 10 async def async_func(): await asyncio.sleep(0.001) return 5 code = """\ sync_val = sync_func() async_val = await async_func() sync_val + async_val """ result = await run_repl_async(repl, code, external_functions={'sync_func': sync_func, 'async_func': async_func}) assert result == snapshot(15) async def test_run_repl_async_no_external_calls(): """run_repl_async works when code has no external calls.""" repl = pydantic_monty.MontyRepl() result = await run_repl_async(repl, '1 + 2 + 3') assert result == snapshot(6) # === LLM agent patterns: realistic run_repl_async scenarios === async def test_repl_llm_iterative_data_collection(): """LLM defines a helper, collects data in batches, accumulates results across snippets.""" repl = pydantic_monty.MontyRepl() responses: dict[int, list[dict[str, object]]] = { 0: [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 2: [{'id': 3, 'name': 'Charlie'}], 3: [], } async def fetch_users(offset: int, limit: int) -> list[dict[str, object]]: return responses.get(offset, []) ext = {'fetch_users': fetch_users} # Snippet 1: LLM sets up accumulator await run_repl_async(repl, 'all_users = []', external_functions=ext) # Snippet 2: LLM fetches first batch await run_repl_async( repl, """\ batch = await fetch_users(0, 2) all_users = all_users + batch len(batch) """, external_functions=ext, ) # Snippet 3: LLM fetches next batch using state await run_repl_async( repl, """\ batch = await 
fetch_users(len(all_users), 2) all_users = all_users + batch len(batch) """, external_functions=ext, ) # Snippet 4: LLM fetches again, gets empty — realizes done await run_repl_async( repl, """\ batch = await fetch_users(len(all_users), 2) all_users = all_users + batch len(batch) """, external_functions=ext, ) # Snippet 5: LLM extracts final result result = await run_repl_async(repl, '[u["name"] for u in all_users]', external_functions=ext) assert result == snapshot(['Alice', 'Bob', 'Charlie']) async def test_repl_llm_error_recovery_retry(): """LLM catches an error, adjusts approach, retries successfully.""" repl = pydantic_monty.MontyRepl() call_count = 0 async def flaky_api(query: str) -> str: nonlocal call_count call_count += 1 if call_count == 1: raise ConnectionError('server unavailable') return f'result for {query}' ext = {'flaky_api': flaky_api} # Snippet 1: LLM tries, gets error with pytest.raises(pydantic_monty.MontyRuntimeError): await run_repl_async(repl, "data = await flaky_api('test')", external_functions=ext) # Snippet 2: LLM wraps in try/except and retries result = await run_repl_async( repl, """\ try: data = await flaky_api('test') except Exception as e: data = 'fallback' data """, external_functions=ext, ) assert result == snapshot('result for test') async def test_repl_llm_redefine_helper_function(): """LLM defines a function, uses it, then redefines it with improvements.""" repl = pydantic_monty.MontyRepl() async def fetch(url: str) -> str: return f'{url}' ext = {'fetch': fetch} # Snippet 1: LLM defines initial parser await run_repl_async( repl, """\ def parse_title(html): return html """, external_functions=ext, ) # Snippet 2: LLM uses it, gets raw html back result = await run_repl_async( repl, """\ html = await fetch('example.com') parse_title(html) """, external_functions=ext, ) assert result == snapshot('example.com') # Snippet 3: LLM redefines parser with better logic await run_repl_async( repl, """\ def parse_title(html): start = 
html.find('>') + 1 end = html.rfind('<') return html[start:end] """, external_functions=ext, ) # Snippet 4: uses improved parser on previously fetched data result = await run_repl_async(repl, 'parse_title(html)', external_functions=ext) assert result == snapshot('example.com') async def test_repl_llm_sequential_async_pipeline(): """LLM builds a data pipeline: fetch -> transform -> store, each step depends on previous.""" repl = pydantic_monty.MontyRepl() async def search(query: str) -> list[str]: return [f'{query}_result_1', f'{query}_result_2'] async def summarize(text: str) -> str: return f'summary({text})' records: list[str] = [] def record(item: str) -> None: records.append(item) ext = {'search': search, 'summarize': summarize, 'record': record} code = """\ results = await search('python async') summaries = [] for r in results: s = await summarize(r) summaries.append(s) record(s) summaries """ result = await run_repl_async(repl, code, external_functions=ext) assert result == snapshot(['summary(python async_result_1)', 'summary(python async_result_2)']) assert records == snapshot(['summary(python async_result_1)', 'summary(python async_result_2)']) async def test_repl_llm_gather_fan_out(): """LLM uses asyncio.gather to fan out many concurrent requests.""" repl = pydantic_monty.MontyRepl() async def fetch_price(item: str) -> float: prices = {'apple': 1.5, 'banana': 0.75, 'cherry': 3.0, 'date': 5.0, 'elderberry': 8.0} return prices[item] ext = {'fetch_price': fetch_price} code = """\ import asyncio items = ['apple', 'banana', 'cherry', 'date', 'elderberry'] prices = await asyncio.gather(*(fetch_price(item) for item in items)) dict(zip(items, prices)) """ result = await run_repl_async(repl, code, external_functions=ext) assert result == snapshot({'apple': 1.5, 'banana': 0.75, 'cherry': 3.0, 'date': 5.0, 'elderberry': 8.0}) async def test_repl_llm_try_except_around_external(): """LLM wraps individual external calls in try/except for graceful degradation.""" repl = 
async def test_repl_llm_conditional_external_call():
    """The external lookup runs only for keys missing from the REPL-side cache."""
    session = pydantic_monty.MontyRepl()
    looked_up: list[str] = []

    async def expensive_lookup(key: str) -> str:
        # Record every host-side invocation so it can be counted afterwards.
        looked_up.append(key)
        return f'looked up {key}'

    externals = {'expensive_lookup': expensive_lookup}

    # First snippet: seed the cache that the second snippet reads.
    seed_snippet = "cache = {'x': 'cached_x'}"
    await run_repl_async(session, seed_snippet, external_functions=externals)

    # Second snippet: consult the cache and await the external only on a miss.
    lookup_snippet = """\
results = []
for key in ['x', 'y', 'x']:
    if key in cache:
        results.append(cache[key])
    else:
        val = await expensive_lookup(key)
        cache[key] = val
        results.append(val)
results
"""
    outcome = await run_repl_async(session, lookup_snippet, external_functions=externals)
    assert outcome == snapshot(['cached_x', 'looked up y', 'cached_x'])
    assert len(looked_up) == 1  # only 'y' triggered a call
async def test_repl_llm_helper_wrapping_externals_with_retry():
    """A retry helper defined inside the REPL recovers from a failing external call."""
    session = pydantic_monty.MontyRepl()
    attempt_counts: dict[str, int] = {}

    def unreliable_fetch(url: str) -> str:
        # The first attempt for each URL fails; any later attempt succeeds.
        attempts = attempt_counts.get(url, 0) + 1
        attempt_counts[url] = attempts
        if attempts >= 2:
            return f'content of {url}'
        raise ValueError('temporary failure')

    externals = {'unreliable_fetch': unreliable_fetch}

    # Snippet 1: the retry helper is defined in REPL code.
    retry_helper_source = """\
def fetch_with_retry(url, max_retries=3):
    for i in range(max_retries):
        try:
            return unreliable_fetch(url)
        except ValueError:
            if i == max_retries - 1:
                raise
    raise ValueError('should not reach here')
"""
    await run_repl_async(session, retry_helper_source, external_functions=externals)

    # Snippet 2: the helper is called; it should succeed on the second attempt.
    outcome = await run_repl_async(session, "fetch_with_retry('example.com')", external_functions=externals)
    assert outcome == snapshot('content of example.com')
    assert attempt_counts == snapshot({'example.com': 2})
async def test_repl_llm_external_with_kwargs():
    """LLM calls external functions using keyword arguments.

    The ``search`` stub derives its results from both ``limit`` and ``offset``
    so the two pages differ; if either keyword were dropped or bound to the
    wrong parameter, the combined result would no longer match.
    """
    repl = pydantic_monty.MontyRepl()

    async def search(query: str, limit: int = 10, offset: int = 0) -> dict[str, object]:
        # Result indices start at `offset`, making each page's content depend on it.
        results = [f'{query}_{i}' for i in range(offset, offset + limit)]
        return {'query': query, 'limit': limit, 'offset': offset, 'results': results}

    ext = {'search': search}

    code = """\
page1 = await search('test', limit=2, offset=0)
page2 = await search('test', limit=2, offset=2)
page1['results'] + page2['results']
"""
    result = await run_repl_async(repl, code, external_functions=ext)
    assert result == snapshot(['test_0', 'test_1', 'test_2', 'test_3'])
OSAccess([MemoryFile('/data.csv', content='alice,95\nbob,87\ncharlie,92')]) async def analyze(text: str) -> dict[str, int]: rows = text.strip().split('\n') return {name: int(score) for name, score in (r.split(',') for r in rows)} ext = {'analyze': analyze} # Snippet 1: read file await run_repl_async( repl, """\ from pathlib import Path raw = Path('/data.csv').read_text() """, external_functions=ext, os=fs, ) # Snippet 2: process with external result = await run_repl_async(repl, 'await analyze(raw)', external_functions=ext, os=fs) assert result == snapshot({'alice': 95, 'bob': 87, 'charlie': 92}) async def test_repl_llm_long_multi_step_session(): """Simulates a multi-step LLM agent session: setup, explore, process, summarize.""" repl = pydantic_monty.MontyRepl() db: dict[str, list[dict[str, object]]] = { 'products': [ {'name': 'Widget', 'price': 9.99, 'category': 'tools'}, {'name': 'Gadget', 'price': 24.99, 'category': 'electronics'}, {'name': 'Doohickey', 'price': 4.99, 'category': 'tools'}, {'name': 'Thingamajig', 'price': 49.99, 'category': 'electronics'}, ], } async def query_db(table: str, filters: dict[str, str] | None = None) -> list[dict[str, object]]: rows = db.get(table, []) if filters: for k, v in filters.items(): rows = [r for r in rows if r.get(k) == v] return rows ext = {'query_db': query_db} # Step 1: LLM explores what's available result = await run_repl_async(repl, 'await query_db("products")', external_functions=ext) assert len(result) == 4 # Step 2: LLM filters by category await run_repl_async( repl, "tools = await query_db('products', filters={'category': 'tools'})", external_functions=ext, ) # Step 3: LLM computes stats result = await run_repl_async( repl, """\ total = sum(p['price'] for p in tools) avg = total / len(tools) {'count': len(tools), 'total': round(total, 2), 'average': round(avg, 2)} """, external_functions=ext, ) assert result == snapshot({'count': 2, 'total': 14.98, 'average': 7.49}) # Step 4: LLM also checks electronics await 
async def test_repl_llm_string_manipulation_of_external_result():
    """LLM fetches HTML-like content and does string processing across snippets."""
    repl = pydantic_monty.MontyRepl()

    async def fetch_page(url: str) -> str:
        # Fixed payload containing the <title> and <p> elements that the
        # snippets below search for.
        return '<html><head><title>Test Page</title></head><body><p>Hello</p><p>World</p></body></html>'

    ext = {'fetch_page': fetch_page}

    await run_repl_async(repl, "html = await fetch_page('example.com')", external_functions=ext)

    # LLM extracts title
    result = await run_repl_async(
        repl,
        """\
start = html.find('<title>') + len('<title>')
end = html.find('</title>')
title = html[start:end]
title
""",
        external_functions=ext,
    )
    assert result == snapshot('Test Page')

    # LLM extracts paragraphs.
    # The offsets 3 and 4 in the snippet are len('<p>') and len('</p>').
    result = await run_repl_async(
        repl,
        """\
paragraphs = []
remaining = html
while '<p>' in remaining:
    s = remaining.find('<p>') + 3
    e = remaining.find('</p>')
    paragraphs.append(remaining[s:e])
    remaining = remaining[e + 4:]
paragraphs
""",
        external_functions=ext,
    )
    assert result == snapshot(['Hello', 'World'])
def test_dataclass_input():
    """A registered dataclass passed as input comes back as an instance of the same class."""
    runner = pydantic_monty.Monty('x', inputs=['x'])
    runner.register_dataclass(Person)

    alice = Person(name='Alice', age=30)
    returned = runner.run(inputs={'x': alice})

    # Field values survive the round trip.
    assert returned.name == snapshot('Alice')
    assert returned.age == snapshot(30)

    # The returned object still behaves as a `Person` dataclass.
    assert is_dataclass(returned)
    assert isinstance(returned, Person)
    assert asdict(returned) == snapshot({'name': 'Alice', 'age': 30})
    assert repr(returned) == snapshot("Person(name='Alice', age=30)")
def test_dataclass_nested():
    """A dataclass holding another dataclass is converted at both levels."""
    runner = pydantic_monty.Monty('x', inputs=['x'])
    # Register the inner class first, then the outer one, as separate calls.
    for cls in (Address, PersonAddress):
        runner.register_dataclass(cls)

    home = Address(city='NYC', zip_code='10001')
    bob = PersonAddress(name='Bob', address=home)
    returned = runner.run(inputs={'x': bob})

    assert isinstance(returned, PersonAddress)
    assert returned.name == snapshot('Bob')

    returned_address = returned.address
    assert isinstance(returned_address, Address)
    assert returned_address.city == snapshot('NYC')
    assert returned_address.zip_code == snapshot('10001')
def test_dataclass_empty():
    """Empty dataclass (no fields) has empty repr."""

    @dataclass
    class Empty:
        pass

    m = pydantic_monty.Monty('x', inputs=['x'])
    m.register_dataclass(Empty)
    result = m.run(inputs={'x': Empty()})
    # `Empty` is defined inside this function, so the dataclass-generated repr
    # uses its qualified name, which includes the '<locals>' segment.
    assert repr(result) == snapshot('test_dataclass_empty.<locals>.Empty()')
def test_dataclass_field_access_missing():
    """Reading a field the dataclass does not define surfaces an AttributeError."""

    @dataclass
    class Person:
        name: str

    runner = pydantic_monty.Monty('x.age', inputs=['x'])
    alice = Person(name='Alice')

    with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info:
        runner.run(inputs={'x': alice})

    # The runtime error wraps the underlying Python exception.
    inner = exc_info.value.exception()
    assert isinstance(inner, AttributeError)
def test_dataclass_setattr_mutable():
    """Setting attributes on mutable dataclass works (auto-registered, returns real dataclass)."""

    @dataclass
    class Point:
        x: int
        y: int

    m = pydantic_monty.Monty('p', inputs=['p'])
    result = m.run(inputs={'p': Point(x=10, y=20)})
    assert isinstance(result, Point)

    # Modify existing field
    result.x = 100
    assert result.x == snapshot(100)
    # `Point` is defined inside this function, so the dataclass-generated repr
    # uses its qualified name, which includes the '<locals>' segment.
    assert repr(result) == snapshot('test_dataclass_setattr_mutable.<locals>.Point(x=100, y=20)')
def test_frozen_instance_error_is_attribute_error():
    """FrozenInstanceError can be caught as AttributeError."""

    @dataclass(frozen=True)
    class Point:
        x: int
        y: int

    m = pydantic_monty.Monty('p', inputs=['p'])
    result = m.run(inputs={'p': Point(x=10, y=20)})

    # Can catch with AttributeError (parent class)
    with pytest.raises(AttributeError):
        result.x = 100

    # Verify it's actually FrozenInstanceError. `pytest.raises` is used instead
    # of a bare try/except so the check fails when no exception is raised,
    # rather than passing without asserting anything.
    with pytest.raises(AttributeError) as exc_info:
        result.y = 200
    assert isinstance(exc_info.value, FrozenInstanceError)
def test_dataclass_equality_different_types():
    """Instances of two distinct dataclasses are unequal even with identical field values."""

    @dataclass
    class Point:
        x: int
        y: int

    @dataclass
    class Vector:
        x: int
        y: int

    runner = pydantic_monty.Monty('(a, b)', inputs=['a', 'b'])
    inputs = {'a': Point(x=10, y=20), 'b': Vector(x=10, y=20)}
    returned_point, returned_vector = runner.run(inputs=inputs)

    assert returned_point != returned_vector
def test_dataclass_hash_in_set():
    """Returned frozen dataclasses deduplicate correctly when placed in a set."""

    @dataclass(frozen=True)
    class Point:
        x: int
        y: int

    runner = pydantic_monty.Monty('(a, b, c)', inputs=['a', 'b', 'c'])
    inputs = {
        'a': Point(x=10, y=20),
        'b': Point(x=10, y=20),  # duplicate
        'c': Point(x=30, y=40),
    }
    a, b, c = runner.run(inputs=inputs)

    distinct = set()
    distinct.update((a, b, c))
    assert len(distinct) == snapshot(2)
def test_dataclass_fields():
    """`dataclasses.fields()` works on a dataclass returned from a run."""

    @dataclass
    class Point:
        x: int
        y: int

    runner = pydantic_monty.Monty('p', inputs=['p'])
    returned = runner.run(inputs={'p': Point(x=10, y=20)})

    field_list = fields(returned)
    assert len(field_list) == snapshot(2)

    x_field = field_list[0]
    y_field = field_list[1]
    assert x_field.name == snapshot('x')
    assert y_field.name == snapshot('y')
    # Type is inferred from value
    assert x_field.type is int
    assert y_field.type is int
@dataclass
class Point2D:
    """Two-dimensional point fixture used by the dataclass method-call tests."""

    x: float
    y: float

    def distance(self) -> float:
        """Return the Euclidean distance of this point from the origin."""
        squared_norm = self.x**2 + self.y**2
        return squared_norm**0.5

    def translate(self, dx: float, dy: float) -> 'Point2D':
        """Return a new point shifted by ``dx`` and ``dy``; this point is not modified."""
        moved_x = self.x + dx
        moved_y = self.y + dy
        return Point2D(x=moved_x, y=moved_y)
def test_method_on_frozen_dataclass():
    """Calling a method works when the dataclass is declared frozen."""

    @dataclass(frozen=True)
    class FrozenCalc:
        value: int

        def doubled(self) -> int:
            return self.value * 2

    runner = pydantic_monty.Monty('c.doubled()', inputs=['c'], dataclass_registry=[FrozenCalc])
    calc = FrozenCalc(value=21)
    outcome = runner.run(inputs={'c': calc})
    assert outcome == snapshot(42)
suffix: str = '') -> str: return prefix + self.base + suffix m = pydantic_monty.Monty( "f.format(prefix='[', suffix=']')", inputs=['f'], dataclass_registry=[Formatter], ) result = m.run(inputs={'f': Formatter(base='hello')}) assert result == snapshot('[hello]') def test_method_multiple_calls(): """Multiple method calls in the same expression.""" m = pydantic_monty.Monty( 'c.add(10) + c.multiply(3)', inputs=['c'], dataclass_registry=[Calculator], ) result = m.run(inputs={'c': Calculator(value=5)}) assert result == snapshot(30) def test_method_nonexistent_raises(): """Calling a non-existent method raises AttributeError.""" m = pydantic_monty.Monty('g.nonexistent()', inputs=['g'], dataclass_registry=[Greeter]) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(inputs={'g': Greeter(greeting='hi')}) assert str(exc_info.value) == snapshot("AttributeError: 'Greeter' object has no attribute 'nonexistent'") def test_method_on_nested_dataclass_in_list(): """Method call on a dataclass nested inside a list input.""" m = pydantic_monty.Monty('items[0].greet()', inputs=['items'], dataclass_registry=[Greeter]) result = m.run(inputs={'items': [Greeter(greeting='nested')]}) assert result == snapshot('nested') def test_method_on_nested_dataclass_in_dict(): """Method call on a dataclass nested inside a dict input.""" m = pydantic_monty.Monty('d["g"].greet()', inputs=['d'], dataclass_registry=[Greeter]) result = m.run(inputs={'d': {'g': Greeter(greeting='from dict')}}) assert result == snapshot('from dict') def test_method_on_nested_dataclass_in_tuple(): """Method call on a dataclass nested inside a tuple input.""" m = pydantic_monty.Monty('t[1].add(10)', inputs=['t'], dataclass_registry=[Calculator]) result = m.run(inputs={'t': (0, Calculator(value=5))}) assert result == snapshot(15) def test_dataclass_private_fields_skipped(): """Private fields (starting with _) are excluded from conversion.""" @dataclass class WithPrivate: name: str _internal: int = 0 m = 
pydantic_monty.Monty('repr(x)', inputs=['x']) result = m.run(inputs={'x': WithPrivate(name='Alice', _internal=42)}) assert result == snapshot("WithPrivate(name='Alice')") def test_dataclass_private_fields_skipped_no_default(): """Private fields without defaults cause TypeError on reconstruction (field is missing).""" @dataclass class WithPrivateNoDefault: name: str _secret: str m = pydantic_monty.Monty('x', inputs=['x']) with pytest.raises(TypeError): m.run(inputs={'x': WithPrivateNoDefault(name='Alice', _secret='hidden')}) def test_dataclass_private_field_not_accessible_in_monty(): """Private fields are not accessible inside Monty expressions.""" @dataclass class WithPrivate: name: str _internal: int = 0 m = pydantic_monty.Monty('x._internal', inputs=['x']) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(inputs={'x': WithPrivate(name='Alice', _internal=42)}) assert isinstance(exc_info.value.exception(), AttributeError) def test_method_on_nested_dataclass_field(): """Method call on a dataclass that is a field of another dataclass (d.c.method()).""" @dataclass class Inner: value: int def doubled(self) -> int: return self.value * 2 @dataclass class Outer: inner: Inner m = pydantic_monty.Monty('o.inner.doubled()', inputs=['o'], dataclass_registry=[Outer, Inner]) result = m.run(inputs={'o': Outer(inner=Inner(value=21))}) assert result == snapshot(42) ================================================ FILE: crates/monty-python/tests/test_exceptions.py ================================================ import pytest from inline_snapshot import snapshot import pydantic_monty # === MontyRuntimeError tests === def test_zero_division_error(): m = pydantic_monty.Monty('1 / 0') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() # Check that it's also a MontyError assert isinstance(exc_info.value, pydantic_monty.MontyError) # Check the inner exception inner = exc_info.value.exception() assert isinstance(inner, ZeroDivisionError) def 
test_value_error(): m = pydantic_monty.Monty("raise ValueError('bad value')") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert str(inner) == snapshot('bad value') def test_type_error(): m = pydantic_monty.Monty("'string' + 1") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, TypeError) def test_index_error(): m = pydantic_monty.Monty('[1, 2, 3][10]') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, IndexError) def test_key_error(): m = pydantic_monty.Monty("{'a': 1}['b']") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, KeyError) def test_attribute_error(): m = pydantic_monty.Monty("raise AttributeError('no such attr')") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, AttributeError) assert str(inner) == snapshot('no such attr') def test_name_error(): m = pydantic_monty.Monty('undefined_variable') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, NameError) def test_assertion_error(): m = pydantic_monty.Monty('assert False') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, AssertionError) def test_assertion_error_with_message(): m = pydantic_monty.Monty("assert False, 'custom message'") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, AssertionError) assert str(inner) == snapshot('custom message') def test_runtime_error(): m = pydantic_monty.Monty("raise RuntimeError('runtime 
error')") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, RuntimeError) assert str(inner) == snapshot('runtime error') def test_not_implemented_error(): m = pydantic_monty.Monty("raise NotImplementedError('not implemented')") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, NotImplementedError) assert str(inner) == snapshot('not implemented') # === MontySyntaxError tests === def test_syntax_error_on_init(): with pytest.raises(pydantic_monty.MontySyntaxError) as exc_info: pydantic_monty.Monty('def') # Check that it's also a MontyError assert isinstance(exc_info.value, pydantic_monty.MontyError) # Check the inner exception inner = exc_info.value.exception() assert isinstance(inner, SyntaxError) def test_syntax_error_unclosed_paren(): with pytest.raises(pydantic_monty.MontySyntaxError) as exc_info: pydantic_monty.Monty('print(1') inner = exc_info.value.exception() assert isinstance(inner, SyntaxError) def test_syntax_error_invalid_syntax(): with pytest.raises(pydantic_monty.MontySyntaxError) as exc_info: pydantic_monty.Monty('x = = 1') inner = exc_info.value.exception() assert isinstance(inner, SyntaxError) # === Catching with base class === def test_catch_with_base_class(): m = pydantic_monty.Monty('1 / 0') with pytest.raises(pydantic_monty.MontyError): m.run() def test_catch_syntax_error_with_base_class(): with pytest.raises(pydantic_monty.MontyError): pydantic_monty.Monty('def') # === Exception handling within Monty === def test_raise_caught_exception(): code = """ try: 1 / 0 except ZeroDivisionError as e: result = 'caught' result """ m = pydantic_monty.Monty(code) assert m.run() == snapshot('caught') def test_exception_in_function(): code = """ def fail(): raise ValueError('from function') fail() """ m = pydantic_monty.Monty(code) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: 
m.run() inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert str(inner) == snapshot('from function') # === Display and str methods === def test_display_traceback(): m = pydantic_monty.Monty('1 / 0') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() display = exc_info.value.display() assert 'Traceback (most recent call last):' in display assert 'ZeroDivisionError' in display def test_display_type_msg(): m = pydantic_monty.Monty("raise ValueError('test message')") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() display = exc_info.value.display('type-msg') assert display == snapshot('ValueError: test message') def test_runtime_display(): m = pydantic_monty.Monty("raise ValueError('test message')") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() assert exc_info.value.display('msg') == snapshot('test message') assert exc_info.value.display('type-msg') == snapshot('ValueError: test message') assert exc_info.value.display() == snapshot("""\ Traceback (most recent call last): File "main.py", line 1, in raise ValueError('test message') ValueError: test message\ """) def test_str_returns_msg(): m = pydantic_monty.Monty("raise ValueError('test message')") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() assert str(exc_info.value) == snapshot('ValueError: test message') def test_syntax_error_display(): with pytest.raises(pydantic_monty.MontySyntaxError) as exc_info: pydantic_monty.Monty('def') assert exc_info.value.display() == snapshot('Expected an identifier at byte range 3..3') assert exc_info.value.display('type-msg') == snapshot('SyntaxError: Expected an identifier at byte range 3..3') def test_syntax_error_str(): with pytest.raises(pydantic_monty.MontySyntaxError) as exc_info: pydantic_monty.Monty('def') # str() returns just the message assert 'SyntaxError' not in str(exc_info.value) # === Traceback tests === def test_traceback_frames(): code = 
"""\ def inner(): raise ValueError('error') def outer(): inner() outer() """ m = pydantic_monty.Monty(code) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() frames = exc_info.value.traceback() assert isinstance(frames, list) assert len(frames) >= 2 # At least module level, outer(), and inner() assert exc_info.value.display() == snapshot("""\ Traceback (most recent call last): File "main.py", line 7, in outer() ~~~~~~~ File "main.py", line 5, in outer inner() ~~~~~~~ File "main.py", line 2, in inner raise ValueError('error') ValueError: error\ """) assert [f.dict() for f in frames] == snapshot( [ { 'filename': 'main.py', 'line': 7, 'column': 1, 'end_line': 7, 'end_column': 8, 'function_name': '', 'source_line': 'outer()', }, { 'filename': 'main.py', 'line': 5, 'column': 5, 'end_line': 5, 'end_column': 12, 'function_name': 'outer', 'source_line': ' inner()', }, { 'filename': 'main.py', 'line': 2, 'column': 11, 'end_line': 2, 'end_column': 30, 'function_name': 'inner', 'source_line': " raise ValueError('error')", }, ] ) def test_frame_properties(): code = """ def foo(): raise ValueError('test') foo() """ m = pydantic_monty.Monty(code) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() frames = exc_info.value.traceback() assert [f.dict() for f in frames] == snapshot( [ { 'filename': 'main.py', 'line': 5, 'column': 1, 'end_line': 5, 'end_column': 6, 'function_name': '', 'source_line': 'foo()', }, { 'filename': 'main.py', 'line': 3, 'column': 11, 'end_line': 3, 'end_column': 29, 'function_name': 'foo', 'source_line': " raise ValueError('test')", }, ] ) # === Repr tests === def test_runtime_error_repr(): m = pydantic_monty.Monty("raise ValueError('test')") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() assert repr(exc_info.value) == snapshot('MontyRuntimeError(ValueError: test)') def test_syntax_error_repr(): with pytest.raises(pydantic_monty.MontySyntaxError) as exc_info: 
pydantic_monty.Monty('def') assert repr(exc_info.value) == snapshot('MontySyntaxError(Expected an identifier at byte range 3..3)') def test_frame_repr(): code = """ def foo(): raise ValueError('test') foo() """ m = pydantic_monty.Monty(code) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() frames = exc_info.value.traceback() frame = frames[0] assert repr(frame) == snapshot("Frame(filename='main.py', line=5, column=1, function_name='')") ================================================ FILE: crates/monty-python/tests/test_external.py ================================================ from typing import Any import pytest from inline_snapshot import snapshot import pydantic_monty def test_external_function_no_args(): m = pydantic_monty.Monty('noop()') def noop(*args: Any, **kwargs: Any) -> str: assert args == snapshot(()) assert kwargs == snapshot({}) return 'called' assert m.run(external_functions={'noop': noop}) == snapshot('called') def test_external_function_positional_args(): m = pydantic_monty.Monty('func(1, 2, 3)') def func(*args: Any, **kwargs: Any) -> str: assert args == snapshot((1, 2, 3)) assert kwargs == snapshot({}) return 'ok' assert m.run(external_functions={'func': func}) == snapshot('ok') def test_external_function_kwargs_only(): m = pydantic_monty.Monty('func(a=1, b="two")') def func(*args: Any, **kwargs: Any) -> str: assert args == snapshot(()) assert kwargs == snapshot({'a': 1, 'b': 'two'}) return 'ok' assert m.run(external_functions={'func': func}) == snapshot('ok') def test_external_function_mixed_args_kwargs(): m = pydantic_monty.Monty('func(1, 2, x="hello", y=True)') def func(*args: Any, **kwargs: Any) -> str: assert args == snapshot((1, 2)) assert kwargs == snapshot({'x': 'hello', 'y': True}) return 'ok' assert m.run(external_functions={'func': func}) == snapshot('ok') def test_external_function_complex_types(): m = pydantic_monty.Monty('func([1, 2], {"key": "value"})') def func(*args: Any, **kwargs: Any) -> str: assert 
args == snapshot(([1, 2], {'key': 'value'})) assert kwargs == snapshot({}) return 'ok' assert m.run(external_functions={'func': func}) == snapshot('ok') def test_external_function_returns_none(): m = pydantic_monty.Monty('do_nothing()') def do_nothing(*args: Any, **kwargs: Any) -> None: assert args == snapshot(()) assert kwargs == snapshot({}) assert m.run(external_functions={'do_nothing': do_nothing}) is None def test_external_function_returns_complex_type(): m = pydantic_monty.Monty('get_data()') def get_data(*args: Any, **kwargs: Any) -> dict[str, Any]: return {'a': [1, 2, 3], 'b': {'nested': True}} result = m.run(external_functions={'get_data': get_data}) assert result == snapshot({'a': [1, 2, 3], 'b': {'nested': True}}) def test_multiple_external_functions(): m = pydantic_monty.Monty('add(1, 2) + mul(3, 4)') def add(*args: Any, **kwargs: Any) -> int: assert args == snapshot((1, 2)) assert kwargs == snapshot({}) return args[0] + args[1] def mul(*args: Any, **kwargs: Any) -> int: assert args == snapshot((3, 4)) assert kwargs == snapshot({}) return args[0] * args[1] result = m.run(external_functions={'add': add, 'mul': mul}) assert result == snapshot(15) # 3 + 12 def test_external_function_called_multiple_times(): m = pydantic_monty.Monty('counter() + counter() + counter()') call_count = 0 def counter(*args: Any, **kwargs: Any) -> int: nonlocal call_count assert args == snapshot(()) assert kwargs == snapshot({}) call_count += 1 return call_count result = m.run(external_functions={'counter': counter}) assert result == snapshot(6) # 1 + 2 + 3 assert call_count == snapshot(3) def test_external_function_with_input(): m = pydantic_monty.Monty('process(x)', inputs=['x']) def process(*args: Any, **kwargs: Any) -> int: assert args == snapshot((5,)) assert kwargs == snapshot({}) return args[0] * 10 assert m.run(inputs={'x': 5}, external_functions={'process': process}) == snapshot(50) def test_external_function_not_provided_raises_name_error(): """Calling an unknown 
function without external_functions raises NameError.""" m = pydantic_monty.Monty('missing()') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert type(inner) is NameError assert str(inner) == snapshot("name 'missing' is not defined") def test_undeclared_function_raises_name_error(): m = pydantic_monty.Monty('unknown_func()') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert type(inner) is NameError assert str(inner) == snapshot("name 'unknown_func' is not defined") def test_external_function_raises_exception(): """Test that exceptions from external functions propagate to the caller.""" m = pydantic_monty.Monty('fail()') def fail(*args: Any, **kwargs: Any) -> None: raise ValueError('intentional error') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(external_functions={'fail': fail}) inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert inner.args[0] == snapshot('intentional error') def test_external_function_wrong_name_raises(): """Test that calling a function not in external_functions raises NameError.""" m = pydantic_monty.Monty('foo()') def bar(*args: Any, **kwargs: Any) -> int: return 1 with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(external_functions={'bar': bar}) inner = exc_info.value.exception() assert type(inner) is NameError assert str(inner) == snapshot("name 'foo' is not defined") def test_external_function_exception_caught_by_try_except(): """Test that exceptions from external functions can be caught by try/except.""" code = """ try: fail() except ValueError: caught = True caught """ m = pydantic_monty.Monty(code) def fail(*args: Any, **kwargs: Any) -> None: raise ValueError('caught error') result = m.run(external_functions={'fail': fail}) assert result == snapshot(True) def test_external_function_exception_type_preserved(): """Test that various 
exception types are correctly preserved.""" m = pydantic_monty.Monty('fail()') def fail_type_error(*args: Any, **kwargs: Any) -> None: raise TypeError('type error message') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(external_functions={'fail': fail_type_error}) inner = exc_info.value.exception() assert isinstance(inner, TypeError) assert inner.args[0] == snapshot('type error message') @pytest.mark.parametrize( 'exception_class,exception_name', [ # ArithmeticError hierarchy (ZeroDivisionError, 'ZeroDivisionError'), (OverflowError, 'OverflowError'), (ArithmeticError, 'ArithmeticError'), # RuntimeError hierarchy (NotImplementedError, 'NotImplementedError'), (RecursionError, 'RecursionError'), (RuntimeError, 'RuntimeError'), # LookupError hierarchy (KeyError, 'KeyError'), (IndexError, 'IndexError'), (LookupError, 'LookupError'), # Other exceptions (ValueError, 'ValueError'), (TypeError, 'TypeError'), (AttributeError, 'AttributeError'), (NameError, 'NameError'), (AssertionError, 'AssertionError'), ], ) def test_external_function_exception_hierarchy(exception_class: type[BaseException], exception_name: str): """Test that exception types in hierarchies are correctly preserved.""" # Test that exception propagates with correct type m = pydantic_monty.Monty('fail()') def fail(*args: Any, **kwargs: Any) -> None: raise exception_class('test message') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(external_functions={'fail': fail}) inner = exc_info.value.exception() assert isinstance(inner, exception_class) @pytest.mark.parametrize( 'exception_class,parent_class,expected_result', [ # ArithmeticError hierarchy (ZeroDivisionError, ArithmeticError, 'child'), (OverflowError, ArithmeticError, 'child'), # RuntimeError hierarchy (NotImplementedError, RuntimeError, 'child'), (RecursionError, RuntimeError, 'child'), # LookupError hierarchy (KeyError, LookupError, 'child'), (IndexError, LookupError, 'child'), ], ) def 
test_external_function_exception_caught_by_parent( exception_class: type[BaseException], parent_class: type[BaseException], expected_result: str ): """Test that child exceptions can be caught by parent except handlers.""" code = f""" try: fail() except {parent_class.__name__}: caught = 'parent' except {exception_class.__name__}: caught = 'child' caught """ m = pydantic_monty.Monty(code) def fail(*args: Any, **kwargs: Any) -> None: raise exception_class('test') # Child exception should be caught by parent handler (which comes first) result = m.run(external_functions={'fail': fail}) assert result == 'parent' @pytest.mark.parametrize( 'exception_class,expected_result', [ (ZeroDivisionError, 'ZeroDivisionError'), (OverflowError, 'OverflowError'), (NotImplementedError, 'NotImplementedError'), (RecursionError, 'RecursionError'), (KeyError, 'KeyError'), (IndexError, 'IndexError'), ], ) def test_external_function_exception_caught_specifically(exception_class: type[BaseException], expected_result: str): """Test that child exceptions can be caught by their specific handler.""" code = f""" try: fail() except {exception_class.__name__}: caught = '{expected_result}' caught """ m = pydantic_monty.Monty(code) def fail(*args: Any, **kwargs: Any) -> None: raise exception_class('test') result = m.run(external_functions={'fail': fail}) assert result == expected_result def test_external_function_exception_in_expression(): """Test exception from external function in an expression context.""" m = pydantic_monty.Monty('1 + fail() + 2') def fail(*args: Any, **kwargs: Any) -> int: raise RuntimeError('mid-expression error') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(external_functions={'fail': fail}) inner = exc_info.value.exception() assert isinstance(inner, RuntimeError) assert inner.args[0] == snapshot('mid-expression error') def test_external_function_exception_after_successful_call(): """Test exception handling after a successful external call.""" code = 
""" a = success() b = fail() a + b """ m = pydantic_monty.Monty(code) def success(*args: Any, **kwargs: Any) -> int: return 10 def fail(*args: Any, **kwargs: Any) -> int: raise ValueError('second call fails') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(external_functions={'success': success, 'fail': fail}) inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert inner.args[0] == snapshot('second call fails') def test_external_function_exception_with_finally(): """Test that finally block runs when external function raises.""" code = """ finally_ran = False try: fail() except ValueError: pass finally: finally_ran = True finally_ran """ m = pydantic_monty.Monty(code) def fail(*args: Any, **kwargs: Any) -> None: raise ValueError('error') result = m.run(external_functions={'fail': fail}) assert result == snapshot(True) ================================================ FILE: crates/monty-python/tests/test_inputs.py ================================================ import pytest from inline_snapshot import snapshot import pydantic_monty def test_single_input(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': 42}) == snapshot(42) def test_multiple_inputs(): m = pydantic_monty.Monty('x + y + z', inputs=['x', 'y', 'z']) assert m.run(inputs={'x': 1, 'y': 2, 'z': 3}) == snapshot(6) def test_input_used_in_expression(): m = pydantic_monty.Monty('x * 2 + y', inputs=['x', 'y']) assert m.run(inputs={'x': 5, 'y': 3}) == snapshot(13) def test_input_string(): m = pydantic_monty.Monty('greeting + " " + name', inputs=['greeting', 'name']) assert m.run(inputs={'greeting': 'Hello', 'name': 'World'}) == snapshot('Hello World') def test_input_list(): m = pydantic_monty.Monty('data[0] + data[1]', inputs=['data']) assert m.run(inputs={'data': [10, 20]}) == snapshot(30) def test_input_dict(): m = pydantic_monty.Monty('config["a"] * config["b"]', inputs=['config']) assert m.run(inputs={'config': {'a': 3, 'b': 4}}) == 
snapshot(12) def test_missing_input_raises(): m = pydantic_monty.Monty('x + y', inputs=['x', 'y']) with pytest.raises(KeyError, match="Missing required input: 'y'"): m.run(inputs={'x': 1}) def test_all_inputs_missing_raises(): m = pydantic_monty.Monty('x', inputs=['x']) with pytest.raises(TypeError, match='Missing required inputs'): m.run() def test_no_inputs_declared_but_provided_raises(): m = pydantic_monty.Monty('1 + 1') with pytest.raises(TypeError, match='No input variables declared but inputs dict was provided'): m.run(inputs={'x': 1}) with pytest.raises(TypeError, match='No input variables declared but inputs dict was provided'): m.run(inputs={}) def test_inputs_order_independent(): m = pydantic_monty.Monty('a - b', inputs=['a', 'b']) # Dict order shouldn't matter assert m.run(inputs={'b': 3, 'a': 10}) == snapshot(7) def test_function_param_shadows_input(): """Function parameter should shadow script input with the same name.""" code = """ def foo(x): return x + 1 foo(x * 2) """ m = pydantic_monty.Monty(code, inputs=['x']) # x=5, so foo(x * 2) = foo(10), and inside foo, x is 10 (not 5), so returns 11 assert m.run(inputs={'x': 5}) == snapshot(11) def test_function_param_shadows_input_multiple_params(): """Multiple function parameters should all shadow their corresponding inputs.""" code = """ def add(x, y): return x + y add(x * 10, y * 100) """ m = pydantic_monty.Monty(code, inputs=['x', 'y']) # x=2, y=3, so add(20, 300) should return 320 assert m.run(inputs={'x': 2, 'y': 3}) == snapshot(320) def test_input_accessible_outside_shadowing_function(): """Script input should still be accessible outside the function that shadows it.""" code = """ def double(x): return x * 2 result = double(10) + x result """ m = pydantic_monty.Monty(code, inputs=['x']) # double(10) = 20, x (input) = 5, so result = 25 assert m.run(inputs={'x': 5}) == snapshot(25) def test_function_param_shadows_input_with_default(): """Function parameter with default should shadow script input when 
called with arg.""" code = """ def foo(x=100): return x + 1 foo(x * 2) """ m = pydantic_monty.Monty(code, inputs=['x']) # x=5, foo(10), inside foo x=10 (not 5 or 100), returns 11 assert m.run(inputs={'x': 5}) == snapshot(11) def test_function_uses_input_directly(): """Function that doesn't shadow should still access the input.""" code = """ def foo(y): return x + y foo(10) """ m = pydantic_monty.Monty(code, inputs=['x']) # x=5 (input), foo(10) with y=10, returns x + y = 5 + 10 = 15 assert m.run(inputs={'x': 5}) == snapshot(15) ================================================ FILE: crates/monty-python/tests/test_limits.py ================================================ import multiprocessing import os import signal import threading import time from types import FrameType import pytest from inline_snapshot import snapshot import pydantic_monty def test_resource_limits_custom(): limits = pydantic_monty.ResourceLimits( max_allocations=100, max_duration_secs=5.0, max_memory=1024, gc_interval=10, max_recursion_depth=500, ) assert limits.get('max_allocations') == snapshot(100) assert limits.get('max_duration_secs') == snapshot(5.0) assert limits.get('max_memory') == snapshot(1024) assert limits.get('gc_interval') == snapshot(10) assert limits.get('max_recursion_depth') == snapshot(500) def test_resource_limits_repr(): limits = pydantic_monty.ResourceLimits(max_duration_secs=1.0) assert repr(limits) == snapshot("{'max_duration_secs': 1.0}") def test_run_with_limits(): m = pydantic_monty.Monty('1 + 1') limits = pydantic_monty.ResourceLimits(max_duration_secs=5.0) assert m.run(limits=limits) == snapshot(2) def test_recursion_limit(): code = """ def recurse(n): if n <= 0: return 0 return 1 + recurse(n - 1) recurse(10) """ m = pydantic_monty.Monty(code) limits = pydantic_monty.ResourceLimits(max_recursion_depth=5) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(limits=limits) assert isinstance(exc_info.value.exception(), RecursionError) def 
test_recursion_limit_ok(): code = """ def recurse(n): if n <= 0: return 0 return 1 + recurse(n - 1) recurse(5) """ m = pydantic_monty.Monty(code) limits = pydantic_monty.ResourceLimits(max_recursion_depth=100) assert m.run(limits=limits) == snapshot(5) def test_allocation_limit(): # Note: allocation counting may not trigger on all operations # Use a more aggressive allocation pattern code = """ result = [] for i in range(10000): result.append([i]) # Each append creates a new list len(result) """ m = pydantic_monty.Monty(code) limits = pydantic_monty.ResourceLimits(max_allocations=5) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(limits=limits) assert isinstance(exc_info.value.exception(), MemoryError) def test_memory_limit(): code = """ result = [] for i in range(1000): result.append('x' * 100) len(result) """ m = pydantic_monty.Monty(code) limits = pydantic_monty.ResourceLimits(max_memory=100) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(limits=limits) assert isinstance(exc_info.value.exception(), MemoryError) def test_limits_with_inputs(): m = pydantic_monty.Monty('x * 2', inputs=['x']) limits = pydantic_monty.ResourceLimits(max_duration_secs=5.0) assert m.run(inputs={'x': 21}, limits=limits) == snapshot(42) def test_limits_wrong_type_raises_error(): m = pydantic_monty.Monty('1 + 1') with pytest.raises(TypeError): m.run(limits={'max_allocations': 'not an int'}) # pyright: ignore[reportArgumentType] def test_limits_none_value_allowed(): m = pydantic_monty.Monty('1 + 1') # None is valid to explicitly disable a limit assert m.run(limits={'max_allocations': None}) == snapshot(2) # pyright: ignore[reportArgumentType] def test_signal_alarm_custom_error(): """Test that custom signal handlers work during execution. The idea here is we run another thread which sends a signal to the current process after a delay then set up a signal handler to catch that signal and raise a custom exception. 
So while monty is running, we have to run the code to catch the signal, and propagate that exception. """ code = """ def fib(n): if n <= 1: return n return fib(n - 1) + fib(n - 2) fib(35) """ m = pydantic_monty.Monty(code) def send_signal(): time.sleep(0.1) os.kill(os.getpid(), signal.SIGINT) def raise_potato(signum: int, frame: FrameType | None) -> None: raise ValueError('potato') thread = threading.Thread(target=send_signal) thread.start() old_handler = signal.signal(signal.SIGINT, raise_potato) try: with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert inner.args[0] == snapshot('potato') finally: thread.join() signal.signal(signal.SIGINT, old_handler) def _send_sigint_after_delay(pid: int, delay: float) -> None: """Helper function to send SIGINT to a process after a delay.""" time.sleep(delay) os.kill(pid, signal.SIGINT) def test_keyboard_interrupt(): """Test that KeyboardInterrupt is raised when SIGINT is sent during execution.""" code = """ def fib(n): if n <= 1: return n return fib(n - 1) + fib(n - 2) fib(35) """ m = pydantic_monty.Monty(code) # Send SIGINT after a short delay using a separate process proc = multiprocessing.Process(target=_send_sigint_after_delay, args=(os.getpid(), 0.05)) proc.start() try: raised_keyboard_interrupt = False try: m.run() except pydantic_monty.MontyRuntimeError as e: if isinstance(e.exception(), KeyboardInterrupt): raised_keyboard_interrupt = True assert raised_keyboard_interrupt, 'Expected KeyboardInterrupt to be raised' finally: proc.join() def test_pow_memory_limit(): """Large pow should fail when memory limit is set.""" m = pydantic_monty.Monty('2 ** 10000000') limits = pydantic_monty.ResourceLimits(max_memory=1_000_000) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(limits=limits) assert isinstance(exc_info.value.exception(), MemoryError) def test_lshift_memory_limit(): """Large left shift should fail 
when memory limit is set.""" m = pydantic_monty.Monty('1 << 10000000') limits = pydantic_monty.ResourceLimits(max_memory=1_000_000) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(limits=limits) assert isinstance(exc_info.value.exception(), MemoryError) def test_mult_memory_limit(): """Large multiplication should fail when memory limit is set.""" # First create a large number, then try to square it code = """ big = 2 ** 4000000 result = big * big """ m = pydantic_monty.Monty(code) limits = pydantic_monty.ResourceLimits(max_memory=1_000_000) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(limits=limits) assert isinstance(exc_info.value.exception(), MemoryError) def test_small_operations_within_limit(): """Smaller operations should succeed even with limits.""" m = pydantic_monty.Monty('2 ** 1000') limits = pydantic_monty.ResourceLimits(max_memory=1_000_000) result = m.run(limits=limits) assert result > 0 @pytest.mark.parametrize( 'code', [ 'sum(range(10**18))', 'list(range(10**18))', 'sorted(range(10**18))', 'min(range(10**18))', 'max(range(10**18))', ], ids=['sum', 'list', 'sorted', 'min', 'max'], ) def test_timeout_enforced_in_builtin_loops(code: str): """Timeout must be enforced inside Rust-side builtin iteration loops. Previously, builtins like sum(), sorted(), min(), max() ran Rust-side loops entirely within a single bytecode instruction, bypassing the VM's per-instruction timeout check. 
""" m = pydantic_monty.Monty(code) limits = pydantic_monty.ResourceLimits(max_duration_secs=0.1) start = time.monotonic() with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(limits=limits) elapsed = time.monotonic() - start assert isinstance(exc_info.value.exception(), TimeoutError) # Should terminate promptly - well under 2 seconds assert elapsed < 2.0 ================================================ FILE: crates/monty-python/tests/test_os_access.py ================================================ """Tests for OSAccess class functionality. These tests verify the OSAccess class behavior - the high-level virtual filesystem that can be passed to Monty.run(os=...). Most tests run Python code through Monty to verify behavior as it would be used in practice. For tests of the AbstractOS interface via custom subclasses, see test_os_access_raw.py. """ from pathlib import PurePosixPath from typing import Any import pytest from inline_snapshot import snapshot from pydantic_monty import CallbackFile, MemoryFile, Monty, MontyRuntimeError, OSAccess # Alias for brevity in tests P = PurePosixPath # ============================================================================= # OSAccess Initialization & Validation # ============================================================================= def test_non_absolute_path(): """OSAccess rejects files with relative paths.""" osa = OSAccess([MemoryFile('relative/path.txt', content='test')]) assert osa.files[0].path.as_posix() == '/relative/path.txt' osa = OSAccess([MemoryFile('relative/path.txt', content='test')], root_dir='/foo/bar') assert osa.files[0].path.as_posix() == '/foo/bar/relative/path.txt' def test_file_nested_within_file_rejected(): """OSAccess rejects files nested within another file's path.""" with pytest.raises(ValueError) as exc_info: OSAccess( [ MemoryFile('/test/file.txt', content='outer'), MemoryFile('/test/file.txt/nested.txt', content='inner'), ] ) assert str(exc_info.value) == snapshot( 
"Cannot put file MemoryFile(path=/test/file.txt/nested.txt, content='...', permissions=420) " "within sub-directory of file MemoryFile(path=/test/file.txt, content='...', permissions=420)" ) def test_empty_initialization(): """OSAccess can be initialized with no files.""" fs = OSAccess() result = Monty('from pathlib import Path; Path("/any/path").exists()').run(os=fs) assert result is False def test_environ_parameter(): """OSAccess accepts environ parameter for environment variables.""" fs = OSAccess(environ={'MY_VAR': 'my_value'}) result = Monty("import os; os.getenv('MY_VAR')").run(os=fs) assert result == snapshot('my_value') # ============================================================================= # Path Existence Checks (via Monty) # ============================================================================= def test_path_exists_file(): """path_exists returns True for existing files.""" fs = OSAccess([MemoryFile('/test/file.txt', content='hello')]) result = Monty('from pathlib import Path; Path("/test/file.txt").exists()').run(os=fs) assert result is True def test_path_exists_directory(): """path_exists returns True for directories created by file paths.""" fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')]) result = Monty('from pathlib import Path; Path("/test/subdir").exists()').run(os=fs) assert result is True def test_path_exists_nested(): """path_exists handles deeply nested paths.""" fs = OSAccess([MemoryFile('/a/b/c/d/file.txt', content='deep')]) code = """ from pathlib import Path (Path('/a').exists(), Path('/a/b').exists(), Path('/a/b/c').exists(), Path('/a/b/c/d').exists()) """ result = Monty(code).run(os=fs) assert result == snapshot((True, True, True, True)) def test_path_exists_missing(): """path_exists returns False for non-existent paths.""" fs = OSAccess([MemoryFile('/test/file.txt', content='hello')]) result = Monty('from pathlib import Path; Path("/other/path").exists()').run(os=fs) assert result is False def 
test_path_is_file_for_file(): """path_is_file returns True for files.""" fs = OSAccess([MemoryFile('/test/file.txt', content='hello')]) result = Monty('from pathlib import Path; Path("/test/file.txt").is_file()').run(os=fs) assert result is True def test_path_is_file_for_directory(): """path_is_file returns False for directories.""" fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')]) result = Monty('from pathlib import Path; Path("/test/subdir").is_file()').run(os=fs) assert result is False def test_path_is_file_missing(): """path_is_file returns False for non-existent paths.""" fs = OSAccess() result = Monty('from pathlib import Path; Path("/missing").is_file()').run(os=fs) assert result is False def test_path_is_dir_for_directory(): """path_is_dir returns True for directories.""" fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')]) result = Monty('from pathlib import Path; Path("/test/subdir").is_dir()').run(os=fs) assert result is True def test_path_is_dir_for_file(): """path_is_dir returns False for files.""" fs = OSAccess([MemoryFile('/test/file.txt', content='hello')]) result = Monty('from pathlib import Path; Path("/test/file.txt").is_dir()').run(os=fs) assert result is False def test_path_is_dir_missing(): """path_is_dir returns False for non-existent paths.""" fs = OSAccess() result = Monty('from pathlib import Path; Path("/missing").is_dir()').run(os=fs) assert result is False def test_path_is_symlink_always_false(): """path_is_symlink always returns False (no symlink support).""" fs = OSAccess([MemoryFile('/test/file.txt', content='hello')]) code = """ from pathlib import Path (Path('/test/file.txt').is_symlink(), Path('/test').is_symlink(), Path('/missing').is_symlink()) """ result = Monty(code).run(os=fs) assert result == snapshot((False, False, False)) # ============================================================================= # Reading Files (via Monty) # 
# =============================================================================


def test_read_text_string_content():
    """path_read_text returns string content directly."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello world')])
    result = Monty('from pathlib import Path; Path("/test/file.txt").read_text()').run(os=fs)
    assert result == snapshot('hello world')


def test_read_text_bytes_content_decoded():
    """path_read_text decodes bytes content as UTF-8."""
    fs = OSAccess([MemoryFile('/test/file.txt', content=b'bytes content')])
    result = Monty('from pathlib import Path; Path("/test/file.txt").read_text()').run(os=fs)
    assert result == snapshot('bytes content')


def test_read_bytes_bytes_content():
    """path_read_bytes returns bytes content directly."""
    fs = OSAccess([MemoryFile('/test/file.bin', content=b'\x00\x01\x02\x03')])
    result = Monty('from pathlib import Path; Path("/test/file.bin").read_bytes()').run(os=fs)
    assert result == snapshot(b'\x00\x01\x02\x03')


def test_read_bytes_string_content_encoded():
    """path_read_bytes encodes string content as UTF-8."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])
    result = Monty('from pathlib import Path; Path("/test/file.txt").read_bytes()').run(os=fs)
    assert result == snapshot(b'hello')


def test_read_text_file_not_found():
    """path_read_text raises FileNotFoundError for missing files."""
    fs = OSAccess()
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty('from pathlib import Path; Path("/missing.txt").read_text()').run(os=fs)
    assert str(exc_info.value) == snapshot("FileNotFoundError: [Errno 2] No such file or directory: '/missing.txt'")


def test_read_bytes_file_not_found():
    """path_read_bytes raises FileNotFoundError for missing files."""
    fs = OSAccess()
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty('from pathlib import Path; Path("/missing.bin").read_bytes()').run(os=fs)
    assert str(exc_info.value) == snapshot("FileNotFoundError: [Errno 2] No such file or directory: '/missing.bin'")


def test_read_text_is_a_directory():
    """path_read_text raises IsADirectoryError for directories."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty('from pathlib import Path; Path("/test/subdir").read_text()').run(os=fs)
    # The message carries the specific IsADirectoryError type (errno 21), not a bare OSError
    assert str(exc_info.value) == snapshot("IsADirectoryError: [Errno 21] Is a directory: '/test/subdir'")


def test_read_bytes_is_a_directory():
    """path_read_bytes raises IsADirectoryError for directories."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty('from pathlib import Path; Path("/test/subdir").read_bytes()').run(os=fs)
    # The message carries the specific IsADirectoryError type (errno 21), not a bare OSError
    assert str(exc_info.value) == snapshot("IsADirectoryError: [Errno 21] Is a directory: '/test/subdir'")


# =============================================================================
# Writing Files (via Monty)
# =============================================================================


def test_write_text_via_monty():
    """Path.write_text() creates a new file via Monty."""
    fs = OSAccess([MemoryFile('/test/existing.txt', content='existing')])
    code = """
from pathlib import Path
Path('/test/new.txt').write_text('new content')
"""
    result = Monty(code).run(os=fs)
    # write_text returns the number of bytes written
    assert result == snapshot(11)

    # Verify file was created
    assert fs.path_exists(P('/test/new.txt')) is True
    assert fs.path_read_text(P('/test/new.txt')) == 'new content'


def test_write_text_overwrite_via_monty():
    """Path.write_text() overwrites existing file via Monty."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='original')])
    code = """
from pathlib import Path
Path('/test/file.txt').write_text('updated')
"""
    Monty(code).run(os=fs)

    assert fs.path_read_text(P('/test/file.txt')) == 'updated'


def test_write_bytes_via_monty():
    """Path.write_bytes() creates a new file via Monty."""
    fs = OSAccess([MemoryFile('/test/existing.txt', content='existing')])
    code = """
from pathlib import Path
Path('/test/new.bin').write_bytes(b'binary data')
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot(11)

    assert fs.path_read_bytes(P('/test/new.bin')) == b'binary data'


def test_write_text_parent_not_exists_via_monty():
    """Path.write_text() raises FileNotFoundError when parent doesn't exist via Monty."""
    fs = OSAccess()
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/no/parent/file.txt').write_text('test')").run(os=fs)
    assert str(exc_info.value) == snapshot(
        "FileNotFoundError: [Errno 2] No such file or directory: '/no/parent/file.txt'"
    )


def test_write_text_to_directory_via_monty():
    """Path.write_text() raises IsADirectoryError when writing to a directory via Monty."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/test/subdir').write_text('test')").run(os=fs)
    assert str(exc_info.value) == snapshot("IsADirectoryError: [Errno 21] Is a directory: '/test/subdir'")


# =============================================================================
# Writing Files (via direct API)
# =============================================================================


def test_write_text_new_file_direct():
    """path_write_text creates a new file via direct API."""
    fs = OSAccess([MemoryFile('/test/existing.txt', content='existing')])

    # Write a new file
    fs.path_write_text(P('/test/new.txt'), 'new content')

    # Verify it was created
    assert fs.path_exists(P('/test/new.txt')) is True
    assert fs.path_read_text(P('/test/new.txt')) == 'new content'


def test_write_text_overwrite_existing_direct():
    """path_write_text overwrites existing file content via direct API."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='original')])

    fs.path_write_text(P('/test/file.txt'), 'updated')

    assert fs.path_read_text(P('/test/file.txt')) == 'updated'


def test_write_bytes_new_file_direct():
    """path_write_bytes creates a new file via direct API."""
    fs = OSAccess([MemoryFile('/test/existing.txt', content='existing')])

    fs.path_write_bytes(P('/test/new.bin'), b'binary data')

    assert fs.path_read_bytes(P('/test/new.bin')) == b'binary data'


def test_write_bytes_overwrite_existing_direct():
    """path_write_bytes overwrites existing file content via direct API."""
    fs = OSAccess([MemoryFile('/test/file.bin', content=b'original')])

    fs.path_write_bytes(P('/test/file.bin'), b'updated')

    assert fs.path_read_bytes(P('/test/file.bin')) == b'updated'


def test_write_text_parent_not_exists_direct():
    """path_write_text raises FileNotFoundError when parent doesn't exist via direct API."""
    fs = OSAccess()
    with pytest.raises(FileNotFoundError) as exc_info:
        fs.path_write_text(P('/no/parent/file.txt'), 'test')
    assert str(exc_info.value) == snapshot("[Errno 2] No such file or directory: '/no/parent/file.txt'")


def test_write_text_to_directory_direct():
    """path_write_text raises IsADirectoryError when writing to a directory via direct API."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(IsADirectoryError) as exc_info:
        fs.path_write_text(P('/test/subdir'), 'test')
    assert str(exc_info.value) == snapshot("[Errno 21] Is a directory: '/test/subdir'")


# =============================================================================
# Directory Operations - mkdir (via Monty)
# =============================================================================


def test_mkdir_basic_via_monty():
    """Path.mkdir() creates a directory via Monty."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])
    code = """
from pathlib import Path
Path('/test/newdir').mkdir()
"""
    Monty(code).run(os=fs)

    assert fs.path_is_dir(P('/test/newdir')) is True


def test_mkdir_with_parents_via_monty():
    """Path.mkdir(parents=True) creates parent directories via Monty."""
    fs = OSAccess()
    code = """
from pathlib import Path
Path('/a/b/c/d').mkdir(parents=True)
"""
    Monty(code).run(os=fs)

    assert fs.path_is_dir(P('/a')) is True
    assert fs.path_is_dir(P('/a/b')) is True
    assert fs.path_is_dir(P('/a/b/c')) is True
    assert fs.path_is_dir(P('/a/b/c/d')) is True


def test_mkdir_exist_ok_true_via_monty():
    """Path.mkdir(exist_ok=True) doesn't raise for existing directory via Monty."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    code = """
from pathlib import Path
Path('/test/subdir').mkdir(exist_ok=True)
"""
    # Should not raise
    Monty(code).run(os=fs)
    assert fs.path_is_dir(P('/test/subdir')) is True


def test_mkdir_exist_ok_false_via_monty():
    """Path.mkdir() raises FileExistsError for existing directory via Monty."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/test/subdir').mkdir()").run(os=fs)
    # The message carries the specific FileExistsError type (errno 17), not a bare OSError
    assert str(exc_info.value) == snapshot("FileExistsError: [Errno 17] File exists: '/test/subdir'")


def test_mkdir_parent_not_exists_via_monty():
    """Path.mkdir() raises FileNotFoundError when parent doesn't exist via Monty."""
    fs = OSAccess()
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/no/parent/dir').mkdir()").run(os=fs)
    assert str(exc_info.value) == snapshot("FileNotFoundError: [Errno 2] No such file or directory: '/no/parent/dir'")


# =============================================================================
# Directory Operations - mkdir (via direct API)
# =============================================================================


def test_mkdir_basic_direct():
    """path_mkdir creates a directory via direct API."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])

    fs.path_mkdir(P('/test/newdir'), parents=False, exist_ok=False)

    assert fs.path_is_dir(P('/test/newdir')) is True


def test_mkdir_with_parents_direct():
    """path_mkdir with parents=True creates parent directories via direct API."""
    fs = OSAccess()

    fs.path_mkdir(P('/a/b/c/d'), parents=True, exist_ok=False)

    assert fs.path_is_dir(P('/a')) is True
    assert fs.path_is_dir(P('/a/b')) is True
    assert fs.path_is_dir(P('/a/b/c')) is True
    assert fs.path_is_dir(P('/a/b/c/d')) is True


def test_mkdir_exist_ok_true_direct():
    """path_mkdir with exist_ok=True doesn't raise for existing directory via direct API."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])

    # Should not raise
    fs.path_mkdir(P('/test/subdir'), parents=False, exist_ok=True)
    assert fs.path_is_dir(P('/test/subdir')) is True


def test_mkdir_exist_ok_false_direct():
    """path_mkdir with exist_ok=False raises for existing directory via direct API."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(FileExistsError) as exc_info:
        fs.path_mkdir(P('/test/subdir'), parents=False, exist_ok=False)
    assert str(exc_info.value) == snapshot("[Errno 17] File exists: '/test/subdir'")


def test_mkdir_file_exists_direct():
    """path_mkdir raises FileExistsError when a file exists at the path via direct API."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])
    with pytest.raises(FileExistsError) as exc_info:
        fs.path_mkdir(P('/test/file.txt'), parents=False, exist_ok=False)
    assert str(exc_info.value) == snapshot("[Errno 17] File exists: '/test/file.txt'")


def test_mkdir_parent_not_exists_direct():
    """path_mkdir without parents raises FileNotFoundError when parent doesn't exist via direct API."""
    fs = OSAccess()
    with pytest.raises(FileNotFoundError) as exc_info:
        fs.path_mkdir(P('/no/parent/dir'), parents=False, exist_ok=False)
    assert str(exc_info.value) == snapshot("[Errno 2] No such file or directory: '/no/parent/dir'")


def test_mkdir_parent_is_file_direct():
    """path_mkdir raises NotADirectoryError when parent is a file via direct API."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])
    with pytest.raises(NotADirectoryError) as exc_info:
        fs.path_mkdir(P('/test/file.txt/subdir'), parents=True, exist_ok=False)
    assert str(exc_info.value) == snapshot("[Errno 20] Not a directory: '/test/file.txt/subdir'")


# =============================================================================
# Directory Operations - rmdir (via Monty)
# =============================================================================


def test_rmdir_empty_directory_via_monty():
    """Path.rmdir() removes an empty directory via Monty."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    fs.path_mkdir(P('/test/newdir'), parents=False, exist_ok=False)
    code = """
from pathlib import Path
Path('/test/newdir').rmdir()
"""
    Monty(code).run(os=fs)

    assert fs.path_exists(P('/test/newdir')) is False


def test_rmdir_non_empty_directory_via_monty():
    """Path.rmdir() raises OSError for non-empty directory via Monty."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/test/subdir').rmdir()").run(os=fs)
    assert str(exc_info.value) == snapshot("OSError: [Errno 39] Directory not empty: '/test/subdir'")


def test_rmdir_not_found_via_monty():
    """Path.rmdir() raises FileNotFoundError for non-existent path via Monty."""
    fs = OSAccess()
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/missing').rmdir()").run(os=fs)
    assert str(exc_info.value) == snapshot("FileNotFoundError: [Errno 2] No such file or directory: '/missing'")


def test_rmdir_file_not_directory_via_monty():
    """Path.rmdir() raises NotADirectoryError for files via Monty."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/test/file.txt').rmdir()").run(os=fs)
    assert str(exc_info.value) == snapshot("NotADirectoryError: [Errno 20] Not a directory: '/test/file.txt'")


#
# =============================================================================
# Directory Operations - rmdir (via direct API)
# =============================================================================


def test_rmdir_empty_directory_direct():
    """path_rmdir removes an empty directory via direct API."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    fs.path_mkdir(P('/test/newdir'), parents=False, exist_ok=False)

    fs.path_rmdir(P('/test/newdir'))

    assert fs.path_exists(P('/test/newdir')) is False


def test_rmdir_non_empty_directory_direct():
    """path_rmdir raises OSError for non-empty directory via direct API."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(OSError) as exc_info:
        fs.path_rmdir(P('/test/subdir'))
    assert str(exc_info.value) == snapshot("[Errno 39] Directory not empty: '/test/subdir'")


def test_rmdir_file_not_directory_direct():
    """path_rmdir raises NotADirectoryError for files via direct API."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])
    with pytest.raises(NotADirectoryError) as exc_info:
        fs.path_rmdir(P('/test/file.txt'))
    assert str(exc_info.value) == snapshot("[Errno 20] Not a directory: '/test/file.txt'")


def test_rmdir_not_found_direct():
    """path_rmdir raises FileNotFoundError for non-existent path via direct API."""
    fs = OSAccess()
    with pytest.raises(FileNotFoundError) as exc_info:
        fs.path_rmdir(P('/missing'))
    assert str(exc_info.value) == snapshot("[Errno 2] No such file or directory: '/missing'")


# =============================================================================
# Directory Operations - iterdir (via Monty and direct API)
# =============================================================================


def test_iterdir_list_contents():
    """path_iterdir lists directory contents."""
    fs = OSAccess(
        [
            MemoryFile('/test/a.txt', content='a'),
            MemoryFile('/test/b.txt', content='b'),
            MemoryFile('/test/subdir/c.txt', content='c'),
        ]
    )
    code = """
from pathlib import Path
[str(p) for p in Path('/test').iterdir()]
"""
    result = Monty(code).run(os=fs)
    # Result may be in any order, so sort in Python
    assert sorted(result) == snapshot(['/test/a.txt', '/test/b.txt', '/test/subdir'])


def test_iterdir_empty_directory_direct():
    """path_iterdir returns empty list for empty directory via direct API."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    fs.path_mkdir(P('/test/empty'), parents=False, exist_ok=False)

    result = fs.path_iterdir(P('/test/empty'))

    assert result == snapshot([])


def test_iterdir_not_a_directory_direct():
    """path_iterdir raises NotADirectoryError for files via direct API."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])
    with pytest.raises(NotADirectoryError) as exc_info:
        fs.path_iterdir(P('/test/file.txt'))
    assert str(exc_info.value) == snapshot("[Errno 20] Not a directory: '/test/file.txt'")


def test_iterdir_not_found():
    """path_iterdir raises FileNotFoundError for non-existent path."""
    fs = OSAccess()
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; list(Path('/missing').iterdir())").run(os=fs)
    assert str(exc_info.value) == snapshot("FileNotFoundError: [Errno 2] No such file or directory: '/missing'")


# =============================================================================
# File Operations - unlink (via Monty)
# =============================================================================


def test_unlink_file_via_monty():
    """Path.unlink() removes a file via Monty."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])
    code = """
from pathlib import Path
Path('/test/file.txt').unlink()
"""
    Monty(code).run(os=fs)

    assert fs.path_exists(P('/test/file.txt')) is False


def test_unlink_file_not_found_via_monty():
    """Path.unlink() raises FileNotFoundError for non-existent files via Monty."""
    fs = OSAccess()
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/missing.txt').unlink()").run(os=fs)
    assert str(exc_info.value) == snapshot("FileNotFoundError: [Errno 2] No such file or directory: '/missing.txt'")


def test_unlink_is_directory_via_monty():
    """Path.unlink() raises IsADirectoryError for directories via Monty."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/test/subdir').unlink()").run(os=fs)
    assert str(exc_info.value) == snapshot("IsADirectoryError: [Errno 21] Is a directory: '/test/subdir'")


# =============================================================================
# File Operations - unlink (via direct API)
# =============================================================================


def test_unlink_file_direct():
    """path_unlink removes a file via direct API."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])

    fs.path_unlink(P('/test/file.txt'))

    assert fs.path_exists(P('/test/file.txt')) is False


def test_unlink_file_not_found_direct():
    """path_unlink raises FileNotFoundError for non-existent files via direct API."""
    fs = OSAccess()
    with pytest.raises(FileNotFoundError) as exc_info:
        fs.path_unlink(P('/missing.txt'))
    assert str(exc_info.value) == snapshot("[Errno 2] No such file or directory: '/missing.txt'")


def test_unlink_is_directory_direct():
    """path_unlink raises IsADirectoryError for directories via direct API."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    with pytest.raises(IsADirectoryError) as exc_info:
        fs.path_unlink(P('/test/subdir'))
    assert str(exc_info.value) == snapshot("[Errno 21] Is a directory: '/test/subdir'")


# =============================================================================
# Stat Operations (via Monty)
# =============================================================================


def test_stat_file():
    """path_stat returns stat result for files with size and mode."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello world')])
    code = """
from pathlib import Path
s = Path('/test/file.txt').stat()
(s.st_size, s.st_mode & 0o777)
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot((11, 0o644))


def test_stat_file_custom_permissions():
    """path_stat returns custom file permissions."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello', permissions=0o755)])
    code = """
from pathlib import Path
s = Path('/test/file.txt').stat()
s.st_mode & 0o777
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot(0o755)


def test_stat_directory():
    """path_stat returns stat result for directories."""
    fs = OSAccess([MemoryFile('/test/subdir/file.txt', content='hello')])
    code = """
from pathlib import Path
s = Path('/test/subdir').stat()
s.st_mode
"""
    result = Monty(code).run(os=fs)
    # Directory mode bits: 0o040000 (directory) | 0o755 (default perms) = 0o040755
    assert result == snapshot(0o040755)


def test_stat_file_not_found():
    """path_stat raises FileNotFoundError for non-existent paths."""
    fs = OSAccess()
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/missing').stat()").run(os=fs)
    assert str(exc_info.value) == snapshot("FileNotFoundError: [Errno 2] No such file or directory: '/missing'")


def test_stat_bytes_content_size():
    """path_stat calculates size correctly for bytes content."""
    fs = OSAccess([MemoryFile('/test/file.bin', content=b'\x00\x01\x02\x03\x04')])
    code = """
from pathlib import Path
Path('/test/file.bin').stat().st_size
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot(5)


def test_stat_unicode_size():
    """path_stat calculates size as encoded UTF-8 bytes for string content."""
    # Unicode snowman is 3 bytes in UTF-8
    fs = OSAccess([MemoryFile('/test/file.txt', content='☃')])
    code = """
from pathlib import Path
Path('/test/file.txt').stat().st_size
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot(3)


# =============================================================================
# Rename Operations (via Monty)
#
# =============================================================================


def test_rename_file_via_monty():
    """Path.rename() renames a file via Monty."""
    fs = OSAccess([MemoryFile('/test/old.txt', content='content')])
    code = """
from pathlib import Path
Path('/test/old.txt').rename(Path('/test/new.txt'))
"""
    Monty(code).run(os=fs)

    assert fs.path_exists(P('/test/old.txt')) is False
    assert fs.path_exists(P('/test/new.txt')) is True
    assert fs.path_read_text(P('/test/new.txt')) == 'content'


def test_rename_source_not_found_via_monty():
    """Path.rename() raises FileNotFoundError when source doesn't exist via Monty."""
    fs = OSAccess()
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/missing.txt').rename(Path('/new.txt'))").run(os=fs)
    assert str(exc_info.value) == snapshot(
        "FileNotFoundError: [Errno 2] No such file or directory: '/missing.txt' -> '/new.txt'"
    )


def test_rename_target_parent_not_found_via_monty():
    """Path.rename() raises FileNotFoundError when target parent doesn't exist via Monty."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='content')])
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("from pathlib import Path; Path('/test/file.txt').rename(Path('/no/parent/file.txt'))").run(os=fs)
    assert str(exc_info.value) == snapshot(
        "FileNotFoundError: [Errno 2] No such file or directory: '/test/file.txt' -> '/no/parent/file.txt'"
    )


# =============================================================================
# Rename Operations (via direct API)
# =============================================================================


def test_rename_file_direct():
    """path_rename renames a file via direct API."""
    fs = OSAccess([MemoryFile('/test/old.txt', content='content')])

    fs.path_rename(P('/test/old.txt'), P('/test/new.txt'))

    assert fs.path_exists(P('/test/old.txt')) is False
    assert fs.path_exists(P('/test/new.txt')) is True
    assert fs.path_read_text(P('/test/new.txt')) == 'content'


def test_rename_source_not_found_direct():
    """path_rename raises FileNotFoundError when source doesn't exist via direct API."""
    fs = OSAccess()
    with pytest.raises(FileNotFoundError) as exc_info:
        fs.path_rename(P('/missing.txt'), P('/new.txt'))
    assert str(exc_info.value) == snapshot("[Errno 2] No such file or directory: '/missing.txt' -> '/new.txt'")


def test_rename_target_parent_not_found_direct():
    """path_rename raises FileNotFoundError when target parent doesn't exist via direct API."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='content')])
    with pytest.raises(FileNotFoundError) as exc_info:
        fs.path_rename(P('/test/file.txt'), P('/no/parent/file.txt'))
    assert str(exc_info.value) == snapshot(
        "[Errno 2] No such file or directory: '/test/file.txt' -> '/no/parent/file.txt'"
    )


def test_rename_directory_direct():
    """path_rename renames a directory via direct API."""
    fs = OSAccess([MemoryFile('/test/olddir/file.txt', content='content')])
    # The rename source is the freshly created, empty /test/newdir - not the
    # /test/olddir directory that comes from the MemoryFile fixture above.
    fs.path_mkdir(P('/test/newdir'), parents=False, exist_ok=False)

    fs.path_rename(P('/test/newdir'), P('/test/renamed'))

    assert fs.path_is_dir(P('/test/renamed')) is True


def test_rename_directory_non_empty_target_direct():
    """path_rename raises OSError when renaming directory to non-empty target via direct API."""
    fs = OSAccess(
        [
            MemoryFile('/test/src/a.txt', content='a'),
            MemoryFile('/test/dst/b.txt', content='b'),
        ]
    )
    with pytest.raises(OSError) as exc_info:
        fs.path_rename(P('/test/src'), P('/test/dst'))
    # NOTE(review): this snapshot pins errno 66 for 'Directory not empty', while
    # test_rmdir_non_empty_directory_direct pins errno 39 for the same message - confirm this is intended.
    assert str(exc_info.value) == snapshot("[Errno 66] Directory not empty: '/test/src' -> '/test/dst'")


def test_rename_directory_updates_file_paths_direct():
    """path_rename updates paths of all files within renamed directory."""
    file1 = MemoryFile('/old/dir/file1.txt', content='one')
    file2 = MemoryFile('/old/dir/subdir/file2.txt', content='two')
    fs = OSAccess([file1, file2])

    # Create target parent and rename the directory
    fs.path_mkdir(P('/new'), parents=False, exist_ok=False)
    fs.path_rename(P('/old/dir'), P('/new/location'))

    # Verify files are accessible at new paths
    assert fs.path_read_text(P('/new/location/file1.txt')) == 'one'
    assert fs.path_read_text(P('/new/location/subdir/file2.txt')) == 'two'

    # Verify the AbstractFile objects have updated paths
    assert file1.path.as_posix() == '/new/location/file1.txt'
    assert file2.path.as_posix() == '/new/location/subdir/file2.txt'

    # Verify old paths no longer exist
    assert fs.path_exists(P('/old/dir')) is False
    assert fs.path_exists(P('/old/dir/file1.txt')) is False


# =============================================================================
# Path Resolution (via Monty)
# =============================================================================


def test_path_resolve_absolute():
    """path_resolve returns absolute path."""
    fs = OSAccess([MemoryFile('/test/file.txt', content='hello')])
    code = """
from pathlib import Path
str(Path('/test/file.txt').resolve())
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot('/test/file.txt')


def test_path_absolute_already_absolute():
    """path_absolute returns same path for already absolute path."""
    fs = OSAccess()
    code = """
from pathlib import Path
str(Path('/already/absolute').absolute())
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot('/already/absolute')


def test_path_absolute_relative():
    """path_absolute converts relative path to absolute."""
    fs = OSAccess()
    code = """
from pathlib import Path
str(Path('relative/path').absolute())
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot('/relative/path')


def test_path_resolve_same_as_absolute():
    """path_resolve behaves same as absolute (no symlinks in OSAccess)."""
    fs = OSAccess()
    code = """
from pathlib import Path
str(Path('relative').resolve()) == str(Path('relative').absolute())
"""
    result = Monty(code).run(os=fs)
    assert result is True


# =============================================================================
# Environment Variables (via Monty)
#
# =============================================================================


def test_getenv_existing_key() -> None:
    """getenv returns value for existing key."""
    fs = OSAccess(environ={'MY_VAR': 'my_value'})
    result = Monty("import os; os.getenv('MY_VAR')").run(os=fs)
    assert result == snapshot('my_value')


def test_getenv_missing_key() -> None:
    """getenv returns None for missing key."""
    # A non-empty environ is used so that only the requested key is absent.
    fs = OSAccess(environ={'OTHER': 'value'})
    result = Monty("import os; os.getenv('MISSING')").run(os=fs)
    assert result is None


def test_getenv_missing_with_default() -> None:
    """getenv returns default for missing key when default provided."""
    fs = OSAccess(environ={})
    result = Monty("import os; os.getenv('MISSING', 'default_value')").run(os=fs)
    assert result == snapshot('default_value')


def test_getenv_multiple_vars() -> None:
    """getenv handles multiple environment variables."""
    fs = OSAccess(environ={'VAR1': 'value1', 'VAR2': 'value2', 'VAR3': 'value3'})
    # All three lookups run in one program; the trailing tuple is the program's result.
    code = """
import os
(os.getenv('VAR1'), os.getenv('VAR2'), os.getenv('VAR3'))
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot(('value1', 'value2', 'value3'))


def test_get_environ_returns_dict() -> None:
    """os.environ returns the full environ dict."""
    fs = OSAccess(environ={'HOME': '/home/user', 'USER': 'testuser'})
    result = Monty('import os; os.environ').run(os=fs)
    assert result == snapshot({'HOME': '/home/user', 'USER': 'testuser'})


def test_get_environ_key_access() -> None:
    """os.environ['KEY'] returns the value."""
    fs = OSAccess(environ={'MY_VAR': 'my_value'})
    result = Monty("import os; os.environ['MY_VAR']").run(os=fs)
    assert result == snapshot('my_value')


def test_get_environ_key_missing_raises() -> None:
    """os.environ['MISSING'] raises KeyError."""
    fs = OSAccess(environ={})
    # The KeyError raised inside the sandbox reaches the host as MontyRuntimeError.
    with pytest.raises(MontyRuntimeError) as exc_info:
        Monty("import os; os.environ['MISSING']").run(os=fs)
    assert str(exc_info.value) == snapshot('KeyError: MISSING')


def test_get_environ_get_method() -> None:
    """os.environ.get() works correctly."""
    fs = OSAccess(environ={'HOME': '/home/user'})
    result = Monty("import os; os.environ.get('HOME')").run(os=fs)
    assert result == snapshot('/home/user')


def test_get_environ_get_missing_with_default() -> None:
    """os.environ.get() returns default for missing key."""
    fs = OSAccess(environ={})
    result = Monty("import os; os.environ.get('MISSING', 'fallback')").run(os=fs)
    assert result == snapshot('fallback')


def test_get_environ_len() -> None:
    """len(os.environ) returns the number of env vars."""
    fs = OSAccess(environ={'A': '1', 'B': '2', 'C': '3'})
    result = Monty('import os; len(os.environ)').run(os=fs)
    assert result == snapshot(3)


def test_get_environ_contains() -> None:
    """'KEY' in os.environ tests membership."""
    fs = OSAccess(environ={'PRESENT': 'value'})
    # One positive and one negative membership test, returned together as a tuple.
    code = """
import os
('PRESENT' in os.environ, 'ABSENT' in os.environ)
"""
    result = Monty(code).run(os=fs)
    assert result == snapshot((True, False))


def test_get_environ_keys() -> None:
    """os.environ.keys() returns the keys."""
    fs = OSAccess(environ={'X': '1', 'Y': '2'})
    result = Monty('import os; list(os.environ.keys())').run(os=fs)
    # Compared as a set so the assertion does not depend on iteration order.
    assert set(result) == snapshot({'X', 'Y'})


def test_get_environ_values() -> None:
    """os.environ.values() returns the values."""
    fs = OSAccess(environ={'X': 'a', 'Y': 'b'})
    result = Monty('import os; list(os.environ.values())').run(os=fs)
    # Compared as a set so the assertion does not depend on iteration order.
    assert set(result) == snapshot({'a', 'b'})


def test_get_environ_items() -> None:
    """os.environ.items() returns key-value pairs."""
    fs = OSAccess(environ={'X': '1', 'Y': '2'})
    result = Monty('import os; list(os.environ.items())').run(os=fs)
    # Compared as a set so the assertion does not depend on iteration order.
    assert set(result) == snapshot({('X', '1'), ('Y', '2')})


def test_get_environ_empty() -> None:
    """os.environ returns empty dict when no environ provided."""
    # No `environ` argument at all (as opposed to an explicit empty dict).
    fs = OSAccess()
    result = Monty('import os; os.environ').run(os=fs)
    assert result == snapshot({})


# =============================================================================
# MemoryFile Behavior
# =============================================================================


def test_memory_file_string_content() -> None:
    """MemoryFile stores and returns string content."""
    file = MemoryFile('/test/file.txt', content='hello')
    assert file.read_content() == snapshot('hello')
    assert file.path.as_posix() == snapshot('/test/file.txt')
    # `name` is the final path component.
    assert file.name == snapshot('file.txt')


def test_memory_file_bytes_content() -> None:
    """MemoryFile stores and returns bytes content."""
    file = MemoryFile('/test/file.bin', content=b'\x00\x01\x02')
    assert file.read_content() == snapshot(b'\x00\x01\x02')


def test_memory_file_custom_permissions() -> None:
    """MemoryFile accepts custom permissions."""
    file = MemoryFile('/test/exec.sh', content='#!/bin/bash', permissions=0o755)
    assert file.permissions == snapshot(0o755)


def test_memory_file_write_and_read() -> None:
    """MemoryFile supports writing and re-reading content."""
    file = MemoryFile('/test/file.txt', content='original')
    file.write_content('updated')
    assert file.read_content() == snapshot('updated')


def test_memory_file_delete() -> None:
    """MemoryFile can be marked as deleted."""
    file = MemoryFile('/test/file.txt', content='content')
    # A new file starts out not deleted; delete() flips the flag.
    assert file.deleted is False
    file.delete()
    assert file.deleted is True


def test_memory_file_repr() -> None:
    """MemoryFile has useful repr for debugging."""
    file = MemoryFile('/test/file.txt', content='content')
    # The repr elides the content and prints permissions in decimal (420 == 0o644).
    assert repr(file) == snapshot("MemoryFile(path=/test/file.txt, content='...', permissions=420)")


def test_memory_file_bytes_repr() -> None:
    """MemoryFile repr shows b'...' for bytes content."""
    file = MemoryFile('/test/file.bin', content=b'\x00')
    assert repr(file) == snapshot("MemoryFile(path=/test/file.bin, content=b'...', permissions=420)")


# =============================================================================
# CallbackFile Behavior
# =============================================================================


def test_callback_file_read() -> None:
    """CallbackFile calls read callback."""
    # Records every path the read callback is invoked with.
    read_calls: list[PurePosixPath] = []

    def read_fn(path: PurePosixPath) -> str:
        read_calls.append(path)
        return f'content from {path}'

    def write_fn(path: PurePosixPath, content: str | bytes) -> None:
        pass

    file = CallbackFile('/test/file.txt', read=read_fn, write=write_fn)
    fs = OSAccess([file])

    result = Monty('from pathlib import Path; Path("/test/file.txt").read_text()').run(os=fs)
    assert result == snapshot('content from /test/file.txt')
    # A single read_text() call must trigger the callback exactly once.
    assert len(read_calls) == 1


def test_callback_file_write_direct() -> None:
    """CallbackFile calls write callback via direct API."""
    # Records every (path, content) pair the write callback is invoked with.
    written: list[tuple[PurePosixPath, Any]] = []

    def read_fn(path: PurePosixPath) -> str:
        return ''

    def write_fn(path: PurePosixPath, content: str | bytes) -> None:
        written.append((path, content))

    file = CallbackFile('/test/file.txt', read=read_fn, write=write_fn)
    fs = OSAccess([file])

    # Use direct API since write_text not implemented in Monty
    fs.path_write_text(P('/test/file.txt'), 'new content')
    assert len(written) == 1
    assert written[0][1] == snapshot('new content')


def test_callback_file_custom_permissions() -> None:
    """CallbackFile accepts custom permissions."""
    file = CallbackFile(
        '/test/file.txt',
        read=lambda _: '',
        write=lambda _p, _c: None,
        permissions=0o700,
    )
    assert file.permissions == snapshot(0o700)


def test_callback_file_repr() -> None:
    """CallbackFile has useful repr for debugging."""
    file = CallbackFile('/test/file.txt', read=lambda _: '', write=lambda _, __: None)
    # Only the prefix is checked; the rest of the repr is not pinned by this test.
    assert 'CallbackFile(path=/test/file.txt' in repr(file)
============================================================================= # Custom AbstractFile Implementation # ============================================================================= class CustomFile: """Minimal custom AbstractFile implementation.""" def __init__(self, path: str, content: str) -> None: self.path = PurePosixPath(path) self.name = self.path.name self.permissions = 0o644 self.deleted = False self.content = content def read_content(self) -> str: return self.content def write_content(self, content: str | bytes) -> None: self.content = content if isinstance(content, str) else content.decode() def delete(self) -> None: self.deleted = True def test_custom_abstract_file(): """Custom AbstractFile implementation works with OSAccess.""" custom = CustomFile('/test/custom.txt', 'custom content') fs = OSAccess([custom]) result = Monty('from pathlib import Path; Path("/test/custom.txt").read_text()').run(os=fs) assert result == snapshot('custom content') def test_custom_abstract_file_mixed_with_memory_file(): """Custom AbstractFile can be mixed with MemoryFile.""" custom = CustomFile('/test/custom.txt', 'from custom') memory = MemoryFile('/test/memory.txt', content='from memory') fs = OSAccess([custom, memory]) code = """ from pathlib import Path (Path('/test/custom.txt').read_text(), Path('/test/memory.txt').read_text()) """ result = Monty(code).run(os=fs) assert result == snapshot(('from custom', 'from memory')) # ============================================================================= # Direct API Test (without Monty) # ============================================================================= def test_os_access_direct_api(): """OSAccess methods can be called directly without Monty.""" fs = OSAccess( [ MemoryFile('/test/file.txt', content='hello'), MemoryFile('/test/subdir/nested.txt', content='nested'), ] ) # Test path_exists assert fs.path_exists(P('/test/file.txt')) is True assert fs.path_exists(P('/missing')) is False # Test path_is_file 
/ path_is_dir assert fs.path_is_file(P('/test/file.txt')) is True assert fs.path_is_dir(P('/test/file.txt')) is False assert fs.path_is_dir(P('/test/subdir')) is True assert fs.path_is_file(P('/test/subdir')) is False # Test path_read_text / path_read_bytes assert fs.path_read_text(P('/test/file.txt')) == 'hello' assert fs.path_read_bytes(P('/test/file.txt')) == b'hello' # Test path_stat stat = fs.path_stat(P('/test/file.txt')) assert stat.st_size == 5 # Test path_iterdir contents = fs.path_iterdir(P('/test')) assert sorted(contents) == snapshot([PurePosixPath('/test/file.txt'), PurePosixPath('/test/subdir')]) # Test path_absolute assert fs.path_absolute(P('relative')) == '/relative' assert fs.path_absolute(P('/absolute')) == '/absolute' # ============================================================================= # Edge Cases # ============================================================================= def test_root_directory(): """Root directory '/' is handled correctly.""" fs = OSAccess([MemoryFile('/file.txt', content='root file')]) code = """ from pathlib import Path (Path('/').is_dir(), sorted([str(p) for p in Path('/').iterdir()])) """ result = Monty(code).run(os=fs) assert result == snapshot((True, ['/file.txt'])) def test_empty_file(): """Empty file content is handled correctly.""" fs = OSAccess([MemoryFile('/empty.txt', content='')]) code = """ from pathlib import Path (Path('/empty.txt').read_text(), Path('/empty.txt').stat().st_size) """ result = Monty(code).run(os=fs) assert result == snapshot(('', 0)) def test_large_nested_path(): """Deeply nested paths are handled correctly.""" fs = OSAccess([MemoryFile('/a/b/c/d/e/f/g/h/i/j/file.txt', content='deep')]) code = """ from pathlib import Path Path('/a/b/c/d/e/f/g/h/i/j/file.txt').read_text() """ result = Monty(code).run(os=fs) assert result == snapshot('deep') def test_special_characters_in_content(): """Special characters in file content are handled correctly.""" content = 
'line1\nline2\ttab\r\nwindows' fs = OSAccess([MemoryFile('/special.txt', content=content)]) result = Monty('from pathlib import Path; Path("/special.txt").read_text()').run(os=fs) assert result == snapshot('line1\nline2\ttab\r\nwindows') ================================================ FILE: crates/monty-python/tests/test_os_access_compat.py ================================================ """OSAccess compatibility tests. These tests verify that OSAccess (Monty's virtual filesystem) behaves identically to CPython's real filesystem operations. Each test runs twice - once with Monty using OSAccess/MemoryFile and once with CPython using a real temp directory. This ensures that code written for real filesystems works correctly in the sandboxed Monty environment. """ from abc import ABC, abstractmethod from pathlib import Path from typing import Any, TypeAlias import pytest from pydantic_monty import MemoryFile, Monty, OSAccess # Type alias for nested tree structure (file content or nested dict). # Using Any for the recursive dict value since Python's type system doesn't # handle recursive types well without TypedDict or Protocol. TreeDict: TypeAlias = 'dict[str, str | bytes | TreeDict]' class CodeRunner(ABC): """Abstract interface for running Python code against a filesystem. Implementations provide either a virtual filesystem (Monty+OSAccess) or a real filesystem (CPython+temp directory) for compatibility testing. """ @abstractmethod def write_file(self, path: str, content: str | bytes) -> None: """Add a file to the test filesystem setup. Args: path: Relative path for the file (e.g., 'test/file.txt') content: File content as string or bytes """ @abstractmethod def run_code(self, code: str) -> Any: """Run Python code and return the result. The code can use Path('relative/path') and it will be resolved to the appropriate root (OSAccess root or temp directory). 
Args: code: Python code to execute Returns: The result of the last expression in the code Raises: Exception: If the code raises an exception """ @abstractmethod def tree(self) -> TreeDict: """Return a dict tree of files and their contents. Returns: Nested dict where keys are file/dir names and values are: - str/bytes for file contents - dict for subdirectories """ @abstractmethod def set_environ(self, environ: dict[str, str]) -> None: """Set environment variables for the test. Args: environ: Dictionary of environment variable names to values """ class MontyRunner(CodeRunner): """CodeRunner implementation using Monty with OSAccess virtual filesystem.""" def __init__(self) -> None: self._files: list[MemoryFile] = [] self._environ: dict[str, str] = {} self._os_access: OSAccess | None = None def write_file(self, path: str, content: str | bytes) -> None: # Use relative paths - OSAccess now supports them self._files.append(MemoryFile(path, content=content)) # Reset OSAccess so it gets rebuilt with new files self._os_access = None def set_environ(self, environ: dict[str, str]) -> None: self._environ = environ # Reset OSAccess so it gets rebuilt with new environ self._os_access = None def _get_os_access(self) -> OSAccess: if self._os_access is None: self._os_access = OSAccess(self._files, environ=self._environ) return self._os_access def run_code(self, code: str) -> Any: # Prepend imports - OSAccess now handles relative paths wrapped_code = f'from pathlib import Path\nimport os\n{code}' m = Monty(wrapped_code) return m.run(os=self._get_os_access()) def tree(self) -> TreeDict: result: TreeDict = {} def add_to_tree(tree: TreeDict, parts: list[str], content: str | bytes) -> None: if len(parts) == 1: tree[parts[0]] = content else: if parts[0] not in tree: tree[parts[0]] = {} sub: Any = tree[parts[0]] if isinstance(sub, dict): add_to_tree(sub, parts[1:], content) # type: ignore[arg-type] # Build tree from all files for file in self._files: if file.deleted: continue path_parts = 
list(file.path.parts) content = file.read_content() add_to_tree(result, path_parts, content) return result class CPythonRunner(CodeRunner): """CodeRunner implementation using CPython with a real temp directory.""" def __init__(self, tmp_path: Path) -> None: self._root = tmp_path self._environ: dict[str, str] = {} def write_file(self, path: str, content: str | bytes) -> None: full_path = self._root / path full_path.parent.mkdir(parents=True, exist_ok=True) if isinstance(content, bytes): full_path.write_bytes(content) else: full_path.write_text(content) def set_environ(self, environ: dict[str, str]) -> None: self._environ = environ def run_code(self, code: str) -> Any: import ast import types # Map absolute paths (starting with /) to the temp directory # This matches OSAccess behavior which normalizes relative paths to / root = self._root def rooted_path(p: str | Path) -> Path: path = Path(p) if path.is_absolute(): # Absolute path - strip leading / and map to root return root / str(path).lstrip('/') else: # Relative path - prepend / then map to root return root / p # Create a mock os module with our environ mock_os = types.SimpleNamespace() mock_os.environ = self._environ def getenv(key: str, default: str | None = None) -> str | None: return self._environ.get(key, default) mock_os.getenv = getenv namespace: dict[str, Any] = {'Path': rooted_path, 'os': mock_os} exec(code, namespace) # Find the last expression result tree = ast.parse(code) if tree.body and isinstance(tree.body[-1], ast.Expr): last_expr = ast.Expression(tree.body[-1].value) compiled = compile(last_expr, '', 'eval') return eval(compiled, namespace) return None def tree(self) -> TreeDict: def build_tree(path: Path) -> TreeDict: result: TreeDict = {} for item in sorted(path.iterdir()): if item.is_dir(): subtree = build_tree(item) result[item.name] = subtree else: # Try to read as text, fall back to bytes try: result[item.name] = item.read_text() except UnicodeDecodeError: result[item.name] = 
item.read_bytes() return result if not self._root.exists(): return {} return build_tree(self._root) @pytest.fixture(params=['monty', 'cpython']) def runner(request: pytest.FixtureRequest, tmp_path: Path) -> CodeRunner: """Fixture that provides both Monty and CPython runners for comparison testing.""" if request.param == 'monty': return MontyRunner() else: return CPythonRunner(tmp_path) # ============================================================================= # Path Existence Tests # ============================================================================= def test_path_exists_file(runner: CodeRunner) -> None: """Path.exists() returns True for existing files.""" runner.write_file('test/file.txt', 'hello') result = runner.run_code("Path('/test/file.txt').exists()") assert result is True def test_path_exists_directory(runner: CodeRunner) -> None: """Path.exists() returns True for directories.""" runner.write_file('test/subdir/file.txt', 'hello') result = runner.run_code("Path('/test/subdir').exists()") assert result is True def test_path_exists_missing(runner: CodeRunner) -> None: """Path.exists() returns False for non-existent paths.""" result = runner.run_code("Path('/missing/file.txt').exists()") assert result is False def test_path_is_file(runner: CodeRunner) -> None: """Path.is_file() returns True for files, False for directories.""" runner.write_file('test/file.txt', 'hello') assert runner.run_code("Path('/test/file.txt').is_file()") is True assert runner.run_code("Path('/test').is_file()") is False def test_path_is_dir(runner: CodeRunner) -> None: """Path.is_dir() returns True for directories, False for files.""" runner.write_file('test/file.txt', 'hello') assert runner.run_code("Path('/test').is_dir()") is True assert runner.run_code("Path('/test/file.txt').is_dir()") is False # ============================================================================= # Reading Files # ============================================================================= 
def test_read_text(runner: CodeRunner) -> None:
    """Path.read_text() returns file content as string."""
    # Files are written with a relative path and read back through the absolute form.
    runner.write_file('data/hello.txt', 'hello world')
    result = runner.run_code("Path('/data/hello.txt').read_text()")
    assert result == 'hello world'


def test_read_bytes(runner: CodeRunner) -> None:
    """Path.read_bytes() returns file content as bytes."""
    runner.write_file('data/binary.bin', b'\x00\x01\x02\x03')
    result = runner.run_code("Path('/data/binary.bin').read_bytes()")
    assert result == b'\x00\x01\x02\x03'


def test_read_text_unicode(runner: CodeRunner) -> None:
    """Path.read_text() handles unicode content."""
    # U+2603 (snowman) is a non-ASCII character.
    runner.write_file('unicode.txt', 'hello \u2603 world')
    result = runner.run_code("Path('/unicode.txt').read_text()")
    assert result == 'hello \u2603 world'


# =============================================================================
# Tree Verification
# =============================================================================


def test_tree_simple(runner: CodeRunner) -> None:
    """tree() returns correct structure for simple files."""
    runner.write_file('a.txt', 'content a')
    runner.write_file('b.txt', 'content b')
    assert runner.tree() == {'a.txt': 'content a', 'b.txt': 'content b'}


def test_tree_nested(runner: CodeRunner) -> None:
    """tree() returns correct structure for nested directories."""
    runner.write_file('dir/subdir/file.txt', 'nested content')
    # Each directory level becomes one nested dict.
    assert runner.tree() == {'dir': {'subdir': {'file.txt': 'nested content'}}}


def test_tree_mixed(runner: CodeRunner) -> None:
    """tree() handles mixed files and directories."""
    runner.write_file('root.txt', 'root')
    runner.write_file('dir/file.txt', 'in dir')
    expected = {'root.txt': 'root', 'dir': {'file.txt': 'in dir'}}
    assert runner.tree() == expected


# =============================================================================
# Stat Operations
# =============================================================================


def test_stat_size(runner: CodeRunner) -> None:
    """Path.stat().st_size returns correct file size."""
    # 'hello' is five ASCII characters, hence five bytes.
    runner.write_file('sized.txt', 'hello')
    result = runner.run_code("Path('/sized.txt').stat().st_size")
    assert result == 5


def test_stat_size_unicode(runner: CodeRunner) -> None:
    """Path.stat().st_size returns byte size for unicode content."""
    # Unicode snowman is 3 bytes in UTF-8
    runner.write_file('unicode.txt', '\u2603')
    result = runner.run_code("Path('/unicode.txt').stat().st_size")
    assert result == 3


# =============================================================================
# Directory Listing
# =============================================================================


def test_iterdir(runner: CodeRunner) -> None:
    """Path.iterdir() lists directory contents.

    Note: Monty returns filenames as strings while CPython returns Path objects
    with full paths. We normalize by getting .name (or using the string directly
    for Monty). Sorting is done in Python due to Monty limitations.
    """
    runner.write_file('dir/a.txt', 'a')
    runner.write_file('dir/b.txt', 'b')
    runner.write_file('dir/subdir/c.txt', 'c')

    # Get filenames - Monty returns strings, CPython returns Paths with full path
    # Use list() to collect, then sort in Python
    result = runner.run_code("list(Path('/dir').iterdir())")

    # Normalize: Monty gives strings, CPython gives Paths
    # The first element decides the branch; the list is non-empty for this fixture data.
    if isinstance(result[0], str):
        names = result  # Monty: already filenames
    else:
        names = [p.name for p in result]  # CPython: extract name from Path

    assert sorted(names) == ['a.txt', 'b.txt', 'subdir']


# =============================================================================
# Error Cases - FileNotFoundError
# =============================================================================


def test_read_text_file_not_found(runner: CodeRunner) -> None:
    """Path.read_text() raises FileNotFoundError for missing files."""
    # The exception is caught inside the executed code and its type name is returned as a
    # string, so both runners hand back a comparable value. `result` stays None when
    # nothing is raised, which makes the assertion below fail.
    result = runner.run_code("""
result = None
try:
    Path('/missing.txt').read_text()
except FileNotFoundError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileNotFoundError'


def test_read_bytes_file_not_found(runner: CodeRunner) -> None:
    """Path.read_bytes() raises FileNotFoundError for missing files."""
    result = runner.run_code("""
result = None
try:
    Path('/missing.bin').read_bytes()
except FileNotFoundError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileNotFoundError'


def test_stat_file_not_found(runner: CodeRunner) -> None:
    """Path.stat() raises FileNotFoundError for missing files."""
    result = runner.run_code("""
result = None
try:
    Path('/missing.txt').stat()
except FileNotFoundError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileNotFoundError'


def test_iterdir_not_found(runner: CodeRunner) -> None:
    """Path.iterdir() raises FileNotFoundError for missing directories."""
    # iterdir() is wrapped in list() so the directory is actually read inside the try block.
    result = runner.run_code("""
result = None
try:
    list(Path('/missing_dir').iterdir())
except FileNotFoundError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileNotFoundError'


# =============================================================================
# Error Cases - IsADirectoryError
# =============================================================================


def test_read_text_is_directory(runner: CodeRunner) -> None:
    """Path.read_text() raises IsADirectoryError when path is a directory."""
    # Writing a file below 'mydir' is what makes the directory exist.
    runner.write_file('mydir/file.txt', 'content')
    result = runner.run_code("""
result = None
try:
    Path('/mydir').read_text()
except IsADirectoryError as e:
    result = type(e).__name__
result
""")
    assert result == 'IsADirectoryError'


def test_read_bytes_is_directory(runner: CodeRunner) -> None:
    """Path.read_bytes() raises IsADirectoryError when path is a directory."""
    runner.write_file('mydir/file.txt', 'content')
    result = runner.run_code("""
result = None
try:
    Path('/mydir').read_bytes()
except IsADirectoryError as e:
    result = type(e).__name__
result
""")
    assert result == 'IsADirectoryError'


# =============================================================================
# Error Cases - NotADirectoryError
# =============================================================================


def test_iterdir_not_a_directory(runner: CodeRunner) -> None:
    """Path.iterdir() raises NotADirectoryError when path is a file."""
    runner.write_file('file.txt', 'content')
    # The exception is caught inside the executed code and its type name is returned as a
    # string, so both runners hand back a comparable value.
    result = runner.run_code("""
result = None
try:
    list(Path('/file.txt').iterdir())
except NotADirectoryError as e:
    result = type(e).__name__
result
""")
    assert result == 'NotADirectoryError'


# =============================================================================
# Error Cases - FileExistsError
# =============================================================================


def test_mkdir_file_exists(runner: CodeRunner) -> None:
    """Path.mkdir() raises FileExistsError when directory already exists."""
    # Writing a file below 'existing_dir' is what makes the directory exist.
    runner.write_file('existing_dir/file.txt', 'content')
    result = runner.run_code("""
result = None
try:
    Path('/existing_dir').mkdir()
except FileExistsError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileExistsError'


def test_mkdir_file_at_path(runner: CodeRunner) -> None:
    """Path.mkdir() raises FileExistsError when a file exists at the path."""
    runner.write_file('somefile.txt', 'content')
    result = runner.run_code("""
result = None
try:
    Path('/somefile.txt').mkdir()
except FileExistsError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileExistsError'


def test_mkdir_exist_ok_no_error(runner: CodeRunner) -> None:
    """Path.mkdir(exist_ok=True) doesn't raise when directory exists."""
    runner.write_file('existing_dir/file.txt', 'content')
    # The trailing string literal is only reached (and returned) if mkdir did not raise.
    result = runner.run_code("""
Path('/existing_dir').mkdir(exist_ok=True)
'no error'
""")
    assert result == 'no error'


# =============================================================================
# Error Cases - mkdir parent not found
# =============================================================================


def test_mkdir_parent_not_found(runner: CodeRunner) -> None:
    """Path.mkdir() raises FileNotFoundError when parent doesn't exist."""
    result = runner.run_code("""
result = None
try:
    Path('/no/parent/here').mkdir()
except FileNotFoundError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileNotFoundError'


def test_mkdir_parents_creates_all(runner: CodeRunner) -> None:
    """Path.mkdir(parents=True) creates all parent directories."""
    result = runner.run_code("""
Path('/a/b/c/d').mkdir(parents=True)
Path('/a/b/c/d').is_dir()
""")
    assert result is True


# =============================================================================
# Error Cases - unlink
# =============================================================================


def test_unlink_file_not_found(runner: CodeRunner) -> None:
    """Path.unlink() raises FileNotFoundError for missing files."""
    result = runner.run_code("""
result = None
try:
    Path('/missing.txt').unlink()
except FileNotFoundError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileNotFoundError'


def test_unlink_is_directory(runner: CodeRunner) -> None:
    """Path.unlink() raises an error when path is a directory.

    Note: On macOS, CPython raises PermissionError for unlink() on directories,
    while Linux raises IsADirectoryError. OSAccess consistently raises IsADirectoryError.
    """
    runner.write_file('mydir/file.txt', 'content')
    # Use OSError as catch-all since PermissionError and IsADirectoryError are subclasses
    result = runner.run_code("""
result = None
try:
    Path('/mydir').unlink()
except OSError as e:
    result = type(e).__name__
result
""")
    # OSAccess raises IsADirectoryError, CPython on macOS raises PermissionError
    assert result in ('IsADirectoryError', 'PermissionError')


# =============================================================================
# Error Cases - rmdir
# =============================================================================


def test_rmdir_not_found(runner: CodeRunner) -> None:
    """Path.rmdir() raises FileNotFoundError for missing directories."""
    result = runner.run_code("""
result = None
try:
    Path('/missing_dir').rmdir()
except FileNotFoundError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileNotFoundError'


def test_rmdir_not_a_directory(runner: CodeRunner) -> None:
    """Path.rmdir() raises NotADirectoryError when path is a file."""
    runner.write_file('file.txt', 'content')
    result = runner.run_code("""
result = None
try:
    Path('/file.txt').rmdir()
except NotADirectoryError as e:
    result = type(e).__name__
result
""")
    assert result == 'NotADirectoryError'


def test_rmdir_not_empty(runner: CodeRunner) -> None:
    """Path.rmdir() raises OSError when directory is not empty."""
    runner.write_file('nonempty/file.txt', 'content')
    result = runner.run_code("""
result = None
try:
    Path('/nonempty').rmdir()
except OSError as e:
    result = type(e).__name__
result
""")
    # The exact class name is compared, so a subclass of OSError would fail this test.
    assert result == 'OSError'


# =============================================================================
# Error Cases - rename
# =============================================================================


def test_rename_source_not_found(runner: CodeRunner) -> None:
    """Path.rename() raises FileNotFoundError when source doesn't exist."""
    result = runner.run_code("""
result = None
try:
    Path('/missing.txt').rename(Path('/new.txt'))
except FileNotFoundError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileNotFoundError'


# =============================================================================
# Write Operations
# =============================================================================


def test_write_text_new_file(runner: CodeRunner) -> None:
    """Path.write_text() creates a new file and returns character count."""
    result = runner.run_code("""
count = Path('/new_file.txt').write_text('hello world')
(count, Path('/new_file.txt').read_text())
""")
    # 'hello world' is 11 characters long.
    assert result == (11, 'hello world')


def test_write_text_overwrite(runner: CodeRunner) -> None:
    """Path.write_text() overwrites existing files."""
    runner.write_file('existing.txt', 'old content')
    result = runner.run_code("""
Path('/existing.txt').write_text('new content')
Path('/existing.txt').read_text()
""")
    assert result == 'new content'


def test_write_bytes_new_file(runner: CodeRunner) -> None:
    """Path.write_bytes() creates a new file and returns byte count."""
    # Backslashes are doubled so the escapes are interpreted by the executed code,
    # not by this (non-raw) outer string literal.
    result = runner.run_code("""
count = Path('/new_binary.bin').write_bytes(b'\\x00\\x01\\x02')
(count, Path('/new_binary.bin').read_bytes())
""")
    assert result == (3, b'\x00\x01\x02')


def test_write_text_parent_not_found(runner: CodeRunner) -> None:
    """Path.write_text() raises FileNotFoundError when parent doesn't exist."""
    result = runner.run_code("""
result = None
try:
    Path('/no/parent/file.txt').write_text('content')
except FileNotFoundError as e:
    result = type(e).__name__
result
""")
    assert result == 'FileNotFoundError'


def test_write_text_to_directory(runner: CodeRunner) -> None:
    """Path.write_text() raises IsADirectoryError when writing to a directory."""
    runner.write_file('mydir/file.txt', 'content')
    result = runner.run_code("""
result = None
try:
    Path('/mydir').write_text('content')
except IsADirectoryError as e:
    result = type(e).__name__
result
""")
    assert result == 'IsADirectoryError'


# =============================================================================
Environment Variable Tests # ============================================================================= def test_environ_key_access(runner: CodeRunner) -> None: """os.environ['KEY'] returns the value for existing keys.""" runner.set_environ({'MY_VAR': 'my_value'}) result = runner.run_code("os.environ['MY_VAR']") assert result == 'my_value' def test_environ_get_method(runner: CodeRunner) -> None: """os.environ.get() returns the value for existing keys.""" runner.set_environ({'MY_VAR': 'my_value'}) result = runner.run_code("os.environ.get('MY_VAR')") assert result == 'my_value' def test_environ_get_missing_with_default(runner: CodeRunner) -> None: """os.environ.get() returns default for missing keys.""" runner.set_environ({}) result = runner.run_code("os.environ.get('MISSING', 'fallback')") assert result == 'fallback' def test_environ_missing_key_raises_keyerror(runner: CodeRunner) -> None: """os.environ['MISSING'] raises KeyError with consistent message.""" runner.set_environ({}) result = runner.run_code(""" result = None try: os.environ['NONEXISTENT_KEY'] except KeyError as e: result = str(e) result """) # Both Monty and CPython should produce the same KeyError message format assert result == "'NONEXISTENT_KEY'" ================================================ FILE: crates/monty-python/tests/test_os_access_raw.py ================================================ """Tests for AbstractFileSystem implementation. These tests verify that AbstractFileSystem can be subclassed to provide a virtual filesystem that Monty code can interact with via Path methods. 
""" from pathlib import PurePosixPath import pytest from inline_snapshot import snapshot import pydantic_monty from pydantic_monty import AbstractOS, StatResult class TestOS(AbstractOS): """A simple in-memory filesystem for testing.""" __test__ = False def __init__(self) -> None: self.files: dict[str, bytes] = {} self.directories: set[str] = {'/'} def _ensure_parent_exists(self, path: str) -> None: """Ensure all parent directories exist.""" parts = path.rstrip('/').split('/') for i in range(1, len(parts)): parent = '/'.join(parts[:i]) or '/' self.directories.add(parent) def path_exists(self, path: PurePosixPath) -> bool: p = str(path) return p in self.files or p in self.directories def path_is_file(self, path: PurePosixPath) -> bool: return str(path) in self.files def path_is_dir(self, path: PurePosixPath) -> bool: return str(path) in self.directories def path_is_symlink(self, path: PurePosixPath) -> bool: return False # No symlink support in this simple implementation def path_read_text(self, path: PurePosixPath) -> str: p = str(path) if p not in self.files: raise FileNotFoundError(f'No such file: {p}') return self.files[p].decode('utf-8') def path_read_bytes(self, path: PurePosixPath) -> bytes: p = str(path) if p not in self.files: raise FileNotFoundError(f'No such file: {p}') return self.files[p] def path_write_text(self, path: PurePosixPath, data: str) -> int: p = str(path) self._ensure_parent_exists(p) self.files[p] = data.encode('utf-8') return len(data) def path_write_bytes(self, path: PurePosixPath, data: bytes) -> int: p = str(path) self._ensure_parent_exists(p) self.files[p] = data return len(data) def path_mkdir(self, path: PurePosixPath, parents: bool, exist_ok: bool) -> None: p = str(path) if p in self.directories: if not exist_ok: raise FileExistsError(f'Directory exists: {p}') return if parents: self._ensure_parent_exists(p) self.directories.add(p) def path_unlink(self, path: PurePosixPath) -> None: p = str(path) if p not in self.files: raise 
FileNotFoundError(f'No such file: {p}') del self.files[p] def path_rmdir(self, path: PurePosixPath) -> None: p = str(path) if p not in self.directories: raise FileNotFoundError(f'No such directory: {p}') # Check if directory is empty for f in self.files: if f.startswith(p + '/'): raise OSError(f'Directory not empty: {p}') for d in self.directories: if d != p and d.startswith(p + '/'): raise OSError(f'Directory not empty: {p}') self.directories.remove(p) def path_iterdir(self, path: PurePosixPath) -> list[PurePosixPath]: p = str(path) if p not in self.directories: raise FileNotFoundError(f'No such directory: {p}') result: list[PurePosixPath] = [] prefix = p.rstrip('/') + '/' seen: set[str] = set() for f in self.files: if f.startswith(prefix): # Get immediate child name rest = f[len(prefix) :] child = rest.split('/')[0] if child and child not in seen: seen.add(child) result.append(PurePosixPath(prefix + child)) for d in self.directories: if d.startswith(prefix) and d != p: rest = d[len(prefix) :] child = rest.split('/')[0] if child and child not in seen: seen.add(child) result.append(PurePosixPath(prefix + child)) return sorted(result) def path_stat(self, path: PurePosixPath) -> StatResult: p = str(path) if p in self.files: return StatResult.file_stat(len(self.files[p]), 0o644, 0.0) elif p in self.directories: return StatResult.dir_stat(0o755, 0.0) else: raise FileNotFoundError(f'No such file or directory: {p}') def path_rename(self, path: PurePosixPath, target: PurePosixPath) -> None: p = str(path) t = str(target) if p in self.files: self._ensure_parent_exists(t) self.files[t] = self.files.pop(p) elif p in self.directories: self._ensure_parent_exists(t) self.directories.remove(p) self.directories.add(t) # Move all files under this directory prefix = p.rstrip('/') + '/' to_move = [(f, t + f[len(p) :]) for f in self.files if f.startswith(prefix)] for old, new in to_move: self.files[new] = self.files.pop(old) else: raise FileNotFoundError(f'No such file or directory: 
{p}') def path_resolve(self, path: PurePosixPath) -> str: # Simple implementation: just normalize the path p = str(path) parts: list[str] = [] for part in p.split('/'): if part == '..': if parts: parts.pop() elif part and part != '.': parts.append(part) return '/' + '/'.join(parts) def path_absolute(self, path: PurePosixPath) -> str: p = str(path) if p.startswith('/'): return p return '/' + p def getenv(self, key: str, default: str | None = None) -> str | None: # Simple virtual environment for testing env = { 'TEST_VAR': 'test_value', 'HOME': '/test/home', } return env.get(key, default) def get_environ(self) -> dict[str, str]: return { 'TEST_VAR': 'test_value', 'HOME': '/test/home', } # ============================================================================= # Basic AbstractFileSystem tests # ============================================================================= def test_abstract_filesystem_exists(): """AbstractFileSystem.path_exists() works with os.""" fs = TestOS() fs.files['/test.txt'] = b'hello' m = pydantic_monty.Monty('from pathlib import Path; Path("/test.txt").exists()') result = m.run(os=fs) assert result is True def test_abstract_filesystem_exists_missing(): """AbstractFileSystem.path_exists() returns False for missing files.""" fs = TestOS() m = pydantic_monty.Monty('from pathlib import Path; Path("/missing.txt").exists()') result = m.run(os=fs) assert result is False def test_abstract_filesystem_is_file(): """AbstractFileSystem.path_is_file() distinguishes files from directories.""" fs = TestOS() fs.files['/file.txt'] = b'content' fs.directories.add('/mydir') code = """ from pathlib import Path (Path('/file.txt').is_file(), Path('/mydir').is_file()) """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result == snapshot((True, False)) def test_abstract_filesystem_is_dir(): """AbstractFileSystem.path_is_dir() distinguishes directories from files.""" fs = TestOS() fs.files['/file.txt'] = b'content' fs.directories.add('/mydir') code 
= """ from pathlib import Path (Path('/file.txt').is_dir(), Path('/mydir').is_dir()) """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result == snapshot((False, True)) def test_abstract_filesystem_read_text(): """AbstractFileSystem.path_read_text() returns file contents.""" fs = TestOS() fs.files['/hello.txt'] = b'Hello, World!' m = pydantic_monty.Monty('from pathlib import Path; Path("/hello.txt").read_text()') result = m.run(os=fs) assert result == snapshot('Hello, World!') def test_abstract_filesystem_read_text_missing(): """AbstractFileSystem.path_read_text() raises FileNotFoundError for missing files.""" fs = TestOS() m = pydantic_monty.Monty('from pathlib import Path; Path("/missing.txt").read_text()') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(os=fs) assert str(exc_info.value) == snapshot('FileNotFoundError: No such file: /missing.txt') assert isinstance(exc_info.value.exception(), FileNotFoundError) def test_abstract_filesystem_read_bytes(): """AbstractFileSystem.path_read_bytes() returns raw bytes.""" fs = TestOS() fs.files['/data.bin'] = b'\x00\x01\x02\x03' m = pydantic_monty.Monty('from pathlib import Path; Path("/data.bin").read_bytes()') result = m.run(os=fs) assert result == snapshot(b'\x00\x01\x02\x03') # ============================================================================= # stat() tests # ============================================================================= def test_abstract_filesystem_stat_file(): """AbstractFileSystem.path_stat() returns stat result for files.""" fs = TestOS() fs.files['/file.txt'] = b'hello world' code = """ from pathlib import Path s = Path('/file.txt').stat() (s.st_size, s.st_mode) """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result == snapshot((11, 0o100644)) def test_abstract_filesystem_stat_directory(): """AbstractFileSystem.path_stat() returns stat result for directories.""" fs = TestOS() fs.directories.add('/mydir') code = """ from pathlib 
import Path s = Path('/mydir').stat() s.st_mode """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result == snapshot(0o040755) def test_abstract_filesystem_stat_missing(): """AbstractFileSystem.path_stat() raises FileNotFoundError for missing paths.""" fs = TestOS() m = pydantic_monty.Monty('from pathlib import Path\nPath("/missing").stat()') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(os=fs) assert str(exc_info.value) == snapshot('FileNotFoundError: No such file or directory: /missing') assert exc_info.value.display() == snapshot("""\ Traceback (most recent call last): File "main.py", line 2, in Path("/missing").stat() ~~~~~~~~~~~~~~~~~~~~~~~ FileNotFoundError: No such file or directory: /missing\ """) # ============================================================================= # iterdir() tests # ============================================================================= def test_abstract_filesystem_iterdir(): """AbstractFileSystem.path_iterdir() lists directory contents.""" fs = TestOS() fs.directories.add('/mydir') fs.files['/mydir/a.txt'] = b'a' fs.files['/mydir/b.txt'] = b'b' fs.directories.add('/mydir/subdir') code = """ from pathlib import Path list(Path('/mydir').iterdir()) """ m = pydantic_monty.Monty(code) result = m.run(os=fs) # Result is a list of Path objects with child names joined to parent assert len(result) == 3 names = sorted(str(p) for p in result) assert names == snapshot(['/mydir/a.txt', '/mydir/b.txt', '/mydir/subdir']) def test_abstract_filesystem_iterdir_empty(): """AbstractFileSystem.path_iterdir() returns empty list for empty directory.""" fs = TestOS() fs.directories.add('/empty') code = """ from pathlib import Path list(Path('/empty').iterdir()) """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result == snapshot([]) # ============================================================================= # resolve() and absolute() tests # 
============================================================================= def test_abstract_filesystem_resolve(): """AbstractFileSystem.path_resolve() normalizes paths.""" fs = TestOS() code = """ from pathlib import Path str(Path('/foo/bar/../baz').resolve()) """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result == snapshot('/foo/baz') def test_abstract_filesystem_absolute(): """AbstractFileSystem.path_absolute() returns absolute path.""" fs = TestOS() code = """ from pathlib import Path str(Path('/already/absolute').absolute()) """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result == snapshot('/already/absolute') def test_abstract_filesystem_getenv(): """AbstractFileSystem.getenv() returns environment variable value.""" fs = TestOS() code = """ import os os.getenv('TEST_VAR') """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result == snapshot('test_value') def test_abstract_filesystem_getenv_missing(): """AbstractFileSystem.getenv() returns None for missing variable.""" fs = TestOS() code = """ import os os.getenv('NONEXISTENT') """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result is None def test_abstract_filesystem_getenv_default(): """AbstractFileSystem.getenv() returns default for missing variable.""" fs = TestOS() code = """ import os os.getenv('NONEXISTENT', 'my_default') """ m = pydantic_monty.Monty(code) result = m.run(os=fs) assert result == snapshot('my_default') # ============================================================================= # file_stat / dir_stat helper tests # ============================================================================= def test_file_stat_helper(): """file_stat() creates a proper stat result.""" stat = StatResult.file_stat(1024, 0o644, 1700000000.0) # Check it has the expected structure (10 fields) assert len(stat) == snapshot(10) # Index access: st_mode=0, st_size=6, st_mtime=8 assert stat[0] == snapshot(0o100644) # st_mode - file_stat adds file 
type bits assert stat[6] == snapshot(1024) # st_size assert stat[8] == snapshot(1700000000.0) # st_mtime def test_dir_stat_helper(): """dir_stat() creates a proper stat result for directories.""" stat = StatResult.dir_stat(0o755, 1700000000.0) assert len(stat) == snapshot(10) # Index access: st_mode=0, st_size=6, st_mtime=8 assert stat[0] == snapshot(0o040755) # st_mode - dir_stat adds directory type bits assert stat[6] == snapshot(4096) # st_size - directories have fixed size assert stat[8] == snapshot(1700000000.0) # st_mtime def test_path_monty_to_py(): m = pydantic_monty.Monty('from pathlib import Path; Path("/foo/bar/thing.txt")') result = m.run() assert result == PurePosixPath('/foo/bar/thing.txt') assert type(result) is PurePosixPath def test_path_py_to_monty(): p = PurePosixPath('/foo/bar/thing.txt') m = pydantic_monty.Monty('f"type={type(p)} {p=}"', inputs=['p']) result = m.run(inputs={'p': p}) assert result == snapshot("type= p=PosixPath('/foo/bar/thing.txt')") ================================================ FILE: crates/monty-python/tests/test_os_calls.py ================================================ """Tests for OS function calls (Path methods) via the start/resume API. These tests verify that Path filesystem methods correctly yield OS calls with the right function name and arguments, and that return values from the host are properly converted and used by Monty code. 
""" from pathlib import PurePosixPath from typing import Any import pytest from inline_snapshot import snapshot import pydantic_monty from pydantic_monty import StatResult # ============================================================================= # Basic OS call yielding # ============================================================================= def test_path_exists_yields_oscall(): """Path.exists() yields an OS call with correct function and path.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/test.txt").exists()') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.exists') assert result.args == snapshot((PurePosixPath('/tmp/test.txt'),)) assert result.kwargs == snapshot({}) def test_path_stat_yields_oscall(): """Path.stat() yields an OS call.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/etc/passwd").stat()') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.stat') assert result.args == snapshot((PurePosixPath('/etc/passwd'),)) def test_path_read_text_yields_oscall(): """Path.read_text() yields an OS call.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/hello.txt").read_text()') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.read_text') assert result.args == snapshot((PurePosixPath('/tmp/hello.txt'),)) # ============================================================================= # Path construction and concatenation # ============================================================================= def test_path_concatenation(): """Path concatenation with / operator produces correct path string.""" code = """ from pathlib import Path base = Path('/home') full = base / 'user' 
/ 'documents' / 'file.txt' full.exists() """ m = pydantic_monty.Monty(code) result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.args == snapshot((PurePosixPath('/home/user/documents/file.txt'),)) # ============================================================================= # Resume with return values # ============================================================================= def test_exists_resume(): """Resuming exists() with bool returns it to Monty code.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/test.txt").exists()') snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) result = snapshot_result.resume(return_value=True) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output is True def test_read_text_resume(): """Resuming read_text() with string content returns it to Monty code.""" code = """ from pathlib import Path content = Path('/tmp/hello.txt').read_text() 'Content: ' + content """ m = pydantic_monty.Monty(code) snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) result = snapshot_result.resume(return_value='Hello, World!') assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot('Content: Hello, World!') # ============================================================================= # stat() result round-trip (Python -> Monty -> Python) # ============================================================================= def test_stat_resume_and_use_in_monty(): """Resuming stat() with file_stat() allows Monty to access fields.""" code = """ from pathlib import Path info = Path('/tmp/file.txt').stat() (info.st_mode, info.st_size, info[6]) """ m = pydantic_monty.Monty(code) snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) assert snapshot_result.function_name == snapshot('Path.stat') # Resume with a file_stat 
result - Monty accesses multiple fields result = snapshot_result.resume(return_value=StatResult.file_stat(1024, 0o100_644, 1234567890.0)) assert isinstance(result, pydantic_monty.MontyComplete) # st_mode=0o100_644, st_size=1024, info[6]=st_size=1024 assert result.output == snapshot((0o100_644, 1024, 1024)) def test_stat_result_returned_from_monty(): """stat_result returned from Monty is accessible in Python.""" code = """ from pathlib import Path Path('/tmp/file.txt').stat() """ m = pydantic_monty.Monty(code) snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) result = snapshot_result.resume(return_value=StatResult.file_stat(2048, 0o100_755, 1700000000.0)) assert isinstance(result, pydantic_monty.MontyComplete) stat_result = result.output # Access attributes on the returned namedtuple assert stat_result.st_mode == snapshot(0o100_755) assert stat_result.st_size == snapshot(2048) assert stat_result.st_mtime == snapshot(1700000000.0) # Index access works too assert stat_result[0] == snapshot(0o100_755) # st_mode assert stat_result[6] == snapshot(2048) # st_size def test_stat_result(): """stat_result repr shows field names and values.""" code = """ from pathlib import Path Path('/tmp/file.txt').stat() """ m = pydantic_monty.Monty(code) snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) result = snapshot_result.resume(return_value=StatResult.file_stat(512, 0o644, 0.0)) assert isinstance(result, pydantic_monty.MontyComplete) assert repr(result.output) == snapshot( 'StatResult(st_mode=33188, st_ino=0, st_dev=0, st_nlink=1, st_uid=0, st_gid=0, st_size=512, st_atime=0.0, st_mtime=0.0, st_ctime=0.0)' ) # Should be a tuple subclass assert len(result.output) == 10 assert isinstance(result.output, tuple) # ============================================================================= # Multiple OS calls in sequence # 
============================================================================= def test_multiple_path_calls(): """Multiple Path method calls yield multiple OS calls in sequence.""" code = """ from pathlib import Path p = Path('/tmp/test.txt') exists = p.exists() is_file = p.is_file() (exists, is_file) """ m = pydantic_monty.Monty(code) # First call: exists() result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.function_name == snapshot('Path.exists') # Resume exists() with True result = result.resume(return_value=True) assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.function_name == snapshot('Path.is_file') # Resume is_file() with True result = result.resume(return_value=True) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot((True, True)) def test_conditional_path_calls(): """Path calls inside conditionals work correctly.""" code = """ from pathlib import Path p = Path('/tmp/test.txt') if p.exists(): content = p.read_text() else: content = 'not found' content """ m = pydantic_monty.Monty(code) # First call: exists() result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.function_name == snapshot('Path.exists') # Resume exists() with True - should trigger read_text() result = result.resume(return_value=True) assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.function_name == snapshot('Path.read_text') # Resume read_text() with content result = result.resume(return_value='file contents') assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot('file contents') # ============================================================================= # OS call vs external function distinction # ============================================================================= def test_os_call_vs_external_function(): """OS calls have is_os_function=True, external functions have 
is_os_function=False.""" # OS call m1 = pydantic_monty.Monty('from pathlib import Path; Path("/tmp").exists()') result1 = m1.start() assert isinstance(result1, pydantic_monty.FunctionSnapshot) assert result1.is_os_function is True # External function m2 = pydantic_monty.Monty('my_func()') result2 = m2.start() assert isinstance(result2, pydantic_monty.FunctionSnapshot) assert result2.is_os_function is False # ============================================================================= # os in run() method # ============================================================================= def test_os_basic(): """os receives function name and args, return value is used.""" calls: list[Any] = [] def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> bool: calls.append((function_name, args)) return True m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/test.txt").exists()') result = m.run(os=os_handler) assert result is True assert calls == snapshot([('Path.exists', (PurePosixPath('/tmp/test.txt'),))]) def test_os_stat(): """os can return stat_result for Path.stat().""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: if function_name == 'Path.stat': return StatResult.file_stat(1024, 0o644, 1700000000.0) return None code = """ from pathlib import Path info = Path('/tmp/file.txt').stat() (info.st_mode, info.st_size) """ m = pydantic_monty.Monty(code) result = m.run(os=os_handler) assert result == snapshot((0o100_644, 1024)) def test_os_multiple_calls(): """os is called for each OS operation.""" calls: list[Any] = [] def os_handler( function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None ) -> bool | str | None: calls.append(function_name) match function_name: case 'Path.exists': return True case 'Path.read_text': return 'file contents' case _: return None code = """ from pathlib import Path p = Path('/tmp/test.txt') if p.exists(): result = 
p.read_text() else: result = 'not found' result """ m = pydantic_monty.Monty(code) result = m.run(os=os_handler) assert result == snapshot('file contents') assert calls == snapshot(['Path.exists', 'Path.read_text']) def test_os_not_provided_error(): """Error is raised when OS call is made without os.""" import pytest m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp").exists()') # When no external functions and no os, run() takes the fast path # and OS calls raise NotImplementedError inside Monty with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run() assert str(exc_info.value) == snapshot( "NotImplementedError: OS function 'Path.exists' not implemented with standard execution" ) def test_os_not_provided_error_ext_func(): """Error is raised when OS call is made without os.""" import pytest m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp").exists()') # When no external functions and no os, run() takes the fast path # and OS calls raise NotImplementedError inside Monty with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(external_functions={'x': int}) assert str(exc_info.value) == snapshot("NotImplementedError: OS function 'Path.exists' not implemented") def test_not_callable(): """Raise NotImplementedError inside inside monty if so os""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/test.txt").exists()') with pytest.raises(TypeError, match="TypeError: 'int' object is not callable"): m.run(os=123) # type: ignore # ============================================================================= # os.getenv() tests # ============================================================================= def test_os_getenv_yields_oscall(): """os.getenv() yields an OS call with correct function and args.""" m = pydantic_monty.Monty('import os; os.getenv("HOME")') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name 
== snapshot('os.getenv') assert result.args == snapshot(('HOME', None)) def test_os_getenv_with_default_yields_oscall(): """os.getenv() with default yields an OS call with both args.""" m = pydantic_monty.Monty('import os; os.getenv("MISSING", "fallback")') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('os.getenv') assert result.args == snapshot(('MISSING', 'fallback')) def test_os_getenv_callback(): """os.getenv() with os works correctly.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> str | None: if function_name == 'os.getenv': key, default = args env = {'HOME': '/home/user', 'USER': 'testuser'} return env.get(key, default) return None m = pydantic_monty.Monty('import os; os.getenv("HOME")') result = m.run(os=os_handler) assert result == snapshot('/home/user') def test_os_getenv_callback_missing(): """os.getenv() returns None for missing env var when no default.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> str | None: if function_name == 'os.getenv': key, default = args env: dict[str, str] = {} return env.get(key, default) return None m = pydantic_monty.Monty('import os; os.getenv("NONEXISTENT")') result = m.run(os=os_handler) assert result is None def test_os_getenv_callback_with_default(): """os.getenv() uses default when env var is missing.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> str | None: if function_name == 'os.getenv': key, default = args env: dict[str, str] = {} return env.get(key, default) return None m = pydantic_monty.Monty('import os; os.getenv("NONEXISTENT", "default_value")') result = m.run(os=os_handler) assert result == snapshot('default_value') # ============================================================================= # os.environ tests # 
============================================================================= def test_os_environ_yields_oscall(): """os.environ yields an OS call with correct function name.""" m = pydantic_monty.Monty('import os; os.environ') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('os.environ') assert result.args == snapshot(()) def test_os_environ_key_access(): """os.environ['KEY'] works correctly after getting environ dict.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: if function_name == 'os.environ': return {'HOME': '/home/user', 'USER': 'testuser'} return None m = pydantic_monty.Monty("import os; os.environ['HOME']") result = m.run(os=os_handler) assert result == snapshot('/home/user') def test_os_environ_key_missing_raises(): """os.environ['MISSING'] raises KeyError.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: if function_name == 'os.environ': return {} return None m = pydantic_monty.Monty("import os; os.environ['MISSING']") with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(os=os_handler) assert str(exc_info.value) == snapshot('KeyError: MISSING') def test_os_environ_get_method(): """os.environ.get() works correctly.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: if function_name == 'os.environ': return {'HOME': '/home/user'} return None m = pydantic_monty.Monty("import os; os.environ.get('HOME')") result = m.run(os=os_handler) assert result == snapshot('/home/user') def test_os_environ_get_with_default(): """os.environ.get() with default for missing key.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: if function_name == 'os.environ': return {} return None m = pydantic_monty.Monty("import os; 
os.environ.get('MISSING', 'default')") result = m.run(os=os_handler) assert result == snapshot('default') def test_os_environ_len(): """len(os.environ) returns correct count.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: if function_name == 'os.environ': return {'A': '1', 'B': '2', 'C': '3'} return None m = pydantic_monty.Monty('import os; len(os.environ)') result = m.run(os=os_handler) assert result == snapshot(3) def test_os_environ_contains(): """'KEY' in os.environ works correctly.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: if function_name == 'os.environ': return {'HOME': '/home/user'} return None m = pydantic_monty.Monty("import os; ('HOME' in os.environ, 'MISSING' in os.environ)") result = m.run(os=os_handler) assert result == snapshot((True, False)) def test_os_environ_keys(): """os.environ.keys() returns keys.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: if function_name == 'os.environ': return {'HOME': '/home', 'USER': 'test'} return None m = pydantic_monty.Monty('import os; list(os.environ.keys())') result = m.run(os=os_handler) assert set(result) == snapshot({'HOME', 'USER'}) def test_os_environ_values(): """os.environ.values() returns values.""" def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: if function_name == 'os.environ': return {'A': '1', 'B': '2'} return None m = pydantic_monty.Monty('import os; list(os.environ.values())') result = m.run(os=os_handler) assert set(result) == snapshot({'1', '2'}) # ============================================================================= # Path write operations - write_text() # ============================================================================= def test_path_write_text_yields_oscall(): """Path.write_text() yields an OS call with correct function, path, and 
content.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/output.txt").write_text("hello world")') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.write_text') assert result.args == snapshot((PurePosixPath('/tmp/output.txt'), 'hello world')) def test_path_write_text_resume(): """Resuming write_text() with byte count returns it to Monty code.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/output.txt").write_text("hello")') snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) result = snapshot_result.resume(return_value=5) # write_text returns number of bytes written assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(5) def test_path_write_text_callback(): """Path.write_text() with os callback works correctly.""" written_files: dict[str, str] = {} def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> int | None: if function_name == 'Path.write_text': path, content = args written_files[str(path)] = content return len(content.encode('utf-8')) return None m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/test.txt").write_text("test content")') result = m.run(os=os_handler) assert result == snapshot(12) assert written_files == snapshot({'/tmp/test.txt': 'test content'}) # ============================================================================= # Path write operations - write_bytes() # ============================================================================= def test_path_write_bytes_yields_oscall(): """Path.write_bytes() yields an OS call with correct function, path, and bytes.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/data.bin").write_bytes(b"\\x00\\x01\\x02")') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert 
result.is_os_function is True assert result.function_name == snapshot('Path.write_bytes') assert result.args == snapshot((PurePosixPath('/tmp/data.bin'), b'\x00\x01\x02')) def test_path_write_bytes_resume(): """Resuming write_bytes() with byte count returns it to Monty code.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/data.bin").write_bytes(b"abc")') snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) result = snapshot_result.resume(return_value=3) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(3) # ============================================================================= # Path write operations - mkdir() # ============================================================================= def test_path_mkdir_yields_oscall(): """Path.mkdir() yields an OS call with correct function and path.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/newdir").mkdir()') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.mkdir') assert result.args == snapshot((PurePosixPath('/tmp/newdir'),)) def test_path_mkdir_with_parents_yields_oscall(): """Path.mkdir(parents=True) yields an OS call with kwargs.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/a/b/c").mkdir(parents=True)') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.mkdir') assert result.args == snapshot((PurePosixPath('/tmp/a/b/c'),)) assert result.kwargs == snapshot({'parents': True}) def test_path_mkdir_with_exist_ok_yields_oscall(): """Path.mkdir(exist_ok=True) yields an OS call with kwargs.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/existing").mkdir(exist_ok=True)') result = m.start() assert isinstance(result, 
pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.mkdir') assert result.kwargs == snapshot({'exist_ok': True}) def test_path_mkdir_with_both_kwargs(): """Path.mkdir(parents=True, exist_ok=True) yields an OS call with both kwargs.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/a/b").mkdir(parents=True, exist_ok=True)') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.kwargs == snapshot({'parents': True, 'exist_ok': True}) def test_path_mkdir_resume(): """Resuming mkdir() with None returns correctly.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/newdir").mkdir()') snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) result = snapshot_result.resume(return_value=None) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output is None # ============================================================================= # Path write operations - unlink() # ============================================================================= def test_path_unlink_yields_oscall(): """Path.unlink() yields an OS call with correct function and path.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/to_delete.txt").unlink()') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.unlink') assert result.args == snapshot((PurePosixPath('/tmp/to_delete.txt'),)) def test_path_unlink_resume(): """Resuming unlink() with None returns correctly.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/file.txt").unlink()') snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) result = snapshot_result.resume(return_value=None) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output is None # 
============================================================================= # Path write operations - rmdir() # ============================================================================= def test_path_rmdir_yields_oscall(): """Path.rmdir() yields an OS call with correct function and path.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/empty_dir").rmdir()') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.rmdir') assert result.args == snapshot((PurePosixPath('/tmp/empty_dir'),)) def test_path_rmdir_resume(): """Resuming rmdir() with None returns correctly.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/dir").rmdir()') snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) result = snapshot_result.resume(return_value=None) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output is None # ============================================================================= # Path write operations - rename() # ============================================================================= def test_path_rename_yields_oscall(): """Path.rename() yields an OS call with source and target paths.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/old.txt").rename(Path("/tmp/new.txt"))') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.is_os_function is True assert result.function_name == snapshot('Path.rename') assert result.args == snapshot((PurePosixPath('/tmp/old.txt'), PurePosixPath('/tmp/new.txt'))) def test_path_rename_resume(): """Resuming rename() returns the new path.""" m = pydantic_monty.Monty('from pathlib import Path; Path("/tmp/old.txt").rename(Path("/tmp/new.txt"))') snapshot_result = m.start() assert isinstance(snapshot_result, pydantic_monty.FunctionSnapshot) # rename() returns None (the new Path is 
constructed by Monty) result = snapshot_result.resume(return_value=None) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output is None # ============================================================================= # Write operations with os callback # ============================================================================= def test_write_operations_callback(): """Multiple write operations work with os callback.""" operations: list[tuple[str, tuple[Any, ...]]] = [] def os_handler(function_name: str, args: tuple[Any, ...], kwargs: dict[str, Any] | None = None) -> Any: operations.append((function_name, args)) match function_name: case 'Path.mkdir': return None case 'Path.write_text': return len(args[1].encode('utf-8')) case 'Path.exists': return True case 'Path.read_text': return 'file content' case _: return None code = """ from pathlib import Path Path('/tmp/mydir').mkdir() Path('/tmp/mydir/file.txt').write_text('hello') Path('/tmp/mydir/file.txt').read_text() """ m = pydantic_monty.Monty(code) result = m.run(os=os_handler) assert result == snapshot('file content') assert operations == snapshot( [ ('Path.mkdir', (PurePosixPath('/tmp/mydir'),)), ('Path.write_text', (PurePosixPath('/tmp/mydir/file.txt'), 'hello')), ('Path.read_text', (PurePosixPath('/tmp/mydir/file.txt'),)), ] ) ================================================ FILE: crates/monty-python/tests/test_print.py ================================================ from typing import Callable, Literal import pytest from inline_snapshot import snapshot import pydantic_monty PrintCallback = Callable[[Literal['stdout'], str], None] def make_print_collector() -> tuple[list[str], PrintCallback]: """Create a print callback that collects output into a list.""" output: list[str] = [] def callback(stream: Literal['stdout'], text: str) -> None: assert stream == 'stdout' output.append(text) return output, callback def test_print_basic() -> None: m = pydantic_monty.Monty('print("hello")') output, 
callback = make_print_collector() m.run(print_callback=callback) assert ''.join(output) == snapshot('hello\n') def test_print_multiple() -> None: code = """ print("line 1") print("line 2") """ m = pydantic_monty.Monty(code) output, callback = make_print_collector() m.run(print_callback=callback) assert ''.join(output) == snapshot('line 1\nline 2\n') def test_print_with_values() -> None: m = pydantic_monty.Monty('print(1, 2, 3)') output, callback = make_print_collector() m.run(print_callback=callback) assert ''.join(output) == snapshot('1 2 3\n') def test_print_with_sep() -> None: m = pydantic_monty.Monty('print(1, 2, 3, sep="-")') output, callback = make_print_collector() m.run(print_callback=callback) assert ''.join(output) == snapshot('1-2-3\n') def test_print_with_end() -> None: m = pydantic_monty.Monty('print("hello", end="!")') output, callback = make_print_collector() m.run(print_callback=callback) assert ''.join(output) == snapshot('hello!') def test_print_returns_none() -> None: m = pydantic_monty.Monty('print("test")') _, callback = make_print_collector() result = m.run(print_callback=callback) assert result is None def test_print_empty() -> None: m = pydantic_monty.Monty('print()') output, callback = make_print_collector() m.run(print_callback=callback) assert ''.join(output) == snapshot('\n') def test_print_with_limits() -> None: """Verify print_callback works together with resource limits.""" m = pydantic_monty.Monty('print("with limits")') output, callback = make_print_collector() limits = pydantic_monty.ResourceLimits(max_duration_secs=5.0) m.run(print_callback=callback, limits=limits) assert ''.join(output) == snapshot('with limits\n') def test_print_with_inputs() -> None: """Verify print_callback works together with inputs.""" m = pydantic_monty.Monty('print(x)', inputs=['x']) output, callback = make_print_collector() m.run(inputs={'x': 42}, print_callback=callback) assert ''.join(output) == snapshot('42\n') def test_print_in_loop() -> None: code = 
""" for i in range(3): print(i) """ m = pydantic_monty.Monty(code) output, callback = make_print_collector() m.run(print_callback=callback) assert ''.join(output) == snapshot('0\n1\n2\n') def test_print_mixed_types() -> None: m = pydantic_monty.Monty('print(1, "hello", True, None)') output, callback = make_print_collector() m.run(print_callback=callback) assert ''.join(output) == snapshot('1 hello True None\n') def make_error_callback(error: Exception) -> PrintCallback: """Create a print callback that raises an exception.""" def callback(stream: Literal['stdout'], text: str) -> None: raise error return callback def test_print_callback_raises_value_error() -> None: """Test that ValueError raised in callback propagates correctly.""" m = pydantic_monty.Monty('print("hello")') callback = make_error_callback(ValueError('callback error')) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(print_callback=callback) inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert inner.args[0] == snapshot('callback error') def test_print_callback_raises_type_error() -> None: """Test that TypeError raised in callback propagates correctly.""" m = pydantic_monty.Monty('print("hello")') callback = make_error_callback(TypeError('wrong type')) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(print_callback=callback) inner = exc_info.value.exception() assert isinstance(inner, TypeError) assert inner.args[0] == snapshot('wrong type') def test_print_callback_raises_in_function() -> None: """Test exception from callback when print is called inside a function.""" code = """ def greet(name): print(f"Hello, {name}!") greet("World") """ m = pydantic_monty.Monty(code) callback = make_error_callback(RuntimeError('io error')) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(print_callback=callback) inner = exc_info.value.exception() assert isinstance(inner, RuntimeError) assert inner.args[0] == snapshot('io 
error') def test_print_callback_raises_in_nested_function() -> None: """Test exception from callback when print is called in nested functions.""" code = """ def outer(): def inner(): print("from inner") inner() outer() """ m = pydantic_monty.Monty(code) callback = make_error_callback(ValueError('nested error')) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(print_callback=callback) inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert inner.args[0] == snapshot('nested error') def test_print_callback_raises_in_loop() -> None: """Test exception from callback when print is called in a loop.""" code = """ for i in range(5): print(i) """ m = pydantic_monty.Monty(code) call_count = 0 def callback(stream: Literal['stdout'], text: str) -> None: nonlocal call_count call_count += 1 if call_count >= 3: raise ValueError('stopped at 3') with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(print_callback=callback) inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert inner.args[0] == snapshot('stopped at 3') assert call_count == snapshot(3) def test_map_print() -> None: """Test that print can be used inside map.""" code = """ list(map(print, [1, 2, 3])) """ m = pydantic_monty.Monty(code) output, callback = make_print_collector() m.run(print_callback=callback) assert ''.join(output) == snapshot('1\n2\n3\n') ================================================ FILE: crates/monty-python/tests/test_re.py ================================================ import re import sys import pytest from inline_snapshot import snapshot import pydantic_monty def test_re_module(): m = pydantic_monty.Monty('import re') output = m.run() assert output is None def test_re_compile(): code = """ import re pattern = re.compile(r'\\d+') matches = pattern.findall('There are 24 hours in a day and 365 days in a year.') """ m = pydantic_monty.Monty(code) output = m.run() assert output is None supported_flags = [ 
(['re.I', 're.IGNORECASE'], re.IGNORECASE), (['re.M', 're.MULTILINE'], re.MULTILINE), (['re.S', 're.DOTALL'], re.DOTALL), ] if sys.version_info >= (3, 11): supported_flags.append((['re.NOFLAG'], re.NOFLAG)) @pytest.mark.parametrize( 'flags,target', supported_flags, ids=str, ) def test_re_constant(flags: list[str], target: int): code = f'import re; ({",".join(flags)},)' m = pydantic_monty.Monty(code) output = m.run() assert all(map(lambda orig: orig == target, output)) def test_re_compile_repr(): code = r""" import re pattern = re.compile(r'\d+', re.IGNORECASE | re.DOTALL) pattern """ m = pydantic_monty.Monty(code) output = m.run() assert output == r"re.compile('\\d+', re.IGNORECASE|re.DOTALL)" def test_re_match_repr(): code = """ import re pattern = re.compile(r'\\d+') pattern.match('123abc') """ m = pydantic_monty.Monty(code) output = m.run() assert output == "" def test_re_match_groups(): code = """ import re pattern = re.compile(r'(\\d+)-(\\w+)') match = pattern.match('123-abc') match.groups() """ m = pydantic_monty.Monty(code) output = m.run() assert output == ('123', 'abc') def test_re_substitution(): code = """ import re pattern = re.compile(r'\\s+') result = pattern.sub('-', 'This is a test.') result """ m = pydantic_monty.Monty(code) output = m.run() assert output == 'This-is-a-test.' 
def test_re_error_handling(): code = """ import re try: pattern = re.compile(r'[') except Exception as e: error_message = str(e) error_message """ m = pydantic_monty.Monty(code) output = m.run() error = 'Parsing error at position 1: Invalid character class' assert error in output def test_re_resume(): code = """ import re pattern = re.compile(func()) matches = pattern.findall('Sample 123 text 456') dump(matches) """ m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('func') assert progress.args == snapshot(()) assert progress.kwargs == snapshot({}) progress2 = progress.resume(return_value='\\d+') assert isinstance(progress2, pydantic_monty.FunctionSnapshot) result = progress2.resume(return_value=['123', '456']) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(['123', '456']) def test_re_persistence(): code = """ import re pattern = re.compile(r'\\w+') dump() matches = pattern.findall('Test 123!') matches """ m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() progress2 = pydantic_monty.load_snapshot(data) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) result = progress2.resume(return_value=None) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(['Test', '123']) def test_re_error_upcast(): code = """ import re re.compile(r'[') """ m = pydantic_monty.Monty(code) try: m.run() assert False, 'Expected an exception to be raised' except pydantic_monty.MontyRuntimeError as e: error_message = str(e) assert True, 'Expected an exception to be raised' if sys.version_info >= (3, 13): assert type(e.exception()) is re.PatternError else: assert type(e.exception()) is re.error assert 'Parsing error at position 1: Invalid character class' in error_message ================================================ 
FILE: crates/monty-python/tests/test_readme_examples.py ================================================ import pytest from pytest_examples import CodeExample, EvalExample, find_examples paths = ( 'crates/monty-python/README.md', 'README.md', ) @pytest.mark.parametrize('example', find_examples(*paths), ids=str) def test_readme_examples(example: CodeExample, eval_example: EvalExample): eval_example.set_config(target_version='py310', ruff_ignore=['FA102']) eval_example.lint(example) opt_test = example.prefix_settings().get('test', '') if opt_test.startswith('skip'): pytest.skip(opt_test[4:].lstrip(' -') or 'running code skipped') if eval_example.update_examples: eval_example.run_print_update(example) else: eval_example.run_print_check(example) ================================================ FILE: crates/monty-python/tests/test_repl.py ================================================ from typing import Callable, Literal import pytest from inline_snapshot import snapshot import pydantic_monty PrintCallback = Callable[[Literal['stdout'], str], None] def make_print_collector() -> tuple[list[str], PrintCallback]: """Create a print callback that collects output into a list.""" output: list[str] = [] def callback(stream: Literal['stdout'], text: str) -> None: assert stream == 'stdout' output.append(text) return output, callback # === Construction === def test_default_construction(): repl = pydantic_monty.MontyRepl() assert repl.script_name == snapshot('main.py') def test_custom_script_name(): repl = pydantic_monty.MontyRepl(script_name='test.py') assert repl.script_name == snapshot('test.py') def test_repr(): repl = pydantic_monty.MontyRepl(script_name='my_repl.py') assert repr(repl) == snapshot("MontyRepl(script_name='my_repl.py')") # === Basic feed_run behavior === def test_feed_run_expression_returns_value(): repl = pydantic_monty.MontyRepl() assert repl.feed_run('1 + 2') == snapshot(3) def test_feed_run_assignment_returns_none(): repl = pydantic_monty.MontyRepl() 
assert repl.feed_run('x = 42') == snapshot(None) def test_feed_run_empty_string_returns_none(): repl = pydantic_monty.MontyRepl() assert repl.feed_run('') == snapshot(None) def test_feed_run_none_literal(): repl = pydantic_monty.MontyRepl() assert repl.feed_run('None') is None # === State persistence across feeds === def test_variable_persists_across_feeds(): repl = pydantic_monty.MontyRepl() repl.feed_run('x = 10') assert repl.feed_run('x') == snapshot(10) def test_incremental_mutation(): repl = pydantic_monty.MontyRepl() repl.feed_run('counter = 0') repl.feed_run('counter = counter + 1') repl.feed_run('counter = counter + 1') assert repl.feed_run('counter') == snapshot(2) def test_multiple_variables(): repl = pydantic_monty.MontyRepl() repl.feed_run('x = 10') repl.feed_run('y = 20') assert repl.feed_run('x + y') == snapshot(30) def test_function_defined_then_called(): repl = pydantic_monty.MontyRepl() repl.feed_run('def double(n):\n return n * 2') assert repl.feed_run('double(21)') == snapshot(42) def test_function_uses_previously_defined_variable(): repl = pydantic_monty.MontyRepl() repl.feed_run('factor = 3') repl.feed_run('def multiply(n):\n return n * factor') assert repl.feed_run('multiply(7)') == snapshot(21) def test_list_mutation_persists(): repl = pydantic_monty.MontyRepl() repl.feed_run('items = [1, 2, 3]') repl.feed_run('items.append(4)') assert repl.feed_run('len(items)') == snapshot(4) assert repl.feed_run('items') == snapshot([1, 2, 3, 4]) def test_dict_mutation_persists(): repl = pydantic_monty.MontyRepl() repl.feed_run("data = {'a': 1}") repl.feed_run("data['b'] = 2") assert repl.feed_run('len(data)') == snapshot(2) assert repl.feed_run("data['b']") == snapshot(2) def test_variable_reassignment(): repl = pydantic_monty.MontyRepl() repl.feed_run('x = "hello"') assert repl.feed_run('x') == snapshot('hello') repl.feed_run('x = 42') assert repl.feed_run('x') == snapshot(42) # === Multi-statement snippets === def test_multi_statement_snippet(): repl = 
pydantic_monty.MontyRepl() repl.feed_run('a = 1\nb = 2\nc = a + b') assert repl.feed_run('c') == snapshot(3) def test_loop_in_snippet(): repl = pydantic_monty.MontyRepl() repl.feed_run('total = 0\nfor i in range(5):\n total = total + i') assert repl.feed_run('total') == snapshot(10) def test_if_else_in_snippet(): repl = pydantic_monty.MontyRepl() repl.feed_run('x = 10') repl.feed_run('result = "big" if x > 5 else "small"') assert repl.feed_run('result') == snapshot('big') # === Return value types === @pytest.mark.parametrize( 'code,expected', [ ('42', 42), ('3.14', 3.14), ('"hello"', 'hello'), ('True', True), ('False', False), ('[1, 2, 3]', [1, 2, 3]), ('(1, 2, 3)', (1, 2, 3)), ("{'a': 1}", {'a': 1}), ], ids=['int', 'float', 'str', 'true', 'false', 'list', 'tuple', 'dict'], ) def test_feed_run_return_types(code: str, expected: object): repl = pydantic_monty.MontyRepl() assert repl.feed_run(code) == expected # === Error handling === def test_syntax_error(): repl = pydantic_monty.MontyRepl() with pytest.raises(pydantic_monty.MontySyntaxError): repl.feed_run('def') def test_runtime_error_preserves_state(): """A runtime error should not destroy previously defined state.""" repl = pydantic_monty.MontyRepl() repl.feed_run('x = 42') with pytest.raises(pydantic_monty.MontyRuntimeError): repl.feed_run('1 / 0') # x should still be accessible after the error assert repl.feed_run('x') == snapshot(42) def test_name_error(): repl = pydantic_monty.MontyRepl() with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: repl.feed_run('undefined_var') inner = exc_info.value.exception() assert isinstance(inner, NameError) def test_type_error(): repl = pydantic_monty.MontyRepl() with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: repl.feed_run('"hello" + 1') inner = exc_info.value.exception() assert isinstance(inner, TypeError) def test_zero_division_error(): repl = pydantic_monty.MontyRepl() with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: 
repl.feed_run('1 / 0') inner = exc_info.value.exception() assert isinstance(inner, ZeroDivisionError) def test_index_error(): repl = pydantic_monty.MontyRepl() with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: repl.feed_run('[1, 2][10]') inner = exc_info.value.exception() assert isinstance(inner, IndexError) def test_key_error(): repl = pydantic_monty.MontyRepl() with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: repl.feed_run("{'a': 1}['b']") inner = exc_info.value.exception() assert isinstance(inner, KeyError) def test_multiple_errors_dont_corrupt_state(): repl = pydantic_monty.MontyRepl() repl.feed_run('x = 1') with pytest.raises(pydantic_monty.MontyRuntimeError): repl.feed_run('1 / 0') repl.feed_run('x = x + 1') with pytest.raises(pydantic_monty.MontyRuntimeError): repl.feed_run('undefined_name') assert repl.feed_run('x') == snapshot(2) # === Print callback === def test_print_callback_on_feed(): repl = pydantic_monty.MontyRepl() output, callback = make_print_collector() repl.feed_run('print("hello")', print_callback=callback) assert ''.join(output) == snapshot('hello\n') def test_print_callback_across_feeds(): repl = pydantic_monty.MontyRepl() output, callback = make_print_collector() repl.feed_run('print("first")', print_callback=callback) repl.feed_run('print("second")', print_callback=callback) assert ''.join(output) == snapshot('first\nsecond\n') # === Resource limits === def test_construction_with_limits(): limits = pydantic_monty.ResourceLimits(max_duration_secs=5.0) repl = pydantic_monty.MontyRepl(limits=limits) assert repl.feed_run('1 + 1') == snapshot(2) def test_infinite_loop_with_limits(): limits = pydantic_monty.ResourceLimits(max_duration_secs=0.5) repl = pydantic_monty.MontyRepl(limits=limits) with pytest.raises(pydantic_monty.MontyRuntimeError): repl.feed_run('while True:\n pass') # === Serialization === def test_dump_load_roundtrip(): repl = pydantic_monty.MontyRepl() repl.feed_run('x = 40') repl.feed_run('x = x + 
1') serialized = repl.dump() assert isinstance(serialized, bytes) loaded = pydantic_monty.MontyRepl.load(serialized) assert loaded.feed_run('x + 1') == snapshot(42) def test_dump_load_preserves_functions(): repl = pydantic_monty.MontyRepl() repl.feed_run('def greet(name):\n return "hello " + name') loaded = pydantic_monty.MontyRepl.load(repl.dump()) assert loaded.feed_run('greet("world")') == snapshot('hello world') def test_dump_load_preserves_script_name(): repl = pydantic_monty.MontyRepl(script_name='custom.py') loaded = pydantic_monty.MontyRepl.load(repl.dump()) assert loaded.script_name == snapshot('custom.py') def test_load_with_print_callback(): repl = pydantic_monty.MontyRepl() repl.feed_run('x = 1') output, callback = make_print_collector() loaded = pydantic_monty.MontyRepl.load(repl.dump()) loaded.feed_run('print(x)', print_callback=callback) assert ''.join(output) == snapshot('1\n') def test_load_invalid_data(): with pytest.raises(ValueError): pydantic_monty.MontyRepl.load(b'invalid data') # === External functions === def test_external_function_basic(): def add(a: int, b: int) -> int: return a + b repl = pydantic_monty.MontyRepl() assert repl.feed_run('result = add(3, 4)', external_functions={'add': add}) == snapshot(None) assert repl.feed_run('result') == snapshot(7) def test_external_function_return_value(): def greet(name: str) -> str: return f'hello {name}' repl = pydantic_monty.MontyRepl() assert repl.feed_run('greet("world")', external_functions={'greet': greet}) == snapshot('hello world') def test_external_function_called_multiple_times(): call_count = 0 def counter(): nonlocal call_count call_count += 1 return call_count repl = pydantic_monty.MontyRepl() ext = {'counter': counter} assert repl.feed_run('counter()', external_functions=ext) == snapshot(1) assert repl.feed_run('counter()', external_functions=ext) == snapshot(2) assert call_count == 2 def test_external_function_persists_state_across_feeds(): def double(x: int) -> int: return x * 2 
repl = pydantic_monty.MontyRepl() repl.feed_run('x = 5') assert repl.feed_run('double(x)', external_functions={'double': double}) == snapshot(10) def test_external_function_exception_becomes_runtime_error(): def fail(): raise ValueError('external failure') repl = pydantic_monty.MontyRepl() ext = {'fail': fail} with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: repl.feed_run('fail()', external_functions=ext) inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert str(inner) == snapshot('external failure') def test_external_function_error_preserves_repl_state(): def fail(): raise ValueError('boom') repl = pydantic_monty.MontyRepl() repl.feed_run('x = 42') ext = {'fail': fail} with pytest.raises(pydantic_monty.MontyRuntimeError): repl.feed_run('fail()', external_functions=ext) # REPL state should be preserved after error assert repl.feed_run('x') == snapshot(42) def test_external_function_undefined_raises_name_error(): """Calling a name that's not in external_functions raises NameError.""" repl = pydantic_monty.MontyRepl() ext = {'known': lambda: 1} with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: repl.feed_run('unknown()', external_functions=ext) inner = exc_info.value.exception() assert isinstance(inner, NameError) def test_external_function_with_print_callback(): output, callback = make_print_collector() repl = pydantic_monty.MontyRepl() ext = {'get_msg': lambda: 'from external'} repl.feed_run('x = get_msg()\nprint(x)', external_functions=ext, print_callback=callback) assert ''.join(output) == snapshot('from external\n') def test_external_function_with_kwargs(): def greet(name: str, greeting: str = 'hello') -> str: return f'{greeting} {name}' repl = pydantic_monty.MontyRepl() ext = {'greet': greet} assert repl.feed_run("greet('world', greeting='hi')", external_functions=ext) == snapshot('hi world') def test_feed_run_no_externals_with_os_preserves_repl_state(): """feed_run with os= but no external_functions= 
preserves REPL state when an external call is hit. When os= is provided, feed_run uses the feed_start_loop path. If a non-OS external function is called but external_functions was not provided, the loop must restore the REPL before returning the error. """ repl = pydantic_monty.MontyRepl() repl.feed_run('x = 42') # Provide os= to force the feed_start_loop path, but no external_functions def dummy_os(func: str, args: object, kwargs: object) -> None: pass with pytest.raises(RuntimeError, match='no external_functions provided'): repl.feed_run('unknown_func()', os=dummy_os) # REPL state must be preserved — previously this was lost assert repl.feed_run('x') == snapshot(42) # === Inputs === def test_inputs_basic(): repl = pydantic_monty.MontyRepl() assert repl.feed_run('x + 1', inputs={'x': 10}) == snapshot(11) def test_inputs_used_in_same_snippet(): repl = pydantic_monty.MontyRepl() repl.feed_run('y = x + 1', inputs={'x': 42}) assert repl.feed_run('y') == snapshot(43) def test_inputs_multiple_values(): repl = pydantic_monty.MontyRepl() assert repl.feed_run('a + b', inputs={'a': 3, 'b': 7}) == snapshot(10) def test_inputs_override_existing_variable(): repl = pydantic_monty.MontyRepl() repl.feed_run('x = 1') assert repl.feed_run('x', inputs={'x': 99}) == snapshot(99) def test_inputs_with_external_functions(): def double(n: int) -> int: return n * 2 repl = pydantic_monty.MontyRepl() assert repl.feed_run('double(x)', inputs={'x': 5}, external_functions={'double': double}) == snapshot(10) # === Tests for MontyRepl.feed_start() === def test_feed_start_no_external_calls(): """feed_start with no external calls returns MontyComplete directly.""" repl = pydantic_monty.MontyRepl() progress = repl.feed_start('1 + 2') assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot(3) # REPL should still be usable assert repl.feed_run('3 + 4') == snapshot(7) def test_feed_start_state_persists(): """feed_start preserves REPL state from prior feed_run 
calls.""" repl = pydantic_monty.MontyRepl() repl.feed_run('x = 10') progress = repl.feed_start('x + 5') assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot(15) def test_feed_start_external_function(): """feed_start yields FunctionSnapshot for external function calls.""" repl = pydantic_monty.MontyRepl() progress = repl.feed_start('add(1, 2)') assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('add') assert progress.args == snapshot((1, 2)) progress = progress.resume(return_value=3) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot(3) # REPL should still be usable after assert repl.feed_run('1 + 1') == snapshot(2) def test_feed_start_external_function_preserves_state(): """feed_start async result is accessible in subsequent feed_run calls.""" repl = pydantic_monty.MontyRepl() progress = repl.feed_start('result = add(1, 2)') assert isinstance(progress, pydantic_monty.FunctionSnapshot) progress = progress.resume(return_value=42) assert isinstance(progress, pydantic_monty.MontyComplete) assert repl.feed_run('result') == snapshot(42) def test_feed_start_multiple_external_calls(): """feed_start handles multiple sequential external calls.""" repl = pydantic_monty.MontyRepl() code = 'a = foo(1)\nb = bar(2)\na + b' progress = repl.feed_start(code) assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('foo') progress = progress.resume(return_value=10) assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('bar') progress = progress.resume(return_value=20) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot(30) def test_feed_start_error_preserves_repl_state(): """REPL state is preserved when feed_start raises an error.""" repl = pydantic_monty.MontyRepl() repl.feed_run('x = 42') with 
pytest.raises(pydantic_monty.MontyRuntimeError): repl.feed_start('1 / 0') # REPL should still be usable assert repl.feed_run('x') == snapshot(42) def test_feed_start_resume_error_preserves_repl_state(): """REPL state is preserved when resume raises a runtime error.""" repl = pydantic_monty.MontyRepl() repl.feed_run('x = 99') progress = repl.feed_start('fail()') assert isinstance(progress, pydantic_monty.FunctionSnapshot) # Resume with an exception that isn't caught with pytest.raises(pydantic_monty.MontyRuntimeError): progress.resume(exception=ValueError('boom')) assert repl.feed_run('x') == snapshot(99) def test_feed_start_with_inputs(): """feed_start supports the inputs parameter.""" repl = pydantic_monty.MontyRepl() progress = repl.feed_start('process(x)', inputs={'x': 5}) assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('process') assert progress.args == snapshot((5,)) progress = progress.resume(return_value=25) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot(25) def test_feed_start_with_print_callback(): """feed_start supports the print_callback parameter.""" output: list[tuple[str, str]] = [] def callback(stream: str, text: str) -> None: output.append((stream, text)) repl = pydantic_monty.MontyRepl() progress = repl.feed_start('print("hello")', print_callback=callback) assert isinstance(progress, pydantic_monty.MontyComplete) assert output == snapshot([('stdout', 'hello'), ('stdout', '\n')]) def test_feed_start_name_lookup(): """feed_start yields NameLookupSnapshot for bare name access.""" repl = pydantic_monty.MontyRepl() progress = repl.feed_start('x = foo') assert isinstance(progress, pydantic_monty.NameLookupSnapshot) assert progress.variable_name == snapshot('foo') progress = progress.resume(value=42) assert isinstance(progress, pydantic_monty.MontyComplete) assert repl.feed_run('x') == snapshot(42) def test_feed_start_dump_load_repl_snapshot(): 
"""FunctionSnapshot from feed_start can be serialized and deserialized with load_repl_snapshot.""" repl = pydantic_monty.MontyRepl() repl.feed_run('x = 10') progress = repl.feed_start('add(x, 2)') assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() loaded, loaded_repl = pydantic_monty.load_repl_snapshot(data) assert isinstance(loaded, pydantic_monty.FunctionSnapshot) assert loaded.function_name == snapshot('add') assert loaded.args == snapshot((10, 2)) # Resume the loaded snapshot result = loaded.resume(return_value=12) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(12) # REPL state is restored and usable assert loaded_repl.feed_run('x') == snapshot(10) def test_feed_start_dump_load_repl_snapshot_preserves_state(): """REPL state from before feed_start is preserved through dump/load.""" repl = pydantic_monty.MontyRepl() repl.feed_run('counter = 0') repl.feed_run('counter = counter + 1') repl.feed_run('counter = counter + 1') progress = repl.feed_start('result = fetch(counter)') assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.args == snapshot((2,)) data = progress.dump() loaded, loaded_repl = pydantic_monty.load_repl_snapshot(data) assert isinstance(loaded, pydantic_monty.FunctionSnapshot) result = loaded.resume(return_value='done') assert isinstance(result, pydantic_monty.MontyComplete) # Counter should still be 2, and result should be set assert loaded_repl.feed_run('counter') == snapshot(2) assert loaded_repl.feed_run('result') == snapshot('done') def test_feed_start_dump_load_repl_snapshot_name_lookup(): """NameLookupSnapshot from feed_start can be serialized and deserialized.""" repl = pydantic_monty.MontyRepl() progress = repl.feed_start('x = foo') assert isinstance(progress, pydantic_monty.NameLookupSnapshot) assert progress.variable_name == snapshot('foo') data = progress.dump() loaded, loaded_repl = pydantic_monty.load_repl_snapshot(data) assert 
isinstance(loaded, pydantic_monty.NameLookupSnapshot) assert loaded.variable_name == snapshot('foo') result = loaded.resume(value=99) assert isinstance(result, pydantic_monty.MontyComplete) assert loaded_repl.feed_run('x') == snapshot(99) def test_feed_start_dump_load_repl_snapshot_multiple_calls(): """Multiple external calls with dump/load between each.""" repl = pydantic_monty.MontyRepl() progress = repl.feed_start('a = foo(1)\nb = bar(2)\na + b') assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('foo') # Dump/load between first and second call data = progress.dump() loaded, _ = pydantic_monty.load_repl_snapshot(data) assert isinstance(loaded, pydantic_monty.FunctionSnapshot) progress2 = loaded.resume(return_value=10) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) assert progress2.function_name == snapshot('bar') # Dump/load between second call and completion data2 = progress2.dump() loaded2, _ = pydantic_monty.load_repl_snapshot(data2) assert isinstance(loaded2, pydantic_monty.FunctionSnapshot) result = loaded2.resume(return_value=20) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(30) def test_feed_start_dump_load_snapshot_errors_on_repl(): """load_snapshot rejects REPL snapshots — the wire formats are incompatible.""" repl = pydantic_monty.MontyRepl() progress = repl.feed_start('fetch(1)') assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() # REPL snapshots use a different wire format, so load_snapshot fails on deserialization with pytest.raises(ValueError): pydantic_monty.load_snapshot(data) def test_feed_start_dump_load_repl_snapshot_with_print_callback(): """print_callback works on loaded REPL snapshots.""" output, callback = make_print_collector() repl = pydantic_monty.MontyRepl() progress = repl.feed_start('x = fetch()') assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() loaded, 
loaded_repl = pydantic_monty.load_repl_snapshot(data, print_callback=callback) assert isinstance(loaded, pydantic_monty.FunctionSnapshot) # Resume — the loaded snapshot should use the print callback for subsequent prints loaded.resume(return_value=42) loaded_repl.feed_run('print(x)', print_callback=callback) assert ''.join(output) == snapshot('42\n') def test_feed_start_dump_load_repl_snapshot_preserves_script_name(): """Script name is preserved through REPL snapshot dump/load.""" repl = pydantic_monty.MontyRepl(script_name='my_repl.py') progress = repl.feed_start('fetch()') assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() loaded, loaded_repl = pydantic_monty.load_repl_snapshot(data) assert loaded.script_name == snapshot('my_repl.py') assert loaded_repl.script_name == snapshot('my_repl.py') def test_non_repl_dump_load_with_load_snapshot(): """Non-REPL snapshots from Monty.start() work with load_snapshot.""" m = pydantic_monty.Monty('func(1, 2)') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() loaded = pydantic_monty.load_snapshot(data) assert isinstance(loaded, pydantic_monty.FunctionSnapshot) assert loaded.function_name == snapshot('func') assert loaded.args == snapshot((1, 2)) result = loaded.resume(return_value=100) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(100) def test_feed_start_dump_after_resume_fails(): """Cannot dump a REPL snapshot that has already been resumed.""" repl = pydantic_monty.MontyRepl() progress = repl.feed_start('fetch()') assert isinstance(progress, pydantic_monty.FunctionSnapshot) progress.resume(return_value=1) with pytest.raises(RuntimeError) as exc_info: progress.dump() assert exc_info.value.args[0] == snapshot('Cannot dump progress that has already been resumed') def test_inputs_various_types(): repl = pydantic_monty.MontyRepl() assert repl.feed_run('s', inputs={'s': 'hello'}) == snapshot('hello') 
assert repl.feed_run('n', inputs={'n': 42}) == snapshot(42) assert repl.feed_run('f', inputs={'f': 3.14}) == snapshot(3.14) assert repl.feed_run('b', inputs={'b': True}) == snapshot(True) assert repl.feed_run('lst', inputs={'lst': [1, 2]}) == snapshot([1, 2]) ================================================ FILE: crates/monty-python/tests/test_serialize.py ================================================ from dataclasses import dataclass, is_dataclass from typing import Any import pytest from inline_snapshot import snapshot import pydantic_monty def test_monty_dump_load_roundtrip(): m = pydantic_monty.Monty('x + 1', inputs=['x']) data = m.dump() assert isinstance(data, bytes) assert len(data) > 0 m2 = pydantic_monty.Monty.load(data) assert m2.run(inputs={'x': 41}) == snapshot(42) def test_monty_dump_load_preserves_script_name(): m = pydantic_monty.Monty('1', script_name='custom.py') data = m.dump() m2 = pydantic_monty.Monty.load(data) assert repr(m2) == snapshot("Monty(<1 line of code>, script_name='custom.py')") def test_monty_dump_load_preserves_inputs(): m = pydantic_monty.Monty('x + y', inputs=['x', 'y']) data = m.dump() m2 = pydantic_monty.Monty.load(data) assert m2.run(inputs={'x': 1, 'y': 2}) == snapshot(3) def test_monty_dump_load_preserves_external_functions(): m = pydantic_monty.Monty('func()') data = m.dump() m2 = pydantic_monty.Monty.load(data) result = m2.run(external_functions={'func': lambda: 42}) assert result == snapshot(42) def test_monty_load_invalid_data(): with pytest.raises(ValueError) as exc_info: pydantic_monty.Monty.load(b'invalid data') assert str(exc_info.value) == snapshot('Hit the end of buffer, expected more data') def test_progress_dump_load_roundtrip(): m = pydantic_monty.Monty('func(1, 2)') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() assert isinstance(data, bytes) assert len(data) > 0 progress2 = pydantic_monty.load_snapshot(data) assert isinstance(progress2, 
pydantic_monty.FunctionSnapshot) assert progress2.function_name == snapshot('func') assert progress2.args == snapshot((1, 2)) assert progress2.kwargs == snapshot({}) result = progress2.resume(return_value=100) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(100) def test_progress_dump_load_preserves_script_name(): m = pydantic_monty.Monty('func()', script_name='test.py') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() progress2 = pydantic_monty.load_snapshot(data) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) assert progress2.script_name == snapshot('test.py') def test_progress_dump_load_with_kwargs(): m = pydantic_monty.Monty('func(a=1, b="hello")') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() progress2 = pydantic_monty.load_snapshot(data) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) assert progress2.function_name == snapshot('func') assert progress2.args == snapshot(()) assert progress2.kwargs == snapshot({'a': 1, 'b': 'hello'}) def test_progress_dump_after_resume_fails(): m = pydantic_monty.Monty('func()') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) progress.resume(return_value=1) with pytest.raises(RuntimeError) as exc_info: progress.dump() assert exc_info.value.args[0] == snapshot('Cannot dump progress that has already been resumed') def test_progress_load_invalid_data(): with pytest.raises(ValueError): pydantic_monty.load_snapshot(b'invalid data') def test_progress_dump_load_multiple_calls(): m = pydantic_monty.Monty('a() + b()') # First call progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('a') # Dump and load the state data = progress.dump() progress2 = pydantic_monty.load_snapshot(data) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) # Resume 
with first return value progress3 = progress2.resume(return_value=10) assert isinstance(progress3, pydantic_monty.FunctionSnapshot) assert progress3.function_name == snapshot('b') # Dump and load again data2 = progress3.dump() progress4 = pydantic_monty.load_snapshot(data2) assert isinstance(progress4, pydantic_monty.FunctionSnapshot) # Resume with second return value result = progress4.resume(return_value=5) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(15) def test_progress_load_with_print_callback(): output: list[tuple[str, str]] = [] def callback(stream: str, text: str) -> None: output.append((stream, text)) m = pydantic_monty.Monty('print("before"); func(); print("after")') progress = m.start(print_callback=callback) assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert output == snapshot([('stdout', 'before'), ('stdout', '\n')]) # Dump and load with new callback data = progress.dump() output.clear() progress2 = pydantic_monty.load_snapshot(data, print_callback=callback) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) result = progress2.resume(return_value=None) assert isinstance(result, pydantic_monty.MontyComplete) assert output == snapshot([('stdout', 'after'), ('stdout', '\n')]) def test_progress_load_without_print_callback(): m = pydantic_monty.Monty('func()') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() progress2 = pydantic_monty.load_snapshot(data) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) result = progress2.resume(return_value=42) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(42) @pytest.mark.parametrize( 'code,expected', [ ('1 + 1', 2), ('"hello"', 'hello'), ('[1, 2, 3]', [1, 2, 3]), ('{"a": 1}', {'a': 1}), ('True', True), ('None', None), ], ) def test_monty_dump_load_various_outputs(code: str, expected: Any): m = pydantic_monty.Monty(code) data = m.dump() m2 = 
pydantic_monty.Monty.load(data) assert m2.run() == expected def test_progress_dump_load_with_limits(): m = pydantic_monty.Monty('func()') limits = pydantic_monty.ResourceLimits(max_allocations=1000) progress = m.start(limits=limits) assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() progress2 = pydantic_monty.load_snapshot(data) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) result = progress2.resume(return_value=99) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(99) @dataclass class Person: name: str age: int def test_monty_load_dataclass(): m = pydantic_monty.Monty('x', inputs=['x']) data = m.dump() m2 = pydantic_monty.Monty.load(data) m2.register_dataclass(Person) result = m2.run(inputs={'x': Person(name='Alice', age=30)}) assert isinstance(result, Person) def test_progress_dump_load_dataclass(): m = pydantic_monty.Monty('func()') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) data = progress.dump() assert isinstance(data, bytes) assert len(data) > 0 progress2 = pydantic_monty.load_snapshot(data, dataclass_registry=[Person]) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) assert progress2.function_name == snapshot('func') assert progress2.args == snapshot(()) assert progress2.kwargs == snapshot({}) result = progress2.resume(return_value=Person(name='Alice', age=30)) assert isinstance(result, pydantic_monty.MontyComplete) assert isinstance(result.output, Person) assert result.output.name == snapshot('Alice') assert result.output.age == snapshot(30) def test_progress_dump_load_unknown_dataclass(): """When a snapshot containing a dataclass is loaded without registering the type, the result should be an UnknownDataclass with the correct attributes.""" m = pydantic_monty.Monty( 'external_call()\nx', inputs=['x'], ) progress = m.start(inputs={'x': Person(name='Bob', age=25)}) assert isinstance(progress, 
pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('external_call') # Dump the snapshot (dataclass x is in the heap) data = progress.dump() # Load WITHOUT providing dataclass_registry — Person type is unknown progress2 = pydantic_monty.load_snapshot(data) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) # Resume execution — x is returned as UnknownDataclass result = progress2.resume(return_value=None) assert isinstance(result, pydantic_monty.MontyComplete) output = result.output # Should NOT be a Person instance since the type wasn't registered assert not isinstance(output, Person) assert type(output).__name__ == snapshot('UnknownDataclass') # Attributes should still be accessible assert output.name == snapshot('Bob') assert output.age == snapshot(25) # Should be compatible with dataclasses module assert is_dataclass(output) # repr should indicate it's unknown assert repr(output) == snapshot("") ================================================ FILE: crates/monty-python/tests/test_start.py ================================================ from typing import Any import pytest from inline_snapshot import snapshot import pydantic_monty def test_start_no_external_functions_returns_complete(): m = pydantic_monty.Monty('1 + 2') result = m.start() assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(3) def test_start_with_external_function_returns_progress(): m = pydantic_monty.Monty('func()') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.script_name == snapshot('main.py') assert result.function_name == snapshot('func') assert result.args == snapshot(()) assert result.kwargs == snapshot({}) def test_start_custom_script_name(): m = pydantic_monty.Monty('func()', script_name='custom.py') result = m.start() assert isinstance(result, pydantic_monty.FunctionSnapshot) assert result.script_name == snapshot('custom.py') def 
test_start_progress_resume_returns_complete(): m = pydantic_monty.Monty('func()') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('func') assert progress.args == snapshot(()) assert progress.kwargs == snapshot({}) result = progress.resume(return_value=42) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(42) def test_start_progress_with_args(): m = pydantic_monty.Monty('func(1, 2, 3)') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('func') assert progress.args == snapshot((1, 2, 3)) assert progress.kwargs == snapshot({}) def test_start_progress_with_kwargs(): m = pydantic_monty.Monty('func(a=1, b="two")') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('func') assert progress.args == snapshot(()) assert progress.kwargs == snapshot({'a': 1, 'b': 'two'}) def test_start_progress_with_mixed_args_kwargs(): m = pydantic_monty.Monty('func(1, 2, x="hello", y=True)') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('func') assert progress.args == snapshot((1, 2)) assert progress.kwargs == snapshot({'x': 'hello', 'y': True}) def test_start_multiple_external_calls(): m = pydantic_monty.Monty('a() + b()') # First call progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('a') # Resume with first return value progress = progress.resume(return_value=10) assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('b') # Resume with second return value result = progress.resume(return_value=5) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(15) def test_start_chain_of_external_calls(): m = 
pydantic_monty.Monty('c() + c() + c()') call_count = 0 progress = m.start() while isinstance(progress, pydantic_monty.FunctionSnapshot | pydantic_monty.FutureSnapshot): assert isinstance(progress, pydantic_monty.FunctionSnapshot), 'Expected FunctionSnapshot' assert progress.function_name == snapshot('c') call_count += 1 progress = progress.resume(return_value=call_count) assert isinstance(progress, pydantic_monty.MontyComplete) assert progress.output == snapshot(6) # 1 + 2 + 3 assert call_count == snapshot(3) def test_start_with_inputs(): m = pydantic_monty.Monty('process(x)', inputs=['x']) progress = m.start(inputs={'x': 100}) assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert progress.function_name == snapshot('process') assert progress.args == snapshot((100,)) def test_start_with_limits(): m = pydantic_monty.Monty('1 + 2') limits = pydantic_monty.ResourceLimits(max_allocations=1000) result = m.start(limits=limits) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(3) def test_start_with_print_callback(): output: list[tuple[str, str]] = [] def callback(stream: str, text: str) -> None: output.append((stream, text)) m = pydantic_monty.Monty('print("hello")') result = m.start(print_callback=callback) assert isinstance(result, pydantic_monty.MontyComplete) assert output == snapshot([('stdout', 'hello'), ('stdout', '\n')]) def test_start_resume_cannot_be_called_twice(): m = pydantic_monty.Monty('func()') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) # First resume succeeds progress.resume(return_value=1) # Second resume should fail with pytest.raises(RuntimeError) as exc_info: progress.resume(return_value=2) assert exc_info.value.args[0] == snapshot('Progress already resumed') def test_start_complex_return_value(): m = pydantic_monty.Monty('func()') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) result = progress.resume(return_value={'a': [1, 
2, 3], 'b': {'nested': True}}) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot({'a': [1, 2, 3], 'b': {'nested': True}}) def test_start_resume_with_none(): m = pydantic_monty.Monty('func()') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) result = progress.resume(return_value=None) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output is None def test_progress_repr(): m = pydantic_monty.Monty('func(1, x=2)') progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) assert repr(progress) == snapshot( "FunctionSnapshot(script_name='main.py', function_name='func', args=(1,), kwargs={'x': 2})" ) def test_complete_repr(): m = pydantic_monty.Monty('42') result = m.start() assert isinstance(result, pydantic_monty.MontyComplete) assert repr(result) == snapshot('MontyComplete(output=42)') def test_start_can_reuse_monty_instance(): m = pydantic_monty.Monty('func(x)', inputs=['x']) # First run progress1 = m.start(inputs={'x': 1}) assert isinstance(progress1, pydantic_monty.FunctionSnapshot) assert progress1.args == snapshot((1,)) result1 = progress1.resume(return_value=10) assert isinstance(result1, pydantic_monty.MontyComplete) assert result1.output == snapshot(10) # Second run with different input progress2 = m.start(inputs={'x': 2}) assert isinstance(progress2, pydantic_monty.FunctionSnapshot) assert progress2.args == snapshot((2,)) result2 = progress2.resume(return_value=20) assert isinstance(result2, pydantic_monty.MontyComplete) assert result2.output == snapshot(20) @pytest.mark.parametrize( 'code,expected', [ ('1', 1), ('"hello"', 'hello'), ('[1, 2, 3]', [1, 2, 3]), ('{"a": 1}', {'a': 1}), ('None', None), ('True', True), ], ) def test_start_returns_complete_for_various_types(code: str, expected: Any): m = pydantic_monty.Monty(code) result = m.start() assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == expected def 
test_start_progress_resume_with_exception_caught(): """Test that resuming with an exception is caught by try/except.""" code = """ try: result = external_func() except ValueError: caught = True caught """ m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) # Resume with an exception using keyword argument result = progress.resume(exception=ValueError('test error')) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(True) def test_start_progress_resume_exception_propagates_uncaught(): """Test that uncaught exceptions from resume() propagate to caller.""" code = 'external_func()' m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) # Resume with an exception that won't be caught - wrapped in MontyRuntimeError with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: progress.resume(exception=ValueError('uncaught error')) inner = exc_info.value.exception() assert isinstance(inner, ValueError) assert inner.args[0] == snapshot('uncaught error') def test_resume_none(): code = 'external_func()' m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) result = progress.resume(return_value=None) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(None) def test_invalid_resume_args(): """Test that resume() raises TypeError when called with no arguments, with both return_value and exception, or with an unknown keyword.""" code = 'external_func()' m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) # no args provided with pytest.raises(TypeError) as exc_info: progress.resume() # pyright: ignore[reportCallIssue] assert exc_info.value.args[0] == snapshot('resume() accepts either return_value or exception, not both') # Both arguments provided with pytest.raises(TypeError) as exc_info: progress.resume(return_value=42, exception=ValueError('error')) # 
pyright: ignore[reportCallIssue] assert exc_info.value.args[0] == snapshot('resume() accepts either return_value or exception, not both') # invalid kwarg provided with pytest.raises(TypeError) as exc_info: progress.resume(invalid_kwarg=42) # pyright: ignore[reportCallIssue] assert exc_info.value.args[0] == snapshot('resume() accepts either return_value or exception, not both') def test_start_progress_resume_exception_in_nested_try(): """Test exception handling in nested try/except blocks.""" code = """ outer_caught = False finally_ran = False try: try: external_func() except TypeError: pass # Won't catch ValueError finally: finally_ran = True except ValueError: outer_caught = True (outer_caught, finally_ran) """ m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) result = progress.resume(exception=ValueError('propagates to outer')) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot((True, True)) def test_name_lookup(): m = pydantic_monty.Monty('x = foo; x') p = m.start() assert isinstance(p, pydantic_monty.NameLookupSnapshot) p2 = p.resume(value=42) assert isinstance(p2, pydantic_monty.MontyComplete) assert p2.output == 42 def test_ext_function_alt_name(): """Test that a NameLookup can resolve to a function whose __name__ differs from the variable it was assigned to. 
The VM should yield a FunctionCall with the *function's* name (not the variable name).""" m = pydantic_monty.Monty('x = foobar; x()') p = m.start() assert isinstance(p, pydantic_monty.NameLookupSnapshot) def not_foobar(): return 42 p2 = p.resume(value=not_foobar) # The function is called via HeapData::ExtFunction, yielding a FunctionSnapshot assert isinstance(p2, pydantic_monty.FunctionSnapshot) assert p2.function_name == snapshot('not_foobar') assert p2.args == snapshot(()) assert p2.kwargs == snapshot({}) result = p2.resume(return_value=42) assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == snapshot(42) ================================================ FILE: crates/monty-python/tests/test_threading.py ================================================ import os import threading import time from functools import partial from typing import cast import pytest from inline_snapshot import snapshot import pydantic_monty # I don't see a way to run these tests reliably on CI since github actions only has one CPU # perhaps we could use ubuntu-24.04-arm once the repo is open source (it's currently not supported for private repos) # https://docs.github.com/en/actions/reference/runners/github-hosted-runners pytestmark = pytest.mark.skipif('CI' in os.environ, reason='on CI') def test_parallel_exec(): """Run code directly, run it in parallel, check that parallel execution not much slower.""" code = """ x = 0 for i in range(200_000): x += 1 x """ m = pydantic_monty.Monty(code) start = time.perf_counter() result = m.run() diff = time.perf_counter() - start assert result == 200_000 threads = [threading.Thread(target=m.run) for _ in range(4)] start = time.perf_counter() for t in threads: t.start() for t in threads: t.join() diff_parallel = time.perf_counter() - start # check that running the function in parallel 4 times is less than 1.5x slower than running it once time_multiple = diff_parallel / diff assert time_multiple < 1.5, 'Execution should not be 
slower in parallel' def test_parallel_exec_print(): """Run printing code once, then in 4 threads sharing one print_callback, and check the parallel run takes less than 1.5x the single-run time.""" code = """ x = 0 for i in range(200_000): x += 1 print(x) """ captured: list[str] = [] def print_callback(file: str, content: str): captured.append(f'{file}: {content}') m = pydantic_monty.Monty(code) start = time.perf_counter() result = m.run(print_callback=print_callback) diff = time.perf_counter() - start assert result is None assert captured == snapshot(['stdout: 200000', 'stdout: \n']) threads = [threading.Thread(target=partial(m.run, print_callback=print_callback)) for _ in range(4)] start = time.perf_counter() for t in threads: t.start() for t in threads: t.join() diff_parallel = time.perf_counter() - start # check that running the function in parallel 4 times is less than 1.5x slower than running it once time_multiple = diff_parallel / diff assert time_multiple < 1.5, 'Execution should not be slower in parallel' def double(a: int) -> int: return a * 2 def test_parallel_exec_ext_functions(): """Run code that calls the external function `double` once, then in 4 threads, and check the parallel run takes less than 1.5x the single-run time.""" code = """ x = 0 for i in range(100_000): x += 1 x = double(x) for i in range(100_000): x += 1 x """ m = pydantic_monty.Monty(code) start = time.perf_counter() result = m.run(external_functions={'double': double}) diff = time.perf_counter() - start assert result == 300_000 threads = [threading.Thread(target=partial(m.run, external_functions={'double': double})) for _ in range(4)] start = time.perf_counter() for t in threads: t.start() for t in threads: t.join() diff_parallel = time.perf_counter() - start # check that running the function in parallel 4 times is less than 1.5x slower than running it once time_multiple = diff_parallel / diff assert time_multiple < 1.5, 'Execution should not be slower in parallel' def test_parallel_exec_start(): """Time one m.start() that pauses at an external call, then 4 concurrent m.start() calls, and check the parallel run takes less than 1.5x the single-run time.""" code = 
""" x = 0 for i in range(200_000): x += 1 double(x) """ m = pydantic_monty.Monty(code) start = time.perf_counter() progress = m.start() diff = time.perf_counter() - start assert isinstance(progress, pydantic_monty.FunctionSnapshot) threads = [threading.Thread(target=m.start) for _ in range(4)] start = time.perf_counter() for t in threads: t.start() for t in threads: t.join() diff_parallel = time.perf_counter() - start # check that running the function in parallel 4 times is less than 1.5x slower than running it once time_multiple = diff_parallel / diff assert time_multiple < 1.5, 'Execution should not be slower in parallel' def test_parallel_exec_start_resume(): """Run code directly, run it in parallel, check that parallel execution not much slower.""" code = """ x = double(1) for i in range(200_000): x += 1 x """ m = pydantic_monty.Monty(code) progress = m.start() assert isinstance(progress, pydantic_monty.FunctionSnapshot) start = time.perf_counter() result = progress.resume(return_value=2) diff = time.perf_counter() - start assert isinstance(result, pydantic_monty.MontyComplete) assert result.output == 200_002 progresses = cast(list[pydantic_monty.FunctionSnapshot], [m.start() for _ in range(4)]) threads = [threading.Thread(target=partial(p.resume, return_value=2)) for p in progresses] start = time.perf_counter() for t in threads: t.start() for t in threads: t.join() diff_parallel = time.perf_counter() - start # check that running the function in parallel 4 times is less than 1.5x slower than running it once time_multiple = diff_parallel / diff assert time_multiple < 1.5, 'Execution should not be slower in parallel' ================================================ FILE: crates/monty-python/tests/test_type_check.py ================================================ import pytest from inline_snapshot import snapshot import pydantic_monty def test_type_check_no_errors(): """Type checking code with no errors returns None.""" m = pydantic_monty.Monty('x = 1') assert 
m.type_check() is None def test_type_check_with_errors(): """Type checking code with type errors raises MontyTypingError.""" m = pydantic_monty.Monty('"hello" + 1') with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: m.type_check() assert str(exc_info.value) == snapshot("""\ error[unsupported-operator]: Unsupported `+` operation --> main.py:1:1 | 1 | "hello" + 1 | -------^^^- | | | | | Has type `Literal[1]` | Has type `Literal["hello"]` | info: rule `unsupported-operator` is enabled by default """) def test_type_check_function_return_type(): """Type checking detects mismatched return types.""" code = """ def foo() -> int: return "not an int" """ m = pydantic_monty.Monty(code) with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: m.type_check() assert str(exc_info.value) == snapshot("""\ error[invalid-return-type]: Return type does not match returned value --> main.py:2:14 | 2 | def foo() -> int: | --- Expected `int` because of return type 3 | return "not an int" | ^^^^^^^^^^^^ expected `int`, found `Literal["not an int"]` | info: rule `invalid-return-type` is enabled by default """) def test_type_check_undefined_variable(): """Type checking detects undefined variables.""" m = pydantic_monty.Monty('print(undefined_var)') with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: m.type_check() assert str(exc_info.value) == snapshot("""\ error[unresolved-reference]: Name `undefined_var` used when not defined --> main.py:1:7 | 1 | print(undefined_var) | ^^^^^^^^^^^^^ | info: rule `unresolved-reference` is enabled by default """) def test_type_check_valid_function(): """Type checking valid function returns None.""" code = """ def add(a: int, b: int) -> int: return a + b add(1, 2) """ m = pydantic_monty.Monty(code) assert m.type_check() is None def test_type_check_with_prefix_code(): """Type checking with prefix code for input declarations.""" m = pydantic_monty.Monty('result = x + 1') # Without prefix, x is undefined with 
pytest.raises(pydantic_monty.MontyTypingError): m.type_check() # With prefix declaring x as a variable, it should pass assert m.type_check(prefix_code='x = 0') is None def test_type_check_display_invalid_format(): """Invalid format string on display() raises ValueError.""" m = pydantic_monty.Monty('"hello" + 1') with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: m.type_check() with pytest.raises(ValueError) as val_exc: exc_info.value.display('invalid_format') # pyright: ignore[reportArgumentType] assert str(val_exc.value) == snapshot('Unknown format: invalid_format') def test_type_check_display_concise_format(): """Type checking with concise format via display().""" m = pydantic_monty.Monty('"hello" + 1') with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: m.type_check() assert exc_info.value.display('concise') == snapshot( 'main.py:1:1: error[unsupported-operator] Operator `+` is not supported between objects of type `Literal["hello"]` and `Literal[1]`\n' ) # === MontyTypingError tests === def test_monty_typing_error_is_monty_error_subclass(): """MontyTypingError is a subclass of MontyError.""" m = pydantic_monty.Monty('"hello" + 1') with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: m.type_check() error = exc_info.value assert isinstance(error, pydantic_monty.MontyError) assert isinstance(error, Exception) def test_monty_typing_error_repr(): """MontyTypingError has proper repr with truncation.""" m = pydantic_monty.Monty('"hello" + 1') with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: m.type_check() # repr truncates at 50 chars assert repr(exc_info.value) == snapshot("""\ MontyTypingError(error[unsupported-operator]: Unsupported `+` operation --> main.py:1:1 | 1 | "hello" + 1 | -------^^^- | | | | | Has type `Literal[1]` | Has type `Literal["hello"]` | info: rule `unsupported-operator` is enabled by default )\ """) def test_monty_typing_error_caught_as_monty_error(): """MontyTypingError can be caught as 
MontyError.""" m = pydantic_monty.Monty('"hello" + 1') with pytest.raises(pydantic_monty.MontyError): m.type_check() def test_monty_typing_error_display_default(): """MontyTypingError display() defaults to full format.""" m = pydantic_monty.Monty('"hello" + 1') with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: m.type_check() # Default display should match str() assert exc_info.value.display() == str(exc_info.value) # === Constructor type_check parameter tests === def test_constructor_type_check_default_false(): """Type checking is disabled by default in constructor.""" # This should NOT raise during construction (type_check=False is default) m = pydantic_monty.Monty('"hello" + 1') # But we can still call type_check() manually later with pytest.raises(pydantic_monty.MontyTypingError): m.type_check() def test_constructor_type_check_explicit_true(): """Explicit type_check=True raises on type errors.""" with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: pydantic_monty.Monty('"hello" + 1', type_check=True) assert str(exc_info.value) == snapshot("""\ error[unsupported-operator]: Unsupported `+` operation --> main.py:1:1 | 1 | "hello" + 1 | -------^^^- | | | | | Has type `Literal[1]` | Has type `Literal["hello"]` | info: rule `unsupported-operator` is enabled by default """) def test_constructor_type_check_explicit_false(): """Explicit type_check=False skips type checking during construction.""" # This should NOT raise during construction m = pydantic_monty.Monty('"hello" + 1', type_check=False) # But we can still call type_check() manually later with pytest.raises(pydantic_monty.MontyTypingError): m.type_check() def test_constructor_default_allows_run_with_inputs(): """Default (type_check=False) allows running code that would fail type checking.""" # Code with undefined variable - type checking would fail m = pydantic_monty.Monty('x + 1', inputs=['x']) # But runtime works fine with the input provided result = m.run(inputs={'x': 5}) assert 
result == 6 def test_constructor_type_check_stubs(): """type_check_stubs provides declarations for type checking.""" # Without prefix, this would fail type checking (x is undefined) # Use assignment to define x, not just type annotation m = pydantic_monty.Monty('result = x + 1', type_check=True, type_check_stubs='x = 0') # Should construct successfully because prefix declares x assert m is not None def test_constructor_type_check_stubs_with_external_function(): """type_check_stubs can declare external function signatures.""" # Define fetch as a function that takes a string and returns a string prefix = """ def fetch(url: str) -> str: return '' """ m = pydantic_monty.Monty( 'result = fetch("https://example.com")', type_check=True, type_check_stubs=prefix, ) assert m is not None def test_constructor_type_check_stubs_invalid(): """type_check_stubs with wrong types still catches errors.""" # Prefix defines x as str, but code tries to use it with int addition with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: pydantic_monty.Monty( 'result: int = x + 1', type_check=True, type_check_stubs='x = "hello"', ) # Should fail because str + int is invalid assert str(exc_info.value) == snapshot("""\ error[unsupported-operator]: Unsupported `+` operation --> main.py:1:15 | 1 | result: int = x + 1 | -^^^- | | | | | Has type `Literal[1]` | Has type `Literal["hello"]` | info: rule `unsupported-operator` is enabled by default """) def test_inject_stubs_offset(): type_definitions = """\ from typing import Any Messages = list[dict[str, Any]] async def call_llm(prompt: str, messages: Messages) -> str | Messages: ... 
prompt: str = '' """ code = """\ async def agent(prompt: str, messages: Messages): while True: print(f'messages so far: {messages}') output = await call_llm(prompt, messages) if isinstance(output, str): return output messages.extend(output) await agent(prompt, []) """ pydantic_monty.Monty( code, inputs=['prompt'], script_name='agent.py', type_check=True, type_check_stubs=type_definitions, ) with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: pydantic_monty.Monty( code.replace('Messages', 'MXessages'), inputs=['prompt'], script_name='agent.py', type_check=True, type_check_stubs=type_definitions, ) assert str(exc_info.value) == snapshot("""\ error[unresolved-reference]: Name `MXessages` used when not defined --> agent.py:1:40 | 1 | async def agent(prompt: str, messages: MXessages): | ^^^^^^^^^ 2 | while True: 3 | print(f'messages so far: {messages}') | info: rule `unresolved-reference` is enabled by default """) code_call_func_wrong = 'await call_llm(prompt, 42)' with pytest.raises(pydantic_monty.MontyTypingError) as exc_info: pydantic_monty.Monty( code_call_func_wrong, inputs=['prompt'], script_name='agent.py', type_check=True, type_check_stubs=type_definitions, ) assert str(exc_info.value) == snapshot("""\ error[invalid-argument-type]: Argument to function `call_llm` is incorrect --> agent.py:1:24 | 1 | await call_llm(prompt, 42) | ^^ Expected `list[dict[str, Any]]`, found `Literal[42]` | info: Function defined here --> type_stubs.pyi:5:11 | 3 | Messages = list[dict[str, Any]] 4 | 5 | async def call_llm(prompt: str, messages: Messages) -> str | Messages: | ^^^^^^^^ ------------------ Parameter declared here 6 | ... 
| info: rule `invalid-argument-type` is enabled by default """) ================================================ FILE: crates/monty-python/tests/test_types.py ================================================ import pytest from inline_snapshot import snapshot import pydantic_monty def test_none_input(): m = pydantic_monty.Monty('x is None', inputs=['x']) assert m.run(inputs={'x': None}) is True def test_none_output(): m = pydantic_monty.Monty('None') assert m.run() is None def test_bool_true(): m = pydantic_monty.Monty('x', inputs=['x']) result = m.run(inputs={'x': True}) assert result is True assert type(result) is bool def test_bool_false(): m = pydantic_monty.Monty('x', inputs=['x']) result = m.run(inputs={'x': False}) assert result is False assert type(result) is bool def test_int(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': 42}) == snapshot(42) assert m.run(inputs={'x': -100}) == snapshot(-100) assert m.run(inputs={'x': 0}) == snapshot(0) def test_float(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': 3.14}) == snapshot(3.14) assert m.run(inputs={'x': -2.5}) == snapshot(-2.5) assert m.run(inputs={'x': 0.0}) == snapshot(0.0) def test_string(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': 'hello'}) == snapshot('hello') assert m.run(inputs={'x': ''}) == snapshot('') assert m.run(inputs={'x': 'unicode: éè'}) == snapshot('unicode: éè') def test_bytes(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': b'hello'}) == snapshot(b'hello') assert m.run(inputs={'x': b''}) == snapshot(b'') assert m.run(inputs={'x': b'\x00\x01\x02'}) == snapshot(b'\x00\x01\x02') def test_list(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': [1, 2, 3]}) == snapshot([1, 2, 3]) assert m.run(inputs={'x': []}) == snapshot([]) assert m.run(inputs={'x': ['a', 'b']}) == snapshot(['a', 'b']) def test_tuple(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': 
(1, 2, 3)}) == snapshot((1, 2, 3)) assert m.run(inputs={'x': ()}) == snapshot(()) assert m.run(inputs={'x': ('a',)}) == snapshot(('a',)) def test_dict(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': {'a': 1, 'b': 2}}) == snapshot({'a': 1, 'b': 2}) assert m.run(inputs={'x': {}}) == snapshot({}) def test_set(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': {1, 2, 3}}) == snapshot({1, 2, 3}) assert m.run(inputs={'x': set()}) == snapshot(set()) def test_frozenset(): m = pydantic_monty.Monty('x', inputs=['x']) assert m.run(inputs={'x': frozenset([1, 2, 3])}) == snapshot(frozenset({1, 2, 3})) assert m.run(inputs={'x': frozenset()}) == snapshot(frozenset()) def test_ellipsis_input(): m = pydantic_monty.Monty('x is ...', inputs=['x']) assert m.run(inputs={'x': ...}) is True def test_ellipsis_output(): m = pydantic_monty.Monty('...') assert m.run() is ... def test_nested_list(): m = pydantic_monty.Monty('x', inputs=['x']) nested = [[1, 2], [3, [4, 5]]] assert m.run(inputs={'x': nested}) == snapshot([[1, 2], [3, [4, 5]]]) def test_nested_dict(): m = pydantic_monty.Monty('x', inputs=['x']) nested = {'a': {'b': {'c': 1}}} assert m.run(inputs={'x': nested}) == snapshot({'a': {'b': {'c': 1}}}) def test_mixed_nested(): m = pydantic_monty.Monty('x', inputs=['x']) mixed = {'list': [1, 2], 'tuple': (3, 4), 'nested': {'set': {5, 6}}} result = m.run(inputs={'x': mixed}) assert result['list'] == snapshot([1, 2]) assert result['tuple'] == snapshot((3, 4)) assert result['nested']['set'] == snapshot({5, 6}) def test_list_output(): m = pydantic_monty.Monty('[1, 2, 3]') assert m.run() == snapshot([1, 2, 3]) def test_dict_output(): m = pydantic_monty.Monty("{'a': 1, 'b': 2}") assert m.run() == snapshot({'a': 1, 'b': 2}) def test_tuple_output(): m = pydantic_monty.Monty('(1, 2, 3)') assert m.run() == snapshot((1, 2, 3)) def test_set_output(): m = pydantic_monty.Monty('{1, 2, 3}') assert m.run() == snapshot({1, 2, 3}) # === Exception types === 
def test_exception_input():
    """A ValueError instance supplied as an input comes back as a ValueError with the same message."""
    runner = pydantic_monty.Monty('x', inputs=['x'])
    output = runner.run(inputs={'x': ValueError('test error')})
    assert isinstance(output, ValueError)
    assert str(output) == snapshot('test error')


def test_exception_output():
    """An exception instance constructed inside Monty is returned as a Python ValueError."""
    output = pydantic_monty.Monty('ValueError("created")').run()
    assert isinstance(output, ValueError)
    assert str(output) == snapshot('created')


@pytest.mark.parametrize('exc_class', [ValueError, TypeError, RuntimeError, AttributeError], ids=repr)
def test_exception_roundtrip(exc_class: type[Exception]):
    """Each parametrized exception class keeps its exact type and message across a round trip."""
    runner = pydantic_monty.Monty('x', inputs=['x'])
    original = exc_class('message')
    returned = runner.run(inputs={'x': original})
    assert type(returned) is exc_class
    assert str(returned) == snapshot('message')


def test_exception_subclass_input():
    """Custom exception subtypes are converted to their nearest supported base."""

    class MyError(ValueError):
        pass

    runner = pydantic_monty.Monty('x', inputs=['x'])
    returned = runner.run(inputs={'x': MyError('custom')})
    # The user-defined subclass is not preserved: the result is a plain ValueError
    assert type(returned) is ValueError
    assert str(returned) == snapshot('custom')


# === Subtype coercion ===
# Monty converts Python subclasses to their base types since it doesn't
# have Python's class system.
def test_int_subclass_input():
    """An `int` subclass instance passed as input is returned as a plain `int`."""

    class MyInt(int):
        pass

    output = pydantic_monty.Monty('x', inputs=['x']).run(inputs={'x': MyInt(42)})
    assert type(output) is int
    assert output == snapshot(42)


def test_str_subclass_input():
    """A `str` subclass instance passed as input is returned as a plain `str`."""

    class MyStr(str):
        pass

    output = pydantic_monty.Monty('x', inputs=['x']).run(inputs={'x': MyStr('hello')})
    assert type(output) is str
    assert output == snapshot('hello')


def test_list_subclass_input():
    """A `list` subclass instance passed as input is returned as a plain `list`."""

    class MyList(list[int]):
        pass

    output = pydantic_monty.Monty('x', inputs=['x']).run(inputs={'x': MyList([1, 2, 3])})
    assert type(output) is list
    assert output == snapshot([1, 2, 3])


def test_dict_subclass_input():
    """A `dict` subclass instance passed as input is returned as a plain `dict`."""

    class MyDict(dict[str, int]):
        pass

    output = pydantic_monty.Monty('x', inputs=['x']).run(inputs={'x': MyDict({'a': 1})})
    assert type(output) is dict
    assert output == snapshot({'a': 1})


def test_tuple_subclass_input():
    """A `tuple` subclass instance passed as input is returned as a plain `tuple`."""

    class MyTuple(tuple[int, ...]):
        pass

    output = pydantic_monty.Monty('x', inputs=['x']).run(inputs={'x': MyTuple((1, 2))})
    assert type(output) is tuple
    assert output == snapshot((1, 2))


def test_set_subclass_input():
    """A `set` subclass instance passed as input is returned as a plain `set`."""

    class MySet(set[int]):
        pass

    output = pydantic_monty.Monty('x', inputs=['x']).run(inputs={'x': MySet({1, 2})})
    assert type(output) is set
    assert output == snapshot({1, 2})


def test_bool_preserves_type():
    """Bool is a subclass of int but should be preserved as bool."""
    runner = pydantic_monty.Monty('x', inputs=['x'])
    output = runner.run(inputs={'x': True})
    assert type(output) is bool
    assert output is True


def test_return_int():
    """Both `type(x)` of an int and the bare name `int` are returned as Python's `int` class."""
    from_instance = pydantic_monty.Monty('x = 4\ntype(x)').run()
    assert from_instance is int

    from_name = pydantic_monty.Monty('int').run()
    assert from_name is int


def test_return_exception():
    """Both `type(x)` of a ValueError and the bare name `ValueError` are returned as the Python class."""
    from_instance = pydantic_monty.Monty('x = ValueError()\ntype(x)').run()
    assert from_instance is ValueError

    from_name = pydantic_monty.Monty('ValueError').run()
    assert from_name is ValueError


def test_return_builtin():
    """The bare name `len` is returned as Python's builtin `len`."""
    output = pydantic_monty.Monty('len').run()
    assert output is len


# === BigInt
(arbitrary precision integers) === def test_bigint_input(): """Passing a large integer (> i64::MAX) as input.""" big = 2**100 m = pydantic_monty.Monty('x', inputs=['x']) result = m.run(inputs={'x': big}) assert result == big assert type(result) is int def test_bigint_output(): """Returning a large integer computed inside Monty.""" m = pydantic_monty.Monty('2**100') result = m.run() assert result == 2**100 assert type(result) is int def test_bigint_negative_input(): """Passing a large negative integer as input.""" big_neg = -(2**100) m = pydantic_monty.Monty('x', inputs=['x']) result = m.run(inputs={'x': big_neg}) assert result == big_neg assert type(result) is int def test_int_overflow_to_bigint(): """Small int input that overflows to bigint during computation.""" max_i64 = 9223372036854775807 m = pydantic_monty.Monty('x + 1', inputs=['x']) result = m.run(inputs={'x': max_i64}) assert result == max_i64 + 1 assert type(result) is int def test_bigint_arithmetic(): """BigInt arithmetic operations.""" big = 2**100 m = pydantic_monty.Monty('x * 2 + y', inputs=['x', 'y']) result = m.run(inputs={'x': big, 'y': big}) assert result == big * 2 + big assert type(result) is int def test_bigint_comparison(): """Comparing bigints with regular ints.""" big = 2**100 m = pydantic_monty.Monty('x > y', inputs=['x', 'y']) assert m.run(inputs={'x': big, 'y': 42}) is True assert m.run(inputs={'x': 42, 'y': big}) is False def test_bigint_in_collection(): """BigInts inside collections.""" big = 2**100 m = pydantic_monty.Monty('x', inputs=['x']) result = m.run(inputs={'x': [big, 42, big * 2]}) assert result == [big, 42, big * 2] assert type(result[0]) is int def test_bigint_as_dict_key(): """BigInt as dictionary key.""" big = 2**100 m = pydantic_monty.Monty('x', inputs=['x']) result = m.run(inputs={'x': {big: 'value'}}) assert result == {big: 'value'} assert big in result def test_bigint_hash_consistency_small_values(): """Hash of small values computed as BigInt must match regular int 
hash. This is critical for dict key lookups: inserting with int and looking up with a computed BigInt (or vice versa) must work correctly. """ # Value 42 computed via BigInt arithmetic big = 2**100 m = pydantic_monty.Monty('(x - x) + 42', inputs=['x']) computed_42 = m.run(inputs={'x': big}) # Hash must match assert hash(computed_42) == hash(42), 'hash of computed int must match literal' # Dict lookup must work both ways d = {42: 'value'} assert d[computed_42] == 'value', 'lookup with computed bigint finds int key' d2 = {computed_42: 'value'} assert d2[42] == 'value', 'lookup with int finds computed bigint key' def test_bigint_hash_consistency_boundary(): """Hash consistency at i64 boundary values.""" max_i64 = 9223372036854775807 # Compute MAX_I64 via BigInt arithmetic m = pydantic_monty.Monty('(x - 1)', inputs=['x']) computed_max = m.run(inputs={'x': max_i64 + 1}) assert hash(computed_max) == hash(max_i64), 'hash at MAX_I64 boundary must match' def test_bigint_hash_consistency_large_values(): """Equal large BigInts must hash the same.""" big1 = 2**100 big2 = 2**100 # Verify they hash the same in Python first assert hash(big1) == hash(big2), 'precondition: equal bigints hash same in Python' # Verify hashes match after round-trip through Monty m = pydantic_monty.Monty('x', inputs=['x']) result1 = m.run(inputs={'x': big1}) result2 = m.run(inputs={'x': big2}) assert hash(result1) == hash(result2), 'equal bigints from Monty must hash same' # Dict lookup must work d = {result1: 'value'} assert d[result2] == 'value', 'lookup with equal bigint works' # === NamedTuple output === def test_namedtuple_sys_version_info(): """sys.version_info returns a proper namedtuple with attribute access.""" m = pydantic_monty.Monty('import sys; sys.version_info') result = m.run() # Should have named attribute access assert hasattr(result, 'major') assert hasattr(result, 'minor') assert hasattr(result, 'micro') assert hasattr(result, 'releaselevel') assert hasattr(result, 'serial') # Values 
should match Monty's Python version (3.14) assert result.major == snapshot(3) assert result.minor == snapshot(14) assert result.micro == snapshot(0) assert result.releaselevel == snapshot('final') assert result.serial == snapshot(0) def test_namedtuple_sys_version_info_index_access(): """sys.version_info supports both index and attribute access.""" m = pydantic_monty.Monty('import sys; sys.version_info') result = m.run() # Index access should work assert result[0] == result.major assert result[1] == result.minor assert result[2] == result.micro def test_namedtuple_sys_version_info_tuple_comparison(): """sys.version_info can be compared to tuples.""" m = pydantic_monty.Monty('import sys; (sys.version_info.major, sys.version_info.minor, sys.version_info.micro)') result = m.run() assert result == snapshot((3, 14, 0)) # === User-defined NamedTuple input === def test_namedtuple_custom_input_attribute_access(): """User-defined NamedTuple with custom field names can be accessed by attribute.""" from typing import NamedTuple class Person(NamedTuple): name: str age: int m = pydantic_monty.Monty('p.name', inputs=['p']) assert m.run(inputs={'p': Person(name='Alice', age=30)}) == snapshot('Alice') m = pydantic_monty.Monty('p.age', inputs=['p']) assert m.run(inputs={'p': Person(name='Alice', age=30)}) == snapshot(30) def test_namedtuple_custom_input_index_access(): """User-defined NamedTuple supports both attribute and index access.""" from typing import NamedTuple class Point(NamedTuple): x: int y: int m = pydantic_monty.Monty('p[0] + p[1]', inputs=['p']) assert m.run(inputs={'p': Point(x=10, y=20)}) == snapshot(30) def test_namedtuple_custom_input_multiple_fields(): """NamedTuple with multiple custom field names works correctly.""" from typing import NamedTuple class Config(NamedTuple): host: str port: int debug: bool timeout: float m = pydantic_monty.Monty("f'{c.host}:{c.port}'", inputs=['c']) result = m.run(inputs={'c': Config(host='localhost', port=8080, debug=True, 
timeout=30.0)}) assert result == snapshot('localhost:8080') m = pydantic_monty.Monty('c.debug', inputs=['c']) result = m.run(inputs={'c': Config(host='localhost', port=8080, debug=True, timeout=30.0)}) assert result is True def test_namedtuple_custom_input_repr(): """User-defined NamedTuple has correct repr with fully-qualified type name.""" from typing import NamedTuple class Item(NamedTuple): name: str price: float m = pydantic_monty.Monty('repr(item)', inputs=['item']) result = m.run(inputs={'item': Item(name='widget', price=9.99)}) # Monty uses the full qualified name (module.ClassName) for the type assert result == snapshot("test_types.Item(name='widget', price=9.99)") def test_namedtuple_custom_input_len(): """User-defined NamedTuple supports len().""" from typing import NamedTuple class Triple(NamedTuple): a: int b: int c: int m = pydantic_monty.Monty('len(t)', inputs=['t']) assert m.run(inputs={'t': Triple(a=1, b=2, c=3)}) == snapshot(3) def test_namedtuple_custom_input_roundtrip(): """User-defined NamedTuple can be passed through and returned.""" from typing import NamedTuple class Pair(NamedTuple): first: int second: int m = pydantic_monty.Monty('p', inputs=['p']) result = m.run(inputs={'p': Pair(first=1, second=2)}) # Returns a namedtuple-like object (not the same Python class) assert result[0] == snapshot(1) assert result[1] == snapshot(2) assert result.first == snapshot(1) assert result.second == snapshot(2) def test_namedtuple_custom_missing_attr_error(): """Accessing non-existent attribute on custom NamedTuple raises AttributeError.""" from typing import NamedTuple class Simple(NamedTuple): value: int m = pydantic_monty.Monty('s.nonexistent', inputs=['s']) with pytest.raises(pydantic_monty.MontyRuntimeError) as exc_info: m.run(inputs={'s': Simple(value=42)}) # Monty uses the full qualified name (module.ClassName) for the type assert "AttributeError: 'test_types.Simple' object has no attribute 'nonexistent'" in str(exc_info.value) 
================================================ FILE: crates/monty-type-checking/Cargo.toml ================================================ [package] name = "monty_type_checking" readme = "../../README.md" version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } authors = { workspace = true } license = { workspace = true } description = { workspace = true } keywords = { workspace = true } categories = { workspace = true } homepage = { workspace = true } repository = { workspace = true } [lib] name = "monty_type_checking" path = "src/lib.rs" [dependencies] monty_typeshed = { path = "../monty-typeshed" } ruff_python_ast = { workspace = true } ruff_db = { workspace = true } ruff_text_size = { workspace = true } ty_python_semantic = { workspace = true } ty_module_resolver = { workspace = true } salsa = { workspace = true } [dev-dependencies] pretty_assertions = { workspace = true } [lints] workspace = true ================================================ FILE: crates/monty-type-checking/src/db.rs ================================================ use std::{fmt, sync::Arc}; use ruff_db::{ Db as SourceDb, files::{File, Files}, system::{DbWithTestSystem, System, TestSystem}, vendored::VendoredFileSystem, }; use ruff_python_ast::PythonVersion; use ty_module_resolver::{Db as ModuleResolverDb, SearchPaths}; use ty_python_semantic::{ AnalysisSettings, Db, Program, default_lint_registry, lint::{LintRegistry, RuleSelection}, }; /// Very simple in-memory salsa/ty database. 
/// /// Mostly taken from /// https://github.com/astral-sh/ruff/blob/7bacca9b625c2a658470afd99a0bf0aa0b4f1dbb/crates/ty_python_semantic/src/db.rs#L51 #[salsa::db] #[derive(Clone)] pub(crate) struct MemoryDb { storage: salsa::Storage, files: Files, system: TestSystem, vendored: VendoredFileSystem, rule_selection: Arc, analysis_settings: Arc, } impl fmt::Debug for MemoryDb { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("TypeCheckingFailure") .field("files", &self.files) .field("system", &self.system) .field("vendored", &self.vendored) .field("rule_selection", &self.rule_selection) .field("analysis_settings", &self.analysis_settings) .finish_non_exhaustive() } } impl MemoryDb { pub fn new() -> Self { Self { storage: salsa::Storage::new(None), system: TestSystem::default(), vendored: monty_typeshed::file_system().clone(), files: Files::default(), rule_selection: Arc::new(RuleSelection::from_registry(default_lint_registry())), analysis_settings: AnalysisSettings::default().into(), } } } impl DbWithTestSystem for MemoryDb { fn test_system(&self) -> &TestSystem { &self.system } fn test_system_mut(&mut self) -> &mut TestSystem { &mut self.system } } #[salsa::db] impl SourceDb for MemoryDb { fn vendored(&self) -> &VendoredFileSystem { &self.vendored } fn system(&self) -> &dyn System { &self.system } fn files(&self) -> &Files { &self.files } fn python_version(&self) -> PythonVersion { PythonVersion::PY314 } } #[salsa::db] impl Db for MemoryDb { fn should_check_file(&self, file: File) -> bool { !file.path(self).is_vendored_path() } fn rule_selection(&self, _file: File) -> &RuleSelection { &self.rule_selection } fn lint_registry(&self) -> &LintRegistry { default_lint_registry() } fn analysis_settings(&self, _file: File) -> &AnalysisSettings { &self.analysis_settings } fn verbose(&self) -> bool { false } } #[salsa::db] impl ModuleResolverDb for MemoryDb { fn search_paths(&self) -> &SearchPaths { Program::get(self).search_paths(self) } } #[salsa::db] 
impl salsa::Database for MemoryDb {} ================================================ FILE: crates/monty-type-checking/src/lib.rs ================================================ mod db; mod type_check; pub use crate::type_check::{SourceFile, TypeCheckingDiagnostics, type_check}; ================================================ FILE: crates/monty-type-checking/src/type_check.rs ================================================ use std::{ fmt::{self, Display}, sync::{Arc, Mutex}, }; use ruff_db::{ Db as SourceDb, diagnostic::{ Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, DisplayDiagnosticConfig, DisplayDiagnostics, UnifiedFile, }, files::{File, FileRootKind, system_path_to_file}, system::{DbWithWritableSystem as _, SystemPathBuf}, }; use ruff_text_size::{TextRange, TextSize}; use ty_module_resolver::SearchPathSettings; use ty_python_semantic::{ Program, ProgramSettings, PythonPlatform, PythonVersionSource, PythonVersionWithSource, types::check_types, }; use crate::db::MemoryDb; /// Definition of a source file. pub struct SourceFile<'a> { /// source code pub source_code: &'a str, /// file path pub path: &'a str, } impl<'a> SourceFile<'a> { /// Create a new source file. #[must_use] pub fn new(source_code: &'a str, path: &'a str) -> Self { Self { source_code, path } } } /// Type check some python source code, checking if it's valid to run with monty. /// /// # Arguments /// * `python_source` - The python source code to type check. /// * `stubs_file` - Optional stubs file to use for type checking. /// /// # Returns /// * `Ok(Some(TypeCheckingDiagnostics))` - If there are typing errors. /// * `Ok(None)` - If there are no typing errors. /// * `Err(String)` - If there was an unexpected/internal error during type checking. 
pub fn type_check( python_source: &SourceFile<'_>, stubs_file: Option<&SourceFile<'_>>, ) -> Result, String> { let mut db = MemoryDb::new(); // Files must be written under a directory that's registered as a search path for module // resolution to work. We use "/" as the root directory so paths appear without a prefix. let src_root = SystemPathBuf::from("/"); // Register the source root for Salsa tracking - required for module resolution db.files().try_add_root(&db, &src_root, FileRootKind::Project); let search_paths = SearchPathSettings::new(vec![src_root.clone()]) .to_search_paths(db.system(), db.vendored()) .map_err(to_string)?; // The API is confusing here - we have to load the "program" here like this, otherwise we get unwrap // panics when calling `check_types` Program::from_settings( &db, ProgramSettings { python_version: PythonVersionWithSource { version: db.python_version(), source: PythonVersionSource::default(), }, python_platform: PythonPlatform::default(), search_paths, }, ); // Build absolute paths for files under / let main_path = src_root.join(python_source.path); let main_source = python_source.source_code; let code_offset: u32 = if let Some(stubs_file) = stubs_file { let stubs_path = src_root.join(stubs_file.path); // write the stub file db.write_file(&stubs_path, stubs_file.source_code).map_err(to_string)?; // prepend the stub import to the main source code let stub_stem = stubs_file .path .split_once('.') .map_or(stubs_file.path, |(before, _)| before); let mut new_source = format!("from {stub_stem} import *\n"); let offset = u32::try_from(new_source.len()).map_err(to_string)?; new_source.push_str(main_source); // write the main source code db.write_file(&main_path, &new_source).map_err(to_string)?; // one line offset for errors vs. 
the original source code since we injected the stub import offset } else { // write just the main source code db.write_file(&main_path, main_source).map_err(to_string)?; 0 }; let main_file = system_path_to_file(&db, &main_path).map_err(to_string)?; let mut diagnostics = check_types(&db, main_file); diagnostics.retain(filter_diagnostics); if diagnostics.is_empty() { Ok(None) } else { // without all this errors would appear on the wrong line because we injected `from type_stubs import *` // if we injected the stubs import, we need to write the actual source back to the file in the database db.write_file(&main_path, main_source).map_err(to_string)?; // and then adjust each span in the error message to account for the injected stubs import if code_offset > 0 { let offset = TextSize::new(code_offset); for diagnostic in &mut diagnostics { // Adjust spans in main diagnostic annotations (only for spans in the main file) for ann in diagnostic.annotations_mut() { adjust_annotation_span(ann, main_file, offset); } // Adjust spans in sub-diagnostic annotations (e.g., "info: Function defined here") for sub in diagnostic.sub_diagnostics_mut() { for ann in sub.annotations_mut() { adjust_annotation_span(ann, main_file, offset); } } } } // Sort diagnostics by line number diagnostics.sort_by(|a, b| a.rendering_sort_key(&db).cmp(&b.rendering_sort_key(&db))); Ok(Some(TypeCheckingDiagnostics::new(diagnostics, db))) } } fn to_string(err: impl Display) -> String { err.to_string() } /// Adjust the span of an annotation by subtracting the given offset. /// /// This is used when we inject a stub import at the beginning of the source code, /// and need to adjust all spans to account for the injected code. /// Only adjusts spans that belong to the main file being type-checked. 
fn adjust_annotation_span(ann: &mut Annotation, main_file: File, offset: TextSize) { let span = ann.get_span(); // Only adjust spans for the main file (not stubs or other files) if let UnifiedFile::Ty(span_file) = span.file() && *span_file == main_file && let Some(range) = span.range() { let new_range = TextRange::new(range.start() - offset, range.end() - offset); let new_span = span.clone().with_range(new_range); ann.set_span(new_span); } } /// Represents diagnostic details when type checking fails. #[derive(Clone)] pub struct TypeCheckingDiagnostics { /// The actual diagnostic message diagnostics: Vec, /// db used to display diagnostics, wrapped in Mutex for Sync so MontyTypingError is sendable db: Arc>, /// How to format the output format: DiagnosticFormat, /// Whether to highlight the output with ansi colors color: bool, } /// Debug output for TypeCheckingDiagnostics shows the pretty typing output, and no other values since /// this will be displayed when users are printing `Result<..., TypeCheckingDiagnostics>` etc. and the /// raw errors are not useful to end users. 
impl fmt::Debug for TypeCheckingDiagnostics { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let config = self.config(); let db = self.db.lock().unwrap(); write!( f, "TypeCheckingDiagnostics:\n{}", DisplayDiagnostics::new(&*db, &config, &self.diagnostics) ) } } /// To display true debugs details about the TypeCheckingDiagnostics #[derive(Debug)] #[expect(dead_code)] pub struct DebugTypeCheckingDiagnostics<'a> { diagnostics: &'a [Diagnostic], db: Arc>, format: DiagnosticFormat, color: bool, } impl fmt::Display for TypeCheckingDiagnostics { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let db = self.db.lock().unwrap(); DisplayDiagnostics::new(&*db, &self.config(), &self.diagnostics).fmt(f) } } impl TypeCheckingDiagnostics { fn new(diagnostics: Vec, db: MemoryDb) -> Self { Self { diagnostics, db: Arc::new(Mutex::new(db)), format: DiagnosticFormat::Full, color: false, } } fn config(&self) -> DisplayDiagnosticConfig { DisplayDiagnosticConfig::new("monty") .format(self.format) .color(self.color) } /// To display debug details for the TypeCheckingDiagnostics since debug is the pretty output #[must_use] pub fn debug_details(&self) -> DebugTypeCheckingDiagnostics<'_> { DebugTypeCheckingDiagnostics { diagnostics: &self.diagnostics, db: self.db.clone(), format: self.format, color: self.color, } } /// Set the format of the diagnostics. #[must_use] pub fn format(self, format: DiagnosticFormat) -> Self { Self { format, ..self } } /// Set the format of the diagnostics from a string. /// Valid formats: "full", "concise", "azure", "json", "jsonlines", "rdjson", /// "pylint", "gitlab", "github". 
pub fn format_from_str(self, format: &str) -> Result { let format = match format.to_ascii_lowercase().as_str() { "full" => DiagnosticFormat::Full, "concise" => DiagnosticFormat::Concise, "azure" => DiagnosticFormat::Azure, "json" => DiagnosticFormat::Json, "jsonlines" | "json-lines" => DiagnosticFormat::JsonLines, "rdjson" => DiagnosticFormat::Rdjson, "pylint" => DiagnosticFormat::Pylint, // don't bother with the "junit" feature, please check the binary size and add it if you need this format // "junit" => DiagnosticFormat::Junit, "gitlab" => DiagnosticFormat::Gitlab, "github" => DiagnosticFormat::Github, _ => return Err(format!("Unknown format: {format}")), }; Ok(Self { format, ..self }) } /// Set whether to highlight the output with ansi colors #[must_use] pub fn color(self, color: bool) -> Self { Self { color, ..self } } } /// Filter out diagnostics we want to ignore. /// /// Should only be necessary until is fixed. fn filter_diagnostics(d: &Diagnostic) -> bool { !(matches!(d.id(), DiagnosticId::InvalidSyntax) && matches!( d.primary_message(), "`await` statement outside of a function" | "`await` outside of an asynchronous function" )) } ================================================ FILE: crates/monty-type-checking/tests/bad_types.py ================================================ # This file contains intentional type errors to test the type checker. # Each section demonstrates a different category of type error. 
# === import sys from typing import assert_type def takes_int(x: int) -> None: pass def takes_str(x: str) -> None: pass def takes_list_int(x: list[int]) -> None: pass # Wrong primitive types takes_int('hello') takes_int(3.14) takes_str(42) takes_str([1, 2, 3]) # Wrong container element types takes_list_int(['a', 'b', 'c']) takes_list_int([1.0, 2.0, 3.0]) # === Invalid return types === def should_return_int() -> int: return 'oops' def should_return_str() -> str: return 123 def should_return_list_int() -> list[int]: return ['a', 'b'] def should_return_none() -> None: return 42 # === Type mismatches in expressions === def get_int() -> int: return 42 def get_str() -> str: return 'hello' # Binary operations with incompatible types result1 = get_int() + get_str() result2 = get_str() - get_int() # === assert_type failures === x: int = 42 assert_type(x, str) y: list[int] = [1, 2, 3] assert_type(y, list[str]) # === Attribute errors === class MyClass: def __init__(self) -> None: self.value: int = 42 obj = MyClass() z = obj.nonexistent_attr # === Too many / too few arguments === def takes_two(a: int, b: str) -> None: pass takes_two(1) takes_two(1, 'hello', 'extra') # === Wrong keyword arguments === takes_two(a=1, c='wrong') # === Calling non-callable === not_callable: int = 42 not_callable() print(sys.copyright) ================================================ FILE: crates/monty-type-checking/tests/bad_types_output.txt ================================================ bad_types.py:22:11: error[invalid-argument-type] Argument to function `takes_int` is incorrect: Expected `int`, found `Literal["hello"]` bad_types.py:23:11: error[invalid-argument-type] Argument to function `takes_int` is incorrect: Expected `int`, found `float` bad_types.py:24:11: error[invalid-argument-type] Argument to function `takes_str` is incorrect: Expected `str`, found `Literal[42]` bad_types.py:25:11: error[invalid-argument-type] Argument to function `takes_str` is incorrect: Expected `str`, found 
`list[Unknown | int]` bad_types.py:28:16: error[invalid-argument-type] Argument to function `takes_list_int` is incorrect: Expected `list[int]`, found `list[int | str]` bad_types.py:29:16: error[invalid-argument-type] Argument to function `takes_list_int` is incorrect: Expected `list[int]`, found `list[int | float]` bad_types.py:36:12: error[invalid-return-type] Return type does not match returned value: expected `int`, found `Literal["oops"]` bad_types.py:40:12: error[invalid-return-type] Return type does not match returned value: expected `str`, found `Literal[123]` bad_types.py:44:12: error[invalid-return-type] Return type does not match returned value: expected `list[int]`, found `list[int | str]` bad_types.py:48:12: error[invalid-return-type] Return type does not match returned value: expected `None`, found `Literal[42]` bad_types.py:63:11: error[unsupported-operator] Operator `+` is not supported between objects of type `int` and `str` bad_types.py:64:11: error[unsupported-operator] Operator `-` is not supported between objects of type `str` and `int` bad_types.py:70:1: error[type-assertion-failure] Type `str` does not match asserted type `Literal[42]` bad_types.py:73:1: error[type-assertion-failure] Type `list[str]` does not match asserted type `list[int]` bad_types.py:85:5: error[unresolved-attribute] Object of type `MyClass` has no attribute `nonexistent_attr` bad_types.py:95:1: error[missing-argument] No argument provided for required parameter `b` of function `takes_two` bad_types.py:96:23: error[too-many-positional-arguments] Too many positional arguments to function `takes_two`: expected 2, got 3 bad_types.py:101:1: error[missing-argument] No argument provided for required parameter `b` of function `takes_two` bad_types.py:101:16: error[unknown-argument] Argument `c` does not match any known parameter of function `takes_two` bad_types.py:107:1: error[call-non-callable] Object of type `Literal[42]` is not callable bad_types.py:109:7: 
error[unresolved-attribute] Module `sys` has no member `copyright` ================================================ FILE: crates/monty-type-checking/tests/good_types.py ================================================ import asyncio import os import re import sys from dataclasses import dataclass from pathlib import Path from typing import Any, assert_type # === Type checking helper functions === def check_int(x: int) -> None: pass def check_float(x: float) -> None: pass def check_str(x: str) -> None: pass def check_bool(x: bool) -> None: pass def check_bytes(x: bytes) -> None: pass def check_list_int(x: list[int]) -> None: pass def check_list_str(x: list[str]) -> None: pass def check_tuple_int(x: tuple[int, ...]) -> None: pass def check_dict_str_int(x: dict[str, int]) -> None: pass def check_set_int(x: set[int]) -> None: pass def check_frozenset_int(x: frozenset[int]) -> None: pass # === Value getter functions === def get_int() -> int: return 123 def get_float() -> float: return 3.14 def get_str() -> str: return 'hello' def get_list_int() -> list[int]: return [1, 2, 3] def get_list_str() -> list[str]: return ['a', 'b', 'c'] def get_object() -> object: return object() def get_dict_str_int() -> dict[str, int]: return {'a': 1, 'b': 2} def get_set_str() -> set[str]: return {'a', 'b', 'c'} def get_frozenset_str() -> frozenset[str]: return frozenset({'a', 'b', 'c'}) def get_tuple_str_int() -> tuple[str, int]: return ('hello', 42) def get_bytes() -> bytes: return b'hello' # === Core Types === obj = object() t = type(42) # === Primitive Types === # bool check_bool(True) check_bool(False) check_bool(bool(1)) check_bool(bool('')) # int check_int(42) check_int(int('42')) check_int(int(3.14)) check_int(int(get_int())) check_int(int(get_float())) # float check_float(3.14) check_float(float('3.14')) check_float(float(42)) f = get_float() assert_type(f, float) # === String and Bytes Types === # str check_str('hello') check_str(str(42)) check_str(str(b'hello', 'utf-8')) 
check_str(str(get_int())) # bytes check_bytes(b'hello') check_bytes(bytes('hello', 'utf-8')) check_bytes(bytes(10)) check_bytes(bytes([65, 66, 67])) check_bytes(bytes(get_int())) k2 = get_bytes() assert_type(k2, bytes) # === Container Types === # list check_list_int([1, 2, 3]) check_list_str(list('abc')) check_list_int(list(range(10))) m2 = get_list_int() assert_type(m2, list[int]) m3 = get_list_str() assert_type(m3, list[str]) # tuple check_tuple_int(tuple([1, 2, 3])) p2 = get_tuple_str_int() assert_type(p2, tuple[str, int]) # dict check_dict_str_int({'a': 1, 'b': 2}) check_dict_str_int(dict(a=1, b=2)) d = get_dict_str_int() assert_type(d, dict[str, int]) # set check_set_int({1, 2, 3}) check_set_int(set([1, 2, 3])) # frozenset check_frozenset_int(frozenset([1, 2, 3])) # range w = range(get_int()) assert_type(w, range) # slice sl1 = slice(10) sl2 = slice(0, 10) sl3 = slice(0, 10, 2) # === Builtin Functions === # abs check_int(abs(-5)) check_float(abs(-3.14)) # all / any check_bool(all([True, False])) check_bool(any([True, False])) aa = all(get_list_int()) assert_type(aa, bool) # bin / hex / oct check_str(bin(42)) check_str(hex(255)) check_str(oct(8)) ac = bin(get_int()) assert_type(ac, str) # chr / ord check_str(chr(65)) check_int(ord('A')) af = chr(get_int()) assert_type(af, str) ag = ord(get_str()) assert_type(ag, int) # divmod dm = divmod(10, 3) # hash check_int(hash('hello')) ai = hash(get_str()) assert_type(ai, int) # id check_int(id(object())) ak = id(get_object()) assert_type(ak, int) # isinstance check_bool(isinstance(42, int)) al = isinstance(get_object(), int) assert_type(al, bool) # len check_int(len([1, 2, 3])) an = len(get_list_int()) assert_type(an, int) # max / min check_int(max(1, 2, 3)) check_int(min(1, 2, 3)) # pow check_int(pow(2, 3)) check_float(pow(2.0, 3.0)) # print aw = print(get_str()) assert_type(aw, None) # repr check_str(repr(42)) ax = repr(get_int()) assert_type(ax, str) # round check_int(round(3.7)) # sorted check_list_int(sorted([3, 1, 
2])) # sum check_int(sum([1, 2, 3])) ba = sum(get_list_int()) assert_type(ba, int) # type bf = type(get_int()) assert_type(bf, type[int]) # === Iterator Types === # enumerate for i_enum, v_enum in enumerate([1, 2, 3]): check_int(i_enum) check_int(v_enum) # reversed for v_rev in reversed([1, 2, 3]): check_int(v_rev) # zip for a_zip, b_zip in zip([1, 2], ['a', 'b']): check_int(a_zip) check_str(b_zip) # === Literal Types === bk = None assert_type(bk, None) # === Exception Types === e1 = BaseException('error') e2 = Exception('error') e3 = SystemExit(1) e4 = KeyboardInterrupt() e5 = ArithmeticError('error') e6 = OverflowError('error') e7 = ZeroDivisionError('error') e8 = LookupError('error') e9 = IndexError('error') e10 = KeyError('key') e11 = RuntimeError('error') e12 = NotImplementedError('error') e13 = RecursionError('error') e14 = AttributeError('error') e15 = AssertionError('error') e16 = MemoryError('error') e17 = NameError('error') e18 = SyntaxError('error') e19 = TimeoutError('error') e20 = TypeError('error') e21 = ValueError('error') e22 = StopIteration() # === Exception Inheritance === def handle_base(e: BaseException) -> None: pass handle_base(Exception('error')) handle_base(ValueError('error')) handle_base(KeyError('key')) handle_base(ZeroDivisionError('error')) def handle_exception(e: Exception) -> None: pass handle_exception(ValueError('error')) handle_exception(TypeError('error')) handle_exception(RuntimeError('error')) # === Try/Except === try: x_try = 1 / 0 except ZeroDivisionError as e_try: check_str(str(e_try)) try: d_try: dict[str, int] = {} v_try = d_try['missing'] except KeyError: pass try: lst_try: list[int] = [] v_lst = lst_try[0] except IndexError: pass # === Raise === def may_fail(x_fail: int) -> int: if x_fail < 0: raise ValueError('x must be non-negative') if x_fail == 0: raise ZeroDivisionError('x cannot be zero') return 100 // x_fail def not_implemented() -> None: raise NotImplementedError('subclass must implement') print(sys.version) 
print(sys.version_info) print(None, file=sys.stdout) print(None, file=sys.stderr) # === async === async def foo(a: int): return a * 2 async def bar(): await foo(1) await foo(2) await foo(3) await asyncio.gather(bar()) # pyright: ignore asyncio.run(foo(1)) @dataclass class Point: x: int y: float p = Point(1, 2) assert_type(p.x, int) assert_type(p.y, float) p.x = 3 print(p) path = Path(__file__) assert_type(path, Path) # assert_type(path.name, str) p2 = path.parent assert_type(p2, Path) p3 = path / 'test.txt' assert_type(p3, Path) assert p3.name == 'test.txt' x = os.getenv('foobar') assert_type(x, str | None) y = os.getenv('foobar', default=int('123')) assert_type(y, str | int) x2 = os.environ.get('foobar') assert_type(x2, str | None) # === re module === # re.search returns Match or None s1 = re.search(r'\d+', 'abc 42') assert_type(s1, re.Match[str] | None) # re.match returns Match or None s2 = re.match(r'\w+', 'hello') assert_type(s2, re.Match[str] | None) # re.fullmatch returns Match or None s3 = re.fullmatch(r'\w+', 'hello') assert_type(s3, re.Match[str] | None) # re.compile returns Pattern p_re = re.compile(r'\d+') assert_type(p_re, re.Pattern[str]) # re.findall returns list of Any fa = re.findall(r'\d+', 'a1 b2 c3') assert_type(fa, list[Any]) # re.sub returns str s4 = re.sub(r'\d+', 'X', 'a1 b2') assert_type(s4, str) # Pattern.search returns Match or None p_re2 = re.compile(r'(\w+)') s5 = p_re2.search('hello world') assert_type(s5, re.Match[str] | None) # Pattern.match returns Match or None s6 = p_re2.match('hello world') assert_type(s6, re.Match[str] | None) # Pattern.sub returns str s7 = p_re2.sub('X', 'hello world') assert_type(s7, str) # Pattern.findall returns list of Any fa2 = p_re2.findall('hello world') assert_type(fa2, list[Any]) ================================================ FILE: crates/monty-type-checking/tests/main.rs ================================================ use std::fs; use monty_type_checking::{SourceFile, type_check}; use 
pretty_assertions::assert_eq; use ruff_db::diagnostic::DiagnosticFormat; #[test] fn type_checking_success() { let code = r" def add(x: int, y: int) -> int: return x + y result = add(1, 2) "; let result = type_check(&SourceFile::new(code, "main.py"), None).unwrap(); assert!(result.is_none()); } #[test] fn type_checking_error() { let code = "\ def add(x: int, y: int) -> int: return x + y result = add(1, '2') "; let result = type_check(&SourceFile::new(code, "main.py"), None).unwrap(); assert!(result.is_some()); let error_diagnostics = result.unwrap().to_string(); assert_eq!( error_diagnostics, r#"error[invalid-argument-type]: Argument to function `add` is incorrect --> main.py:4:17 | 2 | return x + y 3 | 4 | result = add(1, '2') | ^^^ Expected `int`, found `Literal["2"]` | info: Function defined here --> main.py:1:5 | 1 | def add(x: int, y: int) -> int: | ^^^ ------ Parameter declared here 2 | return x + y | info: rule `invalid-argument-type` is enabled by default "# ); } #[test] fn type_checking_error_stubs() { let stubs = "\ from dataclasses import dataclass @dataclass class User: name: str age: int "; let code = "\ def add(x: int, y: int) -> int: return x + y result = add(1, '2')"; let result = type_check( &SourceFile::new(code, "main.py"), Some(&SourceFile::new(stubs, "type_stubs.pyi")), ) .unwrap(); let error_diagnostics = result.unwrap(); assert_eq!( error_diagnostics.to_string(), r#"error[invalid-argument-type]: Argument to function `add` is incorrect --> main.py:4:17 | 2 | return x + y 3 | 4 | result = add(1, '2') | ^^^ Expected `int`, found `Literal["2"]` | info: Function defined here --> main.py:1:5 | 1 | def add(x: int, y: int) -> int: | ^^^ ------ Parameter declared here 2 | return x + y | info: rule `invalid-argument-type` is enabled by default "# ); } #[test] fn type_checking_error_concise() { let code = r" def add(x: int, y: int) -> int: return x + y result = add(1, '2') "; let result = type_check(&SourceFile::new(code, "main.py"), None).unwrap(); 
assert!(result.is_some()); let failure = result.unwrap().format(DiagnosticFormat::Concise); let error_diagnostics = failure.to_string(); assert_eq!( error_diagnostics, "main.py:5:17: error[invalid-argument-type] Argument to function `add` is incorrect: Expected `int`, found `Literal[\"2\"]`\n" ); let color_failure = failure.color(true).to_string(); assert!(color_failure.starts_with('\u{1b}')); } #[test] fn missing_stdlib_datetime() { let code = "import datetime\nprint(datetime.datetime.now())"; let result = type_check(&SourceFile::new(code, "main.py"), None).unwrap(); assert!(result.is_some()); let failure = result.unwrap().format(DiagnosticFormat::Concise); let error_diagnostics = failure.to_string(); assert_eq!( error_diagnostics, "main.py:1:8: error[unresolved-import] Cannot resolve imported module `datetime`\n" ); let dbg = format!("{failure:?}"); assert!(dbg.starts_with("TypeCheckingDiagnostics:"), "got: {dbg}"); } /// Test that good_types.py type-checks without errors. /// /// This file uses `assert_type` from typing to verify that inferred types match expected types. 
#[test] fn type_good_types() { let code = include_str!("good_types.py"); let result = type_check(&SourceFile::new(code, "good_types.py"), None).unwrap(); assert!(result.is_none(), "Expected no type errors, got: {result:#?}"); } fn check_file_content(file_name: &str, mut actual: &str) { let expected_path = format!("{}/tests/{}", env!("CARGO_MANIFEST_DIR"), file_name); let expected = if fs::exists(&expected_path).unwrap() { fs::read_to_string(&expected_path).unwrap() } else { std::fs::write(&expected_path, actual).unwrap(); panic!("{file_name} did not exist, file created.") }; let expected = expected.as_str().trim(); actual = actual.trim(); if actual == expected { println!("File content matches expected."); return; } let status = if std::env::var("UPDATE_EXPECT").is_ok() { std::fs::write(&expected_path, actual).unwrap(); "FILE UPDATE" } else { "file not updated, run with UPDATE_EXPECT=1 to update" }; panic!("Type errors don't match expected.\n\nEXPECTED:\n{expected}\n\nACTUAL:\n{actual}\n\n{status}."); } /// Test that bad_types.py produces the expected type errors. /// /// Set `UPDATE_EXPECT=1` to update the expected errors file. 
#[test] fn type_bad_types() { let code = include_str!("bad_types.py"); let result = type_check(&SourceFile::new(code, "bad_types.py"), None).unwrap(); let failure = result.expect("Expected type errors in bad_types.py"); let actual = failure .format(ruff_db::diagnostic::DiagnosticFormat::Concise) .to_string(); check_file_content("bad_types_output.txt", &actual); } #[test] fn test_reveal_types() { let code = include_str!("reveal_types.py"); let result = type_check(&SourceFile::new(code, "reveal_types.py"), None).unwrap(); let failure = result.expect("Expected type errors in reveal_types.py"); let actual = failure .format(ruff_db::diagnostic::DiagnosticFormat::Concise) .to_string(); check_file_content("reveal_types_output.txt", &actual); } ================================================ FILE: crates/monty-type-checking/tests/reveal_types.py ================================================ from typing import reveal_type # === Core types === reveal_type(None) reveal_type(object()) reveal_type(type(1)) # === Primitive types === reveal_type(True) reveal_type(int(1)) reveal_type(float(1.2)) # === String/bytes types === reveal_type('hello') reveal_type(b'foobar') # === Container types === reveal_type([1]) reveal_type((1, 2)) reveal_type({1: 2}) reveal_type({1, 2}) reveal_type(frozenset({1, 2})) reveal_type(range(10)) # === Iterator types === reveal_type(enumerate([1, 2])) reveal_type(reversed([1, 2])) reveal_type(zip([1], [2])) # === Slicing === reveal_type(slice(1, 2)) # === Exception types === reveal_type(BaseException()) reveal_type(Exception()) reveal_type(SystemExit()) reveal_type(KeyboardInterrupt()) reveal_type(ArithmeticError()) reveal_type(OverflowError()) reveal_type(ZeroDivisionError()) reveal_type(LookupError()) reveal_type(IndexError()) reveal_type(KeyError()) reveal_type(RuntimeError()) reveal_type(NotImplementedError()) reveal_type(RecursionError()) reveal_type(AttributeError()) reveal_type(AssertionError()) reveal_type(MemoryError()) 
reveal_type(NameError()) reveal_type(SyntaxError()) reveal_type(OSError()) reveal_type(TimeoutError()) reveal_type(TypeError()) reveal_type(ValueError()) reveal_type(StopIteration()) ================================================ FILE: crates/monty-type-checking/tests/reveal_types_output.txt ================================================ reveal_types.py:4:13: info[revealed-type] Revealed type: `None` reveal_types.py:5:13: info[revealed-type] Revealed type: `object` reveal_types.py:6:13: info[revealed-type] Revealed type: `` reveal_types.py:9:13: info[revealed-type] Revealed type: `Literal[True]` reveal_types.py:10:13: info[revealed-type] Revealed type: `int` reveal_types.py:11:13: info[revealed-type] Revealed type: `float` reveal_types.py:14:13: info[revealed-type] Revealed type: `Literal["hello"]` reveal_types.py:15:13: info[revealed-type] Revealed type: `Literal[b"foobar"]` reveal_types.py:18:13: info[revealed-type] Revealed type: `list[Unknown | int]` reveal_types.py:19:13: info[revealed-type] Revealed type: `tuple[Literal[1], Literal[2]]` reveal_types.py:20:13: info[revealed-type] Revealed type: `dict[Unknown | int, Unknown | int]` reveal_types.py:21:13: info[revealed-type] Revealed type: `set[Unknown | int]` reveal_types.py:22:13: info[revealed-type] Revealed type: `frozenset[int]` reveal_types.py:23:13: info[revealed-type] Revealed type: `range` reveal_types.py:26:13: info[revealed-type] Revealed type: `enumerate[int]` reveal_types.py:27:13: info[revealed-type] Revealed type: `reversed[int]` reveal_types.py:28:13: info[revealed-type] Revealed type: `zip[Unknown]` reveal_types.py:31:13: info[revealed-type] Revealed type: `slice[Any, Any, Any]` reveal_types.py:34:13: info[revealed-type] Revealed type: `BaseException` reveal_types.py:35:13: info[revealed-type] Revealed type: `Exception` reveal_types.py:36:13: info[revealed-type] Revealed type: `SystemExit` reveal_types.py:37:13: info[revealed-type] Revealed type: `KeyboardInterrupt` reveal_types.py:38:13: 
info[revealed-type] Revealed type: `ArithmeticError` reveal_types.py:39:13: info[revealed-type] Revealed type: `OverflowError` reveal_types.py:40:13: info[revealed-type] Revealed type: `ZeroDivisionError` reveal_types.py:41:13: info[revealed-type] Revealed type: `LookupError` reveal_types.py:42:13: info[revealed-type] Revealed type: `IndexError` reveal_types.py:43:13: info[revealed-type] Revealed type: `KeyError` reveal_types.py:44:13: info[revealed-type] Revealed type: `RuntimeError` reveal_types.py:45:13: info[revealed-type] Revealed type: `NotImplementedError` reveal_types.py:46:13: info[revealed-type] Revealed type: `RecursionError` reveal_types.py:47:13: info[revealed-type] Revealed type: `AttributeError` reveal_types.py:48:13: info[revealed-type] Revealed type: `AssertionError` reveal_types.py:49:13: info[revealed-type] Revealed type: `MemoryError` reveal_types.py:50:13: info[revealed-type] Revealed type: `NameError` reveal_types.py:51:13: info[revealed-type] Revealed type: `SyntaxError` reveal_types.py:52:13: info[revealed-type] Revealed type: `OSError` reveal_types.py:53:13: info[revealed-type] Revealed type: `TimeoutError` reveal_types.py:54:13: info[revealed-type] Revealed type: `TypeError` reveal_types.py:55:13: info[revealed-type] Revealed type: `ValueError` reveal_types.py:56:13: info[revealed-type] Revealed type: `StopIteration` ================================================ FILE: crates/monty-typeshed/.gitignore ================================================ # Do not ignore any of the vendored files. If this pattern is not present, # we will gitignore the `venv/` stubs in typeshed, as there is a general # rule to ignore `venv/` directories in the root `.gitignore`. 
!/vendor/typeshed/**/* /typeshed-repo/ ================================================ FILE: crates/monty-typeshed/Cargo.toml ================================================ [package] name = "monty_typeshed" readme = "README.md" version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } authors = { workspace = true } license = { workspace = true } description = { workspace = true } keywords = { workspace = true } categories = { workspace = true } homepage = { workspace = true } repository = { workspace = true } [dependencies] ruff_db = { workspace = true } zip = { version = "0.6.6", default-features = false } [build-dependencies] path-slash = "0.2.1" walkdir = "2.3.2" zip = { version = "0.6.6", features = ["zstd", "deflate"] } [features] zstd = ["zip/zstd"] deflate = ["zip/deflate"] [lints] workspace = true ================================================ FILE: crates/monty-typeshed/README.md ================================================ # Vendored types for a very minimal subset of the CPython stdlib Copied originally from but only parts of are kept, since those are the only functions supported from the stdlib. The `vendor/typeshed` directory is updated by calling `make update-typeshed` which calls the `update.py` script in this directory. See for more information on the project. THEREFORE FILES IN THE `vendor/typeshed` DIRECTORY SHOULD NOT BE EDITED MANUALLY. ================================================ FILE: crates/monty-typeshed/build.rs ================================================ //! Build script to package our vendored typeshed files //! into a zip archive that can be included in the Ruff binary. //! //! This script should be automatically run at build time //! whenever the script itself changes, or whenever any files //! in `crates/ty_vendored/vendor/typeshed` change. 
#![expect(clippy::unnecessary_debug_formatting)] use std::{fs::File, io::Write, path::Path}; use path_slash::PathExt; use zip::{ CompressionMethod, result::ZipResult, write::{FileOptions, ZipWriter}, }; const TYPESHED_SOURCE_DIR: &str = "vendor/typeshed"; // const TY_EXTENSIONS_STUBS: &str = "ty_extensions/ty_extensions.pyi"; const TYPESHED_ZIP_LOCATION: &str = "/zipped_typeshed.zip"; /// Recursively zip the contents of the entire typeshed directory and patch typeshed /// on the fly to include the `ty_extensions` module. /// /// This routine is adapted from a recipe at /// fn write_zipped_typeshed_to(writer: File) -> ZipResult { let mut zip = ZipWriter::new(writer); // Use deflated compression for WASM builds because compiling `zstd-sys` requires clang // [source](https://github.com/gyscos/zstd-rs/wiki/Compile-for-WASM) which complicates the build // by a lot. Deflated compression is slower but it shouldn't matter much for the WASM use case // (WASM itself is already slower than a native build for a specific platform). // We can't use `#[cfg(...)]` here because the target-arch in a build script is the // architecture of the system running the build script and not the architecture of the build-target. // That's why we use the `TARGET` environment variable here. let method = if cfg!(feature = "zstd") { CompressionMethod::Zstd } else if cfg!(feature = "deflate") { CompressionMethod::Deflated } else { CompressionMethod::Stored }; let options = FileOptions::default() .compression_method(method) .unix_permissions(0o644); for entry in walkdir::WalkDir::new(TYPESHED_SOURCE_DIR) { let dir_entry = entry.unwrap(); let absolute_path = dir_entry.path(); let normalized_relative_path = absolute_path .strip_prefix(Path::new(TYPESHED_SOURCE_DIR)) .unwrap() .to_slash() .expect("Unexpected non-utf8 typeshed path!"); // Write file or directory explicitly // Some unzip tools unzip files with directory paths correctly, some do not! 
if absolute_path.is_file() { println!("adding file {absolute_path:?} as {normalized_relative_path:?} ..."); zip.start_file(&*normalized_relative_path, options)?; let mut f = File::open(absolute_path)?; std::io::copy(&mut f, &mut zip).unwrap(); // Patch the VERSIONS file to make `ty_extensions` available if normalized_relative_path == "stdlib/VERSIONS" { writeln!(&mut zip, "ty_extensions: 3.0-")?; } } else if !normalized_relative_path.is_empty() { // Only if not root! Avoids path spec / warning // and mapname conversion failed error on unzip println!("adding dir {absolute_path:?} as {normalized_relative_path:?} ..."); zip.add_directory(normalized_relative_path, options)?; } } // // Patch typeshed and add the stubs for the `ty_extensions` module // println!("adding file {TY_EXTENSIONS_STUBS} as stdlib/ty_extensions.pyi ..."); // zip.start_file("stdlib/ty_extensions.pyi", options)?; // let mut f = File::open(TY_EXTENSIONS_STUBS)?; // std::io::copy(&mut f, &mut zip).unwrap(); zip.finish() } fn main() { assert!(Path::new(TYPESHED_SOURCE_DIR).is_dir(), "Where is typeshed?"); let out_dir = std::env::var("OUT_DIR").unwrap(); // N.B. Deliberately using `format!()` instead of `Path::join()` here, // so that we use `/` as a path separator on all platforms. // That enables us to load the typeshed zip at compile time in `module.rs` // (otherwise we'd have to dynamically determine the exact path to the typeshed zip // based on the default path separator for the specific platform we're on, // which can't be done at compile time.) 
let zipped_typeshed_location = format!("{out_dir}{TYPESHED_ZIP_LOCATION}"); let zipped_typeshed_file = File::create(zipped_typeshed_location).unwrap(); write_zipped_typeshed_to(zipped_typeshed_file).unwrap(); } ================================================ FILE: crates/monty-typeshed/custom/README.md ================================================ This directory contains custom type stubs where types available in monty differ from those in the standard library. ================================================ FILE: crates/monty-typeshed/custom/asyncio.pyi ================================================ from collections.abc import Awaitable, Generator from typing import Any, Literal, TypeAlias, TypeVar, overload _T = TypeVar('_T') _T1 = TypeVar('_T1') _T2 = TypeVar('_T2') _T3 = TypeVar('_T3') _T4 = TypeVar('_T4') _T5 = TypeVar('_T5') _T6 = TypeVar('_T6') class _Future(Awaitable[_T]): """ Minimal copy of Future from _typeshed/stdlib/_asyncio.pyi """ def __iter__(self) -> Generator[Any, None, _T]: ... def __await__(self) -> Generator[Any, None, _T]: ... _FutureLike: TypeAlias = _Future[_T] | Awaitable[_T] def run(main: Awaitable[_T], *, debug: bool | None = None, loop_factory: Any = None) -> _T: ... @overload def gather( coro_or_future1: _FutureLike[_T1], /, *, return_exceptions: Literal[False] = False ) -> _Future[tuple[_T1]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], /, *, return_exceptions: Literal[False] = False, ) -> _Future[tuple[_T1, _T2]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], /, *, return_exceptions: Literal[False] = False, ) -> _Future[tuple[_T1, _T2, _T3]]: ... 
@overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], coro_or_future4: _FutureLike[_T4], /, *, return_exceptions: Literal[False] = False, ) -> _Future[tuple[_T1, _T2, _T3, _T4]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], coro_or_future4: _FutureLike[_T4], coro_or_future5: _FutureLike[_T5], /, *, return_exceptions: Literal[False] = False, ) -> _Future[tuple[_T1, _T2, _T3, _T4, _T5]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], coro_or_future4: _FutureLike[_T4], coro_or_future5: _FutureLike[_T5], coro_or_future6: _FutureLike[_T6], /, *, return_exceptions: Literal[False] = False, ) -> _Future[tuple[_T1, _T2, _T3, _T4, _T5, _T6]]: ... @overload def gather(*coros_or_futures: _FutureLike[_T], return_exceptions: Literal[False] = False) -> _Future[list[_T]]: ... @overload def gather(coro_or_future1: _FutureLike[_T1], /, *, return_exceptions: bool) -> _Future[tuple[_T1 | BaseException]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], /, *, return_exceptions: bool ) -> _Future[tuple[_T1 | BaseException, _T2 | BaseException]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], /, *, return_exceptions: bool, ) -> _Future[tuple[_T1 | BaseException, _T2 | BaseException, _T3 | BaseException]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], coro_or_future4: _FutureLike[_T4], /, *, return_exceptions: bool, ) -> _Future[tuple[_T1 | BaseException, _T2 | BaseException, _T3 | BaseException, _T4 | BaseException]]: ... 
@overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], coro_or_future4: _FutureLike[_T4], coro_or_future5: _FutureLike[_T5], /, *, return_exceptions: bool, ) -> _Future[ tuple[_T1 | BaseException, _T2 | BaseException, _T3 | BaseException, _T4 | BaseException, _T5 | BaseException] ]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], coro_or_future4: _FutureLike[_T4], coro_or_future5: _FutureLike[_T5], coro_or_future6: _FutureLike[_T6], /, *, return_exceptions: bool, ) -> _Future[ tuple[ _T1 | BaseException, _T2 | BaseException, _T3 | BaseException, _T4 | BaseException, _T5 | BaseException, _T6 | BaseException, ] ]: ... @overload def gather(*coros_or_futures: _FutureLike[_T], return_exceptions: bool) -> _Future[list[_T | BaseException]]: ... ================================================ FILE: crates/monty-typeshed/custom/os.pyi ================================================ from abc import ABC, abstractmethod from typing import Callable, Protocol, TypeAlias, TypeVar, final, overload, runtime_checkable from _typeshed import AnyStr_co, structseq _T = TypeVar('_T') environ: dict[str, str] @overload def getenv(key: str) -> str | None: ... @overload def getenv(key: str, default: _T) -> str | _T: ... @final class stat_result(structseq[float], tuple[int, int, int, int, int, int, int, float, float, float]): # The constructor of this class takes an iterable of variable length (though it must be at least 10). # # However, this class behaves like a tuple of 10 elements, # no matter how long the iterable supplied to the constructor is. # https://github.com/python/typeshed/pull/6560#discussion_r767162532 # # The 10 elements always present are st_mode, st_ino, st_dev, st_nlink, # st_uid, st_gid, st_size, st_atime, st_mtime, st_ctime. # # More items may be added at the end by some implementations. 
@property def st_mode(self) -> int: """protection bits""" ... @property def st_ino(self) -> int: """inode""" ... @property def st_dev(self) -> int: """device""" ... @property def st_nlink(self) -> int: """number of hard links""" ... @property def st_uid(self) -> int: """user ID of owner""" ... @property def st_gid(self) -> int: """group ID of owner""" ... @property def st_size(self) -> int: """total size, in bytes""" ... @property def st_atime(self) -> float: """time of last access""" ... @property def st_mtime(self) -> float: """time of last modification""" ... @property def st_ctime(self) -> float: """time of last change""" ... # (Samuel) PathLike is included here because it's used by pathlib # mypy and pyright object to this being both ABC and Protocol. # At runtime it inherits from ABC and is not a Protocol, but it will be # on the allowlist for use as a Protocol starting in 3.14. @runtime_checkable class PathLike(ABC, Protocol[AnyStr_co]): # type: ignore[misc] # pyright: ignore[reportGeneralTypeIssues] __slots__ = () @abstractmethod def __fspath__(self) -> AnyStr_co: ... 
_Opener: TypeAlias = Callable[[str, int], int] ================================================ FILE: crates/monty-typeshed/custom/sys.pyi ================================================ from typing import Any, Final, Literal, TextIO, final, type_check_only from _typeshed import MaybeNone, structseq from typing_extensions import TypeAlias # stdin: TextIO | MaybeNone stdout: TextIO | MaybeNone stderr: TextIO | MaybeNone version: str # Type alias used as a mixin for structseq classes that cannot be instantiated at runtime # This can't be represented in the type system, so we just use `structseq[Any]` _UninstantiableStructseq: TypeAlias = structseq[Any] _ReleaseLevel: TypeAlias = Literal['alpha', 'beta', 'candidate', 'final'] @final @type_check_only class _version_info(_UninstantiableStructseq, tuple[int, int, int, _ReleaseLevel, int]): __match_args__: Final = ('major', 'minor', 'micro', 'releaselevel', 'serial') @property def major(self) -> int: ... @property def minor(self) -> int: ... @property def micro(self) -> int: ... @property def releaselevel(self) -> _ReleaseLevel: ... @property def serial(self) -> int: ... version_info: _version_info ================================================ FILE: crates/monty-typeshed/src/lib.rs ================================================ use std::sync::LazyLock; use ruff_db::vendored::VendoredFileSystem; /// The source commit of the vendored typeshed. pub const SOURCE_COMMIT: &str = include_str!("../vendor/typeshed/source_commit.txt").trim_ascii_end(); // The file path here is hardcoded in this crate's `build.rs` script. // Luckily this crate will fail to build if this file isn't available at build time. 
static TYPESHED_ZIP_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip")); #[must_use] pub fn file_system() -> &'static VendoredFileSystem { static VENDORED_TYPESHED_STUBS: LazyLock = LazyLock::new(|| VendoredFileSystem::new_static(TYPESHED_ZIP_BYTES).unwrap()); &VENDORED_TYPESHED_STUBS } #[cfg(test)] mod tests { use std::io::{self, Read}; use super::*; #[test] fn test_commit() { assert_eq!(SOURCE_COMMIT.len(), 40); } #[test] fn typeshed_zip_created_at_build_time() { let mut typeshed_zip_archive = zip::ZipArchive::new(io::Cursor::new(TYPESHED_ZIP_BYTES)).unwrap(); let mut builtins_stub = typeshed_zip_archive.by_name("stdlib/builtins.pyi").unwrap(); assert!(builtins_stub.is_file()); let mut builtins_source = String::new(); builtins_stub.read_to_string(&mut builtins_source).unwrap(); assert!(builtins_source.contains("class int:")); } #[test] fn typeshed_versions_file_exists() { let mut typeshed_zip_archive = zip::ZipArchive::new(io::Cursor::new(TYPESHED_ZIP_BYTES)).unwrap(); let mut versions_file = typeshed_zip_archive.by_name("stdlib/VERSIONS").unwrap(); assert!(versions_file.is_file()); let mut versions_content = String::new(); versions_file.read_to_string(&mut versions_content).unwrap(); // VERSIONS file should contain module version info like "builtins: 3.0-" assert!(versions_content.contains("builtins:")); } } ================================================ FILE: crates/monty-typeshed/update.py ================================================ #!/usr/bin/env python3 """Update vendored typeshed files from the upstream repository. This script: 1. Clones the typeshed repository to crates/monty-typeshed/typeshed-repo (an existing checkout is reused as-is and is not pulled) 2. Records the HEAD commit hash 3. Filters builtins.pyi to keep only supported classes and functions 4.
Writes the filtered file to the vendor directory 5. Writes the VERSIONS file and source_commit.txt, then copies the unfiltered stubs listed in COPY_FILES and the custom `*.pyi` stubs into the vendor directory Usage: python crates/monty-typeshed/update.py """ import ast import shutil import subprocess from pathlib import Path # Whitelisted builtin functions (from crates/monty/src/builtins/) ALLOWED_FUNCTIONS = { 'abs', 'all', 'any', 'bin', 'chr', 'divmod', 'hash', 'hex', 'id', 'isinstance', 'len', 'max', 'min', 'oct', 'ord', 'pow', 'print', 'repr', 'round', 'sorted', 'sum', } # Whitelisted builtin classes (from crates/monty/src/types/ and exception_private.rs) ALLOWED_CLASSES = { # Core types 'object', 'type', # Primitive types 'bool', 'int', 'float', # String/bytes types 'str', 'bytes', # Container types 'list', 'tuple', 'dict', 'set', 'frozenset', 'range', # Iterator types (these are classes, not functions) 'enumerate', 'reversed', 'zip', # Slicing 'slice', # property is used by pathlib.Path 'property', # Exception hierarchy (from crates/monty/src/exception_private.rs) 'BaseException', 'Exception', 'SystemExit', 'KeyboardInterrupt', 'ArithmeticError', 'OverflowError', 'ZeroDivisionError', 'LookupError', 'IndexError', 'KeyError', 'RuntimeError', 'NotImplementedError', 'RecursionError', 'AttributeError', 'AssertionError', 'MemoryError', 'NameError', 'SyntaxError', 'OSError', 'TimeoutError', 'TypeError', 'ValueError', 'StopIteration', } # Files to copy without filtering COPY_FILES = [ # Core type system 'typing.pyi', 'typing_extensions.pyi', '_collections_abc.pyi', # Used in type annotations 'types.pyi', # So type checking works with dataclasses 'dataclasses.pyi', # used by dataclasses 'enum.pyi', # the re std lib module is not mostly implemented 're.pyi', # ============================== # all of collections dir 'collections/__init__.pyi', 'collections/abc.pyi', # ============================== # Take only `__init__.pyi` from _typeshed dir '_typeshed/__init__.pyi', # ============================== # all of pathlib dir 'pathlib/__init__.pyi', 'pathlib/types.pyi', # ============================== # math module 'math.pyi', ]
# content for typeshed's `VERSIONS` file VERSIONS = """\ # DO NOT EDIT THIS FILE DIRECTLY # instead edit crates/monty-typeshed/update.py # this file should match the modules # which monty's minimimal typeshed includes _collections_abc: 3.3- _typeshed: 3.0- # not present at runtime, only for type checking asyncio: 3.4- builtins: 3.0- collections: 3.0- dataclasses: 3.7- math: 3.0- os: 3.0- pathlib: 3.4- pathlib.types: 3.14- re: 3.0- sys: 3.0- typing: 3.5- typing_extensions: 3.7- types: 3.0- """ CRATE_DIR = Path(__file__).parent REPO_ROOT = CRATE_DIR.parent.parent VENDOR_DIR = CRATE_DIR / 'vendor' / 'typeshed' STDLIB_DIR = VENDOR_DIR / 'stdlib' CUSTOM_DIR = CRATE_DIR / 'custom' TYPESHED_REPO_DIR = CRATE_DIR / 'typeshed-repo' TYPESHED_REPO_URL = 'git@github.com:python/typeshed.git' def clone_or_update_typeshed() -> str: """Clone the typeshed repository if needed and return its HEAD commit hash. If the repository already exists at TYPESHED_REPO_DIR, it is reused as-is (no git pull is performed). Otherwise, a shallow clone (--depth=1) of TYPESHED_REPO_URL is made at that location. Returns: The HEAD commit hash of the checkout at TYPESHED_REPO_DIR. """ if TYPESHED_REPO_DIR.exists(): print(f'{TYPESHED_REPO_DIR} exists, not pulling') else: print(f'Cloning typeshed to {TYPESHED_REPO_DIR}...') subprocess.run( ['git', 'clone', '--depth=1', TYPESHED_REPO_URL, str(TYPESHED_REPO_DIR)], check=True, capture_output=True, ) result = subprocess.run( ['git', 'rev-parse', 'HEAD'], cwd=TYPESHED_REPO_DIR, check=True, capture_output=True, text=True, ) return result.stdout.strip() def filter_statements(nodes: list[ast.stmt]) -> list[ast.stmt]: """Filter a list of statements to keep only allowed functions and classes. Keeps: - Imports - Type variable assignments (e.g., _T = TypeVar('_T')) - Allowed function definitions - Allowed class definitions, and any class whose name starts with `_` - `if` blocks (e.g. version conditionals), filtered recursively and dropped if left empty - Every other kind of statement, unchanged Args: nodes: List of AST statement nodes. Returns: Filtered list of statements.
""" result: list[ast.stmt] = [] for node in nodes: if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): if node.name in ALLOWED_FUNCTIONS: result.append(node) elif isinstance(node, ast.ClassDef): if node.name.startswith('_') or node.name in ALLOWED_CLASSES: result.append(node) elif isinstance(node, ast.If): # Recursively filter version-conditional blocks filtered = filter_if_block(node) if filtered is not None: result.append(filtered) else: # Keep imports, type aliases, assignments, etc. result.append(node) return result def filter_if_block(node: ast.If) -> ast.If | None: """Filter an if block, recursively filtering function and class definitions. Handles version conditionals like `if sys.version_info >= (3, 10):`. Args: node: An ast.If node. Returns: Filtered If node, or None if both branches are empty after filtering. """ filtered_body = filter_statements(node.body) filtered_orelse = filter_statements(node.orelse) # If both branches are empty, skip this if block entirely if not filtered_body and not filtered_orelse: return None # Create a new If node with filtered contents new_node = ast.If( test=node.test, body=filtered_body if filtered_body else [ast.Pass()], orelse=filtered_orelse, ) return ast.copy_location(new_node, node) def filter_builtins(source: str) -> str: """Filter builtins.pyi to keep only allowed classes and functions. This function parses the source with Python's ast module and filters top-level definitions to only include those in the allow lists. All imports and type definitions are preserved. Args: source: The source code of builtins.pyi. Returns: Filtered source code. 
""" tree = ast.parse(source) tree.body = filter_statements(tree.body) ast.fix_missing_locations(tree) return ast.unparse(tree) def main() -> int: """Main entry point.""" # Clean up any stale files from previous runs if VENDOR_DIR.exists(): print(f'Removing existing {VENDOR_DIR}...') shutil.rmtree(VENDOR_DIR) # Clone or update typeshed commit = clone_or_update_typeshed() print(f'At python/typeshed commit {commit}') # Read source file src_stdlib = TYPESHED_REPO_DIR / 'stdlib' builtins_path = src_stdlib / 'builtins.pyi' source = builtins_path.read_text() # Filter filtered = filter_builtins(source) # Write output files STDLIB_DIR.mkdir(parents=True, exist_ok=True) (STDLIB_DIR / 'builtins.pyi').write_text(filtered) print(f'Wrote {(STDLIB_DIR / "builtins.pyi").relative_to(REPO_ROOT)}') (STDLIB_DIR / 'VERSIONS').write_text(VERSIONS) print(f'Wrote {(STDLIB_DIR / "VERSIONS").relative_to(REPO_ROOT)}') (VENDOR_DIR / 'source_commit.txt').write_text(commit + '\n') print(f'Wrote {(VENDOR_DIR / "source_commit.txt").relative_to(REPO_ROOT)}') for file_path in COPY_FILES: src_file = src_stdlib / file_path if src_file.exists(): dest_file = STDLIB_DIR / file_path dest_file.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(src_file, dest_file) else: raise ValueError(f'{file_path} not found in typeshed') print(f'Copied {len(COPY_FILES)} stdlib typeshed files') # copy pyi files from CUSTOM_DIR into STDLIB_DIR custom_count = 0 for file in CUSTOM_DIR.glob('*.pyi'): shutil.copy2(file, STDLIB_DIR) custom_count += 1 print(f'Copied {custom_count} custom typeshed files') return 0 if __name__ == '__main__': exit(main()) ================================================ FILE: crates/monty-typeshed/vendor/typeshed/source_commit.txt ================================================ 0e16ea31d2e188fdc126cb31e7c4fcc6b5a8da96 ================================================ FILE: crates/monty-typeshed/vendor/typeshed/stdlib/VERSIONS ================================================ # DO NOT EDIT THIS 
FILE DIRECTLY # instead edit crates/monty-typeshed/update.py # this file should match the modules # which monty's minimimal typeshed includes _collections_abc: 3.3- _typeshed: 3.0- # not present at runtime, only for type checking asyncio: 3.4- builtins: 3.0- collections: 3.0- dataclasses: 3.7- math: 3.0- os: 3.0- pathlib: 3.4- pathlib.types: 3.14- re: 3.0- sys: 3.0- typing: 3.5- typing_extensions: 3.7- types: 3.0- ================================================ FILE: crates/monty-typeshed/vendor/typeshed/stdlib/_collections_abc.pyi ================================================ import sys from abc import abstractmethod from types import MappingProxyType from typing import ( # noqa: Y022,Y038,UP035,Y057,RUF100 AbstractSet as Set, AsyncGenerator as AsyncGenerator, AsyncIterable as AsyncIterable, AsyncIterator as AsyncIterator, Awaitable as Awaitable, ByteString as ByteString, Callable as Callable, ClassVar, Collection as Collection, Container as Container, Coroutine as Coroutine, Generator as Generator, Generic, Hashable as Hashable, ItemsView as ItemsView, Iterable as Iterable, Iterator as Iterator, KeysView as KeysView, Mapping as Mapping, MappingView as MappingView, MutableMapping as MutableMapping, MutableSequence as MutableSequence, MutableSet as MutableSet, Protocol, Reversible as Reversible, Sequence as Sequence, Sized as Sized, TypeVar, ValuesView as ValuesView, final, runtime_checkable, ) __all__ = [ 'Awaitable', 'Coroutine', 'AsyncIterable', 'AsyncIterator', 'AsyncGenerator', 'Hashable', 'Iterable', 'Iterator', 'Generator', 'Reversible', 'Sized', 'Container', 'Callable', 'Collection', 'Set', 'MutableSet', 'Mapping', 'MutableMapping', 'MappingView', 'KeysView', 'ItemsView', 'ValuesView', 'Sequence', 'MutableSequence', 'ByteString', ] if sys.version_info >= (3, 12): __all__ += ['Buffer'] _KT_co = TypeVar('_KT_co', covariant=True) # Key type covariant containers. _VT_co = TypeVar('_VT_co', covariant=True) # Value type covariant containers. 
@final class dict_keys(KeysView[_KT_co], Generic[_KT_co, _VT_co]): # undocumented def __eq__(self, value: object, /) -> bool: ... def __reversed__(self) -> Iterator[_KT_co]: ... __hash__: ClassVar[None] # type: ignore[assignment] if sys.version_info >= (3, 13): def isdisjoint(self, other: Iterable[_KT_co], /) -> bool: ... if sys.version_info >= (3, 10): @property def mapping(self) -> MappingProxyType[_KT_co, _VT_co]: ... @final class dict_values(ValuesView[_VT_co], Generic[_KT_co, _VT_co]): # undocumented def __reversed__(self) -> Iterator[_VT_co]: ... if sys.version_info >= (3, 10): @property def mapping(self) -> MappingProxyType[_KT_co, _VT_co]: ... @final class dict_items(ItemsView[_KT_co, _VT_co]): # undocumented def __eq__(self, value: object, /) -> bool: ... def __reversed__(self) -> Iterator[tuple[_KT_co, _VT_co]]: ... __hash__: ClassVar[None] # type: ignore[assignment] if sys.version_info >= (3, 13): def isdisjoint(self, other: Iterable[tuple[_KT_co, _VT_co]], /) -> bool: ... if sys.version_info >= (3, 10): @property def mapping(self) -> MappingProxyType[_KT_co, _VT_co]: ... if sys.version_info >= (3, 12): @runtime_checkable class Buffer(Protocol): __slots__ = () @abstractmethod def __buffer__(self, flags: int, /) -> memoryview: ... ================================================ FILE: crates/monty-typeshed/vendor/typeshed/stdlib/_typeshed/__init__.pyi ================================================ # Utility types for typeshed # # See the README.md file in this directory for more information. 
import sys from collections.abc import Awaitable, Callable, Iterable, Iterator, Sequence, Set as AbstractSet, Sized from dataclasses import Field from os import PathLike from types import FrameType, TracebackType from typing import ( Any, AnyStr, ClassVar, Final, Generic, Literal, Protocol, SupportsFloat, SupportsIndex, SupportsInt, TypeVar, final, overload, ) from typing_extensions import Buffer, LiteralString, Self as _Self, TypeAlias _KT = TypeVar('_KT') _KT_co = TypeVar('_KT_co', covariant=True) _KT_contra = TypeVar('_KT_contra', contravariant=True) _VT = TypeVar('_VT') _VT_co = TypeVar('_VT_co', covariant=True) _T = TypeVar('_T') _T_co = TypeVar('_T_co', covariant=True) _T_contra = TypeVar('_T_contra', contravariant=True) # Alternative to `typing_extensions.Self`, exclusively for use with `__new__` # in metaclasses: # def __new__(cls: type[Self], ...) -> Self: ... # In other cases, use `typing_extensions.Self`. Self = TypeVar('Self') # covariant version of typing.AnyStr, useful for protocols AnyStr_co = TypeVar('AnyStr_co', str, bytes, covariant=True) # For partially known annotations. Usually, fields where type annotations # haven't been added are left unannotated, but in some situations this # isn't possible or a type is already partially known. In cases like these, # use Incomplete instead of Any as a marker. For example, use # "Incomplete | None" instead of "Any | None". Incomplete: TypeAlias = Any # stable # To describe a function parameter that is unused and will work with anything. Unused: TypeAlias = object # stable # Marker for return types that include None, but where forcing the user to # check for None can be detrimental. Sometimes called "the Any trick". See # https://typing.python.org/en/latest/guides/writing_stubs.html#the-any-trick # for more information. MaybeNone: TypeAlias = Any # stable # Used to mark arguments that default to a sentinel value. This prevents # stubtest from complaining about the default value not matching. 
# # def foo(x: int | None = sentinel) -> None: ... # # In cases where the sentinel object is exported and can be used by user code, # a construct like this is better: # # _SentinelType = NewType("_SentinelType", object) # does not exist at runtime # sentinel: Final[_SentinelType] # def foo(x: int | None | _SentinelType = ...) -> None: ... sentinel: Any # stable # stable class IdentityFunction(Protocol): def __call__(self, x: _T, /) -> _T: ... # stable class SupportsNext(Protocol[_T_co]): def __next__(self) -> _T_co: ... # stable class SupportsAnext(Protocol[_T_co]): def __anext__(self) -> Awaitable[_T_co]: ... class SupportsBool(Protocol): def __bool__(self) -> bool: ... # Comparison protocols class SupportsDunderLT(Protocol[_T_contra]): def __lt__(self, other: _T_contra, /) -> SupportsBool: ... class SupportsDunderGT(Protocol[_T_contra]): def __gt__(self, other: _T_contra, /) -> SupportsBool: ... class SupportsDunderLE(Protocol[_T_contra]): def __le__(self, other: _T_contra, /) -> SupportsBool: ... class SupportsDunderGE(Protocol[_T_contra]): def __ge__(self, other: _T_contra, /) -> SupportsBool: ... class SupportsAllComparisons( SupportsDunderLT[Any], SupportsDunderGT[Any], SupportsDunderLE[Any], SupportsDunderGE[Any], Protocol ): ... SupportsRichComparison: TypeAlias = SupportsDunderLT[Any] | SupportsDunderGT[Any] SupportsRichComparisonT = TypeVar('SupportsRichComparisonT', bound=SupportsRichComparison) # Dunder protocols class SupportsAdd(Protocol[_T_contra, _T_co]): def __add__(self, x: _T_contra, /) -> _T_co: ... class SupportsRAdd(Protocol[_T_contra, _T_co]): def __radd__(self, x: _T_contra, /) -> _T_co: ... class SupportsSub(Protocol[_T_contra, _T_co]): def __sub__(self, x: _T_contra, /) -> _T_co: ... class SupportsRSub(Protocol[_T_contra, _T_co]): def __rsub__(self, x: _T_contra, /) -> _T_co: ... class SupportsMul(Protocol[_T_contra, _T_co]): def __mul__(self, x: _T_contra, /) -> _T_co: ... 
class SupportsRMul(Protocol[_T_contra, _T_co]): def __rmul__(self, x: _T_contra, /) -> _T_co: ... class SupportsDivMod(Protocol[_T_contra, _T_co]): def __divmod__(self, other: _T_contra, /) -> _T_co: ... class SupportsRDivMod(Protocol[_T_contra, _T_co]): def __rdivmod__(self, other: _T_contra, /) -> _T_co: ... # This protocol is generic over the iterator type, while Iterable is # generic over the type that is iterated over. class SupportsIter(Protocol[_T_co]): def __iter__(self) -> _T_co: ... # This protocol is generic over the iterator type, while AsyncIterable is # generic over the type that is iterated over. class SupportsAiter(Protocol[_T_co]): def __aiter__(self) -> _T_co: ... class SupportsLen(Protocol): def __len__(self) -> int: ... class SupportsLenAndGetItem(Protocol[_T_co]): def __len__(self) -> int: ... def __getitem__(self, k: int, /) -> _T_co: ... class SupportsTrunc(Protocol): def __trunc__(self) -> int: ... # Mapping-like protocols # stable class SupportsItems(Protocol[_KT_co, _VT_co]): def items(self) -> AbstractSet[tuple[_KT_co, _VT_co]]: ... # stable class SupportsKeysAndGetItem(Protocol[_KT, _VT_co]): def keys(self) -> Iterable[_KT]: ... def __getitem__(self, key: _KT, /) -> _VT_co: ... # stable class SupportsGetItem(Protocol[_KT_contra, _VT_co]): def __getitem__(self, key: _KT_contra, /) -> _VT_co: ... # stable class SupportsContainsAndGetItem(Protocol[_KT_contra, _VT_co]): def __contains__(self, x: Any, /) -> bool: ... def __getitem__(self, key: _KT_contra, /) -> _VT_co: ... # stable class SupportsItemAccess(Protocol[_KT_contra, _VT]): def __contains__(self, x: Any, /) -> bool: ... def __getitem__(self, key: _KT_contra, /) -> _VT: ... def __setitem__(self, key: _KT_contra, value: _VT, /) -> None: ... def __delitem__(self, key: _KT_contra, /) -> None: ... 
StrPath: TypeAlias = str | PathLike[str] # stable BytesPath: TypeAlias = bytes | PathLike[bytes] # stable GenericPath: TypeAlias = AnyStr | PathLike[AnyStr] StrOrBytesPath: TypeAlias = str | bytes | PathLike[str] | PathLike[bytes] # stable OpenTextModeUpdating: TypeAlias = Literal[ 'r+', '+r', 'rt+', 'r+t', '+rt', 'tr+', 't+r', '+tr', 'w+', '+w', 'wt+', 'w+t', '+wt', 'tw+', 't+w', '+tw', 'a+', '+a', 'at+', 'a+t', '+at', 'ta+', 't+a', '+ta', 'x+', '+x', 'xt+', 'x+t', '+xt', 'tx+', 't+x', '+tx', ] OpenTextModeWriting: TypeAlias = Literal['w', 'wt', 'tw', 'a', 'at', 'ta', 'x', 'xt', 'tx'] OpenTextModeReading: TypeAlias = Literal['r', 'rt', 'tr', 'U', 'rU', 'Ur', 'rtU', 'rUt', 'Urt', 'trU', 'tUr', 'Utr'] OpenTextMode: TypeAlias = OpenTextModeUpdating | OpenTextModeWriting | OpenTextModeReading OpenBinaryModeUpdating: TypeAlias = Literal[ 'rb+', 'r+b', '+rb', 'br+', 'b+r', '+br', 'wb+', 'w+b', '+wb', 'bw+', 'b+w', '+bw', 'ab+', 'a+b', '+ab', 'ba+', 'b+a', '+ba', 'xb+', 'x+b', '+xb', 'bx+', 'b+x', '+bx', ] OpenBinaryModeWriting: TypeAlias = Literal['wb', 'bw', 'ab', 'ba', 'xb', 'bx'] OpenBinaryModeReading: TypeAlias = Literal['rb', 'br', 'rbU', 'rUb', 'Urb', 'brU', 'bUr', 'Ubr'] OpenBinaryMode: TypeAlias = OpenBinaryModeUpdating | OpenBinaryModeReading | OpenBinaryModeWriting # stable class HasFileno(Protocol): def fileno(self) -> int: ... FileDescriptor: TypeAlias = int # stable FileDescriptorLike: TypeAlias = int | HasFileno # stable FileDescriptorOrPath: TypeAlias = int | StrOrBytesPath # stable class SupportsRead(Protocol[_T_co]): def read(self, length: int = ..., /) -> _T_co: ... # stable class SupportsReadline(Protocol[_T_co]): def readline(self, length: int = ..., /) -> _T_co: ... # stable class SupportsNoArgReadline(Protocol[_T_co]): def readline(self) -> _T_co: ... # stable class SupportsWrite(Protocol[_T_contra]): def write(self, s: _T_contra, /) -> object: ... # stable class SupportsFlush(Protocol): def flush(self) -> object: ... 
# Suitable for dictionary view objects class Viewable(Protocol[_T_co]): def __len__(self) -> int: ... def __iter__(self) -> Iterator[_T_co]: ... class SupportsGetItemViewable(Protocol[_KT, _VT_co]): def __len__(self) -> int: ... def __iter__(self) -> Iterator[_KT]: ... def __getitem__(self, key: _KT, /) -> _VT_co: ... # Unfortunately PEP 688 does not allow us to distinguish read-only # from writable buffers. We use these aliases for readability for now. # Perhaps a future extension of the buffer protocol will allow us to # distinguish these cases in the type system. ReadOnlyBuffer: TypeAlias = Buffer # stable # Anything that implements the read-write buffer interface. WriteableBuffer: TypeAlias = Buffer # Same as WriteableBuffer, but also includes read-only buffer types (like bytes). ReadableBuffer: TypeAlias = Buffer # stable class SliceableBuffer(Buffer, Protocol): def __getitem__(self, slice: slice[SupportsIndex | None], /) -> Sequence[int]: ... class IndexableBuffer(Buffer, Protocol): def __getitem__(self, i: int, /) -> int: ... class SupportsGetItemBuffer(SliceableBuffer, IndexableBuffer, Protocol): def __contains__(self, x: Any, /) -> bool: ... @overload def __getitem__(self, slice: slice[SupportsIndex | None], /) -> Sequence[int]: ... @overload def __getitem__(self, i: int, /) -> int: ... class SizedBuffer(Sized, Buffer, Protocol): ... ExcInfo: TypeAlias = tuple[type[BaseException], BaseException, TracebackType] OptExcInfo: TypeAlias = ExcInfo | tuple[None, None, None] # stable if sys.version_info >= (3, 10): from types import NoneType as NoneType else: # Used by type checkers for checks involving None (does not exist at runtime) @final class NoneType: def __bool__(self) -> Literal[False]: ... # This is an internal CPython type that is like, but subtly different from, a NamedTuple # Subclasses of this type are found in multiple modules. 
# In typeshed, `structseq` is only ever used as a mixin in combination with a fixed-length `Tuple` # See discussion at #6546 & #6560 # `structseq` classes are unsubclassable, so are all decorated with `@final`. class structseq(Generic[_T_co]): n_fields: Final[int] n_unnamed_fields: Final[int] n_sequence_fields: Final[int] # The first parameter will generally only take an iterable of a specific length. # E.g. `os.uname_result` takes any iterable of length exactly 5. # # The second parameter will accept a dict of any kind without raising an exception, # but only has any meaning if you supply it a dict where the keys are strings. # https://github.com/python/typeshed/pull/6560#discussion_r767149830 def __new__(cls, sequence: Iterable[_T_co], dict: dict[str, Any] = ...) -> _Self: ... if sys.version_info >= (3, 13): def __replace__(self, **kwargs: Any) -> _Self: ... # Superset of typing.AnyStr that also includes LiteralString AnyOrLiteralStr = TypeVar('AnyOrLiteralStr', str, bytes, LiteralString) # Represents when str or LiteralStr is acceptable. 
Useful for string processing # APIs where literalness of return value depends on literalness of inputs StrOrLiteralStr = TypeVar('StrOrLiteralStr', LiteralString, str) # Objects suitable to be passed to sys.setprofile, threading.setprofile, and similar ProfileFunction: TypeAlias = Callable[[FrameType, str, Any], object] # Objects suitable to be passed to sys.settrace, threading.settrace, and similar TraceFunction: TypeAlias = Callable[[FrameType, str, Any], TraceFunction | None] # experimental # Might not work as expected for pyright, see # https://github.com/python/typeshed/pull/9362 # https://github.com/microsoft/pyright/issues/4339 class DataclassInstance(Protocol): __dataclass_fields__: ClassVar[dict[str, Field[Any]]] # Anything that can be passed to the int/float constructors if sys.version_info >= (3, 14): ConvertibleToInt: TypeAlias = str | ReadableBuffer | SupportsInt | SupportsIndex else: ConvertibleToInt: TypeAlias = str | ReadableBuffer | SupportsInt | SupportsIndex | SupportsTrunc ConvertibleToFloat: TypeAlias = str | ReadableBuffer | SupportsFloat | SupportsIndex # A few classes updated from Foo(str, Enum) to Foo(StrEnum). This is a convenience so these # can be accurate on all python versions without getting too wordy if sys.version_info >= (3, 11): from enum import StrEnum as StrEnum else: from enum import Enum class StrEnum(str, Enum): ... # Objects that appear in annotations or in type expressions. # Similar to PEP 747's TypeForm but a little broader. 
AnnotationForm: TypeAlias = Any if sys.version_info >= (3, 14): from annotationlib import Format # These return annotations, which can be arbitrary objects AnnotateFunc: TypeAlias = Callable[[Format], dict[str, AnnotationForm]] EvaluateFunc: TypeAlias = Callable[[Format], AnnotationForm] ================================================ FILE: crates/monty-typeshed/vendor/typeshed/stdlib/asyncio.pyi ================================================ from collections.abc import Awaitable, Generator from typing import Any, Literal, TypeAlias, TypeVar, overload _T = TypeVar('_T') _T1 = TypeVar('_T1') _T2 = TypeVar('_T2') _T3 = TypeVar('_T3') _T4 = TypeVar('_T4') _T5 = TypeVar('_T5') _T6 = TypeVar('_T6') class _Future(Awaitable[_T]): """ Minimal copy of Future from _typeshed/stdlib/_asyncio.pyi """ def __iter__(self) -> Generator[Any, None, _T]: ... def __await__(self) -> Generator[Any, None, _T]: ... _FutureLike: TypeAlias = _Future[_T] | Awaitable[_T] def run(main: Awaitable[_T], *, debug: bool | None = None, loop_factory: Any = None) -> _T: ... @overload def gather( coro_or_future1: _FutureLike[_T1], /, *, return_exceptions: Literal[False] = False ) -> _Future[tuple[_T1]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], /, *, return_exceptions: Literal[False] = False, ) -> _Future[tuple[_T1, _T2]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], /, *, return_exceptions: Literal[False] = False, ) -> _Future[tuple[_T1, _T2, _T3]]: ... @overload def gather( coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], coro_or_future3: _FutureLike[_T3], coro_or_future4: _FutureLike[_T4], /, *, return_exceptions: Literal[False] = False, ) -> _Future[tuple[_T1, _T2, _T3, _T4]]: ... 
# gather() overloads, continued. Arities 5 and 6 with `return_exceptions`
# Literal[False]: each tuple slot keeps the exact result type of its awaitable.
@overload
def gather(
    coro_or_future1: _FutureLike[_T1],
    coro_or_future2: _FutureLike[_T2],
    coro_or_future3: _FutureLike[_T3],
    coro_or_future4: _FutureLike[_T4],
    coro_or_future5: _FutureLike[_T5],
    /,
    *,
    return_exceptions: Literal[False] = False,
) -> _Future[tuple[_T1, _T2, _T3, _T4, _T5]]: ...
@overload
def gather(
    coro_or_future1: _FutureLike[_T1],
    coro_or_future2: _FutureLike[_T2],
    coro_or_future3: _FutureLike[_T3],
    coro_or_future4: _FutureLike[_T4],
    coro_or_future5: _FutureLike[_T5],
    coro_or_future6: _FutureLike[_T6],
    /,
    *,
    return_exceptions: Literal[False] = False,
) -> _Future[tuple[_T1, _T2, _T3, _T4, _T5, _T6]]: ...
# Variadic fallback: all awaitables share one element type and the result is a list.
@overload
def gather(*coros_or_futures: _FutureLike[_T], return_exceptions: Literal[False] = False) -> _Future[list[_T]]: ...
# Second overload family: `return_exceptions` is a required bool with no default,
# and every tuple slot is widened to `<result> | BaseException`.
@overload
def gather(coro_or_future1: _FutureLike[_T1], /, *, return_exceptions: bool) -> _Future[tuple[_T1 | BaseException]]: ...
@overload
def gather(
    coro_or_future1: _FutureLike[_T1], coro_or_future2: _FutureLike[_T2], /, *, return_exceptions: bool
) -> _Future[tuple[_T1 | BaseException, _T2 | BaseException]]: ...
@overload
def gather(
    coro_or_future1: _FutureLike[_T1],
    coro_or_future2: _FutureLike[_T2],
    coro_or_future3: _FutureLike[_T3],
    /,
    *,
    return_exceptions: bool,
) -> _Future[tuple[_T1 | BaseException, _T2 | BaseException, _T3 | BaseException]]: ...
@overload
def gather(
    coro_or_future1: _FutureLike[_T1],
    coro_or_future2: _FutureLike[_T2],
    coro_or_future3: _FutureLike[_T3],
    coro_or_future4: _FutureLike[_T4],
    /,
    *,
    return_exceptions: bool,
) -> _Future[tuple[_T1 | BaseException, _T2 | BaseException, _T3 | BaseException, _T4 | BaseException]]: ...
@overload
def gather(
    coro_or_future1: _FutureLike[_T1],
    coro_or_future2: _FutureLike[_T2],
    coro_or_future3: _FutureLike[_T3],
    coro_or_future4: _FutureLike[_T4],
    coro_or_future5: _FutureLike[_T5],
    /,
    *,
    return_exceptions: bool,
) -> _Future[
    tuple[_T1 | BaseException, _T2 | BaseException, _T3 | BaseException, _T4 | BaseException, _T5 | BaseException]
]: ...
# Last two gather() overloads: arity 6 and the variadic form, both with a required
# bool `return_exceptions`, so every result slot may also be a BaseException.
@overload
def gather(
    coro_or_future1: _FutureLike[_T1],
    coro_or_future2: _FutureLike[_T2],
    coro_or_future3: _FutureLike[_T3],
    coro_or_future4: _FutureLike[_T4],
    coro_or_future5: _FutureLike[_T5],
    coro_or_future6: _FutureLike[_T6],
    /,
    *,
    return_exceptions: bool,
) -> _Future[
    tuple[
        _T1 | BaseException,
        _T2 | BaseException,
        _T3 | BaseException,
        _T4 | BaseException,
        _T5 | BaseException,
        _T6 | BaseException,
    ]
]: ...
@overload
def gather(*coros_or_futures: _FutureLike[_T], return_exceptions: bool) -> _Future[list[_T | BaseException]]: ...

================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/builtins.pyi
================================================
import _sitebuiltins
import sys
import types
from _collections_abc import dict_items, dict_keys, dict_values
from collections.abc import Awaitable, Callable, Iterable, Iterator, MutableSet, Reversible, Set as AbstractSet, Sized
from types import GenericAlias, TracebackType
from typing import (
    Any,
    ClassVar,
    Final,
    Generic,
    MutableMapping,
    MutableSequence,
    Protocol,
    Sequence,
    SupportsAbs,
    SupportsBytes,
    SupportsFloat,
    SupportsIndex,
    TypeVar,
    final,
    overload,
    type_check_only,
)

import _typeshed
from _typeshed import (
    AnnotationForm,
    ConvertibleToFloat,
    ConvertibleToInt,
    ReadableBuffer,
    SupportsAdd,
    SupportsAnext,
    SupportsDivMod,
    SupportsFlush,
    SupportsKeysAndGetItem,
    SupportsLenAndGetItem,
    SupportsNext,
    SupportsRAdd,
    SupportsRDivMod,
    SupportsRichComparison,
    SupportsRichComparisonT,
    SupportsWrite,
)
from typing_extensions import (
    Literal,
    LiteralString,
    ParamSpec,
    Self,
    TypeAlias,
    TypeVarTuple,
    deprecated,
    disjoint_base,
)

# AnnotateFunc is only imported on 3.14+, matching its conditional definition use below.
if sys.version_info >= (3, 14):
    from _typeshed import AnnotateFunc

# Module-wide type variables. Suffixes follow the declared variance:
# `_co` is covariant, `_contra` is contravariant, unsuffixed ones are invariant.
_T = TypeVar('_T')
_I = TypeVar('_I', default=int)
_T_co = TypeVar('_T_co', covariant=True)
_T_contra = TypeVar('_T_contra', contravariant=True)
_R_co = TypeVar('_R_co', covariant=True)
_KT = TypeVar('_KT')
_VT = TypeVar('_VT')
_S = TypeVar('_S')
_T1 = TypeVar('_T1')
_T2 = TypeVar('_T2')
# Remaining module-wide type variables.
_T3 = TypeVar('_T3')
_T4 = TypeVar('_T4')
_T5 = TypeVar('_T5')
_SupportsNextT_co = TypeVar('_SupportsNextT_co', bound=SupportsNext[Any], covariant=True)
_SupportsAnextT_co = TypeVar('_SupportsAnextT_co', bound=SupportsAnext[Any], covariant=True)
_AwaitableT = TypeVar('_AwaitableT', bound=Awaitable[Any])
_AwaitableT_co = TypeVar('_AwaitableT_co', bound=Awaitable[Any], covariant=True)
_P = ParamSpec('_P')
# Type parameters of `slice`: the defaults chain, so an unspecified stop defaults to
# the start type and an unspecified step defaults to `start | stop`.
_StartT_co = TypeVar('_StartT_co', covariant=True, default=Any)
_StopT_co = TypeVar('_StopT_co', covariant=True, default=_StartT_co)
_StepT_co = TypeVar('_StepT_co', covariant=True, default=_StartT_co | _StopT_co)

# Stub for the builtin `object`.
@disjoint_base
class object:
    __doc__: str | None
    __dict__: dict[str, Any]
    __module__: str
    __annotations__: dict[str, Any]
    # `__class__` is declared as a read/write property typed as `type[Self]`.
    @property
    def __class__(self) -> type[Self]: ...
    @__class__.setter
    def __class__(self, type: type[Self], /) -> None: ...
    def __init__(self) -> None: ...
    def __new__(cls) -> Self: ...
    def __setattr__(self, name: str, value: Any, /) -> None: ...
    def __delattr__(self, name: str, /) -> None: ...
    def __eq__(self, value: object, /) -> bool: ...
    def __ne__(self, value: object, /) -> bool: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
    def __hash__(self) -> int: ...
    def __format__(self, format_spec: str, /) -> str: ...
    def __getattribute__(self, name: str, /) -> Any: ...
    def __sizeof__(self) -> int: ...
    def __reduce__(self) -> str | tuple[Any, ...]: ...
    def __reduce_ex__(self, protocol: SupportsIndex, /) -> str | tuple[Any, ...]: ...
    # Only declared for Python 3.11 and later.
    if sys.version_info >= (3, 11):
        def __getstate__(self) -> object: ...

    def __dir__(self) -> Iterable[str]: ...
    def __init_subclass__(cls) -> None: ...
    @classmethod
    def __subclasshook__(cls, subclass: type, /) -> bool: ...

# Stub for the builtin `type`. Some attributes are read-only properties, others
# plain attributes; `__dict__` is Final and typed as a MappingProxyType.
@disjoint_base
class type:
    @property
    def __base__(self) -> type | None: ...
    __bases__: tuple[type, ...]
    @property
    def __basicsize__(self) -> int: ...
    __dict__: Final[types.MappingProxyType[str, Any]]
    @property
    def __dictoffset__(self) -> int: ...
# Continuation of class `type`: remaining attributes, constructors and hooks.
    @property
    def __flags__(self) -> int: ...
    @property
    def __itemsize__(self) -> int: ...
    __module__: str
    @property
    def __mro__(self) -> tuple[type, ...]: ...
    __name__: str
    __qualname__: str
    @property
    def __text_signature__(self) -> str | None: ...
    @property
    def __weakrefoffset__(self) -> int: ...
    # Two call shapes for both __init__ and __new__: the one-argument form taking
    # any object, and the (name, bases, namespace) form with extra keywords.
    @overload
    def __init__(self, o: object, /) -> None: ...
    @overload
    def __init__(self, name: str, bases: tuple[type, ...], dict: dict[str, Any], /, **kwds: Any) -> None: ...
    @overload
    def __new__(cls, o: object, /) -> type: ...
    @overload
    def __new__(
        cls: type[_typeshed.Self], name: str, bases: tuple[type, ...], namespace: dict[str, Any], /, **kwds: Any
    ) -> _typeshed.Self: ...
    def __call__(self, *args: Any, **kwds: Any) -> Any: ...
    def __subclasses__(self: _typeshed.Self) -> list[_typeshed.Self]: ...
    def mro(self) -> list[type]: ...
    def __instancecheck__(self, instance: Any, /) -> bool: ...
    def __subclasscheck__(self, subclass: type, /) -> bool: ...
    @classmethod
    def __prepare__(metacls, name: str, bases: tuple[type, ...], /, **kwds: Any) -> MutableMapping[str, object]: ...
    # `X | Y` on classes is declared for 3.10+; the result is a UnionType or the
    # receiver's own type.
    if sys.version_info >= (3, 10):
        def __or__(self: _typeshed.Self, value: Any, /) -> types.UnionType | _typeshed.Self: ...
        def __ror__(self: _typeshed.Self, value: Any, /) -> types.UnionType | _typeshed.Self: ...
    if sys.version_info >= (3, 12):
        __type_params__: tuple[TypeVar | ParamSpec | TypeVarTuple, ...]
    __annotations__: dict[str, AnnotationForm]
    if sys.version_info >= (3, 14):
        __annotate__: AnnotateFunc | None

# Small integer literals, used by the `__pow__` / `__rpow__` overloads of int and
# float to give precise result types for literal exponents or bases.
_PositiveInteger: TypeAlias = Literal[
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
]
_NegativeInteger: TypeAlias = Literal[
    -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20
]
_LiteralInteger = _PositiveInteger | _NegativeInteger | Literal[0]

# Stub for the builtin `int`.
@disjoint_base
class int:
    # Single-argument constructor: accepts any ConvertibleToInt.
    @overload
    def __new__(cls, x: ConvertibleToInt = 0, /) -> Self: ...
# Continuation of class `int`: second constructor overload, numeric properties,
# byte conversion and arithmetic operators.
    # Constructor with an explicit base: only str / bytes / bytearray are accepted.
    @overload
    def __new__(cls, x: str | bytes | bytearray, /, base: SupportsIndex) -> Self: ...
    def as_integer_ratio(self) -> tuple[int, Literal[1]]: ...
    @property
    def real(self) -> int: ...
    @property
    def imag(self) -> Literal[0]: ...
    @property
    def numerator(self) -> int: ...
    @property
    def denominator(self) -> Literal[1]: ...
    def conjugate(self) -> int: ...
    def bit_length(self) -> int: ...
    if sys.version_info >= (3, 10):
        def bit_count(self) -> int: ...

    # On 3.11+ `length` and `byteorder` have defaults (1 and 'big'); on older
    # versions both must be passed explicitly.
    if sys.version_info >= (3, 11):
        def to_bytes(
            self, length: SupportsIndex = 1, byteorder: Literal['little', 'big'] = 'big', *, signed: bool = False
        ) -> bytes: ...
        @classmethod
        def from_bytes(
            cls,
            bytes: Iterable[SupportsIndex] | SupportsBytes | ReadableBuffer,
            byteorder: Literal['little', 'big'] = 'big',
            *,
            signed: bool = False,
        ) -> Self: ...
    else:
        def to_bytes(
            self, length: SupportsIndex, byteorder: Literal['little', 'big'], *, signed: bool = False
        ) -> bytes: ...
        @classmethod
        def from_bytes(
            cls,
            bytes: Iterable[SupportsIndex] | SupportsBytes | ReadableBuffer,
            byteorder: Literal['little', 'big'],
            *,
            signed: bool = False,
        ) -> Self: ...
    if sys.version_info >= (3, 12):
        def is_integer(self) -> Literal[True]: ...

    # Binary arithmetic: int with int gives int, except true division which gives
    # float and divmod which gives a pair of ints.
    def __add__(self, value: int, /) -> int: ...
    def __sub__(self, value: int, /) -> int: ...
    def __mul__(self, value: int, /) -> int: ...
    def __floordiv__(self, value: int, /) -> int: ...
    def __truediv__(self, value: int, /) -> float: ...
    def __mod__(self, value: int, /) -> int: ...
    def __divmod__(self, value: int, /) -> tuple[int, int]: ...
    def __radd__(self, value: int, /) -> int: ...
    def __rsub__(self, value: int, /) -> int: ...
    def __rmul__(self, value: int, /) -> int: ...
    def __rfloordiv__(self, value: int, /) -> int: ...
    def __rtruediv__(self, value: int, /) -> float: ...
    def __rmod__(self, value: int, /) -> int: ...
    def __rdivmod__(self, value: int, /) -> tuple[int, int]: ...
    # First __pow__ overload: a literal 0 exponent always yields Literal[1].
    @overload
    def __pow__(self, x: Literal[0], /) -> Literal[1]: ...
# Continuation of class `int`: remaining __pow__ overloads, bitwise operators,
# rounding, comparisons and conversions; then the start of class `float`.
    # Result type is keyed on the exponent: literal 0 gives Literal[1], a literal
    # in _PositiveInteger gives int, a literal in _NegativeInteger gives float, any
    # other int gives Any, and an int modulus always gives int.
    @overload
    def __pow__(self, value: Literal[0], mod: None, /) -> Literal[1]: ...
    @overload
    def __pow__(self, value: _PositiveInteger, mod: None = None, /) -> int: ...
    @overload
    def __pow__(self, value: _NegativeInteger, mod: None = None, /) -> float: ...
    @overload
    def __pow__(self, value: int, mod: None = None, /) -> Any: ...
    @overload
    def __pow__(self, value: int, mod: int, /) -> int: ...
    def __rpow__(self, value: int, mod: int | None = None, /) -> Any: ...
    # Bitwise and shift operators: int with int gives int.
    def __and__(self, value: int, /) -> int: ...
    def __or__(self, value: int, /) -> int: ...
    def __xor__(self, value: int, /) -> int: ...
    def __lshift__(self, value: int, /) -> int: ...
    def __rshift__(self, value: int, /) -> int: ...
    def __rand__(self, value: int, /) -> int: ...
    def __ror__(self, value: int, /) -> int: ...
    def __rxor__(self, value: int, /) -> int: ...
    def __rlshift__(self, value: int, /) -> int: ...
    def __rrshift__(self, value: int, /) -> int: ...
    def __neg__(self) -> int: ...
    def __pos__(self) -> int: ...
    def __invert__(self) -> int: ...
    def __trunc__(self) -> int: ...
    def __ceil__(self) -> int: ...
    def __floor__(self) -> int: ...
    # 3.14+ also accepts an explicit None for `ndigits`.
    if sys.version_info >= (3, 14):
        def __round__(self, ndigits: SupportsIndex | None = None, /) -> int: ...
    else:
        def __round__(self, ndigits: SupportsIndex = ..., /) -> int: ...

    def __getnewargs__(self) -> tuple[int]: ...
    # Equality accepts any object; ordering comparisons are declared against int only.
    def __eq__(self, value: object, /) -> bool: ...
    def __ne__(self, value: object, /) -> bool: ...
    def __lt__(self, value: int, /) -> bool: ...
    def __le__(self, value: int, /) -> bool: ...
    def __gt__(self, value: int, /) -> bool: ...
    def __ge__(self, value: int, /) -> bool: ...
    def __float__(self) -> float: ...
    def __int__(self) -> int: ...
    def __abs__(self) -> int: ...
    def __hash__(self) -> int: ...
    def __bool__(self) -> bool: ...
    def __index__(self) -> int: ...
    def __format__(self, format_spec: str, /) -> str: ...

# Stub for the builtin `float`.
@disjoint_base
class float:
    def __new__(cls, x: ConvertibleToFloat = 0, /) -> Self: ...
    def as_integer_ratio(self) -> tuple[int, int]: ...
# Continuation of class `float`: helpers, arithmetic operators, power overloads,
# rounding and comparisons.
    def hex(self) -> str: ...
    def is_integer(self) -> bool: ...
    @classmethod
    def fromhex(cls, string: str, /) -> Self: ...
    @property
    def real(self) -> float: ...
    @property
    def imag(self) -> float: ...
    def conjugate(self) -> float: ...
    # Binary arithmetic is declared against float operands and returns float
    # (divmod returns a pair of floats).
    def __add__(self, value: float, /) -> float: ...
    def __sub__(self, value: float, /) -> float: ...
    def __mul__(self, value: float, /) -> float: ...
    def __floordiv__(self, value: float, /) -> float: ...
    def __truediv__(self, value: float, /) -> float: ...
    def __mod__(self, value: float, /) -> float: ...
    def __divmod__(self, value: float, /) -> tuple[float, float]: ...
    # An int exponent yields float; a float exponent is typed as Any.
    @overload
    def __pow__(self, value: int, mod: None = None, /) -> float: ...
    @overload
    def __pow__(self, value: float, mod: None = None, /) -> Any: ...
    def __radd__(self, value: float, /) -> float: ...
    def __rsub__(self, value: float, /) -> float: ...
    def __rmul__(self, value: float, /) -> float: ...
    def __rfloordiv__(self, value: float, /) -> float: ...
    def __rtruediv__(self, value: float, /) -> float: ...
    def __rmod__(self, value: float, /) -> float: ...
    def __rdivmod__(self, value: float, /) -> tuple[float, float]: ...
    # Reflected power, keyed on the literal base: a _PositiveInteger base gives
    # float, a _NegativeInteger base gives complex, any other float base gives Any.
    @overload
    def __rpow__(self, value: _PositiveInteger, mod: None = None, /) -> float: ...
    @overload
    def __rpow__(self, value: _NegativeInteger, mod: None = None, /) -> complex: ...
    @overload
    def __rpow__(self, value: float, mod: None = None, /) -> Any: ...
    def __getnewargs__(self) -> tuple[float]: ...
    def __trunc__(self) -> int: ...
    def __ceil__(self) -> int: ...
    def __floor__(self) -> int: ...
    # round(x) with no digits returns int; with an explicit `ndigits` it returns float.
    @overload
    def __round__(self, ndigits: None = None, /) -> int: ...
    @overload
    def __round__(self, ndigits: SupportsIndex, /) -> float: ...
    # Equality accepts any object; ordering comparisons are declared against float only.
    def __eq__(self, value: object, /) -> bool: ...
    def __ne__(self, value: object, /) -> bool: ...
    def __lt__(self, value: float, /) -> bool: ...
    def __le__(self, value: float, /) -> bool: ...
    def __gt__(self, value: float, /) -> bool: ...
    def __ge__(self, value: float, /) -> bool: ...
    def __neg__(self) -> float: ...
# End of class `float`, two helper protocols, and the start of class `str`.
    def __pos__(self) -> float: ...
    def __int__(self) -> int: ...
    def __float__(self) -> float: ...
    def __abs__(self) -> float: ...
    def __hash__(self) -> int: ...
    def __bool__(self) -> bool: ...
    def __format__(self, format_spec: str, /) -> str: ...
    # Only declared for Python 3.14 and later.
    if sys.version_info >= (3, 14):
        @classmethod
        def from_number(cls, number: float | SupportsIndex | SupportsFloat, /) -> Self: ...

# Argument type of `str.format_map`: anything indexable by a str key.
@type_check_only
class _FormatMapMapping(Protocol):
    def __getitem__(self, key: str, /) -> Any: ...

# Argument type of `str.translate`: anything indexable by an int key, yielding a
# str, an int or None.
@type_check_only
class _TranslateTable(Protocol):
    def __getitem__(self, key: int, /) -> str | int | None: ...

# Stub for the builtin `str`. Many methods come as an overload pair: the first
# applies when `self` (and any string arguments) are LiteralString and returns
# LiteralString; the second is the plain-str fallback.
@disjoint_base
class str(Sequence[str]):
    # Constructor: from any object, or by decoding a buffer with encoding/errors.
    @overload
    def __new__(cls, object: object = '') -> Self: ...
    @overload
    def __new__(cls, object: ReadableBuffer, encoding: str = 'utf-8', errors: str = 'strict') -> Self: ...
    @overload
    def capitalize(self: LiteralString) -> LiteralString: ...
    @overload
    def capitalize(self) -> str: ...
    @overload
    def casefold(self: LiteralString) -> LiteralString: ...
    @overload
    def casefold(self) -> str: ...
    @overload
    def center(self: LiteralString, width: SupportsIndex, fillchar: LiteralString = ' ', /) -> LiteralString: ...
    @overload
    def center(self, width: SupportsIndex, fillchar: str = ' ', /) -> str: ...
    def count(self, sub: str, start: SupportsIndex | None = None, end: SupportsIndex | None = None, /) -> int: ...
    def encode(self, encoding: str = 'utf-8', errors: str = 'strict') -> bytes: ...
    def endswith(
        self, suffix: str | tuple[str, ...], start: SupportsIndex | None = None, end: SupportsIndex | None = None, /
    ) -> bool: ...
    @overload
    def expandtabs(self: LiteralString, tabsize: SupportsIndex = 8) -> LiteralString: ...
    @overload
    def expandtabs(self, tabsize: SupportsIndex = 8) -> str: ...
    def find(self, sub: str, start: SupportsIndex | None = None, end: SupportsIndex | None = None, /) -> int: ...
    # Literal-preserving form of format(): every argument must itself be LiteralString.
    @overload
    def format(self: LiteralString, *args: LiteralString, **kwargs: LiteralString) -> LiteralString: ...
# Continuation of class `str`: formatting, character-class predicates, joining,
# padding, stripping, partitioning and replace.
    # Plain-str fallback of format(): arbitrary objects in, str out.
    @overload
    def format(self, *args: object, **kwargs: object) -> str: ...
    def format_map(self, mapping: _FormatMapMapping, /) -> str: ...
    def index(self, sub: str, start: SupportsIndex | None = None, end: SupportsIndex | None = None, /) -> int: ...
    def isalnum(self) -> bool: ...
    def isalpha(self) -> bool: ...
    def isascii(self) -> bool: ...
    def isdecimal(self) -> bool: ...
    def isdigit(self) -> bool: ...
    def isidentifier(self) -> bool: ...
    def islower(self) -> bool: ...
    def isnumeric(self) -> bool: ...
    def isprintable(self) -> bool: ...
    def isspace(self) -> bool: ...
    def istitle(self) -> bool: ...
    def isupper(self) -> bool: ...
    # From here on most methods are LiteralString / str overload pairs.
    @overload
    def join(self: LiteralString, iterable: Iterable[LiteralString], /) -> LiteralString: ...
    @overload
    def join(self, iterable: Iterable[str], /) -> str: ...
    @overload
    def ljust(self: LiteralString, width: SupportsIndex, fillchar: LiteralString = ' ', /) -> LiteralString: ...
    @overload
    def ljust(self, width: SupportsIndex, fillchar: str = ' ', /) -> str: ...
    @overload
    def lower(self: LiteralString) -> LiteralString: ...
    @overload
    def lower(self) -> str: ...
    @overload
    def lstrip(self: LiteralString, chars: LiteralString | None = None, /) -> LiteralString: ...
    @overload
    def lstrip(self, chars: str | None = None, /) -> str: ...
    @overload
    def partition(self: LiteralString, sep: LiteralString, /) -> tuple[LiteralString, LiteralString, LiteralString]: ...
    @overload
    def partition(self, sep: str, /) -> tuple[str, str, str]: ...
    # On 3.13+ `count` may be passed by keyword; on older versions it is positional-only.
    if sys.version_info >= (3, 13):
        @overload
        def replace(
            self: LiteralString, old: LiteralString, new: LiteralString, /, count: SupportsIndex = -1
        ) -> LiteralString: ...
        @overload
        def replace(self, old: str, new: str, /, count: SupportsIndex = -1) -> str: ...
    else:
        @overload
        def replace(
            self: LiteralString, old: LiteralString, new: LiteralString, count: SupportsIndex = -1, /
        ) -> LiteralString: ...
        @overload
        def replace(self, old: str, new: str, count: SupportsIndex = -1, /) -> str: ...
# Continuation of class `str`: prefix/suffix removal, right-hand search and
# padding, splitting and stripping. Overload pairs keep LiteralString when every
# string input is literal, otherwise fall back to str.
    @overload
    def removeprefix(self: LiteralString, prefix: LiteralString, /) -> LiteralString: ...
    @overload
    def removeprefix(self, prefix: str, /) -> str: ...
    @overload
    def removesuffix(self: LiteralString, suffix: LiteralString, /) -> LiteralString: ...
    @overload
    def removesuffix(self, suffix: str, /) -> str: ...
    def rfind(self, sub: str, start: SupportsIndex | None = None, end: SupportsIndex | None = None, /) -> int: ...
    def rindex(self, sub: str, start: SupportsIndex | None = None, end: SupportsIndex | None = None, /) -> int: ...
    @overload
    def rjust(self: LiteralString, width: SupportsIndex, fillchar: LiteralString = ' ', /) -> LiteralString: ...
    @overload
    def rjust(self, width: SupportsIndex, fillchar: str = ' ', /) -> str: ...
    @overload
    def rpartition(
        self: LiteralString, sep: LiteralString, /
    ) -> tuple[LiteralString, LiteralString, LiteralString]: ...
    @overload
    def rpartition(self, sep: str, /) -> tuple[str, str, str]: ...
    # Unlike most methods here, the split family accepts `sep` and `maxsplit` by keyword.
    @overload
    def rsplit(
        self: LiteralString, sep: LiteralString | None = None, maxsplit: SupportsIndex = -1
    ) -> list[LiteralString]: ...
    @overload
    def rsplit(self, sep: str | None = None, maxsplit: SupportsIndex = -1) -> list[str]: ...
    @overload
    def rstrip(self: LiteralString, chars: LiteralString | None = None, /) -> LiteralString: ...
    @overload
    def rstrip(self, chars: str | None = None, /) -> str: ...
    @overload
    def split(
        self: LiteralString, sep: LiteralString | None = None, maxsplit: SupportsIndex = -1
    ) -> list[LiteralString]: ...
    @overload
    def split(self, sep: str | None = None, maxsplit: SupportsIndex = -1) -> list[str]: ...
    @overload
    def splitlines(self: LiteralString, keepends: bool = False) -> list[LiteralString]: ...
    @overload
    def splitlines(self, keepends: bool = False) -> list[str]: ...
    def startswith(
        self, prefix: str | tuple[str, ...], start: SupportsIndex | None = None, end: SupportsIndex | None = None, /
    ) -> bool: ...
    @overload
    def strip(self: LiteralString, chars: LiteralString | None = None, /) -> LiteralString: ...
# Continuation of class `str`: case conversion, translation tables and the
# operator / container dunders.
    @overload
    def strip(self, chars: str | None = None, /) -> str: ...
    @overload
    def swapcase(self: LiteralString) -> LiteralString: ...
    @overload
    def swapcase(self) -> str: ...
    @overload
    def title(self: LiteralString) -> LiteralString: ...
    @overload
    def title(self) -> str: ...
    def translate(self, table: _TranslateTable, /) -> str: ...
    @overload
    def upper(self: LiteralString) -> LiteralString: ...
    @overload
    def upper(self) -> str: ...
    @overload
    def zfill(self: LiteralString, width: SupportsIndex, /) -> LiteralString: ...
    @overload
    def zfill(self, width: SupportsIndex, /) -> str: ...
    # maketrans: the one-argument form takes a dict and keeps its value type; the
    # two-string form maps int to int; the three-string form may also map to None.
    @staticmethod
    @overload
    def maketrans(x: dict[int, _T] | dict[str, _T] | dict[str | int, _T], /) -> dict[int, _T]: ...
    @staticmethod
    @overload
    def maketrans(x: str, y: str, /) -> dict[int, int]: ...
    @staticmethod
    @overload
    def maketrans(x: str, y: str, z: str, /) -> dict[int, int | None]: ...
    @overload
    def __add__(self: LiteralString, value: LiteralString, /) -> LiteralString: ...
    @overload
    def __add__(self, value: str, /) -> str: ...
    def __contains__(self, key: str, /) -> bool: ...
    def __eq__(self, value: object, /) -> bool: ...
    def __ge__(self, value: str, /) -> bool: ...
    # Indexing and slicing share one signature; both return a string.
    @overload
    def __getitem__(self: LiteralString, key: SupportsIndex | slice[SupportsIndex | None], /) -> LiteralString: ...
    @overload
    def __getitem__(self, key: SupportsIndex | slice[SupportsIndex | None], /) -> str: ...
    def __gt__(self, value: str, /) -> bool: ...
    def __hash__(self) -> int: ...
    @overload
    def __iter__(self: LiteralString) -> Iterator[LiteralString]: ...
    @overload
    def __iter__(self) -> Iterator[str]: ...
    def __le__(self, value: str, /) -> bool: ...
    def __len__(self) -> int: ...
    def __lt__(self, value: str, /) -> bool: ...
    # %-formatting stays literal only when the right operand is a LiteralString or
    # a tuple of them.
    @overload
    def __mod__(self: LiteralString, value: LiteralString | tuple[LiteralString, ...], /) -> LiteralString: ...
    @overload
    def __mod__(self, value: Any, /) -> str: ...
    @overload
    def __mul__(self: LiteralString, value: SupportsIndex, /) -> LiteralString: ...
# End of class `str`, then the start of class `bytes`.
    @overload
    def __mul__(self, value: SupportsIndex, /) -> str: ...
    def __ne__(self, value: object, /) -> bool: ...
    @overload
    def __rmul__(self: LiteralString, value: SupportsIndex, /) -> LiteralString: ...
    @overload
    def __rmul__(self, value: SupportsIndex, /) -> str: ...
    def __getnewargs__(self) -> tuple[str]: ...
    def __format__(self, format_spec: str, /) -> str: ...

# Stub for the builtin `bytes`, declared as a Sequence of int.
@disjoint_base
class bytes(Sequence[int]):
    # Three constructor shapes: from an iterable / index / buffer-like object, from
    # a str with a mandatory encoding, or with no arguments.
    @overload
    def __new__(cls, o: Iterable[SupportsIndex] | SupportsIndex | SupportsBytes | ReadableBuffer, /) -> Self: ...
    @overload
    def __new__(cls, string: str, /, encoding: str, errors: str = 'strict') -> Self: ...
    @overload
    def __new__(cls) -> Self: ...
    def capitalize(self) -> bytes: ...
    def center(self, width: SupportsIndex, fillchar: bytes = b' ', /) -> bytes: ...
    # Search methods accept either a buffer or a single index-like value as `sub`.
    def count(
        self,
        sub: ReadableBuffer | SupportsIndex,
        start: SupportsIndex | None = None,
        end: SupportsIndex | None = None,
        /,
    ) -> int: ...
    def decode(self, encoding: str = 'utf-8', errors: str = 'strict') -> str: ...
    def endswith(
        self,
        suffix: ReadableBuffer | tuple[ReadableBuffer, ...],
        start: SupportsIndex | None = None,
        end: SupportsIndex | None = None,
        /,
    ) -> bool: ...
    def expandtabs(self, tabsize: SupportsIndex = 8) -> bytes: ...
    def find(
        self,
        sub: ReadableBuffer | SupportsIndex,
        start: SupportsIndex | None = None,
        end: SupportsIndex | None = None,
        /,
    ) -> int: ...
    def hex(self, sep: str | bytes = ..., bytes_per_sep: SupportsIndex = 1) -> str: ...
    def index(
        self,
        sub: ReadableBuffer | SupportsIndex,
        start: SupportsIndex | None = None,
        end: SupportsIndex | None = None,
        /,
    ) -> int: ...
    def isalnum(self) -> bool: ...
    def isalpha(self) -> bool: ...
    def isascii(self) -> bool: ...
    def isdigit(self) -> bool: ...
    def islower(self) -> bool: ...
    def isspace(self) -> bool: ...
    def istitle(self) -> bool: ...
    def isupper(self) -> bool: ...
    def join(self, iterable_of_bytes: Iterable[ReadableBuffer], /) -> bytes: ...
# Continuation of class `bytes`: padding, stripping, searching from the right,
# splitting, translation and hex parsing. Inputs are generally ReadableBuffer;
# results are always `bytes`.
    def ljust(self, width: SupportsIndex, fillchar: bytes | bytearray = b' ', /) -> bytes: ...
    def lower(self) -> bytes: ...
    def lstrip(self, bytes: ReadableBuffer | None = None, /) -> bytes: ...
    def partition(self, sep: ReadableBuffer, /) -> tuple[bytes, bytes, bytes]: ...
    def replace(self, old: ReadableBuffer, new: ReadableBuffer, count: SupportsIndex = -1, /) -> bytes: ...
    def removeprefix(self, prefix: ReadableBuffer, /) -> bytes: ...
    def removesuffix(self, suffix: ReadableBuffer, /) -> bytes: ...
    def rfind(
        self,
        sub: ReadableBuffer | SupportsIndex,
        start: SupportsIndex | None = None,
        end: SupportsIndex | None = None,
        /,
    ) -> int: ...
    def rindex(
        self,
        sub: ReadableBuffer | SupportsIndex,
        start: SupportsIndex | None = None,
        end: SupportsIndex | None = None,
        /,
    ) -> int: ...
    def rjust(self, width: SupportsIndex, fillchar: bytes | bytearray = b' ', /) -> bytes: ...
    def rpartition(self, sep: ReadableBuffer, /) -> tuple[bytes, bytes, bytes]: ...
    def rsplit(self, sep: ReadableBuffer | None = None, maxsplit: SupportsIndex = -1) -> list[bytes]: ...
    def rstrip(self, bytes: ReadableBuffer | None = None, /) -> bytes: ...
    def split(self, sep: ReadableBuffer | None = None, maxsplit: SupportsIndex = -1) -> list[bytes]: ...
    def splitlines(self, keepends: bool = False) -> list[bytes]: ...
    def startswith(
        self,
        prefix: ReadableBuffer | tuple[ReadableBuffer, ...],
        start: SupportsIndex | None = None,
        end: SupportsIndex | None = None,
        /,
    ) -> bool: ...
    def strip(self, bytes: ReadableBuffer | None = None, /) -> bytes: ...
    def swapcase(self) -> bytes: ...
    def title(self) -> bytes: ...
    # `table` is required but may be None; `delete` may be passed by keyword.
    def translate(self, table: ReadableBuffer | None, /, delete: ReadableBuffer = b'') -> bytes: ...
    def upper(self) -> bytes: ...
    def zfill(self, width: SupportsIndex, /) -> bytes: ...
    # 3.14+ additionally accepts a ReadableBuffer; earlier versions accept str only.
    if sys.version_info >= (3, 14):
        @classmethod
        def fromhex(cls, string: str | ReadableBuffer, /) -> Self: ...
    else:
        @classmethod
        def fromhex(cls, string: str, /) -> Self: ...
# End of class `bytes`, the `_IntegerFormats` alias, and the start of class `bool`.
    @staticmethod
    def maketrans(frm: ReadableBuffer, to: ReadableBuffer, /) -> bytes: ...
    def __len__(self) -> int: ...
    def __iter__(self) -> Iterator[int]: ...
    def __hash__(self) -> int: ...
    # Indexing yields a single int; slicing yields bytes.
    @overload
    def __getitem__(self, key: SupportsIndex, /) -> int: ...
    @overload
    def __getitem__(self, key: slice[SupportsIndex | None], /) -> bytes: ...
    def __add__(self, value: ReadableBuffer, /) -> bytes: ...
    def __mul__(self, value: SupportsIndex, /) -> bytes: ...
    def __rmul__(self, value: SupportsIndex, /) -> bytes: ...
    def __mod__(self, value: Any, /) -> bytes: ...
    def __contains__(self, key: SupportsIndex | ReadableBuffer, /) -> bool: ...
    def __eq__(self, value: object, /) -> bool: ...
    def __ne__(self, value: object, /) -> bool: ...
    def __lt__(self, value: bytes, /) -> bool: ...
    def __le__(self, value: bytes, /) -> bool: ...
    def __gt__(self, value: bytes, /) -> bool: ...
    def __ge__(self, value: bytes, /) -> bool: ...
    def __getnewargs__(self) -> tuple[bytes]: ...
    if sys.version_info >= (3, 11):
        def __bytes__(self) -> bytes: ...

    def __buffer__(self, flags: int, /) -> memoryview: ...

# Literal set of single-letter format codes, each optionally prefixed with '@'.
# NOTE(review): not referenced anywhere in this chunk; presumably consumed by a
# buffer-related stub further down the file. Confirm before removing.
_IntegerFormats: TypeAlias = Literal[
    'b', 'B', '@b', '@B', 'h', 'H', '@h', '@H', 'i', 'I', '@i', '@I', 'l', 'L', '@l', '@L', 'q', 'Q', '@q', '@Q', 'P', '@P',
]

# Stub for the builtin `bool`: a final subclass of int.
@final
class bool(int):
    def __new__(cls, o: object = False, /) -> Self: ...
    # Bitwise operators return bool only when the other operand is also bool; with
    # an int operand the result is int.
    @overload
    def __and__(self, value: bool, /) -> bool: ...
    @overload
    def __and__(self, value: int, /) -> int: ...
    @overload
    def __or__(self, value: bool, /) -> bool: ...
    @overload
    def __or__(self, value: int, /) -> int: ...
    @overload
    def __xor__(self, value: bool, /) -> bool: ...
    @overload
    def __xor__(self, value: int, /) -> int: ...
    @overload
    def __rand__(self, value: bool, /) -> bool: ...
    @overload
    def __rand__(self, value: int, /) -> int: ...
    @overload
    def __ror__(self, value: bool, /) -> bool: ...
    @overload
    def __ror__(self, value: int, /) -> int: ...
    @overload
    def __rxor__(self, value: bool, /) -> bool: ...
    @overload
    def __rxor__(self, value: int, /) -> int: ...
# End of class `bool`, the whole of class `slice`, and the start of class `tuple`.
    def __getnewargs__(self) -> tuple[int]: ...
    # `~flag` is marked deprecated for type checkers; it still returns int.
    @deprecated('Will throw an error in Python 3.16. Use `not` for logical negation of bools instead.')
    def __invert__(self) -> int: ...

# Stub for the builtin `slice`, generic over the types of start, stop and step.
@final
class slice(Generic[_StartT_co, _StopT_co, _StepT_co]):
    @property
    def start(self) -> _StartT_co: ...
    @property
    def step(self) -> _StepT_co: ...
    @property
    def stop(self) -> _StopT_co: ...
    # Constructor overloads enumerate the None / non-None combinations. In every
    # overload a None (or omitted) argument maps to Any in the matching type
    # parameter, while a non-None argument contributes its own type.
    @overload
    def __new__(cls, start: None, stop: None = None, step: None = None, /) -> slice[Any, Any, Any]: ...
    @overload
    def __new__(cls, stop: _T2, /) -> slice[Any, _T2, Any]: ...
    @overload
    def __new__(cls, start: _T1, stop: None, step: None = None, /) -> slice[_T1, Any, Any]: ...
    @overload
    def __new__(cls, start: None, stop: _T2, step: None = None, /) -> slice[Any, _T2, Any]: ...
    @overload
    def __new__(cls, start: _T1, stop: _T2, step: None = None, /) -> slice[_T1, _T2, Any]: ...
    @overload
    def __new__(cls, start: None, stop: None, step: _T3, /) -> slice[Any, Any, _T3]: ...
    @overload
    def __new__(cls, start: _T1, stop: None, step: _T3, /) -> slice[_T1, Any, _T3]: ...
    @overload
    def __new__(cls, start: None, stop: _T2, step: _T3, /) -> slice[Any, _T2, _T3]: ...
    @overload
    def __new__(cls, start: _T1, stop: _T2, step: _T3, /) -> slice[_T1, _T2, _T3]: ...
    def __eq__(self, value: object, /) -> bool: ...
    # Declared hashable from 3.12 on; before that `__hash__` is None.
    if sys.version_info >= (3, 12):
        def __hash__(self) -> int: ...
    else:
        __hash__: ClassVar[None]

    def indices(self, len: SupportsIndex, /) -> tuple[int, int, int]: ...

# Stub for the builtin `tuple`, covariant in its element type.
@disjoint_base
class tuple(Sequence[_T_co]):
    def __new__(cls, iterable: Iterable[_T_co] = (), /) -> Self: ...
    def __len__(self) -> int: ...
    def __contains__(self, key: object, /) -> bool: ...
    # Indexing yields one element; slicing yields a variable-length tuple.
    @overload
    def __getitem__(self, key: SupportsIndex, /) -> _T_co: ...
    @overload
    def __getitem__(self, key: slice[SupportsIndex | None], /) -> tuple[_T_co, ...]: ...
    def __iter__(self) -> Iterator[_T_co]: ...
    def __lt__(self, value: tuple[_T_co, ...], /) -> bool: ...
    def __le__(self, value: tuple[_T_co, ...], /) -> bool: ...
# End of class `tuple`, then class `list` up to its `__setitem__` overloads.
    def __gt__(self, value: tuple[_T_co, ...], /) -> bool: ...
    def __ge__(self, value: tuple[_T_co, ...], /) -> bool: ...
    def __eq__(self, value: object, /) -> bool: ...
    def __hash__(self) -> int: ...
    # Concatenating with a tuple of another element type widens the result to the
    # union of both element types.
    @overload
    def __add__(self, value: tuple[_T_co, ...], /) -> tuple[_T_co, ...]: ...
    @overload
    def __add__(self, value: tuple[_T, ...], /) -> tuple[_T_co | _T, ...]: ...
    def __mul__(self, value: SupportsIndex, /) -> tuple[_T_co, ...]: ...
    def __rmul__(self, value: SupportsIndex, /) -> tuple[_T_co, ...]: ...
    def count(self, value: Any, /) -> int: ...
    def index(self, value: Any, start: SupportsIndex = 0, stop: SupportsIndex = sys.maxsize, /) -> int: ...
    def __class_getitem__(cls, item: Any, /) -> GenericAlias: ...

# Stub for the builtin `list`, invariant in its element type.
@disjoint_base
class list(MutableSequence[_T]):
    @overload
    def __init__(self) -> None: ...
    @overload
    def __init__(self, iterable: Iterable[_T], /) -> None: ...
    def copy(self) -> list[_T]: ...
    def append(self, object: _T, /) -> None: ...
    def extend(self, iterable: Iterable[_T], /) -> None: ...
    def pop(self, index: SupportsIndex = -1, /) -> _T: ...
    def index(self, value: _T, start: SupportsIndex = 0, stop: SupportsIndex = sys.maxsize, /) -> int: ...
    def count(self, value: _T, /) -> int: ...
    def insert(self, index: SupportsIndex, object: _T, /) -> None: ...
    def remove(self, value: _T, /) -> None: ...
    # sort(): without a key the elements themselves must support rich comparison;
    # with a key, only the key's return value must.
    @overload
    def sort(self: list[SupportsRichComparisonT], *, key: None = None, reverse: bool = False) -> None: ...
    @overload
    def sort(self, *, key: Callable[[_T], SupportsRichComparison], reverse: bool = False) -> None: ...
    def __len__(self) -> int: ...
    def __iter__(self) -> Iterator[_T]: ...
    # Setting `__hash__` to None marks the class as unhashable.
    __hash__: ClassVar[None]
    # Indexing yields one element; slicing yields a new list.
    @overload
    def __getitem__(self, i: SupportsIndex, /) -> _T: ...
    @overload
    def __getitem__(self, s: slice[SupportsIndex | None], /) -> list[_T]: ...
    # Index assignment takes one element; slice assignment takes an iterable.
    @overload
    def __setitem__(self, key: SupportsIndex, value: _T, /) -> None: ...
    @overload
    def __setitem__(self, key: slice[SupportsIndex | None], value: Iterable[_T], /) -> None: ...
# End of class `list`, then class `dict` up to the first `fromkeys` overload.
    def __delitem__(self, key: SupportsIndex | slice[SupportsIndex | None], /) -> None: ...
    # Adding a list of another element type widens the result to the union.
    @overload
    def __add__(self, value: list[_T], /) -> list[_T]: ...
    @overload
    def __add__(self, value: list[_S], /) -> list[_S | _T]: ...
    # In-place operators return Self rather than a new list type.
    def __iadd__(self, value: Iterable[_T], /) -> Self: ...
    def __mul__(self, value: SupportsIndex, /) -> list[_T]: ...
    def __rmul__(self, value: SupportsIndex, /) -> list[_T]: ...
    def __imul__(self, value: SupportsIndex, /) -> Self: ...
    def __contains__(self, key: object, /) -> bool: ...
    def __reversed__(self) -> Iterator[_T]: ...
    def __gt__(self, value: list[_T], /) -> bool: ...
    def __ge__(self, value: list[_T], /) -> bool: ...
    def __lt__(self, value: list[_T], /) -> bool: ...
    def __le__(self, value: list[_T], /) -> bool: ...
    def __eq__(self, value: object, /) -> bool: ...
    def __class_getitem__(cls, item: Any, /) -> GenericAlias: ...

# Stub for the builtin `dict`.
@disjoint_base
class dict(MutableMapping[_KT, _VT]):
    # Constructor overloads: empty, keyword arguments only, from a mapping-like
    # object, from an iterable of pairs, and the mapping / iterable forms combined
    # with keyword arguments. Any form taking **kwargs pins the key type to str
    # through the annotated `self`.
    @overload
    def __init__(self, /) -> None: ...
    @overload
    def __init__(self: dict[str, _VT], /, **kwargs: _VT) -> None: ...
    @overload
    def __init__(self, map: SupportsKeysAndGetItem[_KT, _VT], /) -> None: ...
    @overload
    def __init__(self: dict[str, _VT], map: SupportsKeysAndGetItem[str, _VT], /, **kwargs: _VT) -> None: ...
    @overload
    def __init__(self, iterable: Iterable[tuple[_KT, _VT]], /) -> None: ...
    @overload
    def __init__(self: dict[str, _VT], iterable: Iterable[tuple[str, _VT]], /, **kwargs: _VT) -> None: ...
    # Special cases: an iterable of lists of str or of bytes, producing
    # dict[str, str] or dict[bytes, bytes] respectively.
    @overload
    def __init__(self: dict[str, str], iterable: Iterable[list[str]], /) -> None: ...
    @overload
    def __init__(self: dict[bytes, bytes], iterable: Iterable[list[bytes]], /) -> None: ...
    def __new__(cls, /, *args: Any, **kwargs: Any) -> Self: ...
    def copy(self) -> dict[_KT, _VT]: ...
    def keys(self) -> dict_keys[_KT, _VT]: ...
    def values(self) -> dict_values[_KT, _VT]: ...
    def items(self) -> dict_items[_KT, _VT]: ...
    # fromkeys() without a value: the value type is `Any | None`.
    @classmethod
    @overload
    def fromkeys(cls, iterable: Iterable[_T], value: None = None, /) -> dict[_T, Any | None]: ...
@classmethod @overload def fromkeys(cls, iterable: Iterable[_T], value: _S, /) -> dict[_T, _S]: ... @overload def get(self, key: _KT, default: None = None, /) -> _VT | None: ... @overload def get(self, key: _KT, default: _VT, /) -> _VT: ... @overload def get(self, key: _KT, default: _T, /) -> _VT | _T: ... @overload def pop(self, key: _KT, /) -> _VT: ... @overload def pop(self, key: _KT, default: _VT, /) -> _VT: ... @overload def pop(self, key: _KT, default: _T, /) -> _VT | _T: ... def __len__(self) -> int: ... def __getitem__(self, key: _KT, /) -> _VT: ... def __setitem__(self, key: _KT, value: _VT, /) -> None: ... def __delitem__(self, key: _KT, /) -> None: ... def __iter__(self) -> Iterator[_KT]: ... def __eq__(self, value: object, /) -> bool: ... def __reversed__(self) -> Iterator[_KT]: ... __hash__: ClassVar[None] def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... @overload def __or__(self, value: dict[_KT, _VT], /) -> dict[_KT, _VT]: ... @overload def __or__(self, value: dict[_T1, _T2], /) -> dict[_KT | _T1, _VT | _T2]: ... @overload def __ror__(self, value: dict[_KT, _VT], /) -> dict[_KT, _VT]: ... @overload def __ror__(self, value: dict[_T1, _T2], /) -> dict[_KT | _T1, _VT | _T2]: ... @overload def __ior__(self, value: SupportsKeysAndGetItem[_KT, _VT], /) -> Self: ... @overload def __ior__(self, value: Iterable[tuple[_KT, _VT]], /) -> Self: ... @disjoint_base class set(MutableSet[_T]): @overload def __init__(self) -> None: ... @overload def __init__(self, iterable: Iterable[_T], /) -> None: ... def add(self, element: _T, /) -> None: ... def copy(self) -> set[_T]: ... def difference(self, *s: Iterable[object]) -> set[_T]: ... def difference_update(self, *s: Iterable[object]) -> None: ... def discard(self, element: object, /) -> None: ... def intersection(self, *s: Iterable[object]) -> set[_T]: ... def intersection_update(self, *s: Iterable[object]) -> None: ... def isdisjoint(self, s: Iterable[object], /) -> bool: ... 
def issubset(self, s: Iterable[object], /) -> bool: ... def issuperset(self, s: Iterable[object], /) -> bool: ... def remove(self, element: _T, /) -> None: ... def symmetric_difference(self, s: Iterable[_S], /) -> set[_T | _S]: ... def symmetric_difference_update(self, s: Iterable[_T], /) -> None: ... def union(self, *s: Iterable[_S]) -> set[_T | _S]: ... def update(self, *s: Iterable[_T]) -> None: ... def __len__(self) -> int: ... def __contains__(self, o: object, /) -> bool: ... def __iter__(self) -> Iterator[_T]: ... def __and__(self, value: AbstractSet[object], /) -> set[_T]: ... def __iand__(self, value: AbstractSet[object], /) -> Self: ... def __or__(self, value: AbstractSet[_S], /) -> set[_T | _S]: ... def __ior__(self, value: AbstractSet[_T], /) -> Self: ... def __sub__(self, value: AbstractSet[object], /) -> set[_T]: ... def __isub__(self, value: AbstractSet[object], /) -> Self: ... def __xor__(self, value: AbstractSet[_S], /) -> set[_T | _S]: ... def __ixor__(self, value: AbstractSet[_T], /) -> Self: ... def __le__(self, value: AbstractSet[object], /) -> bool: ... def __lt__(self, value: AbstractSet[object], /) -> bool: ... def __ge__(self, value: AbstractSet[object], /) -> bool: ... def __gt__(self, value: AbstractSet[object], /) -> bool: ... def __eq__(self, value: object, /) -> bool: ... __hash__: ClassVar[None] def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... @disjoint_base class frozenset(AbstractSet[_T_co]): @overload def __new__(cls) -> Self: ... @overload def __new__(cls, iterable: Iterable[_T_co], /) -> Self: ... def copy(self) -> frozenset[_T_co]: ... def difference(self, *s: Iterable[object]) -> frozenset[_T_co]: ... def intersection(self, *s: Iterable[object]) -> frozenset[_T_co]: ... def isdisjoint(self, s: Iterable[object], /) -> bool: ... def issubset(self, s: Iterable[object], /) -> bool: ... def issuperset(self, s: Iterable[object], /) -> bool: ... 
def symmetric_difference(self, s: Iterable[_S], /) -> frozenset[_T_co | _S]: ... def union(self, *s: Iterable[_S]) -> frozenset[_T_co | _S]: ... def __len__(self) -> int: ... def __contains__(self, o: object, /) -> bool: ... def __iter__(self) -> Iterator[_T_co]: ... def __and__(self, value: AbstractSet[object], /) -> frozenset[_T_co]: ... def __or__(self, value: AbstractSet[_S], /) -> frozenset[_T_co | _S]: ... def __sub__(self, value: AbstractSet[object], /) -> frozenset[_T_co]: ... def __xor__(self, value: AbstractSet[_S], /) -> frozenset[_T_co | _S]: ... def __le__(self, value: AbstractSet[object], /) -> bool: ... def __lt__(self, value: AbstractSet[object], /) -> bool: ... def __ge__(self, value: AbstractSet[object], /) -> bool: ... def __gt__(self, value: AbstractSet[object], /) -> bool: ... def __eq__(self, value: object, /) -> bool: ... def __hash__(self) -> int: ... def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... @disjoint_base class enumerate(Generic[_T]): def __new__(cls, iterable: Iterable[_T], start: int = 0) -> Self: ... def __iter__(self) -> Self: ... def __next__(self) -> tuple[int, _T]: ... def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... @final class range(Sequence[int]): @property def start(self) -> int: ... @property def stop(self) -> int: ... @property def step(self) -> int: ... @overload def __new__(cls, stop: SupportsIndex, /) -> Self: ... @overload def __new__(cls, start: SupportsIndex, stop: SupportsIndex, step: SupportsIndex = 1, /) -> Self: ... def count(self, value: int, /) -> int: ... def index(self, value: int, /) -> int: ... def __len__(self) -> int: ... def __eq__(self, value: object, /) -> bool: ... def __hash__(self) -> int: ... def __contains__(self, key: object, /) -> bool: ... def __iter__(self) -> Iterator[int]: ... @overload def __getitem__(self, key: SupportsIndex, /) -> int: ... @overload def __getitem__(self, key: slice[SupportsIndex | None], /) -> range: ... 
def __reversed__(self) -> Iterator[int]: ... @disjoint_base class property: fget: Callable[[Any], Any] | None fset: Callable[[Any, Any], None] | None fdel: Callable[[Any], None] | None __isabstractmethod__: bool if sys.version_info >= (3, 13): __name__: str def __init__( self, fget: Callable[[Any], Any] | None = None, fset: Callable[[Any, Any], None] | None = None, fdel: Callable[[Any], None] | None = None, doc: str | None = None, ) -> None: ... def getter(self, fget: Callable[[Any], Any], /) -> property: ... def setter(self, fset: Callable[[Any, Any], None], /) -> property: ... def deleter(self, fdel: Callable[[Any], None], /) -> property: ... @overload def __get__(self, instance: None, owner: type, /) -> Self: ... @overload def __get__(self, instance: Any, owner: type | None = None, /) -> Any: ... def __set__(self, instance: Any, value: Any, /) -> None: ... def __delete__(self, instance: Any, /) -> None: ... def abs(x: SupportsAbs[_T], /) -> _T: ... def all(iterable: Iterable[object], /) -> bool: ... def any(iterable: Iterable[object], /) -> bool: ... def bin(number: SupportsIndex, /) -> str: ... def chr(i: SupportsIndex, /) -> str: ... if sys.version_info >= (3, 10): @type_check_only class _SupportsSynchronousAnext(Protocol[_AwaitableT_co]): def __anext__(self) -> _AwaitableT_co: ... copyright: _sitebuiltins._Printer credits: _sitebuiltins._Printer @overload def divmod(x: SupportsDivMod[_T_contra, _T_co], y: _T_contra, /) -> _T_co: ... @overload def divmod(x: _T_contra, y: SupportsRDivMod[_T_contra, _T_co], /) -> _T_co: ... exit: _sitebuiltins.Quitter def hash(obj: object, /) -> int: ... help: _sitebuiltins._Helper def hex(number: SupportsIndex, /) -> str: ... def id(obj: object, /) -> int: ... @type_check_only class _GetItemIterable(Protocol[_T_co]): def __getitem__(self, i: int, /) -> _T_co: ... if sys.version_info >= (3, 10): _ClassInfo: TypeAlias = type | types.UnionType | tuple[_ClassInfo, ...] else: _ClassInfo: TypeAlias = type | tuple[_ClassInfo, ...] 
# Type stub for `isinstance`. Both parameters are positional-only (`/`);
# `class_or_tuple` uses the recursive `_ClassInfo` alias, so nested tuples of
# classes are accepted.
def isinstance(obj: object, class_or_tuple: _ClassInfo, /) -> bool: ...

# Type stub for `len`: takes any `Sized` object positionally and returns `int`.
def len(obj: Sized, /) -> int: ...

# Module-level attribute typed as `_sitebuiltins._Printer`.
license: _sitebuiltins._Printer

# `max` overloads, in declaration order:
#   1. two or more positional arguments, no `key`  -> elements must support rich comparison
#   2. two or more positional arguments, with `key` -> arbitrary element type `_T`
#   3. a single iterable, no `key`
#   4. a single iterable, with `key`
#   5. a single iterable, no `key`, with `default` -> the default's type joins the return type
#   6. a single iterable, with `key` and `default`
# `key` and `default` are keyword-only in the iterable forms.
@overload
def max(
    arg1: SupportsRichComparisonT, arg2: SupportsRichComparisonT, /, *_args: SupportsRichComparisonT, key: None = None
) -> SupportsRichComparisonT: ...
@overload
def max(arg1: _T, arg2: _T, /, *_args: _T, key: Callable[[_T], SupportsRichComparison]) -> _T: ...
@overload
def max(iterable: Iterable[SupportsRichComparisonT], /, *, key: None = None) -> SupportsRichComparisonT: ...
@overload
def max(iterable: Iterable[_T], /, *, key: Callable[[_T], SupportsRichComparison]) -> _T: ...
@overload
def max(
    iterable: Iterable[SupportsRichComparisonT], /, *, key: None = None, default: _T
) -> SupportsRichComparisonT | _T: ...
@overload
def max(iterable: Iterable[_T1], /, *, key: Callable[[_T1], SupportsRichComparison], default: _T2) -> _T1 | _T2: ...

# `min` mirrors the six `max` overloads above one-for-one (same parameter
# shapes and return types).
@overload
def min(
    arg1: SupportsRichComparisonT, arg2: SupportsRichComparisonT, /, *_args: SupportsRichComparisonT, key: None = None
) -> SupportsRichComparisonT: ...
@overload
def min(arg1: _T, arg2: _T, /, *_args: _T, key: Callable[[_T], SupportsRichComparison]) -> _T: ...
@overload
def min(iterable: Iterable[SupportsRichComparisonT], /, *, key: None = None) -> SupportsRichComparisonT: ...
@overload
def min(iterable: Iterable[_T], /, *, key: Callable[[_T], SupportsRichComparison]) -> _T: ...
@overload
def min(
    iterable: Iterable[SupportsRichComparisonT], /, *, key: None = None, default: _T
) -> SupportsRichComparisonT | _T: ...
@overload
def min(iterable: Iterable[_T1], /, *, key: Callable[[_T1], SupportsRichComparison], default: _T2) -> _T1 | _T2: ...

# Type stub for `oct`: accepts anything implementing `__index__`, returns `str`.
def oct(number: SupportsIndex, /) -> str: ...

# Alias for a callable taking `(str, int)` and returning `int`.
# NOTE(review): not referenced by any declaration in this span; presumably the
# `opener` callback type of an `open()` stub declared elsewhere -- confirm.
_Opener: TypeAlias = Callable[[str, int], int]

# Type stub for `ord`: accepts `str`, `bytes` or `bytearray`, returns `int`.
def ord(c: str | bytes | bytearray, /) -> int: ...

# Type-check-only protocol that combines `SupportsWrite` with `SupportsFlush`,
# i.e. a writable object that can also be flushed.
@type_check_only
class _SupportsWriteAndFlush(SupportsWrite[_T_contra], SupportsFlush, Protocol[_T_contra]): ...
# `print` overloads. The first covers the default `flush=False`, where `file`
# only needs to support writing; the second covers an explicitly passed
# `flush: bool`, where `file` must also support flushing.
@overload
def print(
    *values: object,
    sep: str | None = ' ',
    end: str | None = '\n',
    file: SupportsWrite[str] | None = None,
    flush: Literal[False] = False,
) -> None: ...
@overload
def print(
    *values: object,
    sep: str | None = ' ',
    end: str | None = '\n',
    file: _SupportsWriteAndFlush[str] | None = None,
    flush: bool,
) -> None: ...

# Contravariant type variables for the exponent (`_E_contra`) and modulus
# (`_M_contra`) positions of the `__pow__` protocols below.
_E_contra = TypeVar('_E_contra', contravariant=True)
_M_contra = TypeVar('_M_contra', contravariant=True)

# Protocol: `__pow__` taking only an exponent.
@type_check_only
class _SupportsPow2(Protocol[_E_contra, _T_co]):
    def __pow__(self, other: _E_contra, /) -> _T_co: ...

# Protocol: `__pow__` whose optional third argument may only be `None`.
@type_check_only
class _SupportsPow3NoneOnly(Protocol[_E_contra, _T_co]):
    def __pow__(self, other: _E_contra, modulo: None = None, /) -> _T_co: ...

# Protocol: `__pow__` taking an exponent and a required modulus.
@type_check_only
class _SupportsPow3(Protocol[_E_contra, _M_contra, _T_co]):
    def __pow__(self, other: _E_contra, modulo: _M_contra, /) -> _T_co: ...

# Union of the three `__pow__` protocol shapes with all parameters erased to `Any`.
_SupportsSomeKindOfPow = _SupportsPow2[Any, Any] | _SupportsPow3NoneOnly[Any, Any] | _SupportsPow3[Any, Any, Any]

# `pow` overloads. Grouped by the declared type of `base`:
#   - int base: explicit int modulus -> int; literal-0 exponent -> Literal[1];
#     positive-literal exponent -> int; negative-literal exponent -> float;
#     any other int exponent -> Any.
#   - positive/negative literal-int base with float exponent -> float / complex.
#   - float and complex bases.
#   - bases described only by one of the `__pow__` protocols above.
# `_PositiveInteger` and `_NegativeInteger` are declared outside this span.
@overload
def pow(base: int, exp: int, mod: int) -> int: ...
@overload
def pow(base: int, exp: Literal[0], mod: None = None) -> Literal[1]: ...
@overload
def pow(base: int, exp: _PositiveInteger, mod: None = None) -> int: ...
@overload
def pow(base: int, exp: _NegativeInteger, mod: None = None) -> float: ...
@overload
def pow(base: int, exp: int, mod: None = None) -> Any: ...
@overload
def pow(base: _PositiveInteger, exp: float, mod: None = None) -> float: ...
@overload
def pow(base: _NegativeInteger, exp: float, mod: None = None) -> complex: ...
@overload
def pow(base: float, exp: int, mod: None = None) -> float: ...
@overload
def pow(base: float, exp: complex | _SupportsSomeKindOfPow, mod: None = None) -> Any: ...
@overload
def pow(base: complex, exp: complex | _SupportsSomeKindOfPow, mod: None = None) -> complex: ...
@overload
def pow(base: _SupportsPow2[_E_contra, _T_co], exp: _E_contra, mod: None = None) -> _T_co: ...
@overload
def pow(base: _SupportsPow3NoneOnly[_E_contra, _T_co], exp: _E_contra, mod: None = None) -> _T_co: ...
@overload
def pow(base: _SupportsPow3[_E_contra, _M_contra, _T_co], exp: _E_contra, mod: _M_contra) -> _T_co: ...
@overload
def pow(base: _SupportsSomeKindOfPow, exp: float, mod: None = None) -> Any: ...
@overload
def pow(base: _SupportsSomeKindOfPow, exp: complex, mod: None = None) -> complex: ...

# Module-level attribute typed as `_sitebuiltins.Quitter`.
quit: _sitebuiltins.Quitter

# Type stub for `reversed`. Note that both `__new__` overloads are annotated
# as returning `Iterator[_T]` rather than `Self`; the accepted inputs are either
# a `Reversible` or an object supporting `__len__` and `__getitem__`.
@disjoint_base
class reversed(Generic[_T]):
    @overload
    def __new__(cls, sequence: Reversible[_T], /) -> Iterator[_T]: ...
    @overload
    def __new__(cls, sequence: SupportsLenAndGetItem[_T], /) -> Iterator[_T]: ...
    def __iter__(self) -> Self: ...
    def __next__(self) -> _T: ...
    def __length_hint__(self) -> int: ...

# Type stub for `repr`: any object in, `str` out.
def repr(obj: object, /) -> str: ...

# Protocol: `__round__` called without `ndigits`.
@type_check_only
class _SupportsRound1(Protocol[_T_co]):
    def __round__(self) -> _T_co: ...

# Protocol: `__round__` called with an integer `ndigits`.
@type_check_only
class _SupportsRound2(Protocol[_T_co]):
    def __round__(self, ndigits: int, /) -> _T_co: ...

# `round` overloads: without `ndigits` (or with `None`) the result type comes
# from the zero-argument `__round__`; with `ndigits` it comes from the
# one-argument `__round__`.
@overload
def round(number: _SupportsRound1[_T], ndigits: None = None) -> _T: ...
@overload
def round(number: _SupportsRound2[_T], ndigits: SupportsIndex) -> _T: ...

# `sorted` overloads: without `key` the elements themselves must support rich
# comparison; with `key` only the key function's result must. Both return a
# `list`. `key` and `reverse` are keyword-only.
@overload
def sorted(
    iterable: Iterable[SupportsRichComparisonT], /, *, key: None = None, reverse: bool = False
) -> list[SupportsRichComparisonT]: ...
@overload
def sorted(
    iterable: Iterable[_T], /, *, key: Callable[[_T], SupportsRichComparison], reverse: bool = False
) -> list[_T]: ...

# Type variables bounded by `SupportsAdd`, for the element and `start` types of `sum`.
_AddableT1 = TypeVar('_AddableT1', bound=SupportsAdd[Any, Any])
_AddableT2 = TypeVar('_AddableT2', bound=SupportsAdd[Any, Any])

# Protocol for elements usable with `sum` when no `start` is passed: they must
# support `__add__` and also `__radd__` with an `int` left operand.
@type_check_only
class _SupportsSumWithNoDefaultGiven(SupportsAdd[Any, Any], SupportsRAdd[int, Any], Protocol): ...

_SupportsSumNoDefaultT = TypeVar('_SupportsSumNoDefaultT', bound=_SupportsSumWithNoDefaultGiven)

# First two `sum` overloads: bool/literal-int elements with an int `start`
# give `int`; other elements without `start` give the element type or `Literal[0]`.
@overload
def sum(iterable: Iterable[bool | _LiteralInteger], /, start: int = 0) -> int: ...
@overload
def sum(iterable: Iterable[_SupportsSumNoDefaultT], /) -> _SupportsSumNoDefaultT | Literal[0]: ...
# `sum` overload taking an explicit `start`: the result is the union of the
# element type and the `start` type.
@overload
def sum(iterable: Iterable[_AddableT1], /, start: _AddableT2) -> _AddableT1 | _AddableT2: ...

# Type stub for `zip`. `__new__` is overloaded by arity so that up to five
# input iterables produce a precisely typed tuple; six or more fall back to
# `tuple[Any, ...]`. The keyword-only `strict` parameter is declared only in the
# Python >= 3.10 branch.
@disjoint_base
class zip(Generic[_T_co]):
    if sys.version_info >= (3, 10):
        @overload
        def __new__(cls, *, strict: bool = False) -> zip[Any]: ...
        @overload
        def __new__(cls, iter1: Iterable[_T1], /, *, strict: bool = False) -> zip[tuple[_T1]]: ...
        @overload
        def __new__(
            cls, iter1: Iterable[_T1], iter2: Iterable[_T2], /, *, strict: bool = False
        ) -> zip[tuple[_T1, _T2]]: ...
        @overload
        def __new__(
            cls, iter1: Iterable[_T1], iter2: Iterable[_T2], iter3: Iterable[_T3], /, *, strict: bool = False
        ) -> zip[tuple[_T1, _T2, _T3]]: ...
        @overload
        def __new__(
            cls,
            iter1: Iterable[_T1],
            iter2: Iterable[_T2],
            iter3: Iterable[_T3],
            iter4: Iterable[_T4],
            /,
            *,
            strict: bool = False,
        ) -> zip[tuple[_T1, _T2, _T3, _T4]]: ...
        @overload
        def __new__(
            cls,
            iter1: Iterable[_T1],
            iter2: Iterable[_T2],
            iter3: Iterable[_T3],
            iter4: Iterable[_T4],
            iter5: Iterable[_T5],
            /,
            *,
            strict: bool = False,
        ) -> zip[tuple[_T1, _T2, _T3, _T4, _T5]]: ...
        # Six or more iterables: element types are no longer tracked.
        @overload
        def __new__(
            cls,
            iter1: Iterable[Any],
            iter2: Iterable[Any],
            iter3: Iterable[Any],
            iter4: Iterable[Any],
            iter5: Iterable[Any],
            iter6: Iterable[Any],
            /,
            *iterables: Iterable[Any],
            strict: bool = False,
        ) -> zip[tuple[Any, ...]]: ...
    else:
        # Same arity ladder as above, without the `strict` keyword.
        @overload
        def __new__(cls) -> zip[Any]: ...
        @overload
        def __new__(cls, iter1: Iterable[_T1], /) -> zip[tuple[_T1]]: ...
        @overload
        def __new__(cls, iter1: Iterable[_T1], iter2: Iterable[_T2], /) -> zip[tuple[_T1, _T2]]: ...
        @overload
        def __new__(
            cls, iter1: Iterable[_T1], iter2: Iterable[_T2], iter3: Iterable[_T3], /
        ) -> zip[tuple[_T1, _T2, _T3]]: ...
        @overload
        def __new__(
            cls, iter1: Iterable[_T1], iter2: Iterable[_T2], iter3: Iterable[_T3], iter4: Iterable[_T4], /
        ) -> zip[tuple[_T1, _T2, _T3, _T4]]: ...
        @overload
        def __new__(
            cls,
            iter1: Iterable[_T1],
            iter2: Iterable[_T2],
            iter3: Iterable[_T3],
            iter4: Iterable[_T4],
            iter5: Iterable[_T5],
            /,
        ) -> zip[tuple[_T1, _T2, _T3, _T4, _T5]]: ...
        @overload
        def __new__(
            cls,
            iter1: Iterable[Any],
            iter2: Iterable[Any],
            iter3: Iterable[Any],
            iter4: Iterable[Any],
            iter5: Iterable[Any],
            iter6: Iterable[Any],
            /,
            *iterables: Iterable[Any],
        ) -> zip[tuple[Any, ...]]: ...

    def __iter__(self) -> Self: ...
    def __next__(self) -> _T_co: ...

# `Ellipsis` / `NotImplemented` singletons. On Python >= 3.10 their types are
# imported from `types`, and `ellipsis` is kept as an alias of `EllipsisType`.
if sys.version_info >= (3, 10):
    from types import EllipsisType, NotImplementedType

    ellipsis = EllipsisType

    Ellipsis: EllipsisType
    NotImplemented: NotImplementedType
else:
    # NOTE(review): `ellipsis` is referenced here but is not declared in this
    # branch within the visible span -- confirm it is defined earlier in the file.
    Ellipsis: ellipsis

    @final
    @type_check_only
    class _NotImplementedType(Any): ...

    NotImplemented: _NotImplementedType

# Root of the exception hierarchy declared below.
@disjoint_base
class BaseException:
    args: tuple[Any, ...]
    __cause__: BaseException | None
    __context__: BaseException | None
    __suppress_context__: bool
    __traceback__: TracebackType | None
    def __init__(self, *args: object) -> None: ...
    def __new__(cls, *args: Any, **kwds: Any) -> Self: ...
    def __setstate__(self, state: dict[str, Any] | None, /) -> None: ...
    def with_traceback(self, tb: TracebackType | None, /) -> Self: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
    # `__notes__` and `add_note` are declared only for Python >= 3.11.
    if sys.version_info >= (3, 11):
        __notes__: list[str]
        def add_note(self, note: str, /) -> None: ...

class KeyboardInterrupt(BaseException): ...

@disjoint_base
class SystemExit(BaseException):
    code: sys._ExitCode

class Exception(BaseException): ...

@disjoint_base
class StopIteration(Exception):
    value: Any

@disjoint_base
class OSError(Exception):
    errno: int | None
    strerror: str | None
    filename: Any
    filename2: Any
    # `winerror` is declared only when the platform is Windows.
    if sys.platform == 'win32':
        winerror: int

# Aliases of `OSError`; `WindowsError` is declared only on Windows.
EnvironmentError = OSError
IOError = OSError
if sys.platform == 'win32':
    WindowsError = OSError

class ArithmeticError(Exception): ...
class AssertionError(Exception): ...

# On Python >= 3.10 `AttributeError` accepts keyword-only `name` and `obj`
# and exposes them as attributes; earlier versions get a plain subclass.
if sys.version_info >= (3, 10):
    @disjoint_base
    class AttributeError(Exception):
        def __init__(self, *args: object, name: str | None = None, obj: object = None) -> None: ...
        name: str | None
        obj: object

else:
    class AttributeError(Exception): ...

class LookupError(Exception): ...
class MemoryError(Exception): ...
# On Python >= 3.10 `NameError` accepts a keyword-only `name` and exposes it
# as an attribute; earlier versions get a plain `Exception` subclass.
if sys.version_info >= (3, 10):
    @disjoint_base
    class NameError(Exception):
        def __init__(self, *args: object, name: str | None = None) -> None: ...
        name: str | None

else:
    class NameError(Exception): ...

class RuntimeError(Exception): ...

# `SyntaxError` carries source-location attributes. `end_lineno`/`end_offset`
# and the six-element `info` tuple overload of `__init__` are declared only for
# Python >= 3.10.
@disjoint_base
class SyntaxError(Exception):
    msg: str
    filename: str | None
    lineno: int | None
    offset: int | None
    text: str | None
    print_file_and_line: None
    if sys.version_info >= (3, 10):
        end_lineno: int | None
        end_offset: int | None

    # Constructor forms: no arguments; a single message of any type; or a
    # message plus a 4-tuple typed (str | None, int | None, int | None, str | None).
    # NOTE(review): by position these presumably map to filename, lineno, offset,
    # text -- the stub itself does not name the tuple fields; confirm.
    @overload
    def __init__(self) -> None: ...
    @overload
    def __init__(self, msg: object, /) -> None: ...
    @overload
    def __init__(self, msg: str, info: tuple[str | None, int | None, int | None, str | None], /) -> None: ...
    if sys.version_info >= (3, 10):
        # Extended 6-tuple form adding two trailing `int | None` entries.
        @overload
        def __init__(
            self, msg: str, info: tuple[str | None, int | None, int | None, str | None, int | None, int | None], /
        ) -> None: ...

# Plain subclasses with no extra members; the base class of each shows where it
# sits in the hierarchy.
class TypeError(Exception): ...
class ValueError(Exception): ...
class OverflowError(ArithmeticError): ...
class ZeroDivisionError(ArithmeticError): ...
class IndexError(LookupError): ...
class KeyError(LookupError): ...
class TimeoutError(OSError): ...
class NotImplementedError(RuntimeError): ...
class RecursionError(RuntimeError): ...
if sys.version_info >= (3, 11): _BaseExceptionT_co = TypeVar('_BaseExceptionT_co', bound=BaseException, covariant=True, default=BaseException) _BaseExceptionT = TypeVar('_BaseExceptionT', bound=BaseException) _ExceptionT_co = TypeVar('_ExceptionT_co', bound=Exception, covariant=True, default=Exception) _ExceptionT = TypeVar('_ExceptionT', bound=Exception) ================================================ FILE: crates/monty-typeshed/vendor/typeshed/stdlib/collections/__init__.pyi ================================================ import sys from _collections_abc import dict_items, dict_keys, dict_values from types import GenericAlias from typing import Any, ClassVar, Generic, NoReturn, SupportsIndex, TypeVar, final, overload, type_check_only from _typeshed import SupportsItems, SupportsKeysAndGetItem, SupportsRichComparison, SupportsRichComparisonT from typing_extensions import Self, disjoint_base if sys.version_info >= (3, 10): from collections.abc import ( Callable, ItemsView, Iterable, Iterator, KeysView, Mapping, MutableMapping, MutableSequence, Sequence, ValuesView, ) else: from _collections_abc import * __all__ = [ 'ChainMap', 'Counter', 'OrderedDict', 'UserDict', 'UserList', 'UserString', 'defaultdict', 'deque', 'namedtuple', ] _S = TypeVar('_S') _T = TypeVar('_T') _T1 = TypeVar('_T1') _T2 = TypeVar('_T2') _KT = TypeVar('_KT') _VT = TypeVar('_VT') _KT_co = TypeVar('_KT_co', covariant=True) _VT_co = TypeVar('_VT_co', covariant=True) # namedtuple is special-cased in the type checker; the initializer is ignored. def namedtuple( typename: str, field_names: str | Iterable[str], *, rename: bool = False, module: str | None = None, defaults: Iterable[Any] | None = None, ) -> type[tuple[Any, ...]]: ... class UserDict(MutableMapping[_KT, _VT]): data: dict[_KT, _VT] # __init__ should be kept roughly in line with `dict.__init__`, which has the same semantics @overload def __init__(self, dict: None = None, /) -> None: ... 
@overload def __init__( self: UserDict[str, _VT], dict: None = None, /, **kwargs: _VT, # pyright: ignore[reportInvalidTypeVarUse] #11780 ) -> None: ... @overload def __init__(self, dict: SupportsKeysAndGetItem[_KT, _VT], /) -> None: ... @overload def __init__( self: UserDict[str, _VT], # pyright: ignore[reportInvalidTypeVarUse] #11780 dict: SupportsKeysAndGetItem[str, _VT], /, **kwargs: _VT, ) -> None: ... @overload def __init__(self, iterable: Iterable[tuple[_KT, _VT]], /) -> None: ... @overload def __init__( self: UserDict[str, _VT], # pyright: ignore[reportInvalidTypeVarUse] #11780 iterable: Iterable[tuple[str, _VT]], /, **kwargs: _VT, ) -> None: ... @overload def __init__(self: UserDict[str, str], iterable: Iterable[list[str]], /) -> None: ... @overload def __init__(self: UserDict[bytes, bytes], iterable: Iterable[list[bytes]], /) -> None: ... def __len__(self) -> int: ... def __getitem__(self, key: _KT) -> _VT: ... def __setitem__(self, key: _KT, item: _VT) -> None: ... def __delitem__(self, key: _KT) -> None: ... def __iter__(self) -> Iterator[_KT]: ... def __contains__(self, key: object) -> bool: ... def copy(self) -> Self: ... def __copy__(self) -> Self: ... # `UserDict.fromkeys` has the same semantics as `dict.fromkeys`, so should be kept in line with `dict.fromkeys`. # TODO: Much like `dict.fromkeys`, the true signature of `UserDict.fromkeys` is inexpressible in the current type system. # See #3800 & https://github.com/python/typing/issues/548#issuecomment-683336963. @classmethod @overload def fromkeys(cls, iterable: Iterable[_T], value: None = None) -> UserDict[_T, Any | None]: ... @classmethod @overload def fromkeys(cls, iterable: Iterable[_T], value: _S) -> UserDict[_T, _S]: ... @overload def __or__(self, other: UserDict[_KT, _VT] | dict[_KT, _VT]) -> Self: ... @overload def __or__(self, other: UserDict[_T1, _T2] | dict[_T1, _T2]) -> UserDict[_KT | _T1, _VT | _T2]: ... @overload def __ror__(self, other: UserDict[_KT, _VT] | dict[_KT, _VT]) -> Self: ... 
@overload def __ror__(self, other: UserDict[_T1, _T2] | dict[_T1, _T2]) -> UserDict[_KT | _T1, _VT | _T2]: ... # UserDict.__ior__ should be kept roughly in line with MutableMapping.update() @overload # type: ignore[misc] def __ior__(self, other: SupportsKeysAndGetItem[_KT, _VT]) -> Self: ... @overload def __ior__(self, other: Iterable[tuple[_KT, _VT]]) -> Self: ... if sys.version_info >= (3, 12): @overload def get(self, key: _KT, default: None = None) -> _VT | None: ... @overload def get(self, key: _KT, default: _VT) -> _VT: ... @overload def get(self, key: _KT, default: _T) -> _VT | _T: ... class UserList(MutableSequence[_T]): data: list[_T] @overload def __init__(self, initlist: None = None) -> None: ... @overload def __init__(self, initlist: Iterable[_T]) -> None: ... __hash__: ClassVar[None] # type: ignore[assignment] def __lt__(self, other: list[_T] | UserList[_T]) -> bool: ... def __le__(self, other: list[_T] | UserList[_T]) -> bool: ... def __gt__(self, other: list[_T] | UserList[_T]) -> bool: ... def __ge__(self, other: list[_T] | UserList[_T]) -> bool: ... def __eq__(self, other: object) -> bool: ... def __contains__(self, item: object) -> bool: ... def __len__(self) -> int: ... @overload def __getitem__(self, i: SupportsIndex) -> _T: ... @overload def __getitem__(self, i: slice[SupportsIndex | None]) -> Self: ... @overload def __setitem__(self, i: SupportsIndex, item: _T) -> None: ... @overload def __setitem__(self, i: slice[SupportsIndex | None], item: Iterable[_T]) -> None: ... def __delitem__(self, i: SupportsIndex | slice[SupportsIndex | None]) -> None: ... def __add__(self, other: Iterable[_T]) -> Self: ... def __radd__(self, other: Iterable[_T]) -> Self: ... def __iadd__(self, other: Iterable[_T]) -> Self: ... def __mul__(self, n: int) -> Self: ... def __rmul__(self, n: int) -> Self: ... def __imul__(self, n: int) -> Self: ... def append(self, item: _T) -> None: ... def insert(self, i: int, item: _T) -> None: ... 
def pop(self, i: int = -1) -> _T: ... def remove(self, item: _T) -> None: ... def copy(self) -> Self: ... def __copy__(self) -> Self: ... def count(self, item: _T) -> int: ... # The runtime signature is "item, *args", and the arguments are then passed # to `list.index`. In order to give more precise types, we pretend that the # `item` argument is positional-only. def index(self, item: _T, start: SupportsIndex = 0, stop: SupportsIndex = sys.maxsize, /) -> int: ... # All arguments are passed to `list.sort` at runtime, so the signature should be kept in line with `list.sort`. @overload def sort(self: UserList[SupportsRichComparisonT], *, key: None = None, reverse: bool = False) -> None: ... @overload def sort(self, *, key: Callable[[_T], SupportsRichComparison], reverse: bool = False) -> None: ... def extend(self, other: Iterable[_T]) -> None: ... class UserString(Sequence[UserString]): data: str def __init__(self, seq: object) -> None: ... def __int__(self) -> int: ... def __float__(self) -> float: ... def __complex__(self) -> complex: ... def __getnewargs__(self) -> tuple[str]: ... def __lt__(self, string: str | UserString) -> bool: ... def __le__(self, string: str | UserString) -> bool: ... def __gt__(self, string: str | UserString) -> bool: ... def __ge__(self, string: str | UserString) -> bool: ... def __eq__(self, string: object) -> bool: ... def __hash__(self) -> int: ... def __contains__(self, char: object) -> bool: ... def __len__(self) -> int: ... def __getitem__(self, index: SupportsIndex | slice[SupportsIndex | None]) -> Self: ... def __iter__(self) -> Iterator[Self]: ... def __reversed__(self) -> Iterator[Self]: ... def __add__(self, other: object) -> Self: ... def __radd__(self, other: object) -> Self: ... def __mul__(self, n: int) -> Self: ... def __rmul__(self, n: int) -> Self: ... def __mod__(self, args: Any) -> Self: ... def __rmod__(self, template: object) -> Self: ... def capitalize(self) -> Self: ... def casefold(self) -> Self: ... 
def center(self, width: int, *args: Any) -> Self: ... def count(self, sub: str | UserString, start: int = 0, end: int = sys.maxsize) -> int: ... def encode(self: UserString, encoding: str | None = 'utf-8', errors: str | None = 'strict') -> bytes: ... def endswith(self, suffix: str | tuple[str, ...], start: int | None = 0, end: int | None = sys.maxsize) -> bool: ... def expandtabs(self, tabsize: int = 8) -> Self: ... def find(self, sub: str | UserString, start: int = 0, end: int = sys.maxsize) -> int: ... def format(self, *args: Any, **kwds: Any) -> str: ... def format_map(self, mapping: Mapping[str, Any]) -> str: ... def index(self, sub: str, start: int = 0, end: int = sys.maxsize) -> int: ... def isalpha(self) -> bool: ... def isalnum(self) -> bool: ... def isdecimal(self) -> bool: ... def isdigit(self) -> bool: ... def isidentifier(self) -> bool: ... def islower(self) -> bool: ... def isnumeric(self) -> bool: ... def isprintable(self) -> bool: ... def isspace(self) -> bool: ... def istitle(self) -> bool: ... def isupper(self) -> bool: ... def isascii(self) -> bool: ... def join(self, seq: Iterable[str]) -> str: ... def ljust(self, width: int, *args: Any) -> Self: ... def lower(self) -> Self: ... def lstrip(self, chars: str | None = None) -> Self: ... maketrans = str.maketrans def partition(self, sep: str) -> tuple[str, str, str]: ... def removeprefix(self, prefix: str | UserString, /) -> Self: ... def removesuffix(self, suffix: str | UserString, /) -> Self: ... def replace(self, old: str | UserString, new: str | UserString, maxsplit: int = -1) -> Self: ... def rfind(self, sub: str | UserString, start: int = 0, end: int = sys.maxsize) -> int: ... def rindex(self, sub: str | UserString, start: int = 0, end: int = sys.maxsize) -> int: ... def rjust(self, width: int, *args: Any) -> Self: ... def rpartition(self, sep: str) -> tuple[str, str, str]: ... def rstrip(self, chars: str | None = None) -> Self: ... 
def split(self, sep: str | None = None, maxsplit: int = -1) -> list[str]: ... def rsplit(self, sep: str | None = None, maxsplit: int = -1) -> list[str]: ... def splitlines(self, keepends: bool = False) -> list[str]: ... def startswith( self, prefix: str | tuple[str, ...], start: int | None = 0, end: int | None = sys.maxsize ) -> bool: ... def strip(self, chars: str | None = None) -> Self: ... def swapcase(self) -> Self: ... def title(self) -> Self: ... def translate(self, *args: Any) -> Self: ... def upper(self) -> Self: ... def zfill(self, width: int) -> Self: ... @disjoint_base class deque(MutableSequence[_T]): @property def maxlen(self) -> int | None: ... @overload def __init__(self, *, maxlen: int | None = None) -> None: ... @overload def __init__(self, iterable: Iterable[_T], maxlen: int | None = None) -> None: ... def append(self, x: _T, /) -> None: ... def appendleft(self, x: _T, /) -> None: ... def copy(self) -> Self: ... def count(self, x: _T, /) -> int: ... def extend(self, iterable: Iterable[_T], /) -> None: ... def extendleft(self, iterable: Iterable[_T], /) -> None: ... def insert(self, i: int, x: _T, /) -> None: ... def index(self, x: _T, start: int = 0, stop: int = ..., /) -> int: ... def pop(self) -> _T: ... # type: ignore[override] def popleft(self) -> _T: ... def remove(self, value: _T, /) -> None: ... def rotate(self, n: int = 1, /) -> None: ... def __copy__(self) -> Self: ... def __len__(self) -> int: ... __hash__: ClassVar[None] # type: ignore[assignment] # These methods of deque don't take slices, unlike MutableSequence, hence the type: ignores def __getitem__(self, key: SupportsIndex, /) -> _T: ... # type: ignore[override] def __setitem__(self, key: SupportsIndex, value: _T, /) -> None: ... # type: ignore[override] def __delitem__(self, key: SupportsIndex, /) -> None: ... # type: ignore[override] def __contains__(self, key: object, /) -> bool: ... def __reduce__(self) -> tuple[type[Self], tuple[()], None, Iterator[_T]]: ... 
def __iadd__(self, value: Iterable[_T], /) -> Self: ... def __add__(self, value: Self, /) -> Self: ... def __mul__(self, value: int, /) -> Self: ... def __imul__(self, value: int, /) -> Self: ... def __lt__(self, value: deque[_T], /) -> bool: ... def __le__(self, value: deque[_T], /) -> bool: ... def __gt__(self, value: deque[_T], /) -> bool: ... def __ge__(self, value: deque[_T], /) -> bool: ... def __eq__(self, value: object, /) -> bool: ... def __class_getitem__(cls, item: Any, /) -> GenericAlias: ... class Counter(dict[_T, int], Generic[_T]): @overload def __init__(self, iterable: None = None, /) -> None: ... @overload def __init__(self: Counter[str], iterable: None = None, /, **kwargs: int) -> None: ... @overload def __init__(self, mapping: SupportsKeysAndGetItem[_T, int], /) -> None: ... @overload def __init__(self, iterable: Iterable[_T], /) -> None: ... def copy(self) -> Self: ... def elements(self) -> Iterator[_T]: ... def most_common(self, n: int | None = None) -> list[tuple[_T, int]]: ... @classmethod def fromkeys(cls, iterable: Any, v: int | None = None) -> NoReturn: ... # type: ignore[override] @overload def subtract(self, iterable: None = None, /) -> None: ... @overload def subtract(self, mapping: Mapping[_T, int], /) -> None: ... @overload def subtract(self, iterable: Iterable[_T], /) -> None: ... # Unlike dict.update(), use Mapping instead of SupportsKeysAndGetItem for the first overload # (source code does an `isinstance(other, Mapping)` check) # # The second overload is also deliberately different to dict.update() # (if it were `Iterable[_T] | Iterable[tuple[_T, int]]`, # the tuples would be added as keys, breaking type safety) @overload # type: ignore[override] def update(self, m: Mapping[_T, int], /, **kwargs: int) -> None: ... @overload def update(self, iterable: Iterable[_T], /, **kwargs: int) -> None: ... @overload def update(self, iterable: None = None, /, **kwargs: int) -> None: ... def __missing__(self, key: _T) -> int: ... 
def __delitem__(self, elem: object) -> None: ... if sys.version_info >= (3, 10): def __eq__(self, other: object) -> bool: ... def __ne__(self, other: object) -> bool: ... def __add__(self, other: Counter[_S]) -> Counter[_T | _S]: ... def __sub__(self, other: Counter[_T]) -> Counter[_T]: ... def __and__(self, other: Counter[_T]) -> Counter[_T]: ... def __or__(self, other: Counter[_S]) -> Counter[_T | _S]: ... # type: ignore[override] def __pos__(self) -> Counter[_T]: ... def __neg__(self) -> Counter[_T]: ... # several type: ignores because __iadd__ is supposedly incompatible with __add__, etc. def __iadd__(self, other: SupportsItems[_T, int]) -> Self: ... # type: ignore[misc] def __isub__(self, other: SupportsItems[_T, int]) -> Self: ... def __iand__(self, other: SupportsItems[_T, int]) -> Self: ... def __ior__(self, other: SupportsItems[_T, int]) -> Self: ... # type: ignore[override,misc] if sys.version_info >= (3, 10): def total(self) -> int: ... def __le__(self, other: Counter[Any]) -> bool: ... def __lt__(self, other: Counter[Any]) -> bool: ... def __ge__(self, other: Counter[Any]) -> bool: ... def __gt__(self, other: Counter[Any]) -> bool: ... # The pure-Python implementations of the "views" classes # These are exposed at runtime in `collections/__init__.py` class _OrderedDictKeysView(KeysView[_KT_co]): def __reversed__(self) -> Iterator[_KT_co]: ... class _OrderedDictItemsView(ItemsView[_KT_co, _VT_co]): def __reversed__(self) -> Iterator[tuple[_KT_co, _VT_co]]: ... class _OrderedDictValuesView(ValuesView[_VT_co]): def __reversed__(self) -> Iterator[_VT_co]: ... # The C implementations of the "views" classes # (At runtime, these are called `odict_keys`, `odict_items` and `odict_values`, # but they are not exposed anywhere) # pyright doesn't have a specific error code for subclassing error! 
# Stub-only view types returned by OrderedDict.keys()/items()/values() below.
# Each one adds `__reversed__` on top of the corresponding dict view type.
@final
@type_check_only
class _odict_keys(dict_keys[_KT_co, _VT_co]):  # type: ignore[misc] # pyright: ignore[reportGeneralTypeIssues]
    def __reversed__(self) -> Iterator[_KT_co]: ...

@final
@type_check_only
class _odict_items(dict_items[_KT_co, _VT_co]):  # type: ignore[misc] # pyright: ignore[reportGeneralTypeIssues]
    def __reversed__(self) -> Iterator[tuple[_KT_co, _VT_co]]: ...

@final
@type_check_only
class _odict_values(dict_values[_KT_co, _VT_co]):  # type: ignore[misc] # pyright: ignore[reportGeneralTypeIssues]
    def __reversed__(self) -> Iterator[_VT_co]: ...

# Stub for OrderedDict: a `dict[_KT, _VT]` subclass whose keys/items/values
# are annotated with the reversible `_odict_*` view types declared above.
@disjoint_base
class OrderedDict(dict[_KT, _VT]):
    def popitem(self, last: bool = True) -> tuple[_KT, _VT]: ...
    def move_to_end(self, key: _KT, last: bool = True) -> None: ...
    def copy(self) -> Self: ...
    def __reversed__(self) -> Iterator[_KT]: ...
    def keys(self) -> _odict_keys[_KT, _VT]: ...
    def items(self) -> _odict_items[_KT, _VT]: ...
    def values(self) -> _odict_values[_KT, _VT]: ...
    # The signature of OrderedDict.fromkeys should be kept in line with `dict.fromkeys`, modulo positional-only differences.
    # Like dict.fromkeys, its true signature is not expressible in the current type system.
    # See #3800 & https://github.com/python/typing/issues/548#issuecomment-683336963.
    @classmethod
    @overload
    def fromkeys(cls, iterable: Iterable[_T], value: None = None) -> OrderedDict[_T, Any | None]: ...
    @classmethod
    @overload
    def fromkeys(cls, iterable: Iterable[_T], value: _S) -> OrderedDict[_T, _S]: ...
    # Keep OrderedDict.setdefault in line with MutableMapping.setdefault, modulo positional-only differences.
    @overload
    def setdefault(self: OrderedDict[_KT, _T | None], key: _KT, default: None = None) -> _T | None: ...
    @overload
    def setdefault(self, key: _KT, default: _VT) -> _VT: ...
    # Same as dict.pop, but accepts keyword arguments
    @overload
    def pop(self, key: _KT) -> _VT: ...
    @overload
    def pop(self, key: _KT, default: _VT) -> _VT: ...
    @overload
    def pop(self, key: _KT, default: _T) -> _VT | _T: ...
    def __eq__(self, value: object, /) -> bool: ...
    # `|` with a dict of the same key/value types is annotated as returning Self;
    # with other key/value types the result is an OrderedDict over the unions.
    @overload
    def __or__(self, value: dict[_KT, _VT], /) -> Self: ...
    @overload
    def __or__(self, value: dict[_T1, _T2], /) -> OrderedDict[_KT | _T1, _VT | _T2]: ...
    @overload
    def __ror__(self, value: dict[_KT, _VT], /) -> Self: ...
    @overload
    def __ror__(self, value: dict[_T1, _T2], /) -> OrderedDict[_KT | _T1, _VT | _T2]: ...  # type: ignore[misc]

# Stub for defaultdict: a `dict[_KT, _VT]` subclass carrying an optional
# zero-argument `default_factory` that produces `_VT` values.
@disjoint_base
class defaultdict(dict[_KT, _VT]):
    default_factory: Callable[[], _VT] | None
    # The __init__ overloads come in pairs: each form without keyword arguments
    # is followed by a `**kwargs` form whose `self` is pinned to `defaultdict[str, _VT]`.
    @overload
    def __init__(self) -> None: ...
    @overload
    def __init__(self: defaultdict[str, _VT], **kwargs: _VT) -> None: ...  # pyright: ignore[reportInvalidTypeVarUse] #11780
    @overload
    def __init__(self, default_factory: Callable[[], _VT] | None, /) -> None: ...
    @overload
    def __init__(
        self: defaultdict[str, _VT],  # pyright: ignore[reportInvalidTypeVarUse] #11780
        default_factory: Callable[[], _VT] | None,
        /,
        **kwargs: _VT,
    ) -> None: ...
    @overload
    def __init__(self, default_factory: Callable[[], _VT] | None, map: SupportsKeysAndGetItem[_KT, _VT], /) -> None: ...
    @overload
    def __init__(
        self: defaultdict[str, _VT],  # pyright: ignore[reportInvalidTypeVarUse] #11780
        default_factory: Callable[[], _VT] | None,
        map: SupportsKeysAndGetItem[str, _VT],
        /,
        **kwargs: _VT,
    ) -> None: ...
    @overload
    def __init__(self, default_factory: Callable[[], _VT] | None, iterable: Iterable[tuple[_KT, _VT]], /) -> None: ...
    @overload
    def __init__(
        self: defaultdict[str, _VT],  # pyright: ignore[reportInvalidTypeVarUse] #11780
        default_factory: Callable[[], _VT] | None,
        iterable: Iterable[tuple[str, _VT]],
        /,
        **kwargs: _VT,
    ) -> None: ...
    def __missing__(self, key: _KT, /) -> _VT: ...
    def __copy__(self) -> Self: ...
    def copy(self) -> Self: ...
    # Same overload shape as OrderedDict.__or__/__ror__, returning defaultdict.
    @overload
    def __or__(self, value: dict[_KT, _VT], /) -> Self: ...
    @overload
    def __or__(self, value: dict[_T1, _T2], /) -> defaultdict[_KT | _T1, _VT | _T2]: ...
    @overload
    def __ror__(self, value: dict[_KT, _VT], /) -> Self: ...
    @overload
    def __ror__(self, value: dict[_T1, _T2], /) -> defaultdict[_KT | _T1, _VT | _T2]: ...  # type: ignore[misc]

# Stub for ChainMap: a MutableMapping whose `maps` attribute is a list of
# MutableMapping objects with the same key/value types.
class ChainMap(MutableMapping[_KT, _VT]):
    maps: list[MutableMapping[_KT, _VT]]
    def __init__(self, *maps: MutableMapping[_KT, _VT]) -> None: ...
    def new_child(self, m: MutableMapping[_KT, _VT] | None = None) -> Self: ...
    @property
    def parents(self) -> Self: ...
    def __setitem__(self, key: _KT, value: _VT) -> None: ...
    def __delitem__(self, key: _KT) -> None: ...
    def __getitem__(self, key: _KT) -> _VT: ...
    def __iter__(self) -> Iterator[_KT]: ...
    def __len__(self) -> int: ...
    def __contains__(self, key: object) -> bool: ...
    @overload
    def get(self, key: _KT, default: None = None) -> _VT | None: ...
    @overload
    def get(self, key: _KT, default: _VT) -> _VT: ...
    @overload
    def get(self, key: _KT, default: _T) -> _VT | _T: ...
    def __missing__(self, key: _KT) -> _VT: ...  # undocumented
    def __bool__(self) -> bool: ...
    # Keep ChainMap.setdefault in line with MutableMapping.setdefault, modulo positional-only differences.
    @overload
    def setdefault(self: ChainMap[_KT, _T | None], key: _KT, default: None = None) -> _T | None: ...
    @overload
    def setdefault(self, key: _KT, default: _VT) -> _VT: ...
    @overload
    def pop(self, key: _KT) -> _VT: ...
    @overload
    def pop(self, key: _KT, default: _VT) -> _VT: ...
    @overload
    def pop(self, key: _KT, default: _T) -> _VT | _T: ...
    def copy(self) -> Self: ...
    __copy__ = copy
    # All arguments to `fromkeys` are passed to `dict.fromkeys` at runtime,
    # so the signature should be kept in line with `dict.fromkeys`.
    # The single-argument overload is positional-only from 3.13 on.
    if sys.version_info >= (3, 13):
        @classmethod
        @overload
        def fromkeys(cls, iterable: Iterable[_T], /) -> ChainMap[_T, Any | None]: ...
    else:
        @classmethod
        @overload
        def fromkeys(cls, iterable: Iterable[_T]) -> ChainMap[_T, Any | None]: ...

    @classmethod
    @overload
    # Special-case None: the user probably wants to add non-None values later.
    def fromkeys(cls, iterable: Iterable[_T], value: None, /) -> ChainMap[_T, Any | None]: ...
    @classmethod
    @overload
    def fromkeys(cls, iterable: Iterable[_T], value: _S, /) -> ChainMap[_T, _S]: ...
    @overload
    def __or__(self, other: Mapping[_KT, _VT]) -> Self: ...
    @overload
    def __or__(self, other: Mapping[_T1, _T2]) -> ChainMap[_KT | _T1, _VT | _T2]: ...
    @overload
    def __ror__(self, other: Mapping[_KT, _VT]) -> Self: ...
    @overload
    def __ror__(self, other: Mapping[_T1, _T2]) -> ChainMap[_KT | _T1, _VT | _T2]: ...
    # ChainMap.__ior__ should be kept roughly in line with MutableMapping.update()
    @overload  # type: ignore[misc]
    def __ior__(self, other: SupportsKeysAndGetItem[_KT, _VT]) -> Self: ...
    @overload
    def __ior__(self, other: Iterable[tuple[_KT, _VT]]) -> Self: ...

================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/collections/abc.pyi
================================================
# Re-exports everything from `_collections_abc`, including its `__all__`.
from _collections_abc import *
from _collections_abc import __all__ as __all__

================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/dataclasses.pyi
================================================
import enum
import sys
import types
from builtins import type as Type  # alias to avoid name clashes with fields named "type"
from collections.abc import Callable, Iterable, Mapping
from types import GenericAlias
from typing import Any, Final, Generic, Literal, Protocol, TypeVar, overload, type_check_only

from _typeshed import DataclassInstance
from typing_extensions import Never, TypeIs

_T = TypeVar('_T')
_T_co = TypeVar('_T_co', covariant=True)

__all__ = [
    'dataclass',
    'field',
    'Field',
    'FrozenInstanceError',
    'InitVar',
    'MISSING',
    'fields',
    'asdict',
    'astuple',
    'make_dataclass',
    'replace',
    'is_dataclass',
]

if sys.version_info >= (3, 10):
    __all__ += ['KW_ONLY']

# Type variable restricted to dataclass instances.
_DataclassT = TypeVar('_DataclassT', bound=DataclassInstance)

# Stub-only protocol describing a callable with the same parameters as the
# 3.11+ class-taking `dataclass` overload declared further down in this file.
@type_check_only
class _DataclassFactory(Protocol):
    def __call__(
        self,
        cls: type[_T],
        /,
        *,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
        match_args: bool = True,
        kw_only: bool = False,
        slots: bool = False,
        weakref_slot: bool = False,
    ) -> type[_T]: ...

# define _MISSING_TYPE as an enum within the type stubs,
# even though that is not really its type at runtime
# this allows us to use Literal[_MISSING_TYPE.MISSING]
# for background, see:
#   https://github.com/python/typeshed/pull/5900#issuecomment-895513797
class _MISSING_TYPE(enum.Enum):
    MISSING = enum.auto()

MISSING: Final = _MISSING_TYPE.MISSING

if sys.version_info >= (3, 10):
    class KW_ONLY: ...

# asdict/astuple: without a factory the result is a plain dict/tuple;
# with a keyword-only factory the result is whatever the factory returns.
@overload
def asdict(obj: DataclassInstance) -> dict[str, Any]: ...
@overload
def asdict(obj: DataclassInstance, *, dict_factory: Callable[[list[tuple[str, Any]]], _T]) -> _T: ...
@overload
def astuple(obj: DataclassInstance) -> tuple[Any, ...]: ...
@overload
def astuple(obj: DataclassInstance, *, tuple_factory: Callable[[list[Any]], _T]) -> _T: ...

# `dataclass` is declared once per supported version range. Each range has two
# overloads: called with a class it returns that class type; called with
# `cls=None` it returns a decorator. Newer ranges add keyword-only options
# (`match_args`/`kw_only`/`slots` from 3.10, `weakref_slot` from 3.11).
if sys.version_info >= (3, 11):
    @overload
    def dataclass(
        cls: type[_T],
        /,
        *,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
        match_args: bool = True,
        kw_only: bool = False,
        slots: bool = False,
        weakref_slot: bool = False,
    ) -> type[_T]: ...
    @overload
    def dataclass(
        cls: None = None,
        /,
        *,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
        match_args: bool = True,
        kw_only: bool = False,
        slots: bool = False,
        weakref_slot: bool = False,
    ) -> Callable[[type[_T]], type[_T]]: ...

elif sys.version_info >= (3, 10):
    @overload
    def dataclass(
        cls: type[_T],
        /,
        *,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
        match_args: bool = True,
        kw_only: bool = False,
        slots: bool = False,
    ) -> type[_T]: ...
    @overload
    def dataclass(
        cls: None = None,
        /,
        *,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
        match_args: bool = True,
        kw_only: bool = False,
        slots: bool = False,
    ) -> Callable[[type[_T]], type[_T]]: ...

else:
    @overload
    def dataclass(
        cls: type[_T],
        /,
        *,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
    ) -> type[_T]: ...
    @overload
    def dataclass(
        cls: None = None,
        /,
        *,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
    ) -> Callable[[type[_T]], type[_T]]: ...

# See https://github.com/python/mypy/issues/10750
@type_check_only
class _DefaultFactory(Protocol[_T_co]):
    def __call__(self) -> _T_co: ...

# Stub for dataclasses.Field. `__slots__`, the attribute set and `__init__`
# are each version-gated: `kw_only` appears from 3.10, `doc` from 3.14.
class Field(Generic[_T]):
    if sys.version_info >= (3, 14):
        __slots__ = (
            'name',
            'type',
            'default',
            'default_factory',
            'repr',
            'hash',
            'init',
            'compare',
            'metadata',
            'kw_only',
            'doc',
            '_field_type',
        )
    elif sys.version_info >= (3, 10):
        __slots__ = (
            'name',
            'type',
            'default',
            'default_factory',
            'repr',
            'hash',
            'init',
            'compare',
            'metadata',
            'kw_only',
            '_field_type',
        )
    else:
        __slots__ = (
            'name',
            'type',
            'default',
            'default_factory',
            'repr',
            'hash',
            'init',
            'compare',
            'metadata',
            '_field_type',
        )
    name: str
    type: Type[_T] | str | Any
    default: _T | Literal[_MISSING_TYPE.MISSING]
    default_factory: _DefaultFactory[_T] | Literal[_MISSING_TYPE.MISSING]
    repr: bool
    hash: bool | None
    init: bool
    compare: bool
    metadata: types.MappingProxyType[Any, Any]

    if sys.version_info >= (3, 14):
        doc: str | None

    if sys.version_info >= (3, 10):
        kw_only: bool | Literal[_MISSING_TYPE.MISSING]

    if sys.version_info >= (3, 14):
        def __init__(
            self,
            default: _T,
            default_factory: Callable[[], _T],
            init: bool,
            repr: bool,
            hash: bool | None,
            compare: bool,
            metadata: Mapping[Any, Any],
            kw_only: bool,
            doc: str | None,
        ) -> None: ...
    elif sys.version_info >= (3, 10):
        def __init__(
            self,
            default: _T,
            default_factory: Callable[[], _T],
            init: bool,
            repr: bool,
            hash: bool | None,
            compare: bool,
            metadata: Mapping[Any, Any],
            kw_only: bool,
        ) -> None: ...
    else:
        def __init__(
            self,
            default: _T,
            default_factory: Callable[[], _T],
            init: bool,
            repr: bool,
            hash: bool | None,
            compare: bool,
            metadata: Mapping[Any, Any],
        ) -> None: ...

    def __set_name__(self, owner: Type[Any], name: str) -> None: ...
    def __class_getitem__(cls, item: Any, /) -> GenericAlias: ...

# NOTE: Actual return type is 'Field[_T]', but we want to help type checkers
# to understand the magic that happens at runtime.
# Each version range declares three keyword-only overloads: `default` given
# (returns _T), `default_factory` given (returns _T), neither given (returns Any).
if sys.version_info >= (3, 14):
    @overload  # `default` and `default_factory` are optional and mutually exclusive.
    def field(
        *,
        default: _T,
        default_factory: Literal[_MISSING_TYPE.MISSING] = ...,
        init: bool = True,
        repr: bool = True,
        hash: bool | None = None,
        compare: bool = True,
        metadata: Mapping[Any, Any] | None = None,
        kw_only: bool | Literal[_MISSING_TYPE.MISSING] = ...,
        doc: str | None = None,
    ) -> _T: ...
    @overload
    def field(
        *,
        default: Literal[_MISSING_TYPE.MISSING] = ...,
        default_factory: Callable[[], _T],
        init: bool = True,
        repr: bool = True,
        hash: bool | None = None,
        compare: bool = True,
        metadata: Mapping[Any, Any] | None = None,
        kw_only: bool | Literal[_MISSING_TYPE.MISSING] = ...,
        doc: str | None = None,
    ) -> _T: ...
    @overload
    def field(
        *,
        default: Literal[_MISSING_TYPE.MISSING] = ...,
        default_factory: Literal[_MISSING_TYPE.MISSING] = ...,
        init: bool = True,
        repr: bool = True,
        hash: bool | None = None,
        compare: bool = True,
        metadata: Mapping[Any, Any] | None = None,
        kw_only: bool | Literal[_MISSING_TYPE.MISSING] = ...,
        doc: str | None = None,
    ) -> Any: ...

elif sys.version_info >= (3, 10):
    @overload  # `default` and `default_factory` are optional and mutually exclusive.
    def field(
        *,
        default: _T,
        default_factory: Literal[_MISSING_TYPE.MISSING] = ...,
        init: bool = True,
        repr: bool = True,
        hash: bool | None = None,
        compare: bool = True,
        metadata: Mapping[Any, Any] | None = None,
        kw_only: bool | Literal[_MISSING_TYPE.MISSING] = ...,
    ) -> _T: ...
    @overload
    def field(
        *,
        default: Literal[_MISSING_TYPE.MISSING] = ...,
        default_factory: Callable[[], _T],
        init: bool = True,
        repr: bool = True,
        hash: bool | None = None,
        compare: bool = True,
        metadata: Mapping[Any, Any] | None = None,
        kw_only: bool | Literal[_MISSING_TYPE.MISSING] = ...,
    ) -> _T: ...
    @overload
    def field(
        *,
        default: Literal[_MISSING_TYPE.MISSING] = ...,
        default_factory: Literal[_MISSING_TYPE.MISSING] = ...,
        init: bool = True,
        repr: bool = True,
        hash: bool | None = None,
        compare: bool = True,
        metadata: Mapping[Any, Any] | None = None,
        kw_only: bool | Literal[_MISSING_TYPE.MISSING] = ...,
    ) -> Any: ...

else:
    @overload  # `default` and `default_factory` are optional and mutually exclusive.
    def field(
        *,
        default: _T,
        default_factory: Literal[_MISSING_TYPE.MISSING] = ...,
        init: bool = True,
        repr: bool = True,
        hash: bool | None = None,
        compare: bool = True,
        metadata: Mapping[Any, Any] | None = None,
    ) -> _T: ...
    @overload
    def field(
        *,
        default: Literal[_MISSING_TYPE.MISSING] = ...,
        default_factory: Callable[[], _T],
        init: bool = True,
        repr: bool = True,
        hash: bool | None = None,
        compare: bool = True,
        metadata: Mapping[Any, Any] | None = None,
    ) -> _T: ...
    @overload
    def field(
        *,
        default: Literal[_MISSING_TYPE.MISSING] = ...,
        default_factory: Literal[_MISSING_TYPE.MISSING] = ...,
        init: bool = True,
        repr: bool = True,
        hash: bool | None = None,
        compare: bool = True,
        metadata: Mapping[Any, Any] | None = None,
    ) -> Any: ...

def fields(class_or_instance: DataclassInstance | type[DataclassInstance]) -> tuple[Field[Any], ...]: ...

# HACK: `obj: Never` typing matches if object argument is using `Any` type.
# Three overloads, tried in order: `Never`, then `type` (narrows to a dataclass
# class), then `object` (narrows to a dataclass instance or class).
@overload
def is_dataclass(obj: Never) -> TypeIs[DataclassInstance | type[DataclassInstance]]: ...  # type: ignore[narrowed-type-not-subtype] # pyright: ignore[reportGeneralTypeIssues]
@overload
def is_dataclass(obj: type) -> TypeIs[type[DataclassInstance]]: ...
@overload
def is_dataclass(obj: object) -> TypeIs[DataclassInstance | type[DataclassInstance]]: ...

class FrozenInstanceError(AttributeError): ...

# Stub for InitVar: generic wrapper with a single `type` slot.
# Subscripting with a concrete type yields `InitVar[_T]`; anything else `InitVar[Any]`.
class InitVar(Generic[_T]):
    __slots__ = ('type',)
    type: Type[_T]
    def __init__(self, type: Type[_T]) -> None: ...
    @overload
    def __class_getitem__(cls, type: Type[_T]) -> InitVar[_T]: ...  # pyright: ignore[reportInvalidTypeForm]
    @overload
    def __class_getitem__(cls, type: Any) -> InitVar[Any]: ...  # pyright: ignore[reportInvalidTypeForm]

# `make_dataclass` is declared once per version range; every option after
# `fields` is keyword-only. Options added by range, as visible below:
#   3.10: match_args, kw_only, slots
#   3.11: weakref_slot
#   3.12: module
#   3.14: decorator
if sys.version_info >= (3, 14):
    def make_dataclass(
        cls_name: str,
        fields: Iterable[str | tuple[str, Any] | tuple[str, Any, Any]],
        *,
        bases: tuple[type, ...] = (),
        namespace: dict[str, Any] | None = None,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
        match_args: bool = True,
        kw_only: bool = False,
        slots: bool = False,
        weakref_slot: bool = False,
        module: str | None = None,
        decorator: _DataclassFactory = ...,
    ) -> type: ...

elif sys.version_info >= (3, 12):
    def make_dataclass(
        cls_name: str,
        fields: Iterable[str | tuple[str, Any] | tuple[str, Any, Any]],
        *,
        bases: tuple[type, ...] = (),
        namespace: dict[str, Any] | None = None,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
        match_args: bool = True,
        kw_only: bool = False,
        slots: bool = False,
        weakref_slot: bool = False,
        module: str | None = None,
    ) -> type: ...

elif sys.version_info >= (3, 11):
    def make_dataclass(
        cls_name: str,
        fields: Iterable[str | tuple[str, Any] | tuple[str, Any, Any]],
        *,
        bases: tuple[type, ...] = (),
        namespace: dict[str, Any] | None = None,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
        match_args: bool = True,
        kw_only: bool = False,
        slots: bool = False,
        weakref_slot: bool = False,
    ) -> type: ...

elif sys.version_info >= (3, 10):
    def make_dataclass(
        cls_name: str,
        fields: Iterable[str | tuple[str, Any] | tuple[str, Any, Any]],
        *,
        bases: tuple[type, ...] = (),
        namespace: dict[str, Any] | None = None,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
        match_args: bool = True,
        kw_only: bool = False,
        slots: bool = False,
    ) -> type: ...

else:
    def make_dataclass(
        cls_name: str,
        fields: Iterable[str | tuple[str, Any] | tuple[str, Any, Any]],
        *,
        bases: tuple[type, ...] = (),
        namespace: dict[str, Any] | None = None,
        init: bool = True,
        repr: bool = True,
        eq: bool = True,
        order: bool = False,
        unsafe_hash: bool = False,
        frozen: bool = False,
    ) -> type: ...

# Annotated as returning the same dataclass type it was given.
def replace(obj: _DataclassT, /, **changes: Any) -> _DataclassT: ...
================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/enum.pyi
================================================
import sys
import types
from builtins import property as _builtins_property
from collections.abc import Callable, Iterable, Iterator, Mapping
from typing import Any, Final, Generic, Literal, SupportsIndex, TypeVar, overload

import _typeshed
from _typeshed import SupportsKeysAndGetItem, Unused
from typing_extensions import Self, TypeAlias, disjoint_base

__all__ = ['EnumMeta', 'Enum', 'IntEnum', 'Flag', 'IntFlag', 'auto', 'unique']

if sys.version_info >= (3, 11):
    __all__ += [
        'CONFORM',
        'CONTINUOUS',
        'EJECT',
        'EnumCheck',
        'EnumType',
        'FlagBoundary',
        'KEEP',
        'NAMED_FLAGS',
        'ReprEnum',
        'STRICT',
        'StrEnum',
        'UNIQUE',
        'global_enum',
        'global_enum_repr',
        'global_flag_repr',
        'global_str',
        'member',
        'nonmember',
        'property',
        'verify',
        'pickle_by_enum_name',
        'pickle_by_global_name',
    ]

if sys.version_info >= (3, 13):
    __all__ += ['EnumDict']

# _EnumMemberT is unconstrained; _EnumerationT is bound to Enum classes.
_EnumMemberT = TypeVar('_EnumMemberT')
_EnumerationT = TypeVar('_EnumerationT', bound=type[Enum])

# The following all work:
# >>> from enum import Enum
# >>> from string import ascii_lowercase
# >>> Enum('Foo', names='RED YELLOW GREEN')
#
# >>> Enum('Foo', names=[('RED', 1), ('YELLOW', 2)])
#
# >>> Enum('Foo', names=((x for x in (ascii_lowercase[i], i)) for i in range(5)))
#
# >>> Enum('Foo', names={'RED': 1, 'YELLOW': 2})
#
_EnumNames: TypeAlias = str | Iterable[str] | Iterable[Iterable[str | Any]] | Mapping[str, Any]
_Signature: TypeAlias = Any  # TODO: Unable to import Signature from inspect module

if sys.version_info >= (3, 11):
    # Generic single-value wrappers; both expose the wrapped object as `value`.
    class nonmember(Generic[_EnumMemberT]):
        value: _EnumMemberT
        def __init__(self, value: _EnumMemberT) -> None: ...

    class member(Generic[_EnumMemberT]):
        value: _EnumMemberT
        def __init__(self, value: _EnumMemberT) -> None: ...

# `dict[str, Any]` subclass that EnumMeta.__prepare__ is annotated as returning.
class _EnumDict(dict[str, Any]):
    if sys.version_info >= (3, 13):
        def __init__(self, cls_name: str | None = None) -> None: ...
    else:
        def __init__(self) -> None: ...

    def __setitem__(self, key: str, value: Any) -> None: ...
    if sys.version_info >= (3, 11):
        # See comment above `typing.MutableMapping.update`
        # for why overloads are preferable to a Union here
        #
        # Unlike with MutableMapping.update(), the first argument is required,
        # hence the type: ignore
        @overload  # type: ignore[override]
        def update(self, members: SupportsKeysAndGetItem[str, Any], **more_members: Any) -> None: ...
        @overload
        def update(self, members: Iterable[tuple[str, Any]], **more_members: Any) -> None: ...

    if sys.version_info >= (3, 13):
        @property
        def member_names(self) -> list[str]: ...

if sys.version_info >= (3, 13):
    EnumDict = _EnumDict

# Structurally: Iterable[T], Reversible[T], Container[T] where T is the enum itself
class EnumMeta(type):
    if sys.version_info >= (3, 11):
        def __new__(
            metacls: type[_typeshed.Self],
            cls: str,
            bases: tuple[type, ...],
            classdict: _EnumDict,
            *,
            boundary: FlagBoundary | None = None,
            _simple: bool = False,
            **kwds: Any,
        ) -> _typeshed.Self: ...
    else:
        def __new__(
            metacls: type[_typeshed.Self], cls: str, bases: tuple[type, ...], classdict: _EnumDict, **kwds: Any
        ) -> _typeshed.Self: ...

    @classmethod
    def __prepare__(metacls, cls: str, bases: tuple[type, ...], **kwds: Any) -> _EnumDict: ...  # type: ignore[override]
    def __iter__(self: type[_EnumMemberT]) -> Iterator[_EnumMemberT]: ...
    def __reversed__(self: type[_EnumMemberT]) -> Iterator[_EnumMemberT]: ...
    # Only the parameter name differs between the version branches below.
    if sys.version_info >= (3, 12):
        def __contains__(self: type[Any], value: object) -> bool: ...
    elif sys.version_info >= (3, 11):
        def __contains__(self: type[Any], member: object) -> bool: ...
    elif sys.version_info >= (3, 10):
        def __contains__(self: type[Any], obj: object) -> bool: ...
    else:
        def __contains__(self: type[Any], member: object) -> bool: ...

    def __getitem__(self: type[_EnumMemberT], name: str) -> _EnumMemberT: ...
    @_builtins_property
    def __members__(self: type[_EnumMemberT]) -> types.MappingProxyType[str, _EnumMemberT]: ...
    def __len__(self) -> int: ...
    def __bool__(self) -> Literal[True]: ...
    def __dir__(self) -> list[str]: ...

    # Overload 1: Value lookup on an already existing enum class (simple case)
    @overload
    def __call__(cls: type[_EnumMemberT], value: Any, names: None = None) -> _EnumMemberT: ...

    # Overload 2: Functional API for constructing new enum classes.
    if sys.version_info >= (3, 11):
        @overload
        def __call__(
            cls,
            value: str,
            names: _EnumNames,
            *,
            module: str | None = None,
            qualname: str | None = None,
            type: type | None = None,
            start: int = 1,
            boundary: FlagBoundary | None = None,
        ) -> type[Enum]: ...
    else:
        @overload
        def __call__(
            cls,
            value: str,
            names: _EnumNames,
            *,
            module: str | None = None,
            qualname: str | None = None,
            type: type | None = None,
            start: int = 1,
        ) -> type[Enum]: ...

    # Overload 3 (py312+ only): Value lookup on an already existing enum class (complex case)
    #
    # >>> class Foo(enum.Enum):
    # ...     X = 1, 2, 3
    # >>> Foo(1, 2, 3)
    #
    #
    if sys.version_info >= (3, 12):
        @overload
        def __call__(cls: type[_EnumMemberT], value: Any, *values: Any) -> _EnumMemberT: ...
    if sys.version_info >= (3, 14):
        @property
        def __signature__(cls) -> _Signature: ...

    _member_names_: list[str]  # undocumented
    _member_map_: dict[str, Enum]  # undocumented
    _value2member_map_: dict[Any, Enum]  # undocumented

# `_magic_enum_attr` is the decorator used for `name`/`value` on the enum
# classes below: `enum.property` on 3.11+, `types.DynamicClassAttribute` before.
if sys.version_info >= (3, 11):
    # In 3.11 `EnumMeta` metaclass is renamed to `EnumType`, but old name also exists.
    EnumType = EnumMeta

    class property(types.DynamicClassAttribute):
        def __set_name__(self, ownerclass: type[Enum], name: str) -> None: ...
        name: str
        clsname: str
        member: Enum | None

    _magic_enum_attr = property
else:
    _magic_enum_attr = types.DynamicClassAttribute

class Enum(metaclass=EnumMeta):
    @_magic_enum_attr
    def name(self) -> str: ...
    @_magic_enum_attr
    def value(self) -> Any: ...
    _name_: str
    _value_: Any
    _ignore_: str | list[str]
    _order_: str
    __order__: str
    @classmethod
    def _missing_(cls, value: object) -> Any: ...
    @staticmethod
    def _generate_next_value_(name: str, start: int, count: int, last_values: list[Any]) -> Any: ...
    # It's not true that `__new__` will accept any argument type,
    # so ideally we'd use `Any` to indicate that the argument type is inexpressible.
    # However, using `Any` causes too many false-positives for those using mypy's `--disallow-any-expr`
    # (see #7752, #2539, mypy/#5788),
    # and in practice using `object` here has the same effect as using `Any`.
    def __new__(cls, value: object) -> Self: ...
    def __dir__(self) -> list[str]: ...
    def __hash__(self) -> int: ...
    def __format__(self, format_spec: str) -> str: ...
    def __reduce_ex__(self, proto: Unused) -> tuple[Any, ...]: ...
    if sys.version_info >= (3, 11):
        def __copy__(self) -> Self: ...
        def __deepcopy__(self, memo: Any) -> Self: ...
    if sys.version_info >= (3, 12) and sys.version_info < (3, 14):
        @classmethod
        def __signature__(cls) -> str: ...
    if sys.version_info >= (3, 13):
        # Value may be any type, even in special enums. Enabling Enum parsing from
        # multiple value types
        def _add_value_alias_(self, value: Any) -> None: ...
        def _add_alias_(self, name: str) -> None: ...

if sys.version_info >= (3, 11):
    class ReprEnum(Enum): ...

# IntEnum has the same body in every branch; only its bases and the
# `@disjoint_base` decorator differ between version ranges.
if sys.version_info >= (3, 12):
    class IntEnum(int, ReprEnum):
        _value_: int
        @_magic_enum_attr
        def value(self) -> int: ...
        def __new__(cls, value: int) -> Self: ...

else:
    if sys.version_info >= (3, 11):
        _IntEnumBase = ReprEnum
    else:
        _IntEnumBase = Enum

    @disjoint_base
    class IntEnum(int, _IntEnumBase):
        _value_: int
        @_magic_enum_attr
        def value(self) -> int: ...
        def __new__(cls, value: int) -> Self: ...

def unique(enumeration: _EnumerationT) -> _EnumerationT: ...

_auto_null: Any

# Flag narrows `_value_`/`value` to int, allows `name` to be None, and
# declares the bitwise operators as taking and returning the same Flag type.
class Flag(Enum):
    _name_: str | None  # type: ignore[assignment]
    _value_: int
    _numeric_repr_: Callable[[int], str]
    @_magic_enum_attr
    def name(self) -> str | None: ...  # type: ignore[override]
    @_magic_enum_attr
    def value(self) -> int: ...
    def __contains__(self, other: Self) -> bool: ...
    def __bool__(self) -> bool: ...
    def __or__(self, other: Self) -> Self: ...
    def __and__(self, other: Self) -> Self: ...
    def __xor__(self, other: Self) -> Self: ...
    def __invert__(self) -> Self: ...
    if sys.version_info >= (3, 11):
        def __iter__(self) -> Iterator[Self]: ...
        def __len__(self) -> int: ...
        __ror__ = __or__
        __rand__ = __and__
        __rxor__ = __xor__

if sys.version_info >= (3, 11):
    class StrEnum(str, ReprEnum):
        def __new__(cls, value: str) -> Self: ...
        _value_: str
        @_magic_enum_attr
        def value(self) -> str: ...
        @staticmethod
        def _generate_next_value_(name: str, start: int, count: int, last_values: list[str]) -> str: ...

    class EnumCheck(StrEnum):
        CONTINUOUS = 'no skipped integer values'
        NAMED_FLAGS = 'multi-flag aliases may not contain unnamed flags'
        UNIQUE = 'one name per value'

    # Module-level aliases for the EnumCheck members.
    CONTINUOUS: Final = EnumCheck.CONTINUOUS
    NAMED_FLAGS: Final = EnumCheck.NAMED_FLAGS
    UNIQUE: Final = EnumCheck.UNIQUE

    # Class decorator taking EnumCheck values; calling it returns the enum class unchanged in type.
    class verify:
        def __init__(self, *checks: EnumCheck) -> None: ...
        def __call__(self, enumeration: _EnumerationT) -> _EnumerationT: ...

    class FlagBoundary(StrEnum):
        STRICT = 'strict'
        CONFORM = 'conform'
        EJECT = 'eject'
        KEEP = 'keep'

    # Module-level aliases for the FlagBoundary members.
    STRICT: Final = FlagBoundary.STRICT
    CONFORM: Final = FlagBoundary.CONFORM
    EJECT: Final = FlagBoundary.EJECT
    KEEP: Final = FlagBoundary.KEEP

    def global_str(self: Enum) -> str: ...
    def global_enum(cls: _EnumerationT, update_str: bool = False) -> _EnumerationT: ...
    def global_enum_repr(self: Enum) -> str: ...
    def global_flag_repr(self: Flag) -> str: ...
    def show_flag_values(value: int) -> list[int]: ...
    def bin(num: SupportsIndex, max_bits: int | None = None) -> str: ...

if sys.version_info >= (3, 12):
    # The body of the class is the same, but the base classes are different.
    class IntFlag(int, ReprEnum, Flag, boundary=KEEP):  # type: ignore[misc] # complaints about incompatible bases
        def __new__(cls, value: int) -> Self: ...
        def __or__(self, other: int) -> Self: ...
        def __and__(self, other: int) -> Self: ...
        def __xor__(self, other: int) -> Self: ...
        def __invert__(self) -> Self: ...
        __ror__ = __or__
        __rand__ = __and__
        __rxor__ = __xor__

elif sys.version_info >= (3, 11):
    # The body of the class is the same, but the base classes are different.
    @disjoint_base
    class IntFlag(int, ReprEnum, Flag, boundary=KEEP):  # type: ignore[misc] # complaints about incompatible bases
        def __new__(cls, value: int) -> Self: ...
        def __or__(self, other: int) -> Self: ...
        def __and__(self, other: int) -> Self: ...
        def __xor__(self, other: int) -> Self: ...
        def __invert__(self) -> Self: ...
        __ror__ = __or__
        __rand__ = __and__
        __rxor__ = __xor__

else:
    @disjoint_base
    class IntFlag(int, Flag):  # type: ignore[misc] # complaints about incompatible bases
        def __new__(cls, value: int) -> Self: ...
        def __or__(self, other: int) -> Self: ...
        def __and__(self, other: int) -> Self: ...
        def __xor__(self, other: int) -> Self: ...
        def __invert__(self) -> Self: ...
        __ror__ = __or__
        __rand__ = __and__
        __rxor__ = __xor__

class auto:
    _value_: Any
    @_magic_enum_attr
    def value(self) -> Any: ...
    def __new__(cls) -> Self: ...

    # These don't exist, but auto is basically immediately replaced with
    # either an int or a str depending on the type of the enum. StrEnum's auto
    # shouldn't have these, but they're needed for int versions of auto (mostly the __or__).
    # Ideally type checkers would special case auto enough to handle this,
    # but until then this is a slightly inaccurate helping hand.
    def __or__(self, other: int | Self) -> Self: ...
    def __and__(self, other: int | Self) -> Self: ...
    def __xor__(self, other: int | Self) -> Self: ...
    __ror__ = __or__
    __rand__ = __and__
    __rxor__ = __xor__

if sys.version_info >= (3, 11):
    def pickle_by_global_name(self: Enum, proto: int) -> str: ...
    def pickle_by_enum_name(
        self: _EnumMemberT, proto: int
    ) -> tuple[Callable[..., Any], tuple[type[_EnumMemberT], str]]: ...
================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/math.pyi
================================================
import sys
from collections.abc import Iterable
from typing import Any, Final, Literal, Protocol, SupportsFloat, SupportsIndex, TypeVar, overload, type_check_only

from _typeshed import SupportsMul, SupportsRMul
from typing_extensions import TypeAlias

_T = TypeVar('_T')
_T_co = TypeVar('_T_co', covariant=True)

# Argument type shared by most functions in this stub: anything that
# implements SupportsFloat or SupportsIndex.
_SupportsFloatOrIndex: TypeAlias = SupportsFloat | SupportsIndex

# Module constants, all declared as final floats.
e: Final[float]
pi: Final[float]
inf: Final[float]
nan: Final[float]
tau: Final[float]

def acos(x: _SupportsFloatOrIndex, /) -> float: ...
def acosh(x: _SupportsFloatOrIndex, /) -> float: ...
def asin(x: _SupportsFloatOrIndex, /) -> float: ...
def asinh(x: _SupportsFloatOrIndex, /) -> float: ...
def atan(x: _SupportsFloatOrIndex, /) -> float: ...
def atan2(y: _SupportsFloatOrIndex, x: _SupportsFloatOrIndex, /) -> float: ...
def atanh(x: _SupportsFloatOrIndex, /) -> float: ...

# Only declared for Python 3.11+.
if sys.version_info >= (3, 11):
    def cbrt(x: _SupportsFloatOrIndex, /) -> float: ...

# Stub-only protocol for objects defining `__ceil__`.
@type_check_only
class _SupportsCeil(Protocol[_T_co]):
    def __ceil__(self) -> _T_co: ...

# ceil: an object with `__ceil__` yields that method's return type;
# any other float-or-index argument is annotated as yielding int.
@overload
def ceil(x: _SupportsCeil[_T], /) -> _T: ...
@overload
def ceil(x: _SupportsFloatOrIndex, /) -> int: ...
def comb(n: SupportsIndex, k: SupportsIndex, /) -> int: ...
def copysign(x: _SupportsFloatOrIndex, y: _SupportsFloatOrIndex, /) -> float: ...
def cos(x: _SupportsFloatOrIndex, /) -> float: ...
def cosh(x: _SupportsFloatOrIndex, /) -> float: ...
def degrees(x: _SupportsFloatOrIndex, /) -> float: ...
def dist(p: Iterable[_SupportsFloatOrIndex], q: Iterable[_SupportsFloatOrIndex], /) -> float: ...
def erf(x: _SupportsFloatOrIndex, /) -> float: ...
def erfc(x: _SupportsFloatOrIndex, /) -> float: ...
def exp(x: _SupportsFloatOrIndex, /) -> float: ...

# Only declared for Python 3.11+.
if sys.version_info >= (3, 11):
    def exp2(x: _SupportsFloatOrIndex, /) -> float: ...

def expm1(x: _SupportsFloatOrIndex, /) -> float: ...
def fabs(x: _SupportsFloatOrIndex, /) -> float: ...
# `factorial` accepts index-like arguments only and returns `int`.
def factorial(x: SupportsIndex, /) -> int: ...

# Structural type for objects that define `__floor__`; `_T_co` is whatever
# that method returns. Exists for type checking only.
@type_check_only
class _SupportsFloor(Protocol[_T_co]):
    def __floor__(self) -> _T_co: ...

# `floor` overloads: an argument with its own `__floor__` yields that method's
# return type; any other float-like/index-like argument yields `int`.
@overload
def floor(x: _SupportsFloor[_T], /) -> _T: ...
@overload
def floor(x: _SupportsFloatOrIndex, /) -> int: ...
def fmod(x: _SupportsFloatOrIndex, y: _SupportsFloatOrIndex, /) -> float: ...
# `frexp` returns a (float, int) pair.
def frexp(x: _SupportsFloatOrIndex, /) -> tuple[float, int]: ...
def fsum(seq: Iterable[_SupportsFloatOrIndex], /) -> float: ...
def gamma(x: _SupportsFloatOrIndex, /) -> float: ...
# `gcd` and `hypot` are variadic: any number of positional arguments.
def gcd(*integers: SupportsIndex) -> int: ...
def hypot(*coordinates: _SupportsFloatOrIndex) -> float: ...
# `rel_tol` and `abs_tol` are keyword-only, with declared defaults of
# 1e-09 and 0.0 respectively.
def isclose(
    a: _SupportsFloatOrIndex,
    b: _SupportsFloatOrIndex,
    *,
    rel_tol: _SupportsFloatOrIndex = 1e-09,
    abs_tol: _SupportsFloatOrIndex = 0.0,
) -> bool: ...
def isinf(x: _SupportsFloatOrIndex, /) -> bool: ...
def isfinite(x: _SupportsFloatOrIndex, /) -> bool: ...
def isnan(x: _SupportsFloatOrIndex, /) -> bool: ...
def isqrt(n: SupportsIndex, /) -> int: ...
# `lcm` is variadic, like `gcd`.
def lcm(*integers: SupportsIndex) -> int: ...
# The second argument of `ldexp` is annotated as a plain `int`.
def ldexp(x: _SupportsFloatOrIndex, i: int, /) -> float: ...
def lgamma(x: _SupportsFloatOrIndex, /) -> float: ...
# `base` is optional; its default is left unspecified (`...`) in this stub.
# Unlike most functions here, `log` has no positional-only marker.
def log(x: _SupportsFloatOrIndex, base: _SupportsFloatOrIndex = ...) -> float: ...
def log10(x: _SupportsFloatOrIndex, /) -> float: ...
def log1p(x: _SupportsFloatOrIndex, /) -> float: ...
def log2(x: _SupportsFloatOrIndex, /) -> float: ...
# `modf` returns a (float, float) pair.
def modf(x: _SupportsFloatOrIndex, /) -> tuple[float, float]: ...

# Python 3.12+ adds the optional keyword-only `steps` parameter to `nextafter`.
if sys.version_info >= (3, 12):
    def nextafter(
        x: _SupportsFloatOrIndex, y: _SupportsFloatOrIndex, /, *, steps: SupportsIndex | None = None
    ) -> float: ...

else:
    def nextafter(x: _SupportsFloatOrIndex, y: _SupportsFloatOrIndex, /) -> float: ...

# `k` is optional for `perm` and defaults to None.
def perm(n: SupportsIndex, k: SupportsIndex | None = None, /) -> int: ...
def pow(x: _SupportsFloatOrIndex, y: _SupportsFloatOrIndex, /) -> float: ...
# Small integer literals spelled out explicitly (1..25, -1..-20, and 0);
# they feed the element type of the first `prod` overload below.
_PositiveInteger: TypeAlias = Literal[
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
]
_NegativeInteger: TypeAlias = Literal[
    -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20
]
_LiteralInteger = _PositiveInteger | _NegativeInteger | Literal[0]  # TODO: Use TypeAlias once mypy bugs are fixed

# Type variables for the `prod` overload that takes an explicit `start`:
# the element type and the start type are tracked separately and unioned
# in the return type.
_MultiplicableT1 = TypeVar('_MultiplicableT1', bound=SupportsMul[Any, Any])
_MultiplicableT2 = TypeVar('_MultiplicableT2', bound=SupportsMul[Any, Any])

# Element bound for `prod` when `start` is omitted: the element must satisfy
# both SupportsMul[Any, Any] and SupportsRMul[int, Any].
@type_check_only
class _SupportsProdWithNoDefaultGiven(SupportsMul[Any, Any], SupportsRMul[int, Any], Protocol): ...

_SupportsProdNoDefaultT = TypeVar('_SupportsProdNoDefaultT', bound=_SupportsProdWithNoDefaultGiven)

# This stub is based on the type stub for `builtins.sum`.
# Like `builtins.sum`, it cannot be precisely represented in a type stub
# without introducing many false positives.
# For more details on its limitations and false positives, see #13572.
# Instead, just like `builtins.sum`, we explicitly handle several useful cases.
#
# Overload 1: bools / small literal ints with an optional int `start` -> int.
# Overload 2: no `start` given -> the element type or Literal[1].
# Overload 3: explicit `start` -> union of the element type and the start type.
@overload
def prod(iterable: Iterable[bool | _LiteralInteger], /, *, start: int = 1) -> int: ...  # type: ignore[overload-overlap]
@overload
def prod(iterable: Iterable[_SupportsProdNoDefaultT], /) -> _SupportsProdNoDefaultT | Literal[1]: ...
@overload
def prod(
    iterable: Iterable[_MultiplicableT1], /, *, start: _MultiplicableT2
) -> _MultiplicableT1 | _MultiplicableT2: ...
def radians(x: _SupportsFloatOrIndex, /) -> float: ...
def remainder(x: _SupportsFloatOrIndex, y: _SupportsFloatOrIndex, /) -> float: ...
def sin(x: _SupportsFloatOrIndex, /) -> float: ...
def sinh(x: _SupportsFloatOrIndex, /) -> float: ...

# `sumprod` is only declared for Python 3.12 and later; its iterables are
# annotated with plain `float` elements.
if sys.version_info >= (3, 12):
    def sumprod(p: Iterable[float], q: Iterable[float], /) -> float: ...

def sqrt(x: _SupportsFloatOrIndex, /) -> float: ...
def tan(x: _SupportsFloatOrIndex, /) -> float: ...
def tanh(x: _SupportsFloatOrIndex, /) -> float: ...
# Is different from `_typeshed.SupportsTrunc`, which is not generic
@type_check_only
class _SupportsTrunc(Protocol[_T_co]):
    def __trunc__(self) -> _T_co: ...

# `trunc` returns whatever the argument's `__trunc__` returns.
def trunc(x: _SupportsTrunc[_T], /) -> _T: ...
def ulp(x: _SupportsFloatOrIndex, /) -> float: ...

# `fma` is only declared for Python 3.13 and later.
if sys.version_info >= (3, 13):
    def fma(x: _SupportsFloatOrIndex, y: _SupportsFloatOrIndex, z: _SupportsFloatOrIndex, /) -> float: ...


================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/os.pyi
================================================
from abc import ABC, abstractmethod
from typing import Callable, Protocol, TypeAlias, TypeVar, final, overload, runtime_checkable

from _typeshed import AnyStr_co, structseq

_T = TypeVar('_T')

# Declared as a plain `dict[str, str]` in this stub.
environ: dict[str, str]

# `getenv` overloads: without a default the result may be None; with a
# default of type `_T` the result is `str | _T`.
@overload
def getenv(key: str) -> str | None: ...
@overload
def getenv(key: str, default: _T) -> str | _T: ...
@final
class stat_result(structseq[float], tuple[int, int, int, int, int, int, int, float, float, float]):
    # The constructor of this class takes an iterable of variable length (though it must be at least 10).
    #
    # However, this class behaves like a tuple of 10 elements,
    # no matter how long the iterable supplied to the constructor is.
    # https://github.com/python/typeshed/pull/6560#discussion_r767162532
    #
    # The 10 elements always present are st_mode, st_ino, st_dev, st_nlink,
    # st_uid, st_gid, st_size, st_atime, st_mtime, st_ctime.
    #
    # More items may be added at the end by some implementations.
    #
    # Each field below is declared as a property with no setter: the first
    # seven are `int`, the three timestamps are `float`.
    @property
    def st_mode(self) -> int:
        """protection bits"""
        ...
    @property
    def st_ino(self) -> int:
        """inode"""
        ...
    @property
    def st_dev(self) -> int:
        """device"""
        ...
    @property
    def st_nlink(self) -> int:
        """number of hard links"""
        ...
    @property
    def st_uid(self) -> int:
        """user ID of owner"""
        ...
    @property
    def st_gid(self) -> int:
        """group ID of owner"""
        ...
    @property
    def st_size(self) -> int:
        """total size, in bytes"""
        ...
    @property
    def st_atime(self) -> float:
        """time of last access"""
        ...
    @property
    def st_mtime(self) -> float:
        """time of last modification"""
        ...
    @property
    def st_ctime(self) -> float:
        """time of last change"""
        ...

# (Samuel) PathLike is included here because it's used by pathlib

# mypy and pyright object to this being both ABC and Protocol.
# At runtime it inherits from ABC and is not a Protocol, but it will be
# on the allowlist for use as a Protocol starting in 3.14.
@runtime_checkable
class PathLike(ABC, Protocol[AnyStr_co]):  # type: ignore[misc]  # pyright: ignore[reportGeneralTypeIssues]
    __slots__ = ()

    # The single abstract method; its return type is the protocol's type parameter.
    @abstractmethod
    def __fspath__(self) -> AnyStr_co: ...

# Callable alias taking (str, int) and returning int.
_Opener: TypeAlias = Callable[[str, int], int]


================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/pathlib/__init__.pyi
================================================
import sys
import types
from collections.abc import Callable, Generator, Iterator, Sequence
from io import BufferedRandom, BufferedReader, BufferedWriter, FileIO, TextIOWrapper
from os import PathLike, stat_result
from types import GenericAlias, TracebackType
from typing import IO, Any, BinaryIO, ClassVar, Literal, TypeVar, overload

from _typeshed import (
    OpenBinaryMode,
    OpenBinaryModeReading,
    OpenBinaryModeUpdating,
    OpenBinaryModeWriting,
    OpenTextMode,
    ReadableBuffer,
    StrOrBytesPath,
    StrPath,
    Unused,
)
from typing_extensions import Never, Self, deprecated

# Bound type variable used by the 3.14+ move/copy overloads on `Path`, which
# return the same path type they were given as a target.
_PathT = TypeVar('_PathT', bound=PurePath)

__all__ = ['PurePath', 'PurePosixPath', 'PureWindowsPath', 'Path', 'PosixPath', 'WindowsPath']

if sys.version_info >= (3, 14):
    from pathlib.types import PathInfo

if sys.version_info >= (3, 13):
    __all__ += ['UnsupportedOperation']

class PurePath(PathLike[str]):
    # The declared `__slots__` differ by Python version.
    if sys.version_info >= (3, 13):
        __slots__ = (
            '_raw_paths',
            '_drv',
            '_root',
            '_tail_cached',
            '_str',
            '_str_normcase_cached',
            '_parts_normcase_cached',
            '_hash',
        )
    elif sys.version_info >= (3, 12):
        __slots__ = (
            '_raw_paths',
            '_drv',
            '_root',
            '_tail_cached',
            '_str',
            '_str_normcase_cached',
            '_parts_normcase_cached',
            '_lines_cached',
            '_hash',
        )
    else:
        __slots__ = ('_drv', '_root', '_parts', '_str', '_hash', '_pparts', '_cached_cparts')
    # `parser` and `full_match` are only declared for Python 3.13+.
    if sys.version_info >= (3, 13):
        parser: ClassVar[types.ModuleType]
        def full_match(self, pattern: StrPath, *, case_sensitive: bool | None = None) -> bool: ...

    # Path components, all declared as properties with no setter.
    @property
    def parts(self) -> tuple[str, ...]: ...
    @property
    def drive(self) -> str: ...
    @property
    def root(self) -> str: ...
    @property
    def anchor(self) -> str: ...
    @property
    def name(self) -> str: ...
    @property
    def suffix(self) -> str: ...
    @property
    def suffixes(self) -> list[str]: ...
    @property
    def stem(self) -> str: ...
    # From 3.12 the constructor also accepts (unused) keyword arguments and an
    # explicit `__init__` is declared.
    if sys.version_info >= (3, 12):
        def __new__(cls, *args: StrPath, **kwargs: Unused) -> Self: ...
        def __init__(self, *args: StrPath) -> None: ...  # pyright: ignore[reportInconsistentConstructor]
    else:
        def __new__(cls, *args: StrPath) -> Self: ...

    def __hash__(self) -> int: ...
    def __fspath__(self) -> str: ...
    # Ordering comparisons are declared against other PurePath objects only.
    def __lt__(self, other: PurePath) -> bool: ...
    def __le__(self, other: PurePath) -> bool: ...
    def __gt__(self, other: PurePath) -> bool: ...
    def __ge__(self, other: PurePath) -> bool: ...
    # `/` works with a path-like operand on either side and yields `Self`.
    def __truediv__(self, key: StrPath) -> Self: ...
    def __rtruediv__(self, key: StrPath) -> Self: ...
    def __bytes__(self) -> bytes: ...
    def as_posix(self) -> str: ...
    # NOTE(review): this deprecation marker is applied unconditionally (it is not
    # gated on sys.version_info) even though its message refers to Python 3.14;
    # `Path` re-declares `as_uri` without it. Confirm this matches upstream typeshed.
    @deprecated('Deprecated since Python 3.14; will be removed in Python 3.19. Use `Path.as_uri()` instead.')
    def as_uri(self) -> str: ...
    def is_absolute(self) -> bool: ...
    # `is_reserved` carries a deprecation marker from 3.13 onwards.
    if sys.version_info >= (3, 13):
        @deprecated(
            'Deprecated since Python 3.13; will be removed in Python 3.15. '
            'Use `os.path.isreserved()` to detect reserved paths on Windows.'
        )
        def is_reserved(self) -> bool: ...
    else:
        def is_reserved(self) -> bool: ...
    # `is_relative_to`: one argument on 3.14+; on 3.12-3.13 extra positional
    # arguments are accepted but marked deprecated; variadic before 3.12.
    if sys.version_info >= (3, 14):
        def is_relative_to(self, other: StrPath) -> bool: ...
    elif sys.version_info >= (3, 12):
        @overload
        def is_relative_to(self, other: StrPath, /) -> bool: ...
        @overload
        @deprecated('Passing additional arguments is deprecated since Python 3.12; removed in Python 3.14.')
        def is_relative_to(self, other: StrPath, /, *_deprecated: StrPath) -> bool: ...
    else:
        def is_relative_to(self, *other: StrPath) -> bool: ...

    # 3.12+ adds the keyword-only `case_sensitive` parameter to `match`.
    if sys.version_info >= (3, 12):
        def match(self, path_pattern: str, *, case_sensitive: bool | None = None) -> bool: ...
    else:
        def match(self, path_pattern: str) -> bool: ...

    # `relative_to` follows the same version pattern as `is_relative_to`, with
    # the keyword-only `walk_up` flag available from 3.12.
    if sys.version_info >= (3, 14):
        def relative_to(self, other: StrPath, *, walk_up: bool = False) -> Self: ...
    elif sys.version_info >= (3, 12):
        @overload
        def relative_to(self, other: StrPath, /, *, walk_up: bool = False) -> Self: ...
        @overload
        @deprecated('Passing additional arguments is deprecated since Python 3.12; removed in Python 3.14.')
        def relative_to(self, other: StrPath, /, *_deprecated: StrPath, walk_up: bool = False) -> Self: ...
    else:
        def relative_to(self, *other: StrPath) -> Self: ...

    # The `with_*` methods and `joinpath` return `Self`, so subclasses keep their type.
    def with_name(self, name: str) -> Self: ...
    def with_stem(self, stem: str) -> Self: ...
    def with_suffix(self, suffix: str) -> Self: ...
    def joinpath(self, *other: StrPath) -> Self: ...
    @property
    def parents(self) -> Sequence[Self]: ...
    @property
    def parent(self) -> Self: ...
    # `__class_getitem__` is only declared before Python 3.11.
    if sys.version_info < (3, 11):
        def __class_getitem__(cls, type: Any) -> GenericAlias: ...

    # `with_segments` is only declared for Python 3.12+.
    if sys.version_info >= (3, 12):
        def with_segments(self, *args: StrPath) -> Self: ...

class PurePosixPath(PurePath):
    __slots__ = ()

class PureWindowsPath(PurePath):
    __slots__ = ()

class Path(PurePath):
    # The declared `__slots__` differ by Python version.
    if sys.version_info >= (3, 14):
        __slots__ = ('_info',)
    elif sys.version_info >= (3, 10):
        __slots__ = ()
    else:
        __slots__ = ('_accessor',)

    # Both branches declare the same signature; only the 3.12+ branch carries
    # the pyright suppression comment.
    if sys.version_info >= (3, 12):
        def __new__(cls, *args: StrPath, **kwargs: Unused) -> Self: ...  # pyright: ignore[reportInconsistentConstructor]
    else:
        def __new__(cls, *args: StrPath, **kwargs: Unused) -> Self: ...

    @classmethod
    def cwd(cls) -> Self: ...
    # 3.10+ adds the keyword-only `follow_symlinks` flag to `stat` and `chmod`.
    if sys.version_info >= (3, 10):
        def stat(self, *, follow_symlinks: bool = True) -> stat_result: ...
        def chmod(self, mode: int, *, follow_symlinks: bool = True) -> None: ...
    else:
        def stat(self) -> stat_result: ...
        def chmod(self, mode: int) -> None: ...

    # 3.13+: `from_uri` is added, `is_dir`/`is_file` gain `follow_symlinks`, and
    # `read_text` gains `newline`. Before 3.13 the context-manager methods
    # `__enter__`/`__exit__` are declared instead.
    if sys.version_info >= (3, 13):
        @classmethod
        def from_uri(cls, uri: str) -> Self: ...
        def is_dir(self, *, follow_symlinks: bool = True) -> bool: ...
        def is_file(self, *, follow_symlinks: bool = True) -> bool: ...
        def read_text(
            self, encoding: str | None = None, errors: str | None = None, newline: str | None = None
        ) -> str: ...
    else:
        def __enter__(self) -> Self: ...
        def __exit__(
            self, t: type[BaseException] | None, v: BaseException | None, tb: TracebackType | None
        ) -> None: ...
        def is_dir(self) -> bool: ...
        def is_file(self) -> bool: ...
        def read_text(self, encoding: str | None = None, errors: str | None = None) -> str: ...

    # `glob`/`rglob`: return type is `Iterator[Self]` on 3.13+ and
    # `Generator[Self, None, None]` before; keyword-only options grow by version.
    if sys.version_info >= (3, 13):
        def glob(
            self, pattern: str, *, case_sensitive: bool | None = None, recurse_symlinks: bool = False
        ) -> Iterator[Self]: ...
        def rglob(
            self, pattern: str, *, case_sensitive: bool | None = None, recurse_symlinks: bool = False
        ) -> Iterator[Self]: ...
    elif sys.version_info >= (3, 12):
        def glob(self, pattern: str, *, case_sensitive: bool | None = None) -> Generator[Self, None, None]: ...
        def rglob(self, pattern: str, *, case_sensitive: bool | None = None) -> Generator[Self, None, None]: ...
    else:
        def glob(self, pattern: str) -> Generator[Self, None, None]: ...
        def rglob(self, pattern: str) -> Generator[Self, None, None]: ...

    # 3.12+ adds the keyword-only `follow_symlinks` flag to `exists`.
    if sys.version_info >= (3, 12):
        def exists(self, *, follow_symlinks: bool = True) -> bool: ...
    else:
        def exists(self) -> bool: ...

    def is_symlink(self) -> bool: ...
    def is_socket(self) -> bool: ...
    def is_fifo(self) -> bool: ...
    def is_block_device(self) -> bool: ...
    def is_char_device(self) -> bool: ...
    # `is_junction` is only declared for Python 3.12+.
    if sys.version_info >= (3, 12):
        def is_junction(self) -> bool: ...

    def iterdir(self) -> Generator[Self, None, None]: ...
    def lchmod(self, mode: int) -> None: ...
    def lstat(self) -> stat_result: ...
    def mkdir(self, mode: int = 0o777, parents: bool = False, exist_ok: bool = False) -> None: ...

    # 3.14+ additions. Each move/copy method has two overloads: a `_PathT`
    # target returns that same path type, any other `StrPath` returns `Self`.
    if sys.version_info >= (3, 14):
        @property
        def info(self) -> PathInfo: ...
        @overload
        def move_into(self, target_dir: _PathT) -> _PathT: ...  # type: ignore[overload-overlap]
        @overload
        def move_into(self, target_dir: StrPath) -> Self: ...  # type: ignore[overload-overlap]
        @overload
        def move(self, target: _PathT) -> _PathT: ...  # type: ignore[overload-overlap]
        @overload
        def move(self, target: StrPath) -> Self: ...  # type: ignore[overload-overlap]
        @overload
        def copy_into(
            self, target_dir: _PathT, *, follow_symlinks: bool = True, preserve_metadata: bool = False
        ) -> _PathT: ...  # type: ignore[overload-overlap]
        @overload
        def copy_into(
            self, target_dir: StrPath, *, follow_symlinks: bool = True, preserve_metadata: bool = False
        ) -> Self: ...  # type: ignore[overload-overlap]
        @overload
        def copy(self, target: _PathT, *, follow_symlinks: bool = True, preserve_metadata: bool = False) -> _PathT: ...  # type: ignore[overload-overlap]
        @overload
        def copy(self, target: StrPath, *, follow_symlinks: bool = True, preserve_metadata: bool = False) -> Self: ...  # type: ignore[overload-overlap]

    # Adapted from builtins.open
    # Text mode: always returns a TextIOWrapper
    # The Traversable .open in stdlib/importlib/abc.pyi should be kept in sync with this.
    @overload
    def open(
        self,
        mode: OpenTextMode = 'r',
        buffering: int = -1,
        encoding: str | None = None,
        errors: str | None = None,
        newline: str | None = None,
    ) -> TextIOWrapper: ...
    # Unbuffered binary mode: returns a FileIO
    @overload
    def open(
        self,
        mode: OpenBinaryMode,
        buffering: Literal[0],
        encoding: None = None,
        errors: None = None,
        newline: None = None,
    ) -> FileIO: ...
    # Buffering is on: return BufferedRandom, BufferedReader, or BufferedWriter
    @overload
    def open(
        self,
        mode: OpenBinaryModeUpdating,
        buffering: Literal[-1, 1] = -1,
        encoding: None = None,
        errors: None = None,
        newline: None = None,
    ) -> BufferedRandom: ...
    @overload
    def open(
        self,
        mode: OpenBinaryModeWriting,
        buffering: Literal[-1, 1] = -1,
        encoding: None = None,
        errors: None = None,
        newline: None = None,
    ) -> BufferedWriter: ...
    @overload
    def open(
        self,
        mode: OpenBinaryModeReading,
        buffering: Literal[-1, 1] = -1,
        encoding: None = None,
        errors: None = None,
        newline: None = None,
    ) -> BufferedReader: ...
    # Buffering cannot be determined: fall back to BinaryIO
    @overload
    def open(
        self,
        mode: OpenBinaryMode,
        buffering: int = -1,
        encoding: None = None,
        errors: None = None,
        newline: None = None,
    ) -> BinaryIO: ...
    # Fallback if mode is not specified
    @overload
    def open(
        self,
        mode: str,
        buffering: int = -1,
        encoding: str | None = None,
        errors: str | None = None,
        newline: str | None = None,
    ) -> IO[Any]: ...

    # These methods do "exist" on Windows, but they always raise NotImplementedError.
    # (The win32 branch annotates `self` as `Never`, so a call is flagged by type checkers.)
    if sys.platform == 'win32':
        if sys.version_info >= (3, 13):
            # raises UnsupportedOperation:
            def owner(self: Never, *, follow_symlinks: bool = True) -> str: ...  # type: ignore[misc]
            def group(self: Never, *, follow_symlinks: bool = True) -> str: ...  # type: ignore[misc]
        else:
            def owner(self: Never) -> str: ...  # type: ignore[misc]
            def group(self: Never) -> str: ...  # type: ignore[misc]
    else:
        if sys.version_info >= (3, 13):
            def owner(self, *, follow_symlinks: bool = True) -> str: ...
            def group(self, *, follow_symlinks: bool = True) -> str: ...
        else:
            def owner(self) -> str: ...
            def group(self) -> str: ...

    # This method does "exist" on Windows on <3.12, but always raises NotImplementedError
    # On py312+, it works properly on Windows, as with all other platforms
    if sys.platform == 'win32' and sys.version_info < (3, 12):
        def is_mount(self: Never) -> bool: ...  # type: ignore[misc]
    else:
        def is_mount(self) -> bool: ...

    def readlink(self) -> Self: ...

    # 3.10+ widens the `rename`/`replace` target from `str | PurePath` to `StrPath`.
    if sys.version_info >= (3, 10):
        def rename(self, target: StrPath) -> Self: ...
        def replace(self, target: StrPath) -> Self: ...
    else:
        def rename(self, target: str | PurePath) -> Self: ...
        def replace(self, target: str | PurePath) -> Self: ...

    def resolve(self, strict: bool = False) -> Self: ...
    def rmdir(self) -> None: ...
    def symlink_to(self, target: StrOrBytesPath, target_is_directory: bool = False) -> None: ...
    # `hardlink_to` is only declared for Python 3.10+.
    if sys.version_info >= (3, 10):
        def hardlink_to(self, target: StrOrBytesPath) -> None: ...

    def touch(self, mode: int = 0o666, exist_ok: bool = True) -> None: ...
    def unlink(self, missing_ok: bool = False) -> None: ...
    @classmethod
    def home(cls) -> Self: ...
    def absolute(self) -> Self: ...
    def expanduser(self) -> Self: ...
    def read_bytes(self) -> bytes: ...
    def samefile(self, other_path: StrPath) -> bool: ...
    # `write_bytes` and `write_text` are both annotated as returning `int`.
    def write_bytes(self, data: ReadableBuffer) -> int: ...
    # 3.10+ adds the `newline` parameter to `write_text`.
    if sys.version_info >= (3, 10):
        def write_text(
            self, data: str, encoding: str | None = None, errors: str | None = None, newline: str | None = None
        ) -> int: ...
    else:
        def write_text(self, data: str, encoding: str | None = None, errors: str | None = None) -> int: ...
    # `link_to` is only declared before 3.12, and is marked deprecated on 3.10-3.11.
    if sys.version_info < (3, 12):
        if sys.version_info >= (3, 10):
            @deprecated('Deprecated since Python 3.10; removed in Python 3.12. Use `hardlink_to()` instead.')
            def link_to(self, target: StrOrBytesPath) -> None: ...
        else:
            def link_to(self, target: StrOrBytesPath) -> None: ...
    # `walk` is only declared for Python 3.12+; it yields
    # (Self, list[str], list[str]) tuples.
    if sys.version_info >= (3, 12):
        def walk(
            self,
            top_down: bool = True,
            on_error: Callable[[OSError], object] | None = None,
            follow_symlinks: bool = False,
        ) -> Iterator[tuple[Self, list[str], list[str]]]: ...

    # Re-declared here without the deprecation marker that `PurePath.as_uri` carries.
    def as_uri(self) -> str: ...

class PosixPath(Path, PurePosixPath):
    __slots__ = ()

class WindowsPath(Path, PureWindowsPath):
    __slots__ = ()

# `UnsupportedOperation` is only declared for Python 3.13+.
if sys.version_info >= (3, 13):
    class UnsupportedOperation(NotImplementedError): ...


================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/pathlib/types.pyi
================================================
from typing import Protocol, runtime_checkable

# Structural type returned by `Path.info` (declared for 3.14+ in pathlib/__init__.pyi).
# All four methods return `bool`; the first three take a keyword-only
# `follow_symlinks` flag defaulting to True.
@runtime_checkable
class PathInfo(Protocol):
    def exists(self, *, follow_symlinks: bool = True) -> bool: ...
    def is_dir(self, *, follow_symlinks: bool = True) -> bool: ...
    def is_file(self, *, follow_symlinks: bool = True) -> bool: ...
    def is_symlink(self) -> bool: ...


================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/re.pyi
================================================
import enum
import sre_compile
import sre_constants
import sys
from collections.abc import Callable, Iterator, Mapping
from types import GenericAlias
from typing import Any, AnyStr, Final, Generic, Literal, TypeVar, final, overload

from _typeshed import MaybeNone, ReadableBuffer
from typing_extensions import TypeAlias, deprecated

__all__ = [
    'match',
    'fullmatch',
    'search',
    'sub',
    'subn',
    'split',
    'findall',
    'finditer',
    'compile',
    'purge',
    'escape',
    'error',
    'A',
    'I',
    'L',
    'M',
    'S',
    'X',
    'U',
    'ASCII',
    'IGNORECASE',
    'LOCALE',
    'MULTILINE',
    'DOTALL',
    'VERBOSE',
    'UNICODE',
    'Match',
    'Pattern',
]
# Version-dependent additions to the public name list.
if sys.version_info < (3, 13):
    __all__ += ['template']

if sys.version_info >= (3, 11):
    __all__ += ['NOFLAG', 'RegexFlag']

if sys.version_info >= (3, 13):
    __all__ += ['PatternError']

    PatternError = sre_constants.error

_T = TypeVar('_T')

# The implementation defines this in re._constants (version_info >= 3, 11) or
# sre_constants. Typeshed has it here because its __module__ attribute is set to "re".
class error(Exception):
    msg: str
    pattern: str | bytes | None
    pos: int | None
    lineno: int
    colno: int
    def __init__(self, msg: str, pattern: str | bytes | None = None, pos: int | None = None) -> None: ...

# Generic over `AnyStr`: a match is either a `str` match or a `bytes` match.
@final
class Match(Generic[AnyStr]):
    @property
    def pos(self) -> int: ...
    @property
    def endpos(self) -> int: ...
    @property
    def lastindex(self) -> int | None: ...
    @property
    def lastgroup(self) -> str | None: ...
    @property
    def string(self) -> AnyStr: ...

    # The regular expression object whose match() or search() method produced
    # this match instance.
    @property
    def re(self) -> Pattern[AnyStr]: ...
    # `expand` overloads: a bytes match accepts any ReadableBuffer template.
    @overload
    def expand(self: Match[str], template: str) -> str: ...
    @overload
    def expand(self: Match[bytes], template: ReadableBuffer) -> bytes: ...
    @overload
    def expand(self, template: AnyStr) -> AnyStr: ...
    # group() returns "AnyStr" or "AnyStr | None", depending on the pattern.
    @overload
    def group(self, group: Literal[0] = 0, /) -> AnyStr: ...
    @overload
    def group(self, group: str | int, /) -> AnyStr | MaybeNone: ...
    @overload
    def group(self, group1: str | int, group2: str | int, /, *groups: str | int) -> tuple[AnyStr | MaybeNone, ...]: ...
    # Each item of groups()'s return tuple is either "AnyStr" or
    # "AnyStr | None", depending on the pattern.
    @overload
    def groups(self) -> tuple[AnyStr | MaybeNone, ...]: ...
    @overload
    def groups(self, default: _T) -> tuple[AnyStr | _T, ...]: ...
    # Each value in groupdict()'s return dict is either "AnyStr" or
    # "AnyStr | None", depending on the pattern.
    @overload
    def groupdict(self) -> dict[str, AnyStr | MaybeNone]: ...
    @overload
    def groupdict(self, default: _T) -> dict[str, AnyStr | _T]: ...
    # `start`, `end` and `span` take an optional positional-only group (index
    # or name) that defaults to 0.
    def start(self, group: int | str = 0, /) -> int: ...
    def end(self, group: int | str = 0, /) -> int: ...
    def span(self, group: int | str = 0, /) -> tuple[int, int]: ...
    @property
    def regs(self) -> tuple[tuple[int, int], ...]: ...  # undocumented
    # __getitem__() returns "AnyStr" or "AnyStr | None", depending on the pattern.
    @overload
    def __getitem__(self, key: Literal[0], /) -> AnyStr: ...
    @overload
    def __getitem__(self, key: int | str, /) -> AnyStr | MaybeNone: ...
    def __copy__(self) -> Match[AnyStr]: ...
    def __deepcopy__(self, memo: Any, /) -> Match[AnyStr]: ...
    def __class_getitem__(cls, item: Any, /) -> GenericAlias: ...

# Generic over `AnyStr`. Most methods below come as three overloads: one for
# `Pattern[str]`, one for `Pattern[bytes]` (whose subject string may be any
# ReadableBuffer), and a generic `AnyStr` fallback.
@final
class Pattern(Generic[AnyStr]):
    @property
    def flags(self) -> int: ...
    @property
    def groupindex(self) -> Mapping[str, int]: ...
    @property
    def groups(self) -> int: ...
    @property
    def pattern(self) -> AnyStr: ...
    # `search`, `match` and `fullmatch` share one signature shape and return
    # `Match[...] | None`.
    @overload
    def search(self: Pattern[str], string: str, pos: int = 0, endpos: int = sys.maxsize) -> Match[str] | None: ...
    @overload
    def search(
        self: Pattern[bytes], string: ReadableBuffer, pos: int = 0, endpos: int = sys.maxsize
    ) -> Match[bytes] | None: ...
    @overload
    def search(self, string: AnyStr, pos: int = 0, endpos: int = sys.maxsize) -> Match[AnyStr] | None: ...
    @overload
    def match(self: Pattern[str], string: str, pos: int = 0, endpos: int = sys.maxsize) -> Match[str] | None: ...
    @overload
    def match(
        self: Pattern[bytes], string: ReadableBuffer, pos: int = 0, endpos: int = sys.maxsize
    ) -> Match[bytes] | None: ...
    @overload
    def match(self, string: AnyStr, pos: int = 0, endpos: int = sys.maxsize) -> Match[AnyStr] | None: ...
    @overload
    def fullmatch(self: Pattern[str], string: str, pos: int = 0, endpos: int = sys.maxsize) -> Match[str] | None: ...
    @overload
    def fullmatch(
        self: Pattern[bytes], string: ReadableBuffer, pos: int = 0, endpos: int = sys.maxsize
    ) -> Match[bytes] | None: ...
    @overload
    def fullmatch(self, string: AnyStr, pos: int = 0, endpos: int = sys.maxsize) -> Match[AnyStr] | None: ...
    # `split` list items are annotated `... | MaybeNone`.
    @overload
    def split(self: Pattern[str], string: str, maxsplit: int = 0) -> list[str | MaybeNone]: ...
    @overload
    def split(self: Pattern[bytes], string: ReadableBuffer, maxsplit: int = 0) -> list[bytes | MaybeNone]: ...
    @overload
    def split(self, string: AnyStr, maxsplit: int = 0) -> list[AnyStr | MaybeNone]: ...
    # return type depends on the number of groups in the pattern
    @overload
    def findall(self: Pattern[str], string: str, pos: int = 0, endpos: int = sys.maxsize) -> list[Any]: ...
    @overload
    def findall(self: Pattern[bytes], string: ReadableBuffer, pos: int = 0, endpos: int = sys.maxsize) -> list[Any]: ...
    @overload
    def findall(self, string: AnyStr, pos: int = 0, endpos: int = sys.maxsize) -> list[AnyStr]: ...
    @overload
    def finditer(self: Pattern[str], string: str, pos: int = 0, endpos: int = sys.maxsize) -> Iterator[Match[str]]: ...
    @overload
    def finditer(
        self: Pattern[bytes], string: ReadableBuffer, pos: int = 0, endpos: int = sys.maxsize
    ) -> Iterator[Match[bytes]]: ...
    @overload
    def finditer(self, string: AnyStr, pos: int = 0, endpos: int = sys.maxsize) -> Iterator[Match[AnyStr]]: ...
    # `sub`/`subn`: `repl` is either a replacement value or a callable that
    # receives the Match; `subn` returns a (result, int) tuple.
    @overload
    def sub(self: Pattern[str], repl: str | Callable[[Match[str]], str], string: str, count: int = 0) -> str: ...
    @overload
    def sub(
        self: Pattern[bytes],
        repl: ReadableBuffer | Callable[[Match[bytes]], ReadableBuffer],
        string: ReadableBuffer,
        count: int = 0,
    ) -> bytes: ...
    @overload
    def sub(self, repl: AnyStr | Callable[[Match[AnyStr]], AnyStr], string: AnyStr, count: int = 0) -> AnyStr: ...
    @overload
    def subn(
        self: Pattern[str], repl: str | Callable[[Match[str]], str], string: str, count: int = 0
    ) -> tuple[str, int]: ...
    @overload
    def subn(
        self: Pattern[bytes],
        repl: ReadableBuffer | Callable[[Match[bytes]], ReadableBuffer],
        string: ReadableBuffer,
        count: int = 0,
    ) -> tuple[bytes, int]: ...
    @overload
    def subn(
        self, repl: AnyStr | Callable[[Match[AnyStr]], AnyStr], string: AnyStr, count: int = 0
    ) -> tuple[AnyStr, int]: ...
    def __copy__(self) -> Pattern[AnyStr]: ...
    def __deepcopy__(self, memo: Any, /) -> Pattern[AnyStr]: ...
    # Both `__eq__` and `__hash__` are declared explicitly on Pattern.
    def __eq__(self, value: object, /) -> bool: ...
    def __hash__(self) -> int: ...
    def __class_getitem__(cls, item: Any, /) -> GenericAlias: ...
# ----- re variables and constants ----- class RegexFlag(enum.IntFlag): A = sre_compile.SRE_FLAG_ASCII ASCII = A DEBUG = sre_compile.SRE_FLAG_DEBUG I = sre_compile.SRE_FLAG_IGNORECASE IGNORECASE = I L = sre_compile.SRE_FLAG_LOCALE LOCALE = L M = sre_compile.SRE_FLAG_MULTILINE MULTILINE = M S = sre_compile.SRE_FLAG_DOTALL DOTALL = S X = sre_compile.SRE_FLAG_VERBOSE VERBOSE = X U = sre_compile.SRE_FLAG_UNICODE UNICODE = U if sys.version_info < (3, 13): T = sre_compile.SRE_FLAG_TEMPLATE TEMPLATE = T if sys.version_info >= (3, 11): NOFLAG = 0 A: Final = RegexFlag.A ASCII: Final = RegexFlag.ASCII DEBUG: Final = RegexFlag.DEBUG I: Final = RegexFlag.I IGNORECASE: Final = RegexFlag.IGNORECASE L: Final = RegexFlag.L LOCALE: Final = RegexFlag.LOCALE M: Final = RegexFlag.M MULTILINE: Final = RegexFlag.MULTILINE S: Final = RegexFlag.S DOTALL: Final = RegexFlag.DOTALL X: Final = RegexFlag.X VERBOSE: Final = RegexFlag.VERBOSE U: Final = RegexFlag.U UNICODE: Final = RegexFlag.UNICODE if sys.version_info < (3, 13): T: Final = RegexFlag.T TEMPLATE: Final = RegexFlag.TEMPLATE if sys.version_info >= (3, 11): NOFLAG: Final = RegexFlag.NOFLAG _FlagsType: TypeAlias = int | RegexFlag # Type-wise the compile() overloads are unnecessary, they could also be modeled using # unions in the parameter types. However mypy has a bug regarding TypeVar # constraints (https://github.com/python/mypy/issues/11880), # which limits us here because AnyStr is a constrained TypeVar. # pattern arguments do *not* accept arbitrary buffers such as bytearray, # because the pattern must be hashable. @overload def compile(pattern: AnyStr, flags: _FlagsType = 0) -> Pattern[AnyStr]: ... @overload def compile(pattern: Pattern[AnyStr], flags: _FlagsType = 0) -> Pattern[AnyStr]: ... @overload def search(pattern: str | Pattern[str], string: str, flags: _FlagsType = 0) -> Match[str] | None: ... @overload def search(pattern: bytes | Pattern[bytes], string: ReadableBuffer, flags: _FlagsType = 0) -> Match[bytes] | None: ... 
# Every function below comes as a pair of overloads: one for str (a `str` or
# `Pattern[str]` pattern with a `str` subject) and one for bytes (a `bytes` or
# `Pattern[bytes]` pattern with any ReadableBuffer subject). `flags` always
# defaults to 0.
@overload
def match(pattern: str | Pattern[str], string: str, flags: _FlagsType = 0) -> Match[str] | None: ...
@overload
def match(pattern: bytes | Pattern[bytes], string: ReadableBuffer, flags: _FlagsType = 0) -> Match[bytes] | None: ...
@overload
def fullmatch(pattern: str | Pattern[str], string: str, flags: _FlagsType = 0) -> Match[str] | None: ...
@overload
def fullmatch(
    pattern: bytes | Pattern[bytes], string: ReadableBuffer, flags: _FlagsType = 0
) -> Match[bytes] | None: ...
# `split` list items are annotated `... | MaybeNone`.
@overload
def split(
    pattern: str | Pattern[str], string: str, maxsplit: int = 0, flags: _FlagsType = 0
) -> list[str | MaybeNone]: ...
@overload
def split(
    pattern: bytes | Pattern[bytes], string: ReadableBuffer, maxsplit: int = 0, flags: _FlagsType = 0
) -> list[bytes | MaybeNone]: ...
# `findall` is annotated as returning `list[Any]` in both overloads.
@overload
def findall(pattern: str | Pattern[str], string: str, flags: _FlagsType = 0) -> list[Any]: ...
@overload
def findall(pattern: bytes | Pattern[bytes], string: ReadableBuffer, flags: _FlagsType = 0) -> list[Any]: ...
@overload
def finditer(pattern: str | Pattern[str], string: str, flags: _FlagsType = 0) -> Iterator[Match[str]]: ...
@overload
def finditer(
    pattern: bytes | Pattern[bytes], string: ReadableBuffer, flags: _FlagsType = 0
) -> Iterator[Match[bytes]]: ...
# `sub`/`subn`: `repl` is either a replacement value or a callable that
# receives the Match; `subn` returns a (result, int) tuple.
@overload
def sub(
    pattern: str | Pattern[str],
    repl: str | Callable[[Match[str]], str],
    string: str,
    count: int = 0,
    flags: _FlagsType = 0,
) -> str: ...
@overload
def sub(
    pattern: bytes | Pattern[bytes],
    repl: ReadableBuffer | Callable[[Match[bytes]], ReadableBuffer],
    string: ReadableBuffer,
    count: int = 0,
    flags: _FlagsType = 0,
) -> bytes: ...
@overload
def subn(
    pattern: str | Pattern[str],
    repl: str | Callable[[Match[str]], str],
    string: str,
    count: int = 0,
    flags: _FlagsType = 0,
) -> tuple[str, int]: ...
@overload
def subn(
    pattern: bytes | Pattern[bytes],
    repl: ReadableBuffer | Callable[[Match[bytes]], ReadableBuffer],
    string: ReadableBuffer,
    count: int = 0,
    flags: _FlagsType = 0,
) -> tuple[bytes, int]: ...
# Returns the same string kind (str or bytes) it is given.
def escape(pattern: AnyStr) -> AnyStr: ...
def purge() -> None: ...

# `template` is declared only below 3.13; from 3.11 it additionally carries
# the @deprecated marker.
if sys.version_info < (3, 13):
    if sys.version_info >= (3, 11):
        @deprecated('Deprecated since Python 3.11; removed in Python 3.13. Use `re.compile()` instead.')
        def template(pattern: AnyStr | Pattern[AnyStr], flags: _FlagsType = 0) -> Pattern[AnyStr]: ...  # undocumented
    else:
        def template(pattern: AnyStr | Pattern[AnyStr], flags: _FlagsType = 0) -> Pattern[AnyStr]: ...  # undocumented

================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/sys.pyi
================================================
from typing import Any, Final, Literal, TextIO, final, type_check_only

from _typeshed import MaybeNone, structseq
from typing_extensions import TypeAlias

# NOTE(review): `stdin` is commented out in this stub while `stdout`/`stderr`
# remain declared; presumably stdin is intentionally unsupported. Confirm.
# stdin: TextIO | MaybeNone
stdout: TextIO | MaybeNone
stderr: TextIO | MaybeNone

version: str

# Type alias used as a mixin for structseq classes that cannot be instantiated at runtime
# This can't be represented in the type system, so we just use `structseq[Any]`
_UninstantiableStructseq: TypeAlias = structseq[Any]

_ReleaseLevel: TypeAlias = Literal['alpha', 'beta', 'candidate', 'final']

# Type of `sys.version_info`: a 5-tuple (int, int, int, release level, int)
# that also exposes each position through a named read-only property.
@final
@type_check_only
class _version_info(_UninstantiableStructseq, tuple[int, int, int, _ReleaseLevel, int]):
    __match_args__: Final = ('major', 'minor', 'micro', 'releaselevel', 'serial')

    @property
    def major(self) -> int: ...
    @property
    def minor(self) -> int: ...
    @property
    def micro(self) -> int: ...
    @property
    def releaselevel(self) -> _ReleaseLevel: ...
    @property
    def serial(self) -> int: ...
version_info: _version_info

================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/types.pyi
================================================
import sys
from collections.abc import (
    AsyncGenerator,
    Awaitable,
    Callable,
    Coroutine,
    Generator,
    ItemsView,
    Iterable,
    Iterator,
    KeysView,
    Mapping,
    MutableSequence,
    ValuesView,
)
from importlib.machinery import ModuleSpec
from typing import Any, ClassVar, Literal, TypeVar, final, overload

from _typeshed import AnnotationForm, MaybeNone, SupportsKeysAndGetItem
from _typeshed.importlib import LoaderProtocol
from typing_extensions import ParamSpec, Self, TypeAliasType, TypeVarTuple, deprecated, disjoint_base

if sys.version_info >= (3, 14):
    from _typeshed import AnnotateFunc

# Names exported on every supported version; version-specific names are appended below.
__all__ = [
    'FunctionType',
    'LambdaType',
    'CodeType',
    'MappingProxyType',
    'SimpleNamespace',
    'GeneratorType',
    'CoroutineType',
    'AsyncGeneratorType',
    'MethodType',
    'BuiltinFunctionType',
    'ModuleType',
    'TracebackType',
    'FrameType',
    'GetSetDescriptorType',
    'MemberDescriptorType',
    'new_class',
    'prepare_class',
    'DynamicClassAttribute',
    'coroutine',
    'BuiltinMethodType',
    'ClassMethodDescriptorType',
    'MethodDescriptorType',
    'MethodWrapperType',
    'WrapperDescriptorType',
    'resolve_bases',
    'CellType',
    'GenericAlias',
]

if sys.version_info >= (3, 10):
    __all__ += ['EllipsisType', 'NoneType', 'NotImplementedType', 'UnionType']

if sys.version_info >= (3, 12):
    __all__ += ['get_original_bases']

if sys.version_info >= (3, 13):
    __all__ += ['CapsuleType']

# Note, all classes "defined" here require special handling.

_T1 = TypeVar('_T1')
_T2 = TypeVar('_T2')
_KT_co = TypeVar('_KT_co', covariant=True)
_VT_co = TypeVar('_VT_co', covariant=True)

# Make sure this class definition stays roughly in line with `builtins.function`
@final
class FunctionType:
    # `__closure__` and `__globals__` are read-only properties; the other
    # attributes are declared as plain (assignable) annotations.
    @property
    def __closure__(self) -> tuple[CellType, ...] | None: ...
    __code__: CodeType
    __defaults__: tuple[Any, ...] | None
    __dict__: dict[str, Any]
    @property
    def __globals__(self) -> dict[str, Any]: ...
    __name__: str
    __qualname__: str
    __annotations__: dict[str, AnnotationForm]
    if sys.version_info >= (3, 14):
        __annotate__: AnnotateFunc | None
    __kwdefaults__: dict[str, Any] | None
    if sys.version_info >= (3, 10):
        @property
        def __builtins__(self) -> dict[str, Any]: ...
    if sys.version_info >= (3, 12):
        __type_params__: tuple[TypeVar | ParamSpec | TypeVarTuple, ...]

    __module__: str
    # The 3.13+ constructor differs only by the extra `kwdefaults` parameter.
    if sys.version_info >= (3, 13):
        def __new__(
            cls,
            code: CodeType,
            globals: dict[str, Any],
            name: str | None = None,
            argdefs: tuple[object, ...] | None = None,
            closure: tuple[CellType, ...] | None = None,
            kwdefaults: dict[str, object] | None = None,
        ) -> Self: ...
    else:
        def __new__(
            cls,
            code: CodeType,
            globals: dict[str, Any],
            name: str | None = None,
            argdefs: tuple[object, ...] | None = None,
            closure: tuple[CellType, ...] | None = None,
        ) -> Self: ...

    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    # Descriptor access: through the class (instance is None) yields the
    # function itself, through an instance yields a MethodType.
    @overload
    def __get__(self, instance: None, owner: type, /) -> FunctionType: ...
    @overload
    def __get__(self, instance: object, owner: type | None = None, /) -> MethodType: ...

LambdaType = FunctionType

# Code object stub. `__new__` and `replace` are each declared three times
# (3.11+, 3.10, older) because the field set differs: 3.10 uses `linetable`
# in place of `lnotab`, and 3.11 adds `qualname` and `exceptiontable`.
@final
class CodeType:
    def __eq__(self, value: object, /) -> bool: ...
    def __hash__(self) -> int: ...
    @property
    def co_argcount(self) -> int: ...
    @property
    def co_posonlyargcount(self) -> int: ...
    @property
    def co_kwonlyargcount(self) -> int: ...
    @property
    def co_nlocals(self) -> int: ...
    @property
    def co_stacksize(self) -> int: ...
    @property
    def co_flags(self) -> int: ...
    @property
    def co_code(self) -> bytes: ...
    @property
    def co_consts(self) -> tuple[Any, ...]: ...
    @property
    def co_names(self) -> tuple[str, ...]: ...
    @property
    def co_varnames(self) -> tuple[str, ...]: ...
    @property
    def co_filename(self) -> str: ...
    @property
    def co_name(self) -> str: ...
    @property
    def co_firstlineno(self) -> int: ...
    # `co_lnotab` is declared on every version, but marked deprecated from 3.10.
    if sys.version_info >= (3, 10):
        @property
        @deprecated('Deprecated since Python 3.10; will be removed in Python 3.15. Use `CodeType.co_lines()` instead.')
        def co_lnotab(self) -> bytes: ...
    else:
        @property
        def co_lnotab(self) -> bytes: ...

    @property
    def co_freevars(self) -> tuple[str, ...]: ...
    @property
    def co_cellvars(self) -> tuple[str, ...]: ...
    if sys.version_info >= (3, 10):
        @property
        def co_linetable(self) -> bytes: ...
        def co_lines(self) -> Iterator[tuple[int, int, int | None]]: ...
    if sys.version_info >= (3, 11):
        @property
        def co_exceptiontable(self) -> bytes: ...
        @property
        def co_qualname(self) -> str: ...
        def co_positions(self) -> Iterable[tuple[int | None, int | None, int | None, int | None]]: ...
    if sys.version_info >= (3, 14):
        def co_branches(self) -> Iterator[tuple[int, int, int]]: ...

    # All constructor parameters are positional-only (trailing `/`).
    if sys.version_info >= (3, 11):
        def __new__(
            cls,
            argcount: int,
            posonlyargcount: int,
            kwonlyargcount: int,
            nlocals: int,
            stacksize: int,
            flags: int,
            codestring: bytes,
            constants: tuple[object, ...],
            names: tuple[str, ...],
            varnames: tuple[str, ...],
            filename: str,
            name: str,
            qualname: str,
            firstlineno: int,
            linetable: bytes,
            exceptiontable: bytes,
            freevars: tuple[str, ...] = ...,
            cellvars: tuple[str, ...] = ...,
            /,
        ) -> Self: ...
    elif sys.version_info >= (3, 10):
        def __new__(
            cls,
            argcount: int,
            posonlyargcount: int,
            kwonlyargcount: int,
            nlocals: int,
            stacksize: int,
            flags: int,
            codestring: bytes,
            constants: tuple[object, ...],
            names: tuple[str, ...],
            varnames: tuple[str, ...],
            filename: str,
            name: str,
            firstlineno: int,
            linetable: bytes,
            freevars: tuple[str, ...] = ...,
            cellvars: tuple[str, ...] = ...,
            /,
        ) -> Self: ...
    else:
        def __new__(
            cls,
            argcount: int,
            posonlyargcount: int,
            kwonlyargcount: int,
            nlocals: int,
            stacksize: int,
            flags: int,
            codestring: bytes,
            constants: tuple[object, ...],
            names: tuple[str, ...],
            varnames: tuple[str, ...],
            filename: str,
            name: str,
            firstlineno: int,
            lnotab: bytes,
            freevars: tuple[str, ...] = ...,
            cellvars: tuple[str, ...] = ...,
            /,
        ) -> Self: ...
    # `replace` takes keyword-only overrides; integer fields default to -1 and
    # the remaining fields default to `...`.
    if sys.version_info >= (3, 11):
        def replace(
            self,
            *,
            co_argcount: int = -1,
            co_posonlyargcount: int = -1,
            co_kwonlyargcount: int = -1,
            co_nlocals: int = -1,
            co_stacksize: int = -1,
            co_flags: int = -1,
            co_firstlineno: int = -1,
            co_code: bytes = ...,
            co_consts: tuple[object, ...] = ...,
            co_names: tuple[str, ...] = ...,
            co_varnames: tuple[str, ...] = ...,
            co_freevars: tuple[str, ...] = ...,
            co_cellvars: tuple[str, ...] = ...,
            co_filename: str = ...,
            co_name: str = ...,
            co_qualname: str = ...,
            co_linetable: bytes = ...,
            co_exceptiontable: bytes = ...,
        ) -> Self: ...
    elif sys.version_info >= (3, 10):
        def replace(
            self,
            *,
            co_argcount: int = -1,
            co_posonlyargcount: int = -1,
            co_kwonlyargcount: int = -1,
            co_nlocals: int = -1,
            co_stacksize: int = -1,
            co_flags: int = -1,
            co_firstlineno: int = -1,
            co_code: bytes = ...,
            co_consts: tuple[object, ...] = ...,
            co_names: tuple[str, ...] = ...,
            co_varnames: tuple[str, ...] = ...,
            co_freevars: tuple[str, ...] = ...,
            co_cellvars: tuple[str, ...] = ...,
            co_filename: str = ...,
            co_name: str = ...,
            co_linetable: bytes = ...,
        ) -> Self: ...
    else:
        def replace(
            self,
            *,
            co_argcount: int = -1,
            co_posonlyargcount: int = -1,
            co_kwonlyargcount: int = -1,
            co_nlocals: int = -1,
            co_stacksize: int = -1,
            co_flags: int = -1,
            co_firstlineno: int = -1,
            co_code: bytes = ...,
            co_consts: tuple[object, ...] = ...,
            co_names: tuple[str, ...] = ...,
            co_varnames: tuple[str, ...] = ...,
            co_freevars: tuple[str, ...] = ...,
            co_cellvars: tuple[str, ...] = ...,
            co_filename: str = ...,
            co_name: str = ...,
            co_lnotab: bytes = ...,
        ) -> Self: ...

    if sys.version_info >= (3, 13):
        __replace__ = replace

# Mapping stub parameterised by covariant key/value TypeVars; no mutating
# methods are declared and `__hash__` is declared as None. The type-ignore
# comments suppress checker complaints about covariant TypeVars being used in
# parameter positions.
@final
class MappingProxyType(Mapping[_KT_co, _VT_co]):  # type: ignore[type-var]  # pyright: ignore[reportInvalidTypeArguments]
    __hash__: ClassVar[None]  # type: ignore[assignment]
    def __new__(cls, mapping: SupportsKeysAndGetItem[_KT_co, _VT_co]) -> Self: ...
    def __getitem__(self, key: _KT_co, /) -> _VT_co: ...  # type: ignore[misc]  # pyright: ignore[reportGeneralTypeIssues]
    def __iter__(self) -> Iterator[_KT_co]: ...
    def __len__(self) -> int: ...
    def __eq__(self, value: object, /) -> bool: ...
    def copy(self) -> dict[_KT_co, _VT_co]: ...
    def keys(self) -> KeysView[_KT_co]: ...
    def values(self) -> ValuesView[_VT_co]: ...
    def items(self) -> ItemsView[_KT_co, _VT_co]: ...
    @overload
    def get(self, key: _KT_co, /) -> _VT_co | None: ...  # type: ignore[misc]  # pyright: ignore[reportGeneralTypeIssues]  # Covariant type as parameter
    @overload
    def get(self, key: _KT_co, default: _VT_co, /) -> _VT_co: ...  # type: ignore[misc]  # pyright: ignore[reportGeneralTypeIssues]  # Covariant type as parameter
    @overload
    def get(self, key: _KT_co, default: _T2, /) -> _VT_co | _T2: ...  # type: ignore[misc]  # pyright: ignore[reportGeneralTypeIssues]  # Covariant type as parameter
    def __class_getitem__(cls, item: Any, /) -> GenericAlias: ...
    def __reversed__(self) -> Iterator[_KT_co]: ...
    def __or__(self, value: Mapping[_T1, _T2], /) -> dict[_KT_co | _T1, _VT_co | _T2]: ...
    def __ror__(self, value: Mapping[_T1, _T2], /) -> dict[_KT_co | _T1, _VT_co | _T2]: ...

# SimpleNamespace is declared twice: the 3.12+ declaration carries
# @disjoint_base (and, on 3.13+, the positional `mapping_or_iterable`
# parameter and `__replace__`); the older declaration has neither.
if sys.version_info >= (3, 12):
    @disjoint_base
    class SimpleNamespace:
        __hash__: ClassVar[None]  # type: ignore[assignment]
        if sys.version_info >= (3, 13):
            def __init__(
                self, mapping_or_iterable: Mapping[str, Any] | Iterable[tuple[str, Any]] = (), /, **kwargs: Any
            ) -> None: ...
        else:
            def __init__(self, **kwargs: Any) -> None: ...

        def __eq__(self, value: object, /) -> bool: ...
        def __getattribute__(self, name: str, /) -> Any: ...
        def __setattr__(self, name: str, value: Any, /) -> None: ...
        def __delattr__(self, name: str, /) -> None: ...
        if sys.version_info >= (3, 13):
            def __replace__(self, **kwargs: Any) -> Self: ...

else:
    class SimpleNamespace:
        __hash__: ClassVar[None]  # type: ignore[assignment]
        def __init__(self, **kwargs: Any) -> None: ...
        def __eq__(self, value: object, /) -> bool: ...
        def __getattribute__(self, name: str, /) -> Any: ...
        def __setattr__(self, name: str, value: Any, /) -> None: ...
        def __delattr__(self, name: str, /) -> None: ...

@disjoint_base
class ModuleType:
    __name__: str
    __file__: str | None
    @property
    def __dict__(self) -> dict[str, Any]: ...  # type: ignore[override]
    __loader__: LoaderProtocol | None
    __package__: str | None
    __path__: MutableSequence[str]
    __spec__: ModuleSpec | None
    # N.B. Although this is the same type as `builtins.object.__doc__`,
    # it is deliberately redeclared here. Most symbols declared in the namespace
    # of `types.ModuleType` are available as "implicit globals" within a module's
    # namespace, but this is not true for symbols declared in the namespace of `builtins.object`.
    # Redeclaring `__doc__` here helps some type checkers understand that `__doc__` is available
    # as an implicit global in all modules, similar to `__name__`, `__file__`, `__spec__`, etc.
    __doc__: str | None
    __annotations__: dict[str, AnnotationForm]
    if sys.version_info >= (3, 14):
        __annotate__: AnnotateFunc | None

    def __init__(self, name: str, doc: str | None = ...) -> None: ...
    # __getattr__ doesn't exist at runtime,
    # but having it here in typeshed makes dynamic imports
    # using `builtins.__import__` or `importlib.import_module` less painful
    def __getattr__(self, name: str) -> Any: ...

# Closure cell stub: unhashable (`__hash__` is None), contents typed as Any.
@final
class CellType:
    def __new__(cls, contents: object = ..., /) -> Self: ...
    __hash__: ClassVar[None]  # type: ignore[assignment]

    cell_contents: Any

# Send and return types default to None, so `GeneratorType[X]` is a valid parameterisation.
_YieldT_co = TypeVar('_YieldT_co', covariant=True)
_SendT_contra = TypeVar('_SendT_contra', contravariant=True, default=None)
_ReturnT_co = TypeVar('_ReturnT_co', covariant=True, default=None)

@final
class GeneratorType(Generator[_YieldT_co, _SendT_contra, _ReturnT_co]):
    @property
    def gi_code(self) -> CodeType: ...
    @property
    def gi_frame(self) -> FrameType | None: ...
    @property
    def gi_running(self) -> bool: ...
    @property
    def gi_yieldfrom(self) -> Iterator[_YieldT_co] | None: ...
    if sys.version_info >= (3, 11):
        @property
        def gi_suspended(self) -> bool: ...
    __name__: str
    __qualname__: str
    def __iter__(self) -> Self: ...
    def __next__(self) -> _YieldT_co: ...
    def send(self, arg: _SendT_contra, /) -> _YieldT_co: ...
    # throw() accepts either an exception class (plus optional value/traceback)
    # or an exception instance (value must then be None).
    @overload
    def throw(
        self, typ: type[BaseException], val: BaseException | object = ..., tb: TracebackType | None = ..., /
    ) -> _YieldT_co: ...
    @overload
    def throw(self, typ: BaseException, val: None = None, tb: TracebackType | None = ..., /) -> _YieldT_co: ...
    if sys.version_info >= (3, 13):
        def __class_getitem__(cls, item: Any, /) -> Any: ...

@final
class AsyncGeneratorType(AsyncGenerator[_YieldT_co, _SendT_contra]):
    @property
    def ag_await(self) -> Awaitable[Any] | None: ...
    @property
    def ag_code(self) -> CodeType: ...
    @property
    def ag_frame(self) -> FrameType | None: ...
    @property
    def ag_running(self) -> bool: ...
    __name__: str
    __qualname__: str
    if sys.version_info >= (3, 12):
        @property
        def ag_suspended(self) -> bool: ...

    def __aiter__(self) -> Self: ...
    def __anext__(self) -> Coroutine[Any, Any, _YieldT_co]: ...
    def asend(self, val: _SendT_contra, /) -> Coroutine[Any, Any, _YieldT_co]: ...
    # Same two call shapes as GeneratorType.throw, declared as coroutines.
    @overload
    async def athrow(
        self, typ: type[BaseException], val: BaseException | object = ..., tb: TracebackType | None = ..., /
    ) -> _YieldT_co: ...
    @overload
    async def athrow(self, typ: BaseException, val: None = None, tb: TracebackType | None = ..., /) -> _YieldT_co: ...
    def aclose(self) -> Coroutine[Any, Any, None]: ...
    def __class_getitem__(cls, item: Any, /) -> GenericAlias: ...

# Non-default variations to accommodate coroutines
_SendT_nd_contra = TypeVar('_SendT_nd_contra', contravariant=True)
_ReturnT_nd_co = TypeVar('_ReturnT_nd_co', covariant=True)

@final
class CoroutineType(Coroutine[_YieldT_co, _SendT_nd_contra, _ReturnT_nd_co]):
    __name__: str
    __qualname__: str
    @property
    def cr_await(self) -> Any | None: ...
    @property
    def cr_code(self) -> CodeType: ...
    @property
    def cr_frame(self) -> FrameType | None: ...
    @property
    def cr_running(self) -> bool: ...
    @property
    def cr_origin(self) -> tuple[tuple[str, int, str], ...] | None: ...
    if sys.version_info >= (3, 11):
        @property
        def cr_suspended(self) -> bool: ...

    def close(self) -> None: ...
    def __await__(self) -> Generator[Any, None, _ReturnT_nd_co]: ...
    def send(self, arg: _SendT_nd_contra, /) -> _YieldT_co: ...
    @overload
    def throw(
        self, typ: type[BaseException], val: BaseException | object = ..., tb: TracebackType | None = ..., /
    ) -> _YieldT_co: ...
    @overload
    def throw(self, typ: BaseException, val: None = None, tb: TracebackType | None = ..., /) -> _YieldT_co: ...
    if sys.version_info >= (3, 13):
        def __class_getitem__(cls, item: Any, /) -> Any: ...

# Bound method stub: pairs a callable (`__func__`) with the object it is
# bound to (`__self__`); every attribute is exposed as a read-only property.
@final
class MethodType:
    @property
    def __closure__(self) -> tuple[CellType, ...] | None: ...  # inherited from the added function
    @property
    def __code__(self) -> CodeType: ...  # inherited from the added function
    @property
    def __defaults__(self) -> tuple[Any, ...] | None: ...  # inherited from the added function
    @property
    def __func__(self) -> Callable[..., Any]: ...
    @property
    def __self__(self) -> object: ...
    @property
    def __name__(self) -> str: ...  # inherited from the added function
    @property
    def __qualname__(self) -> str: ...  # inherited from the added function
    def __new__(cls, func: Callable[..., Any], instance: object, /) -> Self: ...
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...

    if sys.version_info >= (3, 13):
        def __get__(self, instance: object, owner: type | None = None, /) -> Self: ...

    def __eq__(self, value: object, /) -> bool: ...
    def __hash__(self) -> int: ...

@final
class BuiltinFunctionType:
    @property
    def __self__(self) -> object | ModuleType: ...
    @property
    def __name__(self) -> str: ...
    @property
    def __qualname__(self) -> str: ...
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def __eq__(self, value: object, /) -> bool: ...
    def __hash__(self) -> int: ...

BuiltinMethodType = BuiltinFunctionType

# The four descriptor/wrapper stubs below share the same shape: name,
# qualname and owning class as read-only properties, plus `__call__`.
@final
class WrapperDescriptorType:
    @property
    def __name__(self) -> str: ...
    @property
    def __qualname__(self) -> str: ...
    @property
    def __objclass__(self) -> type: ...
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def __get__(self, instance: Any, owner: type | None = None, /) -> Any: ...

@final
class MethodWrapperType:
    @property
    def __self__(self) -> object: ...
    @property
    def __name__(self) -> str: ...
    @property
    def __qualname__(self) -> str: ...
    @property
    def __objclass__(self) -> type: ...
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def __eq__(self, value: object, /) -> bool: ...
    def __ne__(self, value: object, /) -> bool: ...
    def __hash__(self) -> int: ...

@final
class MethodDescriptorType:
    @property
    def __name__(self) -> str: ...
    @property
    def __qualname__(self) -> str: ...
    @property
    def __objclass__(self) -> type: ...
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def __get__(self, instance: Any, owner: type | None = None, /) -> Any: ...

@final
class ClassMethodDescriptorType:
    @property
    def __name__(self) -> str: ...
    @property
    def __qualname__(self) -> str: ...
    @property
    def __objclass__(self) -> type: ...
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def __get__(self, instance: Any, owner: type | None = None, /) -> Any: ...

@final
class TracebackType:
    def __new__(cls, tb_next: TracebackType | None, tb_frame: FrameType, tb_lasti: int, tb_lineno: int) -> Self: ...
    tb_next: TracebackType | None
    # the rest are read-only
    @property
    def tb_frame(self) -> FrameType: ...
    @property
    def tb_lasti(self) -> int: ...
    @property
    def tb_lineno(self) -> int: ...

@final
class FrameType:
    @property
    def f_back(self) -> FrameType | None: ...
    @property
    def f_builtins(self) -> dict[str, Any]: ...
    @property
    def f_code(self) -> CodeType: ...
    @property
    def f_globals(self) -> dict[str, Any]: ...
    @property
    def f_lasti(self) -> int: ...
    # see discussion in #6769: f_lineno *can* sometimes be None,
    # but you should probably file a bug report with CPython if you encounter it being None in the wild.
    # An `int | None` annotation here causes too many false-positive errors, so applying `int | Any`.
    @property
    def f_lineno(self) -> int | MaybeNone: ...
    @property
    def f_locals(self) -> dict[str, Any]: ...
    # The trace-related attributes are declared as plain annotations, not properties.
    f_trace: Callable[[FrameType, str, Any], Any] | None
    f_trace_lines: bool
    f_trace_opcodes: bool
    def clear(self) -> None: ...
    if sys.version_info >= (3, 14):
        @property
        def f_generator(self) -> GeneratorType[Any, Any, Any] | CoroutineType[Any, Any, Any] | None: ...

# GetSetDescriptorType and MemberDescriptorType declare the same interface:
# identifying properties plus the full get/set/delete descriptor protocol.
@final
class GetSetDescriptorType:
    @property
    def __name__(self) -> str: ...
    @property
    def __qualname__(self) -> str: ...
    @property
    def __objclass__(self) -> type: ...
    def __get__(self, instance: Any, owner: type | None = None, /) -> Any: ...
    def __set__(self, instance: Any, value: Any, /) -> None: ...
    def __delete__(self, instance: Any, /) -> None: ...

@final
class MemberDescriptorType:
    @property
    def __name__(self) -> str: ...
    @property
    def __qualname__(self) -> str: ...
    @property
    def __objclass__(self) -> type: ...
    def __get__(self, instance: Any, owner: type | None = None, /) -> Any: ...
    def __set__(self, instance: Any, value: Any, /) -> None: ...
    def __delete__(self, instance: Any, /) -> None: ...

# Class-creation helpers.
def new_class(
    name: str,
    bases: Iterable[object] = (),
    kwds: dict[str, Any] | None = None,
    exec_body: Callable[[dict[str, Any]], object] | None = None,
) -> type: ...
def resolve_bases(bases: Iterable[object]) -> tuple[Any, ...]: ...
def prepare_class(
    name: str, bases: tuple[type, ...] = (), kwds: dict[str, Any] | None = None
) -> tuple[type, dict[str, Any], dict[str, Any]]: ...

if sys.version_info >= (3, 12):
    def get_original_bases(cls: type, /) -> tuple[Any, ...]: ...

# Does not actually inherit from property, but saying it does makes sure that
# pyright handles this class correctly.
class DynamicClassAttribute(property):
    # The accessor attributes are redeclared with broader callable types than
    # the `property` base declares, hence the type-ignore comments on fset/fdel.
    fget: Callable[[Any], Any] | None
    fset: Callable[[Any, Any], object] | None  # type: ignore[assignment]
    fdel: Callable[[Any], object] | None  # type: ignore[assignment]
    overwrite_doc: bool
    __isabstractmethod__: bool
    def __init__(
        self,
        fget: Callable[[Any], Any] | None = None,
        fset: Callable[[Any, Any], object] | None = None,
        fdel: Callable[[Any], object] | None = None,
        doc: str | None = None,
    ) -> None: ...
    def __get__(self, instance: Any, ownerclass: type | None = None) -> Any: ...
    def __set__(self, instance: Any, value: Any) -> None: ...
    def __delete__(self, instance: Any) -> None: ...
    # getter/setter/deleter return DynamicClassAttribute rather than plain property.
    def getter(self, fget: Callable[[Any], Any]) -> DynamicClassAttribute: ...
    def setter(self, fset: Callable[[Any, Any], object]) -> DynamicClassAttribute: ...
    def deleter(self, fdel: Callable[[Any], object]) -> DynamicClassAttribute: ...

_Fn = TypeVar('_Fn', bound=Callable[..., object])
_R = TypeVar('_R')
_P = ParamSpec('_P')

# it's not really an Awaitable, but can be used in an await expression. Real type: Generator & Awaitable
# The first overload maps a generator function to one returning Awaitable[_R];
# the second returns any other callable unchanged.
@overload
def coroutine(func: Callable[_P, Generator[Any, Any, _R]]) -> Callable[_P, Awaitable[_R]]: ...
@overload
def coroutine(func: _Fn) -> _Fn: ...

@disjoint_base
class GenericAlias:
    @property
    def __origin__(self) -> type | TypeAliasType: ...
    @property
    def __args__(self) -> tuple[Any, ...]: ...
    @property
    def __parameters__(self) -> tuple[Any, ...]: ...
    def __new__(cls, origin: type, args: Any, /) -> Self: ...
    def __getitem__(self, typeargs: Any, /) -> GenericAlias: ...
    def __eq__(self, value: object, /) -> bool: ...
    def __hash__(self) -> int: ...
    def __mro_entries__(self, bases: Iterable[object], /) -> tuple[type, ...]: ...
    if sys.version_info >= (3, 11):
        @property
        def __unpacked__(self) -> bool: ...
        @property
        def __typing_unpacked_tuple_args__(self) -> tuple[Any, ...] | None: ...
    if sys.version_info >= (3, 10):
        def __or__(self, value: Any, /) -> UnionType: ...
        def __ror__(self, value: Any, /) -> UnionType: ...

    # GenericAlias delegates attr access to `__origin__`
    def __getattr__(self, name: str) -> Any: ...

# These four types are declared only for 3.10+ (matching the `__all__` extension for that version).
if sys.version_info >= (3, 10):
    @final
    class NoneType:
        def __bool__(self) -> Literal[False]: ...

    @final
    class EllipsisType: ...

    @final
    class NotImplementedType(Any): ...

    @final
    class UnionType:
        @property
        def __args__(self) -> tuple[Any, ...]: ...
        @property
        def __parameters__(self) -> tuple[Any, ...]: ...
        # `(int | str) | Literal["foo"]` returns a generic alias to an instance of `_SpecialForm` (`Union`).
        # Normally we'd express this using the return type of `_SpecialForm.__ror__`,
        # but because `UnionType.__or__` accepts `Any`, type checkers will use
        # the return type of `UnionType.__or__` to infer the result of this operation
        # rather than `_SpecialForm.__ror__`. To mitigate this, we use `| Any`
        # in the return type of `UnionType.__(r)or__`.
        def __or__(self, value: Any, /) -> UnionType | Any: ...
        def __ror__(self, value: Any, /) -> UnionType | Any: ...
        def __eq__(self, value: object, /) -> bool: ...
        def __hash__(self) -> int: ...
        # you can only subscript a `UnionType` instance if at least one of the elements
        # in the union is a generic alias instance that has a non-empty `__parameters__`
        def __getitem__(self, parameters: Any, /) -> object: ...

if sys.version_info >= (3, 13):
    @final
    class CapsuleType: ...

================================================
FILE: crates/monty-typeshed/vendor/typeshed/stdlib/typing.pyi
================================================
# Since this module defines "overload" it is not recognized by Ruff as typing.overload
# TODO: The collections import is required, otherwise mypy crashes.
# https://github.com/python/mypy/issues/16744
import collections  # noqa: F401  # pyright: ignore[reportUnusedImport]
import sys
from _collections_abc import dict_items, dict_keys, dict_values
from abc import ABCMeta, abstractmethod
from re import Match as Match, Pattern as Pattern
from types import (
    BuiltinFunctionType,
    CodeType,
    FunctionType,
    GenericAlias,
    MethodDescriptorType,
    MethodType,
    MethodWrapperType,
    ModuleType,
    TracebackType,
    WrapperDescriptorType,
)

import typing_extensions
from _typeshed import (
    IdentityFunction,
    ReadableBuffer,
    SupportsGetItem,
    SupportsGetItemViewable,
    SupportsKeysAndGetItem,
    Viewable,
)
from typing_extensions import Never as _Never, ParamSpec as _ParamSpec, deprecated

if sys.version_info >= (3, 14):
    from _typeshed import EvaluateFunc

    from annotationlib import Format

if sys.version_info >= (3, 10):
    from types import UnionType

# Names exported on every supported version; version-specific names are appended below.
__all__ = [
    'AbstractSet',
    'Annotated',
    'Any',
    'AnyStr',
    'AsyncContextManager',
    'AsyncGenerator',
    'AsyncIterable',
    'AsyncIterator',
    'Awaitable',
    'BinaryIO',
    'ByteString',
    'Callable',
    'ChainMap',
    'ClassVar',
    'Collection',
    'Container',
    'ContextManager',
    'Coroutine',
    'Counter',
    'DefaultDict',
    'Deque',
    'Dict',
    'Final',
    'ForwardRef',
    'FrozenSet',
    'Generator',
    'Generic',
    'Hashable',
    'IO',
    'ItemsView',
    'Iterable',
    'Iterator',
    'KeysView',
    'List',
    'Literal',
    'Mapping',
    'MappingView',
    'Match',
    'MutableMapping',
    'MutableSequence',
    'MutableSet',
    'NamedTuple',
    'NewType',
    'NoReturn',
    'Optional',
    'OrderedDict',
    'Pattern',
    'Protocol',
    'Reversible',
    'Sequence',
    'Set',
    'Sized',
    'SupportsAbs',
    'SupportsBytes',
    'SupportsComplex',
    'SupportsFloat',
    'SupportsIndex',
    'SupportsInt',
    'SupportsRound',
    'Text',
    'TextIO',
    'Tuple',
    'Type',
    'TypeVar',
    'TypedDict',
    'Union',
    'ValuesView',
    'TYPE_CHECKING',
    'cast',
    'final',
    'get_args',
    'get_origin',
    'get_type_hints',
    'no_type_check',
    'no_type_check_decorator',
    'overload',
    'runtime_checkable',
]

if sys.version_info >= (3, 14):
    __all__ += ['evaluate_forward_ref']

if sys.version_info >= (3, 10):
    __all__ += [
        'Concatenate',
        'ParamSpec',
        'ParamSpecArgs',
        'ParamSpecKwargs',
        'TypeAlias',
        'TypeGuard',
        'is_typeddict',
    ]

if sys.version_info >= (3, 11):
    __all__ += [
        'LiteralString',
        'Never',
        'NotRequired',
        'Required',
        'Self',
        'TypeVarTuple',
        'Unpack',
        'assert_never',
        'assert_type',
        'clear_overloads',
        'dataclass_transform',
        'get_overloads',
        'reveal_type',
    ]

if sys.version_info >= (3, 12):
    __all__ += ['TypeAliasType', 'override']

if sys.version_info >= (3, 13):
    __all__ += ['get_protocol_members', 'is_protocol', 'NoDefault', 'TypeIs', 'ReadOnly']

# We can't use this name here because it leads to issues with mypy, likely
# due to an import cycle. Below instead we use Any with a comment.
# from _typeshed import AnnotationForm

class Any: ...

class _Final:
    __slots__ = ('__weakref__',)

def final(f: _T) -> _T: ...

# TypeVar is constructed via `__new__` on 3.11+ and via `__init__` before
# that; the keyword set grows with the version (`infer_variance` from 3.12,
# `default` from 3.13). Parameters marked `# AnnotationForm` are typed as Any
# because of the import restriction described above.
@final
class TypeVar:
    @property
    def __name__(self) -> str: ...
    @property
    def __bound__(self) -> Any | None: ...  # AnnotationForm
    @property
    def __constraints__(self) -> tuple[Any, ...]: ...  # AnnotationForm
    @property
    def __covariant__(self) -> bool: ...
    @property
    def __contravariant__(self) -> bool: ...
    if sys.version_info >= (3, 12):
        @property
        def __infer_variance__(self) -> bool: ...
    if sys.version_info >= (3, 13):
        @property
        def __default__(self) -> Any: ...  # AnnotationForm
    if sys.version_info >= (3, 13):
        def __new__(
            cls,
            name: str,
            *constraints: Any,  # AnnotationForm
            bound: Any | None = None,  # AnnotationForm
            contravariant: bool = False,
            covariant: bool = False,
            infer_variance: bool = False,
            default: Any = ...,  # AnnotationForm
        ) -> Self: ...
    elif sys.version_info >= (3, 12):
        def __new__(
            cls,
            name: str,
            *constraints: Any,  # AnnotationForm
            bound: Any | None = None,  # AnnotationForm
            covariant: bool = False,
            contravariant: bool = False,
            infer_variance: bool = False,
        ) -> Self: ...
    elif sys.version_info >= (3, 11):
        def __new__(
            cls,
            name: str,
            *constraints: Any,  # AnnotationForm
            bound: Any | None = None,  # AnnotationForm
            covariant: bool = False,
            contravariant: bool = False,
        ) -> Self: ...
    else:
        def __init__(
            self,
            name: str,
            *constraints: Any,  # AnnotationForm
            bound: Any | None = None,  # AnnotationForm
            covariant: bool = False,
            contravariant: bool = False,
        ) -> None: ...
    if sys.version_info >= (3, 10):
        def __or__(self, right: Any, /) -> _SpecialForm: ...  # AnnotationForm
        def __ror__(self, left: Any, /) -> _SpecialForm: ...  # AnnotationForm
    if sys.version_info >= (3, 11):
        def __typing_subst__(self, arg: Any, /) -> Any: ...
    if sys.version_info >= (3, 13):
        def __typing_prepare_subst__(self, alias: Any, args: Any, /) -> tuple[Any, ...]: ...
        def has_default(self) -> bool: ...
    if sys.version_info >= (3, 14):
        @property
        def evaluate_bound(self) -> EvaluateFunc | None: ...
        @property
        def evaluate_constraints(self) -> EvaluateFunc | None: ...
        @property
        def evaluate_default(self) -> EvaluateFunc | None: ...

# N.B. Keep this definition in sync with typing_extensions._SpecialForm
@final
class _SpecialForm(_Final):
    __slots__ = ('_name', '__doc__', '_getitem')
    def __getitem__(self, parameters: Any) -> object: ...
    if sys.version_info >= (3, 10):
        def __or__(self, other: Any) -> _SpecialForm: ...
        def __ror__(self, other: Any) -> _SpecialForm: ...

# Special forms are declared as module-level variables of type _SpecialForm.
Union: _SpecialForm
Protocol: _SpecialForm
Callable: _SpecialForm
Type: _SpecialForm
NoReturn: _SpecialForm
ClassVar: _SpecialForm

Optional: _SpecialForm
Tuple: _SpecialForm
Final: _SpecialForm

Literal: _SpecialForm
TypedDict: _SpecialForm

if sys.version_info >= (3, 11):
    Self: _SpecialForm
    Never: _SpecialForm
    Unpack: _SpecialForm
    Required: _SpecialForm
    NotRequired: _SpecialForm
    LiteralString: _SpecialForm

    @final
    class TypeVarTuple:
        @property
        def __name__(self) -> str: ...
        if sys.version_info >= (3, 13):
            @property
            def __default__(self) -> Any: ...  # AnnotationForm
            def has_default(self) -> bool: ...
        if sys.version_info >= (3, 13):
            def __new__(cls, name: str, *, default: Any = ...) -> Self: ...  # AnnotationForm
        elif sys.version_info >= (3, 12):
            def __new__(cls, name: str) -> Self: ...
        else:
            def __init__(self, name: str) -> None: ...

        def __iter__(self) -> Any: ...
        def __typing_subst__(self, arg: Never, /) -> Never: ...
        def __typing_prepare_subst__(self, alias: Any, args: Any, /) -> tuple[Any, ...]: ...
        if sys.version_info >= (3, 14):
            @property
            def evaluate_default(self) -> EvaluateFunc | None: ...

# ParamSpec and its companions are declared as classes only for 3.10+; on
# older versions only a function-style NewType fallback is declared (else
# branch at the end of this block).
if sys.version_info >= (3, 10):
    @final
    class ParamSpecArgs:
        @property
        def __origin__(self) -> ParamSpec: ...
        if sys.version_info >= (3, 12):
            def __new__(cls, origin: ParamSpec) -> Self: ...
        else:
            def __init__(self, origin: ParamSpec) -> None: ...

        def __eq__(self, other: object, /) -> bool: ...
        __hash__: ClassVar[None]  # type: ignore[assignment]

    @final
    class ParamSpecKwargs:
        @property
        def __origin__(self) -> ParamSpec: ...
        if sys.version_info >= (3, 12):
            def __new__(cls, origin: ParamSpec) -> Self: ...
        else:
            def __init__(self, origin: ParamSpec) -> None: ...

        def __eq__(self, other: object, /) -> bool: ...
        __hash__: ClassVar[None]  # type: ignore[assignment]

    @final
    class ParamSpec:
        @property
        def __name__(self) -> str: ...
        @property
        def __bound__(self) -> Any | None: ...  # AnnotationForm
        @property
        def __covariant__(self) -> bool: ...
        @property
        def __contravariant__(self) -> bool: ...
        if sys.version_info >= (3, 12):
            @property
            def __infer_variance__(self) -> bool: ...
        if sys.version_info >= (3, 13):
            @property
            def __default__(self) -> Any: ...  # AnnotationForm
        if sys.version_info >= (3, 13):
            def __new__(
                cls,
                name: str,
                *,
                bound: Any | None = None,  # AnnotationForm
                contravariant: bool = False,
                covariant: bool = False,
                infer_variance: bool = False,
                default: Any = ...,  # AnnotationForm
            ) -> Self: ...
        elif sys.version_info >= (3, 12):
            def __new__(
                cls,
                name: str,
                *,
                bound: Any | None = None,  # AnnotationForm
                contravariant: bool = False,
                covariant: bool = False,
                infer_variance: bool = False,
            ) -> Self: ...
        elif sys.version_info >= (3, 11):
            def __new__(
                cls,
                name: str,
                *,
                bound: Any | None = None,  # AnnotationForm
                contravariant: bool = False,
                covariant: bool = False,
            ) -> Self: ...
        else:
            def __init__(
                self,
                name: str,
                *,
                bound: Any | None = None,  # AnnotationForm
                contravariant: bool = False,
                covariant: bool = False,
            ) -> None: ...

        @property
        def args(self) -> ParamSpecArgs: ...
        @property
        def kwargs(self) -> ParamSpecKwargs: ...
        if sys.version_info >= (3, 11):
            def __typing_subst__(self, arg: Any, /) -> Any: ...
            def __typing_prepare_subst__(self, alias: Any, args: Any, /) -> tuple[Any, ...]: ...

        def __or__(self, right: Any, /) -> _SpecialForm: ...
        def __ror__(self, left: Any, /) -> _SpecialForm: ...
        if sys.version_info >= (3, 13):
            def has_default(self) -> bool: ...
        if sys.version_info >= (3, 14):
            @property
            def evaluate_default(self) -> EvaluateFunc | None: ...

    Concatenate: _SpecialForm
    TypeAlias: _SpecialForm
    TypeGuard: _SpecialForm

    class NewType:
        def __init__(self, name: str, tp: Any) -> None: ...  # AnnotationForm
        # From 3.11 `__call__` is declared as a staticmethod taking a positional-only argument.
        if sys.version_info >= (3, 11):
            @staticmethod
            def __call__(x: _T, /) -> _T: ...
        else:
            def __call__(self, x: _T) -> _T: ...

        def __or__(self, other: Any) -> _SpecialForm: ...
        def __ror__(self, other: Any) -> _SpecialForm: ...
        __supertype__: type | NewType
        __name__: str

else:
    def NewType(name: str, tp: Any) -> Any: ...

# Module-private TypeVars / ParamSpec used by the declarations in this stub.
_F = TypeVar('_F', bound=Callable[..., Any])
_P = _ParamSpec('_P')
_T = TypeVar('_T')

_FT = TypeVar('_FT', bound=Callable[..., Any] | type)

# These type variables are used by the container types.
_S = TypeVar('_S')
_KT = TypeVar('_KT')  # Key type.
_VT = TypeVar('_VT')  # Value type.
_T_co = TypeVar('_T_co', covariant=True)  # Any type covariant containers.
_KT_co = TypeVar('_KT_co', covariant=True)  # Key type covariant containers.
_VT_co = TypeVar('_VT_co', covariant=True) # Value type covariant containers. _TC = TypeVar('_TC', bound=type[object]) def overload(func: _F) -> _F: ... def no_type_check(arg: _F) -> _F: ... if sys.version_info >= (3, 13): @deprecated('Deprecated since Python 3.13; removed in Python 3.15.') def no_type_check_decorator(decorator: Callable[_P, _T]) -> Callable[_P, _T]: ... else: def no_type_check_decorator(decorator: Callable[_P, _T]) -> Callable[_P, _T]: ... # This itself is only available during type checking def type_check_only(func_or_cls: _FT) -> _FT: ... # Type aliases and type constructors @type_check_only class _Alias: # Class for defining generic aliases for library types. def __getitem__(self, typeargs: Any) -> Any: ... List = _Alias() Dict = _Alias() DefaultDict = _Alias() Set = _Alias() FrozenSet = _Alias() Counter = _Alias() Deque = _Alias() ChainMap = _Alias() OrderedDict = _Alias() Annotated: _SpecialForm # Predefined type variables. AnyStr = TypeVar('AnyStr', str, bytes) @type_check_only class _Generic: if sys.version_info < (3, 12): __slots__ = () if sys.version_info >= (3, 10): @classmethod def __class_getitem__(cls, args: TypeVar | ParamSpec | tuple[TypeVar | ParamSpec, ...]) -> _Final: ... else: @classmethod def __class_getitem__(cls, args: TypeVar | tuple[TypeVar, ...]) -> _Final: ... Generic: type[_Generic] class _ProtocolMeta(ABCMeta): if sys.version_info >= (3, 12): def __init__(cls, *args: Any, **kwargs: Any) -> None: ... # Abstract base classes. def runtime_checkable(cls: _TC) -> _TC: ... @runtime_checkable class SupportsInt(Protocol, metaclass=ABCMeta): __slots__ = () @abstractmethod def __int__(self) -> int: ... @runtime_checkable class SupportsFloat(Protocol, metaclass=ABCMeta): __slots__ = () @abstractmethod def __float__(self) -> float: ... @runtime_checkable class SupportsComplex(Protocol, metaclass=ABCMeta): __slots__ = () @abstractmethod def __complex__(self) -> complex: ... 
# Each Supports* protocol in this group declares one abstract dunder method.
@runtime_checkable
class SupportsBytes(Protocol, metaclass=ABCMeta):
    __slots__ = ()
    @abstractmethod
    def __bytes__(self) -> bytes: ...

@runtime_checkable
class SupportsIndex(Protocol, metaclass=ABCMeta):
    __slots__ = ()
    @abstractmethod
    def __index__(self) -> int: ...

@runtime_checkable
class SupportsAbs(Protocol[_T_co]):
    __slots__ = ()
    @abstractmethod
    def __abs__(self) -> _T_co: ...

@runtime_checkable
class SupportsRound(Protocol[_T_co]):
    __slots__ = ()
    # Without ndigits the result is int; with ndigits it is the protocol's type parameter.
    @overload
    @abstractmethod
    def __round__(self) -> int: ...
    @overload
    @abstractmethod
    def __round__(self, ndigits: int, /) -> _T_co: ...

@runtime_checkable
class Sized(Protocol, metaclass=ABCMeta):
    @abstractmethod
    def __len__(self) -> int: ...

@runtime_checkable
class Hashable(Protocol, metaclass=ABCMeta):
    # TODO: This is special, in that a subclass of a hashable class may not be hashable
    #   (for example, list vs. object). It's not obvious how to represent this. This class
    #   is currently mostly useless for static checking.
    @abstractmethod
    def __hash__(self) -> int: ...

@runtime_checkable
class Iterable(Protocol[_T_co]):
    @abstractmethod
    def __iter__(self) -> Iterator[_T_co]: ...

@runtime_checkable
class Iterator(Iterable[_T_co], Protocol[_T_co]):
    # Only __next__ is abstract; __iter__ is declared without @abstractmethod.
    @abstractmethod
    def __next__(self) -> _T_co: ...
    def __iter__(self) -> Iterator[_T_co]: ...

@runtime_checkable
class Reversible(Iterable[_T_co], Protocol[_T_co]):
    @abstractmethod
    def __reversed__(self) -> Iterator[_T_co]: ...

# Type parameters for Generator; the send and return parameters default to None.
_YieldT_co = TypeVar('_YieldT_co', covariant=True)
_SendT_contra = TypeVar('_SendT_contra', contravariant=True, default=None)
_ReturnT_co = TypeVar('_ReturnT_co', covariant=True, default=None)

@runtime_checkable
class Generator(Iterator[_YieldT_co], Protocol[_YieldT_co, _SendT_contra, _ReturnT_co]):
    def __next__(self) -> _YieldT_co: ...
    @abstractmethod
    def send(self, value: _SendT_contra, /) -> _YieldT_co: ...
    # Two call shapes: an exception class with an optional value, or an exception instance with val=None.
    @overload
    @abstractmethod
    def throw(
        self, typ: type[BaseException], val: BaseException | object = None, tb: TracebackType | None = None, /
    ) -> _YieldT_co: ...
    @overload
    @abstractmethod
    def throw(self, typ: BaseException, val: None = None, tb: TracebackType | None = None, /) -> _YieldT_co: ...
    # The declared return type of close() differs by version: _ReturnT_co | None on 3.13+, None before.
    if sys.version_info >= (3, 13):
        def close(self) -> _ReturnT_co | None: ...
    else:
        def close(self) -> None: ...

    def __iter__(self) -> Generator[_YieldT_co, _SendT_contra, _ReturnT_co]: ...

# NOTE: Prior to Python 3.13 these aliases are lacking the second _ExitT_co parameter
if sys.version_info >= (3, 13):
    from contextlib import AbstractAsyncContextManager as AsyncContextManager, AbstractContextManager as ContextManager
else:
    from contextlib import AbstractAsyncContextManager, AbstractContextManager

    # Before 3.13 both names are one-parameter protocols that pin the second contextlib parameter to bool | None.
    @runtime_checkable
    class ContextManager(AbstractContextManager[_T_co, bool | None], Protocol[_T_co]): ...

    @runtime_checkable
    class AsyncContextManager(AbstractAsyncContextManager[_T_co, bool | None], Protocol[_T_co]): ...

@runtime_checkable
class Awaitable(Protocol[_T_co]):
    @abstractmethod
    def __await__(self) -> Generator[Any, Any, _T_co]: ...

# Non-default variations to accommodate coroutines, and `AwaitableGenerator` having a 4th type parameter.
_SendT_nd_contra = TypeVar('_SendT_nd_contra', contravariant=True)
_ReturnT_nd_co = TypeVar('_ReturnT_nd_co', covariant=True)

class Coroutine(Awaitable[_ReturnT_nd_co], Generic[_YieldT_co, _SendT_nd_contra, _ReturnT_nd_co]):
    __name__: str
    __qualname__: str

    @abstractmethod
    def send(self, value: _SendT_nd_contra, /) -> _YieldT_co: ...
    # Same two call shapes as Generator.throw.
    @overload
    @abstractmethod
    def throw(
        self, typ: type[BaseException], val: BaseException | object = None, tb: TracebackType | None = None, /
    ) -> _YieldT_co: ...
    @overload
    @abstractmethod
    def throw(self, typ: BaseException, val: None = None, tb: TracebackType | None = None, /) -> _YieldT_co: ...
    @abstractmethod
    def close(self) -> None: ...
# NOTE: This type does not exist in typing.py or PEP 484 but mypy needs it to exist. # The parameters correspond to Generator, but the 4th is the original type. # Obsolete, use _typeshed._type_checker_internals.AwaitableGenerator instead. @type_check_only class AwaitableGenerator( Awaitable[_ReturnT_nd_co], Generator[_YieldT_co, _SendT_nd_contra, _ReturnT_nd_co], Generic[_YieldT_co, _SendT_nd_contra, _ReturnT_nd_co, _S], metaclass=ABCMeta, ): ... @runtime_checkable class AsyncIterable(Protocol[_T_co]): @abstractmethod def __aiter__(self) -> AsyncIterator[_T_co]: ... @runtime_checkable class AsyncIterator(AsyncIterable[_T_co], Protocol[_T_co]): @abstractmethod def __anext__(self) -> Awaitable[_T_co]: ... def __aiter__(self) -> AsyncIterator[_T_co]: ... @runtime_checkable class AsyncGenerator(AsyncIterator[_YieldT_co], Protocol[_YieldT_co, _SendT_contra]): def __anext__(self) -> Coroutine[Any, Any, _YieldT_co]: ... @abstractmethod def asend(self, value: _SendT_contra, /) -> Coroutine[Any, Any, _YieldT_co]: ... @overload @abstractmethod def athrow( self, typ: type[BaseException], val: BaseException | object = None, tb: TracebackType | None = None, / ) -> Coroutine[Any, Any, _YieldT_co]: ... @overload @abstractmethod def athrow( self, typ: BaseException, val: None = None, tb: TracebackType | None = None, / ) -> Coroutine[Any, Any, _YieldT_co]: ... def aclose(self) -> Coroutine[Any, Any, None]: ... _ContainerT_contra = TypeVar('_ContainerT_contra', contravariant=True, default=Any) @runtime_checkable class Container(Protocol[_ContainerT_contra]): # This is generic more on vibes than anything else @abstractmethod def __contains__(self, x: _ContainerT_contra, /) -> bool: ... @runtime_checkable class Collection(Iterable[_T_co], Container[Any], Protocol[_T_co]): # Note: need to use Container[Any] instead of Container[_T_co] to ensure covariance. # Implement Sized (but don't have it as a base class). @abstractmethod def __len__(self) -> int: ... 
class Sequence(Reversible[_T_co], Collection[_T_co]):
    # Indexing with an int yields an element; indexing with a slice yields a Sequence.
    @overload
    @abstractmethod
    def __getitem__(self, index: int, /) -> _T_co: ...
    @overload
    @abstractmethod
    def __getitem__(self, index: slice[int | None], /) -> Sequence[_T_co]: ...
    # Mixin methods
    def index(self, value: Any, start: int = 0, stop: int = ..., /) -> int: ...
    def count(self, value: Any, /) -> int: ...
    def __contains__(self, value: object, /) -> bool: ...
    def __iter__(self) -> Iterator[_T_co]: ...
    def __reversed__(self) -> Iterator[_T_co]: ...

class MutableSequence(Sequence[_T]):
    @abstractmethod
    def insert(self, index: int, value: _T, /) -> None: ...
    # __getitem__, __setitem__ and __delitem__ each have an int overload and a slice overload.
    @overload
    @abstractmethod
    def __getitem__(self, index: int, /) -> _T: ...
    @overload
    @abstractmethod
    def __getitem__(self, index: slice[int | None], /) -> MutableSequence[_T]: ...
    @overload
    @abstractmethod
    def __setitem__(self, index: int, value: _T, /) -> None: ...
    @overload
    @abstractmethod
    def __setitem__(self, index: slice[int | None], value: Iterable[_T], /) -> None: ...
    @overload
    @abstractmethod
    def __delitem__(self, index: int, /) -> None: ...
    @overload
    @abstractmethod
    def __delitem__(self, index: slice[int | None], /) -> None: ...
    # Mixin methods
    def append(self, value: _T, /) -> None: ...
    def clear(self) -> None: ...
    def extend(self, values: Iterable[_T], /) -> None: ...
    def reverse(self) -> None: ...
    def pop(self, index: int = -1, /) -> _T: ...
    def remove(self, value: _T, /) -> None: ...
    def __iadd__(self, values: Iterable[_T], /) -> typing_extensions.Self: ...

class AbstractSet(Collection[_T_co]):
    @abstractmethod
    def __contains__(self, x: object, /) -> bool: ...
    def _hash(self) -> int: ...
    # Mixin methods
    @classmethod
    def _from_iterable(cls, it: Iterable[_S], /) -> AbstractSet[_S]: ...
    def __le__(self, other: AbstractSet[Any], /) -> bool: ...
    def __lt__(self, other: AbstractSet[Any], /) -> bool: ...
    def __gt__(self, other: AbstractSet[Any], /) -> bool: ...
    def __ge__(self, other: AbstractSet[Any], /) -> bool: ...
    # __and__ and __sub__ keep the element type; __or__ and __xor__ widen it with the other operand's.
    def __and__(self, other: AbstractSet[Any], /) -> AbstractSet[_T_co]: ...
    def __or__(self, other: AbstractSet[_T], /) -> AbstractSet[_T_co | _T]: ...
    def __sub__(self, other: AbstractSet[Any], /) -> AbstractSet[_T_co]: ...
    def __xor__(self, other: AbstractSet[_T], /) -> AbstractSet[_T_co | _T]: ...
    def __eq__(self, other: object, /) -> bool: ...
    def isdisjoint(self, other: Iterable[Any], /) -> bool: ...

class MutableSet(AbstractSet[_T]):
    @abstractmethod
    def add(self, value: _T, /) -> None: ...
    @abstractmethod
    def discard(self, value: _T, /) -> None: ...
    # Mixin methods
    def clear(self) -> None: ...
    def pop(self) -> _T: ...
    def remove(self, value: _T, /) -> None: ...
    def __ior__(self, it: AbstractSet[_T], /) -> typing_extensions.Self: ...  # type: ignore[override,misc]
    def __iand__(self, it: AbstractSet[Any], /) -> typing_extensions.Self: ...
    def __ixor__(self, it: AbstractSet[_T], /) -> typing_extensions.Self: ...  # type: ignore[override,misc]
    def __isub__(self, it: AbstractSet[Any], /) -> typing_extensions.Self: ...

class MappingView(Sized):
    __slots__ = ('_mapping',)
    def __init__(self, mapping: Sized) -> None: ...  # undocumented
    def __len__(self) -> int: ...

class ItemsView(MappingView, AbstractSet[tuple[_KT_co, _VT_co]], Generic[_KT_co, _VT_co]):
    def __init__(self, mapping: SupportsGetItemViewable[_KT_co, _VT_co]) -> None: ...  # undocumented
    # The set operators below take any Iterable and are declared as returning set, not AbstractSet.
    @classmethod
    def _from_iterable(cls, it: Iterable[_S], /) -> set[_S]: ...
    def __and__(self, other: Iterable[Any], /) -> set[tuple[_KT_co, _VT_co]]: ...
    def __rand__(self, other: Iterable[_T], /) -> set[_T]: ...
    def __contains__(self, item: tuple[object, object], /) -> bool: ...  # type: ignore[override]
    def __iter__(self) -> Iterator[tuple[_KT_co, _VT_co]]: ...
    def __or__(self, other: Iterable[_T], /) -> set[tuple[_KT_co, _VT_co] | _T]: ...
    def __ror__(self, other: Iterable[_T], /) -> set[tuple[_KT_co, _VT_co] | _T]: ...
    def __sub__(self, other: Iterable[Any], /) -> set[tuple[_KT_co, _VT_co]]: ...
    def __rsub__(self, other: Iterable[_T], /) -> set[_T]: ...
    def __xor__(self, other: Iterable[_T], /) -> set[tuple[_KT_co, _VT_co] | _T]: ...
    def __rxor__(self, other: Iterable[_T], /) -> set[tuple[_KT_co, _VT_co] | _T]: ...

class KeysView(MappingView, AbstractSet[_KT_co]):
    def __init__(self, mapping: Viewable[_KT_co]) -> None: ...  # undocumented
    # The set operators below take any Iterable and are declared as returning set, not AbstractSet.
    @classmethod
    def _from_iterable(cls, it: Iterable[_S], /) -> set[_S]: ...
    def __and__(self, other: Iterable[Any], /) -> set[_KT_co]: ...
    def __rand__(self, other: Iterable[_T], /) -> set[_T]: ...
    def __contains__(self, key: object, /) -> bool: ...
    def __iter__(self) -> Iterator[_KT_co]: ...
    def __or__(self, other: Iterable[_T], /) -> set[_KT_co | _T]: ...
    def __ror__(self, other: Iterable[_T], /) -> set[_KT_co | _T]: ...
    def __sub__(self, other: Iterable[Any], /) -> set[_KT_co]: ...
    def __rsub__(self, other: Iterable[_T], /) -> set[_T]: ...
    def __xor__(self, other: Iterable[_T], /) -> set[_KT_co | _T]: ...
    def __rxor__(self, other: Iterable[_T], /) -> set[_KT_co | _T]: ...

class ValuesView(MappingView, Collection[_VT_co]):
    def __init__(self, mapping: SupportsGetItemViewable[Any, _VT_co]) -> None: ...  # undocumented
    def __contains__(self, value: object, /) -> bool: ...
    def __iter__(self) -> Iterator[_VT_co]: ...

# note for Mapping.get and MutableMapping.pop and MutableMapping.setdefault
# In _collections_abc.py the parameters are positional-or-keyword,
# but dict and types.MappingProxyType (the vast majority of Mapping types)
# don't allow keyword arguments.

class Mapping(Collection[_KT], Generic[_KT, _VT_co]):
    # TODO: We wish the key type could also be covariant, but that doesn't work,
    # see discussion in https://github.com/python/typing/pull/273.
    @abstractmethod
    def __getitem__(self, key: _KT, /) -> _VT_co: ...
    # Mixin methods
    # get() overloads: no default gives _VT_co | None; a default widens the result by the default's type.
    @overload
    def get(self, key: _KT, /) -> _VT_co | None: ...
    @overload
    def get(self, key: _KT, default: _VT_co, /) -> _VT_co: ...  # type: ignore[misc]  # pyright: ignore[reportGeneralTypeIssues]  # Covariant type as parameter
    @overload
    def get(self, key: _KT, default: _T, /) -> _VT_co | _T: ...
    def items(self) -> ItemsView[_KT, _VT_co]: ...
    def keys(self) -> KeysView[_KT]: ...
    def values(self) -> ValuesView[_VT_co]: ...
    def __contains__(self, key: object, /) -> bool: ...
    def __eq__(self, other: object, /) -> bool: ...

class MutableMapping(Mapping[_KT, _VT]):
    @abstractmethod
    def __setitem__(self, key: _KT, value: _VT, /) -> None: ...
    @abstractmethod
    def __delitem__(self, key: _KT, /) -> None: ...
    def clear(self) -> None: ...
    # pop() overloads: no default or a _VT default gives _VT; any other default widens the result.
    @overload
    def pop(self, key: _KT, /) -> _VT: ...
    @overload
    def pop(self, key: _KT, default: _VT, /) -> _VT: ...
    @overload
    def pop(self, key: _KT, default: _T, /) -> _VT | _T: ...
    def popitem(self) -> tuple[_KT, _VT]: ...
    # This overload should be allowed only if the value type is compatible with None.
    #
    # Keep the following methods in line with MutableMapping.setdefault, modulo positional-only differences:
    # -- collections.OrderedDict.setdefault
    # -- collections.ChainMap.setdefault
    # -- weakref.WeakKeyDictionary.setdefault
    @overload
    def setdefault(self: MutableMapping[_KT, _T | None], key: _KT, default: None = None, /) -> _T | None: ...
    @overload
    def setdefault(self, key: _KT, default: _VT, /) -> _VT: ...
    # 'update' used to take a Union, but using overloading is better.
    # The second overloaded type here is a bit too general, because
    # Mapping[tuple[_KT, _VT], W] is a subclass of Iterable[tuple[_KT, _VT]],
    # but will always have the behavior of the first overloaded type
    # at runtime, leading to keys of a mix of types _KT and tuple[_KT, _VT].
    # We don't currently have any way of forcing all Mappings to use
    # the first overload, but by using overloading rather than a Union,
    # mypy will commit to using the first overload when the argument is
    # known to be a Mapping with unknown type parameters, which is closer
    # to the behavior we want. See mypy issue  #1430.
    #
    # Various mapping classes have __ior__ methods that should be kept roughly in line with .update():
    # -- dict.__ior__
    # -- os._Environ.__ior__
    # -- collections.UserDict.__ior__
    # -- collections.ChainMap.__ior__
    # -- peewee.attrdict.__add__
    # -- peewee.attrdict.__iadd__
    # -- weakref.WeakValueDictionary.__ior__
    # -- weakref.WeakKeyDictionary.__ior__
    @overload
    def update(self, m: SupportsKeysAndGetItem[_KT, _VT], /) -> None: ...
    @overload
    def update(self: SupportsGetItem[str, _VT], m: SupportsKeysAndGetItem[str, _VT], /, **kwargs: _VT) -> None: ...
    @overload
    def update(self, m: Iterable[tuple[_KT, _VT]], /) -> None: ...
    @overload
    def update(self: SupportsGetItem[str, _VT], m: Iterable[tuple[str, _VT]], /, **kwargs: _VT) -> None: ...
    @overload
    def update(self: SupportsGetItem[str, _VT], /, **kwargs: _VT) -> None: ...

Text = str

TYPE_CHECKING: Final[bool]

# In stubs, the arguments of the IO class are marked as positional-only.
# This differs from runtime, but better reflects the fact that in reality
# classes deriving from IO use different names for the arguments.
class IO(Generic[AnyStr]):
    # At runtime these are all abstract properties,
    # but making them abstract in the stub is hugely disruptive, for not much gain.
    # See #8726
    __slots__ = ()
    @property
    def mode(self) -> str: ...
    # Usually str, but may be bytes if a bytes path was passed to open(). See #10737.
    # If PEP 696 becomes available, we may want to use a defaulted TypeVar here.
    @property
    def name(self) -> str | Any: ...
    @abstractmethod
    def close(self) -> None: ...
    @property
    def closed(self) -> bool: ...
    @abstractmethod
    def fileno(self) -> int: ...
    @abstractmethod
    def flush(self) -> None: ...
    @abstractmethod
    def isatty(self) -> bool: ...
    @abstractmethod
    def read(self, n: int = -1, /) -> AnyStr: ...
    @abstractmethod
    def readable(self) -> bool: ...
    @abstractmethod
    def readline(self, limit: int = -1, /) -> AnyStr: ...
    @abstractmethod
    def readlines(self, hint: int = -1, /) -> list[AnyStr]: ...
    @abstractmethod
    def seek(self, offset: int, whence: int = 0, /) -> int: ...
    @abstractmethod
    def seekable(self) -> bool: ...
    @abstractmethod
    def tell(self) -> int: ...
    @abstractmethod
    def truncate(self, size: int | None = None, /) -> int: ...
    @abstractmethod
    def writable(self) -> bool: ...
    # write/writelines: the first overload applies to IO[bytes] and takes ReadableBuffer; the second takes AnyStr.
    @abstractmethod
    @overload
    def write(self: IO[bytes], s: ReadableBuffer, /) -> int: ...
    @abstractmethod
    @overload
    def write(self, s: AnyStr, /) -> int: ...
    @abstractmethod
    @overload
    def writelines(self: IO[bytes], lines: Iterable[ReadableBuffer], /) -> None: ...
    @abstractmethod
    @overload
    def writelines(self, lines: Iterable[AnyStr], /) -> None: ...
    @abstractmethod
    def __next__(self) -> AnyStr: ...
    @abstractmethod
    def __iter__(self) -> Iterator[AnyStr]: ...
    @abstractmethod
    def __enter__(self) -> IO[AnyStr]: ...
    @abstractmethod
    def __exit__(
        self, type: type[BaseException] | None, value: BaseException | None, traceback: TracebackType | None, /
    ) -> None: ...

class BinaryIO(IO[bytes]):
    __slots__ = ()
    # Narrows the __enter__ return type from IO[bytes] to BinaryIO.
    @abstractmethod
    def __enter__(self) -> BinaryIO: ...

class TextIO(IO[str]):
    # See comment regarding the @properties in the `IO` class
    __slots__ = ()
    @property
    def buffer(self) -> BinaryIO: ...
    @property
    def encoding(self) -> str: ...
    @property
    def errors(self) -> str | None: ...
    @property
    def line_buffering(self) -> int: ...  # int on PyPy, bool on CPython
    @property
    def newlines(self) -> Any: ...  # None, str or tuple
    # Narrows the __enter__ return type from IO[str] to TextIO.
    @abstractmethod
    def __enter__(self) -> TextIO: ...
ByteString: typing_extensions.TypeAlias = bytes | bytearray | memoryview # Functions _get_type_hints_obj_allowed_types: typing_extensions.TypeAlias = ( object | Callable[..., Any] | FunctionType | BuiltinFunctionType | MethodType | ModuleType | WrapperDescriptorType | MethodWrapperType | MethodDescriptorType ) if sys.version_info >= (3, 14): def get_type_hints( obj: _get_type_hints_obj_allowed_types, globalns: dict[str, Any] | None = None, localns: Mapping[str, Any] | None = None, include_extras: bool = False, *, format: Format | None = None, # Default: Format.VALUE ) -> dict[str, Any]: ... # AnnotationForm else: def get_type_hints( obj: _get_type_hints_obj_allowed_types, globalns: dict[str, Any] | None = None, localns: Mapping[str, Any] | None = None, include_extras: bool = False, ) -> dict[str, Any]: ... # AnnotationForm def get_args(tp: Any) -> tuple[Any, ...]: ... # AnnotationForm if sys.version_info >= (3, 10): @overload def get_origin(tp: ParamSpecArgs | ParamSpecKwargs) -> ParamSpec: ... @overload def get_origin(tp: UnionType) -> type[UnionType]: ... @overload def get_origin(tp: GenericAlias) -> type: ... @overload def get_origin(tp: Any) -> Any | None: ... # AnnotationForm @overload def cast(typ: type[_T], val: Any) -> _T: ... @overload def cast(typ: str, val: Any) -> Any: ... @overload def cast(typ: object, val: Any) -> Any: ... if sys.version_info >= (3, 11): def reveal_type(obj: _T, /) -> _T: ... def assert_never(arg: Never, /) -> Never: ... def assert_type(val: _T, typ: Any, /) -> _T: ... # AnnotationForm def clear_overloads() -> None: ... def get_overloads(func: Callable[..., object]) -> Sequence[Callable[..., object]]: ... def dataclass_transform( *, eq_default: bool = True, order_default: bool = False, kw_only_default: bool = False, frozen_default: bool = False, # on 3.11, runtime accepts it as part of kwargs field_specifiers: tuple[type[Any] | Callable[..., Any], ...] = (), **kwargs: Any, ) -> IdentityFunction: ... 
# Type constructors # Obsolete, will be changed to a function. Use _typeshed._type_checker_internals.NamedTupleFallback instead. class NamedTuple(tuple[Any, ...]): _field_defaults: ClassVar[dict[str, Any]] _fields: ClassVar[tuple[str, ...]] # __orig_bases__ sometimes exists on <3.12, but not consistently # So we only add it to the stub on 3.12+. if sys.version_info >= (3, 12): __orig_bases__: ClassVar[tuple[Any, ...]] @overload def __init__(self, typename: str, fields: Iterable[tuple[str, Any]], /) -> None: ... @overload @deprecated( 'Creating a typing.NamedTuple using keyword arguments is deprecated and support will be removed in Python 3.15' ) def __init__(self, typename: str, fields: None = None, /, **kwargs: Any) -> None: ... @classmethod def _make(cls, iterable: Iterable[Any]) -> typing_extensions.Self: ... def _asdict(self) -> dict[str, Any]: ... def _replace(self, **kwargs: Any) -> typing_extensions.Self: ... if sys.version_info >= (3, 13): def __replace__(self, **kwargs: Any) -> typing_extensions.Self: ... # Internal mypy fallback type for all typed dicts (does not exist at runtime) # N.B. Keep this mostly in sync with typing_extensions._TypedDict/mypy_extensions._TypedDict # Obsolete, use _typeshed._type_checker_internals.TypedDictFallback instead. @type_check_only class _TypedDict(Mapping[str, object], metaclass=ABCMeta): __total__: ClassVar[bool] __required_keys__: ClassVar[frozenset[str]] __optional_keys__: ClassVar[frozenset[str]] # __orig_bases__ sometimes exists on <3.12, but not consistently, # so we only add it to the stub on 3.12+ if sys.version_info >= (3, 12): __orig_bases__: ClassVar[tuple[Any, ...]] if sys.version_info >= (3, 13): __readonly_keys__: ClassVar[frozenset[str]] __mutable_keys__: ClassVar[frozenset[str]] def copy(self) -> typing_extensions.Self: ... # Using Never so that only calls using mypy plugin hook that specialize the signature # can go through. def setdefault(self, k: _Never, default: object) -> object: ... 
# Mypy plugin hook for 'pop' expects that 'default' has a type variable type. def pop(self, k: _Never, default: _T = ...) -> object: ... # pyright: ignore[reportInvalidTypeVarUse] def update(self, m: typing_extensions.Self, /) -> None: ... def __delitem__(self, k: _Never) -> None: ... def items(self) -> dict_items[str, object]: ... def keys(self) -> dict_keys[str, object]: ... def values(self) -> dict_values[str, object]: ... @overload def __or__(self, value: typing_extensions.Self, /) -> typing_extensions.Self: ... @overload def __or__(self, value: dict[str, Any], /) -> dict[str, object]: ... @overload def __ror__(self, value: typing_extensions.Self, /) -> typing_extensions.Self: ... @overload def __ror__(self, value: dict[str, Any], /) -> dict[str, object]: ... # supposedly incompatible definitions of __or__ and __ior__ def __ior__(self, value: typing_extensions.Self, /) -> typing_extensions.Self: ... # type: ignore[misc] if sys.version_info >= (3, 14): from annotationlib import ForwardRef as ForwardRef def evaluate_forward_ref( forward_ref: ForwardRef, *, owner: object = None, globals: dict[str, Any] | None = None, locals: Mapping[str, Any] | None = None, type_params: tuple[TypeVar, ParamSpec, TypeVarTuple] | None = None, format: Format | None = None, ) -> Any: ... # AnnotationForm else: @final class ForwardRef(_Final): __slots__ = ( '__forward_arg__', '__forward_code__', '__forward_evaluated__', '__forward_value__', '__forward_is_argument__', '__forward_is_class__', '__forward_module__', ) __forward_arg__: str __forward_code__: CodeType __forward_evaluated__: bool __forward_value__: Any | None # AnnotationForm __forward_is_argument__: bool __forward_is_class__: bool __forward_module__: Any | None def __init__( self, arg: str, is_argument: bool = True, module: Any | None = None, *, is_class: bool = False ) -> None: ... 
if sys.version_info >= (3, 13): @overload @deprecated( "Failing to pass a value to the 'type_params' parameter of ForwardRef._evaluate() is deprecated, " 'as it leads to incorrect behaviour when evaluating a stringified annotation ' 'that references a PEP 695 type parameter. It will be disallowed in Python 3.15.' ) def _evaluate( self, globalns: dict[str, Any] | None, localns: Mapping[str, Any] | None, *, recursive_guard: frozenset[str], ) -> Any | None: ... # AnnotationForm @overload def _evaluate( self, globalns: dict[str, Any] | None, localns: Mapping[str, Any] | None, type_params: tuple[TypeVar | ParamSpec | TypeVarTuple, ...], *, recursive_guard: frozenset[str], ) -> Any | None: ... # AnnotationForm elif sys.version_info >= (3, 12): def _evaluate( self, globalns: dict[str, Any] | None, localns: Mapping[str, Any] | None, type_params: tuple[TypeVar | ParamSpec | TypeVarTuple, ...] | None = None, *, recursive_guard: frozenset[str], ) -> Any | None: ... # AnnotationForm else: def _evaluate( self, globalns: dict[str, Any] | None, localns: Mapping[str, Any] | None, recursive_guard: frozenset[str], ) -> Any | None: ... # AnnotationForm def __eq__(self, other: object) -> bool: ... def __hash__(self) -> int: ... if sys.version_info >= (3, 11): def __or__(self, other: Any) -> _SpecialForm: ... def __ror__(self, other: Any) -> _SpecialForm: ... if sys.version_info >= (3, 10): def is_typeddict(tp: object) -> bool: ... def _type_repr(obj: object) -> str: ... if sys.version_info >= (3, 12): _TypeParameter: typing_extensions.TypeAlias = ( TypeVar | typing_extensions.TypeVar | ParamSpec | typing_extensions.ParamSpec | TypeVarTuple | typing_extensions.TypeVarTuple ) def override(method: _F, /) -> _F: ... @final class TypeAliasType: def __new__(cls, name: str, value: Any, *, type_params: tuple[_TypeParameter, ...] = ()) -> Self: ... @property def __value__(self) -> Any: ... # AnnotationForm @property def __type_params__(self) -> tuple[_TypeParameter, ...]: ... 
@property def __parameters__(self) -> tuple[Any, ...]: ... # AnnotationForm @property def __name__(self) -> str: ... # It's writable on types, but not on instances of TypeAliasType. @property def __module__(self) -> str | None: ... # type: ignore[override] def __getitem__(self, parameters: Any, /) -> GenericAlias: ... # AnnotationForm def __or__(self, right: Any, /) -> _SpecialForm: ... def __ror__(self, left: Any, /) -> _SpecialForm: ... if sys.version_info >= (3, 14): @property def evaluate_value(self) -> EvaluateFunc: ... if sys.version_info >= (3, 13): def is_protocol(tp: type, /) -> bool: ... def get_protocol_members(tp: type, /) -> frozenset[str]: ... @final @type_check_only class _NoDefaultType: ... NoDefault: _NoDefaultType TypeIs: _SpecialForm ReadOnly: _SpecialForm ================================================ FILE: crates/monty-typeshed/vendor/typeshed/stdlib/typing_extensions.pyi ================================================ import abc import enum import sys from _collections_abc import dict_items, dict_keys, dict_values from collections.abc import ( AsyncGenerator as AsyncGenerator, AsyncIterable as AsyncIterable, AsyncIterator as AsyncIterator, Awaitable as Awaitable, Collection as Collection, Container as Container, Coroutine as Coroutine, Generator as Generator, Hashable as Hashable, ItemsView as ItemsView, Iterable as Iterable, Iterator as Iterator, KeysView as KeysView, Mapping as Mapping, MappingView as MappingView, MutableMapping as MutableMapping, MutableSequence as MutableSequence, MutableSet as MutableSet, Reversible as Reversible, Sequence as Sequence, Sized as Sized, ValuesView as ValuesView, ) from contextlib import AbstractAsyncContextManager as AsyncContextManager, AbstractContextManager as ContextManager from re import Match as Match, Pattern as Pattern from types import GenericAlias, ModuleType from typing import ( # noqa: Y022,Y037,Y038,Y039,UP035,RUF100 IO as IO, TYPE_CHECKING as TYPE_CHECKING, AbstractSet as AbstractSet, Any 
as Any, AnyStr as AnyStr, BinaryIO as BinaryIO, Callable as Callable, ChainMap as ChainMap, ClassVar as ClassVar, Counter as Counter, DefaultDict as DefaultDict, Deque as Deque, Dict as Dict, ForwardRef as ForwardRef, FrozenSet as FrozenSet, Generic as Generic, List as List, NoReturn as NoReturn, Optional as Optional, Set as Set, Text as Text, TextIO as TextIO, Tuple as Tuple, Type as Type, TypedDict as TypedDict, TypeVar as _TypeVar, Union as Union, _Alias, _SpecialForm, cast as cast, no_type_check as no_type_check, no_type_check_decorator as no_type_check_decorator, overload as overload, type_check_only, ) from _typeshed import AnnotationForm, IdentityFunction, Incomplete, Unused if sys.version_info >= (3, 10): from types import UnionType # Please keep order the same as at runtime. __all__ = [ # Super-special typing primitives. 'Any', 'ClassVar', 'Concatenate', 'Final', 'LiteralString', 'ParamSpec', 'ParamSpecArgs', 'ParamSpecKwargs', 'Self', 'Type', 'TypeVar', 'TypeVarTuple', 'Unpack', # ABCs (from collections.abc). 'Awaitable', 'AsyncIterator', 'AsyncIterable', 'Coroutine', 'AsyncGenerator', 'AsyncContextManager', 'Buffer', 'ChainMap', # Concrete collection types. 'ContextManager', 'Counter', 'Deque', 'DefaultDict', 'NamedTuple', 'OrderedDict', 'TypedDict', # Structural checks, a.k.a. protocols. 'SupportsAbs', 'SupportsBytes', 'SupportsComplex', 'SupportsFloat', 'SupportsIndex', 'SupportsInt', 'SupportsRound', 'Reader', 'Writer', # One-off things. 
'Annotated', 'assert_never', 'assert_type', 'clear_overloads', 'dataclass_transform', 'deprecated', 'disjoint_base', 'Doc', 'evaluate_forward_ref', 'get_overloads', 'final', 'Format', 'get_annotations', 'get_args', 'get_origin', 'get_original_bases', 'get_protocol_members', 'get_type_hints', 'IntVar', 'is_protocol', 'is_typeddict', 'Literal', 'NewType', 'overload', 'override', 'Protocol', 'Sentinel', 'reveal_type', 'runtime', 'runtime_checkable', 'Text', 'TypeAlias', 'TypeAliasType', 'TypeForm', 'TypeGuard', 'TypeIs', 'TYPE_CHECKING', 'type_repr', 'Never', 'NoReturn', 'ReadOnly', 'Required', 'NotRequired', 'NoDefault', 'NoExtraItems', # Pure aliases, have always been in typing 'AbstractSet', 'AnyStr', 'BinaryIO', 'Callable', 'Collection', 'Container', 'Dict', 'ForwardRef', 'FrozenSet', 'Generator', 'Generic', 'Hashable', 'IO', 'ItemsView', 'Iterable', 'Iterator', 'KeysView', 'List', 'Mapping', 'MappingView', 'Match', 'MutableMapping', 'MutableSequence', 'MutableSet', 'Optional', 'Pattern', 'Reversible', 'Sequence', 'Set', 'Sized', 'TextIO', 'Tuple', 'Union', 'ValuesView', 'cast', 'no_type_check', 'no_type_check_decorator', # Added dynamically 'CapsuleType', ] _T = _TypeVar('_T') _F = _TypeVar('_F', bound=Callable[..., Any]) _TC = _TypeVar('_TC', bound=type[object]) _T_co = _TypeVar('_T_co', covariant=True) # Any type covariant containers. _T_contra = _TypeVar('_T_contra', contravariant=True) # Do not import (and re-export) Protocol or runtime_checkable from # typing module because type checkers need to be able to distinguish # typing.Protocol and typing_extensions.Protocol so they can properly # warn users about potential runtime exceptions when using typing.Protocol # on older versions of Python. Protocol: _SpecialForm def runtime_checkable(cls: _TC) -> _TC: ... # This alias for above is kept here for backwards compatibility. runtime = runtime_checkable Final: _SpecialForm def final(f: _F) -> _F: ... def disjoint_base(cls: _TC) -> _TC: ... 
Literal: _SpecialForm

def IntVar(name: str) -> Any: ...  # returns a new TypeVar

# Internal mypy fallback type for all typed dicts (does not exist at runtime)
# N.B. Keep this mostly in sync with typing._TypedDict/mypy_extensions._TypedDict
@type_check_only
class _TypedDict(Mapping[str, object], metaclass=abc.ABCMeta):
    __required_keys__: ClassVar[frozenset[str]]
    __optional_keys__: ClassVar[frozenset[str]]
    __total__: ClassVar[bool]
    __orig_bases__: ClassVar[tuple[Any, ...]]
    # PEP 705
    __readonly_keys__: ClassVar[frozenset[str]]
    __mutable_keys__: ClassVar[frozenset[str]]
    # PEP 728
    __closed__: ClassVar[bool | None]
    __extra_items__: ClassVar[AnnotationForm]
    def copy(self) -> Self: ...
    # Using Never so that only calls using mypy plugin hook that specialize the signature
    # can go through.
    def setdefault(self, k: Never, default: object) -> object: ...
    # Mypy plugin hook for 'pop' expects that 'default' has a type variable type.
    def pop(self, k: Never, default: _T = ...) -> object: ...  # pyright: ignore[reportInvalidTypeVarUse]
    def update(self, m: Self, /) -> None: ...
    def items(self) -> dict_items[str, object]: ...
    def keys(self) -> dict_keys[str, object]: ...
    def values(self) -> dict_values[str, object]: ...
    def __delitem__(self, k: Never) -> None: ...
    # `|` with another instance of the same TypedDict keeps the TypedDict type;
    # with a plain `dict[str, Any]` the result is declared as `dict[str, object]`.
    @overload
    def __or__(self, value: Self, /) -> Self: ...
    @overload
    def __or__(self, value: dict[str, Any], /) -> dict[str, object]: ...
    @overload
    def __ror__(self, value: Self, /) -> Self: ...
    @overload
    def __ror__(self, value: dict[str, Any], /) -> dict[str, object]: ...
    # supposedly incompatible definitions of `__ior__` and `__or__`:
    # Since this module defines "Self" it is not recognized by Ruff as typing_extensions.Self
    def __ior__(self, value: Self, /) -> Self: ...  # type: ignore[misc]

OrderedDict = _Alias()

# On 3.13+ `get_type_hints` is re-exported from `typing`; older versions get
# the declaration below.
if sys.version_info >= (3, 13):
    from typing import get_type_hints as get_type_hints
else:
    def get_type_hints(
        obj: Any,
        globalns: dict[str, Any] | None = None,
        localns: Mapping[str, Any] | None = None,
        include_extras: bool = False,
    ) -> dict[str, AnnotationForm]: ...

def get_args(tp: AnnotationForm) -> tuple[AnnotationForm, ...]: ...

# The `UnionType` overload is only declared on 3.10+, matching the conditional
# `from types import UnionType` import at the top of this module.
if sys.version_info >= (3, 10):
    @overload
    def get_origin(tp: UnionType) -> type[UnionType]: ...

@overload
def get_origin(tp: GenericAlias) -> type: ...
@overload
def get_origin(tp: ParamSpecArgs | ParamSpecKwargs) -> ParamSpec: ...
@overload
def get_origin(tp: AnnotationForm) -> AnnotationForm | None: ...

Annotated: _SpecialForm
_AnnotatedAlias: Any  # undocumented

# New and changed things in 3.10
if sys.version_info >= (3, 10):
    from typing import (
        Concatenate as Concatenate,
        ParamSpecArgs as ParamSpecArgs,
        ParamSpecKwargs as ParamSpecKwargs,
        TypeAlias as TypeAlias,
        TypeGuard as TypeGuard,
        is_typeddict as is_typeddict,
    )
else:
    @final
    class ParamSpecArgs:
        @property
        def __origin__(self) -> ParamSpec: ...
        def __init__(self, origin: ParamSpec) -> None: ...

    @final
    class ParamSpecKwargs:
        @property
        def __origin__(self) -> ParamSpec: ...
        def __init__(self, origin: ParamSpec) -> None: ...

    Concatenate: _SpecialForm
    TypeAlias: _SpecialForm
    TypeGuard: _SpecialForm
    def is_typeddict(tp: object) -> bool: ...

# New and changed things in 3.11
if sys.version_info >= (3, 11):
    from typing import (
        LiteralString as LiteralString,
        NamedTuple as NamedTuple,
        Never as Never,
        NewType as NewType,
        NotRequired as NotRequired,
        Required as Required,
        Self as Self,
        Unpack as Unpack,
        assert_never as assert_never,
        assert_type as assert_type,
        clear_overloads as clear_overloads,
        dataclass_transform as dataclass_transform,
        get_overloads as get_overloads,
        reveal_type as reveal_type,
    )
else:
    Self: _SpecialForm
    Never: _SpecialForm
    def reveal_type(obj: _T, /) -> _T: ...
    def assert_never(arg: Never, /) -> Never: ...
    def assert_type(val: _T, typ: AnnotationForm, /) -> _T: ...
    def clear_overloads() -> None: ...
    def get_overloads(func: Callable[..., object]) -> Sequence[Callable[..., object]]: ...

    Required: _SpecialForm
    NotRequired: _SpecialForm
    LiteralString: _SpecialForm
    Unpack: _SpecialForm

    def dataclass_transform(
        *,
        eq_default: bool = True,
        order_default: bool = False,
        kw_only_default: bool = False,
        frozen_default: bool = False,
        field_specifiers: tuple[type[Any] | Callable[..., Any], ...] = (),
        **kwargs: object,
    ) -> IdentityFunction: ...

    class NamedTuple(tuple[Any, ...]):
        _field_defaults: ClassVar[dict[str, Any]]
        _fields: ClassVar[tuple[str, ...]]
        __orig_bases__: ClassVar[tuple[Any, ...]]
        # Two construction forms are declared: an iterable of (name, type)
        # pairs, or `fields=None` with the fields given as keyword arguments.
        @overload
        def __init__(self, typename: str, fields: Iterable[tuple[str, Any]] = ...) -> None: ...
        @overload
        def __init__(self, typename: str, fields: None = None, **kwargs: Any) -> None: ...
        @classmethod
        def _make(cls, iterable: Iterable[Any]) -> Self: ...
        def _asdict(self) -> dict[str, Any]: ...
        def _replace(self, **kwargs: Any) -> Self: ...

    class NewType:
        def __init__(self, name: str, tp: AnnotationForm) -> None: ...
        # Calling an instance is declared to return its argument with the same static type.
        def __call__(self, obj: _T, /) -> _T: ...
        __supertype__: type | NewType
        __name__: str
        if sys.version_info >= (3, 10):
            def __or__(self, other: Any) -> _SpecialForm: ...
            def __ror__(self, other: Any) -> _SpecialForm: ...

if sys.version_info >= (3, 12):
    from collections.abc import Buffer as Buffer
    from types import get_original_bases as get_original_bases
    from typing import (
        SupportsAbs as SupportsAbs,
        SupportsBytes as SupportsBytes,
        SupportsComplex as SupportsComplex,
        SupportsFloat as SupportsFloat,
        SupportsIndex as SupportsIndex,
        SupportsInt as SupportsInt,
        SupportsRound as SupportsRound,
        override as override,
    )
else:
    def override(arg: _F, /) -> _F: ...
    def get_original_bases(cls: type, /) -> tuple[Any, ...]: ...

    # mypy and pyright object to this being both ABC and Protocol.
    # At runtime it inherits from ABC and is not a Protocol, but it is on the
    # allowlist for use as a Protocol.
    @runtime_checkable
    class Buffer(Protocol, abc.ABC):  # type: ignore[misc]  # pyright: ignore[reportGeneralTypeIssues]
        # Not actually a Protocol at runtime; see
        # https://github.com/python/typeshed/issues/10224 for why we're defining it this way
        def __buffer__(self, flags: int, /) -> memoryview: ...

    # Each Supports* protocol below declares a single abstract dunder method.
    @runtime_checkable
    class SupportsInt(Protocol, metaclass=abc.ABCMeta):
        __slots__ = ()
        @abc.abstractmethod
        def __int__(self) -> int: ...

    @runtime_checkable
    class SupportsFloat(Protocol, metaclass=abc.ABCMeta):
        __slots__ = ()
        @abc.abstractmethod
        def __float__(self) -> float: ...

    @runtime_checkable
    class SupportsComplex(Protocol, metaclass=abc.ABCMeta):
        __slots__ = ()
        @abc.abstractmethod
        def __complex__(self) -> complex: ...

    @runtime_checkable
    class SupportsBytes(Protocol, metaclass=abc.ABCMeta):
        __slots__ = ()
        @abc.abstractmethod
        def __bytes__(self) -> bytes: ...

    @runtime_checkable
    class SupportsIndex(Protocol, metaclass=abc.ABCMeta):
        __slots__ = ()
        @abc.abstractmethod
        def __index__(self) -> int: ...

    @runtime_checkable
    class SupportsAbs(Protocol[_T_co]):
        __slots__ = ()
        @abc.abstractmethod
        def __abs__(self) -> _T_co: ...

    @runtime_checkable
    class SupportsRound(Protocol[_T_co]):
        __slots__ = ()
        # Without `ndigits` the result is `int`; with it, the protocol's type parameter.
        @overload
        @abc.abstractmethod
        def __round__(self) -> int: ...
        @overload
        @abc.abstractmethod
        def __round__(self, ndigits: int, /) -> _T_co: ...

# On 3.14+ Reader/Writer are re-exported from `io`; older versions get the
# protocol declarations below.
if sys.version_info >= (3, 14):
    from io import Reader as Reader, Writer as Writer
else:
    @runtime_checkable
    class Reader(Protocol[_T_co]):
        __slots__ = ()
        @abc.abstractmethod
        def read(self, size: int = ..., /) -> _T_co: ...

    @runtime_checkable
    class Writer(Protocol[_T_contra]):
        __slots__ = ()
        @abc.abstractmethod
        def write(self, data: _T_contra, /) -> int: ...
# On 3.13+ these names are re-exported from the standard library; the `else`
# branch declares them for older versions.
if sys.version_info >= (3, 13):
    from types import CapsuleType as CapsuleType
    from typing import (
        NoDefault as NoDefault,
        ParamSpec as ParamSpec,
        ReadOnly as ReadOnly,
        TypeIs as TypeIs,
        TypeVar as TypeVar,
        TypeVarTuple as TypeVarTuple,
        get_protocol_members as get_protocol_members,
        is_protocol as is_protocol,
    )
    from warnings import deprecated as deprecated
else:
    def is_protocol(tp: type, /) -> bool: ...
    def get_protocol_members(tp: type, /) -> frozenset[str]: ...
    @final
    @type_check_only
    class _NoDefaultType: ...

    NoDefault: _NoDefaultType
    @final
    class CapsuleType: ...

    class deprecated:
        message: LiteralString
        category: type[Warning] | None
        stacklevel: int
        def __init__(
            self, message: LiteralString, /, *, category: type[Warning] | None = ..., stacklevel: int = 1
        ) -> None: ...
        # Calling an instance is declared to return its argument with the same static type.
        def __call__(self, arg: _T, /) -> _T: ...

    @final
    class TypeVar:
        @property
        def __name__(self) -> str: ...
        @property
        def __bound__(self) -> AnnotationForm | None: ...
        @property
        def __constraints__(self) -> tuple[AnnotationForm, ...]: ...
        @property
        def __covariant__(self) -> bool: ...
        @property
        def __contravariant__(self) -> bool: ...
        @property
        def __infer_variance__(self) -> bool: ...
        @property
        def __default__(self) -> AnnotationForm: ...
        def __init__(
            self,
            name: str,
            *constraints: AnnotationForm,
            bound: AnnotationForm | None = None,
            covariant: bool = False,
            contravariant: bool = False,
            default: AnnotationForm = ...,
            infer_variance: bool = False,
        ) -> None: ...
        def has_default(self) -> bool: ...
        def __typing_prepare_subst__(self, alias: Any, args: Any) -> tuple[Any, ...]: ...
        if sys.version_info >= (3, 10):
            def __or__(self, right: Any) -> _SpecialForm: ...
            def __ror__(self, left: Any) -> _SpecialForm: ...
        if sys.version_info >= (3, 11):
            def __typing_subst__(self, arg: Any) -> Any: ...

    @final
    class ParamSpec:
        @property
        def __name__(self) -> str: ...
        @property
        def __bound__(self) -> AnnotationForm | None: ...
        @property
        def __covariant__(self) -> bool: ...
        @property
        def __contravariant__(self) -> bool: ...
        @property
        def __infer_variance__(self) -> bool: ...
        @property
        def __default__(self) -> AnnotationForm: ...
        def __init__(
            self,
            name: str,
            *,
            bound: None | AnnotationForm | str = None,
            contravariant: bool = False,
            covariant: bool = False,
            default: AnnotationForm = ...,
        ) -> None: ...
        @property
        def args(self) -> ParamSpecArgs: ...
        @property
        def kwargs(self) -> ParamSpecKwargs: ...
        def has_default(self) -> bool: ...
        def __typing_prepare_subst__(self, alias: Any, args: Any) -> tuple[Any, ...]: ...
        if sys.version_info >= (3, 10):
            def __or__(self, right: Any) -> _SpecialForm: ...
            def __ror__(self, left: Any) -> _SpecialForm: ...

    @final
    class TypeVarTuple:
        @property
        def __name__(self) -> str: ...
        @property
        def __default__(self) -> AnnotationForm: ...
        def __init__(self, name: str, *, default: AnnotationForm = ...) -> None: ...
        def __iter__(self) -> Any: ...  # Unpack[Self]
        def has_default(self) -> bool: ...
        def __typing_prepare_subst__(self, alias: Any, args: Any) -> tuple[Any, ...]: ...

    ReadOnly: _SpecialForm
    TypeIs: _SpecialForm

# TypeAliasType was added in Python 3.12, but had significant changes in 3.14.
if sys.version_info >= (3, 14):
    from typing import TypeAliasType as TypeAliasType
else:
    @final
    class TypeAliasType:
        def __init__(
            self, name: str, value: AnnotationForm, *, type_params: tuple[TypeVar | ParamSpec | TypeVarTuple, ...] = ()
        ) -> None: ...
        @property
        def __value__(self) -> AnnotationForm: ...
        @property
        def __type_params__(self) -> tuple[TypeVar | ParamSpec | TypeVarTuple, ...]: ...
        @property
        # `__parameters__` can include special forms if a `TypeVarTuple` was
        # passed as a `type_params` element to the constructor method.
        def __parameters__(self) -> tuple[TypeVar | ParamSpec | AnnotationForm, ...]: ...
        @property
        def __name__(self) -> str: ...
        # It's writable on types, but not on instances of TypeAliasType.
        @property
        def __module__(self) -> str | None: ...  # type: ignore[override]
        # Returns typing._GenericAlias, which isn't stubbed.
        def __getitem__(self, parameters: Incomplete | tuple[Incomplete, ...]) -> AnnotationForm: ...
        # Declared as returning NoReturn, i.e. subclassing is not expected to succeed.
        def __init_subclass__(cls, *args: Unused, **kwargs: Unused) -> NoReturn: ...
        if sys.version_info >= (3, 10):
            def __or__(self, right: Any, /) -> _SpecialForm: ...
            def __ror__(self, left: Any, /) -> _SpecialForm: ...

# PEP 727
class Doc:
    documentation: str
    def __init__(self, documentation: str, /) -> None: ...
    def __hash__(self) -> int: ...
    def __eq__(self, other: object) -> bool: ...

# PEP 728
@type_check_only
class _NoExtraItemsType: ...

NoExtraItems: _NoExtraItemsType

# PEP 747
TypeForm: _SpecialForm

# PEP 649/749
if sys.version_info >= (3, 14):
    from typing import evaluate_forward_ref as evaluate_forward_ref

    from annotationlib import Format as Format, get_annotations as get_annotations, type_repr as type_repr
else:
    class Format(enum.IntEnum):
        VALUE = 1
        VALUE_WITH_FAKE_GLOBALS = 2
        FORWARDREF = 3
        STRING = 4

    # The overloads are keyed on `format`: STRING yields `dict[str, str]`,
    # FORWARDREF allows `ForwardRef` values, and the default (VALUE) yields
    # annotation forms.
    @overload
    def get_annotations(
        obj: Any,  # any object with __annotations__ or __annotate__
        *,
        globals: Mapping[str, Any] | None = None,  # value types depend on the key
        locals: Mapping[str, Any] | None = None,  # value types depend on the key
        eval_str: bool = False,
        format: Literal[Format.STRING],
    ) -> dict[str, str]: ...
    @overload
    def get_annotations(
        obj: Any,  # any object with __annotations__ or __annotate__
        *,
        globals: Mapping[str, Any] | None = None,  # value types depend on the key
        locals: Mapping[str, Any] | None = None,  # value types depend on the key
        eval_str: bool = False,
        format: Literal[Format.FORWARDREF],
    ) -> dict[str, AnnotationForm | ForwardRef]: ...
    @overload
    def get_annotations(
        obj: Any,  # any object with __annotations__ or __annotate__
        *,
        globals: Mapping[str, Any] | None = None,  # value types depend on the key
        locals: Mapping[str, Any] | None = None,  # value types depend on the key
        eval_str: bool = False,
        format: Format = Format.VALUE,
    ) -> dict[str, AnnotationForm]: ...
    # Same `format`-keyed overload scheme as `get_annotations`: STRING returns
    # `str`, FORWARDREF may return a `ForwardRef`, otherwise an annotation form.
    @overload
    def evaluate_forward_ref(
        forward_ref: ForwardRef,
        *,
        owner: Callable[..., object] | type[object] | ModuleType | None = None,  # any callable, class, or module
        globals: Mapping[str, Any] | None = None,  # value types depend on the key
        locals: Mapping[str, Any] | None = None,  # value types depend on the key
        type_params: Iterable[TypeVar | ParamSpec | TypeVarTuple] | None = None,
        format: Literal[Format.STRING],
        _recursive_guard: Container[str] = ...,
    ) -> str: ...
    @overload
    def evaluate_forward_ref(
        forward_ref: ForwardRef,
        *,
        owner: Callable[..., object] | type[object] | ModuleType | None = None,  # any callable, class, or module
        globals: Mapping[str, Any] | None = None,  # value types depend on the key
        locals: Mapping[str, Any] | None = None,  # value types depend on the key
        type_params: Iterable[TypeVar | ParamSpec | TypeVarTuple] | None = None,
        format: Literal[Format.FORWARDREF],
        _recursive_guard: Container[str] = ...,
    ) -> AnnotationForm | ForwardRef: ...
    @overload
    def evaluate_forward_ref(
        forward_ref: ForwardRef,
        *,
        owner: Callable[..., object] | type[object] | ModuleType | None = None,  # any callable, class, or module
        globals: Mapping[str, Any] | None = None,  # value types depend on the key
        locals: Mapping[str, Any] | None = None,  # value types depend on the key
        type_params: Iterable[TypeVar | ParamSpec | TypeVarTuple] | None = None,
        format: Format | None = None,
        _recursive_guard: Container[str] = ...,
    ) -> AnnotationForm: ...
    def type_repr(value: object) -> str: ...

# PEP 661
class Sentinel:
    def __init__(self, name: str, repr: str | None = None) -> None: ...
    # The declared result of `|` differs by version: `UnionType` on 3.14+,
    # `_SpecialForm` on 3.10-3.13; no union operators are declared before 3.10.
    if sys.version_info >= (3, 14):
        def __or__(self, other: Any) -> UnionType: ...  # other can be any type form legal for unions
        def __ror__(self, other: Any) -> UnionType: ...  # other can be any type form legal for unions
    elif sys.version_info >= (3, 10):
        def __or__(self, other: Any) -> _SpecialForm: ...  # other can be any type form legal for unions
        def __ror__(self, other: Any) -> _SpecialForm: ...
# other can be any type form legal for unions

================================================
FILE: examples/README.md
================================================
# Monty Examples

Numerous examples of what monty can do, and how.

================================================
FILE: examples/expense_analysis/README.md
================================================
# Team Expense Analysis

From [this](https://platform.claude.com/cookbook/tool-use-programmatic-tool-calling-ptc#understanding-the-third-party-api) Anthropic example.

================================================
FILE: examples/expense_analysis/data.py
================================================
from typing import Any

# Fixture data for the example: team roster keyed by integer `id`.
team_members = [
    {'id': 1, 'name': 'Alice Chen'},
    {'id': 2, 'name': 'Bob Smith'},
    {'id': 3, 'name': 'Carol Jones'},
    {'id': 4, 'name': 'David Kim'},
    {'id': 5, 'name': 'Eve Wilson'},
]

# Simulated expense data (multiple line items per person to bloat traditional context)
# Keys are the `id` values from `team_members`. The per-person totals quoted in
# the comments below are the sums of the listed `amount` values; "standard
# budget" refers to the 5000 threshold used by the script in main.py.
expenses = {
    1: [  # Alice - under budget (total 2130.00)
        {'date': '2024-07-15', 'amount': 450.00, 'description': 'Flight to NYC'},
        {'date': '2024-07-16', 'amount': 200.00, 'description': 'Hotel NYC'},
        {'date': '2024-07-17', 'amount': 85.00, 'description': 'Meals NYC'},
        {'date': '2024-08-20', 'amount': 380.00, 'description': 'Flight to Chicago'},
        {'date': '2024-08-21', 'amount': 175.00, 'description': 'Hotel Chicago'},
        {'date': '2024-09-05', 'amount': 520.00, 'description': 'Flight to Seattle'},
        {'date': '2024-09-06', 'amount': 225.00, 'description': 'Hotel Seattle'},
        {'date': '2024-09-07', 'amount': 95.00, 'description': 'Meals Seattle'},
    ],
    2: [  # Bob - over standard budget but has custom budget (total 5425.00)
        {'date': '2024-07-01', 'amount': 850.00, 'description': 'Flight to London'},
        {'date': '2024-07-02', 'amount': 450.00, 'description': 'Hotel London'},
        {'date': '2024-07-03', 'amount': 125.00, 'description': 'Meals London'},
        {'date': '2024-07-04', 'amount': 450.00, 'description': 'Hotel London'},
        {'date': '2024-07-05', 'amount': 120.00, 'description': 'Meals London'},
        {'date': '2024-08-10', 'amount': 780.00, 'description': 'Flight to Tokyo'},
        {'date': '2024-08-11', 'amount': 380.00, 'description': 'Hotel Tokyo'},
        {'date': '2024-08-12', 'amount': 380.00, 'description': 'Hotel Tokyo'},
        {'date': '2024-08-13', 'amount': 150.00, 'description': 'Meals Tokyo'},
        {'date': '2024-09-15', 'amount': 920.00, 'description': 'Flight to Singapore'},
        {'date': '2024-09-16', 'amount': 320.00, 'description': 'Hotel Singapore'},
        {'date': '2024-09-17', 'amount': 320.00, 'description': 'Hotel Singapore'},
        {'date': '2024-09-18', 'amount': 180.00, 'description': 'Meals Singapore'},
    ],
    3: [  # Carol - way over budget (no custom budget) (total 6740.00)
        {'date': '2024-07-08', 'amount': 1200.00, 'description': 'Flight to Paris'},
        {'date': '2024-07-09', 'amount': 550.00, 'description': 'Hotel Paris'},
        {'date': '2024-07-10', 'amount': 550.00, 'description': 'Hotel Paris'},
        {'date': '2024-07-11', 'amount': 550.00, 'description': 'Hotel Paris'},
        {'date': '2024-07-12', 'amount': 200.00, 'description': 'Meals Paris'},
        {'date': '2024-08-25', 'amount': 1100.00, 'description': 'Flight to Sydney'},
        {'date': '2024-08-26', 'amount': 480.00, 'description': 'Hotel Sydney'},
        {'date': '2024-08-27', 'amount': 480.00, 'description': 'Hotel Sydney'},
        {'date': '2024-08-28', 'amount': 480.00, 'description': 'Hotel Sydney'},
        {'date': '2024-08-29', 'amount': 220.00, 'description': 'Meals Sydney'},
        {'date': '2024-09-20', 'amount': 650.00, 'description': 'Flight to Denver'},
        {'date': '2024-09-21', 'amount': 280.00, 'description': 'Hotel Denver'},
    ],
    4: [  # David - under budget (total 2255.00)
        {'date': '2024-07-22', 'amount': 420.00, 'description': 'Flight to Boston'},
        {'date': '2024-07-23', 'amount': 190.00, 'description': 'Hotel Boston'},
        {'date': '2024-07-24', 'amount': 75.00, 'description': 'Meals Boston'},
        {'date': '2024-08-05', 'amount': 510.00, 'description': 'Flight to Austin'},
        {'date': '2024-08-06', 'amount': 210.00, 'description': 'Hotel Austin'},
        {'date': '2024-08-07', 'amount': 90.00, 'description': 'Meals Austin'},
        {'date': '2024-09-12', 'amount': 480.00, 'description': 'Flight to Portland'},
        {'date': '2024-09-13', 'amount': 195.00, 'description': 'Hotel Portland'},
        {'date': '2024-09-14', 'amount': 85.00, 'description': 'Meals Portland'},
    ],
    5: [  # Eve - slightly under standard budget (no custom budget) (total 4680.00)
        {'date': '2024-07-03', 'amount': 680.00, 'description': 'Flight to Miami'},
        {'date': '2024-07-04', 'amount': 320.00, 'description': 'Hotel Miami'},
        {'date': '2024-07-05', 'amount': 320.00, 'description': 'Hotel Miami'},
        {'date': '2024-07-06', 'amount': 145.00, 'description': 'Meals Miami'},
        {'date': '2024-08-18', 'amount': 750.00, 'description': 'Flight to San Diego'},
        {'date': '2024-08-19', 'amount': 290.00, 'description': 'Hotel San Diego'},
        {'date': '2024-08-20', 'amount': 290.00, 'description': 'Hotel San Diego'},
        {'date': '2024-08-21', 'amount': 130.00, 'description': 'Meals San Diego'},
        {'date': '2024-09-08', 'amount': 820.00, 'description': 'Flight to Las Vegas'},
        {'date': '2024-09-09', 'amount': 380.00, 'description': 'Hotel Las Vegas'},
        {'date': '2024-09-10', 'amount': 380.00, 'description': 'Hotel Las Vegas'},
        {'date': '2024-09-11', 'amount': 175.00, 'description': 'Meals Las Vegas'},
    ],
}

# Custom budgets (only Bob has one)
custom_budgets = {
    2: {'amount': 7000.00, 'reason': 'International travel required'},
}


async def get_team_members(department: str) -> dict[str, Any]:
    """Get list of team members for a department.

    Args:
        department: The department name (e.g., "Engineering").

    Returns:
        Dictionary with list of team members.
    """
    # The same fixture roster is returned for every department; `department`
    # is only echoed back in the result.
    return {'department': department, 'members': team_members}


async def get_expenses(user_id: int, quarter: str, category: str) -> dict[str, Any]:
    """Get expense line items for a user.

    Args:
        user_id: The user's ID.
        quarter: The quarter (e.g., "Q3").
        category: The expense category (e.g., "travel").

    Returns:
        Dictionary with expense items.
    """
    # `quarter` and `category` do not filter the fixture data; they are only
    # echoed back. Unknown user IDs yield an empty item list.
    items = expenses.get(user_id, [])
    return {'user_id': user_id, 'quarter': quarter, 'category': category, 'expenses': items}


async def get_custom_budget(user_id: int) -> dict[str, Any] | None:
    """Get custom budget for a user if they have one.

    Args:
        user_id: The user's ID.

    Returns:
        Custom budget info or None if no custom budget.
    """
    budget_info = custom_budgets.get(user_id)
    if budget_info:
        # Note the key rename: the fixture's 'amount' is exposed as 'budget'.
        return {'user_id': user_id, 'budget': budget_info['amount'], 'reason': budget_info['reason']}
    return None

================================================
FILE: examples/expense_analysis/main.py
================================================
import data

import pydantic_monty

# Stub signatures for the external functions, passed to Monty as
# `type_check_stubs` below.
type_definitions = '''
from typing import Any

async def get_team_members(department: str) -> dict[str, Any]:
    """Get list of team members for a department.

    Args:
        department: The department name (e.g., "Engineering").

    Returns:
        Dictionary with list of team members.
    """
    ...

async def get_expenses(user_id: int, quarter: str, category: str) -> dict[str, Any]:
    """Get expense line items for a user.

    Args:
        user_id: The user's ID.
        quarter: The quarter (e.g., "Q3").
        category: The expense category (e.g., "travel").

    Returns:
        Dictionary with expense items.
    """
    ...

async def get_custom_budget(user_id: int) -> dict[str, Any] | None:
    """Get custom budget for a user if they have one.

    Args:
        user_id: The user's ID.

    Returns:
        Custom budget info or None if no custom budget.
    """
    ...
'''

# The script handed to Monty. It calls the three external functions and ends
# with a bare dict expression summarising who exceeded their budget.
code = """
# Get Engineering team members
team_data = await get_team_members(department="Engineering")
team_members = team_data.get("members", [])

# Standard budget
STANDARD_BUDGET = 5000

# Process each team member
total_members = len(team_members)
over_budget_list = []

for member in team_members:
    user_id = member.get("id")
    name = member.get("name")

    # Get Q3 travel expenses for this user
    expenses_data = await get_expenses(user_id=user_id, quarter="Q3", category="travel")
    expense_items = expenses_data.get("expenses", [])

    # Sum up total expenses
    total_spent = sum(item.get("amount", 0) for item in expense_items)

    # Check if they exceeded standard budget
    if total_spent > STANDARD_BUDGET:
        # Check for custom budget
        custom_budget_data = await get_custom_budget(user_id=user_id)
        if custom_budget_data is not None:
            budget = custom_budget_data.get("budget", STANDARD_BUDGET)
        else:
            budget = STANDARD_BUDGET

        # Check if they exceeded their actual budget (standard or custom)
        if total_spent > budget:
            amount_over = total_spent - budget
            over_budget_list.append({
                "name": name,
                "total_spent": total_spent,
                "budget": budget,
                "amount_over": amount_over
            })

# Return the analysis
{
    "total_team_members_analyzed": total_members,
    "count_exceeded_budget": len(over_budget_list),
    "over_budget_details": over_budget_list
}
"""

# NOTE(review): a `prompt` input is declared here and supplied in `main()`,
# but the script in `code` never references it.
m = pydantic_monty.Monty(
    code,
    inputs=['prompt'],
    script_name='expense.py',
    type_check=True,
    type_check_stubs=type_definitions,
)


async def main():
    # Run the script with the fixture-backed coroutines from `data` bound to
    # the external function names it calls, then print the result.
    output = await pydantic_monty.run_monty_async(
        m,
        inputs={'prompt': 'testing'},
        external_functions={
            'get_team_members': data.get_team_members,
            'get_expenses': data.get_expenses,
            'get_custom_budget': data.get_custom_budget,
        },
    )
    print(output)


if __name__ == '__main__':
    import asyncio

    asyncio.run(main())

================================================
FILE: examples/sql_playground/README.md
================================================
# SQL Playground: Customer Sentiment Analysis

This example demonstrates using Monty for a
@dataclass
class ExternalFunctions:
    """Host-side helpers exposed to the sandboxed analysis script.

    ``query_csv`` and ``read_json`` read their input through ``fs``;
    ``analyze_sentiment`` depends only on its argument.
    """

    # Virtual filesystem that CSV and JSON inputs are read from.
    fs: OSAccess

    async def query_csv(
        self, filepath: PurePosixPath, sql: str, parameters: dict[str, Any] | None = None
    ) -> list[dict[str, Any]]:
        """Execute SQL query on a CSV file using DuckDB.

        Args:
            filepath: Path to the CSV file in the virtual filesystem.
            sql: SQL query to execute. The CSV data is available as a table named 'data'.
            parameters: Optional dictionary of parameters to bind to the SQL query.

        Returns:
            List of dictionaries, one per row, with column names as keys.
        """
        # Read CSV content from virtual filesystem
        content = self.fs.path_read_bytes(filepath)

        # Write to a temporary file for DuckDB to read
        # (DuckDB's read_csv_auto works best with file paths)
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.csv') as tmp:
            tmp.write(content)
            tmp.flush()

            conn = duckdb.connect(':memory:')
            try:
                # NOTE! duckdb (horribly) reads locals as tables, hence `data` here that isn't used
                data = conn.read_csv(tmp.name)
                result_rel = conn.execute(sql, parameters)
                del data

                # Materialise everything before the connection and temp file go away
                columns = [desc[0] for desc in result_rel.description]
                rows = result_rel.fetchall()
            finally:
                # Release the in-memory database even when the query fails
                conn.close()

        return [dict(zip(columns, row)) for row in rows]

    async def read_json(self, filepath: PurePosixPath) -> list[Any] | dict[str, Any]:
        """Read and parse a JSON file from the virtual filesystem.

        Args:
            filepath: Path to the JSON file in the virtual filesystem.

        Returns:
            Parsed JSON data (list or dict).
        """
        content = self.fs.path_read_text(filepath)
        return json.loads(content)

    @staticmethod
    async def analyze_sentiment(text: str) -> float:
        """Analyze sentiment of text using simple keyword matching.

        Each distinct positive keyword found adds 0.3 and each distinct negative
        keyword subtracts 0.3. A keyword only counts when it begins a word, so
        inflected forms such as "thanks" or "loved" still match while unrelated
        words that merely contain a keyword ("whatever" contains "hate") do not.
        For production use, you would want to use a proper NLP library or API.

        Args:
            text: The text to analyze.

        Returns:
            Sentiment score from -1.0 (very negative) to +1.0 (very positive).
            A score of 0.0 indicates neutral sentiment.

        Example:
            >>> await analyze_sentiment('This product is amazing!')
            0.3
        """
        import re  # local import so the fix does not depend on the module's import block

        positive_words = (
            'amazing',
            'great',
            'love',
            'thank',
            'helpful',
            'a+',
            'good',
            'best',
            'excellent',
            'awesome',
            'fantastic',
            'wonderful',
            'glad',
            'enjoy',
            'better',
        )
        negative_words = (
            'bad',
            'angry',
            'hate',
            'terrible',
            'worst',
            'fraud',
            'awful',
            'horrible',
            'disappointed',
            'poor',
            'useless',
        )

        text_lower = text.lower()

        def mentions(word: str) -> bool:
            # (?<!\w) requires the keyword to start a word; re.escape keeps 'a+' literal
            return re.search(r'(?<!\w)' + re.escape(word), text_lower) is not None

        score = 0.0
        for word in positive_words:
            if mentions(word):
                score += 0.3
        for word in negative_words:
            if mentions(word):
                score -= 0.3

        # Clamp score to [-1, 1]
        return max(-1.0, min(1.0, score))
async def main():
    """Run the customer sentiment analysis in the Monty sandbox and print a report.

    The sandboxed script is given ``query_csv``, ``read_json`` and
    ``analyze_sentiment`` as external functions and ``fs`` for OS access.

    Returns:
        The per-customer result dicts produced by the sandboxed script, or an
        empty list when the script produced nothing.
    """
    # Create external functions that can access the filesystem
    external_funcs = ExternalFunctions(fs)

    # Create the Monty runner with type checking enabled; the script text was
    # already loaded into SANDBOX_CODE at import time
    m = pydantic_monty.Monty(
        SANDBOX_CODE,
        script_name='sql_playground.py',
        type_check=True,
        type_check_stubs=TYPE_STUBS,
    )

    # Run the analysis with external functions and OS access
    results = await pydantic_monty.run_monty_async(
        m,
        external_functions={
            'query_csv': external_funcs.query_csv,
            'read_json': external_funcs.read_json,
            'analyze_sentiment': external_funcs.analyze_sentiment,
        },
        os=fs,
    )

    if not results:
        # Stop here: a None result would otherwise fail in the loop below
        print('No results found. Check if customers have matching Twitter handles and tweets.')
        return []

    for r in results:
        avg = r['avg_sentiment']
        if avg > 0:
            sentiment_emoji = '😊'
        elif avg == 0:
            sentiment_emoji = '😐'
        else:
            sentiment_emoji = '😞'
        print(f' {r["name"]}')
        print(f' Purchases: ${r["total_purchases"]:,}')
        print(f' Twitter: @{r["twitter"]}')
        print(f' Tweets: {r["tweet_count"]}')
        print(f' Sentiment: {r["avg_sentiment"]:+.2f} {sentiment_emoji}')
        print()

    return results
email_to_twitter.get(customer['Email']) if not twitter: continue # Find tweets by this user user_tweets = [t for t in tweets if t['user'] == twitter] if not user_tweets: continue # Analyze sentiment of each tweet sentiments: list[float] = [] for tweet in user_tweets: score = await analyze_sentiment(text=tweet['text']) sentiments.append(score) # Calculate average sentiment avg_sentiment = sum(sentiments) / len(sentiments) print(f'{customer["First"]} {customer["Last"]} - {avg_sentiment=}') results.append( { 'name': f'{customer["First"]} {customer["Last"]}', 'total_purchases': customer['TotalPurchased'], 'twitter': twitter, 'tweet_count': len(user_tweets), 'avg_sentiment': round(avg_sentiment, 2), } ) return results # Return the analysis results await main() # pyright: ignore ================================================ FILE: examples/sql_playground/type_stubs.pyi ================================================ from pathlib import Path from typing import Any async def query_csv(filepath: Path, sql: str, parameters: dict[str, Any] | None = None) -> list[dict[str, Any]]: """Execute SQL query on a CSV file using DuckDB.""" ... async def read_json(filepath: Path) -> list[Any] | dict[str, Any]: """Read and parse a JSON file.""" ... async def analyze_sentiment(text: str) -> float: """Analyze sentiment of text. Returns score from -1.0 to +1.0.""" ... ================================================ FILE: examples/web_scraper/README.md ================================================ # Web Scraper Example This example uses Python dataclass APIs for playwright and beautifulsoup to allow the LLM to extract price data from the websites of model labs. We use Pydantic AI to generate code, but instead of using the `CodeExecutionToolset` type from Pydantic AI, we get the LLM to generate code directly allowing us to use new features of Monty not yet available in Pydantic AI. Look at `example_code.py` for an example of the kind of code sonnet 4.5 will generate in this case. 
# Live Playwright pages keyed by ``id(page)``; entries are added in
# ``Browser.open_page`` and dropped when ``start_browser`` exits.
pw_pages: dict[int, PwPage] = {}


@asynccontextmanager
async def start_browser() -> AsyncIterator[Browser]:
    """Launch Chromium via Playwright and yield a `Browser` wrapper around it.

    On exit the page registry is cleared and the browser is closed, including
    when the body of the ``async with`` block raises.
    """
    async with async_playwright() as p:
        b = await p.chromium.launch()
        try:
            yield Browser(b)
        finally:
            # Without try/finally an exception thrown in at the yield would
            # skip this cleanup and leave stale entries in pw_pages.
            pw_pages.clear()
            await b.close()


@dataclass
class Browser:
    """Wrapper around a Playwright browser that hands out `Page` snapshots."""

    # The underlying Playwright browser used to create new pages.
    _pw_browser: PwBrowser

    async def open_page(
        self,
        url: str,
        wait_until: Literal['commit', 'domcontentloaded', 'load', 'networkidle'] = 'networkidle',
    ) -> Page:
        """Open a URL in a headless browser and return a `Page`.

        Use this to load a web page so you can inspect its HTML content.

        Args:
            url: The URL to navigate to.
            wait_until: When to consider navigation complete:
                `'commit'` — after the response is received,
                `'domcontentloaded'` — after the `DOMContentLoaded` event,
                `'load'` — after the `load` event,
                `'networkidle'` — after there are no network connections for 500ms.
        """
        # Imported here rather than at module level: external_functions imports
        # from this module, and the top-level import is TYPE_CHECKING-only.
        from .external_functions import Page

        page = await self._pw_browser.new_page()
        await page.goto(url, wait_until=wait_until)

        # Register the live page first: Page.__post_init__ looks it up by id.
        page_id = id(page)
        pw_pages[page_id] = page

        return Page(
            url=page.url,
            title=await page.title(),
            html=await page.content(),
            id=page_id,
        )
# The return annotation is quoted because `Page` is defined further down in this
# module and the module does not enable postponed annotation evaluation; an
# unquoted forward reference raises NameError at import time on Python < 3.14.
async def open_page(
    url: str,
    wait_until: Literal['commit', 'domcontentloaded', 'load', 'networkidle'] = 'networkidle',
) -> 'Page':
    """Open a URL in a headless browser and return a `Page`.

    Use this to load a web page so you can inspect its HTML content.

    Args:
        url: The URL to navigate to.
        wait_until: When to consider navigation complete:
            `'commit'` — after the response is received,
            `'domcontentloaded'` — after the `DOMContentLoaded` event,
            `'load'` — after the `load` event,
            `'networkidle'` — after there are no network connections for 500ms.
    """
    # Signature-only placeholder: calling it always raises.
    raise NotImplementedError('this is here just to generate stubs, see _generate_stubs in main.py')
force: If `True`, bypass actionability checks (visibility, pointer-events interception). Useful when an overlay or sticky nav covers the target element. """ await self._pw_page.click(selector, force=force) await self._pw_page.wait_for_load_state('networkidle') async def fill(self, selector: str, value: str) -> None: """Fill a form field matching the CSS selector with the given value. Args: selector: A CSS selector for an input/textarea, e.g. `'input[name="email"]'`. value: The text to type into the field. """ await self._pw_page.fill(selector, value) await self._pw_page.wait_for_load_state('networkidle') async def select_option(self, selector: str, value: str) -> None: """Select an option in a `