Repository: TheAlgorithms/Python
Branch: master
Commit: 68473afc4b22
Files: 1489
Total size: 8.4 MB
Directory structure:
gitextract_lj21xc5n/
├── .devcontainer/
│ ├── Dockerfile
│ ├── README.md
│ ├── devcontainer.json
│ └── post_install
├── .gitattributes
├── .github/
│ ├── CODEOWNERS
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ ├── feature_request.yml
│ │ └── other.yml
│ ├── dependabot.yml
│ ├── pull_request_template.md
│ ├── stale.yml
│ └── workflows/
│ ├── build.yml
│ ├── devcontainer_ci.yml
│ ├── directory_writer.yml
│ ├── project_euler.yml
│ ├── ruff.yml
│ └── sphinx.yml
├── .gitignore
├── .gitpod.yml
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── DIRECTORY.md
├── LICENSE.md
├── README.md
├── audio_filters/
│ ├── README.md
│ ├── __init__.py
│ ├── butterworth_filter.py
│ ├── equal_loudness_filter.py.broken.txt
│ ├── iir_filter.py
│ ├── loudness_curve.json
│ └── show_response.py
├── backtracking/
│ ├── README.md
│ ├── __init__.py
│ ├── all_combinations.py
│ ├── all_permutations.py
│ ├── all_subsequences.py
│ ├── coloring.py
│ ├── combination_sum.py
│ ├── crossword_puzzle_solver.py
│ ├── generate_parentheses.py
│ ├── generate_parentheses_iterative.py
│ ├── hamiltonian_cycle.py
│ ├── knight_tour.py
│ ├── match_word_pattern.py
│ ├── minimax.py
│ ├── n_queens.py
│ ├── n_queens_math.py
│ ├── power_sum.py
│ ├── rat_in_maze.py
│ ├── sudoku.py
│ ├── sum_of_subsets.py
│ ├── word_break.py
│ ├── word_ladder.py
│ └── word_search.py
├── bit_manipulation/
│ ├── README.md
│ ├── __init__.py
│ ├── binary_and_operator.py
│ ├── binary_coded_decimal.py
│ ├── binary_count_setbits.py
│ ├── binary_count_trailing_zeros.py
│ ├── binary_or_operator.py
│ ├── binary_shifts.py
│ ├── binary_twos_complement.py
│ ├── binary_xor_operator.py
│ ├── bitwise_addition_recursive.py
│ ├── count_1s_brian_kernighan_method.py
│ ├── count_number_of_one_bits.py
│ ├── excess_3_code.py
│ ├── find_previous_power_of_two.py
│ ├── find_unique_number.py
│ ├── gray_code_sequence.py
│ ├── highest_set_bit.py
│ ├── index_of_rightmost_set_bit.py
│ ├── is_even.py
│ ├── is_power_of_two.py
│ ├── largest_pow_of_two_le_num.py
│ ├── missing_number.py
│ ├── numbers_different_signs.py
│ ├── power_of_4.py
│ ├── reverse_bits.py
│ ├── single_bit_manipulation_operations.py
│ └── swap_all_odd_and_even_bits.py
├── blockchain/
│ ├── README.md
│ ├── __init__.py
│ └── diophantine_equation.py
├── boolean_algebra/
│ ├── README.md
│ ├── __init__.py
│ ├── and_gate.py
│ ├── imply_gate.py
│ ├── karnaugh_map_simplification.py
│ ├── multiplexer.py
│ ├── nand_gate.py
│ ├── nimply_gate.py
│ ├── nor_gate.py
│ ├── not_gate.py
│ ├── or_gate.py
│ ├── quine_mc_cluskey.py
│ ├── xnor_gate.py
│ └── xor_gate.py
├── cellular_automata/
│ ├── README.md
│ ├── __init__.py
│ ├── conways_game_of_life.py
│ ├── game_of_life.py
│ ├── langtons_ant.py
│ ├── nagel_schrekenberg.py
│ ├── one_dimensional.py
│ └── wa_tor.py
├── ciphers/
│ ├── README.md
│ ├── __init__.py
│ ├── a1z26.py
│ ├── affine_cipher.py
│ ├── atbash.py
│ ├── autokey.py
│ ├── baconian_cipher.py
│ ├── base16.py
│ ├── base32.py
│ ├── base64_cipher.py
│ ├── base85.py
│ ├── beaufort_cipher.py
│ ├── bifid.py
│ ├── brute_force_caesar_cipher.py
│ ├── caesar_cipher.py
│ ├── cryptomath_module.py
│ ├── decrypt_caesar_with_chi_squared.py
│ ├── deterministic_miller_rabin.py
│ ├── diffie.py
│ ├── diffie_hellman.py
│ ├── elgamal_key_generator.py
│ ├── enigma_machine2.py
│ ├── fractionated_morse_cipher.py
│ ├── gronsfeld_cipher.py
│ ├── hill_cipher.py
│ ├── mixed_keyword_cypher.py
│ ├── mono_alphabetic_ciphers.py
│ ├── morse_code.py
│ ├── onepad_cipher.py
│ ├── permutation_cipher.py
│ ├── playfair_cipher.py
│ ├── polybius.py
│ ├── porta_cipher.py
│ ├── prehistoric_men.txt
│ ├── rabin_miller.py
│ ├── rail_fence_cipher.py
│ ├── rot13.py
│ ├── rsa_cipher.py
│ ├── rsa_factorization.py
│ ├── rsa_key_generator.py
│ ├── running_key_cipher.py
│ ├── shuffled_shift_cipher.py
│ ├── simple_keyword_cypher.py
│ ├── simple_substitution_cipher.py
│ ├── transposition_cipher.py
│ ├── transposition_cipher_encrypt_decrypt_file.py
│ ├── trifid_cipher.py
│ ├── vernam_cipher.py
│ ├── vigenere_cipher.py
│ └── xor_cipher.py
├── computer_vision/
│ ├── README.md
│ ├── __init__.py
│ ├── cnn_classification.py
│ ├── flip_augmentation.py
│ ├── haralick_descriptors.py
│ ├── harris_corner.py
│ ├── horn_schunck.py
│ ├── intensity_based_segmentation.py
│ ├── mean_threshold.py
│ ├── mosaic_augmentation.py
│ └── pooling_functions.py
├── conversions/
│ ├── README.md
│ ├── __init__.py
│ ├── astronomical_length_scale_conversion.py
│ ├── binary_to_decimal.py
│ ├── binary_to_hexadecimal.py
│ ├── binary_to_octal.py
│ ├── convert_number_to_words.py
│ ├── decimal_to_any.py
│ ├── decimal_to_binary.py
│ ├── decimal_to_hexadecimal.py
│ ├── decimal_to_octal.py
│ ├── energy_conversions.py
│ ├── excel_title_to_column.py
│ ├── hex_to_bin.py
│ ├── hexadecimal_to_decimal.py
│ ├── ipv4_conversion.py
│ ├── length_conversion.py
│ ├── molecular_chemistry.py
│ ├── octal_to_binary.py
│ ├── octal_to_decimal.py
│ ├── octal_to_hexadecimal.py
│ ├── prefix_conversions.py
│ ├── prefix_conversions_string.py
│ ├── pressure_conversions.py
│ ├── rectangular_to_polar.py
│ ├── rgb_cmyk_conversion.py
│ ├── rgb_hsv_conversion.py
│ ├── roman_numerals.py
│ ├── speed_conversions.py
│ ├── temperature_conversions.py
│ ├── time_conversions.py
│ ├── volume_conversions.py
│ └── weight_conversion.py
├── data_compression/
│ ├── README.md
│ ├── __init__.py
│ ├── burrows_wheeler.py
│ ├── coordinate_compression.py
│ ├── huffman.py
│ ├── lempel_ziv.py
│ ├── lempel_ziv_decompress.py
│ ├── lz77.py
│ ├── peak_signal_to_noise_ratio.py
│ └── run_length_encoding.py
├── data_structures/
│ ├── __init__.py
│ ├── arrays/
│ │ ├── __init__.py
│ │ ├── equilibrium_index_in_array.py
│ │ ├── find_triplets_with_0_sum.py
│ │ ├── index_2d_array_in_1d.py
│ │ ├── kth_largest_element.py
│ │ ├── median_two_array.py
│ │ ├── monotonic_array.py
│ │ ├── pairs_with_given_sum.py
│ │ ├── permutations.py
│ │ ├── prefix_sum.py
│ │ ├── product_sum.py
│ │ ├── rotate_array.py
│ │ ├── sparse_table.py
│ │ └── sudoku_solver.py
│ ├── binary_tree/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── avl_tree.py
│ │ ├── basic_binary_tree.py
│ │ ├── binary_search_tree.py
│ │ ├── binary_search_tree_recursive.py
│ │ ├── binary_tree_mirror.py
│ │ ├── binary_tree_node_sum.py
│ │ ├── binary_tree_path_sum.py
│ │ ├── binary_tree_traversals.py
│ │ ├── diameter_of_binary_tree.py
│ │ ├── diff_views_of_binary_tree.py
│ │ ├── distribute_coins.py
│ │ ├── fenwick_tree.py
│ │ ├── flatten_binarytree_to_linkedlist.py
│ │ ├── floor_and_ceiling.py
│ │ ├── inorder_tree_traversal_2022.py
│ │ ├── is_sorted.py
│ │ ├── is_sum_tree.py
│ │ ├── lazy_segment_tree.py
│ │ ├── lowest_common_ancestor.py
│ │ ├── maximum_fenwick_tree.py
│ │ ├── maximum_sum_bst.py
│ │ ├── merge_two_binary_trees.py
│ │ ├── mirror_binary_tree.py
│ │ ├── non_recursive_segment_tree.py
│ │ ├── number_of_possible_binary_trees.py
│ │ ├── red_black_tree.py
│ │ ├── segment_tree.py
│ │ ├── segment_tree_other.py
│ │ ├── serialize_deserialize_binary_tree.py
│ │ ├── symmetric_tree.py
│ │ ├── treap.py
│ │ └── wavelet_tree.py
│ ├── disjoint_set/
│ │ ├── __init__.py
│ │ ├── alternate_disjoint_set.py
│ │ └── disjoint_set.py
│ ├── hashing/
│ │ ├── __init__.py
│ │ ├── bloom_filter.py
│ │ ├── double_hash.py
│ │ ├── hash_map.py
│ │ ├── hash_table.py
│ │ ├── hash_table_with_linked_list.py
│ │ ├── number_theory/
│ │ │ ├── __init__.py
│ │ │ └── prime_numbers.py
│ │ ├── quadratic_probing.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ └── test_hash_map.py
│ ├── heap/
│ │ ├── __init__.py
│ │ ├── binomial_heap.py
│ │ ├── heap.py
│ │ ├── heap_generic.py
│ │ ├── max_heap.py
│ │ ├── min_heap.py
│ │ ├── randomized_heap.py
│ │ └── skew_heap.py
│ ├── kd_tree/
│ │ ├── __init__.py
│ │ ├── build_kdtree.py
│ │ ├── example/
│ │ │ ├── __init__.py
│ │ │ ├── example_usage.py
│ │ │ └── hypercube_points.py
│ │ ├── kd_node.py
│ │ ├── nearest_neighbour_search.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ └── test_kdtree.py
│ ├── linked_list/
│ │ ├── __init__.py
│ │ ├── circular_linked_list.py
│ │ ├── deque_doubly.py
│ │ ├── doubly_linked_list.py
│ │ ├── doubly_linked_list_two.py
│ │ ├── floyds_cycle_detection.py
│ │ ├── from_sequence.py
│ │ ├── has_loop.py
│ │ ├── is_palindrome.py
│ │ ├── merge_two_lists.py
│ │ ├── middle_element_of_linked_list.py
│ │ ├── print_reverse.py
│ │ ├── reverse_k_group.py
│ │ ├── rotate_to_the_right.py
│ │ ├── singly_linked_list.py
│ │ ├── skip_list.py
│ │ └── swap_nodes.py
│ ├── queues/
│ │ ├── __init__.py
│ │ ├── circular_queue.py
│ │ ├── circular_queue_linked_list.py
│ │ ├── double_ended_queue.py
│ │ ├── linked_queue.py
│ │ ├── priority_queue_using_list.py
│ │ ├── queue_by_list.py
│ │ ├── queue_by_two_stacks.py
│ │ └── queue_on_pseudo_stack.py
│ ├── stacks/
│ │ ├── __init__.py
│ │ ├── balanced_parentheses.py
│ │ ├── dijkstras_two_stack_algorithm.py
│ │ ├── infix_to_postfix_conversion.py
│ │ ├── infix_to_prefix_conversion.py
│ │ ├── largest_rectangle_histogram.py
│ │ ├── lexicographical_numbers.py
│ │ ├── next_greater_element.py
│ │ ├── postfix_evaluation.py
│ │ ├── prefix_evaluation.py
│ │ ├── stack.py
│ │ ├── stack_using_two_queues.py
│ │ ├── stack_with_doubly_linked_list.py
│ │ ├── stack_with_singly_linked_list.py
│ │ └── stock_span_problem.py
│ ├── suffix_tree/
│ │ ├── __init__.py
│ │ ├── example/
│ │ │ ├── __init__.py
│ │ │ └── example_usage.py
│ │ ├── suffix_tree.py
│ │ ├── suffix_tree_node.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ └── test_suffix_tree.py
│ └── trie/
│ ├── __init__.py
│ ├── radix_tree.py
│ └── trie.py
├── digital_image_processing/
│ ├── __init__.py
│ ├── change_brightness.py
│ ├── change_contrast.py
│ ├── convert_to_negative.py
│ ├── dithering/
│ │ ├── __init__.py
│ │ └── burkes.py
│ ├── edge_detection/
│ │ ├── __init__.py
│ │ └── canny.py
│ ├── filters/
│ │ ├── __init__.py
│ │ ├── bilateral_filter.py
│ │ ├── convolve.py
│ │ ├── gabor_filter.py
│ │ ├── gaussian_filter.py
│ │ ├── laplacian_filter.py
│ │ ├── local_binary_pattern.py
│ │ ├── median_filter.py
│ │ └── sobel_filter.py
│ ├── histogram_equalization/
│ │ ├── __init__.py
│ │ ├── histogram_stretch.py
│ │ ├── image_data/
│ │ │ └── __init__.py
│ │ └── output_data/
│ │ └── __init__.py
│ ├── image_data/
│ │ └── __init__.py
│ ├── index_calculation.py
│ ├── morphological_operations/
│ │ ├── __init__.py
│ │ ├── dilation_operation.py
│ │ └── erosion_operation.py
│ ├── resize/
│ │ ├── __init__.py
│ │ └── resize.py
│ ├── rotation/
│ │ ├── __init__.py
│ │ └── rotation.py
│ ├── sepia.py
│ └── test_digital_image_processing.py
├── divide_and_conquer/
│ ├── __init__.py
│ ├── closest_pair_of_points.py
│ ├── convex_hull.py
│ ├── heaps_algorithm.py
│ ├── heaps_algorithm_iterative.py
│ ├── inversions.py
│ ├── kth_order_statistic.py
│ ├── max_difference_pair.py
│ ├── max_subarray.py
│ ├── mergesort.py
│ ├── peak.py
│ ├── power.py
│ └── strassen_matrix_multiplication.py
├── docs/
│ ├── __init__.py
│ ├── conf.py
│ └── source/
│ └── __init__.py
├── dynamic_programming/
│ ├── __init__.py
│ ├── abbreviation.py
│ ├── all_construct.py
│ ├── bitmask.py
│ ├── catalan_numbers.py
│ ├── climbing_stairs.py
│ ├── combination_sum_iv.py
│ ├── edit_distance.py
│ ├── factorial.py
│ ├── fast_fibonacci.py
│ ├── fibonacci.py
│ ├── fizz_buzz.py
│ ├── floyd_warshall.py
│ ├── integer_partition.py
│ ├── iterating_through_submasks.py
│ ├── k_means_clustering_tensorflow.py
│ ├── knapsack.py
│ ├── largest_divisible_subset.py
│ ├── longest_common_subsequence.py
│ ├── longest_common_substring.py
│ ├── longest_increasing_subsequence.py
│ ├── longest_increasing_subsequence_iterative.py
│ ├── longest_increasing_subsequence_o_nlogn.py
│ ├── longest_palindromic_subsequence.py
│ ├── matrix_chain_multiplication.py
│ ├── matrix_chain_order.py
│ ├── max_non_adjacent_sum.py
│ ├── max_product_subarray.py
│ ├── max_subarray_sum.py
│ ├── min_distance_up_bottom.py
│ ├── minimum_coin_change.py
│ ├── minimum_cost_path.py
│ ├── minimum_partition.py
│ ├── minimum_size_subarray_sum.py
│ ├── minimum_squares_to_represent_a_number.py
│ ├── minimum_steps_to_one.py
│ ├── minimum_tickets_cost.py
│ ├── narcissistic_number.py
│ ├── optimal_binary_search_tree.py
│ ├── palindrome_partitioning.py
│ ├── range_sum_query.py
│ ├── regex_match.py
│ ├── rod_cutting.py
│ ├── smith_waterman.py
│ ├── subset_generation.py
│ ├── sum_of_subset.py
│ ├── trapped_water.py
│ ├── tribonacci.py
│ ├── viterbi.py
│ ├── wildcard_matching.py
│ └── word_break.py
├── electronics/
│ ├── __init__.py
│ ├── apparent_power.py
│ ├── builtin_voltage.py
│ ├── capacitor_equivalence.py
│ ├── carrier_concentration.py
│ ├── charging_capacitor.py
│ ├── charging_inductor.py
│ ├── circular_convolution.py
│ ├── coulombs_law.py
│ ├── electric_conductivity.py
│ ├── electric_power.py
│ ├── electrical_impedance.py
│ ├── ic_555_timer.py
│ ├── ind_reactance.py
│ ├── ohms_law.py
│ ├── real_and_reactive_power.py
│ ├── resistor_color_code.py
│ ├── resistor_equivalence.py
│ ├── resonant_frequency.py
│ └── wheatstone_bridge.py
├── file_transfer/
│ ├── __init__.py
│ ├── mytext.txt
│ ├── receive_file.py
│ ├── send_file.py
│ └── tests/
│ ├── __init__.py
│ └── test_send_file.py
├── financial/
│ ├── README.md
│ ├── __init__.py
│ ├── equated_monthly_installments.py
│ ├── exponential_moving_average.py
│ ├── interest.py
│ ├── present_value.py
│ ├── price_plus_tax.py
│ ├── simple_moving_average.py
│ ├── straight_line_depreciation.py
│ └── time_and_half_pay.py
├── fractals/
│ ├── __init__.py
│ ├── julia_sets.py
│ ├── koch_snowflake.py
│ ├── mandelbrot.py
│ ├── sierpinski_triangle.py
│ └── vicsek.py
├── fuzzy_logic/
│ ├── __init__.py
│ ├── fuzzy_operations.py
│ └── fuzzy_operations.py.DISABLED.txt
├── genetic_algorithm/
│ ├── __init__.py
│ └── basic_string.py
├── geodesy/
│ ├── __init__.py
│ ├── haversine_distance.py
│ └── lamberts_ellipsoidal_distance.py
├── geometry/
│ ├── __init__.py
│ ├── geometry.py
│ ├── graham_scan.py
│ ├── jarvis_march.py
│ └── tests/
│ ├── __init__.py
│ ├── test_graham_scan.py
│ └── test_jarvis_march.py
├── graphics/
│ ├── __init__.py
│ ├── bezier_curve.py
│ ├── butterfly_pattern.py
│ ├── digital_differential_analyzer_line.py
│ └── vector3_for_2d_rendering.py
├── graphs/
│ ├── __init__.py
│ ├── a_star.py
│ ├── ant_colony_optimization_algorithms.py
│ ├── articulation_points.py
│ ├── basic_graphs.py
│ ├── bellman_ford.py
│ ├── bi_directional_dijkstra.py
│ ├── bidirectional_a_star.py
│ ├── bidirectional_breadth_first_search.py
│ ├── bidirectional_search.py
│ ├── boruvka.py
│ ├── breadth_first_search.py
│ ├── breadth_first_search_2.py
│ ├── breadth_first_search_shortest_path.py
│ ├── breadth_first_search_shortest_path_2.py
│ ├── breadth_first_search_zero_one_shortest_path.py
│ ├── check_bipatrite.py
│ ├── check_cycle.py
│ ├── connected_components.py
│ ├── deep_clone_graph.py
│ ├── depth_first_search.py
│ ├── depth_first_search_2.py
│ ├── dijkstra.py
│ ├── dijkstra_2.py
│ ├── dijkstra_algorithm.py
│ ├── dijkstra_alternate.py
│ ├── dijkstra_binary_grid.py
│ ├── dinic.py
│ ├── directed_and_undirected_weighted_graph.py
│ ├── edmonds_karp_multiple_source_and_sink.py
│ ├── eulerian_path_and_circuit_for_undirected_graph.py
│ ├── even_tree.py
│ ├── finding_bridges.py
│ ├── frequent_pattern_graph_miner.py
│ ├── g_topological_sort.py
│ ├── gale_shapley_bigraph.py
│ ├── graph_adjacency_list.py
│ ├── graph_adjacency_matrix.py
│ ├── graph_list.py
│ ├── graphs_floyd_warshall.py
│ ├── greedy_best_first.py
│ ├── greedy_min_vertex_cover.py
│ ├── kahns_algorithm_long.py
│ ├── kahns_algorithm_topo.py
│ ├── karger.py
│ ├── lanczos_eigenvectors.py
│ ├── markov_chain.py
│ ├── matching_min_vertex_cover.py
│ ├── minimum_path_sum.py
│ ├── minimum_spanning_tree_boruvka.py
│ ├── minimum_spanning_tree_kruskal.py
│ ├── minimum_spanning_tree_kruskal2.py
│ ├── minimum_spanning_tree_prims.py
│ ├── minimum_spanning_tree_prims2.py
│ ├── multi_heuristic_astar.py
│ ├── page_rank.py
│ ├── prim.py
│ ├── random_graph_generator.py
│ ├── scc_kosaraju.py
│ ├── strongly_connected_components.py
│ ├── tarjans_scc.py
│ └── tests/
│ ├── __init__.py
│ ├── test_min_spanning_tree_kruskal.py
│ └── test_min_spanning_tree_prim.py
├── greedy_methods/
│ ├── __init__.py
│ ├── best_time_to_buy_and_sell_stock.py
│ ├── fractional_cover_problem.py
│ ├── fractional_knapsack.py
│ ├── fractional_knapsack_2.py
│ ├── gas_station.py
│ ├── minimum_coin_change.py
│ ├── minimum_waiting_time.py
│ ├── optimal_merge_pattern.py
│ └── smallest_range.py
├── hashes/
│ ├── README.md
│ ├── __init__.py
│ ├── adler32.py
│ ├── chaos_machine.py
│ ├── djb2.py
│ ├── elf.py
│ ├── enigma_machine.py
│ ├── fletcher16.py
│ ├── hamming_code.py
│ ├── luhn.py
│ ├── md5.py
│ ├── sdbm.py
│ ├── sha1.py
│ └── sha256.py
├── index.md
├── knapsack/
│ ├── README.md
│ ├── __init__.py
│ ├── greedy_knapsack.py
│ ├── knapsack.py
│ ├── recursive_approach_knapsack.py
│ └── tests/
│ ├── __init__.py
│ ├── test_greedy_knapsack.py
│ └── test_knapsack.py
├── linear_algebra/
│ ├── README.md
│ ├── __init__.py
│ ├── gaussian_elimination.py
│ ├── jacobi_iteration_method.py
│ ├── lu_decomposition.py
│ ├── matrix_inversion.py
│ └── src/
│ ├── __init__.py
│ ├── conjugate_gradient.py
│ ├── gaussian_elimination_pivoting.py
│ ├── lib.py
│ ├── polynom_for_points.py
│ ├── power_iteration.py
│ ├── rank_of_matrix.py
│ ├── rayleigh_quotient.py
│ ├── schur_complement.py
│ ├── test_linear_algebra.py
│ └── transformations_2d.py
├── linear_programming/
│ ├── __init__.py
│ └── simplex.py
├── machine_learning/
│ ├── __init__.py
│ ├── apriori_algorithm.py
│ ├── astar.py
│ ├── automatic_differentiation.py
│ ├── data_transformations.py
│ ├── decision_tree.py
│ ├── dimensionality_reduction.py
│ ├── forecasting/
│ │ ├── __init__.py
│ │ ├── ex_data.csv
│ │ └── run.py
│ ├── frequent_pattern_growth.py
│ ├── gaussian_naive_bayes.py.broken.txt
│ ├── gradient_boosting_classifier.py
│ ├── gradient_boosting_regressor.py.broken.txt
│ ├── gradient_descent.py
│ ├── k_means_clust.py
│ ├── k_nearest_neighbours.py
│ ├── linear_discriminant_analysis.py
│ ├── linear_regression.py
│ ├── local_weighted_learning/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ └── local_weighted_learning.py
│ ├── logistic_regression.py
│ ├── loss_functions.py
│ ├── lstm/
│ │ ├── __init__.py
│ │ ├── lstm_prediction.py
│ │ └── sample_data.csv
│ ├── mfcc.py
│ ├── multilayer_perceptron_classifier.py
│ ├── polynomial_regression.py
│ ├── principle_component_analysis.py
│ ├── random_forest_classifier.py.broken.txt
│ ├── random_forest_regressor.py.broken.txt
│ ├── scoring_functions.py
│ ├── self_organizing_map.py
│ ├── sequential_minimum_optimization.py
│ ├── similarity_search.py
│ ├── support_vector_machines.py
│ ├── t_stochastic_neighbour_embedding.py
│ ├── word_frequency_functions.py
│ ├── xgboost_classifier.py
│ └── xgboost_regressor.py
├── maths/
│ ├── __init__.py
│ ├── abs.py
│ ├── addition_without_arithmetic.py
│ ├── aliquot_sum.py
│ ├── allocation_number.py
│ ├── arc_length.py
│ ├── area.py
│ ├── area_under_curve.py
│ ├── average_absolute_deviation.py
│ ├── average_mean.py
│ ├── average_median.py
│ ├── average_mode.py
│ ├── bailey_borwein_plouffe.py
│ ├── base_neg2_conversion.py
│ ├── basic_maths.py
│ ├── binary_exponentiation.py
│ ├── binary_multiplication.py
│ ├── binomial_coefficient.py
│ ├── binomial_distribution.py
│ ├── ceil.py
│ ├── chebyshev_distance.py
│ ├── check_polygon.py
│ ├── chinese_remainder_theorem.py
│ ├── chudnovsky_algorithm.py
│ ├── collatz_sequence.py
│ ├── combinations.py
│ ├── continued_fraction.py
│ ├── decimal_isolate.py
│ ├── decimal_to_fraction.py
│ ├── dodecahedron.py
│ ├── double_factorial.py
│ ├── dual_number_automatic_differentiation.py
│ ├── entropy.py
│ ├── euclidean_distance.py
│ ├── euler_method.py
│ ├── euler_modified.py
│ ├── eulers_totient.py
│ ├── extended_euclidean_algorithm.py
│ ├── factorial.py
│ ├── factors.py
│ ├── fast_inverse_sqrt.py
│ ├── fermat_little_theorem.py
│ ├── fibonacci.py
│ ├── find_max.py
│ ├── find_min.py
│ ├── floor.py
│ ├── gamma.py
│ ├── gaussian.py
│ ├── gcd_of_n_numbers.py
│ ├── geometric_mean.py
│ ├── germain_primes.py
│ ├── greatest_common_divisor.py
│ ├── hardy_ramanujanalgo.py
│ ├── images/
│ │ └── __init__.py
│ ├── integer_square_root.py
│ ├── interquartile_range.py
│ ├── is_int_palindrome.py
│ ├── is_ip_v4_address_valid.py
│ ├── is_square_free.py
│ ├── jaccard_similarity.py
│ ├── joint_probability_distribution.py
│ ├── josephus_problem.py
│ ├── juggler_sequence.py
│ ├── karatsuba.py
│ ├── kth_lexicographic_permutation.py
│ ├── largest_of_very_large_numbers.py
│ ├── least_common_multiple.py
│ ├── line_length.py
│ ├── liouville_lambda.py
│ ├── lucas_lehmer_primality_test.py
│ ├── lucas_series.py
│ ├── maclaurin_series.py
│ ├── manhattan_distance.py
│ ├── matrix_exponentiation.py
│ ├── max_sum_sliding_window.py
│ ├── minkowski_distance.py
│ ├── mobius_function.py
│ ├── modular_division.py
│ ├── modular_exponential.py
│ ├── monte_carlo.py
│ ├── monte_carlo_dice.py
│ ├── number_of_digits.py
│ ├── numerical_analysis/
│ │ ├── __init__.py
│ │ ├── adams_bashforth.py
│ │ ├── bisection.py
│ │ ├── bisection_2.py
│ │ ├── integration_by_simpson_approx.py
│ │ ├── intersection.py
│ │ ├── nevilles_method.py
│ │ ├── newton_forward_interpolation.py
│ │ ├── newton_raphson.py
│ │ ├── numerical_integration.py
│ │ ├── proper_fractions.py
│ │ ├── runge_kutta.py
│ │ ├── runge_kutta_fehlberg_45.py
│ │ ├── runge_kutta_gills.py
│ │ ├── secant_method.py
│ │ ├── simpson_rule.py
│ │ ├── square_root.py
│ │ └── weierstrass_method.py
│ ├── odd_sieve.py
│ ├── perfect_cube.py
│ ├── perfect_number.py
│ ├── perfect_square.py
│ ├── persistence.py
│ ├── pi_generator.py
│ ├── pi_monte_carlo_estimation.py
│ ├── points_are_collinear_3d.py
│ ├── pollard_rho.py
│ ├── polynomial_evaluation.py
│ ├── polynomials/
│ │ ├── __init__.py
│ │ └── single_indeterminate_operations.py
│ ├── power_using_recursion.py
│ ├── prime_check.py
│ ├── prime_factors.py
│ ├── prime_numbers.py
│ ├── prime_sieve_eratosthenes.py
│ ├── primelib.py
│ ├── print_multiplication_table.py
│ ├── pythagoras.py
│ ├── qr_decomposition.py
│ ├── quadratic_equations_complex_numbers.py
│ ├── radians.py
│ ├── radix2_fft.py
│ ├── remove_digit.py
│ ├── segmented_sieve.py
│ ├── series/
│ │ ├── __init__.py
│ │ ├── arithmetic.py
│ │ ├── geometric.py
│ │ ├── geometric_series.py
│ │ ├── harmonic.py
│ │ ├── harmonic_series.py
│ │ ├── hexagonal_numbers.py
│ │ └── p_series.py
│ ├── sieve_of_eratosthenes.py
│ ├── sigmoid.py
│ ├── signum.py
│ ├── simultaneous_linear_equation_solver.py
│ ├── sin.py
│ ├── sock_merchant.py
│ ├── softmax.py
│ ├── solovay_strassen_primality_test.py
│ ├── spearman_rank_correlation_coefficient.py
│ ├── special_numbers/
│ │ ├── __init__.py
│ │ ├── armstrong_numbers.py
│ │ ├── automorphic_number.py
│ │ ├── bell_numbers.py
│ │ ├── carmichael_number.py
│ │ ├── catalan_number.py
│ │ ├── hamming_numbers.py
│ │ ├── happy_number.py
│ │ ├── harshad_numbers.py
│ │ ├── hexagonal_number.py
│ │ ├── krishnamurthy_number.py
│ │ ├── perfect_number.py
│ │ ├── polygonal_numbers.py
│ │ ├── pronic_number.py
│ │ ├── proth_number.py
│ │ ├── triangular_numbers.py
│ │ ├── ugly_numbers.py
│ │ └── weird_number.py
│ ├── sum_of_arithmetic_series.py
│ ├── sum_of_digits.py
│ ├── sum_of_geometric_progression.py
│ ├── sum_of_harmonic_series.py
│ ├── sumset.py
│ ├── sylvester_sequence.py
│ ├── tanh.py
│ ├── test_factorial.py
│ ├── test_prime_check.py
│ ├── three_sum.py
│ ├── trapezoidal_rule.py
│ ├── triplet_sum.py
│ ├── twin_prime.py
│ ├── two_pointer.py
│ ├── two_sum.py
│ ├── volume.py
│ └── zellers_congruence.py
├── matrix/
│ ├── __init__.py
│ ├── binary_search_matrix.py
│ ├── count_islands_in_matrix.py
│ ├── count_negative_numbers_in_sorted_matrix.py
│ ├── count_paths.py
│ ├── cramers_rule_2x2.py
│ ├── inverse_of_matrix.py
│ ├── largest_square_area_in_matrix.py
│ ├── matrix_based_game.py
│ ├── matrix_class.py
│ ├── matrix_equalization.py
│ ├── matrix_multiplication_recursion.py
│ ├── matrix_operation.py
│ ├── max_area_of_island.py
│ ├── median_matrix.py
│ ├── nth_fibonacci_using_matrix_exponentiation.py
│ ├── pascal_triangle.py
│ ├── rotate_matrix.py
│ ├── searching_in_sorted_matrix.py
│ ├── sherman_morrison.py
│ ├── spiral_print.py
│ ├── tests/
│ │ ├── __init__.py
│ │ ├── pytest.ini
│ │ └── test_matrix_operation.py
│ └── validate_sudoku_board.py
├── networking_flow/
│ ├── __init__.py
│ ├── ford_fulkerson.py
│ └── minimum_cut.py
├── neural_network/
│ ├── __init__.py
│ ├── activation_functions/
│ │ ├── __init__.py
│ │ ├── binary_step.py
│ │ ├── exponential_linear_unit.py
│ │ ├── gaussian_error_linear_unit.py
│ │ ├── leaky_rectified_linear_unit.py
│ │ ├── mish.py
│ │ ├── rectified_linear_unit.py
│ │ ├── scaled_exponential_linear_unit.py
│ │ ├── soboleva_modified_hyperbolic_tangent.py
│ │ ├── softplus.py
│ │ ├── squareplus.py
│ │ └── swish.py
│ ├── back_propagation_neural_network.py
│ ├── convolution_neural_network.py
│ ├── gan.py_tf
│ ├── input_data.py
│ ├── perceptron.py.DISABLED
│ ├── simple_neural_network.py
│ └── two_hidden_layers_neural_network.py
├── other/
│ ├── __init__.py
│ ├── activity_selection.py
│ ├── alternative_list_arrange.py
│ ├── bankers_algorithm.py
│ ├── davis_putnam_logemann_loveland.py
│ ├── doomsday.py
│ ├── fischer_yates_shuffle.py
│ ├── gauss_easter.py
│ ├── graham_scan.py
│ ├── greedy.py
│ ├── guess_the_number_search.py
│ ├── h_index.py
│ ├── least_recently_used.py
│ ├── lfu_cache.py
│ ├── linear_congruential_generator.py
│ ├── lru_cache.py
│ ├── magicdiamondpattern.py
│ ├── majority_vote_algorithm.py
│ ├── maximum_subsequence.py
│ ├── nested_brackets.py
│ ├── number_container_system.py
│ ├── password.py
│ ├── quine.py
│ ├── scoring_algorithm.py
│ ├── sdes.py
│ ├── sliding_window_maximum.py
│ ├── tower_of_hanoi.py
│ └── word_search.py
├── physics/
│ ├── __init__.py
│ ├── altitude_pressure.py
│ ├── archimedes_principle_of_buoyant_force.py
│ ├── basic_orbital_capture.py
│ ├── casimir_effect.py
│ ├── center_of_mass.py
│ ├── centripetal_force.py
│ ├── coulombs_law.py
│ ├── doppler_frequency.py
│ ├── escape_velocity.py
│ ├── grahams_law.py
│ ├── horizontal_projectile_motion.py
│ ├── hubble_parameter.py
│ ├── ideal_gas_law.py
│ ├── image_data/
│ │ └── __init__.py
│ ├── in_static_equilibrium.py
│ ├── kinetic_energy.py
│ ├── lens_formulae.py
│ ├── lorentz_transformation_four_vector.py
│ ├── malus_law.py
│ ├── mass_energy_equivalence.py
│ ├── mirror_formulae.py
│ ├── n_body_simulation.py
│ ├── newtons_law_of_gravitation.py
│ ├── newtons_second_law_of_motion.py
│ ├── orbital_transfer_work.py
│ ├── period_of_pendulum.py
│ ├── photoelectric_effect.py
│ ├── potential_energy.py
│ ├── rainfall_intensity.py
│ ├── reynolds_number.py
│ ├── rms_speed_of_molecule.py
│ ├── shear_stress.py
│ ├── speed_of_sound.py
│ ├── speeds_of_gas_molecules.py
│ └── terminal_velocity.py
├── project_euler/
│ ├── README.md
│ ├── __init__.py
│ ├── problem_001/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ ├── sol3.py
│ │ ├── sol4.py
│ │ ├── sol5.py
│ │ ├── sol6.py
│ │ └── sol7.py
│ ├── problem_002/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ ├── sol3.py
│ │ ├── sol4.py
│ │ └── sol5.py
│ ├── problem_003/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ └── sol3.py
│ ├── problem_004/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_005/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_006/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ ├── sol3.py
│ │ └── sol4.py
│ ├── problem_007/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ └── sol3.py
│ ├── problem_008/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ └── sol3.py
│ ├── problem_009/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ ├── sol3.py
│ │ └── sol4.py
│ ├── problem_010/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ └── sol3.py
│ ├── problem_011/
│ │ ├── __init__.py
│ │ ├── grid.txt
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_012/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_013/
│ │ ├── __init__.py
│ │ ├── num.txt
│ │ └── sol1.py
│ ├── problem_014/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_015/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_016/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_017/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_018/
│ │ ├── __init__.py
│ │ ├── solution.py
│ │ └── triangle.txt
│ ├── problem_019/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_020/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ ├── sol3.py
│ │ └── sol4.py
│ ├── problem_021/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_022/
│ │ ├── __init__.py
│ │ ├── p022_names.txt
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_023/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_024/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_025/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ └── sol3.py
│ ├── problem_026/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_027/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_028/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_029/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_030/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_031/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_032/
│ │ ├── __init__.py
│ │ └── sol32.py
│ ├── problem_033/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_034/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_035/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_036/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_037/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_038/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_039/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_040/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_041/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_042/
│ │ ├── __init__.py
│ │ ├── solution42.py
│ │ └── words.txt
│ ├── problem_043/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_044/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_045/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_046/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_047/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_048/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_049/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_050/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_051/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_052/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_053/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_054/
│ │ ├── __init__.py
│ │ ├── poker_hands.txt
│ │ ├── sol1.py
│ │ └── test_poker_hand.py
│ ├── problem_055/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_056/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_057/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_058/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_059/
│ │ ├── __init__.py
│ │ ├── p059_cipher.txt
│ │ ├── sol1.py
│ │ └── test_cipher.txt
│ ├── problem_062/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_063/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_064/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_065/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_067/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ ├── sol2.py
│ │ └── triangle.txt
│ ├── problem_068/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_069/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_070/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_071/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_072/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_073/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_074/
│ │ ├── __init__.py
│ │ ├── sol1.py
│ │ └── sol2.py
│ ├── problem_075/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_076/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_077/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_078/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_079/
│ │ ├── __init__.py
│ │ ├── keylog.txt
│ │ ├── keylog_test.txt
│ │ └── sol1.py
│ ├── problem_080/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_081/
│ │ ├── __init__.py
│ │ ├── matrix.txt
│ │ └── sol1.py
│ ├── problem_082/
│ │ ├── __init__.py
│ │ ├── input.txt
│ │ ├── sol1.py
│ │ └── test_matrix.txt
│ ├── problem_085/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_086/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_087/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_089/
│ │ ├── __init__.py
│ │ ├── numeralcleanup_test.txt
│ │ ├── p089_roman.txt
│ │ └── sol1.py
│ ├── problem_091/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_092/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_094/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_095/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_097/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_099/
│ │ ├── __init__.py
│ │ ├── base_exp.txt
│ │ └── sol1.py
│ ├── problem_100/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_101/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_102/
│ │ ├── __init__.py
│ │ ├── p102_triangles.txt
│ │ ├── sol1.py
│ │ └── test_triangles.txt
│ ├── problem_104/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_107/
│ │ ├── __init__.py
│ │ ├── p107_network.txt
│ │ ├── sol1.py
│ │ └── test_network.txt
│ ├── problem_109/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_112/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_113/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_114/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_115/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_116/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_117/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_119/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_120/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_121/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_122/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_123/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_125/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_129/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_131/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_135/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_136/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_144/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_145/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_164/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_173/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_174/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_180/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_187/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_188/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_190/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_191/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_203/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_205/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_206/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_207/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_234/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_301/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_345/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_493/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_551/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_587/
│ │ ├── __init__.py
│ │ └── sol1.py
│ ├── problem_686/
│ │ ├── __init__.py
│ │ └── sol1.py
│ └── problem_800/
│ ├── __init__.py
│ └── sol1.py
├── pyproject.toml
├── quantum/
│ ├── README.md
│ ├── __init__.py
│ ├── bb84.py.DISABLED.txt
│ ├── deutsch_jozsa.py.DISABLED.txt
│ ├── half_adder.py.DISABLED.txt
│ ├── not_gate.py.DISABLED.txt
│ ├── q_fourier_transform.py
│ ├── q_full_adder.py.DISABLED.txt
│ ├── quantum_entanglement.py.DISABLED.txt
│ ├── quantum_random.py.DISABLED.txt
│ ├── quantum_teleportation.py.DISABLED.txt
│ ├── ripple_adder_classic.py.DISABLED.txt
│ ├── single_qubit_measure.py.DISABLED.txt
│ └── superdense_coding.py.DISABLED.txt
├── scheduling/
│ ├── __init__.py
│ ├── first_come_first_served.py
│ ├── highest_response_ratio_next.py
│ ├── job_sequence_with_deadline.py
│ ├── job_sequencing_with_deadline.py
│ ├── multi_level_feedback_queue.py
│ ├── non_preemptive_shortest_job_first.py
│ ├── round_robin.py
│ └── shortest_job_first.py
├── scripts/
│ ├── README.md
│ ├── __init__.py
│ ├── build_directory_md.py
│ ├── close_pull_requests_with_awaiting_changes.sh
│ ├── close_pull_requests_with_failing_tests.sh
│ ├── close_pull_requests_with_require_descriptive_names.sh
│ ├── close_pull_requests_with_require_tests.sh
│ ├── close_pull_requests_with_require_type_hints.sh
│ ├── find_git_conflicts.sh
│ ├── project_euler_answers.json
│ ├── validate_filenames.py
│ └── validate_solutions.py
├── searches/
│ ├── __init__.py
│ ├── binary_search.py
│ ├── binary_tree_traversal.py
│ ├── double_linear_search.py
│ ├── double_linear_search_recursion.py
│ ├── exponential_search.py
│ ├── fibonacci_search.py
│ ├── hill_climbing.py
│ ├── interpolation_search.py
│ ├── jump_search.py
│ ├── linear_search.py
│ ├── median_of_medians.py
│ ├── quick_select.py
│ ├── sentinel_linear_search.py
│ ├── simple_binary_search.py
│ ├── simulated_annealing.py
│ ├── tabu_search.py
│ ├── tabu_test_data.txt
│ └── ternary_search.py
├── sorts/
│ ├── README.md
│ ├── __init__.py
│ ├── bead_sort.py
│ ├── binary_insertion_sort.py
│ ├── bitonic_sort.py
│ ├── bogo_sort.py
│ ├── bubble_sort.py
│ ├── bucket_sort.py
│ ├── circle_sort.py
│ ├── cocktail_shaker_sort.py
│ ├── comb_sort.py
│ ├── counting_sort.py
│ ├── cycle_sort.py
│ ├── cyclic_sort.py
│ ├── double_sort.py
│ ├── dutch_national_flag_sort.py
│ ├── exchange_sort.py
│ ├── external_sort.py
│ ├── gnome_sort.py
│ ├── heap_sort.py
│ ├── insertion_sort.py
│ ├── intro_sort.py
│ ├── iterative_merge_sort.py
│ ├── merge_insertion_sort.py
│ ├── merge_sort.py
│ ├── msd_radix_sort.py
│ ├── natural_sort.py
│ ├── normal_distribution_quick_sort.md
│ ├── odd_even_sort.py
│ ├── odd_even_transposition_parallel.py
│ ├── odd_even_transposition_single_threaded.py
│ ├── pancake_sort.py
│ ├── patience_sort.py
│ ├── pigeon_sort.py
│ ├── pigeonhole_sort.py
│ ├── quick_sort.py
│ ├── quick_sort_3_partition.py
│ ├── radix_sort.py
│ ├── recursive_insertion_sort.py
│ ├── recursive_mergesort_array.py
│ ├── recursive_quick_sort.py
│ ├── selection_sort.py
│ ├── shell_sort.py
│ ├── shrink_shell_sort.py
│ ├── slowsort.py
│ ├── stalin_sort.py
│ ├── stooge_sort.py
│ ├── strand_sort.py
│ ├── tim_sort.py
│ ├── topological_sort.py
│ ├── tree_sort.py
│ ├── unknown_sort.py
│ └── wiggle_sort.py
├── strings/
│ ├── __init__.py
│ ├── aho_corasick.py
│ ├── alternative_string_arrange.py
│ ├── anagrams.py
│ ├── anagrams.txt
│ ├── autocomplete_using_trie.py
│ ├── barcode_validator.py
│ ├── bitap_string_match.py
│ ├── boyer_moore_search.py
│ ├── camel_case_to_snake_case.py
│ ├── can_string_be_rearranged_as_palindrome.py
│ ├── capitalize.py
│ ├── check_anagrams.py
│ ├── count_vowels.py
│ ├── credit_card_validator.py
│ ├── damerau_levenshtein_distance.py
│ ├── detecting_english_programmatically.py
│ ├── dictionary.txt
│ ├── dna.py
│ ├── edit_distance.py
│ ├── frequency_finder.py
│ ├── hamming_distance.py
│ ├── indian_phone_validator.py
│ ├── is_contains_unique_chars.py
│ ├── is_isogram.py
│ ├── is_pangram.py
│ ├── is_polish_national_id.py
│ ├── is_spain_national_id.py
│ ├── is_srilankan_phone_number.py
│ ├── is_valid_email_address.py
│ ├── jaro_winkler.py
│ ├── join.py
│ ├── knuth_morris_pratt.py
│ ├── levenshtein_distance.py
│ ├── lower.py
│ ├── manacher.py
│ ├── min_cost_string_conversion.py
│ ├── naive_string_search.py
│ ├── ngram.py
│ ├── palindrome.py
│ ├── pig_latin.py
│ ├── prefix_function.py
│ ├── rabin_karp.py
│ ├── remove_duplicate.py
│ ├── reverse_letters.py
│ ├── reverse_words.py
│ ├── snake_case_to_camel_pascal_case.py
│ ├── split.py
│ ├── string_switch_case.py
│ ├── strip.py
│ ├── text_justification.py
│ ├── title.py
│ ├── top_k_frequent_words.py
│ ├── upper.py
│ ├── wave_string.py
│ ├── wildcard_pattern_matching.py
│ ├── word_occurrence.py
│ ├── word_patterns.py
│ ├── words.txt
│ └── z_function.py
└── web_programming/
├── __init__.py
├── co2_emission.py
├── covid_stats_via_xpath.py
├── crawl_google_results.py
├── crawl_google_scholar_citation.py
├── currency_converter.py
├── current_stock_price.py
├── current_weather.py
├── daily_horoscope.py
├── download_images_from_google_query.py
├── emails_from_url.py
├── fetch_anime_and_play.py
├── fetch_bbc_news.py
├── fetch_github_info.py
├── fetch_jobs.py
├── fetch_quotes.py
├── fetch_well_rx_price.py
├── get_amazon_product_data.py
├── get_imdb_top_250_movies_csv.py
├── get_imdbtop.py.DISABLED
├── get_ip_geolocation.py
├── get_top_billionaires.py
├── get_top_hn_posts.py
├── get_user_tweets.py.DISABLED
├── giphy.py
├── instagram_crawler.py
├── instagram_pic.py
├── instagram_video.py
├── nasa_data.py
├── open_google_results.py
├── random_anime_character.py
├── recaptcha_verification.py
├── reddit.py
├── search_books_by_isbn.py
├── slack_message.py
├── test_fetch_github_info.py
└── world_covid19_stats.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .devcontainer/Dockerfile
================================================
# https://github.com/microsoft/vscode-dev-containers/blob/main/containers/python-3/README.md
ARG VARIANT=3.13-bookworm
FROM mcr.microsoft.com/vscode/devcontainers/python:${VARIANT}
COPY requirements.txt /tmp/pip-tmp/
RUN python3 -m pip install --upgrade pip \
&& python3 -m pip install --no-cache-dir -r /tmp/pip-tmp/requirements.txt \
&& pipx install pre-commit ruff
================================================
FILE: .devcontainer/README.md
================================================
# Development Container
This is the **Devcontainer** configuration that provides a consistent development environment for all contributors.
## Features
- [x] Pre-configured **Python environment**
- [x] Automatic installation of **pre-commit hooks**
- [x] **Ruff** linter ready to check your code
- [x] **Oh My Zsh** with plugins:
- `zsh-autosuggestions`
- `zsh-syntax-highlighting`
## Usage
1. Install [**Docker**](https://www.docker.com/get-started/) and [**Visual Studio Code**](https://code.visualstudio.com/)
2. Install the **Remote - Containers** extension in VS Code
   - Press `CTRL+P`, paste this command, and press `Enter`:
```shell
ext install ms-vscode-remote.remote-containers
```
3. Open this repository in VS Code
4. When prompted, click **"Reopen in Container"**
5. Wait for the environment to build and initialize
After setup:
- `pre-commit` hooks are installed
- `ruff` and other tools are available
- The shell uses Zsh by default
## Tips
To manually run checks on all files:
```bash
pre-commit run --all-files
```
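
The individual tools can also be invoked directly; for example, a quick lint pass with Ruff (a minimal sketch, assuming it is run from the repository root inside the container):

```bash
# Lint every Python file in the repository with Ruff (installed via pipx in the Dockerfile)
ruff check .
```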
> For further information, see the [Microsoft tutorial about devcontainers](https://code.visualstudio.com/docs/devcontainers/tutorial).
================================================
FILE: .devcontainer/devcontainer.json
================================================
{
"name": "Python 3",
"build": {
"dockerfile": "Dockerfile",
"context": "..",
"args": {
// Update 'VARIANT' to pick a Python version: 3, 3.11, 3.10, 3.9, 3.8
// Append -bullseye or -buster to pin to an OS version.
// Use -bullseye variants on local on arm64/Apple Silicon.
"VARIANT": "3.13-bookworm"
}
},
"postCreateCommand": "zsh .devcontainer/post_install",
// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
// Set *default* container specific settings.json values on container create.
"settings": {
"python.defaultInterpreterPath": "/usr/local/bin/python",
"python.linting.enabled": true,
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"terminal.integrated.defaultProfile.linux": "zsh"
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
]
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "pip3 install --user -r requirements.txt",
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode"
}
================================================
FILE: .devcontainer/post_install
================================================
#!/usr/bin/env bash
echo "Begin post-installation steps..."
set -e
echo "Installing pre-commit hooks..."
pre-commit install
echo "Installing Oh My Zsh plugins..."
# Install zsh-autosuggestions if not present
if [ ! -d "${ZSH_CUSTOM:-$HOME/.oh-my-zsh/custom}/plugins/zsh-autosuggestions" ]; then
echo "Cloning zsh-autosuggestions..."
git clone https://github.com/zsh-users/zsh-autosuggestions \
"${ZSH_CUSTOM:-$HOME/.oh-my-zsh/custom}/plugins/zsh-autosuggestions"
fi
# Install zsh-syntax-highlighting if not present
if [ ! -d "${ZSH_CUSTOM:-$HOME/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting" ]; then
echo "Cloning zsh-syntax-highlighting..."
git clone https://github.com/zsh-users/zsh-syntax-highlighting.git \
"${ZSH_CUSTOM:-$HOME/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting"
fi
echo "Configuring plugins in ~/.zshrc..."
sed -i '/^plugins=/c\plugins=(git zsh-autosuggestions zsh-syntax-highlighting)' ~/.zshrc
echo "Post-installation steps completed successfully. Enjoy!"
================================================
FILE: .gitattributes
================================================
* text=auto
================================================
FILE: .github/CODEOWNERS
================================================
# This is a comment.
# Each line is a file pattern followed by one or more owners.
# More details are here: https://help.github.com/articles/about-codeowners/
# The '*' pattern designates global owners.
# Order is important: the last matching pattern takes the most precedence.
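# An illustrative example with hypothetical owners: given the two lines
#   /maths/         @team-a
#   /maths/series/  @team-b
# a change to maths/series/harmonic.py would request a review from @team-b,
# because the later (more specific) pattern is the last one that matches.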
/.* @cclauss
# /backtracking/
# /bit_manipulation/
# /blockchain/
# /boolean_algebra/
# /cellular_automata/
# /ciphers/
# /compression/
# /computer_vision/
# /conversions/
# /data_structures/
# /digital_image_processing/
# /divide_and_conquer/
# /dynamic_programming/
# /file_transfer/
# /fuzzy_logic/
# /genetic_algorithm/
# /geodesy/
# /graphics/
# /graphs/
# /greedy_method/
# /hashes/
# /images/
# /linear_algebra/
# /machine_learning/
# /maths/
# /matrix/
# /networking_flow/
# /neural_network/
# /other/
# /project_euler/
# /quantum/
# /scheduling/
# /scripts/
# /searches/
# /sorts/
# /strings/
# /traversals/
/web_programming/ @cclauss
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: Bug report
description: Create a bug report to help us address errors in the repository
labels: [bug]
body:
- type: markdown
attributes:
value: >
Before requesting please search [existing issues](https://github.com/TheAlgorithms/Python/labels/bug).
Usage questions such as "How do I...?" belong on the
[Discord](https://discord.gg/c7MnfGFGa6) and will be closed.
- type: input
attributes:
label: "Repository commit"
description: >
The commit hash for `TheAlgorithms/Python` repository. You can get this
by running the command `git rev-parse HEAD` locally.
placeholder: "a0b0f414ae134aa1772d33bb930e5a960f9979e8"
validations:
required: true
- type: input
attributes:
label: "Python version (python --version)"
placeholder: "Python 3.10.7"
validations:
required: true
- type: textarea
attributes:
label: "Dependencies version (pip freeze)"
description: >
This is the output of the command `pip freeze --all`. Note that the
actual output might be different as compared to the placeholder text.
placeholder: |
appnope==0.1.3
asttokens==2.0.8
backcall==0.2.0
...
validations:
required: true
- type: textarea
attributes:
label: "Expected behavior"
description: "Describe the behavior you expect. May include images or videos."
validations:
required: true
- type: textarea
attributes:
label: "Actual behavior"
validations:
required: true
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
- name: Discord community
url: https://discord.gg/c7MnfGFGa6
about: Have any questions or need any help? Please contact us via Discord
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: Feature request
description: Suggest features, propose improvements, discuss new ideas.
labels: [enhancement]
body:
- type: markdown
attributes:
value: >
Before requesting please search [existing issues](https://github.com/TheAlgorithms/Python/labels/enhancement).
Do not create issues to implement new algorithms as these will be closed.
Usage questions such as "How do I...?" belong on the
[Discord](https://discord.gg/c7MnfGFGa6) and will be closed.
- type: textarea
attributes:
label: "Feature description"
description: >
This could include new topics or improving any existing implementations.
validations:
required: true
================================================
FILE: .github/ISSUE_TEMPLATE/other.yml
================================================
name: Other
description: Use this for any other issues. PLEASE do not create blank issues
labels: ["awaiting triage"]
body:
- type: textarea
id: issuedescription
attributes:
label: What would you like to share?
description: Provide a clear and concise explanation of your issue.
validations:
required: true
- type: textarea
id: extrainfo
attributes:
label: Additional information
description: Is there anything else we should know about this issue?
validations:
required: false
================================================
FILE: .github/dependabot.yml
================================================
# Keep GitHub Actions up to date with Dependabot...
# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
================================================
FILE: .github/pull_request_template.md
================================================
### Describe your change:
* [ ] Add an algorithm?
* [ ] Fix a bug or typo in an existing algorithm?
* [ ] Add or change doctests? -- Note: Please avoid changing both code and tests in a single pull request.
* [ ] Documentation change?
### Checklist:
* [ ] I have read [CONTRIBUTING.md](https://github.com/TheAlgorithms/Python/blob/master/CONTRIBUTING.md).
* [ ] This pull request is all my own work -- I have not plagiarized.
* [ ] I know that pull requests will not be merged if they fail the automated tests.
* [ ] This PR only changes one algorithm file. To ease review, please open separate PRs for separate algorithms.
* [ ] All new Python files are placed inside an existing directory.
* [ ] All filenames are in all lowercase characters with no spaces or dashes.
* [ ] All functions and variable names follow Python naming conventions.
* [ ] All function parameters and return values are annotated with Python [type hints](https://docs.python.org/3/library/typing.html).
* [ ] All functions have [doctests](https://docs.python.org/3/library/doctest.html) that pass the automated testing. (A minimal illustrative sketch follows this checklist.)
* [ ] All new algorithms include at least one URL that points to Wikipedia or another similar explanation.
* [ ] If this pull request resolves one or more open issues then the description above includes the issue number(s) with a [closing keyword](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue): "Fixes #ISSUE-NUMBER".
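
For illustration only, a submission that satisfies the naming, type-hint, and doctest items above might look like this hypothetical snippet (not a file in this repository):

```python
def greatest_common_divisor(number_1: int, number_2: int) -> int:
    """
    Return the greatest common divisor of two non-negative integers.
    Reference: https://en.wikipedia.org/wiki/Euclidean_algorithm

    >>> greatest_common_divisor(12, 18)
    6
    >>> greatest_common_divisor(7, 0)
    7
    """
    # Euclid's algorithm: repeatedly replace the pair with (smaller, remainder)
    while number_2:
        number_1, number_2 = number_2, number_1 % number_2
    return number_1
```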
================================================
FILE: .github/stale.yml
================================================
# Configuration for probot-stale - https://github.com/probot/stale
# Number of days of inactivity before an Issue or Pull Request becomes stale
daysUntilStale: 30
# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
daysUntilClose: 7
# Only issues or pull requests with all of these labels are checked for staleness. Defaults to `[]` (disabled)
onlyLabels: []
# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
exemptLabels:
- "Status: on hold"
# Set to true to ignore issues in a project (defaults to false)
exemptProjects: false
# Set to true to ignore issues in a milestone (defaults to false)
exemptMilestones: false
# Set to true to ignore issues with an assignee (defaults to false)
exemptAssignees: false
# Label to use when marking as stale
staleLabel: stale
# Limit the number of actions per hour, from 1-30. Default is 30
limitPerRun: 5
# Comment to post when removing the stale label.
# unmarkComment: >
# Your comment here.
# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
pulls:
# Comment to post when marking as stale. Set to `false` to disable
markComment: >
This pull request has been automatically marked as stale because it has not had
recent activity. It will be closed if no further activity occurs. Thank you
for your contributions.
# Comment to post when closing a stale Pull Request.
closeComment: >
Please reopen this pull request once you commit the changes requested
or make improvements on the code. If this is not the case and you need
some help, feel free to seek help from our [Gitter](https://gitter.im/TheAlgorithms/community)
or ping one of the reviewers. Thank you for your contributions!
issues:
# Comment to post when marking as stale. Set to `false` to disable
markComment: >
This issue has been automatically marked as stale because it has not had
recent activity. It will be closed if no further activity occurs. Thank you
for your contributions.
# Comment to post when closing a stale Issue.
closeComment: >
Please reopen this issue once you add more information and updates here.
If this is not the case and you need some help, feel free to seek help
from our [Gitter](https://gitter.im/TheAlgorithms/community) or ping one of the
reviewers. Thank you for your contributions!
================================================
FILE: .github/workflows/build.yml
================================================
name: "build"
on:
pull_request:
schedule:
    - cron: "0 0 * * *" # Run every day
jobs:
build:
runs-on: ubuntu-latest
steps:
- run: sudo apt-get update && sudo apt-get install -y libhdf5-dev
- uses: actions/checkout@v6
- uses: astral-sh/setup-uv@v7
with:
enable-cache: true
cache-dependency-glob: uv.lock
- uses: actions/setup-python@v6
with:
python-version: 3.14
allow-prereleases: true
- run: uv sync --group=test
- name: Run tests
# TODO: #8818 Re-enable quantum tests
run: uv run --with=pytest-run-parallel pytest
--iterations=8 --parallel-threads=auto
--ignore=computer_vision/cnn_classification.py
--ignore=docs/conf.py
--ignore=dynamic_programming/k_means_clustering_tensorflow.py
--ignore=machine_learning/local_weighted_learning/local_weighted_learning.py
--ignore=machine_learning/lstm/lstm_prediction.py
--ignore=neural_network/input_data.py
--ignore=project_euler/
--ignore=quantum/q_fourier_transform.py
--ignore=scripts/validate_solutions.py
--ignore=web_programming/current_stock_price.py
--ignore=web_programming/fetch_anime_and_play.py
--cov-report=term-missing:skip-covered
--cov=. .
- if: ${{ success() }}
run: scripts/build_directory_md.py 2>&1 | tee DIRECTORY.md
================================================
FILE: .github/workflows/devcontainer_ci.yml
================================================
name: Test DevContainer Build
on:
push:
paths:
- ".devcontainer/**"
pull_request:
paths:
- ".devcontainer/**"
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: devcontainers/ci@v0.3
with:
push: never
runCmd: "true"
================================================
FILE: .github/workflows/directory_writer.yml
================================================
# The objective of this GitHub Action is to update the DIRECTORY.md file (if needed)
# when doing a git push
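# The same file can be regenerated locally (a sketch, assuming a checkout of this
# repository with the script marked executable), mirroring the step used below:
#   scripts/build_directory_md.py 2>&1 | tee DIRECTORY.md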
name: directory_writer
on: [push]
jobs:
directory_writer:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- uses: actions/setup-python@v6
with:
python-version: 3.14
allow-prereleases: true
- name: Write DIRECTORY.md
run: |
scripts/build_directory_md.py 2>&1 | tee DIRECTORY.md
git config --global user.name "$GITHUB_ACTOR"
git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com"
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
- name: Update DIRECTORY.md
run: |
git add DIRECTORY.md
git commit -am "updating DIRECTORY.md" || true
git push --force origin HEAD:$GITHUB_REF || true
================================================
FILE: .github/workflows/project_euler.yml
================================================
on:
pull_request:
# Run only if a file within the project_euler directory or one of the related files below is changed
paths:
- "project_euler/**"
- ".github/workflows/project_euler.yml"
- "scripts/validate_solutions.py"
schedule:
- cron: "0 0 * * *" # Run everyday
name: "Project Euler"
jobs:
project-euler:
runs-on: ubuntu-latest
steps:
- run:
sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev
zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk
libharfbuzz-dev libfribidi-dev libxcb1-dev
libxml2-dev libxslt-dev
libhdf5-dev
libopenblas-dev
- uses: actions/checkout@v6
- uses: astral-sh/setup-uv@v7
- uses: actions/setup-python@v6
with:
python-version: 3.14
allow-prereleases: true
- run: uv sync --group=euler-validate --group=test
- run: uv run pytest --doctest-modules --cov-report=term-missing:skip-covered --cov=project_euler/ project_euler/
validate-solutions:
runs-on: ubuntu-latest
steps:
- run:
sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev
zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk
libharfbuzz-dev libfribidi-dev libxcb1-dev
libxml2-dev libxslt-dev
libhdf5-dev
libopenblas-dev
- uses: actions/checkout@v6
- uses: astral-sh/setup-uv@v7
- uses: actions/setup-python@v6
with:
python-version: 3.14
allow-prereleases: true
- run: uv sync --group=euler-validate --group=test
- run: uv run pytest scripts/validate_solutions.py
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
================================================
FILE: .github/workflows/ruff.yml
================================================
# https://beta.ruff.rs
name: ruff
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: astral-sh/setup-uv@v7
- run: uvx ruff check --output-format=github .
================================================
FILE: .github/workflows/sphinx.yml
================================================
name: sphinx
on:
# Triggers the workflow on push or pull request events but only for the "master" branch
push:
branches: ["master"]
pull_request:
branches: ["master"]
# Or manually from the Actions tab
workflow_dispatch:
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
contents: read
pages: write
id-token: write
# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
build_docs:
runs-on: ubuntu-24.04-arm
steps:
- run:
sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev
zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk
libharfbuzz-dev libfribidi-dev libxcb1-dev
libxml2-dev libxslt-dev
libhdf5-dev
libopenblas-dev
- uses: actions/checkout@v6
- uses: astral-sh/setup-uv@v7
- uses: actions/setup-python@v6
with:
python-version: 3.14
allow-prereleases: true
- run: uv sync --group=docs
- uses: actions/configure-pages@v5
- run: uv run sphinx-build -c docs . docs/_build/html
- uses: actions/upload-pages-artifact@v4
with:
path: docs/_build/html
deploy_docs:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
if: github.event_name != 'pull_request'
needs: build_docs
runs-on: ubuntu-latest
steps:
- uses: actions/deploy-pages@v4
id: deployment
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.DS_Store
.idea
.try
.vscode/
.vs/
================================================
FILE: .gitpod.yml
================================================
tasks:
- init: pip3 install -r ./requirements.txt
================================================
FILE: .pre-commit-config.yaml
================================================
ci:
autoupdate_schedule: monthly
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-executables-have-shebangs
- id: check-toml
- id: check-yaml
- id: end-of-file-fixer
types: [python]
- id: trailing-whitespace
- id: requirements-txt-fixer
- repo: https://github.com/MarcoGorelli/auto-walrus
rev: 0.4.1
hooks:
- id: auto-walrus
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.14
hooks:
- id: ruff-check
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
hooks:
- id: codespell
additional_dependencies:
- tomli
- repo: https://github.com/tox-dev/pyproject-fmt
rev: v2.12.1
hooks:
- id: pyproject-fmt
- repo: local
hooks:
- id: validate-filenames
name: Validate filenames
entry: ./scripts/validate_filenames.py
language: script
pass_filenames: false
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.24.1
hooks:
- id: validate-pyproject
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.19.1
hooks:
- id: mypy
args:
- --explicit-package-bases
- --ignore-missing-imports
- --install-types
- --non-interactive
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
hooks:
- id: prettier
types_or: [toml, yaml]
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing guidelines
## Before contributing
Welcome to [TheAlgorithms/Python](https://github.com/TheAlgorithms/Python)! Before submitting your pull requests, please ensure that you have __read these guidelines in full__. If you have any doubts about the contributing guide, please feel free to [state them clearly in an issue](https://github.com/TheAlgorithms/Python/issues/new) or ask the community on [Gitter](https://gitter.im/TheAlgorithms/community).
## Contributing
### Contributor
We are delighted that you are considering implementing algorithms and data structures for others! This repository is referenced and used by learners from all over the globe. By being one of our contributors, you agree and confirm that:
- You did your work - no plagiarism allowed.
- Any plagiarized work will not be merged.
- Your work will be distributed under [MIT License](LICENSE.md) once your pull request is merged.
- Your submitted work fulfills or mostly fulfills our styles and standards.
__New implementations__ are welcome! For example, new solutions to a problem, different representations of a graph data structure, or algorithm designs with different complexity. However, an __identical implementation__ of an existing algorithm is not allowed. Please check whether the solution has already been implemented before submitting your pull request.
__Improving comments__ and __writing proper tests__ are also highly welcome.
### Contribution
We appreciate any contribution, from fixing a grammar mistake in a comment to implementing complex algorithms. Please read this section if you are contributing your work.
Your contribution will be tested by our [automated testing on GitHub Actions](https://github.com/TheAlgorithms/Python/actions) to save time and mental energy. After you have submitted your pull request, you should see the GitHub Actions tests start to run at the bottom of your submission page. If those tests fail, then click on the ___details___ button to read through the GitHub Actions output to understand the failure. If you do not understand, please leave a comment on your submission page and a community member will try to help.
#### Issues
If you are interested in resolving an [open issue](https://github.com/TheAlgorithms/Python/issues), simply make a pull request with your proposed fix. __We do not assign issues in this repo__ so please do not ask for permission to work on an issue.
__Do not__ create an issue to contribute an algorithm. Please submit a pull request instead.
Please help us keep our issue list small by adding `Fixes #{$ISSUE_NUMBER}` to the description of pull requests that resolve open issues.
For example, if your pull request fixes issue #10, then please add the following to its description:
```
Fixes #10
```
GitHub will use this tag to [auto-close the issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) if and when the PR is merged.
#### What is an Algorithm?
An Algorithm is one or more functions (or classes) that:
* take one or more inputs,
* perform some internal calculations or data manipulations,
* return one or more outputs,
* have minimal side effects (e.g. `print()`, `plot()`, `read()`, `write()`).
Algorithms should be packaged in a way that would make it easy for readers to put them into larger programs.
Algorithms should:
* have intuitive class and function names that make their purpose clear to readers
* use Python naming conventions and intuitive variable names to ease comprehension
* be flexible to take different input values
* have Python type hints for their input parameters and return values
* raise Python exceptions (`ValueError`, etc.) on erroneous input values
* have docstrings with clear explanations and/or URLs to source materials
* contain doctests that test both valid and erroneous input values
* return all calculation results instead of printing or plotting them
Algorithms in this repo should not be how-to examples for existing Python packages. Instead, they should perform internal calculations or manipulations to convert input values into different output values. Those calculations or manipulations can use data types, classes, or functions of existing Python packages but each algorithm in this repo should add unique value.
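For instance, here is a minimal sketch that follows the points above (purely illustrative, not the repository's actual implementation of this algorithm):
```python
def greatest_common_divisor(a: int, b: int) -> int:
    """
    Return the greatest common divisor of two non-negative integers
    using the Euclidean algorithm.
    Reference: https://en.wikipedia.org/wiki/Euclidean_algorithm
    >>> greatest_common_divisor(24, 40)
    8
    >>> greatest_common_divisor(7, 0)
    7
    >>> greatest_common_divisor(-3, 9)
    Traceback (most recent call last):
        ...
    ValueError: Both numbers must be non-negative integers
    """
    if a < 0 or b < 0:
        raise ValueError("Both numbers must be non-negative integers")
    while b:
        a, b = b, a % b
    return a
```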
#### Pre-commit plugin
Use [pre-commit](https://pre-commit.com/#installation) to automatically format your code to match our coding style:
```bash
python3 -m pip install pre-commit # only required the first time
pre-commit install
```
That's it! The plugin will run every time you commit any changes. If there are any errors found during the run, fix them and commit those changes. You can even run the plugin manually on all files:
```bash
pre-commit run --all-files --show-diff-on-failure
```
#### Coding Style
We want your work to be readable by others; therefore, we encourage you to note the following:
- Please write in Python 3.13+. For instance: `print()` is a function in Python 3 so `print "Hello"` will *not* work but `print("Hello")` will.
- Please focus hard on the naming of functions, classes, and variables. Help your reader by using __descriptive names__ that can help you to remove redundant comments.
- Single letter variable names are *old school* so please avoid them unless their life only spans a few lines.
- Expand acronyms because `gcd()` is hard to understand but `greatest_common_divisor()` is not.
- Please follow the [Python Naming Conventions](https://pep8.org/#prescriptive-naming-conventions) so variable_names and function_names should be lower_case, CONSTANTS in UPPERCASE, ClassNames should be CamelCase, etc.
- We encourage the use of Python [f-strings](https://realpython.com/python-f-strings/#f-strings-a-new-and-improved-way-to-format-strings-in-python) where they make the code easier to read.
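For illustration, a small made-up snippet that follows these naming conventions and uses an f-string:
```python
FREEZING_POINT_F = 32.0  # CONSTANTS in UPPERCASE


class TemperatureConverter:  # ClassNames in CamelCase
    def celsius_to_fahrenheit(self, degrees_celsius: float) -> float:  # snake_case names
        return degrees_celsius * 9 / 5 + FREEZING_POINT_F


converter = TemperatureConverter()
print(f"100 degrees Celsius is {converter.celsius_to_fahrenheit(100)} degrees Fahrenheit")
```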
- Please consider running [__psf/black__](https://github.com/psf/black) on your Python file(s) before submitting your pull request. This is not yet a requirement but it does make your code more readable and automatically aligns it with much of [PEP 8](https://www.python.org/dev/peps/pep-0008/). There are other code formatters (autopep8, yapf) but the __black__ formatter is now hosted by the Python Software Foundation. To use it:
```bash
python3 -m pip install black # only required the first time
black .
```
- All submissions will need to pass the test `ruff check .` before they can be accepted, so if possible, try this test locally on your Python file(s) before submitting your pull request.
```bash
python3 -m pip install ruff # only required the first time
ruff check
```
- Original code submissions require docstrings or comments to describe your work.
- More on docstrings and comments:
If you used a Wikipedia article or some other source material to create your algorithm, please add the URL in a docstring or comment to help your reader.
The following is considered bad style and you may be asked to improve it:
```python
x = x + 2 # increased by 2
```
This is too trivial. Comments are expected to be explanatory. For comments, you can write them above, on or below a line of code, as long as you are consistent within the same piece of code.
We encourage you to put docstrings inside your functions but please pay attention to the indentation of docstrings. The following is a good example:
```python
def sum_ab(a, b):
"""
Return the sum of two integers a and b.
"""
return a + b
```
- Write tests (especially [__doctests__](https://docs.python.org/3/library/doctest.html)) to illustrate and verify your work. We highly encourage the use of _doctests on all functions_.
```python
def sum_ab(a, b):
"""
Return the sum of two integers a and b
>>> sum_ab(2, 2)
4
>>> sum_ab(-2, 3)
1
>>> sum_ab(4.9, 5.1)
10.0
"""
return a + b
```
These doctests will be run by pytest as part of our automated testing so please try to run your doctests locally and make sure that they are found and pass:
```bash
python3 -m doctest -v my_submission.py
```
The use of the Python built-in `input()` function is __not__ encouraged:
```python
input('Enter your input:')
# Or even worse...
input = eval(input("Enter your input: "))
```
However, if your code uses `input()` then we encourage you to gracefully deal with leading and trailing whitespace in user input by adding `.strip()` as in:
```python
starting_value = int(input("Please enter a starting value: ").strip())
```
The use of [Python type hints](https://docs.python.org/3/library/typing.html) is encouraged for function parameters and return values. Our automated testing will run [mypy](https://mypy-lang.org) so run that locally before making your submission.
```python
def sum_ab(a: int, b: int) -> int:
return a + b
```
Instructions on how to install mypy can be found [here](https://github.com/python/mypy). Please use the command `mypy --ignore-missing-imports .` to test all files or `mypy --ignore-missing-imports path/to/file.py` to test a specific file.
- [__List comprehensions and generators__](https://docs.python.org/3/tutorial/datastructures.html#list-comprehensions) are preferred over the use of `lambda`, `map`, `filter`, `reduce` but the important thing is to demonstrate the power of Python in code that is easy to read and maintain.
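For example, the list comprehension below is generally easier to read than the equivalent `filter`/`map` chain (the numbers are arbitrary):
```python
numbers = [3, 8, 11, 14, 27]

# Preferred: a list comprehension reads left to right
squares_of_even = [number * number for number in numbers if number % 2 == 0]

# The same result via filter/map/lambda is harder to follow
squares_of_even_alt = list(map(lambda n: n * n, filter(lambda n: n % 2 == 0, numbers)))

assert squares_of_even == squares_of_even_alt == [64, 196]
```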
- Avoid importing external libraries for basic algorithms. Only use those libraries for complicated algorithms.
- If you need a third-party module that is not in the file __requirements.txt__, please add it to that file as part of your submission.
#### Other Requirements for Submissions
- If you are submitting code in the `project_euler/` directory, please also read [the dedicated Guideline](https://github.com/TheAlgorithms/Python/blob/master/project_euler/README.md) before contributing to our Project Euler library.
- The file extension for code files should be `.py`. Jupyter Notebooks should be submitted to [TheAlgorithms/Jupyter](https://github.com/TheAlgorithms/Jupyter).
- Strictly use snake_case (underscore_separated) in your file names, as this makes them easy to parse with scripts in the future.
- Please avoid creating new directories if at all possible. Try to fit your work into the existing directory structure.
- If possible, follow the standard *within* the folder you are submitting to.
- If you have modified or added code, make sure it runs before submitting.
- If you have modified or added documentation, ensure that your language is concise and free of grammatical errors.
- Do not update README.md or DIRECTORY.md; they are periodically autogenerated by our GitHub Actions processes.
- Add a corresponding explanation to [Algorithms-Explanation](https://github.com/TheAlgorithms/Algorithms-Explanation) (Optional but recommended).
- All submissions will be tested with [__mypy__](http://www.mypy-lang.org) so we encourage you to add [__Python type hints__](https://docs.python.org/3/library/typing.html) where it makes sense to do so.
- Most importantly,
- __Be consistent in the use of these guidelines when submitting.__
- __Join__ us on [Discord](https://discord.com/invite/c7MnfGFGa6) and [Gitter](https://gitter.im/TheAlgorithms/community) __now!__
- Happy coding!
Writer [@poyea](https://github.com/poyea), Jun 2019.
================================================
FILE: DIRECTORY.md
================================================
## Audio Filters
* [Butterworth Filter](audio_filters/butterworth_filter.py)
* [Iir Filter](audio_filters/iir_filter.py)
* [Show Response](audio_filters/show_response.py)
## Backtracking
* [All Combinations](backtracking/all_combinations.py)
* [All Permutations](backtracking/all_permutations.py)
* [All Subsequences](backtracking/all_subsequences.py)
* [Coloring](backtracking/coloring.py)
* [Combination Sum](backtracking/combination_sum.py)
* [Crossword Puzzle Solver](backtracking/crossword_puzzle_solver.py)
* [Generate Parentheses](backtracking/generate_parentheses.py)
* [Generate Parentheses Iterative](backtracking/generate_parentheses_iterative.py)
* [Hamiltonian Cycle](backtracking/hamiltonian_cycle.py)
* [Knight Tour](backtracking/knight_tour.py)
* [Match Word Pattern](backtracking/match_word_pattern.py)
* [Minimax](backtracking/minimax.py)
* [N Queens](backtracking/n_queens.py)
* [N Queens Math](backtracking/n_queens_math.py)
* [Power Sum](backtracking/power_sum.py)
* [Rat In Maze](backtracking/rat_in_maze.py)
* [Sudoku](backtracking/sudoku.py)
* [Sum Of Subsets](backtracking/sum_of_subsets.py)
* [Word Break](backtracking/word_break.py)
* [Word Ladder](backtracking/word_ladder.py)
* [Word Search](backtracking/word_search.py)
## Bit Manipulation
* [Binary And Operator](bit_manipulation/binary_and_operator.py)
* [Binary Coded Decimal](bit_manipulation/binary_coded_decimal.py)
* [Binary Count Setbits](bit_manipulation/binary_count_setbits.py)
* [Binary Count Trailing Zeros](bit_manipulation/binary_count_trailing_zeros.py)
* [Binary Or Operator](bit_manipulation/binary_or_operator.py)
* [Binary Shifts](bit_manipulation/binary_shifts.py)
* [Binary Twos Complement](bit_manipulation/binary_twos_complement.py)
* [Binary Xor Operator](bit_manipulation/binary_xor_operator.py)
* [Bitwise Addition Recursive](bit_manipulation/bitwise_addition_recursive.py)
* [Count 1S Brian Kernighan Method](bit_manipulation/count_1s_brian_kernighan_method.py)
* [Count Number Of One Bits](bit_manipulation/count_number_of_one_bits.py)
* [Excess 3 Code](bit_manipulation/excess_3_code.py)
* [Find Previous Power Of Two](bit_manipulation/find_previous_power_of_two.py)
* [Find Unique Number](bit_manipulation/find_unique_number.py)
* [Gray Code Sequence](bit_manipulation/gray_code_sequence.py)
* [Highest Set Bit](bit_manipulation/highest_set_bit.py)
* [Index Of Rightmost Set Bit](bit_manipulation/index_of_rightmost_set_bit.py)
* [Is Even](bit_manipulation/is_even.py)
* [Is Power Of Two](bit_manipulation/is_power_of_two.py)
* [Largest Pow Of Two Le Num](bit_manipulation/largest_pow_of_two_le_num.py)
* [Missing Number](bit_manipulation/missing_number.py)
* [Numbers Different Signs](bit_manipulation/numbers_different_signs.py)
* [Power Of 4](bit_manipulation/power_of_4.py)
* [Reverse Bits](bit_manipulation/reverse_bits.py)
* [Single Bit Manipulation Operations](bit_manipulation/single_bit_manipulation_operations.py)
* [Swap All Odd And Even Bits](bit_manipulation/swap_all_odd_and_even_bits.py)
## Blockchain
* [Diophantine Equation](blockchain/diophantine_equation.py)
## Boolean Algebra
* [And Gate](boolean_algebra/and_gate.py)
* [Imply Gate](boolean_algebra/imply_gate.py)
* [Karnaugh Map Simplification](boolean_algebra/karnaugh_map_simplification.py)
* [Multiplexer](boolean_algebra/multiplexer.py)
* [Nand Gate](boolean_algebra/nand_gate.py)
* [Nimply Gate](boolean_algebra/nimply_gate.py)
* [Nor Gate](boolean_algebra/nor_gate.py)
* [Not Gate](boolean_algebra/not_gate.py)
* [Or Gate](boolean_algebra/or_gate.py)
* [Quine Mc Cluskey](boolean_algebra/quine_mc_cluskey.py)
* [Xnor Gate](boolean_algebra/xnor_gate.py)
* [Xor Gate](boolean_algebra/xor_gate.py)
## Cellular Automata
* [Conways Game Of Life](cellular_automata/conways_game_of_life.py)
* [Game Of Life](cellular_automata/game_of_life.py)
* [Langtons Ant](cellular_automata/langtons_ant.py)
* [Nagel Schrekenberg](cellular_automata/nagel_schrekenberg.py)
* [One Dimensional](cellular_automata/one_dimensional.py)
* [Wa Tor](cellular_automata/wa_tor.py)
## Ciphers
* [A1Z26](ciphers/a1z26.py)
* [Affine Cipher](ciphers/affine_cipher.py)
* [Atbash](ciphers/atbash.py)
* [Autokey](ciphers/autokey.py)
* [Baconian Cipher](ciphers/baconian_cipher.py)
* [Base16](ciphers/base16.py)
* [Base32](ciphers/base32.py)
* [Base64 Cipher](ciphers/base64_cipher.py)
* [Base85](ciphers/base85.py)
* [Beaufort Cipher](ciphers/beaufort_cipher.py)
* [Bifid](ciphers/bifid.py)
* [Brute Force Caesar Cipher](ciphers/brute_force_caesar_cipher.py)
* [Caesar Cipher](ciphers/caesar_cipher.py)
* [Cryptomath Module](ciphers/cryptomath_module.py)
* [Decrypt Caesar With Chi Squared](ciphers/decrypt_caesar_with_chi_squared.py)
* [Deterministic Miller Rabin](ciphers/deterministic_miller_rabin.py)
* [Diffie](ciphers/diffie.py)
* [Diffie Hellman](ciphers/diffie_hellman.py)
* [Elgamal Key Generator](ciphers/elgamal_key_generator.py)
* [Enigma Machine2](ciphers/enigma_machine2.py)
* [Fractionated Morse Cipher](ciphers/fractionated_morse_cipher.py)
* [Gronsfeld Cipher](ciphers/gronsfeld_cipher.py)
* [Hill Cipher](ciphers/hill_cipher.py)
* [Mixed Keyword Cypher](ciphers/mixed_keyword_cypher.py)
* [Mono Alphabetic Ciphers](ciphers/mono_alphabetic_ciphers.py)
* [Morse Code](ciphers/morse_code.py)
* [Onepad Cipher](ciphers/onepad_cipher.py)
* [Permutation Cipher](ciphers/permutation_cipher.py)
* [Playfair Cipher](ciphers/playfair_cipher.py)
* [Polybius](ciphers/polybius.py)
* [Porta Cipher](ciphers/porta_cipher.py)
* [Rabin Miller](ciphers/rabin_miller.py)
* [Rail Fence Cipher](ciphers/rail_fence_cipher.py)
* [Rot13](ciphers/rot13.py)
* [Rsa Cipher](ciphers/rsa_cipher.py)
* [Rsa Factorization](ciphers/rsa_factorization.py)
* [Rsa Key Generator](ciphers/rsa_key_generator.py)
* [Running Key Cipher](ciphers/running_key_cipher.py)
* [Shuffled Shift Cipher](ciphers/shuffled_shift_cipher.py)
* [Simple Keyword Cypher](ciphers/simple_keyword_cypher.py)
* [Simple Substitution Cipher](ciphers/simple_substitution_cipher.py)
* [Transposition Cipher](ciphers/transposition_cipher.py)
* [Transposition Cipher Encrypt Decrypt File](ciphers/transposition_cipher_encrypt_decrypt_file.py)
* [Trifid Cipher](ciphers/trifid_cipher.py)
* [Vernam Cipher](ciphers/vernam_cipher.py)
* [Vigenere Cipher](ciphers/vigenere_cipher.py)
* [Xor Cipher](ciphers/xor_cipher.py)
## Computer Vision
* [Cnn Classification](computer_vision/cnn_classification.py)
* [Flip Augmentation](computer_vision/flip_augmentation.py)
* [Haralick Descriptors](computer_vision/haralick_descriptors.py)
* [Harris Corner](computer_vision/harris_corner.py)
* [Horn Schunck](computer_vision/horn_schunck.py)
* [Intensity Based Segmentation](computer_vision/intensity_based_segmentation.py)
* [Mean Threshold](computer_vision/mean_threshold.py)
* [Mosaic Augmentation](computer_vision/mosaic_augmentation.py)
* [Pooling Functions](computer_vision/pooling_functions.py)
## Conversions
* [Astronomical Length Scale Conversion](conversions/astronomical_length_scale_conversion.py)
* [Binary To Decimal](conversions/binary_to_decimal.py)
* [Binary To Hexadecimal](conversions/binary_to_hexadecimal.py)
* [Binary To Octal](conversions/binary_to_octal.py)
* [Convert Number To Words](conversions/convert_number_to_words.py)
* [Decimal To Any](conversions/decimal_to_any.py)
* [Decimal To Binary](conversions/decimal_to_binary.py)
* [Decimal To Hexadecimal](conversions/decimal_to_hexadecimal.py)
* [Decimal To Octal](conversions/decimal_to_octal.py)
* [Energy Conversions](conversions/energy_conversions.py)
* [Excel Title To Column](conversions/excel_title_to_column.py)
* [Hex To Bin](conversions/hex_to_bin.py)
* [Hexadecimal To Decimal](conversions/hexadecimal_to_decimal.py)
* [Ipv4 Conversion](conversions/ipv4_conversion.py)
* [Length Conversion](conversions/length_conversion.py)
* [Molecular Chemistry](conversions/molecular_chemistry.py)
* [Octal To Binary](conversions/octal_to_binary.py)
* [Octal To Decimal](conversions/octal_to_decimal.py)
* [Octal To Hexadecimal](conversions/octal_to_hexadecimal.py)
* [Prefix Conversions](conversions/prefix_conversions.py)
* [Prefix Conversions String](conversions/prefix_conversions_string.py)
* [Pressure Conversions](conversions/pressure_conversions.py)
* [Rectangular To Polar](conversions/rectangular_to_polar.py)
* [Rgb Cmyk Conversion](conversions/rgb_cmyk_conversion.py)
* [Rgb Hsv Conversion](conversions/rgb_hsv_conversion.py)
* [Roman Numerals](conversions/roman_numerals.py)
* [Speed Conversions](conversions/speed_conversions.py)
* [Temperature Conversions](conversions/temperature_conversions.py)
* [Time Conversions](conversions/time_conversions.py)
* [Volume Conversions](conversions/volume_conversions.py)
* [Weight Conversion](conversions/weight_conversion.py)
## Data Compression
* [Burrows Wheeler](data_compression/burrows_wheeler.py)
* [Coordinate Compression](data_compression/coordinate_compression.py)
* [Huffman](data_compression/huffman.py)
* [Lempel Ziv](data_compression/lempel_ziv.py)
* [Lempel Ziv Decompress](data_compression/lempel_ziv_decompress.py)
* [Lz77](data_compression/lz77.py)
* [Peak Signal To Noise Ratio](data_compression/peak_signal_to_noise_ratio.py)
* [Run Length Encoding](data_compression/run_length_encoding.py)
## Data Structures
* Arrays
* [Equilibrium Index In Array](data_structures/arrays/equilibrium_index_in_array.py)
* [Find Triplets With 0 Sum](data_structures/arrays/find_triplets_with_0_sum.py)
* [Index 2D Array In 1D](data_structures/arrays/index_2d_array_in_1d.py)
* [Kth Largest Element](data_structures/arrays/kth_largest_element.py)
* [Median Two Array](data_structures/arrays/median_two_array.py)
* [Monotonic Array](data_structures/arrays/monotonic_array.py)
* [Pairs With Given Sum](data_structures/arrays/pairs_with_given_sum.py)
* [Permutations](data_structures/arrays/permutations.py)
* [Prefix Sum](data_structures/arrays/prefix_sum.py)
* [Product Sum](data_structures/arrays/product_sum.py)
* [Rotate Array](data_structures/arrays/rotate_array.py)
* [Sparse Table](data_structures/arrays/sparse_table.py)
* [Sudoku Solver](data_structures/arrays/sudoku_solver.py)
* Binary Tree
* [Avl Tree](data_structures/binary_tree/avl_tree.py)
* [Basic Binary Tree](data_structures/binary_tree/basic_binary_tree.py)
* [Binary Search Tree](data_structures/binary_tree/binary_search_tree.py)
* [Binary Search Tree Recursive](data_structures/binary_tree/binary_search_tree_recursive.py)
* [Binary Tree Mirror](data_structures/binary_tree/binary_tree_mirror.py)
* [Binary Tree Node Sum](data_structures/binary_tree/binary_tree_node_sum.py)
* [Binary Tree Path Sum](data_structures/binary_tree/binary_tree_path_sum.py)
* [Binary Tree Traversals](data_structures/binary_tree/binary_tree_traversals.py)
* [Diameter Of Binary Tree](data_structures/binary_tree/diameter_of_binary_tree.py)
* [Diff Views Of Binary Tree](data_structures/binary_tree/diff_views_of_binary_tree.py)
* [Distribute Coins](data_structures/binary_tree/distribute_coins.py)
* [Fenwick Tree](data_structures/binary_tree/fenwick_tree.py)
* [Flatten Binarytree To Linkedlist](data_structures/binary_tree/flatten_binarytree_to_linkedlist.py)
* [Floor And Ceiling](data_structures/binary_tree/floor_and_ceiling.py)
* [Inorder Tree Traversal 2022](data_structures/binary_tree/inorder_tree_traversal_2022.py)
* [Is Sorted](data_structures/binary_tree/is_sorted.py)
* [Is Sum Tree](data_structures/binary_tree/is_sum_tree.py)
* [Lazy Segment Tree](data_structures/binary_tree/lazy_segment_tree.py)
* [Lowest Common Ancestor](data_structures/binary_tree/lowest_common_ancestor.py)
* [Maximum Fenwick Tree](data_structures/binary_tree/maximum_fenwick_tree.py)
* [Maximum Sum Bst](data_structures/binary_tree/maximum_sum_bst.py)
* [Merge Two Binary Trees](data_structures/binary_tree/merge_two_binary_trees.py)
* [Mirror Binary Tree](data_structures/binary_tree/mirror_binary_tree.py)
* [Non Recursive Segment Tree](data_structures/binary_tree/non_recursive_segment_tree.py)
* [Number Of Possible Binary Trees](data_structures/binary_tree/number_of_possible_binary_trees.py)
* [Red Black Tree](data_structures/binary_tree/red_black_tree.py)
* [Segment Tree](data_structures/binary_tree/segment_tree.py)
* [Segment Tree Other](data_structures/binary_tree/segment_tree_other.py)
* [Serialize Deserialize Binary Tree](data_structures/binary_tree/serialize_deserialize_binary_tree.py)
* [Symmetric Tree](data_structures/binary_tree/symmetric_tree.py)
* [Treap](data_structures/binary_tree/treap.py)
* [Wavelet Tree](data_structures/binary_tree/wavelet_tree.py)
* Disjoint Set
* [Alternate Disjoint Set](data_structures/disjoint_set/alternate_disjoint_set.py)
* [Disjoint Set](data_structures/disjoint_set/disjoint_set.py)
* Hashing
* [Bloom Filter](data_structures/hashing/bloom_filter.py)
* [Double Hash](data_structures/hashing/double_hash.py)
* [Hash Map](data_structures/hashing/hash_map.py)
* [Hash Table](data_structures/hashing/hash_table.py)
* [Hash Table With Linked List](data_structures/hashing/hash_table_with_linked_list.py)
* Number Theory
* [Prime Numbers](data_structures/hashing/number_theory/prime_numbers.py)
* [Quadratic Probing](data_structures/hashing/quadratic_probing.py)
* Tests
* [Test Hash Map](data_structures/hashing/tests/test_hash_map.py)
* Heap
* [Binomial Heap](data_structures/heap/binomial_heap.py)
* [Heap](data_structures/heap/heap.py)
* [Heap Generic](data_structures/heap/heap_generic.py)
* [Max Heap](data_structures/heap/max_heap.py)
* [Min Heap](data_structures/heap/min_heap.py)
* [Randomized Heap](data_structures/heap/randomized_heap.py)
* [Skew Heap](data_structures/heap/skew_heap.py)
* Kd Tree
* [Build Kdtree](data_structures/kd_tree/build_kdtree.py)
* Example
* [Example Usage](data_structures/kd_tree/example/example_usage.py)
* [Hypercube Points](data_structures/kd_tree/example/hypercube_points.py)
* [Kd Node](data_structures/kd_tree/kd_node.py)
* [Nearest Neighbour Search](data_structures/kd_tree/nearest_neighbour_search.py)
* Tests
* [Test Kdtree](data_structures/kd_tree/tests/test_kdtree.py)
* Linked List
* [Circular Linked List](data_structures/linked_list/circular_linked_list.py)
* [Deque Doubly](data_structures/linked_list/deque_doubly.py)
* [Doubly Linked List](data_structures/linked_list/doubly_linked_list.py)
* [Doubly Linked List Two](data_structures/linked_list/doubly_linked_list_two.py)
* [Floyds Cycle Detection](data_structures/linked_list/floyds_cycle_detection.py)
* [From Sequence](data_structures/linked_list/from_sequence.py)
* [Has Loop](data_structures/linked_list/has_loop.py)
* [Is Palindrome](data_structures/linked_list/is_palindrome.py)
* [Merge Two Lists](data_structures/linked_list/merge_two_lists.py)
* [Middle Element Of Linked List](data_structures/linked_list/middle_element_of_linked_list.py)
* [Print Reverse](data_structures/linked_list/print_reverse.py)
* [Reverse K Group](data_structures/linked_list/reverse_k_group.py)
* [Rotate To The Right](data_structures/linked_list/rotate_to_the_right.py)
* [Singly Linked List](data_structures/linked_list/singly_linked_list.py)
* [Skip List](data_structures/linked_list/skip_list.py)
* [Swap Nodes](data_structures/linked_list/swap_nodes.py)
* Queues
* [Circular Queue](data_structures/queues/circular_queue.py)
* [Circular Queue Linked List](data_structures/queues/circular_queue_linked_list.py)
* [Double Ended Queue](data_structures/queues/double_ended_queue.py)
* [Linked Queue](data_structures/queues/linked_queue.py)
* [Priority Queue Using List](data_structures/queues/priority_queue_using_list.py)
* [Queue By List](data_structures/queues/queue_by_list.py)
* [Queue By Two Stacks](data_structures/queues/queue_by_two_stacks.py)
* [Queue On Pseudo Stack](data_structures/queues/queue_on_pseudo_stack.py)
* Stacks
* [Balanced Parentheses](data_structures/stacks/balanced_parentheses.py)
* [Dijkstras Two Stack Algorithm](data_structures/stacks/dijkstras_two_stack_algorithm.py)
* [Infix To Postfix Conversion](data_structures/stacks/infix_to_postfix_conversion.py)
* [Infix To Prefix Conversion](data_structures/stacks/infix_to_prefix_conversion.py)
* [Largest Rectangle Histogram](data_structures/stacks/largest_rectangle_histogram.py)
* [Lexicographical Numbers](data_structures/stacks/lexicographical_numbers.py)
* [Next Greater Element](data_structures/stacks/next_greater_element.py)
* [Postfix Evaluation](data_structures/stacks/postfix_evaluation.py)
* [Prefix Evaluation](data_structures/stacks/prefix_evaluation.py)
* [Stack](data_structures/stacks/stack.py)
* [Stack Using Two Queues](data_structures/stacks/stack_using_two_queues.py)
* [Stack With Doubly Linked List](data_structures/stacks/stack_with_doubly_linked_list.py)
* [Stack With Singly Linked List](data_structures/stacks/stack_with_singly_linked_list.py)
* [Stock Span Problem](data_structures/stacks/stock_span_problem.py)
* Suffix Tree
* Example
* [Example Usage](data_structures/suffix_tree/example/example_usage.py)
* [Suffix Tree](data_structures/suffix_tree/suffix_tree.py)
* [Suffix Tree Node](data_structures/suffix_tree/suffix_tree_node.py)
* Tests
* [Test Suffix Tree](data_structures/suffix_tree/tests/test_suffix_tree.py)
* Trie
* [Radix Tree](data_structures/trie/radix_tree.py)
* [Trie](data_structures/trie/trie.py)
## Digital Image Processing
* [Change Brightness](digital_image_processing/change_brightness.py)
* [Change Contrast](digital_image_processing/change_contrast.py)
* [Convert To Negative](digital_image_processing/convert_to_negative.py)
* Dithering
* [Burkes](digital_image_processing/dithering/burkes.py)
* Edge Detection
* [Canny](digital_image_processing/edge_detection/canny.py)
* Filters
* [Bilateral Filter](digital_image_processing/filters/bilateral_filter.py)
* [Convolve](digital_image_processing/filters/convolve.py)
* [Gabor Filter](digital_image_processing/filters/gabor_filter.py)
* [Gaussian Filter](digital_image_processing/filters/gaussian_filter.py)
* [Laplacian Filter](digital_image_processing/filters/laplacian_filter.py)
* [Local Binary Pattern](digital_image_processing/filters/local_binary_pattern.py)
* [Median Filter](digital_image_processing/filters/median_filter.py)
* [Sobel Filter](digital_image_processing/filters/sobel_filter.py)
* Histogram Equalization
* [Histogram Stretch](digital_image_processing/histogram_equalization/histogram_stretch.py)
* [Index Calculation](digital_image_processing/index_calculation.py)
* Morphological Operations
* [Dilation Operation](digital_image_processing/morphological_operations/dilation_operation.py)
* [Erosion Operation](digital_image_processing/morphological_operations/erosion_operation.py)
* Resize
* [Resize](digital_image_processing/resize/resize.py)
* Rotation
* [Rotation](digital_image_processing/rotation/rotation.py)
* [Sepia](digital_image_processing/sepia.py)
* [Test Digital Image Processing](digital_image_processing/test_digital_image_processing.py)
## Divide And Conquer
* [Closest Pair Of Points](divide_and_conquer/closest_pair_of_points.py)
* [Convex Hull](divide_and_conquer/convex_hull.py)
* [Heaps Algorithm](divide_and_conquer/heaps_algorithm.py)
* [Heaps Algorithm Iterative](divide_and_conquer/heaps_algorithm_iterative.py)
* [Inversions](divide_and_conquer/inversions.py)
* [Kth Order Statistic](divide_and_conquer/kth_order_statistic.py)
* [Max Difference Pair](divide_and_conquer/max_difference_pair.py)
* [Max Subarray](divide_and_conquer/max_subarray.py)
* [Mergesort](divide_and_conquer/mergesort.py)
* [Peak](divide_and_conquer/peak.py)
* [Power](divide_and_conquer/power.py)
* [Strassen Matrix Multiplication](divide_and_conquer/strassen_matrix_multiplication.py)
## Docs
* [Conf](docs/conf.py)
## Dynamic Programming
* [Abbreviation](dynamic_programming/abbreviation.py)
* [All Construct](dynamic_programming/all_construct.py)
* [Bitmask](dynamic_programming/bitmask.py)
* [Catalan Numbers](dynamic_programming/catalan_numbers.py)
* [Climbing Stairs](dynamic_programming/climbing_stairs.py)
* [Combination Sum Iv](dynamic_programming/combination_sum_iv.py)
* [Edit Distance](dynamic_programming/edit_distance.py)
* [Factorial](dynamic_programming/factorial.py)
* [Fast Fibonacci](dynamic_programming/fast_fibonacci.py)
* [Fibonacci](dynamic_programming/fibonacci.py)
* [Fizz Buzz](dynamic_programming/fizz_buzz.py)
* [Floyd Warshall](dynamic_programming/floyd_warshall.py)
* [Integer Partition](dynamic_programming/integer_partition.py)
* [Iterating Through Submasks](dynamic_programming/iterating_through_submasks.py)
* [K Means Clustering Tensorflow](dynamic_programming/k_means_clustering_tensorflow.py)
* [Knapsack](dynamic_programming/knapsack.py)
* [Largest Divisible Subset](dynamic_programming/largest_divisible_subset.py)
* [Longest Common Subsequence](dynamic_programming/longest_common_subsequence.py)
* [Longest Common Substring](dynamic_programming/longest_common_substring.py)
* [Longest Increasing Subsequence](dynamic_programming/longest_increasing_subsequence.py)
* [Longest Increasing Subsequence Iterative](dynamic_programming/longest_increasing_subsequence_iterative.py)
* [Longest Increasing Subsequence O Nlogn](dynamic_programming/longest_increasing_subsequence_o_nlogn.py)
* [Longest Palindromic Subsequence](dynamic_programming/longest_palindromic_subsequence.py)
* [Matrix Chain Multiplication](dynamic_programming/matrix_chain_multiplication.py)
* [Matrix Chain Order](dynamic_programming/matrix_chain_order.py)
* [Max Non Adjacent Sum](dynamic_programming/max_non_adjacent_sum.py)
* [Max Product Subarray](dynamic_programming/max_product_subarray.py)
* [Max Subarray Sum](dynamic_programming/max_subarray_sum.py)
* [Min Distance Up Bottom](dynamic_programming/min_distance_up_bottom.py)
* [Minimum Coin Change](dynamic_programming/minimum_coin_change.py)
* [Minimum Cost Path](dynamic_programming/minimum_cost_path.py)
* [Minimum Partition](dynamic_programming/minimum_partition.py)
* [Minimum Size Subarray Sum](dynamic_programming/minimum_size_subarray_sum.py)
* [Minimum Squares To Represent A Number](dynamic_programming/minimum_squares_to_represent_a_number.py)
* [Minimum Steps To One](dynamic_programming/minimum_steps_to_one.py)
* [Minimum Tickets Cost](dynamic_programming/minimum_tickets_cost.py)
* [Narcissistic Number](dynamic_programming/narcissistic_number.py)
* [Optimal Binary Search Tree](dynamic_programming/optimal_binary_search_tree.py)
* [Palindrome Partitioning](dynamic_programming/palindrome_partitioning.py)
* [Range Sum Query](dynamic_programming/range_sum_query.py)
* [Regex Match](dynamic_programming/regex_match.py)
* [Rod Cutting](dynamic_programming/rod_cutting.py)
* [Smith Waterman](dynamic_programming/smith_waterman.py)
* [Subset Generation](dynamic_programming/subset_generation.py)
* [Sum Of Subset](dynamic_programming/sum_of_subset.py)
* [Trapped Water](dynamic_programming/trapped_water.py)
* [Tribonacci](dynamic_programming/tribonacci.py)
* [Viterbi](dynamic_programming/viterbi.py)
* [Wildcard Matching](dynamic_programming/wildcard_matching.py)
* [Word Break](dynamic_programming/word_break.py)
## Electronics
* [Apparent Power](electronics/apparent_power.py)
* [Builtin Voltage](electronics/builtin_voltage.py)
* [Capacitor Equivalence](electronics/capacitor_equivalence.py)
* [Carrier Concentration](electronics/carrier_concentration.py)
* [Charging Capacitor](electronics/charging_capacitor.py)
* [Charging Inductor](electronics/charging_inductor.py)
* [Circular Convolution](electronics/circular_convolution.py)
* [Coulombs Law](electronics/coulombs_law.py)
* [Electric Conductivity](electronics/electric_conductivity.py)
* [Electric Power](electronics/electric_power.py)
* [Electrical Impedance](electronics/electrical_impedance.py)
* [Ic 555 Timer](electronics/ic_555_timer.py)
* [Ind Reactance](electronics/ind_reactance.py)
* [Ohms Law](electronics/ohms_law.py)
* [Real And Reactive Power](electronics/real_and_reactive_power.py)
* [Resistor Color Code](electronics/resistor_color_code.py)
* [Resistor Equivalence](electronics/resistor_equivalence.py)
* [Resonant Frequency](electronics/resonant_frequency.py)
* [Wheatstone Bridge](electronics/wheatstone_bridge.py)
## File Transfer
* [Receive File](file_transfer/receive_file.py)
* [Send File](file_transfer/send_file.py)
* Tests
* [Test Send File](file_transfer/tests/test_send_file.py)
## Financial
* [Equated Monthly Installments](financial/equated_monthly_installments.py)
* [Exponential Moving Average](financial/exponential_moving_average.py)
* [Interest](financial/interest.py)
* [Present Value](financial/present_value.py)
* [Price Plus Tax](financial/price_plus_tax.py)
* [Simple Moving Average](financial/simple_moving_average.py)
* [Straight Line Depreciation](financial/straight_line_depreciation.py)
* [Time And Half Pay](financial/time_and_half_pay.py)
## Fractals
* [Julia Sets](fractals/julia_sets.py)
* [Koch Snowflake](fractals/koch_snowflake.py)
* [Mandelbrot](fractals/mandelbrot.py)
* [Sierpinski Triangle](fractals/sierpinski_triangle.py)
* [Vicsek](fractals/vicsek.py)
## Fuzzy Logic
* [Fuzzy Operations](fuzzy_logic/fuzzy_operations.py)
## Genetic Algorithm
* [Basic String](genetic_algorithm/basic_string.py)
## Geodesy
* [Haversine Distance](geodesy/haversine_distance.py)
* [Lamberts Ellipsoidal Distance](geodesy/lamberts_ellipsoidal_distance.py)
## Geometry
* [Geometry](geometry/geometry.py)
* [Graham Scan](geometry/graham_scan.py)
* [Jarvis March](geometry/jarvis_march.py)
* Tests
* [Test Graham Scan](geometry/tests/test_graham_scan.py)
* [Test Jarvis March](geometry/tests/test_jarvis_march.py)
## Graphics
* [Bezier Curve](graphics/bezier_curve.py)
* [Butterfly Pattern](graphics/butterfly_pattern.py)
* [Digital Differential Analyzer Line](graphics/digital_differential_analyzer_line.py)
* [Vector3 For 2D Rendering](graphics/vector3_for_2d_rendering.py)
## Graphs
* [A Star](graphs/a_star.py)
* [Ant Colony Optimization Algorithms](graphs/ant_colony_optimization_algorithms.py)
* [Articulation Points](graphs/articulation_points.py)
* [Basic Graphs](graphs/basic_graphs.py)
* [Bellman Ford](graphs/bellman_ford.py)
* [Bi Directional Dijkstra](graphs/bi_directional_dijkstra.py)
* [Bidirectional A Star](graphs/bidirectional_a_star.py)
* [Bidirectional Breadth First Search](graphs/bidirectional_breadth_first_search.py)
* [Bidirectional Search](graphs/bidirectional_search.py)
* [Boruvka](graphs/boruvka.py)
* [Breadth First Search](graphs/breadth_first_search.py)
* [Breadth First Search 2](graphs/breadth_first_search_2.py)
* [Breadth First Search Shortest Path](graphs/breadth_first_search_shortest_path.py)
* [Breadth First Search Shortest Path 2](graphs/breadth_first_search_shortest_path_2.py)
* [Breadth First Search Zero One Shortest Path](graphs/breadth_first_search_zero_one_shortest_path.py)
* [Check Bipatrite](graphs/check_bipatrite.py)
* [Check Cycle](graphs/check_cycle.py)
* [Connected Components](graphs/connected_components.py)
* [Deep Clone Graph](graphs/deep_clone_graph.py)
* [Depth First Search](graphs/depth_first_search.py)
* [Depth First Search 2](graphs/depth_first_search_2.py)
* [Dijkstra](graphs/dijkstra.py)
* [Dijkstra 2](graphs/dijkstra_2.py)
* [Dijkstra Algorithm](graphs/dijkstra_algorithm.py)
* [Dijkstra Alternate](graphs/dijkstra_alternate.py)
* [Dijkstra Binary Grid](graphs/dijkstra_binary_grid.py)
* [Dinic](graphs/dinic.py)
* [Directed And Undirected Weighted Graph](graphs/directed_and_undirected_weighted_graph.py)
* [Edmonds Karp Multiple Source And Sink](graphs/edmonds_karp_multiple_source_and_sink.py)
* [Eulerian Path And Circuit For Undirected Graph](graphs/eulerian_path_and_circuit_for_undirected_graph.py)
* [Even Tree](graphs/even_tree.py)
* [Finding Bridges](graphs/finding_bridges.py)
* [Frequent Pattern Graph Miner](graphs/frequent_pattern_graph_miner.py)
* [G Topological Sort](graphs/g_topological_sort.py)
* [Gale Shapley Bigraph](graphs/gale_shapley_bigraph.py)
* [Graph Adjacency List](graphs/graph_adjacency_list.py)
* [Graph Adjacency Matrix](graphs/graph_adjacency_matrix.py)
* [Graph List](graphs/graph_list.py)
* [Graphs Floyd Warshall](graphs/graphs_floyd_warshall.py)
* [Greedy Best First](graphs/greedy_best_first.py)
* [Greedy Min Vertex Cover](graphs/greedy_min_vertex_cover.py)
* [Kahns Algorithm Long](graphs/kahns_algorithm_long.py)
* [Kahns Algorithm Topo](graphs/kahns_algorithm_topo.py)
* [Karger](graphs/karger.py)
* [Lanczos Eigenvectors](graphs/lanczos_eigenvectors.py)
* [Markov Chain](graphs/markov_chain.py)
* [Matching Min Vertex Cover](graphs/matching_min_vertex_cover.py)
* [Minimum Path Sum](graphs/minimum_path_sum.py)
* [Minimum Spanning Tree Boruvka](graphs/minimum_spanning_tree_boruvka.py)
* [Minimum Spanning Tree Kruskal](graphs/minimum_spanning_tree_kruskal.py)
* [Minimum Spanning Tree Kruskal2](graphs/minimum_spanning_tree_kruskal2.py)
* [Minimum Spanning Tree Prims](graphs/minimum_spanning_tree_prims.py)
* [Minimum Spanning Tree Prims2](graphs/minimum_spanning_tree_prims2.py)
* [Multi Heuristic Astar](graphs/multi_heuristic_astar.py)
* [Page Rank](graphs/page_rank.py)
* [Prim](graphs/prim.py)
* [Random Graph Generator](graphs/random_graph_generator.py)
* [Scc Kosaraju](graphs/scc_kosaraju.py)
* [Strongly Connected Components](graphs/strongly_connected_components.py)
* [Tarjans Scc](graphs/tarjans_scc.py)
* Tests
* [Test Min Spanning Tree Kruskal](graphs/tests/test_min_spanning_tree_kruskal.py)
* [Test Min Spanning Tree Prim](graphs/tests/test_min_spanning_tree_prim.py)
## Greedy Methods
* [Best Time To Buy And Sell Stock](greedy_methods/best_time_to_buy_and_sell_stock.py)
* [Fractional Cover Problem](greedy_methods/fractional_cover_problem.py)
* [Fractional Knapsack](greedy_methods/fractional_knapsack.py)
* [Fractional Knapsack 2](greedy_methods/fractional_knapsack_2.py)
* [Gas Station](greedy_methods/gas_station.py)
* [Minimum Coin Change](greedy_methods/minimum_coin_change.py)
* [Minimum Waiting Time](greedy_methods/minimum_waiting_time.py)
* [Optimal Merge Pattern](greedy_methods/optimal_merge_pattern.py)
* [Smallest Range](greedy_methods/smallest_range.py)
## Hashes
* [Adler32](hashes/adler32.py)
* [Chaos Machine](hashes/chaos_machine.py)
* [Djb2](hashes/djb2.py)
* [Elf](hashes/elf.py)
* [Enigma Machine](hashes/enigma_machine.py)
* [Fletcher16](hashes/fletcher16.py)
* [Hamming Code](hashes/hamming_code.py)
* [Luhn](hashes/luhn.py)
* [Md5](hashes/md5.py)
* [Sdbm](hashes/sdbm.py)
* [Sha1](hashes/sha1.py)
* [Sha256](hashes/sha256.py)
## Knapsack
* [Greedy Knapsack](knapsack/greedy_knapsack.py)
* [Knapsack](knapsack/knapsack.py)
* [Recursive Approach Knapsack](knapsack/recursive_approach_knapsack.py)
* Tests
* [Test Greedy Knapsack](knapsack/tests/test_greedy_knapsack.py)
* [Test Knapsack](knapsack/tests/test_knapsack.py)
## Linear Algebra
* [Gaussian Elimination](linear_algebra/gaussian_elimination.py)
* [Jacobi Iteration Method](linear_algebra/jacobi_iteration_method.py)
* [Lu Decomposition](linear_algebra/lu_decomposition.py)
* [Matrix Inversion](linear_algebra/matrix_inversion.py)
* Src
* [Conjugate Gradient](linear_algebra/src/conjugate_gradient.py)
* [Gaussian Elimination Pivoting](linear_algebra/src/gaussian_elimination_pivoting.py)
* [Lib](linear_algebra/src/lib.py)
* [Polynom For Points](linear_algebra/src/polynom_for_points.py)
* [Power Iteration](linear_algebra/src/power_iteration.py)
* [Rank Of Matrix](linear_algebra/src/rank_of_matrix.py)
* [Rayleigh Quotient](linear_algebra/src/rayleigh_quotient.py)
* [Schur Complement](linear_algebra/src/schur_complement.py)
* [Test Linear Algebra](linear_algebra/src/test_linear_algebra.py)
* [Transformations 2D](linear_algebra/src/transformations_2d.py)
## Linear Programming
* [Simplex](linear_programming/simplex.py)
## Machine Learning
* [Apriori Algorithm](machine_learning/apriori_algorithm.py)
* [Astar](machine_learning/astar.py)
* [Automatic Differentiation](machine_learning/automatic_differentiation.py)
* [Data Transformations](machine_learning/data_transformations.py)
* [Decision Tree](machine_learning/decision_tree.py)
* [Dimensionality Reduction](machine_learning/dimensionality_reduction.py)
* Forecasting
* [Run](machine_learning/forecasting/run.py)
* [Frequent Pattern Growth](machine_learning/frequent_pattern_growth.py)
* [Gradient Boosting Classifier](machine_learning/gradient_boosting_classifier.py)
* [Gradient Descent](machine_learning/gradient_descent.py)
* [K Means Clust](machine_learning/k_means_clust.py)
* [K Nearest Neighbours](machine_learning/k_nearest_neighbours.py)
* [Linear Discriminant Analysis](machine_learning/linear_discriminant_analysis.py)
* [Linear Regression](machine_learning/linear_regression.py)
* Local Weighted Learning
* [Local Weighted Learning](machine_learning/local_weighted_learning/local_weighted_learning.py)
* [Logistic Regression](machine_learning/logistic_regression.py)
* [Loss Functions](machine_learning/loss_functions.py)
* Lstm
* [Lstm Prediction](machine_learning/lstm/lstm_prediction.py)
* [Mfcc](machine_learning/mfcc.py)
* [Multilayer Perceptron Classifier](machine_learning/multilayer_perceptron_classifier.py)
* [Polynomial Regression](machine_learning/polynomial_regression.py)
* [Principle Component Analysis](machine_learning/principle_component_analysis.py)
* [Scoring Functions](machine_learning/scoring_functions.py)
* [Self Organizing Map](machine_learning/self_organizing_map.py)
* [Sequential Minimum Optimization](machine_learning/sequential_minimum_optimization.py)
* [Similarity Search](machine_learning/similarity_search.py)
* [Support Vector Machines](machine_learning/support_vector_machines.py)
* [T Stochastic Neighbour Embedding](machine_learning/t_stochastic_neighbour_embedding.py)
* [Word Frequency Functions](machine_learning/word_frequency_functions.py)
* [Xgboost Classifier](machine_learning/xgboost_classifier.py)
* [Xgboost Regressor](machine_learning/xgboost_regressor.py)
## Maths
* [Abs](maths/abs.py)
* [Addition Without Arithmetic](maths/addition_without_arithmetic.py)
* [Aliquot Sum](maths/aliquot_sum.py)
* [Allocation Number](maths/allocation_number.py)
* [Arc Length](maths/arc_length.py)
* [Area](maths/area.py)
* [Area Under Curve](maths/area_under_curve.py)
* [Average Absolute Deviation](maths/average_absolute_deviation.py)
* [Average Mean](maths/average_mean.py)
* [Average Median](maths/average_median.py)
* [Average Mode](maths/average_mode.py)
* [Bailey Borwein Plouffe](maths/bailey_borwein_plouffe.py)
* [Base Neg2 Conversion](maths/base_neg2_conversion.py)
* [Basic Maths](maths/basic_maths.py)
* [Binary Exponentiation](maths/binary_exponentiation.py)
* [Binary Multiplication](maths/binary_multiplication.py)
* [Binomial Coefficient](maths/binomial_coefficient.py)
* [Binomial Distribution](maths/binomial_distribution.py)
* [Ceil](maths/ceil.py)
* [Chebyshev Distance](maths/chebyshev_distance.py)
* [Check Polygon](maths/check_polygon.py)
* [Chinese Remainder Theorem](maths/chinese_remainder_theorem.py)
* [Chudnovsky Algorithm](maths/chudnovsky_algorithm.py)
* [Collatz Sequence](maths/collatz_sequence.py)
* [Combinations](maths/combinations.py)
* [Continued Fraction](maths/continued_fraction.py)
* [Decimal Isolate](maths/decimal_isolate.py)
* [Decimal To Fraction](maths/decimal_to_fraction.py)
* [Dodecahedron](maths/dodecahedron.py)
* [Double Factorial](maths/double_factorial.py)
* [Dual Number Automatic Differentiation](maths/dual_number_automatic_differentiation.py)
* [Entropy](maths/entropy.py)
* [Euclidean Distance](maths/euclidean_distance.py)
* [Euler Method](maths/euler_method.py)
* [Euler Modified](maths/euler_modified.py)
* [Eulers Totient](maths/eulers_totient.py)
* [Extended Euclidean Algorithm](maths/extended_euclidean_algorithm.py)
* [Factorial](maths/factorial.py)
* [Factors](maths/factors.py)
* [Fast Inverse Sqrt](maths/fast_inverse_sqrt.py)
* [Fermat Little Theorem](maths/fermat_little_theorem.py)
* [Fibonacci](maths/fibonacci.py)
* [Find Max](maths/find_max.py)
* [Find Min](maths/find_min.py)
* [Floor](maths/floor.py)
* [Gamma](maths/gamma.py)
* [Gaussian](maths/gaussian.py)
* [Gcd Of N Numbers](maths/gcd_of_n_numbers.py)
* [Geometric Mean](maths/geometric_mean.py)
* [Germain Primes](maths/germain_primes.py)
* [Greatest Common Divisor](maths/greatest_common_divisor.py)
* [Hardy Ramanujanalgo](maths/hardy_ramanujanalgo.py)
* [Integer Square Root](maths/integer_square_root.py)
* [Interquartile Range](maths/interquartile_range.py)
* [Is Int Palindrome](maths/is_int_palindrome.py)
* [Is Ip V4 Address Valid](maths/is_ip_v4_address_valid.py)
* [Is Square Free](maths/is_square_free.py)
* [Jaccard Similarity](maths/jaccard_similarity.py)
* [Joint Probability Distribution](maths/joint_probability_distribution.py)
* [Josephus Problem](maths/josephus_problem.py)
* [Juggler Sequence](maths/juggler_sequence.py)
* [Karatsuba](maths/karatsuba.py)
* [Kth Lexicographic Permutation](maths/kth_lexicographic_permutation.py)
* [Largest Of Very Large Numbers](maths/largest_of_very_large_numbers.py)
* [Least Common Multiple](maths/least_common_multiple.py)
* [Line Length](maths/line_length.py)
* [Liouville Lambda](maths/liouville_lambda.py)
* [Lucas Lehmer Primality Test](maths/lucas_lehmer_primality_test.py)
* [Lucas Series](maths/lucas_series.py)
* [Maclaurin Series](maths/maclaurin_series.py)
* [Manhattan Distance](maths/manhattan_distance.py)
* [Matrix Exponentiation](maths/matrix_exponentiation.py)
* [Max Sum Sliding Window](maths/max_sum_sliding_window.py)
* [Minkowski Distance](maths/minkowski_distance.py)
* [Mobius Function](maths/mobius_function.py)
* [Modular Division](maths/modular_division.py)
* [Modular Exponential](maths/modular_exponential.py)
* [Monte Carlo](maths/monte_carlo.py)
* [Monte Carlo Dice](maths/monte_carlo_dice.py)
* [Number Of Digits](maths/number_of_digits.py)
* Numerical Analysis
* [Adams Bashforth](maths/numerical_analysis/adams_bashforth.py)
* [Bisection](maths/numerical_analysis/bisection.py)
* [Bisection 2](maths/numerical_analysis/bisection_2.py)
* [Integration By Simpson Approx](maths/numerical_analysis/integration_by_simpson_approx.py)
* [Intersection](maths/numerical_analysis/intersection.py)
* [Nevilles Method](maths/numerical_analysis/nevilles_method.py)
* [Newton Forward Interpolation](maths/numerical_analysis/newton_forward_interpolation.py)
* [Newton Raphson](maths/numerical_analysis/newton_raphson.py)
* [Numerical Integration](maths/numerical_analysis/numerical_integration.py)
* [Proper Fractions](maths/numerical_analysis/proper_fractions.py)
* [Runge Kutta](maths/numerical_analysis/runge_kutta.py)
* [Runge Kutta Fehlberg 45](maths/numerical_analysis/runge_kutta_fehlberg_45.py)
* [Runge Kutta Gills](maths/numerical_analysis/runge_kutta_gills.py)
* [Secant Method](maths/numerical_analysis/secant_method.py)
* [Simpson Rule](maths/numerical_analysis/simpson_rule.py)
* [Square Root](maths/numerical_analysis/square_root.py)
* [Weierstrass Method](maths/numerical_analysis/weierstrass_method.py)
* [Odd Sieve](maths/odd_sieve.py)
* [Perfect Cube](maths/perfect_cube.py)
* [Perfect Number](maths/perfect_number.py)
* [Perfect Square](maths/perfect_square.py)
* [Persistence](maths/persistence.py)
* [Pi Generator](maths/pi_generator.py)
* [Pi Monte Carlo Estimation](maths/pi_monte_carlo_estimation.py)
* [Points Are Collinear 3D](maths/points_are_collinear_3d.py)
* [Pollard Rho](maths/pollard_rho.py)
* [Polynomial Evaluation](maths/polynomial_evaluation.py)
* Polynomials
* [Single Indeterminate Operations](maths/polynomials/single_indeterminate_operations.py)
* [Power Using Recursion](maths/power_using_recursion.py)
* [Prime Check](maths/prime_check.py)
* [Prime Factors](maths/prime_factors.py)
* [Prime Numbers](maths/prime_numbers.py)
* [Prime Sieve Eratosthenes](maths/prime_sieve_eratosthenes.py)
* [Primelib](maths/primelib.py)
* [Print Multiplication Table](maths/print_multiplication_table.py)
* [Pythagoras](maths/pythagoras.py)
* [Qr Decomposition](maths/qr_decomposition.py)
* [Quadratic Equations Complex Numbers](maths/quadratic_equations_complex_numbers.py)
* [Radians](maths/radians.py)
* [Radix2 Fft](maths/radix2_fft.py)
* [Remove Digit](maths/remove_digit.py)
* [Segmented Sieve](maths/segmented_sieve.py)
* Series
* [Arithmetic](maths/series/arithmetic.py)
* [Geometric](maths/series/geometric.py)
* [Geometric Series](maths/series/geometric_series.py)
* [Harmonic](maths/series/harmonic.py)
* [Harmonic Series](maths/series/harmonic_series.py)
* [Hexagonal Numbers](maths/series/hexagonal_numbers.py)
* [P Series](maths/series/p_series.py)
* [Sieve Of Eratosthenes](maths/sieve_of_eratosthenes.py)
* [Sigmoid](maths/sigmoid.py)
* [Signum](maths/signum.py)
* [Simultaneous Linear Equation Solver](maths/simultaneous_linear_equation_solver.py)
* [Sin](maths/sin.py)
* [Sock Merchant](maths/sock_merchant.py)
* [Softmax](maths/softmax.py)
* [Solovay Strassen Primality Test](maths/solovay_strassen_primality_test.py)
* [Spearman Rank Correlation Coefficient](maths/spearman_rank_correlation_coefficient.py)
* Special Numbers
* [Armstrong Numbers](maths/special_numbers/armstrong_numbers.py)
* [Automorphic Number](maths/special_numbers/automorphic_number.py)
* [Bell Numbers](maths/special_numbers/bell_numbers.py)
* [Carmichael Number](maths/special_numbers/carmichael_number.py)
* [Catalan Number](maths/special_numbers/catalan_number.py)
* [Hamming Numbers](maths/special_numbers/hamming_numbers.py)
* [Happy Number](maths/special_numbers/happy_number.py)
* [Harshad Numbers](maths/special_numbers/harshad_numbers.py)
* [Hexagonal Number](maths/special_numbers/hexagonal_number.py)
* [Krishnamurthy Number](maths/special_numbers/krishnamurthy_number.py)
* [Perfect Number](maths/special_numbers/perfect_number.py)
* [Polygonal Numbers](maths/special_numbers/polygonal_numbers.py)
* [Pronic Number](maths/special_numbers/pronic_number.py)
* [Proth Number](maths/special_numbers/proth_number.py)
* [Triangular Numbers](maths/special_numbers/triangular_numbers.py)
* [Ugly Numbers](maths/special_numbers/ugly_numbers.py)
* [Weird Number](maths/special_numbers/weird_number.py)
* [Sum Of Arithmetic Series](maths/sum_of_arithmetic_series.py)
* [Sum Of Digits](maths/sum_of_digits.py)
* [Sum Of Geometric Progression](maths/sum_of_geometric_progression.py)
* [Sum Of Harmonic Series](maths/sum_of_harmonic_series.py)
* [Sumset](maths/sumset.py)
* [Sylvester Sequence](maths/sylvester_sequence.py)
* [Tanh](maths/tanh.py)
* [Test Factorial](maths/test_factorial.py)
* [Test Prime Check](maths/test_prime_check.py)
* [Three Sum](maths/three_sum.py)
* [Trapezoidal Rule](maths/trapezoidal_rule.py)
* [Triplet Sum](maths/triplet_sum.py)
* [Twin Prime](maths/twin_prime.py)
* [Two Pointer](maths/two_pointer.py)
* [Two Sum](maths/two_sum.py)
* [Volume](maths/volume.py)
* [Zellers Congruence](maths/zellers_congruence.py)
## Matrix
* [Binary Search Matrix](matrix/binary_search_matrix.py)
* [Count Islands In Matrix](matrix/count_islands_in_matrix.py)
* [Count Negative Numbers In Sorted Matrix](matrix/count_negative_numbers_in_sorted_matrix.py)
* [Count Paths](matrix/count_paths.py)
* [Cramers Rule 2X2](matrix/cramers_rule_2x2.py)
* [Inverse Of Matrix](matrix/inverse_of_matrix.py)
* [Largest Square Area In Matrix](matrix/largest_square_area_in_matrix.py)
* [Matrix Based Game](matrix/matrix_based_game.py)
* [Matrix Class](matrix/matrix_class.py)
* [Matrix Equalization](matrix/matrix_equalization.py)
* [Matrix Multiplication Recursion](matrix/matrix_multiplication_recursion.py)
* [Matrix Operation](matrix/matrix_operation.py)
* [Max Area Of Island](matrix/max_area_of_island.py)
* [Median Matrix](matrix/median_matrix.py)
* [Nth Fibonacci Using Matrix Exponentiation](matrix/nth_fibonacci_using_matrix_exponentiation.py)
* [Pascal Triangle](matrix/pascal_triangle.py)
* [Rotate Matrix](matrix/rotate_matrix.py)
* [Searching In Sorted Matrix](matrix/searching_in_sorted_matrix.py)
* [Sherman Morrison](matrix/sherman_morrison.py)
* [Spiral Print](matrix/spiral_print.py)
* Tests
* [Test Matrix Operation](matrix/tests/test_matrix_operation.py)
* [Validate Sudoku Board](matrix/validate_sudoku_board.py)
## Networking Flow
* [Ford Fulkerson](networking_flow/ford_fulkerson.py)
* [Minimum Cut](networking_flow/minimum_cut.py)
## Neural Network
* Activation Functions
* [Binary Step](neural_network/activation_functions/binary_step.py)
* [Exponential Linear Unit](neural_network/activation_functions/exponential_linear_unit.py)
* [Gaussian Error Linear Unit](neural_network/activation_functions/gaussian_error_linear_unit.py)
* [Leaky Rectified Linear Unit](neural_network/activation_functions/leaky_rectified_linear_unit.py)
* [Mish](neural_network/activation_functions/mish.py)
* [Rectified Linear Unit](neural_network/activation_functions/rectified_linear_unit.py)
* [Scaled Exponential Linear Unit](neural_network/activation_functions/scaled_exponential_linear_unit.py)
* [Soboleva Modified Hyperbolic Tangent](neural_network/activation_functions/soboleva_modified_hyperbolic_tangent.py)
* [Softplus](neural_network/activation_functions/softplus.py)
* [Squareplus](neural_network/activation_functions/squareplus.py)
* [Swish](neural_network/activation_functions/swish.py)
* [Back Propagation Neural Network](neural_network/back_propagation_neural_network.py)
* [Convolution Neural Network](neural_network/convolution_neural_network.py)
* [Input Data](neural_network/input_data.py)
* [Simple Neural Network](neural_network/simple_neural_network.py)
* [Two Hidden Layers Neural Network](neural_network/two_hidden_layers_neural_network.py)
## Other
* [Activity Selection](other/activity_selection.py)
* [Alternative List Arrange](other/alternative_list_arrange.py)
* [Bankers Algorithm](other/bankers_algorithm.py)
* [Davis Putnam Logemann Loveland](other/davis_putnam_logemann_loveland.py)
* [Doomsday](other/doomsday.py)
* [Fischer Yates Shuffle](other/fischer_yates_shuffle.py)
* [Gauss Easter](other/gauss_easter.py)
* [Graham Scan](other/graham_scan.py)
* [Greedy](other/greedy.py)
* [Guess The Number Search](other/guess_the_number_search.py)
* [H Index](other/h_index.py)
* [Least Recently Used](other/least_recently_used.py)
* [Lfu Cache](other/lfu_cache.py)
* [Linear Congruential Generator](other/linear_congruential_generator.py)
* [Lru Cache](other/lru_cache.py)
* [Magicdiamondpattern](other/magicdiamondpattern.py)
* [Majority Vote Algorithm](other/majority_vote_algorithm.py)
* [Maximum Subsequence](other/maximum_subsequence.py)
* [Nested Brackets](other/nested_brackets.py)
* [Number Container System](other/number_container_system.py)
* [Password](other/password.py)
* [Quine](other/quine.py)
* [Scoring Algorithm](other/scoring_algorithm.py)
* [Sdes](other/sdes.py)
* [Sliding Window Maximum](other/sliding_window_maximum.py)
* [Tower Of Hanoi](other/tower_of_hanoi.py)
* [Word Search](other/word_search.py)
## Physics
* [Altitude Pressure](physics/altitude_pressure.py)
* [Archimedes Principle Of Buoyant Force](physics/archimedes_principle_of_buoyant_force.py)
* [Basic Orbital Capture](physics/basic_orbital_capture.py)
* [Casimir Effect](physics/casimir_effect.py)
* [Center Of Mass](physics/center_of_mass.py)
* [Centripetal Force](physics/centripetal_force.py)
* [Coulombs Law](physics/coulombs_law.py)
* [Doppler Frequency](physics/doppler_frequency.py)
* [Escape Velocity](physics/escape_velocity.py)
* [Grahams Law](physics/grahams_law.py)
* [Horizontal Projectile Motion](physics/horizontal_projectile_motion.py)
* [Hubble Parameter](physics/hubble_parameter.py)
* [Ideal Gas Law](physics/ideal_gas_law.py)
* [In Static Equilibrium](physics/in_static_equilibrium.py)
* [Kinetic Energy](physics/kinetic_energy.py)
* [Lens Formulae](physics/lens_formulae.py)
* [Lorentz Transformation Four Vector](physics/lorentz_transformation_four_vector.py)
* [Malus Law](physics/malus_law.py)
* [Mass Energy Equivalence](physics/mass_energy_equivalence.py)
* [Mirror Formulae](physics/mirror_formulae.py)
* [N Body Simulation](physics/n_body_simulation.py)
* [Newtons Law Of Gravitation](physics/newtons_law_of_gravitation.py)
* [Newtons Second Law Of Motion](physics/newtons_second_law_of_motion.py)
* [Orbital Transfer Work](physics/orbital_transfer_work.py)
* [Period Of Pendulum](physics/period_of_pendulum.py)
* [Photoelectric Effect](physics/photoelectric_effect.py)
* [Potential Energy](physics/potential_energy.py)
* [Rainfall Intensity](physics/rainfall_intensity.py)
* [Reynolds Number](physics/reynolds_number.py)
* [Rms Speed Of Molecule](physics/rms_speed_of_molecule.py)
* [Shear Stress](physics/shear_stress.py)
* [Speed Of Sound](physics/speed_of_sound.py)
* [Speeds Of Gas Molecules](physics/speeds_of_gas_molecules.py)
* [Terminal Velocity](physics/terminal_velocity.py)
## Project Euler
* Problem 001
* [Sol1](project_euler/problem_001/sol1.py)
* [Sol2](project_euler/problem_001/sol2.py)
* [Sol3](project_euler/problem_001/sol3.py)
* [Sol4](project_euler/problem_001/sol4.py)
* [Sol5](project_euler/problem_001/sol5.py)
* [Sol6](project_euler/problem_001/sol6.py)
* [Sol7](project_euler/problem_001/sol7.py)
* Problem 002
* [Sol1](project_euler/problem_002/sol1.py)
* [Sol2](project_euler/problem_002/sol2.py)
* [Sol3](project_euler/problem_002/sol3.py)
* [Sol4](project_euler/problem_002/sol4.py)
* [Sol5](project_euler/problem_002/sol5.py)
* Problem 003
* [Sol1](project_euler/problem_003/sol1.py)
* [Sol2](project_euler/problem_003/sol2.py)
* [Sol3](project_euler/problem_003/sol3.py)
* Problem 004
* [Sol1](project_euler/problem_004/sol1.py)
* [Sol2](project_euler/problem_004/sol2.py)
* Problem 005
* [Sol1](project_euler/problem_005/sol1.py)
* [Sol2](project_euler/problem_005/sol2.py)
* Problem 006
* [Sol1](project_euler/problem_006/sol1.py)
* [Sol2](project_euler/problem_006/sol2.py)
* [Sol3](project_euler/problem_006/sol3.py)
* [Sol4](project_euler/problem_006/sol4.py)
* Problem 007
* [Sol1](project_euler/problem_007/sol1.py)
* [Sol2](project_euler/problem_007/sol2.py)
* [Sol3](project_euler/problem_007/sol3.py)
* Problem 008
* [Sol1](project_euler/problem_008/sol1.py)
* [Sol2](project_euler/problem_008/sol2.py)
* [Sol3](project_euler/problem_008/sol3.py)
* Problem 009
* [Sol1](project_euler/problem_009/sol1.py)
* [Sol2](project_euler/problem_009/sol2.py)
* [Sol3](project_euler/problem_009/sol3.py)
* [Sol4](project_euler/problem_009/sol4.py)
* Problem 010
* [Sol1](project_euler/problem_010/sol1.py)
* [Sol2](project_euler/problem_010/sol2.py)
* [Sol3](project_euler/problem_010/sol3.py)
* Problem 011
* [Sol1](project_euler/problem_011/sol1.py)
* [Sol2](project_euler/problem_011/sol2.py)
* Problem 012
* [Sol1](project_euler/problem_012/sol1.py)
* [Sol2](project_euler/problem_012/sol2.py)
* Problem 013
* [Sol1](project_euler/problem_013/sol1.py)
* Problem 014
* [Sol1](project_euler/problem_014/sol1.py)
* [Sol2](project_euler/problem_014/sol2.py)
* Problem 015
* [Sol1](project_euler/problem_015/sol1.py)
* [Sol2](project_euler/problem_015/sol2.py)
* Problem 016
* [Sol1](project_euler/problem_016/sol1.py)
* [Sol2](project_euler/problem_016/sol2.py)
* Problem 017
* [Sol1](project_euler/problem_017/sol1.py)
* Problem 018
* [Solution](project_euler/problem_018/solution.py)
* Problem 019
* [Sol1](project_euler/problem_019/sol1.py)
* Problem 020
* [Sol1](project_euler/problem_020/sol1.py)
* [Sol2](project_euler/problem_020/sol2.py)
* [Sol3](project_euler/problem_020/sol3.py)
* [Sol4](project_euler/problem_020/sol4.py)
* Problem 021
* [Sol1](project_euler/problem_021/sol1.py)
* Problem 022
* [Sol1](project_euler/problem_022/sol1.py)
* [Sol2](project_euler/problem_022/sol2.py)
* Problem 023
* [Sol1](project_euler/problem_023/sol1.py)
* Problem 024
* [Sol1](project_euler/problem_024/sol1.py)
* Problem 025
* [Sol1](project_euler/problem_025/sol1.py)
* [Sol2](project_euler/problem_025/sol2.py)
* [Sol3](project_euler/problem_025/sol3.py)
* Problem 026
* [Sol1](project_euler/problem_026/sol1.py)
* Problem 027
* [Sol1](project_euler/problem_027/sol1.py)
* Problem 028
* [Sol1](project_euler/problem_028/sol1.py)
* Problem 029
* [Sol1](project_euler/problem_029/sol1.py)
* Problem 030
* [Sol1](project_euler/problem_030/sol1.py)
* Problem 031
* [Sol1](project_euler/problem_031/sol1.py)
* [Sol2](project_euler/problem_031/sol2.py)
* Problem 032
* [Sol32](project_euler/problem_032/sol32.py)
* Problem 033
* [Sol1](project_euler/problem_033/sol1.py)
* Problem 034
* [Sol1](project_euler/problem_034/sol1.py)
* Problem 035
* [Sol1](project_euler/problem_035/sol1.py)
* Problem 036
* [Sol1](project_euler/problem_036/sol1.py)
* Problem 037
* [Sol1](project_euler/problem_037/sol1.py)
* Problem 038
* [Sol1](project_euler/problem_038/sol1.py)
* Problem 039
* [Sol1](project_euler/problem_039/sol1.py)
* Problem 040
* [Sol1](project_euler/problem_040/sol1.py)
* Problem 041
* [Sol1](project_euler/problem_041/sol1.py)
* Problem 042
* [Solution42](project_euler/problem_042/solution42.py)
* Problem 043
* [Sol1](project_euler/problem_043/sol1.py)
* Problem 044
* [Sol1](project_euler/problem_044/sol1.py)
* Problem 045
* [Sol1](project_euler/problem_045/sol1.py)
* Problem 046
* [Sol1](project_euler/problem_046/sol1.py)
* Problem 047
* [Sol1](project_euler/problem_047/sol1.py)
* Problem 048
* [Sol1](project_euler/problem_048/sol1.py)
* Problem 049
* [Sol1](project_euler/problem_049/sol1.py)
* Problem 050
* [Sol1](project_euler/problem_050/sol1.py)
* Problem 051
* [Sol1](project_euler/problem_051/sol1.py)
* Problem 052
* [Sol1](project_euler/problem_052/sol1.py)
* Problem 053
* [Sol1](project_euler/problem_053/sol1.py)
* Problem 054
* [Sol1](project_euler/problem_054/sol1.py)
* [Test Poker Hand](project_euler/problem_054/test_poker_hand.py)
* Problem 055
* [Sol1](project_euler/problem_055/sol1.py)
* Problem 056
* [Sol1](project_euler/problem_056/sol1.py)
* Problem 057
* [Sol1](project_euler/problem_057/sol1.py)
* Problem 058
* [Sol1](project_euler/problem_058/sol1.py)
* Problem 059
* [Sol1](project_euler/problem_059/sol1.py)
* Problem 062
* [Sol1](project_euler/problem_062/sol1.py)
* Problem 063
* [Sol1](project_euler/problem_063/sol1.py)
* Problem 064
* [Sol1](project_euler/problem_064/sol1.py)
* Problem 065
* [Sol1](project_euler/problem_065/sol1.py)
* Problem 067
* [Sol1](project_euler/problem_067/sol1.py)
* [Sol2](project_euler/problem_067/sol2.py)
* Problem 068
* [Sol1](project_euler/problem_068/sol1.py)
* Problem 069
* [Sol1](project_euler/problem_069/sol1.py)
* Problem 070
* [Sol1](project_euler/problem_070/sol1.py)
* Problem 071
* [Sol1](project_euler/problem_071/sol1.py)
* Problem 072
* [Sol1](project_euler/problem_072/sol1.py)
* [Sol2](project_euler/problem_072/sol2.py)
* Problem 073
* [Sol1](project_euler/problem_073/sol1.py)
* Problem 074
* [Sol1](project_euler/problem_074/sol1.py)
* [Sol2](project_euler/problem_074/sol2.py)
* Problem 075
* [Sol1](project_euler/problem_075/sol1.py)
* Problem 076
* [Sol1](project_euler/problem_076/sol1.py)
* Problem 077
* [Sol1](project_euler/problem_077/sol1.py)
* Problem 078
* [Sol1](project_euler/problem_078/sol1.py)
* Problem 079
* [Sol1](project_euler/problem_079/sol1.py)
* Problem 080
* [Sol1](project_euler/problem_080/sol1.py)
* Problem 081
* [Sol1](project_euler/problem_081/sol1.py)
* Problem 082
* [Sol1](project_euler/problem_082/sol1.py)
* Problem 085
* [Sol1](project_euler/problem_085/sol1.py)
* Problem 086
* [Sol1](project_euler/problem_086/sol1.py)
* Problem 087
* [Sol1](project_euler/problem_087/sol1.py)
* Problem 089
* [Sol1](project_euler/problem_089/sol1.py)
* Problem 091
* [Sol1](project_euler/problem_091/sol1.py)
* Problem 092
* [Sol1](project_euler/problem_092/sol1.py)
* Problem 094
* [Sol1](project_euler/problem_094/sol1.py)
* Problem 095
* [Sol1](project_euler/problem_095/sol1.py)
* Problem 097
* [Sol1](project_euler/problem_097/sol1.py)
* Problem 099
* [Sol1](project_euler/problem_099/sol1.py)
* Problem 100
* [Sol1](project_euler/problem_100/sol1.py)
* Problem 101
* [Sol1](project_euler/problem_101/sol1.py)
* Problem 102
* [Sol1](project_euler/problem_102/sol1.py)
* Problem 104
* [Sol1](project_euler/problem_104/sol1.py)
* Problem 107
* [Sol1](project_euler/problem_107/sol1.py)
* Problem 109
* [Sol1](project_euler/problem_109/sol1.py)
* Problem 112
* [Sol1](project_euler/problem_112/sol1.py)
* Problem 113
* [Sol1](project_euler/problem_113/sol1.py)
* Problem 114
* [Sol1](project_euler/problem_114/sol1.py)
* Problem 115
* [Sol1](project_euler/problem_115/sol1.py)
* Problem 116
* [Sol1](project_euler/problem_116/sol1.py)
* Problem 117
* [Sol1](project_euler/problem_117/sol1.py)
* Problem 119
* [Sol1](project_euler/problem_119/sol1.py)
* Problem 120
* [Sol1](project_euler/problem_120/sol1.py)
* Problem 121
* [Sol1](project_euler/problem_121/sol1.py)
* Problem 122
* [Sol1](project_euler/problem_122/sol1.py)
* Problem 123
* [Sol1](project_euler/problem_123/sol1.py)
* Problem 125
* [Sol1](project_euler/problem_125/sol1.py)
* Problem 129
* [Sol1](project_euler/problem_129/sol1.py)
* Problem 131
* [Sol1](project_euler/problem_131/sol1.py)
* Problem 135
* [Sol1](project_euler/problem_135/sol1.py)
* Problem 136
* [Sol1](project_euler/problem_136/sol1.py)
* Problem 144
* [Sol1](project_euler/problem_144/sol1.py)
* Problem 145
* [Sol1](project_euler/problem_145/sol1.py)
* Problem 164
* [Sol1](project_euler/problem_164/sol1.py)
* Problem 173
* [Sol1](project_euler/problem_173/sol1.py)
* Problem 174
* [Sol1](project_euler/problem_174/sol1.py)
* Problem 180
* [Sol1](project_euler/problem_180/sol1.py)
* Problem 187
* [Sol1](project_euler/problem_187/sol1.py)
* Problem 188
* [Sol1](project_euler/problem_188/sol1.py)
* Problem 190
* [Sol1](project_euler/problem_190/sol1.py)
* Problem 191
* [Sol1](project_euler/problem_191/sol1.py)
* Problem 203
* [Sol1](project_euler/problem_203/sol1.py)
* Problem 205
* [Sol1](project_euler/problem_205/sol1.py)
* Problem 206
* [Sol1](project_euler/problem_206/sol1.py)
* Problem 207
* [Sol1](project_euler/problem_207/sol1.py)
* Problem 234
* [Sol1](project_euler/problem_234/sol1.py)
* Problem 301
* [Sol1](project_euler/problem_301/sol1.py)
* Problem 345
* [Sol1](project_euler/problem_345/sol1.py)
* Problem 493
* [Sol1](project_euler/problem_493/sol1.py)
* Problem 551
* [Sol1](project_euler/problem_551/sol1.py)
* Problem 587
* [Sol1](project_euler/problem_587/sol1.py)
* Problem 686
* [Sol1](project_euler/problem_686/sol1.py)
* Problem 800
* [Sol1](project_euler/problem_800/sol1.py)
## Quantum
* [Q Fourier Transform](quantum/q_fourier_transform.py)
## Scheduling
* [First Come First Served](scheduling/first_come_first_served.py)
* [Highest Response Ratio Next](scheduling/highest_response_ratio_next.py)
* [Job Sequence With Deadline](scheduling/job_sequence_with_deadline.py)
* [Job Sequencing With Deadline](scheduling/job_sequencing_with_deadline.py)
* [Multi Level Feedback Queue](scheduling/multi_level_feedback_queue.py)
* [Non Preemptive Shortest Job First](scheduling/non_preemptive_shortest_job_first.py)
* [Round Robin](scheduling/round_robin.py)
* [Shortest Job First](scheduling/shortest_job_first.py)
## Searches
* [Binary Search](searches/binary_search.py)
* [Binary Tree Traversal](searches/binary_tree_traversal.py)
* [Double Linear Search](searches/double_linear_search.py)
* [Double Linear Search Recursion](searches/double_linear_search_recursion.py)
* [Exponential Search](searches/exponential_search.py)
* [Fibonacci Search](searches/fibonacci_search.py)
* [Hill Climbing](searches/hill_climbing.py)
* [Interpolation Search](searches/interpolation_search.py)
* [Jump Search](searches/jump_search.py)
* [Linear Search](searches/linear_search.py)
* [Median Of Medians](searches/median_of_medians.py)
* [Quick Select](searches/quick_select.py)
* [Sentinel Linear Search](searches/sentinel_linear_search.py)
* [Simple Binary Search](searches/simple_binary_search.py)
* [Simulated Annealing](searches/simulated_annealing.py)
* [Tabu Search](searches/tabu_search.py)
* [Ternary Search](searches/ternary_search.py)
## Sorts
* [Bead Sort](sorts/bead_sort.py)
* [Binary Insertion Sort](sorts/binary_insertion_sort.py)
* [Bitonic Sort](sorts/bitonic_sort.py)
* [Bogo Sort](sorts/bogo_sort.py)
* [Bubble Sort](sorts/bubble_sort.py)
* [Bucket Sort](sorts/bucket_sort.py)
* [Circle Sort](sorts/circle_sort.py)
* [Cocktail Shaker Sort](sorts/cocktail_shaker_sort.py)
* [Comb Sort](sorts/comb_sort.py)
* [Counting Sort](sorts/counting_sort.py)
* [Cycle Sort](sorts/cycle_sort.py)
* [Cyclic Sort](sorts/cyclic_sort.py)
* [Double Sort](sorts/double_sort.py)
* [Dutch National Flag Sort](sorts/dutch_national_flag_sort.py)
* [Exchange Sort](sorts/exchange_sort.py)
* [External Sort](sorts/external_sort.py)
* [Gnome Sort](sorts/gnome_sort.py)
* [Heap Sort](sorts/heap_sort.py)
* [Insertion Sort](sorts/insertion_sort.py)
* [Intro Sort](sorts/intro_sort.py)
* [Iterative Merge Sort](sorts/iterative_merge_sort.py)
* [Merge Insertion Sort](sorts/merge_insertion_sort.py)
* [Merge Sort](sorts/merge_sort.py)
* [Msd Radix Sort](sorts/msd_radix_sort.py)
* [Natural Sort](sorts/natural_sort.py)
* [Odd Even Sort](sorts/odd_even_sort.py)
* [Odd Even Transposition Parallel](sorts/odd_even_transposition_parallel.py)
* [Odd Even Transposition Single Threaded](sorts/odd_even_transposition_single_threaded.py)
* [Pancake Sort](sorts/pancake_sort.py)
* [Patience Sort](sorts/patience_sort.py)
* [Pigeon Sort](sorts/pigeon_sort.py)
* [Pigeonhole Sort](sorts/pigeonhole_sort.py)
* [Quick Sort](sorts/quick_sort.py)
* [Quick Sort 3 Partition](sorts/quick_sort_3_partition.py)
* [Radix Sort](sorts/radix_sort.py)
* [Recursive Insertion Sort](sorts/recursive_insertion_sort.py)
* [Recursive Mergesort Array](sorts/recursive_mergesort_array.py)
* [Recursive Quick Sort](sorts/recursive_quick_sort.py)
* [Selection Sort](sorts/selection_sort.py)
* [Shell Sort](sorts/shell_sort.py)
* [Shrink Shell Sort](sorts/shrink_shell_sort.py)
* [Slowsort](sorts/slowsort.py)
* [Stalin Sort](sorts/stalin_sort.py)
* [Stooge Sort](sorts/stooge_sort.py)
* [Strand Sort](sorts/strand_sort.py)
* [Tim Sort](sorts/tim_sort.py)
* [Topological Sort](sorts/topological_sort.py)
* [Tree Sort](sorts/tree_sort.py)
* [Unknown Sort](sorts/unknown_sort.py)
* [Wiggle Sort](sorts/wiggle_sort.py)
## Strings
* [Aho Corasick](strings/aho_corasick.py)
* [Alternative String Arrange](strings/alternative_string_arrange.py)
* [Anagrams](strings/anagrams.py)
* [Autocomplete Using Trie](strings/autocomplete_using_trie.py)
* [Barcode Validator](strings/barcode_validator.py)
* [Bitap String Match](strings/bitap_string_match.py)
* [Boyer Moore Search](strings/boyer_moore_search.py)
* [Camel Case To Snake Case](strings/camel_case_to_snake_case.py)
* [Can String Be Rearranged As Palindrome](strings/can_string_be_rearranged_as_palindrome.py)
* [Capitalize](strings/capitalize.py)
* [Check Anagrams](strings/check_anagrams.py)
* [Count Vowels](strings/count_vowels.py)
* [Credit Card Validator](strings/credit_card_validator.py)
* [Damerau Levenshtein Distance](strings/damerau_levenshtein_distance.py)
* [Detecting English Programmatically](strings/detecting_english_programmatically.py)
* [Dna](strings/dna.py)
* [Edit Distance](strings/edit_distance.py)
* [Frequency Finder](strings/frequency_finder.py)
* [Hamming Distance](strings/hamming_distance.py)
* [Indian Phone Validator](strings/indian_phone_validator.py)
* [Is Contains Unique Chars](strings/is_contains_unique_chars.py)
* [Is Isogram](strings/is_isogram.py)
* [Is Pangram](strings/is_pangram.py)
* [Is Polish National Id](strings/is_polish_national_id.py)
* [Is Spain National Id](strings/is_spain_national_id.py)
* [Is Srilankan Phone Number](strings/is_srilankan_phone_number.py)
* [Is Valid Email Address](strings/is_valid_email_address.py)
* [Jaro Winkler](strings/jaro_winkler.py)
* [Join](strings/join.py)
* [Knuth Morris Pratt](strings/knuth_morris_pratt.py)
* [Levenshtein Distance](strings/levenshtein_distance.py)
* [Lower](strings/lower.py)
* [Manacher](strings/manacher.py)
* [Min Cost String Conversion](strings/min_cost_string_conversion.py)
* [Naive String Search](strings/naive_string_search.py)
* [Ngram](strings/ngram.py)
* [Palindrome](strings/palindrome.py)
* [Pig Latin](strings/pig_latin.py)
* [Prefix Function](strings/prefix_function.py)
* [Rabin Karp](strings/rabin_karp.py)
* [Remove Duplicate](strings/remove_duplicate.py)
* [Reverse Letters](strings/reverse_letters.py)
* [Reverse Words](strings/reverse_words.py)
* [Snake Case To Camel Pascal Case](strings/snake_case_to_camel_pascal_case.py)
* [Split](strings/split.py)
* [String Switch Case](strings/string_switch_case.py)
* [Strip](strings/strip.py)
* [Text Justification](strings/text_justification.py)
* [Title](strings/title.py)
* [Top K Frequent Words](strings/top_k_frequent_words.py)
* [Upper](strings/upper.py)
* [Wave String](strings/wave_string.py)
* [Wildcard Pattern Matching](strings/wildcard_pattern_matching.py)
* [Word Occurrence](strings/word_occurrence.py)
* [Word Patterns](strings/word_patterns.py)
* [Z Function](strings/z_function.py)
## Web Programming
* [Co2 Emission](web_programming/co2_emission.py)
* [Covid Stats Via Xpath](web_programming/covid_stats_via_xpath.py)
* [Crawl Google Results](web_programming/crawl_google_results.py)
* [Crawl Google Scholar Citation](web_programming/crawl_google_scholar_citation.py)
* [Currency Converter](web_programming/currency_converter.py)
* [Current Stock Price](web_programming/current_stock_price.py)
* [Current Weather](web_programming/current_weather.py)
* [Daily Horoscope](web_programming/daily_horoscope.py)
* [Download Images From Google Query](web_programming/download_images_from_google_query.py)
* [Emails From Url](web_programming/emails_from_url.py)
* [Fetch Anime And Play](web_programming/fetch_anime_and_play.py)
* [Fetch Bbc News](web_programming/fetch_bbc_news.py)
* [Fetch Github Info](web_programming/fetch_github_info.py)
* [Fetch Jobs](web_programming/fetch_jobs.py)
* [Fetch Quotes](web_programming/fetch_quotes.py)
* [Fetch Well Rx Price](web_programming/fetch_well_rx_price.py)
* [Get Amazon Product Data](web_programming/get_amazon_product_data.py)
* [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py)
* [Get Ip Geolocation](web_programming/get_ip_geolocation.py)
* [Get Top Billionaires](web_programming/get_top_billionaires.py)
* [Get Top Hn Posts](web_programming/get_top_hn_posts.py)
* [Giphy](web_programming/giphy.py)
* [Instagram Crawler](web_programming/instagram_crawler.py)
* [Instagram Pic](web_programming/instagram_pic.py)
* [Instagram Video](web_programming/instagram_video.py)
* [Nasa Data](web_programming/nasa_data.py)
* [Open Google Results](web_programming/open_google_results.py)
* [Random Anime Character](web_programming/random_anime_character.py)
* [Recaptcha Verification](web_programming/recaptcha_verification.py)
* [Reddit](web_programming/reddit.py)
* [Search Books By Isbn](web_programming/search_books_by_isbn.py)
* [Slack Message](web_programming/slack_message.py)
* [Test Fetch Github Info](web_programming/test_fetch_github_info.py)
* [World Covid19 Stats](web_programming/world_covid19_stats.py)
================================================
FILE: LICENSE.md
================================================
## MIT License
Copyright (c) 2016-2022 TheAlgorithms and contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
All algorithms implemented in Python - for education 📚
Implementations are for learning purposes only. They may be less efficient than the implementations in the Python standard library. Use them at your discretion.
## 🚀 Getting Started
📋 Read through our [Contribution Guidelines](CONTRIBUTING.md) before you contribute.
## 🌐 Community Channels
We are on [Discord](https://the-algorithms.com/discord) and [Gitter](https://gitter.im/TheAlgorithms/community)! Community channels are a great way for you to ask questions and get help. Please join us!
## 📜 List of Algorithms
See our [directory](DIRECTORY.md) for easier navigation and a better overview of the project.
================================================
FILE: audio_filters/README.md
================================================
# Audio Filter
Audio filters work on the frequency content of an audio signal, attenuating unwanted frequencies and amplifying wanted ones.
They are used in anything related to sound, from radio communication to hi-fi systems.
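As a quick orientation, here is a minimal usage sketch (it assumes the `butterworth_filter` and `iir_filter` modules in this directory are importable as the `audio_filters` package): it builds a 1 kHz low-pass filter and runs a 5 kHz sine wave through it, so the peak output should be well below the input amplitude.

```python
from math import sin, tau

from audio_filters.butterworth_filter import make_lowpass

samplerate = 48_000
lowpass = make_lowpass(frequency=1_000, samplerate=samplerate)

# A 5 kHz tone lies well above the 1 kHz cutoff, so the filter attenuates it.
signal = [sin(tau * 5_000 * n / samplerate) for n in range(480)]
filtered = [lowpass.process(sample) for sample in signal]

# After the filter settles, the peak output should be well below the input's peak of 1.0.
print(max(abs(sample) for sample in filtered[100:]))
```

See `show_response.py` for plotting a filter's frequency and phase response.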
================================================
FILE: audio_filters/__init__.py
================================================
================================================
FILE: audio_filters/butterworth_filter.py
================================================
from math import cos, sin, sqrt, tau
from audio_filters.iir_filter import IIRFilter
"""
Create 2nd-order IIR filters with Butterworth design.
Code based on https://webaudio.github.io/Audio-EQ-Cookbook/audio-eq-cookbook.html
Alternatively you can use scipy.signal.butter, which should yield the same results.
"""
def make_lowpass(
frequency: int,
samplerate: int,
q_factor: float = 1 / sqrt(2),
) -> IIRFilter:
"""
Creates a low-pass filter
>>> filter = make_lowpass(1000, 48000)
>>> filter.a_coeffs + filter.b_coeffs # doctest: +NORMALIZE_WHITESPACE
[1.0922959556412573, -1.9828897227476208, 0.9077040443587427, 0.004277569313094809,
0.008555138626189618, 0.004277569313094809]
"""
w0 = tau * frequency / samplerate
_sin = sin(w0)
_cos = cos(w0)
alpha = _sin / (2 * q_factor)
b0 = (1 - _cos) / 2
b1 = 1 - _cos
a0 = 1 + alpha
a1 = -2 * _cos
a2 = 1 - alpha
filt = IIRFilter(2)
filt.set_coefficients([a0, a1, a2], [b0, b1, b0])
return filt
def make_highpass(
frequency: int,
samplerate: int,
q_factor: float = 1 / sqrt(2),
) -> IIRFilter:
"""
Creates a high-pass filter
>>> filter = make_highpass(1000, 48000)
>>> filter.a_coeffs + filter.b_coeffs # doctest: +NORMALIZE_WHITESPACE
[1.0922959556412573, -1.9828897227476208, 0.9077040443587427, 0.9957224306869052,
-1.9914448613738105, 0.9957224306869052]
"""
w0 = tau * frequency / samplerate
_sin = sin(w0)
_cos = cos(w0)
alpha = _sin / (2 * q_factor)
b0 = (1 + _cos) / 2
b1 = -1 - _cos
a0 = 1 + alpha
a1 = -2 * _cos
a2 = 1 - alpha
filt = IIRFilter(2)
filt.set_coefficients([a0, a1, a2], [b0, b1, b0])
return filt
def make_bandpass(
frequency: int,
samplerate: int,
q_factor: float = 1 / sqrt(2),
) -> IIRFilter:
"""
Creates a band-pass filter
>>> filter = make_bandpass(1000, 48000)
>>> filter.a_coeffs + filter.b_coeffs # doctest: +NORMALIZE_WHITESPACE
[1.0922959556412573, -1.9828897227476208, 0.9077040443587427, 0.06526309611002579,
0, -0.06526309611002579]
"""
w0 = tau * frequency / samplerate
_sin = sin(w0)
_cos = cos(w0)
alpha = _sin / (2 * q_factor)
b0 = _sin / 2
b1 = 0
b2 = -b0
a0 = 1 + alpha
a1 = -2 * _cos
a2 = 1 - alpha
filt = IIRFilter(2)
filt.set_coefficients([a0, a1, a2], [b0, b1, b2])
return filt
def make_allpass(
frequency: int,
samplerate: int,
q_factor: float = 1 / sqrt(2),
) -> IIRFilter:
"""
Creates an all-pass filter
>>> filter = make_allpass(1000, 48000)
>>> filter.a_coeffs + filter.b_coeffs # doctest: +NORMALIZE_WHITESPACE
[1.0922959556412573, -1.9828897227476208, 0.9077040443587427, 0.9077040443587427,
-1.9828897227476208, 1.0922959556412573]
"""
w0 = tau * frequency / samplerate
_sin = sin(w0)
_cos = cos(w0)
alpha = _sin / (2 * q_factor)
b0 = 1 - alpha
b1 = -2 * _cos
b2 = 1 + alpha
filt = IIRFilter(2)
filt.set_coefficients([b2, b1, b0], [b0, b1, b2])
return filt
def make_peak(
frequency: int,
samplerate: int,
gain_db: float,
q_factor: float = 1 / sqrt(2),
) -> IIRFilter:
"""
Creates a peak filter
>>> filter = make_peak(1000, 48000, 6)
>>> filter.a_coeffs + filter.b_coeffs # doctest: +NORMALIZE_WHITESPACE
[1.0653405327119334, -1.9828897227476208, 0.9346594672880666, 1.1303715025601122,
-1.9828897227476208, 0.8696284974398878]
"""
w0 = tau * frequency / samplerate
_sin = sin(w0)
_cos = cos(w0)
alpha = _sin / (2 * q_factor)
big_a = 10 ** (gain_db / 40)
b0 = 1 + alpha * big_a
b1 = -2 * _cos
b2 = 1 - alpha * big_a
a0 = 1 + alpha / big_a
a1 = -2 * _cos
a2 = 1 - alpha / big_a
filt = IIRFilter(2)
filt.set_coefficients([a0, a1, a2], [b0, b1, b2])
return filt
def make_lowshelf(
frequency: int,
samplerate: int,
gain_db: float,
q_factor: float = 1 / sqrt(2),
) -> IIRFilter:
"""
Creates a low-shelf filter
>>> filter = make_lowshelf(1000, 48000, 6)
>>> filter.a_coeffs + filter.b_coeffs # doctest: +NORMALIZE_WHITESPACE
[3.0409336710888786, -5.608870992220748, 2.602157875636628, 3.139954022810743,
-5.591841778072785, 2.5201667380627257]
"""
w0 = tau * frequency / samplerate
_sin = sin(w0)
_cos = cos(w0)
alpha = _sin / (2 * q_factor)
big_a = 10 ** (gain_db / 40)
pmc = (big_a + 1) - (big_a - 1) * _cos
ppmc = (big_a + 1) + (big_a - 1) * _cos
mpc = (big_a - 1) - (big_a + 1) * _cos
pmpc = (big_a - 1) + (big_a + 1) * _cos
aa2 = 2 * sqrt(big_a) * alpha
b0 = big_a * (pmc + aa2)
b1 = 2 * big_a * mpc
b2 = big_a * (pmc - aa2)
a0 = ppmc + aa2
a1 = -2 * pmpc
a2 = ppmc - aa2
filt = IIRFilter(2)
filt.set_coefficients([a0, a1, a2], [b0, b1, b2])
return filt
def make_highshelf(
frequency: int,
samplerate: int,
gain_db: float,
q_factor: float = 1 / sqrt(2),
) -> IIRFilter:
"""
Creates a high-shelf filter
>>> filter = make_highshelf(1000, 48000, 6)
>>> filter.a_coeffs + filter.b_coeffs # doctest: +NORMALIZE_WHITESPACE
[2.2229172136088806, -3.9587208137297303, 1.7841414181566304, 4.295432981120543,
-7.922740859457287, 3.6756456963725253]
"""
w0 = tau * frequency / samplerate
_sin = sin(w0)
_cos = cos(w0)
alpha = _sin / (2 * q_factor)
big_a = 10 ** (gain_db / 40)
pmc = (big_a + 1) - (big_a - 1) * _cos
ppmc = (big_a + 1) + (big_a - 1) * _cos
mpc = (big_a - 1) - (big_a + 1) * _cos
pmpc = (big_a - 1) + (big_a + 1) * _cos
aa2 = 2 * sqrt(big_a) * alpha
b0 = big_a * (ppmc + aa2)
b1 = -2 * big_a * pmpc
b2 = big_a * (ppmc - aa2)
a0 = pmc + aa2
a1 = 2 * mpc
a2 = pmc - aa2
filt = IIRFilter(2)
filt.set_coefficients([a0, a1, a2], [b0, b1, b2])
return filt
================================================
FILE: audio_filters/equal_loudness_filter.py.broken.txt
================================================
from json import loads
from pathlib import Path
import numpy as np
from yulewalker import yulewalk
from audio_filters.butterworth_filter import make_highpass
from audio_filters.iir_filter import IIRFilter
data = loads((Path(__file__).resolve().parent / "loudness_curve.json").read_text())
class EqualLoudnessFilter:
r"""
An equal-loudness filter which compensates for the human ear's non-linear response
to sound.
It does this by cascading a yulewalk filter with a Butterworth high-pass filter.
Designed for use with samplerate of 44.1kHz and above. If you're using a lower
samplerate, use with caution.
Code based on matlab implementation at https://bit.ly/3eqh2HU
(url shortened for ruff)
Target curve: https://i.imgur.com/3g2VfaM.png
Yulewalk response: https://i.imgur.com/J9LnJ4C.png
Butterworth and overall response: https://i.imgur.com/3g2VfaM.png
Images and original matlab implementation by David Robinson, 2001
"""
def __init__(self, samplerate: int = 44100) -> None:
self.yulewalk_filter = IIRFilter(10)
self.butterworth_filter = make_highpass(150, samplerate)
# pad the data to nyquist
curve_freqs = np.array(data["frequencies"] + [max(20000.0, samplerate / 2)])
curve_gains = np.array(data["gains"] + [140])
# Convert to angular frequency
freqs_normalized = curve_freqs / samplerate * 2
# Invert the curve and normalize to 0dB
gains_normalized = np.power(10, (np.min(curve_gains) - curve_gains) / 20)
# Scipy's `yulewalk` function is a stub, so we're using the
# `yulewalker` library instead.
# This function computes the coefficients using a least-squares
# fit to the specified curve.
ya, yb = yulewalk(10, freqs_normalized, gains_normalized)
self.yulewalk_filter.set_coefficients(ya, yb)
def process(self, sample: float) -> float:
"""
Process a single sample through both filters
>>> filt = EqualLoudnessFilter()
>>> filt.process(0.0)
0.0
"""
tmp = self.yulewalk_filter.process(sample)
return self.butterworth_filter.process(tmp)
================================================
FILE: audio_filters/iir_filter.py
================================================
from __future__ import annotations
class IIRFilter:
r"""
N-Order IIR filter
Assumes working with float samples normalized on [-1, 1]
---
Implementation details:
Based on the 2nd-order function from
https://en.wikipedia.org/wiki/Digital_biquad_filter,
this generalized N-order function was made.
Using the following transfer function
.. math:: H(z)=\frac{b_{0}+b_{1}z^{-1}+b_{2}z^{-2}+...+b_{k}z^{-k}}
{a_{0}+a_{1}z^{-1}+a_{2}z^{-2}+...+a_{k}z^{-k}}
we can rewrite this to
.. math:: y[n]={\frac{1}{a_{0}}}
\left(\left(b_{0}x[n]+b_{1}x[n-1]+b_{2}x[n-2]+...+b_{k}x[n-k]\right)-
\left(a_{1}y[n-1]+a_{2}y[n-2]+...+a_{k}y[n-k]\right)\right)
"""
def __init__(self, order: int) -> None:
self.order = order
# a_{0} ... a_{k}
self.a_coeffs = [1.0] + [0.0] * order
# b_{0} ... b_{k}
self.b_coeffs = [1.0] + [0.0] * order
# x[n-1] ... x[n-k]
self.input_history = [0.0] * self.order
# y[n-1] ... y[n-k]
self.output_history = [0.0] * self.order
def set_coefficients(self, a_coeffs: list[float], b_coeffs: list[float]) -> None:
"""
Set the coefficients for the IIR filter.
These should both be of size `order` + 1.
:math:`a_0` may be left out, and it will use 1.0 as default value.
This method works well with scipy's filter design functions
>>> # Make a 2nd-order 1000Hz butterworth lowpass filter
>>> import scipy.signal
>>> b_coeffs, a_coeffs = scipy.signal.butter(2, 1000,
... btype='lowpass',
... fs=48000)
>>> filt = IIRFilter(2)
>>> filt.set_coefficients(a_coeffs, b_coeffs)
"""
if len(a_coeffs) < self.order + 1:
a_coeffs = [1.0, *a_coeffs]
if len(a_coeffs) != self.order + 1:
msg = (
f"Expected a_coeffs to have {self.order + 1} elements "
f"for {self.order}-order filter, got {len(a_coeffs)}"
)
raise ValueError(msg)
if len(b_coeffs) != self.order + 1:
msg = (
f"Expected b_coeffs to have {self.order + 1} elements "
f"for {self.order}-order filter, got {len(a_coeffs)}"
)
raise ValueError(msg)
self.a_coeffs = a_coeffs
self.b_coeffs = b_coeffs
def process(self, sample: float) -> float:
"""
Calculate :math:`y[n]`
>>> filt = IIRFilter(2)
>>> filt.process(0)
0.0
"""
result = 0.0
# Start at index 1 and do index 0 at the end.
for i in range(1, self.order + 1):
result += (
self.b_coeffs[i] * self.input_history[i - 1]
- self.a_coeffs[i] * self.output_history[i - 1]
)
result = (result + self.b_coeffs[0] * sample) / self.a_coeffs[0]
self.input_history[1:] = self.input_history[:-1]
self.output_history[1:] = self.output_history[:-1]
self.input_history[0] = sample
self.output_history[0] = result
return result
================================================
FILE: audio_filters/loudness_curve.json
================================================
{
"_comment": "The following is a representative average of the Equal Loudness Contours as measured by Robinson and Dadson, 1956",
"_doi": "10.1088/0508-3443/7/5/302",
"frequencies": [
0,
20,
30,
40,
50,
60,
70,
80,
90,
100,
200,
300,
400,
500,
600,
700,
800,
900,
1000,
1500,
2000,
2500,
3000,
3700,
4000,
5000,
6000,
7000,
8000,
9000,
10000,
12000,
15000,
20000
],
"gains": [
120,
113,
103,
97,
93,
91,
89,
87,
86,
85,
78,
76,
76,
76,
76,
77,
78,
79.5,
80,
79,
77,
74,
71.5,
70,
70.5,
74,
79,
84,
86,
86,
85,
95,
110,
125
]
}
================================================
FILE: audio_filters/show_response.py
================================================
from __future__ import annotations
from abc import abstractmethod
from math import pi
from typing import Protocol
import matplotlib.pyplot as plt
import numpy as np
class FilterType(Protocol):
@abstractmethod
def process(self, sample: float) -> float:
"""
Calculate y[n]
>>> issubclass(FilterType, Protocol)
True
"""
def get_bounds(
fft_results: np.ndarray, samplerate: int
) -> tuple[int | float, int | float]:
"""
Get bounds for printing fft results
>>> import numpy
>>> array = numpy.linspace(-20.0, 20.0, 1000)
>>> get_bounds(array, 1000)
(-20, 20)
"""
lowest = min([-20, np.min(fft_results[1 : samplerate // 2 - 1])])
highest = max([20, np.max(fft_results[1 : samplerate // 2 - 1])])
return lowest, highest
def show_frequency_response(filter_type: FilterType, samplerate: int) -> None:
"""
Show frequency response of a filter
>>> from audio_filters.iir_filter import IIRFilter
>>> filt = IIRFilter(4)
>>> show_frequency_response(filt, 48000)
"""
size = 512
inputs = [1] + [0] * (size - 1)
outputs = [filter_type.process(item) for item in inputs]
filler = [0] * (samplerate - size) # zero-padding
outputs += filler
fft_out = np.abs(np.fft.fft(outputs))
fft_db = 20 * np.log10(fft_out)
# Frequencies on log scale from 24 to nyquist frequency
plt.xlim(24, samplerate / 2 - 1)
plt.xlabel("Frequency (Hz)")
plt.xscale("log")
# Display within reasonable bounds
bounds = get_bounds(fft_db, samplerate)
plt.ylim(max([-80, bounds[0]]), min([80, bounds[1]]))
plt.ylabel("Gain (dB)")
plt.plot(fft_db)
plt.show()
def show_phase_response(filter_type: FilterType, samplerate: int) -> None:
"""
Show phase response of a filter
>>> from audio_filters.iir_filter import IIRFilter
>>> filt = IIRFilter(4)
>>> show_phase_response(filt, 48000)
"""
size = 512
inputs = [1] + [0] * (size - 1)
outputs = [filter_type.process(item) for item in inputs]
filler = [0] * (samplerate - size) # zero-padding
outputs += filler
fft_out = np.angle(np.fft.fft(outputs))
# Frequencies on log scale from 24 to nyquist frequency
plt.xlim(24, samplerate / 2 - 1)
plt.xlabel("Frequency (Hz)")
plt.xscale("log")
plt.ylim(-2 * pi, 2 * pi)
plt.ylabel("Phase shift (Radians)")
plt.plot(np.unwrap(fft_out, -2 * pi))
plt.show()
================================================
FILE: backtracking/README.md
================================================
# Backtracking
Backtracking speeds up an exhaustive search by abandoning partial candidates as soon as they cannot be extended to a valid solution of the problem.
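Below is a minimal, self-contained sketch of that idea (the problem and the name `subsets_with_sum` are illustrative, not taken from a specific module in this directory): extend a partial candidate, recurse, and undo the choice, pruning branches that cannot reach a solution.

```python
def subsets_with_sum(n: int, target: int) -> list[list[int]]:
    """Return all subsets of {1..n} whose elements sum to target."""
    results: list[list[int]] = []

    def explore(start: int, remaining: int, partial: list[int]) -> None:
        if remaining == 0:            # the partial candidate is a full solution
            results.append(partial[:])
            return
        for value in range(start, n + 1):
            if value > remaining:     # prune: no extension of this branch can succeed
                break
            partial.append(value)                             # choose
            explore(value + 1, remaining - value, partial)    # explore
            partial.pop()                                     # undo the choice (backtrack)

    explore(1, target, [])
    return results


print(subsets_with_sum(5, 5))  # [[1, 4], [2, 3], [5]]
```

The modules in this directory apply the same choose / explore / undo pattern to problems such as N-queens, sudoku, and graph coloring.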
================================================
FILE: backtracking/__init__.py
================================================
================================================
FILE: backtracking/all_combinations.py
================================================
"""
In this problem, we want to determine all possible combinations of k
numbers out of 1 ... n. We use backtracking to solve this problem.
Time complexity: O(C(n,k)) which is O(n choose k) = O((n!/(k! * (n - k)!))),
"""
from __future__ import annotations
from itertools import combinations
def combination_lists(n: int, k: int) -> list[list[int]]:
"""
Generates all possible combinations of k numbers out of 1 ... n using itertools.
>>> combination_lists(n=4, k=2)
[[1, 2], [1, 3], [1, 4], [2, 3], [2, 4], [3, 4]]
"""
return [list(x) for x in combinations(range(1, n + 1), k)]
def generate_all_combinations(n: int, k: int) -> list[list[int]]:
"""
Generates all possible combinations of k numbers out of 1 ... n using backtracking.
>>> generate_all_combinations(n=4, k=2)
[[1, 2], [1, 3], [1, 4], [2, 3], [2, 4], [3, 4]]
>>> generate_all_combinations(n=0, k=0)
[[]]
>>> generate_all_combinations(n=10, k=-1)
Traceback (most recent call last):
...
ValueError: k must not be negative
>>> generate_all_combinations(n=-1, k=10)
Traceback (most recent call last):
...
ValueError: n must not be negative
>>> generate_all_combinations(n=5, k=4)
[[1, 2, 3, 4], [1, 2, 3, 5], [1, 2, 4, 5], [1, 3, 4, 5], [2, 3, 4, 5]]
>>> generate_all_combinations(n=3, k=3)
[[1, 2, 3]]
>>> generate_all_combinations(n=3, k=1)
[[1], [2], [3]]
>>> generate_all_combinations(n=1, k=0)
[[]]
>>> generate_all_combinations(n=1, k=1)
[[1]]
>>> from itertools import combinations
>>> all(generate_all_combinations(n, k) == combination_lists(n, k)
... for n in range(1, 6) for k in range(1, 6))
True
"""
if k < 0:
raise ValueError("k must not be negative")
if n < 0:
raise ValueError("n must not be negative")
result: list[list[int]] = []
create_all_state(1, n, k, [], result)
return result
def create_all_state(
increment: int,
total_number: int,
level: int,
current_list: list[int],
total_list: list[list[int]],
) -> None:
"""
Helper function to recursively build all combinations.
>>> create_all_state(1, 4, 2, [], result := [])
>>> result
[[1, 2], [1, 3], [1, 4], [2, 3], [2, 4], [3, 4]]
>>> create_all_state(1, 3, 3, [], result := [])
>>> result
[[1, 2, 3]]
>>> create_all_state(2, 2, 1, [1], result := [])
>>> result
[[1, 2]]
>>> create_all_state(1, 0, 0, [], result := [])
>>> result
[[]]
>>> create_all_state(1, 4, 0, [1, 2], result := [])
>>> result
[[1, 2]]
>>> create_all_state(5, 4, 2, [1, 2], result := [])
>>> result
[]
"""
if level == 0:
total_list.append(current_list[:])
return
for i in range(increment, total_number - level + 2):
current_list.append(i)
create_all_state(i + 1, total_number, level - 1, current_list, total_list)
current_list.pop()
if __name__ == "__main__":
from doctest import testmod
testmod()
print(generate_all_combinations(n=4, k=2))
tests = ((n, k) for n in range(1, 5) for k in range(1, 5))
for n, k in tests:
print(n, k, generate_all_combinations(n, k) == combination_lists(n, k))
print("Benchmark:")
from timeit import timeit
for func in ("combination_lists", "generate_all_combinations"):
print(f"{func:>25}(): {timeit(f'{func}(n=4, k = 2)', globals=globals())}")
================================================
FILE: backtracking/all_permutations.py
================================================
"""
In this problem, we want to determine all possible permutations
of the given sequence. We use backtracking to solve this problem.
Time complexity: O(n! * n),
where n denotes the length of the given sequence.
"""
from __future__ import annotations
def generate_all_permutations(sequence: list[int | str]) -> None:
create_state_space_tree(sequence, [], 0, [0 for i in range(len(sequence))])
def create_state_space_tree(
sequence: list[int | str],
current_sequence: list[int | str],
index: int,
index_used: list[int],
) -> None:
"""
Creates a state space tree to iterate through each branch using DFS.
We know that each state has exactly len(sequence) - index children.
It terminates when it reaches the end of the given sequence.
:param sequence: The input sequence for which permutations are generated.
:param current_sequence: The current permutation being built.
:param index: The current index in the sequence.
:param index_used: list to track which elements are used in permutation.
Example 1:
>>> sequence = [1, 2, 3]
>>> current_sequence = []
>>> index_used = [False, False, False]
>>> create_state_space_tree(sequence, current_sequence, 0, index_used)
[1, 2, 3]
[1, 3, 2]
[2, 1, 3]
[2, 3, 1]
[3, 1, 2]
[3, 2, 1]
Example 2:
>>> sequence = ["A", "B", "C"]
>>> current_sequence = []
>>> index_used = [False, False, False]
>>> create_state_space_tree(sequence, current_sequence, 0, index_used)
['A', 'B', 'C']
['A', 'C', 'B']
['B', 'A', 'C']
['B', 'C', 'A']
['C', 'A', 'B']
['C', 'B', 'A']
Example 3:
>>> sequence = [1]
>>> current_sequence = []
>>> index_used = [False]
>>> create_state_space_tree(sequence, current_sequence, 0, index_used)
[1]
"""
if index == len(sequence):
print(current_sequence)
return
for i in range(len(sequence)):
if not index_used[i]:
current_sequence.append(sequence[i])
index_used[i] = True
create_state_space_tree(sequence, current_sequence, index + 1, index_used)
current_sequence.pop()
index_used[i] = False
"""
remove the comment to take an input from the user
print("Enter the elements")
sequence = list(map(int, input().split()))
"""
sequence: list[int | str] = [3, 1, 2, 4]
generate_all_permutations(sequence)
sequence_2: list[int | str] = ["A", "B", "C"]
generate_all_permutations(sequence_2)
================================================
FILE: backtracking/all_subsequences.py
================================================
"""
In this problem, we want to determine all possible subsequences
of the given sequence. We use backtracking to solve this problem.
Time complexity: O(2^n),
where n denotes the length of the given sequence.
"""
from __future__ import annotations
from typing import Any
def generate_all_subsequences(sequence: list[Any]) -> None:
create_state_space_tree(sequence, [], 0)
def create_state_space_tree(
sequence: list[Any], current_subsequence: list[Any], index: int
) -> None:
"""
Creates a state space tree to iterate through each branch using DFS.
We know that each state has exactly two children.
It terminates when it reaches the end of the given sequence.
:param sequence: The input sequence for which subsequences are generated.
:param current_subsequence: The current subsequence being built.
:param index: The current index in the sequence.
Example:
>>> sequence = [3, 2, 1]
>>> current_subsequence = []
>>> create_state_space_tree(sequence, current_subsequence, 0)
[]
[1]
[2]
[2, 1]
[3]
[3, 1]
[3, 2]
[3, 2, 1]
>>> sequence = ["A", "B"]
>>> current_subsequence = []
>>> create_state_space_tree(sequence, current_subsequence, 0)
[]
['B']
['A']
['A', 'B']
>>> sequence = []
>>> current_subsequence = []
>>> create_state_space_tree(sequence, current_subsequence, 0)
[]
>>> sequence = [1, 2, 3, 4]
>>> current_subsequence = []
>>> create_state_space_tree(sequence, current_subsequence, 0)
[]
[4]
[3]
[3, 4]
[2]
[2, 4]
[2, 3]
[2, 3, 4]
[1]
[1, 4]
[1, 3]
[1, 3, 4]
[1, 2]
[1, 2, 4]
[1, 2, 3]
[1, 2, 3, 4]
"""
if index == len(sequence):
print(current_subsequence)
return
create_state_space_tree(sequence, current_subsequence, index + 1)
current_subsequence.append(sequence[index])
create_state_space_tree(sequence, current_subsequence, index + 1)
current_subsequence.pop()
if __name__ == "__main__":
seq: list[Any] = [1, 2, 3]
generate_all_subsequences(seq)
seq.clear()
seq.extend(["A", "B", "C"])
generate_all_subsequences(seq)
================================================
FILE: backtracking/coloring.py
================================================
"""
Graph Coloring also called "m coloring problem"
consists of coloring a given graph with at most m colors
such that no adjacent vertices are assigned the same color
Wikipedia: https://en.wikipedia.org/wiki/Graph_coloring
"""
def valid_coloring(
neighbours: list[int], colored_vertices: list[int], color: int
) -> bool:
"""
For each neighbour check if the coloring constraint is satisfied
If any of the neighbours fail the constraint return False
If all neighbours validate the constraint return True
>>> neighbours = [0,1,0,1,0]
>>> colored_vertices = [0, 2, 1, 2, 0]
>>> color = 1
>>> valid_coloring(neighbours, colored_vertices, color)
True
>>> color = 2
>>> valid_coloring(neighbours, colored_vertices, color)
False
"""
# Does any neighbour not satisfy the constraints
return not any(
neighbour == 1 and colored_vertices[i] == color
for i, neighbour in enumerate(neighbours)
)
def util_color(
graph: list[list[int]], max_colors: int, colored_vertices: list[int], index: int
) -> bool:
"""
Pseudo-Code
Base Case:
1. Check if coloring is complete
1.1 If complete return True (meaning that we successfully colored the graph)
Recursive Step:
2. Iterates over each color:
Check if the current coloring is valid:
2.1. Color given vertex
2.2. Do recursive call, check if this coloring leads to a solution
2.4. if current coloring leads to a solution return
2.5. Uncolor given vertex
>>> graph = [[0, 1, 0, 0, 0],
... [1, 0, 1, 0, 1],
... [0, 1, 0, 1, 0],
... [0, 1, 1, 0, 0],
... [0, 1, 0, 0, 0]]
>>> max_colors = 3
>>> colored_vertices = [0, 1, 0, 0, 0]
>>> index = 3
>>> util_color(graph, max_colors, colored_vertices, index)
True
>>> max_colors = 2
>>> util_color(graph, max_colors, colored_vertices, index)
False
"""
# Base Case
if index == len(graph):
return True
# Recursive Step
for i in range(max_colors):
if valid_coloring(graph[index], colored_vertices, i):
# Color current vertex
colored_vertices[index] = i
# Validate coloring
if util_color(graph, max_colors, colored_vertices, index + 1):
return True
# Backtrack
colored_vertices[index] = -1
return False
def color(graph: list[list[int]], max_colors: int) -> list[int]:
"""
Wrapper function to call subroutine called util_color
which will either return True or False.
If True is returned colored_vertices list is filled with correct colorings
>>> graph = [[0, 1, 0, 0, 0],
... [1, 0, 1, 0, 1],
... [0, 1, 0, 1, 0],
... [0, 1, 1, 0, 0],
... [0, 1, 0, 0, 0]]
>>> max_colors = 3
>>> color(graph, max_colors)
[0, 1, 0, 2, 0]
>>> max_colors = 2
>>> color(graph, max_colors)
[]
>>> color([], 2) # empty graph
[]
>>> color([[0]], 1) # single node, 1 color
[0]
>>> color([[0, 1], [1, 0]], 1) # 2 nodes, 1 color (impossible)
[]
>>> color([[0, 1], [1, 0]], 2) # 2 nodes, 2 colors (possible)
[0, 1]
"""
colored_vertices = [-1] * len(graph)
if util_color(graph, max_colors, colored_vertices, 0):
return colored_vertices
return []
================================================
FILE: backtracking/combination_sum.py
================================================
"""
In the Combination Sum problem, we are given a list consisting of distinct integers.
We need to find all the combinations whose sum equals to target given.
We can use an element more than one.
Time complexity(Average Case): O(n!)
Constraints:
1 <= candidates.length <= 30
2 <= candidates[i] <= 40
All elements of candidates are distinct.
1 <= target <= 40
"""
def backtrack(
candidates: list, path: list, answer: list, target: int, previous_index: int
) -> None:
"""
A recursive function that searches for possible combinations. Backtracks in case
of a bigger current combination value than the target value.
Parameters
----------
previous_index: Last index from the previous search
target: The value we need to obtain by summing our integers in the path list.
answer: A list of possible combinations
path: Current combination
candidates: A list of integers we can use.
"""
if target == 0:
answer.append(path.copy())
else:
for index in range(previous_index, len(candidates)):
if target >= candidates[index]:
path.append(candidates[index])
backtrack(candidates, path, answer, target - candidates[index], index)
path.pop()
def combination_sum(candidates: list, target: int) -> list:
"""
>>> combination_sum([2, 3, 5], 8)
[[2, 2, 2, 2], [2, 3, 3], [3, 5]]
>>> combination_sum([2, 3, 6, 7], 7)
[[2, 2, 3], [7]]
>>> combination_sum([-8, 2.3, 0], 1)
Traceback (most recent call last):
...
ValueError: All elements in candidates must be non-negative
>>> combination_sum([], 1)
Traceback (most recent call last):
...
ValueError: Candidates list should not be empty
"""
if not candidates:
raise ValueError("Candidates list should not be empty")
if any(x < 0 for x in candidates):
raise ValueError("All elements in candidates must be non-negative")
path: list[int] = []
answer: list[list[int]] = []
backtrack(candidates, path, answer, target, 0)
return answer
def main() -> None:
print(combination_sum([-8, 2.3, 0], 1))
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: backtracking/crossword_puzzle_solver.py
================================================
# https://www.geeksforgeeks.org/solve-crossword-puzzle/
def is_valid(
puzzle: list[list[str]], word: str, row: int, col: int, vertical: bool
) -> bool:
"""
Check if a word can be placed at the given position.
>>> puzzle = [
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', '']
... ]
>>> is_valid(puzzle, 'word', 0, 0, True)
True
>>> puzzle = [
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', '']
... ]
>>> is_valid(puzzle, 'word', 0, 0, False)
True
"""
for i in range(len(word)):
if vertical:
if row + i >= len(puzzle) or puzzle[row + i][col] != "":
return False
elif col + i >= len(puzzle[0]) or puzzle[row][col + i] != "":
return False
return True
def place_word(
puzzle: list[list[str]], word: str, row: int, col: int, vertical: bool
) -> None:
"""
Place a word at the given position.
>>> puzzle = [
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', '']
... ]
>>> place_word(puzzle, 'word', 0, 0, True)
>>> puzzle
[['w', '', '', ''], ['o', '', '', ''], ['r', '', '', ''], ['d', '', '', '']]
"""
for i, char in enumerate(word):
if vertical:
puzzle[row + i][col] = char
else:
puzzle[row][col + i] = char
def remove_word(
puzzle: list[list[str]], word: str, row: int, col: int, vertical: bool
) -> None:
"""
Remove a word from the given position.
>>> puzzle = [
... ['w', '', '', ''],
... ['o', '', '', ''],
... ['r', '', '', ''],
... ['d', '', '', '']
... ]
>>> remove_word(puzzle, 'word', 0, 0, True)
>>> puzzle
[['', '', '', ''], ['', '', '', ''], ['', '', '', ''], ['', '', '', '']]
"""
for i in range(len(word)):
if vertical:
puzzle[row + i][col] = ""
else:
puzzle[row][col + i] = ""
def solve_crossword(puzzle: list[list[str]], words: list[str]) -> bool:
"""
Solve the crossword puzzle using backtracking.
>>> puzzle = [
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', '']
... ]
>>> words = ['word', 'four', 'more', 'last']
>>> solve_crossword(puzzle, words)
True
>>> puzzle = [
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', ''],
... ['', '', '', '']
... ]
>>> words = ['word', 'four', 'more', 'paragraphs']
>>> solve_crossword(puzzle, words)
False
"""
for row in range(len(puzzle)):
for col in range(len(puzzle[0])):
if puzzle[row][col] == "":
for word in words:
for vertical in [True, False]:
if is_valid(puzzle, word, row, col, vertical):
place_word(puzzle, word, row, col, vertical)
words.remove(word)
if solve_crossword(puzzle, words):
return True
words.append(word)
remove_word(puzzle, word, row, col, vertical)
return False
return True
if __name__ == "__main__":
PUZZLE = [[""] * 3 for _ in range(3)]
WORDS = ["cat", "dog", "car"]
if solve_crossword(PUZZLE, WORDS):
print("Solution found:")
for row in PUZZLE:
print(" ".join(row))
else:
print("No solution found:")
================================================
FILE: backtracking/generate_parentheses.py
================================================
"""
author: Aayush Soni
Given n pairs of parentheses, write a function to generate all
combinations of well-formed parentheses.
Input: n = 2
Output: ["(())","()()"]
Leetcode link: https://leetcode.com/problems/generate-parentheses/description/
"""
def backtrack(
partial: str, open_count: int, close_count: int, n: int, result: list[str]
) -> None:
"""
Generate valid combinations of balanced parentheses using recursion.
:param partial: A string representing the current combination.
:param open_count: An integer representing the count of open parentheses.
:param close_count: An integer representing the count of close parentheses.
:param n: An integer representing the total number of pairs.
:param result: A list to store valid combinations.
:return: None
This function uses recursion to explore all possible combinations,
ensuring that at each step, the parentheses remain balanced.
Example:
>>> result = []
>>> backtrack("", 0, 0, 2, result)
>>> result
['(())', '()()']
"""
if len(partial) == 2 * n:
# When the combination is complete, add it to the result.
result.append(partial)
return
if open_count < n:
# If we can add an open parenthesis, do so, and recurse.
backtrack(partial + "(", open_count + 1, close_count, n, result)
if close_count < open_count:
# If we can add a close parenthesis (it won't make the combination invalid),
# do so, and recurse.
backtrack(partial + ")", open_count, close_count + 1, n, result)
def generate_parenthesis(n: int) -> list[str]:
"""
Generate valid combinations of balanced parentheses for a given n.
:param n: An integer representing the number of pairs of parentheses.
:return: A list of strings with valid combinations.
This function uses a recursive approach to generate the combinations.
Time Complexity: O(2^(2n)) - In the worst case, we have 2^(2n) combinations.
Space Complexity: O(n) - where 'n' is the number of pairs.
Example 1:
>>> generate_parenthesis(3)
['((()))', '(()())', '(())()', '()(())', '()()()']
Example 2:
>>> generate_parenthesis(1)
['()']
Example 3:
>>> generate_parenthesis(0)
['']
"""
result: list[str] = []
backtrack("", 0, 0, n, result)
return result
if __name__ == "__main__":
import doctest
doctest.testmod()
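# --- Added cross-check (illustrative, not part of the original module) ---
# The number of well-formed strings for n pairs is the nth Catalan number
# (1, 1, 2, 5, 14, ...), so n = 4 should yield exactly 14 combinations.
assert len(generate_parenthesis(4)) == 14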
================================================
FILE: backtracking/generate_parentheses_iterative.py
================================================
def generate_parentheses_iterative(length: int) -> list[str]:
"""
Generate all valid combinations of parentheses (Iterative Approach).
The algorithm works as follows:
1. Initialize an empty list to store the combinations.
2. Initialize a stack to keep track of partial combinations.
3. Start with empty string and push it on stack along with
the counts of '(' and ')'.
4. While the stack is not empty:
a. Pop a partial combination and its open and close counts from the stack.
b. If the combination length is equal to 2*length, add it to the result.
c. If open count < length, push new combination with added '(' on stack.
d. If close count < open count, push new combination with added ')' on stack.
5. Return the result containing all valid combinations.
Args:
length: The desired length of the parentheses combinations
Returns:
A list of strings representing valid combinations of parentheses
Time Complexity:
O(2^(2*length))
Space Complexity:
O(2^(2*length))
>>> generate_parentheses_iterative(3)
['()()()', '()(())', '(())()', '(()())', '((()))']
>>> generate_parentheses_iterative(2)
['()()', '(())']
>>> generate_parentheses_iterative(1)
['()']
>>> generate_parentheses_iterative(0)
['']
"""
if length == 0:
return [""]
result: list[str] = []
stack: list[tuple[str, int, int]] = []
# Each element in stack is a tuple (current_combination, open_count, close_count)
stack.append(("", 0, 0))
while stack:
current_combination, open_count, close_count = stack.pop()
if len(current_combination) == 2 * length:
result.append(current_combination)
continue
if open_count < length:
stack.append((current_combination + "(", open_count + 1, close_count))
if close_count < open_count:
stack.append((current_combination + ")", open_count, close_count + 1))
return result
if __name__ == "__main__":
import doctest
doctest.testmod()
print(generate_parentheses_iterative(3))
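# --- Added note (illustrative, not part of the original module) ---
# The stack pops candidates in the reverse of the order the recursive version
# visits them, so the same strings come back in the opposite order; sorting
# makes the two results directly comparable.
assert sorted(generate_parentheses_iterative(3)) == sorted(
    ["((()))", "(()())", "(())()", "()(())", "()()()"]
)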
================================================
FILE: backtracking/hamiltonian_cycle.py
================================================
"""
A Hamiltonian cycle (Hamiltonian circuit) is a graph cycle
through a graph that visits each node exactly once.
Determining whether such paths and cycles exist in graphs
is the 'Hamiltonian path problem', which is NP-complete.
Wikipedia: https://en.wikipedia.org/wiki/Hamiltonian_path
"""
def valid_connection(
graph: list[list[int]], next_ver: int, curr_ind: int, path: list[int]
) -> bool:
"""
Checks whether it is possible to add next_ver to the path by validating 2 conditions
1. There is an edge between the current and next vertex
2. The next vertex is not already in the path
If both validations succeed we return True, meaning it is possible to connect
these vertices; otherwise we return False
Case 1:Use exact graph as in main function, with initialized values
>>> graph = [[0, 1, 0, 1, 0],
... [1, 0, 1, 1, 1],
... [0, 1, 0, 0, 1],
... [1, 1, 0, 0, 1],
... [0, 1, 1, 1, 0]]
>>> path = [0, -1, -1, -1, -1, 0]
>>> curr_ind = 1
>>> next_ver = 1
>>> valid_connection(graph, next_ver, curr_ind, path)
True
Case 2: Same graph, but trying to connect to node that is already in path
>>> path = [0, 1, 2, 4, -1, 0]
>>> curr_ind = 4
>>> next_ver = 1
>>> valid_connection(graph, next_ver, curr_ind, path)
False
"""
# 1. Validate that path exists between current and next vertices
if graph[path[curr_ind - 1]][next_ver] == 0:
return False
# 2. Validate that next vertex is not already in path
return not any(vertex == next_ver for vertex in path)
def util_hamilton_cycle(graph: list[list[int]], path: list[int], curr_ind: int) -> bool:
"""
Pseudo-Code
Base Case:
1. Check if we have visited all of the vertices
1.1 If the last visited vertex has an edge to the starting vertex return True,
otherwise return False
Recursive Step:
2. Iterate over each vertex
Check if next vertex is valid for transiting from current vertex
2.1 Remember next vertex as next transition
2.2 Do recursive call and check if going to this vertex solves problem
2.3 If next vertex leads to solution return True
2.4 Else backtrack, delete remembered vertex
Case 1: Use exact graph as in main function, with initialized values
>>> graph = [[0, 1, 0, 1, 0],
... [1, 0, 1, 1, 1],
... [0, 1, 0, 0, 1],
... [1, 1, 0, 0, 1],
... [0, 1, 1, 1, 0]]
>>> path = [0, -1, -1, -1, -1, 0]
>>> curr_ind = 1
>>> util_hamilton_cycle(graph, path, curr_ind)
True
>>> path
[0, 1, 2, 4, 3, 0]
Case 2: Use exact graph as in previous case, but in the properties taken from
middle of calculation
>>> graph = [[0, 1, 0, 1, 0],
... [1, 0, 1, 1, 1],
... [0, 1, 0, 0, 1],
... [1, 1, 0, 0, 1],
... [0, 1, 1, 1, 0]]
>>> path = [0, 1, 2, -1, -1, 0]
>>> curr_ind = 3
>>> util_hamilton_cycle(graph, path, curr_ind)
True
>>> path
[0, 1, 2, 4, 3, 0]
"""
# Base Case
if curr_ind == len(graph):
# return whether path exists between current and starting vertices
return graph[path[curr_ind - 1]][path[0]] == 1
# Recursive Step
for next_ver in range(len(graph)):
if valid_connection(graph, next_ver, curr_ind, path):
# Insert current vertex into path as next transition
path[curr_ind] = next_ver
# Validate created path
if util_hamilton_cycle(graph, path, curr_ind + 1):
return True
# Backtrack
path[curr_ind] = -1
return False
def hamilton_cycle(graph: list[list[int]], start_index: int = 0) -> list[int]:
r"""
Wrapper function that calls the subroutine util_hamilton_cycle,
which will either return an array of vertices indicating a Hamiltonian cycle
or an empty list indicating that no Hamiltonian cycle was found.
Case 1:
The following graph consists of 5 vertices.
If we look closely, we can see that there are multiple Hamiltonian cycles.
For example one result is when we iterate like:
(0)->(1)->(2)->(4)->(3)->(0)
(0)---(1)---(2)
| / \ |
| / \ |
| / \ |
|/ \|
(3)---------(4)
>>> graph = [[0, 1, 0, 1, 0],
... [1, 0, 1, 1, 1],
... [0, 1, 0, 0, 1],
... [1, 1, 0, 0, 1],
... [0, 1, 1, 1, 0]]
>>> hamilton_cycle(graph)
[0, 1, 2, 4, 3, 0]
Case 2:
Same Graph as it was in Case 1, changed starting index from default to 3
(0)---(1)---(2)
| / \ |
| / \ |
| / \ |
|/ \|
(3)---------(4)
>>> graph = [[0, 1, 0, 1, 0],
... [1, 0, 1, 1, 1],
... [0, 1, 0, 0, 1],
... [1, 1, 0, 0, 1],
... [0, 1, 1, 1, 0]]
>>> hamilton_cycle(graph, 3)
[3, 0, 1, 2, 4, 3]
Case 3:
Following Graph is exactly what it was before, but edge 3-4 is removed.
Result is that there is no Hamiltonian Cycle anymore.
(0)---(1)---(2)
| / \ |
| / \ |
| / \ |
|/ \|
(3) (4)
>>> graph = [[0, 1, 0, 1, 0],
... [1, 0, 1, 1, 1],
... [0, 1, 0, 0, 1],
... [1, 1, 0, 0, 0],
... [0, 1, 1, 0, 0]]
>>> hamilton_cycle(graph,4)
[]
"""
# Initialize path with -1, indicating that we have not visited them yet
path = [-1] * (len(graph) + 1)
# initialize start and end of path with starting index
path[0] = path[-1] = start_index
# evaluate; if we find an answer return the path, otherwise return an empty list
return path if util_hamilton_cycle(graph, path, 1) else []
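# --- Illustrative check (added sketch; reuses the example graph from the
# doctests above) ---
if __name__ == "__main__":
    example_graph = [
        [0, 1, 0, 1, 0],
        [1, 0, 1, 1, 1],
        [0, 1, 0, 0, 1],
        [1, 1, 0, 0, 1],
        [0, 1, 1, 1, 0],
    ]
    # Because the graph contains a Hamiltonian cycle, one is found from every
    # start vertex: the result has n + 1 entries, starts and ends at the start
    # vertex, and visits every vertex exactly once in between.
    for start in range(len(example_graph)):
        cycle = hamilton_cycle(example_graph, start)
        assert cycle[0] == cycle[-1] == start
        assert sorted(cycle[:-1]) == list(range(len(example_graph)))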
================================================
FILE: backtracking/knight_tour.py
================================================
# Knight Tour Intro: https://www.youtube.com/watch?v=ab_dY3dZFHM
from __future__ import annotations
def get_valid_pos(position: tuple[int, int], n: int) -> list[tuple[int, int]]:
"""
Find all the valid positions a knight can move to from the current position.
>>> get_valid_pos((1, 3), 4)
[(2, 1), (0, 1), (3, 2)]
"""
y, x = position
positions = [
(y + 1, x + 2),
(y - 1, x + 2),
(y + 1, x - 2),
(y - 1, x - 2),
(y + 2, x + 1),
(y + 2, x - 1),
(y - 2, x + 1),
(y - 2, x - 1),
]
permissible_positions = []
for inner_position in positions:
y_test, x_test = inner_position
if 0 <= y_test < n and 0 <= x_test < n:
permissible_positions.append(inner_position)
return permissible_positions
def is_complete(board: list[list[int]]) -> bool:
"""
Check if the board (matrix) has been completely filled with non-zero values.
>>> is_complete([[1]])
True
>>> is_complete([[1, 2], [3, 0]])
False
"""
return not any(elem == 0 for row in board for elem in row)
def open_knight_tour_helper(
board: list[list[int]], pos: tuple[int, int], curr: int
) -> bool:
"""
Helper function to solve knight tour problem.
"""
if is_complete(board):
return True
for position in get_valid_pos(pos, len(board)):
y, x = position
if board[y][x] == 0:
board[y][x] = curr + 1
if open_knight_tour_helper(board, position, curr + 1):
return True
board[y][x] = 0
return False
def open_knight_tour(n: int) -> list[list[int]]:
"""
Find the solution for the knight tour problem for a board of size n. Raises
ValueError if the tour cannot be performed for the given size.
>>> open_knight_tour(1)
[[1]]
>>> open_knight_tour(2)
Traceback (most recent call last):
...
ValueError: Open Knight Tour cannot be performed on a board of size 2
"""
board = [[0 for i in range(n)] for j in range(n)]
for i in range(n):
for j in range(n):
board[i][j] = 1
if open_knight_tour_helper(board, (i, j), 1):
return board
board[i][j] = 0
msg = f"Open Knight Tour cannot be performed on a board of size {n}"
raise ValueError(msg)
if __name__ == "__main__":
import doctest
doctest.testmod()
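# --- Illustrative check (added sketch; board size 5 is an assumed example) ---
# From the centre of a 5x5 board all eight knight moves stay on the board,
# while a corner square has only two legal destinations.
assert len(get_valid_pos((2, 2), 5)) == 8
assert len(get_valid_pos((0, 0), 5)) == 2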
================================================
FILE: backtracking/match_word_pattern.py
================================================
def match_word_pattern(pattern: str, input_string: str) -> bool:
"""
Determine if a given pattern matches a string using backtracking.
pattern: The pattern to match.
input_string: The string to match against the pattern.
return: True if the pattern matches the string, False otherwise.
>>> match_word_pattern("aba", "GraphTreesGraph")
True
>>> match_word_pattern("xyx", "PythonRubyPython")
True
>>> match_word_pattern("GG", "PythonJavaPython")
False
"""
def backtrack(pattern_index: int, str_index: int) -> bool:
"""
>>> backtrack(0, 0)
True
>>> backtrack(0, 1)
True
>>> backtrack(0, 4)
False
"""
if pattern_index == len(pattern) and str_index == len(input_string):
return True
if pattern_index == len(pattern) or str_index == len(input_string):
return False
char = pattern[pattern_index]
if char in pattern_map:
mapped_str = pattern_map[char]
if input_string.startswith(mapped_str, str_index):
return backtrack(pattern_index + 1, str_index + len(mapped_str))
else:
return False
for end in range(str_index + 1, len(input_string) + 1):
substr = input_string[str_index:end]
if substr in str_map:
continue
pattern_map[char] = substr
str_map[substr] = char
if backtrack(pattern_index + 1, end):
return True
del pattern_map[char]
del str_map[substr]
return False
pattern_map: dict[str, str] = {}
str_map: dict[str, str] = {}
return backtrack(0, 0)
if __name__ == "__main__":
import doctest
doctest.testmod()
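# --- Illustrative check (added sketch; the strings are assumed examples) ---
# A single pattern letter may map to a multi-character chunk: here every 'a'
# maps to "asd".  The second call fails because no consistent mapping exists.
assert match_word_pattern("aaaa", "asdasdasdasd") is True
assert match_word_pattern("aabb", "xyzabcxzyabc") is False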
================================================
FILE: backtracking/minimax.py
================================================
"""
Minimax helps to achieve the maximum score in a game by checking all possible moves.
depth is the current depth in the game tree.
node_index is the index of the current node in scores[].
is_max is True if the current move belongs to the maximizer, else False.
The leaves of the game tree are stored in scores[].
height is the maximum height of the game tree.
"""
from __future__ import annotations
import math
def minimax(
depth: int, node_index: int, is_max: bool, scores: list[int], height: float
) -> int:
"""
This function implements the minimax algorithm, which helps achieve the optimal
score for a player in a two-player game by checking all possible moves.
If the player is the maximizer, then the score is maximized.
If the player is the minimizer, then the score is minimized.
Parameters:
- depth: Current depth in the game tree.
- node_index: Index of the current node in the scores list.
- is_max: A boolean indicating whether the current move
is for the maximizer (True) or minimizer (False).
- scores: A list containing the scores of the leaves of the game tree.
- height: The maximum height of the game tree.
Returns:
- An integer representing the optimal score for the current player.
>>> import math
>>> scores = [90, 23, 6, 33, 21, 65, 123, 34423]
>>> height = math.log(len(scores), 2)
>>> minimax(0, 0, True, scores, height)
65
>>> minimax(-1, 0, True, scores, height)
Traceback (most recent call last):
...
ValueError: Depth cannot be less than 0
>>> minimax(0, 0, True, [], 2)
Traceback (most recent call last):
...
ValueError: Scores cannot be empty
>>> scores = [3, 5, 2, 9, 12, 5, 23, 23]
>>> height = math.log(len(scores), 2)
>>> minimax(0, 0, True, scores, height)
12
"""
if depth < 0:
raise ValueError("Depth cannot be less than 0")
if len(scores) == 0:
raise ValueError("Scores cannot be empty")
# Base case: If the current depth equals the height of the tree,
# return the score of the current node.
if depth == height:
return scores[node_index]
# If it's the maximizer's turn, choose the maximum score
# between the two possible moves.
if is_max:
return max(
minimax(depth + 1, node_index * 2, False, scores, height),
minimax(depth + 1, node_index * 2 + 1, False, scores, height),
)
# If it's the minimizer's turn, choose the minimum score
# between the two possible moves.
return min(
minimax(depth + 1, node_index * 2, True, scores, height),
minimax(depth + 1, node_index * 2 + 1, True, scores, height),
)
def main() -> None:
# Sample scores and height calculation
scores = [90, 23, 6, 33, 21, 65, 123, 34423]
height = math.log(len(scores), 2)
# Calculate and print the optimal value using the minimax algorithm
print("Optimal value : ", end="")
print(minimax(0, 0, True, scores, height))
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
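# --- Worked example (added sketch; the scores below are assumed) ---
# With 4 leaves the tree height is log2(4) = 2.  The minimizer first reduces
# each pair of leaves: min(3, 12) = 3 and min(2, 8) = 2, then the maximizer
# picks max(3, 2) = 3.
assert minimax(0, 0, True, [3, 12, 2, 8], math.log(4, 2)) == 3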
================================================
FILE: backtracking/n_queens.py
================================================
"""
The n-queens problem is to place N queens on an N * N
chessboard such that no queen can attack any other queen placed
on that chessboard.
This means that one queen cannot share a row, column, or diagonal
with any other queen.
"""
from __future__ import annotations
solution = []
def is_safe(board: list[list[int]], row: int, column: int) -> bool:
"""
This function returns a boolean value True if it is safe to place a queen there
considering the current state of the board.
Parameters:
board (2D matrix): The chessboard
row, column: Coordinates of the cell on the board
Returns:
Boolean Value
>>> is_safe([[0, 0, 0], [0, 0, 0], [0, 0, 0]], 1, 1)
True
>>> is_safe([[0, 1, 0], [0, 0, 0], [0, 0, 0]], 1, 1)
False
>>> is_safe([[1, 0, 0], [0, 0, 0], [0, 0, 0]], 1, 1)
False
>>> is_safe([[0, 0, 1], [0, 0, 0], [0, 0, 0]], 1, 1)
False
>>> is_safe([[1, 0, 0], [0, 0, 0], [0, 0, 0]], 1, 2)
True
>>> is_safe([[1, 0, 0], [0, 0, 0], [0, 0, 0]], 2, 1)
True
>>> is_safe([[0, 0, 0], [1, 0, 0], [0, 0, 0]], 0, 2)
True
>>> is_safe([[0, 0, 0], [1, 0, 0], [0, 0, 0]], 2, 2)
True
"""
n = len(board) # Size of the board
# Check if there is any queen in the same upper column,
# left upper diagonal and right upper diagonal
return (
all(board[i][j] != 1 for i, j in zip(range(row), [column] * row))
and all(
board[i][j] != 1
for i, j in zip(range(row - 1, -1, -1), range(column - 1, -1, -1))
)
and all(
board[i][j] != 1
for i, j in zip(range(row - 1, -1, -1), range(column + 1, n))
)
)
def solve(board: list[list[int]], row: int) -> bool:
"""
This function creates a state space tree and calls the safe function until it
receives a False Boolean and terminates that branch and backtracks to the next
possible solution branch.
"""
if row >= len(board):
"""
If the row number exceeds N, we have a board with a successful combination
and that combination is appended to the solution list and the board is printed.
"""
solution.append(board)
printboard(board)
print()
return True
for i in range(len(board)):
"""
For every row, it iterates through each column to check if it is feasible to
place a queen there.
If all the combinations for that particular branch are successful, the board is
reinitialized for the next possible combination.
"""
if is_safe(board, row, i):
board[row][i] = 1
solve(board, row + 1)
board[row][i] = 0
return False
def printboard(board: list[list[int]]) -> None:
"""
Prints the boards that have a successful combination.
"""
for i in range(len(board)):
for j in range(len(board)):
if board[i][j] == 1:
print("Q", end=" ") # Queen is present
else:
print(".", end=" ") # Empty cell
print()
# Number of queens (e.g., n=8 for an 8x8 board)
n = 8
board = [[0 for i in range(n)] for j in range(n)]
solve(board, 0)
print("The total number of solutions are:", len(solution))
================================================
FILE: backtracking/n_queens_math.py
================================================
r"""
Problem:
The n queens problem is: placing N queens on an N * N chessboard such that no queen
can attack any other queen placed on that chessboard. This means that one queen
cannot have any other queen on its horizontal, vertical or diagonal lines.
Solution:
To solve this problem we will use simple math. First, we know that the queen's possible
moves can be simplified into four directions: vertical, horizontal, diagonal left and
diagonal right.
We can visualize it like this:
left diagonal = \
right diagonal = /
On a chessboard vertical movement could be the rows and horizontal movement could be
the columns.
In programming we can use an array, and in this array each index could be the rows and
each value in the array could be the column. For example:
. Q . . We have this chessboard with one queen in each column and the queens
. . . Q can't attack each other.
Q . . . The array for this example would look like this: [1, 3, 0, 2]
. . Q .
So if we use an array and we verify that each value in the array is different from the
others, we know that at least the queens can't attack each other horizontally or
vertically.
At this point we are halfway done, and we will treat the chessboard as a
Cartesian plane. Now we need to recall some basic math that we learned in
school, namely this formula:
Slope of a line:
y2 - y1
m = ----------
x2 - x1
This formula allows us to get the slope. For the angles 45º (right diagonal) and 135º
(left diagonal) this formula gives us m = 1 and m = -1 respectively.
See::
https://www.enotes.com/homework-help/write-equation-line-that-hits-origin-45-degree-1474860
Then we have this other formula:
Slope intercept:
y = mx + b
b is where the line crosses the Y axis (to get more information see:
https://www.mathsisfun.com/y_intercept.html). If we change the formula to solve for b,
we would have:
y - mx = b
And since we already have the m values for the angles 45º and 135º, this formula would
look like this:
45º: y - (1)x = b
45º: y - x = b
135º: y - (-1)x = b
135º: y + x = b
y = row
x = column
Applying these two formulas we can check if a queen in some position is being attacked
by another one or vice versa.
"""
from __future__ import annotations
def depth_first_search(
possible_board: list[int],
diagonal_right_collisions: list[int],
diagonal_left_collisions: list[int],
boards: list[list[str]],
n: int,
) -> None:
"""
>>> boards = []
>>> depth_first_search([], [], [], boards, 4)
>>> for board in boards:
... print(board)
['. Q . . ', '. . . Q ', 'Q . . . ', '. . Q . ']
['. . Q . ', 'Q . . . ', '. . . Q ', '. Q . . ']
"""
# Get next row in the current board (possible_board) to fill it with a queen
row = len(possible_board)
# If row is equal to the size of the board it means there is a queen in each row of
# the current board (possible_board)
if row == n:
# We convert the variable possible_board that looks like this: [1, 3, 0, 2] to
# this: ['. Q . . ', '. . . Q ', 'Q . . . ', '. . Q . ']
boards.append([". " * i + "Q " + ". " * (n - 1 - i) for i in possible_board])
return
# We iterate each column in the row to find all possible results in each row
for col in range(n):
# We apply what we learned previously. First we check that the current board
# (possible_board) does not already contain the same column value, because if it
# does, it means there is a vertical collision. Then we apply the two formulas we
# learned before:
#
# 45º: y - x = b or 45: row - col = b
# 135º: y + x = b or row + col = b.
#
# And we verify that the results of these two formulas do not already exist in
# their respective lists (diagonal_right_collisions, diagonal_left_collisions).
#
# If any of these checks is True it means there is a collision, so we continue to
# the next value in the for loop.
if (
col in possible_board
or row - col in diagonal_right_collisions
or row + col in diagonal_left_collisions
):
continue
# If there is no collision we call the dfs function again with the updated inputs
depth_first_search(
[*possible_board, col],
[*diagonal_right_collisions, row - col],
[*diagonal_left_collisions, row + col],
boards,
n,
)
def n_queens_solution(n: int) -> None:
boards: list[list[str]] = []
depth_first_search([], [], [], boards, n)
# Print all the boards
for board in boards:
for column in board:
print(column)
print("")
print(len(boards), "solutions were found.")
if __name__ == "__main__":
import doctest
doctest.testmod()
n_queens_solution(4)
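# --- Worked example of the collision formulas (added sketch; the coordinates are
# assumed examples) ---
# Two queens share a 45º diagonal when row - col is equal, and a 135º diagonal
# when row + col is equal.
assert (0 - 1) == (2 - 3)  # (0, 1) and (2, 3) lie on the same 45º diagonal
assert (1 + 2) == (3 + 0)  # (1, 2) and (3, 0) lie on the same 135º diagonal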
================================================
FILE: backtracking/power_sum.py
================================================
"""
Problem source: https://www.hackerrank.com/challenges/the-power-sum/problem
Find the number of ways that a given integer X can be expressed as the sum
of the Nth powers of unique natural numbers. For example, if X=13 and N=2,
we have to find all combinations of unique squares adding up to 13.
The only solution is 2^2+3^2. Constraints: 1<=X<=1000, 2<=N<=10.
"""
def backtrack(
needed_sum: int,
power: int,
current_number: int,
current_sum: int,
solutions_count: int,
) -> tuple[int, int]:
"""
>>> backtrack(13, 2, 1, 0, 0)
(0, 1)
>>> backtrack(10, 2, 1, 0, 0)
(0, 1)
>>> backtrack(10, 3, 1, 0, 0)
(0, 0)
>>> backtrack(20, 2, 1, 0, 0)
(0, 1)
>>> backtrack(15, 10, 1, 0, 0)
(0, 0)
>>> backtrack(16, 2, 1, 0, 0)
(0, 1)
>>> backtrack(20, 1, 1, 0, 0)
(0, 64)
"""
if current_sum == needed_sum:
# If the sum of the powers is equal to needed_sum, then we have a solution.
solutions_count += 1
return current_sum, solutions_count
i_to_n = current_number**power
if current_sum + i_to_n <= needed_sum:
# If the sum of the powers is less than needed_sum, then continue adding powers.
current_sum += i_to_n
current_sum, solutions_count = backtrack(
needed_sum, power, current_number + 1, current_sum, solutions_count
)
current_sum -= i_to_n
if i_to_n < needed_sum:
# If the power of i is less than needed_sum, then try with the next power.
current_sum, solutions_count = backtrack(
needed_sum, power, current_number + 1, current_sum, solutions_count
)
return current_sum, solutions_count
def solve(needed_sum: int, power: int) -> int:
"""
>>> solve(13, 2)
1
>>> solve(10, 2)
1
>>> solve(10, 3)
0
>>> solve(20, 2)
1
>>> solve(15, 10)
0
>>> solve(16, 2)
1
>>> solve(20, 1)
Traceback (most recent call last):
...
ValueError: Invalid input
needed_sum must be between 1 and 1000, power between 2 and 10.
>>> solve(-10, 5)
Traceback (most recent call last):
...
ValueError: Invalid input
needed_sum must be between 1 and 1000, power between 2 and 10.
"""
if not (1 <= needed_sum <= 1000 and 2 <= power <= 10):
raise ValueError(
"Invalid input\n"
"needed_sum must be between 1 and 1000, power between 2 and 10."
)
return backtrack(needed_sum, power, 1, 0, 0)[1] # Return the solutions_count
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: backtracking/rat_in_maze.py
================================================
from __future__ import annotations
def solve_maze(
maze: list[list[int]],
source_row: int,
source_column: int,
destination_row: int,
destination_column: int,
) -> list[list[int]]:
"""
This method solves the "rat in maze" problem.
Parameters :
- maze: A two dimensional matrix of zeros and ones.
- source_row: The row index of the starting point.
- source_column: The column index of the starting point.
- destination_row: The row index of the destination point.
- destination_column: The column index of the destination point.
Returns:
- solution: A 2D matrix representing the solution path if it exists.
Raises:
- ValueError: If no solution exists or if the source or
destination coordinates are invalid.
Description:
This method navigates through a maze represented as an n by n matrix,
starting from a specified source cell and
aiming to reach a destination cell.
The maze consists of walls (1s) and open paths (0s).
By providing custom row and column values, the source and destination
cells can be adjusted.
>>> maze = [[0, 1, 0, 1, 1],
... [0, 0, 0, 0, 0],
... [1, 0, 1, 0, 1],
... [0, 0, 1, 0, 0],
... [1, 0, 0, 1, 0]]
>>> solve_maze(maze,0,0,len(maze)-1,len(maze)-1) # doctest: +NORMALIZE_WHITESPACE
[[0, 1, 1, 1, 1],
[0, 0, 0, 0, 1],
[1, 1, 1, 0, 1],
[1, 1, 1, 0, 0],
[1, 1, 1, 1, 0]]
Note:
In the output maze, the zeros (0s) represent one of the possible
paths from the source to the destination.
>>> maze = [[0, 1, 0, 1, 1],
... [0, 0, 0, 0, 0],
... [0, 0, 0, 0, 1],
... [0, 0, 0, 0, 0],
... [0, 0, 0, 0, 0]]
>>> solve_maze(maze,0,0,len(maze)-1,len(maze)-1) # doctest: +NORMALIZE_WHITESPACE
[[0, 1, 1, 1, 1],
[0, 1, 1, 1, 1],
[0, 1, 1, 1, 1],
[0, 1, 1, 1, 1],
[0, 0, 0, 0, 0]]
>>> maze = [[0, 0, 0],
... [0, 1, 0],
... [1, 0, 0]]
>>> solve_maze(maze,0,0,len(maze)-1,len(maze)-1) # doctest: +NORMALIZE_WHITESPACE
[[0, 0, 0],
[1, 1, 0],
[1, 1, 0]]
>>> maze = [[1, 0, 0],
... [0, 1, 0],
... [1, 0, 0]]
>>> solve_maze(maze,0,1,len(maze)-1,len(maze)-1) # doctest: +NORMALIZE_WHITESPACE
[[1, 0, 0],
[1, 1, 0],
[1, 1, 0]]
>>> maze = [[1, 1, 0, 0, 1, 0, 0, 1],
... [1, 0, 1, 0, 0, 1, 1, 1],
... [0, 1, 0, 1, 0, 0, 1, 0],
... [1, 1, 1, 0, 0, 1, 0, 1],
... [0, 1, 0, 0, 1, 0, 1, 1],
... [0, 0, 0, 1, 1, 1, 0, 1],
... [0, 1, 0, 1, 0, 1, 1, 1],
... [1, 1, 0, 0, 0, 0, 0, 1]]
>>> solve_maze(maze,0,2,len(maze)-1,2) # doctest: +NORMALIZE_WHITESPACE
[[1, 1, 0, 0, 1, 1, 1, 1],
[1, 1, 1, 0, 0, 1, 1, 1],
[1, 1, 1, 1, 0, 1, 1, 1],
[1, 1, 1, 0, 0, 1, 1, 1],
[1, 1, 0, 0, 1, 1, 1, 1],
[1, 1, 0, 1, 1, 1, 1, 1],
[1, 1, 0, 1, 1, 1, 1, 1],
[1, 1, 0, 1, 1, 1, 1, 1]]
>>> maze = [[1, 0, 0],
... [0, 1, 1],
... [1, 0, 1]]
>>> solve_maze(maze,0,1,len(maze)-1,len(maze)-1)
Traceback (most recent call last):
...
ValueError: No solution exists!
>>> maze = [[0, 0],
... [1, 1]]
>>> solve_maze(maze,0,0,len(maze)-1,len(maze)-1)
Traceback (most recent call last):
...
ValueError: No solution exists!
>>> maze = [[0, 1],
... [1, 0]]
>>> solve_maze(maze,2,0,len(maze)-1,len(maze)-1)
Traceback (most recent call last):
...
ValueError: Invalid source or destination coordinates
>>> maze = [[1, 0, 0],
... [0, 1, 0],
... [1, 0, 0]]
>>> solve_maze(maze,0,1,len(maze),len(maze)-1)
Traceback (most recent call last):
...
ValueError: Invalid source or destination coordinates
"""
size = len(maze)
# Check if source and destination coordinates are Invalid.
if not (0 <= source_row <= size - 1 and 0 <= source_column <= size - 1) or (
not (0 <= destination_row <= size - 1 and 0 <= destination_column <= size - 1)
):
raise ValueError("Invalid source or destination coordinates")
# We need to create solution object to save path.
solutions = [[1 for _ in range(size)] for _ in range(size)]
solved = run_maze(
maze, source_row, source_column, destination_row, destination_column, solutions
)
if solved:
return solutions
else:
raise ValueError("No solution exists!")
def run_maze(
maze: list[list[int]],
i: int,
j: int,
destination_row: int,
destination_column: int,
solutions: list[list[int]],
) -> bool:
"""
This method is recursive starting from (i, j) and going in one of four directions:
up, down, left, right.
If a path is found to destination it returns True otherwise it returns False.
Parameters
maze: A two dimensional matrix of zeros and ones.
i, j : coordinates of matrix
solutions: A two dimensional matrix of solutions.
Returns:
Boolean if path is found True, Otherwise False.
"""
size = len(maze)
# Final check point.
if i == destination_row and j == destination_column and maze[i][j] == 0:
solutions[i][j] = 0
return True
lower_flag = (not i < 0) and (not j < 0) # Check lower bounds
upper_flag = (i < size) and (j < size) # Check upper bounds
if lower_flag and upper_flag:
# check for already visited and block points.
block_flag = (solutions[i][j]) and (not maze[i][j])
if block_flag:
# check visited
solutions[i][j] = 0
# check for directions
if (
run_maze(maze, i + 1, j, destination_row, destination_column, solutions)
or run_maze(
maze, i, j + 1, destination_row, destination_column, solutions
)
or run_maze(
maze, i - 1, j, destination_row, destination_column, solutions
)
or run_maze(
maze, i, j - 1, destination_row, destination_column, solutions
)
):
return True
solutions[i][j] = 1
return False
return False
if __name__ == "__main__":
import doctest
doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
================================================
FILE: backtracking/sudoku.py
================================================
"""
Given a partially filled 9x9 2D array, the objective is to fill a 9x9
square grid with digits numbered 1 to 9, so that every row, column, and
each of the nine 3x3 sub-grids contains all of the digits.
This can be solved using Backtracking and is similar to n-queens.
We check to see if a cell is safe or not and recursively call the
function on the next column to see if it returns True. If yes, we
have solved the puzzle. Else, we backtrack and place another number
in that cell and repeat this process.
"""
from __future__ import annotations
Matrix = list[list[int]]
# assigning initial values to the grid
initial_grid: Matrix = [
[3, 0, 6, 5, 0, 8, 4, 0, 0],
[5, 2, 0, 0, 0, 0, 0, 0, 0],
[0, 8, 7, 0, 0, 0, 0, 3, 1],
[0, 0, 3, 0, 1, 0, 0, 8, 0],
[9, 0, 0, 8, 6, 3, 0, 0, 5],
[0, 5, 0, 0, 9, 0, 6, 0, 0],
[1, 3, 0, 0, 0, 0, 2, 5, 0],
[0, 0, 0, 0, 0, 0, 0, 7, 4],
[0, 0, 5, 2, 0, 6, 3, 0, 0],
]
# a grid with no solution
no_solution: Matrix = [
[5, 0, 6, 5, 0, 8, 4, 0, 3],
[5, 2, 0, 0, 0, 0, 0, 0, 2],
[1, 8, 7, 0, 0, 0, 0, 3, 1],
[0, 0, 3, 0, 1, 0, 0, 8, 0],
[9, 0, 0, 8, 6, 3, 0, 0, 5],
[0, 5, 0, 0, 9, 0, 6, 0, 0],
[1, 3, 0, 0, 0, 0, 2, 5, 0],
[0, 0, 0, 0, 0, 0, 0, 7, 4],
[0, 0, 5, 2, 0, 6, 3, 0, 0],
]
def is_safe(grid: Matrix, row: int, column: int, n: int) -> bool:
"""
This function checks the grid to see if each row,
column, and the 3x3 subgrids contain the digit 'n'.
It returns False if it is not 'safe' (a duplicate digit
is found) else returns True if it is 'safe'
"""
for i in range(9):
if n in {grid[row][i], grid[i][column]}:
return False
for i in range(3):
for j in range(3):
if grid[(row - row % 3) + i][(column - column % 3) + j] == n:
return False
return True
def find_empty_location(grid: Matrix) -> tuple[int, int] | None:
"""
This function finds an empty location so that we can assign a number
for that particular row and column.
"""
for i in range(9):
for j in range(9):
if grid[i][j] == 0:
return i, j
return None
def sudoku(grid: Matrix) -> Matrix | None:
"""
Takes a partially filled-in grid and attempts to assign values to
all unassigned locations in such a way to meet the requirements
for Sudoku solution (non-duplication across rows, columns, and boxes)
>>> sudoku(initial_grid) # doctest: +NORMALIZE_WHITESPACE
[[3, 1, 6, 5, 7, 8, 4, 9, 2],
[5, 2, 9, 1, 3, 4, 7, 6, 8],
[4, 8, 7, 6, 2, 9, 5, 3, 1],
[2, 6, 3, 4, 1, 5, 9, 8, 7],
[9, 7, 4, 8, 6, 3, 1, 2, 5],
[8, 5, 1, 7, 9, 2, 6, 4, 3],
[1, 3, 8, 9, 4, 7, 2, 5, 6],
[6, 9, 2, 3, 5, 1, 8, 7, 4],
[7, 4, 5, 2, 8, 6, 3, 1, 9]]
>>> sudoku(no_solution) is None
True
"""
if location := find_empty_location(grid):
row, column = location
else:
# If the location is ``None``, then the grid is solved.
return grid
for digit in range(1, 10):
if is_safe(grid, row, column, digit):
grid[row][column] = digit
if sudoku(grid) is not None:
return grid
grid[row][column] = 0
return None
def print_solution(grid: Matrix) -> None:
"""
A function to print the solution in the form
of a 9x9 grid
"""
for row in grid:
for cell in row:
print(cell, end=" ")
print()
if __name__ == "__main__":
# make a copy of grid so that you can compare with the unmodified grid
for example_grid in (initial_grid, no_solution):
print("\nExample grid:\n" + "=" * 20)
print_solution(example_grid)
print("\nExample grid solution:")
solution = sudoku(example_grid)
if solution is not None:
print_solution(solution)
else:
print("Cannot find a solution.")
================================================
FILE: backtracking/sum_of_subsets.py
================================================
"""
The sum-of-subsets problem is: given a set of non-negative integers and a
value M, determine all possible subsets of the given set whose sum is
equal to the given M.
The summation of the chosen numbers must be equal to the given number M, and each
number can be used only once.
"""
def generate_sum_of_subsets_solutions(nums: list[int], max_sum: int) -> list[list[int]]:
"""
The main function. For list of numbers 'nums' find the subsets with sum
equal to 'max_sum'
>>> generate_sum_of_subsets_solutions(nums=[3, 34, 4, 12, 5, 2], max_sum=9)
[[3, 4, 2], [4, 5]]
>>> generate_sum_of_subsets_solutions(nums=[3, 34, 4, 12, 5, 2], max_sum=3)
[[3]]
>>> generate_sum_of_subsets_solutions(nums=[3, 34, 4, 12, 5, 2], max_sum=1)
[]
"""
result: list[list[int]] = []
path: list[int] = []
num_index = 0
remaining_nums_sum = sum(nums)
create_state_space_tree(nums, max_sum, num_index, path, result, remaining_nums_sum)
return result
def create_state_space_tree(
nums: list[int],
max_sum: int,
num_index: int,
path: list[int],
result: list[list[int]],
remaining_nums_sum: int,
) -> None:
"""
Creates a state space tree to iterate through each branch using DFS.
It terminates the branching of a node when either of the two conditions
given below is satisfied.
This algorithm follows depth-first search and backtracks when the node is not
branchable.
>>> path = []
>>> result = []
>>> create_state_space_tree(
... nums=[1],
... max_sum=1,
... num_index=0,
... path=path,
... result=result,
... remaining_nums_sum=1)
>>> path
[]
>>> result
[[1]]
"""
if sum(path) > max_sum or (remaining_nums_sum + sum(path)) < max_sum:
return
if sum(path) == max_sum:
result.append(path)
return
for index in range(num_index, len(nums)):
create_state_space_tree(
nums,
max_sum,
index + 1,
[*path, nums[index]],
result,
remaining_nums_sum - nums[index],
)
if __name__ == "__main__":
import doctest
doctest.testmod()
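# --- Illustrative check (added sketch; the inputs are assumed examples) ---
# Each number may be used at most once, so [1, 2, 3] with max_sum=3 yields
# [1, 2] and [3] but never [1, 1, 1].
assert generate_sum_of_subsets_solutions(nums=[1, 2, 3], max_sum=3) == [[1, 2], [3]]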
================================================
FILE: backtracking/word_break.py
================================================
"""
Word Break Problem is a well-known problem in computer science.
Given a string and a dictionary of words, the task is to determine if
the string can be segmented into a sequence of one or more dictionary words.
Wikipedia: https://en.wikipedia.org/wiki/Word_break_problem
"""
def backtrack(input_string: str, word_dict: set[str], start: int) -> bool:
"""
Helper function that uses backtracking to determine if a valid
word segmentation is possible starting from index 'start'.
Parameters:
input_string (str): The input string to be segmented.
word_dict (set[str]): A set of valid dictionary words.
start (int): The starting index of the substring to be checked.
Returns:
bool: True if a valid segmentation is possible, otherwise False.
Example:
>>> backtrack("leetcode", {"leet", "code"}, 0)
True
>>> backtrack("applepenapple", {"apple", "pen"}, 0)
True
>>> backtrack("catsandog", {"cats", "dog", "sand", "and", "cat"}, 0)
False
"""
# Base case: if the starting index has reached the end of the string
if start == len(input_string):
return True
# Try every possible substring from 'start' to 'end'
for end in range(start + 1, len(input_string) + 1):
if input_string[start:end] in word_dict and backtrack(
input_string, word_dict, end
):
return True
return False
def word_break(input_string: str, word_dict: set[str]) -> bool:
"""
Determines if the input string can be segmented into a sequence of
valid dictionary words using backtracking.
Parameters:
input_string (str): The input string to segment.
word_dict (set[str]): The set of valid words.
Returns:
bool: True if the string can be segmented into valid words, otherwise False.
Example:
>>> word_break("leetcode", {"leet", "code"})
True
>>> word_break("applepenapple", {"apple", "pen"})
True
>>> word_break("catsandog", {"cats", "dog", "sand", "and", "cat"})
False
>>> word_break("applepenapple", {})
False
"""
return backtrack(input_string, word_dict, 0)
================================================
FILE: backtracking/word_ladder.py
================================================
"""
Word Ladder is a classic problem in computer science.
The problem is to transform a start word into an end word
by changing one letter at a time.
Each intermediate word must be a valid word from a given list of words.
The goal is to find a transformation sequence
from the start word to the end word.
Wikipedia: https://en.wikipedia.org/wiki/Word_ladder
"""
import string
def backtrack(
current_word: str, path: list[str], end_word: str, word_set: set[str]
) -> list[str]:
"""
Helper function to perform backtracking to find the transformation
from the current_word to the end_word.
Parameters:
current_word (str): The current word in the transformation sequence.
path (list[str]): The list of transformations from begin_word to current_word.
end_word (str): The target word for transformation.
word_set (set[str]): The set of valid words for transformation.
Returns:
list[str]: The list of transformations from begin_word to end_word.
Returns an empty list if there is no valid
transformation from current_word to end_word.
Example:
>>> backtrack("hit", ["hit"], "cog", {"hot", "dot", "dog", "lot", "log", "cog"})
['hit', 'hot', 'dot', 'lot', 'log', 'cog']
>>> backtrack("hit", ["hit"], "cog", {"hot", "dot", "dog", "lot", "log"})
[]
>>> backtrack("lead", ["lead"], "gold", {"load", "goad", "gold", "lead", "lord"})
['lead', 'lead', 'load', 'goad', 'gold']
>>> backtrack("game", ["game"], "code", {"came", "cage", "code", "cade", "gave"})
['game', 'came', 'cade', 'code']
"""
# Base case: If the current word is the end word, return the path
if current_word == end_word:
return path
# Try all possible single-letter transformations
for i in range(len(current_word)):
for c in string.ascii_lowercase: # Try changing each letter
transformed_word = current_word[:i] + c + current_word[i + 1 :]
if transformed_word in word_set:
word_set.remove(transformed_word)
# Recur with the new word added to the path
result = backtrack(
transformed_word, [*path, transformed_word], end_word, word_set
)
if result: # valid transformation found
return result
word_set.add(transformed_word) # backtrack
return [] # No valid transformation found
def word_ladder(begin_word: str, end_word: str, word_set: set[str]) -> list[str]:
"""
Solve the Word Ladder problem using Backtracking and return
the list of transformations from begin_word to end_word.
Parameters:
begin_word (str): The word from which the transformation starts.
end_word (str): The target word for transformation.
word_set (set[str]): The set of valid words for transformation.
Returns:
list[str]: The list of transformations from begin_word to end_word.
Returns an empty list if there is no valid transformation.
Example:
>>> word_ladder("hit", "cog", ["hot", "dot", "dog", "lot", "log", "cog"])
['hit', 'hot', 'dot', 'lot', 'log', 'cog']
>>> word_ladder("hit", "cog", ["hot", "dot", "dog", "lot", "log"])
[]
>>> word_ladder("lead", "gold", ["load", "goad", "gold", "lead", "lord"])
['lead', 'lead', 'load', 'goad', 'gold']
>>> word_ladder("game", "code", ["came", "cage", "code", "cade", "gave"])
['game', 'came', 'cade', 'code']
"""
if end_word not in word_set: # no valid transformation possible
return []
# Perform backtracking starting from the begin_word
return backtrack(begin_word, [begin_word], end_word, word_set)
================================================
FILE: backtracking/word_search.py
================================================
"""
Author : Alexander Pantyukhin
Date : November 24, 2022
Task:
Given an m x n grid of characters board and a string word,
return true if word exists in the grid.
The word can be constructed from letters of sequentially adjacent cells,
where adjacent cells are horizontally or vertically neighboring.
The same letter cell may not be used more than once.
Example:
Matrix:
---------
|A|B|C|E|
|S|F|C|S|
|A|D|E|E|
---------
Word:
"ABCCED"
Result:
True
Implementation notes: Use backtracking approach.
At each point, check all neighbors to try to find the next letter of the word.
leetcode: https://leetcode.com/problems/word-search/
"""
def get_point_key(len_board: int, len_board_column: int, row: int, column: int) -> int:
"""
Returns the hash key of matrix indexes.
>>> get_point_key(10, 20, 1, 0)
200
"""
return len_board * len_board_column * row + column
def exits_word(
board: list[list[str]],
word: str,
row: int,
column: int,
word_index: int,
visited_points_set: set[int],
) -> bool:
"""
Return True if it's possible to search the word suffix
starting from the word_index.
>>> exits_word([["A"]], "B", 0, 0, 0, set())
False
"""
if board[row][column] != word[word_index]:
return False
if word_index == len(word) - 1:
return True
traverts_directions = [(0, 1), (0, -1), (-1, 0), (1, 0)]
len_board = len(board)
len_board_column = len(board[0])
for direction in traverts_directions:
next_i = row + direction[0]
next_j = column + direction[1]
if not (0 <= next_i < len_board and 0 <= next_j < len_board_column):
continue
key = get_point_key(len_board, len_board_column, next_i, next_j)
if key in visited_points_set:
continue
visited_points_set.add(key)
if exits_word(board, word, next_i, next_j, word_index + 1, visited_points_set):
return True
visited_points_set.remove(key)
return False
def word_exists(board: list[list[str]], word: str) -> bool:
"""
>>> word_exists([["A","B","C","E"],["S","F","C","S"],["A","D","E","E"]], "ABCCED")
True
>>> word_exists([["A","B","C","E"],["S","F","C","S"],["A","D","E","E"]], "SEE")
True
>>> word_exists([["A","B","C","E"],["S","F","C","S"],["A","D","E","E"]], "ABCB")
False
>>> word_exists([["A"]], "A")
True
>>> word_exists([["B", "A", "A"], ["A", "A", "A"], ["A", "B", "A"]], "ABB")
False
>>> word_exists([["A"]], 123)
Traceback (most recent call last):
...
ValueError: The word parameter should be a string of length greater than 0.
>>> word_exists([["A"]], "")
Traceback (most recent call last):
...
ValueError: The word parameter should be a string of length greater than 0.
>>> word_exists([[]], "AB")
Traceback (most recent call last):
...
ValueError: The board should be a non empty matrix of single chars strings.
>>> word_exists([], "AB")
Traceback (most recent call last):
...
ValueError: The board should be a non empty matrix of single chars strings.
>>> word_exists([["A"], [21]], "AB")
Traceback (most recent call last):
...
ValueError: The board should be a non empty matrix of single chars strings.
"""
# Validate board
board_error_message = (
"The board should be a non empty matrix of single chars strings."
)
len_board = len(board)
if not isinstance(board, list) or len(board) == 0:
raise ValueError(board_error_message)
for row in board:
if not isinstance(row, list) or len(row) == 0:
raise ValueError(board_error_message)
for item in row:
if not isinstance(item, str) or len(item) != 1:
raise ValueError(board_error_message)
# Validate word
if not isinstance(word, str) or len(word) == 0:
raise ValueError(
"The word parameter should be a string of length greater than 0."
)
len_board_column = len(board[0])
for i in range(len_board):
for j in range(len_board_column):
if exits_word(
board, word, i, j, 0, {get_point_key(len_board, len_board_column, i, j)}
):
return True
return False
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/README.md
================================================
# Bit manipulation
Bit manipulation is the act of manipulating bits to detect errors (Hamming code), encrypt and decrypt messages (more on that in the 'ciphers' folder), or just do anything at the lowest level of your computer.
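A few one-liners give the flavour of the basic operators (an illustrative snippet, not tied to any particular file in this folder):

```python
x = 0b1010             # 10
print(x & 1)           # 0  -> lowest bit, i.e. a parity check
print(x | 0b0101)      # 15 -> set bits
print(x ^ 0b1111)      # 5  -> flip bits
print(x << 1, x >> 1)  # 20 5 -> multiply / floor-divide by two
```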
================================================
FILE: bit_manipulation/__init__.py
================================================
================================================
FILE: bit_manipulation/binary_and_operator.py
================================================
# https://www.tutorialspoint.com/python3/bitwise_operators_example.htm
def binary_and(a: int, b: int) -> str:
"""
Take in 2 integers, convert them to binary,
return a binary number that is the
result of a binary and operation on the integers provided.
>>> binary_and(25, 32)
'0b000000'
>>> binary_and(37, 50)
'0b100000'
>>> binary_and(21, 30)
'0b10100'
>>> binary_and(58, 73)
'0b0001000'
>>> binary_and(0, 255)
'0b00000000'
>>> binary_and(256, 256)
'0b100000000'
>>> binary_and(0, -1)
Traceback (most recent call last):
...
ValueError: the value of both inputs must be positive
>>> binary_and(0, 1.1)
Traceback (most recent call last):
...
ValueError: Unknown format code 'b' for object of type 'float'
>>> binary_and("0", "1")
Traceback (most recent call last):
...
TypeError: '<' not supported between instances of 'str' and 'int'
"""
if a < 0 or b < 0:
raise ValueError("the value of both inputs must be positive")
a_binary = format(a, "b")
b_binary = format(b, "b")
max_len = max(len(a_binary), len(b_binary))
return "0b" + "".join(
str(int(char_a == "1" and char_b == "1"))
for char_a, char_b in zip(a_binary.zfill(max_len), b_binary.zfill(max_len))
)
if __name__ == "__main__":
import doctest
doctest.testmod()
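# --- Added cross-check (illustrative, not part of the original module) ---
# The hand-rolled string comparison agrees with Python's built-in & operator once
# the built-in result is zero-padded to the width of the wider operand (32 needs
# six bits, hence the "06b" format below).
assert binary_and(25, 32) == "0b" + format(25 & 32, "06b")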
================================================
FILE: bit_manipulation/binary_coded_decimal.py
================================================
def binary_coded_decimal(number: int) -> str:
"""
Find binary coded decimal (bcd) of integer base 10.
Each digit of the number is represented by a 4-bit binary.
Example:
>>> binary_coded_decimal(-2)
'0b0000'
>>> binary_coded_decimal(-1)
'0b0000'
>>> binary_coded_decimal(0)
'0b0000'
>>> binary_coded_decimal(3)
'0b0011'
>>> binary_coded_decimal(2)
'0b0010'
>>> binary_coded_decimal(12)
'0b00010010'
>>> binary_coded_decimal(987)
'0b100110000111'
"""
return "0b" + "".join(
str(bin(int(digit)))[2:].zfill(4) for digit in str(max(0, number))
)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/binary_count_setbits.py
================================================
def binary_count_setbits(a: int) -> int:
"""
Take in 1 integer, return a number that is
the number of 1's in binary representation of that number.
>>> binary_count_setbits(25)
3
>>> binary_count_setbits(36)
2
>>> binary_count_setbits(16)
1
>>> binary_count_setbits(58)
4
>>> binary_count_setbits(4294967295)
32
>>> binary_count_setbits(0)
0
>>> binary_count_setbits(-10)
Traceback (most recent call last):
...
ValueError: Input value must be a positive integer
>>> binary_count_setbits(0.8)
Traceback (most recent call last):
...
TypeError: Input value must be an 'int' type
>>> binary_count_setbits("0")
Traceback (most recent call last):
...
TypeError: '<' not supported between instances of 'str' and 'int'
"""
if a < 0:
raise ValueError("Input value must be a positive integer")
elif isinstance(a, float):
raise TypeError("Input value must be an 'int' type")
return bin(a).count("1")
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/binary_count_trailing_zeros.py
================================================
from math import log2
def binary_count_trailing_zeros(a: int) -> int:
"""
Take in 1 integer, return a number that is
the number of trailing zeros in binary representation of that number.
>>> binary_count_trailing_zeros(25)
0
>>> binary_count_trailing_zeros(36)
2
>>> binary_count_trailing_zeros(16)
4
>>> binary_count_trailing_zeros(58)
1
>>> binary_count_trailing_zeros(4294967296)
32
>>> binary_count_trailing_zeros(0)
0
>>> binary_count_trailing_zeros(-10)
Traceback (most recent call last):
...
ValueError: Input value must be a positive integer
>>> binary_count_trailing_zeros(0.8)
Traceback (most recent call last):
...
TypeError: Input value must be an 'int' type
>>> binary_count_trailing_zeros("0")
Traceback (most recent call last):
...
TypeError: '<' not supported between instances of 'str' and 'int'
"""
if a < 0:
raise ValueError("Input value must be a positive integer")
elif isinstance(a, float):
raise TypeError("Input value must be an 'int' type")
return 0 if (a == 0) else int(log2(a & -a))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/binary_or_operator.py
================================================
# https://www.tutorialspoint.com/python3/bitwise_operators_example.htm
def binary_or(a: int, b: int) -> str:
"""
Take in 2 integers, convert them to binary, and return a binary number that is the
result of a binary or operation on the integers provided.
>>> binary_or(25, 32)
'0b111001'
>>> binary_or(37, 50)
'0b110111'
>>> binary_or(21, 30)
'0b11111'
>>> binary_or(58, 73)
'0b1111011'
>>> binary_or(0, 255)
'0b11111111'
>>> binary_or(0, 256)
'0b100000000'
>>> binary_or(0, -1)
Traceback (most recent call last):
...
ValueError: the value of both inputs must be positive
>>> binary_or(0, 1.1)
Traceback (most recent call last):
...
TypeError: 'float' object cannot be interpreted as an integer
>>> binary_or("0", "1")
Traceback (most recent call last):
...
TypeError: '<' not supported between instances of 'str' and 'int'
"""
if a < 0 or b < 0:
raise ValueError("the value of both inputs must be positive")
a_binary = str(bin(a))[2:] # remove the leading "0b"
b_binary = str(bin(b))[2:]
max_len = max(len(a_binary), len(b_binary))
return "0b" + "".join(
str(int("1" in (char_a, char_b)))
for char_a, char_b in zip(a_binary.zfill(max_len), b_binary.zfill(max_len))
)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/binary_shifts.py
================================================
# Information on binary shifts:
# https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types
# https://www.interviewcake.com/concept/java/bit-shift
def logical_left_shift(number: int, shift_amount: int) -> str:
"""
Take in 2 positive integers.
'number' is the integer to be logically left shifted 'shift_amount' times.
i.e. (number << shift_amount)
Return the shifted binary representation.
>>> logical_left_shift(0, 1)
'0b00'
>>> logical_left_shift(1, 1)
'0b10'
>>> logical_left_shift(1, 5)
'0b100000'
>>> logical_left_shift(17, 2)
'0b1000100'
>>> logical_left_shift(1983, 4)
'0b111101111110000'
>>> logical_left_shift(1, -1)
Traceback (most recent call last):
...
ValueError: both inputs must be positive integers
"""
if number < 0 or shift_amount < 0:
raise ValueError("both inputs must be positive integers")
binary_number = str(bin(number))
binary_number += "0" * shift_amount
return binary_number
def logical_right_shift(number: int, shift_amount: int) -> str:
"""
Take in positive 2 integers.
'number' is the integer to be logically right shifted 'shift_amount' times.
i.e. (number >>> shift_amount)
Return the shifted binary representation.
>>> logical_right_shift(0, 1)
'0b0'
>>> logical_right_shift(1, 1)
'0b0'
>>> logical_right_shift(1, 5)
'0b0'
>>> logical_right_shift(17, 2)
'0b100'
>>> logical_right_shift(1983, 4)
'0b1111011'
>>> logical_right_shift(1, -1)
Traceback (most recent call last):
...
ValueError: both inputs must be positive integers
"""
if number < 0 or shift_amount < 0:
raise ValueError("both inputs must be positive integers")
binary_number = str(bin(number))[2:]
if shift_amount >= len(binary_number):
return "0b0"
shifted_binary_number = binary_number[: len(binary_number) - shift_amount]
return "0b" + shifted_binary_number
def arithmetic_right_shift(number: int, shift_amount: int) -> str:
"""
Take in 2 integers.
'number' is the integer to be arithmetically right shifted 'shift_amount' times.
i.e. (number >> shift_amount)
Return the shifted binary representation.
>>> arithmetic_right_shift(0, 1)
'0b00'
>>> arithmetic_right_shift(1, 1)
'0b00'
>>> arithmetic_right_shift(-1, 1)
'0b11'
>>> arithmetic_right_shift(17, 2)
'0b000100'
>>> arithmetic_right_shift(-17, 2)
'0b111011'
>>> arithmetic_right_shift(-1983, 4)
'0b111110000100'
"""
if number >= 0: # Get binary representation of positive number
binary_number = "0" + str(bin(number)).strip("-")[2:]
else: # Get binary (2's complement) representation of negative number
binary_number_length = len(bin(number)[3:]) # Find 2's complement of number
binary_number = bin(abs(number) - (1 << binary_number_length))[3:]
binary_number = (
"1" + "0" * (binary_number_length - len(binary_number)) + binary_number
)
if shift_amount >= len(binary_number):
return "0b" + binary_number[0] * len(binary_number)
return (
"0b"
+ binary_number[0] * shift_amount
+ binary_number[: len(binary_number) - shift_amount]
)
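# Worked example (illustrative trace added for clarity, not part of the original module):
# arithmetic_right_shift(-17, 2)
#   binary_number_length = len(bin(-17)[3:]) = len("10001") = 5
#   bin(17 - (1 << 5))[3:] = bin(-15)[3:] = "1111"
#   binary_number = "1" + "0" * 1 + "1111" = "101111"  (-17 as 6-bit two's complement)
#   result = "0b" + "11" + "101111"[:4] = "0b111011"   (the sign bit is replicated on the left)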
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/binary_twos_complement.py
================================================
# Information on 2's complement: https://en.wikipedia.org/wiki/Two%27s_complement
def twos_complement(number: int) -> str:
"""
Take in a negative integer 'number'.
Return the two's complement representation of 'number'.
>>> twos_complement(0)
'0b0'
>>> twos_complement(-1)
'0b11'
>>> twos_complement(-5)
'0b1011'
>>> twos_complement(-17)
'0b101111'
>>> twos_complement(-207)
'0b100110001'
>>> twos_complement(1)
Traceback (most recent call last):
...
ValueError: input must be a negative integer
"""
if number > 0:
raise ValueError("input must be a negative integer")
binary_number_length = len(bin(number)[3:])
twos_complement_number = bin(abs(number) - (1 << binary_number_length))[3:]
twos_complement_number = (
(
"1"
+ "0" * (binary_number_length - len(twos_complement_number))
+ twos_complement_number
)
if number < 0
else "0"
)
return "0b" + twos_complement_number
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/binary_xor_operator.py
================================================
# https://www.tutorialspoint.com/python3/bitwise_operators_example.htm
def binary_xor(a: int, b: int) -> str:
"""
Take in 2 integers, convert them to binary,
return a binary number that is the
result of a binary xor operation on the integers provided.
>>> binary_xor(25, 32)
'0b111001'
>>> binary_xor(37, 50)
'0b010111'
>>> binary_xor(21, 30)
'0b01011'
>>> binary_xor(58, 73)
'0b1110011'
>>> binary_xor(0, 255)
'0b11111111'
>>> binary_xor(256, 256)
'0b000000000'
>>> binary_xor(0, -1)
Traceback (most recent call last):
...
ValueError: the value of both inputs must be positive
>>> binary_xor(0, 1.1)
Traceback (most recent call last):
...
TypeError: 'float' object cannot be interpreted as an integer
>>> binary_xor("0", "1")
Traceback (most recent call last):
...
TypeError: '<' not supported between instances of 'str' and 'int'
"""
if a < 0 or b < 0:
raise ValueError("the value of both inputs must be positive")
a_binary = str(bin(a))[2:] # remove the leading "0b"
b_binary = str(bin(b))[2:] # remove the leading "0b"
max_len = max(len(a_binary), len(b_binary))
return "0b" + "".join(
str(int(char_a != char_b))
for char_a, char_b in zip(a_binary.zfill(max_len), b_binary.zfill(max_len))
)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/bitwise_addition_recursive.py
================================================
"""
Calculates the sum of two non-negative integers using bitwise operators
Wikipedia explanation: https://en.wikipedia.org/wiki/Binary_number
"""
def bitwise_addition_recursive(number: int, other_number: int) -> int:
"""
>>> bitwise_addition_recursive(4, 5)
9
>>> bitwise_addition_recursive(8, 9)
17
>>> bitwise_addition_recursive(0, 4)
4
>>> bitwise_addition_recursive(4.5, 9)
Traceback (most recent call last):
...
TypeError: Both arguments MUST be integers!
>>> bitwise_addition_recursive('4', 9)
Traceback (most recent call last):
...
TypeError: Both arguments MUST be integers!
>>> bitwise_addition_recursive('4.5', 9)
Traceback (most recent call last):
...
TypeError: Both arguments MUST be integers!
>>> bitwise_addition_recursive(-1, 9)
Traceback (most recent call last):
...
ValueError: Both arguments MUST be non-negative!
>>> bitwise_addition_recursive(1, -9)
Traceback (most recent call last):
...
ValueError: Both arguments MUST be non-negative!
"""
if not isinstance(number, int) or not isinstance(other_number, int):
raise TypeError("Both arguments MUST be integers!")
if number < 0 or other_number < 0:
raise ValueError("Both arguments MUST be non-negative!")
bitwise_sum = number ^ other_number
carry = number & other_number
if carry == 0:
return bitwise_sum
return bitwise_addition_recursive(bitwise_sum, carry << 1)
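# Worked example (illustrative trace added for clarity, not part of the original module):
# bitwise_addition_recursive(4, 5)
#   sum without carries: 4 ^ 5 = 0b100 ^ 0b101 = 0b001 = 1
#   carries:             4 & 5 = 0b100, shifted left -> 0b1000 = 8
#   recurse(1, 8): 1 ^ 8 = 9 and 1 & 8 = 0, so no carry remains and 9 is returned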
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/count_1s_brian_kernighan_method.py
================================================
def get_1s_count(number: int) -> int:
"""
Count the number of set bits in a 32 bit integer using Brian Kernighan's way.
Ref - https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan
>>> get_1s_count(25)
3
>>> get_1s_count(37)
3
>>> get_1s_count(21)
3
>>> get_1s_count(58)
4
>>> get_1s_count(0)
0
>>> get_1s_count(256)
1
>>> get_1s_count(-1)
Traceback (most recent call last):
...
ValueError: Input must be a non-negative integer
>>> get_1s_count(0.8)
Traceback (most recent call last):
...
ValueError: Input must be a non-negative integer
>>> get_1s_count("25")
Traceback (most recent call last):
...
ValueError: Input must be a non-negative integer
"""
if not isinstance(number, int) or number < 0:
raise ValueError("Input must be a non-negative integer")
count = 0
while number:
# This way we arrive at next set bit (next 1) instead of looping
# through each bit and checking for 1s hence the
# loop won't run 32 times it will only run the number of `1` times
number &= number - 1
count += 1
return count
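# Worked example (illustrative trace added for clarity, not part of the original module):
# get_1s_count(13) with 13 = 0b1101
#   13 & 12 = 0b1101 & 0b1100 = 0b1100 -> count = 1
#   12 & 11 = 0b1100 & 0b1011 = 0b1000 -> count = 2
#    8 &  7 = 0b1000 & 0b0111 = 0b0000 -> count = 3
# The loop runs once per set bit (3 times) instead of once per bit position.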
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/count_number_of_one_bits.py
================================================
from timeit import timeit
def get_set_bits_count_using_brian_kernighans_algorithm(number: int) -> int:
"""
Count the number of set bits in a 32 bit integer
>>> get_set_bits_count_using_brian_kernighans_algorithm(25)
3
>>> get_set_bits_count_using_brian_kernighans_algorithm(37)
3
>>> get_set_bits_count_using_brian_kernighans_algorithm(21)
3
>>> get_set_bits_count_using_brian_kernighans_algorithm(58)
4
>>> get_set_bits_count_using_brian_kernighans_algorithm(0)
0
>>> get_set_bits_count_using_brian_kernighans_algorithm(256)
1
>>> get_set_bits_count_using_brian_kernighans_algorithm(-1)
Traceback (most recent call last):
...
ValueError: the value of input must not be negative
"""
if number < 0:
raise ValueError("the value of input must not be negative")
result = 0
while number:
number &= number - 1
result += 1
return result
def get_set_bits_count_using_modulo_operator(number: int) -> int:
"""
Count the number of set bits in a 32 bit integer
>>> get_set_bits_count_using_modulo_operator(25)
3
>>> get_set_bits_count_using_modulo_operator(37)
3
>>> get_set_bits_count_using_modulo_operator(21)
3
>>> get_set_bits_count_using_modulo_operator(58)
4
>>> get_set_bits_count_using_modulo_operator(0)
0
>>> get_set_bits_count_using_modulo_operator(256)
1
>>> get_set_bits_count_using_modulo_operator(-1)
Traceback (most recent call last):
...
ValueError: the value of input must not be negative
"""
if number < 0:
raise ValueError("the value of input must not be negative")
result = 0
while number:
if number % 2 == 1:
result += 1
number >>= 1
return result
def benchmark() -> None:
"""
Benchmark code for comparing 2 functions, with different length int values.
Brian Kernighan's algorithm is consistently faster than using modulo_operator.
"""
def do_benchmark(number: int) -> None:
setup = "import __main__ as z"
print(f"Benchmark when {number = }:")
print(f"{get_set_bits_count_using_modulo_operator(number) = }")
timing = timeit(
f"z.get_set_bits_count_using_modulo_operator({number})", setup=setup
)
print(f"timeit() runs in {timing} seconds")
print(f"{get_set_bits_count_using_brian_kernighans_algorithm(number) = }")
timing = timeit(
f"z.get_set_bits_count_using_brian_kernighans_algorithm({number})",
setup=setup,
)
print(f"timeit() runs in {timing} seconds")
for number in (25, 37, 58, 0):
do_benchmark(number)
print()
if __name__ == "__main__":
import doctest
doctest.testmod()
benchmark()
================================================
FILE: bit_manipulation/excess_3_code.py
================================================
def excess_3_code(number: int) -> str:
"""
Find excess-3 code of integer base 10.
Add 3 to all digits in a decimal number then convert to a binary-coded decimal.
https://en.wikipedia.org/wiki/Excess-3
>>> excess_3_code(0)
'0b0011'
>>> excess_3_code(3)
'0b0110'
>>> excess_3_code(2)
'0b0101'
>>> excess_3_code(20)
'0b01010011'
>>> excess_3_code(120)
'0b010001010011'
"""
num = ""
for digit in str(max(0, number)):
num += str(bin(int(digit) + 3))[2:].zfill(4)
return "0b" + num
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/find_previous_power_of_two.py
================================================
def find_previous_power_of_two(number: int) -> int:
"""
Find the largest power of two that is less than or equal to a given integer.
https://stackoverflow.com/questions/1322510
>>> [find_previous_power_of_two(i) for i in range(18)]
[0, 1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16]
>>> find_previous_power_of_two(-5)
Traceback (most recent call last):
...
ValueError: Input must be a non-negative integer
>>> find_previous_power_of_two(10.5)
Traceback (most recent call last):
...
ValueError: Input must be a non-negative integer
"""
if not isinstance(number, int) or number < 0:
raise ValueError("Input must be a non-negative integer")
if number == 0:
return 0
power = 1
while power <= number:
power <<= 1 # Equivalent to multiplying by 2
return power >> 1 if number > 1 else 1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/find_unique_number.py
================================================
def find_unique_number(arr: list[int]) -> int:
"""
Given a list of integers where every element appears twice except for one,
this function returns the element that appears only once using bitwise XOR.
>>> find_unique_number([1, 1, 2, 2, 3])
3
>>> find_unique_number([4, 5, 4, 6, 6])
5
>>> find_unique_number([7])
7
>>> find_unique_number([10, 20, 10])
20
>>> find_unique_number([])
Traceback (most recent call last):
...
ValueError: input list must not be empty
>>> find_unique_number([1, 'a', 1])
Traceback (most recent call last):
...
TypeError: all elements must be integers
"""
if not arr:
raise ValueError("input list must not be empty")
if not all(isinstance(x, int) for x in arr):
raise TypeError("all elements must be integers")
result = 0
for num in arr:
result ^= num
return result
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/gray_code_sequence.py
================================================
def gray_code(bit_count: int) -> list:
"""
Takes in an integer n and returns an n-bit
gray code sequence.
An n-bit gray code sequence is a sequence of 2^n
integers where:
a) Every integer is in the range [0, 2^n - 1] inclusive
b) The sequence begins with 0
c) An integer appears at most once in the sequence
d) The binary representations of every pair of adjacent integers differ
by exactly one bit
e) The binary representations of the first and last integers also
differ by exactly one bit
>>> gray_code(2)
[0, 1, 3, 2]
>>> gray_code(1)
[0, 1]
>>> gray_code(3)
[0, 1, 3, 2, 6, 7, 5, 4]
>>> gray_code(-1)
Traceback (most recent call last):
...
ValueError: The given input must be positive
>>> gray_code(10.6)
Traceback (most recent call last):
...
TypeError: unsupported operand type(s) for <<: 'int' and 'float'
"""
# bit count represents no. of bits in the gray code
if bit_count < 0:
raise ValueError("The given input must be positive")
# get the generated string sequence
sequence = gray_code_sequence_string(bit_count)
#
# convert them to integers
for i in range(len(sequence)):
sequence[i] = int(sequence[i], 2)
return sequence
def gray_code_sequence_string(bit_count: int) -> list:
"""
Will output the n-bit gray code sequence as a
list of bit strings
>>> gray_code_sequence_string(2)
['00', '01', '11', '10']
>>> gray_code_sequence_string(1)
['0', '1']
"""
# The approach is a recursive one
# Base case achieved when either n = 0 or n=1
if bit_count == 0:
return ["0"]
if bit_count == 1:
return ["0", "1"]
seq_len = 1 << bit_count # defines the length of the sequence
# 1<< n is equivalent to 2^n
# recursive answer will generate answer for n-1 bits
smaller_sequence = gray_code_sequence_string(bit_count - 1)
sequence = []
# append 0 to first half of the smaller sequence generated
for i in range(seq_len // 2):
generated_no = "0" + smaller_sequence[i]
sequence.append(generated_no)
# append 1 to second half ... start from the end of the list
for i in reversed(range(seq_len // 2)):
generated_no = "1" + smaller_sequence[i]
sequence.append(generated_no)
return sequence
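# Worked example (illustrative trace added for clarity, not part of the original module):
# gray_code_sequence_string(2) is built from gray_code_sequence_string(1) = ['0', '1']:
#   prefix '0' to the first half in order:      '00', '01'
#   prefix '1' to the same list in reverse:     '11', '10'
# giving ['00', '01', '11', '10'], i.e. the integers [0, 1, 3, 2].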
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/highest_set_bit.py
================================================
def get_highest_set_bit_position(number: int) -> int:
"""
Returns position of the highest set bit of a number.
Ref - https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious
>>> get_highest_set_bit_position(25)
5
>>> get_highest_set_bit_position(37)
6
>>> get_highest_set_bit_position(1)
1
>>> get_highest_set_bit_position(4)
3
>>> get_highest_set_bit_position(0)
0
>>> get_highest_set_bit_position(0.8)
Traceback (most recent call last):
...
TypeError: Input value must be an 'int' type
"""
if not isinstance(number, int):
raise TypeError("Input value must be an 'int' type")
position = 0
while number:
position += 1
number >>= 1
return position
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/index_of_rightmost_set_bit.py
================================================
# Reference: https://www.geeksforgeeks.org/position-of-rightmost-set-bit/
def get_index_of_rightmost_set_bit(number: int) -> int:
"""
Take in a non-negative integer 'number'.
Returns the zero-based index of the first set bit in 'number' from the right.
Returns -1 if no set bit is found.
>>> get_index_of_rightmost_set_bit(0)
-1
>>> get_index_of_rightmost_set_bit(5)
0
>>> get_index_of_rightmost_set_bit(36)
2
>>> get_index_of_rightmost_set_bit(8)
3
>>> get_index_of_rightmost_set_bit(-18)
Traceback (most recent call last):
...
ValueError: Input must be a non-negative integer
>>> get_index_of_rightmost_set_bit('test')
Traceback (most recent call last):
...
ValueError: Input must be a non-negative integer
>>> get_index_of_rightmost_set_bit(1.25)
Traceback (most recent call last):
...
ValueError: Input must be a non-negative integer
"""
if not isinstance(number, int) or number < 0:
raise ValueError("Input must be a non-negative integer")
intermediate = number & ~(number - 1)
index = 0
while intermediate:
intermediate >>= 1
index += 1
return index - 1
if __name__ == "__main__":
"""
Finding the index of the rightmost set bit has some very peculiar use-cases,
especially in finding missing and/or repeating numbers in a list of
positive integers.
"""
import doctest
doctest.testmod(verbose=True)
================================================
FILE: bit_manipulation/is_even.py
================================================
def is_even(number: int) -> bool:
"""
return true if the input integer is even
Explanation: Lets take a look at the following decimal to binary conversions
2 => 10
14 => 1110
100 => 1100100
3 => 11
13 => 1101
101 => 1100101
From the above examples we can observe that
for all odd integers the lowest (last) bit is always set.
Also, 1 in binary can be represented as 001, 00001, or 0000001,
so for any odd integer n, n&1 always equals 1; otherwise the integer is even.
>>> is_even(1)
False
>>> is_even(4)
True
>>> is_even(9)
False
>>> is_even(15)
False
>>> is_even(40)
True
>>> is_even(100)
True
>>> is_even(101)
False
"""
return number & 1 == 0
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/is_power_of_two.py
================================================
"""
Author : Alexander Pantyukhin
Date : November 1, 2022
Task:
Given a positive int number, return True if this number is a power of 2,
or False otherwise.
Implementation notes: Use bit manipulation.
For example, if the number is a power of two, its bit representation is:
n = 0..100..00
n - 1 = 0..011..11
n & (n - 1) - no intersections = 0
"""
def is_power_of_two(number: int) -> bool:
"""
Return True if this number is power of 2 or False otherwise.
>>> is_power_of_two(0)
True
>>> is_power_of_two(1)
True
>>> is_power_of_two(2)
True
>>> is_power_of_two(4)
True
>>> is_power_of_two(6)
False
>>> is_power_of_two(8)
True
>>> is_power_of_two(17)
False
>>> is_power_of_two(-1)
Traceback (most recent call last):
...
ValueError: number must not be negative
>>> is_power_of_two(1.2)
Traceback (most recent call last):
...
TypeError: unsupported operand type(s) for &: 'float' and 'float'
# Test 2**i for all exponents i from 0 to 9,999
>>> all(is_power_of_two(int(2 ** i)) for i in range(10000))
True
"""
if number < 0:
raise ValueError("number must not be negative")
return number & (number - 1) == 0
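# Worked example (illustrative trace added for clarity, not part of the original module):
#   8 = 0b1000, 7 = 0b0111, 8 & 7 = 0       -> True
#   6 = 0b0110, 5 = 0b0101, 6 & 5 = 0b0100  -> False
# Note that 0 & -1 == 0, so this function also reports 0 as a power of two.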
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/largest_pow_of_two_le_num.py
================================================
"""
Author : Naman Sharma
Date : October 2, 2023
Task:
To Find the largest power of 2 less than or equal to a given number.
Implementation notes: Use bit manipulation.
We start from 1 & left shift the set bit to check if (res<<1)<=number.
Each left bit shift represents a pow of 2.
For example:
number: 15
res: 1 0b1
2 0b10
4 0b100
8 0b1000
16 0b10000 (Exit)
"""
def largest_pow_of_two_le_num(number: int) -> int:
"""
Return the largest power of two less than or equal to a number.
>>> largest_pow_of_two_le_num(0)
0
>>> largest_pow_of_two_le_num(1)
1
>>> largest_pow_of_two_le_num(-1)
0
>>> largest_pow_of_two_le_num(3)
2
>>> largest_pow_of_two_le_num(15)
8
>>> largest_pow_of_two_le_num(99)
64
>>> largest_pow_of_two_le_num(178)
128
>>> largest_pow_of_two_le_num(999999)
524288
>>> largest_pow_of_two_le_num(99.9)
Traceback (most recent call last):
...
TypeError: Input value must be an 'int' type
"""
if isinstance(number, float):
raise TypeError("Input value must be an 'int' type")
if number <= 0:
return 0
res = 1
while (res << 1) <= number:
res <<= 1
return res
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/missing_number.py
================================================
def find_missing_number(nums: list[int]) -> int:
"""
Finds the missing number in a list of consecutive integers.
Args:
nums: A list of integers.
Returns:
The missing number.
Example:
>>> find_missing_number([0, 1, 3, 4])
2
>>> find_missing_number([4, 3, 1, 0])
2
>>> find_missing_number([-4, -3, -1, 0])
-2
>>> find_missing_number([-2, 2, 1, 3, 0])
-1
>>> find_missing_number([1, 3, 4, 5, 6])
2
>>> find_missing_number([6, 5, 4, 2, 1])
3
>>> find_missing_number([6, 1, 5, 3, 4])
2
"""
low = min(nums)
high = max(nums)
missing_number = high
for i in range(low, high):
missing_number ^= i ^ nums[i - low]
return missing_number
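# Worked example (illustrative trace added for clarity, not part of the original module):
# find_missing_number([0, 1, 3, 4]): low = 0, high = 4
#   The loop XORs every value in range(low, high) and every list element into
#   missing_number (which starts as high). Each value actually present appears
#   twice and cancels out, leaving the missing value:
#   4 ^ (0^0) ^ (1^1) ^ (2^3) ^ (3^4) = 2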
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/numbers_different_signs.py
================================================
"""
Author : Alexander Pantyukhin
Date : November 30, 2022
Task:
Given two int numbers, return True if these numbers have opposite signs,
or False otherwise.
Implementation notes: Use bit manipulation.
Use XOR for two numbers.
"""
def different_signs(num1: int, num2: int) -> bool:
"""
Return True if the numbers have opposite signs, False otherwise.
>>> different_signs(1, -1)
True
>>> different_signs(1, 1)
False
>>> different_signs(1000000000000000000000000000, -1000000000000000000000000000)
True
>>> different_signs(-1000000000000000000000000000, 1000000000000000000000000000)
True
>>> different_signs(50, 278)
False
>>> different_signs(0, 2)
False
>>> different_signs(2, 0)
False
"""
return num1 ^ num2 < 0
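# Illustrative note (added for clarity, not part of the original module):
# In two's complement the most significant bit is the sign bit, so num1 ^ num2
# has its sign bit set (i.e. is negative) exactly when the signs differ:
#   1 ^ -1 = -2 < 0 -> True        1 ^ 1 = 0 -> False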
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/power_of_4.py
================================================
"""
Task:
Given a positive int number, return True if this number is a power of 4,
or False otherwise.
Implementation notes: Use bit manipulation.
For example, if the number is a power of 2, its bit representation is:
n = 0..100..00
n - 1 = 0..011..11
n & (n - 1) - no intersections = 0
If the number is a power of 4 then it should be a power of 2
and the set bit should be at an odd position.
"""
def power_of_4(number: int) -> bool:
"""
Return True if this number is power of 4 or False otherwise.
>>> power_of_4(0)
Traceback (most recent call last):
...
ValueError: number must be positive
>>> power_of_4(1)
True
>>> power_of_4(2)
False
>>> power_of_4(4)
True
>>> power_of_4(6)
False
>>> power_of_4(8)
False
>>> power_of_4(17)
False
>>> power_of_4(64)
True
>>> power_of_4(-1)
Traceback (most recent call last):
...
ValueError: number must be positive
>>> power_of_4(1.2)
Traceback (most recent call last):
...
TypeError: number must be an integer
"""
if not isinstance(number, int):
raise TypeError("number must be an integer")
if number <= 0:
raise ValueError("number must be positive")
if number & (number - 1) == 0:
c = 0
while number:
c += 1
number >>= 1
return c % 2 == 1
else:
return False
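# Worked example (illustrative trace added for clarity, not part of the original module):
#   64 = 0b1000000: 64 & 63 = 0 (power of two), bit length c = 7 (odd)  -> True
#    8 = 0b0001000:  8 &  7 = 0 (power of two), bit length c = 4 (even) -> False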
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/reverse_bits.py
================================================
def get_reverse_bit_string(number: int) -> str:
"""
Return the reverse bit string of a 32 bit integer
>>> get_reverse_bit_string(9)
'10010000000000000000000000000000'
>>> get_reverse_bit_string(43)
'11010100000000000000000000000000'
>>> get_reverse_bit_string(2873)
'10011100110100000000000000000000'
>>> get_reverse_bit_string(2550136832)
'00000000000000000000000000011001'
>>> get_reverse_bit_string("this is not a number")
Traceback (most recent call last):
...
TypeError: operation can not be conducted on an object of type str
"""
if not isinstance(number, int):
msg = (
"operation can not be conducted on an object of type "
f"{type(number).__name__}"
)
raise TypeError(msg)
bit_string = ""
for _ in range(32):
bit_string += str(number % 2)
number >>= 1
return bit_string
def reverse_bit(number: int) -> int:
"""
Take in a 32 bit integer, reverse its bits, return a 32 bit integer result
>>> reverse_bit(25)
2550136832
>>> reverse_bit(37)
2751463424
>>> reverse_bit(21)
2818572288
>>> reverse_bit(58)
1543503872
>>> reverse_bit(0)
0
>>> reverse_bit(256)
8388608
>>> reverse_bit(2550136832)
25
>>> reverse_bit(-1)
Traceback (most recent call last):
...
ValueError: The value of input must be non-negative
>>> reverse_bit(1.1)
Traceback (most recent call last):
...
TypeError: Input value must be an 'int' type
>>> reverse_bit("0")
Traceback (most recent call last):
...
TypeError: Input value must be an 'int' type
"""
if not isinstance(number, int):
raise TypeError("Input value must be an 'int' type")
if number < 0:
raise ValueError("The value of input must be non-negative")
result = 0
# iterator over [0 to 31], since we are dealing with a 32 bit integer
for _ in range(32):
# left shift the bits by unity
result <<= 1
# get the end bit
end_bit = number & 1
# right shift the bits by unity
number >>= 1
# add that bit to our answer
result |= end_bit
return result
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/single_bit_manipulation_operations.py
================================================
#!/usr/bin/env python3
"""Provide the functionality to manipulate a single bit."""
def set_bit(number: int, position: int) -> int:
"""
Set the bit at position to 1.
Details: perform bitwise or for given number and X.
Where X is a number with all the bits - zeroes and bit on given
position - one.
>>> set_bit(0b1101, 1) # 0b1111
15
>>> set_bit(0b0, 5) # 0b100000
32
>>> set_bit(0b1111, 1) # 0b1111
15
"""
return number | (1 << position)
def clear_bit(number: int, position: int) -> int:
"""
Set the bit at position to 0.
Details: perform bitwise and for given number and X.
Where X is a number with all the bits - ones and bit on given
position - zero.
>>> clear_bit(0b10010, 1) # 0b10000
16
>>> clear_bit(0b0, 5) # 0b0
0
"""
return number & ~(1 << position)
def flip_bit(number: int, position: int) -> int:
"""
Flip the bit at position.
Details: perform bitwise xor for given number and X.
Where X is a number with all the bits - zeroes and bit on given
position - one.
>>> flip_bit(0b101, 1) # 0b111
7
>>> flip_bit(0b101, 0) # 0b100
4
"""
return number ^ (1 << position)
def is_bit_set(number: int, position: int) -> bool:
"""
Is the bit at position set?
Details: Shift the bit at position to be the first (smallest) bit.
Then check if the first bit is set by anding the shifted number with 1.
>>> is_bit_set(0b1010, 0)
False
>>> is_bit_set(0b1010, 1)
True
>>> is_bit_set(0b1010, 2)
False
>>> is_bit_set(0b1010, 3)
True
>>> is_bit_set(0b0, 17)
False
"""
return ((number >> position) & 1) == 1
def get_bit(number: int, position: int) -> int:
"""
Get the bit at the given position
Details: perform bitwise and for the given number and X,
Where X is a number with all the bits - zeroes and bit on given position - one.
If the result is not equal to 0, then the bit on the given position is 1, else 0.
>>> get_bit(0b1010, 0)
0
>>> get_bit(0b1010, 1)
1
>>> get_bit(0b1010, 2)
0
>>> get_bit(0b1010, 3)
1
"""
return int((number & (1 << position)) != 0)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: bit_manipulation/swap_all_odd_and_even_bits.py
================================================
def show_bits(before: int, after: int) -> str:
"""
>>> print(show_bits(0, 0xFFFF))
0: 00000000
65535: 1111111111111111
"""
return f"{before:>5}: {before:08b}\n{after:>5}: {after:08b}"
def swap_odd_even_bits(num: int) -> int:
"""
1. We use bitwise AND operations with the masks 0xAAAAAAAA and 0x55555555 to
separate the even bits (the 2nd, 4th, 6th, ... bits) from the odd bits
(the 1st, 3rd, 5th, ... bits) of the input number.
2. We then right-shift the even bits by 1 position and left-shift the odd bits by
1 position to swap them.
3. Finally, we combine the swapped even and odd bits using a bitwise OR operation
to obtain the final result.
>>> print(show_bits(0, swap_odd_even_bits(0)))
0: 00000000
0: 00000000
>>> print(show_bits(1, swap_odd_even_bits(1)))
1: 00000001
2: 00000010
>>> print(show_bits(2, swap_odd_even_bits(2)))
2: 00000010
1: 00000001
>>> print(show_bits(3, swap_odd_even_bits(3)))
3: 00000011
3: 00000011
>>> print(show_bits(4, swap_odd_even_bits(4)))
4: 00000100
8: 00001000
>>> print(show_bits(5, swap_odd_even_bits(5)))
5: 00000101
10: 00001010
>>> print(show_bits(6, swap_odd_even_bits(6)))
6: 00000110
9: 00001001
>>> print(show_bits(23, swap_odd_even_bits(23)))
23: 00010111
43: 00101011
"""
# Get all even bits - 0xAAAAAAAA is a 32-bit number with all even bits set to 1
even_bits = num & 0xAAAAAAAA
# Get all odd bits - 0x55555555 is a 32-bit number with all odd bits set to 1
odd_bits = num & 0x55555555
# Right shift even bits and left shift odd bits and swap them
return even_bits >> 1 | odd_bits << 1
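# Worked example (illustrative trace added for clarity, not part of the original module):
# swap_odd_even_bits(23), 23 = 0b00010111
#   even_bits = 23 & 0xAAAAAAAA = 0b00000010 -> shifted right = 0b00000001
#   odd_bits  = 23 & 0x55555555 = 0b00010101 -> shifted left  = 0b00101010
#   combined: 0b00000001 | 0b00101010 = 0b00101011 = 43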
if __name__ == "__main__":
import doctest
doctest.testmod()
for i in (-1, 0, 1, 2, 3, 4, 23, 24):
print(show_bits(i, swap_odd_even_bits(i)), "\n")
================================================
FILE: blockchain/README.md
================================================
# Blockchain
A Blockchain is a type of **distributed ledger** technology (DLT) that consists of a growing list of records, called **blocks**, that are securely linked together using **cryptography**.
Let's break down the terminology in the above definition. We find the following terms:
- Digital Ledger Technology (DLT)
- Blocks
- Cryptography
## Digital Ledger Technology
Blockchain is also called distributed ledger technology. It is simply the opposite of a centralized database. Firstly, what is a **ledger**? A ledger is a book or collection of accounts that records account transactions.
*Why is Blockchain addressed as a digital ledger if it can record more than account transactions? What other transaction details and information can it hold?*
Digital Ledger Technology is simply a ledger that is shared among multiple nodes, so there is no need for a central authority to hold the information. How, then, does it differ from a central database, and what are its benefits?
Suppose an organization has 4 branches whose data are stored in a centralized database. Even when one branch needs data from the ledger, it needs approval from the database administrator, and anyone who hacks the central database can tamper with and control all the data.
Now assume every branch has a copy of the ledger: once anything is added to the ledger by any branch, it is automatically reflected in the ledgers of all other branches. This is done using a peer-to-peer network.
This means that even if information is tampered with in one branch, we can find out. If one branch is hacked we can be alerted and safeguard the other branches. Now think of these branches as computers or nodes, and of the ledger as a transaction record or digital receipt. If a ledger is hacked on one node, we can detect it because it will not match the information held by the other nodes. This is the concept of Digital Ledger Technology.
*Is it required for all nodes to have access to all information in other nodes? Wouldn't this require enormous storage space in each node?*
## Blocks
In short, a block is nothing but a collection of records with a labelled header. Blocks are connected cryptographically: once a new block is added to the chain, the previous block is linked to it, or more precisely locked, and hence will remain unaltered. We can understand this concept better once we get a clear understanding of the working mechanism of blockchain.
## Cryptography
Cryptography is the practice and study of secure communication techniques amid adversarial behavior. More broadly, cryptography is the creation and analysis of protocols that prevent third parties or the general public from accessing private messages.
*Which cryptography technology is most widely used in blockchain and why?*
So, in general, blockchain technology is a distributed record holder that records the information about ownership of an asset. To define precisely,
> Blockchain is a distributed, immutable ledger that makes it easier to record transactions and track assets in a corporate network.
An asset could be tangible (such as a house, car, cash, or land) or intangible (such as intellectual property, patents, copyrights, or branding). A blockchain network can track and sell almost anything of value, lowering risk and costs for everyone involved.
So this is all about the introduction to blockchain technology.
================================================
FILE: blockchain/__init__.py
================================================
================================================
FILE: blockchain/diophantine_equation.py
================================================
from __future__ import annotations
from maths.greatest_common_divisor import greatest_common_divisor
def diophantine(a: int, b: int, c: int) -> tuple[float, float]:
"""
Diophantine Equation : Given integers a,b,c ( at least one of a and b != 0), the
diophantine equation a*x + b*y = c has a solution (where x and y are integers)
iff greatest_common_divisor(a,b) divides c.
GCD ( Greatest Common Divisor ) or HCF ( Highest Common Factor )
>>> diophantine(10,6,14)
(-7.0, 14.0)
>>> diophantine(391,299,-69)
(9.0, -12.0)
But the above equation has more solutions, e.g., x = -4, y = 5.
That's why we need the diophantine_all_soln function.
"""
assert (
c % greatest_common_divisor(a, b) == 0
) # greatest_common_divisor(a,b) is in maths directory
(d, x, y) = extended_gcd(a, b) # extended_gcd(a,b) function implemented below
r = c / d
return (r * x, r * y)
def diophantine_all_soln(a: int, b: int, c: int, n: int = 2) -> None:
"""
Lemma : if n|ab and gcd(a,n) = 1, then n|b.
Finding All solutions of Diophantine Equations:
Theorem : Let gcd(a,b) = d, a = d*p, b = d*q. If (x0,y0) is a solution of
Diophantine Equation a*x + b*y = c. a*x0 + b*y0 = c, then all the
solutions have the form a(x0 + t*q) + b(y0 - t*p) = c,
where t is an arbitrary integer.
n is the number of solutions you want, n = 2 by default
>>> diophantine_all_soln(10, 6, 14)
-7.0 14.0
-4.0 9.0
>>> diophantine_all_soln(10, 6, 14, 4)
-7.0 14.0
-4.0 9.0
-1.0 4.0
2.0 -1.0
>>> diophantine_all_soln(391, 299, -69, n = 4)
9.0 -12.0
22.0 -29.0
35.0 -46.0
48.0 -63.0
"""
(x0, y0) = diophantine(a, b, c) # Initial value
d = greatest_common_divisor(a, b)
p = a // d
q = b // d
for i in range(n):
x = x0 + i * q
y = y0 - i * p
print(x, y)
def extended_gcd(a: int, b: int) -> tuple[int, int, int]:
"""
Extended Euclid's Algorithm : If d divides a and b and d = a*x + b*y for integers
x and y, then d = gcd(a,b)
>>> extended_gcd(10, 6)
(2, -1, 2)
>>> extended_gcd(7, 5)
(1, -2, 3)
"""
assert a >= 0
assert b >= 0
if b == 0:
d, x, y = a, 1, 0
else:
(d, p, q) = extended_gcd(b, a % b)
x = q
y = p - q * (a // b)
assert a % d == 0
assert b % d == 0
assert d == a * x + b * y
return (d, x, y)
if __name__ == "__main__":
from doctest import testmod
testmod(name="diophantine", verbose=True)
testmod(name="diophantine_all_soln", verbose=True)
testmod(name="extended_gcd", verbose=True)
testmod(name="greatest_common_divisor", verbose=True)
================================================
FILE: boolean_algebra/README.md
================================================
# Boolean Algebra
Boolean algebra is used to do arithmetic with bits, which take the values True (1) or False (0).
There are three basic operations: 'and', 'or' and 'not'.
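In Python, for instance, these operations correspond directly to the built-in `and`, `or` and `not` keywords (or the bitwise `&`, `|` and `^` operators when working on individual bits). A minimal illustrative sketch, independent of the modules in this directory:

```python
# Basic boolean algebra on single bits: 0 is False, 1 is True
a, b = 1, 0

print(int(a and b))  # AND -> 0
print(int(a or b))   # OR  -> 1
print(int(not a))    # NOT -> 0
print(a & b, a | b, a ^ b)  # bitwise AND, OR, XOR on the same bits -> 0 1 1
```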
================================================
FILE: boolean_algebra/__init__.py
================================================
================================================
FILE: boolean_algebra/and_gate.py
================================================
"""
An AND Gate is a logic gate in boolean algebra which results in 1 (True) if all the
inputs are 1 (True), and 0 (False) otherwise.
Following is the truth table of a Two Input AND Gate:
------------------------------
| Input 1 | Input 2 | Output |
------------------------------
| 0 | 0 | 0 |
| 0 | 1 | 0 |
| 1 | 0 | 0 |
| 1 | 1 | 1 |
------------------------------
Refer - https://www.geeksforgeeks.org/logic-gates/
"""
def and_gate(input_1: int, input_2: int) -> int:
"""
Calculate AND of the input values
>>> and_gate(0, 0)
0
>>> and_gate(0, 1)
0
>>> and_gate(1, 0)
0
>>> and_gate(1, 1)
1
"""
return int(input_1 and input_2)
def n_input_and_gate(inputs: list[int]) -> int:
"""
Calculate AND of a list of input values
>>> n_input_and_gate([1, 0, 1, 1, 0])
0
>>> n_input_and_gate([1, 1, 1, 1, 1])
1
"""
return int(all(inputs))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: boolean_algebra/imply_gate.py
================================================
"""
An IMPLY Gate is a logic gate in boolean algebra whose output is 1 if input 1 is 0,
or, when input 1 is 1, if input 2 is also 1.
In other words, it is true if input 1 implies input 2.
Following is the truth table of an IMPLY Gate:
------------------------------
| Input 1 | Input 2 | Output |
------------------------------
| 0 | 0 | 1 |
| 0 | 1 | 1 |
| 1 | 0 | 0 |
| 1 | 1 | 1 |
------------------------------
Refer - https://en.wikipedia.org/wiki/IMPLY_gate
"""
def imply_gate(input_1: int, input_2: int) -> int:
"""
Calculate IMPLY of the input values
>>> imply_gate(0, 0)
1
>>> imply_gate(0, 1)
1
>>> imply_gate(1, 0)
0
>>> imply_gate(1, 1)
1
"""
return int(input_1 == 0 or input_2 == 1)
def recursive_imply_list(input_list: list[int]) -> int:
"""
Recursively calculates the implication of a list.
Strictly the implication is applied consecutively left to right:
( (a -> b) -> c ) -> d ...
>>> recursive_imply_list([])
Traceback (most recent call last):
...
ValueError: Input list must contain at least two elements
>>> recursive_imply_list([0])
Traceback (most recent call last):
...
ValueError: Input list must contain at least two elements
>>> recursive_imply_list([1])
Traceback (most recent call last):
...
ValueError: Input list must contain at least two elements
>>> recursive_imply_list([0, 0])
1
>>> recursive_imply_list([0, 1])
1
>>> recursive_imply_list([1, 0])
0
>>> recursive_imply_list([1, 1])
1
>>> recursive_imply_list([0, 0, 0])
0
>>> recursive_imply_list([0, 0, 1])
1
>>> recursive_imply_list([0, 1, 0])
0
>>> recursive_imply_list([0, 1, 1])
1
>>> recursive_imply_list([1, 0, 0])
1
>>> recursive_imply_list([1, 0, 1])
1
>>> recursive_imply_list([1, 1, 0])
0
>>> recursive_imply_list([1, 1, 1])
1
"""
if len(input_list) < 2:
raise ValueError("Input list must contain at least two elements")
first_implication = imply_gate(input_list[0], input_list[1])
if len(input_list) == 2:
return first_implication
new_list = [first_implication, *input_list[2:]]
return recursive_imply_list(new_list)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: boolean_algebra/karnaugh_map_simplification.py
================================================
"""
https://en.wikipedia.org/wiki/Karnaugh_map
https://www.allaboutcircuits.com/technical-articles/karnaugh-map-boolean-algebraic-simplification-technique
"""
def simplify_kmap(kmap: list[list[int]]) -> str:
"""
Simplify the Karnaugh map.
>>> simplify_kmap(kmap=[[0, 1], [1, 1]])
"A'B + AB' + AB"
>>> simplify_kmap(kmap=[[0, 0], [0, 0]])
''
>>> simplify_kmap(kmap=[[0, 1], [1, -1]])
"A'B + AB' + AB"
>>> simplify_kmap(kmap=[[0, 1], [1, 2]])
"A'B + AB' + AB"
>>> simplify_kmap(kmap=[[0, 1], [1, 1.1]])
"A'B + AB' + AB"
>>> simplify_kmap(kmap=[[0, 1], [1, 'a']])
"A'B + AB' + AB"
"""
simplified_f = []
for a, row in enumerate(kmap):
for b, item in enumerate(row):
if item:
term = ("A" if a else "A'") + ("B" if b else "B'")
simplified_f.append(term)
return " + ".join(simplified_f)
def main() -> None:
"""
Main function to create and simplify a K-Map.
>>> main()
[0, 1]
[1, 1]
Simplified Expression:
A'B + AB' + AB
"""
kmap = [[0, 1], [1, 1]]
# Manually generate the product of [0, 1] and [0, 1]
for row in kmap:
print(row)
print("Simplified Expression:")
print(simplify_kmap(kmap))
if __name__ == "__main__":
main()
print(f"{simplify_kmap(kmap=[[0, 1], [1, 1]]) = }")
================================================
FILE: boolean_algebra/multiplexer.py
================================================
def mux(input0: int, input1: int, select: int) -> int:
"""
Implement a 2-to-1 Multiplexer.
:param input0: The first input value (0 or 1).
:param input1: The second input value (0 or 1).
:param select: The select signal (0 or 1) to choose between input0 and input1.
:return: The output based on the select signal. input1 if select else input0.
https://www.electrically4u.com/solved-problems-on-multiplexer
https://en.wikipedia.org/wiki/Multiplexer
>>> mux(0, 1, 0)
0
>>> mux(0, 1, 1)
1
>>> mux(1, 0, 0)
1
>>> mux(1, 0, 1)
0
>>> mux(2, 1, 0)
Traceback (most recent call last):
...
ValueError: Inputs and select signal must be 0 or 1
>>> mux(0, -1, 0)
Traceback (most recent call last):
...
ValueError: Inputs and select signal must be 0 or 1
>>> mux(0, 1, 1.1)
Traceback (most recent call last):
...
ValueError: Inputs and select signal must be 0 or 1
"""
if all(i in (0, 1) for i in (input0, input1, select)):
return input1 if select else input0
raise ValueError("Inputs and select signal must be 0 or 1")
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: boolean_algebra/nand_gate.py
================================================
"""
A NAND Gate is a logic gate in boolean algebra which results in 0 (False) if both
the inputs are 1, and 1 (True) otherwise. It is equivalent to
an AND gate followed by a NOT gate.
Following is the truth table of a NAND Gate:
------------------------------
| Input 1 | Input 2 | Output |
------------------------------
| 0 | 0 | 1 |
| 0 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 0 |
------------------------------
Refer - https://www.geeksforgeeks.org/logic-gates-in-python/
"""
def nand_gate(input_1: int, input_2: int) -> int:
"""
Calculate NAND of the input values
>>> nand_gate(0, 0)
1
>>> nand_gate(0, 1)
1
>>> nand_gate(1, 0)
1
>>> nand_gate(1, 1)
0
"""
return int(not (input_1 and input_2))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: boolean_algebra/nimply_gate.py
================================================
"""
An NIMPLY Gate is a logic gate in boolean algebra whose output is 1 only if
input 1 is 1 and input 2 is 0, and 0 otherwise.
It is false whenever input 1 implies input 2; it is the negated form of IMPLY.
Following is the truth table of an NIMPLY Gate:
------------------------------
| Input 1 | Input 2 | Output |
------------------------------
| 0 | 0 | 0 |
| 0 | 1 | 0 |
| 1 | 0 | 1 |
| 1 | 1 | 0 |
------------------------------
Refer - https://en.wikipedia.org/wiki/NIMPLY_gate
"""
def nimply_gate(input_1: int, input_2: int) -> int:
"""
Calculate NIMPLY of the input values
>>> nimply_gate(0, 0)
0
>>> nimply_gate(0, 1)
0
>>> nimply_gate(1, 0)
1
>>> nimply_gate(1, 1)
0
"""
return int(input_1 == 1 and input_2 == 0)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: boolean_algebra/nor_gate.py
================================================
"""
A NOR Gate is a logic gate in boolean algebra which results in false(0) if any of the
inputs is 1, and True(1) if all inputs are 0.
Following is the truth table of a NOR Gate:
Truth Table of NOR Gate:
| Input 1 | Input 2 | Output |
| 0 | 0 | 1 |
| 0 | 1 | 0 |
| 1 | 0 | 0 |
| 1 | 1 | 0 |
Code provided by Akshaj Vishwanathan
https://www.geeksforgeeks.org/logic-gates-in-python
"""
from collections.abc import Callable
def nor_gate(input_1: int, input_2: int) -> int:
"""
>>> nor_gate(0, 0)
1
>>> nor_gate(0, 1)
0
>>> nor_gate(1, 0)
0
>>> nor_gate(1, 1)
0
>>> nor_gate(0.0, 0.0)
1
>>> nor_gate(0, -7)
0
"""
return int(input_1 == input_2 == 0)
def truth_table(func: Callable) -> str:
"""
>>> print(truth_table(nor_gate))
Truth Table of NOR Gate:
| Input 1 | Input 2 | Output |
| 0 | 0 | 1 |
| 0 | 1 | 0 |
| 1 | 0 | 0 |
| 1 | 1 | 0 |
"""
def make_table_row(items: list | tuple) -> str:
"""
>>> make_table_row(("One", "Two", "Three"))
'| One | Two | Three |'
"""
return f"| {' | '.join(f'{item:^8}' for item in items)} |"
return "\n".join(
(
"Truth Table of NOR Gate:",
make_table_row(("Input 1", "Input 2", "Output")),
*[make_table_row((i, j, func(i, j))) for i in (0, 1) for j in (0, 1)],
)
)
if __name__ == "__main__":
import doctest
doctest.testmod()
print(truth_table(nor_gate))
================================================
FILE: boolean_algebra/not_gate.py
================================================
"""
A NOT Gate is a logic gate in boolean algebra which results in 0 (False) if the
input is 1 (high), and 1 (True) if the input is 0 (low).
Following is the truth table of a NOT Gate:
------------------------------
| Input | Output |
------------------------------
| 0 | 1 |
| 1 | 0 |
------------------------------
Refer - https://www.geeksforgeeks.org/logic-gates-in-python/
"""
def not_gate(input_1: int) -> int:
"""
Calculate NOT of the input values
>>> not_gate(0)
1
>>> not_gate(1)
0
"""
return 1 if input_1 == 0 else 0
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: boolean_algebra/or_gate.py
================================================
"""
An OR Gate is a logic gate in boolean algebra which results in 0 (False) if both the
inputs are 0, and 1 (True) otherwise.
Following is the truth table of an OR Gate:
------------------------------
| Input 1 | Input 2 | Output |
------------------------------
| 0 | 0 | 0 |
| 0 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 1 |
------------------------------
Refer - https://www.geeksforgeeks.org/logic-gates-in-python/
"""
def or_gate(input_1: int, input_2: int) -> int:
"""
Calculate OR of the input values
>>> or_gate(0, 0)
0
>>> or_gate(0, 1)
1
>>> or_gate(1, 0)
1
>>> or_gate(1, 1)
1
"""
return int((input_1, input_2).count(1) != 0)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: boolean_algebra/quine_mc_cluskey.py
================================================
from __future__ import annotations
from collections.abc import Sequence
from typing import Literal
def compare_string(string1: str, string2: str) -> str | Literal[False]:
"""
>>> compare_string('0010','0110')
'0_10'
>>> compare_string('0110','1101')
False
"""
list1 = list(string1)
list2 = list(string2)
count = 0
for i in range(len(list1)):
if list1[i] != list2[i]:
count += 1
list1[i] = "_"
if count > 1:
return False
else:
return "".join(list1)
def check(binary: list[str]) -> list[str]:
"""
>>> check(['0.00.01.5'])
['0.00.01.5']
"""
pi = []
while True:
check1 = ["$"] * len(binary)
temp = []
for i in range(len(binary)):
for j in range(i + 1, len(binary)):
k = compare_string(binary[i], binary[j])
if k is False:
check1[i] = "*"
check1[j] = "*"
temp.append("X")
for i in range(len(binary)):
if check1[i] == "$":
pi.append(binary[i])
if len(temp) == 0:
return pi
binary = list(set(temp))
def decimal_to_binary(no_of_variable: int, minterms: Sequence[float]) -> list[str]:
"""
>>> decimal_to_binary(3,[1.5])
['0.00.01.5']
"""
temp = []
for minterm in minterms:
string = ""
for _ in range(no_of_variable):
string = str(minterm % 2) + string
minterm //= 2
temp.append(string)
return temp
def is_for_table(string1: str, string2: str, count: int) -> bool:
"""
>>> is_for_table('__1','011',2)
True
>>> is_for_table('01_','001',1)
False
"""
list1 = list(string1)
list2 = list(string2)
count_n = sum(item1 != item2 for item1, item2 in zip(list1, list2))
return count_n == count
def selection(chart: list[list[int]], prime_implicants: list[str]) -> list[str]:
"""
>>> selection([[1]],['0.00.01.5'])
['0.00.01.5']
>>> selection([[1]],['0.00.01.5'])
['0.00.01.5']
"""
temp = []
select = [0] * len(chart)
for i in range(len(chart[0])):
count = sum(row[i] == 1 for row in chart)
if count == 1:
rem = max(j for j, row in enumerate(chart) if row[i] == 1)
select[rem] = 1
for i, item in enumerate(select):
if item != 1:
continue
for j in range(len(chart[0])):
if chart[i][j] != 1:
continue
for row in chart:
row[j] = 0
temp.append(prime_implicants[i])
while True:
counts = [chart[i].count(1) for i in range(len(chart))]
max_n = max(counts)
rem = counts.index(max_n)
if max_n == 0:
return temp
temp.append(prime_implicants[rem])
for j in range(len(chart[0])):
if chart[rem][j] != 1:
continue
for i in range(len(chart)):
chart[i][j] = 0
def prime_implicant_chart(
prime_implicants: list[str], binary: list[str]
) -> list[list[int]]:
"""
>>> prime_implicant_chart(['0.00.01.5'],['0.00.01.5'])
[[1]]
"""
chart = [[0 for x in range(len(binary))] for x in range(len(prime_implicants))]
for i in range(len(prime_implicants)):
count = prime_implicants[i].count("_")
for j in range(len(binary)):
if is_for_table(prime_implicants[i], binary[j], count):
chart[i][j] = 1
return chart
def main() -> None:
no_of_variable = int(input("Enter the no. of variables\n"))
minterms = [
float(x)
for x in input(
"Enter the decimal representation of Minterms 'Spaces Separated'\n"
).split()
]
binary = decimal_to_binary(no_of_variable, minterms)
prime_implicants = check(binary)
print("Prime Implicants are:")
print(prime_implicants)
chart = prime_implicant_chart(prime_implicants, binary)
essential_prime_implicants = selection(chart, prime_implicants)
print("Essential Prime Implicants are:")
print(essential_prime_implicants)
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: boolean_algebra/xnor_gate.py
================================================
"""
An XNOR Gate is a logic gate in boolean algebra which results in 0 (False) if the
inputs are different, and 1 (True) if the inputs are the same.
It is equivalent to an XOR gate followed by a NOT gate.
Following is the truth table of a XNOR Gate:
------------------------------
| Input 1 | Input 2 | Output |
------------------------------
| 0 | 0 | 1 |
| 0 | 1 | 0 |
| 1 | 0 | 0 |
| 1 | 1 | 1 |
------------------------------
Refer - https://www.geeksforgeeks.org/logic-gates-in-python/
"""
def xnor_gate(input_1: int, input_2: int) -> int:
"""
Calculate XNOR of the input values
>>> xnor_gate(0, 0)
1
>>> xnor_gate(0, 1)
0
>>> xnor_gate(1, 0)
0
>>> xnor_gate(1, 1)
1
"""
return 1 if input_1 == input_2 else 0
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: boolean_algebra/xor_gate.py
================================================
"""
An XOR Gate is a logic gate in boolean algebra which results in 1 (True) if exactly one
of the two inputs is 1, and 0 (False) if an even number of inputs are 1.
Following is the truth table of a XOR Gate:
------------------------------
| Input 1 | Input 2 | Output |
------------------------------
| 0 | 0 | 0 |
| 0 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 0 |
------------------------------
Refer - https://www.geeksforgeeks.org/logic-gates-in-python/
"""
def xor_gate(input_1: int, input_2: int) -> int:
"""
calculate xor of the input values
>>> xor_gate(0, 0)
0
>>> xor_gate(0, 1)
1
>>> xor_gate(1, 0)
1
>>> xor_gate(1, 1)
0
"""
return (input_1, input_2).count(0) % 2
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: cellular_automata/README.md
================================================
# Cellular Automata
Cellular automata are a way to simulate the behavior of "life", whether that of a robot or a cell.
They usually follow simple rules but can lead to the creation of complex forms.
The most popular cellular automaton is Conway's [Game of Life](https://en.wikipedia.org/wiki/Conway%27s_Game_of_Life).
================================================
FILE: cellular_automata/__init__.py
================================================
================================================
FILE: cellular_automata/conways_game_of_life.py
================================================
"""
Conway's Game of Life implemented in Python.
https://en.wikipedia.org/wiki/Conway%27s_Game_of_Life
"""
from __future__ import annotations
from PIL import Image
# Define glider example
GLIDER = [
[0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
]
# Define blinker example
BLINKER = [[0, 1, 0], [0, 1, 0], [0, 1, 0]]
def new_generation(cells: list[list[int]]) -> list[list[int]]:
"""
Generates the next generation for a given state of Conway's Game of Life.
>>> new_generation(BLINKER)
[[0, 0, 0], [1, 1, 1], [0, 0, 0]]
"""
next_generation = []
for i in range(len(cells)):
next_generation_row = []
for j in range(len(cells[i])):
# Get the number of live neighbours
neighbour_count = 0
if i > 0 and j > 0:
neighbour_count += cells[i - 1][j - 1]
if i > 0:
neighbour_count += cells[i - 1][j]
if i > 0 and j < len(cells[i]) - 1:
neighbour_count += cells[i - 1][j + 1]
if j > 0:
neighbour_count += cells[i][j - 1]
if j < len(cells[i]) - 1:
neighbour_count += cells[i][j + 1]
if i < len(cells) - 1 and j > 0:
neighbour_count += cells[i + 1][j - 1]
if i < len(cells) - 1:
neighbour_count += cells[i + 1][j]
if i < len(cells) - 1 and j < len(cells[i]) - 1:
neighbour_count += cells[i + 1][j + 1]
# Rules of the game of life (excerpt from Wikipedia):
# 1. Any live cell with two or three live neighbours survives.
# 2. Any dead cell with three live neighbours becomes a live cell.
# 3. All other live cells die in the next generation.
# Similarly, all other dead cells stay dead.
alive = cells[i][j] == 1
if (alive and 2 <= neighbour_count <= 3) or (
not alive and neighbour_count == 3
):
next_generation_row.append(1)
else:
next_generation_row.append(0)
next_generation.append(next_generation_row)
return next_generation
def generate_images(cells: list[list[int]], frames: int) -> list[Image.Image]:
"""
Generates a list of images of subsequent Game of Life states.
"""
images = []
for _ in range(frames):
# Create output image
img = Image.new("RGB", (len(cells[0]), len(cells)))
pixels = img.load()
# Save cells to image
for x in range(len(cells)):
for y in range(len(cells[0])):
colour = 255 - cells[y][x] * 255
pixels[x, y] = (colour, colour, colour)
# Save image
images.append(img)
cells = new_generation(cells)
return images
if __name__ == "__main__":
images = generate_images(GLIDER, 16)
images[0].save("out.gif", save_all=True, append_images=images[1:])
================================================
FILE: cellular_automata/game_of_life.py
================================================
"""Conway's Game Of Life, Author Anurag Kumar(mailto:anuragkumarak95@gmail.com)
Requirements:
- numpy
- random
- time
- matplotlib
Python:
- 3.5
Usage:
- $python3 game_of_life <canvas_size:int>
Game-Of-Life Rules:
1. Any live cell with fewer than two live neighbours
dies, as if caused by under-population.
2. Any live cell with two or three live neighbours lives
on to the next generation.
3. Any live cell with more than three live neighbours
dies, as if by over-population.
4. Any dead cell with exactly three live neighbours
becomes a live cell, as if by reproduction.
"""
import random
import sys
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
usage_doc = "Usage of script: script_name "
choice = [0] * 100 + [1] * 10
random.shuffle(choice)
def create_canvas(size: int) -> list[list[bool]]:
canvas = [[False for i in range(size)] for j in range(size)]
return canvas
def seed(canvas: list[list[bool]]) -> None:
for i, row in enumerate(canvas):
for j, _ in enumerate(row):
canvas[i][j] = bool(random.getrandbits(1))
def run(canvas: list[list[bool]]) -> list[list[bool]]:
"""
This function runs the rules of the game through all points, and changes their
status accordingly (in the same canvas).
@Args:
--
canvas : canvas of population to run the rules on.
@returns:
--
canvas of population after one step
"""
current_canvas = np.array(canvas)
next_gen_canvas = np.array(create_canvas(current_canvas.shape[0]))
for r, row in enumerate(current_canvas):
for c, pt in enumerate(row):
next_gen_canvas[r][c] = __judge_point(
pt, current_canvas[r - 1 : r + 2, c - 1 : c + 2]
)
return next_gen_canvas.tolist()
def __judge_point(pt: bool, neighbours: list[list[bool]]) -> bool:
dead = 0
alive = 0
# finding dead or alive neighbours count.
for i in neighbours:
for status in i:
if status:
alive += 1
else:
dead += 1
# handling duplicate entry for focus pt.
if pt:
alive -= 1
else:
dead -= 1
# running the rules of game here.
state = pt
if pt:
if alive < 2:
state = False
elif alive in {2, 3}:
state = True
elif alive > 3:
state = False
elif alive == 3:
state = True
return state
if __name__ == "__main__":
if len(sys.argv) != 2:
raise Exception(usage_doc)
canvas_size = int(sys.argv[1])
# main working structure of this module.
c = create_canvas(canvas_size)
seed(c)
fig, ax = plt.subplots()
fig.show()
cmap = ListedColormap(["w", "k"])
try:
while True:
c = run(c)
ax.matshow(c, cmap=cmap)
fig.canvas.draw()
ax.cla()
except KeyboardInterrupt:
# do nothing.
pass
================================================
FILE: cellular_automata/langtons_ant.py
================================================
"""
Langton's ant
@ https://en.wikipedia.org/wiki/Langton%27s_ant
@ https://upload.wikimedia.org/wikipedia/commons/0/09/LangtonsAntAnimated.gif
"""
from functools import partial
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
WIDTH = 80
HEIGHT = 80
class LangtonsAnt:
"""
Represents the main LangtonsAnt algorithm.
>>> la = LangtonsAnt(2, 2)
>>> la.board
[[True, True], [True, True]]
>>> la.ant_position
(1, 1)
"""
def __init__(self, width: int, height: int) -> None:
# Each square is either True or False where True is white and False is black
self.board = [[True] * width for _ in range(height)]
self.ant_position: tuple[int, int] = (width // 2, height // 2)
# Initially pointing left (similar to the wikipedia image)
# (0 = 0° | 1 = 90° | 2 = 180° | 3 = 270°)
self.ant_direction: int = 3
def move_ant(self, axes: plt.Axes | None, display: bool, _frame: int) -> None:
"""
Performs three tasks:
1. The ant turns either clockwise or anti-clockwise according to the colour
of the square that it is currently on. If the square is white, the ant
turns clockwise, and if the square is black the ant turns anti-clockwise
2. The ant moves one square in the direction that it is currently facing
3. The square the ant was previously on is inverted (White -> Black and
Black -> White)
If display is True, the board will also be displayed on the axes
>>> la = LangtonsAnt(2, 2)
>>> la.move_ant(None, True, 0)
>>> la.board
[[True, True], [True, False]]
>>> la.move_ant(None, True, 0)
>>> la.board
[[True, False], [True, False]]
"""
directions = {
0: (-1, 0), # 0°
1: (0, 1), # 90°
2: (1, 0), # 180°
3: (0, -1), # 270°
}
x, y = self.ant_position
# Turn clockwise or anti-clockwise according to colour of square
if self.board[x][y] is True:
# The square is white so turn 90° clockwise
self.ant_direction = (self.ant_direction + 1) % 4
else:
# The square is black so turn 90° anti-clockwise
self.ant_direction = (self.ant_direction - 1) % 4
# Move ant
move_x, move_y = directions[self.ant_direction]
self.ant_position = (x + move_x, y + move_y)
# Flip colour of square
self.board[x][y] = not self.board[x][y]
if display and axes:
# Display the board on the axes
axes.get_xaxis().set_ticks([])
axes.get_yaxis().set_ticks([])
axes.imshow(self.board, cmap="gray", interpolation="nearest")
def display(self, frames: int = 100_000) -> None:
"""
Displays the board without delay in a matplotlib plot
to visually understand and track the ant.
>>> _ = LangtonsAnt(WIDTH, HEIGHT)
"""
fig, ax = plt.subplots()
# Assign animation to a variable to prevent it from getting garbage collected
self.animation = FuncAnimation(
fig, partial(self.move_ant, ax, True), frames=frames, interval=1
)
plt.show()
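# Illustrative sketch (not part of the original module): advance the ant a few steps
# without plotting and count the squares it has flipped to black.
def _ant_demo(steps: int = 4) -> int:
    """
    >>> _ant_demo()
    4
    """
    ant = LangtonsAnt(5, 5)
    for _ in range(steps):
        ant.move_ant(None, False, 0)
    return sum(not square for row in ant.board for square in row)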
if __name__ == "__main__":
import doctest
doctest.testmod()
LangtonsAnt(WIDTH, HEIGHT).display()
================================================
FILE: cellular_automata/nagel_schrekenberg.py
================================================
"""
Simulate the evolution of a highway with only one road that is a loop.
The highway is divided into cells; each cell can have at most one car in it.
The highway is a loop so when a car comes to one end, it will come out on the other.
Each car is represented by its speed (from 0 to 5).
Some information about speed:
-1 means that the cell on the highway is empty
0 to 5 are the speed of the cars with 0 being the lowest and 5 the highest
highway: list[int] Where every position and speed of every car will be stored
probability The probability that a driver will slow down
initial_speed The speed of the cars at the start
frequency How many cells there are between two cars at the start
max_speed The maximum speed a car can go to
number_of_cells How many cells there are in the highway
number_of_update How many times the positions will be updated
More information here: https://en.wikipedia.org/wiki/Nagel%E2%80%93Schreckenberg_model
Examples for doctest:
>>> simulate(construct_highway(6, 3, 0), 2, 0, 2)
[[0, -1, -1, 0, -1, -1], [-1, 1, -1, -1, 1, -1], [-1, -1, 1, -1, -1, 1]]
>>> simulate(construct_highway(5, 2, -2), 3, 0, 2)
[[0, -1, 0, -1, 0], [0, -1, 0, -1, -1], [0, -1, -1, 1, -1], [-1, 1, -1, 0, -1]]
"""
from random import randint, random
def construct_highway(
number_of_cells: int,
frequency: int,
initial_speed: int,
random_frequency: bool = False,
random_speed: bool = False,
max_speed: int = 5,
) -> list:
"""
Build the highway following the parameters given
>>> construct_highway(10, 2, 6)
[[6, -1, 6, -1, 6, -1, 6, -1, 6, -1]]
>>> construct_highway(10, 10, 2)
[[2, -1, -1, -1, -1, -1, -1, -1, -1, -1]]
"""
highway = [[-1] * number_of_cells] # Create a highway without any car
i = 0
initial_speed = max(initial_speed, 0)
while i < number_of_cells:
highway[0][i] = (
randint(0, max_speed) if random_speed else initial_speed
) # Place the cars
i += (
randint(1, max_speed * 2) if random_frequency else frequency
) # Arbitrary number, may need tuning
return highway
def get_distance(highway_now: list, car_index: int) -> int:
"""
Get the distance between a car (at index car_index) and the next car
>>> get_distance([6, -1, 6, -1, 6], 2)
1
>>> get_distance([2, -1, -1, -1, 3, 1, 0, 1, 3, 2], 0)
3
>>> get_distance([-1, -1, -1, -1, 2, -1, -1, -1, 3], -1)
4
"""
distance = 0
cells = highway_now[car_index + 1 :]
for cell in range(len(cells)): # May need a better name for this
if cells[cell] != -1: # If the cell is not empty then
return distance # we have the distance we wanted
distance += 1
# Here if the car is near the end of the highway
return distance + get_distance(highway_now, -1)
def update(highway_now: list, probability: float, max_speed: int) -> list:
"""
Update the speed of the cars
>>> update([-1, -1, -1, -1, -1, 2, -1, -1, -1, -1, 3], 0.0, 5)
[-1, -1, -1, -1, -1, 3, -1, -1, -1, -1, 4]
>>> update([-1, -1, 2, -1, -1, -1, -1, 3], 0.0, 5)
[-1, -1, 3, -1, -1, -1, -1, 1]
"""
number_of_cells = len(highway_now)
# Before calculations, the highway is empty
next_highway = [-1] * number_of_cells
for car_index in range(number_of_cells):
if highway_now[car_index] != -1:
# Add 1 to the current speed of the car and cap the speed
next_highway[car_index] = min(highway_now[car_index] + 1, max_speed)
# Number of empty cell before the next car
dn = get_distance(highway_now, car_index) - 1
# We can't have the car causing an accident
next_highway[car_index] = min(next_highway[car_index], dn)
if random() < probability:
# Randomly, a driver will slow down
next_highway[car_index] = max(next_highway[car_index] - 1, 0)
return next_highway
def simulate(
highway: list, number_of_update: int, probability: float, max_speed: int
) -> list:
"""
The main function, it will simulate the evolution of the highway
>>> simulate([[-1, 2, -1, -1, -1, 3]], 2, 0.0, 3)
[[-1, 2, -1, -1, -1, 3], [-1, -1, -1, 2, -1, 0], [1, -1, -1, 0, -1, -1]]
>>> simulate([[-1, 2, -1, 3]], 4, 0.0, 3)
[[-1, 2, -1, 3], [-1, 0, -1, 0], [-1, 0, -1, 0], [-1, 0, -1, 0], [-1, 0, -1, 0]]
"""
number_of_cells = len(highway[0])
for i in range(number_of_update):
next_speeds_calculated = update(highway[i], probability, max_speed)
real_next_speeds = [-1] * number_of_cells
for car_index in range(number_of_cells):
speed = next_speeds_calculated[car_index]
if speed != -1:
# Change the position based on the speed (with % to create the loop)
index = (car_index + speed) % number_of_cells
# Commit the change of position
real_next_speeds[index] = speed
highway.append(real_next_speeds)
return highway
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: cellular_automata/one_dimensional.py
================================================
"""
Return an image of 16 generations of one-dimensional cellular automata based on a given
ruleset number
https://mathworld.wolfram.com/ElementaryCellularAutomaton.html
"""
from __future__ import annotations
from PIL import Image
# Define the first generation of cells
# fmt: off
CELLS = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
# fmt: on
def format_ruleset(ruleset: int) -> list[int]:
"""
>>> format_ruleset(11100)
[0, 0, 0, 1, 1, 1, 0, 0]
>>> format_ruleset(0)
[0, 0, 0, 0, 0, 0, 0, 0]
>>> format_ruleset(11111111)
[1, 1, 1, 1, 1, 1, 1, 1]
"""
return [int(c) for c in f"{ruleset:08}"[:8]]
def new_generation(cells: list[list[int]], rule: list[int], time: int) -> list[int]:
population = len(cells[0]) # 31
next_generation = []
for i in range(population):
# Get the neighbors of each cell
# Handle neighbours outside bounds by using 0 as their value
left_neighbor = 0 if i == 0 else cells[time][i - 1]
right_neighbor = 0 if i == population - 1 else cells[time][i + 1]
# Define a new cell and add it to the new generation
situation = 7 - int(f"{left_neighbor}{cells[time][i]}{right_neighbor}", 2)
next_generation.append(rule[situation])
return next_generation
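# Illustrative sketch (not part of the original module): rule 90 (binary 01011010)
# applied once to a single live cell lights up its two neighbours, the first step
# of the familiar Sierpinski-triangle pattern.
def _rule_90_demo() -> list[int]:
    """
    >>> _rule_90_demo()
    [0, 0, 1, 0, 1, 0, 0]
    """
    return new_generation([[0, 0, 0, 1, 0, 0, 0]], format_ruleset(1011010), 0)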
def generate_image(cells: list[list[int]]) -> Image.Image:
"""
Convert the cells into a greyscale PIL.Image.Image and return it to the caller.
>>> from random import random
>>> cells = [[random() for w in range(31)] for h in range(16)]
>>> img = generate_image(cells)
>>> isinstance(img, Image.Image)
True
>>> img.width, img.height
(31, 16)
"""
# Create the output image
img = Image.new("RGB", (len(cells[0]), len(cells)))
pixels = img.load()
# Generates image
for w in range(img.width):
for h in range(img.height):
color = 255 - int(255 * cells[h][w])
pixels[w, h] = (color, color, color)
return img
if __name__ == "__main__":
rule_num = bin(int(input("Rule:\n").strip()))[2:]
rule = format_ruleset(int(rule_num))
for time in range(16):
CELLS.append(new_generation(CELLS, rule, time))
img = generate_image(CELLS)
# Uncomment to save the image
# img.save(f"rule_{rule_num}.png")
img.show()
================================================
FILE: cellular_automata/wa_tor.py
================================================
"""
Wa-Tor algorithm (1984)
| @ https://en.wikipedia.org/wiki/Wa-Tor
| @ https://beltoforion.de/en/wator/
| @ https://beltoforion.de/en/wator/images/wator_medium.webm
This solution aims to completely remove any systematic approach
to the Wa-Tor planet, and utilise fully random methods.
The constants are a working set that allows the Wa-Tor planet
to reach one of the three possible outcomes.
"""
from collections.abc import Callable
from random import randint, shuffle
from time import sleep
from typing import Literal
WIDTH = 50 # Width of the Wa-Tor planet
HEIGHT = 50 # Height of the Wa-Tor planet
PREY_INITIAL_COUNT = 30 # The initial number of prey entities
PREY_REPRODUCTION_TIME = 5 # The chronons before reproducing
PREDATOR_INITIAL_COUNT = 50 # The initial number of predator entities
# The initial energy value of predator entities
PREDATOR_INITIAL_ENERGY_VALUE = 15
# The energy value provided when consuming prey
PREDATOR_FOOD_VALUE = 5
PREDATOR_REPRODUCTION_TIME = 20 # The chronons before reproducing
MAX_ENTITIES = 500 # The max number of organisms on the board
# The number of entities to delete from the unbalanced side
DELETE_UNBALANCED_ENTITIES = 50
class Entity:
"""
Represents an entity (either prey or predator).
>>> e = Entity(True, coords=(0, 0))
>>> e.prey
True
>>> e.coords
(0, 0)
>>> e.alive
True
"""
def __init__(self, prey: bool, coords: tuple[int, int]) -> None:
self.prey = prey
# The (row, col) pos of the entity
self.coords = coords
self.remaining_reproduction_time = (
PREY_REPRODUCTION_TIME if prey else PREDATOR_REPRODUCTION_TIME
)
self.energy_value = None if prey is True else PREDATOR_INITIAL_ENERGY_VALUE
self.alive = True
def reset_reproduction_time(self) -> None:
"""
>>> e = Entity(True, coords=(0, 0))
>>> e.reset_reproduction_time()
>>> e.remaining_reproduction_time == PREY_REPRODUCTION_TIME
True
>>> e = Entity(False, coords=(0, 0))
>>> e.reset_reproduction_time()
>>> e.remaining_reproduction_time == PREDATOR_REPRODUCTION_TIME
True
"""
self.remaining_reproduction_time = (
PREY_REPRODUCTION_TIME if self.prey is True else PREDATOR_REPRODUCTION_TIME
)
def __repr__(self) -> str:
"""
>>> Entity(prey=True, coords=(1, 1))
Entity(prey=True, coords=(1, 1), remaining_reproduction_time=5)
>>> Entity(prey=False, coords=(2, 1)) # doctest: +NORMALIZE_WHITESPACE
Entity(prey=False, coords=(2, 1),
remaining_reproduction_time=20, energy_value=15)
"""
repr_ = (
f"Entity(prey={self.prey}, coords={self.coords}, "
f"remaining_reproduction_time={self.remaining_reproduction_time}"
)
if self.energy_value is not None:
repr_ += f", energy_value={self.energy_value}"
return f"{repr_})"
class WaTor:
"""
Represents the main Wa-Tor algorithm.
:attr time_passed: A function that is called every time
time passes (a chronon) in order to visually display
the new Wa-Tor planet. The `time_passed` function can block
using ``time.sleep`` to slow the algorithm progression.
>>> wt = WaTor(10, 15)
>>> wt.width
10
>>> wt.height
15
>>> len(wt.planet)
15
>>> len(wt.planet[0])
10
>>> len(wt.get_entities()) == PREDATOR_INITIAL_COUNT + PREY_INITIAL_COUNT
True
"""
time_passed: Callable[["WaTor", int], None] | None
def __init__(self, width: int, height: int) -> None:
self.width = width
self.height = height
self.time_passed = None
self.planet: list[list[Entity | None]] = [[None] * width for _ in range(height)]
# Populate planet with predators and prey randomly
for _ in range(PREY_INITIAL_COUNT):
self.add_entity(prey=True)
for _ in range(PREDATOR_INITIAL_COUNT):
self.add_entity(prey=False)
self.set_planet(self.planet)
def set_planet(self, planet: list[list[Entity | None]]) -> None:
"""
Ease of access for testing
>>> wt = WaTor(WIDTH, HEIGHT)
>>> planet = [
... [None, None, None],
... [None, Entity(True, coords=(1, 1)), None]
... ]
>>> wt.set_planet(planet)
>>> wt.planet == planet
True
>>> wt.width
3
>>> wt.height
2
"""
self.planet = planet
self.width = len(planet[0])
self.height = len(planet)
def add_entity(self, prey: bool) -> None:
"""
Adds an entity, making sure the entity does
not override another entity
>>> wt = WaTor(WIDTH, HEIGHT)
>>> wt.set_planet([[None, None], [None, None]])
>>> wt.add_entity(True)
>>> len(wt.get_entities())
1
>>> wt.add_entity(False)
>>> len(wt.get_entities())
2
"""
while True:
row, col = randint(0, self.height - 1), randint(0, self.width - 1)
if self.planet[row][col] is None:
self.planet[row][col] = Entity(prey=prey, coords=(row, col))
return
def get_entities(self) -> list[Entity]:
"""
Returns a list of all the entities within the planet.
>>> wt = WaTor(WIDTH, HEIGHT)
>>> len(wt.get_entities()) == PREDATOR_INITIAL_COUNT + PREY_INITIAL_COUNT
True
"""
return [entity for column in self.planet for entity in column if entity]
def balance_predators_and_prey(self) -> None:
"""
Balances predators and prey so that prey
cannot dominate the predators, blocking up
space for them to reproduce.
>>> wt = WaTor(WIDTH, HEIGHT)
>>> for i in range(2000):
... row, col = i // HEIGHT, i % WIDTH
... wt.planet[row][col] = Entity(True, coords=(row, col))
>>> entities = len(wt.get_entities())
>>> wt.balance_predators_and_prey()
>>> len(wt.get_entities()) == entities
False
"""
entities = self.get_entities()
shuffle(entities)
if len(entities) >= MAX_ENTITIES - MAX_ENTITIES / 10:
prey = [entity for entity in entities if entity.prey]
predators = [entity for entity in entities if not entity.prey]
prey_count, predator_count = len(prey), len(predators)
entities_to_purge = (
prey[:DELETE_UNBALANCED_ENTITIES]
if prey_count > predator_count
else predators[:DELETE_UNBALANCED_ENTITIES]
)
for entity in entities_to_purge:
self.planet[entity.coords[0]][entity.coords[1]] = None
def get_surrounding_prey(self, entity: Entity) -> list[Entity]:
"""
Returns all the prey entities around (N, S, E, W) a predator entity.
Subtly different from `move_and_reproduce`.
>>> wt = WaTor(WIDTH, HEIGHT)
>>> wt.set_planet([
... [None, Entity(True, (0, 1)), None],
... [None, Entity(False, (1, 1)), None],
... [None, Entity(True, (2, 1)), None]])
>>> wt.get_surrounding_prey(
... Entity(False, (1, 1))) # doctest: +NORMALIZE_WHITESPACE
[Entity(prey=True, coords=(0, 1), remaining_reproduction_time=5),
Entity(prey=True, coords=(2, 1), remaining_reproduction_time=5)]
>>> wt.set_planet([[Entity(False, (0, 0))]])
>>> wt.get_surrounding_prey(Entity(False, (0, 0)))
[]
>>> wt.set_planet([
... [Entity(True, (0, 0)), Entity(False, (1, 0)), Entity(False, (2, 0))],
... [None, Entity(False, (1, 1)), Entity(True, (2, 1))],
... [None, None, None]])
>>> wt.get_surrounding_prey(Entity(False, (1, 0)))
[Entity(prey=True, coords=(0, 0), remaining_reproduction_time=5)]
"""
row, col = entity.coords
adjacent: list[tuple[int, int]] = [
(row - 1, col), # North
(row + 1, col), # South
(row, col - 1), # West
(row, col + 1), # East
]
return [
ent
for r, c in adjacent
if 0 <= r < self.height
and 0 <= c < self.width
and (ent := self.planet[r][c]) is not None
and ent.prey
]
def move_and_reproduce(
self, entity: Entity, direction_orders: list[Literal["N", "E", "S", "W"]]
) -> None:
"""
Attempts to move to an unoccupied neighbouring square
in either of the four directions (North, South, East, West).
If the move was successful and the `remaining_reproduction_time` is
equal to 0, then a new prey or predator can also be created
in the previous square.
:param direction_orders: Ordered list (like priority queue) depicting
order to attempt to move. Removes any systematic
approach of checking neighbouring squares.
>>> planet = [
... [None, None, None],
... [None, Entity(True, coords=(1, 1)), None],
... [None, None, None]
... ]
>>> wt = WaTor(WIDTH, HEIGHT)
>>> wt.set_planet(planet)
>>> wt.move_and_reproduce(Entity(True, coords=(1, 1)), direction_orders=["N"])
>>> wt.planet # doctest: +NORMALIZE_WHITESPACE
[[None, Entity(prey=True, coords=(0, 1), remaining_reproduction_time=4), None],
[None, None, None],
[None, None, None]]
>>> wt.planet[0][0] = Entity(True, coords=(0, 0))
>>> wt.move_and_reproduce(Entity(True, coords=(0, 1)),
... direction_orders=["N", "W", "E", "S"])
>>> wt.planet # doctest: +NORMALIZE_WHITESPACE
[[Entity(prey=True, coords=(0, 0), remaining_reproduction_time=5), None,
Entity(prey=True, coords=(0, 2), remaining_reproduction_time=4)],
[None, None, None],
[None, None, None]]
>>> wt.planet[0][1] = wt.planet[0][2]
>>> wt.planet[0][2] = None
>>> wt.move_and_reproduce(Entity(True, coords=(0, 1)),
... direction_orders=["N", "W", "S", "E"])
>>> wt.planet # doctest: +NORMALIZE_WHITESPACE
[[Entity(prey=True, coords=(0, 0), remaining_reproduction_time=5), None, None],
[None, Entity(prey=True, coords=(1, 1), remaining_reproduction_time=4), None],
[None, None, None]]
>>> wt = WaTor(WIDTH, HEIGHT)
>>> reproducable_entity = Entity(False, coords=(0, 1))
>>> reproducable_entity.remaining_reproduction_time = 0
>>> wt.planet = [[None, reproducable_entity]]
>>> wt.move_and_reproduce(reproducable_entity,
... direction_orders=["N", "W", "S", "E"])
>>> wt.planet # doctest: +NORMALIZE_WHITESPACE
[[Entity(prey=False, coords=(0, 0),
remaining_reproduction_time=20, energy_value=15),
Entity(prey=False, coords=(0, 1), remaining_reproduction_time=20,
energy_value=15)]]
"""
row, col = coords = entity.coords
adjacent_squares: dict[Literal["N", "E", "S", "W"], tuple[int, int]] = {
"N": (row - 1, col), # North
"S": (row + 1, col), # South
"W": (row, col - 1), # West
"E": (row, col + 1), # East
}
# Weight adjacent locations
adjacent: list[tuple[int, int]] = []
for order in direction_orders:
adjacent.append(adjacent_squares[order])
for r, c in adjacent:
if (
0 <= r < self.height
and 0 <= c < self.width
and self.planet[r][c] is None
):
# Move entity to empty adjacent square
self.planet[r][c] = entity
self.planet[row][col] = None
entity.coords = (r, c)
break
# (2.) See if it is possible to reproduce in the previous square
if coords != entity.coords and entity.remaining_reproduction_time <= 0:
# Check if the number of entities on the planet is less than the max limit
if len(self.get_entities()) < MAX_ENTITIES:
# Reproduce in previous square
self.planet[row][col] = Entity(prey=entity.prey, coords=coords)
entity.reset_reproduction_time()
else:
entity.remaining_reproduction_time -= 1
def perform_prey_actions(
self, entity: Entity, direction_orders: list[Literal["N", "E", "S", "W"]]
) -> None:
"""
Performs the actions for a prey entity
For prey the rules are:
1. At each chronon, a prey moves randomly to one of the adjacent unoccupied
squares. If there are no free squares, no movement takes place.
2. Once a prey has survived a certain number of chronons it may reproduce.
This is done as it moves to a neighbouring square,
leaving behind a new prey in its old position.
Its reproduction time is also reset to zero.
>>> wt = WaTor(WIDTH, HEIGHT)
>>> reproducable_entity = Entity(True, coords=(0, 1))
>>> reproducable_entity.remaining_reproduction_time = 0
>>> wt.planet = [[None, reproducable_entity]]
>>> wt.perform_prey_actions(reproducable_entity,
... direction_orders=["N", "W", "S", "E"])
>>> wt.planet # doctest: +NORMALIZE_WHITESPACE
[[Entity(prey=True, coords=(0, 0), remaining_reproduction_time=5),
Entity(prey=True, coords=(0, 1), remaining_reproduction_time=5)]]
"""
self.move_and_reproduce(entity, direction_orders)
def perform_predator_actions(
self,
entity: Entity,
occupied_by_prey_coords: tuple[int, int] | None,
direction_orders: list[Literal["N", "E", "S", "W"]],
) -> None:
"""
Performs the actions for a predator entity
:param occupied_by_prey_coords: Move to this location if there is prey there
For predators the rules are:
1. At each chronon, a predator moves randomly to an adjacent square occupied
by a prey. If there is none, the predator moves to a random adjacent
unoccupied square. If there are no free squares, no movement takes place.
2. At each chronon, each predator is deprived of a unit of energy.
3. Upon reaching zero energy, a predator dies.
4. If a predator moves to a square occupied by a prey,
it eats the prey and earns a certain amount of energy.
5. Once a predator has survived a certain number of chronons
it may reproduce in exactly the same way as the prey.
>>> wt = WaTor(WIDTH, HEIGHT)
>>> wt.set_planet([[Entity(True, coords=(0, 0)), Entity(False, coords=(0, 1))]])
>>> wt.perform_predator_actions(Entity(False, coords=(0, 1)), (0, 0), [])
>>> wt.planet # doctest: +NORMALIZE_WHITESPACE
[[Entity(prey=False, coords=(0, 0),
remaining_reproduction_time=20, energy_value=19), None]]
"""
assert entity.energy_value is not None # [type checking]
# (3.) If the entity has 0 energy, it will die
if entity.energy_value == 0:
self.planet[entity.coords[0]][entity.coords[1]] = None
return
# (1.) Move to entity if possible
if occupied_by_prey_coords is not None:
# Kill the prey
prey = self.planet[occupied_by_prey_coords[0]][occupied_by_prey_coords[1]]
assert prey is not None
prey.alive = False
# Move onto prey
self.planet[occupied_by_prey_coords[0]][occupied_by_prey_coords[1]] = entity
self.planet[entity.coords[0]][entity.coords[1]] = None
entity.coords = occupied_by_prey_coords
# (4.) Eats the prey and earns energy
entity.energy_value += PREDATOR_FOOD_VALUE
else:
# (5.) If it has survived the certain number of chronons it will also
# reproduce in this function
self.move_and_reproduce(entity, direction_orders)
# (2.) Each chronon, the predator is deprived of a unit of energy
entity.energy_value -= 1
def run(self, *, iteration_count: int) -> None:
"""
Emulate time passing by looping `iteration_count` times
>>> wt = WaTor(WIDTH, HEIGHT)
>>> wt.run(iteration_count=PREDATOR_INITIAL_ENERGY_VALUE - 1)
>>> len(list(filter(lambda entity: entity.prey is False,
... wt.get_entities()))) >= PREDATOR_INITIAL_COUNT
True
"""
for iter_num in range(iteration_count):
# Generate list of all entities in order to randomly
# pop an entity at a time to simulate true randomness
# This removes the systematic approach of iterating
# through each entity width by height
all_entities = self.get_entities()
for __ in range(len(all_entities)):
entity = all_entities.pop(randint(0, len(all_entities) - 1))
if entity.alive is False:
continue
directions: list[Literal["N", "E", "S", "W"]] = ["N", "E", "S", "W"]
shuffle(directions) # Randomly shuffle directions
if entity.prey:
self.perform_prey_actions(entity, directions)
else:
# Create list of surrounding prey
surrounding_prey = self.get_surrounding_prey(entity)
surrounding_prey_coords = None
if surrounding_prey:
# Again, randomly shuffle directions
shuffle(surrounding_prey)
surrounding_prey_coords = surrounding_prey[0].coords
self.perform_predator_actions(
entity, surrounding_prey_coords, directions
)
# Balance out the predators and prey
self.balance_predators_and_prey()
if self.time_passed is not None:
# Call time_passed function for Wa-Tor planet
# visualisation in a terminal or a graph.
self.time_passed(self, iter_num)
def visualise(wt: WaTor, iter_number: int, *, colour: bool = True) -> None:
"""
Visually displays the Wa-Tor planet using
ANSI escape codes in the terminal to clear and re-print
the Wa-Tor planet at intervals.
Uses ANSI colour codes to colourfully display the predators and prey:
* (0x60f197) Prey = ``#``
* (0xffff0f) Predator = ``x``
>>> wt = WaTor(30, 30)
>>> wt.set_planet([
... [Entity(True, coords=(0, 0)), Entity(False, coords=(0, 1)), None],
... [Entity(False, coords=(1, 0)), None, Entity(False, coords=(1, 2))],
... [None, Entity(True, coords=(2, 1)), None]
... ])
>>> visualise(wt, 0, colour=False) # doctest: +NORMALIZE_WHITESPACE
# x .
x . x
. # .
Iteration: 0 | Prey count: 2 | Predator count: 3 |
"""
if colour:
__import__("os").system("")
print("\x1b[0;0H\x1b[2J\x1b[?25l")
reprint = "\x1b[0;0H" if colour else ""
ansi_colour_end = "\x1b[0m " if colour else " "
planet = wt.planet
output = ""
# Iterate over every entity in the planet
for row in planet:
for entity in row:
if entity is None:
output += " . "
else:
if colour is True:
output += (
"\x1b[38;2;96;241;151m"
if entity.prey
else "\x1b[38;2;255;255;15m"
)
output += f" {'#' if entity.prey else 'x'}{ansi_colour_end}"
output += "\n"
entities = wt.get_entities()
prey_count = sum(entity.prey for entity in entities)
print(
f"{output}\n Iteration: {iter_number} | Prey count: {prey_count} | "
f"Predator count: {len(entities) - prey_count} | {reprint}"
)
# Block the thread briefly so the algorithm's progress can be seen
sleep(0.05)
if __name__ == "__main__":
import doctest
doctest.testmod()
wt = WaTor(WIDTH, HEIGHT)
wt.time_passed = visualise
wt.run(iteration_count=100_000)
================================================
FILE: ciphers/README.md
================================================
# Ciphers
Ciphers are used to protect data from people that are not allowed to have it. They are everywhere on the internet to protect your connections.
================================================
FILE: ciphers/__init__.py
================================================
================================================
FILE: ciphers/a1z26.py
================================================
"""
Convert a string of characters to a sequence of numbers
corresponding to the character's position in the alphabet.
https://www.dcode.fr/letter-number-cipher
http://bestcodes.weebly.com/a1z26.html
"""
from __future__ import annotations
def encode(plain: str) -> list[int]:
"""
>>> encode("myname")
[13, 25, 14, 1, 13, 5]
"""
return [ord(elem) - 96 for elem in plain]
def decode(encoded: list[int]) -> str:
"""
>>> decode([13, 25, 14, 1, 13, 5])
'myname'
"""
return "".join(chr(elem + 96) for elem in encoded)
def main() -> None:
encoded = encode(input("-> ").strip().lower())
print("Encoded: ", encoded)
print("Decoded:", decode(encoded))
if __name__ == "__main__":
main()
================================================
FILE: ciphers/affine_cipher.py
================================================
import random
import sys
from maths.greatest_common_divisor import gcd_by_iterative
from . import cryptomath_module as cryptomath
SYMBOLS = (
r""" !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`"""
r"""abcdefghijklmnopqrstuvwxyz{|}~"""
)
def check_keys(key_a: int, key_b: int, mode: str) -> None:
if mode == "encrypt":
if key_a == 1:
sys.exit(
"The affine cipher becomes weak when key "
"A is set to 1. Choose different key"
)
if key_b == 0:
sys.exit(
"The affine cipher becomes weak when key "
"B is set to 0. Choose different key"
)
if key_a < 0 or key_b < 0 or key_b > len(SYMBOLS) - 1:
sys.exit(
"Key A must be greater than 0 and key B must "
f"be between 0 and {len(SYMBOLS) - 1}."
)
if gcd_by_iterative(key_a, len(SYMBOLS)) != 1:
sys.exit(
f"Key A {key_a} and the symbol set size {len(SYMBOLS)} "
"are not relatively prime. Choose a different key."
)
def encrypt_message(key: int, message: str) -> str:
"""
>>> encrypt_message(4545, 'The affine cipher is a type of monoalphabetic '
... 'substitution cipher.')
'VL}p MM{I}p~{HL}Gp{vp pFsH}pxMpyxIx JHL O}F{~pvuOvF{FuF{xIp~{HL}Gi'
"""
key_a, key_b = divmod(key, len(SYMBOLS))
check_keys(key_a, key_b, "encrypt")
cipher_text = ""
for symbol in message:
if symbol in SYMBOLS:
sym_index = SYMBOLS.find(symbol)
cipher_text += SYMBOLS[(sym_index * key_a + key_b) % len(SYMBOLS)]
else:
cipher_text += symbol
return cipher_text
def decrypt_message(key: int, message: str) -> str:
"""
>>> decrypt_message(4545, 'VL}p MM{I}p~{HL}Gp{vp pFsH}pxMpyxIx JHL O}F{~pvuOvF{FuF'
... '{xIp~{HL}Gi')
'The affine cipher is a type of monoalphabetic substitution cipher.'
"""
key_a, key_b = divmod(key, len(SYMBOLS))
check_keys(key_a, key_b, "decrypt")
plain_text = ""
mod_inverse_of_key_a = cryptomath.find_mod_inverse(key_a, len(SYMBOLS))
for symbol in message:
if symbol in SYMBOLS:
sym_index = SYMBOLS.find(symbol)
plain_text += SYMBOLS[
(sym_index - key_b) * mod_inverse_of_key_a % len(SYMBOLS)
]
else:
plain_text += symbol
return plain_text
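# Illustrative sketch (not part of the original module): the combined key used in
# the doctests above (4545) splits into key_a and key_b around the symbol-set size,
# len(SYMBOLS) == 95, via divmod.
def _split_key_demo(key: int = 4545) -> tuple[int, int]:
    """
    >>> _split_key_demo()
    (47, 80)
    """
    return divmod(key, len(SYMBOLS))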
def get_random_key() -> int:
while True:
key_a = random.randint(2, len(SYMBOLS))
key_b = random.randint(2, len(SYMBOLS))
if gcd_by_iterative(key_a, len(SYMBOLS)) == 1 and key_b % len(SYMBOLS) != 0:
return key_a * len(SYMBOLS) + key_b
def main() -> None:
"""
>>> key = get_random_key()
>>> msg = "This is a test!"
>>> decrypt_message(key, encrypt_message(key, msg)) == msg
True
"""
message = input("Enter message: ").strip()
key = int(input("Enter key [2000 - 9000]: ").strip())
mode = input("Encrypt/Decrypt [E/D]: ").strip().lower()
if mode.startswith("e"):
mode = "encrypt"
translated = encrypt_message(key, message)
elif mode.startswith("d"):
mode = "decrypt"
translated = decrypt_message(key, message)
print(f"\n{mode.title()}ed text: \n{translated}")
if __name__ == "__main__":
import doctest
doctest.testmod()
# main()
================================================
FILE: ciphers/atbash.py
================================================
"""https://en.wikipedia.org/wiki/Atbash"""
import string
def atbash_slow(sequence: str) -> str:
"""
>>> atbash_slow("ABCDEFG")
'ZYXWVUT'
>>> atbash_slow("aW;;123BX")
'zD;;123YC'
"""
output = ""
for i in sequence:
extract = ord(i)
if 65 <= extract <= 90:
output += chr(155 - extract)
elif 97 <= extract <= 122:
output += chr(219 - extract)
else:
output += i
return output
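# Illustrative note (not part of the original module): the constants above are
# ord("A") + ord("Z") == 155 and ord("a") + ord("z") == 219, so each letter is
# replaced by its mirror in the alphabet.
def _mirror_constants_demo() -> tuple[int, int]:
    """
    >>> _mirror_constants_demo()
    (155, 219)
    """
    return ord("A") + ord("Z"), ord("a") + ord("z")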
def atbash(sequence: str) -> str:
"""
>>> atbash("ABCDEFG")
'ZYXWVUT'
>>> atbash("aW;;123BX")
'zD;;123YC'
"""
letters = string.ascii_letters
letters_reversed = string.ascii_lowercase[::-1] + string.ascii_uppercase[::-1]
return "".join(
letters_reversed[letters.index(c)] if c in letters else c for c in sequence
)
def benchmark() -> None:
"""Let's benchmark our functions side-by-side..."""
from timeit import timeit
print("Running performance benchmarks...")
setup = "from string import printable ; from __main__ import atbash, atbash_slow"
print(f"> atbash_slow(): {timeit('atbash_slow(printable)', setup=setup)} seconds")
print(f"> atbash(): {timeit('atbash(printable)', setup=setup)} seconds")
if __name__ == "__main__":
for example in ("ABCDEFGH", "123GGjj", "testStringtest", "with space"):
print(f"{example} encrypted in atbash: {atbash(example)}")
benchmark()
================================================
FILE: ciphers/autokey.py
================================================
"""
https://en.wikipedia.org/wiki/Autokey_cipher
An autokey cipher (also known as the autoclave cipher) is a cipher that
incorporates the message (the plaintext) into the key.
The key is generated from the message in some automated fashion,
sometimes by selecting certain letters from the text or, more commonly,
by adding a short primer key to the front of the message.
"""
def encrypt(plaintext: str, key: str) -> str:
"""
Encrypt a given `plaintext` (string) and `key` (string), returning the
encrypted ciphertext.
>>> encrypt("hello world", "coffee")
'jsqqs avvwo'
>>> encrypt("coffee is good as python", "TheAlgorithms")
'vvjfpk wj ohvp su ddylsv'
>>> encrypt("coffee is good as python", 2)
Traceback (most recent call last):
...
TypeError: key must be a string
>>> encrypt("", "TheAlgorithms")
Traceback (most recent call last):
...
ValueError: plaintext is empty
>>> encrypt("coffee is good as python", "")
Traceback (most recent call last):
...
ValueError: key is empty
>>> encrypt(527.26, "TheAlgorithms")
Traceback (most recent call last):
...
TypeError: plaintext must be a string
"""
if not isinstance(plaintext, str):
raise TypeError("plaintext must be a string")
if not isinstance(key, str):
raise TypeError("key must be a string")
if not plaintext:
raise ValueError("plaintext is empty")
if not key:
raise ValueError("key is empty")
key += plaintext
plaintext = plaintext.lower()
key = key.lower()
plaintext_iterator = 0
key_iterator = 0
ciphertext = ""
while plaintext_iterator < len(plaintext):
if (
ord(plaintext[plaintext_iterator]) < 97
or ord(plaintext[plaintext_iterator]) > 122
):
ciphertext += plaintext[plaintext_iterator]
plaintext_iterator += 1
elif ord(key[key_iterator]) < 97 or ord(key[key_iterator]) > 122:
key_iterator += 1
else:
ciphertext += chr(
(
(ord(plaintext[plaintext_iterator]) - 97 + ord(key[key_iterator]))
- 97
)
% 26
+ 97
)
key_iterator += 1
plaintext_iterator += 1
return ciphertext
def decrypt(ciphertext: str, key: str) -> str:
"""
Decrypt a given `ciphertext` (string) and `key` (string), returning the decrypted
ciphertext.
>>> decrypt("jsqqs avvwo", "coffee")
'hello world'
>>> decrypt("vvjfpk wj ohvp su ddylsv", "TheAlgorithms")
'coffee is good as python'
>>> decrypt("vvjfpk wj ohvp su ddylsv", "")
Traceback (most recent call last):
...
ValueError: key is empty
>>> decrypt(527.26, "TheAlgorithms")
Traceback (most recent call last):
...
TypeError: ciphertext must be a string
>>> decrypt("", "TheAlgorithms")
Traceback (most recent call last):
...
ValueError: ciphertext is empty
>>> decrypt("vvjfpk wj ohvp su ddylsv", 2)
Traceback (most recent call last):
...
TypeError: key must be a string
"""
if not isinstance(ciphertext, str):
raise TypeError("ciphertext must be a string")
if not isinstance(key, str):
raise TypeError("key must be a string")
if not ciphertext:
raise ValueError("ciphertext is empty")
if not key:
raise ValueError("key is empty")
key = key.lower()
ciphertext_iterator = 0
key_iterator = 0
plaintext = ""
while ciphertext_iterator < len(ciphertext):
if (
ord(ciphertext[ciphertext_iterator]) < 97
or ord(ciphertext[ciphertext_iterator]) > 122
):
plaintext += ciphertext[ciphertext_iterator]
else:
plaintext += chr(
(ord(ciphertext[ciphertext_iterator]) - ord(key[key_iterator])) % 26
+ 97
)
key += chr(
(ord(ciphertext[ciphertext_iterator]) - ord(key[key_iterator])) % 26
+ 97
)
key_iterator += 1
ciphertext_iterator += 1
return plaintext
if __name__ == "__main__":
import doctest
doctest.testmod()
operation = int(input("Type 1 to encrypt or 2 to decrypt:"))
if operation == 1:
plaintext = input("Typeplaintext to be encrypted:\n")
key = input("Type the key:\n")
print(encrypt(plaintext, key))
elif operation == 2:
ciphertext = input("Type the ciphertext to be decrypted:\n")
key = input("Type the key:\n")
print(decrypt(ciphertext, key))
decrypt("jsqqs avvwo", "coffee")
================================================
FILE: ciphers/baconian_cipher.py
================================================
"""
Program to encode and decode Baconian or Bacon's Cipher
Wikipedia reference : https://en.wikipedia.org/wiki/Bacon%27s_cipher
"""
encode_dict = {
"a": "AAAAA",
"b": "AAAAB",
"c": "AAABA",
"d": "AAABB",
"e": "AABAA",
"f": "AABAB",
"g": "AABBA",
"h": "AABBB",
"i": "ABAAA",
"j": "BBBAA",
"k": "ABAAB",
"l": "ABABA",
"m": "ABABB",
"n": "ABBAA",
"o": "ABBAB",
"p": "ABBBA",
"q": "ABBBB",
"r": "BAAAA",
"s": "BAAAB",
"t": "BAABA",
"u": "BAABB",
"v": "BBBAB",
"w": "BABAA",
"x": "BABAB",
"y": "BABBA",
"z": "BABBB",
" ": " ",
}
decode_dict = {value: key for key, value in encode_dict.items()}
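# Illustrative note (not part of the original module): most codes follow the classic
# 24-letter Baconian numbering (I/J and U/V sharing a slot), i.e. the letter's index
# written in five "bits" with A = 0 and B = 1; "j" and "v" are given their own codes
# here so that all 26 letters decode unambiguously.
def _index_code_demo() -> str:
    """
    The code for "d" (index 3) derived from its binary representation:
    >>> _index_code_demo() == encode_dict["d"]
    True
    """
    return "".join("AB"[bit == "1"] for bit in f"{3:05b}")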
def encode(word: str) -> str:
"""
Encodes to Baconian cipher
>>> encode("hello")
'AABBBAABAAABABAABABAABBAB'
>>> encode("hello world")
'AABBBAABAAABABAABABAABBAB BABAAABBABBAAAAABABAAAABB'
>>> encode("hello world!")
Traceback (most recent call last):
...
Exception: encode() accepts only letters of the alphabet and spaces
"""
encoded = ""
for letter in word.lower():
if letter.isalpha() or letter == " ":
encoded += encode_dict[letter]
else:
raise Exception("encode() accepts only letters of the alphabet and spaces")
return encoded
def decode(coded: str) -> str:
"""
Decodes from Baconian cipher
>>> decode("AABBBAABAAABABAABABAABBAB BABAAABBABBAAAAABABAAAABB")
'hello world'
>>> decode("AABBBAABAAABABAABABAABBAB")
'hello'
>>> decode("AABBBAABAAABABAABABAABBAB BABAAABBABBAAAAABABAAAABB!")
Traceback (most recent call last):
...
Exception: decode() accepts only 'A', 'B' and spaces
"""
if set(coded) - {"A", "B", " "} != set():
raise Exception("decode() accepts only 'A', 'B' and spaces")
decoded = ""
for word in coded.split():
while len(word) != 0:
decoded += decode_dict[word[:5]]
word = word[5:]
decoded += " "
return decoded.strip()
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: ciphers/base16.py
================================================
def base16_encode(data: bytes) -> str:
"""
Encodes the given bytes into base16.
>>> base16_encode(b'Hello World!')
'48656C6C6F20576F726C6421'
>>> base16_encode(b'HELLO WORLD!')
'48454C4C4F20574F524C4421'
>>> base16_encode(b'')
''
"""
# Turn the data into a list of integers (where each integer is a byte),
# Then turn each byte into its hexadecimal representation, make sure
# it is uppercase, and then join everything together and return it.
return "".join([hex(byte)[2:].zfill(2).upper() for byte in list(data)])
def base16_decode(data: str) -> bytes:
"""
Decodes the given base16 encoded data into bytes.
>>> base16_decode('48656C6C6F20576F726C6421')
b'Hello World!'
>>> base16_decode('48454C4C4F20574F524C4421')
b'HELLO WORLD!'
>>> base16_decode('')
b''
>>> base16_decode('486')
Traceback (most recent call last):
...
ValueError: Base16 encoded data is invalid:
Data does not have an even number of hex digits.
>>> base16_decode('48656c6c6f20576f726c6421')
Traceback (most recent call last):
...
ValueError: Base16 encoded data is invalid:
Data is not uppercase hex or it contains invalid characters.
>>> base16_decode('This is not base64 encoded data.')
Traceback (most recent call last):
...
ValueError: Base16 encoded data is invalid:
Data is not uppercase hex or it contains invalid characters.
"""
# Check data validity, following RFC3548
# https://www.ietf.org/rfc/rfc3548.txt
if (len(data) % 2) != 0:
raise ValueError(
"""Base16 encoded data is invalid:
Data does not have an even number of hex digits."""
)
# Check the character set - the standard base16 alphabet
# is uppercase according to RFC3548 section 6
if not set(data) <= set("0123456789ABCDEF"):
raise ValueError(
"""Base16 encoded data is invalid:
Data is not uppercase hex or it contains invalid characters."""
)
# For every two hexadecimal digits (= a byte), turn it into an integer.
# Then, string the result together into bytes, and return it.
return bytes(int(data[i] + data[i + 1], 16) for i in range(0, len(data), 2))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: ciphers/base32.py
================================================
"""
Base32 encoding and decoding
https://en.wikipedia.org/wiki/Base32
"""
B32_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
def base32_encode(data: bytes) -> bytes:
"""
>>> base32_encode(b"Hello World!")
b'JBSWY3DPEBLW64TMMQQQ===='
>>> base32_encode(b"123456")
b'GEZDGNBVGY======'
>>> base32_encode(b"some long complex string")
b'ONXW2ZJANRXW4ZZAMNXW24DMMV4CA43UOJUW4ZY='
"""
binary_data = "".join(bin(ord(d))[2:].zfill(8) for d in data.decode("utf-8"))
binary_data = binary_data.ljust(5 * ((len(binary_data) // 5) + 1), "0")
b32_chunks = map("".join, zip(*[iter(binary_data)] * 5))
b32_result = "".join(B32_CHARSET[int(chunk, 2)] for chunk in b32_chunks)
return bytes(b32_result.ljust(8 * ((len(b32_result) // 8) + 1), "="), "utf-8")
def base32_decode(data: bytes) -> bytes:
"""
>>> base32_decode(b'JBSWY3DPEBLW64TMMQQQ====')
b'Hello World!'
>>> base32_decode(b'GEZDGNBVGY======')
b'123456'
>>> base32_decode(b'ONXW2ZJANRXW4ZZAMNXW24DMMV4CA43UOJUW4ZY=')
b'some long complex string'
"""
binary_chunks = "".join(
bin(B32_CHARSET.index(_d))[2:].zfill(5)
for _d in data.decode("utf-8").strip("=")
)
binary_data = list(map("".join, zip(*[iter(binary_chunks)] * 8)))
return bytes("".join([chr(int(_d, 2)) for _d in binary_data]), "utf-8")
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: ciphers/base64_cipher.py
================================================
B64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
def base64_encode(data: bytes) -> bytes:
"""Encodes data according to RFC4648.
The data is first transformed to binary and appended with binary digits so that its
length becomes a multiple of 6, then each 6 binary digits will match a character in
the B64_CHARSET string. The number of appended binary digits would later determine
how many "=" signs should be added, the padding.
For every 2 binary digits added, a "=" sign is added in the output.
We can add any binary digits to make it a multiple of 6, for instance, consider the
following example:
"AA" -> 0010100100101001 -> 001010 010010 1001
As can be seen above, 2 more binary digits should be added, so there's 4
possibilities here: 00, 01, 10 or 11.
That being said, Base64 encoding can be used in Steganography to hide data in these
appended digits.
>>> from base64 import b64encode
>>> a = b"This pull request is part of Hacktoberfest20!"
>>> b = b"https://tools.ietf.org/html/rfc4648"
>>> c = b"A"
>>> base64_encode(a) == b64encode(a)
True
>>> base64_encode(b) == b64encode(b)
True
>>> base64_encode(c) == b64encode(c)
True
>>> base64_encode("abc")
Traceback (most recent call last):
...
TypeError: a bytes-like object is required, not 'str'
"""
# Make sure the supplied data is a bytes-like object
if not isinstance(data, bytes):
msg = f"a bytes-like object is required, not '{data.__class__.__name__}'"
raise TypeError(msg)
binary_stream = "".join(bin(byte)[2:].zfill(8) for byte in data)
padding_needed = len(binary_stream) % 6 != 0
if padding_needed:
# The padding that will be added later
padding = b"=" * ((6 - len(binary_stream) % 6) // 2)
# Append binary_stream with arbitrary binary digits (0's by default) to make its
# length a multiple of 6.
binary_stream += "0" * (6 - len(binary_stream) % 6)
else:
padding = b""
# Encode every 6 binary digits to their corresponding Base64 character
return (
"".join(
B64_CHARSET[int(binary_stream[index : index + 6], 2)]
for index in range(0, len(binary_stream), 6)
).encode()
+ padding
)
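# Illustrative sketch (not part of the original module): a single byte yields 8 bits,
# so 4 zero bits are appended to reach a multiple of 6, and one "=" is added for
# every 2 appended bits, giving two "=" characters here.
def _padding_demo() -> bytes:
    """
    >>> _padding_demo()
    b'QQ=='
    """
    return base64_encode(b"A")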
def base64_decode(encoded_data: str) -> bytes:
"""Decodes data according to RFC4648.
This does the reverse operation of base64_encode.
We first transform the encoded data back to a binary stream, take off the
previously appended binary digits according to the padding, at this point we
would have a binary stream whose length is multiple of 8, the last step is
to convert every 8 bits to a byte.
>>> from base64 import b64decode
>>> a = "VGhpcyBwdWxsIHJlcXVlc3QgaXMgcGFydCBvZiBIYWNrdG9iZXJmZXN0MjAh"
>>> b = "aHR0cHM6Ly90b29scy5pZXRmLm9yZy9odG1sL3JmYzQ2NDg="
>>> c = "QQ=="
>>> base64_decode(a) == b64decode(a)
True
>>> base64_decode(b) == b64decode(b)
True
>>> base64_decode(c) == b64decode(c)
True
>>> base64_decode("abc")
Traceback (most recent call last):
...
AssertionError: Incorrect padding
"""
# Make sure encoded_data is either a string or a bytes-like object
if not isinstance(encoded_data, bytes) and not isinstance(encoded_data, str):
msg = (
"argument should be a bytes-like object or ASCII string, "
f"not '{encoded_data.__class__.__name__}'"
)
raise TypeError(msg)
# In case encoded_data is a bytes-like object, make sure it contains only
# ASCII characters so we convert it to a string object
if isinstance(encoded_data, bytes):
try:
encoded_data = encoded_data.decode("utf-8")
except UnicodeDecodeError:
raise ValueError("base64 encoded data should only contain ASCII characters")
padding = encoded_data.count("=")
# Check if the encoded string contains non base64 characters
if padding:
assert all(char in B64_CHARSET for char in encoded_data[:-padding]), (
"Invalid base64 character(s) found."
)
else:
assert all(char in B64_CHARSET for char in encoded_data), (
"Invalid base64 character(s) found."
)
# Check the padding
assert len(encoded_data) % 4 == 0 and padding < 3, "Incorrect padding"
if padding:
# Remove padding if there is one
encoded_data = encoded_data[:-padding]
binary_stream = "".join(
bin(B64_CHARSET.index(char))[2:].zfill(6) for char in encoded_data
)[: -padding * 2]
else:
binary_stream = "".join(
bin(B64_CHARSET.index(char))[2:].zfill(6) for char in encoded_data
)
data = [
int(binary_stream[index : index + 8], 2)
for index in range(0, len(binary_stream), 8)
]
return bytes(data)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: ciphers/base85.py
================================================
"""
Base85 (Ascii85) encoding and decoding
https://en.wikipedia.org/wiki/Ascii85
"""
def _base10_to_85(d: int) -> str:
return "".join(chr(d % 85 + 33)) + _base10_to_85(d // 85) if d > 0 else ""
def _base85_to_10(digits: list) -> int:
return sum(char * 85**i for i, char in enumerate(reversed(digits)))
def ascii85_encode(data: bytes) -> bytes:
"""
>>> ascii85_encode(b"")
b''
>>> ascii85_encode(b"12345")
b'0etOA2#'
>>> ascii85_encode(b"base 85")
b'@UX=h+?24'
"""
binary_data = "".join(bin(ord(d))[2:].zfill(8) for d in data.decode("utf-8"))
null_values = (32 * ((len(binary_data) // 32) + 1) - len(binary_data)) // 8
binary_data = binary_data.ljust(32 * ((len(binary_data) // 32) + 1), "0")
b85_chunks = [int(_s, 2) for _s in map("".join, zip(*[iter(binary_data)] * 32))]
result = "".join(_base10_to_85(chunk)[::-1] for chunk in b85_chunks)
return bytes(result[:-null_values] if null_values % 4 != 0 else result, "utf-8")
def ascii85_decode(data: bytes) -> bytes:
"""
>>> ascii85_decode(b"")
b''
>>> ascii85_decode(b"0etOA2#")
b'12345'
>>> ascii85_decode(b"@UX=h+?24")
b'base 85'
"""
null_values = 5 * ((len(data) // 5) + 1) - len(data)
binary_data = data.decode("utf-8") + "u" * null_values
b85_chunks = map("".join, zip(*[iter(binary_data)] * 5))
b85_segments = [[ord(_s) - 33 for _s in chunk] for chunk in b85_chunks]
results = [bin(_base85_to_10(chunk))[2::].zfill(32) for chunk in b85_segments]
char_chunks = [
[chr(int(_s, 2)) for _s in map("".join, zip(*[iter(r)] * 8))] for r in results
]
result = "".join("".join(char) for char in char_chunks)
offset = int(null_values % 5 == 0)
return bytes(result[: offset - null_values], "utf-8")
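# Illustrative sketch (not part of the original module): _base10_to_85() emits
# base-85 digits (offset by 33 into printable ASCII) least-significant first, which
# is why ascii85_encode() reverses each chunk.
def _digits_demo() -> str:
    """
    >>> _digits_demo()
    '0etOA'
    """
    return _base10_to_85(int.from_bytes(b"1234", "big"))[::-1]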
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: ciphers/beaufort_cipher.py
================================================
"""
Author: Mohit Radadiya
"""
from string import ascii_uppercase
dict1 = {char: i for i, char in enumerate(ascii_uppercase)}
dict2 = dict(enumerate(ascii_uppercase))
# This function generates the key in a cyclic manner
# until its length equals the length of the original text
def generate_key(message: str, key: str) -> str:
"""
>>> generate_key("THE GERMAN ATTACK","SECRET")
'SECRETSECRETSECRE'
"""
x = len(message)
i = 0
while True:
if x == i:
i = 0
if len(key) == len(message):
break
key += key[i]
i += 1
return key
# This function returns the encrypted text
# generated with the help of the key
def cipher_text(message: str, key_new: str) -> str:
"""
>>> cipher_text("THE GERMAN ATTACK","SECRETSECRETSECRE")
'BDC PAYUWL JPAIYI'
"""
cipher_text = ""
i = 0
for letter in message:
if letter == " ":
cipher_text += " "
else:
x = (dict1[letter] - dict1[key_new[i]]) % 26
i += 1
cipher_text += dict2[x]
return cipher_text
# This function decrypts the encrypted text
# and returns the original text
def original_text(cipher_text: str, key_new: str) -> str:
"""
>>> original_text("BDC PAYUWL JPAIYI","SECRETSECRETSECRE")
'THE GERMAN ATTACK'
"""
or_txt = ""
i = 0
for letter in cipher_text:
if letter == " ":
or_txt += " "
else:
x = (dict1[letter] + dict1[key_new[i]] + 26) % 26
i += 1
or_txt += dict2[x]
return or_txt
def main() -> None:
message = "THE GERMAN ATTACK"
key = "SECRET"
key_new = generate_key(message, key)
s = cipher_text(message, key_new)
print(f"Encrypted Text = {s}")
print(f"Original Text = {original_text(s, key_new)}")
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: ciphers/bifid.py
================================================
#!/usr/bin/env python3
"""
The Bifid Cipher uses a Polybius Square to encipher a message in a way that
makes it fairly difficult to decipher without knowing the secret.
https://www.braingle.com/brainteasers/codes/bifid.php
"""
import numpy as np
SQUARE = [
["a", "b", "c", "d", "e"],
["f", "g", "h", "i", "k"],
["l", "m", "n", "o", "p"],
["q", "r", "s", "t", "u"],
["v", "w", "x", "y", "z"],
]
class BifidCipher:
def __init__(self) -> None:
self.SQUARE = np.array(SQUARE)
def letter_to_numbers(self, letter: str) -> np.ndarray:
"""
Return the pair of numbers that represents the given letter in the
polybius square
>>> np.array_equal(BifidCipher().letter_to_numbers('a'), [1,1])
True
>>> np.array_equal(BifidCipher().letter_to_numbers('u'), [4,5])
True
"""
index1, index2 = np.where(letter == self.SQUARE)
indexes = np.concatenate([index1 + 1, index2 + 1])
return indexes
def numbers_to_letter(self, index1: int, index2: int) -> str:
"""
Return the letter corresponding to the position [index1, index2] in
the polybius square
>>> BifidCipher().numbers_to_letter(4, 5) == "u"
True
>>> BifidCipher().numbers_to_letter(1, 1) == "a"
True
"""
letter = self.SQUARE[index1 - 1, index2 - 1]
return letter
def encode(self, message: str) -> str:
"""
Return the encoded version of message according to the polybius cipher
>>> BifidCipher().encode('testmessage') == 'qtltbdxrxlk'
True
>>> BifidCipher().encode('Test Message') == 'qtltbdxrxlk'
True
>>> BifidCipher().encode('test j') == BifidCipher().encode('test i')
True
"""
message = message.lower()
message = message.replace(" ", "")
message = message.replace("j", "i")
first_step = np.empty((2, len(message)))
for letter_index in range(len(message)):
numbers = self.letter_to_numbers(message[letter_index])
first_step[0, letter_index] = numbers[0]
first_step[1, letter_index] = numbers[1]
second_step = first_step.reshape(2 * len(message))
encoded_message = ""
for numbers_index in range(len(message)):
index1 = int(second_step[numbers_index * 2])
index2 = int(second_step[(numbers_index * 2) + 1])
letter = self.numbers_to_letter(index1, index2)
encoded_message = encoded_message + letter
return encoded_message
def decode(self, message: str) -> str:
"""
Return the decoded version of message according to the polybius cipher
>>> BifidCipher().decode('qtltbdxrxlk') == 'testmessage'
True
"""
message = message.lower()
message = message.replace(" ", "")
first_step = np.empty(2 * len(message))
for letter_index in range(len(message)):
numbers = self.letter_to_numbers(message[letter_index])
first_step[letter_index * 2] = numbers[0]
first_step[letter_index * 2 + 1] = numbers[1]
second_step = first_step.reshape((2, len(message)))
decoded_message = ""
for numbers_index in range(len(message)):
index1 = int(second_step[0, numbers_index])
index2 = int(second_step[1, numbers_index])
letter = self.numbers_to_letter(index1, index2)
decoded_message = decoded_message + letter
return decoded_message
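# Illustrative sketch (not part of the original module): the Polybius-square lookup
# the Bifid steps are built on, e.g. "b" sits at row 1, column 2 of SQUARE.
def _square_demo() -> list[int]:
    """
    >>> _square_demo()
    [1, 2]
    """
    return BifidCipher().letter_to_numbers("b").tolist()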
================================================
FILE: ciphers/brute_force_caesar_cipher.py
================================================
import string
def decrypt(message: str) -> None:
"""
>>> decrypt('TMDETUX PMDVU')
Decryption using Key #0: TMDETUX PMDVU
Decryption using Key #1: SLCDSTW OLCUT
Decryption using Key #2: RKBCRSV NKBTS
Decryption using Key #3: QJABQRU MJASR
Decryption using Key #4: PIZAPQT LIZRQ
Decryption using Key #5: OHYZOPS KHYQP
Decryption using Key #6: NGXYNOR JGXPO
Decryption using Key #7: MFWXMNQ IFWON
Decryption using Key #8: LEVWLMP HEVNM
Decryption using Key #9: KDUVKLO GDUML
Decryption using Key #10: JCTUJKN FCTLK
Decryption using Key #11: IBSTIJM EBSKJ
Decryption using Key #12: HARSHIL DARJI
Decryption using Key #13: GZQRGHK CZQIH
Decryption using Key #14: FYPQFGJ BYPHG
Decryption using Key #15: EXOPEFI AXOGF
Decryption using Key #16: DWNODEH ZWNFE
Decryption using Key #17: CVMNCDG YVMED
Decryption using Key #18: BULMBCF XULDC
Decryption using Key #19: ATKLABE WTKCB
Decryption using Key #20: ZSJKZAD VSJBA
Decryption using Key #21: YRIJYZC URIAZ
Decryption using Key #22: XQHIXYB TQHZY
Decryption using Key #23: WPGHWXA SPGYX
Decryption using Key #24: VOFGVWZ ROFXW
Decryption using Key #25: UNEFUVY QNEWV
"""
for key in range(len(string.ascii_uppercase)):
translated = ""
for symbol in message:
if symbol in string.ascii_uppercase:
num = string.ascii_uppercase.find(symbol)
num = num - key
if num < 0:
num = num + len(string.ascii_uppercase)
translated = translated + string.ascii_uppercase[num]
else:
translated = translated + symbol
print(f"Decryption using Key #{key}: {translated}")
def main() -> None:
message = input("Encrypted message: ")
message = message.upper()
decrypt(message)
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: ciphers/caesar_cipher.py
================================================
from __future__ import annotations
from string import ascii_letters
def encrypt(input_string: str, key: int, alphabet: str | None = None) -> str:
"""
encrypt
=======
Encodes a given string with the caesar cipher and returns the encoded
message
Parameters:
-----------
* `input_string`: the plain-text that needs to be encoded
* `key`: the number of letters to shift the message by
Optional:
* `alphabet` (``None``): the alphabet used to encode the cipher, if not
specified, the standard english alphabet with upper and lowercase
letters is used
Returns:
* A string containing the encoded cipher-text
More on the caesar cipher
=========================
The caesar cipher is named after Julius Caesar who used it when sending
secret military messages to his troops. This is a simple substitution cipher
where every character in the plain-text is shifted by a certain number known
as the "key" or "shift".
Example:
Say we have the following message:
``Hello, captain``
And our alphabet is made up of lower and uppercase letters:
``abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ``
And our shift is ``2``
We can then encode the message, one letter at a time. ``H`` would become ``J``,
since ``J`` is two letters away, and so on. If the shift is ever too large, or
our letter is at the end of the alphabet, we just start at the beginning
(``Z`` would shift to ``a`` then ``b`` and so on).
Our final message would be ``Jgnnq, ecrvckp``
Further reading
===============
* https://en.m.wikipedia.org/wiki/Caesar_cipher
Doctests
========
>>> encrypt('The quick brown fox jumps over the lazy dog', 8)
'bpm yCqks jzwEv nwF rCuxA wDmz Bpm tiHG lwo'
>>> encrypt('A very large key', 8000)
's nWjq dSjYW cWq'
>>> encrypt('a lowercase alphabet', 5, 'abcdefghijklmnopqrstuvwxyz')
'f qtbjwhfxj fqumfgjy'
"""
# Set default alphabet to lower and upper case english chars
alpha = alphabet or ascii_letters
# The final result string
result = ""
for character in input_string:
if character not in alpha:
# Append without encryption if character is not in the alphabet
result += character
else:
# Get the index of the new key and make sure it isn't too large
new_key = (alpha.index(character) + key) % len(alpha)
# Append the encoded character to the alphabet
result += alpha[new_key]
return result
def decrypt(input_string: str, key: int, alphabet: str | None = None) -> str:
"""
decrypt
=======
Decodes a given string of cipher-text and returns the decoded plain-text
Parameters:
-----------
* `input_string`: the cipher-text that needs to be decoded
* `key`: the number of letters to shift the message backwards by to decode
Optional:
* `alphabet` (``None``): the alphabet used to decode the cipher, if not
specified, the standard english alphabet with upper and lowercase
letters is used
Returns:
* A string containing the decoded plain-text
More on the caesar cipher
=========================
The caesar cipher is named after Julius Caesar who used it when sending
secret military messages to his troops. This is a simple substitution cipher
where every character in the plain-text is shifted by a certain number known
as the "key" or "shift". Please keep in mind, here we will be focused on
decryption.
Example:
Say we have the following cipher-text:
``Jgnnq, ecrvckp``
And our alphabet is made up of lower and uppercase letters:
``abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ``
And our shift is ``2``
To decode the message, we would do the same thing as encoding, but in
reverse. The first letter, ``J`` would become ``H`` (remember: we are decoding)
because ``H`` is two letters in reverse (to the left) of ``J``. We would
continue doing this. A letter like ``a`` would shift back to the end of
the alphabet, and would become ``Z`` or ``Y`` and so on.
Our final message would be ``Hello, captain``
Further reading
===============
* https://en.m.wikipedia.org/wiki/Caesar_cipher
Doctests
========
>>> decrypt('bpm yCqks jzwEv nwF rCuxA wDmz Bpm tiHG lwo', 8)
'The quick brown fox jumps over the lazy dog'
>>> decrypt('s nWjq dSjYW cWq', 8000)
'A very large key'
>>> decrypt('f qtbjwhfxj fqumfgjy', 5, 'abcdefghijklmnopqrstuvwxyz')
'a lowercase alphabet'
"""
# Turn on decode mode by making the key negative
key *= -1
return encrypt(input_string, key, alphabet)
def brute_force(input_string: str, alphabet: str | None = None) -> dict[int, str]:
"""
brute_force
===========
Returns all the possible combinations of keys and the decoded strings in the
form of a dictionary
Parameters:
-----------
* `input_string`: the cipher-text that needs to be used during brute-force
Optional:
* `alphabet` (``None``): the alphabet used to decode the cipher, if not
specified, the standard english alphabet with upper and lowercase
letters is used
More about brute force
======================
Brute force is when a person intercepts a message or password, not knowing
the key, and tries every single combination. This is easy with the caesar
cipher since there are only as many keys as there are letters in the alphabet.
The more complex the cipher, the longer it will take to brute force.
Ex:
Say, for simplicity, we have a ``5`` letter alphabet (``abcde``) and we intercepted
the following message: ``dbc``.
We could then just write out every combination:
``ecd``... and so on, until we reach a combination that makes sense:
``cab``
Further reading
===============
* https://en.wikipedia.org/wiki/Brute_force
Doctests
========
>>> brute_force("jFyuMy xIH'N vLONy zILwy Gy!")[20]
"Please don't brute force me!"
>>> brute_force(1)
Traceback (most recent call last):
TypeError: 'int' object is not iterable
"""
# Set default alphabet to lower and upper case english chars
alpha = alphabet or ascii_letters
# To store data on all the combinations
brute_force_data = {}
# Cycle through each combination
for key in range(1, len(alpha) + 1):
# Decrypt the message and store the result in the data
brute_force_data[key] = decrypt(input_string, key, alpha)
return brute_force_data
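# --- Illustrative sketch (added for this write-up, not part of the original
# module): a quick round trip showing that decrypt() undoes encrypt() and that
# brute_force() recovers the plaintext at the key used for encryption, as
# described in the docstrings above.
def _caesar_demo() -> None:
    plaintext = "The quick brown fox jumps over the lazy dog"
    key = 8
    ciphertext = encrypt(plaintext, key)
    assert decrypt(ciphertext, key) == plaintext
    # brute_force() tries keys 1..len(alphabet); the entry for `key` is the plaintext
    assert brute_force(ciphertext)[key] == plaintext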
if __name__ == "__main__":
while True:
print(f"\n{'-' * 10}\n Menu\n{'-' * 10}")
print(*["1.Encrypt", "2.Decrypt", "3.BruteForce", "4.Quit"], sep="\n")
# get user input
choice = input("\nWhat would you like to do?: ").strip() or "4"
# run functions based on what the user chose
if choice not in ("1", "2", "3", "4"):
print("Invalid choice, please enter a valid choice")
elif choice == "1":
input_string = input("Please enter the string to be encrypted: ")
key = int(input("Please enter off-set: ").strip())
print(encrypt(input_string, key))
elif choice == "2":
input_string = input("Please enter the string to be decrypted: ")
key = int(input("Please enter off-set: ").strip())
print(decrypt(input_string, key))
elif choice == "3":
input_string = input("Please enter the string to be decrypted: ")
brute_force_data = brute_force(input_string)
for key, value in brute_force_data.items():
print(f"Key: {key} | Message: {value}")
elif choice == "4":
print("Goodbye.")
break
================================================
FILE: ciphers/cryptomath_module.py
================================================
from maths.greatest_common_divisor import gcd_by_iterative
def find_mod_inverse(a: int, m: int) -> int:
if gcd_by_iterative(a, m) != 1:
msg = f"mod inverse of {a!r} and {m!r} does not exist"
raise ValueError(msg)
u1, u2, u3 = 1, 0, a
v1, v2, v3 = 0, 1, m
while v3 != 0:
q = u3 // v3
v1, v2, v3, u1, u2, u3 = (u1 - q * v1), (u2 - q * v2), (u3 - q * v3), v1, v2, v3
return u1 % m
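# Illustrative check (added for this write-up, not part of the original
# module): the value returned by find_mod_inverse(a, m) satisfies
# (a * inverse) % m == 1 whenever gcd(a, m) == 1; e.g. the inverse of 7
# modulo 26 is 15, because 7 * 15 == 105 == 4 * 26 + 1.
def _mod_inverse_demo(a: int = 7, m: int = 26) -> int:
    inverse = find_mod_inverse(a, m)
    assert (a * inverse) % m == 1
    return inverse  # 15 for the default arguments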
================================================
FILE: ciphers/decrypt_caesar_with_chi_squared.py
================================================
#!/usr/bin/env python3
from __future__ import annotations
def decrypt_caesar_with_chi_squared(
ciphertext: str,
cipher_alphabet: list[str] | None = None,
frequencies_dict: dict[str, float] | None = None,
case_sensitive: bool = False,
) -> tuple[int, float, str]:
"""
Basic Usage
===========
Arguments:
* `ciphertext` (str): the text to decode (encoded with the caesar cipher)
Optional Arguments:
* `cipher_alphabet` (list): the alphabet used for the cipher (each letter is
a string separated by commas)
* `frequencies_dict` (dict): a dictionary of word frequencies where keys are
the letters and values are a percentage representation of the frequency as
a decimal/float
* `case_sensitive` (bool): a boolean value: ``True`` if the case matters during
decryption, ``False`` if it doesn't
Returns:
* A tuple in the form of:
(`most_likely_cipher`, `most_likely_cipher_chi_squared_value`,
`decoded_most_likely_cipher`)
where...
- `most_likely_cipher` is an integer representing the shift of the smallest
chi-squared statistic (most likely key)
- `most_likely_cipher_chi_squared_value` is a float representing the
chi-squared statistic of the most likely shift
- `decoded_most_likely_cipher` is a string with the decoded cipher
(decoded by the most_likely_cipher key)
The Chi-squared test
====================
The caesar cipher
-----------------
The caesar cipher is a very insecure encryption algorithm, however it has
been in use since the time of Julius Caesar. The cipher is a simple substitution cipher
where each character in the plain text is replaced by a character in the
alphabet a certain number of characters after the original character. The
number of characters away is called the shift or key. For example:
| Plain text: ``hello``
| Key: ``1``
| Cipher text: ``ifmmp``
| (each letter in ``hello`` has been shifted one to the right in the eng. alphabet)
As you can imagine, this doesn't provide much security. In fact,
decrypting the ciphertext by brute force is extremely easy, even by hand. One
way to do this automatically is the chi-squared test.
The chi-squared test
--------------------
Each letter in the english alphabet has a frequency, or the amount of times
it shows up compared to other letters (usually expressed as a decimal
representing the percentage likelihood). The most common letter in the
english language is ``e`` with a frequency of ``0.11162`` or ``11.162%``.
The test is completed in the following fashion.
1. The ciphertext is decoded in a brute force way (each of the ``26``
possible shifts is tried)
2. For every combination, for each letter in the combination, the average
amount of times the letter should appear in the message is calculated by
multiplying the total number of characters by the frequency of the letter.
| For example:
| In a message of ``100`` characters, ``e`` should appear around ``11.162``
times.
3. Then, to calculate the margin of error (the difference between the amount
of times the letter SHOULD appear and the amount of times it DOES appear),
we use the chi-squared test. The following formula is used:
Let:
- n be the number of times the letter actually appears
- p be the predicted value of the number of times the letter should
appear (see item ``2``)
- let v be the chi-squared test result (referred to here as chi-squared
value/statistic)
::
(n - p)^2
--------- = v
p
4. Each chi squared value for each letter is then added up to the total.
The total is the chi-squared statistic for that encryption key.
5. The encryption key with the lowest chi-squared value is the most likely
to be the decoded answer.
Further Reading
===============
* http://practicalcryptography.com/cryptanalysis/text-characterisation/chi-squared-statistic/
* https://en.wikipedia.org/wiki/Letter_frequency
* https://en.wikipedia.org/wiki/Chi-squared_test
* https://en.m.wikipedia.org/wiki/Caesar_cipher
Doctests
========
>>> decrypt_caesar_with_chi_squared(
... 'dof pz aol jhlzhy jpwoly zv wvwbshy? pa pz avv lhzf av jyhjr!'
... ) # doctest: +NORMALIZE_WHITESPACE
(7, 3129.228005747531,
'why is the caesar cipher so popular? it is too easy to crack!')
>>> decrypt_caesar_with_chi_squared('crybd cdbsxq')
(10, 233.35343938980898, 'short string')
>>> decrypt_caesar_with_chi_squared('Crybd Cdbsxq', case_sensitive=True)
(10, 233.35343938980898, 'Short String')
>>> decrypt_caesar_with_chi_squared(12)
Traceback (most recent call last):
AttributeError: 'int' object has no attribute 'lower'
"""
alphabet_letters = cipher_alphabet or [chr(i) for i in range(97, 123)]
# If the argument is None or the user provided an empty dictionary
if not frequencies_dict:
# Frequencies of letters in the english language (how much they show up)
frequencies = {
"a": 0.08497,
"b": 0.01492,
"c": 0.02202,
"d": 0.04253,
"e": 0.11162,
"f": 0.02228,
"g": 0.02015,
"h": 0.06094,
"i": 0.07546,
"j": 0.00153,
"k": 0.01292,
"l": 0.04025,
"m": 0.02406,
"n": 0.06749,
"o": 0.07507,
"p": 0.01929,
"q": 0.00095,
"r": 0.07587,
"s": 0.06327,
"t": 0.09356,
"u": 0.02758,
"v": 0.00978,
"w": 0.02560,
"x": 0.00150,
"y": 0.01994,
"z": 0.00077,
}
else:
# Custom frequencies dictionary
frequencies = frequencies_dict
if not case_sensitive:
ciphertext = ciphertext.lower()
# Chi squared statistic values
chi_squared_statistic_values: dict[int, tuple[float, str]] = {}
# cycle through all of the shifts
for shift in range(len(alphabet_letters)):
decrypted_with_shift = ""
# decrypt the message with the shift
for letter in ciphertext:
try:
# Try to index the letter in the alphabet
new_key = (alphabet_letters.index(letter.lower()) - shift) % len(
alphabet_letters
)
decrypted_with_shift += (
alphabet_letters[new_key].upper()
if case_sensitive and letter.isupper()
else alphabet_letters[new_key]
)
except ValueError:
# Append the character if it isn't in the alphabet
decrypted_with_shift += letter
chi_squared_statistic = 0.0
# Loop through each letter in the decoded message with the shift
for letter in decrypted_with_shift:
if case_sensitive:
letter = letter.lower()
if letter in frequencies:
# Get the amount of times the letter occurs in the message
occurrences = decrypted_with_shift.lower().count(letter)
# Get the expected amount of times the letter should appear based
# on letter frequencies
expected = frequencies[letter] * occurrences
# Complete the chi squared statistic formula
chi_letter_value = ((occurrences - expected) ** 2) / expected
# Add the margin of error to the total chi squared statistic
chi_squared_statistic += chi_letter_value
elif letter.lower() in frequencies:
# Get the amount of times the letter occurs in the message
occurrences = decrypted_with_shift.count(letter)
# Get the expected amount of times the letter should appear based
# on letter frequencies
expected = frequencies[letter] * occurrences
# Complete the chi squared statistic formula
chi_letter_value = ((occurrences - expected) ** 2) / expected
# Add the margin of error to the total chi squared statistic
chi_squared_statistic += chi_letter_value
# Add the data to the chi_squared_statistic_values dictionary
chi_squared_statistic_values[shift] = (
chi_squared_statistic,
decrypted_with_shift,
)
# Get the most likely cipher by finding the cipher with the smallest chi squared
# statistic
def chi_squared_statistic_values_sorting_key(key: int) -> tuple[float, str]:
return chi_squared_statistic_values[key]
most_likely_cipher: int = min(
chi_squared_statistic_values,
key=chi_squared_statistic_values_sorting_key,
)
# Get all the data from the most likely cipher (key, decoded message)
(
most_likely_cipher_chi_squared_value,
decoded_most_likely_cipher,
) = chi_squared_statistic_values[most_likely_cipher]
# Return the data on the most likely shift
return (
most_likely_cipher,
most_likely_cipher_chi_squared_value,
decoded_most_likely_cipher,
)
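# Worked example (added for this write-up, not part of the original file):
# the per-letter statistic from step 3 of the docstring above. In a message of
# 100 characters, "e" is expected about 0.11162 * 100 = 11.162 times; if it
# actually appears 14 times, its contribution to the total is
# (14 - 11.162) ** 2 / 11.162, roughly 0.72. Summing these contributions over
# all letters gives the chi-squared statistic for one candidate shift.
def _chi_squared_letter_contribution(
    occurrences: int = 14, frequency: float = 0.11162, message_length: int = 100
) -> float:
    expected = frequency * message_length
    return (occurrences - expected) ** 2 / expected  # ~0.7216 for the defaults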
================================================
FILE: ciphers/deterministic_miller_rabin.py
================================================
"""Created by Nathan Damon, @bizzfitch on github
>>> test_miller_rabin()
"""
def miller_rabin(n: int, allow_probable: bool = False) -> bool:
"""Deterministic Miller-Rabin algorithm for primes ~< 3.32e24.
Uses numerical analysis results to return whether or not the passed number
is prime. If the passed number is above the upper limit, and
allow_probable is True, then a return value of True indicates that n is
probably prime. This test does not allow false negatives - a return value
of False means the number is ALWAYS composite.
Parameters
----------
n : int
The integer to be tested. Since we usually care if a number is prime,
n < 2 returns False instead of raising a ValueError.
allow_probable: bool, default False
Whether or not to test n above the upper bound of the deterministic test.
Raises
------
ValueError
Reference
---------
https://en.wikipedia.org/wiki/Miller%E2%80%93Rabin_primality_test
"""
if n == 2:
return True
if not n % 2 or n < 2:
return False
if n > 5 and n % 10 not in (1, 3, 7, 9): # can quickly check last digit
return False
if n > 3_317_044_064_679_887_385_961_981 and not allow_probable:
raise ValueError(
"Warning: upper bound of deterministic test is exceeded. "
"Pass allow_probable=True to allow probabilistic test. "
"A return value of True indicates a probable prime."
)
# array bounds provided by analysis
bounds = [
2_047,
1_373_653,
25_326_001,
3_215_031_751,
2_152_302_898_747,
3_474_749_660_383,
341_550_071_728_321,
1,
3_825_123_056_546_413_051,
1,
1,
318_665_857_834_031_151_167_461,
3_317_044_064_679_887_385_961_981,
]
primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41]
for idx, _p in enumerate(bounds, 1):
if n < _p:
# then we have our last prime to check
plist = primes[:idx]
break
d, s = n - 1, 0
# break up n -1 into a power of 2 (s) and
# remaining odd component
# essentially, solve for d * 2 ** s == n - 1
while d % 2 == 0:
d //= 2
s += 1
for prime in plist:
pr = False
for r in range(s):
m = pow(prime, d * 2**r, n)
# see article for analysis explanation for m
if (r == 0 and m == 1) or ((m + 1) % n == 0):
pr = True
# this loop will not determine compositeness
break
if pr:
continue
# if pr is False, then the above loop never evaluated to true,
# and n MUST be composite
return False
return True
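# Illustration (added for this write-up, not part of the original file): the
# decomposition n - 1 = d * 2**s with d odd that the loop above computes,
# e.g. 220 == 55 * 2**2 for n = 221 (a composite, 13 * 17, that miller_rabin
# correctly rejects, while the nearby prime 223 is accepted).
def _decompose(n: int = 221) -> tuple[int, int]:
    d, s = n - 1, 0
    while d % 2 == 0:
        d //= 2
        s += 1
    assert d * 2**s == n - 1
    return d, s  # (55, 2) for the default n = 221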
def test_miller_rabin() -> None:
"""Testing a nontrivial (ends in 1, 3, 7, 9) composite
and a prime in each range.
"""
assert not miller_rabin(561)
assert miller_rabin(563)
# 2047
assert not miller_rabin(838_201)
assert miller_rabin(838_207)
# 1_373_653
assert not miller_rabin(17_316_001)
assert miller_rabin(17_316_017)
# 25_326_001
assert not miller_rabin(3_078_386_641)
assert miller_rabin(3_078_386_653)
# 3_215_031_751
assert not miller_rabin(1_713_045_574_801)
assert miller_rabin(1_713_045_574_819)
# 2_152_302_898_747
assert not miller_rabin(2_779_799_728_307)
assert miller_rabin(2_779_799_728_327)
# 3_474_749_660_383
assert not miller_rabin(113_850_023_909_441)
assert miller_rabin(113_850_023_909_527)
# 341_550_071_728_321
assert not miller_rabin(1_275_041_018_848_804_351)
assert miller_rabin(1_275_041_018_848_804_391)
# 3_825_123_056_546_413_051
assert not miller_rabin(79_666_464_458_507_787_791_867)
assert miller_rabin(79_666_464_458_507_787_791_951)
# 318_665_857_834_031_151_167_461
assert not miller_rabin(552_840_677_446_647_897_660_333)
assert miller_rabin(552_840_677_446_647_897_660_359)
# 3_317_044_064_679_887_385_961_981
# upper limit for probabilistic test
if __name__ == "__main__":
test_miller_rabin()
================================================
FILE: ciphers/diffie.py
================================================
from __future__ import annotations
def find_primitive(modulus: int) -> int | None:
"""
Find a primitive root modulo modulus, if one exists.
Args:
modulus : The modulus for which to find a primitive root.
Returns:
The primitive root if one exists, or None if there is none.
Examples:
>>> find_primitive(7) # Modulo 7 has primitive root 3
3
>>> find_primitive(11) # Modulo 11 has primitive root 2
2
>>> find_primitive(8) == None # Modulo 8 has no primitive root
True
"""
for r in range(1, modulus):
li = []
for x in range(modulus - 1):
val = pow(r, x, modulus)
if val in li:
break
li.append(val)
else:
return r
return None
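# Illustration (added for this write-up, not part of the original file): a
# primitive root generates every nonzero residue, e.g. for modulus 7 the
# powers of the root 3 are [1, 3, 2, 6, 4, 5] - all six residues exactly once.
def _primitive_root_powers(modulus: int = 7) -> list[int]:
    root = find_primitive(modulus)
    assert root is not None
    return [pow(root, exponent, modulus) for exponent in range(modulus - 1)]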
if __name__ == "__main__":
import doctest
doctest.testmod()
prime = int(input("Enter a prime number q: "))
primitive_root = find_primitive(prime)
if primitive_root is None:
print(f"Cannot find the primitive for the value: {primitive_root!r}")
else:
a_private = int(input("Enter private key of A: "))
a_public = pow(primitive_root, a_private, prime)
b_private = int(input("Enter private key of B: "))
b_public = pow(primitive_root, b_private, prime)
a_secret = pow(b_public, a_private, prime)
b_secret = pow(a_public, b_private, prime)
print("The key value generated by A is: ", a_secret)
print("The key value generated by B is: ", b_secret)
================================================
FILE: ciphers/diffie_hellman.py
================================================
from binascii import hexlify
from hashlib import sha256
from os import urandom
# RFC 3526 - More Modular Exponential (MODP) Diffie-Hellman groups for
# Internet Key Exchange (IKE) https://tools.ietf.org/html/rfc3526
primes = {
# 1536-bit
5: {
"prime": int(
"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
"29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
"83655D23DCA3AD961C62F356208552BB9ED529077096966D"
"670C354E4ABC9804F1746C08CA237327FFFFFFFFFFFFFFFF",
base=16,
),
"generator": 2,
},
# 2048-bit
14: {
"prime": int(
"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
"29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
"83655D23DCA3AD961C62F356208552BB9ED529077096966D"
"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B"
"E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9"
"DE2BCBF6955817183995497CEA956AE515D2261898FA0510"
"15728E5A8AACAA68FFFFFFFFFFFFFFFF",
base=16,
),
"generator": 2,
},
# 3072-bit
15: {
"prime": int(
"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
"29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
"83655D23DCA3AD961C62F356208552BB9ED529077096966D"
"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B"
"E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9"
"DE2BCBF6955817183995497CEA956AE515D2261898FA0510"
"15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64"
"ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7"
"ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B"
"F12FFA06D98A0864D87602733EC86A64521F2B18177B200C"
"BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31"
"43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF",
base=16,
),
"generator": 2,
},
# 4096-bit
16: {
"prime": int(
"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
"29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
"83655D23DCA3AD961C62F356208552BB9ED529077096966D"
"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B"
"E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9"
"DE2BCBF6955817183995497CEA956AE515D2261898FA0510"
"15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64"
"ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7"
"ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B"
"F12FFA06D98A0864D87602733EC86A64521F2B18177B200C"
"BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31"
"43DB5BFCE0FD108E4B82D120A92108011A723C12A787E6D7"
"88719A10BDBA5B2699C327186AF4E23C1A946834B6150BDA"
"2583E9CA2AD44CE8DBBBC2DB04DE8EF92E8EFC141FBECAA6"
"287C59474E6BC05D99B2964FA090C3A2233BA186515BE7ED"
"1F612970CEE2D7AFB81BDD762170481CD0069127D5B05AA9"
"93B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934063199"
"FFFFFFFFFFFFFFFF",
base=16,
),
"generator": 2,
},
# 6144-bit
17: {
"prime": int(
"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E08"
"8A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B"
"302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9"
"A637ED6B0BFF5CB6F406B7EDEE386BFB5A899FA5AE9F24117C4B1FE6"
"49286651ECE45B3DC2007CB8A163BF0598DA48361C55D39A69163FA8"
"FD24CF5F83655D23DCA3AD961C62F356208552BB9ED529077096966D"
"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3BE39E772C"
"180E86039B2783A2EC07A28FB5C55DF06F4C52C9DE2BCBF695581718"
"3995497CEA956AE515D2261898FA051015728E5A8AAAC42DAD33170D"
"04507A33A85521ABDF1CBA64ECFB850458DBEF0A8AEA71575D060C7D"
"B3970F85A6E1E4C7ABF5AE8CDB0933D71E8C94E04A25619DCEE3D226"
"1AD2EE6BF12FFA06D98A0864D87602733EC86A64521F2B18177B200C"
"BBE117577A615D6C770988C0BAD946E208E24FA074E5AB3143DB5BFC"
"E0FD108E4B82D120A92108011A723C12A787E6D788719A10BDBA5B26"
"99C327186AF4E23C1A946834B6150BDA2583E9CA2AD44CE8DBBBC2DB"
"04DE8EF92E8EFC141FBECAA6287C59474E6BC05D99B2964FA090C3A2"
"233BA186515BE7ED1F612970CEE2D7AFB81BDD762170481CD0069127"
"D5B05AA993B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934028492"
"36C3FAB4D27C7026C1D4DCB2602646DEC9751E763DBA37BDF8FF9406"
"AD9E530EE5DB382F413001AEB06A53ED9027D831179727B0865A8918"
"DA3EDBEBCF9B14ED44CE6CBACED4BB1BDB7F1447E6CC254B33205151"
"2BD7AF426FB8F401378CD2BF5983CA01C64B92ECF032EA15D1721D03"
"F482D7CE6E74FEF6D55E702F46980C82B5A84031900B1C9E59E7C97F"
"BEC7E8F323A97A7E36CC88BE0F1D45B7FF585AC54BD407B22B4154AA"
"CC8F6D7EBF48E1D814CC5ED20F8037E0A79715EEF29BE32806A1D58B"
"B7C5DA76F550AA3D8A1FBFF0EB19CCB1A313D55CDA56C9EC2EF29632"
"387FE8D76E3C0468043E8F663F4860EE12BF2D5B0B7474D6E694F91E"
"6DCC4024FFFFFFFFFFFFFFFF",
base=16,
),
"generator": 2,
},
# 8192-bit
18: {
"prime": int(
"FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1"
"29024E088A67CC74020BBEA63B139B22514A08798E3404DD"
"EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245"
"E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED"
"EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D"
"C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F"
"83655D23DCA3AD961C62F356208552BB9ED529077096966D"
"670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B"
"E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9"
"DE2BCBF6955817183995497CEA956AE515D2261898FA0510"
"15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64"
"ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7"
"ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B"
"F12FFA06D98A0864D87602733EC86A64521F2B18177B200C"
"BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31"
"43DB5BFCE0FD108E4B82D120A92108011A723C12A787E6D7"
"88719A10BDBA5B2699C327186AF4E23C1A946834B6150BDA"
"2583E9CA2AD44CE8DBBBC2DB04DE8EF92E8EFC141FBECAA6"
"287C59474E6BC05D99B2964FA090C3A2233BA186515BE7ED"
"1F612970CEE2D7AFB81BDD762170481CD0069127D5B05AA9"
"93B4EA988D8FDDC186FFB7DC90A6C08F4DF435C934028492"
"36C3FAB4D27C7026C1D4DCB2602646DEC9751E763DBA37BD"
"F8FF9406AD9E530EE5DB382F413001AEB06A53ED9027D831"
"179727B0865A8918DA3EDBEBCF9B14ED44CE6CBACED4BB1B"
"DB7F1447E6CC254B332051512BD7AF426FB8F401378CD2BF"
"5983CA01C64B92ECF032EA15D1721D03F482D7CE6E74FEF6"
"D55E702F46980C82B5A84031900B1C9E59E7C97FBEC7E8F3"
"23A97A7E36CC88BE0F1D45B7FF585AC54BD407B22B4154AA"
"CC8F6D7EBF48E1D814CC5ED20F8037E0A79715EEF29BE328"
"06A1D58BB7C5DA76F550AA3D8A1FBFF0EB19CCB1A313D55C"
"DA56C9EC2EF29632387FE8D76E3C0468043E8F663F4860EE"
"12BF2D5B0B7474D6E694F91E6DBE115974A3926F12FEE5E4"
"38777CB6A932DF8CD8BEC4D073B931BA3BC832B68D9DD300"
"741FA7BF8AFC47ED2576F6936BA424663AAB639C5AE4F568"
"3423B4742BF1C978238F16CBE39D652DE3FDB8BEFC848AD9"
"22222E04A4037C0713EB57A81A23F0C73473FC646CEA306B"
"4BCBC8862F8385DDFA9D4B7FA2C087E879683303ED5BDD3A"
"062B3CF5B3A278A66D2A13F83F44F82DDF310EE074AB6A36"
"4597E899A0255DC164F31CC50846851DF9AB48195DED7EA1"
"B1D510BD7EE74D73FAF36BC31ECFA268359046F4EB879F92"
"4009438B481C6CD7889A002ED5EE382BC9190DA6FC026E47"
"9558E4475677E9AA9E3050E2765694DFC81F56E880B96E71"
"60C980DD98EDD3DFFFFFFFFFFFFFFFFF",
base=16,
),
"generator": 2,
},
}
class DiffieHellman:
"""
Class to represent the Diffie-Hellman key exchange protocol
>>> alice = DiffieHellman()
>>> bob = DiffieHellman()
>>> alice_private = alice.get_private_key()
>>> alice_public = alice.generate_public_key()
>>> bob_private = bob.get_private_key()
>>> bob_public = bob.generate_public_key()
>>> # generating shared key using the DH object
>>> alice_shared = alice.generate_shared_key(bob_public)
>>> bob_shared = bob.generate_shared_key(alice_public)
>>> assert alice_shared == bob_shared
>>> # generating shared key using static methods
>>> alice_shared = DiffieHellman.generate_shared_key_static(
... alice_private, bob_public
... )
>>> bob_shared = DiffieHellman.generate_shared_key_static(
... bob_private, alice_public
... )
>>> assert alice_shared == bob_shared
"""
# Current minimum recommendation is 2048 bit (group 14)
def __init__(self, group: int = 14) -> None:
if group not in primes:
raise ValueError("Unsupported Group")
self.prime = primes[group]["prime"]
self.generator = primes[group]["generator"]
self.__private_key = int(hexlify(urandom(32)), base=16)
def get_private_key(self) -> str:
return hex(self.__private_key)[2:]
def generate_public_key(self) -> str:
public_key = pow(self.generator, self.__private_key, self.prime)
return hex(public_key)[2:]
def is_valid_public_key(self, key: int) -> bool:
# check if the other public key is valid based on NIST SP800-56
return (
2 <= key <= self.prime - 2
and pow(key, (self.prime - 1) // 2, self.prime) == 1
)
def generate_shared_key(self, other_key_str: str) -> str:
other_key = int(other_key_str, base=16)
if not self.is_valid_public_key(other_key):
raise ValueError("Invalid public key")
shared_key = pow(other_key, self.__private_key, self.prime)
return sha256(str(shared_key).encode()).hexdigest()
@staticmethod
def is_valid_public_key_static(remote_public_key_str: int, prime: int) -> bool:
# check if the other public key is valid based on NIST SP800-56
return (
2 <= remote_public_key_str <= prime - 2
and pow(remote_public_key_str, (prime - 1) // 2, prime) == 1
)
@staticmethod
def generate_shared_key_static(
local_private_key_str: str, remote_public_key_str: str, group: int = 14
) -> str:
local_private_key = int(local_private_key_str, base=16)
remote_public_key = int(remote_public_key_str, base=16)
prime = primes[group]["prime"]
if not DiffieHellman.is_valid_public_key_static(remote_public_key, prime):
raise ValueError("Invalid public key")
shared_key = pow(remote_public_key, local_private_key, prime)
return sha256(str(shared_key).encode()).hexdigest()
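# Sketch (added for this write-up, not part of the original module): a full
# exchange using the static helpers, mirroring the class docstring above -
# both sides derive the same shared secret from the other's public key.
def _static_exchange_demo(group: int = 14) -> bool:
    alice, bob = DiffieHellman(group), DiffieHellman(group)
    shared_a = DiffieHellman.generate_shared_key_static(
        alice.get_private_key(), bob.generate_public_key(), group
    )
    shared_b = DiffieHellman.generate_shared_key_static(
        bob.get_private_key(), alice.generate_public_key(), group
    )
    return shared_a == shared_b  # True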
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: ciphers/elgamal_key_generator.py
================================================
import os
import random
import sys
from . import cryptomath_module as cryptomath
from . import rabin_miller
min_primitive_root = 3
# I originally wrote this code naively, following the definition of a primitive root;
# however, every time I ran this program the memory limit was exceeded...
# so I used Algorithm 4.80 in
# Handbook of Applied Cryptography (CRC Press, ISBN: 0-8493-8523-7, October 1996)
# and it seems to run nicely!
def primitive_root(p_val: int) -> int:
print("Generating primitive root of p")
while True:
g = random.randrange(3, p_val)
if pow(g, 2, p_val) == 1:
continue
if pow(g, p_val, p_val) == 1:
continue
return g
def generate_key(key_size: int) -> tuple[tuple[int, int, int, int], tuple[int, int]]:
print("Generating prime p...")
p = rabin_miller.generate_large_prime(key_size) # select large prime number.
e_1 = primitive_root(p) # one primitive root on modulo p.
d = random.randrange(3, p) # private_key -> have to be greater than 2 for safety.
e_2 = cryptomath.find_mod_inverse(pow(e_1, d, p), p)
public_key = (key_size, e_1, e_2, p)
private_key = (key_size, d)
return public_key, private_key
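# Worked toy example (added for this write-up, not part of the original file):
# with the small prime p = 23, primitive root e_1 = 5 and private key d = 7,
# the public component e_2 is the modular inverse of e_1**d mod p. Here
# 5**7 % 23 == 17 and the inverse of 17 modulo 23 is 19, since 17 * 19 % 23 == 1.
def _toy_elgamal_key(p: int = 23, e_1: int = 5, d: int = 7) -> tuple[int, int, int, int]:
    e_2 = cryptomath.find_mod_inverse(pow(e_1, d, p), p)
    assert (pow(e_1, d, p) * e_2) % p == 1
    return e_1, e_2, p, d  # e_2 == 19 for the default arguments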
def make_key_files(name: str, key_size: int) -> None:
if os.path.exists(f"{name}_pubkey.txt") or os.path.exists(f"{name}_privkey.txt"):
print("\nWARNING:")
print(
f'"{name}_pubkey.txt" or "{name}_privkey.txt" already exists. \n'
"Use a different name or delete these files and re-run this program."
)
sys.exit()
public_key, private_key = generate_key(key_size)
print(f"\nWriting public key to file {name}_pubkey.txt...")
with open(f"{name}_pubkey.txt", "w") as fo:
fo.write(f"{public_key[0]},{public_key[1]},{public_key[2]},{public_key[3]}")
print(f"Writing private key to file {name}_privkey.txt...")
with open(f"{name}_privkey.txt", "w") as fo:
fo.write(f"{private_key[0]},{private_key[1]}")
def main() -> None:
print("Making key files...")
make_key_files("elgamal", 2048)
print("Key files generation successful")
if __name__ == "__main__":
main()
================================================
FILE: ciphers/enigma_machine2.py
================================================
"""
| Wikipedia: https://en.wikipedia.org/wiki/Enigma_machine
| Video explanation: https://youtu.be/QwQVMqfoB2E
| Also check out Numberphile's and Computerphile's videos on this topic
This module contains function ``enigma`` which emulates
the famous Enigma machine from WWII.
Module includes:
- ``enigma`` function
- showcase of function usage
- ``9`` randomly generated rotors
- reflector (aka static rotor)
- original alphabet
Created by TrapinchO
"""
from __future__ import annotations
RotorPositionT = tuple[int, int, int]
RotorSelectionT = tuple[str, str, str]
# used alphabet --------------------------
# from string.ascii_uppercase
abc = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# -------------------------- default selection --------------------------
# rotors --------------------------
rotor1 = "EGZWVONAHDCLFQMSIPJBYUKXTR"
rotor2 = "FOBHMDKEXQNRAULPGSJVTYICZW"
rotor3 = "ZJXESIUQLHAVRMDOYGTNFWPBKC"
# reflector --------------------------
reflector = {
"A": "N",
"N": "A",
"B": "O",
"O": "B",
"C": "P",
"P": "C",
"D": "Q",
"Q": "D",
"E": "R",
"R": "E",
"F": "S",
"S": "F",
"G": "T",
"T": "G",
"H": "U",
"U": "H",
"I": "V",
"V": "I",
"J": "W",
"W": "J",
"K": "X",
"X": "K",
"L": "Y",
"Y": "L",
"M": "Z",
"Z": "M",
}
# -------------------------- extra rotors --------------------------
rotor4 = "RMDJXFUWGISLHVTCQNKYPBEZOA"
rotor5 = "SGLCPQWZHKXAREONTFBVIYJUDM"
rotor6 = "HVSICLTYKQUBXDWAJZOMFGPREN"
rotor7 = "RZWQHFMVDBKICJLNTUXAGYPSOE"
rotor8 = "LFKIJODBEGAMQPXVUHYSTCZRWN"
rotor9 = "KOAEGVDHXPQZMLFTYWJNBRCIUS"
def _validator(
rotpos: RotorPositionT, rotsel: RotorSelectionT, pb: str
) -> tuple[RotorPositionT, RotorSelectionT, dict[str, str]]:
"""
Checks if the values can be used for the ``enigma`` function
>>> _validator((1,1,1), (rotor1, rotor2, rotor3), 'POLAND')
((1, 1, 1), ('EGZWVONAHDCLFQMSIPJBYUKXTR', 'FOBHMDKEXQNRAULPGSJVTYICZW', \
'ZJXESIUQLHAVRMDOYGTNFWPBKC'), \
{'P': 'O', 'O': 'P', 'L': 'A', 'A': 'L', 'N': 'D', 'D': 'N'})
:param rotpos: rotor_position
:param rotsel: rotor_selection
:param pb: plugb -> validated and transformed
:return: (`rotpos`, `rotsel`, `pb`)
"""
# Checks if there are 3 unique rotors
if (unique_rotsel := len(set(rotsel))) < 3:
msg = f"Please use 3 unique rotors (not {unique_rotsel})"
raise Exception(msg)
# Checks if rotor positions are valid
rotorpos1, rotorpos2, rotorpos3 = rotpos
if not 0 < rotorpos1 <= len(abc):
msg = f"First rotor position is not within range of 1..26 ({rotorpos1}"
raise ValueError(msg)
if not 0 < rotorpos2 <= len(abc):
msg = f"Second rotor position is not within range of 1..26 ({rotorpos2})"
raise ValueError(msg)
if not 0 < rotorpos3 <= len(abc):
msg = f"Third rotor position is not within range of 1..26 ({rotorpos3})"
raise ValueError(msg)
# Validates string and returns dict
pbdict = _plugboard(pb)
return rotpos, rotsel, pbdict
def _plugboard(pbstring: str) -> dict[str, str]:
"""
https://en.wikipedia.org/wiki/Enigma_machine#Plugboard
>>> _plugboard('PICTURES')
{'P': 'I', 'I': 'P', 'C': 'T', 'T': 'C', 'U': 'R', 'R': 'U', 'E': 'S', 'S': 'E'}
>>> _plugboard('POLAND')
{'P': 'O', 'O': 'P', 'L': 'A', 'A': 'L', 'N': 'D', 'D': 'N'}
In the code, ``pb`` stands for ``plugboard``
Pairs can be separated by spaces
:param pbstring: string containing plugboard setting for the Enigma machine
:return: dictionary containing converted pairs
"""
# tests the input string if it
# a) is type string
# b) has even length (so pairs can be made)
if not isinstance(pbstring, str):
msg = f"Plugboard setting isn't type string ({type(pbstring)})"
raise TypeError(msg)
elif len(pbstring) % 2 != 0:
msg = f"Odd number of symbols ({len(pbstring)})"
raise Exception(msg)
elif pbstring == "":
return {}
pbstring = pbstring.replace(" ", "")
# Checks if all characters are unique
tmppbl = set()
for i in pbstring:
if i not in abc:
msg = f"'{i}' not in list of symbols"
raise Exception(msg)
elif i in tmppbl:
msg = f"Duplicate symbol ({i})"
raise Exception(msg)
else:
tmppbl.add(i)
del tmppbl
# Create the dictionary
pb = {}
for j in range(0, len(pbstring) - 1, 2):
pb[pbstring[j]] = pbstring[j + 1]
pb[pbstring[j + 1]] = pbstring[j]
return pb
def enigma(
text: str,
rotor_position: RotorPositionT,
rotor_selection: RotorSelectionT = (rotor1, rotor2, rotor3),
plugb: str = "",
) -> str:
"""
The only difference from the real-world Enigma is that string input is allowed.
All characters are converted to uppercase (non-letter symbols are not encrypted).
| How it works:
| (for every letter in the message)
- Input letter goes into the plugboard.
If it is connected to another one, switch it.
- Letter goes through ``3`` rotors.
Each rotor can be represented as ``2`` sets of symbols, where one is shuffled.
Each symbol from the first set has a corresponding symbol in
the second set and vice versa.
example::
| ABCDEFGHIJKLMNOPQRSTUVWXYZ | e.g. F=D and D=F
| VKLEPDBGRNWTFCJOHQAMUZYIXS |
- The symbol then goes through the reflector (static rotor).
There it is switched with its paired symbol.
The reflector can be represented as ``2`` sets, each with half of the alphabet.
Every letter is paired with exactly one other letter (``13`` pairs in total).
Example::
| ABCDEFGHIJKLM | e.g. E is paired to X
| ZYXWVUTSRQPON | so when E goes in X goes out and vice versa
- Letter then goes through the rotors again
- If the letter is connected to plugboard, it is switched.
- Return the letter
>>> enigma('Hello World!', (1, 2, 1), plugb='pictures')
'KORYH JUHHI!'
>>> enigma('KORYH, juhhi!', (1, 2, 1), plugb='pictures')
'HELLO, WORLD!'
>>> enigma('hello world!', (1, 1, 1), plugb='pictures')
'FPNCZ QWOBU!'
>>> enigma('FPNCZ QWOBU', (1, 1, 1), plugb='pictures')
'HELLO WORLD'
:param text: input message
:param rotor_position: tuple with ``3`` values in range ``1``.. ``26``
:param rotor_selection: tuple with ``3`` rotors
:param plugb: string containing plugboard configuration (default ``''``)
:return: en/decrypted string
"""
text = text.upper()
rotor_position, rotor_selection, plugboard = _validator(
rotor_position, rotor_selection, plugb.upper()
)
rotorpos1, rotorpos2, rotorpos3 = rotor_position
rotor1, rotor2, rotor3 = rotor_selection
rotorpos1 -= 1
rotorpos2 -= 1
rotorpos3 -= 1
result = []
# encryption/decryption process --------------------------
for symbol in text:
if symbol in abc:
# 1st plugboard --------------------------
if symbol in plugboard:
symbol = plugboard[symbol]
# rotor ra --------------------------
index = abc.index(symbol) + rotorpos1
symbol = rotor1[index % len(abc)]
# rotor rb --------------------------
index = abc.index(symbol) + rotorpos2
symbol = rotor2[index % len(abc)]
# rotor rc --------------------------
index = abc.index(symbol) + rotorpos3
symbol = rotor3[index % len(abc)]
# reflector --------------------------
# this is the reason you don't need another machine to decipher
symbol = reflector[symbol]
# 2nd rotors
symbol = abc[rotor3.index(symbol) - rotorpos3]
symbol = abc[rotor2.index(symbol) - rotorpos2]
symbol = abc[rotor1.index(symbol) - rotorpos1]
# 2nd plugboard
if symbol in plugboard:
symbol = plugboard[symbol]
# moves/resets rotor positions
rotorpos1 += 1
if rotorpos1 >= len(abc):
rotorpos1 = 0
rotorpos2 += 1
if rotorpos2 >= len(abc):
rotorpos2 = 0
rotorpos3 += 1
if rotorpos3 >= len(abc):
rotorpos3 = 0
# else:
# pass
# Error could be also raised
# raise ValueError(
# 'Invalid symbol('+repr(symbol)+')')
result.append(symbol)
return "".join(result)
if __name__ == "__main__":
message = "This is my Python script that emulates the Enigma machine from WWII."
rotor_pos = (1, 1, 1)
pb = "pictures"
rotor_sel = (rotor2, rotor4, rotor8)
en = enigma(message, rotor_pos, rotor_sel, pb)
print("Encrypted message:", en)
print("Decrypted message:", enigma(en, rotor_pos, rotor_sel, pb))
================================================
FILE: ciphers/fractionated_morse_cipher.py
================================================
"""
Python program for the Fractionated Morse Cipher.
The Fractionated Morse cipher first converts the plaintext to Morse code,
then enciphers fixed-size blocks of Morse code back to letters.
This procedure means plaintext letters are mixed into the ciphertext letters,
making it more secure than substitution ciphers.
http://practicalcryptography.com/ciphers/fractionated-morse-cipher/
"""
import string
MORSE_CODE_DICT = {
"A": ".-",
"B": "-...",
"C": "-.-.",
"D": "-..",
"E": ".",
"F": "..-.",
"G": "--.",
"H": "....",
"I": "..",
"J": ".---",
"K": "-.-",
"L": ".-..",
"M": "--",
"N": "-.",
"O": "---",
"P": ".--.",
"Q": "--.-",
"R": ".-.",
"S": "...",
"T": "-",
"U": "..-",
"V": "...-",
"W": ".--",
"X": "-..-",
"Y": "-.--",
"Z": "--..",
" ": "",
}
# Define possible trigrams of Morse code
MORSE_COMBINATIONS = [
"...",
"..-",
"..x",
".-.",
".--",
".-x",
".x.",
".x-",
".xx",
"-..",
"-.-",
"-.x",
"--.",
"---",
"--x",
"-x.",
"-x-",
"-xx",
"x..",
"x.-",
"x.x",
"x-.",
"x--",
"x-x",
"xx.",
"xx-",
"xxx",
]
# Create a reverse dictionary for Morse code
REVERSE_DICT = {value: key for key, value in MORSE_CODE_DICT.items()}
def encode_to_morse(plaintext: str) -> str:
"""Encode a plaintext message into Morse code.
Args:
plaintext: The plaintext message to encode.
Returns:
The Morse code representation of the plaintext message.
Example:
>>> encode_to_morse("defend the east")
'-..x.x..-.x.x-.x-..xx-x....x.xx.x.-x...x-'
"""
return "x".join([MORSE_CODE_DICT.get(letter.upper(), "") for letter in plaintext])
def encrypt_fractionated_morse(plaintext: str, key: str) -> str:
"""Encrypt a plaintext message using Fractionated Morse Cipher.
Args:
plaintext: The plaintext message to encrypt.
key: The encryption key.
Returns:
The encrypted ciphertext.
Example:
>>> encrypt_fractionated_morse("defend the east","Roundtable")
'ESOAVVLJRSSTRX'
"""
morse_code = encode_to_morse(plaintext)
key = key.upper() + string.ascii_uppercase
key = "".join(sorted(set(key), key=key.find))
# Ensure morse_code length is a multiple of 3
padding_length = 3 - (len(morse_code) % 3)
morse_code += "x" * padding_length
fractionated_morse_dict = {v: k for k, v in zip(key, MORSE_COMBINATIONS)}
fractionated_morse_dict["xxx"] = ""
encrypted_text = "".join(
[
fractionated_morse_dict[morse_code[i : i + 3]]
for i in range(0, len(morse_code), 3)
]
)
return encrypted_text
def decrypt_fractionated_morse(ciphertext: str, key: str) -> str:
"""Decrypt a ciphertext message encrypted with Fractionated Morse Cipher.
Args:
ciphertext: The ciphertext message to decrypt.
key: The decryption key.
Returns:
The decrypted plaintext message.
Example:
>>> decrypt_fractionated_morse("ESOAVVLJRSSTRX","Roundtable")
'DEFEND THE EAST'
"""
key = key.upper() + string.ascii_uppercase
key = "".join(sorted(set(key), key=key.find))
inverse_fractionated_morse_dict = dict(zip(key, MORSE_COMBINATIONS))
morse_code = "".join(
[inverse_fractionated_morse_dict.get(letter, "") for letter in ciphertext]
)
decrypted_text = "".join(
[REVERSE_DICT[code] for code in morse_code.split("x")]
).strip()
return decrypted_text
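# Sketch (added for this write-up, not part of the original file): the keyed
# alphabet that both encryption and decryption zip with MORSE_COMBINATIONS.
# For the keyword "Roundtable" it is "ROUNDTABLECFGHIJKMPQSVWXYZ" - the
# keyword's unique letters first, then the remaining letters in order.
def _keyed_alphabet(keyword: str = "Roundtable") -> str:
    key = keyword.upper() + string.ascii_uppercase
    return "".join(sorted(set(key), key=key.find))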
if __name__ == "__main__":
"""
Example usage of Fractionated Morse Cipher.
"""
plaintext = "defend the east"
print("Plain Text:", plaintext)
key = "ROUNDTABLE"
ciphertext = encrypt_fractionated_morse(plaintext, key)
print("Encrypted:", ciphertext)
decrypted_text = decrypt_fractionated_morse(ciphertext, key)
print("Decrypted:", decrypted_text)
================================================
FILE: ciphers/gronsfeld_cipher.py
================================================
from string import ascii_uppercase
def gronsfeld(text: str, key: str) -> str:
"""
Encrypt plaintext with the Gronsfeld cipher
>>> gronsfeld('hello', '412')
'LFNPP'
>>> gronsfeld('hello', '123')
'IGOMQ'
>>> gronsfeld('', '123')
''
>>> gronsfeld('yes, ¥€$ - _!@#%?', '0')
'YES, ¥€$ - _!@#%?'
>>> gronsfeld('yes, ¥€$ - _!@#%?', '01')
'YFS, ¥€$ - _!@#%?'
>>> gronsfeld('yes, ¥€$ - _!@#%?', '012')
'YFU, ¥€$ - _!@#%?'
>>> gronsfeld('yes, ¥€$ - _!@#%?', '')
Traceback (most recent call last):
...
ZeroDivisionError: division by zero
"""
ascii_len = len(ascii_uppercase)
key_len = len(key)
encrypted_text = ""
keys = [int(char) for char in key]
upper_case_text = text.upper()
for i, char in enumerate(upper_case_text):
if char in ascii_uppercase:
new_position = (ascii_uppercase.index(char) + keys[i % key_len]) % ascii_len
shifted_letter = ascii_uppercase[new_position]
encrypted_text += shifted_letter
else:
encrypted_text += char
return encrypted_text
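# Hypothetical companion function (added for this write-up, not part of the
# original file): Gronsfeld decryption subtracts each key digit instead of
# adding it, so gronsfeld_decrypt(gronsfeld(text, key), key) recovers the
# uppercased plaintext, e.g. 'LFNPP' with key '412' becomes 'HELLO' again.
def gronsfeld_decrypt(text: str, key: str) -> str:
    ascii_len = len(ascii_uppercase)
    keys = [int(char) for char in key]
    decrypted_text = ""
    for i, char in enumerate(text.upper()):
        if char in ascii_uppercase:
            new_position = (ascii_uppercase.index(char) - keys[i % len(keys)]) % ascii_len
            decrypted_text += ascii_uppercase[new_position]
        else:
            decrypted_text += char
    return decrypted_text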
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: ciphers/hill_cipher.py
================================================
"""
Hill Cipher:
The 'HillCipher' class below implements the Hill Cipher algorithm which uses
modern linear algebra techniques to encode and decode text using an encryption
key matrix.
Algorithm:
Let the order of the encryption key be N (as it is a square matrix).
Your text is divided into batches of length N and converted to numerical vectors
by a simple mapping starting with A=0 and so on.
The key is then multiplied with the newly created batch vector to obtain the
encoded vector. After each multiplication, modulo 36 calculations are performed
on the vectors so as to bring the numbers into the range 0 to 35, which are then
mapped to their corresponding alphanumerics.
While decrypting, the decrypting key is found, which is the inverse of the
encrypting key modulo 36. The same process is then repeated with the decrypting
key to get the original message back.
Constraints:
The determinant of the encryption key matrix must be relatively prime w.r.t 36.
Note:
This implementation only considers alphanumerics in the text. If the length of
the text to be encrypted is not a multiple of the break key (the length of one
batch of letters), the last character of the text is added to the text until the
length of the text reaches a multiple of the break_key. So the text after
decrypting might be a little different than the original text.
References:
https://apprendre-en-ligne.net/crypto/hill/Hillciph.pdf
https://www.youtube.com/watch?v=kfmNeskzs2o
https://www.youtube.com/watch?v=4RhLNDqcjpA
"""
import string
import numpy as np
from maths.greatest_common_divisor import greatest_common_divisor
class HillCipher:
key_string = string.ascii_uppercase + string.digits
# This cipher takes alphanumerics into account
# i.e. a total of 36 characters
# take x and return x % len(key_string)
modulus = np.vectorize(lambda x: x % 36)
to_int = np.vectorize(round)
def __init__(self, encrypt_key: np.ndarray) -> None:
"""
encrypt_key is an NxN numpy array
"""
self.encrypt_key = self.modulus(encrypt_key) # mod36 calc's on the encrypt key
self.check_determinant() # validate the determinant of the encryption key
self.break_key = encrypt_key.shape[0]
def replace_letters(self, letter: str) -> int:
"""
>>> hill_cipher = HillCipher(np.array([[2, 5], [1, 6]]))
>>> hill_cipher.replace_letters('T')
19
>>> hill_cipher.replace_letters('0')
26
"""
return self.key_string.index(letter)
def replace_digits(self, num: int) -> str:
"""
>>> hill_cipher = HillCipher(np.array([[2, 5], [1, 6]]))
>>> hill_cipher.replace_digits(19)
'T'
>>> hill_cipher.replace_digits(26)
'0'
>>> hill_cipher.replace_digits(26.1)
'0'
"""
return self.key_string[int(num)]
def check_determinant(self) -> None:
"""
>>> hill_cipher = HillCipher(np.array([[2, 5], [1, 6]]))
>>> hill_cipher.check_determinant()
"""
det = round(np.linalg.det(self.encrypt_key))
if det < 0:
det = det % len(self.key_string)
req_l = len(self.key_string)
if greatest_common_divisor(det, len(self.key_string)) != 1:
msg = (
f"determinant modular {req_l} of encryption key({det}) "
f"is not co prime w.r.t {req_l}.\nTry another key."
)
raise ValueError(msg)
def process_text(self, text: str) -> str:
"""
>>> hill_cipher = HillCipher(np.array([[2, 5], [1, 6]]))
>>> hill_cipher.process_text('Testing Hill Cipher')
'TESTINGHILLCIPHERR'
>>> hill_cipher.process_text('hello')
'HELLOO'
"""
chars = [char for char in text.upper() if char in self.key_string]
last = chars[-1]
while len(chars) % self.break_key != 0:
chars.append(last)
return "".join(chars)
def encrypt(self, text: str) -> str:
"""
>>> hill_cipher = HillCipher(np.array([[2, 5], [1, 6]]))
>>> hill_cipher.encrypt('testing hill cipher')
'WHXYJOLM9C6XT085LL'
>>> hill_cipher.encrypt('hello')
'85FF00'
"""
text = self.process_text(text.upper())
encrypted = ""
for i in range(0, len(text) - self.break_key + 1, self.break_key):
batch = text[i : i + self.break_key]
vec = [self.replace_letters(char) for char in batch]
batch_vec = np.array([vec]).T
batch_encrypted = self.modulus(self.encrypt_key.dot(batch_vec)).T.tolist()[
0
]
encrypted_batch = "".join(
self.replace_digits(num) for num in batch_encrypted
)
encrypted += encrypted_batch
return encrypted
def make_decrypt_key(self) -> np.ndarray:
"""
>>> hill_cipher = HillCipher(np.array([[2, 5], [1, 6]]))
>>> hill_cipher.make_decrypt_key()
array([[ 6, 25],
[ 5, 26]])
"""
det = round(np.linalg.det(self.encrypt_key))
if det < 0:
det = det % len(self.key_string)
det_inv = None
for i in range(len(self.key_string)):
if (det * i) % len(self.key_string) == 1:
det_inv = i
break
inv_key = (
det_inv * np.linalg.det(self.encrypt_key) * np.linalg.inv(self.encrypt_key)
)
return self.to_int(self.modulus(inv_key))
def decrypt(self, text: str) -> str:
"""
>>> hill_cipher = HillCipher(np.array([[2, 5], [1, 6]]))
>>> hill_cipher.decrypt('WHXYJOLM9C6XT085LL')
'TESTINGHILLCIPHERR'
>>> hill_cipher.decrypt('85FF00')
'HELLOO'
"""
decrypt_key = self.make_decrypt_key()
text = self.process_text(text.upper())
decrypted = ""
for i in range(0, len(text) - self.break_key + 1, self.break_key):
batch = text[i : i + self.break_key]
vec = [self.replace_letters(char) for char in batch]
batch_vec = np.array([vec]).T
batch_decrypted = self.modulus(decrypt_key.dot(batch_vec)).T.tolist()[0]
decrypted_batch = "".join(
self.replace_digits(num) for num in batch_decrypted
)
decrypted += decrypted_batch
return decrypted
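# Worked example (added for this write-up, not part of the original file):
# with the 2x2 key [[2, 5], [1, 6]] used in the doctests, the first batch "TE"
# of "TESTING..." maps to the vector [19, 4]; the key times that vector is
# [58, 43], which reduces to [22, 7] modulo 36, i.e. the characters "WH" -
# the first two characters of the doctest ciphertext 'WHXYJOLM9C6XT085LL'.
def _hill_first_batch_demo() -> str:
    cipher = HillCipher(np.array([[2, 5], [1, 6]]))
    batch_vector = np.array([[cipher.replace_letters(char) for char in "TE"]]).T
    encoded = cipher.modulus(cipher.encrypt_key.dot(batch_vector)).T.tolist()[0]
    return "".join(cipher.replace_digits(num) for num in encoded)  # 'WH'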
def main() -> None:
n = int(input("Enter the order of the encryption key: "))
hill_matrix = []
print("Enter each row of the encryption key with space separated integers")
for _ in range(n):
row = [int(x) for x in input().split()]
hill_matrix.append(row)
hc = HillCipher(np.array(hill_matrix))
print("Would you like to encrypt or decrypt some text? (1 or 2)")
option = input("\n1. Encrypt\n2. Decrypt\n")
if option == "1":
text_e = input("What text would you like to encrypt?: ")
print("Your encrypted text is:")
print(hc.encrypt(text_e))
elif option == "2":
text_d = input("What text would you like to decrypt?: ")
print("Your decrypted text is:")
print(hc.decrypt(text_d))
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: ciphers/mixed_keyword_cypher.py
================================================
from string import ascii_uppercase
def mixed_keyword(
keyword: str, plaintext: str, verbose: bool = False, alphabet: str = ascii_uppercase
) -> str:
"""
For keyword: hello
H E L O
A B C D
F G I J
K M N P
Q R S T
U V W X
Y Z
and map vertically
>>> mixed_keyword("college", "UNIVERSITY", True) # doctest: +NORMALIZE_WHITESPACE
{'A': 'C', 'B': 'A', 'C': 'I', 'D': 'P', 'E': 'U', 'F': 'Z', 'G': 'O', 'H': 'B',
'I': 'J', 'J': 'Q', 'K': 'V', 'L': 'L', 'M': 'D', 'N': 'K', 'O': 'R', 'P': 'W',
'Q': 'E', 'R': 'F', 'S': 'M', 'T': 'S', 'U': 'X', 'V': 'G', 'W': 'H', 'X': 'N',
'Y': 'T', 'Z': 'Y'}
'XKJGUFMJST'
>>> mixed_keyword("college", "UNIVERSITY", False) # doctest: +NORMALIZE_WHITESPACE
'XKJGUFMJST'
"""
keyword = keyword.upper()
plaintext = plaintext.upper()
alphabet_set = set(alphabet)
# create a list of unique characters in the keyword - their order matters
# it determines how we will map plaintext characters to the ciphertext
unique_chars = []
for char in keyword:
if char in alphabet_set and char not in unique_chars:
unique_chars.append(char)
# the number of those unique characters will determine the number of rows
num_unique_chars_in_keyword = len(unique_chars)
# create a shifted version of the alphabet
shifted_alphabet = unique_chars + [
char for char in alphabet if char not in unique_chars
]
# create a modified alphabet by splitting the shifted alphabet into rows
modified_alphabet = [
shifted_alphabet[k : k + num_unique_chars_in_keyword]
for k in range(0, 26, num_unique_chars_in_keyword)
]
# map the alphabet characters to the modified alphabet characters
# going 'vertically' through the modified alphabet - consider columns first
mapping = {}
letter_index = 0
for column in range(num_unique_chars_in_keyword):
for row in modified_alphabet:
# if current row (the last one) is too short, break out of loop
if len(row) <= column:
break
# map current letter to letter in modified alphabet
mapping[alphabet[letter_index]] = row[column]
letter_index += 1
if verbose:
print(mapping)
# create the encrypted text by mapping the plaintext to the modified alphabet
return "".join(mapping.get(char, char) for char in plaintext)
if __name__ == "__main__":
# example use
print(mixed_keyword("college", "UNIVERSITY"))
================================================
FILE: ciphers/mono_alphabetic_ciphers.py
================================================
from typing import Literal
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
def translate_message(
key: str, message: str, mode: Literal["encrypt", "decrypt"]
) -> str:
"""
>>> translate_message("QWERTYUIOPASDFGHJKLZXCVBNM","Hello World","encrypt")
'Pcssi Bidsm'
"""
chars_a = LETTERS if mode == "decrypt" else key
chars_b = key if mode == "decrypt" else LETTERS
translated = ""
# loop through each symbol in the message
for symbol in message:
if symbol.upper() in chars_a:
# encrypt/decrypt the symbol
sym_index = chars_a.find(symbol.upper())
if symbol.isupper():
translated += chars_b[sym_index].upper()
else:
translated += chars_b[sym_index].lower()
else:
# symbol is not in LETTERS, just add it
translated += symbol
return translated
def encrypt_message(key: str, message: str) -> str:
"""
>>> encrypt_message("QWERTYUIOPASDFGHJKLZXCVBNM", "Hello World")
'Pcssi Bidsm'
"""
return translate_message(key, message, "encrypt")
def decrypt_message(key: str, message: str) -> str:
"""
>>> decrypt_message("QWERTYUIOPASDFGHJKLZXCVBNM", "Hello World")
'Itssg Vgksr'
"""
return translate_message(key, message, "decrypt")
def main() -> None:
message = "Hello World"
key = "QWERTYUIOPASDFGHJKLZXCVBNM"
mode = "decrypt" # set to 'encrypt' or 'decrypt'
if mode == "encrypt":
translated = encrypt_message(key, message)
elif mode == "decrypt":
translated = decrypt_message(key, message)
print(f"Using the key {key}, the {mode}ed message is: {translated}")
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: ciphers/morse_code.py
================================================
#!/usr/bin/env python3
"""
Python program to translate to and from Morse code.
https://en.wikipedia.org/wiki/Morse_code
"""
# fmt: off
MORSE_CODE_DICT = {
"A": ".-", "B": "-...", "C": "-.-.", "D": "-..", "E": ".", "F": "..-.", "G": "--.",
"H": "....", "I": "..", "J": ".---", "K": "-.-", "L": ".-..", "M": "--", "N": "-.",
"O": "---", "P": ".--.", "Q": "--.-", "R": ".-.", "S": "...", "T": "-", "U": "..-",
"V": "...-", "W": ".--", "X": "-..-", "Y": "-.--", "Z": "--..", "1": ".----",
"2": "..---", "3": "...--", "4": "....-", "5": ".....", "6": "-....", "7": "--...",
"8": "---..", "9": "----.", "0": "-----", "&": ".-...", "@": ".--.-.",
":": "---...", ",": "--..--", ".": ".-.-.-", "'": ".----.", '"': ".-..-.",
"?": "..--..", "/": "-..-.", "=": "-...-", "+": ".-.-.", "-": "-....-",
"(": "-.--.", ")": "-.--.-", "!": "-.-.--", " ": "/"
} # Exclamation mark is not in ITU-R recommendation
# fmt: on
REVERSE_DICT = {value: key for key, value in MORSE_CODE_DICT.items()}
def encrypt(message: str) -> str:
"""
>>> encrypt("Sos!")
'... --- ... -.-.--'
>>> encrypt("SOS!") == encrypt("sos!")
True
"""
return " ".join(MORSE_CODE_DICT[char] for char in message.upper())
def decrypt(message: str) -> str:
"""
>>> decrypt('... --- ... -.-.--')
'SOS!'
"""
return "".join(REVERSE_DICT[char] for char in message.split())
def main() -> None:
"""
>>> s = "".join(MORSE_CODE_DICT)
>>> decrypt(encrypt(s)) == s
True
"""
message = "Morse code here!"
print(message)
message = encrypt(message)
print(message)
message = decrypt(message)
print(message)
if __name__ == "__main__":
main()
================================================
FILE: ciphers/onepad_cipher.py
================================================
import random
class Onepad:
@staticmethod
def encrypt(text: str) -> tuple[list[int], list[int]]:
"""
Function to encrypt text using pseudo-random numbers
>>> Onepad().encrypt("")
([], [])
>>> Onepad().encrypt([])
([], [])
>>> random.seed(1)
>>> Onepad().encrypt(" ")
([6969], [69])
>>> random.seed(1)
>>> Onepad().encrypt("Hello")
([9729, 114756, 4653, 31309, 10492], [69, 292, 33, 131, 61])
>>> Onepad().encrypt(1)
Traceback (most recent call last):
...
TypeError: 'int' object is not iterable
>>> Onepad().encrypt(1.1)
Traceback (most recent call last):
...
TypeError: 'float' object is not iterable
"""
plain = [ord(i) for i in text]
key = []
cipher = []
for i in plain:
k = random.randint(1, 300)
c = (i + k) * k
cipher.append(c)
key.append(k)
return cipher, key
@staticmethod
def decrypt(cipher: list[int], key: list[int]) -> str:
"""
Function to decrypt text using pseudo-random numbers.
>>> Onepad().decrypt([], [])
''
>>> Onepad().decrypt([35], [])
''
>>> Onepad().decrypt([], [35])
Traceback (most recent call last):
...
IndexError: list index out of range
>>> random.seed(1)
>>> Onepad().decrypt([9729, 114756, 4653, 31309, 10492], [69, 292, 33, 131, 61])
'Hello'
"""
plain = []
for i in range(len(key)):
p = int((cipher[i] - (key[i]) ** 2) / key[i])
plain.append(chr(p))
return "".join(plain)
if __name__ == "__main__":
c, k = Onepad().encrypt("Hello")
print(c, k)
print(Onepad().decrypt(c, k))
================================================
FILE: ciphers/permutation_cipher.py
================================================
"""
The permutation cipher, also called the transposition cipher, is a simple encryption
technique that rearranges the characters in a message based on a secret key. It
divides the message into blocks and applies a permutation to the characters within
each block according to the key. The key is a sequence of unique integers that
determine the order of character rearrangement.
For more info: https://www.nku.edu/~christensen/1402%20permutation%20ciphers.pdf
"""
import random
def generate_valid_block_size(message_length: int) -> int:
"""
Generate a valid block size that is a factor of the message length.
Args:
message_length (int): The length of the message.
Returns:
int: A valid block size.
Example:
>>> random.seed(1)
>>> generate_valid_block_size(12)
3
"""
block_sizes = [
block_size
for block_size in range(2, message_length + 1)
if message_length % block_size == 0
]
return random.choice(block_sizes)
def generate_permutation_key(block_size: int) -> list[int]:
"""
Generate a random permutation key of a specified block size.
Args:
block_size (int): The size of each permutation block.
Returns:
list[int]: A list containing a random permutation of digits.
Example:
>>> random.seed(0)
>>> generate_permutation_key(4)
[2, 0, 1, 3]
"""
digits = list(range(block_size))
random.shuffle(digits)
return digits
def encrypt(
message: str, key: list[int] | None = None, block_size: int | None = None
) -> tuple[str, list[int]]:
"""
Encrypt a message using a permutation cipher with block rearrangement using a key.
Args:
message (str): The plaintext message to be encrypted.
        key (list[int]): The permutation key; a random key is generated if not provided.

block_size (int): The size of each permutation block.
Returns:
tuple: A tuple containing the encrypted message and the encryption key.
Example:
>>> encrypted_message, key = encrypt("HELLO WORLD")
>>> decrypted_message = decrypt(encrypted_message, key)
>>> decrypted_message
'HELLO WORLD'
"""
message = message.upper()
message_length = len(message)
if key is None or block_size is None:
block_size = generate_valid_block_size(message_length)
key = generate_permutation_key(block_size)
encrypted_message = ""
for i in range(0, message_length, block_size):
block = message[i : i + block_size]
rearranged_block = [block[digit] for digit in key]
encrypted_message += "".join(rearranged_block)
return encrypted_message, key
def decrypt(encrypted_message: str, key: list[int]) -> str:
"""
Decrypt an encrypted message using a permutation cipher with block rearrangement.
Args:
encrypted_message (str): The encrypted message.
key (list[int]): The permutation key for decryption.
Returns:
str: The decrypted plaintext message.
Example:
>>> encrypted_message, key = encrypt("HELLO WORLD")
>>> decrypted_message = decrypt(encrypted_message, key)
>>> decrypted_message
'HELLO WORLD'
"""
key_length = len(key)
decrypted_message = ""
for i in range(0, len(encrypted_message), key_length):
block = encrypted_message[i : i + key_length]
original_block = [""] * key_length
for j, digit in enumerate(key):
original_block[digit] = block[j]
decrypted_message += "".join(original_block)
return decrypted_message
def main() -> None:
"""
Driver function to pass message to get encrypted, then decrypted.
Example:
>>> main()
Decrypted message: HELLO WORLD
"""
message = "HELLO WORLD"
encrypted_message, key = encrypt(message)
decrypted_message = decrypt(encrypted_message, key)
print(f"Decrypted message: {decrypted_message}")
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
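# A minimal sketch (not part of the original file), assuming the encrypt() and
# decrypt() functions above. Supplying both a key and a matching block_size
# (the function regenerates them whenever either is None) makes the result
# deterministic; with key [1, 0, 2] every 3-character block "ABC" becomes "BAC".
def _permutation_demo() -> None:
    # the 12-character message is a multiple of the block size, as encrypt() expects
    encrypted, key = encrypt("HELLOWORLD!!", key=[1, 0, 2], block_size=3)
    assert encrypted.startswith("EHL")  # first block "HEL" rearranged by the key
    assert decrypt(encrypted, key) == "HELLOWORLD!!"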
================================================
FILE: ciphers/playfair_cipher.py
================================================
"""
https://en.wikipedia.org/wiki/Playfair_cipher#Description
The Playfair cipher was developed by Charles Wheatstone in 1854
Its use was heavily promoted by Lord Playfair, hence its name.
Some features of the Playfair cipher are:
1) It was the first literal digram substitution cipher
2) It is a manual symmetric encryption technique
3) It is a multiple letter encryption cipher
The implementation below encodes letters only. It removes spaces,
special characters and numbers from the input.
Playfair is no longer used by military forces because of its known
insecurities and the advent of automated encryption devices.
The cipher has been regarded as insecure since before World War I.
"""
import itertools
import string
from collections.abc import Generator, Iterable
def chunker(seq: Iterable[str], size: int) -> Generator[tuple[str, ...]]:
it = iter(seq)
while True:
chunk = tuple(itertools.islice(it, size))
if not chunk:
return
yield chunk
def prepare_input(dirty: str) -> str:
"""
Prepare the plaintext by up-casing it
and separating repeated letters with X's
"""
dirty = "".join([c.upper() for c in dirty if c in string.ascii_letters])
clean = ""
if len(dirty) < 2:
return dirty
for i in range(len(dirty) - 1):
clean += dirty[i]
if dirty[i] == dirty[i + 1]:
clean += "X"
clean += dirty[-1]
if len(clean) & 1:
clean += "X"
return clean
def generate_table(key: str) -> list[str]:
# I and J are used interchangeably to allow
# us to use a 5x5 table (25 letters)
alphabet = "ABCDEFGHIKLMNOPQRSTUVWXYZ"
# we're using a list instead of a '2d' array because it makes the math
# for setting up the table and doing the actual encoding/decoding simpler
table = []
# copy key chars into the table if they are in `alphabet` ignoring duplicates
for char in key.upper():
if char not in table and char in alphabet:
table.append(char)
# fill the rest of the table in with the remaining alphabet chars
for char in alphabet:
if char not in table:
table.append(char)
return table
def encode(plaintext: str, key: str) -> str:
"""
Encode the given plaintext using the Playfair cipher.
Takes the plaintext and the key as input and returns the encoded string.
>>> encode("Hello", "MONARCHY")
'CFSUPM'
>>> encode("attack on the left flank", "EMERGENCY")
'DQZSBYFSDZFMFNLOHFDRSG'
>>> encode("Sorry!", "SPECIAL")
'AVXETX'
>>> encode("Number 1", "NUMBER")
'UMBENF'
>>> encode("Photosynthesis!", "THE SUN")
'OEMHQHVCHESUKE'
"""
table = generate_table(key)
plaintext = prepare_input(plaintext)
ciphertext = ""
for char1, char2 in chunker(plaintext, 2):
row1, col1 = divmod(table.index(char1), 5)
row2, col2 = divmod(table.index(char2), 5)
if row1 == row2:
ciphertext += table[row1 * 5 + (col1 + 1) % 5]
ciphertext += table[row2 * 5 + (col2 + 1) % 5]
elif col1 == col2:
ciphertext += table[((row1 + 1) % 5) * 5 + col1]
ciphertext += table[((row2 + 1) % 5) * 5 + col2]
else: # rectangle
ciphertext += table[row1 * 5 + col2]
ciphertext += table[row2 * 5 + col1]
return ciphertext
def decode(ciphertext: str, key: str) -> str:
"""
Decode the input string using the provided key.
>>> decode("BMZFAZRZDH", "HAZARD")
'FIREHAZARD'
>>> decode("HNBWBPQT", "AUTOMOBILE")
'DRIVINGX'
>>> decode("SLYSSAQS", "CASTLE")
'ATXTACKX'
"""
table = generate_table(key)
plaintext = ""
for char1, char2 in chunker(ciphertext, 2):
row1, col1 = divmod(table.index(char1), 5)
row2, col2 = divmod(table.index(char2), 5)
if row1 == row2:
plaintext += table[row1 * 5 + (col1 - 1) % 5]
plaintext += table[row2 * 5 + (col2 - 1) % 5]
elif col1 == col2:
plaintext += table[((row1 - 1) % 5) * 5 + col1]
plaintext += table[((row2 - 1) % 5) * 5 + col2]
else: # rectangle
plaintext += table[row1 * 5 + col2]
plaintext += table[row2 * 5 + col1]
return plaintext
if __name__ == "__main__":
import doctest
doctest.testmod()
print("Encoded:", encode("BYE AND THANKS", "GREETING"))
print("Decoded:", decode("CXRBANRLBALQ", "GREETING"))
================================================
FILE: ciphers/polybius.py
================================================
#!/usr/bin/env python3
"""
A Polybius Square is a table that allows someone to translate letters into numbers.
https://www.braingle.com/brainteasers/codes/polybius.php
"""
import numpy as np
SQUARE = [
["a", "b", "c", "d", "e"],
["f", "g", "h", "i", "k"],
["l", "m", "n", "o", "p"],
["q", "r", "s", "t", "u"],
["v", "w", "x", "y", "z"],
]
class PolybiusCipher:
def __init__(self) -> None:
self.SQUARE = np.array(SQUARE)
def letter_to_numbers(self, letter: str) -> np.ndarray:
"""
Return the pair of numbers that represents the given letter in the
polybius square
>>> np.array_equal(PolybiusCipher().letter_to_numbers('a'), [1,1])
True
>>> np.array_equal(PolybiusCipher().letter_to_numbers('u'), [4,5])
True
"""
index1, index2 = np.where(letter == self.SQUARE)
indexes = np.concatenate([index1 + 1, index2 + 1])
return indexes
def numbers_to_letter(self, index1: int, index2: int) -> str:
"""
Return the letter corresponding to the position [index1, index2] in
the polybius square
>>> PolybiusCipher().numbers_to_letter(4, 5) == "u"
True
>>> PolybiusCipher().numbers_to_letter(1, 1) == "a"
True
"""
return self.SQUARE[index1 - 1, index2 - 1]
def encode(self, message: str) -> str:
"""
Return the encoded version of message according to the polybius cipher
>>> PolybiusCipher().encode("test message") == "44154344 32154343112215"
True
>>> PolybiusCipher().encode("Test Message") == "44154344 32154343112215"
True
"""
message = message.lower()
message = message.replace("j", "i")
encoded_message = ""
for letter_index in range(len(message)):
if message[letter_index] != " ":
numbers = self.letter_to_numbers(message[letter_index])
encoded_message = encoded_message + str(numbers[0]) + str(numbers[1])
elif message[letter_index] == " ":
encoded_message = encoded_message + " "
return encoded_message
def decode(self, message: str) -> str:
"""
Return the decoded version of message according to the polybius cipher
>>> PolybiusCipher().decode("44154344 32154343112215") == "test message"
True
>>> PolybiusCipher().decode("4415434432154343112215") == "testmessage"
True
"""
        # pad each space to two characters so it stays aligned with the
        # two-digit letter codes
        message = message.replace(" ", "  ")
decoded_message = ""
for numbers_index in range(int(len(message) / 2)):
if message[numbers_index * 2] != " ":
index1 = message[numbers_index * 2]
index2 = message[numbers_index * 2 + 1]
letter = self.numbers_to_letter(int(index1), int(index2))
decoded_message = decoded_message + letter
elif message[numbers_index * 2] == " ":
decoded_message = decoded_message + " "
return decoded_message
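# A minimal sketch (not part of the original file), assuming the PolybiusCipher
# class above. The 5x5 square has no "j" (it shares a cell with "i"), so
# encode() silently maps "j" to "i" and the decoded text comes back with "i".
def _polybius_demo() -> None:
    cipher = PolybiusCipher()
    assert cipher.encode("jam") == cipher.encode("iam") == "241132"
    assert cipher.decode("241132") == "iam"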
================================================
FILE: ciphers/porta_cipher.py
================================================
alphabet = {
"A": ("ABCDEFGHIJKLM", "NOPQRSTUVWXYZ"),
"B": ("ABCDEFGHIJKLM", "NOPQRSTUVWXYZ"),
"C": ("ABCDEFGHIJKLM", "ZNOPQRSTUVWXY"),
"D": ("ABCDEFGHIJKLM", "ZNOPQRSTUVWXY"),
"E": ("ABCDEFGHIJKLM", "YZNOPQRSTUVWX"),
"F": ("ABCDEFGHIJKLM", "YZNOPQRSTUVWX"),
"G": ("ABCDEFGHIJKLM", "XYZNOPQRSTUVW"),
"H": ("ABCDEFGHIJKLM", "XYZNOPQRSTUVW"),
"I": ("ABCDEFGHIJKLM", "WXYZNOPQRSTUV"),
"J": ("ABCDEFGHIJKLM", "WXYZNOPQRSTUV"),
"K": ("ABCDEFGHIJKLM", "VWXYZNOPQRSTU"),
"L": ("ABCDEFGHIJKLM", "VWXYZNOPQRSTU"),
"M": ("ABCDEFGHIJKLM", "UVWXYZNOPQRST"),
"N": ("ABCDEFGHIJKLM", "UVWXYZNOPQRST"),
"O": ("ABCDEFGHIJKLM", "TUVWXYZNOPQRS"),
"P": ("ABCDEFGHIJKLM", "TUVWXYZNOPQRS"),
"Q": ("ABCDEFGHIJKLM", "STUVWXYZNOPQR"),
"R": ("ABCDEFGHIJKLM", "STUVWXYZNOPQR"),
"S": ("ABCDEFGHIJKLM", "RSTUVWXYZNOPQ"),
"T": ("ABCDEFGHIJKLM", "RSTUVWXYZNOPQ"),
"U": ("ABCDEFGHIJKLM", "QRSTUVWXYZNOP"),
"V": ("ABCDEFGHIJKLM", "QRSTUVWXYZNOP"),
"W": ("ABCDEFGHIJKLM", "PQRSTUVWXYZNO"),
"X": ("ABCDEFGHIJKLM", "PQRSTUVWXYZNO"),
"Y": ("ABCDEFGHIJKLM", "OPQRSTUVWXYZN"),
"Z": ("ABCDEFGHIJKLM", "OPQRSTUVWXYZN"),
}
def generate_table(key: str) -> list[tuple[str, str]]:
"""
>>> generate_table('marvin') # doctest: +NORMALIZE_WHITESPACE
[('ABCDEFGHIJKLM', 'UVWXYZNOPQRST'), ('ABCDEFGHIJKLM', 'NOPQRSTUVWXYZ'),
('ABCDEFGHIJKLM', 'STUVWXYZNOPQR'), ('ABCDEFGHIJKLM', 'QRSTUVWXYZNOP'),
('ABCDEFGHIJKLM', 'WXYZNOPQRSTUV'), ('ABCDEFGHIJKLM', 'UVWXYZNOPQRST')]
"""
return [alphabet[char] for char in key.upper()]
def encrypt(key: str, words: str) -> str:
"""
>>> encrypt('marvin', 'jessica')
'QRACRWU'
"""
cipher = ""
count = 0
table = generate_table(key)
for char in words.upper():
cipher += get_opponent(table[count], char)
count = (count + 1) % len(table)
return cipher
def decrypt(key: str, words: str) -> str:
"""
>>> decrypt('marvin', 'QRACRWU')
'JESSICA'
"""
return encrypt(key, words)
def get_position(table: tuple[str, str], char: str) -> tuple[int, int]:
"""
>>> get_position(generate_table('marvin')[0], 'M')
(0, 12)
"""
# `char` is either in the 0th row or the 1st row
row = 0 if char in table[0] else 1
col = table[row].index(char)
return row, col
def get_opponent(table: tuple[str, str], char: str) -> str:
"""
>>> get_opponent(generate_table('marvin')[0], 'M')
'T'
"""
row, col = get_position(table, char.upper())
    # swap halves: a character found in row 0 maps to row 1 and vice versa
    return table[0][col] if row == 1 else table[1][col]
if __name__ == "__main__":
import doctest
    doctest.testmod()  # First ensure that all our tests are passing...
"""
Demo:
Enter key: marvin
Enter text to encrypt: jessica
Encrypted: QRACRWU
Decrypted with key: JESSICA
"""
key = input("Enter key: ").strip()
text = input("Enter text to encrypt: ").strip()
cipher_text = encrypt(key, text)
print(f"Encrypted: {cipher_text}")
print(f"Decrypted with key: {decrypt(key, cipher_text)}")
================================================
FILE: ciphers/prehistoric_men.txt
================================================
The Project Gutenberg eBook, Prehistoric Men, by Robert J. (Robert John)
Braidwood, Illustrated by Susan T. Richert
This eBook is for the use of anyone anywhere in the United States and most
other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms of
the Project Gutenberg License included with this eBook or online at
www.gutenberg.org. If you are not located in the United States, you'll have
to check the laws of the country where you are located before using this ebook.
Title: Prehistoric Men
Author: Robert J. (Robert John) Braidwood
Release Date: July 28, 2016 [eBook #52664]
Language: English
Character set encoding: UTF-8
***START OF THE PROJECT GUTENBERG EBOOK PREHISTORIC MEN***
E-text prepared by Stephen Hutcheson, Dave Morgan, Charlie Howard, and the
Online Distributed Proofreading Team (http://www.pgdp.net)
Note: Project Gutenberg also has an HTML version of this
file which includes the original illustrations.
See 52664-h.htm or 52664-h.zip:
(http://www.gutenberg.org/files/52664/52664-h/52664-h.htm)
or
(http://www.gutenberg.org/files/52664/52664-h.zip)
Transcriber's note:
Some characters might not display in this UTF-8 text
version. If so, the reader should consult the HTML
version referred to above. One example of this might
occur in the second paragraph under "Choppers and
Adze-like Tools", page 46, which contains the phrase
"an adze cutting edge is ? shaped". The symbol before
"shaped" looks like a sharply-italicized sans-serif "L".
Devices that cannot display that symbol may substitute
a question mark, a square, or other symbol.
PREHISTORIC MEN
by
ROBERT J. BRAIDWOOD
Research Associate, Old World Prehistory
Professor
Oriental Institute and Department of Anthropology
University of Chicago
Drawings by Susan T. Richert
[Illustration]
Chicago Natural History Museum
Popular Series
Anthropology, Number 37
Third Edition Issued in Co-operation with
The Oriental Institute, The University of Chicago
Edited by Lillian A. Ross
Printed in the United States of America
by Chicago Natural History Museum Press
Copyright 1948, 1951, and 1957 by Chicago Natural History Museum
First edition 1948
Second edition 1951
Third edition 1957
Fourth edition 1959
Preface
[Illustration]
Like the writing of most professional archeologists, mine has been
confined to so-called learned papers. Good, bad, or indifferent, these
papers were in a jargon that only my colleagues and a few advanced
students could understand. Hence, when I was asked to do this little
book, I soon found it extremely difficult to say what I meant in simple
fashion. The style is new to me, but I hope the reader will not find it
forced or pedantic; at least I have done my very best to tell the story
simply and clearly.
Many friends have aided in the preparation of the book. The whimsical
charm of Miss Susan Richert's illustrations add enormously to the
spirit I wanted. She gave freely of her own time on the drawings and
in planning the book with me. My colleagues at the University of
Chicago, especially Professor Wilton M. Krogman (now of the University
of Pennsylvania), and also Mrs. Linda Braidwood, Associate of the
Oriental Institute, and Professors Fay-Cooper Cole and Sol Tax, of
the Department of Anthropology, gave me counsel in matters bearing on
their special fields, and the Department of Anthropology bore some of
the expense of the illustrations. From Mrs. Irma Hunter and Mr. Arnold
Maremont, who are not archeologists at all and have only an intelligent
layman's notion of archeology, I had sound advice on how best to tell
the story. I am deeply indebted to all these friends.
While I was preparing the second edition, I had the great fortune
to be able to rework the third chapter with Professor Sherwood L.
Washburn, now of the Department of Anthropology of the University of
California, and the fourth, fifth, and sixth chapters with Professor
Hallum L. Movius, Jr., of the Peabody Museum, Harvard University. The
book has gained greatly in accuracy thereby. In matters of dating,
Professor Movius and the indications of Professor W. F. Libby's Carbon
14 chronology project have both encouraged me to choose the lowest
dates now current for the events of the Pleistocene Ice Age. There is
still no certain way of fixing a direct chronology for most of the
Pleistocene, but Professor Libby's method appears very promising for
its end range and for proto-historic dates. In any case, this book
names "periods," and new dates may be written in against mine, if new
and better dating systems appear.
I wish to thank Dr. Clifford C. Gregg, Director of Chicago Natural
History Museum, for the opportunity to publish this book. My old
friend, Dr. Paul S. Martin, Chief Curator in the Department of
Anthropology, asked me to undertake the job and inspired me to complete
it. I am also indebted to Miss Lillian A. Ross, Associate Editor of
Scientific Publications, and to Mr. George I. Quimby, Curator of
Exhibits in Anthropology, for all the time they have given me in
getting the manuscript into proper shape.
ROBERT J. BRAIDWOOD
_June 15, 1950_
Preface to the Third Edition
In preparing the enlarged third edition, many of the above mentioned
friends have again helped me. I have picked the brains of Professor F.
Clark Howell of the Department of Anthropology of the University of
Chicago in reworking the earlier chapters, and he was very patient in
the matter, which I sincerely appreciate.
All of Mrs. Susan Richert Allen's original drawings appear, but a few
necessary corrections have been made in some of the charts and some new
drawings have been added by Mr. John Pfiffner, Staff Artist, Chicago
Natural History Museum.
ROBERT J. BRAIDWOOD
_March 1, 1959_
Contents
PAGE
How We Learn about Prehistoric Men 7
The Changing World in Which Prehistoric Men Lived 17
Prehistoric Men Themselves 22
Cultural Beginnings 38
More Evidence of Culture 56
Early Moderns 70
End and Prelude 92
The First Revolution 121
The Conquest of Civilization 144
End of Prehistory 162
Summary 176
List of Books 180
Index 184
HOW WE LEARN about Prehistoric Men
[Illustration]
Prehistory means the time before written history began. Actually, more
than 99 per cent of man's story is prehistory. Man is at least half a
million years old, but he did not begin to write history (or to write
anything) until about 5,000 years ago.
The men who lived in prehistoric times left us no history books, but
they did unintentionally leave a record of their presence and their way
of life. This record is studied and interpreted by different kinds of
scientists.
SCIENTISTS WHO FIND OUT ABOUT PREHISTORIC MEN
The scientists who study the bones and teeth and any other parts
they find of the bodies of prehistoric men, are called _physical
anthropologists_. Physical anthropologists are trained, much like
doctors, to know all about the human body. They study living people,
too; they know more about the biological facts of human "races" than
anybody else. If the police find a badly decayed body in a trunk,
they ask a physical anthropologist to tell them what the person
originally looked like. The physical anthropologists who specialize in
prehistoric men work with fossils, so they are sometimes called _human
paleontologists_.
ARCHEOLOGISTS
There is a kind of scientist who studies the things that prehistoric
men made and did. Such a scientist is called an _archeologist_. It is
the archeologist's business to look for the stone and metal tools, the
pottery, the graves, and the caves or huts of the men who lived before
history began.
But there is more to archeology than just looking for things. In
Professor V. Gordon Childe's words, archeology "furnishes a sort of
history of human activity, provided always that the actions have
produced concrete results and left recognizable material traces." You
will see that there are at least three points in what Childe says:
1. The archeologists have to find the traces of things left behind by
ancient man, and
2. Only a few objects may be found, for most of these were probably
too soft or too breakable to last through the years. However,
3. The archeologist must use whatever he can find to tell a story--to
make a "sort of history"--from the objects and living-places and
graves that have escaped destruction.
What I mean is this: Let us say you are walking through a dump yard,
and you find a rusty old spark plug. If you want to think about what
the spark plug means, you quickly remember that it is a part of an
automobile motor. This tells you something about the man who threw
the spark plug on the dump. He either had an automobile, or he knew
or lived near someone who did. He can't have lived so very long ago,
you'll remember, because spark plugs and automobiles are only about
sixty years old.
When you think about the old spark plug in this way you have
just been making the beginnings of what we call an archeological
_interpretation_; you have been making the spark plug tell a story.
It is the same way with the man-made things we archeologists find
and put in museums. Usually, only a few of these objects are pretty
to look at; but each of them has some sort of story to tell. Making
the interpretation of his finds is the most important part of the
archeologist's job. It is the way he gets at the "sort of history of
human activity" which is expected of archeology.
SOME OTHER SCIENTISTS
There are many other scientists who help the archeologist and the
physical anthropologist find out about prehistoric men. The geologists
help us tell the age of the rocks or caves or gravel beds in which
human bones or man-made objects are found. There are other scientists
with names which all begin with "paleo" (the Greek word for "old"). The
_paleontologists_ study fossil animals. There are also, for example,
such scientists as _paleobotanists_ and _paleoclimatologists_, who
study ancient plants and climates. These scientists help us to know
the kinds of animals and plants that were living in prehistoric times
and so could be used for food by ancient man; what the weather was
like; and whether there were glaciers. Also, when I tell you that
prehistoric men did not appear until long after the great dinosaurs had
disappeared, I go on the say-so of the paleontologists. They know that
fossils of men and of dinosaurs are not found in the same geological
period. The dinosaur fossils come in early periods, the fossils of men
much later.
Since World War II even the atomic scientists have been helping the
archeologists. By testing the amount of radioactivity left in charcoal,
wood, or other vegetable matter obtained from archeological sites, they
have been able to date the sites. Shell has been used also, and even
the hair of Egyptian mummies. The dates of geological and climatic
events have also been discovered. Some of this work has been done from
drillings taken from the bottom of the sea.
This dating by radioactivity has considerably shortened the dates which
the archeologists used to give. If you find that some of the dates
I give here are more recent than the dates you see in other books
on prehistory, it is because I am using one of the new lower dating
systems.
[Illustration: RADIOCARBON CHART
The rate of disappearance of radioactivity as time passes.[1]]
[1] It is important that the limitations of the radioactive carbon
"dating" system be held in mind. As the statistics involved in
the system are used, there are two chances in three that the
"date" of the sample falls within the range given as plus or
minus an added number of years. For example, the "date" for the
Jarmo village (see chart), given as 6750 ± 200 B.C., really
means that there are only two chances in three that the real
date of the charcoal sampled fell between 6950 and 6550 B.C.
We have also begun to suspect that there are ways in which the
samples themselves may have become "contaminated," either on
the early or on the late side. We now tend to be suspicious of
single radioactive carbon determinations, or of determinations
from one site alone. But as a fabric of consistent
determinations for several or more sites of one archeological
period, we gain confidence in the dates.
HOW THE SCIENTISTS FIND OUT
So far, this chapter has been mainly about the people who find out
about prehistoric men. We also need a word about _how_ they find out.
All our finds came by accident until about a hundred years ago. Men
digging wells, or digging in caves for fertilizer, often turned up
ancient swords or pots or stone arrowheads. People also found some odd
pieces of stone that didn't look like natural forms, but they also
didn't look like any known tool. As a result, the people who found them
gave them queer names; for example, "thunderbolts." The people thought
the strange stones came to earth as bolts of lightning. We know now
that these strange stones were prehistoric stone tools.
Many important finds still come to us by accident. In 1935, a British
dentist, A. T. Marston, found the first of two fragments of a very
important fossil human skull, in a gravel pit at Swanscombe, on the
River Thames, England. He had to wait nine months, until the face of
the gravel pit had been dug eight yards farther back, before the second
fragment appeared. They fitted! Then, twenty years later, still another
piece appeared. In 1928 workmen who were blasting out rock for the
breakwater in the port of Haifa began to notice flint tools. Thus the
story of cave men on Mount Carmel, in Palestine, began to be known.
Planned archeological digging is only about a century old. Even before
this, however, a few men realized the significance of objects they dug
from the ground; one of these early archeologists was our own Thomas
Jefferson. The first real mound-digger was a German grocer's clerk,
Heinrich Schliemann. Schliemann made a fortune as a merchant, first
in Europe and then in the California gold-rush of 1849. He became an
American citizen. Then he retired and had both money and time to test
an old idea of his. He believed that the heroes of ancient Troy and
Mycenae were once real Trojans and Greeks. He proved it by going to
Turkey and Greece and digging up the remains of both cities.
Schliemann had the great good fortune to find rich and spectacular
treasures, and he also had the common sense to keep notes and make
descriptions of what he found. He proved beyond doubt that many ancient
city mounds can be _stratified_. This means that there may be the
remains of many towns in a mound, one above another, like layers in a
cake.
You might like to have an idea of how mounds come to be in layers.
The original settlers may have chosen the spot because it had a good
spring and there were good fertile lands nearby, or perhaps because
it was close to some road or river or harbor. These settlers probably
built their town of stone and mud-brick. Finally, something would have
happened to the town--a flood, or a burning, or a raid by enemies--and
the walls of the houses would have fallen in or would have melted down
as mud in the rain. Nothing would have remained but the mud and debris
of a low mound of _one_ layer.
The second settlers would have wanted the spot for the same reasons
the first settlers did--good water, land, and roads. Also, the second
settlers would have found a nice low mound to build their houses on,
a protection from floods. But again, something would finally have
happened to the second town, and the walls of _its_ houses would have
come tumbling down. This makes the _second_ layer. And so on....
In Syria I once had the good fortune to dig on a large mound that had
no less than fifteen layers. Also, most of the layers were thick, and
there were signs of rebuilding and repairs within each layer. The mound
was more than a hundred feet high. In each layer, the building material
used had been a soft, unbaked mud-brick, and most of the debris
consisted of fallen or rain-melted mud from these mud-bricks.
This idea of _stratification_, like the cake layers, was already a
familiar one to the geologists by Schliemann's time. They could show
that their lowest layer of rock was oldest or earliest, and that the
overlying layers became more recent as one moved upward. Schliemann's
digging proved the same thing at Troy. His first (lowest and earliest)
city had at least nine layers above it; he thought that the second
layer contained the remains of Homer's Troy. We now know that Homeric
Troy was layer VIIa from the bottom; also, we count eleven layers or
sub-layers in total.
Schliemann's work marks the beginnings of modern archeology. Scholars
soon set out to dig on ancient sites, from Egypt to Central America.
ARCHEOLOGICAL INFORMATION
As time went on, the study of archeological materials--found either
by accident or by digging on purpose--began to show certain things.
Archeologists began to get ideas as to the kinds of objects that
belonged together. If you compared a mail-order catalogue of 1890 with
one of today, you would see a lot of differences. If you really studied
the two catalogues hard, you would also begin to see that certain
objects "go together." Horseshoes and metal buggy tires and pieces of
harness would begin to fit into a picture with certain kinds of coal
stoves and furniture and china dishes and kerosene lamps. Our friend
the spark plug, and radios and electric refrigerators and light bulbs
would fit into a picture with different kinds of furniture and dishes
and tools. You won't be old enough to remember the kind of hats that
women wore in 1890, but you've probably seen pictures of them, and you
know very well they couldn't be worn with the fashions of today.
This is one of the ways that archeologists study their materials.
The various tools and weapons and jewelry, the pottery, the kinds
of houses, and even the ways of burying the dead tend to fit into
pictures. Some archeologists call all of the things that go together to
make such a picture an _assemblage_. The assemblage of the first layer
of Schliemann's Troy was as different from that of the seventh layer as
our 1900 mail-order catalogue is from the one of today.
The archeologists who came after Schliemann began to notice other
things and to compare them with occurrences in modern times. The
idea that people will buy better mousetraps goes back into very
ancient times. Today, if we make good automobiles or radios, we can
sell some of them in Turkey or even in Timbuktu. This means that a
few present-day types of American automobiles and radios form part
of present-day "assemblages" in both Turkey and Timbuktu. The total
present-day "assemblage" of Turkey is quite different from that of
Timbuktu or that of America, but they have at least some automobiles
and some radios in common.
Now these automobiles and radios will eventually wear out. Let us
suppose we could go to some remote part of Turkey or to Timbuktu in a
dream. We don't know what the date is, in our dream, but we see all
sorts of strange things and ways of living in both places. Nobody
tells us what the date is. But suddenly we see a 1936 Ford; so we
know that in our dream it has to be at least the year 1936, and only
as many years after that as we could reasonably expect a Ford to keep
in running order. The Ford would probably break down in twenty years'
time, so the Turkish or Timbuktu "assemblage" we're seeing in our dream
has to date at about A.D. 1936-56.
Archeologists not only "date" their ancient materials in this way; they
also see over what distances and between which peoples trading was
done. It turns out that there was a good deal of trading in ancient
times, probably all on a barter and exchange basis.
EVERYTHING BEGINS TO FIT TOGETHER
Now we need to pull these ideas all together and see the complicated
structure the archeologists can build with their materials.
Even the earliest archeologists soon found that there was a very long
range of prehistoric time which would yield only very simple things.
For this very long early part of prehistory, there was little to be
found but the flint tools which wandering, hunting and gathering
people made, and the bones of the wild animals they ate. Toward the
end of prehistoric time there was a general settling down with the
coming of agriculture, and all sorts of new things began to be made.
Archeologists soon got a general notion of what ought to appear with
what. Thus, it would upset a French prehistorian digging at the bottom
of a very early cave if he found a fine bronze sword, just as much as
it would upset him if he found a beer bottle. The people of his very
early cave layer simply could not have made bronze swords, which came
later, just as do beer bottles. Some accidental disturbance of the
layers of his cave must have happened.
With any luck, archeologists do their digging in a layered, stratified
site. They find the remains of everything that would last through
time, in several different layers. They know that the assemblage in
the bottom layer was laid down earlier than the assemblage in the next
layer above, and so on up to the topmost layer, which is the latest.
They look at the results of other "digs" and find that some other
archeologist 900 miles away has found ax-heads in his lowest layer,
exactly like the ax-heads of their fifth layer. This means that their
fifth layer must have been lived in at about the same time as was the
first layer in the site 200 miles away. It also may mean that the
people who lived in the two layers knew and traded with each other. Or
it could mean that they didn't necessarily know each other, but simply
that both traded with a third group at about the same time.
You can see that the more we dig and find, the more clearly the main
facts begin to stand out. We begin to be more sure of which people
lived at the same time, which earlier and which later. We begin to
know who traded with whom, and which peoples seemed to live off by
themselves. We begin to find enough skeletons in burials so that the
physical anthropologists can tell us what the people looked like. We
get animal bones, and a paleontologist may tell us they are all bones
of wild animals; or he may tell us that some or most of the bones are
those of domesticated animals, for instance, sheep or cattle, and
therefore the people must have kept herds.
More important than anything else--as our structure grows more
complicated and our materials increase--is the fact that "a sort
of history of human activity" does begin to appear. The habits or
traditions that men formed in the making of their tools and in the
ways they did things, begin to stand out for us. How characteristic
were these habits and traditions? What areas did they spread over?
How long did they last? We watch the different tools and the traces
of the way things were done--how the burials were arranged, what
the living-places were like, and so on. We wonder about the people
themselves, for the traces of habits and traditions are useful to us
only as clues to the men who once had them. So we ask the physical
anthropologists about the skeletons that we found in the burials. The
physical anthropologists tell us about the anatomy and the similarities
and differences which the skeletons show when compared with other
skeletons. The physical anthropologists are even working on a
method--chemical tests of the bones--that will enable them to discover
what the blood-type may have been. One thing is sure. We have never
found a group of skeletons so absolutely similar among themselves--so
cast from a single mould, so to speak--that we could claim to have a
"pure" race. I am sure we never shall.
We become particularly interested in any signs of change--when new
materials and tool types and ways of doing things replace old ones. We
watch for signs of social change and progress in one way or another.
We must do all this without one word of written history to aid us.
Everything we are concerned with goes back to the time _before_ men
learned to write. That is the prehistorian's job--to find out what
happened before history began.
THE CHANGING WORLD in which Prehistoric Men Lived
[Illustration]
Mankind, we'll say, is at least a half million years old. It is very
hard to understand how long a time half a million years really is.
If we were to compare this whole length of time to one day, we'd get
something like this: The present time is midnight, and Jesus was
born just five minutes and thirty-six seconds ago. Earliest history
began less than fifteen minutes ago. Everything before 11:45 was in
prehistoric time.
Or maybe we can grasp the length of time better in terms of
generations. As you know, primitive peoples tend to marry and have
children rather early in life. So suppose we say that twenty years
will make an average generation. At this rate there would be 25,000
generations in a half-million years. But our United States is much less
than ten generations old, twenty-five generations take us back before
the time of Columbus, Julius Caesar was alive just 100 generations ago,
David was king of Israel less than 150 generations ago, 250 generations
take us back to the beginning of written history. And there were 24,750
generations of men before written history began!
I should probably tell you that there is a new method of prehistoric
dating which would cut the earliest dates in my reckoning almost
in half. Dr. Cesare Emiliani, combining radioactive (C14) and
chemical (oxygen isotope) methods in the study of deep-sea borings,
has developed a system which would lower the total range of human
prehistory to about 300,000 years. The system is still too new to have
had general examination and testing. Hence, I have not used it in this
book; it would mainly affect the dates earlier than 25,000 years ago.
CHANGES IN ENVIRONMENT
The earth probably hasn't changed much in the last 5,000 years (250
generations). Men have built things on its surface and dug into it and
drawn boundaries on maps of it, but the places where rivers, lakes,
seas, and mountains now stand have changed very little.
In earlier times the earth looked very different. Geologists call the
last great geological period the _Pleistocene_. It began somewhere
between a half million and a million years ago, and was a time of great
changes. Sometimes we call it the Ice Age, for in the Pleistocene
there were at least three or four times when large areas of earth
were covered with glaciers. The reason for my uncertainty is that
while there seem to have been four major mountain or alpine phases of
glaciation, there may only have been three general continental phases
in the Old World.[2]
[2] This is a complicated affair and I do not want to bother you
with its details. Both the alpine and the continental ice sheets
seem to have had minor fluctuations during their _main_ phases,
and the advances of the later phases destroyed many of the
traces of the earlier phases. The general textbooks have tended
to follow the names and numbers established for the Alps early
in this century by two German geologists. I will not bother you
with the names, but there were _four_ major phases. It is the
second of these alpine phases which seems to fit the traces of
the earliest of the great continental glaciations. In this book,
I will use the four-part system, since it is the most familiar,
but will add the word _alpine_ so you may remember to make the
transition to the continental system if you wish to do so.
Glaciers are great sheets of ice, sometimes over a thousand feet
thick, which are now known only in Greenland and Antarctica and in
high mountains. During several of the glacial periods in the Ice Age,
the glaciers covered most of Canada and the northern United States and
reached down to southern England and France in Europe. Smaller ice
sheets sat like caps on the Rockies, the Alps, and the Himalayas. The
continental glaciation only happened north of the equator, however, so
remember that "Ice Age" is only half true.
As you know, the amount of water on and about the earth does not vary.
These large glaciers contained millions of tons of water frozen into
ice. Because so much water was frozen and contained in the glaciers,
the water level of lakes and oceans was lowered. Flooded areas were
drained and appeared as dry land. There were times in the Ice Age when
there was no English Channel, so that England was not an island, and a
land bridge at the Dardanelles probably divided the Mediterranean from
the Black Sea.
A very important thing for people living during the time of a
glaciation was the region adjacent to the glacier. They could not, of
course, live on the ice itself. The questions would be how close could
they live to it, and how would they have had to change their way of
life to do so.
GLACIERS CHANGE THE WEATHER
Great sheets of ice change the weather. When the front of a glacier
stood at Milwaukee, the weather must have been bitterly cold in
Chicago. The climate of the whole world would have been different, and
you can see how animals and men would have been forced to move from one
place to another in search of food and warmth.
On the other hand, it looks as if only a minor proportion of the whole
Ice Age was really taken up by times of glaciation. In between came
the _interglacial_ periods. During these times the climate around
Chicago was as warm as it is now, and sometimes even warmer. It may
interest you to know that the last great glacier melted away less than
10,000 years ago. Professor Ernst Antevs thinks we may be living in an
interglacial period and that the Ice Age may not be over yet. So if you
want to make a killing in real estate for your several hundred times
great-grandchildren, you might buy some land in the Arizona desert or
the Sahara.
We do not yet know just why the glaciers appeared and disappeared, as
they did. It surely had something to do with an increase in rainfall
and a fall in temperature. It probably also had to do with a general
tendency for the land to rise at the beginning of the Pleistocene. We
know there was some mountain-building at that time. Hence, rain-bearing
winds nourished the rising and cooler uplands with snow. An increase
in all three of these factors--if they came together--would only have
needed to be slight. But exactly why this happened we do not know.
The reason I tell you about the glaciers is simply to remind you of the
changing world in which prehistoric men lived. Their surroundings--the
animals and plants they used for food, and the weather they had to
protect themselves from--were always changing. On the other hand, this
change happened over so long a period of time and was so slow that
individual people could not have noticed it. Glaciers, about which they
probably knew nothing, moved in hundreds of miles to the north of them.
The people must simply have wandered ever more southward in search
of the plants and animals on which they lived. Or some men may have
stayed where they were and learned to hunt different animals and eat
different foods. Prehistoric men had to keep adapting themselves to new
environments and those who were most adaptive were most successful.
OTHER CHANGES
Changes took place in the men themselves as well as in the ways they
lived. As time went on, they made better tools and weapons. Then, too,
we begin to find signs of how they started thinking of other things
than food and the tools to get it with. We find that they painted on
the walls of caves, and decorated their tools; we find that they buried
their dead.
At about the time when the last great glacier was finally melting away,
men in the Near East made the first basic change in human economy.
They began to plant grain, and they learned to raise and herd certain
animals. This meant that they could store food in granaries and "on the
hoof" against the bad times of the year. This first really basic change
in man's way of living has been called the "food-producing revolution."
By the time it happened, a modern kind of climate was beginning. Men
had already grown to look as they do now. Know-how in ways of living
had developed and progressed, slowly but surely, up to a point. It was
impossible for men to go beyond that point if they only hunted and
fished and gathered wild foods. Once the basic change was made--once
the food-producing revolution became effective--technology leaped ahead
and civilization and written history soon began.
Prehistoric Men THEMSELVES
[Illustration]
DO WE KNOW WHERE MAN ORIGINATED?
For a long time some scientists thought the "cradle of mankind" was in
central Asia. Other scientists insisted it was in Africa, and still
others said it might have been in Europe. Actually, we don't know
where it was. We don't even know that there was only _one_ "cradle."
If we had to choose a "cradle" at this moment, we would probably say
Africa. But the southern portions of Asia and Europe may also have been
included in the general area. The scene of the early development of
mankind was certainly the Old World. It is pretty certain men didn't
reach North or South America until almost the end of the Ice Age--had
they done so earlier we would certainly have found some trace of them
by now.
The earliest tools we have yet found come from central and south
Africa. By the dating system I'm using, these tools must be over
500,000 years old. There are now reports that a few such early tools
have been found--at the Sterkfontein cave in South Africa--along with
the bones of small fossil men called "australopithecines."
Not all scientists would agree that the australopithecines were "men,"
or would agree that the tools were made by the australopithecines
themselves. For these sticklers, the earliest bones of men come from
the island of Java. The date would be about 450,000 years ago. So far,
we have not yet found the tools which we suppose these earliest men in
the Far East must have made.
Let me say it another way. How old are the earliest traces of men we
now have? Over half a million years. This was a time when the first
alpine glaciation was happening in the north. What has been found so
far? The tools which the men of those times made, in different parts
of Africa. It is now fairly generally agreed that the "men" who made
the tools were the australopithecines. There is also a more "man-like"
jawbone at Kanam in Kenya, but its find-spot has been questioned. The
next earliest bones we have were found in Java, and they may be almost
a hundred thousand years younger than the earliest African finds. We
haven't yet found the tools of these early Javanese. Our knowledge of
tool-using in Africa spreads quickly as time goes on: soon after the
appearance of tools in the south we shall have them from as far north
as Algeria.
Very soon after the earliest Javanese come the bones of slightly more
developed people in Java, and the jawbone of a man who once lived in
what is now Germany. The same general glacial beds which yielded the
later Javanese bones and the German jawbone also include tools. These
finds come from the time of the second alpine glaciation.
So this is the situation. By the time of the end of the second alpine
or first continental glaciation (say 400,000 years ago) we have traces
of men from the extremes of the more southerly portions of the Old
World--South Africa, eastern Asia, and western Europe. There are also
some traces of men in the middle ground. In fact, Professor Franz
Weidenreich believed that creatures who were the immediate ancestors
of men had already spread over Europe, Africa, and Asia by the time
the Ice Age began. We certainly have no reason to disbelieve this, but
fortunate accidents of discovery have not yet given us the evidence to
prove it.
MEN AND APES
Many people used to get extremely upset at the ill-formed notion
that "man descended from the apes." Such words were much more likely
to start fights or "monkey trials" than the correct notion that all
living animals, including man, ascended or evolved from a single-celled
organism which lived in the primeval seas hundreds of millions of years
ago. Men are mammals, of the order called Primates, and man's living
relatives are the great apes. Men didn't "descend" from the apes or
apes from men, and mankind must have had much closer relatives who have
since become extinct.
Men stand erect. They also walk and run on their two feet. Apes are
happiest in trees, swinging with their arms from branch to branch.
Few branches of trees will hold the mighty gorilla, although he still
manages to sleep in trees. Apes can't stand really erect in our sense,
and when they have to run on the ground, they use the knuckles of their
hands as well as their feet.
A key group of fossil bones here are the south African
australopithecines. These are called the _Australopithecinae_ or
"man-apes" or sometimes even "ape-men." We do not _know_ that they were
directly ancestral to men but they can hardly have been so to apes.
Presently I'll describe them a bit more. The reason I mention them
here is that while they had brains no larger than those of apes, their
hipbones were enough like ours so that they must have stood erect.
There is no good reason to think they couldn�t have walked as we do.
BRAINS, HANDS, AND TOOLS
Whether the australopithecines were our ancestors or not, the proper
ancestors of men must have been able to stand erect and to walk on
their two feet. Three further important things probably were involved,
next, before they could become men proper. These are:
1. The increasing size and development of the brain.
2. The increasing usefulness (specialization) of the thumb and hand.
3. The use of tools.
Nobody knows which of these three is most important, or which came
first. Most probably the growth of all three things was very much
blended together. If you think about each of the things, you will see
what I mean. Unless your hand is more flexible than a paw, and your
thumb will work against (or oppose) your fingers, you can't hold a tool
very well. But you wouldn't get the idea of using a tool unless you had
enough brain to help you see cause and effect. And it is rather hard to
see how your hand and brain would develop unless they had something to
practice on--like using tools. In Professor Krogman's words, "the hand
must become the obedient servant of the eye and the brain." It is the
_co-ordination_ of these things that counts.
Many other things must have been happening to the bodies of the
creatures who were the ancestors of men. Our ancestors had to develop
organs of speech. More than that, they had to get the idea of letting
_certain sounds_ made with these speech organs have _certain meanings_.
All this must have gone very slowly. Probably everything was developing
little by little, all together. Men became men very slowly.
WHEN SHALL WE CALL MEN MEN?
What do I mean when I say "men"? People who looked pretty much as we
do, and who used different tools to do different things, are men to me.
We'll probably never know whether the earliest ones talked or not. They
probably had vocal cords, so they could make sounds, but did they know
how to make sounds work as symbols to carry meanings? But if the fossil
bones look like our skeletons, and if we find tools which we�ll agree
couldn't have been made by nature or by animals, then I'd say we had
traces of _men_.
The australopithecine finds of the Transvaal and Bechuanaland, in
south Africa, are bound to come into the discussion here. I've already
told you that the australopithecines could have stood upright and
walked on their two hind legs. They come from the very base of the
Pleistocene or Ice Age, and a few coarse stone tools have been found
with the australopithecine fossils. But there are three varieties
of the australopithecines and they last on until a time equal to
that of the second alpine glaciation. They are the best suggestion
we have yet as to what the ancestors of men _may_ have looked like.
They were certainly closer to men than to apes. Although their brain
size was no larger than the brains of modern apes their body size and
stature were quite small; hence, relative to their small size, their
brains were large. We have not been able to prove without doubt that
the australopithecines were _tool-making_ creatures, even though the
recent news has it that tools have been found with australopithecine
bones. The doubt as to whether the australopithecines used the tools
themselves goes like this--just suppose some man-like creature (whose
bones we have not yet found) made the tools and used them to kill
and butcher australopithecines. Hence a few experts tend to let
australopithecines still hang in limbo as "man-apes."
THE EARLIEST MEN WE KNOW
I'll postpone talking about the tools of early men until the next
chapter. The men whose bones were the earliest of the Java lot have
been given the name _Meganthropus_. The bones are very fragmentary. We
would not understand them very well unless we had the somewhat later
Javanese lot--the more commonly known _Pithecanthropus_ or "Java
man"--against which to refer them for study. One of the less well-known
and earliest fragments, a piece of lower jaw and some teeth, rather
strongly resembles the lower jaws and teeth of the australopithecine
type. Was _Meganthropus_ a sort of half-way point between the
australopithecines and _Pithecanthropus_? It is still too early to say.
We shall need more finds before we can be definite one way or the other.
Java man, _Pithecanthropus_, comes from geological beds equal in age
to the latter part of the second alpine glaciation; the _Meganthropus_
finds refer to beds of the beginning of this glaciation. The first
finds of Java man were made in 1891-92 by Dr. Eugene Dubois, a Dutch
doctor in the colonial service. Finds have continued to be made. There
are now bones enough to account for four skulls. There are also four
jaws and some odd teeth and thigh bones. Java man, generally speaking,
was about five feet six inches tall, and didn't hold his head very
erect. His skull was very thick and heavy and had room for little more
than two-thirds as large a brain as we have. He had big teeth and a big
jaw and enormous eyebrow ridges.
No tools were found in the geological deposits where bones of Java man
appeared. There are some tools in the same general area, but they come
a bit later in time. One reason we accept the Java man as man--aside
from his general anatomical appearance--is that these tools probably
belonged to his near descendants.
Remember that there are several varieties of men in the whole early
Java lot, at least two of which are earlier than the _Pithecanthropus_,
"Java man." Some of the earlier ones seem to have gone in for
bigness, in tooth-size at least. _Meganthropus_ is one of these
earlier varieties. As we said, he _may_ turn out to be a link to
the australopithecines, who _may_ or _may not_ be ancestral to men.
_Meganthropus_ is best understandable in terms of _Pithecanthropus_,
who appeared later in the same general area. _Pithecanthropus_ is
pretty well understandable from the bones he left us, and also because
of his strong resemblance to the fully tool-using cave-dwelling "Peking
man," _Sinanthropus_, about whom we shall talk next. But you can see
that the physical anthropologists and prehistoric archeologists still
have a lot of work to do on the problem of earliest men.
PEKING MEN AND SOME EARLY WESTERNERS
The earliest known Chinese are called _Sinanthropus_, or "Peking man,"
because the finds were made near that city. In World War II, the United
States Marine guard at our Embassy in Peking tried to help get the
bones out of the city before the Japanese attack. Nobody knows where
these bones are now. The Red Chinese accuse us of having stolen them.
They were last seen on a dock-side at a Chinese port. But should you
catch a Marine with a sack of old bones, perhaps we could achieve peace
in Asia by returning them! Fortunately, there is a complete set of
casts of the bones.
Peking man lived in a cave in a limestone hill, made tools, cracked
animal bones to get the marrow out, and used fire. Incidentally, the
bones of Peking man were found because Chinese dig for what they call
"dragon bones" and "dragon teeth." Uneducated Chinese buy these things
in their drug stores and grind them into powder for medicine. The
"dragon teeth" and "bones" are really fossils of ancient animals, and
sometimes of men. The people who supply the drug stores have learned
where to dig for strange bones and teeth. Paleontologists who get to
China go to the drug stores to buy fossils. In a roundabout way, this
is how the fallen-in cave of Peking man at Choukoutien was discovered.
Peking man was not quite as tall as Java man but he probably stood
straighter. His skull looked very much like that of the Java skull
except that it had room for a slightly larger brain. His face was less
brutish than was Java man's face, but this isn't saying much.
Peking man dates from early in the interglacial period following the
second alpine glaciation. He probably lived close to 350,000 years
ago. There are several finds to account for in Europe by about this
time, and one from northwest Africa. The very large jawbone found
near Heidelberg in Germany is doubtless even earlier than Peking man.
The beds where it was found are of second alpine glacial times, and
recently some tools have been said to have come from the same beds.
There is not much I need tell you about the Heidelberg jaw save that it
seems certainly to have belonged to an early man, and that it is very
big.
Another find in Germany was made at Steinheim. It consists of the
fragmentary skull of a man. It is very important because of its
relative completeness, but it has not yet been fully studied. The bone
is thick, but the back of the head is neither very low nor primitive,
and the face is also not primitive. The forehead does, however, have
big ridges over the eyes. The more fragmentary skull from Swanscombe in
England (p. 11) has been much more carefully studied. Only the top and
back of that skull have been found. Since the skull rounds up nicely,
it has been assumed that the face and forehead must have been quite
"modern." Careful comparison with Steinheim shows that this was not
necessarily so. This is important because it bears on the question of
how early truly "modern" man appeared.
Recently two fragmentary jaws were found at Ternafine in Algeria,
northwest Africa. They look like the jaws of Peking man. Tools were
found with them. Since no jaws have yet been found at Steinheim or
Swanscombe, but the time is the same, one wonders if these people had
jaws like those of Ternafine.
WHAT HAPPENED TO JAVA AND PEKING MEN
Professor Weidenreich thought that there were at least a dozen ways in
which the Peking man resembled the modern Mongoloids. This would seem
to indicate that Peking man was really just a very early Chinese.
Several later fossil men have been found in the Java-Australian area.
The best known of these is the so-called Solo man. There are some finds
from Australia itself which we now know to be quite late. But it looks
as if we may assume a line of evolution from Java man down to the
modern Australian natives. During parts of the Ice Age there was a land
bridge all the way from Java to Australia.
TWO ENGLISHMEN WHO WEREN'T OLD
The older textbooks contain descriptions of two English finds which
were thought to be very old. These were called Piltdown (_Eoanthropus
dawsoni_) and Galley Hill. The skulls were very modern in appearance.
In 1948-49, British scientists began making chemical tests which proved
that neither of these finds is very old. It is now known that both
"Piltdown man" and the tools which were said to have been found with
him were part of an elaborate fake!
TYPICAL "CAVE MEN"
The next men we have to talk about are all members of a related group.
These are the Neanderthal group. �Neanderthal man� himself was found in
the Neander Valley, near D�sseldorf, Germany, in 1856. He was the first
human fossil to be recognized as such.
[Illustration: PRINCIPAL KNOWN TYPES OF FOSSIL MEN
CRO-MAGNON
NEANDERTHAL
MODERN SKULL
COMBE-CAPELLE
SINANTHROPUS
PITHECANTHROPUS]
Some of us think that the neanderthaloids proper are only those people
of western Europe who didn't get out before the beginning of the last
great glaciation, and who found themselves hemmed in by the glaciers
in the Alps and northern Europe. Being hemmed in, they intermarried
a bit too much and developed into a special type. Professor F. Clark
Howell sees it this way. In Europe, the earliest trace of men we
now know is the Heidelberg jaw. Evolution continued in Europe, from
Heidelberg through the Swanscombe and Steinheim types to a group of
pre-neanderthaloids. There are traces of these pre-neanderthaloids
pretty much throughout Europe during the third interglacial period--say
100,000 years ago. The pre-neanderthaloids are represented by such
finds as the ones at Ehringsdorf in Germany and Saccopastore in Italy.
I won't describe them for you, since they are simply less extreme than
the neanderthaloids proper--about half way between Steinheim and the
classic Neanderthal people.
Professor Howell believes that the pre-neanderthaloids who happened to
get caught in the pocket of the southwest corner of Europe at the onset
of the last great glaciation became the classic Neanderthalers. Out in
the Near East, Howell thinks, it is possible to see traces of people
evolving from the pre-neanderthaloid type toward that of fully modern
man. Certainly, we don't see such extreme cases of "neanderthaloidism"
outside of western Europe.
There are at least a dozen good examples in the main or classic
Neanderthal group in Europe. They date to just before and in the
earlier part of the last great glaciation (85,000 to 40,000 years ago).
Many of the finds have been made in caves. The "cave men" the movies
and the cartoonists show you are probably meant to be Neanderthalers.
I'm not at all sure they dragged their women by the hair; the women
were probably pretty tough, too!
Neanderthal men had large bony heads, but plenty of room for brains.
Some had brain cases even larger than the average for modern man. Their
faces were heavy, and they had eyebrow ridges of bone, but the ridges
were not as big as those of Java man. Their foreheads were very low,
and they didn't have much chin. They were about five feet three inches
tall, but were heavy and barrel-chested. But the Neanderthalers didn't
slouch as much as they've been blamed for, either.
One important thing about the Neanderthal group is that there is a fair
number of them to study. Just as important is the fact that we know
something about how they lived, and about some of the tools they made.
OTHER MEN CONTEMPORARY WITH THE NEANDERTHALOIDS
We have seen that the neanderthaloids seem to be a specialization
in a corner of Europe. What was going on elsewhere? We think that
the pre-neanderthaloid type was a generally widespread form of men.
From this type evolved other more or less extreme although generally
related men. The Solo finds in Java form one such case. Another was the
Rhodesian man of Africa, and the more recent Hopefield finds show more
of the general Rhodesian type. It is more confusing than it needs to be
if these cases outside western Europe are called neanderthaloids. They
lived during the same approximate time range but they were all somewhat
different-looking people.
EARLY MODERN MEN
How early is modern man (_Homo sapiens_), the "wise man"? Some people
have thought that he was very early, a few still think so. Piltdown
and Galley Hill, which were quite modern in anatomical appearance and
_supposedly_ very early in date, were the best "evidence" for very
early modern men. Now that Piltdown has been liquidated and Galley Hill
is known to be very late, what is left of the idea?
The backs of the skulls of the Swanscombe and Steinheim finds look
rather modern. Unless you pay attention to the face and forehead of the
Steinheim find--which not many people have--and perhaps also consider
the Ternafine jaws, you might come to the conclusion that the crown of
the Swanscombe head was that of a modern-like man.
Two more skulls, again without faces, are available from a French
cave site, Fontéchevade. They come from the time of the last great
interglacial, as did the pre-neanderthaloids. The crowns of the
Fontéchevade skulls also look quite modern. There is a bit of the
forehead preserved on one of these skulls and the brow-ridge is not
heavy. Nevertheless, there is a suggestion that the bones belonged to
an immature individual. In this case, his (or even more so, if _her_)
brow-ridges would have been weak anyway. The case for the Fontéchevade
fossils, as modern type men, is little stronger than that for
Swanscombe, although Professor Vallois believes it a good case.
It seems to add up to the fact that there were people living in
Europe--before the classic neanderthaloids--who looked more modern,
in some features, than the classic western neanderthaloids did. Our
best suggestion of what men looked like--just before they became fully
modern--comes from a cave on Mount Carmel in Palestine.
THE FIRST MODERNS
Professor T. D. McCown and the late Sir Arthur Keith, who studied the
Mount Carmel bones, figured out that one of the two groups involved
was as much as 70 per cent modern. There were, in fact, two groups or
varieties of men in the Mount Carmel caves and in at least two other
Palestinian caves of about the same time. The time would be about that
of the onset of colder weather, when the last glaciation was beginning
in the north--say 75,000 years ago.
The 70 per cent modern group came from only one cave, Mugharet es-Skhul
("cave of the kids"). The other group, from several caves, had bones of
men of the type we've been calling pre-neanderthaloid which we noted
were widespread in Europe and beyond. The tools which came with each
of these finds were generally similar, and McCown and Keith, and other
scholars since their study, have tended to assume that both the Skhul
group and the pre-neanderthaloid group came from exactly the same time.
The conclusion was quite natural: here was a population of men in the
act of evolving in two different directions. But the time may not be
exactly the same. It is very difficult to be precise, within say 10,000
years, for a time some 75,000 years ago. If the Skhul men are in fact
later than the pre-neanderthaloid group of Palestine, as some of us
think, then they show how relatively modern some men were--men who
lived at the same time as the classic Neanderthalers of the European
pocket.
Soon after the first extremely cold phase of the last glaciation, we
begin to get a number of bones of completely modern men in Europe.
We also get great numbers of the tools they made, and their living
places in caves. Completely modern skeletons begin turning up in caves
dating back to toward 40,000 years ago. The time is about that of the
beginning of the second phase of the last glaciation. These skeletons
belonged to people no different from many people we see today. Like
people today, not everybody looked alike. (The positions of the more
important fossil men of later Europe are shown in the chart on page
72.)
DIFFERENCES IN THE EARLY MODERNS
The main early European moderns have been divided into two groups, the
Cro-Magnon group and the Combe Capelle-Brünn group. Cro-Magnon people
were tall and big-boned, with large, long, and rugged heads. They
must have been built like many present-day Scandinavians. The Combe
Capelle-Brünn people were shorter; they had narrow heads and faces, and
big eyebrow-ridges. Of course we don't find the skin or hair of these
people. But there is little doubt they were Caucasoids ("Whites").
Another important find came in the Italian Riviera, near Monte Carlo.
Here, in a cave near Grimaldi, there was a grave containing a woman
and a young boy, buried together. The two skeletons were first called
"Negroid" because some features of their bones were thought to resemble
certain features of modern African Negro bones. But more recently,
Professor E. A. Hooton and other experts questioned the use of the word
"Negroid" in describing the Grimaldi skeletons. It is true that nothing
is known of the skin color, hair form, or any other fleshy feature of
the Grimaldi people, so that the word "Negroid" in its usual meaning is
not proper here. It is also not clear whether the features of the bones
claimed to be "Negroid" are really so at all.
From a place called Wadjak, in Java, we have "proto-Australoid" skulls
which closely resemble those of modern Australian natives. Some of
the skulls found in South Africa, especially the Boskop skull, look
like those of modern Bushmen, but are much bigger. The ancestors of
the Bushmen seem to have once been very widespread south of the Sahara
Desert. True African Negroes were forest people who apparently expanded
out of the west central African area only in the last several thousand
years. Although dark in skin color, neither the Australians nor the
Bushmen are Negroes; neither the Wadjak nor the Boskop skulls are
"Negroid."
As we've already mentioned, Professor Weidenreich believed that Peking
man was already on the way to becoming a Mongoloid. Anyway, the
Mongoloids would seem to have been present by the time of the "Upper
Cave" at Choukoutien, the _Sinanthropus_ find-spot.
WHAT THE DIFFERENCES MEAN
What does all this difference mean? It means that, at one moment in
time, within each different area, men tended to look somewhat alike.
From area to area, men tended to look somewhat different, just as
they do today. This is all quite natural. People _tended_ to mate
near home; in the anthropological jargon, they made up geographically
localized breeding populations. The simple continental division of
"stocks"--black = Africa, yellow = Asia, white = Europe--is too simple
a picture to fit the facts. People became accustomed to life in some
particular area within a continent (we might call it a "natural area").
As they went on living there, they evolved towards some particular
physical variety. It would, of course, have been difficult to draw
a clear boundary between two adjacent areas. There must always have
been some mating across the boundaries in every case. One thing human
beings don't do, and never have done, is to mate for "purity." It is
self-righteous nonsense when we try to kid ourselves into thinking that
they do.
I am not going to struggle with the whole business of modern stocks and
races. This is a book about prehistoric men, not recent historic or
modern men. My physical anthropologist friends have been very patient
in helping me to write and rewrite this chapter--I am not going to
break their patience completely. Races are their business, not mine,
and they must do the writing about races. I shall, however, give two
modern definitions of race, and then make one comment.
Dr. William G. Boyd, professor of Immunochemistry, School of
Medicine, Boston University: "We may define a human race as a
population which differs significantly from other human populations
in regard to the frequency of one or more of the genes it
possesses."
Professor Sherwood L. Washburn, professor of Physical Anthropology,
Department of Anthropology, the University of California: "A 'race'
is a group of genetically similar populations, and races intergrade
because there are always intermediate populations."
My comment is that the ideas involved here are all biological: they
concern groups, _not_ individuals. Boyd and Washburn may differ a bit
on what they want to consider a "population," but a population is a
group nevertheless, and genetics is biology to the hilt. Now a lot of
people still think of race in terms of how people dress or fix their
food or of other habits or customs they have. The next step is to talk
about racial "purity." None of this has anything whatever to do with
race proper, which is a matter of the biology of groups.
Incidentally, I'm told that if man very carefully _controls_
the breeding of certain animals over generations--dogs, cattle,
chickens--he might achieve a "pure" race of animals. But he doesn't do
it. Some unfortunate genetic trait soon turns up, so this has just as
carefully to be bred out again, and so on.
SUMMARY OF PRESENT KNOWLEDGE OF FOSSIL MEN
The earliest bones of men we now have--upon which all the experts
would probably agree--are those of _Meganthropus_, from Java, of about
450,000 years ago. The earlier australopithecines of Africa were
possibly not tool-users and may not have been ancestral to men at all.
But there is an alternate and evidently increasingly stronger chance
that some of them may have been. The Kanam jaw from Kenya, another
early possibility, is not only very incomplete but its find-spot is
very questionable.
Java man proper, _Pithecanthropus_, comes next, at about 400,000 years
ago, and the big Heidelberg jaw in Germany must be of about the same
date. Next comes Swanscombe in England, Steinheim in Germany, the
Ternafine jaws in Algeria, and Peking man, _Sinanthropus_. They all
date to the second great interglacial period, about 350,000 years ago.
Piltdown and Galley Hill are out, and with them, much of the starch
in the old idea that there were two distinct lines of development
in human evolution: (1) a line of "paleoanthropic" development from
Heidelberg to the Neanderthalers where it became extinct, and (2) a
very early "modern" line, through Piltdown, Galley Hill, Swanscombe, to
us. Swanscombe, Steinheim, and Ternafine are just as easily cases of
very early pre-neanderthaloids.
The pre-neanderthaloids were very widespread during the third
interglacial: Ehringsdorf, Saccopastore, some of the Mount Carmel
people, and probably Fontéchevade are cases in point. A variety of
their descendants can be seen, from Java (Solo), Africa (Rhodesian
man), and about the Mediterranean and in western Europe. As the acute
cold of the last glaciation set in, the western Europeans found
themselves surrounded by water, ice, or bitter cold tundra. To vastly
over-simplify it, they "bred in" and became classic neanderthaloids.
But on Mount Carmel, the Skhul cave-find with its 70 per cent modern
features shows what could happen elsewhere at the same time.
Lastly, from about 40,000 or 35,000 years ago--the time of the onset
of the second phase of the last glaciation--we begin to find the fully
modern skeletons of men. The modern skeletons differ from place to
place, just as different groups of men living in different places still
look different.
What became of the Neanderthalers? Nobody can tell me for sure. I've a
hunch they were simply "bred out" again when the cold weather was over.
Many Americans, as the years go by, are no longer ashamed to claim they
have "Indian blood in their veins." Give us a few more generations
and there will not be very many other Americans left to whom we can
brag about it. It certainly isn't inconceivable to me to imagine a
little Cro-Magnon boy bragging to his friends about his tough, strong,
Neanderthaler great-great-great-great-grandfather!
Cultural BEGINNINGS
[Illustration]
Men, unlike the lower animals, are made up of much more than flesh and
blood and bones; for men have "culture."
WHAT IS CULTURE?
"Culture" is a word with many meanings. The doctors speak of making a
"culture" of a certain kind of bacteria, and ants are said to have a
"culture." Then there is the Emily Post kind of "culture"--you say a
person is "cultured," or that he isn't, depending on such things as
whether or not he eats peas with his knife.
The anthropologists use the word too, and argue heatedly over its finer
meanings; but they all agree that every human being is part of or has
some kind of culture. Each particular human group has a particular
culture; that is one of the ways in which we can tell one group of
men from another. In this sense, a CULTURE means the way the members
of a group of people think and believe and live, the tools they make,
and the way they do things. Professor Robert Redfield says a culture
is an organized or formalized body of conventional understandings.
"Conventional understandings" means the whole set of rules, beliefs,
and standards which a group of people lives by. These understandings
show themselves in art, and in the other things a people may make and
do. The understandings continue to last, through tradition, from one
generation to another. They are what really characterize different
human groups.
SOME CHARACTERISTICS OF CULTURE
A culture lasts, although individual men in the group die off. On
the other hand, a culture changes as the different conventions and
understandings change. You could almost say that a culture lives in the
minds of the men who have it. But people are not born with it; they
get it as they grow up. Suppose a day-old Hungarian baby is adopted by
a family in Oshkosh, Wisconsin, and the child is not told that he is
Hungarian. He will grow up with no more idea of Hungarian culture than
anyone else in Oshkosh.
So when I speak of ancient Egyptian culture, I mean the whole body
of understandings and beliefs and knowledge possessed by the ancient
Egyptians. I mean their beliefs as to why grain grew, as well as their
ability to make tools with which to reap the grain. I mean their
beliefs about life after death. What I am thinking about as culture is
a thing which lasted in time. If any one Egyptian, even the Pharaoh,
died, it didn't affect the Egyptian culture of that particular moment.
PREHISTORIC CULTURES
For that long period of man's history that is all prehistory, we have
no written descriptions of cultures. We find only the tools men made,
the places where they lived, the graves in which they buried their
dead. Fortunately for us, these tools and living places and graves all
tell us something about the ways these men lived and the things they
believed. But the story we learn of the very early cultures must be
only a very small part of the whole, for we find so few things. The
rest of the story is gone forever. We have to do what we can with what
we find.
For all of the time up to about 75,000 years ago, which was the time
of the classic European Neanderthal group of men, we have found few
cave-dwelling places of very early prehistoric men. First, there is the
fallen-in cave where Peking man was found, near Peking. Then there are
two or three other _early_, but not _very early_, possibilities. The
finds at the base of the French cave of Fontéchevade, those in one of
the Makapan caves in South Africa, and several open sites such as Dr.
L. S. B. Leakey's Olorgesailie in Kenya doubtless all lie earlier than
the time of the main European Neanderthal group, but none are so early
as the Peking finds.
You can see that we know very little about the home life of earlier
prehistoric men. We find different kinds of early stone tools, but we
can't even be really sure which tools may have been used together.
WHY LITTLE HAS LASTED FROM EARLY TIMES
Except for the rare find-spots mentioned above, all our very early
finds come from geological deposits, or from the wind-blown surfaces
of deserts. Here is what the business of geological deposits really
means. Let us say that a group of people was living in England about
300,000 years ago. They made the tools they needed, lived in some sort
of camp, almost certainly built fires, and perhaps buried their dead.
While the climate was still warm, many generations may have lived in
the same place, hunting, and gathering nuts and berries; but after some
few thousand years, the weather began very gradually to grow colder.
These early Englishmen would not have known that a glacier was forming
over northern Europe. They would only have noticed that the animals
they hunted seemed to be moving south, and that the berries grew larger
toward the south. So they would have moved south, too.
The camp site they left is the place we archeologists would really have
liked to find. All of the different tools the people used would have
been there together--many broken, some whole. The graves, and traces
of fire, and the tools would have been there. But the glacier got
there first! The front of this enormous sheet of ice moved down over
the country, crushing and breaking and plowing up everything, like a
gigantic bulldozer. You can see what happened to our camp site.
Everything the glacier couldn�t break, it pushed along in front of it
or plowed beneath it. Rocks were ground to gravel, and soil was caught
into the ice, which afterwards melted and ran off as muddy water. Hard
tools of flint sometimes remained whole. Human bones weren't so hard;
it's a wonder _any_ of them lasted. Gushing streams of melt water
flushed out the debris from underneath the glacier, and water flowed
off the surface and through great crevasses. The hard materials these
waters carried were even more rolled and ground up. Finally, such
materials were dropped by the rushing waters as gravels, miles from
the front of the glacier. At last the glacier reached its greatest
extent; then it melted backward toward the north. Debris held in the
ice was dropped where the ice melted, or was flushed off by more melt
water. When the glacier, leaving the land, had withdrawn to the sea,
great hunks of ice were broken off as icebergs. These icebergs probably
dropped the materials held in their ice wherever they floated and
melted. There must be many tools and fragmentary bones of prehistoric
men on the bottom of the Atlantic Ocean and the North Sea.
Remember, too, that these glaciers came and went at least three or four
times during the Ice Age. Then you will realize why the earlier things
we find are all mixed up. Stone tools from one camp site got mixed up
with stone tools from many other camp sites--tools which may have been
made tens of thousands or more years apart. The glaciers mixed them
all up, and so we cannot say which particular sets of tools belonged
together in the first place.
"EOLITHS"
But what sort of tools do we find earliest? For almost a century,
people have been picking up odd bits of flint and other stone in the
oldest Ice Age gravels in England and France. It is now thought these
odd bits of stone weren't actually worked by prehistoric men. The
stones were given a name, _eoliths_, or "dawn stones." You can see them
in many museums; but you can be pretty sure that very few of them were
actually fashioned by men.
It is impossible to pick out "eoliths" that seem to be made in any
one _tradition_. By "tradition" I mean a set of habits for making one
kind of tool for some particular job. No two "eoliths" look very much
alike: tools made as part of some one tradition all look much alike.
Now it's easy to suppose that the very earliest prehistoric men picked
up and used almost any sort of stone. This wouldn't be surprising; you
and I do it when we go camping. In other words, some of these "eoliths"
may actually have been used by prehistoric men. They must have used
anything that might be handy when they needed it. We could have figured
that out without the "eoliths."
THE ROAD TO STANDARDIZATION
Reasoning from what we know or can easily imagine, there should have
been three major steps in the prehistory of tool-making. The first step
would have been simple _utilization_ of what was at hand. This is the
step into which the �eoliths� would fall. The second step would have
been _fashioning_--the haphazard preparation of a tool when there was a
need for it. Probably many of the earlier pebble tools, which I shall
describe next, fall into this group. The third step would have been
_standardization_. Here, men began to make tools according to certain
set traditions. Counting the better-made pebble tools, there are four
such traditions or sets of habits for the production of stone tools in
earliest prehistoric times. Toward the end of the Pleistocene, a fifth
tradition appears.
PEBBLE TOOLS
At the beginning of the last chapter, you'll remember that I said there
were tools from very early geological beds. The earliest bones of men
have not yet been found in such early beds although the Sterkfontein
australopithecine cave approaches this early date. The earliest tools
come from Africa. They date back to the time of the first great
alpine glaciation and are at least 500,000 years old. The earliest
ones are made of split pebbles, about the size of your fist or a bit
bigger. They go under the name of pebble tools. There are many natural
exposures of early Pleistocene geological beds in Africa, and the
prehistoric archeologists of south and central Africa have concentrated
on searching for early tools. Other finds of early pebble tools have
recently been made in Algeria and Morocco.
[Illustration: SOUTH AFRICAN PEBBLE TOOL]
There are probably early pebble tools to be found in areas of the
Old World besides Africa; in fact, some prehistorians already claim
to have identified a few. Since the forms and the distinct ways of
making the earlier pebble tools had not yet sufficiently jelled into
a set tradition, they are difficult for us to recognize. It is not
so difficult, however, if there are great numbers of "possibles"
available. A little later in time the tradition becomes more clearly
set, and pebble tools are easier to recognize. So far, really large
collections of pebble tools have only been found and examined in Africa.
CORE-BIFACE TOOLS
The next tradition we�ll look at is the _core_ or biface one. The tools
are large pear-shaped pieces of stone trimmed flat on the two opposite
sides or "faces." Hence "biface" has been used to describe these tools.
The front view is like that of a pear with a rather pointed top, and
the back view looks almost exactly the same. Look at them side on, and
you can see that the front and back faces are the same and have been
trimmed to a thin tip. The real purpose in trimming down the two faces
was to get a good cutting edge all around. You can see all this in the
illustration.
[Illustration: ABBEVILLIAN BIFACE]
We have very little idea of the way in which these core-bifaces were
used. They have been called "hand axes," but this probably gives the
wrong idea, for an ax, to us, is not a pointed tool. All of these early
tools must have been used for a number of jobs--chopping, scraping,
cutting, hitting, picking, and prying. Since the core-bifaces tend to
be pointed, it seems likely that they were used for hitting, picking,
and prying. But they have rough cutting edges, so they could have been
used for chopping, scraping, and cutting.
FLAKE TOOLS
The third tradition is the _flake_ tradition. The idea was to get a
tool with a good cutting edge by simply knocking a nice large flake off
a big block of stone. You had to break off the flake in such a way that
it was broad and thin, and also had a good sharp cutting edge. Once you
really got on to the trick of doing it, this was probably a simpler way
to make a good cutting tool than preparing a biface. You have to know
how, though; I've tried it and have mashed my fingers more than once.
The flake tools look as if they were meant mainly for chopping,
scraping, and cutting jobs. When one made a flake tool, the idea seems
to have been to produce a broad, sharp, cutting edge.
[Illustration: CLACTONIAN FLAKE]
The core-biface and the flake traditions were spread, from earliest
times, over much of Europe, Africa, and western Asia. The map on page
52 shows the general area. Over much of this great region there was
flint. Both of these traditions seem well adapted to flint, although
good core-bifaces and flakes were made from other kinds of stone,
especially in Africa south of the Sahara.
CHOPPERS AND ADZE-LIKE TOOLS
The fourth early tradition is found in southern and eastern Asia, from
northwestern India through Java and Burma into China. Father Maringer
recently reported an early group of tools in Japan, which most resemble
those of Java, called Patjitanian. The prehistoric men in this general
area mostly used quartz and tuff and even petrified wood for their
stone tools (see illustration, p. 46).
This fourth early tradition is called the _chopper-chopping tool_
tradition. It probably has its earliest roots in the pebble tool
tradition of African type. There are several kinds of tools in this
tradition, but all differ from the western core-bifaces and flakes.
There are broad, heavy scrapers or cleavers, and tools with an
adze-like cutting edge. These last-named tools are called "hand adzes,"
just as the core-bifaces of the west have often been called "hand
axes." The section of an adze cutting edge is ? shaped; the section of
an ax is < shaped.
[Illustration: ANYATHIAN ADZE-LIKE TOOL]
There are also pointed pebble tools. Thus the tool kit of these early
south and east Asiatic peoples seems to have included tools for doing
as many different jobs as did the tools of the Western traditions.
Dr. H. L. Movius has emphasized that the tools which were found in the
Peking cave with Peking man belong to the chopper-tool tradition. This
is the only case as yet where the tools and the man have been found
together from very earliest times--if we except Sterkfontein.
DIFFERENCES WITHIN THE TOOL-MAKING TRADITIONS
The latter three great traditions in the manufacture of stone
tools--and the less clear-cut pebble tools before them--are all we have
to show of the cultures of the men of those times. Changes happened in
each of the traditions. As time went on, the tools in each tradition
were better made. There could also be slight regional differences in
the tools within one tradition. Thus, tools with small differences, but
all belonging to one tradition, can be given special group (facies)
names.
This naming of special groups has been going on for some time. Here are
some of these names, since you may see them used in museum displays
of flint tools, or in books. Within each tradition of tool-making
(save the chopper tools), the earliest tool type is at the bottom
of the list, just as it appears in the lowest beds of a geological
stratification.[3]
[3] Archeologists usually make their charts and lists with the
earliest materials at the bottom and the latest on top, since
this is the way they find them in the ground.
Chopper tool (all about equally early):
Anyathian (Burma)
Choukoutienian (China)
Patjitanian (Java)
Soan (India)
Flake:
"Typical Mousterian"
Levalloiso-Mousterian
Levalloisian
Tayacian
Clactonian (localized in England)
Core-biface:
Some blended elements in "Mousterian"
Micoquian (= Acheulean 6 and 7)
Acheulean
Abbevillian (once called "Chellean")
Pebble tool:
Oldowan
Ain Hanech
pre-Stellenbosch
Kafuan
The core-biface and the flake traditions appear in the chart (p. 65).
The early archeologists had many of the tool groups named before they
ever realized that there were broader tool preparation traditions. This
was understandable, for in dealing with the mixture of things that come
out of glacial gravels the easiest thing to do first is to isolate
individual types of tools into groups. First you put a bushel-basketful
of tools on a table and begin matching up types. Then you give names to
the groups of each type. The groups and the types are really matters of
the archeologists' choice; in real life, they were probably less exact
than the archeologists' lists of them. We now know pretty well in which
of the early traditions the various early groups belong.
THE MEANING OF THE DIFFERENT TRADITIONS
What do the traditions really mean? I see them as the standardization
of ways to make tools for particular jobs. We may not know exactly what
job the maker of a particular core-biface or flake tool had in mind. We
can easily see, however, that he already enjoyed a know-how, a set of
persistent habits of tool preparation, which would always give him the
same type of tool when he wanted to make it. Therefore, the traditions
show us that persistent habits already existed for the preparation of
one type of tool or another.
This tells us that one of the characteristic aspects of human culture
was already present. There must have been, in the minds of these
early men, a notion of the ideal type of tool for a particular job.
Furthermore, since we find so many thousands upon thousands of tools
of one type or another, the notion of the ideal types of tools _and_
the know-how for the making of each type must have been held in common
by many men. The notions of the ideal types and the know-how for their
production must have been passed on from one generation to another.
I could even guess that the notions of the ideal type of one or the
other of these tools stood out in the minds of men of those times
somewhat like a symbol of "perfect tool for good job." If this were
so--remember it's only a wild guess of mine--then men were already
symbol users. Now let's go on a further step to the fact that the words
men speak are simply sounds, each different sound being a symbol for a
different meaning. If standardized tool-making suggests symbol-making,
is it also possible that crude word-symbols were also being made? I
suppose that it is not impossible.
There may, of course, be a real question whether tool-utilizing
creatures--our first step, on page 42--were actually men. Other
animals utilize things at hand as tools. The tool-fashioning creature
of our second step is more suggestive, although we may not yet feel
sure that many of the earlier pebble tools were man-made products. But
with the step to standardization and the appearance of the traditions,
I believe we must surely be dealing with the traces of culture-bearing
_men_. The "conventional understandings" which Professor Redfield's
definition of culture suggests are now evidenced for us in the
persistent habits for the preparation of stone tools. Were we able to
see the other things these prehistoric men must have made--in materials
no longer preserved for the archeologist to find--I believe there would
be clear signs of further conventional understandings. The men may have
been physically primitive and pretty shaggy in appearance, but I think
we must surely call them men.
AN OLDER INTERPRETATION OF THE WESTERN TRADITIONS
In the last chapter, I told you that many of the older archeologists
and human paleontologists used to think that modern man was very old.
The supposed ages of Piltdown and Galley Hill were given as evidence
of the great age of anatomically modern man, and some interpretations
of the Swanscombe and Fontéchevade fossils were taken to support
this view. The conclusion was that there were two parallel lines or
�phyla� of men already present well back in the Pleistocene. The
first of these, the more primitive or "paleoanthropic" line, was
said to include Heidelberg, the proto-neanderthaloids and classic
Neanderthal. The more anatomically modern or "neanthropic" line was
thought to consist of Piltdown and the others mentioned above. The
Neanderthaler or paleoanthropic line was thought to have become extinct
after the first phase of the last great glaciation. Of course, the
modern or neanthropic line was believed to have persisted into the
present, as the basis for the world�s population today. But with
Piltdown liquidated, Galley Hill known to be very late, and Swanscombe
and Fontéchevade otherwise interpreted, there is little left of the
so-called parallel phyla theory.
While the theory was in vogue, however, and as long as the European
archeological evidence was looked at in one short-sighted way, the
archeological materials _seemed_ to fit the parallel phyla theory. It
was simply necessary to believe that the flake tools were made only
by the paleoanthropic Neanderthaler line, and that the more handsome
core-biface tools were the product of the neanthropic modern-man line.
Remember that _almost_ all of the early prehistoric European tools
came only from the redeposited gravel beds. This means that the tools
were not normally found in the remains of camp sites or work shops
where they had actually been dropped by the men who made and used
them. The tools came, rather, from the secondary hodge-podge of the
glacial gravels. I tried to give you a picture of the bulldozing action
of glaciers (p. 40) and of the erosion and weathering that were
side-effects of a glacially conditioned climate on the earth�s surface.
As we said above, if one simply plucks tools out of the redeposited
gravels, his natural tendency is to "type" the tools by groups, and to
think that the groups stand for something _on their own_.
In 1906, M. Victor Commont actually made a rare find of what seems
to have been a kind of workshop site, on a terrace above the Somme
river in France. Here, Commont realized, flake tools appeared clearly
in direct association with core-biface tools. Few prehistorians paid
attention to Commont or his site, however. It was easier to believe
that flake tools represented a distinct "culture" and that this
"culture" was that of the Neanderthaler or paleoanthropic line, and
that the core-bifaces stood for another "culture" which was that of the
supposed early modern or neanthropic line. Of course, I am obviously
skipping many details here. Some later sites with Neanderthal fossils
do seem to have only flake tools, but other such sites have both types
of tools. The flake tools which appeared _with_ the core-bifaces
in the Swanscombe gravels were never made much of, although it
was embarrassing for the parallel phyla people that Fontéchevade
ran heavily to flake tools. All in all, the parallel phyla theory
flourished because it seemed so neat and easy to understand.
TRADITIONS ARE TOOL-MAKING HABITS, NOT CULTURES
In case you think I simply enjoy beating a dead horse, look in any
standard book on prehistory written twenty (or even ten) years ago, or
in most encyclopedias. You'll find that each of the individual tool
types, of the West, at least, was supposed to represent a "culture."
The "cultures" were believed to correspond to parallel lines of human
evolution.
In 1937, Mr. Harper Kelley strongly re-emphasized the importance
of Commont's workshop site and the presence of flake tools with
core-bifaces. Next followed Dr. Movius' clear delineation of the
chopper-chopping tool tradition of the Far East. This spoiled the nice
symmetry of the flake-tool = paleoanthropic, core-biface = neanthropic
equations. Then came increasing understanding of the importance of
the pebble tools in Africa, and the location of several more workshop
sites there, especially at Olorgesailie in Kenya. Finally came the
liquidation of Piltdown and the deflation of Galley Hill's date. So it
is at last possible to picture an individual prehistoric man making a
flake tool to do one job and a core-biface tool to do another. Commont
showed us this picture in 1906, but few believed him.
[Illustration: DISTRIBUTION OF TOOL-PREPARATION TRADITIONS
Time approximately 100,000 years ago]
There are certainly a few cases in which flake tools did appear with
few or no core-bifaces. The flake-tool group called Clactonian in
England is such a case. Another good, but certainly later case is
that of the cave on Mount Carmel in Palestine, where the blended
pre-neanderthaloid, 70 per cent modern-type skulls were found. Here, in
the same level with the skulls, were 9,784 flint tools. Of these, only
three--doubtless strays--were core-bifaces; all the rest were flake
tools or flake chips. We noted above how the Fontéchevade cave ran to
flake tools. The only conclusion I would draw from this is that times
and circumstances did exist in which prehistoric men needed only flake
tools. So they only made flake tools for those particular times and
circumstances.
LIFE IN EARLIEST TIMES
What do we actually know of life in these earliest times? In the
glacial gravels, or in the terrace gravels of rivers once swollen by
floods of melt water or heavy rains, or on the windswept deserts, we
find stone tools. The earliest and coarsest of these are the pebble
tools. We do not yet know what the men who made them looked like,
although the Sterkfontein australopithecines probably give us a good
hint. Then begin the more formal tool preparation traditions of the
west--the core-bifaces and the flake tools--and the chopper-chopping
tool series of the farther east. There is an occasional roughly worked
piece of bone. From the gravels which yield the Clactonian flakes of
England comes the fire-hardened point of a wooden spear. There are
also the chance finds of the fossil human bones themselves, of which
we spoke in the last chapter. Aside from the cave of Peking man, none
of the earliest tools have been found in caves. Open air or "workshop"
sites which do not seem to have been disturbed later by some geological
agency are very rare.
The chart on page 65 shows graphically what the situation in
west-central Europe seems to have been. It is not yet certain whether
there were pebble tools there or not. The Fontéchevade cave comes
into the picture about 100,000 years ago or more. But for the earlier
hundreds of thousands of years--below the red-dotted line on the
chart--the tools we find come almost entirely from the haphazard
mixture within the geological contexts.
The stone tools of each of the earlier traditions are the simplest
kinds of all-purpose tools. Almost any one of them could be used for
hacking, chopping, cutting, and scraping; so the men who used them must
have been living in a rough and ready sort of way. They found or hunted
their food wherever they could. In the anthropological jargon, they
were "food-gatherers," pure and simple.
Because of the mixture in the gravels and in the materials they
carried, we can't be sure which animals these men hunted. Bones of
the larger animals turn up in the gravels, but they could just as
well belong to the animals who hunted the men, rather than the other
way about. We don't know. This is why camp sites like Commont's and
Olorgesailie in Kenya are so important when we do find them. The animal
bones at Olorgesailie belonged to various mammals of extremely large
size. Probably they were taken in pit-traps, but there are a number of
groups of three round stones on the site which suggest that the people
used bolas. The South American Indians used three-ball bolas, with the
stones in separate leather bags connected by thongs. These were whirled
and then thrown through the air so as to entangle the feet of a fleeing
animal.
Professor F. Clark Howell recently returned from excavating another
important open air site at Isimila in Tanganyika. The site yielded
the bones of many fossil animals and also thousands of core-bifaces,
flakes, and choppers. But Howell's reconstruction of the food-getting
habits of the Isimila people certainly suggests that the word "hunting"
is too dignified for what they did; "scavenging" would be much nearer
the mark.
During a great part of this time the climate was warm and pleasant. The
second interglacial period (the time between the second and third great
alpine glaciations) lasted a long time, and during much of this time
the climate may have been even better than ours is now. We don't know
that earlier prehistoric men in Europe or Africa lived in caves. They
may not have needed to; much of the weather may have been so nice that
they lived in the open. Perhaps they didn't wear clothes, either.
WHAT THE PEKING CAVE-FINDS TELL US
The one early cave-dwelling we have found is that of Peking man, in
China. Peking man had fire. He probably cooked his meat, or used
the fire to keep dangerous animals away from his den. In the cave
were bones of dangerous animals, members of the wolf, bear, and cat
families. Some of the cat bones belonged to beasts larger than tigers.
There were also bones of other wild animals: buffalo, camel, deer,
elephants, horses, sheep, and even ostriches. Seventy per cent of the
animals Peking man killed were fallow deer. It's much too cold and dry
in north China for all these animals to live there today. So this list
helps us know that the weather was reasonably warm, and that there was
enough rain to grow grass for the grazing animals. The list also helps
the paleontologists to date the find.
Peking man also seems to have eaten plant food, for there are hackberry
seeds in the debris of the cave. His tools were made of sandstone and
quartz and sometimes of a rather bad flint. As we've already seen, they
belong in the chopper-tool tradition. It seems fairly clear that some
of the edges were chipped by right-handed people. There are also many
split pieces of heavy bone. Peking man probably split them so he could
eat the bone marrow, but he may have used some of them as tools.
Many of these split bones were the bones of Peking men. Each one of the
skulls had already had the base broken out of it. In no case were any
of the bones resting together in their natural relation to one another.
There is nothing like a burial; all of the bones are scattered. Now
it's true that animals could have scattered bodies that were not cared
for or buried. But splitting bones lengthwise and carefully removing
the base of a skull call for both the tools and the people to use them.
It's pretty clear who the people were. Peking man was a cannibal.
* * * * *
This rounds out about all we can say of the life and times of early
prehistoric men. In those days life was rough. You evidently had to
watch out not only for dangerous animals but also for your fellow men.
You ate whatever you could catch or find growing. But you had sense
enough to build fires, and you had already formed certain habits for
making the kinds of stone tools you needed. That's about all we know.
But I think we'll have to admit that cultural beginnings had been made,
and that these early people were really _men_.
MORE EVIDENCE of Culture
[Illustration]
While the dating is not yet sure, the material that we get from caves
in Europe must go back to about 100,000 years ago; the time of the
classic Neanderthal group followed soon afterwards. We don't know why
there is no earlier material in the caves; apparently they were not
used before the last interglacial phase (the period just before the
last great glaciation). We know that men of the classic Neanderthal
group were living in caves from about 75,000 to 45,000 years ago.
New radioactive carbon dates even suggest that some of the traces of
culture we'll describe in this chapter may have lasted to about 35,000
years ago. Probably some of the pre-neanderthaloid types of men had
also lived in caves. But we have so far found their bones in caves only
in Palestine and at Fontéchevade.
THE CAVE LAYERS
In parts of France, some peasants still live in caves. In prehistoric
time, many generations of people lived in them. As a result, many
caves have deep layers of debris. The first people moved in and lived
on the rock floor. They threw on the floor whatever they didn't want,
and they tracked in mud; nobody bothered to clean house in those days.
Their debris--junk and mud and garbage and what not--became packed
into a layer. As time went on, and generations passed, the layer grew
thicker. Then there might have been a break in the occupation of the
cave for a while. Perhaps the game animals got scarce and the people
moved away; or maybe the cave became flooded. Later on, other people
moved in and began making a new layer of their own on top of the first
layer. Perhaps this process of layering went on in the same cave for a
hundred thousand years; you can see what happened. The drawing on this
page shows a section through such a cave. The earliest layer is on the
bottom, the latest one on top. They go in order from bottom to top,
earliest to latest. This is the _stratification_ we talked about (p.
12).
[Illustration: SECTION OF SHELTER ON LOWER TERRACE, LE MOUSTIER]
While we may find a mix-up in caves, it's not nearly as bad as the
mixing up that was done by glaciers. The animal bones and shells, the
fireplaces, the bones of men, and the tools the men made all belong
together, if they come from one layer. That's the reason why the cave
of Peking man is so important. It is also the reason why the caves in
Europe and the Near East are so important. We can get an idea of which
things belong together and which lot came earliest and which latest.
In most cases, prehistoric men lived only in the mouths of caves.
They didn't like the dark inner chambers as places to live in. They
preferred rock-shelters, at the bases of overhanging cliffs, if there
was enough overhang to give shelter. When the weather was good, they no
doubt lived in the open air as well. I'll go on using the term "cave"
since it's more familiar, but remember that I really mean rock-shelter,
as a place in which people actually lived.
The most important European cave sites are in Spain, France, and
central Europe; there are also sites in England and Italy. A few caves
are known in the Near East and Africa, and no doubt more sites will be
found when the out-of-the-way parts of Europe, Africa, and Asia are
studied.
AN "INDUSTRY" DEFINED
We have already seen that the earliest European cave materials are
those from the cave of Fontéchevade. Movius feels certain that the
lowest materials here date back well into the third interglacial stage,
that which lay between the Riss (next to the last) and the Würm I
(first stage of the last) alpine glaciations. This material consists
of an _industry_ of stone tools, apparently all made in the flake
tradition. This is the first time we have used the word "industry."
It is useful to call all of the different tools found together in one
layer and made of _one kind of material_ an industry; that is, the
tools must be found together as men left them. Tools taken from the
glacial gravels (or from windswept desert surfaces or river gravels
or any geological deposit) are not "together" in this sense. We might
say the latter have only "geological," not "archeological" context.
Archeological context means finding things just as men left them. We
can tell what tools go together in an "industrial" sense only if we
have archeological context.
Up to now, the only things we could have called "industries" were the
worked stone industry and perhaps the worked (?) bone industry of the
Peking cave. We could add some of the very clear cases of open air
sites, like Olorgesailie. We couldn't use the term for the stone tools
from the glacial gravels, because we do not know which tools belonged
together. But when the cave materials begin to appear in Europe, we can
begin to speak of industries. Most of the European caves of this time
contain industries of flint tools alone.
THE EARLIEST EUROPEAN CAVE LAYERS
We've just mentioned the industry from what is said to be the oldest
inhabited cave in Europe; that is, the industry from the deepest layer
of the site at Fontéchevade. Apparently it doesn't amount to much. The
tools are made of stone, in the flake tradition, and are very poorly
worked. This industry is called _Tayacian_. Its type tool seems to be
a smallish flake tool, but there are also larger flakes which seem to
have been fashioned for hacking. In fact, the type tool seems to be
simply a smaller edition of the Clactonian tool (pictured on p. 45).
None of the Fontéchevade tools are really good. There are scrapers,
and more or less pointed tools, and tools that may have been used
for hacking and chopping. Many of the tools from the earlier glacial
gravels are better made than those of this first industry we see in
a European cave. There is so little of this material available that
we do not know which is really typical and which is not. You would
probably find it hard to see much difference between this industry and
a collection of tools of the type called Clactonian, taken from the
glacial gravels, especially if the Clactonian tools were small-sized.
The stone industry of the bottommost layer of the Mount Carmel cave,
in Palestine, where somewhat similar tools were found, has also been
called Tayacian.
I shall have to bring in many unfamiliar words for the names of the
industries. The industries are usually named after the places where
they were first found, and since these were in most cases in France,
most of the names which follow will be of French origin. However,
the names have simply become handles and are in use far beyond the
boundaries of France. It would be better if we had a non-place-name
terminology, but archeologists have not yet been able to agree on such
a terminology.
THE ACHEULEAN INDUSTRY
Both in France and in Palestine, as well as in some African cave
sites, the next layers in the deep caves have an industry in both the
core-biface and the flake traditions. The core-biface tools usually
make up less than half of all the tools in the industry. However,
the name of the biface type of tool is generally given to the whole
industry. It is called the _Acheulean_, actually a late form of it, as
"Acheulean" is also used for earlier core-biface tools taken from the
glacial gravels. In western Europe, the name used is _Upper Acheulean_
or _Micoquian_. The same terms have been borrowed to name layers E and
F in the Tabun cave, on Mount Carmel in Palestine.
The Acheulean core-biface type of tool is worked on two faces so as
to give a cutting edge all around. The outline of its front view may
be oval, or egg-shaped, or a quite pointed pear shape. The large
chip-scars of the Acheulean core-bifaces are shallow and flat. It is
suspected that this resulted from the removal of the chips with a
wooden club; the deep chip-scars of the earlier Abbevillian core-biface
came from beating the tool against a stone anvil. These tools are
really the best and also the final products of the core-biface
tradition. We first noticed the tradition in the early glacial gravels
(p. 43); now we see its end, but also its finest examples, in the
deeper cave levels.
The flake tools, which really make up the greater bulk of this
industry, are simple scrapers and chips with sharp cutting edges. The
habits used to prepare them must have been pretty much the same as
those used for at least one of the flake industries we shall mention
presently.
There is very little else in these early cave layers. We do not have
a proper “industry” of bone tools. There are traces of fire, and of
animal bones, and a few shells. In Palestine, there are many more
bones of deer than of gazelle in these layers; the deer lives in a
wetter climate than does the gazelle. In the European cave layers, the
animal bones are those of beasts that live in a warm climate. They
belonged in the last interglacial period. We have not yet found the
bones of fossil men definitely in place with this industry.
[Illustration: ACHEULEAN BIFACE]
FLAKE INDUSTRIES FROM THE CAVES
Two more stone industries--the _Levalloisian_ and the
“_Mousterian_”--turn up at approximately the same time in the European
cave layers. Their tools seem to be mainly in the flake tradition,
but according to some of the authorities their preparation also shows
some combination with the habits by which the core-biface tools were
prepared.
Now notice that I don’t tell you the Levalloisian and the “Mousterian”
layers are both above the late Acheulean layers. Look at the cave
section (p. 57) and you’ll find that some “Mousterian of Acheulean
tradition” appears above some “typical Mousterian.” This means that
there may be some kinds of Acheulean industries that are later than
some kinds of “Mousterian.” The same is true of the Levalloisian.
There were now several different kinds of habits that men used in
making stone tools. These habits were based on either one or the other
of the two traditions--core-biface or flake--or on combinations of
the habits used in the preparation techniques of both traditions. All
were popular at about the same time. So we find that people who made
one kind of stone tool industry lived in a cave for a while. Then they
gave up the cave for some reason, and people with another industry
moved in. Then the first people came back--or at least somebody with
the same tool-making habits as the first people. Or maybe a third group
of tool-makers moved in. The people who had these different habits for
making their stone tools seem to have moved around a good deal. They no
doubt borrowed and exchanged tricks of the trade with each other. There
were no patent laws in those days.
The extremely complicated interrelationships of the different habits
used by the tool-makers of this range of time are at last being
systematically studied. M. François Bordes has developed a statistical
method of great importance for understanding these tool preparation
habits.
THE LEVALLOISIAN AND MOUSTERIAN
The easiest Levalloisian tool to spot is a big flake tool. The trick
in making it was to fashion carefully a big chunk of stone (called
the Levalloisian “tortoise core,” because it resembles the shape of
a turtle-shell) and then to whack this in such a way that a large
flake flew off. This large thin flake, with sharp cutting edges, is
the finished Levalloisian tool. There were various other tools in a
Levalloisian industry, but this is the characteristic _Levalloisian_
tool.
There are several “typical Mousterian” stone tools. Different from
the tools of the Levalloisian type, these were made from “disc-like
cores.” There are medium-sized flake “side scrapers.” There are also
some small pointed tools and some small “hand axes.” The last of these
tool types is often a flake worked on both of the flat sides (that
is, bifacially). There are also pieces of flint worked into the form
of crude balls. The pointed tools may have been fixed on shafts to
make short jabbing spears; the round flint balls may have been used as
bolas. Actually, we don’t _know_ what either tool was used for. The
points and side scrapers are illustrated (pp. 64 and 66).
[Illustration: LEVALLOIS FLAKE]
THE MIXING OF TRADITIONS
Nowadays the archeologists are less and less sure of the importance
of any one specific tool type and name. Twenty years ago, they used
to speak simply of Acheulean or Levalloisian or Mousterian tools.
Now, more and more, _all_ of the tools from some one layer in a
cave are called an “industry,” which is given a mixed name. Thus we
have “Levalloiso-Mousterian,” and “Acheuleo-Levalloisian,” and even
“Acheuleo-Mousterian” (or “Mousterian of Acheulean tradition”). Bordes’
systematic work is beginning to clear up some of our confusion.
The time of these late Acheuleo-Levalloiso-Mousterioid industries
is from perhaps as early as 100,000 years ago. It may have lasted
until well past 50,000 years ago. This was the time of the first
phase of the last great glaciation. It was also the time that the
classic group of Neanderthal men was living in Europe. A number of
the Neanderthal fossil finds come from these cave layers. Before the
different habits of tool preparation were understood it used to be
popular to say Neanderthal man was “Mousterian man.” I think this is
wrong. What used to be called “Mousterian” is now known to be a variety
of industries with tools of both core-biface and flake habits, and
so mixed that the word “Mousterian” used alone really doesn’t mean
anything. The Neanderthalers doubtless understood the tool preparation
habits by means of which Acheulean, Levalloisian and Mousterian type
tools were produced. We also have the more modern-like Mount Carmel
people, found in a cave layer of Palestine with tools almost entirely
in the flake tradition, called “Levalloiso-Mousterian,” and the
Fontéchevade-Tayacian (p. 59).
[Illustration: MOUSTERIAN POINT]
OTHER SUGGESTIONS OF LIFE IN THE EARLY CAVE LAYERS
Except for the stone tools, what do we know of the way men lived in the
time range after 100,000 to perhaps 40,000 years ago or even later?
We know that in the area from Europe to Palestine, at least some of
the people (some of the time) lived in the fronts of caves and warmed
themselves over fires. In Europe, in the cave layers of these times,
we find the bones of different animals; the bones in the lowest layers
belong to animals that lived in a warm climate; above them are the
bones of those who could stand the cold, like the reindeer and mammoth.
Thus, the meat diet must have been changing, as the glacier crept
farther south. Shells and possibly fish bones have lasted in these
cave layers, but there is not a trace of the vegetable foods and the
nuts and berries and other wild fruits that must have been eaten when
they could be found.
[Illustration: CHART SHOWING PRESENT UNDERSTANDING OF RELATIONSHIPS AND
SUCCESSION OF TOOL-PREPARATION TRADITIONS, INDUSTRIES, AND ASSEMBLAGES
OF WEST-CENTRAL EUROPE
Wavy lines indicate transitions in industrial habits. These transitions
are not yet understood in detail. The glacial and climatic scheme shown
is the alpine one.]
Bone tools have also been found from this period. Some are called
scrapers, and there are also long chisel-like leg-bone fragments
believed to have been used for skinning animals. Larger hunks of bone,
which seem to have served as anvils or chopping blocks, are fairly
common.
Bits of mineral, used as coloring matter, have also been found. We
don’t know what the color was used for.
[Illustration: MOUSTERIAN SIDE SCRAPER]
There is a small but certain number of cases of intentional burials.
These burials have been found on the floors of the caves; in other
words, the people dug graves in the places where they lived. The holes
made for the graves were small. For this reason (or perhaps for some
other?) the bodies were in a curled-up or contracted position. Flint or
bone tools or pieces of meat seem to have been put in with some of the
bodies. In several cases, flat stones had been laid over the graves.
TOOLS FROM AFRICA AND ASIA ABOUT 100,000 YEARS AGO
Professor Movius characterizes early prehistoric Africa as a continent
showing a variety of stone industries. Some of these industries were
purely local developments and some were practically identical with
industries found in Europe at the same time. From northwest Africa
to Capetown--excepting the tropical rain forest region of the west
center--tools of developed Acheulean, Levalloisian, and Mousterian
types have been recognized. Often they are named after African place
names.
In east and south Africa lived people whose industries show a
development of the Levalloisian technique. Such industries are
called Stillbay. Another industry, developed on the basis of the
Acheulean technique, is called Fauresmith. From the northwest comes
an industry with tanged points and flake-blades; this is called the
Aterian. The tropical rain forest region contained people whose stone
tools apparently show adjustment to this peculiar environment; the
so-called Sangoan industry includes stone picks, adzes, core-bifaces
of specialized Acheulean type, and bifacial points which were probably
spearheads.
In western Asia, even as far as the east coast of India, the tools of
the Eurafrican core-biface and flake tool traditions continued to be
used. But in the Far East, as we noted in the last chapter, men had
developed characteristic stone chopper and chopping tools. This tool
preparation tradition--basically a pebble tool tradition--lasted to the
very end of the Ice Age.
When more intact open air sites such as that of an earlier time at
Olorgesailie, and more stratified cave sites are found and excavated
in Asia and Africa, we shall be able to get a more complete picture.
So far, our picture of the general cultural level of the Old World at
about 100,000 years ago--and soon afterwards--is best from Europe, but
it is still far from complete there, too.
CULTURE AT THE BEGINNING OF THE LAST GREAT GLACIAL PERIOD
The few things we have found must indicate only a very small part
of the total activities of the people who lived at the time. All of
the things they made of wood and bark, of skins, of anything soft,
are gone. The fact that burials were made, at least in Europe and
Palestine, is pretty clear proof that the people had some notion of a
life after death. But what this notion really was, or what gods (if
any) men believed in, we cannot know. Dr. Movius has also reminded me
of the so-called bear cults--cases in which caves have been found which
contain the skulls of bears in apparently purposeful arrangement. This
might suggest some notion of hoarding up the spirits or the strength of
bears killed in the hunt. Probably the people lived in small groups,
as hunting and food-gathering seldom provide enough food for large
groups of people. These groups probably had some kind of leader or
“chief.” Very likely the rude beginnings of rules for community life
and politics, and even law, were being made. But what these were, we
do not know. We can only guess about such things, as we can only guess
about many others; for example, how the idea of a family must have been
growing, and how there may have been witch doctors who made beginnings
in medicine or in art, in the materials they gathered for their trade.
The stone tools help us most. They have lasted, and we can find
them. As they come to us, from this cave or that, and from this
layer or that, the tool industries show a variety of combinations
of the different basic habits or traditions of tool preparation.
This seems only natural, as the groups of people must have been very
small. The mixtures and blendings of the habits used in making stone
tools must mean that there were also mixtures and blends in many of
the other ideas and beliefs of these small groups. And what this
probably means is that there was no one _culture_ of the time. It is
certainly unlikely that there were simply three cultures, “Acheulean,”
“Levalloisian,” and “Mousterian,” as has been thought in the past.
Rather there must have been a great variety of loosely related cultures
at about the same stage of advancement. We could say, too, that here
we really begin to see, for the first time, that remarkable ability
of men to adapt themselves to a variety of conditions. We shall see
this adaptive ability even more clearly as time goes on and the record
becomes more complete.
Over how great an area did these loosely related cultures reach in
the time 75,000 to 45,000 or even as late as 35,000 years ago? We
have described stone tools made in one or another of the flake and
core-biface habits, for an enormous area. It covers all of Europe, all
of Africa, the Near East, and parts of India. It is perfectly possible
that the flake and core-biface habits lasted on after 35,000 years ago,
in some places outside of Europe. In northern Africa, for example, we
are certain that they did (see chart, p. 72).
On the other hand, in the Far East (China, Burma, Java) and in northern
India, the tools of the old chopper-tool tradition were still being
made. Out there, we must assume, there was a different set of loosely
related cultures. At least, there was a different set of loosely
related habits for the making of tools. But the men who made them must
have looked much like the men of the West. Their tools were different,
but just as useful.
As to what the men of the West looked like, I’ve already hinted at all
we know so far (pp. 29 ff.). The Neanderthalers were present at
the time. Some more modern-like men must have been about, too, since
fossils of them have turned up at Mount Carmel in Palestine, and at
Teshik Tash, in Trans-caspian Russia. It is still too soon to know
whether certain combinations of tools within industries were made
only by certain physical types of men. But since tools of both the
core-biface and the flake traditions, and their blends, turn up from
South Africa to England to India, it is most unlikely that only one
type of man used only one particular habit in the preparation of tools.
What seems perfectly clear is that men in Africa and men in India were
making just as good tools as the men who lived in western Europe.
EARLY MODERNS
[Illustration]
From some time during the first inter-stadial of the last great
glaciation (say some time after about 40,000 years ago), we have
more accurate dates for the European-Mediterranean area and less
accurate ones for the rest of the Old World. This is probably
because the effects of the last glaciation have been studied in the
European-Mediterranean area more than they have been elsewhere.
A NEW TRADITION APPEARS
Something new was probably beginning to happen in the
European-Mediterranean area about 40,000 years ago, though all the
rest of the Old World seems to have been going on as it had been. I
can’t be sure of this because the information we are using as a basis
for dates is very inaccurate for the areas outside of Europe and the
Mediterranean.
We can at least make a guess. In Egypt and north Africa, men were still
using the old methods of making stone tools. This was especially true
of flake tools of the Levalloisian type, save that they were growing
smaller and smaller as time went on. But at the same time, a new
tradition was becoming popular in westernmost Asia and in Europe. This
was the blade-tool tradition.
BLADE TOOLS
A stone blade is really just a long parallel-sided flake, as the
drawing shows. It has sharp cutting edges, and makes a very useful
knife. The real trick is to be able to make one. It is almost
impossible to make a blade out of any stone but flint or a natural
volcanic glass called obsidian. And even if you have flint or obsidian,
you first have to work up a special cone-shaped “blade-core,” from
which to whack off blades.
[Illustration: PLAIN BLADE]
You whack with a hammer stone against a bone or antler punch which is
directed at the proper place on the blade-core. The blade-core has to
be well supported or gripped while this is going on. To get a good
flint blade tool takes a great deal of know-how.
Remember that a tradition in stone tools means no more than that some
particular way of making the tools got started and lasted a long time.
Men who made some tools in one tradition or set of habits would also
make other tools for different purposes by means of another tradition
or set of habits. It was even possible for the two sets of habits to
become combined.
THE EARLIEST BLADE TOOLS
The oldest blade tools we have found were deep down in the layers of
the Mount Carmel caves, in Tabun Eb and Ea. Similar tools have been
found in equally early cave levels in Syria; their popularity there
seems to fluctuate a bit. Some more or less parallel-sided flakes are
known in the Levalloisian industry in France, but they are probably
no earlier than Tabun E. The Tabun blades are part of a local late
“Acheulean” industry, which is characterized by core-biface “hand
axes,” but which has many flake tools as well. Professor F. E.
Zeuner believes that this industry may be more than 120,000 years old;
actually its date has not yet been fixed, but it is very old--older
than the fossil finds of modern-like men in the same caves.
[Illustration: SUCCESSION OF ICE AGE FLINT TYPES, INDUSTRIES, AND
ASSEMBLAGES, AND OF FOSSIL MEN, IN NORTHWESTERN EURAFRASIA]
For some reason, the habit of making blades in Palestine and Syria was
interrupted. Blades only reappeared there at about the same time they
were first made in Europe, some time after 45,000 years ago; that is,
after the first phase of the last glaciation was ended.
[Illustration: BACKED BLADE]
We are not sure just where the earliest _persisting_ habits for the
production of blade tools developed. Impressed by the very early
momentary appearance of blades at Tabun on Mount Carmel, Professor
Dorothy A. Garrod first favored the Near East as a center of origin.
She spoke of “some as yet unidentified Asiatic centre,” which she
thought might be in the highlands of Iran or just beyond. But more
recent work has been done in this area, especially by Professor Coon,
and the blade tools do not seem to have an early appearance there. When
the blade tools reappear in the Syro-Palestinian area, they do so in
industries which also include Levalloiso-Mousterian flake tools. From
the point of view of form and workmanship, the blade tools themselves
are not so fine as those which seem to be making their appearance
in western Europe about the same time. There is a characteristic
Syro-Palestinian flake point, possibly a projectile tip, called the
Emiran, which is not known from Europe. The appearance of blade tools,
together with Levalloiso-Mousterian flakes, continues even after the
Emiran point has gone out of use.
It seems clear that the production of blade tools did not immediately
swamp the set of older habits in Europe, too; the use of flake
tools also continued there. This was not so apparent to the older
archeologists, whose attention was focused on individual tool types. It
is not, in fact, impossible--although it is certainly not proved--that
the technique developed in the preparation of the Levalloisian tortoise
core (and the striking of the Levalloisian flake from it) might have
followed through to the conical core and punch technique for the
production of blades. Professor Garrod is much impressed with the speed
of change during the later phases of the last glaciation, and its
probable consequences. She speaks of “the greater number of industries
having enough individual character to be classified as distinct ...
since evolution now starts to outstrip diffusion.” Her “evolution” here
is of course an industrial evolution rather than a biological one.
Certainly the people of Europe had begun to make blade tools during
the warm spell after the first phase of the last glaciation. By about
40,000 years ago blades were well established. The bones of the blade
tool makers we’ve found so far indicate that anatomically modern men
had now certainly appeared. Unfortunately, only a few fossil men have
so far been found from the very beginning of the blade tool range in
Europe (or elsewhere). What I certainly shall _not_ tell you is that
conquering bands of fine, strong, anatomically modern men, armed with
superior blade tools, came sweeping out of the East to exterminate the
lowly Neanderthalers. Even if we don’t know exactly what happened, I’d
lay a good bet it wasn�t that simple.
We do know a good deal about different blade industries in Europe.
Almost all of them come from cave layers. There is a great deal of
complication in what we find. The chart (p. 72) tries to simplify
this complication; in fact, it doubtless simplifies it too much. But
it may suggest all the complication of industries which is going
on at this time. You will note that the upper portion of my much
simpler chart (p. 65) covers the same material (in the section
marked “Various Blade-Tool Industries”). That chart is certainly too
simplified.
You will realize that all this complication comes not only from
the fact that we are finding more material. It is due also to the
increasing ability of men to adapt themselves to a great variety of
situations. Their tools indicate this adaptiveness. We know there was
a good deal of climatic change at this time. The plants and animals
that men used for food were changing, too. The great variety of tools
and industries we now find reflect these changes and the ability of men
to keep up with the times. Now, for example, is the first time we are
sure that there are tools to _make_ other tools. They also show men’s
increasing ability to adapt themselves.
SPECIAL TYPES OF BLADE TOOLS
The most useful tools that appear at this time were made from blades.
1. The “backed” blade. This is a knife made of a flint blade, with
one edge purposely blunted, probably to save the user’s fingers
from being cut. There are several shapes of backed blades (p.
73).
[Illustration: TWO BURINS]
2. The _burin_ or “graver.” The burin was the original chisel. Its
cutting edge is _transverse_, like a chisel’s. Some burins are
made like a screw-driver, save that burins are sharp. Others have
edges more like the blade of a chisel or a push plane, with
only one bevel. Burins were probably used to make slots in wood
and bone; that is, to make handles or shafts for other tools.
They must also be the tools with which much of the engraving on
bone (see p. 83) was done. There is a bewildering variety of
different kinds of burins.
[Illustration: TANGED POINT]
3. The “tanged” point. These stone points were used to tip arrows or
light spears. They were made from blades, and they had a long tang
at the bottom where they were fixed to the shaft. At the place
where the tang met the main body of the stone point, there was
a marked “shoulder,” the beginnings of a barb. Such points had
either one or two shoulders.
[Illustration: NOTCHED BLADE]
4. The “notched” or “strangulated” blade. Along with the points for
arrows or light spears must go a tool to prepare the arrow or
spear shaft. Today, such a tool would be called a “draw-knife” or
a “spoke-shave,” and this is what the notched blades probably are.
Our spoke-shaves have sharp straight cutting blades and really
“shave.” Notched blades of flint probably scraped rather than cut.
5. The “awl,” “drill,” or “borer.” These blade tools are worked out
to a spike-like point. They must have been used for making holes
in wood, bone, shell, skin, or other things.
[Illustration: DRILL OR AWL]
6. The “end-scraper on a blade” is a tool with one or both ends
worked so as to give a good scraping edge. It could have been used
to hollow out wood or bone, scrape hides, remove bark from trees,
and a number of other things (p. 78).
There is one very special type of flint tool, which is best known from
western Europe in an industry called the Solutrean. These tools were
usually made of blades, but the best examples are so carefully worked
on both sides (bifacially) that it is impossible to see the original
blade. This tool is
7. The “laurel leaf” point. Some of these tools were long and
dagger-like, and must have been used as knives or daggers. Others
were small, called “willow leaf,” and must have been mounted on
spear or arrow shafts. Another typical Solutrean tool is the
“shouldered” point. Both the “laurel leaf” and “shouldered” point
types are illustrated (see above and p. 79).
[Illustration: END-SCRAPER ON A BLADE]
[Illustration: LAUREL LEAF POINT]
The industries characterized by tools in the blade tradition also
yield some flake and core tools. We will end this list with two types
of tools that appear at this time. The first is made of a flake; the
second is a core tool.
[Illustration: SHOULDERED POINT]
8. The “keel-shaped round scraper” is usually small and quite round,
and has had chips removed up to a peak in the center. It is called
“keel-shaped” because it is supposed to look (when upside down)
like a section through a boat. Actually, it looks more like a tent
or an umbrella. Its outer edges are sharp all the way around, and
it was probably a general purpose scraping tool (see illustration,
p. 81).
9. The “keel-shaped nosed scraper” is a much larger and heavier tool
than the round scraper. It was made on a core with a flat bottom,
and has one nicely worked end or “nose.” Such tools are usually
large enough to be easily grasped, and probably were used like
push planes (see illustration, p. 81).
[Illustration: KEEL-SHAPED ROUND SCRAPER]
[Illustration: KEEL-SHAPED NOSED SCRAPER]
The stone tools (usually made of flint) we have just listed are among
the most easily recognized blade tools, although they show differences
in detail at different times. There are also many other kinds. Not
all of these tools appear in any one industry at one time. Thus the
different industries shown in the chart (p. 72) each have only some
of the blade tools we’ve just listed, and also a few flake tools. Some
industries even have a few core tools. The particular types of blade
tools appearing in one cave layer or another, and the frequency of
appearance of the different types, tell which industry we have in each
layer.
OTHER KINDS OF TOOLS
By this time in Europe--say from about 40,000 to about 10,000 years
ago--we begin to find other kinds of material too. Bone tools begin
to appear. There are knives, pins, needles with eyes, and little
double-pointed straight bars of bone that were probably fish-hooks. The
fish-line would have been fastened in the center of the bar; when the
fish swallowed the bait, the bar would have caught cross-wise in the
fish’s mouth.
One quite special kind of bone tool is a long flat point for a light
spear. It has a deep notch cut up into the breadth of its base, and is
called a “split-based bone point” (p. 82). We know examples of bone
beads from these times, and of bone handles for flint tools. Pierced
teeth of some animals were worn as beads or pendants, but I am not sure
that elks’ teeth were worn this early. There are even spool-shaped
“buttons” or toggles.
[Illustration: SPLIT-BASED BONE POINT]
[Illustration: SPEAR-THROWER]
[Illustration: BONE HARPOON]
Antler came into use for tools, especially in central and western
Europe. We do not know the use of one particular antler tool that
has a large hole bored in one end. One suggestion is that it was
a thong-stropper used to strop or work up hide thongs (see
illustration, below); another suggestion is that it was an arrow-shaft
straightener.
Another interesting tool, usually of antler, is the spear-thrower,
which is little more than a stick with a notch or hook on one end.
The hook fits into the butt end of the spear, and the length of the
spear-thrower allows you to put much more power into the throw (p.
82). It works on pretty much the same principle as the sling.
Very fancy harpoons of antler were also made in the latter half of
the period in western Europe. These harpoons had barbs on one or both
sides and a base which would slip out of the shaft (p. 82). Some have
engraved decoration.
THE BEGINNING OF ART
[Illustration: THONG-STROPPER]
In western Europe, at least, the period saw the beginning of several
kinds of art work. It is handy to break the art down into two great
groups: the movable art, and the cave paintings and sculpture. The
movable art group includes the scratchings, engravings, and modeling
which decorate tools and weapons. Knives, stroppers, spear-throwers,
harpoons, and sometimes just plain fragments of bone or antler are
often carved. There is also a group of large flat pebbles which seem
almost to have served as sketch blocks. The surfaces of these various
objects may show animals, or rather abstract floral designs, or
geometric designs.
[Illustration: “VENUS” FIGURINE FROM WILLENDORF]
Some of the movable art is not done on tools. The most remarkable
examples of this class are little figures of women. These women seem to
be pregnant, and their most female characteristics are much emphasized.
It is thought that these “Venus” or “Mother-goddess” figurines may be
meant to show the great forces of nature--fertility and the birth of
life.
CAVE PAINTINGS
In the paintings on walls and ceilings of caves we have some examples
that compare with the best art of any time. The subjects were usually
animals, the great cold-weather beasts of the end of the Ice Age: the
mammoth, the wooly rhinoceros, the bison, the reindeer, the wild horse,
the bear, the wild boar, and wild cattle. As in the movable art, there
are different styles in the cave art. The really great cave art is
pretty well restricted to southern France and Cantabrian (northwestern)
Spain.
There are several interesting things about the “Franco-Cantabrian” cave
art. It was done deep down in the darkest and most dangerous parts of
the caves, although the men lived only in the openings of caves. If you
think what they must have had for lights--crude lamps of hollowed stone
have been found, which must have burned some kind of oil or grease,
with a matted hair or fiber wick--and of the animals that may have
lurked in the caves, you’ll understand the part about danger. Then,
too, we’re sure the pictures these people painted were not simply to be
looked at and admired, for they painted one picture right over other
pictures which had been done earlier. Clearly, it was the _act_ of
_painting_ that counted. The painter had to go way down into the most
mysterious depths of the earth and create an animal in paint. Possibly
he believed that by doing this he gained some sort of magic power over
the same kind of animal when he hunted it in the open air. It certainly
doesn’t look as if he cared very much about the picture he painted--as
a finished product to be admired--for he or somebody else soon went
down and painted another animal right over the one he had done.
The cave art of the Franco-Cantabrian style is one of the great
artistic achievements of all time. The subjects drawn are almost always
the larger animals of the time: the bison, wild cattle and horses, the
wooly rhinoceros, the mammoth, the wild boar, and the bear. In some of
the best examples, the beasts are drawn in full color and the paintings
are remarkably alive and charged with energy. They come from the hands
of men who knew the great animals well--knew the feel of their fur, the
tremendous drive of their muscles, and the danger one faced when he
hunted them.
Another artistic style has been found in eastern Spain. It includes
lively drawings, often of people hunting with bow and arrow. The East
Spanish art is found on open rock faces and in rock-shelters. It is
less spectacular and apparently more recent than the Franco-Cantabrian
cave art.
LIFE AT THE END OF THE ICE AGE IN EUROPE
Life in these times was probably as good as a hunter could expect it
to be. Game and fish seem to have been plentiful; berries and wild
fruits probably were, too. From France to Russia, great pits or
piles of animal bones have been found. Some of this killing was done
as our Plains Indians killed the buffalo--by stampeding them over
steep river banks or cliffs. There were also good tools for hunting,
however. In western Europe, people lived in the openings of caves and
under overhanging rocks. On the great plains of eastern Europe, very
crude huts were being built, half underground. The first part of this
time must have been cold, for it was the middle and end phases of the
last great glaciation. Northern Europe from Scotland to Scandinavia,
northern Germany and Russia, and also the higher mountains to the
south, were certainly covered with ice. But people had fire, and the
needles and tools that were used for scraping hides must mean that they
wore clothing.
It is clear that men were thinking of a great variety of things besides
the tools that helped them get food and shelter. Such burials as we
find have more grave-gifts than before. Beads and ornaments and often
flint, bone, or antler tools are included in the grave, and sometimes
the body is sprinkled with red ochre. Red is the color of blood, which
means life, and of fire, which means heat. Professor Childe wonders if
the red ochre was a pathetic attempt at magic--to give back to the body
the heat that had gone from it. But pathetic or not, it is sure proof
that these people were already moved by death as men still are moved by
it.
Their art is another example of the direction the human mind was
taking. And when I say human, I mean it in the fullest sense, for this
is the time in which fully modern man has appeared. On page 34, we
spoke of the Cro-Magnon group and of the Combe Capelle-Brünn group of
Caucasoids and of the Grimaldi “Negroids,” who are no longer believed
to be Negroid. I doubt that any one of these groups produced most of
the achievements of the times. It’s not yet absolutely certain which
particular group produced the great cave art. The artists were almost
certainly a blend of several (no doubt already mixed) groups. The pair
of Grimaldians were buried in a grave with a sprinkling of red ochre,
and were provided with shell beads and ornaments and with some blade
tools of flint. Regardless of the different names once given them by
the human paleontologists, each of these groups seems to have shared
equally in the cultural achievements of the times, for all that the
archeologists can say.
MICROLITHS
One peculiar set of tools seems to serve as a marker for the very last
phase of the Ice Age in southwestern Europe. This tool-making habit is
also found about the shore of the Mediterranean basin, and it moved
into northern Europe as the last glaciation pulled northward. People
began making blade tools of very small size. They learned how to chip
very slender and tiny blades from a prepared core. Then they made these
little blades into tiny triangles, half-moons (“lunates”), trapezoids,
and several other geometric forms. These little tools are called
“microliths.” They are so small that most of them must have been fixed
in handles or shafts.
[Illustration: MICROLITHS
BLADE FRAGMENT
BURIN
LUNATE
TRAPEZOID
SCALENE TRIANGLE
ARROWHEAD]
We have found several examples of microliths mounted in shafts. In
northern Europe, where their use soon spread, the microlithic triangles
or lunates were set in rows down each side of a bone or wood point.
One corner of each little triangle stuck out, and the whole thing
made a fine barbed harpoon. In historic times in Egypt, geometric
trapezoidal microliths were still in use as arrowheads. They were
fastened--broad end out--on the end of an arrow shaft. It seems queer
to give an arrow a point shaped like a “T.” Actually, the little points
were very sharp, and must have pierced the hides of animals very
easily. We also think that the broader cutting edge of the point may
have caused more bleeding than a pointed arrowhead would. In hunting
fleet-footed animals like the gazelle, which might run for miles after
being shot with an arrow, it was an advantage to cause as much bleeding
as possible, for the animal would drop sooner.
We are not really sure where the microliths were first invented. There
is some evidence that they appear early in the Near East. Their use
was very common in northwest Africa but this came later. The microlith
makers who reached south Russia and central Europe possibly moved up
out of the Near East. Or it may have been the other way around; we
simply don’t yet know.
Remember that the microliths we are talking about here were made from
carefully prepared little blades, and are often geometric in outline.
Each microlithic industry proper was made up, in good part, of such
tiny blade tools. But there were also some normal-sized blade tools and
even some flake scrapers, in most microlithic industries. I emphasize
this bladelet and the geometric character of the microlithic industries
of the western Old World, since there has sometimes been confusion in
the matter. Sometimes small flake chips, utilized as minute pointed
tools, have been called “microliths.” They may be _microlithic_ in size
in terms of the general meaning of the word, but they do not seem to
belong to the sub-tradition of the blade tool preparation habits which
we have been discussing here.
LATER BLADE-TOOL INDUSTRIES OF THE NEAR EAST AND AFRICA
The blade-tool industries of normal size we talked about earlier spread
from Europe to central Siberia. We noted that blade tools were made
in western Asia too, and early, although Professor Garrod is no longer
sure that the whole tradition originated in the Near East. If you look
again at my chart (p. 72) you will note that in western Asia I list
some of the names of the western European industries, but with the
qualification “-like” (for example, “Gravettian-like”). The western
Asiatic blade-tool industries do vaguely recall some aspects of those
of western Europe, but we would probably be better off if we used
completely local names for them. The “Emiran” of my chart is such an
example; its industry includes a long spike-like blade point which has
no western European counterpart.
When we last spoke of Africa (p. 66), I told you that stone tools
there were continuing in the Levalloisian flake tradition, and were
becoming smaller. At some time during this process, two new tool
types appeared in northern Africa: one was the Aterian point with
a tang (p. 67), and the other was a sort of “laurel leaf” point,
called the “Sbaikian.” These two tool types were both produced from
flakes. The Sbaikian points, especially, are roughly similar to some
of the Solutrean points of Europe. It has been suggested that both the
Sbaikian and Aterian points may be seen on their way to France through
their appearance in the Spanish cave deposits of Parpallo, but there is
also a rival “pre-Solutrean” in central Europe. We still do not know
whether there was any contact between the makers of these north African
tools and the Solutrean tool-makers. What does seem clear is that the
blade-tool tradition itself arrived late in northern Africa.
NETHER AFRICA
Blade tools and “laurel leaf” points and some other probably late
stone tool types also appear in central and southern Africa. There
are geometric microliths on bladelets and even some coarse pottery in
east Africa. There is as yet no good way of telling just where these
items belong in time; in broad geological terms they are “late.”
Some people have guessed that they are as early as similar European
and Near Eastern examples, but I doubt it. The makers of small-sized
Levalloisian flake tools occupied much of Africa until very late in
time.
THE FAR EAST
India and the Far East still seem to be going their own way. In India,
some blade tools have been found. These are not well dated, save that
we believe they must be post-Pleistocene. In the Far East it looks as
if the old chopper-tool tradition was still continuing. For Burma,
Dr. Movius feels this is fairly certain; for China he feels even more
certain. Actually, we know very little about the Far East at about the
time of the last glaciation. This is a shame, too, as you will soon
agree.
THE NEW WORLD BECOMES INHABITED
At some time toward the end of the last great glaciation--almost
certainly after 20,000 years ago--people began to move over Bering
Strait, from Asia into America. As you know, the American Indians have
been assumed to be basically Mongoloids. New studies of blood group
types make this somewhat uncertain, but there is no doubt that the
ancestors of the American Indians came from Asia.
The stone-tool traditions of Europe, Africa, the Near and Middle East,
and central Siberia, did _not_ move into the New World. With only a
very few special or late exceptions, there are _no_ core-bifaces,
flakes, or blade tools of the Old World. Such things just haven’t been
found here.
This is why I say it’s a shame we don’t know more of the end of the
chopper-tool tradition in the Far East. According to Weidenreich,
the Mongoloids were in the Far East long before the end of the last
glaciation. If the genetics of the blood group types do demand a
non-Mongoloid ancestry for the American Indians, who else may have been
in the Far East 25,000 years ago? We know a little about the habits
for making stone tools which these first people brought with them,
and these habits don’t conform with those of the western Old World.
We’d better keep our eyes open for whatever happened to the end of
the chopper-tool tradition in northern China; already there are hints
that it lasted late there. Also we should watch future excavations
in eastern Siberia. Perhaps we shall find the chopper-tool tradition
spreading up that far.
THE NEW ERA
Perhaps it comes in part from the way I read the evidence and perhaps
in part it is only intuition, but I feel that the materials of this
chapter suggest a new era in the ways of life. Before about 40,000
years ago, people simply “gathered” their food, wandering over large
areas to scavenge or to hunt in a simple sort of way. But here we
have seen them “settling-in” more, perhaps restricting themselves in
their wanderings and adapting themselves to a given locality in more
intensive ways. This intensification might be suggested by the word
“collecting.” The ways of life we described in the earlier chapters
were “food-gathering” ways, but now an era of “food-collecting” has
begun. We shall see further intensifications of it in the next chapter.
END AND PRELUDE
[Illustration]
Up to the end of the last glaciation, we prehistorians have a
relatively comfortable time schedule. The farther back we go the less
exact we can be about time and details. Elbow-room of five, ten,
even fifty or more thousands of years becomes available for us to
maneuver in as we work backward in time. But now our story has come
forward to the point where more exact methods of dating are at hand.
The radioactive carbon method reaches back into the span of the last
glaciation. There are other methods, developed by the geologists and
paleobotanists, which supplement and extend the usefulness of the
radioactive carbon dates. And, happily, as our means of being more
exact increases, our story grows more exciting. There are also more
details of culture for us to deal with, which add to the interest.
CHANGES AT THE END OF THE ICE AGE
The last great glaciation of the Ice Age was a two-part affair, with a
sub-phase at the end of the second part. In Europe the last sub-phase
of this glaciation commenced somewhere around 15,000 years ago. Then
the glaciers began to melt back, for the last time. Remember that
Professor Antevs (p. 19) isn’t sure the Ice Age is over yet! This
melting sometimes went by fits and starts, and the weather wasn’t
always changing for the better; but there was at least one time when
European weather was even better than it is now.
The melting back of the glaciers and the weather fluctuations caused
other changes, too. We know a fair amount about these changes in
Europe. In an earlier chapter, we said that the whole Ice Age was a
matter of continual change over long periods of time. As the last
glaciers began to melt back some interesting things happened to mankind.
In Europe, along with the melting of the last glaciers, geography
itself was changing. Britain and Ireland had certainly become islands
by 5000 B.C. The Baltic was sometimes a salt sea, sometimes a large
fresh-water lake. Forests began to grow where the glaciers had been,
and in what had once been the cold tundra areas in front of the
glaciers. The great cold-weather animals--the mammoth and the wooly
rhinoceros--retreated northward and finally died out. It is probable
that the efficient hunting of the earlier people of 20,000 or 25,000
to about 12,000 years ago had helped this process along (see p. 86).
Europeans, especially those of the post-glacial period, had to keep
changing to keep up with the times.
The archeological materials for the time from 10,000 to 6000 B.C. seem
simpler than those of the previous five thousand years. The great cave
art of France and Spain had gone; so had the fine carving in bone and
antler. Smaller, speedier animals were moving into the new forests. New
ways of hunting them, or ways of getting other food, had to be found.
Hence, new tools and weapons were necessary. Some of the people who
moved into northern Germany were successful reindeer hunters. Then the
reindeer moved off to the north, and again new sources of food had to
be found.
THE READJUSTMENTS COMPLETED IN EUROPE
After a few thousand years, things began to look better. Or at least
we can say this: By about 6000 B.C. we again get hotter archeological
materials. The best of these come from the north European area:
Britain, Belgium, Holland, Denmark, north Germany, southern Norway and
Sweden. Much of this north European material comes from bogs and swamps
where it had become water-logged and has kept very well. Thus we have
much more complete _assemblages_[4] than for any time earlier.
[4] “Assemblage” is a useful word when there are different kinds of
archeological materials belonging together, from one area and of
one time. An assemblage is made up of a number of “industries”
(that is, all the tools in chipped stone, all the tools in
bone, all the tools in wood, the traces of houses, etc.) and
everything else that manages to survive, such as the art, the
burials, the bones of the animals used as food, and the traces
of plant foods; in fact, everything that has been left to us
and can be used to help reconstruct the lives of the people to
whom it once belonged. Our own present-day “assemblage” would be
the sum total of all the objects in our mail-order catalogues,
department stores and supply houses of every sort, our churches,
our art galleries and other buildings, together with our roads,
canals, dams, irrigation ditches, and any other traces we might
leave of ourselves, from graves to garbage dumps. Not everything
would last, so that an archeologist digging us up--say 2,000
years from now--would find only the most durable items in our
assemblage.
The best known of these assemblages is the _Maglemosian_, named after a
great Danish peat-swamp where much has been found.
[Illustration: SKETCH OF MAGLEMOSIAN ASSEMBLAGE
CHIPPED STONE
HEMP
GROUND STONE
BONE AND ANTLER
WOOD]
In the Maglemosian assemblage the flint industry was still very
important. Blade tools, tanged arrow points, and burins were still
made, but there were also axes for cutting the trees in the new
forests. Moreover, the tiny microlithic blades, in a variety of
geometric forms, are also found. Thus, a specialized tradition that
possibly began east of the Mediterranean had reached northern Europe.
There was also a ground stone industry; some axes and club-heads were
made by grinding and polishing rather than by chipping. The industries
in bone and antler show a great variety of tools: axes, fish-hooks,
fish spears, handles and hafts for other tools, harpoons, and clubs.
A remarkable industry in wood has been preserved. Paddles, sled
runners, handles for tools, and bark floats for fish-nets have been
found. There are even fish-nets made of plant fibers. Canoes of some
kind were no doubt made. Bone and antler tools were decorated with
simple patterns, and amber was collected. Wooden bows and arrows are
found.
It seems likely that the Maglemosian bog finds are remains of summer
camps, and that in winter the people moved to higher and drier regions.
Childe calls them the “Forest folk”; they probably lived much the
same sort of life as did our pre-agricultural Indians of the north
central states. They hunted small game or deer; they did a great deal
of fishing; they collected what plant food they could find. In fact,
their assemblage shows us again that remarkable ability of men to adapt
themselves to change. They had succeeded in domesticating the dog; he
was still a very wolf-like dog, but his long association with mankind
had now begun. Professor Coon believes that these people were direct
descendants of the men of the glacial age and that they had much the
same appearance. He believes that most of the Ice Age survivors still
extant are living today in the northwestern European area.
SOUTH AND CENTRAL EUROPE PERHAPS AS READJUSTED AS THE NORTH
There is always one trouble with things that come from areas where
preservation is exceptionally good: The very quantity of materials in
such an assemblage tends to make things from other areas look poor
and simple, although they may not have been so originally at all. The
assemblages of the people who lived to the south of the Maglemosian
area may also have been quite large and varied; but, unfortunately,
relatively little of the southern assemblages has lasted. The
water-logged sites of the Maglemosian area preserved a great deal
more. Hence the Maglemosian itself _looks_ quite advanced to us, when
we compare it with the few things that have happened to last in other
areas. If we could go back and wander over the Europe of eight thousand
years ago, we would probably find that the peoples of France, central
Europe, and south central Russia were just as advanced as those of the
north European-Baltic belt.
South of the north European belt the hunting-food-collecting peoples
were living on as best they could during this time. One interesting
group, which seems to have kept to the regions of sandy soil and scrub
forest, made great quantities of geometric microliths. These are the
materials called _Tardenoisian_. The materials of the “Forest folk” of
France and central Europe generally are called _Azilian_; Dr. Movius
believes the term might best be restricted to the area south of the
Loire River.
HOW MUCH REAL CHANGE WAS THERE?
You can see that no really _basic_ change in the way of life has yet
been described. Childe sees the problem that faced the Europeans of
10,000 to 3000 B.C. as a problem in readaptation to the post-glacial
forest environment. By 6000 B.C. some quite successful solutions of
the problem--like the Maglemosian--had been made. The upsets that came
with the melting of the last ice gradually brought about all sorts of
changes in the tools and food-getting habits, but the people themselves
were still just as much simple hunters, fishers, and food-collectors as
they had been in 25,000 B.C. It could be said that they changed just
enough so that they would not have to change. But there is a bit more
to it than this.
Professor Mathiassen of Copenhagen, who knows the archeological remains
of this time very well, poses a question. He speaks of the material
as being neither rich nor progressive, in fact “rather stagnant,” but
he goes on to add that the people had a certain “receptiveness” and
were able to adapt themselves quickly when the next change did come.
My own understanding of the situation is that the “Forest folk” made
nothing as spectacular as had the producers of the earlier Magdalenian
assemblage and the Franco-Cantabrian art. On the other hand, they
_seem_ to have been making many more different kinds of tools for many
more different kinds of tasks than had their Ice Age forerunners. I
emphasize “seem” because the preservation in the Maglemosian bogs
is very complete; certainly we cannot list anywhere near as many
different things for earlier times as we did for the Maglemosians
(p. 94). I believe this experimentation with all kinds of new tools
and gadgets, this intensification of adaptiveness (p. 91), this
“receptiveness,” even if it is still only pointed toward hunting,
fishing, and food-collecting, is an important thing.
Remember that the only marker we have handy for the _beginning_ of
this tendency toward “receptiveness” and experimentation is the
little microlithic blade tools of various geometric forms. These, we
saw, began before the last ice had melted away, and they lasted on
in use for a very long time. I wish there were a better marker than
the microliths but I do not know of one. Remember, too, that as yet
we can only use the microliths as a marker in Europe and about the
Mediterranean.
CHANGES IN OTHER AREAS?
All this last section was about Europe. How about the rest of the world
when the last glaciers were melting away?
We simply don’t know much about this particular time in other parts
of the world except in Europe, the Mediterranean basin and the Middle
East. People were certainly continuing to move into the New World by
way of Siberia and the Bering Strait about this time. But for the
greater part of Africa and Asia, we do not know exactly what was
happening. Some day, we shall no doubt find out; today we are without
clear information.
REAL CHANGE AND PRELUDE IN THE NEAR EAST
The appearance of the microliths and the developments made by the
“Forest folk” of northwestern Europe also mark an end. They show us
the terminal phase of the old food-collecting way of life. It grows
increasingly clear that at about the same time that the Maglemosian and
other “Forest folk” were adapting themselves to hunting, fishing, and
collecting in new ways to fit the post-glacial environment, something
completely new was being made ready in western Asia.
Unfortunately, we do not have as much understanding of the climate and
environment of the late Ice Age in western Asia as we have for most
of Europe. Probably the weather was never so violent or life quite
so rugged as it was in northern Europe. We know that the microliths
made their appearance in western Asia at least by 10,000 B.C. and
possibly earlier, marking the beginning of the terminal phase of
food-collecting. Then, gradually, we begin to see the build-up towards
the first _basic change_ in human life.
This change amounted to a revolution just as important as the
Industrial Revolution. In it, men first learned to domesticate
plants and animals. They began _producing_ their food instead of
simply gathering or collecting it. When their food-production
became reasonably effective, people could and did settle down in
village-farming communities. With the appearance of the little farming
villages, a new way of life was actually under way. Professor Childe
has good reason to speak of the “food-producing revolution,” for it was
indeed a revolution.
QUESTIONS ABOUT CAUSE
We do not yet know _how_ and _why_ this great revolution took place. We
are only just beginning to put the questions properly. I suspect the
answers will concern some delicate and subtle interplay between man and
nature. Clearly, both the level of culture and the natural condition of
the environment must have been ready for the great change, before the
change itself could come about.
It is going to take years of co-operative field work by both
archeologists and the natural scientists who are most helpful to them
before the _how_ and _why_ answers begin to appear. Anthropologically
trained archeologists are fascinated with the cultures of men in times
of great change. About ten or twelve thousand years ago, the general
level of culture in many parts of the world seems to have been ready
for change. In northwestern Europe, we saw that cultures �changed
just enough so that they would not have to change.� We linked this to
environmental changes with the coming of post-glacial times.
In western Asia, we archeologists can prove that the food-producing
revolution actually took place. We can see _the_ important consequence
of effective domestication of plants and animals in the appearance of
the settled village-farming community. And within the village-farming
community was the seed of civilization. The way in which effective
domestication of plants and animals came about, however, must also be
linked closely with the natural environment. Thus the archeologists
will not solve the _how_ and _why_ questions alone--they will need the
help of interested natural scientists in the field itself.
PRECONDITIONS FOR THE REVOLUTION
Especially at this point in our story, we must remember how culture and
environment go hand in hand. Neither plants nor animals domesticate
themselves; men domesticate them. Furthermore, men usually domesticate
only those plants and animals which are useful. There is a good
question here: What is cultural usefulness? But I shall side-step it to
save time. Men cannot domesticate plants and animals that do not exist
in the environment where the men live. Also, there are certainly some
animals and probably some plants that resist domestication, although
they might be useful.
This brings me back again to the point that _both_ the level of culture
and the natural condition of the environment--with the proper plants
and animals in it--must have been ready before domestication could
have happened. But this is precondition, not cause. Why did effective
food-production happen first in the Near East? Why did it happen
independently in the New World slightly later? Why also in the Far
East? Why did it happen at all? Why are all human beings not still
living as the Maglemosians did? These are the questions we still have
to face.
CULTURAL "RECEPTIVENESS" AND PROMISING ENVIRONMENTS
Until the archeologists and the natural scientists--botanists,
geologists, zoologists, and general ecologists--have spent many more
years on the problem, we shall not have full _how_ and _why_ answers. I
do think, however, that we are beginning to understand what to look for.
We shall have to learn much more of what makes the cultures of men
"receptive" and experimental. Did change in the environment alone
force it? Was it simply a case of Professor Toynbee's "challenge and
response?" I cannot believe the answer is quite that simple. Were it
so simple, we should want to know why the change hadn't come earlier,
along with earlier environmental changes. We shall not know the answer,
however, until we have excavated the traces of many more cultures of
the time in question. We shall doubtless also have to learn more about,
and think imaginatively about, the simpler cultures still left today.
The "mechanics" of culture in general will be bound to interest us.
It will also be necessary to learn much more of the environments of
10,000 to 12,000 years ago. In which regions of the world were the
natural conditions most promising? Did this promise include plants and
animals which could be domesticated, or did it only offer new ways of
food-collecting? There is much work to do on this problem, but we are
beginning to get some general hints.
Before I begin to detail the hints we now have from western Asia, I
want to do two things. First, I shall tell you of an old theory as to
how food-production might have appeared. Second, I will bother you with
some definitions which should help us in our thinking as the story goes
on.
AN OLD THEORY AS TO THE CAUSE OF THE REVOLUTION
The idea that change would result, if the balance between nature
and culture became upset, is of course not a new one. For at least
twenty-five years, there has been a general theory as to _how_ the
food-producing revolution happened. This theory depends directly on the
idea of natural change in the environment.
The five thousand years following about 10,000 B.C. must have been
very difficult ones, the theory begins. These were the years when
the most marked melting of the last glaciers was going on. While the
glaciers were in place, the climate to the south of them must have been
different from the climate in those areas today. You have no doubt read
that people once lived in regions now covered by the Sahara Desert.
This is true; just when is not entirely clear. The theory is that
during the time of the glaciers, there was a broad belt of rain winds
south of the glaciers. These rain winds would have kept north Africa,
the Nile Valley, and the Middle East green and fertile. But when the
glaciers melted back to the north, the belt of rain winds is supposed
to have moved north too. Then the people living south and east of the
Mediterranean would have found that their water supply was drying up,
that the animals they hunted were dying or moving away, and that the
plant foods they collected were dried up and scarce.
According to the theory, all this would have been true except in the
valleys of rivers and in oases in the growing deserts. Here, in the
only places where water was left, the men and animals and plants would
have clustered. They would have been forced to live close to one
another, in order to live at all. Presently the men would have seen
that some animals were more useful or made better food than others,
and so they would have begun to protect these animals from their
natural enemies. The men would also have been forced to try new plant
foods--foods which possibly had to be prepared before they could be
eaten. Thus, with trials and errors, but by being forced to live close
to plants and animals, men would have learned to domesticate them.
THE OLD THEORY TOO SIMPLE FOR THE FACTS
This theory was set up before we really knew anything in detail about
the later prehistory of the Near and Middle East. We now know that
the facts which have been found don't fit the old theory at all well.
Also, I have yet to find an American meteorologist who feels that we
know enough about the changes in the weather pattern to say that it can
have been so simple and direct. And, of course, the glacial ice which
began melting after 12,000 years ago was merely the last sub-phase of
the last great glaciation. There had also been three earlier periods
of great alpine glaciers, and long periods of warm weather in between.
If the rain belt moved north as the glaciers melted for the last time,
it must have moved in the same direction in earlier times. Thus, the
forced neighborliness of men, plants, and animals in river valleys and
oases must also have happened earlier. Why didn't domestication happen
earlier, then?
Furthermore, it does not seem to be in the oases and river valleys
that we have our first or only traces of either food-production
or the earliest farming villages. These traces are also in the
hill-flanks of the mountains of western Asia. Our earliest sites of the
village-farmers do not seem to indicate a greatly different climate
from that which the same region now shows. In fact, everything we now
know suggests that the old theory was just too simple an explanation to
have been the true one. The only reason I mention it--beyond correcting
the ideas you may get in the general texts--is that it illustrates the
kind of thinking we shall have to do, even if it is doubtless wrong in
detail.
We archeologists shall have to depend much more than we ever have on
the natural scientists who can really help us. I can tell you this from
experience. I had the great good fortune to have on my expedition staff
in Iraq in 1954-55, a geologist, a botanist, and a zoologist. Their
studies added whole new bands of color to my spectrum of thinking about
_how_ and _why_ the revolution took place and how the village-farming
community began. But it was only a beginning; as I said earlier, we are
just now learning to ask the proper questions.
ABOUT STAGES AND ERAS
Now come some definitions, so I may describe my material more easily.
Archeologists have always loved to make divisions and subdivisions
within the long range of materials which they have found. They often
disagree violently about which particular assemblage of material
goes into which subdivision, about what the subdivisions should be
named, about what the subdivisions really mean culturally. Some
archeologists, probably through habit, favor an old scheme of Grecized
names for the subdivisions: paleolithic, mesolithic, neolithic. I
refuse to use these words myself. They have meant too many different
things to too many different people and have tended to hide some pretty
fuzzy thinking. Probably you haven't even noticed my own scheme of
subdivision up to now, but I'd better tell you in general what it is.
I think of the earliest great group of archeological materials, from
which we can deduce only a food-gathering way of culture, as the
_food-gathering stage_. I say "stage" rather than "age," because it
is not quite over yet; there are still a few primitive people in
out-of-the-way parts of the world who remain in the _food-gathering
stage_. In fact, Professor Julian Steward would probably prefer to call
it a food-gathering _level_ of existence, rather than a stage. This
would be perfectly acceptable to me. I also tend to find myself using
_collecting_, rather than _gathering_, for the more recent aspects or
era of the stage, as the word "collecting" appears to have more sense
of purposefulness and specialization than does "gathering" (see p.
91).
Now, while I think we could make several possible subdivisions of the
food-gathering stage--I call my subdivisions of stages _eras_[5]--I
believe the only one which means much to us here is the last or
_terminal sub-era of food-collecting_ of the whole food-gathering
stage. The microliths seem to mark its approach in the northwestern
part of the Old World. It is really shown best in the Old World by
the materials of the "Forest folk," the cultural adaptation to the
post-glacial environment in northwestern Europe. We talked about
the "Forest folk" at the beginning of this chapter, and I used the
Maglemosian assemblage of Denmark as an example.
[5] It is difficult to find words which have a sequence or gradation
of meaning with respect to both development and a range of time
in the past, or with a range of time from somewhere in the past
which is perhaps not yet ended. One standard Webster definition
of _stage_ is: "One of the steps into which the material
development of man ... is divided." I cannot find any dictionary
definition that suggests which of the words, _stage_ or _era_,
has the meaning of a longer span of time. Therefore, I have
chosen to let my eras be shorter, and to subdivide my stages
into eras. Webster gives _era_ as: "A signal stage of history,
an epoch." When I want to subdivide my eras, I find myself using
_sub-eras_. Thus I speak of the _eras_ within a _stage_ and of
the _sub-eras_ within an _era_; that is, I do so when I feel
that I really have to, and when the evidence is clear enough to
allow it.
The food-producing revolution ushers in the _food-producing stage_.
This stage began to be replaced by the _industrial stage_ only about
two hundred years ago. Now notice that my stage divisions are in terms
of technology and economics. We must think sharply to be sure that the
subdivisions of the stages, the eras, are in the same terms. This does
not mean that I think technology and economics are the only important
realms of culture. It is rather that for most of prehistoric time the
materials left to the archeologists tend to limit our deductions to
technology and economics.
I'm so soon out of my competence, as conventional ancient history
begins, that I shall only suggest the earlier eras of the
food-producing stage to you. This book is about prehistory, and I'm not
a universal historian.
THE TWO EARLIEST ERAS OF THE FOOD-PRODUCING STAGE
The food-producing stage seems to appear in western Asia with really
revolutionary suddenness. It is seen by the relative speed with which
the traces of new crafts appear in the earliest village-farming
community sites we've dug. It is seen by the spread and multiplication
of these sites themselves, and the remarkable growth in human
population we deduce from this increase in sites. We�ll look at some
of these sites and the archeological traces they yield in the next
chapter. When such village sites begin to appear, I believe we are in
the _era of the primary village-farming community_. I also believe this
is the second era of the food-producing stage.
The first era of the food-producing stage, I believe, was an _era of
incipient cultivation and animal domestication_. I keep saying "I
believe" because the actual evidence for this earlier era is so slight
that one has to set it up mainly by playing a hunch for it. The reason
for playing the hunch goes about as follows.
One thing we seem to be able to see, in the food-collecting era in
general, is a tendency for people to begin to settle down. This
settling down seemed to become further intensified in the terminal
era. How this is connected with Professor Mathiassen's "receptiveness"
and the tendency to be experimental, we do not exactly know. The
evidence from the New World comes into play here as well as that from
the Old World. With this settling down in one place, the people of the
terminal era--especially the "Forest folk" whom we know best--began
making a great variety of new things. I remarked about this earlier in
the chapter. Dr. Robert M. Adams is of the opinion that this atmosphere
of experimentation with new tools--with new ways of collecting food--is
the kind of atmosphere in which one might expect trials at planting
and at animal domestication to have been made. We first begin to find
traces of more permanent life in outdoor camp sites, although caves
were still inhabited at the beginning of the terminal era. It is not
surprising at all that the "Forest folk" had already domesticated the
dog. In this sense, the whole era of food-collecting was becoming ready
and almost "incipient" for cultivation and animal domestication.
Northwestern Europe was not the place for really effective beginnings
in agriculture and animal domestication. These would have had to take
place in one of those natural environments of promise, where a variety
of plants and animals, each possible of domestication, was available in
the wild state. Let me spell this out. Really effective food-production
must include a variety of items to make up a reasonably well-rounded
diet. The food-supply so produced must be trustworthy, even though
the food-producing peoples themselves might be happy to supplement
it with fish and wild strawberries, just as we do when such things
are available. So, as we said earlier, part of our problem is that
of finding a region with a natural environment which includes--and
did include, some ten thousand years ago--a variety of possibly
domesticable wild plants and animals.
NUCLEAR AREAS
Now comes the last of my definitions. A region with a natural
environment which included a variety of wild plants and animals,
both possible and ready for domestication, would be a central
or core or _nuclear area_, that is, it would be when and _if_
food-production took place within it. It is pretty hard for me to
imagine food-production having ever made an independent start outside
such a nuclear area, although there may be some possible nuclear areas
in which food-production never took place (possibly in parts of Africa,
for example).
We know of several such nuclear areas. In the New World, Middle America
and the Andean highlands make up one or two; it is my understanding
that the evidence is not yet clear as to which. There seems to have
been a nuclear area somewhere in southeastern Asia, in the Malay
peninsula or Burma perhaps, connected with the early cultivation of
taro, breadfruit, the banana and the mango. Possibly the cultivation
of rice and the domestication of the chicken and of zebu cattle and
the water buffalo belong to this southeast Asiatic nuclear area. We
know relatively little about it archeologically, as yet. The nuclear
area which was the scene of the earliest experiment in effective
food-production was in western Asia. Since I know it best, I shall use
it as my example.
THE NUCLEAR NEAR EAST
The nuclear area of western Asia is naturally the one of greatest
interest to people of the western cultural tradition. Our cultural
heritage began within it. The area itself is the region of the hilly
flanks of rain-watered grass-land which build up to the high mountain
ridges of Iran, Iraq, Turkey, Syria, and Palestine. The map on page
125 indicates the region. If you have a good atlas, try to locate the
zone which surrounds the drainage basin of the Tigris and Euphrates
Rivers at elevations of from approximately 2,000 to 5,000 feet. The
lower alluvial land of the Tigris-Euphrates basin itself has very
little rainfall. Some years ago Professor James Henry Breasted called
the alluvial lands of the Tigris-Euphrates a part of the "fertile
crescent." These alluvial lands are very fertile if irrigated. Breasted
was most interested in the oriental civilizations of conventional
ancient history, and irrigation had been discovered before they
appeared.
The country of hilly flanks above Breasted's crescent receives from
10 to 20 or more inches of winter rainfall each year, which is about
what Kansas has. Above the hilly-flanks zone tower the peaks and ridges
of the Lebanon-Amanus chain bordering the coast-line from Palestine
to Turkey, the Taurus Mountains of southern Turkey, and the Zagros
range of the Iraq-Iran borderland. This rugged mountain frame for our
hilly-flanks zone rises to some magnificent alpine scenery, with peaks
of from ten to fifteen thousand feet in elevation. There are several
gaps in the Mediterranean coastal portion of the frame, through which
the winter's rain-bearing winds from the sea may break so as to carry
rain to the foothills of the Taurus and the Zagros.
The picture I hope you will have from this description is that of an
intermediate hilly-flanks zone lying between two regions of extremes.
The lower Tigris-Euphrates basin land is low and far too dry and hot
for agriculture based on rainfall alone; to the south and southwest, it
merges directly into the great desert of Arabia. The mountains which
lie above the hilly-flanks zone are much too high and rugged to have
encouraged farmers.
THE NATURAL ENVIRONMENT OF THE NUCLEAR NEAR EAST
The more we learn of this hilly-flanks zone that I describe, the
more it seems surely to have been a nuclear area. This is where we
archeologists need, and are beginning to get, the help of natural
scientists. They are coming to the conclusion that the natural
environment of the hilly-flanks zone today is much as it was some eight
to ten thousand years ago. There are still two kinds of wild wheat and
a wild barley, and the wild sheep, goat, and pig. We have discovered
traces of each of these at about nine thousand years ago, also traces
of wild ox, horse, and dog, each of which appears to be the probable
ancestor of the domesticated form. In fact, at about nine thousand
years ago, the two wheats, the barley, and at least the goat, were
already well on the road to domestication.
The wild wheats give us an interesting clue. They are only available
together with the wild barley within the hilly-flanks zone. While the
wild barley grows in a variety of elevations and beyond the zone,
at least one of the wild wheats does not seem to grow below the hill
country. As things look at the moment, the domestication of both the
wheats together could _only_ have taken place within the hilly-flanks
zone. Barley seems to have first come into cultivation due to its
presence as a weed in already cultivated wheat fields. There is also
a suggestion--there is still much more to learn in the matter--that
the animals which were first domesticated were most at home up in the
hilly-flanks zone in their wild state.
With a single exception--that of the dog--the earliest positive
evidence of domestication includes the two forms of wheat, the barley,
and the goat. The evidence comes from within the hilly-flanks zone.
However, it comes from a settled village proper, Jarmo (which I'll
describe in the next chapter), and is thus from the era of the primary
village-farming community. We are still without positive evidence of
domesticated grain and animals in the first era of the food-producing
stage, that of incipient cultivation and animal domestication.
THE ERA OF INCIPIENT CULTIVATION AND ANIMAL DOMESTICATION
I said above (p. 105) that my era of incipient cultivation and animal
domestication is mainly set up by playing a hunch. Although we cannot
really demonstrate it--and certainly not in the Near East--it would
be very strange for food-collectors not to have known a great deal
about the plants and animals most useful to them. They do seem to have
domesticated the dog. We can easily imagine them remembering to go
back, season after season, to a particular patch of ground where seeds
or acorns or berries grew particularly well. Most human beings, unless
they are extremely hungry, are attracted to baby animals, and many wild
pups or fawns or piglets must have been brought back alive by hunting
parties.
In this last sense, man has probably always been an incipient
cultivator and domesticator. But I believe that Adams is right in
suggesting that this would be doubly true with the experimenters of
the terminal era of food-collecting. We noticed that they also seem
to have had a tendency to settle down. Now my hunch goes that _when_
this experimentation and settling down took place within a potential
nuclear area--where a whole constellation of plants and animals
possible of domestication was available--the change was easily made.
Professor Charles A. Reed, our field colleague in zoology, agrees that
year-round settlement with plant domestication probably came before
there were important animal domestications.
INCIPIENT ERAS AND NUCLEAR AREAS
I have put this scheme into a simple chart (p. 111) with the names
of a few of the sites we are going to talk about. You will see that my
hunch means that there are eras of incipient cultivation _only_ within
nuclear areas. In a nuclear area, the terminal era of food-collecting
would probably have been quite short. I do not know for how long a time
the era of incipient cultivation and domestication would have lasted,
but perhaps for several thousand years. Then it passed on into the era
of the primary village-farming community.
Outside a nuclear area, the terminal era of food-collecting would last
for a long time; in a few out-of-the-way parts of the world, it still
hangs on. It would end in any particular place through contact with
and the spread of ideas of people who had passed on into one of the
more developed eras. In many cases, the terminal era of food-collecting
was ended by the incoming of the food-producing peoples themselves.
For example, the practices of food-production were carried into Europe
by the actual movement of some numbers of peoples (we don't know how
many) who had reached at least the level of the primary village-farming
community. The "Forest folk" learned food-production from them. There
was never an era of incipient cultivation and domestication proper in
Europe, if my hunch is right.
ARCHEOLOGICAL DIFFICULTIES IN SEEING THE INCIPIENT ERA
The way I see it, two things were required in order that an era of
incipient cultivation and domestication could begin. First, there had
to be the natural environment of a nuclear area, with its whole group
of plants and animals capable of domestication. This is the aspect of
the matter which we've said is directly given by nature. But it is
quite possible that such an environment with such a group of plants
and animals in it may have existed well before ten thousand years ago
in the Near East. It is also quite possible that the same promising
condition may have existed in regions which never developed into
nuclear areas proper. Here, again, we come back to the cultural factor.
I think it was that "atmosphere of experimentation" we've talked about
once or twice before. I can't define it for you, other than to say that
by the end of the Ice Age, the general level of many cultures was ready
for change. Ask me how and why this was so, and I'll tell you we don't
know yet, and that if we did understand this kind of question, there
would be no need for me to go on being a prehistorian!
[Illustration: POSSIBLE RELATIONSHIPS OF STAGES AND ERAS IN WESTERN
ASIA AND NORTHEASTERN AFRICA]
Now since this was an era of incipience, of the birth of new ideas,
and of experimentation, it is very difficult to see its traces
archeologically. New tools having to do with the new ways of getting
and, in fact, producing food would have taken some time to develop.
It need not surprise us too much if we cannot find hoes for planting
and sickles for reaping grain at the very beginning. We might expect
a time of making-do with some of the older tools, or with make-shift
tools, for some of the new jobs. The present-day wild cousin of the
domesticated sheep still lives in the mountains of western Asia. It has
no wool, only a fine down under hair like that of a deer, so it need
not surprise us to find neither the whorls used for spinning nor traces
of woolen cloth. It must have taken some time for a wool-bearing sheep
to develop and also time for the invention of the new tools which go
with weaving. It would have been the same with other kinds of tools for
the new way of life.
It is difficult even for an experienced comparative zoologist to tell
which are the bones of domesticated animals and which are those of
their wild cousins. This is especially so because the animal bones the
archeologists find are usually fragmentary. Furthermore, we do not have
a sort of library collection of the skeletons of the animals or an
herbarium of the plants of those times, against which the traces which
the archeologists find may be checked. We are only beginning to get
such collections for the modern wild forms of animals and plants from
some of our nuclear areas. In the nuclear area in the Near East, some
of the wild animals, at least, have already become extinct. There are
no longer wild cattle or wild horses in western Asia. We know they were
there from the finds we've made in caves of late Ice Age times, and
from some slightly later sites.
SITES WITH ANTIQUITIES OF THE INCIPIENT ERA
So far, we know only a very few sites which would suit my notion of the
incipient era of cultivation and animal domestication. I am closing
this chapter with descriptions of two of the best Near Eastern examples
I know of. You may not be satisfied that what I am able to describe
makes a full-bodied era of development at all. Remember, however, that
I've told you I'm largely playing a kind of a hunch, and also that the
archeological materials of this era will always be extremely difficult
to interpret. At the beginning of any new way of life, there will be a
great tendency for people to make-do, at first, with tools and habits
they are already used to. I would suspect that a great deal of this
making-do went on almost to the end of this era.
THE NATUFIAN, AN ASSEMBLAGE OF THE INCIPIENT ERA
The assemblage called the Natufian comes from the upper layers of a
number of caves in Palestine. Traces of its flint industry have also
turned up in Syria and Lebanon. We don't know just how old it is. I
guess that it probably falls within five hundred years either way of
about 7750 B.C.
Until recently, the people who produced the Natufian assemblage were
thought to have been only cave dwellers, but now at least three open
air Natufian sites have been briefly described. In their best-known
dwelling place, on Mount Carmel, the Natufian folk lived in the open
mouth of a large rock-shelter and on the terrace in front of it. On the
terrace, they had set at least two short curving lines of stones; but
these were hardly architecture; they seem more like benches or perhaps
the low walls of open pens. There were also one or two small clusters
of stones laid like paving, and a ring of stones around a hearth or
fireplace. One very round and regular basin-shaped depression had been
cut into the rocky floor of the terrace, and there were other less
regular basin-like depressions. In the newly reported open air sites,
there seem to have been huts with rounded corners.
Most of the finds in the Natufian layer of the Mount Carmel cave were
flints. About 80 per cent of these flint tools were microliths made
by the regular working of tiny blades into various tools, some having
geometric forms. The larger flint tools included backed blades, burins,
scrapers, a few arrow points, some larger hacking or picking tools, and
one special type. This last was the sickle blade.
We know a sickle blade of flint when we see one, because of a strange
polish or sheen which seems to develop on the cutting edge when the
blade has been used to cut grasses or grain, or--perhaps--reeds. In
the Natufian, we have even found the straight bone handles in which a
number of flint sickle blades were set in a line.
There was a small industry in ground or pecked stone (that is, abraded
not chipped) in the Natufian. This included some pestle and mortar
fragments. The mortars are said to have a deep and narrow hole,
and some of the pestles show traces of red ochre. We are not sure
that these mortars and pestles were also used for grinding food. In
addition, there were one or two bits of carving in stone.
NATUFIAN ANTIQUITIES IN OTHER MATERIALS; BURIALS AND PEOPLE
The Natufian industry in bone was quite rich. It included, beside the
sickle hafts mentioned above, points and harpoons, straight and curved
types of fish-hooks, awls, pins and needles, and a variety of beads and
pendants. There were also beads and pendants of pierced teeth and shell.
A number of Natufian burials have been found in the caves; some burials
were grouped together in one grave. The people who were buried within
the Mount Carmel cave were laid on their backs in an extended position,
while those on the terrace seem to have been "flexed" (placed in their
graves in a curled-up position). This may mean no more than that it was
easier to dig a long hole in cave dirt than in the hard-packed dirt of
the terrace. The people often had some kind of object buried with them,
and several of the best collections of beads come from the burials. On
two of the skulls there were traces of elaborate head-dresses of shell
beads.
[Illustration: SKETCH OF NATUFIAN ASSEMBLAGE
MICROLITHS
ARCHITECTURE?
BURIAL
CHIPPED STONE
GROUND STONE
BONE]
The animal bones of the Natufian layers show beasts of a "modern" type,
but with some differences from those of present-day Palestine. The
bones of the gazelle far outnumber those of the deer; since gazelles
like a much drier climate than deer, Palestine must then have had much
the same climate that it has today. Some of the animal bones were those
of large or dangerous beasts: the hyena, the bear, the wild boar,
and the leopard. But the Natufian people may have had the help of a
large domesticated dog. If our guess at a date for the Natufian is
right (about 7750 B.C.), this is an earlier dog than was that in the
Maglemosian of northern Europe. More recently, it has been reported
that a domesticated goat is also part of the Natufian finds.
The study of the human bones from the Natufian burials is not yet
complete. Until Professor McCown's study becomes available, we may note
Professor Coon's assessment that these people were of a "basically
Mediterranean type."
THE KARIM SHAHIR ASSEMBLAGE
Karim Shahir differs from the Natufian sites in that it shows traces
of a temporary open site or encampment. It lies on the top of a bluff
in the Kurdish hill-country of northeastern Iraq. It was dug by Dr.
Bruce Howe of the expedition I directed in 1950-51 for the Oriental
Institute and the American Schools of Oriental Research. In 1954-55,
our expedition located another site, M'lefaat, with general resemblance
to Karim Shahir, but about a hundred miles north of it. In 1956, Dr.
Ralph Solecki located still another Karim Shahir type of site called
Zawi Chemi Shanidar. The Zawi Chemi site has a radiocarbon date of 8900
± 300 B.C.
Karim Shahir has evidence of only one very shallow level of occupation.
It was probably not lived on very long, although the people who lived
on it spread out over about three acres of area. In spots, the single
layer yielded great numbers of fist-sized cracked pieces of limestone,
which had been carried up from the bed of a stream at the bottom of the
bluff. We think these cracked stones had something to do with a kind of
architecture, but we were unable to find positive traces of hut plans.
At M'lefaat and Zawi Chemi, there were traces of rounded hut plans.
As in the Natufian, the great bulk of small objects of the Karim Shahir
assemblage was in chipped flint. A large proportion of the flint tools
were microlithic bladelets and geometric forms. The flint sickle blade
was almost non-existent, being far scarcer than in the Natufian. The
people of Karim Shahir did a modest amount of work in the grinding of
stone; there were milling stone fragments of both the mortar and the
quern type, and stone hoes or axes with polished bits. Beads, pendants,
rings, and bracelets were made of finer quality stone. We found a few
simple points and needles of bone, and even two rather formless unbaked
clay figurines which seemed to be of animal form.
[Illustration: SKETCH OF KARIM SHAHIR ASSEMBLAGE
CHIPPED STONE
GROUND STONE
UNBAKED CLAY
SHELL
BONE
"ARCHITECTURE"]
Karim Shahir did not yield direct evidence of the kind of vegetable
food its people ate. The animal bones showed a considerable
increase in the proportion of the bones of the species capable of
domestication--sheep, goat, cattle, horse, dog--as compared with animal
bones from the earlier cave sites of the area, which have a high
proportion of bones of wild forms like deer and gazelle. But we do not
know that any of the Karim Shahir animals were actually domesticated.
Some of them may have been, in an "incipient" way, but we have no means
at the moment that will tell us from the bones alone.
WERE THE NATUFIAN AND KARIM SHAHIR PEOPLES FOOD-PRODUCERS?
It is clear that a great part of the food of the Natufian people
must have been hunted or collected. Shells of land, fresh-water, and
sea animals occur in their cave layers. The same is true as regards
Karim Shahir, save for sea shells. But on the other hand, we have
the sickles, the milling stones, the possible Natufian dog, and the
goat, and the general animal situation at Karim Shahir to hint at an
incipient approach to food-production. At Karim Shahir, there was the
tendency to settle down out in the open; this is echoed by the new
reports of open air Natufian sites. The large number of cracked stones
certainly indicates that it was worth the peoples' while to have some
kind of structure, even if the site as a whole was short-lived.
It is a part of my hunch that these things all point toward
food-production--that the hints we seek are there. But in the sense
that the peoples of the era of the primary village-farming community,
which we shall look at next, are fully food-producing, the Natufian
and Karim Shahir folk had not yet arrived. I think they were part of
a general build-up to full scale food-production. They were possibly
controlling a few animals of several kinds and perhaps one or two
plants, without realizing the full possibilities of this "control" as a
new way of life.
This is why I think of the Karim Shahir and Natufian folk as being at
a level, or in an era, of incipient cultivation and domestication. But
we shall have to do a great deal more excavation in this range of time
before we'll get the kind of positive information we need.
SUMMARY
I am sorry that this chapter has had to be so much more about ideas
than about the archeological traces of prehistoric men themselves.
But the antiquities of the incipient era of cultivation and animal
domestication will not be spectacular, even when we do have them
excavated in quantity. Few museums will be interested in these
antiquities for exhibition purposes. The charred bits or impressions
of plants, the fragments of animal bone and shell, and the varied
clues to climate and environment will be as important as the artifacts
themselves. It will be the ideas to which these traces lead us that
will be important. I am sure that this unspectacular material--when we
have much more of it, and learn how to understand what it says--will
lead us to how and why answers about the first great change in human
history.
We know the earliest village-farming communities appeared in western
Asia, in a nuclear area. We do not yet know why the Near Eastern
experiment came first, or why it didn't happen earlier in some other
nuclear area. Apparently, the level of culture and the promise of the
natural environment were ready first in western Asia. The next sites
we look at will show a simple but effective food-production already
in existence. Without effective food-production and the settled
village-farming communities, civilization never could have followed.
How effective food-production came into being by the end of the
incipient era, is, I believe, one of the most fascinating questions any
archeologist could face.
It now seems probable--from possibly two of the Palestinian sites with
varieties of the Natufian (Jericho and Nahal Oren)--that there were
one or more local Palestinian developments out of the Natufian into
later times. In the same way, what followed after the Karim Shahir type
of assemblage in northeastern Iraq was in some ways a reflection of
beginnings made at Karim Shahir and Zawi Chemi.
THE First Revolution
[Illustration]
As the incipient era of cultivation and animal domestication passed
onward into the era of the primary village-farming community, the first
basic change in human economy was fully achieved. In southwestern Asia,
this seems to have taken place about nine thousand years ago. I am
going to restrict my description to this earliest Near Eastern case--I
do not know enough about the later comparable experiments in the Far
East and in the New World. Let us first, once again, think of the
contrast between food-collecting and food-producing as ways of life.
THE DIFFERENCE BETWEEN FOOD-COLLECTORS AND FOOD-PRODUCERS
Childe used the word �revolution� because of the radical change that
took place in the habits and customs of man. Food-collectors--that is,
hunters, fishers, berry- and nut-gatherers--had to live in small groups
or bands, for they had to be ready to move wherever their food supply
moved. Not many people can be fed in this way in one area, and small
children and old folks are a burden. There is not enough food to store,
and it is not the kind that can be stored for long.
Do you see how this all fits into a picture? Small groups of people
living now in this cave, now in that--or out in the open--as they moved
after the animals they hunted; no permanent villages, a few half-buried
huts at best; no breakable utensils; no pottery; no signs of anything
for clothing beyond the tools that were probably used to dress the
skins of animals; no time to think of much of anything but food and
protection and disposal of the dead when death did come: an existence
which takes nature as it finds it, which does little or nothing to
modify nature--all in all, a savage's existence, and a very tough one.
A man who spends his whole life following animals just to kill them to
eat, or moving from one berry patch to another, is really living just
like an animal himself.
THE FOOD-PRODUCING ECONOMY
Against this picture let me try to draw another--that of man�s life
after food-production had begun. His meat was stored "on the hoof,"
his grain in silos or great pottery jars. He lived in a house: it was
worth his while to build one, because he couldn't move far from his
fields and flocks. In his neighborhood enough food could be grown
and enough animals bred so that many people were kept busy. They all
lived close to their flocks and fields, in a village. The village was
already of a fair size, and it was growing, too. Everybody had more to
eat; they were presumably all stronger, and there were more children.
Children and old men could shepherd the animals by day or help with
the lighter work in the fields. After the crops had been harvested the
younger men might go hunting and some of them would fish, but the food
they brought in was only an addition to the food in the village; the
villagers wouldn't starve, even if the hunters and fishermen came home
empty-handed.
There was more time to do different things, too. They began to modify
nature. They made pottery out of raw clay, and textiles out of hair
or fiber. People who became good at pottery-making traded their pots
for food and spent all of their time on pottery alone. Other people
were learning to weave cloth or to make new tools. There were already
people in the village who were becoming full-time craftsmen.
Other things were changing, too. The villagers must have had
to agree on new rules for living together. The head man of the
village had problems different from those of the chief of the small
food-collectors' band. If somebody's flock of sheep spoiled a wheat
field, the owner wanted payment for the grain he lost. The chief of
the hunters was never bothered with such questions. Even the gods
had changed. The spirits and the magic that had been used by hunters
weren't of any use to the villagers. They needed gods who would watch
over the fields and the flocks, and they eventually began to erect
buildings where their gods might dwell, and where the men who knew most
about the gods might live.
WAS FOOD-PRODUCTION A "REVOLUTION"?
If you can see the difference between these two pictures--between
life in the food-collecting stage and life after food-production
had begun--you'll see why Professor Childe speaks of a revolution.
By revolution, he doesn't mean that it happened over night or that
it happened only once. We don�t know exactly how long it took. Some
people think that all these changes may have occurred in less than
500 years, but I doubt that. The incipient era was probably an affair
of some duration. Once the level of the village-farming community had
been established, however, things did begin to move very fast. By
six thousand years ago, the descendants of the first villagers had
developed irrigation and plow agriculture in the relatively rainless
Mesopotamian alluvium and were living in towns with temples. Relative
to the half million years of food-gathering which lay behind, this had
been achieved with truly revolutionary suddenness.
GAPS IN OUR KNOWLEDGE OF THE NEAR EAST
If you'll look again at the chart (p. 111) you'll see that I have
very few sites and assemblages to name in the incipient era of
cultivation and domestication, and not many in the earlier part of
the primary village-farming level either. Thanks in no small part
to the intelligent co-operation given foreign excavators by the
Iraq Directorate General of Antiquities, our understanding of the
sequence in Iraq is growing more complete. I shall use Iraq as my main
yard-stick here. But I am far from being able to show you a series of
Sears Roebuck catalogues, even century by century, for any part of
the nuclear area. There is still a great deal of earth to move, and a
great mass of material to recover and interpret before we even begin to
understand "how" and "why."
Perhaps here, because this kind of archeology is really my specialty,
you'll excuse it if I become personal for a moment. I very much look
forward to having further part in closing some of the gaps in knowledge
of the Near East. This is not, as I've told you, the spectacular
range of Near Eastern archeology. There are no royal tombs, no gold,
no great buildings or sculpture, no writing, in fact nothing to
excite the normal museum at all. Nevertheless it is a range which,
idea-wise, gives the archeologist tremendous satisfaction. The country
of the hilly flanks is an exciting combination of green grasslands
and mountainous ridges. The Kurds, who inhabit the part of the area
in which I've worked most recently, are an extremely interesting and
hospitable people. Archeologists don't become rich, but I'll forego
the Cadillac for any bright spring morning in the Kurdish hills, on a
good site with a happy crew of workmen and an interested and efficient
staff. It is probably impossible to convey the full feeling which life
on such a dig holds--halcyon days for the body and acute pleasurable
stimulation for the mind. Old things coming newly out of the good dirt,
and the pieces of the human puzzle fitting into place! I think I am
an honest man; I cannot tell you that I am sorry the job is not yet
finished and that there are still gaps in this part of the Near Eastern
archeological sequence.
EARLIEST SITES OF THE VILLAGE FARMERS
So far, the Karim Shahir type of assemblage, which we looked at in the
last chapter, is the earliest material available in what I take to
be the nuclear area. We do not believe that Karim Shahir was a village
site proper: it looks more like the traces of a temporary encampment.
Two caves, called Belt and Hotu, which are outside the nuclear area
and down on the foreshore of the Caspian Sea, have been excavated
by Professor Coon. These probably belong in the later extension of
the terminal era of food-gathering; in their upper layers are traits
like the use of pottery borrowed from the more developed era of the
same time in the nuclear area. The same general explanation doubtless
holds true for certain materials in Egypt, along the upper Nile and in
the Kharga oasis: these materials, called Sebilian III, the Khartoum
"neolithic," and the Khargan microlithic, are from surface sites,
not from caves. The chart (p. 111) shows where I would place these
materials in era and time.
[Illustration: THE HILLY FLANKS OF THE CRESCENT AND EARLY SITES OF THE
NEAR EAST]
Both M'lefaat and Dr. Solecki's Zawi Chemi Shanidar site appear to have
been slightly more �settled in� than was Karim Shahir itself. But I do
not think they belong to the era of farming-villages proper. The first
site of this era, in the hills of Iraqi Kurdistan, is Jarmo, on which
we have spent three seasons of work. Following Jarmo comes a variety of
sites and assemblages which lie along the hilly flanks of the crescent
and just below it. I am going to describe and illustrate some of these
for you.
Since not very much archeological excavation has yet been done on sites
of this range of time, I shall have to mention the names of certain
single sites which now alone stand for an assemblage. This does not
mean that I think the individual sites I mention were unique. In the
times when their various cultures flourished, there must have been
many little villages which shared the same general assemblage. We are
only now beginning to locate them again. Thus, if I speak of Jarmo,
or Jericho, or Sialk as single examples of their particular kinds of
assemblages, I don't mean that they were unique at all. I think I could
take you to the sites of at least three more Jarmos, within twenty
miles of the original one. They are there, but they simply haven't yet
been excavated. In 1956, a Danish expedition discovered material of
Jarmo type at Shimshara, only two dozen miles northeast of Jarmo, and
below an assemblage of Hassunan type (which I shall describe presently).
THE GAP BETWEEN KARIM SHAHIR AND JARMO
As we see the matter now, there is probably still a gap in the
available archeological record between the Karim Shahir-M'lefaat-Zawi
Chemi group (of the incipient era) and that of Jarmo (of the
village-farming era). Although some items of the Jarmo type materials
do reflect the beginnings of traditions set in the Karim Shahir group
(see p. 120), there is not a clear continuity. Moreover--to the
degree that we may trust a few radiocarbon dates--there would appear
to be around two thousand years of difference in time. The single
available Zawi Chemi "date" is 8900 ± 300 B.C.; the most reasonable
group of "dates" from Jarmo average to about 6750 ± 200 B.C. I am
uncertain about this two thousand years--I do not think it can have
been so long.
This suggests that we still have much work to do in Iraq. You can
imagine how earnestly we await the return of political stability in the
Republic of Iraq.
JARMO, IN THE KURDISH HILLS, IRAQ
The site of Jarmo has a depth of deposit of about twenty-seven feet,
and approximately a dozen layers of architectural renovation and
change. Nevertheless it is a "one period" site: its assemblage remains
essentially the same throughout, although one or two new items are
added in later levels. It covers about four acres of the top of a
bluff, below which runs a small stream. Jarmo lies in the hill country
east of the modern oil town of Kirkuk. The Iraq Directorate General of
Antiquities suggested that we look at it in 1948, and we have had three
seasons of digging on it since.
The people of Jarmo grew the barley plant and two different kinds of
wheat. They made flint sickles with which to reap their grain, mortars
or querns on which to crack it, ovens in which it might be parched, and
stone bowls out of which they might eat their porridge. We are sure
that they had the domesticated goat, but Professor Reed (the staff
zoologist) is not convinced that the bones of the other potentially
domesticable animals of Jarmo--sheep, cattle, pig, horse, dog--show
sure signs of domestication. We had first thought that all of these
animals were domesticated ones, but Reed feels he must find out much
more before he can be sure. As well as their grain and the meat from
their animals, the people of Jarmo consumed great quantities of land
snails. Botanically, the Jarmo wheat stands about half way between
fully bred wheat and the wild forms.
ARCHITECTURE: HALL-MARK OF THE VILLAGE
The sure sign of the village proper is in its traces of architectural
permanence. The houses of Jarmo were only the size of a small cottage
by our standards, but each was provided with several rectangular rooms.
The walls of the houses were made of puddled mud, often set on crude
foundations of stone. (The puddled mud wall, which the Arabs call
_touf_, is built by laying a three to six inch course of soft mud,
letting this sun-dry for a day or two, then adding the next course,
etc.) The village probably looked much like the simple Kurdish farming
village of today, with its mud-walled houses and low mud-on-brush
roofs. I doubt that the Jarmo village had more than twenty houses at
any one moment of its existence. Today, an average of about seven
people live in a comparable Kurdish house; probably the population of
Jarmo was about 150 people.
[Illustration: SKETCH OF JARMO ASSEMBLAGE
CHIPPED STONE
UNBAKED CLAY
GROUND STONE
POTTERY _UPPER THIRD OF SITE ONLY._
REED MATTING
BONE
ARCHITECTURE]
It is interesting that portable pottery does not appear until the
last third of the life of the Jarmo village. Throughout the duration
of the village, however, its people had experimented with the plastic
qualities of clay. They modeled little figurines of animals and of
human beings in clay; one type of human figurine they favored was that
of a markedly pregnant woman, probably the expression of some sort of
fertility spirit. They provided their house floors with baked-in-place
depressions, either as basins or hearths, and later with domed ovens of
clay. As we've noted, the houses themselves were of clay or mud; one
could almost say they were built up like a house-sized pot. Then,
finally, the idea of making portable pottery itself appeared, although
I very much doubt that the people of the Jarmo village discovered the
art.
On the other hand, the old tradition of making flint blades and
microlithic tools was still very strong at Jarmo. The sickle-blade was
made in quantities, but so also were many of the much older tool types.
Strangely enough, it is within this age-old category of chipped stone
tools that we see one of the clearest pointers to a newer age. Many of
the Jarmo chipped stone tools--microliths--were made of obsidian, a
black volcanic natural glass. The obsidian beds nearest to Jarmo are
over three hundred miles to the north. Already a bulk carrying trade
had been established--the forerunner of commerce--and the routes were
set by which, in later times, the metal trade was to move.
There are now twelve radioactive carbon "dates" from Jarmo. The most
reasonable cluster of determinations averages to about 6750 ± 200
B.C., although there is a completely unreasonable range of "dates"
running from 3250 to 9250 B.C.! _If_ I am right in what I take to be
"reasonable," the first flush of the food-producing revolution had been
achieved almost nine thousand years ago.
HASSUNA, IN UPPER MESOPOTAMIAN IRAQ
We are not sure just how soon after Jarmo the next assemblage of Iraqi
material is to be placed. I do not think the time was long, and there
are a few hints that detailed habits in the making of pottery and
ground stone tools were actually continued from Jarmo times into the
time of the next full assemblage. This is called after a site named
Hassuna, a few miles to the south and west of modern Mosul. We also
have Hassunan type materials from several other sites in the same
general region. It is probably too soon to make generalizations about
it, but the Hassunan sites seem to cluster at slightly lower elevations
than those we have been talking about so far.
The catalogue of the Hassuna assemblage is of course more full and
elaborate than that of Jarmo. The Iraqi government's archeologists
who dug Hassuna itself exposed evidence of increasing architectural
know-how. The walls of houses were still formed of puddled mud;
sun-dried bricks appear only in later periods. There were now several
different ways of making and decorating pottery vessels. One style of
pottery painting, called the Samarran style, is an extremely handsome
one and must have required a great deal of concentration and excellence
of draftsmanship. On the other hand, the old habits for the preparation
of good chipped stone tools--still apparent at Jarmo--seem to have
largely disappeared by Hassunan times. The flint work of the Hassunan
catalogue is, by and large, a wretched affair. We might guess that the
kinaesthetic concentration of the Hassuna craftsmen now went into other
categories; that is, they suddenly discovered they might have more fun
working with the newer materials. It's a shame, for example, that none
of their weaving is preserved for us.
The two available radiocarbon determinations from Hassunan contexts
stand at about 5100 and 5600 B.C. ± 250 years.
OTHER EARLY VILLAGE SITES IN THE NUCLEAR AREA
I'll now name and very briefly describe a few of the other early
village assemblages either in or adjacent to the hilly flanks of the
crescent. Unfortunately, we do not have radioactive carbon dates for
many of these materials. We may guess that some particular assemblage,
roughly comparable to that of Hassuna, for example, must reflect a
culture which lived at just about the same time as that of Hassuna. We
do this guessing on the basis of the general similarity and degree of
complexity of the Sears Roebuck catalogues of the particular assemblage
and that of Hassuna. We suppose that for sites near at hand and of a
comparable cultural level, as indicated by their generally similar
assemblages, the dating must be about the same. We may also know that
in a general stratigraphic sense, the sites in question may both appear
at the bottom of the ascending village sequence in their respective
areas. Without a number of consistent radioactive carbon dates, we
cannot be precise about priorities.
[Illustration: SKETCH OF HASSUNA ASSEMBLAGE
POTTERY
POTTERY OBJECTS
CHIPPED STONE
BONE
GROUND STONE
ARCHITECTURE
REED MATTING
BURIAL]
The ancient mound at Jericho, in the Dead Sea valley in Palestine,
yields some very interesting material. Its catalogue somewhat resembles
that of Jarmo, especially in the sense that there is a fair depth
of deposit without portable pottery vessels. On the other hand, the
architecture of Jericho is surprisingly complex, with traces of massive
stone fortification walls and the general use of formed sun-dried
mud brick. Jericho lies in a somewhat strange and tropically lush
ecological niche, some seven hundred feet below sea level; it is
geographically within the hilly-flanks zone but environmentally not
part of it.
Several radiocarbon "dates" for Jericho fall within the range of those
I find reasonable for Jarmo, and their internal statistical consistency
is far better than that for the Jarmo determinations. It is not yet
clear exactly what this means.
The mound at Jericho (Tell es-Sultan) contains a remarkably
fine sequence, which perhaps does not have the gap we noted in
Iraqi-Kurdistan between the Karim Shahir group and Jarmo. While I am
not sure that the Jericho sequence will prove valid for those parts
of Palestine outside the special Dead Sea environmental niche, the
sequence does appear to proceed from the local variety of Natufian into
that of a very well settled community. So far, we have little direct
evidence for the food-production basis upon which the Jericho people
subsisted.
There is an early village assemblage with strong characteristics of its
own in the land bordering the northeast corner of the Mediterranean
Sea, where Syria and the Cilician province of Turkey join. This early
Syro-Cilician assemblage must represent a general cultural pattern
which was at least in part contemporary with that of the Hassuna
assemblage. These materials from the bases of the mounds at Mersin, and
from Judaidah in the Amouq plain, as well as from a few other sites,
represent the remains of true villages. The walls of their houses were
built of puddled mud, but some of the house foundations were of stone.
Several different kinds of pottery were made by the people of these
villages. None of it resembles the pottery from Hassuna or from the
upper levels of Jarmo or Jericho. The Syro-Cilician people had not
lost their touch at working flint. An important southern variation of
the Syro-Cilician assemblage has been cleared recently at Byblos, a
port town famous in later Phoenician times. There are three radiocarbon
determinations which suggest that the time range for these developments
was in the sixth or early fifth millennium B.C.
It would be fascinating to search for traces of even earlier
village-farming communities and for the remains of the incipient
cultivation era, in the Syro-Cilician region.
THE IRANIAN PLATEAU AND THE NILE VALLEY
The map on page 125 shows some sites which lie either outside or in
an extension of the hilly-flanks zone proper. From the base of the
great mound at Sialk on the Iranian plateau came an assemblage of
early village material, generally similar, in the kinds of things it
contained, to the catalogues of Hassuna and Judaidah. The details of
how things were made are different; the Sialk assemblage represents
still another cultural pattern. I suspect it appeared a bit later
in time than did that of Hassuna. There is an important new item in
the Sialk catalogue. The Sialk people made small drills or pins of
hammered copper. Thus the metallurgist's specialized craft had made its
appearance.
There is at least one very early Iranian site on the inward slopes
of the hilly-flanks zone. It is the earlier of two mounds at a place
called Bakun, in southwestern Iran; the results of the excavations
there are not yet published and we only know of its coarse and
primitive pottery. I only mention Bakun because it helps us to plot the
extent of the hilly-flanks zone villages on the map.
The Nile Valley lies beyond the peculiar environmental zone of the
hilly flanks of the crescent, and it is probable that the earliest
village-farming communities in Egypt were established by a few people
who wandered into the Nile delta area from the nuclear area. The
assemblage which is most closely comparable to the catalogue of Hassuna
or Judaidah, for example, is that from little settlements along the
shore of the Fayum lake. The Fayum materials come mainly from grain
bins or silos. Another site, Merimde, in the western part of the Nile
delta, shows the remains of a true village, but it may be slightly
later than the settlement of the Fayum. There are radioactive carbon
"dates" for the Fayum materials at about 4275 B.C. ± 320 years, which
is almost fifteen hundred years later than the determinations suggested
for the Hassunan or Syro-Cilician assemblages. I suspect that this
is a somewhat over-extended indication of the time it took for the
generalized cultural pattern of village-farming community life to
spread from the nuclear area down into Egypt, but as yet we have no way
of testing these matters.
In this same vein, we have two radioactive carbon dates for an
assemblage from sites near Khartoum in the Sudan, best represented by
the mound called Shaheinab. The Shaheinab catalogue roughly corresponds
to that of the Fayum; the distance between the two places, as the Nile
flows, is roughly 1,500 miles. Thus it took almost a thousand years for
the new way of life to be carried as far south into Africa as Khartoum;
the two Shaheinab "dates" average about 3300 B.C. ± 400 years.
If the movement was up the Nile (southward), as these dates suggest,
then I suspect that the earliest available village material of middle
Egypt, the so-called Tasian, is also later than that of the Fayum. The
Tasian materials come from a few graves near a village called Deir
Tasa, and I have an uncomfortable feeling that the Tasian "assemblage"
may be mainly an artificial selection of poor examples of objects which
belong in the following range of time.
SPREAD IN TIME AND SPACE
There are now two things we can do; in fact, we have already begun to
do them. We can watch the spread of the new way of life upward through
time in the nuclear area. We can also see how the new way of life
spread outward in space from the nuclear area, as time went on. There
is good archeological evidence that both these processes took place.
For the hill country of northeastern Iraq, in the nuclear area, we
have already noticed how the succession (still with gaps) from Karim
Shahir, through M'lefaat and Jarmo, to Hassuna can be charted (see
chart, p. 111). In the next chapter, we shall continue this charting
and description of what happened in Iraq upward through time. We also
watched traces of the new way of life move through space up the Nile
into Africa, to reach Khartoum in the Sudan some thirty-five hundred
years later than we had seen it at Jarmo or Jericho. We caught glimpses
of it in the Fayum and perhaps at Tasa along the way.
For the remainder of this chapter, I shall try to suggest briefly for
you the directions taken by the spread of the new way of life from the
nuclear area in the Near East. First, let me make clear again that
I _do not_ believe that the village-farming community way of life
was invented only once and in the Near East. It seems to me that the
evidence is very clear that a separate experiment arose in the New
World. For China, the question of independence or borrowing--in the
appearance of the village-farming community there--is still an open
one. In the last chapter, we noted the probability of an independent
nuclear area in southeastern Asia. Professor Carl Sauer strongly
champions the great importance of this area as _the_ original center
of agricultural pursuits, as a kind of "cradle" of all incipient eras
of the Old World at least. While there is certainly not the slightest
archeological evidence to allow us to go that far, we may easily expect
that an early southeast Asian development would have been felt in
China. However, the appearance of the village-farming community in the
northwest of India, at least, seems to have depended on the earlier
development in the Near East. It is also probable that ideas of the new
way of life moved well beyond Khartoum in Africa.
THE SPREAD OF THE VILLAGE-FARMING COMMUNITY WAY OF LIFE INTO EUROPE
How about Europe? I won't give you many details. You can easily imagine
that the late prehistoric prelude to European history is a complicated
affair. We all know very well how complicated an area Europe is now,
with its welter of different languages and cultures. Remember, however,
that a great deal of archeology has been done on the late prehistory of
Europe, and very little on that of further Asia and Africa. If we knew
as much about these areas as we do of Europe, I expect we'd find them
just as complicated.
This much is clear for Europe, as far as the spread of the
village-community way of life is concerned. The general idea and much
of the know-how and the basic tools of food-production moved from the
Near East to Europe. So did the plants and animals which had been
domesticated; they were not naturally at home in Europe, as they were
in western Asia. I do not, of course, mean that there were traveling
salesmen who carried these ideas and things to Europe with a commercial
gleam in their eyes. The process took time, and the ideas and things
must have been passed on from one group of people to the next. There
was also some actual movement of peoples, but we don�t know the size of
the groups that moved.
The story of the "colonization" of Europe by the first farmers is
thus one of (1) the movement from the eastern Mediterranean lands
of some people who were farmers; (2) the spread of ideas and things
beyond the Near East itself and beyond the paths along which the
"colonists" moved; and (3) the adaptations of the ideas and things
by the indigenous "Forest folk", about whose "receptiveness" Professor
Mathiassen speaks (p. 97). It is important to note that the resulting
cultures in the new European environment were European, not Near
Eastern. The late Professor Childe remarked that "the peoples of the
West were not slavish imitators; they adapted the gifts from the East
... into a new and organic whole capable of developing on its own
original lines."
THE WAYS TO EUROPE
Suppose we want to follow the traces of those earliest village-farmers
who did travel from western Asia into Europe. Let us start from
Syro-Cilicia, that part of the hilly-flanks zone proper which lies in
the very northeastern corner of the Mediterranean. Three ways would be
open to us (of course we could not be worried about permission from the
Soviet authorities!). We would go north, or north and slightly east,
across Anatolian Turkey, and skirt along either shore of the Black Sea
or even to the east of the Caucasus Mountains along the Caspian Sea,
to reach the plains of Ukrainian Russia. From here, we could march
across eastern Europe to the Baltic and Scandinavia, or even hook back
southwestward to Atlantic Europe.
Our second way from Syro-Cilicia would also lie over Anatolia, to the
northwest, where we would have to swim or raft ourselves over the
Dardanelles or the Bosphorus to the European shore. Then we would bear
left toward Greece, but some of us might turn right again in Macedonia,
going up the valley of the Vardar River to its divide and on down
the valley of the Morava beyond, to reach the Danube near Belgrade
in Jugoslavia. Here we would turn left, following the great river
valley of the Danube up into central Europe. We would have a number of
tributary valleys to explore, or we could cross the divide and go down
the valley of the Rhine to the North Sea.
Our third way from Syro-Cilicia would be by sea. We would coast along
southern Anatolia and visit Cyprus, Crete, and the Aegean islands on
our way to Greece, where, in the north, we might meet some of those who
had taken the second route. From Greece, we would sail on to Italy and
the western isles, to reach southern France and the coasts of Spain.
Eventually a few of us would sail up the Atlantic coast of Europe, to
reach western Britain and even Ireland.
[Illustration: PROBABLE ROUTES AND TIMING IN THE SPREAD OF THE
VILLAGE-FARMING COMMUNITY WAY OF LIFE FROM THE NEAR EAST TO EUROPE]
Of course none of us could ever take these journeys as the first
farmers took them, since the whole course of each journey must have
lasted many lifetimes. The date given to the assemblage called Windmill
Hill, the earliest known trace of village-farming communities in
England, is about 2500 B.C. I would expect about 5500 B.C. to be a
safe date to give for the well-developed early village communities of
Syro-Cilicia. We suspect that the spread throughout Europe did not
proceed at an even rate. Professor Piggott writes that "at a date
probably about 2600 B.C., simple agricultural communities were being
established in Spain and southern France, and from the latter region a
spread northwards can be traced ... from points on the French seaboard
of the [English] Channel ... there were emigrations of a certain number
of these tribes by boat, across to the chalk lands of Wessex and Sussex
[in England], probably not more than three or four generations later
than the formation of the south French colonies."
New radiocarbon determinations are becoming available all the
time--already several suggest that the food-producing way of life
had reached the lower Rhine and Holland by 4000 B.C. But not all
prehistorians accept these "dates," so I do not show them on my map
(p. 139).
THE EARLIEST FARMERS OF ENGLAND
To describe the later prehistory of all Europe for you would take
another book and a much larger one than this is. Therefore, I have
decided to give you only a few impressions of the later prehistory of
Britain. Of course the British Isles lie at the other end of Europe
from our base-line in western Asia. Also, they received influences
along at least two of the three ways in which the new way of life
moved into Europe. We will look at more of their late prehistory in a
following chapter: here, I shall speak only of the first farmers.
The assemblage called Windmill Hill, which appears in the south of
England, exhibits three different kinds of structures, evidence of
grain-growing and of stock-breeding, and some distinctive types of
pottery and stone implements. The most remarkable type of structure
is the earthwork enclosures which seem to have served as seasonal
cattle corrals. These enclosures were roughly circular, reached over
a thousand feet in diameter, and sometimes included two or three
concentric sets of banks and ditches. Traces of oblong timber houses
have been found, but not within the enclosures. The second type of
structure is mine-shafts, dug down into the chalk beds where good
flint for the making of axes or hoes could be found. The third type
of structure is long simple mounds or "unchambered barrows," in one
end of which burials were made. It has been commonly believed that the
Windmill Hill assemblage belonged entirely to the cultural tradition
which moved up through France to the Channel. Professor Piggott is now
convinced, however, that important elements of Windmill Hill stem from
northern Germany and Denmark--products of the first way into Europe
from the east.
The archeological traces of a second early culture are to be found
in the west of England, western and northern Scotland, and most of
Ireland. The bearers of this culture had come up the Atlantic coast
by sea from southern France and Spain. The evidence they have left us
consists mainly of tombs and the contents of tombs, with only very
rare settlement sites. The tombs were of some size and received the
bodies of many people. The tombs themselves were built of stone, heaped
over with earth; the stones enclosed a passage to a central chamber
("passage graves"), or to a simple long gallery, along the sides of
which the bodies were laid ("gallery graves"). The general type of
construction is called "megalithic" (= great stone), and the whole
earth-mounded structure is often called a _barrow_. Since many have
proper chambers, in one sense or another, we used the term "unchambered
barrow" above to distinguish those of the Windmill Hill type from these
megalithic structures. There is some evidence for sacrifice, libations,
and ceremonial fires, and it is clear that some form of community
ritual was focused on the megalithic tombs.
The cultures of the people who produced the Windmill Hill assemblage
and of those who made the megalithic tombs flourished, at least in
part, at the same time. Although the distributions of the two different
types of archeological traces are in quite different parts of the
country, there is Windmill Hill pottery in some of the megalithic
tombs. But the tombs also contain pottery which seems to have arrived
with the tomb builders themselves.
The third early British group of antiquities of this general time
(following 2500 B.C.) comes from sites in southern and eastern England.
It is not so certain that the people who made this assemblage, called
Peterborough, were actually farmers. While they may on occasion have
practiced a simple agriculture, many items of their assemblage link
them closely with that of the "Forest folk" of earlier times in
England and in the Baltic countries. Their pottery is decorated with
impressions of cords and is quite different from that of Windmill Hill
and the megalithic builders. In addition, the distribution of their
finds extends into eastern Britain, where the other cultures have left
no trace. The Peterborough people had villages with semi-subterranean
huts, and the bones of oxen, pigs, and sheep have been found in a few
of these. On the whole, however, hunting and fishing seem to have been
their vital occupations. They also established trade routes especially
to acquire the raw material for stone axes.
A probably slightly later culture, whose traces are best known from
Skara Brae on Orkney, also had its roots in those cultures of the
Baltic area which fused out of the meeting of the "Forest folk" and
the peoples who took the eastern way into Europe. Skara Brae is very
well preserved, having been built of thin stone slabs about which
dune-sand drifted after the village died. The individual houses, the
bedsteads, the shelves, the chests for clothes and oddments--all built
of thin stone-slabs--may still be seen in place. But the Skara Brae
people lived entirely by sheep- and cattle-breeding, and by catching
shellfish. Neither grain nor the instruments of agriculture appeared at
Skara Brae.
THE EUROPEAN ACHIEVEMENT
The above is only a very brief description of what went on in Britain
with the arrival of the first farmers. There are many interesting
details which I have omitted in order to shorten the story.
I believe some of the difficulty we have in understanding the
establishment of the first farming communities in Europe is with
the word �colonization.� We have a natural tendency to think of
�colonization� as it has happened within the last few centuries. In the
case of the colonization of the Americas, for example, the colonists
came relatively quickly, and in increasingly vast numbers. They had
vastly superior technical, political, and war-making skills, compared
with those of the Indians. There was not much mixing with the Indians.
The case in Europe five or six thousand years ago must have been very
different. I wonder if it is even proper to call people "colonists"
who move some miles to a new region, settle down and farm it for some
years, then move on again, generation after generation? The ideas and
the things which these new people carried were only _potentially_
superior. The ideas and things and the people had to prove themselves
in their adaptation to each new environment. Once this was done another
link to the chain would be added, and then the forest-dwellers and
other indigenous folk of Europe along the way might accept the new
ideas and things. It is quite reasonable to expect that there must have
been much mixture of the migrants and the indigenes along the way; the
Peterborough and Skara Brae assemblages we mentioned above would seem
to be clear traces of such fused cultures. Sometimes, especially if the
migrants were moving by boat, long distances may have been covered in
a short time. Remember, however, we seem to have about three thousand
years between the early Syro-Cilician villages and Windmill Hill.
Let me repeat Professor Childe again. "The peoples of the West were
not slavish imitators: they adapted the gifts from the East ... into
a new and organic whole capable of developing on its own original
lines." Childe is of course completely conscious of the fact that his
"peoples of the West" were in part the descendants of migrants who came
originally from the "East," bringing their "gifts" with them. This
was the late prehistoric achievement of Europe--to take new ideas and
things and some migrant peoples and, by mixing them with the old in its
own environments, to forge a new and unique series of cultures.
What we know of the ways of men suggests to us that when the details
of the later prehistory of further Asia and Africa are learned, their
stories will be just as exciting.
THE Conquest of Civilization
[Illustration]
Now we must return to the Near East again. We are coming to the point
where history is about to begin. I am going to stick pretty close
to Iraq and Egypt in this chapter. These countries will perhaps be
the most interesting to most of us, for the foundations of western
civilization were laid in the river lands of the Tigris and Euphrates
and of the Nile. I shall probably stick closest of all to Iraq, because
things first happened there and also because I know it best.
There is another interesting thing, too. We have seen that the first
experiment in village-farming took place in the Near East. So did
the first experiment in civilization. Both experiments "took." The
traditions we live by today are based, ultimately, on those ancient
beginnings in food-production and civilization in the Near East.
WHAT "CIVILIZATION" MEANS
I shall not try to define "civilization" for you; rather, I shall
tell you what the word brings to my mind. To me civilization means
urbanization: the fact that there are cities. It means a formal
political set-up--that there are kings or governing bodies that the
people have set up. It means formal laws--rules of conduct--which the
government (if not the people) believes are necessary. It probably
means that there are formalized projects--roads, harbors, irrigation
canals, and the like--and also some sort of army or police force
to protect them. It means quite new and different art forms. It
also usually means there is writing. (The people of the Andes--the
Incas--had everything which goes to make up a civilization but formal
writing. I can see no reason to say they were not civilized.) Finally,
as the late Professor Redfield reminded us, civilization seems to bring
with it the dawn of a new kind of moral order.
In different civilizations, there may be important differences in the
way such things as the above are managed. In early civilizations, it is
usual to find religion very closely tied in with government, law, and
so forth. The king may also be a high priest, or he may even be thought
of as a god. The laws are usually thought to have been given to the
people by the gods. The temples are protected just as carefully as the
other projects.
CIVILIZATION IMPOSSIBLE WITHOUT FOOD-PRODUCTION
Civilizations have to be made up of many people. Some of the people
live in the country; some live in very large towns or cities. Classes
of society have begun. There are officials and government people; there
are priests or religious officials; there are merchants and traders;
there are craftsmen, metal-workers, potters, builders, and so on; there
are also farmers, and these are the people who produce the food for the
whole population. It must be obvious that civilization cannot exist
without food-production and that food-production must also be at a
pretty efficient level of village-farming before civilization can even
begin.
But people can be food-producing without being civilized. In many
parts of the world this is still the case. When the white men first
came to America, the Indians in most parts of this hemisphere were
food-producers. They grew corn, potatoes, tomatoes, squash, and many
other things the white men had never eaten before. But only the Aztecs
of Mexico, the Mayas of Yucatan and Guatemala, and the Incas of the
Andes were civilized.
WHY DIDN'T CIVILIZATION COME TO ALL FOOD-PRODUCERS?
Once you have food-production, even at the well-advanced level of
the village-farming community, what else has to happen before you
get civilization? Many men have asked this question and have failed
to give a full and satisfactory answer. There is probably no _one_
answer. I shall give you my own idea about how civilization _may_ have
come about in the Near East alone. Remember, it is only a guess--a
putting together of hunches from incomplete evidence. It is _not_ meant
to explain how civilization began in any of the other areas--China,
southeast Asia, the Americas--where other early experiments in
civilization went on. The details in those areas are quite different.
Whether certain general principles hold, for the appearance of any
early civilization, is still an open and very interesting question.
WHERE CIVILIZATION FIRST APPEARED IN THE NEAR EAST
You remember that our earliest village-farming communities lay along
the hilly flanks of a great "crescent." (See map on p. 125.)
Professor Breasted's "fertile crescent" emphasized the rich river
valleys of the Nile and the Tigris-Euphrates Rivers. Our hilly-flanks
area of the crescent zone arches up from Egypt through Palestine and
Syria, along southern Turkey into northern Iraq, and down along the
southwestern fringe of Iran. The earliest food-producing villages we
know already existed in this area by about 6750 B.C. (± 200 years).
Now notice that this hilly-flanks zone does not include southern
Mesopotamia, the alluvial land of the lower Tigris and Euphrates in
Iraq, or the Nile Valley proper. The earliest known villages of classic
Mesopotamia and Egypt seem to appear fifteen hundred or more years
after those of the hilly-flanks zone. For example, the early Fayum
village which lies near a lake west of the Nile Valley proper (see p.
135) has a radiocarbon date of 4275 B.C. ± 320 years. It was in the
river lands, however, that the immediate beginnings of civilization
were made.
We know that by about 3200 B.C. the Early Dynastic period had begun
in southern Mesopotamia. The beginnings of writing go back several
hundred years earlier, but we can safely say that civilization had
begun in Mesopotamia by 3200 B.C. In Egypt, the beginning of the First
Dynasty is slightly later, at about 3100 B.C., and writing probably
did not appear much earlier. There is no question but that history and
civilization were well under way in both Mesopotamia and Egypt by 3000
B.C.--about five thousand years ago.
THE HILLY-FLANKS ZONE VERSUS THE RIVER LANDS
Why did these two civilizations spring up in these two river
lands which apparently were not even part of the area where the
village-farming community began? Why didn't we have the first
civilizations in Palestine, Syria, north Iraq, or Iran, where we're
sure food-production had had a long time to develop? I think the
probable answer gives a clue to the ways in which civilization began in
Egypt and Mesopotamia.
The land in the hilly flanks is of a sort which people can farm without
too much trouble. There is a fairly fertile coastal strip in Palestine
and Syria. There are pleasant mountain slopes, streams running out to
the sea, and rain, at least in the winter months. The rain belt and the
foothills of the Turkish mountains also extend to northern Iraq and on
to the Iranian plateau. The Iranian plateau has its mountain valleys,
streams, and some rain. These hilly flanks of the "crescent," through
most of its arc, are almost made-to-order for beginning farmers. The
grassy slopes of the higher hills would be pasture for their herds
and flocks. As soon as the earliest experiments with agriculture and
domestic animals had been successful, a pleasant living could be
made--and without too much trouble.
I should add here again, that our evidence points increasingly to a
climate for those times which is very little different from that for
the area today. Now look at Egypt and southern Mesopotamia. Both are
lands without rain, for all intents and purposes. Both are lands with
rivers that have laid down very fertile soil--soil perhaps superior to
that in the hilly flanks. But in both lands, the rivers are of no great
aid without some control.
The Nile floods its banks once a year, in late September or early
October. It not only soaks the narrow fertile strip of land on either
side; it lays down a fresh layer of new soil each year. Beyond the
fertile strip on either side rise great cliffs, and behind them is the
desert. In its natural, uncontrolled state, the yearly flood of the
Nile must have caused short-lived swamps that were full of crocodiles.
After a short time, the flood level would have dropped, the water and
the crocodiles would have run back into the river, and the swamp plants
would have become parched and dry.
The Tigris and the Euphrates of Mesopotamia are less likely to flood
regularly than the Nile. The Tigris has a shorter and straighter course
than the Euphrates; it is also the more violent river. Its banks are
high, and when the snows melt and flow into all of its tributary rivers
it is swift and dangerous. The Euphrates has a much longer and more
curving course and few important tributaries. Its banks are lower and
it is less likely to flood dangerously. The land on either side and
between the two rivers is very fertile, south of the modern city of
Baghdad. Unlike the Nile Valley, neither the Tigris nor the Euphrates
is flanked by cliffs. The land on either side of the rivers stretches
out for miles and is not much rougher than a poor tennis court.
THE RIVERS MUST BE CONTROLLED
The real trick in both Egypt and Mesopotamia is to make the rivers work
for you. In Egypt, this is a matter of building dikes and reservoirs
that will catch and hold the Nile flood. In this way, the water is held
and allowed to run off over the fields as it is needed. In Mesopotamia,
it is a matter of taking advantage of natural river channels and branch
channels, and of leading ditches from these onto the fields.
Obviously, we can no longer find the first dikes or reservoirs of
the Nile Valley, or the first canals or ditches of Mesopotamia. The
same land has been lived on far too long for any traces of the first
attempts to be left; or, especially in Egypt, it has been covered by
the yearly deposits of silt, dropped by the river floods. But we're
pretty sure the first food-producers of Egypt and southern Mesopotamia
must have made such dikes, canals, and ditches. In the first place,
there can't have been enough rain for them to grow things otherwise.
In the second place, the patterns for such projects seem to have been
pretty well set by historic times.
CONTROL OF THE RIVERS THE BUSINESS OF EVERYONE
Here, then, is a _part_ of the reason why civilization grew in Egypt
and Mesopotamia first--not in Palestine, Syria, or Iran. In the latter
areas, people could manage to produce their food as individuals. It
wasn�t too hard; there were rain and some streams, and good pasturage
for the animals even if a crop or two went wrong. In Egypt and
Mesopotamia, people had to put in a much greater amount of work, and
this work couldn�t be individual work. Whole villages or groups of
people had to turn out to fix dikes or dig ditches. The dikes had to be
repaired and the ditches carefully cleared of silt each year, or they
would become useless.
There also had to be hard and fast rules. The person who lived nearest
the ditch or the reservoir must not be allowed to take all the water
and leave none for his neighbors. It was not only a business of
learning to control the rivers and of making their waters do the
farmer's work. It also meant controlling men. But once these men had
managed both kinds of controls, what a wonderful yield they had! The
soil was already fertile, and the silt which came in the floods and
ditches kept adding fertile soil.
THE GERM OF CIVILIZATION IN EGYPT AND MESOPOTAMIA
This learning to work together for the common good was the real germ of
the Egyptian and the Mesopotamian civilizations. The bare elements of
civilization were already there: the need for a governing hand and for
laws to see that the communities' work was done and that the water was
justly shared. You may object that there is a sort of chicken and egg
paradox in this idea. How could the people set up the rules until they
had managed to get a way to live, and how could they manage to get a
way to live until they had set up the rules? I think that small groups
must have moved down along the mud-flats of the river banks quite
early, making use of naturally favorable spots, and that the rules grew
out of such cases. It would have been like the hand-in-hand growth of
automobiles and paved highways in the United States.
Once the rules and the know-how did get going, there must have been a
constant interplay of the two. Thus, the more the crops yielded, the
richer and better-fed the people would have been, and the more the
population would have grown. As the population grew, more land would
have needed to be flooded or irrigated, and more complex systems of
dikes, reservoirs, canals, and ditches would have been built. The more
complex the system, the more necessity for work on new projects and for
the control of their use.... And so on....
What I have just put down for you is a guess at the manner of growth of
some of the formalized systems that go to make up a civilized society.
My explanation has been pointed particularly at Egypt and Mesopotamia.
I have already told you that the irrigation and water-control part of
it does not apply to the development of the Aztecs or the Mayas, or
perhaps anybody else. But I think that a fair part of the story of
Egypt and Mesopotamia must be as I've just told you.
I am particularly anxious that you do _not_ understand me to mean that
irrigation _caused_ civilization. I am sure it was not that simple at
all. For, in fact, a complex and highly engineered irrigation system
proper did not come until later times. Let�s say rather that the simple
beginnings of irrigation allowed and in fact encouraged a great number
of things in the technological, political, social, and moral realms of
culture. We do not yet understand what all these things were or how
they worked. But without these other aspects of culture, I do not
think that urbanization and civilization itself could have come into
being.
THE ARCHEOLOGICAL SEQUENCE TO CIVILIZATION IN IRAQ
We last spoke of the archeological materials of Iraq on page 130,
where I described the village-farming community of Hassunan type. The
Hassunan type villages appear in the hilly-flanks zone and in the
rolling land adjacent to the Tigris in northern Iraq. It is probable
that even before the Hassuna pattern of culture lived its course, a
new assemblage had been established in northern Iraq and Syria. This
assemblage is called Halaf, after a site high on a tributary of the
Euphrates, on the Syro-Turkish border.
[Illustration: SKETCH OF SELECTED ITEMS OF HALAFIAN ASSEMBLAGE
BEADS AND PENDANTS
POTTERY MOTIFS
POTTERY]
The Halafian assemblage is incompletely known. The culture it
represents included a remarkably handsome painted pottery.
Archeologists have tended to be so fascinated with this pottery that
they have bothered little with the rest of the Halafian assemblage. We
do know that strange stone-founded houses, with plans like those of the
popular notion of an Eskimo igloo, were built. Like the pottery of the
Samarran style, which appears as part of the Hassunan assemblage (see
p. 131), the Halafian painted pottery implies great concentration and
excellence of draftsmanship on the part of the people who painted it.
We must mention two very interesting sites adjacent to the mud-flats of
the rivers, half way down from northern Iraq to the classic alluvial
Mesopotamian area. One is Baghouz on the Euphrates; the other is
Samarra on the Tigris (see map, p. 125). Both these sites yield the
handsome painted pottery of the style called Samarran: in fact it
is Samarra which gives its name to the pottery. Neither Baghouz nor
Samarra has completely Hassunan types of assemblages, and at Samarra
there are a few pots of proper Halafian style. I suppose that Samarra
and Baghouz give us glimpses of those early farmers who had begun to
finger their way down the mud-flats of the river banks toward the
fertile but yet untilled southland.
CLASSIC SOUTHERN MESOPOTAMIA FIRST OCCUPIED
Our next step is into the southland proper. Here, deep in the core of
the mound which later became the holy Sumerian city of Eridu, Iraqi
archeologists uncovered a handsome painted pottery. Pottery of the same
type had been noticed earlier by German archeologists on the surface
of a small mound, awash in the spring floods, near the remains of the
Biblical city of Erech (Sumerian = Uruk; Arabic = Warka). This "Eridu"
pottery, which is about all we have of the assemblage of the people who
once produced it, may be seen as a blend of the Samarran and Halafian
painted pottery styles. This may over-simplify the case, but as yet we
do not have much evidence to go on. The idea does at least fit with my
interpretation of the meaning of Baghouz and Samarra as way-points on
the mud-flats of the rivers half way down from the north.
My colleague, Robert Adams, believes that there were certainly
riverine-adapted food-collectors living in lower Mesopotamia. The
presence of such would explain why the Eridu assemblage is not simply
the sum of the Halafian and Samarran assemblages. But the domesticated
plants and animals and the basic ways of food-production must have
come from the hilly-flanks country in the north.
Above the basal Eridu levels, and at a number of other sites in the
south, comes a full-fledged assemblage called Ubaid. Incidentally,
there is an aspect of the Ubaidian assemblage in the north as well. It
seems to move into place before the Halaf manifestation is finished,
and to blend with it. The Ubaidian assemblage in the south is by far
the more spectacular. The development of the temple has been traced
at Eridu from a simple little structure to a monumental building some
62 feet long, with a pilaster-decorated façade and an altar in its
central chamber. There is painted Ubaidian pottery, but the style is
hurried and somewhat careless and gives the _impression_ of having been
a cheap mass-production means of decoration when compared with the
carefully drafted styles of Samarra and Halaf. The Ubaidian people made
other items of baked clay: sickles and axes of very hard-baked clay
are found. The northern Ubaidian sites have yielded tools of copper,
but metal tools of unquestionable Ubaidian find-spots are not yet
available from the south. Clay figurines of human beings with monstrous
turtle-like faces are another item in the southern Ubaidian assemblage.
[Illustration: SKETCH OF SELECTED ITEMS OF UBAIDIAN ASSEMBLAGE]
There is a large Ubaid cemetery at Eridu, much of it still awaiting
excavation. The few skeletons so far tentatively studied reveal a
completely modern type of "Mediterraneanoid"; the individuals whom the
skeletons represent would undoubtedly blend perfectly into the modern
population of southern Iraq. What the Ubaidian assemblage says to us is
that these people had already adapted themselves and their culture to
the peculiar riverine environment of classic southern Mesopotamia. For
example, hard-baked clay axes will chop bundles of reeds very well, or
help a mason dress his unbaked mud bricks, and there were only a few
soft and pithy species of trees available. The Ubaidian levels of Eridu
yield quantities of date pits; that excellent and characteristically
Iraqi fruit was already in use. The excavators also found the clay
model of a ship, with the stepping-point for a mast, so that Sinbad the
Sailor must have had his antecedents as early as the time of Ubaid.
The bones of fish, which must have flourished in the larger canals as
well as in the rivers, are common in the Ubaidian levels and thereafter.
THE UBAIDIAN ACHIEVEMENT
On present evidence, my tendency is to see the Ubaidian assemblage
in southern Iraq as the trace of a new era. I wish there were more
evidence, but what we have suggests this to me. The culture of southern
Ubaid soon became a culture of towns--of centrally located towns with
some rural villages about them. The town had a temple and there must
have been priests. These priests probably had political and economic
functions as well as religious ones, if the somewhat later history of
Mesopotamia may suggest a pattern for us. Presently the temple and its
priesthood were possibly the focus of the market; the temple received
its due, and may already have had its own lands and herds and flocks.
The people of the town, undoubtedly at least in consultation with the
temple administration, planned and maintained the simple irrigation
ditches. As the system flourished, the community of rural farmers would
have produced more than sufficient food. The tendency for specialized
crafts to develop--tentative at best at the cultural level of the
earlier village-farming community era--would now have been achieved,
and probably many other specialists in temple administration, water
control, architecture, and trade would also have appeared, as the
surplus food-supply was assured.
Southern Mesopotamia is not a land rich in natural resources other
than its fertile soil. Stone, good wood for construction, metal, and
innumerable other things would have had to be imported. Grain and
dates--although both are bulky and difficult to transport--and wool and
woven stuffs must have been the mediums of exchange. Over what area did
the trading net-work of Ubaid extend? We start with the idea that the
Ubaidian assemblage is most richly developed in the south. We assume, I
think, correctly, that it represents a cultural flowering of the south.
On the basis of the pottery of the still elusive "Eridu" immigrants
who had first followed the rivers into alluvial Mesopotamia, we get
the notion that the characteristic painted pottery style of Ubaid
was developed in the southland. If this reconstruction is correct
then we may watch with interest where the Ubaid pottery-painting
tradition spread. We have already mentioned that there is a substantial
assemblage of (and from the southern point of view, _fairly_ pure)
Ubaidian material in northern Iraq. The pottery appears all along the
Iranian flanks, even well east of the head of the Persian Gulf, and
ends in a later and spectacular flourish in an extremely handsome
painted style called the "Susa" style. Ubaidian pottery has been noted
up the valleys of both of the great rivers, well north of the Iraqi
and Syrian borders on the southern flanks of the Anatolian plateau.
It reaches the Mediterranean Sea and the valley of the Orontes in
Syria, and it may be faintly reflected in the painted style of a
site called Ghassul, on the east bank of the Jordan in the Dead Sea
Valley. Over this vast area--certainly in all of the great basin of
the Tigris-Euphrates drainage system and its natural extensions--I
believe we may lay our fingers on the traces of a peculiar way of
decorating pottery, which we call Ubaidian. This cursive and even
slap-dash decoration, it appears to me, was part of a new cultural
tradition which arose from the adjustments which immigrant northern
farmers first made to the new and challenging environment of southern
Mesopotamia. But exciting as the idea of the spread of influences of
the Ubaid tradition in space may be, I believe you will agree that the
consequences of the growth of that tradition in southern Mesopotamia
itself, as time passed, are even more important.
THE WARKA PHASE IN THE SOUTH
So far, there are only two radiocarbon determinations for the Ubaidian
assemblage, one from Tepe Gawra in the north and one from Warka in the
south. My hunch would be to use the dates 4500 to 3750 B.C., with a
plus or more probably a minus factor of about two hundred years for
each, as the time duration of the Ubaidian assemblage in southern
Mesopotamia.
Next, much to our annoyance, we have what is almost a temporary
black-out. According to the system of terminology I favor, our next
"assemblage" after that of Ubaid is called the _Warka_ phase, from
the Arabic name for the site of Uruk or Erech. We know it only from
six or seven levels in a narrow test-pit at Warka, and from an even
smaller hole at another site. This "assemblage," so far, is known only
by its pottery, some of which still bears Ubaidian style painting. The
characteristic Warkan pottery is unpainted, with smoothed red or gray
surfaces and peculiar shapes. Unquestionably, there must be a great
deal more to say about the Warkan assemblage, but someone will first
have to excavate it!
THE DAWN OF CIVILIZATION
After our exasperation with the almost unknown Warka interlude,
following the brilliant "false dawn" of Ubaid, we move next to an
assemblage which yields traces of a preponderance of those elements
which we noted (p. 144) as meaning civilization. This assemblage
is that called _Proto-Literate_; it already contains writing. On
the somewhat shaky principle that writing, however early, means
history--and no longer prehistory--the assemblage is named for the
historical implications of its content, and no longer after the name of
the site where it was first found. Since some of the older books used
site-names for this assemblage, I will tell you that the Proto-Literate
includes the latter half of what used to be called the "Uruk period"
_plus_ all of what used to be called the "Jemdet Nasr period." It shows
a consistent development from beginning to end.
I shall, in fact, leave much of the description and the historic
implications of the Proto-Literate assemblage to the conventional
historians. Professor T. J. Jacobsen, reaching backward from the
legends he finds in the cuneiform writings of slightly later times, can
in fact tell you a more complete story of Proto-Literate culture than
I can. It should be enough here if I sum up briefly what the excavated
archeological evidence shows.
We have yet to dig a Proto-Literate site in its entirety, but the
indications are that the sites cover areas the size of small cities.
In architecture, we know of large and monumental temple structures,
which were built on elaborate high terraces. The plans and decoration
of these temples follow the pattern set in the Ubaid phase: the chief
difference is one of size. The German excavators at the site of Warka
reckoned that the construction of only one of the Proto-Literate temple
complexes there must have taken 1,500 men, each working a ten-hour day,
five years to build.
ART AND WRITING
If the architecture, even in its monumental forms, can be seen to
stem from Ubaidian developments, this is not so with our other
evidence of Proto-Literate artistic expression. In relief and applied
sculpture, in sculpture in the round, and on the engraved cylinder
seals--all of which now make their appearance--several completely
new artistic principles are apparent. These include the composition
of subject-matter in groups, commemorative scenes, and especially
the ability and apparent desire to render the human form and face.
Excellent as the animals of the Franco-Cantabrian art may have been
(see p. 85), and however handsome were the carefully drafted
geometric designs and conventionalized figures on the pottery of the
early farmers, there seems to have been, up to this time, a mental
block about the drawing of the human figure and especially the human
face. We do not yet know what caused this self-consciousness about
picturing themselves which seems characteristic of men before the
appearance of civilization. We do know that with civilization, the
mental block seems to have been removed.
Clay tablets bearing pictographic signs are the Proto-Literate
forerunners of cuneiform writing. The earliest examples are not well
understood but they seem to be "devices for making accounts and
for remembering accounts." Different from the later case in Egypt,
where writing appears fully formed in the earliest examples, the
development from simple pictographic signs to proper cuneiform writing
may be traced, step by step, in Mesopotamia. It is most probable
that the development of writing was connected with the temple and
the need for keeping account of the temple�s possessions. Professor
Jacobsen sees writing as a means for overcoming space, time, and the
increasing complications of human affairs: "Literacy, which began
with ... civilization, enhanced mightily those very tendencies in its
development which characterize it as a civilization and mark it off as
such from other types of culture."
[Illustration: RELIEF ON A PROTO-LITERATE STONE VASE, WARKA
Unrolled drawing, with restoration suggested by figures from
contemporary cylinder seals]
While the new principles in art and the idea of writing are not
foreshadowed in the Ubaid phase, or in what little we know of the
Warkan, I do not think we need to look outside southern Mesopotamia
for their beginnings. We do know something of the adjacent areas,
too, and these beginnings are not there. I think we must accept them
as completely new discoveries, made by the people who were developing
the whole new culture pattern of classic southern Mesopotamia. Full
description of the art, architecture, and writing of the Proto-Literate
phase would call for many details. Men like Professor Jacobsen and Dr.
Adams can give you these details much better than I can. Nor shall I do
more than tell you that the common pottery of the Proto-Literate phase
was so well standardized that it looks factory made. There was also
some handsome painted pottery, and there were stone bowls with inlaid
decoration. Well-made tools in metal had by now become fairly common,
and the metallurgist was experimenting with the casting process. Signs
for plows have been identified in the early pictographs, and a wheeled
chariot is shown on a cylinder seal engraving. But if I were forced to
a guess in the matter, I would say that the development of plows and
draft-animals probably began in the Ubaid period and was another of the
great innovations of that time.
The Proto-Literate assemblage clearly suggests a highly developed and
sophisticated culture. While perhaps not yet fully urban, it is on
the threshold of urbanization. There seems to have been a very dense
settlement of Proto-Literate sites in classic southern Mesopotamia,
many of them newly founded on virgin soil where no earlier settlements
had been. When we think for a moment of what all this implies, of the
growth of an irrigation system which must have existed to allow the
flourish of this culture, and of the social and political organization
necessary to maintain the irrigation system, I think we will agree that
at last we are dealing with civilization proper.
FROM PREHISTORY TO HISTORY
Now it is time for the conventional ancient historians to take over
the story from me. Remember this when you read what they write. Their
real base-line is with cultures ruled over by later kings and emperors,
whose writings describe military campaigns and the administration of
laws and fully organized trading ventures. To these historians, the
Proto-Literate phase is still a simple beginning for what is to follow.
If they mention the Ubaid assemblage at all--the one I was so lyrical
about--it will be as some dim and fumbling step on the path to the
civilized way of life.
I suppose you could say that the difference in the approach is that as
a prehistorian I have been looking forward or upward in time, while the
historians look backward to glimpse what I've been describing here. My
base-line was half a million years ago with a being who had little more
than the capacity to make tools and fire to distinguish him from the
animals about him. Thus my point of view and that of the conventional
historian are bound to be different. You will need both if you want to
understand all of the story of men, as they lived through time to the
present.
End of PREHISTORY
[Illustration]
You'll doubtless easily recall your general course in ancient history:
how the Sumerian dynasties of Mesopotamia were supplanted by those of
Babylonia, how the Hittite kingdom appeared in Anatolian Turkey, and
about the three great phases of Egyptian history. The literate kingdom
of Crete arose, and by 1500 B.C. there were splendid fortified Mycenean
towns on the mainland of Greece. This was the time--about the whole
eastern end of the Mediterranean--of what Professor Breasted called the
"first great internationalism," with flourishing trade, international
treaties, and royal marriages between Egyptians, Babylonians, and
Hittites. By 1200 B.C., the whole thing had fragmented: "the peoples of
the sea were restless in their isles," and the great ancient centers in
Egypt, Mesopotamia, and Anatolia were eclipsed. Numerous smaller states
arose--Assyria, Phoenicia, Israel--and the Trojan war was fought.
Finally Assyria became the paramount power of all the Near East,
presently to be replaced by Persia.
A new culture, partaking of older west Asiatic and Egyptian elements,
but casting them with its own tradition into a new mould, arose in
mainland Greece.
I once shocked my Classical colleagues to the core by referring to
Greece as "a second degree derived civilization," but there is much
truth in this. The principles of bronze- and then of iron-working, of
the alphabet, and of many other elements in Greek culture were borrowed
from western Asia. Our debt to the Greeks is too well known for me even
to mention it, beyond recalling to you that it is to Greece we owe the
beginnings of rational or empirical science and thought in general. But
Greece fell in its turn to Rome, and in 55 B.C. Caesar invaded Britain.
I last spoke of Britain on page 142; I had chosen it as my single
example for telling you something of how the earliest farming
communities were established in Europe. Now I will continue with
Britain�s later prehistory, so you may sense something of the end of
prehistory itself. Remember that Britain is simply a single example
we select; the same thing could be done for all the other countries
of Europe, and will be possible also, some day, for further Asia and
Africa. Remember, too, that prehistory in most of Europe runs on for
three thousand or more years _after_ conventional ancient history
begins in the Near East. Britain is a good example to use in showing
how prehistory ended in Europe. As we said earlier, it lies at the
opposite end of Europe from the area of highest cultural achievement in
those times, and should you care to read more of the story in detail,
you may do so in the English language.
METAL USERS REACH ENGLAND
We left the story of Britain with the peoples who made three different
assemblages--the Windmill Hill, the megalith-builders, and the
Peterborough--making adjustments to their environments, to the original
inhabitants of the island, and to each other. They had first arrived
about 2500 B.C., and were simple pastoralists and hoe cultivators who
lived in little village communities. Some of them planted little if any
grain. By 2000 B.C., they were well settled in. Then, somewhere in the
range from about 1900 to 1800 B.C., the traces of the invasion of a new
series of peoples began to appear.
The first newcomers are called the Beaker folk, after the name of a
peculiar form of pottery they made. The beaker type of pottery seems
oldest in Spain, where it occurs with great collective tombs of
megalithic construction and with copper tools. But the Beaker folk who
reached England seem already to have moved first from Spain(?) to the
Rhineland and Holland. While in the Rhineland, and before leaving for
England, the Beaker folk seem to have mixed with the local population
and also with incomers from northeastern Europe whose culture included
elements brought originally from the Near East by the eastern way
through the steppes. This last group has also been named for a peculiar
article in its assemblage; the group is called the Battle-axe folk. A
few Battle-axe folk elements, including, in fact, stone battle-axes,
reached England with the earliest Beaker folk,[6] coming from the
Rhineland.
[6] The British authors use the term "Beaker folk" to mean both
archeological assemblage and human physical type. They speak
of a "... tall, heavy-boned, rugged, and round-headed" strain
which they take to have developed, apparently in the Rhineland,
by a mixture of the original (Spanish?) beaker-makers and
the northeast European battle-axe makers. However, since the
science of physical anthropology is very much in flux at the
moment, and since I am not able to assess the evidence for these
physical types, I _do not_ use the term "folk" in this book with
its usual meaning of standardized physical type. When I use
"folk" here, I mean simply _the makers of a given archeological
assemblage_. The difficulty only comes when assemblages are
named for some item in them; it is too clumsy to make an
adjective of the item and refer to a "beakerian" assemblage.
The Beaker folk settled earliest in the agriculturally fertile south
and east. There seem to have been several phases of Beaker folk
invasions, and it is not clear whether these all came strictly from the
Rhineland or Holland. We do know that their copper daggers and awls
and armlets are more of Irish or Atlantic European than of Rhineland
origin. A few simple habitation sites and many burials of the Beaker
folk are known. They buried their dead singly, sometimes in conspicuous
individual barrows with the dead warrior in his full trappings. The
spectacular element in the assemblage of the Beaker folk is a group
of large circular monuments with ditches and with uprights of wood or
stone. These “henges” became truly monumental several hundred years
later; while they were occasionally dedicated with a burial, they were
not primarily tombs. The effect of the invasion of the Beaker folk
seems to cut across the whole fabric of life in Britain.
[Illustration: BEAKER]
There was, however, a second major element in British life at this
time. It shows itself in the less well understood traces of a group
again called after one of the items in their catalogue, the Food-vessel
folk. There are many burials in these “food-vessel” pots in northern
England, Scotland, and Ireland, and the pottery itself seems to
link back to that of the Peterborough assemblage. Like the earlier
Peterborough people in the highland zone before them, the makers of
the food-vessels seem to have been heavily involved in trade. It is
quite proper to wonder whether the food-vessel pottery itself was made
by local women who were married to traders who were middlemen in the
transmission of Irish metal objects to north Germany and Scandinavia.
The belt of high, relatively woodless country, from southwest to
northeast, was already established as a natural route for inland trade.
MORE INVASIONS
About 1500 B.C., the situation became further complicated by the
arrival of new people in the region of southern England anciently
called Wessex. The traces suggest the Brittany coast of France as a
source, and the people seem at first to have been a small but “heroic”
group of aristocrats. Their “heroes” are buried with wealth and
ceremony, surrounded by their axes and daggers of bronze, their gold
ornaments, and amber and jet beads. These rich finds show that the
trade-linkage these warriors patronized spread from the Baltic sources
of amber to Mycenean Greece or even Egypt, as evidenced by glazed blue
beads.
The great visual trace of Wessex achievement is the final form of
the spectacular sanctuary at Stonehenge. A wooden henge or circular
monument was first made several hundred years earlier, but the site
now received its great circles of stone uprights and lintels. The
diameter of the surrounding ditch at Stonehenge is about 350 feet, the
diameter of the inner circle of large stones is about 100 feet, and
the tallest stone of the innermost horseshoe-shaped enclosure is 29
feet 8 inches high. One circle is made of blue stones which must have
been transported from Pembrokeshire, 145 miles away as the crow flies.
Recently, many carvings representing the profile of a standard type of
bronze axe of the time, and several profiles of bronze daggers--one of
which has been called Mycenean in type--have been found carved in the
stones. We cannot, of course, describe the details of the religious
ceremonies which must have been staged in Stonehenge, but we can
certainly imagine the well-integrated and smoothly working culture
which must have been necessary before such a great monument could have
been built.
“THIS ENGLAND”
The range from 1900 to about 1400 B.C. includes the time of development
of the archeological features usually called the “Early Bronze Age”
in Britain. In fact, traces of the Wessex warriors persisted down to
about 1200 B.C. The main regions of the island were populated, and the
adjustments to the highland and lowland zones were distinct and well
marked. The different aspects of the assemblages of the Beaker folk and
the clearly expressed activities of the Food-vessel folk and the Wessex
warriors show that Britain was already taking on her characteristic
trading role, separated from the European continent but conveniently
adjacent to it. The tin of Cornwall--so important in the production
of good bronze--as well as the copper of the west and of Ireland,
taken with the gold of Ireland and the general excellence of Irish
metal work, assured Britain a trader’s place in the then known world.
Contacts with the eastern Mediterranean may have been by sea, with
Cornish tin as the attraction, or may have been made by the Food-vessel
middlemen on their trips to the Baltic coast. There they would have
encountered traders who traveled the great north-south European road,
by which Baltic amber moved southward to Greece and the Levant, and
ideas and things moved northward again.
There was, however, the Channel between England and Europe, and this
relative isolation gave some peace and also gave time for a leveling
and further fusion of culture. The separate cultural traditions began
to have more in common. The growing of barley, the herding of sheep and
cattle, and the production of woolen garments were already features
common to all Britain’s inhabitants save a few in the remote highlands,
the far north, and the distant islands not yet fully touched by
food-production. The “personality of Britain” was being formed.
CREMATION BURIALS BEGIN
Along with people of certain religious faiths, archeologists are
against cremation (for other people!). Individuals to be cremated seem
in past times to have been dressed in their trappings and put upon a
large pyre: it takes a lot of wood and a very hot fire for a thorough
cremation. When the burning had been completed, the few fragile scraps
of bone and such odd beads of stone or other rare items as had resisted
the great heat seem to have been whisked into a pot and the pot buried.
The archeologist is left with the pot and the unsatisfactory scraps in
it.
Tentatively, after about 1400 B.C. and almost completely over the whole
island by 1200 B.C., Britain became the scene of cremation burials
in urns. We know very little of the people themselves. None of their
settlements have been identified, although there is evidence that they
grew barley and made enclosures for cattle. The urns used for the
burials seem to have antecedents in the pottery of the Food-vessel
folk, and there are some other links with earlier British traditions.
In Lancashire, a wooden circle seems to have been built about a grave
with cremated burials in urns. Even occasional instances of cremation
may be noticed earlier in Britain, and it is not clear what, if any,
connection the British cremation burials in urns have with the classic
_Urnfields_ which were now beginning in the east Mediterranean and
which we shall mention below.
The British cremation-burial-in-urns folk survived a long time in the
highland zone. In the general British scheme, they make up what is
called the “Middle Bronze Age,” but in the highland zone they last
until after 900 B.C. and are considered to be a specialized highland
“Late Bronze Age.” In the highland zone, these later cremation-burial
folk seem to have continued the older Food-vessel tradition of being
middlemen in the metal market.
Granting that our knowledge of this phase of British prehistory is
very restricted because the cremations have left so little for the
archeologist, it does not appear that the cremation-burial-urn folk can
be sharply set off from their immediate predecessors. But change on a
grander scale was on the way.
REVERBERATIONS FROM CENTRAL EUROPE
In the centuries immediately following 1000 B.C., we see with fair
clarity two phases of a cultural process which must have been going
on for some time. Certainly several of the invasions we have already
described in this chapter were due to earlier phases of the same
cultural process, but we could not see the details.
[Illustration: SLASHING SWORD]
Around 1200 B.C. central Europe was upset by the spread of the
so-called Urnfield folk, who practiced cremation burial in urns and
whom we also know to have been possessors of long, slashing swords and
the horse. I told you above that we have no idea that the Urnfield
folk proper were in any way connected with the people who made
cremation-burial-urn cemeteries a century or so earlier in Britain. It
has been supposed that the Urnfield folk themselves may have shared
ideas with the people who sacked Troy. We know that the Urnfield
pressure from central Europe displaced other people in northern France,
and perhaps in northwestern Germany, and that this reverberated into
Britain about 1000 B.C.
Soon after 750 B.C., the same thing happened again. This time, the
pressure from central Europe came from the Hallstatt folk who were iron
tool makers: the reverberation brought people from the western Alpine
region across the Channel into Britain.
At first it is possible to see the separate results of these folk
movements, but the developing cultures soon fused with each other and
with earlier British elements. Presently there were also strains of
other northern and western European pottery and traces of Urnfield
practices themselves which appeared in the finished British product. I
hope you will sense that I am vastly over-simplifying the details.
The result seems to have been--among other things--a new kind of
agricultural system. The land was marked off by ditched divisions.
Rectangular fields imply the plow rather than hoe cultivation. We seem
to get a picture of estate or tribal boundaries which included village
communities; we find a variety of tools in bronze, and even whetstones
which show that iron has been honed on them (although the scarce iron
has not been found). Let me give you the picture in Professor S.
Piggott’s words: “The ... Late Bronze Age of southern England was but
the forerunner of the earliest Iron Age in the same region, not only in
the techniques of agriculture, but almost certainly in terms of ethnic
kinship ... we can with some assurance talk of the Celts ... the great
early Celtic expansion of the Continent is recognized to be that of the
Urnfield people.”
Thus, certainly by 500 B.C., there were people in Britain, some of
whose descendants we may recognize today in name or language in remote
parts of Wales, Scotland, and the Hebrides.
THE COMING OF IRON
Iron--once the know-how of reducing it from its ore in a very hot,
closed fire has been achieved--produces a far cheaper and much more
efficient set of tools than does bronze. Iron tools seem first to
have been made in quantity in Hittite Anatolia about 1500 B.C. In
continental Europe, the earliest, so-called Hallstatt, iron-using
cultures appeared in Germany soon after 750 B.C. Somewhat later,
Greek and especially Etruscan exports of _objets d’art_--which moved
with a flourishing trans-Alpine wine trade--influenced the Hallstatt
iron-working tradition. Still later new classical motifs, together with
older Hallstatt, oriental, and northern nomad motifs, gave rise to a
new style in metal decoration which characterizes the so-called La Tène
phase.
A few iron users reached Britain a little before 400 B.C. Not long
after that, a number of allied groups appeared in southern and
southeastern England. They came over the Channel from France and must
have been Celts with dialects related to those already in England. A
second wave of Celts arrived from the Marne district in France about
250 B.C. Finally, in the second quarter of the first century B.C.,
there were several groups of newcomers, some of whom were Belgae of
a mixed Teutonic-Celtic confederacy of tribes in northern France and
Belgium. The Belgae preceded the Romans by only a few years.
HILL-FORTS AND FARMS
The earliest iron-users seem to have entrenched themselves temporarily
within hill-top forts, mainly in the south. Gradually, they moved
inland, establishing _individual_ farm sites with extensive systems
of rectangular fields. We recognize these fields by the “lynchets” or
lines of soil-creep which plowing left on the slopes of hills. New
crops appeared; there were now bread wheat, oats, and rye, as well as
barley.
At Little Woodbury, near the town of Salisbury, a farmstead has been
rather completely excavated. The rustic buildings were within a
palisade, the round house itself was built of wood, and there were
various outbuildings and pits for the storage of grain. Weaving was
done on the farm, but not blacksmithing, which must have been a
specialized trade. Save for the lack of firearms, the place might
almost be taken for a farmstead on the American frontier in the early
1800’s.
Toward 250 B.C. there seems to have been a hasty attempt to repair the
hill-forts and to build new ones, evidently in response to signs of
restlessness being shown by remote relatives in France.
THE SECOND PHASE
Perhaps the hill-forts were not entirely effective or perhaps a
compromise was reached. In any case, the newcomers from the Marne
district did establish themselves, first in the southeast and then to
the north and west. They brought iron with decoration of the La T�ne
type and also the two-wheeled chariot. Like the Wessex warriors of
over a thousand years earlier, they made “heroes’” graves, with their
warriors buried in the war-chariots and dressed in full trappings.
[Illustration: CELTIC BUCKLE]
The metal work of these Marnian newcomers is excellent. The peculiar
Celtic art style, based originally on the classic tendril motif,
is colorful and virile, and fits with Greek and Roman descriptions
of Celtic love of color in dress. There is a strong trace of these
newcomers northward in Yorkshire, linked by Ptolemy’s description to
the Parisii, doubtless part of the Celtic tribe which originally gave
its name to Paris on the Seine. Near Glastonbury, in Somerset, two
villages in swamps have been excavated. They seem to date toward the
middle of the first century B.C., which was a troubled time in Britain.
The circular houses were built on timber platforms surrounded with
palisades. The preservation of antiquities by the water-logged peat of
the swamp has yielded us a long catalogue of the materials of these
villagers.
In Scotland, which yields its first iron tools at a date of about 100
B.C., and in northern Ireland even slightly earlier, the effects of the
two phases of newcomers tend especially to blend. Hill-forts, “brochs”
(stone-built round towers) and a variety of other strange structures
seem to appear as the new ideas develop in the comparative isolation of
northern Britain.
THE THIRD PHASE
For the time of about the middle of the first century B.C., we again
see traces of frantic hill-fort construction. This simple military
architecture now took some new forms. Its multiple ramparts must
reflect the use of slings as missiles, rather than spears. We probably
know the reason. In 56 B.C., Julius Caesar chastised the Veneti of
Brittany for outraging the dignity of Roman ambassadors. The Veneti
were famous slingers, and doubtless the reverberations of escaping
Veneti were felt across the Channel. The military architecture suggests
that some Veneti did escape to Britain.
Also, through Caesar, we learn the names of newcomers who arrived in
two waves, about 75 B.C. and about 50 B.C. These were the Belgae. Now,
at last, we can even begin to speak of dynasties and individuals.
Some time before 55 B.C., the Catuvellauni, originally from the Marne
district in France, had possessed themselves of a large part of
southeastern England. They evidently sailed up the Thames and built a
town of over a hundred acres in area. Here ruled Cassivellaunus, “the
first man in England whose name we know,” and whose town Caesar sacked.
The town sprang up elsewhere again, however.
THE END OF PREHISTORY
Prehistory, strictly speaking, is now over in southern Britain.
Claudius’ effective invasion took place in 43 A.D.; by 83 A.D., a raid
had been made as far north as Aberdeen in Scotland. But by 127 A.D.,
Hadrian had completed his wall from the Solway to the Tyne, and the
Romans settled behind it. In Scotland, Romanization can have affected
the countryside very little. Professor Piggott adds that “... it is
when the pressure of Romanization is relaxed by the break-up of the
Dark Ages that we see again the Celtic metal-smiths handling their
material with the same consummate skill as they had before the Roman
Conquest, and with traditional styles that had not even then forgotten
their Marnian and Belgic heritage.”
In fact, many centuries go by, in Britain as well as in the rest of
Europe, before the archeologist’s task is complete and the historian on
his own is able to describe the ways of men in the past.
BRITAIN AS A SAMPLE OF THE GENERAL COURSE OF PREHISTORY IN EUROPE
In giving this very brief outline of the later prehistory of Britain,
you will have noticed how often I had to refer to the European
continent itself. Britain, beyond the English Channel for all of her
later prehistory, had a much simpler course of events than did most of
the rest of Europe in later prehistoric times. This holds, in spite
of all the “invasions” and “reverberations” from the continent. Most
of Europe was the scene of an even more complicated ebb and flow of
cultural change, save in some of its more remote mountain valleys and
peninsulas.
The whole course of later prehistory in Europe is, in fact, so very
complicated that there is no single good book to cover it all;
certainly there is none in English. There are some good regional
accounts and some good general accounts of part of the range from about
3000 B.C. to A.D. 1. I suspect that the difficulty of making a good
book that covers all of its later prehistory is another aspect of what
makes Europe so very complicated a continent today. The prehistoric
foundations for Europe’s very complicated set of civilizations,
cultures, and sub-cultures--which begin to appear as history
proceeds--were in themselves very complicated.
Hence, I selected the case of Britain as a single example of how
prehistory ends in Europe. It could have been more complicated than we
found it to be. Even in the subject matter on Britain in the chapter
before the last, we did not see direct traces of the effect on Britain
of the very important developments which took place in the Danubian
way from the Near East. Apparently Britain was not affected. Britain
received the impulses which brought copper, bronze, and iron tools from
an original east Mediterranean homeland into Europe, almost at the ends
of their journeys. But by the same token, they had had time en route to
take on their characteristic European aspects.
Some time ago, Sir Cyril Fox wrote a famous book called _The
Personality of Britain_, sub-titled “Its Influence on Inhabitant and
Invader in Prehistoric and Early Historic Times.” We have not gone
into the post-Roman early historic period here; there are still the
Anglo-Saxons and Normans to account for as well as the effects of
the Romans. But what I have tried to do was to begin the story of
how the personality of Britain was formed. The principles that Fox
used, in trying to balance cultural and environmental factors and
interrelationships, would not be greatly different for other lands.
Summary
[Illustration]
In the pages you have read so far, you have been brought through the
earliest 99 per cent of the story of man’s life on this planet. I have
left only 1 per cent of the story for the historians to tell.
THE DRAMA OF THE PAST
Men first became men when evolution had carried them to a certain
point. This was the point where the eye-hand-brain co-ordination was
good enough so that tools could be made. When tools began to be made
according to sets of lasting habits, we know that men had appeared.
This happened over a half million years ago. The stage for the play
may have been as broad as all of Europe, Africa, and Asia. At least,
it seems unlikely that it was only one little region that saw the
beginning of the drama.
Glaciers and different climates came and went, to change the settings.
But the play went on in the same first act for a very long time. The
men who were the players had simple roles. They had to feed themselves
and protect themselves as best they could. They did this by hunting,
catching, and finding food wherever they could, and by taking such
protection as caves, fire, and their simple tools would give them.
Before the first act was over, the last of the glaciers was melting
away, and the players had added the New World to their stage. If
we want a special name for the first act, we could call it _The
Food-Gatherers_.
There were not many climaxes in the first act, so far as we can see.
But I think there may have been a few. Certainly the pace of the
first act accelerated with the swing from simple gathering to more
intensified collecting. The great cave art of France and Spain was
probably an expression of a climax. Even the ideas of burying the dead
and of the “Venus” figurines must also point to levels of human thought
and activity that were over and above pure food-getting.
THE SECOND ACT
The second act began only about ten thousand years ago. A few of the
players started it by themselves near the center of the Old World part
of the stage, in the Near East. It began as a plant and animal act, but
it soon became much more complicated.
But the players in this one part of the stage--in the Near East--were
not the only ones to start off on the second act by themselves. Other
players, possibly in several places in the Far East, and certainly in
the New World, also started second acts that began as plant and animal
acts, and then became complicated. We can call the whole second act
_The Food-Producers_.
THE FIRST GREAT CLIMAX OF THE SECOND ACT
In the Near East, the first marked climax of the second act happened
in Mesopotamia and Egypt. The play and the players reached that great
climax that we call civilization. This seems to have come less than
five thousand years after the second act began. But it could never have
happened in the first act at all.
There is another curious thing about the first act. Many of the players
didn’t know it was over and they kept on with their roles long after
the second act had begun. On the edges of the stage there are today
some players who are still going on with the first act. The Eskimos,
and the native Australians, and certain tribes in the Amazon jungle are
some of these players. They seem perfectly happy to keep on with the
first act.
The second act moved from climax to climax. The civilizations of
Mesopotamia and Egypt were only the earliest of these climaxes. The
players to the west caught the spirit of the thing, and climaxes
followed there. So also did climaxes come in the Far Eastern and New
World portions of the stage.
The greater part of the second act should really be described to you
by a historian. Although it was a very short act when compared to the
first one, the climaxes complicate it a great deal. I, a prehistorian,
have told you about only the first act, and the very beginning of the
second.
THE THIRD ACT
Also, as a prehistorian I probably should not even mention the third
act--it began so recently. The third act is _The Industrialization_.
It is the one in which we ourselves are players. If the pace of the
second act was so much faster than that of the first, the pace of the
third act is terrific. The danger is that it may wear down the players
completely.
What sort of climaxes will the third act have, and are we already in
one? You have seen by now that the acts of my play are given in terms
of modes or basic patterns of human economy--ways in which people
get food and protection and safety. The climaxes involve more than
human economy. Economics and technological factors may be part of the
climaxes, but they are not all. The climaxes may be revolutions in
their own way, intellectual and social revolutions if you like.
If the third act follows the pattern of the second act, a climax should
come soon after the act begins. We may be due for one soon if we are
not already in it. Remember the terrific pace of this third act.
WHY BOTHER WITH PREHISTORY?
Why do we bother about prehistory? The main reason is that we think it
may point to useful ideas for the present. We are in the troublesome
beginnings of the third act of the play. The beginnings of the second
act may have lessons for us and give depth to our thinking. I know
there are at least _some_ lessons, even in the present incomplete
state of our knowledge. The players who began the second act--that of
food-production--separately, in different parts of the world, were not
all of one “pure race” nor did they have “pure” cultural traditions.
Some apparently quite mixed Mediterraneans got off to the first start
on the second act and brought it to its first two climaxes as well.
Peoples of quite different physical type achieved the first climaxes in
China and in the New World.
In our British example of how the late prehistory of Europe worked, we
listed a continuous series of “invasions” and “reverberations.” After
each of these came fusion. Even though the Channel protected Britain
from some of the extreme complications of the mixture and fusion of
continental Europe, you can see how silly it would be to refer to a
“pure” British race or a “pure” British culture. We speak of the United
States as a “melting pot.” But this is nothing new. Actually, Britain
and all the rest of the world have been “melting pots” at one time or
another.
By the time the written records of Mesopotamia and Egypt begin to turn
up in number, the climaxes there are well under way. To understand the
beginnings of the climaxes, and the real beginnings of the second act
itself, we are thrown back on prehistoric archeology. And this is as
true for China, India, Middle America, and the Andes, as it is for the
Near East.
There are lessons to be learned from all of man’s past, not simply
lessons of how to fight battles or win peace conferences, but of how
human society evolves from one stage to another. Many of these lessons
can only be looked for in the prehistoric past. So far, we have only
made a beginning. There is much still to do, and many gaps in the story
are yet to be filled. The prehistorian’s job is to find the evidence,
to fill the gaps, and to discover the lessons men have learned in the
past. As I see it, this is not only an exciting but a very practical
goal for which to strive.
List of Books
BOOKS OF GENERAL INTEREST
(Chosen from a variety of the increasingly useful list of cheap
paperbound books.)
Childe, V. Gordon
_What Happened in History._ 1954. Penguin.
_Man Makes Himself._ 1955. Mentor.
_The Prehistory of European Society._ 1958. Penguin.
Dunn, L. C., and Dobzhansky, Th.
_Heredity, Race, and Society._ 1952. Mentor.
Frankfort, Henri, Frankfort, H. A., Jacobsen, Thorkild, and Wilson,
John A.
_Before Philosophy._ 1954. Penguin.
Simpson, George G.
_The Meaning of Evolution._ 1955. Mentor.
Wheeler, Sir Mortimer
_Archaeology from the Earth._ 1956. Penguin.
GEOCHRONOLOGY AND THE ICE AGE
(Two general books. Some Pleistocene geologists disagree with Zeuner’s
interpretation of the dating evidence, but their points of view appear
in professional journals, in articles too cumbersome to list here.)
Flint, R. F.
_Glacial Geology and the Pleistocene Epoch._ 1947. John Wiley
and Sons.
Zeuner, F. E.
_Dating the Past._ 1952 (3rd ed.). Methuen and Co.
FOSSIL MEN AND RACE
(The points of view of physical anthropologists and human
paleontologists are changing very quickly. Two of the different points
of view are listed here.)
Clark, W. E. Le Gros
_History of the Primates._ 1956 (5th ed.). British Museum
(Natural History). (Also in Phoenix edition, 1957.)
Howells, W. W.
_Mankind So Far._ 1944. Doubleday, Doran.
GENERAL ANTHROPOLOGY
(These are standard texts not absolutely up to date in every detail, or
interpretative essays concerned with cultural change through time as
well as in space.)
Kroeber, A. L.
_Anthropology._ 1948. Harcourt, Brace.
Linton, Ralph
_The Tree of Culture._ 1955. Alfred A. Knopf, Inc.
Redfield, Robert
_The Primitive World and Its Transformations._ 1953. Cornell
University Press.
Steward, Julian H.
_Theory of Culture Change._ 1955. University of Illinois Press.
White, Leslie
_The Science of Culture._ 1949. Farrar, Strauss.
GENERAL PREHISTORY
(A sampling of the more useful and current standard works in English.)
Childe, V. Gordon
_The Dawn of European Civilization._ 1957. Kegan Paul, Trench,
Trubner.
_Prehistoric Migrations in Europe._ 1950. Instituttet for
Sammenlignende Kulturforskning.
Clark, Grahame
_Archaeology and Society._ 1957. Harvard University Press.
Clark, J. G. D.
_Prehistoric Europe: The Economic Basis._ 1952. Methuen and Co.
Garrod, D. A. E.
_Environment, Tools, and Man._ 1946. Cambridge University
Press.
Movius, Hallam L., Jr.
“Old World Prehistory: Paleolithic” in _Anthropology Today_.
Kroeber, A. L., ed. 1953. University of Chicago Press.
Oakley, Kenneth P.
_Man the Tool-Maker._ 1956. British Museum (Natural History).
(Also in Phoenix edition, 1957.)
Piggott, Stuart
_British Prehistory._ 1949. Oxford University Press.
Pittioni, Richard
_Die Urgeschichtlichen Grundlagen der Europäischen Kultur._
1949. Deuticke. (A single book which does attempt to cover the
whole range of European prehistory to ca. 1 A.D.)
THE NEAR EAST
Adams, Robert M.
“Developmental Stages in Ancient Mesopotamia,” _in_ Steward,
Julian, _et al_, _Irrigation Civilizations: A Comparative
Study_. 1955. Pan American Union.
Braidwood, Robert J.
_The Near East and the Foundations for Civilization._ 1952.
University of Oregon.
Childe, V. Gordon
_New Light on the Most Ancient East._ 1952. Oriental Dept.,
Routledge and Kegan Paul.
Frankfort, Henri
_The Birth of Civilization in the Near East._ 1951. University
of Indiana Press. (Also in Anchor edition, 1956.)
Pallis, Svend A.
_The Antiquity of Iraq._ 1956. Munksgaard.
Wilson, John A.
_The Burden of Egypt._ 1951. University of Chicago Press. (Also
in Phoenix edition, called _The Culture of Ancient Egypt_,
1956.)
HOW DIGGING IS DONE
Braidwood, Linda
_Digging beyond the Tigris._ 1953. Schuman, New York.
Wheeler, Sir Mortimer
_Archaeology from the Earth._ 1954. Oxford, London.
Index
Abbevillian, 48;
core-biface tool, 44, 48
Acheulean, 48, 60
Acheuleo-Levalloisian, 63
Acheuleo-Mousterian, 63
Adams, R. M., 106
Adzes, 45
Africa, east, 67, 89;
north, 70, 89;
south, 22, 25, 34, 40, 67
Agriculture, incipient, in England, 140;
in Near East, 123
Ain Hanech, 48
Amber, taken from Baltic to Greece, 167
American Indians, 90, 142
Anatolia, used as route to Europe, 138
Animals, in caves, 54, 64;
in cave art, 85
Antevs, Ernst, 19
Anyathian, 47
Archeological interpretation, 8
Archeology, defined, 8
Architecture, at Jarmo, 128;
at Jericho, 133
Arrow, points, 94;
shaft straightener, 83
Art, in caves, 84;
East Spanish, 85;
figurines, 84;
Franco-Cantabrian, 84, 85;
movable (engravings, modeling, scratchings), 83;
painting, 83;
sculpture, 83
Asia, western, 67
Assemblage, defined, 13, 14;
European, 94;
Jarmo, 129;
Maglemosian, 94;
Natufian, 113
Aterian, industry, 67;
point, 89
Australopithecinae, 24
Australopithecine, 25, 26
Awls, 77
Axes, 62, 94
Ax-heads, 15
Azilian, 97
Aztecs, 145
Baghouz, 152
Bakun, 134
Baltic sea, 93
Banana, 107
Barley, wild, 108
Barrow, 141
Battle-axe folk, 164;
assemblage, 164
Beads, 80;
bone, 114
Beaker folk, 164;
assemblage, 164-165
Bear, in cave art, 85;
cult, 68
Belgium, 94
Belt cave, 126
Bering Strait, used as route to New World, 98
Bison, in cave art, 85
Blade, awl, 77;
backed, 75;
blade-core, 71;
end-scraper, 77;
stone, defined, 71;
strangulated (notched), 76;
tanged point, 76;
tools, 71, 75-80, 90;
tool tradition, 70
Boar, wild, in cave art, 85
Bogs, source of archeological materials, 94
Bolas, 54
Bordes, François, 62
Borer, 77
Boskop skull, 34
Boyd, William C., 35
Bracelets, 118
Brain, development of, 24
Breadfruit, 107
Breasted, James H., 107
Brick, at Jericho, 133
Britain, 94;
late prehistory, 163-175;
invaders, 173
Broch, 172
Buffalo, in China, 54;
killed by stampede, 86
Burials, 66, 86;
in “henges,” 164;
in urns, 168
Burins, 75
Burma, 90
Byblos, 134
Camel, 54
Cannibalism, 55
Cattle, wild, 85, 112;
in cave art, 85;
domesticated, 15;
at Skara Brae, 142
Caucasoids, 34
Cave men, 29
Caves, 62;
art in, 84
Celts, 170
Chariot, 160
Chicken, domestication of, 107
Chiefs, in food-gathering groups, 68
Childe, V. Gordon, 8
China, 136
Choukoutien, 28, 35
Choukoutienian, 47
Civilization, beginnings, 144, 149, 157;
meaning of, 144
Clactonian, 45, 47
Clay, used in modeling, 128;
baked, used for tools, 153
Club-heads, 82, 94
Colonization, in America, 142;
in Europe, 142
Combe Capelle, 30
Combe Capelle-Brünn group, 34
Commont, Victor, 51
Coon, Carlton S., 73
Copper, 134
Corn, in America, 145
Corrals for cattle, 140
“Cradle of mankind,” 136
Cremation, 167
Crete, 162
Cro-Magnon, 30, 34
Cultivation, incipient, 105, 109, 111
Culture, change, 99;
characteristics, defined, 38, 49;
prehistoric, 39
Danube Valley, used as route from Asia, 138
Dates, 153
Deer, 54, 96
Dog, domesticated, 96
Domestication, of animals, 100, 105, 107;
of plants, 100
“Dragon teeth” fossils in China, 28
Drill, 77
Dubois, Eugene, 26
Early Dynastic Period, Mesopotamia, 147
East Spanish art, 72, 85
Egypt, 70, 126
Ehringsdorf, 31
Elephant, 54
Emiliani, Cesare, 18
Emiran flake point, 73
England, 163-168;
prehistoric, 19, 40;
farmers in, 140
Eoanthropus dawsoni, 29
Eoliths, 41
Erich, 152
Eridu, 152
Euphrates River, floods in, 148
Europe, cave dwellings, 58;
at end of Ice Age, 93;
early farmers, 140;
glaciers in, 40;
huts in, 86;
routes into, 137-140;
spread of food-production to, 136
Far East, 69, 90
Farmers, 103
Fauresmith industry, 67
Fayum, 135;
radiocarbon date, 146
“Fertile Crescent,” 107, 146
Figurines, “Venus,” 84;
at Jarmo, 128;
at Ubaid, 153
Fire, used by Peking man, 54
First Dynasty, Egypt, 147
Fish-hooks, 80, 94
Fishing, 80;
by food-producers, 122
Fish-lines, 80
Fish spears, 94
Flint industry, 127
Fontéchevade, 32, 56, 58
Food-collecting, 104, 121;
end of, 104
Food-gatherers, 53, 176
Food-gathering, 99, 104;
in Old World, 104;
stages of, 104
Food-producers, 176
Food-producing economy, 122;
in America, 145;
in Asia, 105
Food-producing revolution, 99, 105;
causes of, 101;
preconditions for, 100
Food-production, beginnings of, 99;
carried to Europe, 110
Food-vessel folk, 164
“Forest folk,” 97, 98, 104, 110
Fox, Sir Cyril, 174
France, caves in, 56
Galley Hill (fossil type), 29
Garrod, D. A., 73
Gazelle, 114
Germany, 94
Ghassul, 156
Glaciers, 18, 30;
destruction by, 40
Goat, wild, 108;
domesticated, 128
Grain, first planted, 20
Graves, passage, 141;
gallery, 141
Greece, civilization in, 163;
as route to western Europe, 138;
towns in, 162
Grimaldi skeletons, 34
Hackberry seeds used as food, 55
Halaf, 151;
assemblage, 151
Hallstatt, tradition, 169
Hand, development of, 24, 25
Hand adzes, 46
Hand axes, 44
Harpoons, antler, 83, 94;
bone, 82, 94
Hassuna, 131;
assemblage, 131, 132
Heidelberg, fossil type, 28
Hill-forts, in England, 171;
in Scotland, 172
Hilly flanks of Near East, 107, 108, 125, 131, 146, 147
History, beginning of, 7, 17
Hoes, 112
Holland, 164
Homo sapiens, 32
Hooton, E. A., 34
Horse, 112;
wild, in cave art, 85;
in China, 54
Hotu cave, 126
Houses, 122;
at Jarmo, 128;
at Halaf, 151
Howe, Bruce, 116
Howell, F. Clark, 30
Hunting, 93
Ice Age, in Asia, 99;
beginning of, 18;
glaciers in, 41;
last glaciation, 93
Incas, 145
India, 90, 136
Industrialization, 178
Industry, blade-tool, 88;
defined, 58;
ground stone, 94
Internationalism, 162
Iran, 107, 147
Iraq, 107, 124, 127, 136, 147
Iron, introduction of, 170
Irrigation, 123, 149, 155
Italy, 138
Jacobsen, T. J., 157
Jarmo, 109, 126, 128, 130;
assemblage, 129
Java, 23, 29
Java man, 26, 27, 29
Jefferson, Thomas, 11
Jericho, 119, 133
Judaidah, 134
Kafuan, 48
Kanam, 23, 36
Karim Shahir, 116-119, 124;
assemblage, 116, 117
Keith, Sir Arthur, 33
Kelley, Harper, 51
Kharga, 126
Khartoum, 136
Knives, 80
Krogman, W. M., 3, 25
Lamps, 85
Land bridges in Mediterranean, 19
La Tène phase, 170
Laurel leaf point, 78, 89
Leakey, L. S. B., 40
Le Moustier, 57
Levalloisian, 47, 61, 62
Levalloiso-Mousterian, 47, 63
Little Woodbury, 170
Magic, used by hunters, 123
Maglemosian, assemblage, 94, 95;
folk, 98
Makapan, 40
Mammoth, 93;
in cave art, 85
“Man-apes,” 26
Mango, 107
Mankind, age, 17
Maringer, J., 45
Markets, 155
Marston, A. T., 11
Mathiassen, T., 97
McCown, T. D., 33
Meganthropus, 26, 27, 36
Men, defined, 25;
modern, 32
Merimde, 135
Mersin, 133
Metal-workers, 160, 163, 167, 172
Micoquian, 48, 60
Microliths, 87;
at Jarmo, 130;
“lunates,” 87;
trapezoids, 87;
triangles, 87
Minerals used as coloring matter, 66
Mine-shafts, 140
M’lefaat, 126, 127
Mongoloids, 29, 90
Mortars, 114, 118, 127
Mounds, how formed, 12
Mount Carmel, 11, 33, 52, 59, 64, 69, 113, 114
“Mousterian man,” 64
“Mousterian” tools, 61, 62;
of Acheulean tradition, 62
Movius, H. L., 47
Natufian, animals in, 114;
assemblage, 113, 114, 115;
burials, 114;
date of, 113
Neanderthal man, 29, 30, 31, 56
Near East, beginnings of civilization in, 20, 144;
cave sites, 58;
climate in Ice Age, 99;
“Fertile Crescent,” 107, 146;
food-production in, 99;
Natufian assemblage in, 113-115;
stone tools, 114
Needles, 80
Negroid, 34
New World, 90
Nile River valley, 102, 134;
floods in, 148
Nuclear area, 106, 110;
in Near East, 107
Obsidian, used for blade tools, 71;
at Jarmo, 130
Ochre, red, with burials, 86
Oldowan, 48
Old World, 67, 70, 90;
continental phases in, 18
Olorgesailie, 40, 51
Ostrich, in China, 54
Ovens, 128
Oxygen isotopes, 18
Paintings in caves, 83
Paleoanthropic man, 50
Palestine, burials, 56;
cave sites, 52;
types of man, 69
Parpallo, 89
Patjitanian, 45, 47
Pebble tools, 42
Peking cave, 54;
animals in, 54
Peking man, 27, 28, 29, 54, 58
Pendants, 80;
bone, 114
Pestle, 114
Peterborough, 141;
assemblage, 141
Pictographic signs, 158
Pig, wild, 108
“Piltdown man,” 29
Pins, 80
Pithecanthropus, 26, 27, 30, 36
Pleistocene, 18, 25
Plows developed, 123
Points, arrow, 76;
laurel leaf, 78;
shouldered, 78, 79;
split-based bone, 80, 82;
tanged, 76;
willow leaf, 78
Potatoes, in America, 145
Pottery, 122, 130, 156;
decorated, 142;
painted, 131, 151, 152;
Susa style, 156;
in tombs, 141
Prehistory, defined, 7;
range of, 18
Pre-neanderthaloids, 30, 31, 37
Pre-Solutrean point, 89
Pre-Stellenbosch, 48
Proto-Literate assemblage, 157-160
Race, 35;
biological, 36;
“pure,” 16
Radioactivity, 9, 10
Radioactive carbon dates, 18, 92, 120, 130, 135, 156
Redfield, Robert, 38, 49
Reed, C. A., 128
Reindeer, 94
Rhinoceros, 93;
in cave art, 85
Rhodesian man, 32
Riss glaciation, 58
Rock-shelters, 58;
art in, 85
Saccopastore, 31
Sahara Desert, 34, 102
Samarra, 152;
pottery, 131, 152
Sangoan industry, 67
Sauer, Carl, 136
Sbaikian point, 89
Schliemann, H., 11, 12
Scotland, 171
Scraper, flake, 79;
end-scraper on blade, 77, 78;
keel-shaped, 79, 80, 81
Sculpture in caves, 83
Sebilian III, 126
Shaheinab, 135
Sheep, wild, 108;
at Skara Brae, 142;
in China, 54
Shellfish, 142
Ship, Ubaidian, 153
Sialk, 126, 134;
assemblage, 134
Siberia, 88;
pathway to New World, 98
Sickle, 112, 153;
blade, 113, 130
Silo, 122
Sinanthropus, 27, 30, 35
Skara Brae, 142
Snails used as food, 128
Soan, 47
Solecki, R., 116
Solo (fossil type), 29, 32
Solutrean industry, 77
Spear, shaft, 78;
thrower, 82, 83
Speech, development of organs of, 25
Squash, in America, 145
Steinheim fossil skull, 28
Stillbay industry, 67
Stonehenge, 166
Stratification, in caves, 12, 57;
in sites, 12
Swanscombe (fossil type), 11, 28
Syria, 107
Tabun, 60, 71
Tardenoisian, 97
Taro, 107
Tasa, 135
Tayacian, 47, 59
Teeth, pierced, in beads and pendants, 114
Temples, 123, 155
Tepe Gawra, 156
Ternafine, 29
Teshik Tash, 69
Textiles, 122
Thong-stropper, 80
Tigris River, floods in, 148
Toggle, 80
Tomatoes, in America, 145
Tombs, megalithic, 141
Tool-making, 42, 49
Tool-preparation traditions, 65
Tools, 62;
antler, 80;
blade, 70, 71, 75;
bone, 66;
chopper, 47;
core-biface, 43, 48, 60, 61;
flake, 44, 47, 51, 60, 64;
flint, 80, 127;
ground stone, 68, 127;
handles, 94;
pebble, 42, 43, 48, 53;
use of, 24
Touf (mud wall), 128
Toynbee, A. J., 101
Trade, 130, 155, 162
Traders, 167
Traditions, 15;
blade tool, 70;
definition of, 51;
interpretation of, 49;
tool-making, 42, 48;
chopper-tool, 47;
chopper-chopping tool, 45;
core-biface, 43, 48;
flake, 44, 47;
pebble tool, 42, 48
Tool-making, prehistory of, 42
Turkey, 107, 108
Ubaid, 153;
assemblage, 153-155
Urnfields, 168, 169
Village-farming community era, 105, 119
Wad B, 72
Wadjak, 34
Warka phase, 156;
assemblage, 156
Washburn, Sherwood L., 36
Water buffalo, domestication of, 107
Weidenreich, F., 29, 34
Wessex, 166, 167
Wheat, wild, 108;
partially domesticated, 127
Willow leaf point, 78
Windmill Hill, 138;
assemblage, 138, 140
Witch doctors, 68
Wool, 112;
in garments, 167
Writing, 158;
cuneiform, 158
Würm I glaciation, 58
Zebu cattle, domestication of, 107
Zeuner, F. E., 73
* * * * * *
Transcriber’s note:
Punctuation, hyphenation, and spelling were made consistent when a
predominant preference was found in this book; otherwise they were not
changed.
Simple typographical errors were corrected; occasional unbalanced
quotation marks retained.
Ambiguous hyphens at the ends of lines were retained.
Index not checked for proper alphabetization or correct page references.
In the original book, chapter headings were accompanied by
illustrations, sometimes above, sometimes below, and sometimes
adjacent. In this eBook those illustrations always appear below the
headings.
***END OF THE PROJECT GUTENBERG EBOOK PREHISTORIC MEN***
******* This file should be named 52664-0.txt or 52664-0.zip *******
This and all associated files of various formats will be found in:
http://www.gutenberg.org/dirs/5/2/6/6/52664
Updated editions will replace the previous one--the old editions will
be renamed.
Creating the works from print editions not protected by U.S. copyright
law means that no one owns a United States copyright in these works,
so the Foundation (and you!) can copy and distribute it in the United
States without permission and without paying copyright
royalties. Special rules, set forth in the General Terms of Use part
of this license, apply to copying and distributing Project
Gutenberg-tm electronic works to protect the PROJECT GUTENBERG-tm
concept and trademark. Project Gutenberg is a registered trademark,
and may not be used if you charge for the eBooks, unless you receive
specific permission. If you do not charge anything for copies of this
eBook, complying with the rules is very easy. You may use this eBook
for nearly any purpose such as creation of derivative works, reports,
performances and research. They may be modified and printed and given
away--you may do practically ANYTHING in the United States with eBooks
not protected by U.S. copyright law. Redistribution is subject to the
trademark license, especially commercial redistribution.
START: FULL LICENSE
THE FULL PROJECT GUTENBERG LICENSE
PLEASE READ THIS BEFORE YOU DISTRIBUTE OR USE THIS WORK
To protect the Project Gutenberg-tm mission of promoting the free
distribution of electronic works, by using or distributing this work
(or any other work associated in any way with the phrase "Project
Gutenberg"), you agree to comply with all the terms of the Full
Project Gutenberg-tm License available with this file or online at
www.gutenberg.org/license.
Section 1. General Terms of Use and Redistributing Project
Gutenberg-tm electronic works
1.A. By reading or using any part of this Project Gutenberg-tm
electronic work, you indicate that you have read, understand, agree to
and accept all the terms of this license and intellectual property
(trademark/copyright) agreement. If you do not agree to abide by all
the terms of this agreement, you must cease using and return or
destroy all copies of Project Gutenberg-tm electronic works in your
possession. If you paid a fee for obtaining a copy of or access to a
Project Gutenberg-tm electronic work and you do not agree to be bound
by the terms of this agreement, you may obtain a refund from the
person or entity to whom you paid the fee as set forth in paragraph
1.E.8.
1.B. "Project Gutenberg" is a registered trademark. It may only be
used on or associated in any way with an electronic work by people who
agree to be bound by the terms of this agreement. There are a few
things that you can do with most Project Gutenberg-tm electronic works
even without complying with the full terms of this agreement. See
paragraph 1.C below. There are a lot of things you can do with Project
Gutenberg-tm electronic works if you follow the terms of this
agreement and help preserve free future access to Project Gutenberg-tm
electronic works. See paragraph 1.E below.
1.C. The Project Gutenberg Literary Archive Foundation ("the
Foundation" or PGLAF), owns a compilation copyright in the collection
of Project Gutenberg-tm electronic works. Nearly all the individual
works in the collection are in the public domain in the United
States. If an individual work is unprotected by copyright law in the
United States and you are located in the United States, we do not
claim a right to prevent you from copying, distributing, performing,
displaying or creating derivative works based on the work as long as
all references to Project Gutenberg are removed. Of course, we hope
that you will support the Project Gutenberg-tm mission of promoting
free access to electronic works by freely sharing Project Gutenberg-tm
works in compliance with the terms of this agreement for keeping the
Project Gutenberg-tm name associated with the work. You can easily
comply with the terms of this agreement by keeping this work in the
same format with its attached full Project Gutenberg-tm License when
you share it without charge with others.
1.D. The copyright laws of the place where you are located also govern
what you can do with this work. Copyright laws in most countries are
in a constant state of change. If you are outside the United States,
check the laws of your country in addition to the terms of this
agreement before downloading, copying, displaying, performing,
distributing or creating derivative works based on this work or any
other Project Gutenberg-tm work. The Foundation makes no
representations concerning the copyright status of any work in any
country outside the United States.
1.E. Unless you have removed all references to Project Gutenberg:
1.E.1. The following sentence, with active links to, or other
immediate access to, the full Project Gutenberg-tm License must appear
prominently whenever any copy of a Project Gutenberg-tm work (any work
on which the phrase "Project Gutenberg" appears, or with which the
phrase "Project Gutenberg" is associated) is accessed, displayed,
performed, viewed, copied or distributed:
This eBook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no
restrictions whatsoever. You may copy it, give it away or re-use it
under the terms of the Project Gutenberg License included with this
eBook or online at www.gutenberg.org. If you are not located in the
United States, you'll have to check the laws of the country where you
are located before using this ebook.
1.E.2. If an individual Project Gutenberg-tm electronic work is
derived from texts not protected by U.S. copyright law (does not
contain a notice indicating that it is posted with permission of the
copyright holder), the work can be copied and distributed to anyone in
the United States without paying any fees or charges. If you are
redistributing or providing access to a work with the phrase "Project
Gutenberg" associated with or appearing on the work, you must comply
either with the requirements of paragraphs 1.E.1 through 1.E.7 or
obtain permission for the use of the work and the Project Gutenberg-tm
trademark as set forth in paragraphs 1.E.8 or 1.E.9.
1.E.3. If an individual Project Gutenberg-tm electronic work is posted
with the permission of the copyright holder, your use and distribution
must comply with both paragraphs 1.E.1 through 1.E.7 and any
additional terms imposed by the copyright holder. Additional terms
will be linked to the Project Gutenberg-tm License for all works
posted with the permission of the copyright holder found at the
beginning of this work.
1.E.4. Do not unlink or detach or remove the full Project Gutenberg-tm
License terms from this work, or any files containing a part of this
work or any other work associated with Project Gutenberg-tm.
1.E.5. Do not copy, display, perform, distribute or redistribute this
electronic work, or any part of this electronic work, without
prominently displaying the sentence set forth in paragraph 1.E.1 with
active links or immediate access to the full terms of the Project
Gutenberg-tm License.
1.E.6. You may convert to and distribute this work in any binary,
compressed, marked up, nonproprietary or proprietary form, including
any word processing or hypertext form. However, if you provide access
to or distribute copies of a Project Gutenberg-tm work in a format
other than "Plain Vanilla ASCII" or other format used in the official
version posted on the official Project Gutenberg-tm web site
(www.gutenberg.org), you must, at no additional cost, fee or expense
to the user, provide a copy, a means of exporting a copy, or a means
of obtaining a copy upon request, of the work in its original "Plain
Vanilla ASCII" or other form. Any alternate format must include the
full Project Gutenberg-tm License as specified in paragraph 1.E.1.
1.E.7. Do not charge a fee for access to, viewing, displaying,
performing, copying or distributing any Project Gutenberg-tm works
unless you comply with paragraph 1.E.8 or 1.E.9.
1.E.8. You may charge a reasonable fee for copies of or providing
access to or distributing Project Gutenberg-tm electronic works
provided that
* You pay a royalty fee of 20% of the gross profits you derive from
the use of Project Gutenberg-tm works calculated using the method
you already use to calculate your applicable taxes. The fee is owed
to the owner of the Project Gutenberg-tm trademark, but he has
agreed to donate royalties under this paragraph to the Project
Gutenberg Literary Archive Foundation. Royalty payments must be paid
within 60 days following each date on which you prepare (or are
legally required to prepare) your periodic tax returns. Royalty
payments should be clearly marked as such and sent to the Project
Gutenberg Literary Archive Foundation at the address specified in
Section 4, "Information about donations to the Project Gutenberg
Literary Archive Foundation."
* You provide a full refund of any money paid by a user who notifies
you in writing (or by e-mail) within 30 days of receipt that s/he
does not agree to the terms of the full Project Gutenberg-tm
License. You must require such a user to return or destroy all
copies of the works possessed in a physical medium and discontinue
all use of and all access to other copies of Project Gutenberg-tm
works.
* You provide, in accordance with paragraph 1.F.3, a full refund of
any money paid for a work or a replacement copy, if a defect in the
electronic work is discovered and reported to you within 90 days of
receipt of the work.
* You comply with all other terms of this agreement for free
distribution of Project Gutenberg-tm works.
1.E.9. If you wish to charge a fee or distribute a Project
Gutenberg-tm electronic work or group of works on different terms than
are set forth in this agreement, you must obtain permission in writing
from both the Project Gutenberg Literary Archive Foundation and The
Project Gutenberg Trademark LLC, the owner of the Project Gutenberg-tm
trademark. Contact the Foundation as set forth in Section 3 below.
1.F.
1.F.1. Project Gutenberg volunteers and employees expend considerable
effort to identify, do copyright research on, transcribe and proofread
works not protected by U.S. copyright law in creating the Project
Gutenberg-tm collection. Despite these efforts, Project Gutenberg-tm
electronic works, and the medium on which they may be stored, may
contain "Defects," such as, but not limited to, incomplete, inaccurate
or corrupt data, transcription errors, a copyright or other
intellectual property infringement, a defective or damaged disk or
other medium, a computer virus, or computer codes that damage or
cannot be read by your equipment.
1.F.2. LIMITED WARRANTY, DISCLAIMER OF DAMAGES - Except for the "Right
of Replacement or Refund" described in paragraph 1.F.3, the Project
Gutenberg Literary Archive Foundation, the owner of the Project
Gutenberg-tm trademark, and any other party distributing a Project
Gutenberg-tm electronic work under this agreement, disclaim all
liability to you for damages, costs and expenses, including legal
fees. YOU AGREE THAT YOU HAVE NO REMEDIES FOR NEGLIGENCE, STRICT
LIABILITY, BREACH OF WARRANTY OR BREACH OF CONTRACT EXCEPT THOSE
PROVIDED IN PARAGRAPH 1.F.3. YOU AGREE THAT THE FOUNDATION, THE
TRADEMARK OWNER, AND ANY DISTRIBUTOR UNDER THIS AGREEMENT WILL NOT BE
LIABLE TO YOU FOR ACTUAL, DIRECT, INDIRECT, CONSEQUENTIAL, PUNITIVE OR
INCIDENTAL DAMAGES EVEN IF YOU GIVE NOTICE OF THE POSSIBILITY OF SUCH
DAMAGE.
1.F.3. LIMITED RIGHT OF REPLACEMENT OR REFUND - If you discover a
defect in this electronic work within 90 days of receiving it, you can
receive a refund of the money (if any) you paid for it by sending a
written explanation to the person you received the work from. If you
received the work on a physical medium, you must return the medium
with your written explanation. The person or entity that provided you
with the defective work may elect to provide a replacement copy in
lieu of a refund. If you received the work electronically, the person
or entity providing it to you may choose to give you a second
opportunity to receive the work electronically in lieu of a refund. If
the second copy is also defective, you may demand a refund in writing
without further opportunities to fix the problem.
1.F.4. Except for the limited right of replacement or refund set forth
in paragraph 1.F.3, this work is provided to you 'AS-IS', WITH NO
OTHER WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
LIMITED TO WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PURPOSE.
1.F.5. Some states do not allow disclaimers of certain implied
warranties or the exclusion or limitation of certain types of
damages. If any disclaimer or limitation set forth in this agreement
violates the law of the state applicable to this agreement, the
agreement shall be interpreted to make the maximum disclaimer or
limitation permitted by the applicable state law. The invalidity or
unenforceability of any provision of this agreement shall not void the
remaining provisions.
1.F.6. INDEMNITY - You agree to indemnify and hold the Foundation, the
trademark owner, any agent or employee of the Foundation, anyone
providing copies of Project Gutenberg-tm electronic works in
accordance with this agreement, and any volunteers associated with the
production, promotion and distribution of Project Gutenberg-tm
electronic works, harmless from all liability, costs and expenses,
including legal fees, that arise directly or indirectly from any of
the following which you do or cause to occur: (a) distribution of this
or any Project Gutenberg-tm work, (b) alteration, modification, or
additions or deletions to any Project Gutenberg-tm work, and (c) any
Defect you cause.
Section 2. Information about the Mission of Project Gutenberg-tm
Project Gutenberg-tm is synonymous with the free distribution of
electronic works in formats readable by the widest variety of
computers including obsolete, old, middle-aged and new computers. It
exists because of the efforts of hundreds of volunteers and donations
from people in all walks of life.
Volunteers and financial support to provide volunteers with the
assistance they need are critical to reaching Project Gutenberg-tm's
goals and ensuring that the Project Gutenberg-tm collection will
remain freely available for generations to come. In 2001, the Project
Gutenberg Literary Archive Foundation was created to provide a secure
and permanent future for Project Gutenberg-tm and future
generations. To learn more about the Project Gutenberg Literary
Archive Foundation and how your efforts and donations can help, see
Sections 3 and 4 and the Foundation information page at
www.gutenberg.org
Section 3. Information about the Project Gutenberg Literary
Archive Foundation
The Project Gutenberg Literary Archive Foundation is a non profit
501(c)(3) educational corporation organized under the laws of the
state of Mississippi and granted tax exempt status by the Internal
Revenue Service. The Foundation's EIN or federal tax identification
number is 64-6221541. Contributions to the Project Gutenberg Literary
Archive Foundation are tax deductible to the full extent permitted by
U.S. federal laws and your state's laws.
The Foundation's principal office is in Fairbanks, Alaska, with the
mailing address: PO Box 750175, Fairbanks, AK 99775, but its
volunteers and employees are scattered throughout numerous
locations. Its business office is located at 809 North 1500 West, Salt
Lake City, UT 84116, (801) 596-1887. Email contact links and up to
date contact information can be found at the Foundation's web site and
official page at www.gutenberg.org/contact
For additional contact information:
Dr. Gregory B. Newby
Chief Executive and Director
gbnewby@pglaf.org
Section 4. Information about Donations to the Project Gutenberg
Literary Archive Foundation
Project Gutenberg-tm depends upon and cannot survive without wide
spread public support and donations to carry out its mission of
increasing the number of public domain and licensed works that can be
freely distributed in machine readable form accessible by the widest
array of equipment including outdated equipment. Many small donations
($1 to $5,000) are particularly important to maintaining tax exempt
status with the IRS.
The Foundation is committed to complying with the laws regulating
charities and charitable donations in all 50 states of the United
States. Compliance requirements are not uniform and it takes a
considerable effort, much paperwork and many fees to meet and keep up
with these requirements. We do not solicit donations in locations
where we have not received written confirmation of compliance. To SEND
DONATIONS or determine the status of compliance for any particular
state visit www.gutenberg.org/donate
While we cannot and do not solicit contributions from states where we
have not met the solicitation requirements, we know of no prohibition
against accepting unsolicited donations from donors in such states who
approach us with offers to donate.
International donations are gratefully accepted, but we cannot make
any statements concerning tax treatment of donations received from
outside the United States. U.S. laws alone swamp our small staff.
Please check the Project Gutenberg Web pages for current donation
methods and addresses. Donations are accepted in a number of other
ways including checks, online payments and credit card donations. To
donate, please visit: www.gutenberg.org/donate
Section 5. General Information About Project Gutenberg-tm electronic works.
Professor Michael S. Hart was the originator of the Project
Gutenberg-tm concept of a library of electronic works that could be
freely shared with anyone. For forty years, he produced and
distributed Project Gutenberg-tm eBooks with only a loose network of
volunteer support.
Project Gutenberg-tm eBooks are often created from several printed
editions, all of which are confirmed as not protected by copyright in
the U.S. unless a copyright notice is included. Thus, we do not
necessarily keep eBooks in compliance with any particular paper
edition.
Most people start at our Web site which has the main PG search
facility: www.gutenberg.org
This Web site includes information about Project Gutenberg-tm,
including how to make donations to the Project Gutenberg Literary
Archive Foundation, how to help produce our new eBooks, and how to
subscribe to our email newsletter to hear about new eBooks.
================================================
FILE: ciphers/rabin_miller.py
================================================
# Primality Testing with the Rabin-Miller Algorithm
import random
def rabin_miller(num: int) -> bool:
s = num - 1
t = 0
while s % 2 == 0:
s = s // 2
t += 1
for _ in range(5):
a = random.randrange(2, num - 1)
v = pow(a, s, num)
if v != 1:
i = 0
while v != (num - 1):
if i == t - 1:
return False
else:
i = i + 1
v = (v**2) % num
return True
def is_prime_low_num(num: int) -> bool:
if num < 2:
return False
low_primes = [
2,
3,
5,
7,
11,
13,
17,
19,
23,
29,
31,
37,
41,
43,
47,
53,
59,
61,
67,
71,
73,
79,
83,
89,
97,
101,
103,
107,
109,
113,
127,
131,
137,
139,
149,
151,
157,
163,
167,
173,
179,
181,
191,
193,
197,
199,
211,
223,
227,
229,
233,
239,
241,
251,
257,
263,
269,
271,
277,
281,
283,
293,
307,
311,
313,
317,
331,
337,
347,
349,
353,
359,
367,
373,
379,
383,
389,
397,
401,
409,
419,
421,
431,
433,
439,
443,
449,
457,
461,
463,
467,
479,
487,
491,
499,
503,
509,
521,
523,
541,
547,
557,
563,
569,
571,
577,
587,
593,
599,
601,
607,
613,
617,
619,
631,
641,
643,
647,
653,
659,
661,
673,
677,
683,
691,
701,
709,
719,
727,
733,
739,
743,
751,
757,
761,
769,
773,
787,
797,
809,
811,
821,
823,
827,
829,
839,
853,
857,
859,
863,
877,
881,
883,
887,
907,
911,
919,
929,
937,
941,
947,
953,
967,
971,
977,
983,
991,
997,
]
if num in low_primes:
return True
for prime in low_primes:
if (num % prime) == 0:
return False
return rabin_miller(num)
def generate_large_prime(keysize: int = 1024) -> int:
while True:
num = random.randrange(2 ** (keysize - 1), 2 ** (keysize))
if is_prime_low_num(num):
return num
if __name__ == "__main__":
num = generate_large_prime()
print(("Prime number:", num))
print(("is_prime_low_num:", is_prime_low_num(num)))
================================================
FILE: ciphers/rail_fence_cipher.py
================================================
"""https://en.wikipedia.org/wiki/Rail_fence_cipher"""
def encrypt(input_string: str, key: int) -> str:
"""
Shuffles the character of a string by placing each of them
in a grid (the height is dependent on the key) in a zigzag
formation and reading it left to right.
>>> encrypt("Hello World", 4)
'HWe olordll'
>>> encrypt("This is a message", 0)
Traceback (most recent call last):
...
ValueError: Height of grid can't be 0 or negative
>>> encrypt(b"This is a byte string", 5)
Traceback (most recent call last):
...
TypeError: sequence item 0: expected str instance, int found
"""
temp_grid: list[list[str]] = [[] for _ in range(key)]
lowest = key - 1
if key <= 0:
raise ValueError("Height of grid can't be 0 or negative")
if key == 1 or len(input_string) <= key:
return input_string
for position, character in enumerate(input_string):
num = position % (lowest * 2) # puts it in bounds
num = min(num, lowest * 2 - num) # creates zigzag pattern
temp_grid[num].append(character)
grid = ["".join(row) for row in temp_grid]
output_string = "".join(grid)
return output_string
def decrypt(input_string: str, key: int) -> str:
"""
Generates a template based on the key and fills it in with
the characters of the input string and then reading it in
a zigzag formation.
>>> decrypt("HWe olordll", 4)
'Hello World'
>>> decrypt("This is a message", -10)
Traceback (most recent call last):
...
ValueError: Height of grid can't be 0 or negative
>>> decrypt("My key is very big", 100)
'My key is very big'
"""
grid = []
lowest = key - 1
if key <= 0:
raise ValueError("Height of grid can't be 0 or negative")
if key == 1:
return input_string
temp_grid: list[list[str]] = [[] for _ in range(key)] # generates template
for position in range(len(input_string)):
num = position % (lowest * 2) # puts it in bounds
num = min(num, lowest * 2 - num) # creates zigzag pattern
temp_grid[num].append("*")
counter = 0
for row in temp_grid: # fills in the characters
splice = input_string[counter : counter + len(row)]
grid.append(list(splice))
counter += len(row)
output_string = "" # reads as zigzag
for position in range(len(input_string)):
num = position % (lowest * 2) # puts it in bounds
num = min(num, lowest * 2 - num) # creates zigzag pattern
output_string += grid[num][0]
grid[num].pop(0)
return output_string
def bruteforce(input_string: str) -> dict[int, str]:
"""Uses decrypt function by guessing every key
>>> bruteforce("HWe olordll")[4]
'Hello World'
"""
results = {}
for key_guess in range(1, len(input_string)): # tries every key
results[key_guess] = decrypt(input_string, key_guess)
return results
if __name__ == "__main__":
import doctest
doctest.testmod()
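# A small sketch of the zigzag fence that encrypt() builds for key = 4.
# (_show_fence is an illustrative helper; it assumes encrypt() above is in scope.)
def _show_fence(text: str, key: int) -> list[str]:
    """Collect the characters row by row, exactly as encrypt() does."""
    rows: list[list[str]] = [[] for _ in range(key)]
    for position, character in enumerate(text):
        num = position % ((key - 1) * 2)     # puts it in bounds
        num = min(num, (key - 1) * 2 - num)  # creates the zigzag pattern
        rows[num].append(character)
    return ["".join(row) for row in rows]

assert _show_fence("Hello World", 4) == ["HW", "e o", "lord", "ll"]
assert "".join(_show_fence("Hello World", 4)) == encrypt("Hello World", 4)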
================================================
FILE: ciphers/rot13.py
================================================
def dencrypt(s: str, n: int = 13) -> str:
"""
https://en.wikipedia.org/wiki/ROT13
>>> msg = "My secret bank account number is 173-52946 so don't tell anyone!!"
>>> s = dencrypt(msg)
>>> s
"Zl frperg onax nppbhag ahzore vf 173-52946 fb qba'g gryy nalbar!!"
>>> dencrypt(s) == msg
True
"""
out = ""
for c in s:
if "A" <= c <= "Z":
out += chr(ord("A") + (ord(c) - ord("A") + n) % 26)
elif "a" <= c <= "z":
out += chr(ord("a") + (ord(c) - ord("a") + n) % 26)
else:
out += c
return out
def main() -> None:
s0 = input("Enter message: ")
s1 = dencrypt(s0, 13)
print("Encryption:", s1)
s2 = dencrypt(s1, 13)
print("Decryption: ", s2)
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
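# As a cross-check, the standard library's "rot13" text codec should agree with
# dencrypt() for any input (a sketch, assuming dencrypt() above is in scope).
import codecs

assert dencrypt("Hello, World!") == codecs.encode("Hello, World!", "rot13")
assert codecs.encode(codecs.encode("attack at dawn", "rot13"), "rot13") == "attack at dawn"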
================================================
FILE: ciphers/rsa_cipher.py
================================================
import os
import sys
from . import rsa_key_generator as rkg
DEFAULT_BLOCK_SIZE = 128
BYTE_SIZE = 256
def get_blocks_from_text(
message: str, block_size: int = DEFAULT_BLOCK_SIZE
) -> list[int]:
message_bytes = message.encode("ascii")
block_ints = []
for block_start in range(0, len(message_bytes), block_size):
block_int = 0
for i in range(block_start, min(block_start + block_size, len(message_bytes))):
block_int += message_bytes[i] * (BYTE_SIZE ** (i % block_size))
block_ints.append(block_int)
return block_ints
def get_text_from_blocks(
block_ints: list[int], message_length: int, block_size: int = DEFAULT_BLOCK_SIZE
) -> str:
message: list[str] = []
for block_int in block_ints:
block_message: list[str] = []
for i in range(block_size - 1, -1, -1):
if len(message) + i < message_length:
ascii_number = block_int // (BYTE_SIZE**i)
block_int = block_int % (BYTE_SIZE**i)
block_message.insert(0, chr(ascii_number))
message.extend(block_message)
return "".join(message)
def encrypt_message(
message: str, key: tuple[int, int], block_size: int = DEFAULT_BLOCK_SIZE
) -> list[int]:
encrypted_blocks = []
n, e = key
for block in get_blocks_from_text(message, block_size):
encrypted_blocks.append(pow(block, e, n))
return encrypted_blocks
def decrypt_message(
encrypted_blocks: list[int],
message_length: int,
key: tuple[int, int],
block_size: int = DEFAULT_BLOCK_SIZE,
) -> str:
decrypted_blocks = []
n, d = key
for block in encrypted_blocks:
decrypted_blocks.append(pow(block, d, n))
return get_text_from_blocks(decrypted_blocks, message_length, block_size)
def read_key_file(key_filename: str) -> tuple[int, int, int]:
with open(key_filename) as fo:
content = fo.read()
key_size, n, eor_d = content.split(",")
return (int(key_size), int(n), int(eor_d))
def encrypt_and_write_to_file(
message_filename: str,
key_filename: str,
message: str,
block_size: int = DEFAULT_BLOCK_SIZE,
) -> str:
key_size, n, e = read_key_file(key_filename)
if key_size < block_size * 8:
sys.exit(
f"ERROR: Block size is {block_size * 8} bits and key size is {key_size} "
"bits. The RSA cipher requires the block size to be equal to or greater "
"than the key size. Either decrease the block size or use different keys."
)
encrypted_blocks = [str(i) for i in encrypt_message(message, (n, e), block_size)]
encrypted_content = ",".join(encrypted_blocks)
encrypted_content = f"{len(message)}_{block_size}_{encrypted_content}"
with open(message_filename, "w") as fo:
fo.write(encrypted_content)
return encrypted_content
def read_from_file_and_decrypt(message_filename: str, key_filename: str) -> str:
key_size, n, d = read_key_file(key_filename)
with open(message_filename) as fo:
content = fo.read()
message_length_str, block_size_str, encrypted_message = content.split("_")
message_length = int(message_length_str)
block_size = int(block_size_str)
if key_size < block_size * 8:
sys.exit(
f"ERROR: Block size is {block_size * 8} bits and key size is {key_size} "
"bits. The RSA cipher requires the block size to be equal to or greater "
"than the key size. Were the correct key file and encrypted file specified?"
)
encrypted_blocks = []
for block in encrypted_message.split(","):
encrypted_blocks.append(int(block))
return decrypt_message(encrypted_blocks, message_length, (n, d), block_size)
def main() -> None:
filename = "encrypted_file.txt"
response = input(r"Encrypt\Decrypt [e\d]: ")
if response.lower().startswith("e"):
mode = "encrypt"
elif response.lower().startswith("d"):
mode = "decrypt"
if mode == "encrypt":
if not os.path.exists("rsa_pubkey.txt"):
rkg.make_key_files("rsa", 1024)
message = input("\nEnter message: ")
pubkey_filename = "rsa_pubkey.txt"
print(f"Encrypting and writing to {filename}...")
encrypted_text = encrypt_and_write_to_file(filename, pubkey_filename, message)
print("\nEncrypted text:")
print(encrypted_text)
elif mode == "decrypt":
privkey_filename = "rsa_privkey.txt"
print(f"Reading from {filename} and decrypting...")
decrypted_text = read_from_file_and_decrypt(filename, privkey_filename)
print("writing decryption to rsa_decryption.txt...")
with open("rsa_decryption.txt", "w") as dec:
dec.write(decrypted_text)
print("\nDecryption:")
print(decrypted_text)
if __name__ == "__main__":
main()
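# A worked sketch of the base-256 block packing used by get_blocks_from_text() and
# get_text_from_blocks() (assumes both functions above are in scope).
# "Hi!" -> bytes 72, 105, 33 -> 72*256**0 + 105*256**1 + 33*256**2 == 2189640, one block.
assert get_blocks_from_text("Hi!", block_size=8) == [2189640]
assert get_text_from_blocks([2189640], message_length=3, block_size=8) == "Hi!"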
================================================
FILE: ciphers/rsa_factorization.py
================================================
"""
An RSA prime factor algorithm.
The program can efficiently factor RSA prime number given the private key d and
public key e.
| Source: on page ``3`` of https://crypto.stanford.edu/~dabo/papers/RSA-survey.pdf
| More readable source: https://www.di-mgt.com.au/rsa_factorize_n.html
large number can take minutes to factor, therefore are not included in doctest.
"""
from __future__ import annotations
import math
import random
def rsafactor(d: int, e: int, n: int) -> list[int]:
"""
This function returns the factors of N, where p*q=N
Return: [p, q]
We call N the RSA modulus, e the encryption exponent, and d the decryption exponent.
The pair (N, e) is the public key. As its name suggests, it is public and is used to
encrypt messages.
The pair (N, d) is the secret key or private key and is known only to the recipient
of encrypted messages.
>>> rsafactor(3, 16971, 25777)
[149, 173]
>>> rsafactor(7331, 11, 27233)
[113, 241]
>>> rsafactor(4021, 13, 17711)
[89, 199]
"""
k = d * e - 1
p = 0
q = 0
while p == 0:
g = random.randint(2, n - 1)
t = k
while True:
if t % 2 == 0:
t = t // 2
x = (g**t) % n
y = math.gcd(x - 1, n)
if x > 1 and y > 1:
p = y
q = n // y
break # find the correct factors
else:
break # t is not divisible by 2, break and choose another g
return sorted([p, q])
if __name__ == "__main__":
import doctest
doctest.testmod()
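# A quick check of the identity rsafactor() exploits: k = d*e - 1 is a multiple of
# phi(N) = (p - 1) * (q - 1), so g**k == 1 (mod N) for any g coprime to N.
# (Sketch for the first doctest above; assumes rsafactor() is in scope.)
p, q = rsafactor(3, 16971, 25777)
assert p * q == 25777
assert (3 * 16971 - 1) % ((p - 1) * (q - 1)) == 0
assert pow(2, 3 * 16971 - 1, 25777) == 1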
================================================
FILE: ciphers/rsa_key_generator.py
================================================
import os
import random
import sys
from maths.greatest_common_divisor import gcd_by_iterative
from . import cryptomath_module, rabin_miller
def main() -> None:
print("Making key files...")
make_key_files("rsa", 1024)
print("Key files generation successful.")
def generate_key(key_size: int) -> tuple[tuple[int, int], tuple[int, int]]:
"""
>>> random.seed(0) # for repeatability
>>> public_key, private_key = generate_key(8)
>>> public_key
(26569, 239)
>>> private_key
(26569, 2855)
"""
p = rabin_miller.generate_large_prime(key_size)
q = rabin_miller.generate_large_prime(key_size)
n = p * q
# Generate e that is relatively prime to (p - 1) * (q - 1)
while True:
e = random.randrange(2 ** (key_size - 1), 2 ** (key_size))
if gcd_by_iterative(e, (p - 1) * (q - 1)) == 1:
break
# Calculate d that is mod inverse of e
d = cryptomath_module.find_mod_inverse(e, (p - 1) * (q - 1))
public_key = (n, e)
private_key = (n, d)
return (public_key, private_key)
def make_key_files(name: str, key_size: int) -> None:
if os.path.exists(f"{name}_pubkey.txt") or os.path.exists(f"{name}_privkey.txt"):
print("\nWARNING:")
print(
f'"{name}_pubkey.txt" or "{name}_privkey.txt" already exists. \n'
"Use a different name or delete these files and re-run this program."
)
sys.exit()
public_key, private_key = generate_key(key_size)
print(f"\nWriting public key to file {name}_pubkey.txt...")
with open(f"{name}_pubkey.txt", "w") as out_file:
out_file.write(f"{key_size},{public_key[0]},{public_key[1]}")
print(f"Writing private key to file {name}_privkey.txt...")
with open(f"{name}_privkey.txt", "w") as out_file:
out_file.write(f"{key_size},{private_key[0]},{private_key[1]}")
if __name__ == "__main__":
main()
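# A short round-trip sketch: a freshly generated key pair satisfies
# pow(pow(m, e, n), d, n) == m for any message m < n.
# (Assumes generate_key() above is in scope; 16-bit primes keep the demo fast.)
(n, e), (_, d) = generate_key(16)
message = 42
assert pow(pow(message, e, n), d, n) == message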
================================================
FILE: ciphers/running_key_cipher.py
================================================
"""
https://en.wikipedia.org/wiki/Running_key_cipher
"""
def running_key_encrypt(key: str, plaintext: str) -> str:
"""
Encrypts the plaintext using the Running Key Cipher.
:param key: The running key (long piece of text).
:param plaintext: The plaintext to be encrypted.
:return: The ciphertext.
"""
plaintext = plaintext.replace(" ", "").upper()
key = key.replace(" ", "").upper()
key_length = len(key)
ciphertext = []
ord_a = ord("A")
for i, char in enumerate(plaintext):
p = ord(char) - ord_a
k = ord(key[i % key_length]) - ord_a
c = (p + k) % 26
ciphertext.append(chr(c + ord_a))
return "".join(ciphertext)
def running_key_decrypt(key: str, ciphertext: str) -> str:
"""
Decrypts the ciphertext using the Running Key Cipher.
:param key: The running key (long piece of text).
:param ciphertext: The ciphertext to be decrypted.
:return: The plaintext.
"""
ciphertext = ciphertext.replace(" ", "").upper()
key = key.replace(" ", "").upper()
key_length = len(key)
plaintext = []
ord_a = ord("A")
for i, char in enumerate(ciphertext):
c = ord(char) - ord_a
k = ord(key[i % key_length]) - ord_a
p = (c - k) % 26
plaintext.append(chr(p + ord_a))
return "".join(plaintext)
def test_running_key_encrypt() -> None:
"""
>>> key = "How does the duck know that? said Victor"
>>> ciphertext = running_key_encrypt(key, "DEFEND THIS")
>>> running_key_decrypt(key, ciphertext) == "DEFENDTHIS"
True
"""
if __name__ == "__main__":
import doctest
doctest.testmod()
test_running_key_encrypt()
plaintext = input("Enter the plaintext: ").upper()
print(f"\n{plaintext = }")
key = "How does the duck know that? said Victor"
encrypted_text = running_key_encrypt(key, plaintext)
print(f"{encrypted_text = }")
decrypted_text = running_key_decrypt(key, encrypted_text)
print(f"{decrypted_text = }")
================================================
FILE: ciphers/shuffled_shift_cipher.py
================================================
from __future__ import annotations
import random
import string
class ShuffledShiftCipher:
"""
This algorithm uses the Caesar Cipher algorithm but removes the option to
use brute force to decrypt the message.
The passcode is a random password from the selection buffer of
1. uppercase letters of the English alphabet
2. lowercase letters of the English alphabet
3. digits from 0 to 9
Using unique characters from the passcode, the normal list of characters,
that can be allowed in the plaintext, is pivoted and shuffled. Refer to docstring
of __make_key_list() to learn more about the shuffling.
Then, using the passcode, a number is calculated which is used to encrypt the
plaintext message with the normal shift cipher method, only in this case, the
reference, to look back at while decrypting, is shuffled.
Each cipher object can possess an optional argument as passcode, without which a
new passcode is generated for that object automatically.
cip1 = ShuffledShiftCipher('d4usr9TWxw9wMD')
cip2 = ShuffledShiftCipher()
"""
def __init__(self, passcode: str | None = None) -> None:
"""
Initializes a cipher object with a passcode as its entity
Note: No new passcode is generated if user provides a passcode
while creating the object
"""
self.__passcode = passcode or self.__passcode_creator()
self.__key_list = self.__make_key_list()
self.__shift_key = self.__make_shift_key()
def __str__(self) -> str:
"""
:return: passcode of the cipher object
"""
return "".join(self.__passcode)
def __neg_pos(self, iterlist: list[int]) -> list[int]:
"""
Mutates the list by changing the sign of each alternate element
:param iterlist: takes a list iterable
:return: the mutated list
"""
for i in range(1, len(iterlist), 2):
iterlist[i] *= -1
return iterlist
def __passcode_creator(self) -> list[str]:
"""
Creates a random password from the selection buffer of
1. uppercase letters of the English alphabet
2. lowercase letters of the English alphabet
3. digits from 0 to 9
:rtype: list
:return: a password of a random length between 10 and 20
"""
choices = string.ascii_letters + string.digits
password = [random.choice(choices) for _ in range(random.randint(10, 20))]
return password
def __make_key_list(self) -> list[str]:
"""
Shuffles the ordered character choices by pivoting at breakpoints
Breakpoints are the set of characters in the passcode
eg:
if, ABCDEFGHIJKLMNOPQRSTUVWXYZ are the possible characters
and CAMERA is the passcode
then, breakpoints = [A,C,E,M,R] # sorted set of characters from passcode
shuffled parts: [A,CB,ED,MLKJIHGF,RQPON,ZYXWVUTS]
shuffled __key_list : ACBEDMLKJIHGFRQPONZYXWVUTS
Shuffling only the 26 letters of the English alphabet can generate 26!
combinations for the shuffled list. In the program, we consider a set of
97 characters (including letters, digits, punctuation and whitespace),
thereby creating a possibility of 97! combinations (which is a 152-digit number
in itself), thus diminishing the feasibility of a brute force approach.
Moreover, the shift key further multiplies the search space for each of those
97! combinations.
"""
# key_list_options contain nearly all printable except few elements from
# string.whitespace
key_list_options = (
string.ascii_letters + string.digits + string.punctuation + " \t\n"
)
keys_l = []
# creates points known as breakpoints to break the key_list_options at those
# points and pivot each substring
breakpoints = sorted(set(self.__passcode))
temp_list: list[str] = []
# algorithm for creating a new shuffled list, keys_l, out of key_list_options
for i in key_list_options:
temp_list.extend(i)
# checking breakpoints at which to pivot temporary sublist and add it into
# keys_l
if i in breakpoints or i == key_list_options[-1]:
keys_l.extend(temp_list[::-1])
temp_list.clear()
# returning a shuffled keys_l to prevent brute force guessing of shift key
return keys_l
def __make_shift_key(self) -> int:
"""
sum() of the mutated list of ascii values of all characters where the
mutated list is the one returned by __neg_pos()
"""
num = sum(self.__neg_pos([ord(x) for x in self.__passcode]))
return num if num > 0 else len(self.__passcode)
def decrypt(self, encoded_message: str) -> str:
"""
Performs shifting of the encoded_message w.r.t. the shuffled __key_list
to create the decoded_message
>>> ssc = ShuffledShiftCipher('4PYIXyqeQZr44')
>>> ssc.decrypt("d>**-1z6&'5z'5z:z+-='$'>=zp:>5:#z<'.&>#")
'Hello, this is a modified Caesar cipher'
"""
decoded_message = ""
# decoding shift like Caesar cipher algorithm implementing negative shift or
# reverse shift or left shift
for i in encoded_message:
position = self.__key_list.index(i)
decoded_message += self.__key_list[
(position - self.__shift_key) % -len(self.__key_list)
]
return decoded_message
def encrypt(self, plaintext: str) -> str:
"""
Performs shifting of the plaintext w.r.t. the shuffled __key_list
to create the encoded_message
>>> ssc = ShuffledShiftCipher('4PYIXyqeQZr44')
>>> ssc.encrypt('Hello, this is a modified Caesar cipher')
"d>**-1z6&'5z'5z:z+-='$'>=zp:>5:#z<'.&>#"
"""
encoded_message = ""
# encoding shift like Caesar cipher algorithm implementing positive shift or
# forward shift or right shift
for i in plaintext:
position = self.__key_list.index(i)
encoded_message += self.__key_list[
(position + self.__shift_key) % len(self.__key_list)
]
return encoded_message
def test_end_to_end(msg: str = "Hello, this is a modified Caesar cipher") -> str:
"""
>>> test_end_to_end()
'Hello, this is a modified Caesar cipher'
"""
cip1 = ShuffledShiftCipher()
return cip1.decrypt(cip1.encrypt(msg))
if __name__ == "__main__":
import doctest
doctest.testmod()
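# A standalone sketch of the pivot-at-breakpoints shuffle described in the
# __make_key_list() docstring, using its CAMERA example.
# (_pivot is an illustrative helper introduced here, not part of the class above.)
def _pivot(options: str, passcode: str) -> str:
    breakpoints = sorted(set(passcode))
    shuffled: list[str] = []
    chunk: list[str] = []
    for char in options:
        chunk.append(char)
        if char in breakpoints or char == options[-1]:
            shuffled.extend(chunk[::-1])
            chunk.clear()
    return "".join(shuffled)

assert _pivot("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "CAMERA") == "ACBEDMLKJIHGFRQPONZYXWVUTS"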
================================================
FILE: ciphers/simple_keyword_cypher.py
================================================
def remove_duplicates(key: str) -> str:
"""
Removes duplicate alphabetic characters in a keyword (letter is ignored after its
first appearance).
:param key: Keyword to use
:return: String with duplicates removed
>>> remove_duplicates('Hello World!!')
'Helo Wrd'
"""
key_no_dups = ""
for ch in key:
if ch == " " or (ch not in key_no_dups and ch.isalpha()):
key_no_dups += ch
return key_no_dups
def create_cipher_map(key: str) -> dict[str, str]:
"""
Returns a cipher map given a keyword.
:param key: keyword to use
:return: dictionary cipher map
"""
# Create a list of the letters in the alphabet
alphabet = [chr(i + 65) for i in range(26)]
# Remove duplicate characters from key
key = remove_duplicates(key.upper())
offset = len(key)
# First fill cipher with key characters
cipher_alphabet = {alphabet[i]: char for i, char in enumerate(key)}
# Then map remaining characters in alphabet to
# the alphabet from the beginning
for i in range(len(cipher_alphabet), 26):
char = alphabet[i - offset]
# Ensure we are not mapping letters to letters previously mapped
while char in key:
offset -= 1
char = alphabet[i - offset]
cipher_alphabet[alphabet[i]] = char
return cipher_alphabet
def encipher(message: str, cipher_map: dict[str, str]) -> str:
"""
Enciphers a message given a cipher map.
:param message: Message to encipher
:param cipher_map: Cipher map
:return: enciphered string
>>> encipher('Hello World!!', create_cipher_map('Goodbye!!'))
'CYJJM VMQJB!!'
"""
return "".join(cipher_map.get(ch, ch) for ch in message.upper())
def decipher(message: str, cipher_map: dict[str, str]) -> str:
"""
Deciphers a message given a cipher map
:param message: Message to decipher
:param cipher_map: Dictionary mapping to use
:return: Deciphered string
>>> cipher_map = create_cipher_map('Goodbye!!')
>>> decipher(encipher('Hello World!!', cipher_map), cipher_map)
'HELLO WORLD!!'
"""
# Reverse our cipher mappings
rev_cipher_map = {v: k for k, v in cipher_map.items()}
return "".join(rev_cipher_map.get(ch, ch) for ch in message.upper())
def main() -> None:
"""
Handles I/O
:return: void
"""
message = input("Enter message to encode or decode: ").strip()
key = input("Enter keyword: ").strip()
option = input("Encipher or decipher? E/D:").strip()[0].lower()
try:
func = {"e": encipher, "d": decipher}[option]
except KeyError:
raise KeyError("invalid input option")
cipher_map = create_cipher_map(key)
print(func(message, cipher_map))
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: ciphers/simple_substitution_cipher.py
================================================
import random
import sys
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
def main() -> None:
message = input("Enter message: ")
key = "LFWOAYUISVKMNXPBDCRJTQEGHZ"
resp = input("Encrypt/Decrypt [e/d]: ")
check_valid_key(key)
if resp.lower().startswith("e"):
mode = "encrypt"
translated = encrypt_message(key, message)
elif resp.lower().startswith("d"):
mode = "decrypt"
translated = decrypt_message(key, message)
print(f"\n{mode.title()}ion: \n{translated}")
def check_valid_key(key: str) -> None:
key_list = list(key)
letters_list = list(LETTERS)
key_list.sort()
letters_list.sort()
if key_list != letters_list:
sys.exit("Error in the key or symbol set.")
def encrypt_message(key: str, message: str) -> str:
"""
>>> encrypt_message('LFWOAYUISVKMNXPBDCRJTQEGHZ', 'Harshil Darji')
'Ilcrism Olcvs'
"""
return translate_message(key, message, "encrypt")
def decrypt_message(key: str, message: str) -> str:
"""
>>> decrypt_message('LFWOAYUISVKMNXPBDCRJTQEGHZ', 'Ilcrism Olcvs')
'Harshil Darji'
"""
return translate_message(key, message, "decrypt")
def translate_message(key: str, message: str, mode: str) -> str:
translated = ""
chars_a = LETTERS
chars_b = key
if mode == "decrypt":
chars_a, chars_b = chars_b, chars_a
for symbol in message:
if symbol.upper() in chars_a:
sym_index = chars_a.find(symbol.upper())
if symbol.isupper():
translated += chars_b[sym_index].upper()
else:
translated += chars_b[sym_index].lower()
else:
translated += symbol
return translated
def get_random_key() -> str:
key = list(LETTERS)
random.shuffle(key)
return "".join(key)
if __name__ == "__main__":
main()
================================================
FILE: ciphers/transposition_cipher.py
================================================
"""
In cryptography, the TRANSPOSITION cipher is a method of encryption in which the
positions of the plaintext characters are shifted by a certain amount (determined
by the key) according to a regular system, resulting in the permuted text known as
the encrypted text. The type of transposition cipher demonstrated below is the
ROUTE cipher.
"""
import math
def main() -> None:
message = input("Enter message: ")
key = int(input(f"Enter key [2-{len(message) - 1}]: "))
mode = input("Encryption/Decryption [e/d]: ")
if mode.lower().startswith("e"):
text = encrypt_message(key, message)
elif mode.lower().startswith("d"):
text = decrypt_message(key, message)
# Append pipe symbol (vertical bar) to identify spaces at the end.
print(f"Output:\n{text + '|'}")
def encrypt_message(key: int, message: str) -> str:
"""
>>> encrypt_message(6, 'Harshil Darji')
'Hlia rDsahrij'
"""
cipher_text = [""] * key
for col in range(key):
pointer = col
while pointer < len(message):
cipher_text[col] += message[pointer]
pointer += key
return "".join(cipher_text)
def decrypt_message(key: int, message: str) -> str:
"""
>>> decrypt_message(6, 'Hlia rDsahrij')
'Harshil Darji'
"""
num_cols = math.ceil(len(message) / key)
num_rows = key
num_shaded_boxes = (num_cols * num_rows) - len(message)
plain_text = [""] * num_cols
col = 0
row = 0
for symbol in message:
plain_text[col] += symbol
col += 1
if (col == num_cols) or (
(col == num_cols - 1) and (row >= num_rows - num_shaded_boxes)
):
col = 0
row += 1
return "".join(plain_text)
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
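# A worked sketch of the column walk encrypt_message() performs for key = 6:
# column c collects message[c], message[c + 6], message[c + 12], ... and the
# columns are then concatenated (assumes encrypt_message() above is in scope).
columns = ["Hli", "a ", "rD", "sa", "hr", "ij"]
assert "".join(columns) == encrypt_message(6, "Harshil Darji")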
================================================
FILE: ciphers/transposition_cipher_encrypt_decrypt_file.py
================================================
import os
import sys
import time
from . import transposition_cipher as trans_cipher
def main() -> None:
input_file = "./prehistoric_men.txt"
output_file = "./Output.txt"
key = int(input("Enter key: "))
mode = input("Encrypt/Decrypt [e/d]: ")
if not os.path.exists(input_file):
print(f"File {input_file} does not exist. Quitting...")
sys.exit()
if os.path.exists(output_file):
print(f"Overwrite {output_file}? [y/n]")
response = input("> ")
if not response.lower().startswith("y"):
sys.exit()
start_time = time.time()
if mode.lower().startswith("e"):
with open(input_file) as f:
content = f.read()
translated = trans_cipher.encrypt_message(key, content)
elif mode.lower().startswith("d"):
with open(output_file) as f:
content = f.read()
translated = trans_cipher.decrypt_message(key, content)
with open(output_file, "w") as output_obj:
output_obj.write(translated)
total_time = round(time.time() - start_time, 2)
print(("Done (", total_time, "seconds )"))
if __name__ == "__main__":
main()
================================================
FILE: ciphers/trifid_cipher.py
================================================
"""
The trifid cipher uses a table to fractionate each plaintext letter into a trigram,
mixes the constituents of the trigrams, and then applies the table in reverse to turn
these mixed trigrams into ciphertext letters.
https://en.wikipedia.org/wiki/Trifid_cipher
"""
from __future__ import annotations
# fmt: off
TEST_CHARACTER_TO_NUMBER = {
"A": "111", "B": "112", "C": "113", "D": "121", "E": "122", "F": "123", "G": "131",
"H": "132", "I": "133", "J": "211", "K": "212", "L": "213", "M": "221", "N": "222",
"O": "223", "P": "231", "Q": "232", "R": "233", "S": "311", "T": "312", "U": "313",
"V": "321", "W": "322", "X": "323", "Y": "331", "Z": "332", "+": "333",
}
# fmt: on
TEST_NUMBER_TO_CHARACTER = {val: key for key, val in TEST_CHARACTER_TO_NUMBER.items()}
def __encrypt_part(message_part: str, character_to_number: dict[str, str]) -> str:
"""
Arrange the trigram value of each letter of `message_part` vertically and join
them horizontally.
>>> __encrypt_part('ASK', TEST_CHARACTER_TO_NUMBER)
'132111112'
"""
one, two, three = "", "", ""
for each in (character_to_number[character] for character in message_part):
one += each[0]
two += each[1]
three += each[2]
return one + two + three
def __decrypt_part(
message_part: str, character_to_number: dict[str, str]
) -> tuple[str, str, str]:
"""
Convert each letter of the input string into their respective trigram values, join
them and split them into three equal groups of strings which are returned.
>>> __decrypt_part('ABCDE', TEST_CHARACTER_TO_NUMBER)
('11111', '21131', '21122')
"""
this_part = "".join(character_to_number[character] for character in message_part)
result = []
tmp = ""
for digit in this_part:
tmp += digit
if len(tmp) == len(message_part):
result.append(tmp)
tmp = ""
return result[0], result[1], result[2]
def __prepare(
message: str, alphabet: str
) -> tuple[str, str, dict[str, str], dict[str, str]]:
"""
A helper function that generates the trigrams and assigns each letter of the
alphabet to its corresponding trigram and stores this in a dictionary
(`character_to_number` and `number_to_character`) after confirming if the
alphabet's length is ``27``.
>>> test = __prepare('I aM a BOy','abCdeFghijkLmnopqrStuVwxYZ+')
>>> expected = ('IAMABOY','ABCDEFGHIJKLMNOPQRSTUVWXYZ+',
... TEST_CHARACTER_TO_NUMBER, TEST_NUMBER_TO_CHARACTER)
>>> test == expected
True
Testing with incomplete alphabet
>>> __prepare('I aM a BOy','abCdeFghijkLmnopqrStuVw')
Traceback (most recent call last):
...
KeyError: 'Length of alphabet has to be 27.'
Testing with extra long alphabets
>>> __prepare('I aM a BOy','abCdeFghijkLmnopqrStuVwxyzzwwtyyujjgfd')
Traceback (most recent call last):
...
KeyError: 'Length of alphabet has to be 27.'
Testing with punctuation not in the given alphabet
>>> __prepare('am i a boy?','abCdeFghijkLmnopqrStuVwxYZ+')
Traceback (most recent call last):
...
ValueError: Each message character has to be included in alphabet!
Testing with numbers
>>> __prepare(500,'abCdeFghijkLmnopqrStuVwxYZ+')
Traceback (most recent call last):
...
AttributeError: 'int' object has no attribute 'replace'
"""
# Validate message and alphabet, set to upper and remove spaces
alphabet = alphabet.replace(" ", "").upper()
message = message.replace(" ", "").upper()
# Check length and characters
if len(alphabet) != 27:
raise KeyError("Length of alphabet has to be 27.")
if any(char not in alphabet for char in message):
raise ValueError("Each message character has to be included in alphabet!")
# Generate dictionaries
character_to_number = dict(zip(alphabet, TEST_CHARACTER_TO_NUMBER.values()))
number_to_character = {
number: letter for letter, number in character_to_number.items()
}
return message, alphabet, character_to_number, number_to_character
def encrypt_message(
message: str, alphabet: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ.", period: int = 5
) -> str:
"""
encrypt_message
===============
Encrypts a message using the trifid_cipher. Any punctuation characters that
will be used should be added to the alphabet.
PARAMETERS
----------
* `message`: The message you want to encrypt.
* `alphabet` (optional): The characters to be used for the cipher.
* `period` (optional): The number of characters you want in a group whilst
encrypting.
>>> encrypt_message('I am a boy')
'BCDGBQY'
>>> encrypt_message(' ')
''
>>> encrypt_message(' aide toi le c iel ta id era ',
... 'FELIXMARDSTBCGHJKNOPQUVWYZ+',5)
'FMJFVOISSUFTFPUFEQQC'
"""
message, alphabet, character_to_number, number_to_character = __prepare(
message, alphabet
)
encrypted_numeric = ""
for i in range(0, len(message) + 1, period):
encrypted_numeric += __encrypt_part(
message[i : i + period], character_to_number
)
encrypted = ""
for i in range(0, len(encrypted_numeric), 3):
encrypted += number_to_character[encrypted_numeric[i : i + 3]]
return encrypted
def decrypt_message(
message: str, alphabet: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ.", period: int = 5
) -> str:
"""
decrypt_message
===============
Decrypts a trifid_cipher encrypted message.
PARAMETERS
----------
* `message`: The message you want to decrypt.
* `alphabet` (optional): The characters used for the cipher.
* `period` (optional): The number of characters used in grouping when it
was encrypted.
>>> decrypt_message('BCDGBQY')
'IAMABOY'
Decrypting with your own alphabet and period
>>> decrypt_message('FMJFVOISSUFTFPUFEQQC','FELIXMARDSTBCGHJKNOPQUVWYZ+',5)
'AIDETOILECIELTAIDERA'
"""
message, alphabet, character_to_number, number_to_character = __prepare(
message, alphabet
)
decrypted_numeric = []
for i in range(0, len(message), period):
a, b, c = __decrypt_part(message[i : i + period], character_to_number)
for j in range(len(a)):
decrypted_numeric.append(a[j] + b[j] + c[j])
return "".join(number_to_character[each] for each in decrypted_numeric)
if __name__ == "__main__":
import doctest
doctest.testmod()
msg = "DEFEND THE EAST WALL OF THE CASTLE."
encrypted = encrypt_message(msg, "EPSDUCVWYM.ZLKXNBTFGORIJHAQ")
decrypted = decrypt_message(encrypted, "EPSDUCVWYM.ZLKXNBTFGORIJHAQ")
print(f"Encrypted: {encrypted}\nDecrypted: {decrypted}")
================================================
FILE: ciphers/vernam_cipher.py
================================================
def vernam_encrypt(plaintext: str, key: str) -> str:
"""
>>> vernam_encrypt("HELLO","KEY")
'RIJVS'
"""
ciphertext = ""
for i in range(len(plaintext)):
ct = ord(key[i % len(key)]) - 65 + ord(plaintext[i]) - 65
while ct > 25:
ct = ct - 26
ciphertext += chr(65 + ct)
return ciphertext
def vernam_decrypt(ciphertext: str, key: str) -> str:
"""
>>> vernam_decrypt("RIJVS","KEY")
'HELLO'
"""
decrypted_text = ""
for i in range(len(ciphertext)):
ct = ord(ciphertext[i]) - ord(key[i % len(key)])
while ct < 0:
ct = 26 + ct
decrypted_text += chr(65 + ct)
return decrypted_text
if __name__ == "__main__":
from doctest import testmod
testmod()
# Example usage
plaintext = "HELLO"
key = "KEY"
encrypted_text = vernam_encrypt(plaintext, key)
decrypted_text = vernam_decrypt(encrypted_text, key)
print("\n\n")
print("Plaintext:", plaintext)
print("Encrypted:", encrypted_text)
print("Decrypted:", decrypted_text)
================================================
FILE: ciphers/vigenere_cipher.py
================================================
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
def main() -> None:
message = input("Enter message: ")
key = input("Enter key [alphanumeric]: ")
mode = input("Encrypt/Decrypt [e/d]: ")
if mode.lower().startswith("e"):
mode = "encrypt"
translated = encrypt_message(key, message)
elif mode.lower().startswith("d"):
mode = "decrypt"
translated = decrypt_message(key, message)
print(f"\n{mode.title()}ed message:")
print(translated)
def encrypt_message(key: str, message: str) -> str:
"""
>>> encrypt_message('HDarji', 'This is Harshil Darji from Dharmaj.')
'Akij ra Odrjqqs Gaisq muod Mphumrs.'
"""
return translate_message(key, message, "encrypt")
def decrypt_message(key: str, message: str) -> str:
"""
>>> decrypt_message('HDarji', 'Akij ra Odrjqqs Gaisq muod Mphumrs.')
'This is Harshil Darji from Dharmaj.'
"""
return translate_message(key, message, "decrypt")
def translate_message(key: str, message: str, mode: str) -> str:
translated = []
key_index = 0
key = key.upper()
for symbol in message:
num = LETTERS.find(symbol.upper())
if num != -1:
if mode == "encrypt":
num += LETTERS.find(key[key_index])
elif mode == "decrypt":
num -= LETTERS.find(key[key_index])
num %= len(LETTERS)
if symbol.isupper():
translated.append(LETTERS[num])
elif symbol.islower():
translated.append(LETTERS[num].lower())
key_index += 1
if key_index == len(key):
key_index = 0
else:
translated.append(symbol)
return "".join(translated)
if __name__ == "__main__":
main()
================================================
FILE: ciphers/xor_cipher.py
================================================
"""
author: Christian Bender
date: 21.12.2017
class: XORCipher
This class implements the XOR-cipher algorithm and provides
some useful methods for encrypting and decrypting strings and
files.
Overview about methods
- encrypt : list of char
- decrypt : list of char
- encrypt_string : str
- decrypt_string : str
- encrypt_file : boolean
- decrypt_file : boolean
"""
from __future__ import annotations
class XORCipher:
def __init__(self, key: int = 0):
"""
simple constructor that receives a key or uses
default key = 0
"""
# private field
self.__key = key
def encrypt(self, content: str, key: int) -> list[str]:
"""
input: 'content' of type string and 'key' of type int
output: encrypted string 'content' as a list of chars
if key not passed the method uses the key by the constructor.
otherwise key = 1
Empty list
>>> XORCipher().encrypt("", 5)
[]
One key
>>> XORCipher().encrypt("hallo welt", 1)
['i', '`', 'm', 'm', 'n', '!', 'v', 'd', 'm', 'u']
Normal key
>>> XORCipher().encrypt("HALLO WELT", 32)
['h', 'a', 'l', 'l', 'o', '\\x00', 'w', 'e', 'l', 't']
Key greater than 255
>>> XORCipher().encrypt("hallo welt", 256)
['h', 'a', 'l', 'l', 'o', ' ', 'w', 'e', 'l', 't']
"""
# precondition
assert isinstance(key, int)
assert isinstance(content, str)
key = key or self.__key or 1
# make sure key is an appropriate size
key %= 256
return [chr(ord(ch) ^ key) for ch in content]
def decrypt(self, content: str, key: int) -> list[str]:
"""
input: 'content' of type string and 'key' of type int
output: decrypted string 'content' as a list of chars
if key not passed the method uses the key by the constructor.
otherwise key = 1
Empty list
>>> XORCipher().decrypt("", 5)
[]
One key
>>> XORCipher().decrypt("hallo welt", 1)
['i', '`', 'm', 'm', 'n', '!', 'v', 'd', 'm', 'u']
Normal key
>>> XORCipher().decrypt("HALLO WELT", 32)
['h', 'a', 'l', 'l', 'o', '\\x00', 'w', 'e', 'l', 't']
Key greater than 255
>>> XORCipher().decrypt("hallo welt", 256)
['h', 'a', 'l', 'l', 'o', ' ', 'w', 'e', 'l', 't']
"""
# precondition
assert isinstance(key, int)
assert isinstance(content, str)
key = key or self.__key or 1
# make sure key is an appropriate size
key %= 256
return [chr(ord(ch) ^ key) for ch in content]
def encrypt_string(self, content: str, key: int = 0) -> str:
"""
input: 'content' of type string and 'key' of type int
output: encrypted string 'content'
if key not passed the method uses the key by the constructor.
otherwise key = 1
Empty list
>>> XORCipher().encrypt_string("", 5)
''
One key
>>> XORCipher().encrypt_string("hallo welt", 1)
'i`mmn!vdmu'
Normal key
>>> XORCipher().encrypt_string("HALLO WELT", 32)
'hallo\\x00welt'
Key greater than 255
>>> XORCipher().encrypt_string("hallo welt", 256)
'hallo welt'
"""
# precondition
assert isinstance(key, int)
assert isinstance(content, str)
key = key or self.__key or 1
# make sure key is an appropriate size
key %= 256
# This will be returned
ans = ""
for ch in content:
ans += chr(ord(ch) ^ key)
return ans
def decrypt_string(self, content: str, key: int = 0) -> str:
"""
input: 'content' of type string and 'key' of type int
output: decrypted string 'content'
if key not passed the method uses the key by the constructor.
otherwise key = 1
Empty list
>>> XORCipher().decrypt_string("", 5)
''
One key
>>> XORCipher().decrypt_string("hallo welt", 1)
'i`mmn!vdmu'
Normal key
>>> XORCipher().decrypt_string("HALLO WELT", 32)
'hallo\\x00welt'
Key greater than 255
>>> XORCipher().decrypt_string("hallo welt", 256)
'hallo welt'
"""
# precondition
assert isinstance(key, int)
assert isinstance(content, str)
key = key or self.__key or 1
# make sure key is an appropriate size
key %= 256
# This will be returned
ans = ""
for ch in content:
ans += chr(ord(ch) ^ key)
return ans
def encrypt_file(self, file: str, key: int = 0) -> bool:
"""
input: filename (str) and a key (int)
output: returns true if encrypt process was
successful otherwise false
if key not passed the method uses the key by the constructor.
otherwise key = 1
"""
# precondition
assert isinstance(file, str)
assert isinstance(key, int)
# make sure key is an appropriate size
key %= 256
try:
with open(file) as fin, open("encrypt.out", "w+") as fout:
# actual encrypt-process
for line in fin:
fout.write(self.encrypt_string(line, key))
except OSError:
return False
return True
def decrypt_file(self, file: str, key: int = 0) -> bool:
"""
input: filename (str) and a key (int)
output: returns true if decrypt process was
successful otherwise false
if key not passed the method uses the key by the constructor.
otherwise key = 1
"""
# precondition
assert isinstance(file, str)
assert isinstance(key, int)
# make sure key is an appropriate size
key %= 256
try:
with open(file) as fin, open("decrypt.out", "w+") as fout:
# actual decrypt-process
for line in fin:
fout.write(self.decrypt_string(line, key))
except OSError:
return False
return True
if __name__ == "__main__":
from doctest import testmod
testmod()
# Tests
# crypt = XORCipher()
# key = 67
# # test encrypt
# print(crypt.encrypt("hallo welt",key))
# # test decrypt
# print(crypt.decrypt(crypt.encrypt("hallo welt",key), key))
# # test encrypt_string
# print(crypt.encrypt_string("hallo welt",key))
# # test decrypt_string
# print(crypt.decrypt_string(crypt.encrypt_string("hallo welt",key),key))
# if (crypt.encrypt_file("test.txt",key)):
# print("encrypt successful")
# else:
# print("encrypt unsuccessful")
# if (crypt.decrypt_file("encrypt.out",key)):
# print("decrypt successful")
# else:
# print("decrypt unsuccessful")
================================================
FILE: computer_vision/README.md
================================================
# Computer Vision
Computer vision is a field of computer science that works on enabling computers to see, identify and process images in the same way that human vision does, and then provide appropriate output.
It is like imparting human intelligence and instincts to a computer.
Image processing and computer vision are a little different from each other. Image processing means applying algorithms that transform an image from one form to another, such as smoothing, contrasting or stretching.
Computer vision builds on image processing with machine learning techniques: it applies machine learning to recognize patterns and interpret images (much like the visual reasoning of human vision).
================================================
FILE: computer_vision/__init__.py
================================================
================================================
FILE: computer_vision/cnn_classification.py
================================================
"""
Convolutional Neural Network
Objective : To train a CNN model to detect whether TB is present in a lung X-ray or not.
Resources CNN Theory :
https://en.wikipedia.org/wiki/Convolutional_neural_network
Resources Tensorflow : https://www.tensorflow.org/tutorials/images/cnn
Download dataset from :
https://lhncbc.nlm.nih.gov/LHC-publications/pubs/TuberculosisChestXrayImageDataSets.html
1. Download the dataset folder and create two folder training set and test set
in the parent dataset folder
2. Move 30-40 images from both the TB positive and TB negative folders
into the test set folder
3. The labels of the images will be extracted from the folder name
the image is present in.
"""
# Part 1 - Building the CNN
import numpy as np
# Importing the Keras libraries and packages
import tensorflow as tf
from keras import layers, models
if __name__ == "__main__":
# Initialising the CNN
# (Sequential- Building the model layer by layer)
classifier = models.Sequential()
# Step 1 - Convolution
# Here 64,64 is the length & breadth of dataset images and 3 is for the RGB channel
# (3,3) is the kernel size (filter matrix)
classifier.add(
layers.Conv2D(32, (3, 3), input_shape=(64, 64, 3), activation="relu")
)
# Step 2 - Pooling
classifier.add(layers.MaxPooling2D(pool_size=(2, 2)))
# Adding a second convolutional layer
classifier.add(layers.Conv2D(32, (3, 3), activation="relu"))
classifier.add(layers.MaxPooling2D(pool_size=(2, 2)))
# Step 3 - Flattening
classifier.add(layers.Flatten())
# Step 4 - Full connection
classifier.add(layers.Dense(units=128, activation="relu"))
classifier.add(layers.Dense(units=1, activation="sigmoid"))
# Compiling the CNN
classifier.compile(
optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]
)
# Part 2 - Fitting the CNN to the images
# Load Trained model weights
# from keras.models import load_model
# regressor=load_model('cnn.h5')
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1.0 / 255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True
)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)
training_set = train_datagen.flow_from_directory(
"dataset/training_set", target_size=(64, 64), batch_size=32, class_mode="binary"
)
test_set = test_datagen.flow_from_directory(
"dataset/test_set", target_size=(64, 64), batch_size=32, class_mode="binary"
)
# Model.fit accepts data generators directly (fit_generator is deprecated)
classifier.fit(
training_set, steps_per_epoch=5, epochs=30, validation_data=test_set
)
classifier.save("cnn.h5")
# Part 3 - Making new predictions
test_image = tf.keras.preprocessing.image.load_img(
"dataset/single_prediction/image.png", target_size=(64, 64)
)
test_image = tf.keras.preprocessing.image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis=0)
result = classifier.predict(test_image)
# training_set.class_indices
# The sigmoid output is a probability, so threshold it at 0.5
if result[0][0] < 0.5:
    prediction = "Normal"
else:
    prediction = "Abnormality detected"
================================================
FILE: computer_vision/flip_augmentation.py
================================================
import glob
import os
import random
from string import ascii_lowercase, digits
import cv2
"""
Flip image and bounding box for computer vision task
https://paperswithcode.com/method/randomhorizontalflip
"""
# Params
LABEL_DIR = ""
IMAGE_DIR = ""
OUTPUT_DIR = ""
FLIP_TYPE = 1 # (0 is vertical, 1 is horizontal)
def main() -> None:
"""
Get images list and annotations list from input dir.
Update new images and annotations.
Save images and annotations in output dir.
"""
img_paths, annos = get_dataset(LABEL_DIR, IMAGE_DIR)
print("Processing...")
new_images, new_annos, paths = update_image_and_anno(img_paths, annos, FLIP_TYPE)
for index, image in enumerate(new_images):
# Get random string code: '7b7ad245cdff75241935e4dd860f3bad'
letter_code = random_chars(32)
file_name = paths[index].split(os.sep)[-1].rsplit(".", 1)[0]
file_root = f"{OUTPUT_DIR}/{file_name}_FLIP_{letter_code}"
cv2.imwrite(f"{file_root}.jpg", image, [cv2.IMWRITE_JPEG_QUALITY, 85])
print(f"Success {index + 1}/{len(new_images)} with {file_name}")
annos_list = []
for anno in new_annos[index]:
obj = f"{anno[0]} {anno[1]} {anno[2]} {anno[3]} {anno[4]}"
annos_list.append(obj)
with open(f"{file_root}.txt", "w") as outfile:
outfile.write("\n".join(line for line in annos_list))
def get_dataset(label_dir: str, img_dir: str) -> tuple[list, list]:
"""
- label_dir : Path to the folder containing the annotation label files
- img_dir : Path to the folder containing the images
Return : List of image paths and labels
"""
img_paths = []
labels = []
for label_file in glob.glob(os.path.join(label_dir, "*.txt")):
label_name = label_file.split(os.sep)[-1].rsplit(".", 1)[0]
with open(label_file) as in_file:
obj_lists = in_file.readlines()
img_path = os.path.join(img_dir, f"{label_name}.jpg")
boxes = []
for obj_list in obj_lists:
obj = obj_list.rstrip("\n").split(" ")
boxes.append(
[
int(obj[0]),
float(obj[1]),
float(obj[2]),
float(obj[3]),
float(obj[4]),
]
)
if not boxes:
continue
img_paths.append(img_path)
labels.append(boxes)
return img_paths, labels
def update_image_and_anno(
img_list: list, anno_list: list, flip_type: int = 1
) -> tuple[list, list, list]:
"""
- img_list : list of all images
- anno_list : list of all annotations of specific image
- flip_type : 0 is vertical, 1 is horizontal
Return:
- new_imgs_list : images after flipping
- new_annos_lists : list of new annotations after flipping
- path_list : list of image file paths
"""
new_annos_lists = []
path_list = []
new_imgs_list = []
for idx in range(len(img_list)):
new_annos = []
path = img_list[idx]
path_list.append(path)
img_annos = anno_list[idx]
img = cv2.imread(path)
if flip_type == 1:
new_img = cv2.flip(img, flip_type)
for bbox in img_annos:
x_center_new = 1 - bbox[1]
new_annos.append([bbox[0], x_center_new, bbox[2], bbox[3], bbox[4]])
elif flip_type == 0:
new_img = cv2.flip(img, flip_type)
for bbox in img_annos:
y_center_new = 1 - bbox[2]
new_annos.append([bbox[0], bbox[1], y_center_new, bbox[3], bbox[4]])
new_annos_lists.append(new_annos)
new_imgs_list.append(new_img)
return new_imgs_list, new_annos_lists, path_list
def random_chars(number_char: int = 32) -> str:
"""
Automatically generate a random 32-character string.
Get random string code: '7b7ad245cdff75241935e4dd860f3bad'
>>> len(random_chars(32))
32
"""
assert number_char > 1, "The number of characters should be greater than 1"
letter_code = ascii_lowercase + digits
return "".join(random.choice(letter_code) for _ in range(number_char))
if __name__ == "__main__":
main()
print("DONE ✅")
================================================
FILE: computer_vision/haralick_descriptors.py
================================================
"""
https://en.wikipedia.org/wiki/Image_texture
https://en.wikipedia.org/wiki/Co-occurrence_matrix#Application_to_image_analysis
"""
import imageio.v2 as imageio
import numpy as np
def root_mean_square_error(original: np.ndarray, reference: np.ndarray) -> float:
"""Simple implementation of Root Mean Squared Error
for two N dimensional numpy arrays.
Examples:
>>> root_mean_square_error(np.array([1, 2, 3]), np.array([1, 2, 3]))
0.0
>>> root_mean_square_error(np.array([1, 2, 3]), np.array([2, 2, 2]))
0.816496580927726
>>> root_mean_square_error(np.array([1, 2, 3]), np.array([6, 4, 2]))
3.1622776601683795
"""
return float(np.sqrt(((original - reference) ** 2).mean()))
def normalize_image(
image: np.ndarray, cap: float = 255.0, data_type: np.dtype = np.uint8
) -> np.ndarray:
"""
Normalizes image in Numpy 2D array format, between ranges 0-cap,
as to fit uint8 type.
Args:
image: 2D numpy array representing image as matrix, with values in any range
cap: Maximum cap amount for normalization
data_type: numpy data type to set output variable to
Returns:
return 2D numpy array of type uint8, corresponding to limited range matrix
Examples:
>>> normalize_image(np.array([[1, 2, 3], [4, 5, 10]]),
... cap=1.0, data_type=np.float64)
array([[0. , 0.11111111, 0.22222222],
[0.33333333, 0.44444444, 1. ]])
>>> normalize_image(np.array([[4, 4, 3], [1, 7, 2]]))
array([[127, 127, 85],
[ 0, 255, 42]], dtype=uint8)
"""
normalized = (image - np.min(image)) / (np.max(image) - np.min(image)) * cap
return normalized.astype(data_type)
def normalize_array(array: np.ndarray, cap: float = 1) -> np.ndarray:
"""Normalizes a 1D array, between ranges 0-cap.
Args:
array: List containing values to be normalized between cap range.
cap: Maximum cap amount for normalization.
Returns:
return 1D numpy array, corresponding to limited range array
Examples:
>>> normalize_array(np.array([2, 3, 5, 7]))
array([0. , 0.2, 0.6, 1. ])
>>> normalize_array(np.array([[5], [7], [11], [13]]))
array([[0. ],
[0.25],
[0.75],
[1. ]])
"""
diff = np.max(array) - np.min(array)
return (array - np.min(array)) / (1 if diff == 0 else diff) * cap
def grayscale(image: np.ndarray) -> np.ndarray:
"""
Uses luminance weights to transform RGB channel to greyscale, by
taking the dot product between the channel and the weights.
Example:
>>> grayscale(np.array([[[108, 201, 72], [255, 11, 127]],
... [[56, 56, 56], [128, 255, 107]]]))
array([[158, 97],
[ 56, 200]], dtype=uint8)
"""
return np.dot(image[:, :, 0:3], [0.299, 0.587, 0.114]).astype(np.uint8)
def binarize(image: np.ndarray, threshold: float = 127.0) -> np.ndarray:
"""
Binarizes a grayscale image based on a given threshold value,
setting values to 1 or 0 accordingly.
Examples:
>>> binarize(np.array([[128, 255], [101, 156]]))
array([[1, 1],
[0, 1]])
>>> binarize(np.array([[0.07, 1], [0.51, 0.3]]), threshold=0.5)
array([[0, 1],
[1, 0]])
"""
return np.where(image > threshold, 1, 0)
def transform(
image: np.ndarray, kind: str, kernel: np.ndarray | None = None
) -> np.ndarray:
"""
Simple image transformation using one of two available filter functions:
Erosion and Dilation.
Args:
image: binarized input image, onto which to apply transformation
kind: Can be either 'erosion', in which case the :func:np.max
function is called, or 'dilation', when :func:np.min is used instead.
kernel: n x n kernel with shape < :attr:image.shape,
to be used when applying convolution to original image
Returns:
returns a numpy array with same shape as input image,
corresponding to applied binary transformation.
Examples:
>>> img = np.array([[1, 0.5], [0.2, 0.7]])
>>> img = binarize(img, threshold=0.5)
>>> transform(img, 'erosion')
array([[1, 1],
[1, 1]], dtype=uint8)
>>> transform(img, 'dilation')
array([[0, 0],
[0, 0]], dtype=uint8)
"""
if kernel is None:
kernel = np.ones((3, 3))
if kind == "erosion":
constant = 1
apply = np.max
else:
constant = 0
apply = np.min
center_x, center_y = (x // 2 for x in kernel.shape)
# Use padded image when applying convolution
    # so as not to go out of bounds of the original image
transformed = np.zeros(image.shape, dtype=np.uint8)
padded = np.pad(image, 1, "constant", constant_values=constant)
for x in range(center_x, padded.shape[0] - center_x):
for y in range(center_y, padded.shape[1] - center_y):
center = padded[
x - center_x : x + center_x + 1, y - center_y : y + center_y + 1
]
# Apply transformation method to the centered section of the image
transformed[x - center_x, y - center_y] = apply(center[kernel == 1])
return transformed
def opening_filter(image: np.ndarray, kernel: np.ndarray | None = None) -> np.ndarray:
"""
Opening filter, defined as the sequence of
erosion and then a dilation filter on the same image.
Examples:
>>> img = np.array([[1, 0.5], [0.2, 0.7]])
>>> img = binarize(img, threshold=0.5)
>>> opening_filter(img)
array([[1, 1],
[1, 1]], dtype=uint8)
"""
    if kernel is None:
        kernel = np.ones((3, 3))
return transform(transform(image, "dilation", kernel), "erosion", kernel)
def closing_filter(image: np.ndarray, kernel: np.ndarray | None = None) -> np.ndarray:
"""
    Closing filter, defined as the sequence of
dilation and then erosion filter on the same image.
Examples:
>>> img = np.array([[1, 0.5], [0.2, 0.7]])
>>> img = binarize(img, threshold=0.5)
>>> closing_filter(img)
array([[0, 0],
[0, 0]], dtype=uint8)
"""
if kernel is None:
kernel = np.ones((3, 3))
return transform(transform(image, "erosion", kernel), "dilation", kernel)
def binary_mask(
image_gray: np.ndarray, image_map: np.ndarray
) -> tuple[np.ndarray, np.ndarray]:
"""
Apply binary mask, or thresholding based
on bit mask value (mapping mask is binary).
Returns the mapped true value mask and its complementary false value mask.
Example:
>>> img = np.array([[[108, 201, 72], [255, 11, 127]],
... [[56, 56, 56], [128, 255, 107]]])
>>> gray = grayscale(img)
>>> binary = binarize(gray)
>>> morphological = opening_filter(binary)
>>> binary_mask(gray, morphological)
(array([[1, 1],
[1, 1]], dtype=uint8), array([[158, 97],
[ 56, 200]], dtype=uint8))
"""
true_mask, false_mask = image_gray.copy(), image_gray.copy()
true_mask[image_map == 1] = 1
false_mask[image_map == 0] = 0
return true_mask, false_mask
def matrix_concurrency(image: np.ndarray, coordinate: tuple[int, int]) -> np.ndarray:
"""
Calculate sample co-occurrence matrix based on input image
as well as selected coordinates on image.
    Implemented with basic iteration, since building the co-occurrence counts
    is not a linear operation and therefore cannot be computed in the
    frequency domain.
Example:
>>> img = np.array([[[108, 201, 72], [255, 11, 127]],
... [[56, 56, 56], [128, 255, 107]]])
>>> gray = grayscale(img)
>>> binary = binarize(gray)
>>> morphological = opening_filter(binary)
>>> mask_1 = binary_mask(gray, morphological)[0]
>>> matrix_concurrency(mask_1, (0, 1))
array([[0., 0.],
[0., 0.]])
"""
matrix = np.zeros([np.max(image) + 1, np.max(image) + 1])
offset_x, offset_y = coordinate
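    # Count how often each pair of intensities (base pixel, offset pixel)
    # co-occurs at the given offset, then normalize by the total count.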
for x in range(1, image.shape[0] - 1):
for y in range(1, image.shape[1] - 1):
base_pixel = image[x, y]
offset_pixel = image[x + offset_x, y + offset_y]
matrix[base_pixel, offset_pixel] += 1
matrix_sum = np.sum(matrix)
return matrix / (1 if matrix_sum == 0 else matrix_sum)
def haralick_descriptors(matrix: np.ndarray) -> list[float]:
"""Calculates all 8 Haralick descriptors based on co-occurrence input matrix.
All descriptors are as follows:
Maximum probability, Inverse Difference, Homogeneity, Entropy,
Energy, Dissimilarity, Contrast and Correlation
Args:
matrix: Co-occurrence matrix to use as base for calculating descriptors.
Returns:
        List of descriptors in the order: maximum probability, correlation,
        energy, contrast, dissimilarity, inverse difference, homogeneity, entropy
Example:
>>> img = np.array([[[108, 201, 72], [255, 11, 127]],
... [[56, 56, 56], [128, 255, 107]]])
>>> gray = grayscale(img)
>>> binary = binarize(gray)
>>> morphological = opening_filter(binary)
>>> mask_1 = binary_mask(gray, morphological)[0]
>>> concurrency = matrix_concurrency(mask_1, (0, 1))
>>> [float(f) for f in haralick_descriptors(concurrency)]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
"""
# Function np.indices could be used for bigger input types,
# but np.ogrid works just fine
i, j = np.ogrid[0 : matrix.shape[0], 0 : matrix.shape[1]] # np.indices()
# Pre-calculate frequent multiplication and subtraction
prod = np.multiply(i, j)
sub = np.subtract(i, j)
# Calculate numerical value of Maximum Probability
maximum_prob = np.max(matrix)
# Using the definition for each descriptor individually to calculate its matrix
correlation = prod * matrix
energy = np.power(matrix, 2)
contrast = matrix * np.power(sub, 2)
dissimilarity = matrix * np.abs(sub)
inverse_difference = matrix / (1 + np.abs(sub))
homogeneity = matrix / (1 + np.power(sub, 2))
entropy = -(matrix[matrix > 0] * np.log(matrix[matrix > 0]))
    # Sum the values of each descriptor matrix to obtain the final scalar
    # descriptors (maximum_prob is already a scalar).
return [
maximum_prob,
correlation.sum(),
energy.sum(),
contrast.sum(),
dissimilarity.sum(),
inverse_difference.sum(),
homogeneity.sum(),
entropy.sum(),
]
def get_descriptors(
masks: tuple[np.ndarray, np.ndarray], coordinate: tuple[int, int]
) -> np.ndarray:
"""
Calculate all Haralick descriptors for a sequence of
different co-occurrence matrices, given input masks and coordinates.
Example:
>>> img = np.array([[[108, 201, 72], [255, 11, 127]],
... [[56, 56, 56], [128, 255, 107]]])
>>> gray = grayscale(img)
>>> binary = binarize(gray)
>>> morphological = opening_filter(binary)
>>> get_descriptors(binary_mask(gray, morphological), (0, 1))
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
"""
descriptors = np.array(
[haralick_descriptors(matrix_concurrency(mask, coordinate)) for mask in masks]
)
# Concatenate each individual descriptor into
# one single list containing sequence of descriptors
return np.concatenate(descriptors, axis=None)
def euclidean(point_1: np.ndarray, point_2: np.ndarray) -> float:
"""
Simple method for calculating the euclidean distance between two points,
with type np.ndarray.
Example:
>>> a = np.array([1, 0, -2])
>>> b = np.array([2, -1, 1])
>>> euclidean(a, b)
3.3166247903554
"""
return float(np.sqrt(np.sum(np.square(point_1 - point_2))))
def get_distances(descriptors: np.ndarray, base: int) -> list[tuple[int, float]]:
"""
Calculate all Euclidean distances between a selected base descriptor
and all other Haralick descriptors
    The resulting comparison is returned in decreasing order,
showing which descriptor is the most similar to the selected base.
Args:
descriptors: Haralick descriptors to compare with base index
base: Haralick descriptor index to use as base when calculating respective
euclidean distance to other descriptors.
Returns:
Ordered distances between descriptors
Example:
>>> index = 1
>>> img = np.array([[[108, 201, 72], [255, 11, 127]],
... [[56, 56, 56], [128, 255, 107]]])
>>> gray = grayscale(img)
>>> binary = binarize(gray)
>>> morphological = opening_filter(binary)
>>> get_distances(get_descriptors(
... binary_mask(gray, morphological), (0, 1)),
... index)
[(0, 0.0), (1, 0.0), (2, 0.0), (3, 0.0), (4, 0.0), (5, 0.0), \
(6, 0.0), (7, 0.0), (8, 0.0), (9, 0.0), (10, 0.0), (11, 0.0), (12, 0.0), \
(13, 0.0), (14, 0.0), (15, 0.0)]
"""
distances = np.array(
[euclidean(descriptor, descriptors[base]) for descriptor in descriptors]
)
# Normalize distances between range [0, 1]
normalized_distances: list[float] = normalize_array(distances, 1).tolist()
enum_distances = list(enumerate(normalized_distances))
enum_distances.sort(key=lambda tup: tup[1], reverse=True)
return enum_distances
if __name__ == "__main__":
# Index to compare haralick descriptors to
index = int(input())
q_value_list = [int(value) for value in input().split()]
q_value = (q_value_list[0], q_value_list[1])
# Format is the respective filter to apply,
# can be either 1 for the opening filter or else for the closing
parameters = {"format": int(input()), "threshold": int(input())}
# Number of images to perform methods on
b_number = int(input())
files, descriptors = [], []
for _ in range(b_number):
file = input().rstrip()
files.append(file)
        # Open the given image and calculate the morphological filter,
        # respective masks and corresponding Haralick descriptors.
image = imageio.imread(file).astype(np.float32)
gray = grayscale(image)
threshold = binarize(gray, parameters["threshold"])
morphological = (
opening_filter(threshold)
if parameters["format"] == 1
else closing_filter(threshold)
)
masks = binary_mask(gray, morphological)
descriptors.append(get_descriptors(masks, q_value))
# Transform ordered distances array into a sequence of indexes
# corresponding to original file position
distances = get_distances(np.array(descriptors), index)
indexed_distances = np.array(distances).astype(np.uint8)[:, 0]
    # Finally, print the distances considering the Haralick descriptors from the base
    # file to all other images using the morphology method of choice.
print(f"Query: {files[index]}")
print("Ranking:")
for idx, file_idx in enumerate(indexed_distances):
print(f"({idx}) {files[file_idx]}", end="\n")
================================================
FILE: computer_vision/harris_corner.py
================================================
"""
Harris Corner Detector
https://en.wikipedia.org/wiki/Harris_Corner_Detector
"""

import cv2
import numpy as np
class HarrisCorner:
def __init__(self, k: float, window_size: int):
"""
k : is an empirically determined constant in [0.04,0.06]
window_size : neighbourhoods considered
"""
        if 0.04 <= k <= 0.06:
self.k = k
self.window_size = window_size
else:
raise ValueError("invalid k value")
def __str__(self) -> str:
return str(self.k)
def detect(self, img_path: str) -> tuple[cv2.Mat, list[list[int]]]:
"""
Returns the image with corners identified
img_path : path of the image
output : list of the corner positions, image
"""
img = cv2.imread(img_path, 0)
h, w = img.shape
corner_list: list[list[int]] = []
color_img = img.copy()
color_img = cv2.cvtColor(color_img, cv2.COLOR_GRAY2RGB)
dy, dx = np.gradient(img)
ixx = dx**2
iyy = dy**2
ixy = dx * dy
        k = self.k
offset = self.window_size // 2
for y in range(offset, h - offset):
for x in range(offset, w - offset):
wxx = ixx[
y - offset : y + offset + 1, x - offset : x + offset + 1
].sum()
wyy = iyy[
y - offset : y + offset + 1, x - offset : x + offset + 1
].sum()
wxy = ixy[
y - offset : y + offset + 1, x - offset : x + offset + 1
].sum()
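                # Harris corner response: R = det(M) - k * trace(M)^2,
                # where M is the 2x2 structure tensor summed over the window.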
det = (wxx * wyy) - (wxy**2)
trace = wxx + wyy
r = det - k * (trace**2)
# Can change the value
if r > 0.5:
corner_list.append([x, y, r])
color_img.itemset((y, x, 0), 0)
color_img.itemset((y, x, 1), 0)
color_img.itemset((y, x, 2), 255)
return color_img, corner_list
if __name__ == "__main__":
edge_detect = HarrisCorner(0.04, 3)
color_img, _ = edge_detect.detect("path_to_image")
cv2.imwrite("detect.png", color_img)
================================================
FILE: computer_vision/horn_schunck.py
================================================
"""
The Horn-Schunck method estimates the optical flow for every single pixel of
a sequence of images.
It works by assuming brightness constancy between two consecutive frames
and smoothness in the optical flow.
Useful resources:
Wikipedia: https://en.wikipedia.org/wiki/Horn%E2%80%93Schunck_method
Paper: http://image.diku.dk/imagecanon/material/HornSchunckOptical_Flow.pdf
"""
from typing import SupportsIndex
import numpy as np
from scipy.ndimage import convolve
def warp(
image: np.ndarray, horizontal_flow: np.ndarray, vertical_flow: np.ndarray
) -> np.ndarray:
"""
Warps the pixels of an image into a new image using the horizontal and vertical
flows.
Pixels that are warped from an invalid location are set to 0.
Parameters:
image: Grayscale image
horizontal_flow: Horizontal flow
vertical_flow: Vertical flow
Returns: Warped image
>>> warp(np.array([[0, 1, 2], [0, 3, 0], [2, 2, 2]]), \
np.array([[0, 1, -1], [-1, 0, 0], [1, 1, 1]]), \
np.array([[0, 0, 0], [0, 1, 0], [0, 0, 1]]))
array([[0, 0, 0],
[3, 1, 0],
[0, 2, 3]])
"""
flow = np.stack((horizontal_flow, vertical_flow), 2)
# Create a grid of all pixel coordinates and subtract the flow to get the
# target pixels coordinates
grid = np.stack(
np.meshgrid(np.arange(0, image.shape[1]), np.arange(0, image.shape[0])), 2
)
grid = np.round(grid - flow).astype(np.int32)
# Find the locations outside of the original image
invalid = (grid < 0) | (grid >= np.array([image.shape[1], image.shape[0]]))
grid[invalid] = 0
warped = image[grid[:, :, 1], grid[:, :, 0]]
# Set pixels at invalid locations to 0
warped[invalid[:, :, 0] | invalid[:, :, 1]] = 0
return warped
def horn_schunck(
image0: np.ndarray,
image1: np.ndarray,
num_iter: SupportsIndex,
alpha: float | None = None,
) -> tuple[np.ndarray, np.ndarray]:
"""
This function performs the Horn-Schunck algorithm and returns the estimated
optical flow. It is assumed that the input images are grayscale and
normalized to be in [0, 1].
Parameters:
image0: First image of the sequence
image1: Second image of the sequence
alpha: Regularization constant
num_iter: Number of iterations performed
Returns: estimated horizontal & vertical flow
>>> np.round(horn_schunck(np.array([[0, 0, 2], [0, 0, 2]]), \
np.array([[0, 2, 0], [0, 2, 0]]), alpha=0.1, num_iter=110)).\
astype(np.int32)
array([[[ 0, -1, -1],
[ 0, -1, -1]],
[[ 0, 0, 0],
[ 0, 0, 0]]], dtype=int32)
"""
if alpha is None:
alpha = 0.1
# Initialize flow
horizontal_flow = np.zeros_like(image0)
vertical_flow = np.zeros_like(image0)
# Prepare kernels for the calculation of the derivatives and the average velocity
kernel_x = np.array([[-1, 1], [-1, 1]]) * 0.25
kernel_y = np.array([[-1, -1], [1, 1]]) * 0.25
kernel_t = np.array([[1, 1], [1, 1]]) * 0.25
kernel_laplacian = np.array(
[[1 / 12, 1 / 6, 1 / 12], [1 / 6, 0, 1 / 6], [1 / 12, 1 / 6, 1 / 12]]
)
# Iteratively refine the flow
for _ in range(num_iter):
warped_image = warp(image0, horizontal_flow, vertical_flow)
derivative_x = convolve(warped_image, kernel_x) + convolve(image1, kernel_x)
derivative_y = convolve(warped_image, kernel_y) + convolve(image1, kernel_y)
derivative_t = convolve(warped_image, kernel_t) + convolve(image1, -kernel_t)
avg_horizontal_velocity = convolve(horizontal_flow, kernel_laplacian)
avg_vertical_velocity = convolve(vertical_flow, kernel_laplacian)
# This updates the flow as proposed in the paper (Step 12)
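        # u = u_avg - Ix * (Ix * u_avg + Iy * v_avg + It) / (alpha^2 + Ix^2 + Iy^2)
        # v = v_avg - Iy * (Ix * u_avg + Iy * v_avg + It) / (alpha^2 + Ix^2 + Iy^2)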
update = (
derivative_x * avg_horizontal_velocity
+ derivative_y * avg_vertical_velocity
+ derivative_t
)
update = update / (alpha**2 + derivative_x**2 + derivative_y**2)
horizontal_flow = avg_horizontal_velocity - derivative_x * update
vertical_flow = avg_vertical_velocity - derivative_y * update
return horizontal_flow, vertical_flow
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: computer_vision/intensity_based_segmentation.py
================================================
# Source: "https://www.ijcse.com/docs/IJCSE11-02-03-117.pdf"
# Importing necessary libraries
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
def segment_image(image: np.ndarray, thresholds: list[int]) -> np.ndarray:
"""
Performs image segmentation based on intensity thresholds.
Args:
image: Input grayscale image as a 2D array.
thresholds: Intensity thresholds to define segments.
Returns:
A labeled 2D array where each region corresponds to a threshold range.
Example:
>>> img = np.array([[80, 120, 180], [40, 90, 150], [20, 60, 100]])
>>> segment_image(img, [50, 100, 150])
array([[1, 2, 3],
[0, 1, 2],
[0, 1, 1]], dtype=int32)
"""
# Initialize segmented array with zeros
segmented = np.zeros_like(image, dtype=np.int32)
# Assign labels based on thresholds
for i, threshold in enumerate(thresholds):
segmented[image > threshold] = i + 1
return segmented
if __name__ == "__main__":
# Load the image
image_path = "path_to_image" # Replace with your image path
original_image = Image.open(image_path).convert("L")
image_array = np.array(original_image)
# Define thresholds
thresholds = [50, 100, 150, 200]
# Perform segmentation
segmented_image = segment_image(image_array, thresholds)
# Display the results
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.title("Original Image")
plt.imshow(image_array, cmap="gray")
plt.axis("off")
plt.subplot(1, 2, 2)
plt.title("Segmented Image")
plt.imshow(segmented_image, cmap="tab20")
plt.axis("off")
plt.show()
================================================
FILE: computer_vision/mean_threshold.py
================================================
"""
Mean thresholding algorithm for image processing
https://en.wikipedia.org/wiki/Thresholding_(image_processing)
"""

from PIL import Image
def mean_threshold(image: Image) -> Image:
"""
image: is a grayscale PIL image object
"""
height, width = image.size
mean = 0
pixels = image.load()
for i in range(width):
for j in range(height):
pixel = pixels[j, i]
mean += pixel
mean //= width * height
for j in range(width):
for i in range(height):
pixels[i, j] = 255 if pixels[i, j] > mean else 0
return image
if __name__ == "__main__":
image = mean_threshold(Image.open("path_to_image").convert("L"))
image.save("output_image_path")
================================================
FILE: computer_vision/mosaic_augmentation.py
================================================
"""Source: https://github.com/jason9075/opencv-mosaic-data-aug"""
import glob
import os
import random
from string import ascii_lowercase, digits
import cv2
import numpy as np
# Parameters
OUTPUT_SIZE = (720, 1280) # Height, Width
SCALE_RANGE = (0.4, 0.6)  # Range of the mosaic split point, as a fraction of the output size
FILTER_TINY_SCALE = 1 / 100  # Drop bounding boxes whose width or height falls below this scale
LABEL_DIR = ""
IMG_DIR = ""
OUTPUT_DIR = ""
NUMBER_IMAGES = 250
def main() -> None:
"""
Get images list and annotations list from input dir.
Update new images and annotations.
Save images and annotations in output dir.
"""
img_paths, annos = get_dataset(LABEL_DIR, IMG_DIR)
for index in range(NUMBER_IMAGES):
idxs = random.sample(range(len(annos)), 4)
new_image, new_annos, path = update_image_and_anno(
img_paths,
annos,
idxs,
OUTPUT_SIZE,
SCALE_RANGE,
filter_scale=FILTER_TINY_SCALE,
)
# Get random string code: '7b7ad245cdff75241935e4dd860f3bad'
letter_code = random_chars(32)
file_name = path.split(os.sep)[-1].rsplit(".", 1)[0]
file_root = f"{OUTPUT_DIR}/{file_name}_MOSAIC_{letter_code}"
cv2.imwrite(f"{file_root}.jpg", new_image, [cv2.IMWRITE_JPEG_QUALITY, 85])
print(f"Succeeded {index + 1}/{NUMBER_IMAGES} with {file_name}")
annos_list = []
for anno in new_annos:
width = anno[3] - anno[1]
height = anno[4] - anno[2]
x_center = anno[1] + width / 2
y_center = anno[2] + height / 2
obj = f"{anno[0]} {x_center} {y_center} {width} {height}"
annos_list.append(obj)
with open(f"{file_root}.txt", "w") as outfile:
outfile.write("\n".join(line for line in annos_list))
def get_dataset(label_dir: str, img_dir: str) -> tuple[list, list]:
"""
    - label_dir : Path to the directory containing the image annotations
    - img_dir : Path to the folder containing the images
    Return : List of image paths and labels
"""
img_paths = []
labels = []
for label_file in glob.glob(os.path.join(label_dir, "*.txt")):
label_name = label_file.split(os.sep)[-1].rsplit(".", 1)[0]
with open(label_file) as in_file:
obj_lists = in_file.readlines()
img_path = os.path.join(img_dir, f"{label_name}.jpg")
boxes = []
for obj_list in obj_lists:
obj = obj_list.rstrip("\n").split(" ")
xmin = float(obj[1]) - float(obj[3]) / 2
ymin = float(obj[2]) - float(obj[4]) / 2
xmax = float(obj[1]) + float(obj[3]) / 2
ymax = float(obj[2]) + float(obj[4]) / 2
boxes.append([int(obj[0]), xmin, ymin, xmax, ymax])
if not boxes:
continue
img_paths.append(img_path)
labels.append(boxes)
return img_paths, labels
def update_image_and_anno(
all_img_list: list,
all_annos: list,
idxs: list[int],
output_size: tuple[int, int],
scale_range: tuple[float, float],
filter_scale: float = 0.0,
) -> tuple[list, list, str]:
"""
    - all_img_list : list of all image paths
    - all_annos : list of all annotations, one list of boxes per image
    - idxs : indices of the four images to combine
    - output_size : size of the output image (Height, Width)
    - scale_range : range for the mosaic split point, as a fraction of the output size
    - filter_scale : minimum width/height a scaled bounding box must have to be kept
    Return:
    - output_img : the combined (mosaic) image
    - new_anno : list of annotations rescaled to the new image
    - path_list[0] : the name of the first image file
"""
output_img = np.zeros([output_size[0], output_size[1], 3], dtype=np.uint8)
scale_x = scale_range[0] + random.random() * (scale_range[1] - scale_range[0])
scale_y = scale_range[0] + random.random() * (scale_range[1] - scale_range[0])
divid_point_x = int(scale_x * output_size[1])
divid_point_y = int(scale_y * output_size[0])
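    # The output image is split into four quadrants at (divid_point_x, divid_point_y);
    # each of the four source images fills one quadrant and its normalized bounding
    # boxes are rescaled into that quadrant.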
new_anno = []
path_list = []
for i, index in enumerate(idxs):
path = all_img_list[index]
path_list.append(path)
img_annos = all_annos[index]
img = cv2.imread(path)
if i == 0: # top-left
img = cv2.resize(img, (divid_point_x, divid_point_y))
output_img[:divid_point_y, :divid_point_x, :] = img
for bbox in img_annos:
xmin = bbox[1] * scale_x
ymin = bbox[2] * scale_y
xmax = bbox[3] * scale_x
ymax = bbox[4] * scale_y
new_anno.append([bbox[0], xmin, ymin, xmax, ymax])
elif i == 1: # top-right
img = cv2.resize(img, (output_size[1] - divid_point_x, divid_point_y))
output_img[:divid_point_y, divid_point_x : output_size[1], :] = img
for bbox in img_annos:
xmin = scale_x + bbox[1] * (1 - scale_x)
ymin = bbox[2] * scale_y
xmax = scale_x + bbox[3] * (1 - scale_x)
ymax = bbox[4] * scale_y
new_anno.append([bbox[0], xmin, ymin, xmax, ymax])
elif i == 2: # bottom-left
img = cv2.resize(img, (divid_point_x, output_size[0] - divid_point_y))
output_img[divid_point_y : output_size[0], :divid_point_x, :] = img
for bbox in img_annos:
xmin = bbox[1] * scale_x
ymin = scale_y + bbox[2] * (1 - scale_y)
xmax = bbox[3] * scale_x
ymax = scale_y + bbox[4] * (1 - scale_y)
new_anno.append([bbox[0], xmin, ymin, xmax, ymax])
else: # bottom-right
img = cv2.resize(
img, (output_size[1] - divid_point_x, output_size[0] - divid_point_y)
)
output_img[
divid_point_y : output_size[0], divid_point_x : output_size[1], :
] = img
for bbox in img_annos:
xmin = scale_x + bbox[1] * (1 - scale_x)
ymin = scale_y + bbox[2] * (1 - scale_y)
xmax = scale_x + bbox[3] * (1 - scale_x)
ymax = scale_y + bbox[4] * (1 - scale_y)
new_anno.append([bbox[0], xmin, ymin, xmax, ymax])
    # Remove bounding boxes smaller than the filter scale
if filter_scale > 0:
new_anno = [
anno
for anno in new_anno
if filter_scale < (anno[3] - anno[1]) and filter_scale < (anno[4] - anno[2])
]
return output_img, new_anno, path_list[0]
def random_chars(number_char: int) -> str:
"""
    Automatically generate a random string of number_char characters,
    e.g. '7b7ad245cdff75241935e4dd860f3bad'.
>>> len(random_chars(32))
32
"""
    assert number_char > 1, "The number of characters should be greater than 1"
letter_code = ascii_lowercase + digits
return "".join(random.choice(letter_code) for _ in range(number_char))
if __name__ == "__main__":
main()
print("DONE ✅")
================================================
FILE: computer_vision/pooling_functions.py
================================================
# Source : https://computersciencewiki.org/index.php/Max-pooling_/_Pooling
# Importing the libraries
import numpy as np
from PIL import Image
# Maxpooling Function
def maxpooling(arr: np.ndarray, size: int, stride: int) -> np.ndarray:
"""
    This function performs max pooling on a 2D input matrix (image)
Args:
arr: numpy array
size: size of pooling matrix
stride: the number of pixels shifts over the input matrix
Returns:
numpy array of maxpooled matrix
Sample Input Output:
>>> maxpooling([[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]], 2, 2)
array([[ 6., 8.],
[14., 16.]])
>>> maxpooling([[147, 180, 122],[241, 76, 32],[126, 13, 157]], 2, 1)
array([[241., 180.],
[241., 157.]])
"""
arr = np.array(arr)
if arr.shape[0] != arr.shape[1]:
raise ValueError("The input array is not a square matrix")
i = 0
j = 0
mat_i = 0
mat_j = 0
# compute the shape of the output matrix
maxpool_shape = (arr.shape[0] - size) // stride + 1
# initialize the output matrix with zeros of shape maxpool_shape
updated_arr = np.zeros((maxpool_shape, maxpool_shape))
while i < arr.shape[0]:
if i + size > arr.shape[0]:
# if the end of the matrix is reached, break
break
while j < arr.shape[1]:
# if the end of the matrix is reached, break
if j + size > arr.shape[1]:
break
# compute the maximum of the pooling matrix
updated_arr[mat_i][mat_j] = np.max(arr[i : i + size, j : j + size])
# shift the pooling matrix by stride of column pixels
j += stride
mat_j += 1
# shift the pooling matrix by stride of row pixels
i += stride
mat_i += 1
# reset the column index to 0
j = 0
mat_j = 0
return updated_arr
# Averagepooling Function
def avgpooling(arr: np.ndarray, size: int, stride: int) -> np.ndarray:
"""
    This function performs average pooling on a 2D input matrix (image)
Args:
arr: numpy array
size: size of pooling matrix
stride: the number of pixels shifts over the input matrix
Returns:
numpy array of avgpooled matrix
Sample Input Output:
>>> avgpooling([[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]], 2, 2)
array([[ 3., 5.],
[11., 13.]])
>>> avgpooling([[147, 180, 122],[241, 76, 32],[126, 13, 157]], 2, 1)
array([[161., 102.],
[114., 69.]])
"""
arr = np.array(arr)
if arr.shape[0] != arr.shape[1]:
raise ValueError("The input array is not a square matrix")
i = 0
j = 0
mat_i = 0
mat_j = 0
# compute the shape of the output matrix
avgpool_shape = (arr.shape[0] - size) // stride + 1
# initialize the output matrix with zeros of shape avgpool_shape
updated_arr = np.zeros((avgpool_shape, avgpool_shape))
while i < arr.shape[0]:
# if the end of the matrix is reached, break
if i + size > arr.shape[0]:
break
while j < arr.shape[1]:
# if the end of the matrix is reached, break
if j + size > arr.shape[1]:
break
# compute the average of the pooling matrix
updated_arr[mat_i][mat_j] = int(np.average(arr[i : i + size, j : j + size]))
# shift the pooling matrix by stride of column pixels
j += stride
mat_j += 1
# shift the pooling matrix by stride of row pixels
i += stride
mat_i += 1
# reset the column index to 0
j = 0
mat_j = 0
return updated_arr
# Main Function
if __name__ == "__main__":
from doctest import testmod
testmod(name="avgpooling", verbose=True)
# Loading the image
image = Image.open("path_to_image")
# Converting the image to numpy array and maxpooling, displaying the result
# Ensure that the image is a square matrix
Image.fromarray(maxpooling(np.array(image), size=3, stride=2)).show()
# Converting the image to numpy array and averagepooling, displaying the result
# Ensure that the image is a square matrix
Image.fromarray(avgpooling(np.array(image), size=3, stride=2)).show()
================================================
FILE: conversions/README.md
================================================
# Conversion
Conversion programs convert data from one type, numerical base or unit into another, e.g. binary to decimal, integer to string or feet to meters.
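A minimal sketch of such a conversion, using only Python built-ins (this example is illustrative and not tied to any particular module in this directory):

```python
# Binary string to decimal integer, and back again
assert int("101010", 2) == 42
assert bin(42) == "0b101010"
```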
================================================
FILE: conversions/__init__.py
================================================
================================================
FILE: conversions/astronomical_length_scale_conversion.py
================================================
"""
Conversion of length units.
Available Units:
Metre, Kilometre, Megametre, Gigametre,
Terametre, Petametre, Exametre, Zettametre, Yottametre
USAGE :
-> Import this file into your project.
-> Use the function length_conversion() for conversion of length units.
-> Parameters :
-> value : The number of units you want to convert
-> from_type : The unit you want to convert from
-> to_type : The unit you want to convert to
REFERENCES :
-> Wikipedia reference: https://en.wikipedia.org/wiki/Meter
-> Wikipedia reference: https://en.wikipedia.org/wiki/Kilometer
-> Wikipedia reference: https://en.wikipedia.org/wiki/Orders_of_magnitude_(length)
"""
UNIT_SYMBOL = {
"meter": "m",
"kilometer": "km",
"megametre": "Mm",
"gigametre": "Gm",
"terametre": "Tm",
"petametre": "Pm",
"exametre": "Em",
"zettametre": "Zm",
"yottametre": "Ym",
}
# Exponent of the factor(meter)
METRIC_CONVERSION = {
"m": 0,
"km": 3,
"Mm": 6,
"Gm": 9,
"Tm": 12,
"Pm": 15,
"Em": 18,
"Zm": 21,
"Ym": 24,
}
def length_conversion(value: float, from_type: str, to_type: str) -> float:
"""
Conversion between astronomical length units.
>>> length_conversion(1, "meter", "kilometer")
0.001
>>> length_conversion(1, "meter", "megametre")
1e-06
>>> length_conversion(1, "gigametre", "meter")
1000000000
>>> length_conversion(1, "gigametre", "terametre")
0.001
>>> length_conversion(1, "petametre", "terametre")
1000
>>> length_conversion(1, "petametre", "exametre")
0.001
>>> length_conversion(1, "terametre", "zettametre")
1e-09
>>> length_conversion(1, "yottametre", "zettametre")
1000
>>> length_conversion(4, "wrongUnit", "inch")
Traceback (most recent call last):
...
ValueError: Invalid 'from_type' value: 'wrongUnit'.
Conversion abbreviations are: m, km, Mm, Gm, Tm, Pm, Em, Zm, Ym
"""
from_sanitized = from_type.lower().strip("s")
to_sanitized = to_type.lower().strip("s")
from_sanitized = UNIT_SYMBOL.get(from_sanitized, from_sanitized)
to_sanitized = UNIT_SYMBOL.get(to_sanitized, to_sanitized)
if from_sanitized not in METRIC_CONVERSION:
msg = (
f"Invalid 'from_type' value: {from_type!r}.\n"
f"Conversion abbreviations are: {', '.join(METRIC_CONVERSION)}"
)
raise ValueError(msg)
if to_sanitized not in METRIC_CONVERSION:
msg = (
f"Invalid 'to_type' value: {to_type!r}.\n"
f"Conversion abbreviations are: {', '.join(METRIC_CONVERSION)}"
)
raise ValueError(msg)
from_exponent = METRIC_CONVERSION[from_sanitized]
to_exponent = METRIC_CONVERSION[to_sanitized]
    exponent = from_exponent - to_exponent
return value * pow(10, exponent)
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: conversions/binary_to_decimal.py
================================================
def bin_to_decimal(bin_string: str) -> int:
"""
Convert a binary value to its decimal equivalent
>>> bin_to_decimal("101")
5
>>> bin_to_decimal(" 1010 ")
10
>>> bin_to_decimal("-11101")
-29
>>> bin_to_decimal("0")
0
>>> bin_to_decimal("a")
Traceback (most recent call last):
...
ValueError: Non-binary value was passed to the function
>>> bin_to_decimal("")
Traceback (most recent call last):
...
ValueError: Empty string was passed to the function
>>> bin_to_decimal("39")
Traceback (most recent call last):
...
ValueError: Non-binary value was passed to the function
"""
bin_string = str(bin_string).strip()
if not bin_string:
raise ValueError("Empty string was passed to the function")
is_negative = bin_string[0] == "-"
if is_negative:
bin_string = bin_string[1:]
if not all(char in "01" for char in bin_string):
raise ValueError("Non-binary value was passed to the function")
decimal_number = 0
for char in bin_string:
decimal_number = 2 * decimal_number + int(char)
return -decimal_number if is_negative else decimal_number
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: conversions/binary_to_hexadecimal.py
================================================
BITS_TO_HEX = {
"0000": "0",
"0001": "1",
"0010": "2",
"0011": "3",
"0100": "4",
"0101": "5",
"0110": "6",
"0111": "7",
"1000": "8",
"1001": "9",
"1010": "a",
"1011": "b",
"1100": "c",
"1101": "d",
"1110": "e",
"1111": "f",
}
def bin_to_hexadecimal(binary_str: str) -> str:
"""
    Convert a binary string into hexadecimal using the grouping method
>>> bin_to_hexadecimal('101011111')
'0x15f'
>>> bin_to_hexadecimal(' 1010 ')
'0x0a'
>>> bin_to_hexadecimal('-11101')
'-0x1d'
>>> bin_to_hexadecimal('a')
Traceback (most recent call last):
...
ValueError: Non-binary value was passed to the function
>>> bin_to_hexadecimal('')
Traceback (most recent call last):
...
ValueError: Empty string was passed to the function
"""
# Sanitising parameter
binary_str = str(binary_str).strip()
# Exceptions
if not binary_str:
raise ValueError("Empty string was passed to the function")
is_negative = binary_str[0] == "-"
binary_str = binary_str[1:] if is_negative else binary_str
if not all(char in "01" for char in binary_str):
raise ValueError("Non-binary value was passed to the function")
binary_str = (
"0" * (4 * (divmod(len(binary_str), 4)[0] + 1) - len(binary_str)) + binary_str
)
hexadecimal = []
for x in range(0, len(binary_str), 4):
hexadecimal.append(BITS_TO_HEX[binary_str[x : x + 4]])
hexadecimal_str = "0x" + "".join(hexadecimal)
return "-" + hexadecimal_str if is_negative else hexadecimal_str
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: conversions/binary_to_octal.py
================================================
"""
The function below will convert any binary string to the octal equivalent.
>>> bin_to_octal("1111")
'17'
>>> bin_to_octal("101010101010011")
'52523'
>>> bin_to_octal("")
Traceback (most recent call last):
...
ValueError: Empty string was passed to the function
>>> bin_to_octal("a-1")
Traceback (most recent call last):
...
ValueError: Non-binary value was passed to the function
"""
def bin_to_octal(bin_string: str) -> str:
if not all(char in "01" for char in bin_string):
raise ValueError("Non-binary value was passed to the function")
if not bin_string:
raise ValueError("Empty string was passed to the function")
oct_string = ""
while len(bin_string) % 3 != 0:
bin_string = "0" + bin_string
bin_string_in_3_list = [
bin_string[index : index + 3]
for index in range(len(bin_string))
if index % 3 == 0
]
for bin_group in bin_string_in_3_list:
oct_val = 0
for index, val in enumerate(bin_group):
oct_val += int(2 ** (2 - index) * int(val))
oct_string += str(oct_val)
return oct_string
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: conversions/convert_number_to_words.py
================================================
from enum import Enum
from typing import Literal
class NumberingSystem(Enum):
SHORT = (
(15, "quadrillion"),
(12, "trillion"),
(9, "billion"),
(6, "million"),
(3, "thousand"),
(2, "hundred"),
)
LONG = (
(15, "billiard"),
(9, "milliard"),
(6, "million"),
(3, "thousand"),
(2, "hundred"),
)
INDIAN = (
(14, "crore crore"),
(12, "lakh crore"),
(7, "crore"),
(5, "lakh"),
(3, "thousand"),
(2, "hundred"),
)
@classmethod
def max_value(cls, system: str) -> int:
"""
Gets the max value supported by the given number system.
>>> NumberingSystem.max_value("short") == 10**18 - 1
True
>>> NumberingSystem.max_value("long") == 10**21 - 1
True
>>> NumberingSystem.max_value("indian") == 10**19 - 1
True
"""
match system_enum := cls[system.upper()]:
case cls.SHORT:
max_exp = system_enum.value[0][0] + 3
case cls.LONG:
max_exp = system_enum.value[0][0] + 6
case cls.INDIAN:
max_exp = 19
case _:
raise ValueError("Invalid numbering system")
return 10**max_exp - 1
class NumberWords(Enum):
ONES = { # noqa: RUF012
0: "",
1: "one",
2: "two",
3: "three",
4: "four",
5: "five",
6: "six",
7: "seven",
8: "eight",
9: "nine",
}
TEENS = { # noqa: RUF012
0: "ten",
1: "eleven",
2: "twelve",
3: "thirteen",
4: "fourteen",
5: "fifteen",
6: "sixteen",
7: "seventeen",
8: "eighteen",
9: "nineteen",
}
TENS = { # noqa: RUF012
2: "twenty",
3: "thirty",
4: "forty",
5: "fifty",
6: "sixty",
7: "seventy",
8: "eighty",
9: "ninety",
}
def convert_small_number(num: int) -> str:
"""
Converts small, non-negative integers with irregular constructions in English (i.e.,
numbers under 100) into words.
>>> convert_small_number(0)
'zero'
>>> convert_small_number(5)
'five'
>>> convert_small_number(10)
'ten'
>>> convert_small_number(15)
'fifteen'
>>> convert_small_number(20)
'twenty'
>>> convert_small_number(25)
'twenty-five'
>>> convert_small_number(-1)
Traceback (most recent call last):
...
ValueError: This function only accepts non-negative integers
>>> convert_small_number(123)
Traceback (most recent call last):
...
ValueError: This function only converts numbers less than 100
"""
if num < 0:
raise ValueError("This function only accepts non-negative integers")
if num >= 100:
raise ValueError("This function only converts numbers less than 100")
tens, ones = divmod(num, 10)
if tens == 0:
return NumberWords.ONES.value[ones] or "zero"
if tens == 1:
return NumberWords.TEENS.value[ones]
return (
NumberWords.TENS.value[tens]
+ ("-" if NumberWords.ONES.value[ones] else "")
+ NumberWords.ONES.value[ones]
)
def convert_number(
num: int, system: Literal["short", "long", "indian"] = "short"
) -> str:
"""
Converts an integer to English words.
:param num: The integer to be converted
:param system: The numbering system (short, long, or Indian)
>>> convert_number(0)
'zero'
>>> convert_number(1)
'one'
>>> convert_number(100)
'one hundred'
>>> convert_number(-100)
'negative one hundred'
>>> convert_number(123_456_789_012_345) # doctest: +NORMALIZE_WHITESPACE
'one hundred twenty-three trillion four hundred fifty-six billion
seven hundred eighty-nine million twelve thousand three hundred forty-five'
>>> convert_number(123_456_789_012_345, "long") # doctest: +NORMALIZE_WHITESPACE
'one hundred twenty-three thousand four hundred fifty-six milliard
seven hundred eighty-nine million twelve thousand three hundred forty-five'
>>> convert_number(12_34_56_78_90_12_345, "indian") # doctest: +NORMALIZE_WHITESPACE
'one crore crore twenty-three lakh crore
forty-five thousand six hundred seventy-eight crore
ninety lakh twelve thousand three hundred forty-five'
>>> convert_number(10**18)
Traceback (most recent call last):
...
ValueError: Input number is too large
>>> convert_number(10**21, "long")
Traceback (most recent call last):
...
ValueError: Input number is too large
>>> convert_number(10**19, "indian")
Traceback (most recent call last):
...
ValueError: Input number is too large
"""
word_groups = []
if num < 0:
word_groups.append("negative")
num *= -1
if num > NumberingSystem.max_value(system):
raise ValueError("Input number is too large")
for power, unit in NumberingSystem[system.upper()].value:
digit_group, num = divmod(num, 10**power)
if digit_group > 0:
word_group = (
convert_number(digit_group, system)
if digit_group >= 100
else convert_small_number(digit_group)
)
word_groups.append(f"{word_group} {unit}")
if num > 0 or not word_groups: # word_groups is only empty if input num was 0
word_groups.append(convert_small_number(num))
return " ".join(word_groups)
if __name__ == "__main__":
import doctest
doctest.testmod()
print(f"{convert_number(123456789) = }")
================================================
FILE: conversions/decimal_to_any.py
================================================
"""Convert a positive Decimal Number to Any Other Representation"""
from string import ascii_uppercase
ALPHABET_VALUES = {str(ord(c) - 55): c for c in ascii_uppercase}
def decimal_to_any(num: int, base: int) -> str:
"""
Convert a positive integer to another base as str.
>>> decimal_to_any(0, 2)
'0'
>>> decimal_to_any(5, 4)
'11'
>>> decimal_to_any(20, 3)
'202'
>>> decimal_to_any(58, 16)
'3A'
>>> decimal_to_any(243, 17)
'E5'
>>> decimal_to_any(34923, 36)
'QY3'
>>> decimal_to_any(10, 11)
'A'
>>> decimal_to_any(16, 16)
'10'
>>> decimal_to_any(36, 36)
'10'
>>> # negatives will error
>>> decimal_to_any(-45, 8) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: parameter must be positive int
>>> # floats will error
>>> decimal_to_any(34.4, 6) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: int() can't convert non-string with explicit base
>>> # a float base will error
>>> decimal_to_any(5, 2.5) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: 'float' object cannot be interpreted as an integer
>>> # a str base will error
>>> decimal_to_any(10, '16') # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: 'str' object cannot be interpreted as an integer
>>> # a base less than 2 will error
>>> decimal_to_any(7, 0) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: base must be >= 2
>>> # a base greater than 36 will error
>>> decimal_to_any(34, 37) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: base must be <= 36
"""
if isinstance(num, float):
raise TypeError("int() can't convert non-string with explicit base")
if num < 0:
raise ValueError("parameter must be positive int")
if isinstance(base, str):
raise TypeError("'str' object cannot be interpreted as an integer")
if isinstance(base, float):
raise TypeError("'float' object cannot be interpreted as an integer")
if base in (0, 1):
raise ValueError("base must be >= 2")
if base > 36:
raise ValueError("base must be <= 36")
new_value = ""
mod = 0
div = 0
while div != 1:
div, mod = divmod(num, base)
if base >= 11 and 9 < mod < 36:
actual_value = ALPHABET_VALUES[str(mod)]
else:
actual_value = str(mod)
new_value += actual_value
div = num // base
num = div
if div == 0:
return str(new_value[::-1])
elif div == 1:
new_value += str(div)
return str(new_value[::-1])
return new_value[::-1]
if __name__ == "__main__":
import doctest
doctest.testmod()
for base in range(2, 37):
for num in range(1000):
assert int(decimal_to_any(num, base), base) == num, (
num,
base,
decimal_to_any(num, base),
int(decimal_to_any(num, base), base),
)
================================================
FILE: conversions/decimal_to_binary.py
================================================
"""Convert a Decimal Number to a Binary Number."""
def decimal_to_binary_iterative(num: int) -> str:
"""
Convert an Integer Decimal Number to a Binary Number as str.
>>> decimal_to_binary_iterative(0)
'0b0'
>>> decimal_to_binary_iterative(2)
'0b10'
>>> decimal_to_binary_iterative(7)
'0b111'
>>> decimal_to_binary_iterative(35)
'0b100011'
>>> # negatives work too
>>> decimal_to_binary_iterative(-2)
'-0b10'
>>> # other floats will error
>>> decimal_to_binary_iterative(16.16) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: 'float' object cannot be interpreted as an integer
>>> # strings will error as well
>>> decimal_to_binary_iterative('0xfffff') # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: 'str' object cannot be interpreted as an integer
"""
if isinstance(num, float):
raise TypeError("'float' object cannot be interpreted as an integer")
if isinstance(num, str):
raise TypeError("'str' object cannot be interpreted as an integer")
if num == 0:
return "0b0"
negative = False
if num < 0:
negative = True
num = -num
binary: list[int] = []
while num > 0:
binary.insert(0, num % 2)
num >>= 1
if negative:
return "-0b" + "".join(str(e) for e in binary)
return "0b" + "".join(str(e) for e in binary)
def decimal_to_binary_recursive_helper(decimal: int) -> str:
"""
Take a positive integer value and return its binary equivalent.
>>> decimal_to_binary_recursive_helper(1000)
'1111101000'
>>> decimal_to_binary_recursive_helper("72")
'1001000'
>>> decimal_to_binary_recursive_helper("number")
Traceback (most recent call last):
...
ValueError: invalid literal for int() with base 10: 'number'
"""
decimal = int(decimal)
if decimal in (0, 1): # Exit cases for the recursion
return str(decimal)
div, mod = divmod(decimal, 2)
return decimal_to_binary_recursive_helper(div) + str(mod)
def decimal_to_binary_recursive(number: str) -> str:
"""
Take an integer value and raise ValueError for wrong inputs,
call the function above and return the output with prefix "0b" & "-0b"
for positive and negative integers respectively.
>>> decimal_to_binary_recursive(0)
'0b0'
>>> decimal_to_binary_recursive(40)
'0b101000'
>>> decimal_to_binary_recursive(-40)
'-0b101000'
>>> decimal_to_binary_recursive(40.8)
Traceback (most recent call last):
...
ValueError: Input value is not an integer
>>> decimal_to_binary_recursive("forty")
Traceback (most recent call last):
...
ValueError: Input value is not an integer
"""
number = str(number).strip()
if not number:
raise ValueError("No input value was provided")
negative = "-" if number.startswith("-") else ""
number = number.lstrip("-")
if not number.isnumeric():
raise ValueError("Input value is not an integer")
return f"{negative}0b{decimal_to_binary_recursive_helper(int(number))}"
if __name__ == "__main__":
import doctest
doctest.testmod()
print(decimal_to_binary_recursive(input("Input a decimal number: ")))
================================================
FILE: conversions/decimal_to_hexadecimal.py
================================================
"""Convert Base 10 (Decimal) Values to Hexadecimal Representations"""
# set decimal value for each hexadecimal digit
values = {
0: "0",
1: "1",
2: "2",
3: "3",
4: "4",
5: "5",
6: "6",
7: "7",
8: "8",
9: "9",
10: "a",
11: "b",
12: "c",
13: "d",
14: "e",
15: "f",
}
def decimal_to_hexadecimal(decimal: float) -> str:
"""
take integer decimal value, return hexadecimal representation as str beginning
with 0x
>>> decimal_to_hexadecimal(5)
'0x5'
>>> decimal_to_hexadecimal(15)
'0xf'
>>> decimal_to_hexadecimal(37)
'0x25'
>>> decimal_to_hexadecimal(255)
'0xff'
>>> decimal_to_hexadecimal(4096)
'0x1000'
>>> decimal_to_hexadecimal(999098)
'0xf3eba'
>>> # negatives work too
>>> decimal_to_hexadecimal(-256)
'-0x100'
>>> # floats are acceptable if equivalent to an int
>>> decimal_to_hexadecimal(17.0)
'0x11'
>>> # other floats will error
>>> decimal_to_hexadecimal(16.16) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
AssertionError
>>> # strings will error as well
>>> decimal_to_hexadecimal('0xfffff') # doctest: +ELLIPSIS
Traceback (most recent call last):
...
AssertionError
>>> # results are the same when compared to Python's default hex function
>>> decimal_to_hexadecimal(-256) == hex(-256)
True
"""
assert isinstance(decimal, (int, float))
assert decimal == int(decimal)
decimal = int(decimal)
hexadecimal = ""
negative = False
if decimal < 0:
negative = True
decimal *= -1
while decimal > 0:
decimal, remainder = divmod(decimal, 16)
hexadecimal = values[remainder] + hexadecimal
hexadecimal = "0x" + hexadecimal
if negative:
hexadecimal = "-" + hexadecimal
return hexadecimal
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: conversions/decimal_to_octal.py
================================================
"""Convert a Decimal Number to an Octal Number."""
import math
# Modified from:
# https://github.com/TheAlgorithms/Javascript/blob/master/Conversions/DecimalToOctal.js
def decimal_to_octal(num: int) -> str:
"""Convert a Decimal Number to an Octal Number.
>>> all(decimal_to_octal(i) == oct(i) for i
... in (0, 2, 8, 64, 65, 216, 255, 256, 512))
True
"""
octal = 0
counter = 0
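    # Build the octal representation digit by digit, storing it as a base-10
    # number in which each octal digit occupies one decimal place.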
while num > 0:
remainder = num % 8
octal = octal + (remainder * math.floor(math.pow(10, counter)))
counter += 1
num = math.floor(num / 8) # basically /= 8 without remainder if any
# This formatting removes trailing '.0' from `octal`.
return f"0o{int(octal)}"
def main() -> None:
"""Print octal equivalents of decimal numbers."""
print("\n2 in octal is:")
print(decimal_to_octal(2)) # = 2
print("\n8 in octal is:")
print(decimal_to_octal(8)) # = 10
print("\n65 in octal is:")
print(decimal_to_octal(65)) # = 101
print("\n216 in octal is:")
print(decimal_to_octal(216)) # = 330
print("\n512 in octal is:")
print(decimal_to_octal(512)) # = 1000
print("\n")
if __name__ == "__main__":
main()
================================================
FILE: conversions/energy_conversions.py
================================================
"""
Conversion of energy units.
Available units: joule, kilojoule, megajoule, gigajoule,\
wattsecond, watthour, kilowatthour, newtonmeter, calorie_nutr,\
kilocalorie_nutr, electronvolt, britishthermalunit_it, footpound
USAGE :
-> Import this file into your project.
-> Use the function energy_conversion() for conversion of energy units.
-> Parameters :
-> from_type : The unit you want to convert from
-> to_type : The unit you want to convert to
-> value : The value you want to convert
REFERENCES :
-> Wikipedia reference: https://en.wikipedia.org/wiki/Units_of_energy
-> Wikipedia reference: https://en.wikipedia.org/wiki/Joule
-> Wikipedia reference: https://en.wikipedia.org/wiki/Kilowatt-hour
-> Wikipedia reference: https://en.wikipedia.org/wiki/Newton-metre
-> Wikipedia reference: https://en.wikipedia.org/wiki/Calorie
-> Wikipedia reference: https://en.wikipedia.org/wiki/Electronvolt
-> Wikipedia reference: https://en.wikipedia.org/wiki/British_thermal_unit
-> Wikipedia reference: https://en.wikipedia.org/wiki/Foot-pound_(energy)
-> Unit converter reference: https://www.unitconverters.net/energy-converter.html
"""
ENERGY_CONVERSION: dict[str, float] = {
"joule": 1.0,
"kilojoule": 1_000,
"megajoule": 1_000_000,
"gigajoule": 1_000_000_000,
"wattsecond": 1.0,
"watthour": 3_600,
"kilowatthour": 3_600_000,
"newtonmeter": 1.0,
"calorie_nutr": 4_186.8,
"kilocalorie_nutr": 4_186_800.00,
"electronvolt": 1.602_176_634e-19,
"britishthermalunit_it": 1_055.055_85,
"footpound": 1.355_818,
}
def energy_conversion(from_type: str, to_type: str, value: float) -> float:
"""
Conversion of energy units.
>>> energy_conversion("joule", "joule", 1)
1.0
>>> energy_conversion("joule", "kilojoule", 1)
0.001
>>> energy_conversion("joule", "megajoule", 1)
1e-06
>>> energy_conversion("joule", "gigajoule", 1)
1e-09
>>> energy_conversion("joule", "wattsecond", 1)
1.0
>>> energy_conversion("joule", "watthour", 1)
0.0002777777777777778
>>> energy_conversion("joule", "kilowatthour", 1)
2.7777777777777776e-07
>>> energy_conversion("joule", "newtonmeter", 1)
1.0
>>> energy_conversion("joule", "calorie_nutr", 1)
0.00023884589662749592
>>> energy_conversion("joule", "kilocalorie_nutr", 1)
2.388458966274959e-07
>>> energy_conversion("joule", "electronvolt", 1)
6.241509074460763e+18
>>> energy_conversion("joule", "britishthermalunit_it", 1)
0.0009478171226670134
>>> energy_conversion("joule", "footpound", 1)
0.7375621211696556
>>> energy_conversion("joule", "megajoule", 1000)
0.001
>>> energy_conversion("calorie_nutr", "kilocalorie_nutr", 1000)
1.0
>>> energy_conversion("kilowatthour", "joule", 10)
36000000.0
>>> energy_conversion("britishthermalunit_it", "footpound", 1)
778.1692306784539
>>> energy_conversion("watthour", "joule", "a") # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: unsupported operand type(s) for /: 'str' and 'float'
>>> energy_conversion("wrongunit", "joule", 1) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: Incorrect 'from_type' or 'to_type' value: 'wrongunit', 'joule'
Valid values are: joule, ... footpound
>>> energy_conversion("joule", "wrongunit", 1) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: Incorrect 'from_type' or 'to_type' value: 'joule', 'wrongunit'
Valid values are: joule, ... footpound
>>> energy_conversion("123", "abc", 1) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: Incorrect 'from_type' or 'to_type' value: '123', 'abc'
Valid values are: joule, ... footpound
"""
if to_type not in ENERGY_CONVERSION or from_type not in ENERGY_CONVERSION:
msg = (
f"Incorrect 'from_type' or 'to_type' value: {from_type!r}, {to_type!r}\n"
f"Valid values are: {', '.join(ENERGY_CONVERSION)}"
)
raise ValueError(msg)
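    # Convert the value to joules via the from_type factor, then divide by the
    # to_type factor to express it in the target unit.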
return value * ENERGY_CONVERSION[from_type] / ENERGY_CONVERSION[to_type]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: conversions/excel_title_to_column.py
================================================
def excel_title_to_column(column_title: str) -> int:
"""
Given a string column_title that represents
the column title in an Excel sheet, return
its corresponding column number.
>>> excel_title_to_column("A")
1
>>> excel_title_to_column("B")
2
>>> excel_title_to_column("AB")
28
>>> excel_title_to_column("Z")
26
"""
assert column_title.isupper()
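    # Interpret the title as a base-26 number where A=1 ... Z=26, scanning from
    # the rightmost (least significant) letter.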
answer = 0
index = len(column_title) - 1
power = 0
while index >= 0:
value = (ord(column_title[index]) - 64) * pow(26, power)
answer += value
power += 1
index -= 1
return answer
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: conversions/hex_to_bin.py
================================================
def hex_to_bin(hex_num: str) -> int:
"""
Convert a hexadecimal value to its binary equivalent
#https://stackoverflow.com/questions/1425493/convert-hex-to-binary
Here, we have used the bitwise right shift operator: >>
    Shifts the bits of the number to the right, filling the vacated bits with 0.
    This has a similar effect to dividing the number by a power of two.
Example:
a = 10
a >> 1 = 5
>>> hex_to_bin("AC")
10101100
>>> hex_to_bin("9A4")
100110100100
>>> hex_to_bin(" 12f ")
100101111
>>> hex_to_bin("FfFf")
1111111111111111
>>> hex_to_bin("-fFfF")
-1111111111111111
>>> hex_to_bin("F-f")
Traceback (most recent call last):
...
ValueError: Invalid value was passed to the function
>>> hex_to_bin("")
Traceback (most recent call last):
...
ValueError: No value was passed to the function
"""
hex_num = hex_num.strip()
if not hex_num:
raise ValueError("No value was passed to the function")
is_negative = hex_num[0] == "-"
if is_negative:
hex_num = hex_num[1:]
try:
int_num = int(hex_num, 16)
except ValueError:
raise ValueError("Invalid value was passed to the function")
bin_str = ""
while int_num > 0:
bin_str = str(int_num % 2) + bin_str
int_num >>= 1
return int(("-" + bin_str) if is_negative else bin_str)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: conversions/hexadecimal_to_decimal.py
================================================
hex_table = {hex(i)[2:]: i for i in range(16)}  # Use [2:] to strip off the leading '0x'
def hex_to_decimal(hex_string: str) -> int:
"""
Convert a hexadecimal value to its decimal equivalent
#https://www.programiz.com/python-programming/methods/built-in/hex
>>> hex_to_decimal("a")
10
>>> hex_to_decimal("12f")
303
>>> hex_to_decimal(" 12f ")
303
>>> hex_to_decimal("FfFf")
65535
>>> hex_to_decimal("-Ff")
-255
>>> hex_to_decimal("F-f")
Traceback (most recent call last):
...
ValueError: Non-hexadecimal value was passed to the function
>>> hex_to_decimal("")
Traceback (most recent call last):
...
ValueError: Empty string was passed to the function
>>> hex_to_decimal("12m")
Traceback (most recent call last):
...
ValueError: Non-hexadecimal value was passed to the function
"""
hex_string = hex_string.strip().lower()
if not hex_string:
raise ValueError("Empty string was passed to the function")
is_negative = hex_string[0] == "-"
if is_negative:
hex_string = hex_string[1:]
if not all(char in hex_table for char in hex_string):
raise ValueError("Non-hexadecimal value was passed to the function")
decimal_number = 0
for char in hex_string:
decimal_number = 16 * decimal_number + hex_table[char]
return -decimal_number if is_negative else decimal_number
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: conversions/ipv4_conversion.py
================================================
# https://www.geeksforgeeks.org/convert-ip-address-to-integer-and-vice-versa/
def ipv4_to_decimal(ipv4_address: str) -> int:
"""
Convert an IPv4 address to its decimal representation.
Args:
ip_address: A string representing an IPv4 address (e.g., "192.168.0.1").
Returns:
int: The decimal representation of the IP address.
>>> ipv4_to_decimal("192.168.0.1")
3232235521
>>> ipv4_to_decimal("10.0.0.255")
167772415
>>> ipv4_to_decimal("10.0.255")
Traceback (most recent call last):
...
ValueError: Invalid IPv4 address format
>>> ipv4_to_decimal("10.0.0.256")
Traceback (most recent call last):
...
ValueError: Invalid IPv4 octet 256
"""
octets = [int(octet) for octet in ipv4_address.split(".")]
if len(octets) != 4:
raise ValueError("Invalid IPv4 address format")
decimal_ipv4 = 0
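    # Pack each octet into 8 bits: shift the accumulator left by 8 and add the octet.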
for octet in octets:
if not 0 <= octet <= 255:
raise ValueError(f"Invalid IPv4 octet {octet}") # noqa: EM102
decimal_ipv4 = (decimal_ipv4 << 8) + int(octet)
return decimal_ipv4
def alt_ipv4_to_decimal(ipv4_address: str) -> int:
"""
>>> alt_ipv4_to_decimal("192.168.0.1")
3232235521
>>> alt_ipv4_to_decimal("10.0.0.255")
167772415
"""
return int("0x" + "".join(f"{int(i):02x}" for i in ipv4_address.split(".")), 16)
def decimal_to_ipv4(decimal_ipv4: int) -> str:
"""
Convert a decimal representation of an IP address to its IPv4 format.
Args:
decimal_ipv4: An integer representing the decimal IP address.
Returns:
The IPv4 representation of the decimal IP address.
>>> decimal_to_ipv4(3232235521)
'192.168.0.1'
>>> decimal_to_ipv4(167772415)
'10.0.0.255'
>>> decimal_to_ipv4(-1)
Traceback (most recent call last):
...
ValueError: Invalid decimal IPv4 address
"""
if not (0 <= decimal_ipv4 <= 4294967295):
raise ValueError("Invalid decimal IPv4 address")
ip_parts = []
for _ in range(4):
ip_parts.append(str(decimal_ipv4 & 255))
decimal_ipv4 >>= 8
return ".".join(reversed(ip_parts))
if __name__ == "__main__":
import doctest
doctest.testmod()
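# Usage sketch: ipv4_to_decimal and decimal_to_ipv4 are inverses, and the decimal
# form is simply the four octets packed into one 32-bit integer (0xC0A80001 here).
if __name__ == "__main__":
    address = "192.168.0.1"
    packed = ipv4_to_decimal(address)
    assert packed == 0xC0A80001
    assert decimal_to_ipv4(packed) == address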
================================================
FILE: conversions/length_conversion.py
================================================
"""
Conversion of length units.
Available Units: Metre, Kilometre, Feet, Inch, Centimeter, Yard, Foot, Mile, Millimeter
USAGE :
-> Import this file into your project.
-> Use the function length_conversion() for conversion of length units.
-> Parameters :
-> value : The number of from units you want to convert
-> from_type : From which type you want to convert
-> to_type : To which type you want to convert
REFERENCES :
-> Wikipedia reference: https://en.wikipedia.org/wiki/Meter
-> Wikipedia reference: https://en.wikipedia.org/wiki/Kilometer
-> Wikipedia reference: https://en.wikipedia.org/wiki/Feet
-> Wikipedia reference: https://en.wikipedia.org/wiki/Inch
-> Wikipedia reference: https://en.wikipedia.org/wiki/Centimeter
-> Wikipedia reference: https://en.wikipedia.org/wiki/Yard
-> Wikipedia reference: https://en.wikipedia.org/wiki/Foot
-> Wikipedia reference: https://en.wikipedia.org/wiki/Mile
-> Wikipedia reference: https://en.wikipedia.org/wiki/Millimeter
"""
from typing import NamedTuple
class FromTo(NamedTuple):
from_factor: float
to_factor: float
TYPE_CONVERSION = {
"millimeter": "mm",
"centimeter": "cm",
"meter": "m",
"kilometer": "km",
"inch": "in",
"inche": "in", # Trailing 's' has been stripped off
"feet": "ft",
"foot": "ft",
"yard": "yd",
"mile": "mi",
}
METRIC_CONVERSION = {
"mm": FromTo(0.001, 1000),
"cm": FromTo(0.01, 100),
"m": FromTo(1, 1),
"km": FromTo(1000, 0.001),
"in": FromTo(0.0254, 39.3701),
"ft": FromTo(0.3048, 3.28084),
"yd": FromTo(0.9144, 1.09361),
"mi": FromTo(1609.34, 0.000621371),
}
def length_conversion(value: float, from_type: str, to_type: str) -> float:
"""
Conversion between length units.
>>> length_conversion(4, "METER", "FEET")
13.12336
>>> length_conversion(4, "M", "FT")
13.12336
>>> length_conversion(1, "meter", "kilometer")
0.001
>>> length_conversion(1, "kilometer", "inch")
39370.1
>>> length_conversion(3, "kilometer", "mile")
1.8641130000000001
>>> length_conversion(2, "feet", "meter")
0.6096
>>> length_conversion(4, "feet", "yard")
1.333329312
>>> length_conversion(1, "inch", "meter")
0.0254
>>> length_conversion(2, "inch", "mile")
3.15656468e-05
>>> length_conversion(2, "centimeter", "millimeter")
20.0
>>> length_conversion(2, "centimeter", "yard")
0.0218722
>>> length_conversion(4, "yard", "meter")
3.6576
>>> length_conversion(4, "yard", "kilometer")
0.0036576
>>> length_conversion(3, "foot", "meter")
0.9144000000000001
>>> length_conversion(3, "foot", "inch")
36.00001944
>>> length_conversion(4, "mile", "kilometer")
6.43736
>>> length_conversion(2, "miles", "InChEs")
126719.753468
>>> length_conversion(3, "millimeter", "centimeter")
0.3
>>> length_conversion(3, "mm", "in")
0.1181103
>>> length_conversion(4, "wrongUnit", "inch")
Traceback (most recent call last):
...
ValueError: Invalid 'from_type' value: 'wrongUnit'.
Conversion abbreviations are: mm, cm, m, km, in, ft, yd, mi
"""
new_from = from_type.lower().rstrip("s")
new_from = TYPE_CONVERSION.get(new_from, new_from)
new_to = to_type.lower().rstrip("s")
new_to = TYPE_CONVERSION.get(new_to, new_to)
if new_from not in METRIC_CONVERSION:
msg = (
f"Invalid 'from_type' value: {from_type!r}.\n"
f"Conversion abbreviations are: {', '.join(METRIC_CONVERSION)}"
)
raise ValueError(msg)
if new_to not in METRIC_CONVERSION:
msg = (
f"Invalid 'to_type' value: {to_type!r}.\n"
f"Conversion abbreviations are: {', '.join(METRIC_CONVERSION)}"
)
raise ValueError(msg)
return (
value
* METRIC_CONVERSION[new_from].from_factor
* METRIC_CONVERSION[new_to].to_factor
)
if __name__ == "__main__":
import doctest
doctest.testmod()
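# Usage sketch: every conversion pivots through metres, so converting feet to yards
# is the same as converting feet to metres and then metres to yards.
if __name__ == "__main__":
    metres = 4 * METRIC_CONVERSION["ft"].from_factor  # 4 feet expressed in metres
    expected = metres * METRIC_CONVERSION["yd"].to_factor
    assert length_conversion(4, "feet", "yard") == expected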
================================================
FILE: conversions/molecular_chemistry.py
================================================
"""
Functions useful for doing molecular chemistry:
* molarity_to_normality
* moles_to_pressure
* moles_to_volume
* pressure_and_volume_to_temperature
"""
def molarity_to_normality(nfactor: int, moles: float, volume: float) -> float:
"""
Convert molarity to normality.
Volume is taken in litres.
Wikipedia reference: https://en.wikipedia.org/wiki/Equivalent_concentration
Wikipedia reference: https://en.wikipedia.org/wiki/Molar_concentration
>>> molarity_to_normality(2, 3.1, 0.31)
20
>>> molarity_to_normality(4, 11.4, 5.7)
8
"""
return round(float(moles / volume) * nfactor)
def moles_to_pressure(volume: float, moles: float, temperature: float) -> float:
"""
Convert moles to pressure.
Ideal gas laws are used.
Temperature is taken in kelvin.
Volume is taken in litres.
Pressure is given in atm.
Wikipedia reference: https://en.wikipedia.org/wiki/Gas_laws
Wikipedia reference: https://en.wikipedia.org/wiki/Pressure
Wikipedia reference: https://en.wikipedia.org/wiki/Temperature
>>> moles_to_pressure(0.82, 3, 300)
90
>>> moles_to_pressure(8.2, 5, 200)
10
"""
return round(float((moles * 0.0821 * temperature) / (volume)))
def moles_to_volume(pressure: float, moles: float, temperature: float) -> float:
"""
Convert moles to volume.
Ideal gas laws are used.
Temperature is taken in kelvin.
Volume is taken in litres.
Pressure is given in atm.
Wikipedia reference: https://en.wikipedia.org/wiki/Gas_laws
Wikipedia reference: https://en.wikipedia.org/wiki/Pressure
Wikipedia reference: https://en.wikipedia.org/wiki/Temperature
>>> moles_to_volume(0.82, 3, 300)
90
>>> moles_to_volume(8.2, 5, 200)
10
"""
return round(float((moles * 0.0821 * temperature) / (pressure)))
def pressure_and_volume_to_temperature(
pressure: float, moles: float, volume: float
) -> float:
"""
Convert pressure and volume to temperature.
Ideal gas laws are used.
Temperature is taken in kelvin.
Volume is taken in litres.
Pressure is given in atm.
Wikipedia reference: https://en.wikipedia.org/wiki/Gas_laws
Wikipedia reference: https://en.wikipedia.org/wiki/Pressure
Wikipedia reference: https://en.wikipedia.org/wiki/Temperature
>>> pressure_and_volume_to_temperature(0.82, 1, 2)
20
>>> pressure_and_volume_to_temperature(8.2, 5, 3)
60
"""
return round(float((pressure * volume) / (0.0821 * moles)))
if __name__ == "__main__":
import doctest
doctest.testmod()
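# Usage sketch: the ideal-gas helpers are consistent with each other; the pressure
# computed from (V, n, T) feeds back into the temperature helper and recovers T.
if __name__ == "__main__":
    pressure = moles_to_pressure(volume=0.82, moles=3, temperature=300)
    assert pressure == 90
    temperature = pressure_and_volume_to_temperature(
        pressure=pressure, moles=3, volume=0.82
    )
    assert temperature == 300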
================================================
FILE: conversions/octal_to_binary.py
================================================
"""
* Author: Bama Charan Chhandogi (https://github.com/BamaCharanChhandogi)
* Description: Convert an Octal number to Binary.
References for better understanding:
https://en.wikipedia.org/wiki/Binary_number
https://en.wikipedia.org/wiki/Octal
"""
def octal_to_binary(octal_number: str) -> str:
"""
Convert an Octal number to Binary.
>>> octal_to_binary("17")
'001111'
>>> octal_to_binary("7")
'111'
>>> octal_to_binary("Av")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
>>> octal_to_binary("@#")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
>>> octal_to_binary("")
Traceback (most recent call last):
...
ValueError: Empty string was passed to the function
"""
if not octal_number:
raise ValueError("Empty string was passed to the function")
binary_number = ""
octal_digits = "01234567"
for digit in octal_number:
if digit not in octal_digits:
raise ValueError("Non-octal value was passed to the function")
binary_digit = ""
value = int(digit)
for _ in range(3):
binary_digit = str(value % 2) + binary_digit
value //= 2
binary_number += binary_digit
return binary_number
if __name__ == "__main__":
import doctest
doctest.testmod()
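# Usage sketch: each octal digit expands to exactly three binary digits, so the
# result matches zero-padding every digit's binary form to width 3.
if __name__ == "__main__":
    for sample in ("7", "17", "247"):
        expected = "".join(format(int(digit), "03b") for digit in sample)
        assert octal_to_binary(sample) == expected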
================================================
FILE: conversions/octal_to_decimal.py
================================================
def oct_to_decimal(oct_string: str) -> int:
"""
Convert an octal value to its decimal equivalent
>>> oct_to_decimal("")
Traceback (most recent call last):
...
ValueError: Empty string was passed to the function
>>> oct_to_decimal("-")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
>>> oct_to_decimal("e")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
>>> oct_to_decimal("8")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
>>> oct_to_decimal("-e")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
>>> oct_to_decimal("-8")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
>>> oct_to_decimal("1")
1
>>> oct_to_decimal("-1")
-1
>>> oct_to_decimal("12")
10
>>> oct_to_decimal(" 12 ")
10
>>> oct_to_decimal("-45")
-37
>>> oct_to_decimal("-")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
>>> oct_to_decimal("0")
0
>>> oct_to_decimal("-4055")
-2093
>>> oct_to_decimal("2-0Fm")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
>>> oct_to_decimal("")
Traceback (most recent call last):
...
ValueError: Empty string was passed to the function
>>> oct_to_decimal("19")
Traceback (most recent call last):
...
ValueError: Non-octal value was passed to the function
"""
oct_string = str(oct_string).strip()
if not oct_string:
raise ValueError("Empty string was passed to the function")
is_negative = oct_string[0] == "-"
if is_negative:
oct_string = oct_string[1:]
if not oct_string.isdigit() or not all(0 <= int(char) <= 7 for char in oct_string):
raise ValueError("Non-octal value was passed to the function")
decimal_number = 0
for char in oct_string:
decimal_number = 8 * decimal_number + int(char)
if is_negative:
decimal_number = -decimal_number
return decimal_number
if __name__ == "__main__":
from doctest import testmod
testmod()
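# Usage sketch: for valid inputs the hand-rolled base-8 accumulation agrees with
# Python's built-in int(value, 8).
if __name__ == "__main__":
    for sample in ("0", "1", "12", "-45", "-4055"):
        assert oct_to_decimal(sample) == int(sample, 8)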
================================================
FILE: conversions/octal_to_hexadecimal.py
================================================
def octal_to_hex(octal: str) -> str:
"""
Convert an Octal number to a Hexadecimal number.
For more information: https://en.wikipedia.org/wiki/Octal
>>> octal_to_hex("100")
'0x40'
>>> octal_to_hex("235")
'0x9D'
>>> octal_to_hex(17)
Traceback (most recent call last):
...
TypeError: Expected a string as input
>>> octal_to_hex("Av")
Traceback (most recent call last):
...
ValueError: Not a Valid Octal Number
>>> octal_to_hex("")
Traceback (most recent call last):
...
ValueError: Empty string was passed to the function
"""
if not isinstance(octal, str):
raise TypeError("Expected a string as input")
if octal.startswith("0o"):
octal = octal[2:]
if octal == "":
raise ValueError("Empty string was passed to the function")
if any(char not in "01234567" for char in octal):
raise ValueError("Not a Valid Octal Number")
decimal = 0
for char in octal:
decimal <<= 3
decimal |= int(char)
hex_char = "0123456789ABCDEF"
revhex = ""
while decimal:
revhex += hex_char[decimal & 15]
decimal >>= 4
return "0x" + revhex[::-1]
if __name__ == "__main__":
import doctest
doctest.testmod()
nums = ["030", "100", "247", "235", "007"]
## Main Tests
for num in nums:
hexadecimal = octal_to_hex(num)
expected = "0x" + hex(int(num, 8))[2:].upper()
assert hexadecimal == expected
print(f"Hex of '0o{num}' is: {hexadecimal}")
print(f"Expected was: {expected}")
print("---")
================================================
FILE: conversions/prefix_conversions.py
================================================
"""
Convert International System of Units (SI) and Binary prefixes
"""
from __future__ import annotations
from enum import Enum
class SIUnit(Enum):
yotta = 24
zetta = 21
exa = 18
peta = 15
tera = 12
giga = 9
mega = 6
kilo = 3
hecto = 2
deca = 1
deci = -1
centi = -2
milli = -3
micro = -6
nano = -9
pico = -12
femto = -15
atto = -18
zepto = -21
yocto = -24
class BinaryUnit(Enum):
yotta = 8
zetta = 7
exa = 6
peta = 5
tera = 4
giga = 3
mega = 2
kilo = 1
def convert_si_prefix(
known_amount: float,
known_prefix: str | SIUnit,
unknown_prefix: str | SIUnit,
) -> float:
"""
Wikipedia reference: https://en.wikipedia.org/wiki/Binary_prefix
Wikipedia reference: https://en.wikipedia.org/wiki/International_System_of_Units
>>> convert_si_prefix(1, SIUnit.giga, SIUnit.mega)
1000
>>> convert_si_prefix(1, SIUnit.mega, SIUnit.giga)
0.001
>>> convert_si_prefix(1, SIUnit.kilo, SIUnit.kilo)
1
>>> convert_si_prefix(1, 'giga', 'mega')
1000
>>> convert_si_prefix(1, 'gIGa', 'mEGa')
1000
"""
if isinstance(known_prefix, str):
known_prefix = SIUnit[known_prefix.lower()]
if isinstance(unknown_prefix, str):
unknown_prefix = SIUnit[unknown_prefix.lower()]
unknown_amount: float = known_amount * (
10 ** (known_prefix.value - unknown_prefix.value)
)
return unknown_amount
def convert_binary_prefix(
known_amount: float,
known_prefix: str | BinaryUnit,
unknown_prefix: str | BinaryUnit,
) -> float:
"""
Wikipedia reference: https://en.wikipedia.org/wiki/Metric_prefix
>>> convert_binary_prefix(1, BinaryUnit.giga, BinaryUnit.mega)
1024
>>> convert_binary_prefix(1, BinaryUnit.mega, BinaryUnit.giga)
0.0009765625
>>> convert_binary_prefix(1, BinaryUnit.kilo, BinaryUnit.kilo)
1
>>> convert_binary_prefix(1, 'giga', 'mega')
1024
>>> convert_binary_prefix(1, 'gIGa', 'mEGa')
1024
"""
if isinstance(known_prefix, str):
known_prefix = BinaryUnit[known_prefix.lower()]
if isinstance(unknown_prefix, str):
unknown_prefix = BinaryUnit[unknown_prefix.lower()]
unknown_amount: float = known_amount * (
2 ** ((known_prefix.value - unknown_prefix.value) * 10)
)
return unknown_amount
if __name__ == "__main__":
import doctest
doctest.testmod()
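# Usage sketch: SI prefixes differ by powers of ten and binary prefixes by powers
# of 1024, so giga -> kilo is 10**6 in SI but 2**20 in the binary system.
if __name__ == "__main__":
    assert convert_si_prefix(1, "giga", "kilo") == 10**6
    assert convert_binary_prefix(1, "giga", "kilo") == 2**20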
================================================
FILE: conversions/prefix_conversions_string.py
================================================
"""
* Author: Manuel Di Lullo (https://github.com/manueldilullo)
* Description: Convert a number to use the correct SI or Binary unit prefix.
Inspired by prefix_conversion.py file in this repository by lance-pyles
URL: https://en.wikipedia.org/wiki/Metric_prefix#List_of_SI_prefixes
URL: https://en.wikipedia.org/wiki/Binary_prefix
"""
from __future__ import annotations
from enum import Enum, unique
from typing import TypeVar
# Create a generic variable that can be 'Enum', or any subclass.
T = TypeVar("T", bound="Enum")
@unique
class BinaryUnit(Enum):
yotta = 80
zetta = 70
exa = 60
peta = 50
tera = 40
giga = 30
mega = 20
kilo = 10
@unique
class SIUnit(Enum):
yotta = 24
zetta = 21
exa = 18
peta = 15
tera = 12
giga = 9
mega = 6
kilo = 3
hecto = 2
deca = 1
deci = -1
centi = -2
milli = -3
micro = -6
nano = -9
pico = -12
femto = -15
atto = -18
zepto = -21
yocto = -24
@classmethod
def get_positive(cls) -> dict:
"""
Returns a dictionary with only the elements of this enum
that have a positive value
>>> from itertools import islice
>>> positive = SIUnit.get_positive()
>>> inc = iter(positive.items())
>>> dict(islice(inc, len(positive) // 2))
{'yotta': 24, 'zetta': 21, 'exa': 18, 'peta': 15, 'tera': 12}
>>> dict(inc)
{'giga': 9, 'mega': 6, 'kilo': 3, 'hecto': 2, 'deca': 1}
"""
return {unit.name: unit.value for unit in cls if unit.value > 0}
@classmethod
def get_negative(cls) -> dict:
"""
Returns a dictionary with only the elements of this enum
that have a negative value
@example
>>> from itertools import islice
>>> negative = SIUnit.get_negative()
>>> inc = iter(negative.items())
>>> dict(islice(inc, len(negative) // 2))
{'deci': -1, 'centi': -2, 'milli': -3, 'micro': -6, 'nano': -9}
>>> dict(inc)
{'pico': -12, 'femto': -15, 'atto': -18, 'zepto': -21, 'yocto': -24}
"""
return {unit.name: unit.value for unit in cls if unit.value < 0}
def add_si_prefix(value: float) -> str:
"""
Function that converts a number to its version with an SI prefix
@input value (an integer)
@example:
>>> add_si_prefix(10000)
'10.0 kilo'
"""
prefixes = SIUnit.get_positive() if value > 0 else SIUnit.get_negative()
for name_prefix, value_prefix in prefixes.items():
numerical_part = value / (10**value_prefix)
if numerical_part > 1:
return f"{numerical_part!s} {name_prefix}"
return str(value)
def add_binary_prefix(value: float) -> str:
"""
Function that converts a number to its version with a Binary prefix
@input value (an integer)
@example:
>>> add_binary_prefix(65536)
'64.0 kilo'
"""
for prefix in BinaryUnit:
numerical_part = value / (2**prefix.value)
if numerical_part > 1:
return f"{numerical_part!s} {prefix.name}"
return str(value)
if __name__ == "__main__":
import doctest
doctest.testmod()
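# Usage sketch: the helpers pick the largest prefix that keeps the numeric part
# above 1, mirroring the doctests above.
if __name__ == "__main__":
    assert add_si_prefix(10_000) == "10.0 kilo"
    assert add_si_prefix(3 * 10**6) == "3.0 mega"
    assert add_binary_prefix(65_536) == "64.0 kilo"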
================================================
FILE: conversions/pressure_conversions.py
================================================
"""
Conversion of pressure units.
Available Units: Pascal, Bar, Kilopascal, Megapascal, psi (pound per square inch),
inHg (inch of mercury column), torr, atm
USAGE :
-> Import this file into your project.
-> Use the function pressure_conversion() for conversion of pressure units.
-> Parameters :
-> value : The number of from units you want to convert
-> from_type : From which type you want to convert
-> to_type : To which type you want to convert
REFERENCES :
-> Wikipedia reference: https://en.wikipedia.org/wiki/Pascal_(unit)
-> Wikipedia reference: https://en.wikipedia.org/wiki/Pound_per_square_inch
-> Wikipedia reference: https://en.wikipedia.org/wiki/Inch_of_mercury
-> Wikipedia reference: https://en.wikipedia.org/wiki/Torr
-> https://en.wikipedia.org/wiki/Standard_atmosphere_(unit)
-> https://msestudent.com/what-are-the-units-of-pressure/
-> https://www.unitconverters.net/pressure-converter.html
"""
from typing import NamedTuple
class FromTo(NamedTuple):
from_factor: float
to_factor: float
PRESSURE_CONVERSION = {
"atm": FromTo(1, 1),
"pascal": FromTo(0.0000098, 101325),
"bar": FromTo(0.986923, 1.01325),
"kilopascal": FromTo(0.00986923, 101.325),
"megapascal": FromTo(9.86923, 0.101325),
"psi": FromTo(0.068046, 14.6959),
"inHg": FromTo(0.0334211, 29.9213),
"torr": FromTo(0.00131579, 760),
}
def pressure_conversion(value: float, from_type: str, to_type: str) -> float:
"""
Conversion between pressure units.
>>> pressure_conversion(4, "atm", "pascal")
405300
>>> pressure_conversion(1, "pascal", "psi")
0.00014401981999999998
>>> pressure_conversion(1, "bar", "atm")
0.986923
>>> pressure_conversion(3, "kilopascal", "bar")
0.029999991892499998
>>> pressure_conversion(2, "megapascal", "psi")
290.074434314
>>> pressure_conversion(4, "psi", "torr")
206.85984
>>> pressure_conversion(1, "inHg", "atm")
0.0334211
>>> pressure_conversion(1, "torr", "psi")
0.019336718261000002
>>> pressure_conversion(4, "wrongUnit", "atm")
Traceback (most recent call last):
...
ValueError: Invalid 'from_type' value: 'wrongUnit' Supported values are:
atm, pascal, bar, kilopascal, megapascal, psi, inHg, torr
"""
if from_type not in PRESSURE_CONVERSION:
raise ValueError(
f"Invalid 'from_type' value: {from_type!r} Supported values are:\n"
+ ", ".join(PRESSURE_CONVERSION)
)
if to_type not in PRESSURE_CONVERSION:
raise ValueError(
f"Invalid 'to_type' value: {to_type!r}. Supported values are:\n"
+ ", ".join(PRESSURE_CONVERSION)
)
return (
value
* PRESSURE_CONVERSION[from_type].from_factor
* PRESSURE_CONVERSION[to_type].to_factor
)
if __name__ == "__main__":
import doctest
doctest.testmod()
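# Usage sketch: every conversion pivots through atmospheres, so bar -> psi is the
# same as bar -> atm followed by atm -> psi.
if __name__ == "__main__":
    atmospheres = 2 * PRESSURE_CONVERSION["bar"].from_factor  # 2 bar expressed in atm
    expected = atmospheres * PRESSURE_CONVERSION["psi"].to_factor
    assert pressure_conversion(2, "bar", "psi") == expected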
================================================
FILE: conversions/rectangular_to_polar.py
================================================
import math
def rectangular_to_polar(real: float, img: float) -> tuple[float, float]:
"""
https://en.wikipedia.org/wiki/Polar_coordinate_system
>>> rectangular_to_polar(5,-5)
(7.07, -45.0)
>>> rectangular_to_polar(-1,1)
(1.41, 135.0)
>>> rectangular_to_polar(-1,-1)
(1.41, -135.0)
>>> rectangular_to_polar(1e-10,1e-10)
(0.0, 45.0)
>>> rectangular_to_polar(-1e-10,1e-10)
(0.0, 135.0)
>>> rectangular_to_polar(9.75,5.93)
(11.41, 31.31)
>>> rectangular_to_polar(10000,99999)
(100497.76, 84.29)
"""
mod = round(math.sqrt((real**2) + (img**2)), 2)
ang = round(math.degrees(math.atan2(img, real)), 2)
return (mod, ang)
if __name__ == "__main__":
import doctest
doctest.testmod()
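# Usage sketch: the result matches Python's cmath.polar (modulus, phase in radians)
# once the phase is converted to degrees and both values are rounded to 2 places.
if __name__ == "__main__":
    import cmath
    modulus, angle = rectangular_to_polar(5, -5)
    reference_modulus, reference_phase = cmath.polar(complex(5, -5))
    assert (modulus, angle) == (
        round(reference_modulus, 2),
        round(math.degrees(reference_phase), 2),
    )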
================================================
FILE: conversions/rgb_cmyk_conversion.py
================================================
def rgb_to_cmyk(r_input: int, g_input: int, b_input: int) -> tuple[int, int, int, int]:
"""
Simple RGB to CMYK conversion. Returns percentages of CMYK paint.
https://www.programmingalgorithms.com/algorithm/rgb-to-cmyk/
Note: this is a very popular algorithm that converts colors linearly and gives
only approximate results. Actual preparation for printing requires advanced color
conversion considering the color profiles and parameters of the target device.
>>> rgb_to_cmyk(255, 200, "a")
Traceback (most recent call last):
...
ValueError: Expected int, found (<class 'int'>, <class 'int'>, <class 'str'>)
>>> rgb_to_cmyk(255, 255, 999)
Traceback (most recent call last):
...
ValueError: Expected int of the range 0..255
>>> rgb_to_cmyk(255, 255, 255) # white
(0, 0, 0, 0)
>>> rgb_to_cmyk(128, 128, 128) # gray
(0, 0, 0, 50)
>>> rgb_to_cmyk(0, 0, 0) # black
(0, 0, 0, 100)
>>> rgb_to_cmyk(255, 0, 0) # red
(0, 100, 100, 0)
>>> rgb_to_cmyk(0, 255, 0) # green
(100, 0, 100, 0)
>>> rgb_to_cmyk(0, 0, 255) # blue
(100, 100, 0, 0)
"""
if (
not isinstance(r_input, int)
or not isinstance(g_input, int)
or not isinstance(b_input, int)
):
msg = f"Expected int, found {type(r_input), type(g_input), type(b_input)}"
raise ValueError(msg)
if not 0 <= r_input < 256 or not 0 <= g_input < 256 or not 0 <= b_input < 256:
raise ValueError("Expected int of the range 0..255")
# changing range from 0..255 to 0..1
r = r_input / 255
g = g_input / 255
b = b_input / 255
k = 1 - max(r, g, b)
if k == 1: # pure black
return 0, 0, 0, 100
c = round(100 * (1 - r - k) / (1 - k))
m = round(100 * (1 - g - k) / (1 - k))
y = round(100 * (1 - b - k) / (1 - k))
k = round(100 * k)
return c, m, y, k
if __name__ == "__main__":
from doctest import testmod
testmod()
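# Usage sketch: a hypothetical inverse (cmyk_to_rgb, not defined in this file) using
# the standard linear formula recovers the original channels for these colours.
def cmyk_to_rgb(c: int, m: int, y: int, k: int) -> tuple[int, int, int]:
    r, g, b = (round(255 * (1 - x / 100) * (1 - k / 100)) for x in (c, m, y))
    return (r, g, b)
if __name__ == "__main__":
    for rgb in ((0, 0, 255), (128, 128, 128), (255, 255, 255)):
        assert cmyk_to_rgb(*rgb_to_cmyk(*rgb)) == rgb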
================================================
FILE: conversions/rgb_hsv_conversion.py
================================================
"""
The RGB color model is an additive color model in which red, green, and blue light
are added together in various ways to reproduce a broad array of colors. The name
of the model comes from the initials of the three additive primary colors, red,
green, and blue. Meanwhile, the HSV representation models how colors appear under
light. In it, colors are represented using three components: hue, saturation and
(brightness-)value. This file provides functions for converting colors from one
representation to the other.
(description adapted from https://en.wikipedia.org/wiki/RGB_color_model and
https://en.wikipedia.org/wiki/HSL_and_HSV).
"""
def hsv_to_rgb(hue: float, saturation: float, value: float) -> list[int]:
"""
Conversion from the HSV-representation to the RGB-representation.
Expected RGB-values taken from
https://www.rapidtables.com/convert/color/hsv-to-rgb.html
>>> hsv_to_rgb(0, 0, 0)
[0, 0, 0]
>>> hsv_to_rgb(0, 0, 1)
[255, 255, 255]
>>> hsv_to_rgb(0, 1, 1)
[255, 0, 0]
>>> hsv_to_rgb(60, 1, 1)
[255, 255, 0]
>>> hsv_to_rgb(120, 1, 1)
[0, 255, 0]
>>> hsv_to_rgb(240, 1, 1)
[0, 0, 255]
>>> hsv_to_rgb(300, 1, 1)
[255, 0, 255]
>>> hsv_to_rgb(180, 0.5, 0.5)
[64, 128, 128]
>>> hsv_to_rgb(234, 0.14, 0.88)
[193, 196, 224]
>>> hsv_to_rgb(330, 0.75, 0.5)
[128, 32, 80]
"""
if hue < 0 or hue > 360:
raise Exception("hue should be between 0 and 360")
if saturation < 0 or saturation > 1:
raise Exception("saturation should be between 0 and 1")
if value < 0 or value > 1:
raise Exception("value should be between 0 and 1")
chroma = value * saturation
hue_section = hue / 60
second_largest_component = chroma * (1 - abs(hue_section % 2 - 1))
match_value = value - chroma
if hue_section >= 0 and hue_section <= 1:
red = round(255 * (chroma + match_value))
green = round(255 * (second_largest_component + match_value))
blue = round(255 * (match_value))
elif hue_section > 1 and hue_section <= 2:
red = round(255 * (second_largest_component + match_value))
green = round(255 * (chroma + match_value))
blue = round(255 * (match_value))
elif hue_section > 2 and hue_section <= 3:
red = round(255 * (match_value))
green = round(255 * (chroma + match_value))
blue = round(255 * (second_largest_component + match_value))
elif hue_section > 3 and hue_section <= 4:
red = round(255 * (match_value))
green = round(255 * (second_largest_component + match_value))
blue = round(255 * (chroma + match_value))
elif hue_section > 4 and hue_section <= 5:
red = round(255 * (second_largest_component + match_value))
green = round(255 * (match_value))
blue = round(255 * (chroma + match_value))
else:
red = round(255 * (chroma + match_value))
green = round(255 * (match_value))
blue = round(255 * (second_largest_component + match_value))
return [red, green, blue]
def rgb_to_hsv(red: int, green: int, blue: int) -> list[float]:
"""
Conversion from the RGB-representation to the HSV-representation.
The tested values are the reverse values from the hsv_to_rgb-doctests.
Function "approximately_equal_hsv" is needed because of small deviations due to
rounding for the RGB-values.
>>> approximately_equal_hsv(rgb_to_hsv(0, 0, 0), [0, 0, 0])
True
>>> approximately_equal_hsv(rgb_to_hsv(255, 255, 255), [0, 0, 1])
True
>>> approximately_equal_hsv(rgb_to_hsv(255, 0, 0), [0, 1, 1])
True
>>> approximately_equal_hsv(rgb_to_hsv(255, 255, 0), [60, 1, 1])
True
>>> approximately_equal_hsv(rgb_to_hsv(0, 255, 0), [120, 1, 1])
True
>>> approximately_equal_hsv(rgb_to_hsv(0, 0, 255), [240, 1, 1])
True
>>> approximately_equal_hsv(rgb_to_hsv(255, 0, 255), [300, 1, 1])
True
>>> approximately_equal_hsv(rgb_to_hsv(64, 128, 128), [180, 0.5, 0.5])
True
>>> approximately_equal_hsv(rgb_to_hsv(193, 196, 224), [234, 0.14, 0.88])
True
>>> approximately_equal_hsv(rgb_to_hsv(128, 32, 80), [330, 0.75, 0.5])
True
"""
if red < 0 or red > 255:
raise Exception("red should be between 0 and 255")
if green < 0 or green > 255:
raise Exception("green should be between 0 and 255")
if blue < 0 or blue > 255:
raise Exception("blue should be between 0 and 255")
float_red = red / 255
float_green = green / 255
float_blue = blue / 255
value = max(float_red, float_green, float_blue)
chroma = value - min(float_red, float_green, float_blue)
saturation = 0 if value == 0 else chroma / value
if chroma == 0:
hue = 0.0
elif value == float_red:
hue = 60 * (0 + (float_green - float_blue) / chroma)
elif value == float_green:
hue = 60 * (2 + (float_blue - float_red) / chroma)
else:
hue = 60 * (4 + (float_red - float_green) / chroma)
hue = (hue + 360) % 360
return [hue, saturation, value]
def approximately_equal_hsv(hsv_1: list[float], hsv_2: list[float]) -> bool:
"""
Utility-function to check that two hsv-colors are approximately equal
>>> approximately_equal_hsv([0, 0, 0], [0, 0, 0])
True
>>> approximately_equal_hsv([180, 0.5, 0.3], [179.9999, 0.500001, 0.30001])
True
>>> approximately_equal_hsv([0, 0, 0], [1, 0, 0])
False
>>> approximately_equal_hsv([180, 0.5, 0.3], [179.9999, 0.6, 0.30001])
False
"""
check_hue = abs(hsv_1[0] - hsv_2[0]) < 0.2
check_saturation = abs(hsv_1[1] - hsv_2[1]) < 0.002
check_value = abs(hsv_1[2] - hsv_2[2]) < 0.002
return check_hue and check_saturation and check_value
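# Usage sketch: converting RGB -> HSV -> RGB reproduces the original colour exactly
# for the primary colours and the black/white corner cases used in the doctests.
if __name__ == "__main__":
    for rgb in ([0, 0, 0], [255, 255, 255], [255, 0, 0], [0, 255, 0], [0, 0, 255]):
        assert hsv_to_rgb(*rgb_to_hsv(*rgb)) == rgb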
================================================
FILE: conversions/roman_numerals.py
================================================
ROMAN = [
(1000, "M"),
(900, "CM"),
(500, "D"),
(400, "CD"),
(100, "C"),
(90, "XC"),
(50, "L"),
(40, "XL"),
(10, "X"),
(9, "IX"),
(5, "V"),
(4, "IV"),
(1, "I"),
]
def roman_to_int(roman: str) -> int:
"""
LeetCode No. 13 Roman to Integer
Given a roman numeral, convert it to an integer.
Input is guaranteed to be within the range from 1 to 3999.
https://en.wikipedia.org/wiki/Roman_numerals
>>> tests = {"III": 3, "CLIV": 154, "MIX": 1009, "MMD": 2500, "MMMCMXCIX": 3999}
>>> all(roman_to_int(key) == value for key, value in tests.items())
True
"""
vals = {"I": 1, "V": 5, "X": 10, "L": 50, "C": 100, "D": 500, "M": 1000}
total = 0
place = 0
while place < len(roman):
if (place + 1 < len(roman)) and (vals[roman[place]] < vals[roman[place + 1]]):
total += vals[roman[place + 1]] - vals[roman[place]]
place += 2
else:
total += vals[roman[place]]
place += 1
return total
def int_to_roman(number: int) -> str:
"""
Given an integer, convert it to a Roman numeral.
https://en.wikipedia.org/wiki/Roman_numerals
>>> tests = {"III": 3, "CLIV": 154, "MIX": 1009, "MMD": 2500, "MMMCMXCIX": 3999}
>>> all(int_to_roman(value) == key for key, value in tests.items())
True
"""
result = []
for arabic, roman in ROMAN:
(factor, number) = divmod(number, arabic)
result.append(roman * factor)
if number == 0:
break
return "".join(result)
if __name__ == "__main__":
import doctest
doctest.testmod()
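# Usage sketch: int_to_roman and roman_to_int are inverses over the supported range.
if __name__ == "__main__":
    assert all(
        roman_to_int(int_to_roman(number)) == number for number in range(1, 4000)
    )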
================================================
FILE: conversions/speed_conversions.py
================================================
"""
Convert speed units
https://en.wikipedia.org/wiki/Kilometres_per_hour
https://en.wikipedia.org/wiki/Miles_per_hour
https://en.wikipedia.org/wiki/Knot_(unit)
https://en.wikipedia.org/wiki/Metre_per_second
"""
speed_chart: dict[str, float] = {
"km/h": 1.0,
"m/s": 3.6,
"mph": 1.609344,
"knot": 1.852,
}
speed_chart_inverse: dict[str, float] = {
"km/h": 1.0,
"m/s": 0.277777778,
"mph": 0.621371192,
"knot": 0.539956803,
}
def convert_speed(speed: float, unit_from: str, unit_to: str) -> float:
"""
Convert speed from one unit to another using the speed_chart above.
"km/h": 1.0,
"m/s": 3.6,
"mph": 1.609344,
"knot": 1.852,
>>> convert_speed(100, "km/h", "m/s")
27.778
>>> convert_speed(100, "km/h", "mph")
62.137
>>> convert_speed(100, "km/h", "knot")
53.996
>>> convert_speed(100, "m/s", "km/h")
360.0
>>> convert_speed(100, "m/s", "mph")
223.694
>>> convert_speed(100, "m/s", "knot")
194.384
>>> convert_speed(100, "mph", "km/h")
160.934
>>> convert_speed(100, "mph", "m/s")
44.704
>>> convert_speed(100, "mph", "knot")
86.898
>>> convert_speed(100, "knot", "km/h")
185.2
>>> convert_speed(100, "knot", "m/s")
51.444
>>> convert_speed(100, "knot", "mph")
115.078
"""
if unit_to not in speed_chart or unit_from not in speed_chart_inverse:
msg = (
f"Incorrect 'from_type' or 'to_type' value: {unit_from!r}, {unit_to!r}\n"
f"Valid values are: {', '.join(speed_chart_inverse)}"
)
raise ValueError(msg)
return round(speed * speed_chart[unit_from] * speed_chart_inverse[unit_to], 3)
if __name__ == "__main__":
import doctest
doctest.testmod()
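# Usage sketch: conversions pivot through km/h, so mph -> knot is mph -> km/h
# followed by km/h -> knot (rounded to three decimal places at the end).
if __name__ == "__main__":
    km_per_hour = 100 * speed_chart["mph"]  # 100 mph expressed in km/h
    expected = round(km_per_hour * speed_chart_inverse["knot"], 3)
    assert convert_speed(100, "mph", "knot") == expected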
================================================
FILE: conversions/temperature_conversions.py
================================================
"""Convert between different units of temperature"""
def celsius_to_fahrenheit(celsius: float, ndigits: int = 2) -> float:
"""
Convert a given value from Celsius to Fahrenheit and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Celsius
Wikipedia reference: https://en.wikipedia.org/wiki/Fahrenheit
>>> celsius_to_fahrenheit(273.354, 3)
524.037
>>> celsius_to_fahrenheit(273.354, 0)
524.0
>>> celsius_to_fahrenheit(-40.0)
-40.0
>>> celsius_to_fahrenheit(-20.0)
-4.0
>>> celsius_to_fahrenheit(0)
32.0
>>> celsius_to_fahrenheit(20)
68.0
>>> celsius_to_fahrenheit("40")
104.0
>>> celsius_to_fahrenheit("celsius")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'celsius'
"""
return round((float(celsius) * 9 / 5) + 32, ndigits)
def celsius_to_kelvin(celsius: float, ndigits: int = 2) -> float:
"""
Convert a given value from Celsius to Kelvin and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Celsius
Wikipedia reference: https://en.wikipedia.org/wiki/Kelvin
>>> celsius_to_kelvin(273.354, 3)
546.504
>>> celsius_to_kelvin(273.354, 0)
547.0
>>> celsius_to_kelvin(0)
273.15
>>> celsius_to_kelvin(20.0)
293.15
>>> celsius_to_kelvin("40")
313.15
>>> celsius_to_kelvin("celsius")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'celsius'
"""
return round(float(celsius) + 273.15, ndigits)
def celsius_to_rankine(celsius: float, ndigits: int = 2) -> float:
"""
Convert a given value from Celsius to Rankine and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Celsius
Wikipedia reference: https://en.wikipedia.org/wiki/Rankine_scale
>>> celsius_to_rankine(273.354, 3)
983.707
>>> celsius_to_rankine(273.354, 0)
984.0
>>> celsius_to_rankine(0)
491.67
>>> celsius_to_rankine(20.0)
527.67
>>> celsius_to_rankine("40")
563.67
>>> celsius_to_rankine("celsius")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'celsius'
"""
return round((float(celsius) * 9 / 5) + 491.67, ndigits)
def fahrenheit_to_celsius(fahrenheit: float, ndigits: int = 2) -> float:
"""
Convert a given value from Fahrenheit to Celsius and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Fahrenheit
Wikipedia reference: https://en.wikipedia.org/wiki/Celsius
>>> fahrenheit_to_celsius(273.354, 3)
134.086
>>> fahrenheit_to_celsius(273.354, 0)
134.0
>>> fahrenheit_to_celsius(0)
-17.78
>>> fahrenheit_to_celsius(20.0)
-6.67
>>> fahrenheit_to_celsius(40.0)
4.44
>>> fahrenheit_to_celsius(60)
15.56
>>> fahrenheit_to_celsius(80)
26.67
>>> fahrenheit_to_celsius("100")
37.78
>>> fahrenheit_to_celsius("fahrenheit")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'fahrenheit'
"""
return round((float(fahrenheit) - 32) * 5 / 9, ndigits)
def fahrenheit_to_kelvin(fahrenheit: float, ndigits: int = 2) -> float:
"""
Convert a given value from Fahrenheit to Kelvin and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Fahrenheit
Wikipedia reference: https://en.wikipedia.org/wiki/Kelvin
>>> fahrenheit_to_kelvin(273.354, 3)
407.236
>>> fahrenheit_to_kelvin(273.354, 0)
407.0
>>> fahrenheit_to_kelvin(0)
255.37
>>> fahrenheit_to_kelvin(20.0)
266.48
>>> fahrenheit_to_kelvin(40.0)
277.59
>>> fahrenheit_to_kelvin(60)
288.71
>>> fahrenheit_to_kelvin(80)
299.82
>>> fahrenheit_to_kelvin("100")
310.93
>>> fahrenheit_to_kelvin("fahrenheit")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'fahrenheit'
"""
return round(((float(fahrenheit) - 32) * 5 / 9) + 273.15, ndigits)
def fahrenheit_to_rankine(fahrenheit: float, ndigits: int = 2) -> float:
"""
Convert a given value from Fahrenheit to Rankine and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Fahrenheit
Wikipedia reference: https://en.wikipedia.org/wiki/Rankine_scale
>>> fahrenheit_to_rankine(273.354, 3)
733.024
>>> fahrenheit_to_rankine(273.354, 0)
733.0
>>> fahrenheit_to_rankine(0)
459.67
>>> fahrenheit_to_rankine(20.0)
479.67
>>> fahrenheit_to_rankine(40.0)
499.67
>>> fahrenheit_to_rankine(60)
519.67
>>> fahrenheit_to_rankine(80)
539.67
>>> fahrenheit_to_rankine("100")
559.67
>>> fahrenheit_to_rankine("fahrenheit")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'fahrenheit'
"""
return round(float(fahrenheit) + 459.67, ndigits)
def kelvin_to_celsius(kelvin: float, ndigits: int = 2) -> float:
"""
Convert a given value from Kelvin to Celsius and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Kelvin
Wikipedia reference: https://en.wikipedia.org/wiki/Celsius
>>> kelvin_to_celsius(273.354, 3)
0.204
>>> kelvin_to_celsius(273.354, 0)
0.0
>>> kelvin_to_celsius(273.15)
0.0
>>> kelvin_to_celsius(300)
26.85
>>> kelvin_to_celsius("315.5")
42.35
>>> kelvin_to_celsius("kelvin")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'kelvin'
"""
return round(float(kelvin) - 273.15, ndigits)
def kelvin_to_fahrenheit(kelvin: float, ndigits: int = 2) -> float:
"""
Convert a given value from Kelvin to Fahrenheit and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Kelvin
Wikipedia reference: https://en.wikipedia.org/wiki/Fahrenheit
>>> kelvin_to_fahrenheit(273.354, 3)
32.367
>>> kelvin_to_fahrenheit(273.354, 0)
32.0
>>> kelvin_to_fahrenheit(273.15)
32.0
>>> kelvin_to_fahrenheit(300)
80.33
>>> kelvin_to_fahrenheit("315.5")
108.23
>>> kelvin_to_fahrenheit("kelvin")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'kelvin'
"""
return round(((float(kelvin) - 273.15) * 9 / 5) + 32, ndigits)
def kelvin_to_rankine(kelvin: float, ndigits: int = 2) -> float:
"""
Convert a given value from Kelvin to Rankine and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Kelvin
Wikipedia reference: https://en.wikipedia.org/wiki/Rankine_scale
>>> kelvin_to_rankine(273.354, 3)
492.037
>>> kelvin_to_rankine(273.354, 0)
492.0
>>> kelvin_to_rankine(0)
0.0
>>> kelvin_to_rankine(20.0)
36.0
>>> kelvin_to_rankine("40")
72.0
>>> kelvin_to_rankine("kelvin")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'kelvin'
"""
return round((float(kelvin) * 9 / 5), ndigits)
def rankine_to_celsius(rankine: float, ndigits: int = 2) -> float:
"""
Convert a given value from Rankine to Celsius and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Rankine_scale
Wikipedia reference: https://en.wikipedia.org/wiki/Celsius
>>> rankine_to_celsius(273.354, 3)
-121.287
>>> rankine_to_celsius(273.354, 0)
-121.0
>>> rankine_to_celsius(273.15)
-121.4
>>> rankine_to_celsius(300)
-106.48
>>> rankine_to_celsius("315.5")
-97.87
>>> rankine_to_celsius("rankine")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'rankine'
"""
return round((float(rankine) - 491.67) * 5 / 9, ndigits)
def rankine_to_fahrenheit(rankine: float, ndigits: int = 2) -> float:
"""
Convert a given value from Rankine to Fahrenheit and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Rankine_scale
Wikipedia reference: https://en.wikipedia.org/wiki/Fahrenheit
>>> rankine_to_fahrenheit(273.15)
-186.52
>>> rankine_to_fahrenheit(300)
-159.67
>>> rankine_to_fahrenheit("315.5")
-144.17
>>> rankine_to_fahrenheit("rankine")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'rankine'
"""
return round(float(rankine) - 459.67, ndigits)
def rankine_to_kelvin(rankine: float, ndigits: int = 2) -> float:
"""
Convert a given value from Rankine to Kelvin and round it to 2 decimal places.
Wikipedia reference: https://en.wikipedia.org/wiki/Rankine_scale
Wikipedia reference: https://en.wikipedia.org/wiki/Kelvin
>>> rankine_to_kelvin(0)
0.0
>>> rankine_to_kelvin(20.0)
11.11
>>> rankine_to_kelvin("40")
22.22
>>> rankine_to_kelvin("rankine")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'rankine'
"""
return round((float(rankine) * 5 / 9), ndigits)
def reaumur_to_kelvin(reaumur: float, ndigits: int = 2) -> float:
"""
Convert a given value from reaumur to Kelvin and round it to 2 decimal places.
Reference:- http://www.csgnetwork.com/temp2conv.html
>>> reaumur_to_kelvin(0)
273.15
>>> reaumur_to_kelvin(20.0)
298.15
>>> reaumur_to_kelvin(40)
323.15
>>> reaumur_to_kelvin("reaumur")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'reaumur'
"""
return round((float(reaumur) * 1.25 + 273.15), ndigits)
def reaumur_to_fahrenheit(reaumur: float, ndigits: int = 2) -> float:
"""
Convert a given value from reaumur to fahrenheit and round it to 2 decimal places.
Reference:- http://www.csgnetwork.com/temp2conv.html
>>> reaumur_to_fahrenheit(0)
32.0
>>> reaumur_to_fahrenheit(20.0)
77.0
>>> reaumur_to_fahrenheit(40)
122.0
>>> reaumur_to_fahrenheit("reaumur")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'reaumur'
"""
return round((float(reaumur) * 2.25 + 32), ndigits)
def reaumur_to_celsius(reaumur: float, ndigits: int = 2) -> float:
"""
Convert a given value from reaumur to celsius and round it to 2 decimal places.
Reference:- http://www.csgnetwork.com/temp2conv.html
>>> reaumur_to_celsius(0)
0.0
>>> reaumur_to_celsius(20.0)
25.0
>>> reaumur_to_celsius(40)
50.0
>>> reaumur_to_celsius("reaumur")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'reaumur'
"""
return round((float(reaumur) * 1.25), ndigits)
def reaumur_to_rankine(reaumur: float, ndigits: int = 2) -> float:
"""
Convert a given value from reaumur to rankine and round it to 2 decimal places.
Reference:- http://www.csgnetwork.com/temp2conv.html
>>> reaumur_to_rankine(0)
491.67
>>> reaumur_to_rankine(20.0)
536.67
>>> reaumur_to_rankine(40)
581.67
>>> reaumur_to_rankine("reaumur")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'reaumur'
"""
return round((float(reaumur) * 2.25 + 32 + 459.67), ndigits)
if __name__ == "__main__":
import doctest
doctest.testmod()
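# Usage sketch: the paired helpers are mutual inverses (up to the 2-decimal rounding),
# e.g. Celsius -> Fahrenheit -> Celsius and Celsius -> Kelvin -> Celsius round-trip.
if __name__ == "__main__":
    assert fahrenheit_to_celsius(celsius_to_fahrenheit(20)) == 20.0
    assert kelvin_to_celsius(celsius_to_kelvin(25)) == 25.0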
================================================
FILE: conversions/time_conversions.py
================================================
"""
A unit of time is any particular time interval, used as a standard way of measuring or
expressing duration. The base unit of time in the International System of Units (SI),
and by extension most of the Western world, is the second, defined as about 9 billion
oscillations of the caesium atom.
https://en.wikipedia.org/wiki/Unit_of_time
"""
time_chart: dict[str, float] = {
"seconds": 1.0,
"minutes": 60.0, # 1 minute = 60 sec
"hours": 3600.0, # 1 hour = 60 minutes = 3600 seconds
"days": 86400.0, # 1 day = 24 hours = 1440 min = 86400 sec
"weeks": 604800.0, # 1 week=7d=168hr=10080min = 604800 sec
"months": 2629800.0, # Approximate value for a month in seconds
"years": 31557600.0, # Approximate value for a year in seconds
}
time_chart_inverse: dict[str, float] = {
key: 1 / value for key, value in time_chart.items()
}
def convert_time(time_value: float, unit_from: str, unit_to: str) -> float:
"""
Convert time from one unit to another using the time_chart above.
>>> convert_time(3600, "seconds", "hours")
1.0
>>> convert_time(3500, "Seconds", "Hours")
0.972
>>> convert_time(1, "DaYs", "hours")
24.0
>>> convert_time(120, "minutes", "SeCoNdS")
7200.0
>>> convert_time(2, "WEEKS", "days")
14.0
>>> convert_time(0.5, "hours", "MINUTES")
30.0
>>> convert_time(-3600, "seconds", "hours")
Traceback (most recent call last):
...
ValueError: 'time_value' must be a non-negative number.
>>> convert_time("Hello", "hours", "minutes")
Traceback (most recent call last):
...
ValueError: 'time_value' must be a non-negative number.
>>> convert_time([0, 1, 2], "weeks", "days")
Traceback (most recent call last):
...
ValueError: 'time_value' must be a non-negative number.
>>> convert_time(1, "cool", "century") # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: Invalid unit cool is not in seconds, minutes, hours, days, weeks, ...
>>> convert_time(1, "seconds", "hot") # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: Invalid unit hot is not in seconds, minutes, hours, days, weeks, ...
"""
if not isinstance(time_value, (int, float)) or time_value < 0:
msg = "'time_value' must be a non-negative number."
raise ValueError(msg)
unit_from = unit_from.lower()
unit_to = unit_to.lower()
if unit_from not in time_chart or unit_to not in time_chart:
invalid_unit = unit_from if unit_from not in time_chart else unit_to
msg = f"Invalid unit {invalid_unit} is not in {', '.join(time_chart)}."
raise ValueError(msg)
return round(
time_value * time_chart[unit_from] * time_chart_inverse[unit_to],
3,
)
if __name__ == "__main__":
import doctest
doctest.testmod()
print(f"{convert_time(3600,'seconds', 'hours') = :,}")
print(f"{convert_time(360, 'days', 'months') = :,}")
print(f"{convert_time(360, 'months', 'years') = :,}")
print(f"{convert_time(1, 'years', 'seconds') = :,}")
================================================
FILE: conversions/volume_conversions.py
================================================
"""
Conversion of volume units.
Available Units: Cubic metre, Litre, KiloLitre, Gallon, Cubic yard, Cubic foot, cup
USAGE :
-> Import this file into your project.
-> Use the function volume_conversion() for conversion of volume units.
-> Parameters :
-> value : The number of from units you want to convert
-> from_type : From which type you want to convert
-> to_type : To which type you want to convert
REFERENCES :
-> Wikipedia reference: https://en.wikipedia.org/wiki/Cubic_metre
-> Wikipedia reference: https://en.wikipedia.org/wiki/Litre
-> Wikipedia reference: https://en.wiktionary.org/wiki/kilolitre
-> Wikipedia reference: https://en.wikipedia.org/wiki/Gallon
-> Wikipedia reference: https://en.wikipedia.org/wiki/Cubic_yard
-> Wikipedia reference: https://en.wikipedia.org/wiki/Cubic_foot
-> Wikipedia reference: https://en.wikipedia.org/wiki/Cup_(unit)
"""
from typing import NamedTuple
class FromTo(NamedTuple):
from_factor: float
to_factor: float
METRIC_CONVERSION = {
"cubic meter": FromTo(1, 1),
"litre": FromTo(0.001, 1000),
"kilolitre": FromTo(1, 1),
"gallon": FromTo(0.00454, 264.172),
"cubic yard": FromTo(0.76455, 1.30795),
"cubic foot": FromTo(0.028, 35.3147),
"cup": FromTo(0.000236588, 4226.75),
}
def volume_conversion(value: float, from_type: str, to_type: str) -> float:
"""
Conversion between volume units.
>>> volume_conversion(4, "cubic meter", "litre")
4000
>>> volume_conversion(1, "litre", "gallon")
0.264172
>>> volume_conversion(1, "kilolitre", "cubic meter")
1
>>> volume_conversion(3, "gallon", "cubic yard")
0.017814279
>>> volume_conversion(2, "cubic yard", "litre")
1529.1
>>> volume_conversion(4, "cubic foot", "cup")
473.396
>>> volume_conversion(1, "cup", "kilolitre")
0.000236588
>>> volume_conversion(4, "wrongUnit", "litre")
Traceback (most recent call last):
...
ValueError: Invalid 'from_type' value: 'wrongUnit' Supported values are:
cubic meter, litre, kilolitre, gallon, cubic yard, cubic foot, cup
"""
if from_type not in METRIC_CONVERSION:
raise ValueError(
f"Invalid 'from_type' value: {from_type!r} Supported values are:\n"
+ ", ".join(METRIC_CONVERSION)
)
if to_type not in METRIC_CONVERSION:
raise ValueError(
f"Invalid 'to_type' value: {to_type!r}. Supported values are:\n"
+ ", ".join(METRIC_CONVERSION)
)
return (
value
* METRIC_CONVERSION[from_type].from_factor
* METRIC_CONVERSION[to_type].to_factor
)
if __name__ == "__main__":
import doctest
doctest.testmod()
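# Usage sketch: conversions pivot through cubic metres, so gallon -> cubic yard is
# gallon -> cubic metre followed by cubic metre -> cubic yard.
if __name__ == "__main__":
    cubic_metres = 3 * METRIC_CONVERSION["gallon"].from_factor  # 3 gallons in cubic metres
    expected = cubic_metres * METRIC_CONVERSION["cubic yard"].to_factor
    assert volume_conversion(3, "gallon", "cubic yard") == expected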
================================================
FILE: conversions/weight_conversion.py
================================================
"""
Conversion of weight units.
__author__ = "Anubhav Solanki"
__license__ = "MIT"
__version__ = "1.1.0"
__maintainer__ = "Anubhav Solanki"
__email__ = "anubhavsolanki0@gmail.com"
USAGE :
-> Import this file into your project.
-> Use the function weight_conversion() for conversion of weight units.
-> Parameters :
-> from_type : From which type you want to convert
-> to_type : To which type you want to convert
-> value : the value which you want to convert
REFERENCES :
-> Wikipedia reference: https://en.wikipedia.org/wiki/Kilogram
-> Wikipedia reference: https://en.wikipedia.org/wiki/Gram
-> Wikipedia reference: https://en.wikipedia.org/wiki/Millimetre
-> Wikipedia reference: https://en.wikipedia.org/wiki/Tonne
-> Wikipedia reference: https://en.wikipedia.org/wiki/Long_ton
-> Wikipedia reference: https://en.wikipedia.org/wiki/Short_ton
-> Wikipedia reference: https://en.wikipedia.org/wiki/Pound
-> Wikipedia reference: https://en.wikipedia.org/wiki/Ounce
-> Wikipedia reference: https://en.wikipedia.org/wiki/Fineness#Karat
-> Wikipedia reference: https://en.wikipedia.org/wiki/Dalton_(unit)
-> Wikipedia reference: https://en.wikipedia.org/wiki/Stone_(unit)
"""
KILOGRAM_CHART: dict[str, float] = {
"kilogram": 1,
"gram": pow(10, 3),
"milligram": pow(10, 6),
"metric-ton": pow(10, -3),
"long-ton": 0.0009842073,
"short-ton": 0.0011023122,
"pound": 2.2046244202,
"stone": 0.1574731728,
"ounce": 35.273990723,
"carrat": 5000,
"atomic-mass-unit": 6.022136652e26,
}
WEIGHT_TYPE_CHART: dict[str, float] = {
"kilogram": 1,
"gram": pow(10, -3),
"milligram": pow(10, -6),
"metric-ton": pow(10, 3),
"long-ton": 1016.04608,
"short-ton": 907.184,
"pound": 0.453592,
"stone": 6.35029,
"ounce": 0.0283495,
"carrat": 0.0002,
"atomic-mass-unit": 1.660540199e-27,
}
def weight_conversion(from_type: str, to_type: str, value: float) -> float:
"""
Conversion of weight unit with the help of KILOGRAM_CHART
"kilogram" : 1,
"gram" : pow(10, 3),
"milligram" : pow(10, 6),
"metric-ton" : pow(10, -3),
"long-ton" : 0.0009842073,
"short-ton" : 0.0011023122,
"pound" : 2.2046244202,
"stone": 0.1574731728,
"ounce" : 35.273990723,
"carrat" : 5000,
"atomic-mass-unit" : 6.022136652E+26
>>> weight_conversion("kilogram","kilogram",4)
4
>>> weight_conversion("kilogram","gram",1)
1000
>>> weight_conversion("kilogram","milligram",4)
4000000
>>> weight_conversion("kilogram","metric-ton",4)
0.004
>>> weight_conversion("kilogram","long-ton",3)
0.0029526219
>>> weight_conversion("kilogram","short-ton",1)
0.0011023122
>>> weight_conversion("kilogram","pound",4)
8.8184976808
>>> weight_conversion("kilogram","stone",5)
0.7873658640000001
>>> weight_conversion("kilogram","ounce",4)
141.095962892
>>> weight_conversion("kilogram","carrat",3)
15000
>>> weight_conversion("kilogram","atomic-mass-unit",1)
6.022136652e+26
>>> weight_conversion("gram","kilogram",1)
0.001
>>> weight_conversion("gram","gram",3)
3.0
>>> weight_conversion("gram","milligram",2)
2000.0
>>> weight_conversion("gram","metric-ton",4)
4e-06
>>> weight_conversion("gram","long-ton",3)
2.9526219e-06
>>> weight_conversion("gram","short-ton",3)
3.3069366000000003e-06
>>> weight_conversion("gram","pound",3)
0.0066138732606
>>> weight_conversion("gram","stone",4)
0.0006298926912000001
>>> weight_conversion("gram","ounce",1)
0.035273990723
>>> weight_conversion("gram","carrat",2)
10.0
>>> weight_conversion("gram","atomic-mass-unit",1)
6.022136652e+23
>>> weight_conversion("milligram","kilogram",1)
1e-06
>>> weight_conversion("milligram","gram",2)
0.002
>>> weight_conversion("milligram","milligram",3)
3.0
>>> weight_conversion("milligram","metric-ton",3)
3e-09
>>> weight_conversion("milligram","long-ton",3)
2.9526219e-09
>>> weight_conversion("milligram","short-ton",1)
1.1023122e-09
>>> weight_conversion("milligram","pound",3)
6.6138732605999995e-06
>>> weight_conversion("milligram","ounce",2)
7.054798144599999e-05
>>> weight_conversion("milligram","carrat",1)
0.005
>>> weight_conversion("milligram","atomic-mass-unit",1)
6.022136652e+20
>>> weight_conversion("metric-ton","kilogram",2)
2000
>>> weight_conversion("metric-ton","gram",2)
2000000
>>> weight_conversion("metric-ton","milligram",3)
3000000000
>>> weight_conversion("metric-ton","metric-ton",2)
2.0
>>> weight_conversion("metric-ton","long-ton",3)
2.9526219
>>> weight_conversion("metric-ton","short-ton",2)
2.2046244
>>> weight_conversion("metric-ton","pound",3)
6613.8732606
>>> weight_conversion("metric-ton","ounce",4)
141095.96289199998
>>> weight_conversion("metric-ton","carrat",4)
20000000
>>> weight_conversion("metric-ton","atomic-mass-unit",1)
6.022136652e+29
>>> weight_conversion("long-ton","kilogram",4)
4064.18432
>>> weight_conversion("long-ton","gram",4)
4064184.32
>>> weight_conversion("long-ton","milligram",3)
3048138240.0
>>> weight_conversion("long-ton","metric-ton",4)
4.06418432
>>> weight_conversion("long-ton","long-ton",3)
2.999999907217152
>>> weight_conversion("long-ton","short-ton",1)
1.119999989746176
>>> weight_conversion("long-ton","pound",3)
6720.000000049448
>>> weight_conversion("long-ton","ounce",1)
35840.000000060514
>>> weight_conversion("long-ton","carrat",4)
20320921.599999998
>>> weight_conversion("long-ton","atomic-mass-unit",4)
2.4475073353955697e+30
>>> weight_conversion("short-ton","kilogram",3)
2721.5519999999997
>>> weight_conversion("short-ton","gram",3)
2721552.0
>>> weight_conversion("short-ton","milligram",1)
907184000.0
>>> weight_conversion("short-ton","metric-ton",4)
3.628736
>>> weight_conversion("short-ton","long-ton",3)
2.6785713457296
>>> weight_conversion("short-ton","short-ton",3)
2.9999999725344
>>> weight_conversion("short-ton","pound",2)
4000.0000000294335
>>> weight_conversion("short-ton","ounce",4)
128000.00000021611
>>> weight_conversion("short-ton","carrat",4)
18143680.0
>>> weight_conversion("short-ton","atomic-mass-unit",1)
5.463186016507968e+29
>>> weight_conversion("pound","kilogram",4)
1.814368
>>> weight_conversion("pound","gram",2)
907.184
>>> weight_conversion("pound","milligram",3)
1360776.0
>>> weight_conversion("pound","metric-ton",3)
0.001360776
>>> weight_conversion("pound","long-ton",2)
0.0008928571152432
>>> weight_conversion("pound","short-ton",1)
0.0004999999954224
>>> weight_conversion("pound","pound",3)
3.0000000000220752
>>> weight_conversion("pound","ounce",1)
16.000000000027015
>>> weight_conversion("pound","carrat",1)
2267.96
>>> weight_conversion("pound","atomic-mass-unit",4)
1.0926372033015936e+27
>>> weight_conversion("stone","kilogram",5)
31.751450000000002
>>> weight_conversion("stone","gram",2)
12700.58
>>> weight_conversion("stone","milligram",3)
19050870.0
>>> weight_conversion("stone","metric-ton",3)
0.01905087
>>> weight_conversion("stone","long-ton",3)
0.018750005325351003
>>> weight_conversion("stone","short-ton",3)
0.021000006421614002
>>> weight_conversion("stone","pound",2)
28.00000881870372
>>> weight_conversion("stone","ounce",1)
224.00007054835967
>>> weight_conversion("stone","carrat",2)
63502.9
>>> weight_conversion("ounce","kilogram",3)
0.0850485
>>> weight_conversion("ounce","gram",3)
85.0485
>>> weight_conversion("ounce","milligram",4)
113398.0
>>> weight_conversion("ounce","metric-ton",4)
0.000113398
>>> weight_conversion("ounce","long-ton",4)
0.0001116071394054
>>> weight_conversion("ounce","short-ton",4)
0.0001249999988556
>>> weight_conversion("ounce","pound",1)
0.0625000000004599
>>> weight_conversion("ounce","ounce",2)
2.000000000003377
>>> weight_conversion("ounce","carrat",1)
141.7475
>>> weight_conversion("ounce","atomic-mass-unit",1)
1.70724563015874e+25
>>> weight_conversion("carrat","kilogram",1)
0.0002
>>> weight_conversion("carrat","gram",4)
0.8
>>> weight_conversion("carrat","milligram",2)
400.0
>>> weight_conversion("carrat","metric-ton",2)
4.0000000000000003e-07
>>> weight_conversion("carrat","long-ton",3)
5.9052438e-07
>>> weight_conversion("carrat","short-ton",4)
8.818497600000002e-07
>>> weight_conversion("carrat","pound",1)
0.00044092488404000004
>>> weight_conversion("carrat","ounce",2)
0.0141095962892
>>> weight_conversion("carrat","carrat",4)
4.0
>>> weight_conversion("carrat","atomic-mass-unit",4)
4.8177093216e+23
>>> weight_conversion("atomic-mass-unit","kilogram",4)
6.642160796e-27
>>> weight_conversion("atomic-mass-unit","gram",2)
3.321080398e-24
>>> weight_conversion("atomic-mass-unit","milligram",2)
3.3210803980000002e-21
>>> weight_conversion("atomic-mass-unit","metric-ton",3)
4.9816205970000004e-30
>>> weight_conversion("atomic-mass-unit","long-ton",3)
4.9029473573977584e-30
>>> weight_conversion("atomic-mass-unit","short-ton",1)
1.830433719948128e-30
>>> weight_conversion("atomic-mass-unit","pound",3)
1.0982602420317504e-26
>>> weight_conversion("atomic-mass-unit","ounce",2)
1.1714775914938915e-25
>>> weight_conversion("atomic-mass-unit","carrat",2)
1.660540199e-23
>>> weight_conversion("atomic-mass-unit","atomic-mass-unit",2)
1.999999998903455
>>> weight_conversion("slug", "kilogram", 1)
Traceback (most recent call last):
...
ValueError: Invalid 'from_type' or 'to_type' value: 'slug', 'kilogram'
Supported values are: kilogram, gram, milligram, metric-ton, long-ton, short-ton, \
pound, stone, ounce, carrat, atomic-mass-unit
"""
if to_type not in KILOGRAM_CHART or from_type not in WEIGHT_TYPE_CHART:
msg = (
f"Invalid 'from_type' or 'to_type' value: {from_type!r}, {to_type!r}\n"
f"Supported values are: {', '.join(WEIGHT_TYPE_CHART)}"
)
raise ValueError(msg)
return value * KILOGRAM_CHART[to_type] * WEIGHT_TYPE_CHART[from_type]
if __name__ == "__main__":
import doctest
doctest.testmod()
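# Usage sketch: the conversion pivots through kilograms - WEIGHT_TYPE_CHART maps the
# source unit to kilograms and KILOGRAM_CHART maps kilograms to the target unit.
if __name__ == "__main__":
    import math
    kilograms = 2 * WEIGHT_TYPE_CHART["stone"]  # 2 stone expressed in kilograms
    assert math.isclose(
        weight_conversion("stone", "pound", 2), kilograms * KILOGRAM_CHART["pound"]
    )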
================================================
FILE: data_compression/README.md
================================================
# Compression
Data compression is everywhere: you need it to store data without taking up too much space.
Either the compression loses some data (lossy compression, such as .jpg) or it does not (lossless compression, such as .png).
Lossless compression is mainly used for archival purposes, as it stores data without losing any information about the archived file. On the other hand, lossy compression is used for transferring files where perfect quality isn't required (e.g. images on Twitter).
================================================
FILE: data_compression/__init__.py
================================================
================================================
FILE: data_compression/burrows_wheeler.py
================================================
"""
https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform
The Burrows-Wheeler transform (BWT, also called block-sorting compression)
rearranges a character string into runs of similar characters. This is useful
for compression, since it tends to be easy to compress a string that has runs
of repeated characters by techniques such as move-to-front transform and
run-length encoding. More importantly, the transformation is reversible,
without needing to store any additional data except the position of the first
original character. The BWT is thus a "free" method of improving the efficiency
of text compression algorithms, costing only some extra computation.
"""
from __future__ import annotations
from typing import TypedDict
class BWTTransformDict(TypedDict):
bwt_string: str
idx_original_string: int
def all_rotations(s: str) -> list[str]:
"""
:param s: The string that will be rotated len(s) times.
:return: A list with the rotations.
:raises TypeError: If s is not an instance of str.
Examples:
>>> all_rotations("^BANANA|") # doctest: +NORMALIZE_WHITESPACE
['^BANANA|', 'BANANA|^', 'ANANA|^B', 'NANA|^BA', 'ANA|^BAN', 'NA|^BANA',
'A|^BANAN', '|^BANANA']
>>> all_rotations("a_asa_da_casa") # doctest: +NORMALIZE_WHITESPACE
['a_asa_da_casa', '_asa_da_casaa', 'asa_da_casaa_', 'sa_da_casaa_a',
'a_da_casaa_as', '_da_casaa_asa', 'da_casaa_asa_', 'a_casaa_asa_d',
'_casaa_asa_da', 'casaa_asa_da_', 'asaa_asa_da_c', 'saa_asa_da_ca',
'aa_asa_da_cas']
>>> all_rotations("panamabanana") # doctest: +NORMALIZE_WHITESPACE
['panamabanana', 'anamabananap', 'namabananapa', 'amabananapan',
'mabananapana', 'abananapanam', 'bananapanama', 'ananapanamab',
'nanapanamaba', 'anapanamaban', 'napanamabana', 'apanamabanan']
>>> all_rotations(5)
Traceback (most recent call last):
...
TypeError: The parameter s type must be str.
"""
if not isinstance(s, str):
raise TypeError("The parameter s type must be str.")
return [s[i:] + s[:i] for i in range(len(s))]
def bwt_transform(s: str) -> BWTTransformDict:
"""
:param s: The string that will be used at bwt algorithm
:return: the string composed of the last char of each row of the ordered
rotations and the index of the original string at ordered rotations list
:raises TypeError: If the s parameter type is not str
:raises ValueError: If the s parameter is empty
Examples:
>>> bwt_transform("^BANANA")
{'bwt_string': 'BNN^AAA', 'idx_original_string': 6}
>>> bwt_transform("a_asa_da_casa")
{'bwt_string': 'aaaadss_c__aa', 'idx_original_string': 3}
>>> bwt_transform("panamabanana")
{'bwt_string': 'mnpbnnaaaaaa', 'idx_original_string': 11}
>>> bwt_transform(4)
Traceback (most recent call last):
...
TypeError: The parameter s type must be str.
>>> bwt_transform('')
Traceback (most recent call last):
...
ValueError: The parameter s must not be empty.
"""
if not isinstance(s, str):
raise TypeError("The parameter s type must be str.")
if not s:
raise ValueError("The parameter s must not be empty.")
rotations = all_rotations(s)
    rotations.sort()  # sort the list of rotations in alphabetical order
# make a string composed of the last char of each rotation
response: BWTTransformDict = {
"bwt_string": "".join([word[-1] for word in rotations]),
"idx_original_string": rotations.index(s),
}
return response
def reverse_bwt(bwt_string: str, idx_original_string: int) -> str:
"""
:param bwt_string: The string returned from bwt algorithm execution
:param idx_original_string: A 0-based index of the string that was used to
generate bwt_string at ordered rotations list
:return: The string used to generate bwt_string when bwt was executed
:raises TypeError: If the bwt_string parameter type is not str
:raises ValueError: If the bwt_string parameter is empty
:raises TypeError: If the idx_original_string type is not int or if not
possible to cast it to int
:raises ValueError: If the idx_original_string value is lower than 0 or
greater than len(bwt_string) - 1
>>> reverse_bwt("BNN^AAA", 6)
'^BANANA'
>>> reverse_bwt("aaaadss_c__aa", 3)
'a_asa_da_casa'
>>> reverse_bwt("mnpbnnaaaaaa", 11)
'panamabanana'
>>> reverse_bwt(4, 11)
Traceback (most recent call last):
...
TypeError: The parameter bwt_string type must be str.
>>> reverse_bwt("", 11)
Traceback (most recent call last):
...
ValueError: The parameter bwt_string must not be empty.
>>> reverse_bwt("mnpbnnaaaaaa", "asd") # doctest: +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
TypeError: The parameter idx_original_string type must be int or passive
of cast to int.
>>> reverse_bwt("mnpbnnaaaaaa", -1)
Traceback (most recent call last):
...
ValueError: The parameter idx_original_string must not be lower than 0.
>>> reverse_bwt("mnpbnnaaaaaa", 12) # doctest: +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
ValueError: The parameter idx_original_string must be lower than
len(bwt_string).
>>> reverse_bwt("mnpbnnaaaaaa", 11.0)
'panamabanana'
>>> reverse_bwt("mnpbnnaaaaaa", 11.4)
'panamabanana'
"""
if not isinstance(bwt_string, str):
raise TypeError("The parameter bwt_string type must be str.")
if not bwt_string:
raise ValueError("The parameter bwt_string must not be empty.")
try:
idx_original_string = int(idx_original_string)
except ValueError:
raise TypeError(
"The parameter idx_original_string type must be int or passive"
" of cast to int."
)
if idx_original_string < 0:
raise ValueError("The parameter idx_original_string must not be lower than 0.")
if idx_original_string >= len(bwt_string):
raise ValueError(
"The parameter idx_original_string must be lower than len(bwt_string)."
)
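    # Rebuild the sorted rotation table: prepend the BWT string as a new first
    # column and re-sort, len(bwt_string) times; the row at idx_original_string
    # is then the original string.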
ordered_rotations = [""] * len(bwt_string)
for _ in range(len(bwt_string)):
for i in range(len(bwt_string)):
ordered_rotations[i] = bwt_string[i] + ordered_rotations[i]
ordered_rotations.sort()
return ordered_rotations[idx_original_string]
if __name__ == "__main__":
    entry_msg = "Provide a string for which to generate the BWT transform: "
s = input(entry_msg).strip()
result = bwt_transform(s)
print(
f"Burrows Wheeler transform for string '{s}' results "
f"in '{result['bwt_string']}'"
)
original_string = reverse_bwt(result["bwt_string"], result["idx_original_string"])
print(
f"Reversing Burrows Wheeler transform for entry '{result['bwt_string']}' "
f"we get original string '{original_string}'"
)
================================================
FILE: data_compression/coordinate_compression.py
================================================
"""
Assumption:
- The values to compress are assumed to be comparable,
values can be sorted and compared with '<' and '>' operators.
"""
class CoordinateCompressor:
"""
A class for coordinate compression.
This class allows you to compress and decompress a list of values.
Mapping:
In addition to compression and decompression, this class maintains a mapping
between original values and their compressed counterparts using two data
structures: a dictionary `coordinate_map` and a list `reverse_map`:
- `coordinate_map`: A dictionary that maps original values to their compressed
coordinates. Keys are original values, and values are compressed coordinates.
- `reverse_map`: A list used for reverse mapping, where each index corresponds
to a compressed coordinate, and the value at that index is the original value.
Example of mapping:
Original: 10, Compressed: 0
Original: 52, Compressed: 1
Original: 83, Compressed: 2
Original: 100, Compressed: 3
This mapping allows for efficient compression and decompression of values within
the list.
"""
def __init__(self, arr: list[int | float | str]) -> None:
"""
Initialize the CoordinateCompressor with a list.
Args:
arr: The list of values to be compressed.
>>> arr = [100, 10, 52, 83]
>>> cc = CoordinateCompressor(arr)
>>> cc.compress(100)
3
>>> cc.compress(52)
1
>>> cc.decompress(1)
52
"""
# A dictionary to store compressed coordinates
self.coordinate_map: dict[int | float | str, int] = {}
# A list to store reverse mapping
self.reverse_map: list[int | float | str] = [-1] * len(arr)
        self.arr = sorted(arr)  # The sorted input list
self.n = len(arr) # The length of the input list
self.compress_coordinates()
def compress_coordinates(self) -> None:
"""
Compress the coordinates in the input list.
>>> arr = [100, 10, 52, 83]
>>> cc = CoordinateCompressor(arr)
>>> cc.coordinate_map[83]
2
>>> cc.coordinate_map[80] # Value not in the original list
Traceback (most recent call last):
...
KeyError: 80
>>> cc.reverse_map[2]
83
"""
key = 0
for val in self.arr:
if val not in self.coordinate_map:
self.coordinate_map[val] = key
self.reverse_map[key] = val
key += 1
def compress(self, original: float | str) -> int:
"""
Compress a single value.
Args:
original: The value to compress.
Returns:
The compressed integer, or -1 if not found in the original list.
>>> arr = [100, 10, 52, 83]
>>> cc = CoordinateCompressor(arr)
>>> cc.compress(100)
3
>>> cc.compress(7) # Value not in the original list
-1
"""
return self.coordinate_map.get(original, -1)
def decompress(self, num: int) -> int | float | str:
"""
Decompress a single integer.
Args:
num: The compressed integer to decompress.
Returns:
The original value.
>>> arr = [100, 10, 52, 83]
>>> cc = CoordinateCompressor(arr)
>>> cc.decompress(0)
10
>>> cc.decompress(5) # Compressed coordinate out of range
-1
"""
return self.reverse_map[num] if 0 <= num < len(self.reverse_map) else -1
if __name__ == "__main__":
from doctest import testmod
testmod()
arr: list[int | float | str] = [100, 10, 52, 83]
cc = CoordinateCompressor(arr)
for original in arr:
compressed = cc.compress(original)
decompressed = cc.decompress(compressed)
print(f"Original: {decompressed}, Compressed: {compressed}")
================================================
FILE: data_compression/huffman.py
================================================
from __future__ import annotations
import sys
class Letter:
def __init__(self, letter: str, freq: int):
self.letter: str = letter
self.freq: int = freq
self.bitstring: dict[str, str] = {}
def __repr__(self) -> str:
return f"{self.letter}:{self.freq}"
class TreeNode:
def __init__(self, freq: int, left: Letter | TreeNode, right: Letter | TreeNode):
self.freq: int = freq
self.left: Letter | TreeNode = left
self.right: Letter | TreeNode = right
def parse_file(file_path: str) -> list[Letter]:
"""
Read the file and build a dict of all letters and their
frequencies, then convert the dict into a list of Letters.
"""
chars: dict[str, int] = {}
with open(file_path) as f:
while True:
c = f.read(1)
if not c:
break
chars[c] = chars[c] + 1 if c in chars else 1
return sorted((Letter(c, f) for c, f in chars.items()), key=lambda x: x.freq)
def build_tree(letters: list[Letter]) -> Letter | TreeNode:
"""
    Run through the sorted list of Letters, repeatedly combining the two
    lowest-frequency nodes, to build the Huffman Tree.
"""
response: list[Letter | TreeNode] = list(letters)
while len(response) > 1:
left = response.pop(0)
right = response.pop(0)
total_freq = left.freq + right.freq
node = TreeNode(total_freq, left, right)
response.append(node)
response.sort(key=lambda x: x.freq)
return response[0]
def traverse_tree(root: Letter | TreeNode, bitstring: str) -> list[Letter]:
"""
Recursively traverse the Huffman Tree to set each
Letter's bitstring dictionary, and return the list of Letters
"""
if isinstance(root, Letter):
root.bitstring[root.letter] = bitstring
return [root]
treenode: TreeNode = root
letters = []
letters += traverse_tree(treenode.left, bitstring + "0")
letters += traverse_tree(treenode.right, bitstring + "1")
return letters
def huffman(file_path: str) -> None:
"""
Parse the file, build the tree, then run through the file
again, using the letters dictionary to find and print out the
bitstring for each letter.
"""
letters_list = parse_file(file_path)
root = build_tree(letters_list)
letters = {
k: v for letter in traverse_tree(root, "") for k, v in letter.bitstring.items()
}
print(f"Huffman Coding of {file_path}: ")
with open(file_path) as f:
while True:
c = f.read(1)
if not c:
break
print(letters[c], end=" ")
print()
if __name__ == "__main__":
# pass the file path to the huffman function
huffman(sys.argv[1])
================================================
FILE: data_compression/lempel_ziv.py
================================================
"""
One of the several implementations of Lempel-Ziv-Welch compression algorithm
https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
"""
import math
import os
import sys
def read_file_binary(file_path: str) -> str:
"""
Reads given file as bytes and returns them as a long string
"""
result = ""
try:
with open(file_path, "rb") as binary_file:
data = binary_file.read()
for dat in data:
curr_byte = f"{dat:08b}"
result += curr_byte
return result
except OSError:
print("File not accessible")
sys.exit()
def add_key_to_lexicon(
lexicon: dict[str, str], curr_string: str, index: int, last_match_id: str
) -> None:
"""
Adds new strings (curr_string + "0", curr_string + "1") to the lexicon
"""
lexicon.pop(curr_string)
lexicon[curr_string + "0"] = last_match_id
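    # When the number of entries reaches a power of two, every code needs one
    # more bit, so prepend a "0" to all existing codes in the lexicon.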
if math.log2(index).is_integer():
for curr_key, value in lexicon.items():
lexicon[curr_key] = f"0{value}"
lexicon[curr_string + "1"] = bin(index)[2:]
def compress_data(data_bits: str) -> str:
"""
Compresses given data_bits using Lempel-Ziv-Welch compression algorithm
and returns the result as a string
"""
lexicon = {"0": "0", "1": "1"}
result, curr_string = "", ""
index = len(lexicon)
for i in range(len(data_bits)):
curr_string += data_bits[i]
if curr_string not in lexicon:
continue
last_match_id = lexicon[curr_string]
result += last_match_id
add_key_to_lexicon(lexicon, curr_string, index, last_match_id)
index += 1
curr_string = ""
while curr_string != "" and curr_string not in lexicon:
curr_string += "0"
if curr_string != "":
last_match_id = lexicon[curr_string]
result += last_match_id
return result
def add_file_length(source_path: str, compressed: str) -> str:
"""
Adds given file's length in front (using Elias gamma coding) of the compressed
string
"""
file_length = os.path.getsize(source_path)
file_length_binary = bin(file_length)[2:]
length_length = len(file_length_binary)
return "0" * (length_length - 1) + file_length_binary + compressed
def write_file_binary(file_path: str, to_write: str) -> None:
"""
Writes given to_write string (should only consist of 0's and 1's) as bytes in the
file
"""
byte_length = 8
try:
with open(file_path, "wb") as opened_file:
result_byte_array = [
to_write[i : i + byte_length]
for i in range(0, len(to_write), byte_length)
]
if len(result_byte_array[-1]) % byte_length == 0:
result_byte_array.append("10000000")
else:
result_byte_array[-1] += "1" + "0" * (
byte_length - len(result_byte_array[-1]) - 1
)
for elem in result_byte_array:
opened_file.write(int(elem, 2).to_bytes(1, byteorder="big"))
except OSError:
print("File not accessible")
sys.exit()
def compress(source_path: str, destination_path: str) -> None:
"""
Reads source file, compresses it and writes the compressed result in destination
file
"""
data_bits = read_file_binary(source_path)
compressed = compress_data(data_bits)
compressed = add_file_length(source_path, compressed)
write_file_binary(destination_path, compressed)
if __name__ == "__main__":
compress(sys.argv[1], sys.argv[2])
================================================
FILE: data_compression/lempel_ziv_decompress.py
================================================
"""
One of the several implementations of Lempel-Ziv-Welch decompression algorithm
https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
"""
import math
import sys
def read_file_binary(file_path: str) -> str:
"""
Reads given file as bytes and returns them as a long string
"""
result = ""
try:
with open(file_path, "rb") as binary_file:
data = binary_file.read()
for dat in data:
curr_byte = f"{dat:08b}"
result += curr_byte
return result
except OSError:
print("File not accessible")
sys.exit()
def decompress_data(data_bits: str) -> str:
"""
Decompresses given data_bits using Lempel-Ziv-Welch compression algorithm
and returns the result as a string
"""
lexicon = {"0": "0", "1": "1"}
result, curr_string = "", ""
index = len(lexicon)
for i in range(len(data_bits)):
curr_string += data_bits[i]
if curr_string not in lexicon:
continue
last_match_id = lexicon[curr_string]
result += last_match_id
lexicon[curr_string] = last_match_id + "0"
if math.log2(index).is_integer():
new_lex = {}
for curr_key in list(lexicon):
new_lex["0" + curr_key] = lexicon.pop(curr_key)
lexicon = new_lex
lexicon[bin(index)[2:]] = last_match_id + "1"
index += 1
curr_string = ""
return result
def write_file_binary(file_path: str, to_write: str) -> None:
"""
Writes given to_write string (should only consist of 0's and 1's) as bytes in the
file
"""
byte_length = 8
try:
with open(file_path, "wb") as opened_file:
result_byte_array = [
to_write[i : i + byte_length]
for i in range(0, len(to_write), byte_length)
]
if len(result_byte_array[-1]) % byte_length == 0:
result_byte_array.append("10000000")
else:
result_byte_array[-1] += "1" + "0" * (
byte_length - len(result_byte_array[-1]) - 1
)
for elem in result_byte_array[:-1]:
opened_file.write(int(elem, 2).to_bytes(1, byteorder="big"))
except OSError:
print("File not accessible")
sys.exit()
def remove_prefix(data_bits: str) -> str:
"""
    Removes the size prefix (Elias gamma coded) that the compressed file should have
    and returns the remaining bits.
"""
counter = 0
for letter in data_bits:
if letter == "1":
break
counter += 1
data_bits = data_bits[counter:]
data_bits = data_bits[counter + 1 :]
return data_bits
def decompress(source_path: str, destination_path: str) -> None:
"""
Reads source file, decompresses it and writes the result in destination file
"""
data_bits = read_file_binary(source_path)
data_bits = remove_prefix(data_bits)
decompressed = decompress_data(data_bits)
write_file_binary(destination_path, decompressed)
if __name__ == "__main__":
    decompress(sys.argv[1], sys.argv[2])
================================================
FILE: data_compression/lz77.py
================================================
"""
LZ77 compression algorithm
- lossless data compression published in papers by Abraham Lempel and Jacob Ziv in 1977
- also known as LZ1 or sliding-window compression
- forms the basis for many variations, including LZW, LZSS, LZMA and others
It uses a “sliding window” method. Within the sliding window we have:
- search buffer
- look ahead buffer
len(sliding_window) = len(search_buffer) + len(look_ahead_buffer)
LZ77 manages a dictionary that uses triples composed of:
- Offset: how far back into the search buffer the match starts, i.e. the distance
  between the current position and the start of the matched phrase.
- Length: the number of characters that make up the matched phrase.
- Indicator: the character that is going to be encoded next (the character
  following the match).
As a file is parsed, the dictionary is dynamically updated to reflect the compressed
data contents and size.
Examples:
"cabracadabrarrarrad" <-> [(0, 0, 'c'), (0, 0, 'a'), (0, 0, 'b'), (0, 0, 'r'),
(3, 1, 'c'), (2, 1, 'd'), (7, 4, 'r'), (3, 5, 'd')]
"ababcbababaa" <-> [(0, 0, 'a'), (0, 0, 'b'), (2, 2, 'c'), (4, 3, 'a'), (2, 2, 'a')]
"aacaacabcabaaac" <-> [(0, 0, 'a'), (1, 1, 'c'), (3, 4, 'b'), (3, 3, 'a'), (1, 2, 'c')]
Sources:
en.wikipedia.org/wiki/LZ77_and_LZ78
"""
from dataclasses import dataclass
__version__ = "0.1"
__author__ = "Lucia Harcekova"
@dataclass
class Token:
"""
Dataclass representing triplet called token consisting of length, offset
and indicator. This triplet is used during LZ77 compression.
"""
offset: int
length: int
indicator: str
def __repr__(self) -> str:
"""
>>> token = Token(1, 2, "c")
>>> repr(token)
'(1, 2, c)'
>>> str(token)
'(1, 2, c)'
"""
return f"({self.offset}, {self.length}, {self.indicator})"
class LZ77Compressor:
"""
Class containing compress and decompress methods using LZ77 compression algorithm.
"""
def __init__(self, window_size: int = 13, lookahead_buffer_size: int = 6) -> None:
self.window_size = window_size
self.lookahead_buffer_size = lookahead_buffer_size
self.search_buffer_size = self.window_size - self.lookahead_buffer_size
def compress(self, text: str) -> list[Token]:
"""
Compress the given string text using LZ77 compression algorithm.
Args:
text: string to be compressed
Returns:
output: the compressed text as a list of Tokens
>>> lz77_compressor = LZ77Compressor()
>>> str(lz77_compressor.compress("ababcbababaa"))
'[(0, 0, a), (0, 0, b), (2, 2, c), (4, 3, a), (2, 2, a)]'
>>> str(lz77_compressor.compress("aacaacabcabaaac"))
'[(0, 0, a), (1, 1, c), (3, 4, b), (3, 3, a), (1, 2, c)]'
"""
output = []
search_buffer = ""
# while there are still characters in text to compress
while text:
# find the next encoding phrase
# - triplet with offset, length, indicator (the next encoding character)
token = self._find_encoding_token(text, search_buffer)
# update the search buffer:
# - add new characters from text into it
            # - check if its size exceeds the max search buffer size, if so, drop the
# oldest elements
search_buffer += text[: token.length + 1]
if len(search_buffer) > self.search_buffer_size:
search_buffer = search_buffer[-self.search_buffer_size :]
# update the text
text = text[token.length + 1 :]
# append the token to output
output.append(token)
return output
def decompress(self, tokens: list[Token]) -> str:
"""
Convert the list of tokens into an output string.
Args:
tokens: list containing triplets (offset, length, char)
Returns:
output: decompressed text
Tests:
>>> lz77_compressor = LZ77Compressor()
>>> lz77_compressor.decompress([Token(0, 0, 'c'), Token(0, 0, 'a'),
... Token(0, 0, 'b'), Token(0, 0, 'r'), Token(3, 1, 'c'),
... Token(2, 1, 'd'), Token(7, 4, 'r'), Token(3, 5, 'd')])
'cabracadabrarrarrad'
>>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(0, 0, 'b'),
... Token(2, 2, 'c'), Token(4, 3, 'a'), Token(2, 2, 'a')])
'ababcbababaa'
>>> lz77_compressor.decompress([Token(0, 0, 'a'), Token(1, 1, 'c'),
... Token(3, 4, 'b'), Token(3, 3, 'a'), Token(1, 2, 'c')])
'aacaacabcabaaac'
"""
output = ""
for token in tokens:
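            # Copy `length` characters one at a time from `offset` positions back;
            # copying char by char also handles overlapping matches.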
for _ in range(token.length):
output += output[-token.offset]
output += token.indicator
return output
def _find_encoding_token(self, text: str, search_buffer: str) -> Token:
"""Finds the encoding token for the first character in the text.
Tests:
>>> lz77_compressor = LZ77Compressor()
>>> lz77_compressor._find_encoding_token("abrarrarrad", "abracad").offset
7
>>> lz77_compressor._find_encoding_token("adabrarrarrad", "cabrac").length
1
>>> lz77_compressor._find_encoding_token("abc", "xyz").offset
0
>>> lz77_compressor._find_encoding_token("", "xyz").offset
Traceback (most recent call last):
...
ValueError: We need some text to work with.
>>> lz77_compressor._find_encoding_token("abc", "").offset
0
"""
if not text:
raise ValueError("We need some text to work with.")
# Initialise result parameters to default values
length, offset = 0, 0
if not search_buffer:
return Token(offset, length, text[length])
for i, character in enumerate(search_buffer):
found_offset = len(search_buffer) - i
if character == text[0]:
found_length = self._match_length_from_index(text, search_buffer, 0, i)
# if the found length is bigger than the current or if it's equal,
                # which means its offset is smaller: update offset and length
if found_length >= length:
offset, length = found_offset, found_length
return Token(offset, length, text[length])
def _match_length_from_index(
self, text: str, window: str, text_index: int, window_index: int
) -> int:
"""Calculate the longest possible match of text and window characters from
text_index in text and window_index in window.
Args:
            text: the text being compressed
window: sliding window
text_index: index of character in text
window_index: index of character in sliding window
Returns:
The maximum match between text and window, from given indexes.
Tests:
>>> lz77_compressor = LZ77Compressor(13, 6)
>>> lz77_compressor._match_length_from_index("rarrad", "adabrar", 0, 4)
5
>>> lz77_compressor._match_length_from_index("adabrarrarrad",
... "cabrac", 0, 1)
1
"""
if not text or text[text_index] != window[window_index]:
return 0
return 1 + self._match_length_from_index(
text, window + text[text_index], text_index + 1, window_index + 1
)
if __name__ == "__main__":
from doctest import testmod
testmod()
# Initialize compressor class
lz77_compressor = LZ77Compressor(window_size=13, lookahead_buffer_size=6)
# Example
TEXT = "cabracadabrarrarrad"
compressed_text = lz77_compressor.compress(TEXT)
print(lz77_compressor.compress("ababcbababaa"))
decompressed_text = lz77_compressor.decompress(compressed_text)
    assert decompressed_text == TEXT, "The LZ77 algorithm returned an invalid result."
================================================
FILE: data_compression/peak_signal_to_noise_ratio.py
================================================
"""
Peak signal-to-noise ratio - PSNR
https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
Source:
https://tutorials.techonical.com/how-to-calculate-psnr-value-of-two-images-using-python
"""
import math
import os
import cv2
import numpy as np
PIXEL_MAX = 255.0
def peak_signal_to_noise_ratio(original: np.ndarray, contrast: np.ndarray) -> float:
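    # PSNR = 20 * log10(PIXEL_MAX / sqrt(MSE)); returns 100 when MSE == 0.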
mse = np.mean((original - contrast) ** 2)
if mse == 0:
return 100
return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))
def main() -> None:
dir_path = os.path.dirname(os.path.realpath(__file__))
# Loading images (original image and compressed image)
original = cv2.imread(os.path.join(dir_path, "image_data/original_image.png"))
contrast = cv2.imread(os.path.join(dir_path, "image_data/compressed_image.png"), 1)
original2 = cv2.imread(os.path.join(dir_path, "image_data/PSNR-example-base.png"))
contrast2 = cv2.imread(
os.path.join(dir_path, "image_data/PSNR-example-comp-10.jpg"), 1
)
# Value expected: 29.73dB
print("-- First Test --")
print(f"PSNR value is {peak_signal_to_noise_ratio(original, contrast)} dB")
    # Value expected: 31.53dB (Wikipedia Example)
print("\n-- Second Test --")
print(f"PSNR value is {peak_signal_to_noise_ratio(original2, contrast2)} dB")
if __name__ == "__main__":
main()
================================================
FILE: data_compression/run_length_encoding.py
================================================
# https://en.wikipedia.org/wiki/Run-length_encoding
def run_length_encode(text: str) -> list:
"""
Performs Run Length Encoding
>>> run_length_encode("AAAABBBCCDAA")
[('A', 4), ('B', 3), ('C', 2), ('D', 1), ('A', 2)]
>>> run_length_encode("A")
[('A', 1)]
>>> run_length_encode("AA")
[('A', 2)]
>>> run_length_encode("AAADDDDDDFFFCCCAAVVVV")
[('A', 3), ('D', 6), ('F', 3), ('C', 3), ('A', 2), ('V', 4)]
"""
encoded = []
count = 1
for i in range(len(text)):
if i + 1 < len(text) and text[i] == text[i + 1]:
count += 1
else:
encoded.append((text[i], count))
count = 1
return encoded
def run_length_decode(encoded: list) -> str:
"""
Performs Run Length Decoding
>>> run_length_decode([('A', 4), ('B', 3), ('C', 2), ('D', 1), ('A', 2)])
'AAAABBBCCDAA'
>>> run_length_decode([('A', 1)])
'A'
>>> run_length_decode([('A', 2)])
'AA'
>>> run_length_decode([('A', 3), ('D', 6), ('F', 3), ('C', 3), ('A', 2), ('V', 4)])
'AAADDDDDDFFFCCCAAVVVV'
"""
return "".join(char * length for char, length in encoded)
if __name__ == "__main__":
from doctest import testmod
testmod(name="run_length_encode", verbose=True)
testmod(name="run_length_decode", verbose=True)
================================================
FILE: data_structures/__init__.py
================================================
================================================
FILE: data_structures/arrays/__init__.py
================================================
================================================
FILE: data_structures/arrays/equilibrium_index_in_array.py
================================================
"""
Find the Equilibrium Index of an Array.
Reference: https://www.geeksforgeeks.org/equilibrium-index-of-an-array/
Python doctest can be run with the following command:
python -m doctest -v equilibrium_index_in_array.py
Given a sequence arr[] of size n, this function returns
an equilibrium index (if any) or -1 if no equilibrium index exists.
The equilibrium index of an array is an index such that the sum of
elements at lower indexes is equal to the sum of elements at higher indexes.
Example Input:
arr = [-7, 1, 5, 2, -4, 3, 0]
Output: 3
"""
def equilibrium_index(arr: list[int]) -> int:
"""
Find the equilibrium index of an array.
Args:
arr (list[int]): The input array of integers.
Returns:
int: The equilibrium index or -1 if no equilibrium index exists.
Examples:
>>> equilibrium_index([-7, 1, 5, 2, -4, 3, 0])
3
>>> equilibrium_index([1, 2, 3, 4, 5])
-1
>>> equilibrium_index([1, 1, 1, 1, 1])
2
>>> equilibrium_index([2, 4, 6, 8, 10, 3])
-1
"""
total_sum = sum(arr)
left_sum = 0
for i, value in enumerate(arr):
total_sum -= value
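        # total_sum is now the sum of the elements to the right of index i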
if left_sum == total_sum:
return i
left_sum += value
return -1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/arrays/find_triplets_with_0_sum.py
================================================
from itertools import combinations
def find_triplets_with_0_sum(nums: list[int]) -> list[list[int]]:
"""
Given a list of integers, return elements a, b, c such that a + b + c = 0.
Args:
nums: list of integers
Returns:
list of lists of integers where sum(each_list) == 0
Examples:
>>> find_triplets_with_0_sum([-1, 0, 1, 2, -1, -4])
[[-1, -1, 2], [-1, 0, 1]]
>>> find_triplets_with_0_sum([])
[]
>>> find_triplets_with_0_sum([0, 0, 0])
[[0, 0, 0]]
>>> find_triplets_with_0_sum([1, 2, 3, 0, -1, -2, -3])
[[-3, 0, 3], [-3, 1, 2], [-2, -1, 3], [-2, 0, 2], [-1, 0, 1]]
"""
return [
list(x)
for x in sorted({abc for abc in combinations(sorted(nums), 3) if not sum(abc)})
]
def find_triplets_with_0_sum_hashing(arr: list[int]) -> list[list[int]]:
"""
Function for finding the triplets with a given sum in the array using hashing.
Given a list of integers, return elements a, b, c such that a + b + c = 0.
Args:
        arr: list of integers
Returns:
list of lists of integers where sum(each_list) == 0
Examples:
>>> find_triplets_with_0_sum_hashing([-1, 0, 1, 2, -1, -4])
[[-1, 0, 1], [-1, -1, 2]]
>>> find_triplets_with_0_sum_hashing([])
[]
>>> find_triplets_with_0_sum_hashing([0, 0, 0])
[[0, 0, 0]]
>>> find_triplets_with_0_sum_hashing([1, 2, 3, 0, -1, -2, -3])
[[-1, 0, 1], [-3, 1, 2], [-2, 0, 2], [-2, -1, 3], [-3, 0, 3]]
Time complexity: O(N^2)
Auxiliary Space: O(N)
"""
target_sum = 0
# Initialize the final output array with blank.
output_arr = []
# Set the initial element as arr[i].
for index, item in enumerate(arr[:-2]):
# to store second elements that can complement the final sum.
set_initialize = set()
# current sum needed for reaching the target sum
current_sum = target_sum - item
# Traverse the subarray arr[i+1:].
for other_item in arr[index + 1 :]:
# required value for the second element
required_value = current_sum - other_item
# Verify if the desired value exists in the set.
if required_value in set_initialize:
# finding triplet elements combination.
combination_array = sorted([item, other_item, required_value])
if combination_array not in output_arr:
output_arr.append(combination_array)
# Include the current element in the set
# for subsequent complement verification.
set_initialize.add(other_item)
# Return all the triplet combinations.
return output_arr
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: data_structures/arrays/index_2d_array_in_1d.py
================================================
"""
Retrieves the value at a 0-indexed 1D index from a 2D array.
There are two ways to retrieve value(s):
1. Index2DArrayIterator(matrix) -> Iterator[int]
This iterator allows you to iterate through a 2D array by passing in the matrix and
calling next(your_iterator). You can also use the iterator in a loop.
Examples:
list(Index2DArrayIterator(matrix))
set(Index2DArrayIterator(matrix))
tuple(Index2DArrayIterator(matrix))
sum(Index2DArrayIterator(matrix))
-5 in Index2DArrayIterator(matrix)
2. index_2d_array_in_1d(array: list[int], index: int) -> int
This function allows you to provide a 2D array and a 0-indexed 1D integer index,
and retrieves the integer value at that index.
Python doctests can be run using this command:
python3 -m doctest -v index_2d_array_in_1d.py
"""
from collections.abc import Iterator
from dataclasses import dataclass
@dataclass
class Index2DArrayIterator:
matrix: list[list[int]]
def __iter__(self) -> Iterator[int]:
"""
>>> tuple(Index2DArrayIterator([[5], [-523], [-1], [34], [0]]))
(5, -523, -1, 34, 0)
>>> tuple(Index2DArrayIterator([[5, -523, -1], [34, 0]]))
(5, -523, -1, 34, 0)
>>> tuple(Index2DArrayIterator([[5, -523, -1, 34, 0]]))
(5, -523, -1, 34, 0)
>>> t = Index2DArrayIterator([[5, 2, 25], [23, 14, 5], [324, -1, 0]])
>>> tuple(t)
(5, 2, 25, 23, 14, 5, 324, -1, 0)
>>> list(t)
[5, 2, 25, 23, 14, 5, 324, -1, 0]
>>> sorted(t)
[-1, 0, 2, 5, 5, 14, 23, 25, 324]
>>> tuple(t)[3]
23
>>> sum(t)
397
>>> -1 in t
True
>>> t = iter(Index2DArrayIterator([[5], [-523], [-1], [34], [0]]))
>>> next(t)
5
>>> next(t)
-523
"""
for row in self.matrix:
yield from row
def index_2d_array_in_1d(array: list[list[int]], index: int) -> int:
"""
Retrieves the value of the one-dimensional index from a two-dimensional array.
Args:
array: A 2D array of integers where all rows are the same size and all
columns are the same size.
index: A 1D index.
Returns:
int: The 0-indexed value of the 1D index in the array.
Examples:
>>> index_2d_array_in_1d([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], 5)
5
>>> index_2d_array_in_1d([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], -1)
Traceback (most recent call last):
...
ValueError: index out of range
>>> index_2d_array_in_1d([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], 12)
Traceback (most recent call last):
...
ValueError: index out of range
>>> index_2d_array_in_1d([[]], 0)
Traceback (most recent call last):
...
ValueError: no items in array
"""
    rows = len(array)
    cols = len(array[0]) if rows > 0 else 0
    if rows == 0 or cols == 0:
        raise ValueError("no items in array")
if index < 0 or index >= rows * cols:
raise ValueError("index out of range")
return array[index // cols][index % cols]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/arrays/kth_largest_element.py
================================================
"""
Given an array of integers and an integer k, find the kth largest element in the array.
https://stackoverflow.com/questions/251781
"""
def partition(arr: list[int], low: int, high: int) -> int:
"""
Partitions list based on the pivot element.
    This function rearranges the elements of the input list 'arr' such that all
    elements greater than or equal to the chosen pivot are on its left side and
    all elements smaller than the pivot are on its right side.
Args:
arr: The list to be partitioned
low: The lower index of the list
high: The higher index of the list
Returns:
int: The index of pivot element after partitioning
Examples:
>>> partition([3, 1, 4, 5, 9, 2, 6, 5, 3, 5], 0, 9)
4
>>> partition([7, 1, 4, 5, 9, 2, 6, 5, 8], 0, 8)
1
>>> partition(['apple', 'cherry', 'date', 'banana'], 0, 3)
2
>>> partition([3.1, 1.2, 5.6, 4.7], 0, 3)
1
"""
pivot = arr[high]
i = low - 1
for j in range(low, high):
if arr[j] >= pivot:
i += 1
arr[i], arr[j] = arr[j], arr[i]
arr[i + 1], arr[high] = arr[high], arr[i + 1]
return i + 1
def kth_largest_element(arr: list[int], position: int) -> int:
"""
Finds the kth largest element in a list.
Should deliver similar results to:
```python
def kth_largest_element(arr, position):
return sorted(arr)[-position]
```
Args:
        arr: The list of numbers.
        position: The position of the desired kth largest element.
Returns:
int: The kth largest element.
Examples:
>>> kth_largest_element([3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5], 3)
5
>>> kth_largest_element([2, 5, 6, 1, 9, 3, 8, 4, 7, 3, 5], 1)
9
>>> kth_largest_element([2, 5, 6, 1, 9, 3, 8, 4, 7, 3, 5], -2)
Traceback (most recent call last):
...
ValueError: Invalid value of 'position'
>>> kth_largest_element([9, 1, 3, 6, 7, 9, 8, 4, 2, 4, 9], 110)
Traceback (most recent call last):
...
ValueError: Invalid value of 'position'
>>> kth_largest_element([1, 2, 4, 3, 5, 9, 7, 6, 5, 9, 3], 0)
Traceback (most recent call last):
...
ValueError: Invalid value of 'position'
>>> kth_largest_element(['apple', 'cherry', 'date', 'banana'], 2)
'cherry'
>>> kth_largest_element([3.1, 1.2, 5.6, 4.7,7.9,5,0], 2)
5.6
>>> kth_largest_element([-2, -5, -4, -1], 1)
-1
>>> kth_largest_element([], 1)
-1
>>> kth_largest_element([3.1, 1.2, 5.6, 4.7, 7.9, 5, 0], 1.5)
Traceback (most recent call last):
...
ValueError: The position should be an integer
>>> kth_largest_element((4, 6, 1, 2), 4)
Traceback (most recent call last):
...
TypeError: 'tuple' object does not support item assignment
"""
if not arr:
return -1
if not isinstance(position, int):
raise ValueError("The position should be an integer")
if not 1 <= position <= len(arr):
raise ValueError("Invalid value of 'position'")
low, high = 0, len(arr) - 1
while low <= high:
if low > len(arr) - 1 or high < 0:
return -1
pivot_index = partition(arr, low, high)
if pivot_index == position - 1:
return arr[pivot_index]
elif pivot_index > position - 1:
high = pivot_index - 1
else:
low = pivot_index + 1
return -1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/arrays/median_two_array.py
================================================
"""
https://www.enjoyalgorithms.com/blog/median-of-two-sorted-arrays
"""
def find_median_sorted_arrays(nums1: list[int], nums2: list[int]) -> float:
"""
Find the median of two arrays.
Args:
nums1: The first array.
nums2: The second array.
Returns:
The median of the two arrays.
Examples:
>>> find_median_sorted_arrays([1, 3], [2])
2.0
>>> find_median_sorted_arrays([1, 2], [3, 4])
2.5
>>> find_median_sorted_arrays([0, 0], [0, 0])
0.0
>>> find_median_sorted_arrays([], [])
Traceback (most recent call last):
...
ValueError: Both input arrays are empty.
>>> find_median_sorted_arrays([], [1])
1.0
>>> find_median_sorted_arrays([-1000], [1000])
0.0
>>> find_median_sorted_arrays([-1.1, -2.2], [-3.3, -4.4])
-2.75
"""
if not nums1 and not nums2:
raise ValueError("Both input arrays are empty.")
# Merge the arrays into a single sorted array.
merged = sorted(nums1 + nums2)
total = len(merged)
if total % 2 == 1: # If the total number of elements is odd
return float(merged[total // 2]) # then return the middle element
# If the total number of elements is even, calculate
# the average of the two middle elements as the median.
middle1 = merged[total // 2 - 1]
middle2 = merged[total // 2]
return (float(middle1) + float(middle2)) / 2.0
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/arrays/monotonic_array.py
================================================
# https://leetcode.com/problems/monotonic-array/
def is_monotonic(nums: list[int]) -> bool:
"""
Check if a list is monotonic.
>>> is_monotonic([1, 2, 2, 3])
True
>>> is_monotonic([6, 5, 4, 4])
True
>>> is_monotonic([1, 3, 2])
False
>>> is_monotonic([1,2,3,4,5,6,5])
False
>>> is_monotonic([-3,-2,-1])
True
>>> is_monotonic([-5,-6,-7])
True
>>> is_monotonic([0,0,0])
True
>>> is_monotonic([-100,0,100])
True
"""
return all(nums[i] <= nums[i + 1] for i in range(len(nums) - 1)) or all(
nums[i] >= nums[i + 1] for i in range(len(nums) - 1)
)
if __name__ == "__main__":
# Test the function with your examples
print(is_monotonic([1, 2, 2, 3])) # Output: True
print(is_monotonic([6, 5, 4, 4])) # Output: True
print(is_monotonic([1, 3, 2])) # Output: False
import doctest
doctest.testmod()
================================================
FILE: data_structures/arrays/pairs_with_given_sum.py
================================================
#!/usr/bin/env python3
"""
Given an array of integers and an integer req_sum, find the number of pairs of array
elements whose sum is equal to req_sum.
https://practice.geeksforgeeks.org/problems/count-pairs-with-given-sum5022/0
"""
from itertools import combinations
def pairs_with_sum(arr: list, req_sum: int) -> int:
"""
    Return the number of pairs whose sum equals req_sum.
>>> pairs_with_sum([1, 5, 7, 1], 6)
2
>>> pairs_with_sum([1, 1, 1, 1, 1, 1, 1, 1], 2)
28
>>> pairs_with_sum([1, 7, 6, 2, 5, 4, 3, 1, 9, 8], 7)
4
"""
return len([1 for a, b in combinations(arr, 2) if a + b == req_sum])
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: data_structures/arrays/permutations.py
================================================
def permute_recursive(nums: list[int]) -> list[list[int]]:
"""
Return all permutations.
>>> permute_recursive([1, 2, 3])
[[3, 2, 1], [2, 3, 1], [1, 3, 2], [3, 1, 2], [2, 1, 3], [1, 2, 3]]
"""
result: list[list[int]] = []
if len(nums) == 0:
return [[]]
for _ in range(len(nums)):
n = nums.pop(0)
permutations = permute_recursive(nums.copy())
for perm in permutations:
perm.append(n)
result.extend(permutations)
nums.append(n)
return result
def permute_backtrack(nums: list[int]) -> list[list[int]]:
"""
Return all permutations of the given list.
>>> permute_backtrack([1, 2, 3])
[[1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1], [3, 2, 1], [3, 1, 2]]
"""
def backtrack(start: int) -> None:
if start == len(nums) - 1:
output.append(nums[:])
else:
for i in range(start, len(nums)):
nums[start], nums[i] = nums[i], nums[start]
backtrack(start + 1)
nums[start], nums[i] = nums[i], nums[start] # backtrack
output: list[list[int]] = []
backtrack(0)
return output
if __name__ == "__main__":
import doctest
result = permute_backtrack([1, 2, 3])
print(result)
doctest.testmod()
================================================
FILE: data_structures/arrays/prefix_sum.py
================================================
"""
Author : Alexander Pantyukhin
Date : November 3, 2022
Implement the class of prefix sum with useful functions based on it.
"""
class PrefixSum:
def __init__(self, array: list[int]) -> None:
len_array = len(array)
self.prefix_sum = [0] * len_array
if len_array > 0:
self.prefix_sum[0] = array[0]
for i in range(1, len_array):
self.prefix_sum[i] = self.prefix_sum[i - 1] + array[i]
def get_sum(self, start: int, end: int) -> int:
"""
The function returns the sum of array from the start to the end indexes.
Runtime : O(1)
Space: O(1)
>>> PrefixSum([1,2,3]).get_sum(0, 2)
6
>>> PrefixSum([1,2,3]).get_sum(1, 2)
5
>>> PrefixSum([1,2,3]).get_sum(2, 2)
3
>>> PrefixSum([]).get_sum(0, 0)
Traceback (most recent call last):
...
ValueError: The array is empty.
>>> PrefixSum([1,2,3]).get_sum(-1, 2)
Traceback (most recent call last):
...
ValueError: Invalid range specified.
>>> PrefixSum([1,2,3]).get_sum(2, 3)
Traceback (most recent call last):
...
ValueError: Invalid range specified.
>>> PrefixSum([1,2,3]).get_sum(2, 1)
Traceback (most recent call last):
...
ValueError: Invalid range specified.
"""
if not self.prefix_sum:
raise ValueError("The array is empty.")
if start < 0 or end >= len(self.prefix_sum) or start > end:
raise ValueError("Invalid range specified.")
if start == 0:
return self.prefix_sum[end]
return self.prefix_sum[end] - self.prefix_sum[start - 1]
def contains_sum(self, target_sum: int) -> bool:
"""
The function returns True if array contains the target_sum,
False otherwise.
Runtime : O(n)
Space: O(n)
>>> PrefixSum([1,2,3]).contains_sum(6)
True
>>> PrefixSum([1,2,3]).contains_sum(5)
True
>>> PrefixSum([1,2,3]).contains_sum(3)
True
>>> PrefixSum([1,2,3]).contains_sum(4)
False
>>> PrefixSum([1,2,3]).contains_sum(7)
False
>>> PrefixSum([1,-2,3]).contains_sum(2)
True
"""
sums = {0}
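        # A subarray with sum target_sum exists iff some prefix sum exceeds an
        # earlier prefix sum (or 0) by exactly target_sum.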
for sum_item in self.prefix_sum:
if sum_item - target_sum in sums:
return True
sums.add(sum_item)
return False
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/arrays/product_sum.py
================================================
"""
Calculate the Product Sum from a Special Array.
reference: https://dev.to/sfrasica/algorithms-product-sum-from-an-array-dc6
Python doctests can be run with the following command:
python -m doctest -v product_sum.py
Calculate the product sum of a "special" array which can contain integers or nested
arrays. The product sum is obtained by adding all elements and multiplying by their
respective depths.
For example, in the array [x, y], the product sum is (x + y). In the array [x, [y, z]],
the product sum is x + 2 * (y + z). In the array [x, [y, [z]]],
the product sum is x + 2 * (y + 3z).
Example Input:
[5, 2, [-7, 1], 3, [6, [-13, 8], 4]]
Output: 12
"""
def product_sum(arr: list[int | list], depth: int) -> int:
"""
Recursively calculates the product sum of an array.
The product sum of an array is defined as the sum of its elements multiplied by
their respective depths. If an element is a list, its product sum is calculated
recursively by multiplying the sum of its elements with its depth plus one.
Args:
arr: The array of integers and nested lists.
depth: The current depth level.
Returns:
int: The product sum of the array.
Examples:
>>> product_sum([1, 2, 3], 1)
6
>>> product_sum([-1, 2, [-3, 4]], 2)
8
>>> product_sum([1, 2, 3], -1)
-6
>>> product_sum([1, 2, 3], 0)
0
>>> product_sum([1, 2, 3], 7)
42
>>> product_sum((1, 2, 3), 7)
42
>>> product_sum({1, 2, 3}, 7)
42
>>> product_sum([1, -1], 1)
0
>>> product_sum([1, -2], 1)
-1
>>> product_sum([-3.5, [1, [0.5]]], 1)
1.5
"""
total_sum = 0
for ele in arr:
total_sum += product_sum(ele, depth + 1) if isinstance(ele, list) else ele
return total_sum * depth
def product_sum_array(array: list[int | list]) -> int:
"""
Calculates the product sum of an array.
Args:
array (List[Union[int, List]]): The array of integers and nested lists.
Returns:
int: The product sum of the array.
Examples:
>>> product_sum_array([1, 2, 3])
6
>>> product_sum_array([1, [2, 3]])
11
>>> product_sum_array([1, [2, [3, 4]]])
47
>>> product_sum_array([0])
0
>>> product_sum_array([-3.5, [1, [0.5]]])
1.5
>>> product_sum_array([1, -2])
-1
"""
return product_sum(array, 1)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/arrays/rotate_array.py
================================================
def rotate_array(arr: list[int], steps: int) -> list[int]:
"""
Rotates a list to the right by steps positions.
Parameters:
arr (List[int]): The list of integers to rotate.
steps (int): Number of positions to rotate. Can be negative for left rotation.
Returns:
List[int]: Rotated list.
Examples:
>>> rotate_array([1, 2, 3, 4, 5], 2)
[4, 5, 1, 2, 3]
>>> rotate_array([1, 2, 3, 4, 5], -2)
[3, 4, 5, 1, 2]
>>> rotate_array([1, 2, 3, 4, 5], 7)
[4, 5, 1, 2, 3]
>>> rotate_array([], 3)
[]
"""
n = len(arr)
if n == 0:
return arr
steps = steps % n
if steps < 0:
steps += n
def reverse(start: int, end: int) -> None:
"""
Reverses a portion of the list in place from index start to end.
Parameters:
start (int): Starting index of the portion to reverse.
end (int): Ending index of the portion to reverse.
Returns:
None
Examples:
>>> example = [1, 2, 3, 4, 5]
>>> def reverse_test(arr, start, end):
... while start < end:
... arr[start], arr[end] = arr[end], arr[start]
... start += 1
... end -= 1
>>> reverse_test(example, 0, 2)
>>> example
[3, 2, 1, 4, 5]
>>> reverse_test(example, 2, 4)
>>> example
[3, 2, 5, 4, 1]
"""
while start < end:
arr[start], arr[end] = arr[end], arr[start]
start += 1
end -= 1
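    # Three-reversal trick: reverse the whole list, then the first `steps`
    # elements, then the remainder, to rotate the list right by `steps`.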
reverse(0, n - 1)
reverse(0, steps - 1)
reverse(steps, n - 1)
return arr
if __name__ == "__main__":
examples = [
([1, 2, 3, 4, 5], 2),
([1, 2, 3, 4, 5], -2),
([1, 2, 3, 4, 5], 7),
([], 3),
]
for arr, steps in examples:
rotated = rotate_array(arr.copy(), steps)
print(f"Rotate {arr} by {steps}: {rotated}")
================================================
FILE: data_structures/arrays/sparse_table.py
================================================
"""
Sparse table is a data structure that allows answering range queries on
a static number list, i.e. the elements do not change throughout all the queries.
The implementation below will solve the problem of Range Minimum Query:
Finding the minimum value of a subset [L..R] of a static number list.
Overall time complexity: O(nlogn)
Overall space complexity: O(nlogn)
Wikipedia link: https://en.wikipedia.org/wiki/Range_minimum_query
"""
from math import log2
def build_sparse_table(number_list: list[int]) -> list[list[int]]:
"""
Precompute range minimum queries with power of two length and store the precomputed
values in a table.
>>> build_sparse_table([8, 1, 0, 3, 4, 9, 3])
[[8, 1, 0, 3, 4, 9, 3], [1, 0, 0, 3, 4, 3, 0], [0, 0, 0, 3, 0, 0, 0]]
>>> build_sparse_table([3, 1, 9])
[[3, 1, 9], [1, 1, 0]]
>>> build_sparse_table([])
Traceback (most recent call last):
...
ValueError: empty number list not allowed
"""
if not number_list:
raise ValueError("empty number list not allowed")
length = len(number_list)
# Initialise sparse_table -- sparse_table[j][i] represents the minimum value of the
# subset of length (2 ** j) of number_list, starting from index i.
# smallest power of 2 subset length that fully covers number_list
row = int(log2(length)) + 1
sparse_table = [[0 for i in range(length)] for j in range(row)]
# minimum of subset of length 1 is that value itself
for i, value in enumerate(number_list):
sparse_table[0][i] = value
j = 1
# compute the minimum value for all intervals with size (2 ** j)
while (1 << j) <= length:
i = 0
# while subset starting from i still have at least (2 ** j) elements
while (i + (1 << j) - 1) < length:
# split range [i, i + 2 ** j] and find minimum of 2 halves
sparse_table[j][i] = min(
sparse_table[j - 1][i + (1 << (j - 1))], sparse_table[j - 1][i]
)
i += 1
j += 1
return sparse_table
def query(sparse_table: list[list[int]], left_bound: int, right_bound: int) -> int:
"""
>>> query(build_sparse_table([8, 1, 0, 3, 4, 9, 3]), 0, 4)
0
>>> query(build_sparse_table([8, 1, 0, 3, 4, 9, 3]), 4, 6)
3
>>> query(build_sparse_table([3, 1, 9]), 2, 2)
9
>>> query(build_sparse_table([3, 1, 9]), 0, 1)
1
>>> query(build_sparse_table([8, 1, 0, 3, 4, 9, 3]), 0, 11)
Traceback (most recent call last):
...
IndexError: list index out of range
>>> query(build_sparse_table([]), 0, 0)
Traceback (most recent call last):
...
ValueError: empty number list not allowed
"""
if left_bound < 0 or right_bound >= len(sparse_table[0]):
raise IndexError("list index out of range")
# highest subset length of power of 2 that is within range [left_bound, right_bound]
j = int(log2(right_bound - left_bound + 1))
# minimum of 2 overlapping smaller subsets:
# [left_bound, left_bound + 2 ** j - 1] and [right_bound - 2 ** j + 1, right_bound]
return min(sparse_table[j][right_bound - (1 << j) + 1], sparse_table[j][left_bound])
if __name__ == "__main__":
from doctest import testmod
testmod()
print(f"{query(build_sparse_table([3, 1, 9]), 2, 2) = }")
================================================
FILE: data_structures/arrays/sudoku_solver.py
================================================
"""
Please do not modify this file! It is published at https://norvig.com/sudoku.html with
only minimal changes to work with modern versions of Python. If you have improvements,
please make them in a separate file.
"""
import random
import time
def cross(items_a, items_b):
"""
Cross product of elements in A and elements in B.
>>> cross('AB', '12')
['A1', 'A2', 'B1', 'B2']
>>> cross('ABC', '123')
['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'C1', 'C2', 'C3']
>>> cross('ABC', '1234')
['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4', 'C1', 'C2', 'C3', 'C4']
>>> cross('', '12')
[]
>>> cross('A', '')
[]
>>> cross('', '')
[]
"""
return [a + b for a in items_a for b in items_b]
digits = "123456789"
rows = "ABCDEFGHI"
cols = digits
squares = cross(rows, cols)
unitlist = (
[cross(rows, c) for c in cols]
+ [cross(r, cols) for r in rows]
+ [cross(rs, cs) for rs in ("ABC", "DEF", "GHI") for cs in ("123", "456", "789")]
)
units = {s: [u for u in unitlist if s in u] for s in squares}
peers = {s: {x for u in units[s] for x in u} - {s} for s in squares}
def test():
"""A set of unit tests."""
assert len(squares) == 81
assert len(unitlist) == 27
assert all(len(units[s]) == 3 for s in squares)
assert all(len(peers[s]) == 20 for s in squares)
assert units["C2"] == [
["A2", "B2", "C2", "D2", "E2", "F2", "G2", "H2", "I2"],
["C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"],
["A1", "A2", "A3", "B1", "B2", "B3", "C1", "C2", "C3"],
]
# fmt: off
assert peers["C2"] == {
"A2", "B2", "D2", "E2", "F2", "G2", "H2", "I2", "C1", "C3",
"C4", "C5", "C6", "C7", "C8", "C9", "A1", "A3", "B1", "B3"
}
# fmt: on
print("All tests pass.")
def parse_grid(grid):
"""
Convert grid to a dict of possible values, {square: digits}, or
return False if a contradiction is detected.
"""
## To start, every square can be any digit; then assign values from the grid.
values = dict.fromkeys(squares, digits)
for s, d in grid_values(grid).items():
if d in digits and not assign(values, s, d):
return False ## (Fail if we can't assign d to square s.)
return values
def grid_values(grid):
"""
Convert grid into a dict of {square: char} with '0' or '.' for empties.
"""
chars = [c for c in grid if c in digits or c in "0."]
assert len(chars) == 81
return dict(zip(squares, chars))
def assign(values, s, d):
"""
Eliminate all the other values (except d) from values[s] and propagate.
Return values, except return False if a contradiction is detected.
"""
other_values = values[s].replace(d, "")
if all(eliminate(values, s, d2) for d2 in other_values):
return values
else:
return False
def eliminate(values, s, d):
"""
Eliminate d from values[s]; propagate when values or places <= 2.
Return values, except return False if a contradiction is detected.
"""
if d not in values[s]:
return values ## Already eliminated
values[s] = values[s].replace(d, "")
## (1) If a square s is reduced to one value d2, then eliminate d2 from the peers.
if len(values[s]) == 0:
return False ## Contradiction: removed last value
elif len(values[s]) == 1:
d2 = values[s]
if not all(eliminate(values, s2, d2) for s2 in peers[s]):
return False
## (2) If a unit u is reduced to only one place for a value d, then put it there.
for u in units[s]:
dplaces = [s for s in u if d in values[s]]
if len(dplaces) == 0:
return False ## Contradiction: no place for this value
# d can only be in one place in unit; assign it there
elif len(dplaces) == 1 and not assign(values, dplaces[0], d):
return False
return values
def display(values):
"""
Display these values as a 2-D grid.
"""
width = 1 + max(len(values[s]) for s in squares)
line = "+".join(["-" * (width * 3)] * 3)
for r in rows:
print(
"".join(
values[r + c].center(width) + ("|" if c in "36" else "") for c in cols
)
)
if r in "CF":
print(line)
print()
def solve(grid):
"""
Solve the grid.
"""
return search(parse_grid(grid))
def some(seq):
"""Return some element of seq that is true."""
for e in seq:
if e:
return e
return False
def search(values):
"""
Using depth-first search and propagation, try all possible values.
"""
if values is False:
return False ## Failed earlier
if all(len(values[s]) == 1 for s in squares):
return values ## Solved!
## Chose the unfilled square s with the fewest possibilities
_n, s = min((len(values[s]), s) for s in squares if len(values[s]) > 1)
return some(search(assign(values.copy(), s, d)) for d in values[s])
def solve_all(grids, name="", showif=0.0):
"""
Attempt to solve a sequence of grids. Report results.
When showif is a number of seconds, display puzzles that take longer.
When showif is None, don't display any puzzles.
"""
def time_solve(grid):
start = time.monotonic()
values = solve(grid)
t = time.monotonic() - start
## Display puzzles that take long enough
if showif is not None and t > showif:
display(grid_values(grid))
if values:
display(values)
print(f"({t:.5f} seconds)\n")
return (t, solved(values))
times, results = zip(*[time_solve(grid) for grid in grids])
if (n := len(grids)) > 1:
print(
"Solved %d of %d %s puzzles (avg %.2f secs (%d Hz), max %.2f secs)." # noqa: UP031
% (sum(results), n, name, sum(times) / n, n / sum(times), max(times))
)
def solved(values):
"""
A puzzle is solved if each unit is a permutation of the digits 1 to 9.
"""
def unitsolved(unit):
return {values[s] for s in unit} == set(digits)
return values is not False and all(unitsolved(unit) for unit in unitlist)
def from_file(filename, sep="\n"):
"Parse a file into a list of strings, separated by sep."
with open(filename) as file:
return file.read().strip().split(sep)
def random_puzzle(assignments=17):
"""
Make a random puzzle with N or more assignments. Restart on contradictions.
Note the resulting puzzle is not guaranteed to be solvable, but empirically
about 99.8% of them are solvable. Some have multiple solutions.
"""
values = dict.fromkeys(squares, digits)
for s in shuffled(squares):
if not assign(values, s, random.choice(values[s])):
break
ds = [values[s] for s in squares if len(values[s]) == 1]
if len(ds) >= assignments and len(set(ds)) >= 8:
return "".join(values[s] if len(values[s]) == 1 else "." for s in squares)
return random_puzzle(assignments) ## Give up and make a new puzzle
def shuffled(seq):
"""
Return a randomly shuffled copy of the input sequence.
"""
seq = list(seq)
random.shuffle(seq)
return seq
grid1 = (
"003020600900305001001806400008102900700000008006708200002609500800203009005010300"
)
grid2 = (
"4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......"
)
hard1 = (
".....6....59.....82....8....45........3........6..3.54...325..6.................."
)
if __name__ == "__main__":
test()
# solve_all(from_file("easy50.txt", '========'), "easy", None)
# solve_all(from_file("top95.txt"), "hard", None)
# solve_all(from_file("hardest.txt"), "hardest", None)
solve_all([random_puzzle() for _ in range(99)], "random", 100.0)
for puzzle in (grid1, grid2): # , hard1): # Takes 22 sec to solve on my M1 Mac.
display(parse_grid(puzzle))
start = time.monotonic()
solve(puzzle)
t = time.monotonic() - start
print(f"Solved: {t:.5f} sec")
================================================
FILE: data_structures/binary_tree/README.md
================================================
# Binary Tree Traversal
## Overview
Binary trees are a classic data structure and traversal is a classic algorithm; many classic problems involve the combination of the two, either directly or indirectly.
> If you can grasp the traversal of binary trees, the traversal of other complicated trees will be easy for you.
The following are some common ways to traverse trees.
- Depth First Traversals (DFS): In-order, Pre-order, Post-order
- Level Order Traversal, also called Breadth First Traversal (BFS)
There are applications for both DFS and BFS.
A stack can be used to simplify the process of DFS traversal. Moreover, since a tree is a recursive data structure, recursion and an explicit stack are the two key tools for DFS.
Graph for DFS:

The key point of BFS is determining whether the traversal of each level has been completed. The answer is to use a sentinel value as a flag that marks the end of the current level.
## Pre-order Traversal
The traversal order of pre-order traversal is `root-left-right`.
Algorithm Pre-order
1. Visit the root node and push it into a stack.
2. Pop a node from the stack, and push its right and left child nodes into the stack, in that order.
3. Repeat step 2.
Conclusion: This problem involves the classic recursive data structure (i.e. a binary tree), and the algorithm above demonstrates how a simplified solution can be reached by using a stack.
If you look at the bigger picture, you'll find that the process of traversal is as follows: `visit the left subtrees from top to bottom, and visit the right subtrees from bottom to top`. If we implement it from this perspective, things are somewhat different. For the `top to bottom` part we can simply use recursion, and for the `bottom to top` part we can turn to a stack. A minimal sketch of the stack-based version is shown below.
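As a concrete illustration, here is a minimal sketch of the stack-based pre-order traversal, assuming a simple `Node` class with `val`, `left` and `right` attributes (the names are chosen for illustration only):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class Node:
    val: int
    left: Optional["Node"] = None
    right: Optional["Node"] = None


def preorder_iterative(root: Optional[Node]) -> list[int]:
    """Return node values in root-left-right order using an explicit stack."""
    if root is None:
        return []
    result, stack = [], [root]
    while stack:
        node = stack.pop()
        result.append(node.val)        # visit the node when it is popped
        if node.right:                 # push the right child first ...
            stack.append(node.right)
        if node.left:                  # ... so the left child is popped (visited) first
            stack.append(node.left)
    return result


#        1
#       / \
#      2   3
#     / \
#    4   5
root = Node(1, Node(2, Node(4), Node(5)), Node(3))
print(preorder_iterative(root))  # [1, 2, 4, 5, 3]
```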
## In-order Traversal
The traversal order of in-order traversal is `left-root-right`.
So the root node is not printed first. Things are getting a bit complicated here.
Algorithm In-order
1. Visit the root and push it into a stack.
2. If there is a left child node, push it into the stack. Repeat this process until a leaf node is reached.
> At this point the root node and all the left nodes are in the stack.
3. Start popping nodes from the stack. If a node has a right child node, push the child node into the stack. Repeat step 2.
It's worth pointing out that the in-order traversal of a binary search tree (BST) yields a sorted sequence, which is helpful for coming up with simplified solutions to some problems. A stack-based sketch follows.
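The following minimal sketch implements the algorithm above, reusing the illustrative `Node` class from the pre-order sketch:

```python
def inorder_iterative(root):
    """Return node values in left-root-right order using an explicit stack."""
    # `Node` is the illustrative dataclass defined in the pre-order sketch above.
    result, stack = [], []
    node = root
    while stack or node:
        while node:                 # walk down the left spine, stacking nodes
            stack.append(node)
            node = node.left
        node = stack.pop()          # the deepest not-yet-visited node
        result.append(node.val)     # visit it ...
        node = node.right           # ... then continue with its right subtree
    return result


root = Node(1, Node(2, Node(4), Node(5)), Node(3))
print(inorder_iterative(root))  # [4, 2, 5, 1, 3]
```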
## Post-order Traversal
The traversal order of post-order traversal is `left-right-root`.
This one is a bit of a challenge. It deserves the `hard` tag of LeetCode.
In this case, the root node is printed not as the first but the last one. A cunning way to do it is to:
Record whether the current node has been visited. If 1) it's a leaf node or 2) both its left and right subtrees have been traversed, then it can be popped from the stack.
As for `1) it's a leaf node`, you can easily tell whether a node is a leaf if both its left and right are `null`.
As for `2) both its left and right subtrees have been traversed`, we only need a variable to record whether a node has been visited or not. In the worst case, we need to record the status for every single node and the space complexity is `O(n)`. But if you think about it, since we are using a stack and start printing the result from the leaf nodes, it is enough to record the status for only the node currently being popped from the stack, reducing the extra space to `O(1)`. A sketch using the simpler per-entry flag is shown below.
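As a concrete illustration, here is a sketch of the stack-based post-order traversal. For simplicity it keeps a visited flag on every stack entry rather than the `O(1)` optimization described above, and it reuses the illustrative `Node` class from the pre-order sketch:

```python
def postorder_iterative(root):
    """Return node values in left-right-root order using (node, visited) stack entries."""
    # `Node` is the illustrative dataclass defined in the pre-order sketch above.
    result = []
    stack = [(root, False)] if root else []
    while stack:
        node, visited = stack.pop()
        if visited:                       # both subtrees already handled: emit the node
            result.append(node.val)
        else:
            stack.append((node, True))    # re-push the node, now marked as visited
            if node.right:                # right is pushed before left ...
                stack.append((node.right, False))
            if node.left:                 # ... so the left subtree is processed first
                stack.append((node.left, False))
    return result


root = Node(1, Node(2, Node(4), Node(5)), Node(3))
print(postorder_iterative(root))  # [4, 5, 2, 3, 1]
```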
## Level Order Traversal
The key point of level order traversal is knowing when the traversal of each level is done. The answer is to use a sentinel value as a flag representing the end of the current level.

Algorithm Level-order
1. Visit the root node, put it in a FIFO queue, put in the queue a special flag (we are using `null` here).
2. Dequeue a node.
3. If the node equals `null`, it means that all nodes of the current level have been visited. If the queue is empty, we do nothing; otherwise we put in another `null`.
4. If the node is not `null`, the traversal of the current level has not finished yet, so we enqueue its left child and right child respectively (see the sketch after this list).
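A minimal sketch of this sentinel-based level order traversal, reusing the illustrative `Node` class from the pre-order sketch and using Python's `None` as the `null` flag:

```python
from collections import deque


def level_order_with_sentinel(root):
    """Return a list of levels, using None as the end-of-level flag."""
    # `Node` is the illustrative dataclass defined in the pre-order sketch above.
    if root is None:
        return []
    queue = deque([root, None])          # None marks the end of the current level
    levels, current = [], []
    while queue:
        node = queue.popleft()
        if node is None:                 # current level finished
            levels.append(current)
            current = []
            if queue:                    # more nodes remain: mark the next level's end
                queue.append(None)
        else:
            current.append(node.val)
            if node.left:
                queue.append(node.left)
            if node.right:
                queue.append(node.right)
    return levels


root = Node(1, Node(2, Node(4), Node(5)), Node(3))
print(level_order_with_sentinel(root))  # [[1], [2, 3], [4, 5]]
```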
## Bi-color marking
We know that there is tri-color marking in garbage collection algorithms, which works as described below.
- The white color represents "not visited".
- The gray color represents "not all child nodes visited".
- The black color represents "all child nodes visited".
Enlightened by tri-color marking, a bi-color marking method can be invented to solve all three traversal problems with one solution.
The core idea is as follows.
- Use a color to mark whether a node has been visited or not. Nodes yet to be visited are marked as white and visited nodes are marked as gray.
- If we are visiting a white node, turn it gray, then push its right child node, itself, and its left child node into the stack, in that order.
- If we are visiting a gray node, print it.
Pre-order and post-order traversal can be implemented in the same way simply by changing the order in which the nodes are pushed onto the stack. A sketch of the bi-color (in-order) version follows.
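The following minimal sketch shows the bi-color marking idea for in-order traversal, reusing the illustrative `Node` class from the pre-order sketch:

```python
WHITE, GRAY = 0, 1  # WHITE = not yet expanded, GRAY = ready to be emitted


def inorder_bicolor(root):
    """In-order traversal using bi-color marking on an explicit stack."""
    # `Node` is the illustrative dataclass defined in the pre-order sketch above.
    result, stack = [], [(WHITE, root)]
    while stack:
        color, node = stack.pop()
        if node is None:
            continue
        if color == WHITE:                       # first time we see this node: expand it
            stack.append((WHITE, node.right))    # pushed first, so popped last
            stack.append((GRAY, node))           # the node itself, marked gray
            stack.append((WHITE, node.left))     # pushed last, so popped first
        else:                                    # gray: already expanded, just emit it
            result.append(node.val)
    return result


root = Node(1, Node(2, Node(4), Node(5)), Node(3))
print(inorder_bicolor(root))  # [4, 2, 5, 1, 3]
# Changing the push order of the three entries yields pre-order or post-order instead.
```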
Reference: [LeetCode](https://github.com/azl397985856/leetcode/blob/master/thinkings/binary-tree-traversal.en.md)
================================================
FILE: data_structures/binary_tree/__init__.py
================================================
================================================
FILE: data_structures/binary_tree/avl_tree.py
================================================
"""
Implementation of an auto-balanced binary tree!
For doctests run following command:
python3 -m doctest -v avl_tree.py
For testing run:
python avl_tree.py
"""
from __future__ import annotations
import math
import random
from typing import Any
class MyQueue:
def __init__(self) -> None:
self.data: list[Any] = []
self.head: int = 0
self.tail: int = 0
def is_empty(self) -> bool:
return self.head == self.tail
def push(self, data: Any) -> None:
self.data.append(data)
self.tail = self.tail + 1
def pop(self) -> Any:
ret = self.data[self.head]
self.head = self.head + 1
return ret
def count(self) -> int:
return self.tail - self.head
def print_queue(self) -> None:
print(self.data)
print("**************")
print(self.data[self.head : self.tail])
class MyNode:
def __init__(self, data: Any) -> None:
self.data = data
self.left: MyNode | None = None
self.right: MyNode | None = None
self.height: int = 1
def get_data(self) -> Any:
return self.data
def get_left(self) -> MyNode | None:
return self.left
def get_right(self) -> MyNode | None:
return self.right
def get_height(self) -> int:
return self.height
def set_data(self, data: Any) -> None:
self.data = data
def set_left(self, node: MyNode | None) -> None:
self.left = node
def set_right(self, node: MyNode | None) -> None:
self.right = node
def set_height(self, height: int) -> None:
self.height = height
def get_height(node: MyNode | None) -> int:
if node is None:
return 0
return node.get_height()
def my_max(a: int, b: int) -> int:
if a > b:
return a
return b
def right_rotation(node: MyNode) -> MyNode:
r"""
A B
/ \ / \
B C Bl A
/ \ --> / / \
Bl Br UB Br C
/
UB
UB = unbalanced node
"""
print("left rotation node:", node.get_data())
ret = node.get_left()
assert ret is not None
node.set_left(ret.get_right())
ret.set_right(node)
h1 = my_max(get_height(node.get_right()), get_height(node.get_left())) + 1
node.set_height(h1)
h2 = my_max(get_height(ret.get_right()), get_height(ret.get_left())) + 1
ret.set_height(h2)
return ret
def left_rotation(node: MyNode) -> MyNode:
"""
a mirror-symmetric rotation of right_rotation
"""
print("right rotation node:", node.get_data())
ret = node.get_right()
assert ret is not None
node.set_right(ret.get_left())
ret.set_left(node)
h1 = my_max(get_height(node.get_right()), get_height(node.get_left())) + 1
node.set_height(h1)
h2 = my_max(get_height(ret.get_right()), get_height(ret.get_left())) + 1
ret.set_height(h2)
return ret
def lr_rotation(node: MyNode) -> MyNode:
r"""
A A Br
/ \ / \ / \
B C LR Br C RR B A
/ \ --> / \ --> / / \
Bl Br B UB Bl UB C
\ /
UB Bl
RR = right_rotation LR = left_rotation
"""
left_child = node.get_left()
assert left_child is not None
node.set_left(left_rotation(left_child))
return right_rotation(node)
def rl_rotation(node: MyNode) -> MyNode:
right_child = node.get_right()
assert right_child is not None
node.set_right(right_rotation(right_child))
return left_rotation(node)
def insert_node(node: MyNode | None, data: Any) -> MyNode | None:
if node is None:
return MyNode(data)
if data < node.get_data():
node.set_left(insert_node(node.get_left(), data))
if (
get_height(node.get_left()) - get_height(node.get_right()) == 2
): # an unbalance detected
left_child = node.get_left()
assert left_child is not None
if (
data < left_child.get_data()
): # new node is the left child of the left child
node = right_rotation(node)
else:
node = lr_rotation(node)
else:
node.set_right(insert_node(node.get_right(), data))
if get_height(node.get_right()) - get_height(node.get_left()) == 2:
right_child = node.get_right()
assert right_child is not None
if data < right_child.get_data():
node = rl_rotation(node)
else:
node = left_rotation(node)
h1 = my_max(get_height(node.get_right()), get_height(node.get_left())) + 1
node.set_height(h1)
return node
def get_right_most(root: MyNode) -> Any:
while True:
right_child = root.get_right()
if right_child is None:
break
root = right_child
return root.get_data()
def get_left_most(root: MyNode) -> Any:
while True:
left_child = root.get_left()
if left_child is None:
break
root = left_child
return root.get_data()
def del_node(root: MyNode, data: Any) -> MyNode | None:
left_child = root.get_left()
right_child = root.get_right()
if root.get_data() == data:
if left_child is not None and right_child is not None:
temp_data = get_left_most(right_child)
root.set_data(temp_data)
root.set_right(del_node(right_child, temp_data))
elif left_child is not None:
root = left_child
elif right_child is not None:
root = right_child
else:
return None
elif root.get_data() > data:
if left_child is None:
print("No such data")
return root
else:
root.set_left(del_node(left_child, data))
# root.get_data() < data
elif right_child is None:
return root
else:
root.set_right(del_node(right_child, data))
# Re-fetch left_child and right_child references
left_child = root.get_left()
right_child = root.get_right()
if get_height(right_child) - get_height(left_child) == 2:
assert right_child is not None
if get_height(right_child.get_right()) > get_height(right_child.get_left()):
root = left_rotation(root)
else:
root = rl_rotation(root)
elif get_height(right_child) - get_height(left_child) == -2:
assert left_child is not None
if get_height(left_child.get_left()) > get_height(left_child.get_right()):
root = right_rotation(root)
else:
root = lr_rotation(root)
height = my_max(get_height(root.get_right()), get_height(root.get_left())) + 1
root.set_height(height)
return root
class AVLtree:
"""
An AVL tree doctest
Examples:
>>> t = AVLtree()
>>> t.insert(4)
insert:4
>>> print(str(t).replace(" \\n","\\n"))
4
*************************************
>>> t.insert(2)
insert:2
>>> print(str(t).replace(" \\n","\\n").replace(" \\n","\\n"))
4
2 *
*************************************
>>> t.insert(3)
insert:3
right rotation node: 2
left rotation node: 4
>>> print(str(t).replace(" \\n","\\n").replace(" \\n","\\n"))
3
2 4
*************************************
>>> t.get_height()
2
>>> t.del_node(3)
delete:3
>>> print(str(t).replace(" \\n","\\n").replace(" \\n","\\n"))
4
2 *
*************************************
"""
def __init__(self) -> None:
self.root: MyNode | None = None
def get_height(self) -> int:
return get_height(self.root)
def insert(self, data: Any) -> None:
print("insert:" + str(data))
self.root = insert_node(self.root, data)
def del_node(self, data: Any) -> None:
print("delete:" + str(data))
if self.root is None:
print("Tree is empty!")
return
self.root = del_node(self.root, data)
def __str__(
self,
) -> str: # a level order traversal gives a more intuitive look at the tree
output = ""
q = MyQueue()
q.push(self.root)
layer = self.get_height()
if layer == 0:
return output
cnt = 0
while not q.is_empty():
node = q.pop()
space = " " * int(math.pow(2, layer - 1))
output += space
if node is None:
output += "*"
q.push(None)
q.push(None)
else:
output += str(node.get_data())
q.push(node.get_left())
q.push(node.get_right())
output += space
cnt = cnt + 1
for i in range(100):
if cnt == math.pow(2, i) - 1:
layer = layer - 1
if layer == 0:
output += "\n*************************************"
return output
output += "\n"
break
output += "\n*************************************"
return output
def _test() -> None:
import doctest
doctest.testmod()
if __name__ == "__main__":
_test()
t = AVLtree()
lst = list(range(10))
random.shuffle(lst)
for i in lst:
t.insert(i)
print(str(t))
random.shuffle(lst)
for i in lst:
t.del_node(i)
print(str(t))
================================================
FILE: data_structures/binary_tree/basic_binary_tree.py
================================================
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
@dataclass
class Node:
data: int
left: Node | None = None
right: Node | None = None
def __iter__(self) -> Iterator[int]:
if self.left:
yield from self.left
yield self.data
if self.right:
yield from self.right
def __len__(self) -> int:
return sum(1 for _ in self)
def is_full(self) -> bool:
if not self or (not self.left and not self.right):
return True
if self.left and self.right:
return self.left.is_full() and self.right.is_full()
return False
@dataclass
class BinaryTree:
root: Node
def __iter__(self) -> Iterator[int]:
return iter(self.root)
def __len__(self) -> int:
return len(self.root)
@classmethod
def small_tree(cls) -> BinaryTree:
"""
Return a small binary tree with 3 nodes.
>>> binary_tree = BinaryTree.small_tree()
>>> len(binary_tree)
3
>>> list(binary_tree)
[1, 2, 3]
"""
binary_tree = BinaryTree(Node(2))
binary_tree.root.left = Node(1)
binary_tree.root.right = Node(3)
return binary_tree
@classmethod
def medium_tree(cls) -> BinaryTree:
"""
Return a medium binary tree with 7 nodes.
>>> binary_tree = BinaryTree.medium_tree()
>>> len(binary_tree)
7
>>> list(binary_tree)
[1, 2, 3, 4, 5, 6, 7]
"""
binary_tree = BinaryTree(Node(4))
binary_tree.root.left = two = Node(2)
two.left = Node(1)
two.right = Node(3)
binary_tree.root.right = five = Node(5)
five.right = six = Node(6)
six.right = Node(7)
return binary_tree
def depth(self) -> int:
"""
Returns the depth of the tree
>>> BinaryTree(Node(1)).depth()
1
>>> BinaryTree.small_tree().depth()
2
>>> BinaryTree.medium_tree().depth()
4
"""
return self._depth(self.root)
def _depth(self, node: Node | None) -> int:
if not node:
return 0
return 1 + max(self._depth(node.left), self._depth(node.right))
def is_full(self) -> bool:
"""
Returns True if the tree is full
>>> BinaryTree(Node(1)).is_full()
True
>>> BinaryTree.small_tree().is_full()
True
>>> BinaryTree.medium_tree().is_full()
False
"""
return self.root.is_full()
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/binary_tree/binary_search_tree.py
================================================
r"""
A binary search Tree
Example
8
/ \
3 10
/ \ \
1 6 14
/ \ /
4 7 13
>>> t = BinarySearchTree().insert(8, 3, 6, 1, 10, 14, 13, 4, 7)
>>> print(" ".join(repr(i.value) for i in t.traversal_tree()))
8 3 1 6 4 7 10 14 13
>>> tuple(i.value for i in t.traversal_tree(inorder))
(1, 3, 4, 6, 7, 8, 10, 13, 14)
>>> tuple(t)
(1, 3, 4, 6, 7, 8, 10, 13, 14)
>>> t.find_kth_smallest(3, t.root)
4
>>> tuple(t)[3-1]
4
>>> print(" ".join(repr(i.value) for i in t.traversal_tree(postorder)))
1 4 7 6 3 13 14 10 8
>>> t.remove(20)
Traceback (most recent call last):
...
ValueError: Value 20 not found
>>> BinarySearchTree().search(6)
Traceback (most recent call last):
...
IndexError: Warning: Tree is empty! please use another.
Other example:
>>> testlist = (8, 3, 6, 1, 10, 14, 13, 4, 7)
>>> t = BinarySearchTree()
>>> for i in testlist:
... t.insert(i) # doctest: +ELLIPSIS
BinarySearchTree(root=8)
BinarySearchTree(root={'8': (3, None)})
BinarySearchTree(root={'8': ({'3': (None, 6)}, None)})
BinarySearchTree(root={'8': ({'3': (1, 6)}, None)})
BinarySearchTree(root={'8': ({'3': (1, 6)}, 10)})
BinarySearchTree(root={'8': ({'3': (1, 6)}, {'10': (None, 14)})})
BinarySearchTree(root={'8': ({'3': (1, 6)}, {'10': (None, {'14': (13, None)})})})
BinarySearchTree(root={'8': ({'3': (1, {'6': (4, None)})}, {'10': (None, {'14': ...
BinarySearchTree(root={'8': ({'3': (1, {'6': (4, 7)})}, {'10': (None, {'14': (13, ...
Prints the tree as a nested repr of its root node
>>> print(t)
{'8': ({'3': (1, {'6': (4, 7)})}, {'10': (None, {'14': (13, None)})})}
Test existence
>>> t.search(6) is not None
True
>>> 6 in t
True
>>> t.search(-1) is not None
False
>>> -1 in t
False
>>> t.search(6).is_right
True
>>> t.search(1).is_right
False
>>> t.get_max().value
14
>>> max(t)
14
>>> t.get_min().value
1
>>> min(t)
1
>>> t.empty()
False
>>> not t
False
>>> for i in testlist:
... t.remove(i)
>>> t.empty()
True
>>> not t
True
"""
from __future__ import annotations
from collections.abc import Iterable, Iterator
from dataclasses import dataclass
from typing import Any, Self
@dataclass
class Node:
value: int
left: Node | None = None
right: Node | None = None
parent: Node | None = None # Added in order to delete a node easier
def __iter__(self) -> Iterator[int]:
"""
>>> list(Node(0))
[0]
>>> list(Node(0, Node(-1), Node(1), None))
[-1, 0, 1]
"""
yield from self.left or []
yield self.value
yield from self.right or []
def __repr__(self) -> str:
from pprint import pformat
if self.left is None and self.right is None:
return str(self.value)
return pformat({f"{self.value}": (self.left, self.right)}, indent=1)
@property
def is_right(self) -> bool:
return bool(self.parent and self is self.parent.right)
@dataclass
class BinarySearchTree:
root: Node | None = None
def __bool__(self) -> bool:
return bool(self.root)
def __iter__(self) -> Iterator[int]:
yield from self.root or []
def __str__(self) -> str:
"""
Return a string of all the Nodes using in order traversal
"""
return str(self.root)
def __reassign_nodes(self, node: Node, new_children: Node | None) -> None:
if new_children is not None: # reset its kids
new_children.parent = node.parent
if node.parent is not None: # reset its parent
if node.is_right: # If it is the right child
node.parent.right = new_children
else:
node.parent.left = new_children
else:
self.root = new_children
def empty(self) -> bool:
"""
Returns True if the tree does not have any element(s).
False if the tree has element(s).
>>> BinarySearchTree().empty()
True
>>> BinarySearchTree().insert(1).empty()
False
>>> BinarySearchTree().insert(8, 3, 6, 1, 10, 14, 13, 4, 7).empty()
False
"""
return not self.root
def __insert(self, value) -> None:
"""
Insert a new node in Binary Search Tree with value label
"""
new_node = Node(value) # create a new Node
if self.empty(): # if Tree is empty
self.root = new_node # set its root
else: # Tree is not empty
parent_node = self.root # from root
if parent_node is None:
return
while True: # While we don't get to a leaf
if value < parent_node.value: # We go left
if parent_node.left is None:
parent_node.left = new_node # We insert the new node in a leaf
break
else:
parent_node = parent_node.left
elif parent_node.right is None:
parent_node.right = new_node
break
else:
parent_node = parent_node.right
new_node.parent = parent_node
def insert(self, *values) -> Self:
for value in values:
self.__insert(value)
return self
def search(self, value) -> Node | None:
"""
>>> tree = BinarySearchTree().insert(10, 20, 30, 40, 50)
>>> tree.search(10)
{'10': (None, {'20': (None, {'30': (None, {'40': (None, 50)})})})}
>>> tree.search(20)
{'20': (None, {'30': (None, {'40': (None, 50)})})}
>>> tree.search(30)
{'30': (None, {'40': (None, 50)})}
>>> tree.search(40)
{'40': (None, 50)}
>>> tree.search(50)
50
>>> tree.search(5) is None # element not present
True
>>> tree.search(0) is None # element not present
True
>>> tree.search(-5) is None # element not present
True
>>> BinarySearchTree().search(10)
Traceback (most recent call last):
...
IndexError: Warning: Tree is empty! please use another.
"""
if self.empty():
raise IndexError("Warning: Tree is empty! please use another.")
else:
node = self.root
# use lazy evaluation here to avoid NoneType Attribute error
while node is not None and node.value != value:
node = node.left if value < node.value else node.right
return node
def get_max(self, node: Node | None = None) -> Node | None:
"""
We go deep on the right branch
>>> BinarySearchTree().insert(10, 20, 30, 40, 50).get_max()
50
>>> BinarySearchTree().insert(-5, -1, 0.1, -0.3, -4.5).get_max()
{'0.1': (-0.3, None)}
>>> BinarySearchTree().insert(1, 78.3, 30, 74.0, 1).get_max()
{'78.3': ({'30': (1, 74.0)}, None)}
>>> BinarySearchTree().insert(1, 783, 30, 740, 1).get_max()
{'783': ({'30': (1, 740)}, None)}
"""
if node is None:
if self.root is None:
return None
node = self.root
if not self.empty():
while node.right is not None:
node = node.right
return node
def get_min(self, node: Node | None = None) -> Node | None:
"""
We go deep on the left branch
>>> BinarySearchTree().insert(10, 20, 30, 40, 50).get_min()
{'10': (None, {'20': (None, {'30': (None, {'40': (None, 50)})})})}
>>> BinarySearchTree().insert(-5, -1, 0, -0.3, -4.5).get_min()
{'-5': (None, {'-1': (-4.5, {'0': (-0.3, None)})})}
>>> BinarySearchTree().insert(1, 78.3, 30, 74.0, 1).get_min()
{'1': (None, {'78.3': ({'30': (1, 74.0)}, None)})}
>>> BinarySearchTree().insert(1, 783, 30, 740, 1).get_min()
{'1': (None, {'783': ({'30': (1, 740)}, None)})}
"""
if node is None:
if self.root is None:
return None
node = self.root
if not self.empty():
while node.left is not None:
node = node.left
return node
def remove(self, value: int) -> None:
# Look for the node with that label
node = self.search(value)
if node is None:
msg = f"Value {value} not found"
raise ValueError(msg)
if node.left is None and node.right is None: # If it has no children
self.__reassign_nodes(node, None)
elif node.left is None: # Has only right children
self.__reassign_nodes(node, node.right)
elif node.right is None: # Has only left children
self.__reassign_nodes(node, node.left)
else:
predecessor = self.get_max(
node.left
) # Gets the max value of the left branch
self.remove(predecessor.value) # type: ignore[union-attr]
node.value = (
predecessor.value # type: ignore[union-attr]
) # Assigns the value to the node to delete and keep tree structure
def preorder_traverse(self, node: Node | None) -> Iterable:
if node is not None:
yield node # Preorder Traversal
yield from self.preorder_traverse(node.left)
yield from self.preorder_traverse(node.right)
def traversal_tree(self, traversal_function=None) -> Any:
"""
This function traverses the tree.
You can pass a function to traverse the tree as needed by client code
"""
if traversal_function is None:
return self.preorder_traverse(self.root)
else:
return traversal_function(self.root)
def inorder(self, arr: list, node: Node | None) -> None:
"""Perform an inorder traversal and append values of the nodes to
a list named arr"""
if node:
self.inorder(arr, node.left)
arr.append(node.value)
self.inorder(arr, node.right)
def find_kth_smallest(self, k: int, node: Node) -> int:
"""Return the kth smallest element in a binary search tree"""
arr: list[int] = []
self.inorder(arr, node) # append all values to list using inorder traversal
return arr[k - 1]
def inorder(curr_node: Node | None) -> list[Node]:
"""
inorder (left, self, right)
"""
node_list = []
if curr_node is not None:
node_list = [*inorder(curr_node.left), curr_node, *inorder(curr_node.right)]
return node_list
def postorder(curr_node: Node | None) -> list[Node]:
"""
postOrder (left, right, self)
"""
node_list = []
if curr_node is not None:
node_list = postorder(curr_node.left) + postorder(curr_node.right) + [curr_node]
return node_list
if __name__ == "__main__":
import doctest
doctest.testmod(verbose=True)
================================================
FILE: data_structures/binary_tree/binary_search_tree_recursive.py
================================================
"""
This is a python3 implementation of a binary search tree using recursion
To run tests:
python -m unittest binary_search_tree_recursive.py
To run an example:
python binary_search_tree_recursive.py
"""
from __future__ import annotations
import unittest
from collections.abc import Iterator
import pytest
class Node:
def __init__(self, label: int, parent: Node | None) -> None:
self.label = label
self.parent = parent
self.left: Node | None = None
self.right: Node | None = None
class BinarySearchTree:
def __init__(self) -> None:
self.root: Node | None = None
def empty(self) -> None:
"""
Empties the tree
>>> t = BinarySearchTree()
>>> assert t.root is None
>>> t.put(8)
>>> assert t.root is not None
"""
self.root = None
def is_empty(self) -> bool:
"""
Checks if the tree is empty
>>> t = BinarySearchTree()
>>> t.is_empty()
True
>>> t.put(8)
>>> t.is_empty()
False
"""
return self.root is None
def put(self, label: int) -> None:
"""
Put a new node in the tree
>>> t = BinarySearchTree()
>>> t.put(8)
>>> assert t.root.parent is None
>>> assert t.root.label == 8
>>> t.put(10)
>>> assert t.root.right.parent == t.root
>>> assert t.root.right.label == 10
>>> t.put(3)
>>> assert t.root.left.parent == t.root
>>> assert t.root.left.label == 3
"""
self.root = self._put(self.root, label)
def _put(self, node: Node | None, label: int, parent: Node | None = None) -> Node:
if node is None:
node = Node(label, parent)
elif label < node.label:
node.left = self._put(node.left, label, node)
elif label > node.label:
node.right = self._put(node.right, label, node)
else:
msg = f"Node with label {label} already exists"
raise ValueError(msg)
return node
def search(self, label: int) -> Node:
"""
Searches a node in the tree
>>> t = BinarySearchTree()
>>> t.put(8)
>>> t.put(10)
>>> node = t.search(8)
>>> assert node.label == 8
>>> node = t.search(3)
Traceback (most recent call last):
...
ValueError: Node with label 3 does not exist
"""
return self._search(self.root, label)
def _search(self, node: Node | None, label: int) -> Node:
if node is None:
msg = f"Node with label {label} does not exist"
raise ValueError(msg)
elif label < node.label:
node = self._search(node.left, label)
elif label > node.label:
node = self._search(node.right, label)
return node
def remove(self, label: int) -> None:
"""
Removes a node in the tree
>>> t = BinarySearchTree()
>>> t.put(8)
>>> t.put(10)
>>> t.remove(8)
>>> assert t.root.label == 10
>>> t.remove(3)
Traceback (most recent call last):
...
ValueError: Node with label 3 does not exist
"""
node = self.search(label)
if node.right and node.left:
lowest_node = self._get_lowest_node(node.right)
lowest_node.left = node.left
lowest_node.right = node.right
node.left.parent = lowest_node
if node.right:
node.right.parent = lowest_node
self._reassign_nodes(node, lowest_node)
elif not node.right and node.left:
self._reassign_nodes(node, node.left)
elif node.right and not node.left:
self._reassign_nodes(node, node.right)
else:
self._reassign_nodes(node, None)
def _reassign_nodes(self, node: Node, new_children: Node | None) -> None:
if new_children:
new_children.parent = node.parent
if node.parent:
if node.parent.right == node:
node.parent.right = new_children
else:
node.parent.left = new_children
else:
self.root = new_children
def _get_lowest_node(self, node: Node) -> Node:
if node.left:
lowest_node = self._get_lowest_node(node.left)
else:
lowest_node = node
self._reassign_nodes(node, node.right)
return lowest_node
def exists(self, label: int) -> bool:
"""
Checks if a node exists in the tree
>>> t = BinarySearchTree()
>>> t.put(8)
>>> t.put(10)
>>> t.exists(8)
True
>>> t.exists(3)
False
"""
try:
self.search(label)
return True
except ValueError:
return False
def get_max_label(self) -> int:
"""
Gets the max label inserted in the tree
>>> t = BinarySearchTree()
>>> t.get_max_label()
Traceback (most recent call last):
...
ValueError: Binary search tree is empty
>>> t.put(8)
>>> t.put(10)
>>> t.get_max_label()
10
"""
if self.root is None:
raise ValueError("Binary search tree is empty")
node = self.root
while node.right is not None:
node = node.right
return node.label
def get_min_label(self) -> int:
"""
Gets the min label inserted in the tree
>>> t = BinarySearchTree()
>>> t.get_min_label()
Traceback (most recent call last):
...
ValueError: Binary search tree is empty
>>> t.put(8)
>>> t.put(10)
>>> t.get_min_label()
8
"""
if self.root is None:
raise ValueError("Binary search tree is empty")
node = self.root
while node.left is not None:
node = node.left
return node.label
def inorder_traversal(self) -> Iterator[Node]:
"""
Return the inorder traversal of the tree
>>> t = BinarySearchTree()
>>> [i.label for i in t.inorder_traversal()]
[]
>>> t.put(8)
>>> t.put(10)
>>> t.put(9)
>>> [i.label for i in t.inorder_traversal()]
[8, 9, 10]
"""
return self._inorder_traversal(self.root)
def _inorder_traversal(self, node: Node | None) -> Iterator[Node]:
if node is not None:
yield from self._inorder_traversal(node.left)
yield node
yield from self._inorder_traversal(node.right)
def preorder_traversal(self) -> Iterator[Node]:
"""
Return the preorder traversal of the tree
>>> t = BinarySearchTree()
>>> [i.label for i in t.preorder_traversal()]
[]
>>> t.put(8)
>>> t.put(10)
>>> t.put(9)
>>> [i.label for i in t.preorder_traversal()]
[8, 10, 9]
"""
return self._preorder_traversal(self.root)
def _preorder_traversal(self, node: Node | None) -> Iterator[Node]:
if node is not None:
yield node
yield from self._preorder_traversal(node.left)
yield from self._preorder_traversal(node.right)
class BinarySearchTreeTest(unittest.TestCase):
@staticmethod
def _get_binary_search_tree() -> BinarySearchTree:
r"""
8
/ \
3 10
/ \ \
1 6 14
/ \ /
4 7 13
\
5
"""
t = BinarySearchTree()
t.put(8)
t.put(3)
t.put(6)
t.put(1)
t.put(10)
t.put(14)
t.put(13)
t.put(4)
t.put(7)
t.put(5)
return t
def test_put(self) -> None:
t = BinarySearchTree()
assert t.is_empty()
t.put(8)
r"""
8
"""
assert t.root is not None
assert t.root.parent is None
assert t.root.label == 8
t.put(10)
r"""
8
\
10
"""
assert t.root.right is not None
assert t.root.right.parent == t.root
assert t.root.right.label == 10
t.put(3)
r"""
8
/ \
3 10
"""
assert t.root.left is not None
assert t.root.left.parent == t.root
assert t.root.left.label == 3
t.put(6)
r"""
8
/ \
3 10
\
6
"""
assert t.root.left.right is not None
assert t.root.left.right.parent == t.root.left
assert t.root.left.right.label == 6
t.put(1)
r"""
8
/ \
3 10
/ \
1 6
"""
assert t.root.left.left is not None
assert t.root.left.left.parent == t.root.left
assert t.root.left.left.label == 1
with pytest.raises(ValueError):
t.put(1)
def test_search(self) -> None:
t = self._get_binary_search_tree()
node = t.search(6)
assert node.label == 6
node = t.search(13)
assert node.label == 13
with pytest.raises(ValueError):
t.search(2)
def test_remove(self) -> None:
t = self._get_binary_search_tree()
t.remove(13)
r"""
8
/ \
3 10
/ \ \
1 6 14
/ \
4 7
\
5
"""
assert t.root is not None
assert t.root.right is not None
assert t.root.right.right is not None
assert t.root.right.right.right is None
assert t.root.right.right.left is None
t.remove(7)
r"""
8
/ \
3 10
/ \ \
1 6 14
/
4
\
5
"""
assert t.root.left is not None
assert t.root.left.right is not None
assert t.root.left.right.left is not None
assert t.root.left.right.right is None
assert t.root.left.right.left.label == 4
t.remove(6)
r"""
8
/ \
3 10
/ \ \
1 4 14
\
5
"""
assert t.root.left.left is not None
assert t.root.left.right.right is not None
assert t.root.left.left.label == 1
assert t.root.left.right.label == 4
assert t.root.left.right.right.label == 5
assert t.root.left.right.left is None
assert t.root.left.left.parent == t.root.left
assert t.root.left.right.parent == t.root.left
t.remove(3)
r"""
8
/ \
4 10
/ \ \
1 5 14
"""
assert t.root is not None
assert t.root.left.label == 4
assert t.root.left.right.label == 5
assert t.root.left.left.label == 1
assert t.root.left.parent == t.root
assert t.root.left.left.parent == t.root.left
assert t.root.left.right.parent == t.root.left
t.remove(4)
r"""
8
/ \
5 10
/ \
1 14
"""
assert t.root.left is not None
assert t.root.left.left is not None
assert t.root.left.label == 5
assert t.root.left.right is None
assert t.root.left.left.label == 1
assert t.root.left.parent == t.root
assert t.root.left.left.parent == t.root.left
def test_remove_2(self) -> None:
t = self._get_binary_search_tree()
t.remove(3)
r"""
8
/ \
4 10
/ \ \
1 6 14
/ \ /
5 7 13
"""
assert t.root is not None
assert t.root.left is not None
assert t.root.left.left is not None
assert t.root.left.right is not None
assert t.root.left.right.left is not None
assert t.root.left.right.right is not None
assert t.root.left.label == 4
assert t.root.left.right.label == 6
assert t.root.left.left.label == 1
assert t.root.left.right.right.label == 7
assert t.root.left.right.left.label == 5
assert t.root.left.parent == t.root
assert t.root.left.right.parent == t.root.left
assert t.root.left.left.parent == t.root.left
assert t.root.left.right.left.parent == t.root.left.right
def test_empty(self) -> None:
t = self._get_binary_search_tree()
t.empty()
assert t.root is None
def test_is_empty(self) -> None:
t = self._get_binary_search_tree()
assert not t.is_empty()
t.empty()
assert t.is_empty()
def test_exists(self) -> None:
t = self._get_binary_search_tree()
assert t.exists(6)
assert not t.exists(-1)
def test_get_max_label(self) -> None:
t = self._get_binary_search_tree()
assert t.get_max_label() == 14
t.empty()
with pytest.raises(ValueError):
t.get_max_label()
def test_get_min_label(self) -> None:
t = self._get_binary_search_tree()
assert t.get_min_label() == 1
t.empty()
with pytest.raises(ValueError):
t.get_min_label()
def test_inorder_traversal(self) -> None:
t = self._get_binary_search_tree()
inorder_traversal_nodes = [i.label for i in t.inorder_traversal()]
assert inorder_traversal_nodes == [1, 3, 4, 5, 6, 7, 8, 10, 13, 14]
def test_preorder_traversal(self) -> None:
t = self._get_binary_search_tree()
preorder_traversal_nodes = [i.label for i in t.preorder_traversal()]
assert preorder_traversal_nodes == [8, 3, 1, 6, 4, 5, 7, 10, 14, 13]
def binary_search_tree_example() -> None:
r"""
Example
8
/ \
3 10
/ \ \
1 6 14
/ \ /
4 7 13
\
5
Example After Deletion
4
/ \
1 7
\
5
"""
t = BinarySearchTree()
t.put(8)
t.put(3)
t.put(6)
t.put(1)
t.put(10)
t.put(14)
t.put(13)
t.put(4)
t.put(7)
t.put(5)
print(
"""
8
/ \\
3 10
/ \\ \\
1 6 14
/ \\ /
4 7 13
\\
5
"""
)
print("Label 6 exists:", t.exists(6))
print("Label 13 exists:", t.exists(13))
print("Label -1 exists:", t.exists(-1))
print("Label 12 exists:", t.exists(12))
# Prints all the elements of the list in inorder traversal
inorder_traversal_nodes = [i.label for i in t.inorder_traversal()]
print("Inorder traversal:", inorder_traversal_nodes)
# Prints all the elements of the list in preorder traversal
preorder_traversal_nodes = [i.label for i in t.preorder_traversal()]
print("Preorder traversal:", preorder_traversal_nodes)
print("Max. label:", t.get_max_label())
print("Min. label:", t.get_min_label())
# Delete elements
print("\nDeleting elements 13, 10, 8, 3, 6, 14")
print(
"""
4
/ \\
1 7
\\
5
"""
)
t.remove(13)
t.remove(10)
t.remove(8)
t.remove(3)
t.remove(6)
t.remove(14)
# Prints all the elements of the list in inorder traversal after delete
inorder_traversal_nodes = [i.label for i in t.inorder_traversal()]
print("Inorder traversal after delete:", inorder_traversal_nodes)
# Prints all the elements of the list in preorder traversal after delete
preorder_traversal_nodes = [i.label for i in t.preorder_traversal()]
print("Preorder traversal after delete:", preorder_traversal_nodes)
print("Max. label:", t.get_max_label())
print("Min. label:", t.get_min_label())
if __name__ == "__main__":
binary_search_tree_example()
================================================
FILE: data_structures/binary_tree/binary_tree_mirror.py
================================================
"""
Problem Description:
Given a binary tree, return its mirror.
"""
def binary_tree_mirror_dict(binary_tree_mirror_dictionary: dict, root: int):
if not root or root not in binary_tree_mirror_dictionary:
return
left_child, right_child = binary_tree_mirror_dictionary[root][:2]
binary_tree_mirror_dictionary[root] = [right_child, left_child]
binary_tree_mirror_dict(binary_tree_mirror_dictionary, left_child)
binary_tree_mirror_dict(binary_tree_mirror_dictionary, right_child)
def binary_tree_mirror(binary_tree: dict, root: int = 1) -> dict:
"""
>>> binary_tree_mirror({ 1: [2,3], 2: [4,5], 3: [6,7], 7: [8,9]}, 1)
{1: [3, 2], 2: [5, 4], 3: [7, 6], 7: [9, 8]}
>>> binary_tree_mirror({ 1: [2,3], 2: [4,5], 3: [6,7], 4: [10,11]}, 1)
{1: [3, 2], 2: [5, 4], 3: [7, 6], 4: [11, 10]}
>>> binary_tree_mirror({ 1: [2,3], 2: [4,5], 3: [6,7], 4: [10,11]}, 5)
Traceback (most recent call last):
...
ValueError: root 5 is not present in the binary_tree
>>> binary_tree_mirror({}, 5)
Traceback (most recent call last):
...
ValueError: binary tree cannot be empty
"""
if not binary_tree:
raise ValueError("binary tree cannot be empty")
if root not in binary_tree:
msg = f"root {root} is not present in the binary_tree"
raise ValueError(msg)
binary_tree_mirror_dictionary = dict(binary_tree)
binary_tree_mirror_dict(binary_tree_mirror_dictionary, root)
return binary_tree_mirror_dictionary
if __name__ == "__main__":
binary_tree = {1: [2, 3], 2: [4, 5], 3: [6, 7], 7: [8, 9]}
print(f"Binary tree: {binary_tree}")
binary_tree_mirror_dictionary = binary_tree_mirror(binary_tree, 1)  # root is 1; 5 is not a key in binary_tree
print(f"Binary tree mirror: {binary_tree_mirror_dictionary}")
================================================
FILE: data_structures/binary_tree/binary_tree_node_sum.py
================================================
"""
Sum of all nodes in a binary tree.
Python implementation:
O(n) time complexity - Recurses through :meth:`depth_first_search`
with each element.
O(n) space complexity - At any point in time maximum number of stack
frames that could be in memory is `n`
"""
from __future__ import annotations
from collections.abc import Iterator
class Node:
"""
A Node has a value variable and pointers to Nodes to its left and right.
"""
def __init__(self, value: int) -> None:
self.value = value
self.left: Node | None = None
self.right: Node | None = None
class BinaryTreeNodeSum:
r"""
The below tree looks like this
10
/ \
5 -3
/ / \
12 8 0
>>> tree = Node(10)
>>> sum(BinaryTreeNodeSum(tree))
10
>>> tree.left = Node(5)
>>> sum(BinaryTreeNodeSum(tree))
15
>>> tree.right = Node(-3)
>>> sum(BinaryTreeNodeSum(tree))
12
>>> tree.left.left = Node(12)
>>> sum(BinaryTreeNodeSum(tree))
24
>>> tree.right.left = Node(8)
>>> tree.right.right = Node(0)
>>> sum(BinaryTreeNodeSum(tree))
32
"""
def __init__(self, tree: Node) -> None:
self.tree = tree
def depth_first_search(self, node: Node | None) -> int:
if node is None:
return 0
return node.value + (
self.depth_first_search(node.left) + self.depth_first_search(node.right)
)
def __iter__(self) -> Iterator[int]:
yield self.depth_first_search(self.tree)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/binary_tree/binary_tree_path_sum.py
================================================
"""
Given the root of a binary tree and an integer target,
find the number of paths where the sum of the values
along the path equals target.
Leetcode reference: https://leetcode.com/problems/path-sum-iii/
"""
from __future__ import annotations
class Node:
"""
A Node has a value variable and pointers to Nodes to its left and right.
"""
def __init__(self, value: int) -> None:
self.value = value
self.left: Node | None = None
self.right: Node | None = None
class BinaryTreePathSum:
r"""
The below tree looks like this
10
/ \
5 -3
/ \ \
3 2 11
/ \ \
3 -2 1
>>> tree = Node(10)
>>> tree.left = Node(5)
>>> tree.right = Node(-3)
>>> tree.left.left = Node(3)
>>> tree.left.right = Node(2)
>>> tree.right.right = Node(11)
>>> tree.left.left.left = Node(3)
>>> tree.left.left.right = Node(-2)
>>> tree.left.right.right = Node(1)
>>> BinaryTreePathSum().path_sum(tree, 8)
3
>>> BinaryTreePathSum().path_sum(tree, 7)
2
>>> tree.right.right = Node(10)
>>> BinaryTreePathSum().path_sum(tree, 8)
2
>>> BinaryTreePathSum().path_sum(None, 0)
0
>>> BinaryTreePathSum().path_sum(tree, 0)
0
The second tree looks like this
0
/ \
5 5
>>> tree2 = Node(0)
>>> tree2.left = Node(5)
>>> tree2.right = Node(5)
>>> BinaryTreePathSum().path_sum(tree2, 5)
4
>>> BinaryTreePathSum().path_sum(tree2, -1)
0
>>> BinaryTreePathSum().path_sum(tree2, 0)
1
"""
target: int
def __init__(self) -> None:
self.paths = 0
def depth_first_search(self, node: Node | None, path_sum: int) -> None:
if node is None:
return
if path_sum == self.target:
self.paths += 1
if node.left:
self.depth_first_search(node.left, path_sum + node.left.value)
if node.right:
self.depth_first_search(node.right, path_sum + node.right.value)
def path_sum(self, node: Node | None, target: int | None = None) -> int:
if node is None:
return 0
if target is not None:
self.target = target
self.depth_first_search(node, node.value)
self.path_sum(node.left)
self.path_sum(node.right)
return self.paths
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/binary_tree/binary_tree_traversals.py
================================================
from __future__ import annotations
from collections import deque
from collections.abc import Generator
from dataclasses import dataclass
# https://en.wikipedia.org/wiki/Tree_traversal
@dataclass
class Node:
data: int
left: Node | None = None
right: Node | None = None
def make_tree() -> Node | None:
r"""
The below tree
1
/ \
2 3
/ \
4 5
"""
tree = Node(1)
tree.left = Node(2)
tree.right = Node(3)
tree.left.left = Node(4)
tree.left.right = Node(5)
return tree
def preorder(root: Node | None) -> Generator[int]:
"""
Pre-order traversal visits root node, left subtree, right subtree.
>>> list(preorder(make_tree()))
[1, 2, 4, 5, 3]
"""
if not root:
return
yield root.data
yield from preorder(root.left)
yield from preorder(root.right)
def postorder(root: Node | None) -> Generator[int]:
"""
Post-order traversal visits left subtree, right subtree, root node.
>>> list(postorder(make_tree()))
[4, 5, 2, 3, 1]
"""
if not root:
return
yield from postorder(root.left)
yield from postorder(root.right)
yield root.data
def inorder(root: Node | None) -> Generator[int]:
"""
In-order traversal visits left subtree, root node, right subtree.
>>> list(inorder(make_tree()))
[4, 2, 5, 1, 3]
"""
if not root:
return
yield from inorder(root.left)
yield root.data
yield from inorder(root.right)
def reverse_inorder(root: Node | None) -> Generator[int]:
"""
Reverse in-order traversal visits right subtree, root node, left subtree.
>>> list(reverse_inorder(make_tree()))
[3, 1, 5, 2, 4]
"""
if not root:
return
yield from reverse_inorder(root.right)
yield root.data
yield from reverse_inorder(root.left)
def height(root: Node | None) -> int:
"""
Recursive function for calculating the height of the binary tree.
>>> height(None)
0
>>> height(make_tree())
3
"""
return (max(height(root.left), height(root.right)) + 1) if root else 0
def level_order(root: Node | None) -> Generator[int]:
"""
Returns the node values of a whole binary tree in level order traversal.
Level order traversal: visit the nodes of the tree level by level.
>>> list(level_order(make_tree()))
[1, 2, 3, 4, 5]
"""
if root is None:
return
process_queue = deque([root])
while process_queue:
node = process_queue.popleft()
yield node.data
if node.left:
process_queue.append(node.left)
if node.right:
process_queue.append(node.right)
def get_nodes_from_left_to_right(root: Node | None, level: int) -> Generator[int]:
"""
Returns the node values of a particular level,
in left to right direction of the binary tree.
>>> list(get_nodes_from_left_to_right(make_tree(), 1))
[1]
>>> list(get_nodes_from_left_to_right(make_tree(), 2))
[2, 3]
"""
def populate_output(root: Node | None, level: int) -> Generator[int]:
if not root:
return
if level == 1:
yield root.data
elif level > 1:
yield from populate_output(root.left, level - 1)
yield from populate_output(root.right, level - 1)
yield from populate_output(root, level)
def get_nodes_from_right_to_left(root: Node | None, level: int) -> Generator[int]:
"""
Returns the node values of a particular level,
in right to left direction of the binary tree.
>>> list(get_nodes_from_right_to_left(make_tree(), 1))
[1]
>>> list(get_nodes_from_right_to_left(make_tree(), 2))
[3, 2]
"""
def populate_output(root: Node | None, level: int) -> Generator[int]:
if not root:
return
if level == 1:
yield root.data
elif level > 1:
yield from populate_output(root.right, level - 1)
yield from populate_output(root.left, level - 1)
yield from populate_output(root, level)
def zigzag(root: Node | None) -> Generator[int]:
"""
ZigZag traverse:
Returns the node values from left to right and right to left, alternating by level.
>>> list(zigzag(make_tree()))
[1, 3, 2, 4, 5]
"""
if root is None:
return
flag = 0
height_tree = height(root)
for h in range(1, height_tree + 1):
if not flag:
yield from get_nodes_from_left_to_right(root, h)
flag = 1
else:
yield from get_nodes_from_right_to_left(root, h)
flag = 0
def main() -> None: # Main function for testing.
# Create binary tree.
root = make_tree()
# All Traversals of the binary are as follows:
print(f"In-order Traversal: {list(inorder(root))}")
print(f"Reverse In-order Traversal: {list(reverse_inorder(root))}")
print(f"Pre-order Traversal: {list(preorder(root))}")
print(f"Post-order Traversal: {list(postorder(root))}", "\n")
print(f"Height of Tree: {height(root)}", "\n")
print("Complete Level Order Traversal: ")
print(f"{list(level_order(root))} \n")
print("Level-wise order Traversal: ")
for level in range(1, height(root) + 1):
print(f"Level {level}:", list(get_nodes_from_left_to_right(root, level=level)))
print("\nZigZag order Traversal: ")
print(f"{list(zigzag(root))}")
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: data_structures/binary_tree/diameter_of_binary_tree.py
================================================
"""
The diameter/width of a tree is defined as the number of nodes on the longest path
between two end nodes.
"""
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class Node:
data: int
left: Node | None = None
right: Node | None = None
def depth(self) -> int:
"""
>>> root = Node(1)
>>> root.depth()
1
>>> root.left = Node(2)
>>> root.depth()
2
>>> root.left.depth()
1
>>> root.right = Node(3)
>>> root.depth()
2
"""
left_depth = self.left.depth() if self.left else 0
right_depth = self.right.depth() if self.right else 0
return max(left_depth, right_depth) + 1
def diameter(self) -> int:
"""
>>> root = Node(1)
>>> root.diameter()
1
>>> root.left = Node(2)
>>> root.diameter()
2
>>> root.left.diameter()
1
>>> root.right = Node(3)
>>> root.diameter()
3
"""
left_depth = self.left.depth() if self.left else 0
right_depth = self.right.depth() if self.right else 0
return left_depth + right_depth + 1
if __name__ == "__main__":
from doctest import testmod
testmod()
root = Node(1)
root.left = Node(2)
root.right = Node(3)
root.left.left = Node(4)
root.left.right = Node(5)
r"""
Constructed binary tree is
1
/ \
2 3
/ \
4 5
"""
print(f"{root.diameter() = }") # 4
print(f"{root.left.diameter() = }") # 3
print(f"{root.right.diameter() = }") # 1
================================================
FILE: data_structures/binary_tree/diff_views_of_binary_tree.py
================================================
r"""
Problem: Given root of a binary tree, return the:
1. binary-tree-right-side-view
2. binary-tree-left-side-view
3. binary-tree-top-side-view
4. binary-tree-bottom-side-view
"""
from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass
@dataclass
class TreeNode:
val: int
left: TreeNode | None = None
right: TreeNode | None = None
def make_tree() -> TreeNode:
"""
>>> make_tree().val
3
"""
return TreeNode(3, TreeNode(9), TreeNode(20, TreeNode(15), TreeNode(7)))
def binary_tree_right_side_view(root: TreeNode) -> list[int]:
r"""
Function returns the right side view of binary tree.
3 <- 3
/ \
9 20 <- 20
/ \
15 7 <- 7
>>> binary_tree_right_side_view(make_tree())
[3, 20, 7]
>>> binary_tree_right_side_view(None)
[]
"""
def depth_first_search(
root: TreeNode | None, depth: int, right_view: list[int]
) -> None:
"""
A depth first search preorder traversal to append the values at
right side of tree.
"""
if not root:
return
if depth == len(right_view):
right_view.append(root.val)
depth_first_search(root.right, depth + 1, right_view)
depth_first_search(root.left, depth + 1, right_view)
right_view: list = []
if not root:
return right_view
depth_first_search(root, 0, right_view)
return right_view
def binary_tree_left_side_view(root: TreeNode) -> list[int]:
r"""
Function returns the left side view of binary tree.
3 -> 3
/ \
9 -> 9 20
/ \
15 -> 15 7
>>> binary_tree_left_side_view(make_tree())
[3, 9, 15]
>>> binary_tree_left_side_view(None)
[]
"""
def depth_first_search(
root: TreeNode | None, depth: int, left_view: list[int]
) -> None:
"""
A depth first search preorder traversal to append the values
at left side of tree.
"""
if not root:
return
if depth == len(left_view):
left_view.append(root.val)
depth_first_search(root.left, depth + 1, left_view)
depth_first_search(root.right, depth + 1, left_view)
left_view: list = []
if not root:
return left_view
depth_first_search(root, 0, left_view)
return left_view
def binary_tree_top_side_view(root: TreeNode) -> list[int]:
r"""
Function returns the top side view of binary tree.
9 3 20 7
⬇ ⬇ ⬇ ⬇
3
/ \
9 20
/ \
15 7
>>> binary_tree_top_side_view(make_tree())
[9, 3, 20, 7]
>>> binary_tree_top_side_view(None)
[]
"""
def breadth_first_search(root: TreeNode, top_view: list[int]) -> None:
"""
A breadth first search traversal with defaultdict ds to append
the values of tree from top view
"""
queue = [(root, 0)]
lookup = defaultdict(list)
while queue:
first = queue.pop(0)
node, hd = first
lookup[hd].append(node.val)
if node.left:
queue.append((node.left, hd - 1))
if node.right:
queue.append((node.right, hd + 1))
for pair in sorted(lookup.items(), key=lambda each: each[0]):
top_view.append(pair[1][0])
top_view: list = []
if not root:
return top_view
breadth_first_search(root, top_view)
return top_view
def binary_tree_bottom_side_view(root: TreeNode) -> list[int]:
r"""
Function returns the bottom side view of binary tree
3
/ \
9 20
/ \
15 7
↑ ↑ ↑ ↑
9 15 20 7
>>> binary_tree_bottom_side_view(make_tree())
[9, 15, 20, 7]
>>> binary_tree_bottom_side_view(None)
[]
"""
def breadth_first_search(root: TreeNode, bottom_view: list[int]) -> None:
"""
A breadth first search traversal with defaultdict ds to append
the values of tree from bottom view
"""
queue = [(root, 0)]
lookup = defaultdict(list)
while queue:
first = queue.pop(0)
node, hd = first
lookup[hd].append(node.val)
if node.left:
queue.append((node.left, hd - 1))
if node.right:
queue.append((node.right, hd + 1))
for pair in sorted(lookup.items(), key=lambda each: each[0]):
bottom_view.append(pair[1][-1])
bottom_view: list = []
if not root:
return bottom_view
breadth_first_search(root, bottom_view)
return bottom_view
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/binary_tree/distribute_coins.py
================================================
"""
Author : Alexander Pantyukhin
Date : November 7, 2022
Task:
You are given a tree root of a binary tree with n nodes, where each node has
node.data coins. There are exactly n coins in whole tree.
In one move, we may choose two adjacent nodes and move one coin from one node
to another. A move may be from parent to child, or from child to parent.
Return the minimum number of moves required to make every node have exactly one coin.
Example 1:
3
/ \
0 0
Result: 2
Example 2:
0
/ \
3 0
Result: 3
leetcode: https://leetcode.com/problems/distribute-coins-in-binary-tree/
Implementation notes:
Uses a depth-first search approach.
Let n be the number of nodes in the tree.
Runtime: O(n)
Space: O(n) in the worst case for the recursion stack; O(1) auxiliary space otherwise.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import NamedTuple
@dataclass
class TreeNode:
data: int
left: TreeNode | None = None
right: TreeNode | None = None
class CoinsDistribResult(NamedTuple):
moves: int
excess: int
def distribute_coins(root: TreeNode | None) -> int:
"""
>>> distribute_coins(TreeNode(3, TreeNode(0), TreeNode(0)))
2
>>> distribute_coins(TreeNode(0, TreeNode(3), TreeNode(0)))
3
>>> distribute_coins(TreeNode(0, TreeNode(0), TreeNode(3)))
3
>>> distribute_coins(None)
0
>>> distribute_coins(TreeNode(0, TreeNode(0), TreeNode(0)))
Traceback (most recent call last):
...
ValueError: The nodes number should be same as the number of coins
>>> distribute_coins(TreeNode(0, TreeNode(1), TreeNode(1)))
Traceback (most recent call last):
...
ValueError: The nodes number should be same as the number of coins
"""
if root is None:
return 0
# Validation
def count_nodes(node: TreeNode | None) -> int:
"""
>>> count_nodes(None)
0
"""
if node is None:
return 0
return count_nodes(node.left) + count_nodes(node.right) + 1
def count_coins(node: TreeNode | None) -> int:
"""
>>> count_coins(None)
0
"""
if node is None:
return 0
return count_coins(node.left) + count_coins(node.right) + node.data
if count_nodes(root) != count_coins(root):
raise ValueError("The nodes number should be same as the number of coins")
# Main calculation
def get_distrib(node: TreeNode | None) -> CoinsDistribResult:
"""
>>> get_distrib(None)
CoinsDistribResult(moves=0, excess=1)
"""
if node is None:
return CoinsDistribResult(0, 1)
left_distrib_moves, left_distrib_excess = get_distrib(node.left)
right_distrib_moves, right_distrib_excess = get_distrib(node.right)
coins_to_left = 1 - left_distrib_excess
coins_to_right = 1 - right_distrib_excess
result_moves = (
left_distrib_moves
+ right_distrib_moves
+ abs(coins_to_left)
+ abs(coins_to_right)
)
result_excess = node.data - coins_to_left - coins_to_right
return CoinsDistribResult(result_moves, result_excess)
return get_distrib(root)[0]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/binary_tree/fenwick_tree.py
================================================
from copy import deepcopy
class FenwickTree:
"""
Fenwick Tree
More info: https://en.wikipedia.org/wiki/Fenwick_tree
"""
def __init__(self, arr: list[int] | None = None, size: int | None = None) -> None:
"""
Constructor for the Fenwick tree
Parameters:
arr (list): list of elements to initialize the tree with (optional)
size (int): size of the Fenwick tree (if arr is None)
"""
if arr is None and size is not None:
self.size = size
self.tree = [0] * size
elif arr is not None:
self.init(arr)
else:
raise ValueError("Either arr or size must be specified")
def init(self, arr: list[int]) -> None:
"""
Initialize the Fenwick tree with arr in O(N)
Parameters:
arr (list): list of elements to initialize the tree with
Returns:
None
>>> a = [1, 2, 3, 4, 5]
>>> f1 = FenwickTree(a)
>>> f2 = FenwickTree(size=len(a))
>>> for index, value in enumerate(a):
... f2.add(index, value)
>>> f1.tree == f2.tree
True
"""
self.size = len(arr)
self.tree = deepcopy(arr)
for i in range(1, self.size):
j = self.next_(i)
if j < self.size:
self.tree[j] += self.tree[i]
def get_array(self) -> list[int]:
"""
Get the Normal Array of the Fenwick tree in O(N)
Returns:
list: Normal Array of the Fenwick tree
>>> a = [i for i in range(128)]
>>> f = FenwickTree(a)
>>> f.get_array() == a
True
"""
arr = self.tree[:]
for i in range(self.size - 1, 0, -1):
j = self.next_(i)
if j < self.size:
arr[j] -= arr[i]
return arr
@staticmethod
def next_(index: int) -> int:
return index + (index & (-index))
@staticmethod
def prev(index: int) -> int:
return index - (index & (-index))
def add(self, index: int, value: int) -> None:
"""
Add a value to index in O(lg N)
Parameters:
index (int): index to add value to
value (int): value to add to index
Returns:
None
>>> f = FenwickTree([1, 2, 3, 4, 5])
>>> f.add(0, 1)
>>> f.add(1, 2)
>>> f.add(2, 3)
>>> f.add(3, 4)
>>> f.add(4, 5)
>>> f.get_array()
[2, 4, 6, 8, 10]
"""
if index == 0:
self.tree[0] += value
return
while index < self.size:
self.tree[index] += value
index = self.next_(index)
def update(self, index: int, value: int) -> None:
"""
Set the value of index in O(lg N)
Parameters:
index (int): index to set value to
value (int): value to set in index
Returns:
None
>>> f = FenwickTree([5, 4, 3, 2, 1])
>>> f.update(0, 1)
>>> f.update(1, 2)
>>> f.update(2, 3)
>>> f.update(3, 4)
>>> f.update(4, 5)
>>> f.get_array()
[1, 2, 3, 4, 5]
"""
self.add(index, value - self.get(index))
def prefix(self, right: int) -> int:
"""
Prefix sum of all elements in [0, right) in O(lg N)
Parameters:
right (int): right bound of the query (exclusive)
Returns:
int: sum of all elements in [0, right)
>>> a = [i for i in range(128)]
>>> f = FenwickTree(a)
>>> res = True
>>> for i in range(len(a)):
... res = res and f.prefix(i) == sum(a[:i])
>>> res
True
"""
if right == 0:
return 0
result = self.tree[0]
right -= 1 # make right inclusive
while right > 0:
result += self.tree[right]
right = self.prev(right)
return result
def query(self, left: int, right: int) -> int:
"""
Query the sum of all elements in [left, right) in O(lg N)
Parameters:
left (int): left bound of the query (inclusive)
right (int): right bound of the query (exclusive)
Returns:
int: sum of all elements in [left, right)
>>> a = [i for i in range(128)]
>>> f = FenwickTree(a)
>>> res = True
>>> for i in range(len(a)):
... for j in range(i + 1, len(a)):
... res = res and f.query(i, j) == sum(a[i:j])
>>> res
True
"""
return self.prefix(right) - self.prefix(left)
def get(self, index: int) -> int:
"""
Get value at index in O(lg N)
Parameters:
index (int): index to get the value
Returns:
int: Value of element at index
>>> a = [i for i in range(128)]
>>> f = FenwickTree(a)
>>> res = True
>>> for i in range(len(a)):
... res = res and f.get(i) == a[i]
>>> res
True
"""
return self.query(index, index + 1)
def rank_query(self, value: int) -> int:
"""
Find the largest index i such that the cumulative sum
arr[0] + arr[1] + ... + arr[i] is <= value, in O(lg N)
NOTE: Requires that all values are non-negative!
Parameters:
value (int): value to find the largest index of
Returns:
-1: if value is smaller than arr[0] (no index qualifies)
int: largest index i with arr[0] + ... + arr[i] <= value
>>> f = FenwickTree([1, 2, 0, 3, 0, 5])
>>> f.rank_query(0)
-1
>>> f.rank_query(2)
0
>>> f.rank_query(1)
0
>>> f.rank_query(3)
2
>>> f.rank_query(5)
2
>>> f.rank_query(6)
4
>>> f.rank_query(11)
5
"""
value -= self.tree[0]
if value < 0:
return -1
j = 1  # will grow to the largest power of two that is less than size
while j * 2 < self.size:
j *= 2
i = 0
while j > 0:
if i + j < self.size and self.tree[i + j] <= value:
value -= self.tree[i + j]
i += j
j //= 2
return i
if __name__ == "__main__":
import doctest
doctest.testmod()
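if __name__ == "__main__":
    # Illustrative sketch, not part of the original module: rank_query as an
    # order-statistics query. If index v stores the (non-negative) multiplicity of
    # value v, then the k-th smallest value in the multiset (0-indexed k) is
    # rank_query(k) + 1.
    counts = FenwickTree([1, 2, 0, 3, 0, 5])  # one 0, two 1s, three 3s, five 5s
    assert [counts.rank_query(k) + 1 for k in range(4)] == [0, 1, 1, 3]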
================================================
FILE: data_structures/binary_tree/flatten_binarytree_to_linkedlist.py
================================================
"""
Binary Tree Flattening Algorithm
This code defines an algorithm to flatten a binary tree into a linked list
represented using the right pointers of the tree nodes. It uses in-place
flattening and demonstrates the flattening process along with a display
function to visualize the flattened linked list.
https://www.geeksforgeeks.org/flatten-a-binary-tree-into-linked-list
Author: Arunkumar A
Date: 04/09/2023
"""
from __future__ import annotations
class TreeNode:
"""
A TreeNode has data variable and pointers to TreeNode objects
for its left and right children.
"""
def __init__(self, data: int) -> None:
self.data = data
self.left: TreeNode | None = None
self.right: TreeNode | None = None
def build_tree() -> TreeNode:
"""
Build and return a sample binary tree.
Returns:
TreeNode: The root of the binary tree.
Examples:
>>> root = build_tree()
>>> root.data
1
>>> root.left.data
2
>>> root.right.data
5
>>> root.left.left.data
3
>>> root.left.right.data
4
>>> root.right.right.data
6
"""
root = TreeNode(1)
root.left = TreeNode(2)
root.right = TreeNode(5)
root.left.left = TreeNode(3)
root.left.right = TreeNode(4)
root.right.right = TreeNode(6)
return root
def flatten(root: TreeNode | None) -> None:
"""
Flatten a binary tree into a linked list in-place, where the linked list is
represented using the right pointers of the tree nodes.
Args:
root (TreeNode): The root of the binary tree to be flattened.
Examples:
>>> root = TreeNode(1)
>>> root.left = TreeNode(2)
>>> root.right = TreeNode(5)
>>> root.left.left = TreeNode(3)
>>> root.left.right = TreeNode(4)
>>> root.right.right = TreeNode(6)
>>> flatten(root)
>>> root.data
1
>>> root.right.right is None
False
>>> root.right.right = TreeNode(3)
>>> root.right.right.right is None
True
"""
if not root:
return
# Flatten the left subtree
flatten(root.left)
# Save the right subtree
right_subtree = root.right
# Make the left subtree the new right subtree
root.right = root.left
root.left = None
# Find the end of the new right subtree
current = root
while current.right:
current = current.right
# Append the original right subtree to the end
current.right = right_subtree
# Flatten the updated right subtree
flatten(right_subtree)
def display_linked_list(root: TreeNode | None) -> None:
"""
Display the flattened linked list.
Args:
root (TreeNode | None): The root of the flattened linked list.
Examples:
>>> root = TreeNode(1)
>>> root.right = TreeNode(2)
>>> root.right.right = TreeNode(3)
>>> display_linked_list(root)
1 2 3
>>> root = None
>>> display_linked_list(root)
"""
current = root
while current:
if current.right is None:
print(current.data, end="")
break
print(current.data, end=" ")
current = current.right
if __name__ == "__main__":
print("Flattened Linked List:")
root = build_tree()
flatten(root)
display_linked_list(root)
================================================
FILE: data_structures/binary_tree/floor_and_ceiling.py
================================================
"""
In a binary search tree (BST):
* The floor of key 'k' is the maximum value that is smaller than or equal to 'k'.
* The ceiling of key 'k' is the minimum value that is greater than or equal to 'k'.
Reference:
https://bit.ly/46uB0a2
Author : Arunkumar
Date : 14th October 2023
"""
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
@dataclass
class Node:
key: int
left: Node | None = None
right: Node | None = None
def __iter__(self) -> Iterator[int]:
if self.left:
yield from self.left
yield self.key
if self.right:
yield from self.right
def __len__(self) -> int:
return sum(1 for _ in self)
def floor_ceiling(root: Node | None, key: int) -> tuple[int | None, int | None]:
"""
Find the floor and ceiling values for a given key in a Binary Search Tree (BST).
Args:
root: The root of the binary search tree.
key: The key for which to find the floor and ceiling.
Returns:
A tuple containing the floor and ceiling values, respectively.
Examples:
>>> root = Node(10)
>>> root.left = Node(5)
>>> root.right = Node(20)
>>> root.left.left = Node(3)
>>> root.left.right = Node(7)
>>> root.right.left = Node(15)
>>> root.right.right = Node(25)
>>> tuple(root)
(3, 5, 7, 10, 15, 20, 25)
>>> floor_ceiling(root, 8)
(7, 10)
>>> floor_ceiling(root, 14)
(10, 15)
>>> floor_ceiling(root, -1)
(None, 3)
>>> floor_ceiling(root, 30)
(25, None)
"""
floor_val = None
ceiling_val = None
while root:
if root.key == key:
floor_val = root.key
ceiling_val = root.key
break
if key < root.key:
ceiling_val = root.key
root = root.left
else:
floor_val = root.key
root = root.right
return floor_val, ceiling_val
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/binary_tree/inorder_tree_traversal_2022.py
================================================
"""
Illustrate how to implement inorder traversal in a binary search tree.
Author: Gurneet Singh
https://www.geeksforgeeks.org/tree-traversals-inorder-preorder-and-postorder/
"""
class BinaryTreeNode:
"""Defining the structure of BinaryTreeNode"""
def __init__(self, data: int) -> None:
self.data = data
self.left_child: BinaryTreeNode | None = None
self.right_child: BinaryTreeNode | None = None
def insert(node: BinaryTreeNode | None, new_value: int) -> BinaryTreeNode | None:
"""
If the binary search tree is empty, make a new node and declare it as root.
>>> node_a = BinaryTreeNode(12345)
>>> node_b = insert(node_a, 67890)
>>> node_a.left_child == node_b.left_child
True
>>> node_a.right_child == node_b.right_child
True
>>> node_a.data == node_b.data
True
"""
if node is None:
node = BinaryTreeNode(new_value)
return node
# binary search tree is not empty,
# so we will insert it into the tree
# if new_value is less than value of data in node,
# add it to left subtree and proceed recursively
if new_value < node.data:
node.left_child = insert(node.left_child, new_value)
else:
# if new_value is greater than value of data in node,
# add it to right subtree and proceed recursively
node.right_child = insert(node.right_child, new_value)
return node
def inorder(node: None | BinaryTreeNode) -> list[int]:  # if node is None, return an empty list
"""
>>> inorder(make_tree())
[6, 10, 14, 15, 20, 25, 60]
"""
if node:
inorder_array = inorder(node.left_child)
inorder_array = [*inorder_array, node.data]
inorder_array = inorder_array + inorder(node.right_child)
else:
inorder_array = []
return inorder_array
def make_tree() -> BinaryTreeNode | None:
root = insert(None, 15)
insert(root, 10)
insert(root, 25)
insert(root, 6)
insert(root, 14)
insert(root, 20)
insert(root, 60)
return root
def main() -> None:
# main function
root = make_tree()
print("Printing values of binary search tree in Inorder Traversal.")
print(inorder(root))
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: data_structures/binary_tree/is_sorted.py
================================================
"""
Given the root of a binary tree, determine if it is a valid binary search tree (BST).
A valid binary search tree is defined as follows:
- The left subtree of a node contains only nodes with keys less than the node's key.
- The right subtree of a node contains only nodes with keys greater than the node's key.
- Both the left and right subtrees must also be binary search trees.
In effect, a binary tree is a valid BST if its nodes are sorted in ascending order.
leetcode: https://leetcode.com/problems/validate-binary-search-tree/
If n is the number of nodes in the tree then:
Runtime: O(n)
Space: O(1)
"""
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
@dataclass
class Node:
data: float
left: Node | None = None
right: Node | None = None
def __iter__(self) -> Iterator[float]:
"""
>>> root = Node(data=2.1)
>>> list(root)
[2.1]
>>> root.left=Node(data=2.0)
>>> list(root)
[2.0, 2.1]
>>> root.right=Node(data=2.2)
>>> list(root)
[2.0, 2.1, 2.2]
"""
if self.left:
yield from self.left
yield self.data
if self.right:
yield from self.right
@property
def is_sorted(self) -> bool:
"""
>>> Node(data='abc').is_sorted
True
>>> Node(data=2,
... left=Node(data=1.999),
... right=Node(data=3)).is_sorted
True
>>> Node(data=0,
... left=Node(data=0),
... right=Node(data=0)).is_sorted
True
>>> Node(data=0,
... left=Node(data=-11),
... right=Node(data=3)).is_sorted
True
>>> Node(data=5,
... left=Node(data=1),
... right=Node(data=4, left=Node(data=3))).is_sorted
False
>>> Node(data='a',
... left=Node(data=1),
... right=Node(data=4, left=Node(data=3))).is_sorted
Traceback (most recent call last):
...
TypeError: '<' not supported between instances of 'str' and 'int'
>>> Node(data=2,
... left=Node([]),
... right=Node(data=4, left=Node(data=3))).is_sorted
Traceback (most recent call last):
...
TypeError: '<' not supported between instances of 'int' and 'list'
"""
if self.left and (self.data < self.left.data or not self.left.is_sorted):
return False
return not (
self.right and (self.data > self.right.data or not self.right.is_sorted)
)
if __name__ == "__main__":
import doctest
doctest.testmod()
tree = Node(data=2.1, left=Node(data=2.0), right=Node(data=2.2))
print(f"Tree {list(tree)} is sorted: {tree.is_sorted = }.")
assert tree.right
tree.right.data = 2.0
print(f"Tree {list(tree)} is sorted: {tree.is_sorted = }.")
tree.right.data = 2.1
print(f"Tree {list(tree)} is sorted: {tree.is_sorted = }.")
================================================
FILE: data_structures/binary_tree/is_sum_tree.py
================================================
"""
Is a binary tree a sum tree where the value of every non-leaf node is equal to the sum
of the values of its left and right subtrees?
https://www.geeksforgeeks.org/check-if-a-given-binary-tree-is-sumtree
"""
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
@dataclass
class Node:
data: int
left: Node | None = None
right: Node | None = None
def __iter__(self) -> Iterator[int]:
"""
>>> root = Node(2)
>>> list(root)
[2]
>>> root.left = Node(1)
>>> tuple(root)
(1, 2)
"""
if self.left:
yield from self.left
yield self.data
if self.right:
yield from self.right
def __len__(self) -> int:
"""
>>> root = Node(2)
>>> len(root)
1
>>> root.left = Node(1)
>>> len(root)
2
"""
return sum(1 for _ in self)
@property
def is_sum_node(self) -> bool:
"""
>>> root = Node(3)
>>> root.is_sum_node
True
>>> root.left = Node(1)
>>> root.is_sum_node
False
>>> root.right = Node(2)
>>> root.is_sum_node
True
"""
if not self.left and not self.right:
return True # leaf nodes are considered sum nodes
left_sum = sum(self.left) if self.left else 0
right_sum = sum(self.right) if self.right else 0
return all(
(
self.data == left_sum + right_sum,
self.left.is_sum_node if self.left else True,
self.right.is_sum_node if self.right else True,
)
)
@dataclass
class BinaryTree:
root: Node
def __iter__(self) -> Iterator[int]:
"""
>>> list(BinaryTree.build_a_tree())
[1, 2, 7, 11, 15, 29, 35, 40]
"""
return iter(self.root)
def __len__(self) -> int:
"""
>>> len(BinaryTree.build_a_tree())
8
"""
return len(self.root)
def __str__(self) -> str:
"""
Returns a string representation of the inorder traversal of the binary tree.
>>> str(list(BinaryTree.build_a_tree()))
'[1, 2, 7, 11, 15, 29, 35, 40]'
"""
return str(list(self))
@property
def is_sum_tree(self) -> bool:
"""
>>> BinaryTree.build_a_tree().is_sum_tree
False
>>> BinaryTree.build_a_sum_tree().is_sum_tree
True
"""
return self.root.is_sum_node
@classmethod
def build_a_tree(cls) -> BinaryTree:
r"""
Create a binary tree with the specified structure:
11
/ \
2 29
/ \ / \
1 7 15 40
\
35
>>> list(BinaryTree.build_a_tree())
[1, 2, 7, 11, 15, 29, 35, 40]
"""
tree = BinaryTree(Node(11))
root = tree.root
root.left = Node(2)
root.right = Node(29)
root.left.left = Node(1)
root.left.right = Node(7)
root.right.left = Node(15)
root.right.right = Node(40)
root.right.right.left = Node(35)
return tree
@classmethod
def build_a_sum_tree(cls) -> BinaryTree:
r"""
Create a binary tree with the specified structure:
26
/ \
10 3
/ \ \
4 6 3
>>> list(BinaryTree.build_a_sum_tree())
[4, 10, 6, 26, 3, 3]
"""
tree = BinaryTree(Node(26))
root = tree.root
root.left = Node(10)
root.right = Node(3)
root.left.left = Node(4)
root.left.right = Node(6)
root.right.right = Node(3)
return tree
if __name__ == "__main__":
from doctest import testmod
testmod()
tree = BinaryTree.build_a_tree()
print(f"{tree} has {len(tree)} nodes and {tree.is_sum_tree = }.")
tree = BinaryTree.build_a_sum_tree()
print(f"{tree} has {len(tree)} nodes and {tree.is_sum_tree = }.")
================================================
FILE: data_structures/binary_tree/lazy_segment_tree.py
================================================
from __future__ import annotations
import math
class SegmentTree:
def __init__(self, size: int) -> None:
self.size = size
# allocate 4 * size entries, a safe upper bound on the size of the segment tree
self.segment_tree = [0 for i in range(4 * size)]
# create array to store lazy update
self.lazy = [0 for i in range(4 * size)]
self.flag = [0 for i in range(4 * size)] # flag for lazy update
def left(self, idx: int) -> int:
"""
>>> segment_tree = SegmentTree(15)
>>> segment_tree.left(1)
2
>>> segment_tree.left(2)
4
>>> segment_tree.left(12)
24
"""
return idx * 2
def right(self, idx: int) -> int:
"""
>>> segment_tree = SegmentTree(15)
>>> segment_tree.right(1)
3
>>> segment_tree.right(2)
5
>>> segment_tree.right(12)
25
"""
return idx * 2 + 1
def build(
self, idx: int, left_element: int, right_element: int, a: list[int]
) -> None:
if left_element == right_element:
self.segment_tree[idx] = a[left_element - 1]
else:
mid = (left_element + right_element) // 2
self.build(self.left(idx), left_element, mid, a)
self.build(self.right(idx), mid + 1, right_element, a)
self.segment_tree[idx] = max(
self.segment_tree[self.left(idx)], self.segment_tree[self.right(idx)]
)
def update(
self, idx: int, left_element: int, right_element: int, a: int, b: int, val: int
) -> bool:
"""
Update the range [a, b] with value val in O(lg n). (A normal segment tree
without lazy propagation would take O(n lg n) for each range update.)
Call as update(1, 1, size, a, b, val).
"""
if self.flag[idx] is True:
self.segment_tree[idx] = self.lazy[idx]
self.flag[idx] = False
if left_element != right_element:
self.lazy[self.left(idx)] = self.lazy[idx]
self.lazy[self.right(idx)] = self.lazy[idx]
self.flag[self.left(idx)] = True
self.flag[self.right(idx)] = True
if right_element < a or left_element > b:
return True
if left_element >= a and right_element <= b:
self.segment_tree[idx] = val
if left_element != right_element:
self.lazy[self.left(idx)] = val
self.lazy[self.right(idx)] = val
self.flag[self.left(idx)] = True
self.flag[self.right(idx)] = True
return True
mid = (left_element + right_element) // 2
self.update(self.left(idx), left_element, mid, a, b, val)
self.update(self.right(idx), mid + 1, right_element, a, b, val)
self.segment_tree[idx] = max(
self.segment_tree[self.left(idx)], self.segment_tree[self.right(idx)]
)
return True
# query with O(lg n)
def query(
self, idx: int, left_element: int, right_element: int, a: int, b: int
) -> int | float:
"""
query(1, 1, size, a, b) for query max of [a,b]
>>> A = [1, 2, -4, 7, 3, -5, 6, 11, -20, 9, 14, 15, 5, 2, -8]
>>> segment_tree = SegmentTree(15)
>>> segment_tree.build(1, 1, 15, A)
>>> segment_tree.query(1, 1, 15, 4, 6)
7
>>> segment_tree.query(1, 1, 15, 7, 11)
14
>>> segment_tree.query(1, 1, 15, 7, 12)
15
"""
if self.flag[idx] is True:
self.segment_tree[idx] = self.lazy[idx]
self.flag[idx] = False
if left_element != right_element:
self.lazy[self.left(idx)] = self.lazy[idx]
self.lazy[self.right(idx)] = self.lazy[idx]
self.flag[self.left(idx)] = True
self.flag[self.right(idx)] = True
if right_element < a or left_element > b:
return -math.inf
if left_element >= a and right_element <= b:
return self.segment_tree[idx]
mid = (left_element + right_element) // 2
q1 = self.query(self.left(idx), left_element, mid, a, b)
q2 = self.query(self.right(idx), mid + 1, right_element, a, b)
return max(q1, q2)
def __str__(self) -> str:
return str([self.query(1, 1, self.size, i, i) for i in range(1, self.size + 1)])
if __name__ == "__main__":
A = [1, 2, -4, 7, 3, -5, 6, 11, -20, 9, 14, 15, 5, 2, -8]
size = 15
segt = SegmentTree(size)
segt.build(1, 1, size, A)
print(segt.query(1, 1, size, 4, 6))
print(segt.query(1, 1, size, 7, 11))
print(segt.query(1, 1, size, 7, 12))
segt.update(1, 1, size, 1, 3, 111)
print(segt.query(1, 1, size, 1, 15))
segt.update(1, 1, size, 7, 8, 235)
print(segt)
================================================
FILE: data_structures/binary_tree/lowest_common_ancestor.py
================================================
# https://en.wikipedia.org/wiki/Lowest_common_ancestor
# https://en.wikipedia.org/wiki/Breadth-first_search
from __future__ import annotations
from queue import Queue
def swap(a: int, b: int) -> tuple[int, int]:
"""
Return a tuple (b, a) when given two integers a and b
>>> swap(2,3)
(3, 2)
>>> swap(3,4)
(4, 3)
>>> swap(67, 12)
(12, 67)
>>> swap(3,-4)
(-4, 3)
"""
a ^= b
b ^= a
a ^= b
return a, b
def create_sparse(max_node: int, parent: list[list[int]]) -> list[list[int]]:
"""
create a sparse table which saves each node's 2^i-th parent
>>> max_node = 6
>>> parent = [[0, 0, 1, 1, 2, 2, 3]] + [[0] * 7 for _ in range(19)]
>>> parent = create_sparse(max_node=max_node, parent=parent)
>>> parent[0]
[0, 0, 1, 1, 2, 2, 3]
>>> parent[1]
[0, 0, 0, 0, 1, 1, 1]
>>> parent[2]
[0, 0, 0, 0, 0, 0, 0]
>>> max_node = 1
>>> parent = [[0, 0]] + [[0] * 2 for _ in range(19)]
>>> parent = create_sparse(max_node=max_node, parent=parent)
>>> parent[0]
[0, 0]
>>> parent[1]
[0, 0]
"""
j = 1
while (1 << j) < max_node:
for i in range(1, max_node + 1):
parent[j][i] = parent[j - 1][parent[j - 1][i]]
j += 1
return parent
# returns lca of node u,v
def lowest_common_ancestor(
u: int, v: int, level: list[int], parent: list[list[int]]
) -> int:
"""
Return the lowest common ancestor between u and v
>>> level = [-1, 0, 1, 1, 2, 2, 2]
>>> parent = [[0, 0, 1, 1, 2, 2, 3],[0, 0, 0, 0, 1, 1, 1]] + \
[[0] * 7 for _ in range(17)]
>>> lowest_common_ancestor(u=4, v=5, level=level, parent=parent)
2
>>> lowest_common_ancestor(u=4, v=6, level=level, parent=parent)
1
>>> lowest_common_ancestor(u=2, v=3, level=level, parent=parent)
1
>>> lowest_common_ancestor(u=6, v=6, level=level, parent=parent)
6
"""
# u must be deeper in the tree than v
if level[u] < level[v]:
u, v = swap(u, v)
# making depth of u same as depth of v
for i in range(18, -1, -1):
if level[u] - (1 << i) >= level[v]:
u = parent[i][u]
# at the same depth, if u == v then the lca has been found
if u == v:
return u
# move both nodes upwards until the lca is found
for i in range(18, -1, -1):
if parent[i][u] not in [0, parent[i][v]]:
u, v = parent[i][u], parent[i][v]
# return the lowest common ancestor of u and v
return parent[0][u]
# runs a breadth first search from root node of the tree
def breadth_first_search(
level: list[int],
parent: list[list[int]],
max_node: int,
graph: dict[int, list[int]],
root: int = 1,
) -> tuple[list[int], list[list[int]]]:
"""
Set every node's direct parent.
The parent of the root node is set to 0.
Calculate the depth of each node from the root node.
>>> level = [-1] * 7
>>> parent = [[0] * 7 for _ in range(20)]
>>> graph = {1: [2, 3], 2: [4, 5], 3: [6], 4: [], 5: [], 6: []}
>>> level, parent = breadth_first_search(
... level=level, parent=parent, max_node=6, graph=graph, root=1)
>>> level
[-1, 0, 1, 1, 2, 2, 2]
>>> parent[0]
[0, 0, 1, 1, 2, 2, 3]
>>> level = [-1] * 2
>>> parent = [[0] * 2 for _ in range(20)]
>>> graph = {1: []}
>>> level, parent = breadth_first_search(
... level=level, parent=parent, max_node=1, graph=graph, root=1)
>>> level
[-1, 0]
>>> parent[0]
[0, 0]
"""
level[root] = 0
q: Queue[int] = Queue(maxsize=max_node)
q.put(root)
while q.qsize() != 0:
u = q.get()
for v in graph[u]:
if level[v] == -1:
level[v] = level[u] + 1
q.put(v)
parent[0][v] = u
return level, parent
def main() -> None:
max_node = 13
# initializing with 0
parent = [[0 for _ in range(max_node + 10)] for _ in range(20)]
# initializing with -1 which means every node is unvisited
level = [-1 for _ in range(max_node + 10)]
graph: dict[int, list[int]] = {
1: [2, 3, 4],
2: [5],
3: [6, 7],
4: [8],
5: [9, 10],
6: [11],
7: [],
8: [12, 13],
9: [],
10: [],
11: [],
12: [],
13: [],
}
level, parent = breadth_first_search(level, parent, max_node, graph, 1)
parent = create_sparse(max_node, parent)
print("LCA of node 1 and 3 is: ", lowest_common_ancestor(1, 3, level, parent))
print("LCA of node 5 and 6 is: ", lowest_common_ancestor(5, 6, level, parent))
print("LCA of node 7 and 11 is: ", lowest_common_ancestor(7, 11, level, parent))
print("LCA of node 6 and 7 is: ", lowest_common_ancestor(6, 7, level, parent))
print("LCA of node 4 and 12 is: ", lowest_common_ancestor(4, 12, level, parent))
print("LCA of node 8 and 8 is: ", lowest_common_ancestor(8, 8, level, parent))
if __name__ == "__main__":
main()
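if __name__ == "__main__":
    # Illustrative sketch, not part of the original module: the sparse table encodes
    # binary lifting, i.e. parent[i][u] is the 2**i-th ancestor of u, so climbing k
    # levels only needs one lookup per set bit of k. Uses the small tree from the
    # docstrings above.
    demo_graph = {1: [2, 3], 2: [4, 5], 3: [6], 4: [], 5: [], 6: []}
    demo_level = [-1] * 7
    demo_parent = [[0] * 7 for _ in range(20)]
    demo_level, demo_parent = breadth_first_search(
        level=demo_level, parent=demo_parent, max_node=6, graph=demo_graph, root=1
    )
    demo_parent = create_sparse(max_node=6, parent=demo_parent)
    assert (demo_parent[0][4], demo_parent[1][4]) == (2, 1)  # ancestors of node 4
    assert lowest_common_ancestor(4, 6, demo_level, demo_parent) == 1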
================================================
FILE: data_structures/binary_tree/maximum_fenwick_tree.py
================================================
class MaxFenwickTree:
"""
Maximum Fenwick Tree
More info: https://cp-algorithms.com/data_structures/fenwick.html
---------
>>> ft = MaxFenwickTree(5)
>>> ft.query(0, 5)
0
>>> ft.update(4, 100)
>>> ft.query(0, 5)
100
>>> ft.update(4, 0)
>>> ft.update(2, 20)
>>> ft.query(0, 5)
20
>>> ft.update(4, 10)
>>> ft.query(2, 5)
20
>>> ft.query(1, 5)
20
>>> ft.update(2, 0)
>>> ft.query(0, 5)
10
>>> ft = MaxFenwickTree(10000)
>>> ft.update(255, 30)
>>> ft.query(0, 10000)
30
>>> ft = MaxFenwickTree(6)
>>> ft.update(5, 1)
>>> ft.query(5, 6)
1
>>> ft = MaxFenwickTree(6)
>>> ft.update(0, 1000)
>>> ft.query(0, 1)
1000
"""
def __init__(self, size: int) -> None:
"""
Create empty Maximum Fenwick Tree with specified size
Parameters:
size: size of Array
Returns:
None
"""
self.size = size
self.arr = [0] * size
self.tree = [0] * size
@staticmethod
def get_next(index: int) -> int:
"""
Get next index in O(1)
"""
return index | (index + 1)
@staticmethod
def get_prev(index: int) -> int:
"""
Get previous index in O(1)
"""
return (index & (index + 1)) - 1
def update(self, index: int, value: int) -> None:
"""
Set index to value in O(lg^2 N)
Parameters:
index: index to update
value: value to set
Returns:
None
"""
self.arr[index] = value
while index < self.size:
current_left_border = self.get_prev(index) + 1
if current_left_border == index:
self.tree[index] = value
else:
# tree[index] covers arr[current_left_border .. index]; arr[index] is already value
self.tree[index] = max(value, self.query(current_left_border, index))
index = self.get_next(index)
def query(self, left: int, right: int) -> int:
"""
Answer the query of maximum range [l, r) in O(lg^2 N)
Parameters:
left: left index of query range (inclusive)
right: right index of query range (exclusive)
Returns:
Maximum value of range [left, right)
"""
right -= 1  # Because right is exclusive
result = 0
while left <= right:
current_left = self.get_prev(right)
if left <= current_left:
result = max(result, self.tree[right])
right = current_left
else:
result = max(result, self.arr[right])
right -= 1
return result
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/binary_tree/maximum_sum_bst.py
================================================
from __future__ import annotations
import sys
from dataclasses import dataclass
INT_MIN = -sys.maxsize + 1
INT_MAX = sys.maxsize - 1
@dataclass
class TreeNode:
val: int = 0
left: TreeNode | None = None
right: TreeNode | None = None
def max_sum_bst(root: TreeNode | None) -> int:
"""
The solution traverses a binary tree to find the maximum sum of
keys in any subtree that is a Binary Search Tree (BST). It uses
recursion to validate BST properties and calculates sums, returning
the highest sum found among all valid BST subtrees.
>>> t1 = TreeNode(4)
>>> t1.left = TreeNode(3)
>>> t1.left.left = TreeNode(1)
>>> t1.left.right = TreeNode(2)
>>> print(max_sum_bst(t1))
2
>>> t2 = TreeNode(-4)
>>> t2.left = TreeNode(-2)
>>> t2.right = TreeNode(-5)
>>> print(max_sum_bst(t2))
0
>>> t3 = TreeNode(1)
>>> t3.left = TreeNode(4)
>>> t3.left.left = TreeNode(2)
>>> t3.left.right = TreeNode(4)
>>> t3.right = TreeNode(3)
>>> t3.right.left = TreeNode(2)
>>> t3.right.right = TreeNode(5)
>>> t3.right.right.left = TreeNode(4)
>>> t3.right.right.right = TreeNode(6)
>>> print(max_sum_bst(t3))
20
"""
ans: int = 0
def solver(node: TreeNode | None) -> tuple[bool, int, int, int]:
"""
Post-order helper that returns (is_valid_bst, min, max, subtree_sum) and updates ans.
>>> t1 = TreeNode(1)
>>> print(solver(t1))
(True, 1, 1, 1)
"""
nonlocal ans
if not node:
return True, INT_MAX, INT_MIN, 0 # Valid BST, min, max, sum
is_left_valid, min_left, max_left, sum_left = solver(node.left)
is_right_valid, min_right, max_right, sum_right = solver(node.right)
if is_left_valid and is_right_valid and max_left < node.val < min_right:
total_sum = sum_left + sum_right + node.val
ans = max(ans, total_sum)
return True, min(min_left, node.val), max(max_right, node.val), total_sum
return False, -1, -1, -1 # Not a valid BST
solver(root)
return ans
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/binary_tree/merge_two_binary_trees.py
================================================
#!/usr/local/bin/python3
"""
Problem Description: Given two binary trees, return the merged tree.
The rule for merging is that if two nodes overlap, the merged node's value is the
sum of both nodes' values. Otherwise, the non-null node is used as the node of the
new tree.
"""
from __future__ import annotations
class Node:
"""
A binary node has value variable and pointers to its left and right node.
"""
def __init__(self, value: int = 0) -> None:
self.value = value
self.left: Node | None = None
self.right: Node | None = None
def merge_two_binary_trees(tree1: Node | None, tree2: Node | None) -> Node | None:
"""
Returns root node of the merged tree.
>>> tree1 = Node(5)
>>> tree1.left = Node(6)
>>> tree1.right = Node(7)
>>> tree1.left.left = Node(2)
>>> tree2 = Node(4)
>>> tree2.left = Node(5)
>>> tree2.right = Node(8)
>>> tree2.left.right = Node(1)
>>> tree2.right.right = Node(4)
>>> merged_tree = merge_two_binary_trees(tree1, tree2)
>>> print_preorder(merged_tree)
9
11
2
1
15
4
"""
if tree1 is None:
return tree2
if tree2 is None:
return tree1
tree1.value = tree1.value + tree2.value
tree1.left = merge_two_binary_trees(tree1.left, tree2.left)
tree1.right = merge_two_binary_trees(tree1.right, tree2.right)
return tree1
def print_preorder(root: Node | None) -> None:
"""
Print pre-order traversal of the tree.
>>> root = Node(1)
>>> root.left = Node(2)
>>> root.right = Node(3)
>>> print_preorder(root)
1
2
3
>>> print_preorder(root.right)
3
"""
if root:
print(root.value)
print_preorder(root.left)
print_preorder(root.right)
if __name__ == "__main__":
tree1 = Node(1)
tree1.left = Node(2)
tree1.right = Node(3)
tree1.left.left = Node(4)
tree2 = Node(2)
tree2.left = Node(4)
tree2.right = Node(6)
tree2.left.right = Node(9)
tree2.right.right = Node(5)
print("Tree1 is: ")
print_preorder(tree1)
print("Tree2 is: ")
print_preorder(tree2)
merged_tree = merge_two_binary_trees(tree1, tree2)
print("Merged Tree is: ")
print_preorder(merged_tree)
================================================
FILE: data_structures/binary_tree/mirror_binary_tree.py
================================================
"""
Given the root of a binary tree, mirror the tree, and return its root.
Leetcode problem reference: https://leetcode.com/problems/mirror-binary-tree/
"""
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
@dataclass
class Node:
"""
A Node has value variable and pointers to Nodes to its left and right.
"""
value: int
left: Node | None = None
right: Node | None = None
def __iter__(self) -> Iterator[int]:
if self.left:
yield from self.left
yield self.value
if self.right:
yield from self.right
def __len__(self) -> int:
return sum(1 for _ in self)
def mirror(self) -> Node:
"""
Mirror the binary tree rooted at this node by swapping left and right children.
>>> tree = Node(0)
>>> list(tree)
[0]
>>> list(tree.mirror())
[0]
>>> tree = Node(1, Node(0), Node(3, Node(2), Node(4, None, Node(5))))
>>> tuple(tree)
(0, 1, 2, 3, 4, 5)
>>> tuple(tree.mirror())
(5, 4, 3, 2, 1, 0)
"""
self.left, self.right = self.right, self.left
if self.left:
self.left.mirror()
if self.right:
self.right.mirror()
return self
def make_tree_seven() -> Node:
r"""
Return a binary tree with 7 nodes that looks like this:
::
1
/ \
2 3
/ \ / \
4 5 6 7
>>> tree_seven = make_tree_seven()
>>> len(tree_seven)
7
>>> list(tree_seven)
[4, 2, 5, 1, 6, 3, 7]
"""
tree = Node(1)
tree.left = Node(2)
tree.right = Node(3)
tree.left.left = Node(4)
tree.left.right = Node(5)
tree.right.left = Node(6)
tree.right.right = Node(7)
return tree
def make_tree_nine() -> Node:
r"""
Return a binary tree with 9 nodes that looks like this:
::
1
/ \
2 3
/ \ \
4 5 6
/ \ \
7 8 9
>>> tree_nine = make_tree_nine()
>>> len(tree_nine)
9
>>> list(tree_nine)
[7, 4, 8, 2, 5, 9, 1, 3, 6]
"""
tree = Node(1)
tree.left = Node(2)
tree.right = Node(3)
tree.left.left = Node(4)
tree.left.right = Node(5)
tree.right.right = Node(6)
tree.left.left.left = Node(7)
tree.left.left.right = Node(8)
tree.left.right.right = Node(9)
return tree
def main() -> None:
r"""
Mirror the example binary trees and show the node sequences before and after.
>>> tree = make_tree_nine()
>>> tuple(tree)
(7, 4, 8, 2, 5, 9, 1, 3, 6)
>>> tuple(tree.mirror())
(6, 3, 1, 9, 5, 2, 8, 4, 7)
nine_tree::
1
/ \
2 3
/ \ \
4 5 6
/ \ \
7 8 9
The mirrored tree looks like this::
1
/ \
3 2
/ / \
6 5 4
/ / \
9 8 7
"""
trees = {"zero": Node(0), "seven": make_tree_seven(), "nine": make_tree_nine()}
for name, tree in trees.items():
print(f" The {name} tree: {tuple(tree)}")
# (0,)
# (4, 2, 5, 1, 6, 3, 7)
# (7, 4, 8, 2, 5, 9, 1, 3, 6)
print(f"Mirror of {name} tree: {tuple(tree.mirror())}")
# (0,)
# (7, 3, 6, 1, 5, 2, 4)
# (6, 3, 1, 9, 5, 2, 8, 4, 7)
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: data_structures/binary_tree/non_recursive_segment_tree.py
================================================
"""
A non-recursive Segment Tree implementation with range query and single element update,
works virtually with any list of the same type of elements with a "commutative"
combiner.
Explanation:
https://www.geeksforgeeks.org/iterative-segment-tree-range-minimum-query/
https://www.geeksforgeeks.org/segment-tree-efficient-implementation/
>>> SegmentTree([1, 2, 3], lambda a, b: a + b).query(0, 2)
6
>>> SegmentTree([3, 1, 2], min).query(0, 2)
1
>>> SegmentTree([2, 3, 1], max).query(0, 2)
3
>>> st = SegmentTree([1, 5, 7, -1, 6], lambda a, b: a + b)
>>> st.update(1, -1)
>>> st.update(2, 3)
>>> st.query(1, 2)
2
>>> st.query(1, 1)
-1
>>> st.update(4, 1)
>>> st.query(3, 4)
0
>>> st = SegmentTree([[1, 2, 3], [3, 2, 1], [1, 1, 1]], lambda a, b: [a[i] + b[i] for i
... in range(len(a))])
>>> st.query(0, 1)
[4, 4, 4]
>>> st.query(1, 2)
[4, 3, 2]
>>> st.update(1, [-1, -1, -1])
>>> st.query(1, 2)
[0, 0, 0]
>>> st.query(0, 2)
[1, 2, 3]
"""
from __future__ import annotations
from collections.abc import Callable
from typing import Any
class SegmentTree[T]:
def __init__(self, arr: list[T], fnc: Callable[[T, T], T]) -> None:
"""
Segment Tree constructor; it works only with a commutative combiner.
:param arr: list of elements for the segment tree
:param fnc: commutative function for combine two elements
>>> SegmentTree(['a', 'b', 'c'], lambda a, b: f'{a}{b}').query(0, 2)
'abc'
>>> SegmentTree([(1, 2), (2, 3), (3, 4)],
... lambda a, b: (a[0] + b[0], a[1] + b[1])).query(0, 2)
(6, 9)
"""
any_type: Any | T = None
self.N: int = len(arr)
self.st: list[T] = [any_type for _ in range(self.N)] + arr
self.fn = fnc
self.build()
def build(self) -> None:
for p in range(self.N - 1, 0, -1):
self.st[p] = self.fn(self.st[p * 2], self.st[p * 2 + 1])
def update(self, p: int, v: T) -> None:
"""
Update an element in log(N) time
:param p: position to be updated
:param v: new value
>>> st = SegmentTree([3, 1, 2, 4], min)
>>> st.query(0, 3)
1
>>> st.update(2, -1)
>>> st.query(0, 3)
-1
"""
p += self.N
self.st[p] = v
while p > 1:
p = p // 2
self.st[p] = self.fn(self.st[p * 2], self.st[p * 2 + 1])
def query(self, left: int, right: int) -> T | None:
"""
Get range query value in log(N) time
:param left: left element index
:param right: right element index
:return: element combined in the range [left, right]
>>> st = SegmentTree([1, 2, 3, 4], lambda a, b: a + b)
>>> st.query(0, 2)
6
>>> st.query(1, 2)
5
>>> st.query(0, 3)
10
>>> st.query(2, 3)
7
"""
left, right = left + self.N, right + self.N
res: T | None = None
while left <= right:
if left % 2 == 1:
res = self.st[left] if res is None else self.fn(res, self.st[left])
if right % 2 == 0:
res = self.st[right] if res is None else self.fn(res, self.st[right])
left, right = (left + 1) // 2, (right - 1) // 2
return res
if __name__ == "__main__":
from functools import reduce
test_array = [1, 10, -2, 9, -3, 8, 4, -7, 5, 6, 11, -12]
test_updates = {
0: 7,
1: 2,
2: 6,
3: -14,
4: 5,
5: 4,
6: 7,
7: -10,
8: 9,
9: 10,
10: 12,
11: 1,
}
min_segment_tree = SegmentTree(test_array, min)
max_segment_tree = SegmentTree(test_array, max)
sum_segment_tree = SegmentTree(test_array, lambda a, b: a + b)
def test_all_segments() -> None:
"""
Test all possible segments
"""
for i in range(len(test_array)):
for j in range(i, len(test_array)):
min_range = reduce(min, test_array[i : j + 1])
max_range = reduce(max, test_array[i : j + 1])
sum_range = reduce(lambda a, b: a + b, test_array[i : j + 1])
assert min_range == min_segment_tree.query(i, j)
assert max_range == max_segment_tree.query(i, j)
assert sum_range == sum_segment_tree.query(i, j)
test_all_segments()
for index, value in test_updates.items():
test_array[index] = value
min_segment_tree.update(index, value)
max_segment_tree.update(index, value)
sum_segment_tree.update(index, value)
test_all_segments()
================================================
FILE: data_structures/binary_tree/number_of_possible_binary_trees.py
================================================
"""
We are going to compute the Catalan number, which counts the number of possible
binary search trees that can be built from a given number of nodes.
We will use the recurrence t(n) = SUM(i = 1 to n) t(i-1) * t(n-i); a direct
dynamic-programming sketch of this recurrence appears below.
Further details at Wikipedia: https://en.wikipedia.org/wiki/Catalan_number
"""
"""
Our contribution:
We define two functions:
1. catalan_number(node_count: int) -> int
Returns the number of possible binary search trees for n nodes.
2. binary_tree_count(node_count: int) -> int
Returns the number of possible binary trees for n nodes.
"""
def binomial_coefficient(n: int, k: int) -> int:
"""
Compute the binomial coefficient:
https://en.wikipedia.org/wiki/Binomial_coefficient
C(n,k) = n! / k!(n-k)!
:param n: 2 times the number of nodes
:param k: Number of nodes
:return: Integer Value
>>> binomial_coefficient(4, 2)
6
"""
result = 1  # To keep the calculated value
# Since C(n, k) = C(n, n-k)
k = min(k, n - k)
# Calculate C(n,k)
for i in range(k):
result *= n - i
result //= i + 1
return result
def catalan_number(node_count: int) -> int:
"""
The Catalan number can be computed in many ways, but here we use the binomial
coefficient because it does the job in O(n).
Return the Catalan number for n nodes using C(2n, n) / (n + 1).
:param node_count: number of nodes
:return: Catalan number of n nodes
>>> catalan_number(5)
42
>>> catalan_number(6)
132
"""
return binomial_coefficient(2 * node_count, node_count) // (node_count + 1)
def factorial(n: int) -> int:
"""
Return the factorial of a number.
:param n: Number to find the Factorial of.
:return: Factorial of n.
>>> import math
>>> all(factorial(i) == math.factorial(i) for i in range(10))
True
>>> factorial(-5) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: factorial() not defined for negative values
"""
if n < 0:
raise ValueError("factorial() not defined for negative values")
result = 1
for i in range(1, n + 1):
result *= i
return result
def binary_tree_count(node_count: int) -> int:
"""
Return the number of possible binary trees.
:param node_count: number of nodes
:return: Number of possible binary trees
>>> binary_tree_count(5)
5040
>>> binary_tree_count(6)
95040
"""
return catalan_number(node_count) * factorial(node_count)
if __name__ == "__main__":
node_count = int(input("Enter the number of nodes: ").strip() or 0)
if node_count <= 0:
raise ValueError("We need some nodes to work with.")
print(
f"Given {node_count} nodes, there are {binary_tree_count(node_count)} "
f"binary trees and {catalan_number(node_count)} binary search trees."
)
================================================
FILE: data_structures/binary_tree/red_black_tree.py
================================================
from __future__ import annotations
from collections.abc import Iterator
class RedBlackTree:
"""
A Red-Black tree, which is a self-balancing BST (binary search
tree).
This tree has similar performance to AVL trees, but the balancing is
less strict, so it is typically faster for inserting/deleting nodes
and slower for lookups in the average case. Because both are balanced
binary search trees, they share the same asymptotic performance.
To read more about them, https://en.wikipedia.org/wiki/Red-black_tree
Unless otherwise specified, all asymptotic runtimes are specified in
terms of the size of the tree.
"""
def __init__(
self,
label: int | None = None,
color: int = 0,
parent: RedBlackTree | None = None,
left: RedBlackTree | None = None,
right: RedBlackTree | None = None,
) -> None:
"""Initialize a new Red-Black Tree node with the given values:
label: The value associated with this node
color: 0 if black, 1 if red
parent: The parent to this node
left: This node's left child
right: This node's right child
"""
self.label = label
self.parent = parent
self.left = left
self.right = right
self.color = color
# Here are functions which are specific to red-black trees
def rotate_left(self) -> RedBlackTree:
"""Rotate the subtree rooted at this node to the left and
returns the new root to this subtree.
Performing one rotation can be done in O(1).
"""
parent = self.parent
right = self.right
if right is None:
return self
self.right = right.left
if self.right:
self.right.parent = self
self.parent = right
right.left = self
if parent is not None:
if parent.left == self:
parent.left = right
else:
parent.right = right
right.parent = parent
return right
def rotate_right(self) -> RedBlackTree:
"""Rotate the subtree rooted at this node to the right and
returns the new root to this subtree.
Performing one rotation can be done in O(1).
"""
if self.left is None:
return self
parent = self.parent
left = self.left
self.left = left.right
if self.left:
self.left.parent = self
self.parent = left
left.right = self
if parent is not None:
if parent.right is self:
parent.right = left
else:
parent.left = left
left.parent = parent
return left
def insert(self, label: int) -> RedBlackTree:
"""Inserts label into the subtree rooted at self, performs any
rotations necessary to maintain balance, and then returns the
new root to this subtree (likely self).
This is guaranteed to run in O(log(n)) time.
"""
if self.label is None:
# Only possible with an empty tree
self.label = label
return self
if self.label == label:
return self
elif self.label > label:
if self.left:
self.left.insert(label)
else:
self.left = RedBlackTree(label, 1, self)
self.left._insert_repair()
elif self.right:
self.right.insert(label)
else:
self.right = RedBlackTree(label, 1, self)
self.right._insert_repair()
return self.parent or self
def _insert_repair(self) -> None:
"""Repair the coloring from inserting into a tree."""
if self.parent is None:
# This node is the root, so it just needs to be black
self.color = 0
elif color(self.parent) == 0:
# If the parent is black, then it just needs to be red
self.color = 1
else:
uncle = self.parent.sibling
if color(uncle) == 0:
if self.is_left() and self.parent.is_right():
self.parent.rotate_right()
if self.right:
self.right._insert_repair()
elif self.is_right() and self.parent.is_left():
self.parent.rotate_left()
if self.left:
self.left._insert_repair()
elif self.is_left():
if self.grandparent:
self.grandparent.rotate_right()
self.parent.color = 0
if self.parent.right:
self.parent.right.color = 1
else:
if self.grandparent:
self.grandparent.rotate_left()
self.parent.color = 0
if self.parent.left:
self.parent.left.color = 1
else:
self.parent.color = 0
if uncle and self.grandparent:
uncle.color = 0
self.grandparent.color = 1
self.grandparent._insert_repair()
def remove(self, label: int) -> RedBlackTree:
"""Remove label from this tree."""
if self.label == label:
if self.left and self.right:
# It's easier to balance a node with at most one child,
# so we replace this node with the greatest one less than
# it and remove that.
value = self.left.get_max()
if value is not None:
self.label = value
self.left.remove(value)
else:
# This node has at most one non-None child, so we don't
# need to replace
child = self.left or self.right
if self.color == 1:
# This node is red, and its child is black
# The only way this happens to a node with one child
# is if both children are None leaves.
# We can just remove this node and call it a day.
if self.parent:
if self.is_left():
self.parent.left = None
else:
self.parent.right = None
# The node is black
elif child is None:
# This node and its child are black
if self.parent is None:
# The tree is now empty
return RedBlackTree(None)
else:
self._remove_repair()
if self.is_left():
self.parent.left = None
else:
self.parent.right = None
self.parent = None
else:
# This node is black and its child is red
# Move the child node here and make it black
self.label = child.label
self.left = child.left
self.right = child.right
if self.left:
self.left.parent = self
if self.right:
self.right.parent = self
elif self.label is not None and self.label > label:
if self.left:
self.left.remove(label)
elif self.right:
self.right.remove(label)
return self.parent or self
def _remove_repair(self) -> None:
"""Repair the coloring of the tree that may have been messed up."""
if (
self.parent is None
or self.sibling is None
or self.parent.sibling is None
or self.grandparent is None
):
return
if color(self.sibling) == 1:
self.sibling.color = 0
self.parent.color = 1
if self.is_left():
self.parent.rotate_left()
else:
self.parent.rotate_right()
if (
color(self.parent) == 0
and color(self.sibling) == 0
and color(self.sibling.left) == 0
and color(self.sibling.right) == 0
):
self.sibling.color = 1
self.parent._remove_repair()
return
if (
color(self.parent) == 1
and color(self.sibling) == 0
and color(self.sibling.left) == 0
and color(self.sibling.right) == 0
):
self.sibling.color = 1
self.parent.color = 0
return
if (
self.is_left()
and color(self.sibling) == 0
and color(self.sibling.right) == 0
and color(self.sibling.left) == 1
):
self.sibling.rotate_right()
self.sibling.color = 0
if self.sibling.right:
self.sibling.right.color = 1
if (
self.is_right()
and color(self.sibling) == 0
and color(self.sibling.right) == 1
and color(self.sibling.left) == 0
):
self.sibling.rotate_left()
self.sibling.color = 0
if self.sibling.left:
self.sibling.left.color = 1
if (
self.is_left()
and color(self.sibling) == 0
and color(self.sibling.right) == 1
):
self.parent.rotate_left()
self.grandparent.color = self.parent.color
self.parent.color = 0
self.parent.sibling.color = 0
if (
self.is_right()
and color(self.sibling) == 0
and color(self.sibling.left) == 1
):
self.parent.rotate_right()
self.grandparent.color = self.parent.color
self.parent.color = 0
self.parent.sibling.color = 0
def check_color_properties(self) -> bool:
"""Check the coloring of the tree, and return True iff the tree
is colored in a way which matches these five properties:
(wording stolen from wikipedia article)
1. Each node is either red or black.
2. The root node is black.
3. All leaves are black.
4. If a node is red, then both its children are black.
5. Every path from any node to all of its descendent NIL nodes
has the same number of black nodes.
This function runs in O(n) time, because properties 4 and 5 take
that long to check.
"""
# I assume property 1 to hold because there is nothing that can
# make the color be anything other than 0 or 1.
# Property 2
if self.color:
# The root was red
print("Property 2")
return False
# Property 3 does not need to be checked, because None is assumed
# to be black and is all the leaves.
# Property 4
if not self.check_coloring():
print("Property 4")
return False
# Property 5
if self.black_height() is None:
print("Property 5")
return False
# All properties were met
return True
def check_coloring(self) -> bool:
"""A helper function to recursively check Property 4 of a
Red-Black Tree. See check_color_properties for more info.
"""
if self.color == 1 and 1 in (color(self.left), color(self.right)):
return False
if self.left and not self.left.check_coloring():
return False
return not (self.right and not self.right.check_coloring())
def black_height(self) -> int | None:
"""Returns the number of black nodes from this node to the
leaves of the tree, or None if there is no such value (the
tree is colored incorrectly).
"""
if self is None or self.left is None or self.right is None:
# If we're already at a leaf, there is no path
return 1
left = RedBlackTree.black_height(self.left)
right = RedBlackTree.black_height(self.right)
if left is None or right is None:
# There are issues with coloring below children nodes
return None
if left != right:
# The two children have unequal depths
return None
# Return the black depth of children, plus one if this node is
# black
return left + (1 - self.color)
# Here are functions which are general to all binary search trees
def __contains__(self, label: int) -> bool:
"""Search through the tree for label, returning True iff it is
found somewhere in the tree.
Guaranteed to run in O(log(n)) time.
"""
return self.search(label) is not None
def search(self, label: int) -> RedBlackTree | None:
"""Search through the tree for label, returning its node if
it's found, and None otherwise.
This method is guaranteed to run in O(log(n)) time.
"""
if self.label == label:
return self
elif self.label is not None and label > self.label:
if self.right is None:
return None
else:
return self.right.search(label)
elif self.left is None:
return None
else:
return self.left.search(label)
def floor(self, label: int) -> int | None:
"""Returns the largest element in this tree which is at most label.
This method is guaranteed to run in O(log(n)) time."""
if self.label == label:
return self.label
elif self.label is not None and self.label > label:
if self.left:
return self.left.floor(label)
else:
return None
else:
if self.right:
attempt = self.right.floor(label)
if attempt is not None:
return attempt
return self.label
def ceil(self, label: int) -> int | None:
"""Returns the smallest element in this tree which is at least label.
This method is guaranteed to run in O(log(n)) time.
"""
if self.label == label:
return self.label
elif self.label is not None and self.label < label:
if self.right:
return self.right.ceil(label)
else:
return None
else:
if self.left:
attempt = self.left.ceil(label)
if attempt is not None:
return attempt
return self.label
def get_max(self) -> int | None:
"""Returns the largest element in this tree.
This method is guaranteed to run in O(log(n)) time.
"""
if self.right:
# Go as far right as possible
return self.right.get_max()
else:
return self.label
def get_min(self) -> int | None:
"""Returns the smallest element in this tree.
This method is guaranteed to run in O(log(n)) time.
"""
if self.left:
# Go as far left as possible
return self.left.get_min()
else:
return self.label
@property
def grandparent(self) -> RedBlackTree | None:
"""Get the current node's grandparent, or None if it doesn't exist."""
if self.parent is None:
return None
else:
return self.parent.parent
@property
def sibling(self) -> RedBlackTree | None:
"""Get the current node's sibling, or None if it doesn't exist."""
if self.parent is None:
return None
elif self.parent.left is self:
return self.parent.right
else:
return self.parent.left
def is_left(self) -> bool:
"""Returns true iff this node is the left child of its parent."""
if self.parent is None:
return False
return self.parent.left is self
def is_right(self) -> bool:
"""Returns true iff this node is the right child of its parent."""
if self.parent is None:
return False
return self.parent.right is self
def __bool__(self) -> bool:
return True
def __len__(self) -> int:
"""
Return the number of nodes in this tree.
"""
ln = 1
if self.left:
ln += len(self.left)
if self.right:
ln += len(self.right)
return ln
def preorder_traverse(self) -> Iterator[int | None]:
yield self.label
if self.left:
yield from self.left.preorder_traverse()
if self.right:
yield from self.right.preorder_traverse()
def inorder_traverse(self) -> Iterator[int | None]:
if self.left:
yield from self.left.inorder_traverse()
yield self.label
if self.right:
yield from self.right.inorder_traverse()
def postorder_traverse(self) -> Iterator[int | None]:
if self.left:
yield from self.left.postorder_traverse()
if self.right:
yield from self.right.postorder_traverse()
yield self.label
def __repr__(self) -> str:
from pprint import pformat
if self.left is None and self.right is None:
return f"'{self.label} {(self.color and 'red') or 'blk'}'"
return pformat(
{
f"{self.label} {(self.color and 'red') or 'blk'}": (
self.left,
self.right,
)
},
indent=1,
)
def __eq__(self, other: object) -> bool:
"""Test if two trees are equal."""
if not isinstance(other, RedBlackTree):
return NotImplemented
if self.label == other.label:
return self.left == other.left and self.right == other.right
else:
return False
def color(node: RedBlackTree | None) -> int:
"""Returns the color of a node, allowing for None leaves."""
if node is None:
return 0
else:
return node.color
"""
Code for testing the various
functions of the red-black tree.
"""
def test_rotations() -> bool:
"""Test that the rotate_left and rotate_right functions work."""
# Make a tree to test on
tree = RedBlackTree(0)
tree.left = RedBlackTree(-10, parent=tree)
tree.right = RedBlackTree(10, parent=tree)
tree.left.left = RedBlackTree(-20, parent=tree.left)
tree.left.right = RedBlackTree(-5, parent=tree.left)
tree.right.left = RedBlackTree(5, parent=tree.right)
tree.right.right = RedBlackTree(20, parent=tree.right)
# Make the right rotation
left_rot = RedBlackTree(10)
left_rot.left = RedBlackTree(0, parent=left_rot)
left_rot.left.left = RedBlackTree(-10, parent=left_rot.left)
left_rot.left.right = RedBlackTree(5, parent=left_rot.left)
left_rot.left.left.left = RedBlackTree(-20, parent=left_rot.left.left)
left_rot.left.left.right = RedBlackTree(-5, parent=left_rot.left.left)
left_rot.right = RedBlackTree(20, parent=left_rot)
tree = tree.rotate_left()
if tree != left_rot:
return False
tree = tree.rotate_right()
tree = tree.rotate_right()
# Make the left rotation
right_rot = RedBlackTree(-10)
right_rot.left = RedBlackTree(-20, parent=right_rot)
right_rot.right = RedBlackTree(0, parent=right_rot)
right_rot.right.left = RedBlackTree(-5, parent=right_rot.right)
right_rot.right.right = RedBlackTree(10, parent=right_rot.right)
right_rot.right.right.left = RedBlackTree(5, parent=right_rot.right.right)
right_rot.right.right.right = RedBlackTree(20, parent=right_rot.right.right)
return tree == right_rot
def test_insertion_speed() -> bool:
"""Test that the tree balances inserts to O(log(n)) by doing a lot
of them.
"""
tree = RedBlackTree(-1)
for i in range(300000):
tree = tree.insert(i)
return True
def test_insert() -> bool:
"""Test the insert() method of the tree correctly balances, colors,
and inserts.
"""
tree = RedBlackTree(0)
tree.insert(8)
tree.insert(-8)
tree.insert(4)
tree.insert(12)
tree.insert(10)
tree.insert(11)
ans = RedBlackTree(0, 0)
ans.left = RedBlackTree(-8, 0, ans)
ans.right = RedBlackTree(8, 1, ans)
ans.right.left = RedBlackTree(4, 0, ans.right)
ans.right.right = RedBlackTree(11, 0, ans.right)
ans.right.right.left = RedBlackTree(10, 1, ans.right.right)
ans.right.right.right = RedBlackTree(12, 1, ans.right.right)
return tree == ans
def test_insert_and_search() -> bool:
"""Tests searching through the tree for values."""
tree = RedBlackTree(0)
tree.insert(8)
tree.insert(-8)
tree.insert(4)
tree.insert(12)
tree.insert(10)
tree.insert(11)
if any(i in tree for i in (5, -6, -10, 13)):
# Found something not in there
return False
# Find all these things in there
return all(i in tree for i in (11, 12, -8, 0))
def test_insert_delete() -> bool:
"""Test the insert() and delete() method of the tree, verifying the
insertion and removal of elements, and the balancing of the tree.
"""
tree = RedBlackTree(0)
tree = tree.insert(-12)
tree = tree.insert(8)
tree = tree.insert(-8)
tree = tree.insert(15)
tree = tree.insert(4)
tree = tree.insert(12)
tree = tree.insert(10)
tree = tree.insert(9)
tree = tree.insert(11)
tree = tree.remove(15)
tree = tree.remove(-12)
tree = tree.remove(9)
if not tree.check_color_properties():
return False
return list(tree.inorder_traverse()) == [-8, 0, 4, 8, 10, 11, 12]
def test_floor_ceil() -> bool:
"""Tests the floor and ceiling functions in the tree."""
tree = RedBlackTree(0)
tree.insert(-16)
tree.insert(16)
tree.insert(8)
tree.insert(24)
tree.insert(20)
tree.insert(22)
tuples = [(-20, None, -16), (-10, -16, 0), (8, 8, 8), (50, 24, None)]
for val, floor, ceil in tuples:
if tree.floor(val) != floor or tree.ceil(val) != ceil:
return False
return True
def test_min_max() -> bool:
"""Tests the min and max functions in the tree."""
tree = RedBlackTree(0)
tree.insert(-16)
tree.insert(16)
tree.insert(8)
tree.insert(24)
tree.insert(20)
tree.insert(22)
return tree.get_max() == 22 and tree.get_min() == -16
def test_tree_traversal() -> bool:
"""Tests the three different tree traversal functions."""
tree = RedBlackTree(0)
tree = tree.insert(-16)
tree.insert(16)
tree.insert(8)
tree.insert(24)
tree.insert(20)
tree.insert(22)
if list(tree.inorder_traverse()) != [-16, 0, 8, 16, 20, 22, 24]:
return False
if list(tree.preorder_traverse()) != [0, -16, 16, 8, 22, 20, 24]:
return False
return list(tree.postorder_traverse()) == [-16, 8, 20, 24, 22, 16, 0]
def test_tree_chaining() -> bool:
"""Tests the three different tree chaining functions."""
tree = RedBlackTree(0)
tree = tree.insert(-16).insert(16).insert(8).insert(24).insert(20).insert(22)
if list(tree.inorder_traverse()) != [-16, 0, 8, 16, 20, 22, 24]:
return False
if list(tree.preorder_traverse()) != [0, -16, 16, 8, 22, 20, 24]:
return False
return list(tree.postorder_traverse()) == [-16, 8, 20, 24, 22, 16, 0]
def print_results(msg: str, passes: bool) -> None:
print(str(msg), "works!" if passes else "doesn't work :(")
def pytests() -> None:
assert test_rotations()
assert test_insert()
assert test_insert_and_search()
assert test_insert_delete()
assert test_floor_ceil()
assert test_tree_traversal()
assert test_tree_chaining()
def main() -> None:
"""
>>> pytests()
"""
print_results("Rotating right and left", test_rotations())
print_results("Inserting", test_insert())
print_results("Searching", test_insert_and_search())
print_results("Deleting", test_insert_delete())
print_results("Floor and ceil", test_floor_ceil())
print_results("Tree traversal", test_tree_traversal())
print_results("Tree traversal", test_tree_chaining())
print("Testing tree balancing...")
print("This should only be a few seconds.")
test_insertion_speed()
print("Done!")
if __name__ == "__main__":
main()
================================================
FILE: data_structures/binary_tree/segment_tree.py
================================================
import math
class SegmentTree:
def __init__(self, a):
self.A = a
self.N = len(self.A)
self.st = [0] * (
4 * self.N
) # 4 * N is a safe upper bound on the number of segment tree nodes for an array of length N
if self.N:
self.build(1, 0, self.N - 1)
def left(self, idx):
"""
Returns the left child index for a given index in a binary tree.
>>> s = SegmentTree([1, 2, 3])
>>> s.left(1)
2
>>> s.left(2)
4
"""
return idx * 2
def right(self, idx):
"""
Returns the right child index for a given index in a binary tree.
>>> s = SegmentTree([1, 2, 3])
>>> s.right(1)
3
>>> s.right(2)
5
"""
return idx * 2 + 1
def build(self, idx, left, right):
if left == right:
self.st[idx] = self.A[left]
else:
mid = (left + right) // 2
self.build(self.left(idx), left, mid)
self.build(self.right(idx), mid + 1, right)
self.st[idx] = max(self.st[self.left(idx)], self.st[self.right(idx)])
def update(self, a, b, val):
"""
Update the values in the segment tree in the range [a,b] with the given value.
>>> s = SegmentTree([1, 2, 3, 4, 5])
>>> s.update(2, 4, 10)
True
>>> s.query(1, 5)
10
"""
return self.update_recursive(1, 0, self.N - 1, a - 1, b - 1, val)
def update_recursive(self, idx, left, right, a, b, val):
"""
update(1, 1, N, a, b, v) assigns value v to every position in the range [a, b]
"""
if right < a or left > b:
return True
if left == right:
self.st[idx] = val
return True
mid = (left + right) // 2
self.update_recursive(self.left(idx), left, mid, a, b, val)
self.update_recursive(self.right(idx), mid + 1, right, a, b, val)
self.st[idx] = max(self.st[self.left(idx)], self.st[self.right(idx)])
return True
def query(self, a, b):
"""
Query the maximum value in the range [a,b].
>>> s = SegmentTree([1, 2, 3, 4, 5])
>>> s.query(1, 3)
3
>>> s.query(1, 5)
5
"""
return self.query_recursive(1, 0, self.N - 1, a - 1, b - 1)
def query_recursive(self, idx, left, right, a, b):
"""
query(1, 1, N, a, b) returns the max over the range [a, b]
"""
if right < a or left > b:
return -math.inf
if left >= a and right <= b:
return self.st[idx]
mid = (left + right) // 2
q1 = self.query_recursive(self.left(idx), left, mid, a, b)
q2 = self.query_recursive(self.right(idx), mid + 1, right, a, b)
return max(q1, q2)
def show_data(self):
show_list = []
for i in range(1, self.N + 1):
show_list += [self.query(i, i)]
print(show_list)
if __name__ == "__main__":
A = [1, 2, -4, 7, 3, -5, 6, 11, -20, 9, 14, 15, 5, 2, -8]
N = 15
segt = SegmentTree(A)
print(segt.query(4, 6))
print(segt.query(7, 11))
print(segt.query(7, 12))
segt.update(1, 3, 111)
print(segt.query(1, 15))
segt.update(7, 8, 235)
segt.show_data()
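# Illustrative sketch (not part of the original module): query(a, b) uses
# 1-based inclusive positions, so it should agree with max() over the matching
# 0-based slice of the input list. The demo names below are invented.
demo_data = [3, 1, 4, 1, 5, 9, 2, 6]
demo_tree = SegmentTree(demo_data)
assert demo_tree.query(1, 3) == max(demo_data[0:3])  # positions 1..3 -> indices 0..2
assert demo_tree.query(4, 8) == max(demo_data[3:8])  # positions 4..8 -> indices 3..7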
================================================
FILE: data_structures/binary_tree/segment_tree_other.py
================================================
"""
SegmentTree builds a segment tree from a given array and a combining function,
allowing range queries to be answered later in O(log(N)) time.
The function takes two values and returns a value of the same type.
"""
from collections.abc import Sequence
from queue import Queue
class SegmentTreeNode:
def __init__(self, start, end, val, left=None, right=None):
self.start = start
self.end = end
self.val = val
self.mid = (start + end) // 2
self.left = left
self.right = right
def __repr__(self):
return f"SegmentTreeNode(start={self.start}, end={self.end}, val={self.val})"
class SegmentTree:
"""
>>> import operator
>>> num_arr = SegmentTree([2, 1, 5, 3, 4], operator.add)
>>> tuple(num_arr.traverse()) # doctest: +NORMALIZE_WHITESPACE
(SegmentTreeNode(start=0, end=4, val=15),
SegmentTreeNode(start=0, end=2, val=8),
SegmentTreeNode(start=3, end=4, val=7),
SegmentTreeNode(start=0, end=1, val=3),
SegmentTreeNode(start=2, end=2, val=5),
SegmentTreeNode(start=3, end=3, val=3),
SegmentTreeNode(start=4, end=4, val=4),
SegmentTreeNode(start=0, end=0, val=2),
SegmentTreeNode(start=1, end=1, val=1))
>>>
>>> num_arr.update(1, 5)
>>> tuple(num_arr.traverse()) # doctest: +NORMALIZE_WHITESPACE
(SegmentTreeNode(start=0, end=4, val=19),
SegmentTreeNode(start=0, end=2, val=12),
SegmentTreeNode(start=3, end=4, val=7),
SegmentTreeNode(start=0, end=1, val=7),
SegmentTreeNode(start=2, end=2, val=5),
SegmentTreeNode(start=3, end=3, val=3),
SegmentTreeNode(start=4, end=4, val=4),
SegmentTreeNode(start=0, end=0, val=2),
SegmentTreeNode(start=1, end=1, val=5))
>>>
>>> num_arr.query_range(3, 4)
7
>>> num_arr.query_range(2, 2)
5
>>> num_arr.query_range(1, 3)
13
>>>
>>> max_arr = SegmentTree([2, 1, 5, 3, 4], max)
>>> for node in max_arr.traverse():
... print(node)
...
SegmentTreeNode(start=0, end=4, val=5)
SegmentTreeNode(start=0, end=2, val=5)
SegmentTreeNode(start=3, end=4, val=4)
SegmentTreeNode(start=0, end=1, val=2)
SegmentTreeNode(start=2, end=2, val=5)
SegmentTreeNode(start=3, end=3, val=3)
SegmentTreeNode(start=4, end=4, val=4)
SegmentTreeNode(start=0, end=0, val=2)
SegmentTreeNode(start=1, end=1, val=1)
>>>
>>> max_arr.update(1, 5)
>>> for node in max_arr.traverse():
... print(node)
...
SegmentTreeNode(start=0, end=4, val=5)
SegmentTreeNode(start=0, end=2, val=5)
SegmentTreeNode(start=3, end=4, val=4)
SegmentTreeNode(start=0, end=1, val=5)
SegmentTreeNode(start=2, end=2, val=5)
SegmentTreeNode(start=3, end=3, val=3)
SegmentTreeNode(start=4, end=4, val=4)
SegmentTreeNode(start=0, end=0, val=2)
SegmentTreeNode(start=1, end=1, val=5)
>>>
>>> max_arr.query_range(3, 4)
4
>>> max_arr.query_range(2, 2)
5
>>> max_arr.query_range(1, 3)
5
>>>
>>> min_arr = SegmentTree([2, 1, 5, 3, 4], min)
>>> for node in min_arr.traverse():
... print(node)
...
SegmentTreeNode(start=0, end=4, val=1)
SegmentTreeNode(start=0, end=2, val=1)
SegmentTreeNode(start=3, end=4, val=3)
SegmentTreeNode(start=0, end=1, val=1)
SegmentTreeNode(start=2, end=2, val=5)
SegmentTreeNode(start=3, end=3, val=3)
SegmentTreeNode(start=4, end=4, val=4)
SegmentTreeNode(start=0, end=0, val=2)
SegmentTreeNode(start=1, end=1, val=1)
>>>
>>> min_arr.update(1, 5)
>>> for node in min_arr.traverse():
... print(node)
...
SegmentTreeNode(start=0, end=4, val=2)
SegmentTreeNode(start=0, end=2, val=2)
SegmentTreeNode(start=3, end=4, val=3)
SegmentTreeNode(start=0, end=1, val=2)
SegmentTreeNode(start=2, end=2, val=5)
SegmentTreeNode(start=3, end=3, val=3)
SegmentTreeNode(start=4, end=4, val=4)
SegmentTreeNode(start=0, end=0, val=2)
SegmentTreeNode(start=1, end=1, val=5)
>>>
>>> min_arr.query_range(3, 4)
3
>>> min_arr.query_range(2, 2)
5
>>> min_arr.query_range(1, 3)
3
>>>
"""
def __init__(self, collection: Sequence, function):
self.collection = collection
self.fn = function
if self.collection:
self.root = self._build_tree(0, len(collection) - 1)
def update(self, i, val):
"""
Update an element in log(N) time
:param i: position to be updated
:param val: new value
>>> import operator
>>> num_arr = SegmentTree([2, 1, 5, 3, 4], operator.add)
>>> num_arr.update(1, 5)
>>> num_arr.query_range(1, 3)
13
"""
self._update_tree(self.root, i, val)
def query_range(self, i, j):
"""
Get range query value in log(N) time
:param i: left element index
:param j: right element index
:return: the combined value of the elements in the range [i, j]
>>> import operator
>>> num_arr = SegmentTree([2, 1, 5, 3, 4], operator.add)
>>> num_arr.update(1, 5)
>>> num_arr.query_range(3, 4)
7
>>> num_arr.query_range(2, 2)
5
>>> num_arr.query_range(1, 3)
13
>>>
"""
return self._query_range(self.root, i, j)
def _build_tree(self, start, end):
if start == end:
return SegmentTreeNode(start, end, self.collection[start])
mid = (start + end) // 2
left = self._build_tree(start, mid)
right = self._build_tree(mid + 1, end)
return SegmentTreeNode(start, end, self.fn(left.val, right.val), left, right)
def _update_tree(self, node, i, val):
if node.start == i and node.end == i:
node.val = val
return
if i <= node.mid:
self._update_tree(node.left, i, val)
else:
self._update_tree(node.right, i, val)
node.val = self.fn(node.left.val, node.right.val)
def _query_range(self, node, i, j):
if node.start == i and node.end == j:
return node.val
if i <= node.mid:
if j <= node.mid:
# range in left child tree
return self._query_range(node.left, i, j)
else:
# range in left child tree and right child tree
return self.fn(
self._query_range(node.left, i, node.mid),
self._query_range(node.right, node.mid + 1, j),
)
else:
# range in right child tree
return self._query_range(node.right, i, j)
def traverse(self):
if self.root is not None:
queue = Queue()
queue.put(self.root)
while not queue.empty():
node = queue.get()
yield node
if node.left is not None:
queue.put(node.left)
if node.right is not None:
queue.put(node.right)
if __name__ == "__main__":
import operator
for fn in [operator.add, max, min]:
print("*" * 50)
arr = SegmentTree([2, 1, 5, 3, 4], fn)
for node in arr.traverse():
print(node)
print()
arr.update(1, 5)
for node in arr.traverse():
print(node)
print()
print(arr.query_range(3, 4)) # 7
print(arr.query_range(2, 2)) # 5
print(arr.query_range(1, 3)) # 13
print()
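# Illustrative sketch (not in the original file): the combining function is not
# limited to add/max/min; any two-argument function over the element type works,
# e.g. string concatenation. The demo names below are invented.
concat_tree = SegmentTree(["a", "b", "c", "d"], lambda x, y: x + y)
assert concat_tree.query_range(1, 3) == "bcd"
concat_tree.update(0, "z")
assert concat_tree.query_range(0, 1) == "zb"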
================================================
FILE: data_structures/binary_tree/serialize_deserialize_binary_tree.py
================================================
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
@dataclass
class TreeNode:
"""
A binary tree node has a value, left child, and right child.
Props:
value: The value of the node.
left: The left child of the node.
right: The right child of the node.
"""
value: int = 0
left: TreeNode | None = None
right: TreeNode | None = None
def __post_init__(self):
if not isinstance(self.value, int):
raise TypeError("Value must be an integer.")
def __iter__(self) -> Iterator[TreeNode]:
"""
Iterate through the tree in preorder.
Returns:
An iterator of the tree nodes.
>>> list(TreeNode(1))
[1,null,null]
>>> tuple(TreeNode(1, TreeNode(2), TreeNode(3)))
(1,2,null,null,3,null,null, 2,null,null, 3,null,null)
"""
yield self
yield from self.left or ()
yield from self.right or ()
def __len__(self) -> int:
"""
Count the number of nodes in the tree.
Returns:
The number of nodes in the tree.
>>> len(TreeNode(1))
1
>>> len(TreeNode(1, TreeNode(2), TreeNode(3)))
3
"""
return sum(1 for _ in self)
def __repr__(self) -> str:
"""
Represent the tree as a string.
Returns:
A string representation of the tree.
>>> repr(TreeNode(1))
'1,null,null'
>>> repr(TreeNode(1, TreeNode(2), TreeNode(3)))
'1,2,null,null,3,null,null'
>>> repr(TreeNode(1, TreeNode(2), TreeNode(3, TreeNode(4), TreeNode(5))))
'1,2,null,null,3,4,null,null,5,null,null'
"""
return f"{self.value},{self.left!r},{self.right!r}".replace("None", "null")
@classmethod
def five_tree(cls) -> TreeNode:
"""
>>> repr(TreeNode.five_tree())
'1,2,null,null,3,4,null,null,5,null,null'
"""
root = TreeNode(1)
root.left = TreeNode(2)
root.right = TreeNode(3)
root.right.left = TreeNode(4)
root.right.right = TreeNode(5)
return root
def deserialize(data: str) -> TreeNode | None:
"""
Deserialize a string to a binary tree.
Args:
data(str): The serialized string.
Returns:
The root of the binary tree.
>>> root = TreeNode.five_tree()
>>> serialized_data = repr(root)
>>> deserialized = deserialize(serialized_data)
>>> root == deserialized
True
>>> root is deserialized # two separate trees
False
>>> root.right.right.value = 6
>>> root == deserialized
False
>>> serialized_data = repr(root)
>>> deserialized = deserialize(serialized_data)
>>> root == deserialized
True
>>> deserialize("")
Traceback (most recent call last):
...
ValueError: Data cannot be empty.
"""
if not data:
raise ValueError("Data cannot be empty.")
# Split the serialized string by a comma to get node values
nodes = data.split(",")
def build_tree() -> TreeNode | None:
# Get the next value from the list
value = nodes.pop(0)
if value == "null":
return None
node = TreeNode(int(value))
node.left = build_tree() # Recursively build left subtree
node.right = build_tree() # Recursively build right subtree
return node
return build_tree()
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/binary_tree/symmetric_tree.py
================================================
"""
Given the root of a binary tree, check whether it is a mirror of itself
(i.e., symmetric around its center).
Leetcode reference: https://leetcode.com/problems/symmetric-tree/
"""
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class Node:
"""
A Node represents an element of a binary tree, which contains:
Attributes:
data: The value stored in the node (int).
left: Pointer to the left child node (Node or None).
right: Pointer to the right child node (Node or None).
Example:
>>> node = Node(1, Node(2), Node(3))
>>> node.data
1
>>> node.left.data
2
>>> node.right.data
3
"""
data: int
left: Node | None = None
right: Node | None = None
def make_symmetric_tree() -> Node:
r"""
Create a symmetric tree for testing.
The tree looks like this:
1
/ \
2 2
/ \ / \
3 4 4 3
Returns:
Node: Root node of a symmetric tree.
Example:
>>> tree = make_symmetric_tree()
>>> tree.data
1
>>> tree.left.data == tree.right.data
True
>>> tree.left.left.data == tree.right.right.data
True
"""
root = Node(1)
root.left = Node(2)
root.right = Node(2)
root.left.left = Node(3)
root.left.right = Node(4)
root.right.left = Node(4)
root.right.right = Node(3)
return root
def make_asymmetric_tree() -> Node:
r"""
Create an asymmetric tree for testing.
The tree looks like this:
1
/ \
2 2
/ \ / \
3 4 3 4
Returns:
Node: Root node of an asymmetric tree.
Example:
>>> tree = make_asymmetric_tree()
>>> tree.data
1
>>> tree.left.data == tree.right.data
True
>>> tree.left.left.data == tree.right.right.data
False
"""
root = Node(1)
root.left = Node(2)
root.right = Node(2)
root.left.left = Node(3)
root.left.right = Node(4)
root.right.left = Node(3)
root.right.right = Node(4)
return root
def is_symmetric_tree(tree: Node) -> bool:
"""
Check if a binary tree is symmetric (i.e., a mirror of itself).
Parameters:
tree: The root node of the binary tree.
Returns:
bool: True if the tree is symmetric, False otherwise.
Example:
>>> is_symmetric_tree(make_symmetric_tree())
True
>>> is_symmetric_tree(make_asymmetric_tree())
False
"""
if tree:
return is_mirror(tree.left, tree.right)
return True # An empty tree is considered symmetric.
def is_mirror(left: Node | None, right: Node | None) -> bool:
"""
Check if two subtrees are mirror images of each other.
Parameters:
left: The root node of the left subtree.
right: The root node of the right subtree.
Returns:
bool: True if the two subtrees are mirrors of each other, False otherwise.
Example:
>>> tree1 = make_symmetric_tree()
>>> is_mirror(tree1.left, tree1.right)
True
>>> tree2 = make_asymmetric_tree()
>>> is_mirror(tree2.left, tree2.right)
False
"""
if left is None and right is None:
# Both sides are empty, which is symmetric.
return True
if left is None or right is None:
# One side is empty while the other is not, which is not symmetric.
return False
if left.data == right.data:
# The values match, so check the subtrees recursively.
return is_mirror(left.left, right.right) and is_mirror(left.right, right.left)
return False
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: data_structures/binary_tree/treap.py
================================================
from __future__ import annotations
from random import random
class Node:
"""
Treap's node
Treap is a binary tree by value and heap by priority
"""
def __init__(self, value: int | None = None):
self.value = value
self.prior = random()
self.left: Node | None = None
self.right: Node | None = None
def __repr__(self) -> str:
from pprint import pformat
if self.left is None and self.right is None:
return f"'{self.value}: {self.prior:.5}'"
else:
return pformat(
{f"{self.value}: {self.prior:.5}": (self.left, self.right)}, indent=1
)
def __str__(self) -> str:
value = str(self.value) + " "
left = str(self.left or "")
right = str(self.right or "")
return value + left + right
def split(root: Node | None, value: int) -> tuple[Node | None, Node | None]:
"""
Split the current tree by value into two trees:
the left tree contains all values less than or equal to the split value,
the right tree contains all values greater than the split value.
"""
if root is None or root.value is None: # None tree is split into 2 Nones
return None, None
elif value < root.value:
"""
The current node becomes the root of the right tree.
Now split the current node's left child with the same value:
the left part of that split becomes the left tree,
and the right part becomes the right tree's new left child.
"""
left, root.left = split(root.left, value)
return left, root
else:
"""
Just symmetric to previous case
"""
root.right, right = split(root.right, value)
return root, right
def merge(left: Node | None, right: Node | None) -> Node | None:
"""
We merge 2 trees into one.
Note: all left tree's values must be less than all right tree's
"""
if (not left) or (not right): # If one node is None, return the other
return left or right
elif left.prior < right.prior:
"""
Left becomes the root because it has the higher priority (smaller prior).
Now merge left's right subtree with the right tree.
"""
left.right = merge(left.right, right)
return left
else:
"""
Symmetric as well
"""
right.left = merge(left, right.left)
return right
def insert(root: Node | None, value: int) -> Node | None:
"""
Insert element
Split current tree with a value into left, right,
Insert new node into the middle
Merge left, node, right into root
"""
node = Node(value)
left, right = split(root, value)
return merge(merge(left, node), right)
def erase(root: Node | None, value: int) -> Node | None:
"""
Erase all nodes with the given value:
split nodes with smaller values into left,
split nodes with larger values into right (dropping nodes equal to value),
then merge left and right.
"""
left, right = split(root, value - 1)
_, right = split(right, value)
return merge(left, right)
def inorder(root: Node | None) -> None:
"""
Just recursive print of a tree
"""
if not root: # None
return
else:
inorder(root.left)
print(root.value, end=",")
inorder(root.right)
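# Illustrative sketch (not part of the original module): split() sends values
# less than or equal to the pivot to the left treap and values greater than the
# pivot to the right treap; merge() stitches them back together.
# The demo_* names below are invented for this example.
demo_root = None
for demo_value in (5, 1, 9, 3, 7):
    demo_root = insert(demo_root, demo_value)
demo_left, demo_right = split(demo_root, 5)
inorder(demo_left)   # prints 1,3,5,
print()
inorder(demo_right)  # prints 7,9,
print()
inorder(merge(demo_left, demo_right))  # prints 1,3,5,7,9,
print()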
def interact_treap(root: Node | None, args: str) -> Node | None:
"""
Commands:
+ value to add value into treap
- value to erase all nodes with value
>>> root = interact_treap(None, "+1")
>>> inorder(root)
1,
>>> root = interact_treap(root, "+3 +5 +17 +19 +2 +16 +4 +0")
>>> inorder(root)
0,1,2,3,4,5,16,17,19,
>>> root = interact_treap(root, "+4 +4 +4")
>>> inorder(root)
0,1,2,3,4,4,4,4,5,16,17,19,
>>> root = interact_treap(root, "-0")
>>> inorder(root)
1,2,3,4,4,4,4,5,16,17,19,
>>> root = interact_treap(root, "-4")
>>> inorder(root)
1,2,3,5,16,17,19,
>>> root = interact_treap(root, "=0")
Unknown command
"""
for arg in args.split():
if arg[0] == "+":
root = insert(root, int(arg[1:]))
elif arg[0] == "-":
root = erase(root, int(arg[1:]))
else:
print("Unknown command")
return root
def main() -> None:
"""After each command, program prints treap"""
root = None
print(
"enter numbers to create a tree, + value to add value into treap, "
"- value to erase all nodes with value. 'q' to quit. "
)
args = input()
while args != "q":
root = interact_treap(root, args)
print(root)
args = input()
print("good by!")
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: data_structures/binary_tree/wavelet_tree.py
================================================
"""
A wavelet tree is a data structure designed to efficiently answer various range
queries on arrays. Wavelet trees differ from other binary trees in that their
nodes are split based on the actual values of the elements and not on indices,
as is the case with segment trees or Fenwick trees. You can read more about them here:
1. https://users.dcc.uchile.cl/~jperez/papers/ioiconf16.pdf
2. https://www.youtube.com/watch?v=4aSv9PcecDw&t=811s
3. https://www.youtube.com/watch?v=CybAgVF-MMc&t=1178s
"""
from __future__ import annotations
test_array = [2, 1, 4, 5, 6, 0, 8, 9, 1, 2, 0, 6, 4, 2, 0, 6, 5, 3, 2, 7]
class Node:
def __init__(self, length: int) -> None:
self.minn: int = -1
self.maxx: int = -1
self.map_left: list[int] = [-1] * length
self.left: Node | None = None
self.right: Node | None = None
def __repr__(self) -> str:
"""
>>> node = Node(length=27)
>>> repr(node)
'Node(min_value=-1 max_value=-1)'
>>> repr(node) == str(node)
True
"""
return f"Node(min_value={self.minn} max_value={self.maxx})"
def build_tree(arr: list[int]) -> Node | None:
"""
Builds the tree for arr and returns the root
of the constructed tree
>>> build_tree(test_array)
Node(min_value=0 max_value=9)
"""
root = Node(len(arr))
root.minn, root.maxx = min(arr), max(arr)
# Leaf node case where the node contains only one unique value
if root.minn == root.maxx:
return root
"""
Take the mean of min and max element of arr as the pivot and
partition arr into left_arr and right_arr with all elements <= pivot in the
left_arr and the rest in right_arr, maintaining the order of the elements,
then recursively build trees for left_arr and right_arr
"""
pivot = (root.minn + root.maxx) // 2
left_arr: list[int] = []
right_arr: list[int] = []
for index, num in enumerate(arr):
if num <= pivot:
left_arr.append(num)
else:
right_arr.append(num)
root.map_left[index] = len(left_arr)
root.left = build_tree(left_arr)
root.right = build_tree(right_arr)
return root
def rank_till_index(node: Node | None, num: int, index: int) -> int:
"""
Returns the number of occurrences of num in interval [0, index] in the list
>>> root = build_tree(test_array)
>>> rank_till_index(root, 6, 6)
1
>>> rank_till_index(root, 2, 0)
1
>>> rank_till_index(root, 1, 10)
2
>>> rank_till_index(root, 17, 7)
0
>>> rank_till_index(root, 0, 9)
1
"""
if index < 0 or node is None:
return 0
# Leaf node cases
if node.minn == node.maxx:
return index + 1 if node.minn == num else 0
pivot = (node.minn + node.maxx) // 2
if num <= pivot:
# go to the left subtree and map index into the left subtree
return rank_till_index(node.left, num, node.map_left[index] - 1)
else:
# go to the right subtree and map index to the right subtree
return rank_till_index(node.right, num, index - node.map_left[index])
def rank(node: Node | None, num: int, start: int, end: int) -> int:
"""
Returns the number of occurrences of num in interval [start, end] in the list
>>> root = build_tree(test_array)
>>> rank(root, 6, 3, 13)
2
>>> rank(root, 2, 0, 19)
4
>>> rank(root, 9, 2 ,2)
0
>>> rank(root, 0, 5, 10)
2
"""
if start > end:
return 0
rank_till_end = rank_till_index(node, num, end)
rank_before_start = rank_till_index(node, num, start - 1)
return rank_till_end - rank_before_start
def quantile(node: Node | None, index: int, start: int, end: int) -> int:
"""
Returns the index'th smallest element in interval [start, end] in the list
index is 0-indexed
>>> root = build_tree(test_array)
>>> quantile(root, 2, 2, 5)
5
>>> quantile(root, 5, 2, 13)
4
>>> quantile(root, 0, 6, 6)
8
>>> quantile(root, 4, 2, 5)
-1
"""
if index > (end - start) or start > end or node is None:
return -1
# Leaf node case
if node.minn == node.maxx:
return node.minn
# Number of elements in the left subtree in interval [start, end]
num_elements_in_left_tree = node.map_left[end] - (
node.map_left[start - 1] if start else 0
)
if num_elements_in_left_tree > index:
return quantile(
node.left,
index,
(node.map_left[start - 1] if start else 0),
node.map_left[end] - 1,
)
else:
return quantile(
node.right,
index - num_elements_in_left_tree,
start - (node.map_left[start - 1] if start else 0),
end - node.map_left[end],
)
def range_counting(
node: Node | None, start: int, end: int, start_num: int, end_num: int
) -> int:
"""
Returns the number of elements in range [start_num, end_num]
in interval [start, end] in the list
>>> root = build_tree(test_array)
>>> range_counting(root, 1, 10, 3, 7)
3
>>> range_counting(root, 2, 2, 1, 4)
1
>>> range_counting(root, 0, 19, 0, 100)
20
>>> range_counting(root, 1, 0, 1, 100)
0
>>> range_counting(root, 0, 17, 100, 1)
0
"""
if (
start > end
or node is None
or start_num > end_num
or node.minn > end_num
or node.maxx < start_num
):
return 0
if start_num <= node.minn and node.maxx <= end_num:
return end - start + 1
left = range_counting(
node.left,
(node.map_left[start - 1] if start else 0),
node.map_left[end] - 1,
start_num,
end_num,
)
right = range_counting(
node.right,
start - (node.map_left[start - 1] if start else 0),
end - node.map_left[end],
start_num,
end_num,
)
return left + right
if __name__ == "__main__":
import doctest
doctest.testmod()
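# Illustrative cross-check sketch (not part of the original file): the three
# query functions should agree with a naive scan over the same list. All
# demo_* names below are invented; intervals are inclusive on both ends.
demo_arr = [5, 2, 5, 1, 4, 2]
demo_root = build_tree(demo_arr)
assert rank(demo_root, 5, 0, 4) == demo_arr[0:5].count(5)
assert quantile(demo_root, 2, 1, 4) == sorted(demo_arr[1:5])[2]
assert range_counting(demo_root, 0, 5, 2, 4) == sum(2 <= x <= 4 for x in demo_arr)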
================================================
FILE: data_structures/disjoint_set/__init__.py
================================================
================================================
FILE: data_structures/disjoint_set/alternate_disjoint_set.py
================================================
"""
Implements a disjoint set using lists, with two heuristics for efficiency:
union by rank and path compression.
"""
class DisjointSet:
def __init__(self, set_counts: list) -> None:
"""
Initialize with a list of the number of items in each set
and with rank = 1 for each set
"""
self.set_counts = set_counts
self.max_set = max(set_counts)
num_sets = len(set_counts)
self.ranks = [1] * num_sets
self.parents = list(range(num_sets))
def merge(self, src: int, dst: int) -> bool:
"""
Merge two disjoint sets together using the union-by-rank heuristic.
Return True if the merge happened, False if the two elements were
already in the same set.
>>> A = DisjointSet([1, 1, 1])
>>> A.merge(1, 2)
True
>>> A.merge(0, 2)
True
>>> A.merge(0, 1)
False
"""
src_parent = self.get_parent(src)
dst_parent = self.get_parent(dst)
if src_parent == dst_parent:
return False
if self.ranks[dst_parent] >= self.ranks[src_parent]:
self.set_counts[dst_parent] += self.set_counts[src_parent]
self.set_counts[src_parent] = 0
self.parents[src_parent] = dst_parent
if self.ranks[dst_parent] == self.ranks[src_parent]:
self.ranks[dst_parent] += 1
joined_set_size = self.set_counts[dst_parent]
else:
self.set_counts[src_parent] += self.set_counts[dst_parent]
self.set_counts[dst_parent] = 0
self.parents[dst_parent] = src_parent
joined_set_size = self.set_counts[src_parent]
self.max_set = max(self.max_set, joined_set_size)
return True
def get_parent(self, disj_set: int) -> int:
"""
Find the representative (root) of a given set, compressing the path along the way.
>>> A = DisjointSet([1, 1, 1])
>>> A.merge(1, 2)
True
>>> A.get_parent(0)
0
>>> A.get_parent(1)
2
"""
if self.parents[disj_set] == disj_set:
return disj_set
self.parents[disj_set] = self.get_parent(self.parents[disj_set])
return self.parents[disj_set]
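# Illustrative sketch (not part of the original file): max_set tracks the size
# of the largest merged set. The demo_ds name below is invented.
demo_ds = DisjointSet([1, 1, 1, 1])
demo_ds.merge(0, 1)  # sets {0, 1} and singletons {2}, {3}
demo_ds.merge(2, 3)  # sets {0, 1} and {2, 3}
assert demo_ds.max_set == 2
demo_ds.merge(0, 3)  # one set {0, 1, 2, 3}
assert demo_ds.max_set == 4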
================================================
FILE: data_structures/disjoint_set/disjoint_set.py
================================================
"""
Disjoint set.
Reference: https://en.wikipedia.org/wiki/Disjoint-set_data_structure
"""
class Node:
def __init__(self, data: int) -> None:
self.data = data
self.rank: int
self.parent: Node
def make_set(x: Node) -> None:
"""
Make x as a set.
"""
# rank is an upper bound on the height of the tree rooted at x
# a fresh root's rank is 0
x.rank = 0
x.parent = x
def union_set(x: Node, y: Node) -> None:
"""
Union of two sets.
The set with the bigger rank becomes the parent, so that the
disjoint-set tree stays flatter.
"""
x, y = find_set(x), find_set(y)
if x == y:
return
elif x.rank > y.rank:
y.parent = x
else:
x.parent = y
if x.rank == y.rank:
y.rank += 1
def find_set(x: Node) -> Node:
"""
Return the representative (root) of the set containing x, with path compression.
"""
if x != x.parent:
x.parent = find_set(x.parent)
return x.parent
def find_python_set(node: Node) -> set:
"""
Return a Python Standard Library set that contains node.data.
"""
sets = ({0, 1, 2}, {3, 4, 5})
for s in sets:
if node.data in s:
return s
msg = f"{node.data} is not in {sets}"
raise ValueError(msg)
def test_disjoint_set() -> None:
"""
>>> test_disjoint_set()
"""
vertex = [Node(i) for i in range(6)]
for v in vertex:
make_set(v)
union_set(vertex[0], vertex[1])
union_set(vertex[1], vertex[2])
union_set(vertex[3], vertex[4])
union_set(vertex[3], vertex[5])
for node0 in vertex:
for node1 in vertex:
if find_python_set(node0).isdisjoint(find_python_set(node1)):
assert find_set(node0) != find_set(node1)
else:
assert find_set(node0) == find_set(node1)
if __name__ == "__main__":
test_disjoint_set()
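# Illustrative sketch (not part of the original file): after union_set, both
# nodes report the same representative from find_set. Demo names are invented.
demo_a, demo_b, demo_c = Node(10), Node(20), Node(30)
for demo_node in (demo_a, demo_b, demo_c):
    make_set(demo_node)
union_set(demo_a, demo_b)
assert find_set(demo_a) is find_set(demo_b)
assert find_set(demo_a) is not find_set(demo_c)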
================================================
FILE: data_structures/hashing/__init__.py
================================================
================================================
FILE: data_structures/hashing/bloom_filter.py
================================================
"""
See https://en.wikipedia.org/wiki/Bloom_filter
The use of this data structure is to test membership in a set.
Compared to Python's built-in set() it is more space-efficient.
In the following example, only 8 bits of memory will be used:
>>> bloom = Bloom(size=8)
Initially, the filter contains all zeros:
>>> bloom.bitstring
'00000000'
When an element is added, two bits are set to 1
since there are 2 hash functions in this implementation:
>>> "Titanic" in bloom
False
>>> bloom.add("Titanic")
>>> bloom.bitstring
'01100000'
>>> "Titanic" in bloom
True
However, sometimes only one bit is added
because both hash functions return the same value
>>> bloom.add("Avatar")
>>> "Avatar" in bloom
True
>>> bloom.format_hash("Avatar")
'00000100'
>>> bloom.bitstring
'01100100'
Elements that were not added should return False ...
>>> not_present_films = ("The Godfather", "Interstellar", "Parasite", "Pulp Fiction")
>>> {
... film: bloom.format_hash(film) for film in not_present_films
... } # doctest: +NORMALIZE_WHITESPACE
{'The Godfather': '00000101',
'Interstellar': '00000011',
'Parasite': '00010010',
'Pulp Fiction': '10000100'}
>>> any(film in bloom for film in not_present_films)
False
but sometimes there are false positives:
>>> "Ratatouille" in bloom
True
>>> bloom.format_hash("Ratatouille")
'01100000'
The false-positive probability increases with the number of elements added
and decreases with the number of bits in the bitarray.
>>> bloom.estimated_error_rate
0.140625
>>> bloom.add("The Godfather")
>>> bloom.estimated_error_rate
0.25
>>> bloom.bitstring
'01100101'
"""
from hashlib import md5, sha256
HASH_FUNCTIONS = (sha256, md5)
class Bloom:
def __init__(self, size: int = 8) -> None:
self.bitarray = 0b0
self.size = size
def add(self, value: str) -> None:
h = self.hash_(value)
self.bitarray |= h
def exists(self, value: str) -> bool:
h = self.hash_(value)
return (h & self.bitarray) == h
def __contains__(self, other: str) -> bool:
return self.exists(other)
def format_bin(self, bitarray: int) -> str:
res = bin(bitarray)[2:]
return res.zfill(self.size)
@property
def bitstring(self) -> str:
return self.format_bin(self.bitarray)
def hash_(self, value: str) -> int:
res = 0b0
for func in HASH_FUNCTIONS:
position = (
int.from_bytes(func(value.encode()).digest(), "little") % self.size
)
res |= 2**position
return res
def format_hash(self, value: str) -> str:
return self.format_bin(self.hash_(value))
@property
def estimated_error_rate(self) -> float:
n_ones = bin(self.bitarray).count("1")
return (n_ones / self.size) ** len(HASH_FUNCTIONS)
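# Illustrative sketch (not part of the original file): the textbook estimate of
# the false-positive rate for a Bloom filter with m bits, k hash functions and
# n inserted items is (1 - exp(-k * n / m)) ** k. The helper below is invented
# for the demo and is only an approximation, not the module's own estimate.
from math import exp
def theoretical_error_rate(m: int, k: int, n: int) -> float:
    """Expected false-positive probability after n insertions."""
    return (1 - exp(-k * n / m)) ** k
# e.g. theoretical_error_rate(m=8, k=2, n=2) is roughly 0.155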
================================================
FILE: data_structures/hashing/double_hash.py
================================================
#!/usr/bin/env python3
"""
Double hashing is a collision resolution technique for open-addressed hash tables.
It applies a second hash function to the key when a collision occurs.
The advantage of double hashing is that it is one of the best forms of probing,
producing a uniform distribution of records throughout the hash table and
avoiding clustering. It is one of the most effective methods for resolving collisions.
Double hashing can be done using: (hash1(key) + i * hash2(key)) % TABLE_SIZE
Where hash1() and hash2() are hash functions and TABLE_SIZE is size of hash table.
Reference: https://en.wikipedia.org/wiki/Double_hashing
"""
from .hash_table import HashTable
from .number_theory.prime_numbers import is_prime, next_prime
class DoubleHash(HashTable):
"""
Hash Table example with open addressing and Double Hash
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __hash_function_2(self, value, data):
next_prime_gt = (
next_prime(value % self.size_table)
if not is_prime(value % self.size_table)
else value % self.size_table
) # gt = greater than
return next_prime_gt - (data % next_prime_gt)
def __hash_double_function(self, key, data, increment):
return (increment * self.__hash_function_2(key, data)) % self.size_table
def _collision_resolution(self, key, data=None):
"""
Examples:
1. Try to add three data elements when the size is three
>>> dh = DoubleHash(3)
>>> dh.insert_data(10)
>>> dh.insert_data(20)
>>> dh.insert_data(30)
>>> dh.keys()
{1: 10, 2: 20, 0: 30}
2. Try to add three data elements when the size is two
>>> dh = DoubleHash(2)
>>> dh.insert_data(10)
>>> dh.insert_data(20)
>>> dh.insert_data(30)
>>> dh.keys()
{10: 10, 9: 20, 8: 30}
3. Try to add three data elements when the size is four
>>> dh = DoubleHash(4)
>>> dh.insert_data(10)
>>> dh.insert_data(20)
>>> dh.insert_data(30)
>>> dh.keys()
{9: 20, 10: 10, 8: 30}
"""
i = 1
new_key = self.hash_function(data)
while self.values[new_key] is not None and self.values[new_key] != key:
new_key = (
self.__hash_double_function(key, data, i)
if self.balanced_factor() >= self.lim_charge
else None
)
if new_key is None:
break
else:
i += 1
return new_key
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/hashing/hash_map.py
================================================
"""
Hash map with open addressing.
https://en.wikipedia.org/wiki/Hash_table
Another hash map implementation, with a good explanation.
Modern Dictionaries by Raymond Hettinger
https://www.youtube.com/watch?v=p33CVV29OG8
"""
from collections.abc import Iterator, MutableMapping
from dataclasses import dataclass
from typing import TypeVar
KEY = TypeVar("KEY")
VAL = TypeVar("VAL")
@dataclass(slots=True)
class _Item[KEY, VAL]:
key: KEY
val: VAL
class _DeletedItem(_Item):
def __init__(self) -> None:
super().__init__(None, None)
def __bool__(self) -> bool:
return False
_deleted = _DeletedItem()
class HashMap(MutableMapping[KEY, VAL]):
"""
Hash map with open addressing.
"""
def __init__(
self, initial_block_size: int = 8, capacity_factor: float = 0.75
) -> None:
self._initial_block_size = initial_block_size
self._buckets: list[_Item | None] = [None] * initial_block_size
assert 0.0 < capacity_factor < 1.0
self._capacity_factor = capacity_factor
self._len = 0
def _get_bucket_index(self, key: KEY) -> int:
return hash(key) % len(self._buckets)
def _get_next_ind(self, ind: int) -> int:
"""
Get next index.
Implements linear open addressing.
>>> HashMap(5)._get_next_ind(3)
4
>>> HashMap(5)._get_next_ind(5)
1
>>> HashMap(5)._get_next_ind(6)
2
>>> HashMap(5)._get_next_ind(9)
0
"""
return (ind + 1) % len(self._buckets)
def _try_set(self, ind: int, key: KEY, val: VAL) -> bool:
"""
Try to add a value to the bucket.
If the bucket is empty or already holds the same key, perform the insert and return True.
If the bucket holds another key, return False so the caller probes the next bucket.
"""
stored = self._buckets[ind]
if not stored:
# A falsy item means that bucket was never used (None)
# or was deleted (_deleted).
self._buckets[ind] = _Item(key, val)
self._len += 1
return True
elif stored.key == key:
stored.val = val
return True
else:
return False
def _is_full(self) -> bool:
"""
Return True if we have reached the safe capacity,
meaning we need to increase the number of buckets to avoid collisions.
>>> hm = HashMap(2)
>>> hm._add_item(1, 10)
>>> hm._add_item(2, 20)
>>> hm._is_full()
True
>>> HashMap(2)._is_full()
False
"""
limit = len(self._buckets) * self._capacity_factor
return len(self) >= int(limit)
def _is_sparse(self) -> bool:
"""Return true if we need twice fewer buckets when we have now."""
if len(self._buckets) <= self._initial_block_size:
return False
limit = len(self._buckets) * self._capacity_factor / 2
return len(self) < limit
def _resize(self, new_size: int) -> None:
old_buckets = self._buckets
self._buckets = [None] * new_size
self._len = 0
for item in old_buckets:
if item:
self._add_item(item.key, item.val)
def _size_up(self) -> None:
self._resize(len(self._buckets) * 2)
def _size_down(self) -> None:
self._resize(len(self._buckets) // 2)
def _iterate_buckets(self, key: KEY) -> Iterator[int]:
ind = self._get_bucket_index(key)
for _ in range(len(self._buckets)):
yield ind
ind = self._get_next_ind(ind)
def _add_item(self, key: KEY, val: VAL) -> None:
"""
Try to add 3 elements when the size is 5
>>> hm = HashMap(5)
>>> hm._add_item(1, 10)
>>> hm._add_item(2, 20)
>>> hm._add_item(3, 30)
>>> hm
HashMap(1: 10, 2: 20, 3: 30)
Try to add 3 elements when the size is 5
>>> hm = HashMap(5)
>>> hm._add_item(-5, 10)
>>> hm._add_item(6, 30)
>>> hm._add_item(-7, 20)
>>> hm
HashMap(-5: 10, 6: 30, -7: 20)
Try to add 3 elements when size is 1
>>> hm = HashMap(1)
>>> hm._add_item(10, 13.2)
>>> hm._add_item(6, 5.26)
>>> hm._add_item(7, 5.155)
>>> hm
HashMap(10: 13.2)
Trying to add an element with a key that is a floating point value
>>> hm = HashMap(5)
>>> hm._add_item(1.5, 10)
>>> hm
HashMap(1.5: 10)
Trying to add an item with the same key
>>> hm = HashMap(5)
>>> hm._add_item(1, 10)
>>> hm._add_item(1, 20)
>>> hm
HashMap(1: 20)
"""
for ind in self._iterate_buckets(key):
if self._try_set(ind, key, val):
break
def __setitem__(self, key: KEY, val: VAL) -> None:
"""
1. Changing value of item whose key is present
>>> hm = HashMap(5)
>>> hm._add_item(1, 10)
>>> hm.__setitem__(1, 20)
>>> hm
HashMap(1: 20)
2. Changing value of item whose key is not present
>>> hm = HashMap(5)
>>> hm._add_item(1, 10)
>>> hm.__setitem__(0, 20)
>>> hm
HashMap(0: 20, 1: 10)
3. Changing the value of the same item multiple times
>>> hm = HashMap(5)
>>> hm._add_item(1, 10)
>>> hm.__setitem__(1, 20)
>>> hm.__setitem__(1, 30)
>>> hm
HashMap(1: 30)
"""
if self._is_full():
self._size_up()
self._add_item(key, val)
def __delitem__(self, key: KEY) -> None:
"""
>>> hm = HashMap(5)
>>> hm._add_item(1, 10)
>>> hm._add_item(2, 20)
>>> hm._add_item(3, 30)
>>> hm.__delitem__(3)
>>> hm
HashMap(1: 10, 2: 20)
>>> hm = HashMap(5)
>>> hm._add_item(-5, 10)
>>> hm._add_item(6, 30)
>>> hm._add_item(-7, 20)
>>> hm.__delitem__(-5)
>>> hm
HashMap(6: 30, -7: 20)
# Trying to remove a non-existing item
>>> hm = HashMap(5)
>>> hm._add_item(1, 10)
>>> hm._add_item(2, 20)
>>> hm._add_item(3, 30)
>>> hm.__delitem__(4)
Traceback (most recent call last):
...
KeyError: 4
# Test resize down when sparse
## Setup: resize up
>>> hm = HashMap(initial_block_size=100, capacity_factor=0.75)
>>> len(hm._buckets)
100
>>> for i in range(75):
... hm[i] = i
>>> len(hm._buckets)
100
>>> hm[75] = 75
>>> len(hm._buckets)
200
## Resize down
>>> del hm[75]
>>> len(hm._buckets)
200
>>> del hm[74]
>>> len(hm._buckets)
100
"""
for ind in self._iterate_buckets(key):
item = self._buckets[ind]
if item is None:
raise KeyError(key)
if item is _deleted:
continue
if item.key == key:
self._buckets[ind] = _deleted
self._len -= 1
break
if self._is_sparse():
self._size_down()
def __getitem__(self, key: KEY) -> VAL:
"""
Returns the item at the given key
>>> hm = HashMap(5)
>>> hm._add_item(1, 10)
>>> hm.__getitem__(1)
10
>>> hm = HashMap(5)
>>> hm._add_item(10, -10)
>>> hm._add_item(20, -20)
>>> hm.__getitem__(20)
-20
>>> hm = HashMap(5)
>>> hm._add_item(-1, 10)
>>> hm.__getitem__(-1)
10
"""
for ind in self._iterate_buckets(key):
item = self._buckets[ind]
if item is None:
break
if item is _deleted:
continue
if item.key == key:
return item.val
raise KeyError(key)
def __len__(self) -> int:
"""
Returns the number of items present in hashmap
>>> hm = HashMap(5)
>>> hm._add_item(1, 10)
>>> hm._add_item(2, 20)
>>> hm._add_item(3, 30)
>>> hm.__len__()
3
>>> hm = HashMap(5)
>>> hm.__len__()
0
"""
return self._len
def __iter__(self) -> Iterator[KEY]:
yield from (item.key for item in self._buckets if item)
def __repr__(self) -> str:
val_string = ", ".join(
f"{item.key}: {item.val}" for item in self._buckets if item
)
return f"HashMap({val_string})"
if __name__ == "__main__":
import doctest
doctest.testmod()
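# Illustrative sketch (not part of the original file): because HashMap inherits
# from MutableMapping and defines __getitem__/__setitem__/__delitem__/__iter__/
# __len__, the standard mixin methods (get, items, pop, update, ...) work too.
# The demo_map name below is invented.
demo_map = HashMap(initial_block_size=4)
demo_map["alpha"] = 1
demo_map.update({"beta": 2, "gamma": 3})
assert demo_map.get("delta", 0) == 0
assert sorted(demo_map.items()) == [("alpha", 1), ("beta", 2), ("gamma", 3)]
assert demo_map.pop("gamma") == 3
assert len(demo_map) == 2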
================================================
FILE: data_structures/hashing/hash_table.py
================================================
#!/usr/bin/env python3
from abc import abstractmethod
from .number_theory.prime_numbers import next_prime
class HashTable:
"""
Basic Hash Table example with open addressing and linear probing
"""
def __init__(
self,
size_table: int,
charge_factor: int | None = None,
lim_charge: float | None = None,
) -> None:
self.size_table = size_table
self.values = [None] * self.size_table
self.lim_charge = 0.75 if lim_charge is None else lim_charge
self.charge_factor = 1 if charge_factor is None else charge_factor
self.__aux_list: list = []
self._keys: dict = {}
def keys(self):
"""
The keys function returns a dictionary of the stored key-value pairs,
where the key is the index in the hash table and the value is the stored data.
Examples:
1. creating HashTable with size 10 and inserting 3 elements
>>> ht = HashTable(10)
>>> ht.insert_data(10)
>>> ht.insert_data(20)
>>> ht.insert_data(30)
>>> ht.keys()
{0: 10, 1: 20, 2: 30}
2. creating HashTable with size 5 and inserting 5 elements
>>> ht = HashTable(5)
>>> ht.insert_data(5)
>>> ht.insert_data(4)
>>> ht.insert_data(3)
>>> ht.insert_data(2)
>>> ht.insert_data(1)
>>> ht.keys()
{0: 5, 4: 4, 3: 3, 2: 2, 1: 1}
"""
return self._keys
def balanced_factor(self):
return sum(1 for slot in self.values if slot is not None) / (
self.size_table * self.charge_factor
)
def hash_function(self, key):
"""
Generates hash for the given key value
Examples:
Creating HashTable with size 5
>>> ht = HashTable(5)
>>> ht.hash_function(10)
0
>>> ht.hash_function(20)
0
>>> ht.hash_function(4)
4
>>> ht.hash_function(18)
3
>>> ht.hash_function(-18)
2
>>> ht.hash_function(18.5)
3.5
>>> ht.hash_function(0)
0
>>> ht.hash_function(-0)
0
"""
return key % self.size_table
def _step_by_step(self, step_ord):
print(f"step {step_ord}")
print(list(range(len(self.values))))
print(self.values)
def bulk_insert(self, values):
"""
bulk_insert is used for entering more than one element at a time
in the HashTable.
Examples:
1.
>>> ht = HashTable(5)
>>> ht.bulk_insert((10,20,30))
step 1
[0, 1, 2, 3, 4]
[10, None, None, None, None]
step 2
[0, 1, 2, 3, 4]
[10, 20, None, None, None]
step 3
[0, 1, 2, 3, 4]
[10, 20, 30, None, None]
2.
>>> ht = HashTable(5)
>>> ht.bulk_insert([5,4,3,2,1])
step 1
[0, 1, 2, 3, 4]
[5, None, None, None, None]
step 2
[0, 1, 2, 3, 4]
[5, None, None, None, 4]
step 3
[0, 1, 2, 3, 4]
[5, None, None, 3, 4]
step 4
[0, 1, 2, 3, 4]
[5, None, 2, 3, 4]
step 5
[0, 1, 2, 3, 4]
[5, 1, 2, 3, 4]
"""
i = 1
self.__aux_list = values
for value in values:
self.insert_data(value)
self._step_by_step(i)
i += 1
def _set_value(self, key, data):
"""
The _set_value function updates the value stored at a particular hash index.
Examples:
1. _set_value in HashTable of size 5
>>> ht = HashTable(5)
>>> ht.insert_data(10)
>>> ht.insert_data(20)
>>> ht.insert_data(30)
>>> ht._set_value(0,15)
>>> ht.keys()
{0: 15, 1: 20, 2: 30}
2. _set_value in HashTable of size 2
>>> ht = HashTable(2)
>>> ht.insert_data(17)
>>> ht.insert_data(18)
>>> ht.insert_data(99)
>>> ht._set_value(3,15)
>>> ht.keys()
{3: 15, 2: 17, 4: 99}
3. _set_value in HashTable when hash is not present
>>> ht = HashTable(2)
>>> ht.insert_data(17)
>>> ht.insert_data(18)
>>> ht.insert_data(99)
>>> ht._set_value(0,15)
>>> ht.keys()
{3: 18, 2: 17, 4: 99, 0: 15}
4. _set_value in HashTable when multiple hash are not present
>>> ht = HashTable(2)
>>> ht.insert_data(17)
>>> ht.insert_data(18)
>>> ht.insert_data(99)
>>> ht._set_value(0,15)
>>> ht._set_value(1,20)
>>> ht.keys()
{3: 18, 2: 17, 4: 99, 0: 15, 1: 20}
"""
self.values[key] = data
self._keys[key] = data
@abstractmethod
def _collision_resolution(self, key, data=None):
"""
This method is a form of open addressing used to handle collisions.
This implementation uses linear probing: the hash table is searched
sequentially from the original hash location, and if a location is
already occupied, the next location is checked.
references:
- https://en.wikipedia.org/wiki/Linear_probing
Examples:
1. The collision will be with keys 18 & 99, so new hash will be created for 99
>>> ht = HashTable(3)
>>> ht.insert_data(17)
>>> ht.insert_data(18)
>>> ht.insert_data(99)
>>> ht.keys()
{2: 17, 0: 18, 1: 99}
2. The collision will be with keys 17 & 101, so new hash
will be created for 101
>>> ht = HashTable(4)
>>> ht.insert_data(17)
>>> ht.insert_data(18)
>>> ht.insert_data(99)
>>> ht.insert_data(101)
>>> ht.keys()
{1: 17, 2: 18, 3: 99, 0: 101}
3. The collision will be with all keys, so new hash will be created for all
>>> ht = HashTable(1)
>>> ht.insert_data(17)
>>> ht.insert_data(18)
>>> ht.insert_data(99)
>>> ht.keys()
{2: 17, 3: 18, 4: 99}
4. Trying to insert a float key into the hash
>>> ht = HashTable(1)
>>> ht.insert_data(17)
>>> ht.insert_data(18)
>>> ht.insert_data(99.99)
Traceback (most recent call last):
...
TypeError: list indices must be integers or slices, not float
"""
new_key = self.hash_function(key + 1)
while self.values[new_key] is not None and self.values[new_key] != key:
if self.values.count(None) > 0:
new_key = self.hash_function(new_key + 1)
else:
new_key = None
break
return new_key
def rehashing(self):
survivor_values = [value for value in self.values if value is not None]
self.size_table = next_prime(self.size_table, factor=2)
self._keys.clear()
self.values = [None] * self.size_table # hell's pointers D: don't DRY ;/
for value in survivor_values:
self.insert_data(value)
def insert_data(self, data):
"""
insert_data is used for inserting a single element at a time in the HashTable.
Examples:
>>> ht = HashTable(3)
>>> ht.insert_data(5)
>>> ht.keys()
{2: 5}
>>> ht = HashTable(5)
>>> ht.insert_data(30)
>>> ht.insert_data(50)
>>> ht.keys()
{0: 30, 1: 50}
"""
key = self.hash_function(data)
if self.values[key] is None:
self._set_value(key, data)
elif self.values[key] == data:
pass
else:
collision_resolution = self._collision_resolution(key, data)
if collision_resolution is not None:
self._set_value(collision_resolution, data)
else:
self.rehashing()
self.insert_data(data)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/hashing/hash_table_with_linked_list.py
================================================
from collections import deque
from .hash_table import HashTable
class HashTableWithLinkedList(HashTable):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _set_value(self, key, data):
self.values[key] = deque() if self.values[key] is None else self.values[key]
self.values[key].appendleft(data)
self._keys[key] = self.values[key]
def balanced_factor(self):
return (
sum(self.charge_factor - len(slot) for slot in self.values)
/ self.size_table
* self.charge_factor
)
def _collision_resolution(self, key, data=None):
if not (
len(self.values[key]) == self.charge_factor and self.values.count(None) == 0
):
return key
return super()._collision_resolution(key, data)
================================================
FILE: data_structures/hashing/number_theory/__init__.py
================================================
================================================
FILE: data_structures/hashing/number_theory/prime_numbers.py
================================================
#!/usr/bin/env python3
"""
Module for operations with prime numbers.
"""
import math
def is_prime(number: int) -> bool:
"""Checks to see if a number is a prime in O(sqrt(n)).
A number is prime if it has exactly two factors: 1 and itself.
>>> is_prime(0)
False
>>> is_prime(1)
False
>>> is_prime(2)
True
>>> is_prime(3)
True
>>> is_prime(27)
False
>>> is_prime(87)
False
>>> is_prime(563)
True
>>> is_prime(2999)
True
>>> is_prime(67483)
False
"""
# precondition
assert isinstance(number, int) and (number >= 0), (
"'number' must been an int and positive"
)
if 1 < number < 4:
# 2 and 3 are primes
return True
elif number < 2 or not number % 2:
# Negatives, 0, 1 and all even numbers are not primes
return False
odd_numbers = range(3, int(math.sqrt(number) + 1), 2)
return not any(not number % i for i in odd_numbers)
def next_prime(value, factor=1, **kwargs):
value = factor * value
first_value_val = value
while not is_prime(value):
value += 1 if not ("desc" in kwargs and kwargs["desc"] is True) else -1
if value == first_value_val:
return next_prime(value + 1, **kwargs)
return value
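# Illustrative cross-check sketch (not part of the original file): is_prime
# should agree with straightforward trial division for small numbers.
# _trial_division is an invented helper for the demo.
def _trial_division(n: int) -> bool:
    return n > 1 and all(n % d for d in range(2, n))
assert all(is_prime(n) == _trial_division(n) for n in range(200))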
================================================
FILE: data_structures/hashing/quadratic_probing.py
================================================
#!/usr/bin/env python3
from .hash_table import HashTable
class QuadraticProbing(HashTable):
"""
Basic Hash Table example with open addressing using Quadratic Probing
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _collision_resolution(self, key, data=None): # noqa: ARG002
"""
Quadratic probing is an open addressing scheme used for resolving
collisions in hash tables.
It works by taking the original hash index and adding successive
values of an arbitrary quadratic polynomial until an open slot is found.
Hash + 1², Hash + 2², Hash + 3² .... Hash + n²
reference:
- https://en.wikipedia.org/wiki/Quadratic_probing
e.g:
1. Create hash table with size 7
>>> qp = QuadraticProbing(7)
>>> qp.insert_data(90)
>>> qp.insert_data(340)
>>> qp.insert_data(24)
>>> qp.insert_data(45)
>>> qp.insert_data(99)
>>> qp.insert_data(73)
>>> qp.insert_data(7)
>>> qp.keys()
{11: 45, 14: 99, 7: 24, 0: 340, 5: 73, 6: 90, 8: 7}
2. Create hash table with size 8
>>> qp = QuadraticProbing(8)
>>> qp.insert_data(0)
>>> qp.insert_data(999)
>>> qp.insert_data(111)
>>> qp.keys()
{0: 0, 7: 999, 3: 111}
3. Try to add three data elements when the size is two
>>> qp = QuadraticProbing(2)
>>> qp.insert_data(0)
>>> qp.insert_data(999)
>>> qp.insert_data(111)
>>> qp.keys()
{0: 0, 4: 999, 1: 111}
4. Try to add three data elements when the size is one
>>> qp = QuadraticProbing(1)
>>> qp.insert_data(0)
>>> qp.insert_data(999)
>>> qp.insert_data(111)
>>> qp.keys()
{4: 999, 1: 111}
"""
i = 1
new_key = self.hash_function(key + i * i)
while self.values[new_key] is not None and self.values[new_key] != key:
i += 1
new_key = (
self.hash_function(key + i * i)
if not self.balanced_factor() >= self.lim_charge
else None
)
if new_key is None:
break
return new_key
if __name__ == "__main__":
import doctest
doctest.testmod()
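# Illustrative sketch (not part of the original file): the raw quadratic probe
# sequence hash + 1**2, hash + 2**2, ... taken modulo the table size.
# _quadratic_probes is invented for the demo and ignores the load-factor limit;
# note how the sequence can revisit slots, which is why the class falls back to
# rehashing when no free slot is reachable.
def _quadratic_probes(key: int, table_size: int, attempts: int = 5) -> list[int]:
    return [(key + i * i) % table_size for i in range(1, attempts + 1)]
# _quadratic_probes(3, 7) -> [4, 0, 5, 5, 0]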
================================================
FILE: data_structures/hashing/tests/__init__.py
================================================
================================================
FILE: data_structures/hashing/tests/test_hash_map.py
================================================
from operator import delitem, getitem, setitem
import pytest
from data_structures.hashing.hash_map import HashMap
def _get(k):
return getitem, k
def _set(k, v):
return setitem, k, v
def _del(k):
return delitem, k
def _run_operation(obj, fun, *args):
try:
return fun(obj, *args), None
except Exception as e:
return None, e
_add_items = (
_set("key_a", "val_a"),
_set("key_b", "val_b"),
)
_overwrite_items = [
_set("key_a", "val_a"),
_set("key_a", "val_b"),
]
_delete_items = [
_set("key_a", "val_a"),
_set("key_b", "val_b"),
_del("key_a"),
_del("key_b"),
_set("key_a", "val_a"),
_del("key_a"),
]
_access_absent_items = [
_get("key_a"),
_del("key_a"),
_set("key_a", "val_a"),
_del("key_a"),
_del("key_a"),
_get("key_a"),
]
_add_with_resize_up = [
*[_set(x, x) for x in range(5)], # guaranteed upsize
]
_add_with_resize_down = [
*[_set(x, x) for x in range(5)], # guaranteed upsize
*[_del(x) for x in range(5)],
_set("key_a", "val_b"),
]
@pytest.mark.parametrize(
"operations",
[
pytest.param(_add_items, id="add items"),
pytest.param(_overwrite_items, id="overwrite items"),
pytest.param(_delete_items, id="delete items"),
pytest.param(_access_absent_items, id="access absent items"),
pytest.param(_add_with_resize_up, id="add with resize up"),
pytest.param(_add_with_resize_down, id="add with resize down"),
],
)
def test_hash_map_is_the_same_as_dict(operations):
my = HashMap(initial_block_size=4)
py = {}
for _, (fun, *args) in enumerate(operations):
my_res, my_exc = _run_operation(my, fun, *args)
py_res, py_exc = _run_operation(py, fun, *args)
assert my_res == py_res
assert str(my_exc) == str(py_exc)
assert set(py) == set(my)
assert len(py) == len(my)
assert set(my.items()) == set(py.items())
def test_no_new_methods_was_added_to_api():
def is_public(name: str) -> bool:
return not name.startswith("_")
dict_public_names = {name for name in dir({}) if is_public(name)}
hash_public_names = {name for name in dir(HashMap()) if is_public(name)}
assert dict_public_names > hash_public_names
================================================
FILE: data_structures/heap/__init__.py
================================================
================================================
FILE: data_structures/heap/binomial_heap.py
================================================
"""
Binomial Heap
Reference: Advanced Data Structures, Peter Brass
"""
class Node:
"""
Node in a doubly-linked binomial tree, containing:
- value
- size of left subtree
- link to left, right and parent nodes
"""
def __init__(self, val):
self.val = val
# Number of nodes in left subtree
self.left_tree_size = 0
self.left = None
self.right = None
self.parent = None
def merge_trees(self, other):
"""
In-place merge of two binomial trees of equal size.
Returns the root of the resulting tree
"""
assert self.left_tree_size == other.left_tree_size, "Unequal Sizes of Blocks"
if self.val < other.val:
other.left = self.right
other.parent = None
if self.right:
self.right.parent = other
self.right = other
self.left_tree_size = self.left_tree_size * 2 + 1
return self
else:
self.left = other.right
self.parent = None
if other.right:
other.right.parent = self
other.right = self
other.left_tree_size = other.left_tree_size * 2 + 1
return other
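# Illustrative sketch (not part of the original file): merging two single-node
# binomial trees (both with left_tree_size == 0) keeps the smaller value as the
# root and grows left_tree_size to 2 * 0 + 1 = 1. The demo_* names are invented.
demo_small, demo_large = Node(3), Node(5)
demo_merged = demo_small.merge_trees(demo_large)
assert demo_merged is demo_small
assert demo_merged.val == 3
assert demo_merged.left_tree_size == 1
assert demo_merged.right is demo_large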
class BinomialHeap:
r"""
Min-oriented priority queue implemented with the binomial heap data
structure, provided by the BinomialHeap class. It supports:
- Insert element in a heap with n elements: guaranteed O(logn), amortized O(1)
- Merge (meld) heaps of size m and n: O(logn + logm)
- Delete Min: O(logn)
- Peek (return min without deleting it): O(1)
Example:
    Create a random permutation of 30 integers to be inserted and 20 of them deleted
>>> import numpy as np
>>> permutation = np.random.permutation(list(range(30)))
Create a Heap and insert the 30 integers
__init__() test
>>> first_heap = BinomialHeap()
30 inserts - insert() test
>>> for number in permutation:
... first_heap.insert(number)
Size test
>>> first_heap.size
30
Deleting - delete() test
>>> [int(first_heap.delete_min()) for _ in range(20)]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Create a new Heap
>>> second_heap = BinomialHeap()
>>> vals = [17, 20, 31, 34]
>>> for value in vals:
... second_heap.insert(value)
The heap should have the following structure:
17
/ \
# 31
/ \
20 34
/ \ / \
# # # #
    pre_order() test
>>> " ".join(str(x) for x in second_heap.pre_order())
"(17, 0) ('#', 1) (31, 1) (20, 2) ('#', 3) ('#', 3) (34, 2) ('#', 3) ('#', 3)"
printing Heap - __str__() test
>>> print(second_heap)
17
-#
-31
--20
---#
---#
--34
---#
---#
    merge_heaps() test
>>>
>>> merged = second_heap.merge_heaps(first_heap)
>>> merged.peek()
17
values in merged heap; (merge is inplace)
>>> results = []
>>> while not first_heap.is_empty():
... results.append(int(first_heap.delete_min()))
>>> results
[17, 20, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 34]
"""
def __init__(self, bottom_root=None, min_node=None, heap_size=0):
self.size = heap_size
self.bottom_root = bottom_root
self.min_node = min_node
def merge_heaps(self, other):
"""
In-place merge of two binomial heaps.
Both of them become the resulting merged heap
"""
# Empty heaps corner cases
if other.size == 0:
return None
if self.size == 0:
self.size = other.size
self.bottom_root = other.bottom_root
self.min_node = other.min_node
return None
# Update size
self.size = self.size + other.size
# Update min.node
if self.min_node.val > other.min_node.val:
self.min_node = other.min_node
# Merge
# Order roots by left_subtree_size
combined_roots_list = []
i, j = self.bottom_root, other.bottom_root
while i or j:
if i and ((not j) or i.left_tree_size < j.left_tree_size):
combined_roots_list.append((i, True))
i = i.parent
else:
combined_roots_list.append((j, False))
j = j.parent
# Insert links between them
for i in range(len(combined_roots_list) - 1):
if combined_roots_list[i][1] != combined_roots_list[i + 1][1]:
combined_roots_list[i][0].parent = combined_roots_list[i + 1][0]
combined_roots_list[i + 1][0].left = combined_roots_list[i][0]
# Consecutively merge roots with same left_tree_size
i = combined_roots_list[0][0]
while i.parent:
if (
(i.left_tree_size == i.parent.left_tree_size) and (not i.parent.parent)
) or (
i.left_tree_size == i.parent.left_tree_size
and i.left_tree_size != i.parent.parent.left_tree_size
):
# Neighbouring Nodes
previous_node = i.left
next_node = i.parent.parent
# Merging trees
i = i.merge_trees(i.parent)
# Updating links
i.left = previous_node
i.parent = next_node
if previous_node:
previous_node.parent = i
if next_node:
next_node.left = i
else:
i = i.parent
# Updating self.bottom_root
while i.left:
i = i.left
self.bottom_root = i
# Update other
other.size = self.size
other.bottom_root = self.bottom_root
other.min_node = self.min_node
# Return the merged heap
return self
def insert(self, val):
"""
insert a value in the heap
"""
if self.size == 0:
self.bottom_root = Node(val)
self.size = 1
self.min_node = self.bottom_root
else:
# Create new node
new_node = Node(val)
# Update size
self.size += 1
# update min_node
if val < self.min_node.val:
self.min_node = new_node
# Put new_node as a bottom_root in heap
self.bottom_root.left = new_node
new_node.parent = self.bottom_root
self.bottom_root = new_node
# Consecutively merge roots with same left_tree_size
while (
self.bottom_root.parent
and self.bottom_root.left_tree_size
== self.bottom_root.parent.left_tree_size
):
# Next node
next_node = self.bottom_root.parent.parent
# Merge
self.bottom_root = self.bottom_root.merge_trees(self.bottom_root.parent)
# Update Links
self.bottom_root.parent = next_node
self.bottom_root.left = None
if next_node:
next_node.left = self.bottom_root
def peek(self):
"""
return min element without deleting it
"""
return self.min_node.val
def is_empty(self):
return self.size == 0
def delete_min(self):
"""
delete min element and return it
"""
# assert not self.isEmpty(), "Empty Heap"
# Save minimal value
min_value = self.min_node.val
# Last element in heap corner case
if self.size == 1:
# Update size
self.size = 0
# Update bottom root
self.bottom_root = None
# Update min_node
self.min_node = None
return min_value
# No right subtree corner case
# The structure of the tree implies that this should be the bottom root
# and there is at least one other root
if self.min_node.right is None:
# Update size
self.size -= 1
# Update bottom root
self.bottom_root = self.bottom_root.parent
self.bottom_root.left = None
# Update min_node
self.min_node = self.bottom_root
i = self.bottom_root.parent
while i:
if i.val < self.min_node.val:
self.min_node = i
i = i.parent
return min_value
# General case
# Find the BinomialHeap of the right subtree of min_node
bottom_of_new = self.min_node.right
bottom_of_new.parent = None
min_of_new = bottom_of_new
size_of_new = 1
# Size, min_node and bottom_root
while bottom_of_new.left:
size_of_new = size_of_new * 2 + 1
bottom_of_new = bottom_of_new.left
if bottom_of_new.val < min_of_new.val:
min_of_new = bottom_of_new
# Corner case of single root on top left path
if (not self.min_node.left) and (not self.min_node.parent):
self.size = size_of_new
self.bottom_root = bottom_of_new
self.min_node = min_of_new
# print("Single root, multiple nodes case")
return min_value
# Remaining cases
# Construct heap of right subtree
new_heap = BinomialHeap(
bottom_root=bottom_of_new, min_node=min_of_new, heap_size=size_of_new
)
# Update size
self.size = self.size - 1 - size_of_new
# Neighbour nodes
previous_node = self.min_node.left
next_node = self.min_node.parent
# Initialize new bottom_root and min_node
self.min_node = previous_node or next_node
self.bottom_root = next_node
# Update links of previous_node and search below for new min_node and
# bottom_root
if previous_node:
previous_node.parent = next_node
# Update bottom_root and search for min_node below
self.bottom_root = previous_node
self.min_node = previous_node
while self.bottom_root.left:
self.bottom_root = self.bottom_root.left
if self.bottom_root.val < self.min_node.val:
self.min_node = self.bottom_root
if next_node:
next_node.left = previous_node
# Search for new min_node above min_node
i = next_node
while i:
if i.val < self.min_node.val:
self.min_node = i
i = i.parent
# Merge heaps
self.merge_heaps(new_heap)
return int(min_value)
def pre_order(self):
"""
Returns the Pre-order representation of the heap including
values of nodes plus their level distance from the root;
Empty nodes appear as #
"""
# Find top root
top_root = self.bottom_root
while top_root.parent:
top_root = top_root.parent
# preorder
heap_pre_order = []
self.__traversal(top_root, heap_pre_order)
return heap_pre_order
def __traversal(self, curr_node, preorder, level=0):
"""
Pre-order traversal of nodes
"""
if curr_node:
preorder.append((curr_node.val, level))
self.__traversal(curr_node.left, preorder, level + 1)
self.__traversal(curr_node.right, preorder, level + 1)
else:
preorder.append(("#", level))
def __str__(self):
"""
        Overriding __str__ for a pre-order print of nodes in the heap;
        performance is poor, so use only for small examples
"""
if self.is_empty():
return ""
preorder_heap = self.pre_order()
return "\n".join(("-" * level + str(value)) for value, level in preorder_heap)
# Unit Tests
if __name__ == "__main__":
import doctest
doctest.testmod()
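    # Small illustrative demo (not part of the doctests above): exercise the
    # public insert/peek/delete_min API on a few hand-picked values.
    demo_heap = BinomialHeap()
    for value in (5, 3, 8, 1):
        demo_heap.insert(value)
    print("size:", demo_heap.size)  # 4
    print("min:", demo_heap.peek())  # 1
    print("deleted:", demo_heap.delete_min())  # 1
    print("new min:", demo_heap.peek())  # 3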
================================================
FILE: data_structures/heap/heap.py
================================================
from __future__ import annotations
from abc import abstractmethod
from collections.abc import Iterable
from typing import Protocol, TypeVar
class Comparable(Protocol):
@abstractmethod
def __lt__(self: T, other: T) -> bool:
pass
@abstractmethod
def __gt__(self: T, other: T) -> bool:
pass
@abstractmethod
def __eq__(self: T, other: object) -> bool:
pass
T = TypeVar("T", bound=Comparable)
class Heap[T: Comparable]:
"""A Max Heap Implementation
>>> unsorted = [103, 9, 1, 7, 11, 15, 25, 201, 209, 107, 5]
>>> h = Heap()
>>> h.build_max_heap(unsorted)
>>> h
[209, 201, 25, 103, 107, 15, 1, 9, 7, 11, 5]
>>>
>>> h.extract_max()
209
>>> h
[201, 107, 25, 103, 11, 15, 1, 9, 7, 5]
>>>
>>> h.insert(100)
>>> h
[201, 107, 25, 103, 100, 15, 1, 9, 7, 5, 11]
>>>
>>> h.heap_sort()
>>> h
[1, 5, 7, 9, 11, 15, 25, 100, 103, 107, 201]
"""
def __init__(self) -> None:
self.h: list[T] = []
self.heap_size: int = 0
def __repr__(self) -> str:
return str(self.h)
def parent_index(self, child_idx: int) -> int | None:
"""
returns the parent index based on the given child index
>>> h = Heap()
>>> h.build_max_heap([103, 9, 1, 7, 11, 15, 25, 201, 209, 107, 5])
>>> h
[209, 201, 25, 103, 107, 15, 1, 9, 7, 11, 5]
        >>> h.parent_index(-1)  # returns None if index is <= 0
        >>> h.parent_index(0)  # returns None if index is <= 0
>>> h.parent_index(1)
0
>>> h.parent_index(2)
0
>>> h.parent_index(3)
1
>>> h.parent_index(4)
1
>>> h.parent_index(5)
2
>>> h.parent_index(10.5)
4.0
>>> h.parent_index(209.0)
104.0
>>> h.parent_index("Test")
Traceback (most recent call last):
...
TypeError: '>' not supported between instances of 'str' and 'int'
"""
if child_idx > 0:
return (child_idx - 1) // 2
return None
def left_child_idx(self, parent_idx: int) -> int | None:
"""
return the left child index if the left child exists.
if not, return None.
"""
left_child_index = 2 * parent_idx + 1
if left_child_index < self.heap_size:
return left_child_index
return None
def right_child_idx(self, parent_idx: int) -> int | None:
"""
return the right child index if the right child exists.
if not, return None.
"""
right_child_index = 2 * parent_idx + 2
if right_child_index < self.heap_size:
return right_child_index
return None
def max_heapify(self, index: int) -> None:
"""
correct a single violation of the heap property in a subtree's root.
        It is the function that is responsible for restoring the max-heap
        property, i.e. the maximum element is always at the top.
"""
if index < self.heap_size:
violation: int = index
left_child = self.left_child_idx(index)
right_child = self.right_child_idx(index)
# check which child is larger than its parent
if left_child is not None and self.h[left_child] > self.h[violation]:
violation = left_child
if right_child is not None and self.h[right_child] > self.h[violation]:
violation = right_child
# if violation indeed exists
if violation != index:
# swap to fix the violation
self.h[violation], self.h[index] = self.h[index], self.h[violation]
# fix the subsequent violation recursively if any
self.max_heapify(violation)
def build_max_heap(self, collection: Iterable[T]) -> None:
"""
build max heap from an unsorted array
>>> h = Heap()
>>> h.build_max_heap([20,40,50,20,10])
>>> h
[50, 40, 20, 20, 10]
>>> h = Heap()
>>> h.build_max_heap([1,2,3,4,5,6,7,8,9,0])
>>> h
[9, 8, 7, 4, 5, 6, 3, 2, 1, 0]
>>> h = Heap()
>>> h.build_max_heap([514,5,61,57,8,99,105])
>>> h
[514, 57, 105, 5, 8, 99, 61]
>>> h = Heap()
>>> h.build_max_heap([514,5,61.6,57,8,9.9,105])
>>> h
[514, 57, 105, 5, 8, 9.9, 61.6]
"""
self.h = list(collection)
self.heap_size = len(self.h)
if self.heap_size > 1:
# max_heapify from right to left but exclude leaves (last level)
for i in range(self.heap_size // 2 - 1, -1, -1):
self.max_heapify(i)
def extract_max(self) -> T:
"""
get and remove max from heap
>>> h = Heap()
>>> h.build_max_heap([20,40,50,20,10])
>>> h.extract_max()
50
>>> h = Heap()
>>> h.build_max_heap([514,5,61,57,8,99,105])
>>> h.extract_max()
514
>>> h = Heap()
>>> h.build_max_heap([1,2,3,4,5,6,7,8,9,0])
>>> h.extract_max()
9
"""
if self.heap_size >= 2:
me = self.h[0]
self.h[0] = self.h.pop(-1)
self.heap_size -= 1
self.max_heapify(0)
return me
elif self.heap_size == 1:
self.heap_size -= 1
return self.h.pop(-1)
else:
raise Exception("Empty heap")
def insert(self, value: T) -> None:
"""
insert a new value into the max heap
>>> h = Heap()
>>> h.insert(10)
>>> h
[10]
>>> h = Heap()
>>> h.insert(10)
>>> h.insert(10)
>>> h
[10, 10]
>>> h = Heap()
>>> h.insert(10)
>>> h.insert(10.1)
>>> h
[10.1, 10]
>>> h = Heap()
>>> h.insert(0.1)
>>> h.insert(0)
>>> h.insert(9)
>>> h.insert(5)
>>> h
[9, 5, 0.1, 0]
"""
self.h.append(value)
idx = (self.heap_size - 1) // 2
self.heap_size += 1
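        # Restore the heap property along the path from the new leaf's parent
        # up to the root by re-running max_heapify on each ancestor.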
while idx >= 0:
self.max_heapify(idx)
idx = (idx - 1) // 2
def heap_sort(self) -> None:
size = self.heap_size
for j in range(size - 1, 0, -1):
self.h[0], self.h[j] = self.h[j], self.h[0]
self.heap_size -= 1
self.max_heapify(0)
self.heap_size = size
if __name__ == "__main__":
import doctest
# run doc test
doctest.testmod()
# demo
for unsorted in [
[0],
[2],
[3, 5],
[5, 3],
[5, 5],
[0, 0, 0, 0],
[1, 1, 1, 1],
[2, 2, 3, 5],
[0, 2, 2, 3, 5],
[2, 5, 3, 0, 2, 3, 0, 3],
[6, 1, 2, 7, 9, 3, 4, 5, 10, 8],
[103, 9, 1, 7, 11, 15, 25, 201, 209, 107, 5],
[-45, -2, -5],
]:
print(f"unsorted array: {unsorted}")
heap: Heap[int] = Heap()
heap.build_max_heap(unsorted)
print(f"after build heap: {heap}")
print(f"max value: {heap.extract_max()}")
print(f"after max value removed: {heap}")
heap.insert(100)
print(f"after new value 100 inserted: {heap}")
heap.heap_sort()
print(f"heap-sorted array: {heap}\n")
================================================
FILE: data_structures/heap/heap_generic.py
================================================
from collections.abc import Callable
class Heap:
"""
    A generic Heap class that can be used as a min- or max-heap by passing the
    key function accordingly.
"""
def __init__(self, key: Callable | None = None) -> None:
# Stores actual heap items.
self.arr: list = []
# Stores indexes of each item for supporting updates and deletion.
self.pos_map: dict = {}
# Stores current size of heap.
self.size = 0
# Stores function used to evaluate the score of an item on which basis ordering
# will be done.
self.key = key or (lambda x: x)
def _parent(self, i: int) -> int | None:
"""Returns parent index of given index if exists else None"""
return int((i - 1) / 2) if i > 0 else None
def _left(self, i: int) -> int | None:
"""Returns left-child-index of given index if exists else None"""
left = int(2 * i + 1)
return left if 0 < left < self.size else None
def _right(self, i: int) -> int | None:
"""Returns right-child-index of given index if exists else None"""
right = int(2 * i + 2)
return right if 0 < right < self.size else None
def _swap(self, i: int, j: int) -> None:
"""Performs changes required for swapping two elements in the heap"""
# First update the indexes of the items in index map.
self.pos_map[self.arr[i][0]], self.pos_map[self.arr[j][0]] = (
self.pos_map[self.arr[j][0]],
self.pos_map[self.arr[i][0]],
)
# Then swap the items in the list.
self.arr[i], self.arr[j] = self.arr[j], self.arr[i]
def _cmp(self, i: int, j: int) -> bool:
"""Compares the two items using default comparison"""
return self.arr[i][1] < self.arr[j][1]
def _get_valid_parent(self, i: int) -> int:
"""
        Returns index of valid parent as per desired ordering among given index and
        both its children
"""
left = self._left(i)
right = self._right(i)
valid_parent = i
if left is not None and not self._cmp(left, valid_parent):
valid_parent = left
if right is not None and not self._cmp(right, valid_parent):
valid_parent = right
return valid_parent
def _heapify_up(self, index: int) -> None:
"""Fixes the heap in upward direction of given index"""
parent = self._parent(index)
while parent is not None and not self._cmp(index, parent):
self._swap(index, parent)
index, parent = parent, self._parent(parent)
def _heapify_down(self, index: int) -> None:
"""Fixes the heap in downward direction of given index"""
valid_parent = self._get_valid_parent(index)
while valid_parent != index:
self._swap(index, valid_parent)
index, valid_parent = valid_parent, self._get_valid_parent(valid_parent)
def update_item(self, item: int, item_value: int) -> None:
"""Updates given item value in heap if present"""
if item not in self.pos_map:
return
index = self.pos_map[item]
self.arr[index] = [item, self.key(item_value)]
# Make sure heap is right in both up and down direction.
# Ideally only one of them will make any change.
self._heapify_up(index)
self._heapify_down(index)
def delete_item(self, item: int) -> None:
"""Deletes given item from heap if present"""
if item not in self.pos_map:
return
index = self.pos_map[item]
del self.pos_map[item]
self.arr[index] = self.arr[self.size - 1]
self.pos_map[self.arr[self.size - 1][0]] = index
self.size -= 1
# Make sure heap is right in both up and down direction. Ideally only one
# of them will make any change- so no performance loss in calling both.
if self.size > index:
self._heapify_up(index)
self._heapify_down(index)
def insert_item(self, item: int, item_value: int) -> None:
"""Inserts given item with given value in heap"""
arr_len = len(self.arr)
if arr_len == self.size:
self.arr.append([item, self.key(item_value)])
else:
self.arr[self.size] = [item, self.key(item_value)]
self.pos_map[item] = self.size
self.size += 1
self._heapify_up(self.size - 1)
def get_top(self) -> tuple | None:
"""Returns top item tuple (Calculated value, item) from heap if present"""
return self.arr[0] if self.size else None
def extract_top(self) -> tuple | None:
"""
        Return top item pair [item, calculated value] from heap and remove it
        as well, if present
"""
top_item_tuple = self.get_top()
if top_item_tuple:
self.delete_item(top_item_tuple[0])
return top_item_tuple
def test_heap() -> None:
"""
>>> h = Heap() # Max-heap
>>> h.insert_item(5, 34)
>>> h.insert_item(6, 31)
>>> h.insert_item(7, 37)
>>> h.get_top()
[7, 37]
>>> h.extract_top()
[7, 37]
>>> h.extract_top()
[5, 34]
>>> h.extract_top()
[6, 31]
>>> h = Heap(key=lambda x: -x) # Min heap
>>> h.insert_item(5, 34)
>>> h.insert_item(6, 31)
>>> h.insert_item(7, 37)
>>> h.get_top()
[6, -31]
>>> h.extract_top()
[6, -31]
>>> h.extract_top()
[5, -34]
>>> h.extract_top()
[7, -37]
>>> h.insert_item(8, 45)
>>> h.insert_item(9, 40)
>>> h.insert_item(10, 50)
>>> h.get_top()
[9, -40]
>>> h.update_item(10, 30)
>>> h.get_top()
[10, -30]
>>> h.delete_item(10)
>>> h.get_top()
[9, -40]
"""
if __name__ == "__main__":
import doctest
doctest.testmod()
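    # Illustrative sketch (mirrors the doctests in test_heap above): passing
    # key=lambda x: -x flips the default max-heap ordering into a min-heap.
    min_heap = Heap(key=lambda x: -x)
    for item, value in ((1, 30), (2, 10), (3, 20)):
        min_heap.insert_item(item, value)
    print(min_heap.get_top())  # [2, -10] -> item 2 carries the smallest value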
================================================
FILE: data_structures/heap/max_heap.py
================================================
class BinaryHeap:
"""
A max-heap implementation in Python
>>> binary_heap = BinaryHeap()
>>> binary_heap.insert(6)
>>> binary_heap.insert(10)
>>> binary_heap.insert(15)
>>> binary_heap.insert(12)
>>> binary_heap.pop()
15
>>> binary_heap.pop()
12
>>> binary_heap.get_list
[10, 6]
>>> len(binary_heap)
2
"""
def __init__(self):
self.__heap = [0]
self.__size = 0
def __swap_up(self, i: int) -> None:
"""Swap the element up"""
temporary = self.__heap[i]
while i // 2 > 0:
if self.__heap[i] > self.__heap[i // 2]:
self.__heap[i] = self.__heap[i // 2]
self.__heap[i // 2] = temporary
i //= 2
def insert(self, value: int) -> None:
"""Insert new element"""
self.__heap.append(value)
self.__size += 1
self.__swap_up(self.__size)
def __swap_down(self, i: int) -> None:
"""Swap the element down"""
while self.__size >= 2 * i:
if 2 * i + 1 > self.__size: # noqa: SIM114
bigger_child = 2 * i
elif self.__heap[2 * i] > self.__heap[2 * i + 1]:
bigger_child = 2 * i
else:
bigger_child = 2 * i + 1
temporary = self.__heap[i]
if self.__heap[i] < self.__heap[bigger_child]:
self.__heap[i] = self.__heap[bigger_child]
self.__heap[bigger_child] = temporary
i = bigger_child
def pop(self) -> int:
"""Pop the root element"""
max_value = self.__heap[1]
self.__heap[1] = self.__heap[self.__size]
self.__size -= 1
self.__heap.pop()
self.__swap_down(1)
return max_value
@property
def get_list(self):
return self.__heap[1:]
def __len__(self):
"""Length of the array"""
return self.__size
if __name__ == "__main__":
import doctest
doctest.testmod()
# create an instance of BinaryHeap
binary_heap = BinaryHeap()
binary_heap.insert(6)
binary_heap.insert(10)
binary_heap.insert(15)
binary_heap.insert(12)
    # pop the root (the max value, because this is a max heap)
print(binary_heap.pop()) # 15
print(binary_heap.pop()) # 12
# get the list and size after operations
print(binary_heap.get_list)
print(len(binary_heap))
================================================
FILE: data_structures/heap/min_heap.py
================================================
# Min heap data structure
# with decrease key functionality - in O(log(n)) time
class Node:
def __init__(self, name, val):
self.name = name
self.val = val
def __str__(self):
return f"{self.__class__.__name__}({self.name}, {self.val})"
def __lt__(self, other):
return self.val < other.val
class MinHeap:
"""
>>> r = Node("R", -1)
>>> b = Node("B", 6)
>>> a = Node("A", 3)
>>> x = Node("X", 1)
>>> e = Node("E", 4)
>>> print(b)
Node(B, 6)
>>> myMinHeap = MinHeap([r, b, a, x, e])
>>> myMinHeap.decrease_key(b, -17)
>>> print(b)
Node(B, -17)
>>> myMinHeap["B"]
-17
"""
def __init__(self, array):
self.idx_of_element = {}
self.heap_dict = {}
self.heap = self.build_heap(array)
def __getitem__(self, key):
return self.get_value(key)
def get_parent_idx(self, idx):
return (idx - 1) // 2
def get_left_child_idx(self, idx):
return idx * 2 + 1
def get_right_child_idx(self, idx):
return idx * 2 + 2
def get_value(self, key):
return self.heap_dict[key]
def build_heap(self, array):
last_idx = len(array) - 1
start_from = self.get_parent_idx(last_idx)
for idx, i in enumerate(array):
self.idx_of_element[i] = idx
self.heap_dict[i.name] = i.val
for i in range(start_from, -1, -1):
self.sift_down(i, array)
return array
# this is min-heapify method
def sift_down(self, idx, array):
while True:
left = self.get_left_child_idx(idx)
right = self.get_right_child_idx(idx)
smallest = idx
if left < len(array) and array[left] < array[idx]:
smallest = left
if right < len(array) and array[right] < array[smallest]:
smallest = right
if smallest != idx:
array[idx], array[smallest] = array[smallest], array[idx]
(
self.idx_of_element[array[idx]],
self.idx_of_element[array[smallest]],
) = (
self.idx_of_element[array[smallest]],
self.idx_of_element[array[idx]],
)
idx = smallest
else:
break
def sift_up(self, idx):
p = self.get_parent_idx(idx)
while p >= 0 and self.heap[p] > self.heap[idx]:
self.heap[p], self.heap[idx] = self.heap[idx], self.heap[p]
self.idx_of_element[self.heap[p]], self.idx_of_element[self.heap[idx]] = (
self.idx_of_element[self.heap[idx]],
self.idx_of_element[self.heap[p]],
)
idx = p
p = self.get_parent_idx(idx)
def peek(self):
return self.heap[0]
def remove(self):
self.heap[0], self.heap[-1] = self.heap[-1], self.heap[0]
self.idx_of_element[self.heap[0]], self.idx_of_element[self.heap[-1]] = (
self.idx_of_element[self.heap[-1]],
self.idx_of_element[self.heap[0]],
)
x = self.heap.pop()
del self.idx_of_element[x]
self.sift_down(0, self.heap)
return x
def insert(self, node):
self.heap.append(node)
self.idx_of_element[node] = len(self.heap) - 1
self.heap_dict[node.name] = node.val
self.sift_up(len(self.heap) - 1)
def is_empty(self):
return len(self.heap) == 0
def decrease_key(self, node, new_value):
assert self.heap[self.idx_of_element[node]].val > new_value, (
"newValue must be less that current value"
)
node.val = new_value
self.heap_dict[node.name] = new_value
self.sift_up(self.idx_of_element[node])
# USAGE
r = Node("R", -1)
b = Node("B", 6)
a = Node("A", 3)
x = Node("X", 1)
e = Node("E", 4)
# Use one of these two ways to generate Min-Heap
# Generating Min-Heap from array
my_min_heap = MinHeap([r, b, a, x, e])
# Generating Min-Heap by Insert method
# myMinHeap.insert(a)
# myMinHeap.insert(b)
# myMinHeap.insert(x)
# myMinHeap.insert(r)
# myMinHeap.insert(e)
# Before
print("Min Heap - before decrease key")
for i in my_min_heap.heap:
print(i)
print("Min Heap - After decrease key of node [B -> -17]")
my_min_heap.decrease_key(b, -17)
# After
for i in my_min_heap.heap:
print(i)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/heap/randomized_heap.py
================================================
#!/usr/bin/env python3
from __future__ import annotations
import random
from collections.abc import Iterable
from typing import Any, TypeVar
T = TypeVar("T", bound=bool)
class RandomizedHeapNode[T: bool]:
"""
One node of the randomized heap. Contains the value and references to
two children.
"""
def __init__(self, value: T) -> None:
self._value: T = value
self.left: RandomizedHeapNode[T] | None = None
self.right: RandomizedHeapNode[T] | None = None
@property
def value(self) -> T:
"""
Return the value of the node.
>>> rhn = RandomizedHeapNode(10)
>>> rhn.value
10
>>> rhn = RandomizedHeapNode(-10)
>>> rhn.value
-10
"""
return self._value
@staticmethod
def merge(
root1: RandomizedHeapNode[T] | None, root2: RandomizedHeapNode[T] | None
) -> RandomizedHeapNode[T] | None:
"""
Merge 2 nodes together.
>>> rhn1 = RandomizedHeapNode(10)
>>> rhn2 = RandomizedHeapNode(20)
>>> RandomizedHeapNode.merge(rhn1, rhn2).value
10
>>> rhn1 = RandomizedHeapNode(20)
>>> rhn2 = RandomizedHeapNode(10)
>>> RandomizedHeapNode.merge(rhn1, rhn2).value
10
>>> rhn1 = RandomizedHeapNode(5)
>>> rhn2 = RandomizedHeapNode(0)
>>> RandomizedHeapNode.merge(rhn1, rhn2).value
0
"""
if not root1:
return root2
if not root2:
return root1
if root1.value > root2.value:
root1, root2 = root2, root1
if random.choice([True, False]):
root1.left, root1.right = root1.right, root1.left
root1.left = RandomizedHeapNode.merge(root1.left, root2)
return root1
class RandomizedHeap[T: bool]:
"""
    A data structure that allows inserting a new value and popping the smallest
    value. Both operations take O(log N) expected time, where N is the size of
    the structure.
Wiki: https://en.wikipedia.org/wiki/Randomized_meldable_heap
>>> RandomizedHeap([2, 3, 1, 5, 1, 7]).to_sorted_list()
[1, 1, 2, 3, 5, 7]
>>> rh = RandomizedHeap()
>>> rh.pop()
Traceback (most recent call last):
...
IndexError: Can't get top element for the empty heap.
>>> rh.insert(1)
>>> rh.insert(-1)
>>> rh.insert(0)
>>> rh.to_sorted_list()
[-1, 0, 1]
"""
def __init__(self, data: Iterable[T] | None = ()) -> None:
"""
>>> rh = RandomizedHeap([3, 1, 3, 7])
>>> rh.to_sorted_list()
[1, 3, 3, 7]
"""
self._root: RandomizedHeapNode[T] | None = None
if data:
for item in data:
self.insert(item)
def insert(self, value: T) -> None:
"""
Insert the value into the heap.
>>> rh = RandomizedHeap()
>>> rh.insert(3)
>>> rh.insert(1)
>>> rh.insert(3)
>>> rh.insert(7)
>>> rh.to_sorted_list()
[1, 3, 3, 7]
"""
self._root = RandomizedHeapNode.merge(self._root, RandomizedHeapNode(value))
def pop(self) -> T | None:
"""
Pop the smallest value from the heap and return it.
>>> rh = RandomizedHeap([3, 1, 3, 7])
>>> rh.pop()
1
>>> rh.pop()
3
>>> rh.pop()
3
>>> rh.pop()
7
>>> rh.pop()
Traceback (most recent call last):
...
IndexError: Can't get top element for the empty heap.
"""
result = self.top()
if self._root is None:
return None
self._root = RandomizedHeapNode.merge(self._root.left, self._root.right)
return result
def top(self) -> T:
"""
Return the smallest value from the heap.
>>> rh = RandomizedHeap()
>>> rh.insert(3)
>>> rh.top()
3
>>> rh.insert(1)
>>> rh.top()
1
>>> rh.insert(3)
>>> rh.top()
1
>>> rh.insert(7)
>>> rh.top()
1
"""
if not self._root:
raise IndexError("Can't get top element for the empty heap.")
return self._root.value
def clear(self) -> None:
"""
Clear the heap.
>>> rh = RandomizedHeap([3, 1, 3, 7])
>>> rh.clear()
>>> rh.pop()
Traceback (most recent call last):
...
IndexError: Can't get top element for the empty heap.
"""
self._root = None
def to_sorted_list(self) -> list[Any]:
"""
Returns sorted list containing all the values in the heap.
>>> rh = RandomizedHeap([3, 1, 3, 7])
>>> rh.to_sorted_list()
[1, 3, 3, 7]
"""
result = []
while self:
result.append(self.pop())
return result
def __bool__(self) -> bool:
"""
Check if the heap is not empty.
>>> rh = RandomizedHeap()
>>> bool(rh)
False
>>> rh.insert(1)
>>> bool(rh)
True
>>> rh.clear()
>>> bool(rh)
False
"""
return self._root is not None
if __name__ == "__main__":
import doctest
doctest.testmod()
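    # Illustrative sketch: duplicate values are kept, and to_sorted_list()
    # drains the heap in ascending order.
    heap = RandomizedHeap([4, 4, 2, 9])
    print(heap.to_sorted_list())  # [2, 4, 4, 9]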
================================================
FILE: data_structures/heap/skew_heap.py
================================================
#!/usr/bin/env python3
from __future__ import annotations
from collections.abc import Iterable, Iterator
from typing import Any, TypeVar
T = TypeVar("T", bound=bool)
class SkewNode[T: bool]:
"""
One node of the skew heap. Contains the value and references to
two children.
"""
def __init__(self, value: T) -> None:
self._value: T = value
self.left: SkewNode[T] | None = None
self.right: SkewNode[T] | None = None
@property
def value(self) -> T:
"""
Return the value of the node.
>>> SkewNode(0).value
0
>>> SkewNode(3.14159).value
3.14159
>>> SkewNode("hello").value
'hello'
>>> SkewNode(None).value
>>> SkewNode(True).value
True
>>> SkewNode([]).value
[]
>>> SkewNode({}).value
{}
>>> SkewNode(set()).value
set()
>>> SkewNode(0.0).value
0.0
>>> SkewNode(-1e-10).value
-1e-10
>>> SkewNode(10).value
10
>>> SkewNode(-10.5).value
-10.5
>>> SkewNode().value
Traceback (most recent call last):
...
TypeError: SkewNode.__init__() missing 1 required positional argument: 'value'
"""
return self._value
@staticmethod
def merge(
root1: SkewNode[T] | None, root2: SkewNode[T] | None
) -> SkewNode[T] | None:
"""
Merge 2 nodes together.
>>> SkewNode.merge(SkewNode(10),SkewNode(-10.5)).value
-10.5
>>> SkewNode.merge(SkewNode(10),SkewNode(10.5)).value
10
>>> SkewNode.merge(SkewNode(10),SkewNode(10)).value
10
>>> SkewNode.merge(SkewNode(-100),SkewNode(-10.5)).value
-100
"""
if not root1:
return root2
if not root2:
return root1
if root1.value > root2.value:
root1, root2 = root2, root1
result = root1
temp = root1.right
result.right = root1.left
result.left = SkewNode.merge(temp, root2)
return result
class SkewHeap[T: bool]:
"""
    A data structure that allows inserting a new value and popping the smallest
    value. Both operations take O(log N) amortized time, where N is the size of
    the structure.
Wiki: https://en.wikipedia.org/wiki/Skew_heap
Visualization: https://www.cs.usfca.edu/~galles/visualization/SkewHeap.html
>>> list(SkewHeap([2, 3, 1, 5, 1, 7]))
[1, 1, 2, 3, 5, 7]
>>> sh = SkewHeap()
>>> sh.pop()
Traceback (most recent call last):
...
IndexError: Can't get top element for the empty heap.
>>> sh.insert(1)
>>> sh.insert(-1)
>>> sh.insert(0)
>>> list(sh)
[-1, 0, 1]
"""
def __init__(self, data: Iterable[T] | None = ()) -> None:
"""
>>> sh = SkewHeap([3, 1, 3, 7])
>>> list(sh)
[1, 3, 3, 7]
"""
self._root: SkewNode[T] | None = None
if data:
for item in data:
self.insert(item)
def __bool__(self) -> bool:
"""
Check if the heap is not empty.
>>> sh = SkewHeap()
>>> bool(sh)
False
>>> sh.insert(1)
>>> bool(sh)
True
>>> sh.clear()
>>> bool(sh)
False
"""
return self._root is not None
def __iter__(self) -> Iterator[T]:
"""
Returns sorted list containing all the values in the heap.
>>> sh = SkewHeap([3, 1, 3, 7])
>>> list(sh)
[1, 3, 3, 7]
"""
result: list[Any] = []
while self:
result.append(self.pop())
        # Push the items back into the heap so that iterating does not clear it.
for item in result:
self.insert(item)
return iter(result)
def insert(self, value: T) -> None:
"""
Insert the value into the heap.
>>> sh = SkewHeap()
>>> sh.insert(3)
>>> sh.insert(1)
>>> sh.insert(3)
>>> sh.insert(7)
>>> list(sh)
[1, 3, 3, 7]
"""
self._root = SkewNode.merge(self._root, SkewNode(value))
def pop(self) -> T | None:
"""
Pop the smallest value from the heap and return it.
>>> sh = SkewHeap([3, 1, 3, 7])
>>> sh.pop()
1
>>> sh.pop()
3
>>> sh.pop()
3
>>> sh.pop()
7
>>> sh.pop()
Traceback (most recent call last):
...
IndexError: Can't get top element for the empty heap.
"""
result = self.top()
self._root = (
SkewNode.merge(self._root.left, self._root.right) if self._root else None
)
return result
def top(self) -> T:
"""
Return the smallest value from the heap.
>>> sh = SkewHeap()
>>> sh.insert(3)
>>> sh.top()
3
>>> sh.insert(1)
>>> sh.top()
1
>>> sh.insert(3)
>>> sh.top()
1
>>> sh.insert(7)
>>> sh.top()
1
"""
if not self._root:
raise IndexError("Can't get top element for the empty heap.")
return self._root.value
def clear(self) -> None:
"""
Clear the heap.
>>> sh = SkewHeap([3, 1, 3, 7])
>>> sh.clear()
>>> sh.pop()
Traceback (most recent call last):
...
IndexError: Can't get top element for the empty heap.
"""
self._root = None
if __name__ == "__main__":
import doctest
doctest.testmod()
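    # Illustrative sketch: iterating a SkewHeap yields values in sorted order
    # without emptying it, because __iter__ re-inserts what it pops.
    sh = SkewHeap([3, 1, 2])
    print(list(sh))  # [1, 2, 3]
    print(list(sh))  # [1, 2, 3] -> the heap is still intact after iteration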
================================================
FILE: data_structures/kd_tree/__init__.py
================================================
================================================
FILE: data_structures/kd_tree/build_kdtree.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11532
# https://github.com/TheAlgorithms/Python/pull/11532
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
from data_structures.kd_tree.kd_node import KDNode
def build_kdtree(points: list[list[float]], depth: int = 0) -> KDNode | None:
"""
Builds a KD-Tree from a list of points.
Args:
points: The list of points to build the KD-Tree from.
depth: The current depth in the tree
(used to determine axis for splitting).
Returns:
The root node of the KD-Tree,
or None if no points are provided.
"""
if not points:
return None
k = len(points[0]) # Dimensionality of the points
axis = depth % k
# Sort point list and choose median as pivot element
points.sort(key=lambda point: point[axis])
median_idx = len(points) // 2
# Create node and construct subtrees
left_points = points[:median_idx]
right_points = points[median_idx + 1 :]
return KDNode(
point=points[median_idx],
left=build_kdtree(left_points, depth + 1),
right=build_kdtree(right_points, depth + 1),
)
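

if __name__ == "__main__":
    # Minimal illustrative sketch (not part of the library API): build a small
    # 2-D KD-Tree; at depth 0 the split axis is x, so the root holds the point
    # with the median x-coordinate.
    sample_points = [
        [2.0, 3.0], [5.0, 4.0], [9.0, 6.0],
        [4.0, 7.0], [8.0, 1.0], [7.0, 2.0],
    ]
    root = build_kdtree(sample_points)
    if root is not None:
        print("root split point:", root.point)  # [7.0, 2.0]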
================================================
FILE: data_structures/kd_tree/example/__init__.py
================================================
================================================
FILE: data_structures/kd_tree/example/example_usage.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11532
# https://github.com/TheAlgorithms/Python/pull/11532
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
import numpy as np
from data_structures.kd_tree.build_kdtree import build_kdtree
from data_structures.kd_tree.example.hypercube_points import hypercube_points
from data_structures.kd_tree.nearest_neighbour_search import nearest_neighbour_search
def main() -> None:
"""
Demonstrates the use of KD-Tree by building it from random points
in a 10-dimensional hypercube and performing a nearest neighbor search.
"""
num_points: int = 5000
cube_size: float = 10.0 # Size of the hypercube (edge length)
num_dimensions: int = 10
# Generate random points within the hypercube
points: np.ndarray = hypercube_points(num_points, cube_size, num_dimensions)
hypercube_kdtree = build_kdtree(points.tolist())
# Generate a random query point within the same space
rng = np.random.default_rng()
query_point: list[float] = rng.random(num_dimensions).tolist()
# Perform nearest neighbor search
nearest_point, nearest_dist, nodes_visited = nearest_neighbour_search(
hypercube_kdtree, query_point
)
# Print the results
print(f"Query point: {query_point}")
print(f"Nearest point: {nearest_point}")
print(f"Distance: {nearest_dist:.4f}")
print(f"Nodes visited: {nodes_visited}")
if __name__ == "__main__":
main()
================================================
FILE: data_structures/kd_tree/example/hypercube_points.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11532
# https://github.com/TheAlgorithms/Python/pull/11532
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
import numpy as np
def hypercube_points(
num_points: int, hypercube_size: float, num_dimensions: int
) -> np.ndarray:
"""
Generates random points uniformly distributed within an n-dimensional hypercube.
Args:
num_points: Number of points to generate.
hypercube_size: Size of the hypercube.
num_dimensions: Number of dimensions of the hypercube.
Returns:
An array of shape (num_points, num_dimensions)
with generated points.
"""
rng = np.random.default_rng()
shape = (num_points, num_dimensions)
return hypercube_size * rng.random(shape)
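

if __name__ == "__main__":
    # Illustrative sketch: three random 2-D points inside a square of side 5.
    # Coordinates are uniform in [0, 5); the exact values differ on every run.
    print(hypercube_points(num_points=3, hypercube_size=5.0, num_dimensions=2))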
================================================
FILE: data_structures/kd_tree/kd_node.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11532
# https://github.com/TheAlgorithms/Python/pull/11532
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
from __future__ import annotations
class KDNode:
"""
Represents a node in a KD-Tree.
Attributes:
point: The point stored in this node.
left: The left child node.
right: The right child node.
"""
def __init__(
self,
point: list[float],
left: KDNode | None = None,
right: KDNode | None = None,
) -> None:
"""
Initializes a KDNode with the given point and child nodes.
Args:
point (list[float]): The point stored in this node.
left (Optional[KDNode]): The left child node.
right (Optional[KDNode]): The right child node.
"""
self.point = point
self.left = left
self.right = right
================================================
FILE: data_structures/kd_tree/nearest_neighbour_search.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11532
# https://github.com/TheAlgorithms/Python/pull/11532
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
from data_structures.kd_tree.kd_node import KDNode
def nearest_neighbour_search(
root: KDNode | None, query_point: list[float]
) -> tuple[list[float] | None, float, int]:
"""
Performs a nearest neighbor search in a KD-Tree for a given query point.
Args:
root (KDNode | None): The root node of the KD-Tree.
query_point (list[float]): The point for which the nearest neighbor
is being searched.
Returns:
tuple[list[float] | None, float, int]:
- The nearest point found in the KD-Tree to the query point,
or None if no point is found.
- The squared distance to the nearest point.
- The number of nodes visited during the search.
"""
nearest_point: list[float] | None = None
nearest_dist: float = float("inf")
nodes_visited: int = 0
def search(node: KDNode | None, depth: int = 0) -> None:
"""
Recursively searches for the nearest neighbor in the KD-Tree.
Args:
node: The current node in the KD-Tree.
depth: The current depth in the KD-Tree.
"""
nonlocal nearest_point, nearest_dist, nodes_visited
if node is None:
return
nodes_visited += 1
# Calculate the current distance (squared distance)
current_point = node.point
current_dist = sum(
(query_coord - point_coord) ** 2
for query_coord, point_coord in zip(query_point, current_point)
)
# Update nearest point if the current node is closer
if nearest_point is None or current_dist < nearest_dist:
nearest_point = current_point
nearest_dist = current_dist
# Determine which subtree to search first (based on axis and query point)
k = len(query_point) # Dimensionality of points
axis = depth % k
if query_point[axis] <= current_point[axis]:
nearer_subtree = node.left
further_subtree = node.right
else:
nearer_subtree = node.right
further_subtree = node.left
# Search the nearer subtree first
search(nearer_subtree, depth + 1)
# If the further subtree has a closer point
if (query_point[axis] - current_point[axis]) ** 2 < nearest_dist:
search(further_subtree, depth + 1)
search(root, 0)
return nearest_point, nearest_dist, nodes_visited
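

if __name__ == "__main__":
    # Minimal illustrative sketch (not part of the library API): search a tiny
    # hand-built 2-D tree; note that the reported distance is the squared distance.
    from data_structures.kd_tree.build_kdtree import build_kdtree

    tree = build_kdtree([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])
    point, squared_dist, visited = nearest_neighbour_search(tree, [2.1, 2.1])
    print(point, squared_dist, visited)  # [2.0, 2.0], ~0.02, 3 nodes visited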
================================================
FILE: data_structures/kd_tree/tests/__init__.py
================================================
================================================
FILE: data_structures/kd_tree/tests/test_kdtree.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11532
# https://github.com/TheAlgorithms/Python/pull/11532
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
import numpy as np
import pytest
from data_structures.kd_tree.build_kdtree import build_kdtree
from data_structures.kd_tree.example.hypercube_points import hypercube_points
from data_structures.kd_tree.kd_node import KDNode
from data_structures.kd_tree.nearest_neighbour_search import nearest_neighbour_search
@pytest.mark.parametrize(
("num_points", "cube_size", "num_dimensions", "depth", "expected_result"),
[
(0, 10.0, 2, 0, None), # Empty points list
(10, 10.0, 2, 2, KDNode), # Depth = 2, 2D points
(10, 10.0, 3, -2, KDNode), # Depth = -2, 3D points
],
)
def test_build_kdtree(num_points, cube_size, num_dimensions, depth, expected_result):
"""
Test that KD-Tree is built correctly.
Cases:
- Empty points list.
- Positive depth value.
- Negative depth value.
"""
points = (
hypercube_points(num_points, cube_size, num_dimensions).tolist()
if num_points > 0
else []
)
kdtree = build_kdtree(points, depth=depth)
if expected_result is None:
# Empty points list case
assert kdtree is None, f"Expected None for empty points list, got {kdtree}"
else:
# Check if root node is not None
assert kdtree is not None, "Expected a KDNode, got None"
# Check if root has correct dimensions
assert len(kdtree.point) == num_dimensions, (
f"Expected point dimension {num_dimensions}, got {len(kdtree.point)}"
)
# Check that the tree is balanced to some extent (simplistic check)
assert isinstance(kdtree, KDNode), (
f"Expected KDNode instance, got {type(kdtree)}"
)
def test_nearest_neighbour_search():
"""
Test the nearest neighbor search function.
"""
num_points = 10
cube_size = 10.0
num_dimensions = 2
points = hypercube_points(num_points, cube_size, num_dimensions)
kdtree = build_kdtree(points.tolist())
rng = np.random.default_rng()
query_point = rng.random(num_dimensions).tolist()
nearest_point, nearest_dist, nodes_visited = nearest_neighbour_search(
kdtree, query_point
)
# Check that nearest point is not None
assert nearest_point is not None
# Check that distance is a non-negative number
assert nearest_dist >= 0
# Check that nodes visited is a non-negative integer
assert nodes_visited >= 0
def test_edge_cases():
"""
Test edge cases such as an empty KD-Tree.
"""
empty_kdtree = build_kdtree([])
query_point = [0.0] * 2 # Using a default 2D query point
nearest_point, nearest_dist, nodes_visited = nearest_neighbour_search(
empty_kdtree, query_point
)
# With an empty KD-Tree, nearest_point should be None
assert nearest_point is None
assert nearest_dist == float("inf")
assert nodes_visited == 0
if __name__ == "__main__":
import pytest
pytest.main()
================================================
FILE: data_structures/linked_list/__init__.py
================================================
"""
A linked list consists of nodes.
Each node contains data and may also link to other nodes:
    - Head node: the first node; its address gives us access to the complete list
    - Last node: points to null (None)
"""
from __future__ import annotations
from typing import Any
class Node:
def __init__(self, item: Any, next: Any) -> None: # noqa: A002
self.item = item
self.next = next
class LinkedList:
def __init__(self) -> None:
self.head: Node | None = None
self.size = 0
def add(self, item: Any, position: int = 0) -> None:
"""
Add an item to the LinkedList at the specified position.
Default position is 0 (the head).
Args:
item (Any): The item to add to the LinkedList.
position (int, optional): The position at which to add the item.
Defaults to 0.
Raises:
ValueError: If the position is negative or out of bounds.
>>> linked_list = LinkedList()
>>> linked_list.add(1)
>>> linked_list.add(2)
>>> linked_list.add(3)
>>> linked_list.add(4, 2)
>>> print(linked_list)
3 --> 2 --> 4 --> 1
# Test adding to a negative position
>>> linked_list.add(5, -3)
Traceback (most recent call last):
...
ValueError: Position must be non-negative
# Test adding to an out-of-bounds position
>>> linked_list.add(5,7)
Traceback (most recent call last):
...
ValueError: Out of bounds
>>> linked_list.add(5, 4)
>>> print(linked_list)
3 --> 2 --> 4 --> 1 --> 5
"""
if position < 0:
raise ValueError("Position must be non-negative")
if position == 0 or self.head is None:
new_node = Node(item, self.head)
self.head = new_node
else:
current = self.head
for _ in range(position - 1):
current = current.next
if current is None:
raise ValueError("Out of bounds")
new_node = Node(item, current.next)
current.next = new_node
self.size += 1
def remove(self) -> Any:
        # Use 'self.head is None' instead of 'self.is_empty()' so that mypy can
        # narrow 'self.head' to 'Node' in the else branch below.
if self.head is None:
return None
else:
item = self.head.item
self.head = self.head.next
self.size -= 1
return item
def is_empty(self) -> bool:
return self.head is None
def __str__(self) -> str:
"""
>>> linked_list = LinkedList()
>>> linked_list.add(23)
>>> linked_list.add(14)
>>> linked_list.add(9)
>>> print(linked_list)
9 --> 14 --> 23
"""
if self.is_empty():
return ""
else:
iterate = self.head
item_str = ""
item_list: list[str] = []
while iterate:
item_list.append(str(iterate.item))
iterate = iterate.next
item_str = " --> ".join(item_list)
return item_str
def __len__(self) -> int:
"""
>>> linked_list = LinkedList()
>>> len(linked_list)
0
>>> linked_list.add("a")
>>> len(linked_list)
1
>>> linked_list.add("b")
>>> len(linked_list)
2
>>> _ = linked_list.remove()
>>> len(linked_list)
1
>>> _ = linked_list.remove()
>>> len(linked_list)
0
"""
return self.size
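

if __name__ == "__main__":
    # Small illustrative demo (the doctests above cover the same API):
    # each add(value) becomes the new head unless a position is given.
    demo = LinkedList()
    for value in (1, 2, 3):
        demo.add(value)
    demo.add(99, position=2)  # splice into the middle
    print(demo)  # 3 --> 2 --> 99 --> 1
    while not demo.is_empty():
        print(demo.remove())  # 3, then 2, then 99, then 1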
================================================
FILE: data_structures/linked_list/circular_linked_list.py
================================================
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Any
@dataclass
class Node:
data: Any
next_node: Node | None = None
@dataclass
class CircularLinkedList:
head: Node | None = None # Reference to the head (first node)
tail: Node | None = None # Reference to the tail (last node)
def __iter__(self) -> Iterator[Any]:
"""
Iterate through all nodes in the Circular Linked List yielding their data.
Yields:
The data of each node in the linked list.
"""
node = self.head
while node:
yield node.data
node = node.next_node
if node == self.head:
break
def __len__(self) -> int:
"""
Get the length (number of nodes) in the Circular Linked List.
"""
return sum(1 for _ in self)
def __repr__(self) -> str:
"""
Generate a string representation of the Circular Linked List.
Returns:
A string of the format "1->2->....->N".
"""
return "->".join(str(item) for item in iter(self))
def insert_tail(self, data: Any) -> None:
"""
Insert a node with the given data at the end of the Circular Linked List.
"""
self.insert_nth(len(self), data)
def insert_head(self, data: Any) -> None:
"""
Insert a node with the given data at the beginning of the Circular Linked List.
"""
self.insert_nth(0, data)
def insert_nth(self, index: int, data: Any) -> None:
"""
Insert the data of the node at the nth pos in the Circular Linked List.
Args:
index: The index at which the data should be inserted.
data: The data to be inserted.
Raises:
IndexError: If the index is out of range.
"""
if index < 0 or index > len(self):
raise IndexError("list index out of range.")
new_node: Node = Node(data)
if self.head is None:
new_node.next_node = new_node # First node points to itself
self.tail = self.head = new_node
elif index == 0: # Insert at the head
new_node.next_node = self.head
assert self.tail is not None # List is not empty, tail exists
self.head = self.tail.next_node = new_node
else:
temp: Node | None = self.head
for _ in range(index - 1):
assert temp is not None
temp = temp.next_node
assert temp is not None
new_node.next_node = temp.next_node
temp.next_node = new_node
if index == len(self) - 1: # Insert at the tail
self.tail = new_node
def delete_front(self) -> Any:
"""
Delete and return the data of the node at the front of the Circular Linked List.
Raises:
IndexError: If the list is empty.
"""
return self.delete_nth(0)
def delete_tail(self) -> Any:
"""
Delete and return the data of the node at the end of the Circular Linked List.
Returns:
Any: The data of the deleted node.
Raises:
IndexError: If the index is out of range.
"""
return self.delete_nth(len(self) - 1)
def delete_nth(self, index: int = 0) -> Any:
"""
Delete and return the data of the node at the nth pos in Circular Linked List.
Args:
index (int): The index of the node to be deleted. Defaults to 0.
Returns:
Any: The data of the deleted node.
Raises:
IndexError: If the index is out of range.
"""
if not 0 <= index < len(self):
raise IndexError("list index out of range.")
assert self.head is not None
assert self.tail is not None
delete_node: Node = self.head
if self.head == self.tail: # Just one node
self.head = self.tail = None
elif index == 0: # Delete head node
assert self.tail.next_node is not None
self.tail.next_node = self.tail.next_node.next_node
self.head = self.head.next_node
else:
temp: Node | None = self.head
for _ in range(index - 1):
assert temp is not None
temp = temp.next_node
assert temp is not None
assert temp.next_node is not None
delete_node = temp.next_node
temp.next_node = temp.next_node.next_node
if index == len(self) - 1: # Delete at tail
self.tail = temp
return delete_node.data
def is_empty(self) -> bool:
"""
Check if the Circular Linked List is empty.
Returns:
bool: True if the list is empty, False otherwise.
"""
return len(self) == 0
def test_circular_linked_list() -> None:
"""
Test cases for the CircularLinkedList class.
>>> test_circular_linked_list()
"""
circular_linked_list = CircularLinkedList()
assert len(circular_linked_list) == 0
assert circular_linked_list.is_empty() is True
assert str(circular_linked_list) == ""
try:
circular_linked_list.delete_front()
raise AssertionError # This should not happen
except IndexError:
assert True # This should happen
try:
circular_linked_list.delete_tail()
raise AssertionError # This should not happen
except IndexError:
assert True # This should happen
try:
circular_linked_list.delete_nth(-1)
raise AssertionError
except IndexError:
assert True
try:
circular_linked_list.delete_nth(0)
raise AssertionError
except IndexError:
assert True
assert circular_linked_list.is_empty() is True
for i in range(5):
assert len(circular_linked_list) == i
circular_linked_list.insert_nth(i, i + 1)
assert str(circular_linked_list) == "->".join(str(i) for i in range(1, 6))
circular_linked_list.insert_tail(6)
assert str(circular_linked_list) == "->".join(str(i) for i in range(1, 7))
circular_linked_list.insert_head(0)
assert str(circular_linked_list) == "->".join(str(i) for i in range(7))
assert circular_linked_list.delete_front() == 0
assert circular_linked_list.delete_tail() == 6
assert str(circular_linked_list) == "->".join(str(i) for i in range(1, 6))
assert circular_linked_list.delete_nth(2) == 3
circular_linked_list.insert_nth(2, 3)
assert str(circular_linked_list) == "->".join(str(i) for i in range(1, 6))
assert circular_linked_list.is_empty() is False
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/linked_list/deque_doubly.py
================================================
"""
Implementing Deque using DoublyLinkedList ...
Operations:
1. insertion in the front -> O(1)
2. insertion in the end -> O(1)
3. remove from the front -> O(1)
4. remove from the end -> O(1)
"""
class _DoublyLinkedBase:
"""A Private class (to be inherited)"""
class _Node:
__slots__ = "_data", "_next", "_prev"
def __init__(self, link_p, element, link_n):
self._prev = link_p
self._data = element
self._next = link_n
def has_next_and_prev(self):
return (
f" Prev -> {self._prev is not None}, Next -> {self._next is not None}"
)
def __init__(self):
self._header = self._Node(None, None, None)
self._trailer = self._Node(None, None, None)
self._header._next = self._trailer
self._trailer._prev = self._header
self._size = 0
def __len__(self):
return self._size
def is_empty(self):
return self.__len__() == 0
def _insert(self, predecessor, e, successor):
        # Create new_node with its prev link -> predecessor
        # and its next link -> successor
new_node = self._Node(predecessor, e, successor)
predecessor._next = new_node
successor._prev = new_node
self._size += 1
return self
def _delete(self, node):
predecessor = node._prev
successor = node._next
predecessor._next = successor
successor._prev = predecessor
self._size -= 1
temp = node._data
node._prev = node._next = node._data = None
del node
return temp
class LinkedDeque(_DoublyLinkedBase):
def first(self):
"""return first element
>>> d = LinkedDeque()
>>> d.add_first('A').first()
'A'
>>> d.add_first('B').first()
'B'
"""
if self.is_empty():
raise Exception("List is empty")
return self._header._next._data
def last(self):
"""return last element
>>> d = LinkedDeque()
>>> d.add_last('A').last()
'A'
>>> d.add_last('B').last()
'B'
"""
if self.is_empty():
raise Exception("List is empty")
return self._trailer._prev._data
    # Deque insert operations (at the front, at the end)
def add_first(self, element):
"""insertion in the front
>>> LinkedDeque().add_first('AV').first()
'AV'
"""
return self._insert(self._header, element, self._header._next)
def add_last(self, element):
"""insertion in the end
>>> LinkedDeque().add_last('B').last()
'B'
"""
return self._insert(self._trailer._prev, element, self._trailer)
    # Deque remove operations (at the front, at the end)
def remove_first(self):
"""removal from the front
>>> d = LinkedDeque()
>>> d.is_empty()
True
>>> d.remove_first()
Traceback (most recent call last):
...
IndexError: remove_first from empty list
>>> d.add_first('A') # doctest: +ELLIPSIS
        <data_structures.linked_list.deque_doubly.LinkedDeque object at ...
        >>> d.remove_first()
'A'
>>> d.is_empty()
True
"""
if self.is_empty():
raise IndexError("remove_first from empty list")
return self._delete(self._header._next)
def remove_last(self):
"""removal in the end
>>> d = LinkedDeque()
>>> d.is_empty()
True
>>> d.remove_last()
Traceback (most recent call last):
...
        IndexError: remove_last from empty list
>>> d.add_first('A') # doctest: +ELLIPSIS
        <data_structures.linked_list.deque_doubly.LinkedDeque object at ...
        >>> d.remove_last()
'A'
>>> d.is_empty()
True
"""
if self.is_empty():
raise IndexError("remove_first from empty list")
return self._delete(self._trailer._prev)
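

if __name__ == "__main__":
    # Small illustrative demo (not part of the original module): the deque
    # supports O(1) insertion and removal at both ends.
    d = LinkedDeque()
    d.add_first("A")
    d.add_last("B")
    print(d.first(), d.last())  # A B
    print(d.remove_last())  # B
    print(d.remove_first())  # A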
================================================
FILE: data_structures/linked_list/doubly_linked_list.py
================================================
"""
https://en.wikipedia.org/wiki/Doubly_linked_list
"""
class Node:
def __init__(self, data):
self.data = data
self.previous = None
self.next = None
def __str__(self):
return f"{self.data}"
class DoublyLinkedList:
def __init__(self):
self.head = None
self.tail = None
def __iter__(self):
"""
>>> linked_list = DoublyLinkedList()
>>> linked_list.insert_at_head('b')
>>> linked_list.insert_at_head('a')
>>> linked_list.insert_at_tail('c')
>>> tuple(linked_list)
('a', 'b', 'c')
"""
node = self.head
while node:
yield node.data
node = node.next
def __str__(self):
"""
>>> linked_list = DoublyLinkedList()
>>> linked_list.insert_at_tail('a')
>>> linked_list.insert_at_tail('b')
>>> linked_list.insert_at_tail('c')
>>> str(linked_list)
'a->b->c'
"""
return "->".join([str(item) for item in self])
def __len__(self):
"""
>>> linked_list = DoublyLinkedList()
>>> for i in range(0, 5):
... linked_list.insert_at_nth(i, i + 1)
>>> len(linked_list) == 5
True
"""
return sum(1 for _ in self)
def insert_at_head(self, data):
self.insert_at_nth(0, data)
def insert_at_tail(self, data):
self.insert_at_nth(len(self), data)
def insert_at_nth(self, index: int, data):
"""
>>> linked_list = DoublyLinkedList()
>>> linked_list.insert_at_nth(-1, 666)
Traceback (most recent call last):
....
IndexError: list index out of range
>>> linked_list.insert_at_nth(1, 666)
Traceback (most recent call last):
....
IndexError: list index out of range
>>> linked_list.insert_at_nth(0, 2)
>>> linked_list.insert_at_nth(0, 1)
>>> linked_list.insert_at_nth(2, 4)
>>> linked_list.insert_at_nth(2, 3)
>>> str(linked_list)
'1->2->3->4'
>>> linked_list.insert_at_nth(5, 5)
Traceback (most recent call last):
....
IndexError: list index out of range
"""
length = len(self)
if not 0 <= index <= length:
raise IndexError("list index out of range")
new_node = Node(data)
if self.head is None:
self.head = self.tail = new_node
elif index == 0:
self.head.previous = new_node
new_node.next = self.head
self.head = new_node
elif index == length:
self.tail.next = new_node
new_node.previous = self.tail
self.tail = new_node
else:
temp = self.head
for _ in range(index):
temp = temp.next
temp.previous.next = new_node
new_node.previous = temp.previous
new_node.next = temp
temp.previous = new_node
def delete_head(self):
return self.delete_at_nth(0)
def delete_tail(self):
return self.delete_at_nth(len(self) - 1)
def delete_at_nth(self, index: int):
"""
>>> linked_list = DoublyLinkedList()
>>> linked_list.delete_at_nth(0)
Traceback (most recent call last):
....
IndexError: list index out of range
>>> for i in range(0, 5):
... linked_list.insert_at_nth(i, i + 1)
>>> linked_list.delete_at_nth(0) == 1
True
>>> linked_list.delete_at_nth(3) == 5
True
>>> linked_list.delete_at_nth(1) == 3
True
>>> str(linked_list)
'2->4'
>>> linked_list.delete_at_nth(2)
Traceback (most recent call last):
....
IndexError: list index out of range
"""
length = len(self)
if not 0 <= index <= length - 1:
raise IndexError("list index out of range")
delete_node = self.head # default first node
if length == 1:
self.head = self.tail = None
elif index == 0:
self.head = self.head.next
self.head.previous = None
elif index == length - 1:
delete_node = self.tail
self.tail = self.tail.previous
self.tail.next = None
else:
temp = self.head
for _ in range(index):
temp = temp.next
delete_node = temp
temp.next.previous = temp.previous
temp.previous.next = temp.next
return delete_node.data
def delete(self, data) -> str:
current = self.head
while current.data != data: # Find the position to delete
if current.next:
current = current.next
else: # We have reached the end and no value matches
raise ValueError("No data matching given value")
if current == self.head:
self.delete_head()
elif current == self.tail:
self.delete_tail()
else: # Before: 1 <--> 2(current) <--> 3
current.previous.next = current.next # 1 --> 3
current.next.previous = current.previous # 1 <--> 3
return data
def is_empty(self):
"""
>>> linked_list = DoublyLinkedList()
>>> linked_list.is_empty()
True
>>> linked_list.insert_at_tail(1)
>>> linked_list.is_empty()
False
"""
return len(self) == 0
def test_doubly_linked_list() -> None:
"""
>>> test_doubly_linked_list()
"""
linked_list = DoublyLinkedList()
assert linked_list.is_empty() is True
assert str(linked_list) == ""
try:
linked_list.delete_head()
raise AssertionError # This should not happen.
except IndexError:
assert True # This should happen.
try:
linked_list.delete_tail()
raise AssertionError # This should not happen.
except IndexError:
assert True # This should happen.
for i in range(10):
assert len(linked_list) == i
linked_list.insert_at_nth(i, i + 1)
assert str(linked_list) == "->".join(str(i) for i in range(1, 11))
linked_list.insert_at_head(0)
linked_list.insert_at_tail(11)
assert str(linked_list) == "->".join(str(i) for i in range(12))
assert linked_list.delete_head() == 0
assert linked_list.delete_at_nth(9) == 10
assert linked_list.delete_tail() == 11
assert len(linked_list) == 9
assert str(linked_list) == "->".join(str(i) for i in range(1, 10))
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: data_structures/linked_list/doubly_linked_list_two.py
================================================
"""
- A linked list is similar to an array in that it holds values; however, the links
in a linked list do not have indexes.
- This is an example of a double-ended, doubly linked list.
- Each link references the next link and the previous one.
- A Doubly Linked List (DLL) contains an extra pointer, typically called the
previous pointer, in addition to the next pointer and data found in a singly
linked list.
- Advantages over a singly linked list (SLL): it can be traversed in both the
forward and backward directions, and the delete operation is more efficient.
"""
from dataclasses import dataclass
from typing import Self, TypeVar
DataType = TypeVar("DataType")
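# Illustrative sketch (not part of the implementation below): the notes above say a
# doubly linked list can be walked in both directions.  This hypothetical helper
# builds a three-node chain by hand and returns the values seen walking forward
# from the head and backward from the tail.
def _demo_bidirectional_traversal() -> tuple[list[str], list[str]]:
    """
    >>> _demo_bidirectional_traversal()
    (['a', 'b', 'c'], ['c', 'b', 'a'])
    """

    class _DemoNode:
        def __init__(self, data: str) -> None:
            self.data = data
            self.previous = None
            self.next = None

    a, b, c = _DemoNode("a"), _DemoNode("b"), _DemoNode("c")
    a.next, b.previous = b, a  # link a <-> b
    b.next, c.previous = c, b  # link b <-> c
    forward = []
    node = a
    while node:  # follow .next pointers from the head
        forward.append(node.data)
        node = node.next
    backward = []
    node = c
    while node:  # follow .previous pointers from the tail
        backward.append(node.data)
        node = node.previous
    return forward, backward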
@dataclass
class Node[DataType]:
data: DataType
previous: Self | None = None
next: Self | None = None
def __str__(self) -> str:
return f"{self.data}"
class LinkedListIterator:
def __init__(self, head):
self.current = head
def __iter__(self):
return self
def __next__(self):
if not self.current:
raise StopIteration
else:
value = self.current.data
self.current = self.current.next
return value
@dataclass
class LinkedList:
head: Node | None = None # First node in list
tail: Node | None = None # Last node in list
def __str__(self):
current = self.head
nodes = []
while current is not None:
nodes.append(current.data)
current = current.next
return " ".join(str(node) for node in nodes)
def __contains__(self, value: DataType):
current = self.head
while current:
if current.data == value:
return True
current = current.next
return False
def __iter__(self):
return LinkedListIterator(self.head)
def get_head_data(self):
if self.head:
return self.head.data
return None
def get_tail_data(self):
if self.tail:
return self.tail.data
return None
def set_head(self, node: Node) -> None:
if self.head is None:
self.head = node
self.tail = node
else:
self.insert_before_node(self.head, node)
def set_tail(self, node: Node) -> None:
if self.tail is None:
self.head = node
self.tail = node
else:
self.insert_after_node(self.tail, node)
def insert(self, value: DataType) -> None:
node = Node(value)
if self.head is None:
self.set_head(node)
else:
self.set_tail(node)
def insert_before_node(self, node: Node, node_to_insert: Node) -> None:
node_to_insert.next = node
node_to_insert.previous = node.previous
if node.previous is None:
self.head = node_to_insert
else:
node.previous.next = node_to_insert
node.previous = node_to_insert
def insert_after_node(self, node: Node, node_to_insert: Node) -> None:
node_to_insert.previous = node
node_to_insert.next = node.next
if node.next is None:
self.tail = node_to_insert
else:
node.next.previous = node_to_insert
node.next = node_to_insert
def insert_at_position(self, position: int, value: DataType) -> None:
current_position = 1
new_node = Node(value)
node = self.head
while node:
if current_position == position:
self.insert_before_node(node, new_node)
return
current_position += 1
node = node.next
self.set_tail(new_node)
def get_node(self, item: DataType) -> Node:
node = self.head
while node:
if node.data == item:
return node
node = node.next
raise Exception("Node not found")
def delete_value(self, value):
if (node := self.get_node(value)) is not None:
if node == self.head:
self.head = self.head.next
if node == self.tail:
self.tail = self.tail.previous
self.remove_node_pointers(node)
@staticmethod
def remove_node_pointers(node: Node) -> None:
if node.next:
node.next.previous = node.previous
if node.previous:
node.previous.next = node.next
node.next = None
node.previous = None
def is_empty(self):
return self.head is None
def create_linked_list() -> None:
"""
>>> new_linked_list = LinkedList()
>>> new_linked_list.get_head_data() is None
True
>>> new_linked_list.get_tail_data() is None
True
>>> new_linked_list.is_empty()
True
>>> new_linked_list.insert(10)
>>> new_linked_list.get_head_data()
10
>>> new_linked_list.get_tail_data()
10
>>> new_linked_list.insert_at_position(position=3, value=20)
>>> new_linked_list.get_head_data()
10
>>> new_linked_list.get_tail_data()
20
>>> new_linked_list.set_head(Node(1000))
>>> new_linked_list.get_head_data()
1000
>>> new_linked_list.get_tail_data()
20
>>> new_linked_list.set_tail(Node(2000))
>>> new_linked_list.get_head_data()
1000
>>> new_linked_list.get_tail_data()
2000
>>> for value in new_linked_list:
... print(value)
1000
10
20
2000
>>> new_linked_list.is_empty()
False
>>> for value in new_linked_list:
... print(value)
1000
10
20
2000
>>> 10 in new_linked_list
True
>>> new_linked_list.delete_value(value=10)
>>> 10 in new_linked_list
False
>>> new_linked_list.delete_value(value=2000)
>>> new_linked_list.get_tail_data()
20
>>> new_linked_list.delete_value(value=1000)
>>> new_linked_list.get_tail_data()
20
>>> new_linked_list.get_head_data()
20
>>> for value in new_linked_list:
... print(value)
20
>>> new_linked_list.delete_value(value=20)
>>> for value in new_linked_list:
... print(value)
>>> for value in range(1,10):
... new_linked_list.insert(value=value)
>>> for value in new_linked_list:
... print(value)
1
2
3
4
5
6
7
8
9
>>> linked_list = LinkedList()
>>> linked_list.insert_at_position(position=1, value=10)
>>> str(linked_list)
'10'
>>> linked_list.insert_at_position(position=2, value=20)
>>> str(linked_list)
'10 20'
>>> linked_list.insert_at_position(position=1, value=30)
>>> str(linked_list)
'30 10 20'
>>> linked_list.insert_at_position(position=3, value=40)
>>> str(linked_list)
'30 10 40 20'
>>> linked_list.insert_at_position(position=5, value=50)
>>> str(linked_list)
'30 10 40 20 50'
"""
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/linked_list/floyds_cycle_detection.py
================================================
"""
Floyd's cycle detection algorithm is a popular algorithm used to detect cycles
in a linked list. It uses two pointers, a slow pointer and a fast pointer,
to traverse the linked list. The slow pointer moves one node at a time while the fast
pointer moves two nodes at a time. If there is a cycle in the linked list,
the fast pointer will eventually catch up to the slow pointer and they will
meet at the same node. If there is no cycle, the fast pointer will reach the end of
the linked list and the algorithm will terminate.
For more information: https://en.wikipedia.org/wiki/Cycle_detection#Floyd's_tortoise_and_hare
"""
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Any, Self
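# Minimal sketch of the tortoise-and-hare idea described above, independent of the
# Node and LinkedList classes defined below.  Here the successor of index i is read
# from a plain list, so a cycle is simply an index that is eventually revisited.
# This hypothetical helper exists only to illustrate the two-pointer technique.
def _has_cycle_in_successors(successors: list[int | None], start: int = 0) -> bool:
    """
    >>> _has_cycle_in_successors([1, 2, 3, None])  # 0 -> 1 -> 2 -> 3 -> end
    False
    >>> _has_cycle_in_successors([1, 2, 3, 1])  # 3 points back to 1
    True
    """
    slow = start
    fast = start
    while fast is not None and successors[fast] is not None:
        slow = successors[slow]  # tortoise: one step
        fast = successors[successors[fast]]  # hare: two steps
        if slow == fast:
            return True
    return False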
@dataclass
class Node:
"""
A class representing a node in a singly linked list.
"""
data: Any
next_node: Self | None = None
@dataclass
class LinkedList:
"""
A class representing a singly linked list.
"""
head: Node | None = None
def __iter__(self) -> Iterator:
"""
Iterates through the linked list.
Returns:
Iterator: An iterator over the linked list.
Examples:
>>> linked_list = LinkedList()
>>> list(linked_list)
[]
>>> linked_list.add_node(1)
>>> tuple(linked_list)
(1,)
"""
visited = []
node = self.head
while node:
# Avoid an infinite loop if there's a cycle
if node in visited:
return
visited.append(node)
yield node.data
node = node.next_node
def add_node(self, data: Any) -> None:
"""
Adds a new node to the end of the linked list.
Args:
data (Any): The data to be stored in the new node.
Examples:
>>> linked_list = LinkedList()
>>> linked_list.add_node(1)
>>> linked_list.add_node(2)
>>> linked_list.add_node(3)
>>> linked_list.add_node(4)
>>> tuple(linked_list)
(1, 2, 3, 4)
"""
new_node = Node(data)
if self.head is None:
self.head = new_node
return
current_node = self.head
while current_node.next_node is not None:
current_node = current_node.next_node
current_node.next_node = new_node
def detect_cycle(self) -> bool:
"""
Detects if there is a cycle in the linked list using
Floyd's cycle detection algorithm.
Returns:
bool: True if there is a cycle, False otherwise.
Examples:
>>> linked_list = LinkedList()
>>> linked_list.add_node(1)
>>> linked_list.add_node(2)
>>> linked_list.add_node(3)
>>> linked_list.add_node(4)
>>> linked_list.detect_cycle()
False
# Create a cycle in the linked list
>>> linked_list.head.next_node.next_node.next_node = linked_list.head.next_node
>>> linked_list.detect_cycle()
True
"""
if self.head is None:
return False
slow_pointer: Node | None = self.head
fast_pointer: Node | None = self.head
while fast_pointer is not None and fast_pointer.next_node is not None:
slow_pointer = slow_pointer.next_node if slow_pointer else None
fast_pointer = fast_pointer.next_node.next_node
if slow_pointer == fast_pointer:
return True
return False
if __name__ == "__main__":
import doctest
doctest.testmod()
linked_list = LinkedList()
linked_list.add_node(1)
linked_list.add_node(2)
linked_list.add_node(3)
linked_list.add_node(4)
# Create a cycle in the linked list
# It first checks if the head, next_node, and next_node.next_node attributes of the
# linked list are not None to avoid any potential type errors.
if (
linked_list.head
and linked_list.head.next_node
and linked_list.head.next_node.next_node
):
linked_list.head.next_node.next_node.next_node = linked_list.head.next_node
has_cycle = linked_list.detect_cycle()
print(has_cycle) # Output: True
================================================
FILE: data_structures/linked_list/from_sequence.py
================================================
"""
Program to create a Linked List from a sequence (list or tuple) and
print a string representation of it.
"""
class Node:
def __init__(self, data=None):
self.data = data
self.next = None
def __repr__(self):
"""Returns a visual representation of the node and all its following nodes."""
string_rep = ""
temp = self
while temp:
string_rep += f"<{temp.data}> ---> "
temp = temp.next
string_rep += ""
return string_rep
def make_linked_list(elements_list: list | tuple) -> Node:
"""
Creates a Linked List from the elements of the given sequence
(list/tuple) and returns the head of the Linked List.
>>> make_linked_list([])
Traceback (most recent call last):
...
ValueError: The Elements List is empty
>>> make_linked_list(())
Traceback (most recent call last):
...
ValueError: The Elements List is empty
>>> make_linked_list([1])
<1> --->
>>> make_linked_list((1,))
<1> --->
>>> make_linked_list([1, 3, 5, 32, 44, 12, 43])
<1> ---> <3> ---> <5> ---> <32> ---> <44> ---> <12> ---> <43> --->
>>> make_linked_list((1, 3, 5, 32, 44, 12, 43))
<1> ---> <3> ---> <5> ---> <32> ---> <44> ---> <12> ---> <43> --->
"""
# if elements_list is empty
if not elements_list:
raise ValueError("The Elements List is empty")
# Set first element as Head
head = Node(elements_list[0])
current = head
# Loop through elements from position 1
for data in elements_list[1:]:
current.next = Node(data)
current = current.next
return head
================================================
FILE: data_structures/linked_list/has_loop.py
================================================
from __future__ import annotations
from typing import Any
class ContainsLoopError(Exception):
pass
class Node:
def __init__(self, data: Any) -> None:
self.data: Any = data
self.next_node: Node | None = None
def __iter__(self):
node = self
visited = set()
while node:
if node in visited:
raise ContainsLoopError
visited.add(node)
yield node.data
node = node.next_node
@property
def has_loop(self) -> bool:
"""
A loop is when the exact same Node appears more than once in a linked list.
>>> root_node = Node(1)
>>> root_node.next_node = Node(2)
>>> root_node.next_node.next_node = Node(3)
>>> root_node.next_node.next_node.next_node = Node(4)
>>> root_node.has_loop
False
>>> root_node.next_node.next_node.next_node = root_node.next_node
>>> root_node.has_loop
True
"""
try:
list(self)
return False
except ContainsLoopError:
return True
if __name__ == "__main__":
root_node = Node(1)
root_node.next_node = Node(2)
root_node.next_node.next_node = Node(3)
root_node.next_node.next_node.next_node = Node(4)
print(root_node.has_loop) # False
root_node.next_node.next_node.next_node = root_node.next_node
print(root_node.has_loop) # True
root_node = Node(5)
root_node.next_node = Node(6)
root_node.next_node.next_node = Node(5)
root_node.next_node.next_node.next_node = Node(6)
print(root_node.has_loop) # False
root_node = Node(1)
print(root_node.has_loop) # False
================================================
FILE: data_structures/linked_list/is_palindrome.py
================================================
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class ListNode:
val: int = 0
next_node: ListNode | None = None
def is_palindrome(head: ListNode | None) -> bool:
"""
Check if a linked list is a palindrome.
Args:
head: The head of the linked list.
Returns:
bool: True if the linked list is a palindrome, False otherwise.
Examples:
>>> is_palindrome(None)
True
>>> is_palindrome(ListNode(1))
True
>>> is_palindrome(ListNode(1, ListNode(2)))
False
>>> is_palindrome(ListNode(1, ListNode(2, ListNode(1))))
True
>>> is_palindrome(ListNode(1, ListNode(2, ListNode(2, ListNode(1)))))
True
"""
if not head:
return True
# split the list to two parts
fast: ListNode | None = head.next_node
slow: ListNode | None = head
while fast and fast.next_node:
fast = fast.next_node.next_node
slow = slow.next_node if slow else None
if slow:
# slow will always be defined,
# adding this check to resolve mypy static check
second = slow.next_node
slow.next_node = None  # detach the second half (the comparison below also works without this)
# reverse the second part
node: ListNode | None = None
while second:
nxt = second.next_node
second.next_node = node
node = second
second = nxt
# compare two parts
# second part has the same or one less node
while node and head:
if node.val != head.val:
return False
node = node.next_node
head = head.next_node
return True
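# The same three steps as is_palindrome() above, shown on a plain Python list for
# comparison (illustration only, not used by the functions in this file): find the
# midpoint, take the reversed second half, and compare it with the first half.
def _is_palindrome_list(values: list[int]) -> bool:
    """
    >>> _is_palindrome_list([1, 2, 2, 1]), _is_palindrome_list([1, 2, 3])
    (True, False)
    """
    middle = (len(values) + 1) // 2  # the first half keeps any middle element
    reversed_second_half = values[middle:][::-1]
    return values[: len(reversed_second_half)] == reversed_second_half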
def is_palindrome_stack(head: ListNode | None) -> bool:
"""
Check if a linked list is a palindrome using a stack.
Args:
head (ListNode): The head of the linked list.
Returns:
bool: True if the linked list is a palindrome, False otherwise.
Examples:
>>> is_palindrome_stack(None)
True
>>> is_palindrome_stack(ListNode(1))
True
>>> is_palindrome_stack(ListNode(1, ListNode(2)))
False
>>> is_palindrome_stack(ListNode(1, ListNode(2, ListNode(1))))
True
>>> is_palindrome_stack(ListNode(1, ListNode(2, ListNode(2, ListNode(1)))))
True
"""
if not head or not head.next_node:
return True
# 1. Get the midpoint (slow)
slow: ListNode | None = head
fast: ListNode | None = head
while fast and fast.next_node:
fast = fast.next_node.next_node
slow = slow.next_node if slow else None
# slow will always be defined,
# adding this check to resolve mypy static check
if slow:
stack = [slow.val]
# 2. Push the second half into the stack
while slow.next_node:
slow = slow.next_node
stack.append(slow.val)
# 3. Comparison
cur: ListNode | None = head
while stack and cur:
if stack.pop() != cur.val:
return False
cur = cur.next_node
return True
def is_palindrome_dict(head: ListNode | None) -> bool:
"""
Check if a linked list is a palindrome using a dictionary.
Args:
head (ListNode): The head of the linked list.
Returns:
bool: True if the linked list is a palindrome, False otherwise.
Examples:
>>> is_palindrome_dict(None)
True
>>> is_palindrome_dict(ListNode(1))
True
>>> is_palindrome_dict(ListNode(1, ListNode(2)))
False
>>> is_palindrome_dict(ListNode(1, ListNode(2, ListNode(1))))
True
>>> is_palindrome_dict(ListNode(1, ListNode(2, ListNode(2, ListNode(1)))))
True
>>> is_palindrome_dict(
... ListNode(
... 1, ListNode(2, ListNode(1, ListNode(3, ListNode(2, ListNode(1)))))
... )
... )
False
"""
if not head or not head.next_node:
return True
d: dict[int, list[int]] = {}
pos = 0
while head:
if head.val in d:
d[head.val].append(pos)
else:
d[head.val] = [pos]
head = head.next_node
pos += 1
checksum = pos - 1
middle = 0
for v in d.values():
if len(v) % 2 != 0:
middle += 1
else:
for step, i in enumerate(range(len(v))):
if v[i] + v[len(v) - 1 - step] != checksum:
return False
if middle > 1:
return False
return True
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: data_structures/linked_list/merge_two_lists.py
================================================
"""
Algorithm that merges two sorted linked lists into one sorted linked list.
"""
from __future__ import annotations
from collections.abc import Iterable, Iterator
from dataclasses import dataclass
test_data_odd = (3, 9, -11, 0, 7, 5, 1, -1)
test_data_even = (4, 6, 2, 0, 8, 10, 3, -2)
@dataclass
class Node:
data: int
next_node: Node | None
class SortedLinkedList:
def __init__(self, ints: Iterable[int]) -> None:
self.head: Node | None = None
for i in sorted(ints, reverse=True):
self.head = Node(i, self.head)
def __iter__(self) -> Iterator[int]:
"""
>>> tuple(SortedLinkedList(test_data_odd)) == tuple(sorted(test_data_odd))
True
>>> tuple(SortedLinkedList(test_data_even)) == tuple(sorted(test_data_even))
True
"""
node = self.head
while node:
yield node.data
node = node.next_node
def __len__(self) -> int:
"""
>>> for i in range(3):
... len(SortedLinkedList(range(i))) == i
True
True
True
>>> len(SortedLinkedList(test_data_odd))
8
"""
return sum(1 for _ in self)
def __str__(self) -> str:
"""
>>> str(SortedLinkedList([]))
''
>>> str(SortedLinkedList(test_data_odd))
'-11 -> -1 -> 0 -> 1 -> 3 -> 5 -> 7 -> 9'
>>> str(SortedLinkedList(test_data_even))
'-2 -> 0 -> 2 -> 3 -> 4 -> 6 -> 8 -> 10'
"""
return " -> ".join([str(node) for node in self])
def merge_lists(
sll_one: SortedLinkedList, sll_two: SortedLinkedList
) -> SortedLinkedList:
"""
>>> SSL = SortedLinkedList
>>> merged = merge_lists(SSL(test_data_odd), SSL(test_data_even))
>>> len(merged)
16
>>> str(merged)
'-11 -> -2 -> -1 -> 0 -> 0 -> 1 -> 2 -> 3 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9 -> 10'
>>> list(merged) == list(sorted(test_data_odd + test_data_even))
True
"""
return SortedLinkedList(list(sll_one) + list(sll_two))
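# merge_lists() above simply re-sorts the concatenated values, which is
# O((n + m) log(n + m)).  The classic linked-list merge instead walks both sorted
# sequences once with two pointers in O(n + m).  The sketch below shows that idea
# on plain Python lists; it is illustrative only and is not used by merge_lists().
def _two_pointer_merge(left: list[int], right: list[int]) -> list[int]:
    """
    >>> _two_pointer_merge([1, 3, 5], [2, 3, 6])
    [1, 2, 3, 3, 5, 6]
    """
    merged: list[int] = []
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:  # take the smaller head element
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1
    merged.extend(left[i:])  # at most one of these two tails is non-empty
    merged.extend(right[j:])
    return merged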
if __name__ == "__main__":
import doctest
doctest.testmod()
SSL = SortedLinkedList
print(merge_lists(SSL(test_data_odd), SSL(test_data_even)))
================================================
FILE: data_structures/linked_list/middle_element_of_linked_list.py
================================================
from __future__ import annotations
class Node:
def __init__(self, data: int) -> None:
self.data = data
self.next = None
class LinkedList:
def __init__(self):
self.head = None
def push(self, new_data: int) -> int:
new_node = Node(new_data)
new_node.next = self.head
self.head = new_node
return self.head.data
def middle_element(self) -> int | None:
"""
>>> link = LinkedList()
>>> link.middle_element()
No element found.
>>> link.push(5)
5
>>> link.push(6)
6
>>> link.push(8)
8
>>> link.push(8)
8
>>> link.push(10)
10
>>> link.push(12)
12
>>> link.push(17)
17
>>> link.push(7)
7
>>> link.push(3)
3
>>> link.push(20)
20
>>> link.push(-20)
-20
>>> link.middle_element()
12
>>>
"""
slow_pointer = self.head
fast_pointer = self.head
if self.head:
while fast_pointer and fast_pointer.next:
fast_pointer = fast_pointer.next.next
slow_pointer = slow_pointer.next
return slow_pointer.data
else:
print("No element found.")
return None
if __name__ == "__main__":
link = LinkedList()
for _ in range(int(input().strip())):
data = int(input().strip())
link.push(data)
print(link.middle_element())
================================================
FILE: data_structures/linked_list/print_reverse.py
================================================
from __future__ import annotations
from collections.abc import Iterable, Iterator
from dataclasses import dataclass
@dataclass
class Node:
data: int
next_node: Node | None = None
class LinkedList:
"""A class to represent a Linked List.
Use a tail pointer to speed up the append() operation.
"""
def __init__(self) -> None:
"""Initialize a LinkedList with the head node set to None.
>>> linked_list = LinkedList()
>>> (linked_list.head, linked_list.tail)
(None, None)
"""
self.head: Node | None = None
self.tail: Node | None = None # Speeds up the append() operation
def __iter__(self) -> Iterator[int]:
"""Iterate the LinkedList yielding each Node's data.
>>> linked_list = LinkedList()
>>> items = (1, 2, 3, 4, 5)
>>> linked_list.extend(items)
>>> tuple(linked_list) == items
True
"""
node = self.head
while node:
yield node.data
node = node.next_node
def __repr__(self) -> str:
"""Returns a string representation of the LinkedList.
>>> linked_list = LinkedList()
>>> str(linked_list)
''
>>> linked_list.append(1)
>>> str(linked_list)
'1'
>>> linked_list.extend([2, 3, 4, 5])
>>> str(linked_list)
'1 -> 2 -> 3 -> 4 -> 5'
"""
return " -> ".join([str(data) for data in self])
def append(self, data: int) -> None:
"""Appends a new node with the given data to the end of the LinkedList.
>>> linked_list = LinkedList()
>>> str(linked_list)
''
>>> linked_list.append(1)
>>> str(linked_list)
'1'
>>> linked_list.append(2)
>>> str(linked_list)
'1 -> 2'
"""
if self.tail:
self.tail.next_node = self.tail = Node(data)
else:
self.head = self.tail = Node(data)
def extend(self, items: Iterable[int]) -> None:
"""Appends each item to the end of the LinkedList.
>>> linked_list = LinkedList()
>>> linked_list.extend([])
>>> str(linked_list)
''
>>> linked_list.extend([1, 2])
>>> str(linked_list)
'1 -> 2'
>>> linked_list.extend([3,4])
>>> str(linked_list)
'1 -> 2 -> 3 -> 4'
"""
for item in items:
self.append(item)
def make_linked_list(elements_list: Iterable[int]) -> LinkedList:
"""Creates a Linked List from the elements of the given sequence
(list/tuple) and returns the head of the Linked List.
>>> make_linked_list([])
Traceback (most recent call last):
...
Exception: The Elements List is empty
>>> make_linked_list([7])
7
>>> make_linked_list(['abc'])
abc
>>> make_linked_list([7, 25])
7 -> 25
"""
if not elements_list:
raise Exception("The Elements List is empty")
linked_list = LinkedList()
linked_list.extend(elements_list)
return linked_list
def in_reverse(linked_list: LinkedList) -> str:
"""Prints the elements of the given Linked List in reverse order
>>> in_reverse(LinkedList())
''
>>> in_reverse(make_linked_list([69, 88, 73]))
'73 <- 88 <- 69'
"""
return " <- ".join(str(line) for line in reversed(tuple(linked_list)))
if __name__ == "__main__":
from doctest import testmod
testmod()
linked_list = make_linked_list((14, 52, 14, 12, 43))
print(f"Linked List: {linked_list}")
print(f"Reverse List: {in_reverse(linked_list)}")
================================================
FILE: data_structures/linked_list/reverse_k_group.py
================================================
from __future__ import annotations
from collections.abc import Iterable, Iterator
from dataclasses import dataclass
@dataclass
class Node:
data: int
next_node: Node | None = None
class LinkedList:
def __init__(self, ints: Iterable[int]) -> None:
self.head: Node | None = None
for i in ints:
self.append(i)
def __iter__(self) -> Iterator[int]:
"""
>>> ints = []
>>> list(LinkedList(ints)) == ints
True
>>> ints = tuple(range(5))
>>> tuple(LinkedList(ints)) == ints
True
"""
node = self.head
while node:
yield node.data
node = node.next_node
def __len__(self) -> int:
"""
>>> for i in range(3):
... len(LinkedList(range(i))) == i
True
True
True
>>> len(LinkedList("abcdefgh"))
8
"""
return sum(1 for _ in self)
def __str__(self) -> str:
"""
>>> str(LinkedList([]))
''
>>> str(LinkedList(range(5)))
'0 -> 1 -> 2 -> 3 -> 4'
"""
return " -> ".join([str(node) for node in self])
def append(self, data: int) -> None:
"""
>>> ll = LinkedList([1, 2])
>>> tuple(ll)
(1, 2)
>>> ll.append(3)
>>> tuple(ll)
(1, 2, 3)
>>> ll.append(4)
>>> tuple(ll)
(1, 2, 3, 4)
>>> len(ll)
4
"""
if not self.head:
self.head = Node(data)
return
node = self.head
while node.next_node:
node = node.next_node
node.next_node = Node(data)
def reverse_k_nodes(self, group_size: int) -> None:
"""
reverse nodes within groups of size k
>>> ll = LinkedList([1, 2, 3, 4, 5])
>>> ll.reverse_k_nodes(2)
>>> tuple(ll)
(2, 1, 4, 3, 5)
>>> str(ll)
'2 -> 1 -> 4 -> 3 -> 5'
"""
if self.head is None or self.head.next_node is None:
return
length = len(self)
dummy_head = Node(0)
dummy_head.next_node = self.head
previous_node = dummy_head
while length >= group_size:
current_node = previous_node.next_node
assert current_node
next_node = current_node.next_node
for _ in range(1, group_size):
assert next_node, current_node
current_node.next_node = next_node.next_node
assert previous_node
next_node.next_node = previous_node.next_node
previous_node.next_node = next_node
next_node = current_node.next_node
previous_node = current_node
length -= group_size
self.head = dummy_head.next_node
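# The same grouping shown on a plain Python list, for comparison with the pointer
# rewiring in reverse_k_nodes() above (illustration only): complete groups of k are
# reversed and any trailing partial group is left untouched.
def _reverse_k_groups_list(values: list[int], k: int) -> list[int]:
    """
    >>> _reverse_k_groups_list([1, 2, 3, 4, 5], 2)
    [2, 1, 4, 3, 5]
    """
    result: list[int] = []
    for start in range(0, len(values), k):
        group = values[start : start + k]
        # only complete groups are reversed, matching reverse_k_nodes() above
        result.extend(reversed(group) if len(group) == k else group)
    return result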
if __name__ == "__main__":
import doctest
doctest.testmod()
ll = LinkedList([1, 2, 3, 4, 5])
print(f"Original Linked List: {ll}")
k = 2
ll.reverse_k_nodes(k)
print(f"After reversing groups of size {k}: {ll}")
================================================
FILE: data_structures/linked_list/rotate_to_the_right.py
================================================
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class Node:
data: int
next_node: Node | None = None
def print_linked_list(head: Node | None) -> None:
"""
Print the entire linked list iteratively.
This function prints the elements of a linked list separated by '->'.
Parameters:
head (Node | None): The head of the linked list to be printed,
or None if the linked list is empty.
>>> head = insert_node(None, 0)
>>> head = insert_node(head, 2)
>>> head = insert_node(head, 1)
>>> print_linked_list(head)
0->2->1
>>> head = insert_node(head, 4)
>>> head = insert_node(head, 5)
>>> print_linked_list(head)
0->2->1->4->5
"""
if head is None:
return
while head.next_node is not None:
print(head.data, end="->")
head = head.next_node
print(head.data)
def insert_node(head: Node | None, data: int) -> Node:
"""
Insert a new node at the end of a linked list and return the new head.
Parameters:
head (Node | None): The head of the linked list.
data (int): The data to be inserted into the new node.
Returns:
Node: The new head of the linked list.
>>> head = insert_node(None, 10)
>>> head = insert_node(head, 9)
>>> head = insert_node(head, 8)
>>> print_linked_list(head)
10->9->8
"""
new_node = Node(data)
# If the linked list is empty, the new_node becomes the head
if head is None:
return new_node
temp_node = head
while temp_node.next_node:
temp_node = temp_node.next_node
temp_node.next_node = new_node
return head
def rotate_to_the_right(head: Node, places: int) -> Node:
"""
Rotate a linked list to the right by places times.
Parameters:
head: The head of the linked list.
places: The number of places to rotate.
Returns:
Node: The head of the rotated linked list.
>>> rotate_to_the_right(None, places=1)
Traceback (most recent call last):
...
ValueError: The linked list is empty.
>>> head = insert_node(None, 1)
>>> rotate_to_the_right(head, places=1) == head
True
>>> head = insert_node(None, 1)
>>> head = insert_node(head, 2)
>>> head = insert_node(head, 3)
>>> head = insert_node(head, 4)
>>> head = insert_node(head, 5)
>>> new_head = rotate_to_the_right(head, places=2)
>>> print_linked_list(new_head)
4->5->1->2->3
"""
# Check if the list is empty or has only one element
if not head:
raise ValueError("The linked list is empty.")
if head.next_node is None:
return head
# Calculate the length of the linked list
length = 1
temp_node = head
while temp_node.next_node is not None:
length += 1
temp_node = temp_node.next_node
# Adjust the value of places to avoid places longer than the list.
places %= length
if places == 0:
return head # As no rotation is needed.
# Find the new head position after rotation.
new_head_index = length - places
# Traverse to the new head position
temp_node = head
for _ in range(new_head_index - 1):
assert temp_node.next_node
temp_node = temp_node.next_node
# Update pointers to perform rotation
assert temp_node.next_node
new_head = temp_node.next_node
temp_node.next_node = None
temp_node = new_head
while temp_node.next_node:
temp_node = temp_node.next_node
temp_node.next_node = head
assert new_head
return new_head
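# On a plain Python list, rotating right by `places` is a slice-and-concatenate;
# the pointer version above achieves the same ordering by relinking nodes.  This
# hypothetical helper is illustrative only and assumes a non-empty list.
def _rotate_list_right(values: list[int], places: int) -> list[int]:
    """
    >>> _rotate_list_right([1, 2, 3, 4, 5], 2)
    [4, 5, 1, 2, 3]
    """
    places %= len(values)  # rotating by a multiple of the length is a no-op
    return values[-places:] + values[:-places] if places else list(values)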
if __name__ == "__main__":
import doctest
doctest.testmod()
head = insert_node(None, 5)
head = insert_node(head, 1)
head = insert_node(head, 2)
head = insert_node(head, 4)
head = insert_node(head, 3)
print("Original list: ", end="")
print_linked_list(head)
places = 3
new_head = rotate_to_the_right(head, places)
print(f"After {places} iterations: ", end="")
print_linked_list(new_head)
================================================
FILE: data_structures/linked_list/singly_linked_list.py
================================================
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Any
@dataclass
class Node:
"""
Create and initialize Node class instance.
>>> Node(20)
Node(20)
>>> Node("Hello, world!")
Node(Hello, world!)
>>> Node(None)
Node(None)
>>> Node(True)
Node(True)
"""
data: Any
next_node: Node | None = None
def __repr__(self) -> str:
"""
Get the string representation of this node.
>>> Node(10).__repr__()
'Node(10)'
>>> repr(Node(10))
'Node(10)'
>>> str(Node(10))
'Node(10)'
>>> Node(10)
Node(10)
"""
return f"Node({self.data})"
class LinkedList:
def __init__(self):
"""
Create and initialize LinkedList class instance.
>>> linked_list = LinkedList()
>>> linked_list.head is None
True
"""
self.head = None
def __iter__(self) -> Iterator[Any]:
"""
Iterate through the data stored in the linked list.
>>> linked_list = LinkedList()
>>> linked_list.insert_tail("tail")
>>> linked_list.insert_tail("tail_1")
>>> linked_list.insert_tail("tail_2")
>>> for node in linked_list: # __iter__ used here.
... node
'tail'
'tail_1'
'tail_2'
"""
node = self.head
while node:
yield node.data
node = node.next_node
def __len__(self) -> int:
"""
Return length of linked list i.e. number of nodes
>>> linked_list = LinkedList()
>>> len(linked_list)
0
>>> linked_list.insert_tail("tail")
>>> len(linked_list)
1
>>> linked_list.insert_head("head")
>>> len(linked_list)
2
>>> _ = linked_list.delete_tail()
>>> len(linked_list)
1
>>> _ = linked_list.delete_head()
>>> len(linked_list)
0
"""
return sum(1 for _ in self)
def __repr__(self) -> str:
"""
String representation/visualization of a Linked List
>>> linked_list = LinkedList()
>>> linked_list.insert_tail(1)
>>> linked_list.insert_tail(3)
>>> linked_list.__repr__()
'1 -> 3'
>>> repr(linked_list)
'1 -> 3'
>>> str(linked_list)
'1 -> 3'
>>> linked_list.insert_tail(5)
>>> f"{linked_list}"
'1 -> 3 -> 5'
"""
return " -> ".join([str(item) for item in self])
def __getitem__(self, index: int) -> Any:
"""
Indexing support. Used to get the data of the node at a particular position.
>>> linked_list = LinkedList()
>>> for i in range(0, 10):
... linked_list.insert_nth(i, i)
>>> all(str(linked_list[i]) == str(i) for i in range(0, 10))
True
>>> linked_list[-10]
Traceback (most recent call last):
...
ValueError: list index out of range.
>>> linked_list[len(linked_list)]
Traceback (most recent call last):
...
ValueError: list index out of range.
"""
if not 0 <= index < len(self):
raise ValueError("list index out of range.")
for i, node in enumerate(self):
if i == index:
return node
return None
# Used to change the data of a particular node
def __setitem__(self, index: int, data: Any) -> None:
"""
>>> linked_list = LinkedList()
>>> for i in range(0, 10):
... linked_list.insert_nth(i, i)
>>> linked_list[0] = 666
>>> linked_list[0]
666
>>> linked_list[5] = -666
>>> linked_list[5]
-666
>>> linked_list[-10] = 666
Traceback (most recent call last):
...
ValueError: list index out of range.
>>> linked_list[len(linked_list)] = 666
Traceback (most recent call last):
...
ValueError: list index out of range.
"""
if not 0 <= index < len(self):
raise ValueError("list index out of range.")
current = self.head
for _ in range(index):
current = current.next_node
current.data = data
def insert_tail(self, data: Any) -> None:
"""
Insert data to the end of linked list.
>>> linked_list = LinkedList()
>>> linked_list.insert_tail("tail")
>>> linked_list
tail
>>> linked_list.insert_tail("tail_2")
>>> linked_list
tail -> tail_2
>>> linked_list.insert_tail("tail_3")
>>> linked_list
tail -> tail_2 -> tail_3
"""
self.insert_nth(len(self), data)
def insert_head(self, data: Any) -> None:
"""
Insert data to the beginning of linked list.
>>> linked_list = LinkedList()
>>> linked_list.insert_head("head")
>>> linked_list
head
>>> linked_list.insert_head("head_2")
>>> linked_list
head_2 -> head
>>> linked_list.insert_head("head_3")
>>> linked_list
head_3 -> head_2 -> head
"""
self.insert_nth(0, data)
def insert_nth(self, index: int, data: Any) -> None:
"""
Insert data at given index.
>>> linked_list = LinkedList()
>>> linked_list.insert_tail("first")
>>> linked_list.insert_tail("second")
>>> linked_list.insert_tail("third")
>>> linked_list
first -> second -> third
>>> linked_list.insert_nth(1, "fourth")
>>> linked_list
first -> fourth -> second -> third
>>> linked_list.insert_nth(3, "fifth")
>>> linked_list
first -> fourth -> second -> fifth -> third
"""
if not 0 <= index <= len(self):
raise IndexError("list index out of range")
new_node = Node(data)
if self.head is None:
self.head = new_node
elif index == 0:
new_node.next_node = self.head # link new_node to head
self.head = new_node
else:
temp = self.head
for _ in range(index - 1):
temp = temp.next_node
new_node.next_node = temp.next_node
temp.next_node = new_node
def print_list(self) -> None: # print every node data
"""
This method prints every node data.
>>> linked_list = LinkedList()
>>> linked_list.insert_tail("first")
>>> linked_list.insert_tail("second")
>>> linked_list.insert_tail("third")
>>> linked_list
first -> second -> third
"""
print(self)
def delete_head(self) -> Any:
"""
Delete the first node and return the
node's data.
>>> linked_list = LinkedList()
>>> linked_list.insert_tail("first")
>>> linked_list.insert_tail("second")
>>> linked_list.insert_tail("third")
>>> linked_list
first -> second -> third
>>> linked_list.delete_head()
'first'
>>> linked_list
second -> third
>>> linked_list.delete_head()
'second'
>>> linked_list
third
>>> linked_list.delete_head()
'third'
>>> linked_list.delete_head()
Traceback (most recent call last):
...
IndexError: List index out of range.
"""
return self.delete_nth(0)
def delete_tail(self) -> Any: # delete from tail
"""
Delete the tail end node and return the
node's data.
>>> linked_list = LinkedList()
>>> linked_list.insert_tail("first")
>>> linked_list.insert_tail("second")
>>> linked_list.insert_tail("third")
>>> linked_list
first -> second -> third
>>> linked_list.delete_tail()
'third'
>>> linked_list
first -> second
>>> linked_list.delete_tail()
'second'
>>> linked_list
first
>>> linked_list.delete_tail()
'first'
>>> linked_list.delete_tail()
Traceback (most recent call last):
...
IndexError: List index out of range.
"""
return self.delete_nth(len(self) - 1)
def delete_nth(self, index: int = 0) -> Any:
"""
Delete node at given index and return the
node's data.
>>> linked_list = LinkedList()
>>> linked_list.insert_tail("first")
>>> linked_list.insert_tail("second")
>>> linked_list.insert_tail("third")
>>> linked_list
first -> second -> third
>>> linked_list.delete_nth(1) # delete middle
'second'
>>> linked_list
first -> third
>>> linked_list.delete_nth(5) # this raises error
Traceback (most recent call last):
...
IndexError: List index out of range.
>>> linked_list.delete_nth(-1) # this also raises error
Traceback (most recent call last):
...
IndexError: List index out of range.
"""
if not 0 <= index <= len(self) - 1: # test if index is valid
raise IndexError("List index out of range.")
delete_node = self.head # default first node
if index == 0:
self.head = self.head.next_node
else:
temp = self.head
for _ in range(index - 1):
temp = temp.next_node
delete_node = temp.next_node
temp.next_node = temp.next_node.next_node
return delete_node.data
def is_empty(self) -> bool:
"""
Check if linked list is empty.
>>> linked_list = LinkedList()
>>> linked_list.is_empty()
True
>>> linked_list.insert_head("first")
>>> linked_list.is_empty()
False
"""
return self.head is None
def reverse(self) -> None:
"""
This reverses the linked list order.
>>> linked_list = LinkedList()
>>> linked_list.insert_tail("first")
>>> linked_list.insert_tail("second")
>>> linked_list.insert_tail("third")
>>> linked_list
first -> second -> third
>>> linked_list.reverse()
>>> linked_list
third -> second -> first
"""
prev = None
current = self.head
while current:
# Store the current node's next node.
next_node = current.next_node
# Make the current node's next_node point backwards
current.next_node = prev
# Make the previous node be the current node
prev = current
# Make the current node the next_node node (to progress iteration)
current = next_node
# Return prev in order to put the head at the end
self.head = prev
def test_singly_linked_list() -> None:
"""
>>> test_singly_linked_list()
"""
linked_list = LinkedList()
assert linked_list.is_empty() is True
assert str(linked_list) == ""
try:
linked_list.delete_head()
raise AssertionError # This should not happen.
except IndexError:
assert True # This should happen.
try:
linked_list.delete_tail()
raise AssertionError # This should not happen.
except IndexError:
assert True # This should happen.
for i in range(10):
assert len(linked_list) == i
linked_list.insert_nth(i, i + 1)
assert str(linked_list) == " -> ".join(str(i) for i in range(1, 11))
linked_list.insert_head(0)
linked_list.insert_tail(11)
assert str(linked_list) == " -> ".join(str(i) for i in range(12))
assert linked_list.delete_head() == 0
assert linked_list.delete_nth(9) == 10
assert linked_list.delete_tail() == 11
assert len(linked_list) == 9
assert str(linked_list) == " -> ".join(str(i) for i in range(1, 10))
assert all(linked_list[i] == i + 1 for i in range(9)) is True
for i in range(9):
linked_list[i] = -i
assert all(linked_list[i] == -i for i in range(9)) is True
linked_list.reverse()
assert str(linked_list) == " -> ".join(str(i) for i in range(-8, 1))
def test_singly_linked_list_2() -> None:
"""
This section of the test uses varying data types as input.
>>> test_singly_linked_list_2()
"""
test_input = [
-9,
100,
Node(77345112),
"dlrow olleH",
7,
5555,
0,
-192.55555,
"Hello, world!",
77.9,
Node(10),
None,
None,
12.20,
]
linked_list = LinkedList()
for i in test_input:
linked_list.insert_tail(i)
# Check if it's empty or not
assert linked_list.is_empty() is False
assert (
str(linked_list)
== "-9 -> 100 -> Node(77345112) -> dlrow olleH -> 7 -> 5555 -> "
"0 -> -192.55555 -> Hello, world! -> 77.9 -> Node(10) -> None -> None -> 12.2"
)
# Delete the head
result = linked_list.delete_head()
assert result == -9
assert (
str(linked_list) == "100 -> Node(77345112) -> dlrow olleH -> 7 -> 5555 -> 0 -> "
"-192.55555 -> Hello, world! -> 77.9 -> Node(10) -> None -> None -> 12.2"
)
# Delete the tail
result = linked_list.delete_tail()
assert result == 12.2
assert (
str(linked_list) == "100 -> Node(77345112) -> dlrow olleH -> 7 -> 5555 -> 0 -> "
"-192.55555 -> Hello, world! -> 77.9 -> Node(10) -> None -> None"
)
# Delete a node in specific location in linked list
result = linked_list.delete_nth(10)
assert result is None
assert (
str(linked_list) == "100 -> Node(77345112) -> dlrow olleH -> 7 -> 5555 -> 0 -> "
"-192.55555 -> Hello, world! -> 77.9 -> Node(10) -> None"
)
# Add a Node instance to its head
linked_list.insert_head(Node("Hello again, world!"))
assert (
str(linked_list)
== "Node(Hello again, world!) -> 100 -> Node(77345112) -> dlrow olleH -> "
"7 -> 5555 -> 0 -> -192.55555 -> Hello, world! -> 77.9 -> Node(10) -> None"
)
# Add None to its tail
linked_list.insert_tail(None)
assert (
str(linked_list)
== "Node(Hello again, world!) -> 100 -> Node(77345112) -> dlrow olleH -> 7 -> "
"5555 -> 0 -> -192.55555 -> Hello, world! -> 77.9 -> Node(10) -> None -> None"
)
# Reverse the linked list
linked_list.reverse()
assert (
str(linked_list)
== "None -> None -> Node(10) -> 77.9 -> Hello, world! -> -192.55555 -> 0 -> "
"5555 -> 7 -> dlrow olleH -> Node(77345112) -> 100 -> Node(Hello again, world!)"
)
def main():
from doctest import testmod
testmod()
linked_list = LinkedList()
linked_list.insert_head(input("Inserting 1st at head ").strip())
linked_list.insert_head(input("Inserting 2nd at head ").strip())
print("\nPrint list:")
linked_list.print_list()
linked_list.insert_tail(input("\nInserting 1st at tail ").strip())
linked_list.insert_tail(input("Inserting 2nd at tail ").strip())
print("\nPrint list:")
linked_list.print_list()
print("\nDelete head")
linked_list.delete_head()
print("Delete tail")
linked_list.delete_tail()
print("\nPrint list:")
linked_list.print_list()
print("\nReverse linked list")
linked_list.reverse()
print("\nPrint list:")
linked_list.print_list()
print("\nString representation of linked list:")
print(linked_list)
print("\nReading/changing Node data using indexing:")
print(f"Element at Position 1: {linked_list[1]}")
linked_list[1] = input("Enter New Value: ").strip()
print("New list:")
print(linked_list)
print(f"length of linked_list is : {len(linked_list)}")
if __name__ == "__main__":
main()
================================================
FILE: data_structures/linked_list/skip_list.py
================================================
"""
Based on "Skip Lists: A Probabilistic Alternative to Balanced Trees" by William Pugh
https://epaperpress.com/sortsearch/download/skiplist.pdf
"""
from __future__ import annotations
from itertools import pairwise
from random import random
from typing import TypeVar
KT = TypeVar("KT")
VT = TypeVar("VT")
class Node[KT, VT]:
def __init__(self, key: KT | str = "root", value: VT | None = None):
self.key = key
self.value = value
self.forward: list[Node[KT, VT]] = []
def __repr__(self) -> str:
"""
:return: Visual representation of Node
>>> node = Node("Key", 2)
>>> repr(node)
'Node(Key: 2)'
"""
return f"Node({self.key}: {self.value})"
@property
def level(self) -> int:
"""
:return: Number of forward references
>>> node = Node("Key", 2)
>>> node.level
0
>>> node.forward.append(Node("Key2", 4))
>>> node.level
1
>>> node.forward.append(Node("Key3", 6))
>>> node.level
2
"""
return len(self.forward)
class SkipList[KT, VT]:
def __init__(self, p: float = 0.5, max_level: int = 16):
self.head: Node[KT, VT] = Node[KT, VT]()
self.level = 0
self.p = p
self.max_level = max_level
def __str__(self) -> str:
"""
:return: Visual representation of SkipList
>>> skip_list = SkipList()
>>> print(skip_list)
SkipList(level=0)
>>> skip_list.insert("Key1", "Value")
>>> print(skip_list) # doctest: +ELLIPSIS
SkipList(level=...
[root]--...
[Key1]--Key1...
None *...
>>> skip_list.insert("Key2", "OtherValue")
>>> print(skip_list) # doctest: +ELLIPSIS
SkipList(level=...
[root]--...
[Key1]--Key1...
[Key2]--Key2...
None *...
"""
items = list(self)
if len(items) == 0:
return f"SkipList(level={self.level})"
label_size = max((len(str(item)) for item in items), default=4)
label_size = max(label_size, 4) + 4
node = self.head
lines = []
forwards = node.forward.copy()
lines.append(f"[{node.key}]".ljust(label_size, "-") + "* " * len(forwards))
lines.append(" " * label_size + "| " * len(forwards))
while len(node.forward) != 0:
node = node.forward[0]
lines.append(
f"[{node.key}]".ljust(label_size, "-")
+ " ".join(str(n.key) if n.key == node.key else "|" for n in forwards)
)
lines.append(" " * label_size + "| " * len(forwards))
forwards[: node.level] = node.forward
lines.append("None".ljust(label_size) + "* " * len(forwards))
return f"SkipList(level={self.level})\n" + "\n".join(lines)
def __iter__(self):
node = self.head
while len(node.forward) != 0:
yield node.forward[0].key
node = node.forward[0]
def random_level(self) -> int:
"""
:return: Random level from [1, self.max_level] interval.
Higher values are less likely.
"""
level = 1
while random() < self.p and level < self.max_level:
level += 1
return level
def _locate_node(self, key) -> tuple[Node[KT, VT] | None, list[Node[KT, VT]]]:
"""
:param key: Searched key.
:return: Tuple with searched node (or None if given key is not present)
and list of nodes that refer (if key is present) or should refer to
the given node.
"""
# Nodes that refer or should refer to the output node
update_vector = []
node = self.head
for i in reversed(range(self.level)):
# i < node.level - When the node level is less than `i`, decrement `i`.
# node.forward[i].key < key - Jumping to node with key value higher
# or equal to searched key would result
# in skipping searched key.
while i < node.level and node.forward[i].key < key:
node = node.forward[i]
# Each leftmost node (relative to searched node) will potentially have to
# be updated.
update_vector.append(node)
update_vector.reverse() # Note that we were inserting values in reverse order.
# len(node.forward) != 0 - If current node doesn't contain any further
# references then searched key is not present.
# node.forward[0].key == key - Next node key should be equal to search key
# if key is present.
if len(node.forward) != 0 and node.forward[0].key == key:
return node.forward[0], update_vector
else:
return None, update_vector
def delete(self, key: KT):
"""
:param key: Key to remove from list.
>>> skip_list = SkipList()
>>> skip_list.insert(2, "Two")
>>> skip_list.insert(1, "One")
>>> skip_list.insert(3, "Three")
>>> list(skip_list)
[1, 2, 3]
>>> skip_list.delete(2)
>>> list(skip_list)
[1, 3]
"""
node, update_vector = self._locate_node(key)
if node is not None:
for i, update_node in enumerate(update_vector):
# Remove or replace all references to removed node.
if update_node.level > i and update_node.forward[i].key == key:
if node.level > i:
update_node.forward[i] = node.forward[i]
else:
update_node.forward = update_node.forward[:i]
def insert(self, key: KT, value: VT):
"""
:param key: Key to insert.
:param value: Value associated with given key.
>>> skip_list = SkipList()
>>> skip_list.insert(2, "Two")
>>> skip_list.find(2)
'Two'
>>> list(skip_list)
[2]
"""
node, update_vector = self._locate_node(key)
if node is not None:
node.value = value
else:
level = self.random_level()
if level > self.level:
# After level increase we have to add additional nodes to head.
for _ in range(self.level - 1, level):
update_vector.append(self.head)
self.level = level
new_node = Node(key, value)
for i, update_node in enumerate(update_vector[:level]):
# Change references to pass through new node.
if update_node.level > i:
new_node.forward.append(update_node.forward[i])
if update_node.level < i + 1:
update_node.forward.append(new_node)
else:
update_node.forward[i] = new_node
def find(self, key: KT) -> VT | None:
"""
:param key: Search key.
:return: Value associated with given key or None if given key is not present.
>>> skip_list = SkipList()
>>> skip_list.find(2)
>>> skip_list.insert(2, "Two")
>>> skip_list.find(2)
'Two'
>>> skip_list.insert(2, "Three")
>>> skip_list.find(2)
'Three'
"""
node, _ = self._locate_node(key)
if node is not None:
return node.value
return None
def test_insert():
skip_list = SkipList()
skip_list.insert("Key1", 3)
skip_list.insert("Key2", 12)
skip_list.insert("Key3", 41)
skip_list.insert("Key4", -19)
node = skip_list.head
all_values = {}
while node.level != 0:
node = node.forward[0]
all_values[node.key] = node.value
assert len(all_values) == 4
assert all_values["Key1"] == 3
assert all_values["Key2"] == 12
assert all_values["Key3"] == 41
assert all_values["Key4"] == -19
def test_insert_overrides_existing_value():
skip_list = SkipList()
skip_list.insert("Key1", 10)
skip_list.insert("Key1", 12)
skip_list.insert("Key5", 7)
skip_list.insert("Key7", 10)
skip_list.insert("Key10", 5)
skip_list.insert("Key7", 7)
skip_list.insert("Key5", 5)
skip_list.insert("Key10", 10)
node = skip_list.head
all_values = {}
while node.level != 0:
node = node.forward[0]
all_values[node.key] = node.value
if len(all_values) != 4:
print()
assert len(all_values) == 4
assert all_values["Key1"] == 12
assert all_values["Key7"] == 7
assert all_values["Key5"] == 5
assert all_values["Key10"] == 10
def test_searching_empty_list_returns_none():
skip_list = SkipList()
assert skip_list.find("Some key") is None
def test_search():
skip_list = SkipList()
skip_list.insert("Key2", 20)
assert skip_list.find("Key2") == 20
skip_list.insert("Some Key", 10)
skip_list.insert("Key2", 8)
skip_list.insert("V", 13)
assert skip_list.find("Y") is None
assert skip_list.find("Key2") == 8
assert skip_list.find("Some Key") == 10
assert skip_list.find("V") == 13
def test_deleting_item_from_empty_list_do_nothing():
skip_list = SkipList()
skip_list.delete("Some key")
assert len(skip_list.head.forward) == 0
def test_deleted_items_are_not_founded_by_find_method():
skip_list = SkipList()
skip_list.insert("Key1", 12)
skip_list.insert("V", 13)
skip_list.insert("X", 14)
skip_list.insert("Key2", 15)
skip_list.delete("V")
skip_list.delete("Key2")
assert skip_list.find("V") is None
assert skip_list.find("Key2") is None
def test_delete_removes_only_given_key():
skip_list = SkipList()
skip_list.insert("Key1", 12)
skip_list.insert("V", 13)
skip_list.insert("X", 14)
skip_list.insert("Key2", 15)
skip_list.delete("V")
assert skip_list.find("V") is None
assert skip_list.find("X") == 14
assert skip_list.find("Key1") == 12
assert skip_list.find("Key2") == 15
skip_list.delete("X")
assert skip_list.find("V") is None
assert skip_list.find("X") is None
assert skip_list.find("Key1") == 12
assert skip_list.find("Key2") == 15
skip_list.delete("Key1")
assert skip_list.find("V") is None
assert skip_list.find("X") is None
assert skip_list.find("Key1") is None
assert skip_list.find("Key2") == 15
skip_list.delete("Key2")
assert skip_list.find("V") is None
assert skip_list.find("X") is None
assert skip_list.find("Key1") is None
assert skip_list.find("Key2") is None
def test_delete_doesnt_leave_dead_nodes():
skip_list = SkipList()
skip_list.insert("Key1", 12)
skip_list.insert("V", 13)
skip_list.insert("X", 142)
skip_list.insert("Key2", 15)
skip_list.delete("X")
def traverse_keys(node):
yield node.key
for forward_node in node.forward:
yield from traverse_keys(forward_node)
assert len(set(traverse_keys(skip_list.head))) == 4
def test_iter_always_yields_sorted_values():
def is_sorted(lst):
return all(next_item >= item for item, next_item in pairwise(lst))
skip_list = SkipList()
for i in range(10):
skip_list.insert(i, i)
assert is_sorted(list(skip_list))
skip_list.delete(5)
skip_list.delete(8)
skip_list.delete(2)
assert is_sorted(list(skip_list))
skip_list.insert(-12, -12)
skip_list.insert(77, 77)
assert is_sorted(list(skip_list))
def pytests():
for _ in range(100):
# Repeat test 100 times due to the probabilistic nature of skip list
# random values == random bugs
test_insert()
test_insert_overrides_existing_value()
test_searching_empty_list_returns_none()
test_search()
test_deleting_item_from_empty_list_do_nothing()
test_deleted_items_are_not_founded_by_find_method()
test_delete_removes_only_given_key()
test_delete_doesnt_leave_dead_nodes()
test_iter_always_yields_sorted_values()
def main():
"""
>>> pytests()
"""
skip_list = SkipList()
skip_list.insert(2, "2")
skip_list.insert(4, "4")
skip_list.insert(6, "4")
skip_list.insert(4, "5")
skip_list.insert(8, "4")
skip_list.insert(9, "4")
skip_list.delete(4)
print(skip_list)
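# random_level() above draws node heights from a geometric distribution: a node
# reaches level k or higher with probability p ** (k - 1), so with the default
# p = 0.5 roughly half the nodes have height >= 2, a quarter >= 3, and so on.
# This hypothetical helper estimates those fractions empirically; it is a sketch
# for illustration and is not part of the tests above.
def _estimate_level_fractions(p: float = 0.5, samples: int = 100_000) -> list[float]:
    """Return the observed fractions of sampled levels that are >= 2, >= 3 and >= 4."""
    skip_list = SkipList(p=p)
    levels = [skip_list.random_level() for _ in range(samples)]
    return [sum(level >= k for level in levels) / samples for k in (2, 3, 4)]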
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: data_structures/linked_list/swap_nodes.py
================================================
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Any
@dataclass
class Node:
data: Any
next_node: Node | None = None
@dataclass
class LinkedList:
head: Node | None = None
def __iter__(self) -> Iterator:
"""
>>> linked_list = LinkedList()
>>> list(linked_list)
[]
>>> linked_list.push(0)
>>> tuple(linked_list)
(0,)
"""
node = self.head
while node:
yield node.data
node = node.next_node
def __len__(self) -> int:
"""
>>> linked_list = LinkedList()
>>> len(linked_list)
0
>>> linked_list.push(0)
>>> len(linked_list)
1
"""
return sum(1 for _ in self)
def push(self, new_data: Any) -> None:
"""
Add a new node with the given data to the beginning of the Linked List.
Args:
new_data (Any): The data to be added to the new node.
Returns:
None
Examples:
>>> linked_list = LinkedList()
>>> linked_list.push(5)
>>> linked_list.push(4)
>>> linked_list.push(3)
>>> linked_list.push(2)
>>> linked_list.push(1)
>>> list(linked_list)
[1, 2, 3, 4, 5]
"""
new_node = Node(new_data)
new_node.next_node = self.head
self.head = new_node
def swap_nodes(self, node_data_1: Any, node_data_2: Any) -> None:
"""
Swap the positions of two nodes in the Linked List based on their data values.
Args:
node_data_1: Data value of the first node to be swapped.
node_data_2: Data value of the second node to be swapped.
Note:
If either of the specified data values isn't found, then no swapping occurs.
Examples:
When both values are present in a linked list.
>>> linked_list = LinkedList()
>>> linked_list.push(5)
>>> linked_list.push(4)
>>> linked_list.push(3)
>>> linked_list.push(2)
>>> linked_list.push(1)
>>> list(linked_list)
[1, 2, 3, 4, 5]
>>> linked_list.swap_nodes(1, 5)
>>> tuple(linked_list)
(5, 2, 3, 4, 1)
When one value is present and the other isn't in the linked list.
>>> second_list = LinkedList()
>>> second_list.push(6)
>>> second_list.push(7)
>>> second_list.push(8)
>>> second_list.push(9)
>>> second_list.swap_nodes(1, 6) is None
True
When both values are absent in the linked list.
>>> second_list = LinkedList()
>>> second_list.push(10)
>>> second_list.push(9)
>>> second_list.push(8)
>>> second_list.push(7)
>>> second_list.swap_nodes(1, 3) is None
True
When the linked list is empty.
>>> second_list = LinkedList()
>>> second_list.swap_nodes(1, 3) is None
True
Returns:
None
"""
if node_data_1 == node_data_2:
return
node_1 = self.head
while node_1 and node_1.data != node_data_1:
node_1 = node_1.next_node
node_2 = self.head
while node_2 and node_2.data != node_data_2:
node_2 = node_2.next_node
if node_1 is None or node_2 is None:
return
# Swap the data values of the two nodes
node_1.data, node_2.data = node_2.data, node_1.data
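# --- Illustrative sketch (not part of the original module) ---
# swap_nodes() above exchanges the *data* held by the two matching nodes.  The
# hypothetical helper below shows the alternative of relinking the nodes
# themselves, which matters when other code keeps references to specific Node
# objects.  It assumes the Node and LinkedList classes defined above.
def swap_nodes_by_relinking(
    linked_list: LinkedList, node_data_1: Any, node_data_2: Any
) -> None:
    if node_data_1 == node_data_2:
        return
    # Locate both nodes and their predecessors.
    prev_1, node_1 = None, linked_list.head
    while node_1 and node_1.data != node_data_1:
        prev_1, node_1 = node_1, node_1.next_node
    prev_2, node_2 = None, linked_list.head
    while node_2 and node_2.data != node_data_2:
        prev_2, node_2 = node_2, node_2.next_node
    if node_1 is None or node_2 is None:
        return
    # Re-point each predecessor (or the head) at the other node.
    if prev_1:
        prev_1.next_node = node_2
    else:
        linked_list.head = node_2
    if prev_2:
        prev_2.next_node = node_1
    else:
        linked_list.head = node_1
    # Exchange the outgoing links; tuple assignment also handles adjacent nodes.
    node_1.next_node, node_2.next_node = node_2.next_node, node_1.next_node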
if __name__ == "__main__":
"""
Demonstrates swapping two nodes in a linked list and prints the result.
"""
from doctest import testmod
testmod()
linked_list = LinkedList()
for i in range(5, 0, -1):
linked_list.push(i)
print(f"Original Linked List: {list(linked_list)}")
linked_list.swap_nodes(1, 4)
print(f"Modified Linked List: {list(linked_list)}")
print("After swapping the nodes whose data is 1 and 4.")
================================================
FILE: data_structures/queues/__init__.py
================================================
================================================
FILE: data_structures/queues/circular_queue.py
================================================
# Implementation of Circular Queue (using Python lists)
class CircularQueue:
"""Circular FIFO queue with a fixed capacity"""
def __init__(self, n: int):
self.n = n
self.array = [None] * self.n
self.front = 0 # index of the first element
self.rear = 0
self.size = 0
def __len__(self) -> int:
"""
>>> cq = CircularQueue(5)
>>> len(cq)
0
>>> cq.enqueue("A") # doctest: +ELLIPSIS
>>> cq.array
['A', None, None, None, None]
>>> len(cq)
1
"""
return self.size
def is_empty(self) -> bool:
"""
Checks whether the queue is empty or not
>>> cq = CircularQueue(5)
>>> cq.is_empty()
True
>>> cq.enqueue("A").is_empty()
False
"""
return self.size == 0
def first(self):
"""
Returns the first element of the queue
>>> cq = CircularQueue(5)
>>> cq.first()
False
>>> cq.enqueue("A").first()
'A'
"""
return False if self.is_empty() else self.array[self.front]
def enqueue(self, data):
"""
This function inserts an element at the end of the queue using self.rear value
as an index.
>>> cq = CircularQueue(5)
>>> cq.enqueue("A") # doctest: +ELLIPSIS
>>> (cq.size, cq.first())
(1, 'A')
>>> cq.enqueue("B") # doctest: +ELLIPSIS
>>> cq.array
['A', 'B', None, None, None]
>>> (cq.size, cq.first())
(2, 'A')
>>> cq.enqueue("C").enqueue("D").enqueue("E") # doctest: +ELLIPSIS
>>> cq.enqueue("F")
Traceback (most recent call last):
...
Exception: QUEUE IS FULL
"""
if self.size >= self.n:
raise Exception("QUEUE IS FULL")
self.array[self.rear] = data
self.rear = (self.rear + 1) % self.n
self.size += 1
return self
def dequeue(self):
"""
This function removes an element from the queue using self.front value as an
index and returns it
>>> cq = CircularQueue(5)
>>> cq.dequeue()
Traceback (most recent call last):
...
Exception: UNDERFLOW
>>> cq.enqueue("A").enqueue("B").dequeue()
'A'
>>> (cq.size, cq.first())
(1, 'B')
>>> cq.dequeue()
'B'
>>> cq.dequeue()
Traceback (most recent call last):
...
Exception: UNDERFLOW
"""
if self.size == 0:
raise Exception("UNDERFLOW")
temp = self.array[self.front]
self.array[self.front] = None
self.front = (self.front + 1) % self.n
self.size -= 1
return temp
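# --- Illustrative demo (not part of the original file) ---
# Shows how `front` and `rear` wrap around the fixed-size backing list thanks
# to the modulo arithmetic in enqueue() and dequeue() above.
if __name__ == "__main__":
    cq = CircularQueue(3)
    cq.enqueue("A").enqueue("B").enqueue("C")
    print(cq.array, cq.front, cq.rear)  # ['A', 'B', 'C'] 0 0  (rear wrapped to 0)
    print(cq.dequeue())  # A; front advances to 1
    cq.enqueue("D")  # slot 0 is reused and rear wraps to 1
    print(cq.array, cq.front, cq.rear)  # ['D', 'B', 'C'] 1 1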
================================================
FILE: data_structures/queues/circular_queue_linked_list.py
================================================
# Implementation of Circular Queue using linked lists
# https://en.wikipedia.org/wiki/Circular_buffer
from __future__ import annotations
from typing import Any
class CircularQueueLinkedList:
"""
Circular FIFO queue with the given capacity (default queue length: 6)
>>> cq = CircularQueueLinkedList(2)
>>> cq.enqueue('a')
>>> cq.enqueue('b')
>>> cq.enqueue('c')
Traceback (most recent call last):
...
Exception: Full Queue
"""
def __init__(self, initial_capacity: int = 6) -> None:
self.front: Node | None = None
self.rear: Node | None = None
self.create_linked_list(initial_capacity)
def create_linked_list(self, initial_capacity: int) -> None:
current_node = Node()
self.front = current_node
self.rear = current_node
previous_node = current_node
for _ in range(1, initial_capacity):
current_node = Node()
previous_node.next = current_node
current_node.prev = previous_node
previous_node = current_node
previous_node.next = self.front
self.front.prev = previous_node
def is_empty(self) -> bool:
"""
Checks whether the queue is empty or not
>>> cq = CircularQueueLinkedList()
>>> cq.is_empty()
True
>>> cq.enqueue('a')
>>> cq.is_empty()
False
>>> cq.dequeue()
'a'
>>> cq.is_empty()
True
"""
return (
self.front == self.rear
and self.front is not None
and self.front.data is None
)
def first(self) -> Any | None:
"""
Returns the first element of the queue
>>> cq = CircularQueueLinkedList()
>>> cq.first()
Traceback (most recent call last):
...
Exception: Empty Queue
>>> cq.enqueue('a')
>>> cq.first()
'a'
>>> cq.dequeue()
'a'
>>> cq.first()
Traceback (most recent call last):
...
Exception: Empty Queue
>>> cq.enqueue('b')
>>> cq.enqueue('c')
>>> cq.first()
'b'
"""
self.check_can_perform_operation()
return self.front.data if self.front else None
def enqueue(self, data: Any) -> None:
"""
Saves data at the end of the queue
>>> cq = CircularQueueLinkedList()
>>> cq.enqueue('a')
>>> cq.enqueue('b')
>>> cq.dequeue()
'a'
>>> cq.dequeue()
'b'
>>> cq.dequeue()
Traceback (most recent call last):
...
Exception: Empty Queue
"""
if self.rear is None:
return
self.check_is_full()
if not self.is_empty():
self.rear = self.rear.next
if self.rear:
self.rear.data = data
def dequeue(self) -> Any:
"""
Removes and retrieves the first element of the queue
>>> cq = CircularQueueLinkedList()
>>> cq.dequeue()
Traceback (most recent call last):
...
Exception: Empty Queue
>>> cq.enqueue('a')
>>> cq.dequeue()
'a'
>>> cq.dequeue()
Traceback (most recent call last):
...
Exception: Empty Queue
"""
self.check_can_perform_operation()
if self.rear is None or self.front is None:
return None
if self.front == self.rear:
data = self.front.data
self.front.data = None
return data
old_front = self.front
self.front = old_front.next
data = old_front.data
old_front.data = None
return data
def check_can_perform_operation(self) -> None:
if self.is_empty():
raise Exception("Empty Queue")
def check_is_full(self) -> None:
if self.rear and self.rear.next == self.front:
raise Exception("Full Queue")
class Node:
def __init__(self) -> None:
self.data: Any | None = None
self.next: Node | None = None
self.prev: Node | None = None
if __name__ == "__main__":
import doctest
doctest.testmod()
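    # --- Illustrative demo (not part of the original file) ---
    # The queue reuses its pre-allocated ring of nodes, so the capacity chosen
    # at construction time never grows.
    cq = CircularQueueLinkedList(initial_capacity=2)
    cq.enqueue("x")
    print(cq.dequeue())  # x
    cq.enqueue("y")
    cq.enqueue("z")
    print(cq.first())  # y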
================================================
FILE: data_structures/queues/double_ended_queue.py
================================================
"""
Implementation of a double-ended queue (deque).
"""
from __future__ import annotations
from collections.abc import Iterable
from dataclasses import dataclass
from typing import Any
class Deque:
"""
Deque data structure.
Operations
----------
append(val: Any) -> None
appendleft(val: Any) -> None
extend(iterable: Iterable) -> None
extendleft(iterable: Iterable) -> None
pop() -> Any
popleft() -> Any
Observers
---------
is_empty() -> bool
Attributes
----------
_front: _Node
front of the deque a.k.a. the first element
_back: _Node
back of the deque a.k.a. the last element
_len: int
the number of nodes
"""
__slots__ = ("_back", "_front", "_len")
@dataclass
class _Node:
"""
Representation of a node.
Contains a value and a pointer to the next node as well as to the previous one.
"""
val: Any = None
next_node: Deque._Node | None = None
prev_node: Deque._Node | None = None
class _Iterator:
"""
Helper class used to iterate over the deque's nodes.
Attributes
----------
_cur: _Node
the current node of the iteration.
"""
__slots__ = ("_cur",)
def __init__(self, cur: Deque._Node | None) -> None:
self._cur = cur
def __iter__(self) -> Deque._Iterator:
"""
>>> our_deque = Deque([1, 2, 3])
>>> iterator = iter(our_deque)
"""
return self
def __next__(self) -> Any:
"""
>>> our_deque = Deque([1, 2, 3])
>>> iterator = iter(our_deque)
>>> next(iterator)
1
>>> next(iterator)
2
>>> next(iterator)
3
"""
if self._cur is None:
# finished iterating
raise StopIteration
val = self._cur.val
self._cur = self._cur.next_node
return val
def __init__(self, iterable: Iterable[Any] | None = None) -> None:
self._front: Any = None
self._back: Any = None
self._len: int = 0
if iterable is not None:
# append every value to the deque
for val in iterable:
self.append(val)
def append(self, val: Any) -> None:
"""
Adds val to the end of the deque.
Time complexity: O(1)
>>> our_deque_1 = Deque([1, 2, 3])
>>> our_deque_1.append(4)
>>> our_deque_1
[1, 2, 3, 4]
>>> our_deque_2 = Deque('ab')
>>> our_deque_2.append('c')
>>> our_deque_2
['a', 'b', 'c']
>>> from collections import deque
>>> deque_collections_1 = deque([1, 2, 3])
>>> deque_collections_1.append(4)
>>> deque_collections_1
deque([1, 2, 3, 4])
>>> deque_collections_2 = deque('ab')
>>> deque_collections_2.append('c')
>>> deque_collections_2
deque(['a', 'b', 'c'])
>>> list(our_deque_1) == list(deque_collections_1)
True
>>> list(our_deque_2) == list(deque_collections_2)
True
"""
node = self._Node(val, None, None)
if self.is_empty():
# front = back
self._front = self._back = node
self._len = 1
else:
# connect nodes
self._back.next_node = node
node.prev_node = self._back
self._back = node # assign new back to the new node
self._len += 1
# make sure there were no errors
assert not self.is_empty(), "Error on appending value."
def appendleft(self, val: Any) -> None:
"""
Adds val to the beginning of the deque.
Time complexity: O(1)
>>> our_deque_1 = Deque([2, 3])
>>> our_deque_1.appendleft(1)
>>> our_deque_1
[1, 2, 3]
>>> our_deque_2 = Deque('bc')
>>> our_deque_2.appendleft('a')
>>> our_deque_2
['a', 'b', 'c']
>>> from collections import deque
>>> deque_collections_1 = deque([2, 3])
>>> deque_collections_1.appendleft(1)
>>> deque_collections_1
deque([1, 2, 3])
>>> deque_collections_2 = deque('bc')
>>> deque_collections_2.appendleft('a')
>>> deque_collections_2
deque(['a', 'b', 'c'])
>>> list(our_deque_1) == list(deque_collections_1)
True
>>> list(our_deque_2) == list(deque_collections_2)
True
"""
node = self._Node(val, None, None)
if self.is_empty():
# front = back
self._front = self._back = node
self._len = 1
else:
# connect nodes
node.next_node = self._front
self._front.prev_node = node
self._front = node # assign new front to the new node
self._len += 1
# make sure there were no errors
assert not self.is_empty(), "Error on appending value."
def extend(self, iterable: Iterable[Any]) -> None:
"""
Appends every value of iterable to the end of the deque.
Time complexity: O(n)
>>> our_deque_1 = Deque([1, 2, 3])
>>> our_deque_1.extend([4, 5])
>>> our_deque_1
[1, 2, 3, 4, 5]
>>> our_deque_2 = Deque('ab')
>>> our_deque_2.extend('cd')
>>> our_deque_2
['a', 'b', 'c', 'd']
>>> from collections import deque
>>> deque_collections_1 = deque([1, 2, 3])
>>> deque_collections_1.extend([4, 5])
>>> deque_collections_1
deque([1, 2, 3, 4, 5])
>>> deque_collections_2 = deque('ab')
>>> deque_collections_2.extend('cd')
>>> deque_collections_2
deque(['a', 'b', 'c', 'd'])
>>> list(our_deque_1) == list(deque_collections_1)
True
>>> list(our_deque_2) == list(deque_collections_2)
True
"""
for val in iterable:
self.append(val)
def extendleft(self, iterable: Iterable[Any]) -> None:
"""
Appends every value of iterable to the beginning of the deque.
Time complexity: O(n)
>>> our_deque_1 = Deque([1, 2, 3])
>>> our_deque_1.extendleft([0, -1])
>>> our_deque_1
[-1, 0, 1, 2, 3]
>>> our_deque_2 = Deque('cd')
>>> our_deque_2.extendleft('ba')
>>> our_deque_2
['a', 'b', 'c', 'd']
>>> from collections import deque
>>> deque_collections_1 = deque([1, 2, 3])
>>> deque_collections_1.extendleft([0, -1])
>>> deque_collections_1
deque([-1, 0, 1, 2, 3])
>>> deque_collections_2 = deque('cd')
>>> deque_collections_2.extendleft('ba')
>>> deque_collections_2
deque(['a', 'b', 'c', 'd'])
>>> list(our_deque_1) == list(deque_collections_1)
True
>>> list(our_deque_2) == list(deque_collections_2)
True
"""
for val in iterable:
self.appendleft(val)
def pop(self) -> Any:
"""
Removes the last element of the deque and returns it.
Time complexity: O(1)
@returns topop.val: the value of the node to pop.
>>> our_deque1 = Deque([1])
>>> our_popped1 = our_deque1.pop()
>>> our_popped1
1
>>> our_deque1
[]
>>> our_deque2 = Deque([1, 2, 3, 15182])
>>> our_popped2 = our_deque2.pop()
>>> our_popped2
15182
>>> our_deque2
[1, 2, 3]
>>> from collections import deque
>>> deque_collections = deque([1, 2, 3, 15182])
>>> collections_popped = deque_collections.pop()
>>> collections_popped
15182
>>> deque_collections
deque([1, 2, 3])
>>> list(our_deque2) == list(deque_collections)
True
>>> our_popped2 == collections_popped
True
"""
# make sure the deque has elements to pop
assert not self.is_empty(), "Deque is empty."
topop = self._back
# if only one element in the queue: point the front and back to None
# else remove one element from back
if self._front == self._back:
self._front = None
self._back = None
else:
self._back = self._back.prev_node # set new back
# drop the last node, python will deallocate memory automatically
self._back.next_node = None
self._len -= 1
return topop.val
def popleft(self) -> Any:
"""
Removes the first element of the deque and returns it.
Time complexity: O(1)
@returns topop.val: the value of the node to pop.
>>> our_deque1 = Deque([1])
>>> our_popped1 = our_deque1.popleft()
>>> our_popped1
1
>>> our_deque1
[]
>>> our_deque2 = Deque([15182, 1, 2, 3])
>>> our_popped2 = our_deque2.popleft()
>>> our_popped2
15182
>>> our_deque2
[1, 2, 3]
>>> from collections import deque
>>> deque_collections = deque([15182, 1, 2, 3])
>>> collections_popped = deque_collections.popleft()
>>> collections_popped
15182
>>> deque_collections
deque([1, 2, 3])
>>> list(our_deque2) == list(deque_collections)
True
>>> our_popped2 == collections_popped
True
"""
# make sure the deque has elements to pop
assert not self.is_empty(), "Deque is empty."
topop = self._front
# if only one element in the queue: point the front and back to None
# else remove one element from front
if self._front == self._back:
self._front = None
self._back = None
else:
self._front = self._front.next_node # set new front and drop the first node
self._front.prev_node = None
self._len -= 1
return topop.val
def is_empty(self) -> bool:
"""
Checks if the deque is empty.
Time complexity: O(1)
>>> our_deque = Deque([1, 2, 3])
>>> our_deque.is_empty()
False
>>> our_empty_deque = Deque()
>>> our_empty_deque.is_empty()
True
>>> from collections import deque
>>> empty_deque_collections = deque()
>>> list(our_empty_deque) == list(empty_deque_collections)
True
"""
return self._front is None
def __len__(self) -> int:
"""
Implements len() function. Returns the length of the deque.
Time complexity: O(1)
>>> our_deque = Deque([1, 2, 3])
>>> len(our_deque)
3
>>> our_empty_deque = Deque()
>>> len(our_empty_deque)
0
>>> from collections import deque
>>> deque_collections = deque([1, 2, 3])
>>> len(deque_collections)
3
>>> empty_deque_collections = deque()
>>> len(empty_deque_collections)
0
>>> len(our_empty_deque) == len(empty_deque_collections)
True
"""
return self._len
def __eq__(self, other: object) -> bool:
"""
Implements "==" operator. Returns if *self* is equal to *other*.
Time complexity: O(n)
>>> our_deque_1 = Deque([1, 2, 3])
>>> our_deque_2 = Deque([1, 2, 3])
>>> our_deque_1 == our_deque_2
True
>>> our_deque_3 = Deque([1, 2])
>>> our_deque_1 == our_deque_3
False
>>> from collections import deque
>>> deque_collections_1 = deque([1, 2, 3])
>>> deque_collections_2 = deque([1, 2, 3])
>>> deque_collections_1 == deque_collections_2
True
>>> deque_collections_3 = deque([1, 2])
>>> deque_collections_1 == deque_collections_3
False
>>> (our_deque_1 == our_deque_2) == (deque_collections_1 == deque_collections_2)
True
>>> (our_deque_1 == our_deque_3) == (deque_collections_1 == deque_collections_3)
True
"""
if not isinstance(other, Deque):
return NotImplemented
me = self._front
oth = other._front
# if the lengths of the deques are not the same, they are not equal
if len(self) != len(other):
return False
while me is not None and oth is not None:
# compare every value
if me.val != oth.val:
return False
me = me.next_node
oth = oth.next_node
return True
def __iter__(self) -> Deque._Iterator:
"""
Implements iteration.
Time complexity: O(1)
>>> our_deque = Deque([1, 2, 3])
>>> for v in our_deque:
... print(v)
1
2
3
>>> from collections import deque
>>> deque_collections = deque([1, 2, 3])
>>> for v in deque_collections:
... print(v)
1
2
3
"""
return Deque._Iterator(self._front)
def __repr__(self) -> str:
"""
Implements representation of the deque.
Represents it as a list, with its values between '[' and ']'.
Time complexity: O(n)
>>> our_deque = Deque([1, 2, 3])
>>> our_deque
[1, 2, 3]
"""
values_list = []
aux = self._front
while aux is not None:
# append the values in a list to display
values_list.append(aux.val)
aux = aux.next_node
return f"[{', '.join(repr(val) for val in values_list)}]"
if __name__ == "__main__":
import doctest
doctest.testmod()
dq = Deque([3])
dq.pop()
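    # --- Illustrative demo (not part of the original file) ---
    # Round-trip through both ends, mirroring collections.deque behaviour.
    dq = Deque([2, 3])
    dq.appendleft(1)
    dq.append(4)
    print(dq)  # [1, 2, 3, 4]
    print(dq.popleft(), dq.pop())  # 1 4
    print(dq)  # [2, 3]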
================================================
FILE: data_structures/queues/linked_queue.py
================================================
"""A Queue using a linked list like structure"""
from __future__ import annotations
from collections.abc import Iterator
from typing import Any
class Node:
def __init__(self, data: Any) -> None:
self.data: Any = data
self.next: Node | None = None
def __str__(self) -> str:
return f"{self.data}"
class LinkedQueue:
"""
>>> queue = LinkedQueue()
>>> queue.is_empty()
True
>>> queue.put(5)
>>> queue.put(9)
>>> queue.put('python')
>>> queue.is_empty()
False
>>> queue.get()
5
>>> queue.put('algorithms')
>>> queue.get()
9
>>> queue.get()
'python'
>>> queue.get()
'algorithms'
>>> queue.is_empty()
True
>>> queue.get()
Traceback (most recent call last):
...
IndexError: dequeue from empty queue
"""
def __init__(self) -> None:
self.front: Node | None = None
self.rear: Node | None = None
def __iter__(self) -> Iterator[Any]:
node = self.front
while node:
yield node.data
node = node.next
def __len__(self) -> int:
"""
>>> queue = LinkedQueue()
>>> for i in range(1, 6):
... queue.put(i)
>>> len(queue)
5
>>> for i in range(1, 6):
... assert len(queue) == 6 - i
... _ = queue.get()
>>> len(queue)
0
"""
return len(tuple(iter(self)))
def __str__(self) -> str:
"""
>>> queue = LinkedQueue()
>>> for i in range(1, 4):
... queue.put(i)
>>> queue.put("Python")
>>> queue.put(3.14)
>>> queue.put(True)
>>> str(queue)
'1 <- 2 <- 3 <- Python <- 3.14 <- True'
"""
return " <- ".join(str(item) for item in self)
def is_empty(self) -> bool:
"""
>>> queue = LinkedQueue()
>>> queue.is_empty()
True
>>> for i in range(1, 6):
... queue.put(i)
>>> queue.is_empty()
False
"""
return len(self) == 0
def put(self, item: Any) -> None:
"""
>>> queue = LinkedQueue()
>>> queue.get()
Traceback (most recent call last):
...
IndexError: dequeue from empty queue
>>> for i in range(1, 6):
... queue.put(i)
>>> str(queue)
'1 <- 2 <- 3 <- 4 <- 5'
"""
node = Node(item)
if self.is_empty():
self.front = self.rear = node
else:
assert isinstance(self.rear, Node)
self.rear.next = node
self.rear = node
def get(self) -> Any:
"""
>>> queue = LinkedQueue()
>>> queue.get()
Traceback (most recent call last):
...
IndexError: dequeue from empty queue
>>> queue = LinkedQueue()
>>> for i in range(1, 6):
... queue.put(i)
>>> for i in range(1, 6):
... assert queue.get() == i
>>> len(queue)
0
"""
if self.is_empty():
raise IndexError("dequeue from empty queue")
assert isinstance(self.front, Node)
node = self.front
self.front = self.front.next
if self.front is None:
self.rear = None
return node.data
def clear(self) -> None:
"""
>>> queue = LinkedQueue()
>>> for i in range(1, 6):
... queue.put(i)
>>> queue.clear()
>>> len(queue)
0
>>> str(queue)
''
"""
self.front = self.rear = None
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: data_structures/queues/priority_queue_using_list.py
================================================
"""
Pure Python implementations of a Fixed Priority Queue and an Element Priority Queue
using Python lists.
"""
class OverFlowError(Exception):
pass
class UnderFlowError(Exception):
pass
class FixedPriorityQueue:
"""
Tasks can be added to a Priority Queue at any time and in any order, but when tasks
are removed, the task with the highest priority is removed first, in FIFO order
within each priority level. This implementation uses three priority levels, with
priority 0 tasks being the most urgent and priority 2 tasks being the least urgent.
Examples
>>> fpq = FixedPriorityQueue()
>>> fpq.enqueue(0, 10)
>>> fpq.enqueue(1, 70)
>>> fpq.enqueue(0, 100)
>>> fpq.enqueue(2, 1)
>>> fpq.enqueue(2, 5)
>>> fpq.enqueue(1, 7)
>>> fpq.enqueue(2, 4)
>>> fpq.enqueue(1, 64)
>>> fpq.enqueue(0, 128)
>>> print(fpq)
Priority 0: [10, 100, 128]
Priority 1: [70, 7, 64]
Priority 2: [1, 5, 4]
>>> fpq.dequeue()
10
>>> fpq.dequeue()
100
>>> fpq.dequeue()
128
>>> fpq.dequeue()
70
>>> fpq.dequeue()
7
>>> print(fpq)
Priority 0: []
Priority 1: [64]
Priority 2: [1, 5, 4]
>>> fpq.dequeue()
64
>>> fpq.dequeue()
1
>>> fpq.dequeue()
5
>>> fpq.dequeue()
4
>>> fpq.dequeue()
Traceback (most recent call last):
...
data_structures.queues.priority_queue_using_list.UnderFlowError: All queues are empty
>>> print(fpq)
Priority 0: []
Priority 1: []
Priority 2: []
""" # noqa: E501
def __init__(self):
self.queues = [
[],
[],
[],
]
def enqueue(self, priority: int, data: int) -> None:
"""
Add an element to a queue based on its priority.
If the priority is invalid, a ValueError is raised.
If the queue for that priority already holds 100 items, an OverflowError is raised.
"""
try:
if len(self.queues[priority]) >= 100:
raise OverflowError("Maximum queue size is 100")
self.queues[priority].append(data)
except IndexError:
raise ValueError("Valid priorities are 0, 1, and 2")
def dequeue(self) -> int:
"""
Return the highest priority element in FIFO order.
If all queues are empty, an UnderFlowError is raised.
"""
for queue in self.queues:
if queue:
return queue.pop(0)
raise UnderFlowError("All queues are empty")
def __str__(self) -> str:
return "\n".join(f"Priority {i}: {q}" for i, q in enumerate(self.queues))
class ElementPriorityQueue:
"""
Element Priority Queue is the same as Fixed Priority Queue except that the value of
the element itself is the priority. The rules for priorities are the same as for the
Fixed Priority Queue.
>>> epq = ElementPriorityQueue()
>>> epq.enqueue(10)
>>> epq.enqueue(70)
>>> epq.enqueue(4)
>>> epq.enqueue(1)
>>> epq.enqueue(5)
>>> epq.enqueue(7)
>>> epq.enqueue(4)
>>> epq.enqueue(64)
>>> epq.enqueue(128)
>>> print(epq)
[10, 70, 4, 1, 5, 7, 4, 64, 128]
>>> epq.dequeue()
1
>>> epq.dequeue()
4
>>> epq.dequeue()
4
>>> epq.dequeue()
5
>>> epq.dequeue()
7
>>> epq.dequeue()
10
>>> print(epq)
[70, 64, 128]
>>> epq.dequeue()
64
>>> epq.dequeue()
70
>>> epq.dequeue()
128
>>> epq.dequeue()
Traceback (most recent call last):
...
data_structures.queues.priority_queue_using_list.UnderFlowError: The queue is empty
>>> print(epq)
[]
"""
def __init__(self):
self.queue = []
def enqueue(self, data: int) -> None:
"""
Add an element to the queue.
If the queue already holds 100 items, an OverFlowError is raised.
"""
if len(self.queue) == 100:
raise OverFlowError("Maximum queue size is 100")
self.queue.append(data)
def dequeue(self) -> int:
"""
Return the highest priority element, i.e. the smallest value in the queue.
If the queue is empty, an UnderFlowError is raised.
"""
if not self.queue:
raise UnderFlowError("The queue is empty")
else:
data = min(self.queue)
self.queue.remove(data)
return data
def __str__(self) -> str:
"""
Prints all the elements within the Element Priority Queue
"""
return str(self.queue)
def fixed_priority_queue():
fpq = FixedPriorityQueue()
fpq.enqueue(0, 10)
fpq.enqueue(1, 70)
fpq.enqueue(0, 100)
fpq.enqueue(2, 1)
fpq.enqueue(2, 5)
fpq.enqueue(1, 7)
fpq.enqueue(2, 4)
fpq.enqueue(1, 64)
fpq.enqueue(0, 128)
print(fpq)
print(fpq.dequeue())
print(fpq.dequeue())
print(fpq.dequeue())
print(fpq.dequeue())
print(fpq.dequeue())
print(fpq)
print(fpq.dequeue())
print(fpq.dequeue())
print(fpq.dequeue())
print(fpq.dequeue())
print(fpq.dequeue())
def element_priority_queue():
epq = ElementPriorityQueue()
epq.enqueue(10)
epq.enqueue(70)
epq.enqueue(100)
epq.enqueue(1)
epq.enqueue(5)
epq.enqueue(7)
epq.enqueue(4)
epq.enqueue(64)
epq.enqueue(128)
print(epq)
print(epq.dequeue())
print(epq.dequeue())
print(epq.dequeue())
print(epq.dequeue())
print(epq.dequeue())
print(epq)
print(epq.dequeue())
print(epq.dequeue())
print(epq.dequeue())
print(epq.dequeue())
print(epq.dequeue())
if __name__ == "__main__":
fixed_priority_queue()
element_priority_queue()
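    # --- Hedged alternative sketch (not part of the original file) ---
    # ElementPriorityQueue.dequeue() finds the minimum with min() and remove(),
    # which costs O(n) per call.  The standard-library heapq module gives the
    # same min-first ordering (ties are not guaranteed to stay FIFO) with
    # O(log n) pushes and pops.
    import heapq

    heap: list[int] = []
    for value in (10, 70, 4, 1, 5, 7, 4, 64, 128):
        heapq.heappush(heap, value)
    print([heapq.heappop(heap) for _ in range(len(heap))])
    # [1, 4, 4, 5, 7, 10, 64, 70, 128]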
================================================
FILE: data_structures/queues/queue_by_list.py
================================================
"""Queue represented by a Python list"""
from collections.abc import Iterable
class QueueByList[T]:
def __init__(self, iterable: Iterable[T] | None = None) -> None:
"""
>>> QueueByList()
Queue(())
>>> QueueByList([10, 20, 30])
Queue((10, 20, 30))
>>> QueueByList((i**2 for i in range(1, 4)))
Queue((1, 4, 9))
"""
self.entries: list[T] = list(iterable or [])
def __len__(self) -> int:
"""
>>> len(QueueByList())
0
>>> from string import ascii_lowercase
>>> len(QueueByList(ascii_lowercase))
26
>>> queue = QueueByList()
>>> for i in range(1, 11):
... queue.put(i)
>>> len(queue)
10
>>> for i in range(2):
... queue.get()
1
2
>>> len(queue)
8
"""
return len(self.entries)
def __repr__(self) -> str:
"""
>>> queue = QueueByList()
>>> queue
Queue(())
>>> str(queue)
'Queue(())'
>>> queue.put(10)
>>> queue
Queue((10,))
>>> queue.put(20)
>>> queue.put(30)
>>> queue
Queue((10, 20, 30))
"""
return f"Queue({tuple(self.entries)})"
def put(self, item: T) -> None:
"""Put `item` to the Queue
>>> queue = QueueByList()
>>> queue.put(10)
>>> queue.put(20)
>>> len(queue)
2
>>> queue
Queue((10, 20))
"""
self.entries.append(item)
def get(self) -> T:
"""
Get `item` from the Queue
>>> queue = QueueByList((10, 20, 30))
>>> queue.get()
10
>>> queue.put(40)
>>> queue.get()
20
>>> queue.get()
30
>>> len(queue)
1
>>> queue.get()
40
>>> queue.get()
Traceback (most recent call last):
...
IndexError: Queue is empty
"""
if not self.entries:
raise IndexError("Queue is empty")
return self.entries.pop(0)
def rotate(self, rotation: int) -> None:
"""Rotate the items of the Queue `rotation` times
>>> queue = QueueByList([10, 20, 30, 40])
>>> queue
Queue((10, 20, 30, 40))
>>> queue.rotate(1)
>>> queue
Queue((20, 30, 40, 10))
>>> queue.rotate(2)
>>> queue
Queue((40, 10, 20, 30))
"""
put = self.entries.append
get = self.entries.pop
for _ in range(rotation):
put(get(0))
def get_front(self) -> T:
"""Get the front item from the Queue
>>> queue = QueueByList((10, 20, 30))
>>> queue.get_front()
10
>>> queue
Queue((10, 20, 30))
>>> queue.get()
10
>>> queue.get_front()
20
"""
return self.entries[0]
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: data_structures/queues/queue_by_two_stacks.py
================================================
"""Queue implementation using two stacks"""
from collections.abc import Iterable
class QueueByTwoStacks[T]:
def __init__(self, iterable: Iterable[T] | None = None) -> None:
"""
>>> QueueByTwoStacks()
Queue(())
>>> QueueByTwoStacks([10, 20, 30])
Queue((10, 20, 30))
>>> QueueByTwoStacks((i**2 for i in range(1, 4)))
Queue((1, 4, 9))
"""
self._stack1: list[T] = list(iterable or [])
self._stack2: list[T] = []
def __len__(self) -> int:
"""
>>> len(QueueByTwoStacks())
0
>>> from string import ascii_lowercase
>>> len(QueueByTwoStacks(ascii_lowercase))
26
>>> queue = QueueByTwoStacks()
>>> for i in range(1, 11):
... queue.put(i)
...
>>> len(queue)
10
>>> for i in range(2):
... queue.get()
1
2
>>> len(queue)
8
"""
return len(self._stack1) + len(self._stack2)
def __repr__(self) -> str:
"""
>>> queue = QueueByTwoStacks()
>>> queue
Queue(())
>>> str(queue)
'Queue(())'
>>> queue.put(10)
>>> queue
Queue((10,))
>>> queue.put(20)
>>> queue.put(30)
>>> queue
Queue((10, 20, 30))
"""
return f"Queue({tuple(self._stack2[::-1] + self._stack1)})"
def put(self, item: T) -> None:
"""
Put `item` into the Queue
>>> queue = QueueByTwoStacks()
>>> queue.put(10)
>>> queue.put(20)
>>> len(queue)
2
>>> queue
Queue((10, 20))
"""
self._stack1.append(item)
def get(self) -> T:
"""
Get `item` from the Queue
>>> queue = QueueByTwoStacks((10, 20, 30))
>>> queue.get()
10
>>> queue.put(40)
>>> queue.get()
20
>>> queue.get()
30
>>> len(queue)
1
>>> queue.get()
40
>>> queue.get()
Traceback (most recent call last):
...
IndexError: Queue is empty
"""
# To reduce the number of attribute look-ups in the `while` loop.
stack1_pop = self._stack1.pop
stack2_append = self._stack2.append
if not self._stack2:
while self._stack1:
stack2_append(stack1_pop())
if not self._stack2:
raise IndexError("Queue is empty")
return self._stack2.pop()
if __name__ == "__main__":
from doctest import testmod
testmod()
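    # --- Illustrative demo (not part of the original file) ---
    # Each element is moved from _stack1 to _stack2 at most once, so although a
    # single get() can cost O(n) when _stack2 is empty, the amortized cost of
    # get() over any sequence of operations is O(1).
    queue = QueueByTwoStacks(range(1, 4))
    print(queue.get())  # 1  (_stack1 is drained into _stack2 here)
    queue.put(4)
    print(queue.get(), queue.get(), queue.get())  # 2 3 4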
================================================
FILE: data_structures/queues/queue_on_pseudo_stack.py
================================================
"""Queue represented by a pseudo stack (represented by a list with pop and append)"""
from typing import Any
class Queue:
def __init__(self):
self.stack = []
self.length = 0
def __str__(self):
printed = "<" + str(self.stack)[1:-1] + ">"
return printed
"""Enqueues {@code item}
@param item
item to enqueue"""
def put(self, item: Any) -> None:
self.stack.append(item)
self.length = self.length + 1
"""Dequeues {@code item}
@requirement: |self.length| > 0
@return dequeued
item that was dequeued"""
def get(self) -> Any:
self.rotate(1)
dequeued = self.stack[self.length - 1]
self.stack = self.stack[:-1]
self.rotate(self.length - 1)
self.length = self.length - 1
return dequeued
"""Rotates the queue {@code rotation} times
@param rotation
number of times to rotate queue"""
def rotate(self, rotation: int) -> None:
for _ in range(rotation):
temp = self.stack[0]
self.stack = self.stack[1:]
self.put(temp)
self.length = self.length - 1
"""Reports item at the front of self
@return item at front of self.stack"""
def front(self) -> Any:
front = self.get()
self.put(front)
self.rotate(self.length - 1)
return front
"""Returns the length of this.stack"""
def size(self) -> int:
return self.length
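# --- Illustrative demo (not part of the original file) ---
# FIFO behaviour is obtained purely through rotate(): get() rotates the front
# element to the back, slices it off, then rotates the rest back into place.
if __name__ == "__main__":
    queue = Queue()
    for value in (1, 2, 3):
        queue.put(value)
    print(queue)  # <1, 2, 3>
    print(queue.get())  # 1
    print(queue.front())  # 2
    print(queue.size())  # 2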
================================================
FILE: data_structures/stacks/__init__.py
================================================
================================================
FILE: data_structures/stacks/balanced_parentheses.py
================================================
from .stack import Stack
def balanced_parentheses(parentheses: str) -> bool:
"""Use a stack to check if a string of parentheses is balanced.
>>> balanced_parentheses("([]{})")
True
>>> balanced_parentheses("[()]{}{[()()]()}")
True
>>> balanced_parentheses("[(])")
False
>>> balanced_parentheses("1+2*3-4")
True
>>> balanced_parentheses("")
True
"""
stack: Stack[str] = Stack()
bracket_pairs = {"(": ")", "[": "]", "{": "}"}
for bracket in parentheses:
if bracket in bracket_pairs:
stack.push(bracket)
elif bracket in (")", "]", "}") and (
stack.is_empty() or bracket_pairs[stack.pop()] != bracket
):
return False
return stack.is_empty()
if __name__ == "__main__":
from doctest import testmod
testmod()
examples = ["((()))", "((())", "(()))"]
print("Balanced parentheses demonstration:\n")
for example in examples:
not_str = "" if balanced_parentheses(example) else "not "
print(f"{example} is {not_str}balanced")
================================================
FILE: data_structures/stacks/dijkstras_two_stack_algorithm.py
================================================
"""
Author: Alexander Joslin
GitHub: github.com/echoaj
Explanation: https://medium.com/@haleesammar/implemented-in-js-dijkstras-2-stack-
algorithm-for-evaluating-mathematical-expressions-fc0837dae1ea
We can use Dijkstra's two stack algorithm to solve an equation
such as: (5 + ((4 * 2) * (2 + 3)))
THESE ARE THE ALGORITHM'S RULES:
RULE 1: Scan the expression from left to right. When an operand is encountered,
push it onto the operand stack.
RULE 2: When an operator is encountered in the expression,
push it onto the operator stack.
RULE 3: When a left parenthesis is encountered in the expression, ignore it.
RULE 4: When a right parenthesis is encountered in the expression,
pop an operator off the operator stack. The two operands it must
operate on must be the last two operands pushed onto the operand stack.
We therefore pop the operand stack twice, perform the operation,
and push the result back onto the operand stack so it will be available
for use as an operand of the next operator popped off the operator stack.
RULE 5: When the entire infix expression has been scanned, the value left on
the operand stack represents the value of the expression.
NOTE: It only works with single-digit, non-negative integer operands, since the
expression is scanned one character at a time.
"""
__author__ = "Alexander Joslin"
import operator as op
from .stack import Stack
def dijkstras_two_stack_algorithm(equation: str) -> int:
"""
DocTests
>>> dijkstras_two_stack_algorithm("(5 + 3)")
8
>>> dijkstras_two_stack_algorithm("((9 - (2 + 9)) + (8 - 1))")
5
>>> dijkstras_two_stack_algorithm("((((3 - 2) - (2 + 3)) + (2 - 4)) + 3)")
-3
:param equation: a string
:return: result: an integer
"""
operators = {"*": op.mul, "/": op.truediv, "+": op.add, "-": op.sub}
operand_stack: Stack[int] = Stack()
operator_stack: Stack[str] = Stack()
for i in equation:
if i.isdigit():
# RULE 1
operand_stack.push(int(i))
elif i in operators:
# RULE 2
operator_stack.push(i)
elif i == ")":
# RULE 4
opr = operator_stack.peek()
operator_stack.pop()
num1 = operand_stack.peek()
operand_stack.pop()
num2 = operand_stack.peek()
operand_stack.pop()
total = operators[opr](num2, num1)
operand_stack.push(total)
# RULE 5
return operand_stack.peek()
if __name__ == "__main__":
equation = "(5 + ((4 * 2) * (2 + 3)))"
# answer = 45
print(f"{equation} = {dijkstras_two_stack_algorithm(equation)}")
================================================
FILE: data_structures/stacks/infix_to_postfix_conversion.py
================================================
"""
https://en.wikipedia.org/wiki/Infix_notation
https://en.wikipedia.org/wiki/Reverse_Polish_notation
https://en.wikipedia.org/wiki/Shunting-yard_algorithm
"""
from typing import Literal
from .balanced_parentheses import balanced_parentheses
from .stack import Stack
PRECEDENCES: dict[str, int] = {
"+": 1,
"-": 1,
"*": 2,
"/": 2,
"^": 3,
}
ASSOCIATIVITIES: dict[str, Literal["LR", "RL"]] = {
"+": "LR",
"-": "LR",
"*": "LR",
"/": "LR",
"^": "RL",
}
def precedence(char: str) -> int:
"""
Return integer value representing an operator's precedence, or
order of operation.
https://en.wikipedia.org/wiki/Order_of_operations
"""
return PRECEDENCES.get(char, -1)
def associativity(char: str) -> Literal["LR", "RL"]:
"""
Return the associativity of the operator `char`.
https://en.wikipedia.org/wiki/Operator_associativity
"""
return ASSOCIATIVITIES[char]
def infix_to_postfix(expression_str: str) -> str:
"""
>>> infix_to_postfix("(1*(2+3)+4))")
Traceback (most recent call last):
...
ValueError: Mismatched parentheses
>>> infix_to_postfix("")
''
>>> infix_to_postfix("3+2")
'3 2 +'
>>> infix_to_postfix("(3+4)*5-6")
'3 4 + 5 * 6 -'
>>> infix_to_postfix("(1+2)*3/4-5")
'1 2 + 3 * 4 / 5 -'
>>> infix_to_postfix("a+b*c+(d*e+f)*g")
'a b c * + d e * f + g * +'
>>> infix_to_postfix("x^y/(5*z)+2")
'x y ^ 5 z * / 2 +'
>>> infix_to_postfix("2^3^2")
'2 3 2 ^ ^'
"""
if not balanced_parentheses(expression_str):
raise ValueError("Mismatched parentheses")
stack: Stack[str] = Stack()
postfix = []
for char in expression_str:
if char.isalpha() or char.isdigit():
postfix.append(char)
elif char == "(":
stack.push(char)
elif char == ")":
while not stack.is_empty() and stack.peek() != "(":
postfix.append(stack.pop())
stack.pop()
else:
while True:
if stack.is_empty():
stack.push(char)
break
char_precedence = precedence(char)
tos_precedence = precedence(stack.peek())
if char_precedence > tos_precedence:
stack.push(char)
break
if char_precedence < tos_precedence:
postfix.append(stack.pop())
continue
# Precedences are equal
if associativity(char) == "RL":
stack.push(char)
break
postfix.append(stack.pop())
while not stack.is_empty():
postfix.append(stack.pop())
return " ".join(postfix)
if __name__ == "__main__":
from doctest import testmod
testmod()
expression = "a+b*(c^d-e)^(f+g*h)-i"
print("Infix to Postfix Notation demonstration:\n")
print("Infix notation: " + expression)
print("Postfix notation: " + infix_to_postfix(expression))
================================================
FILE: data_structures/stacks/infix_to_prefix_conversion.py
================================================
"""
Output:
Enter an Infix Equation = a + b ^c
Symbol | Stack | Postfix
----------------------------
c | | c
^ | ^ | c
b | ^ | cb
+ | + | cb^
a | + | cb^a
| | cb^a+
a+b^c (Infix) -> +a^bc (Prefix)
"""
def infix_2_postfix(infix: str) -> str:
"""
>>> infix_2_postfix("a+b^c") # doctest: +NORMALIZE_WHITESPACE
Symbol | Stack | Postfix
----------------------------
a | | a
+ | + | a
b | + | ab
^ | +^ | ab
c | +^ | abc
| + | abc^
| | abc^+
'abc^+'
>>> infix_2_postfix("1*((-a)*2+b)") # doctest: +NORMALIZE_WHITESPACE
Symbol | Stack | Postfix
-------------------------------------------
1 | | 1
* | * | 1
( | *( | 1
( | *(( | 1
- | *((- | 1
a | *((- | 1a
) | *( | 1a-
* | *(* | 1a-
2 | *(* | 1a-2
+ | *(+ | 1a-2*
b | *(+ | 1a-2*b
) | * | 1a-2*b+
| | 1a-2*b+*
'1a-2*b+*'
>>> infix_2_postfix("")
Symbol | Stack | Postfix
----------------------------
''
>>> infix_2_postfix("(()")
Traceback (most recent call last):
...
ValueError: invalid expression
>>> infix_2_postfix("())")
Traceback (most recent call last):
...
IndexError: list index out of range
"""
stack = []
post_fix = []
priority = {
"^": 3,
"*": 2,
"/": 2,
"%": 2,
"+": 1,
"-": 1,
} # Priority of each operator
print_width = max(len(infix), 7)
# Print table header for output
print(
"Symbol".center(8),
"Stack".center(print_width),
"Postfix".center(print_width),
sep=" | ",
)
print("-" * (print_width * 3 + 7))
for x in infix:
if x.isalpha() or x.isdigit():
post_fix.append(x) # if x is Alphabet / Digit, add it to Postfix
elif x == "(":
stack.append(x) # if x is "(" push to Stack
elif x == ")": # if x is ")" pop stack until "(" is encountered
if len(stack) == 0: # close bracket without open bracket
raise IndexError("list index out of range")
while stack[-1] != "(":
post_fix.append(stack.pop()) # Pop stack & add the content to Postfix
stack.pop()
elif len(stack) == 0:
stack.append(x) # If stack is empty, push x to stack
else: # while priority of x is not > priority of element in the stack
while stack and stack[-1] != "(" and priority[x] <= priority[stack[-1]]:
post_fix.append(stack.pop()) # pop stack & add to Postfix
stack.append(x) # push x to stack
print(
x.center(8),
("".join(stack)).ljust(print_width),
("".join(post_fix)).ljust(print_width),
sep=" | ",
) # Output in tabular format
while len(stack) > 0: # while stack is not empty
if stack[-1] == "(": # open bracket with no close bracket
raise ValueError("invalid expression")
post_fix.append(stack.pop()) # pop stack & add to Postfix
print(
" ".center(8),
("".join(stack)).ljust(print_width),
("".join(post_fix)).ljust(print_width),
sep=" | ",
) # Output in tabular format
return "".join(post_fix) # return Postfix as str
def infix_2_prefix(infix: str) -> str:
"""
>>> infix_2_prefix("a+b^c") # doctest: +NORMALIZE_WHITESPACE
Symbol | Stack | Postfix
----------------------------
c | | c
^ | ^ | c
b | ^ | cb
+ | + | cb^
a | + | cb^a
| | cb^a+
'+a^bc'
>>> infix_2_prefix("1*((-a)*2+b)") # doctest: +NORMALIZE_WHITESPACE
Symbol | Stack | Postfix
-------------------------------------------
( | ( |
b | ( | b
+ | (+ | b
2 | (+ | b2
* | (+* | b2
( | (+*( | b2
a | (+*( | b2a
- | (+*(- | b2a
) | (+* | b2a-
) | | b2a-*+
* | * | b2a-*+
1 | * | b2a-*+1
| | b2a-*+1*
'*1+*-a2b'
>>> infix_2_prefix('')
Symbol | Stack | Postfix
----------------------------
''
>>> infix_2_prefix('(()')
Traceback (most recent call last):
...
IndexError: list index out of range
>>> infix_2_prefix('())')
Traceback (most recent call last):
...
ValueError: invalid expression
"""
reversed_infix = list(infix[::-1]) # reverse the infix equation
for i in range(len(reversed_infix)):
if reversed_infix[i] == "(":
reversed_infix[i] = ")" # change "(" to ")"
elif reversed_infix[i] == ")":
reversed_infix[i] = "(" # change ")" to "("
# call infix_2_postfix on Infix, return reverse of Postfix
return (infix_2_postfix("".join(reversed_infix)))[::-1]
if __name__ == "__main__":
from doctest import testmod
testmod()
Infix = input("\nEnter an Infix Equation = ") # Input an Infix equation
Infix = "".join(Infix.split()) # Remove spaces from the input
print("\n\t", Infix, "(Infix) -> ", infix_2_prefix(Infix), "(Prefix)")
================================================
FILE: data_structures/stacks/largest_rectangle_histogram.py
================================================
def largest_rectangle_area(heights: list[int]) -> int:
"""
Inputs an array of integers representing the heights of bars,
and returns the area of the largest rectangle that can be formed
>>> largest_rectangle_area([2, 1, 5, 6, 2, 3])
10
>>> largest_rectangle_area([2, 4])
4
>>> largest_rectangle_area([6, 2, 5, 4, 5, 1, 6])
12
>>> largest_rectangle_area([1])
1
"""
stack: list[int] = []
max_area = 0
heights = [*heights, 0] # make a new list by appending the sentinel 0
n = len(heights)
for i in range(n):
# make sure the stack remains in increasing order
while stack and heights[i] < heights[stack[-1]]:
h = heights[stack.pop()] # height of the bar
# if stack is empty, it means entire width can be taken from index 0 to i-1
w = i if not stack else i - stack[-1] - 1 # calculate width
max_area = max(max_area, h * w)
stack.append(i)
return max_area
if __name__ == "__main__":
import doctest
doctest.testmod()
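    # --- Illustrative demo (not part of the original file) ---
    # The sentinel 0 appended inside largest_rectangle_area() forces every index
    # left on the stack to be popped, so each bar's best rectangle is evaluated.
    sample = [2, 1, 5, 6, 2, 3]
    print(f"largest_rectangle_area({sample}) = {largest_rectangle_area(sample)}")  # 10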
================================================
FILE: data_structures/stacks/lexicographical_numbers.py
================================================
from collections.abc import Iterator
def lexical_order(max_number: int) -> Iterator[int]:
"""
Generate numbers in lexical order from 1 to max_number.
>>> " ".join(map(str, lexical_order(13)))
'1 10 11 12 13 2 3 4 5 6 7 8 9'
>>> list(lexical_order(1))
[1]
>>> " ".join(map(str, lexical_order(20)))
'1 10 11 12 13 14 15 16 17 18 19 2 20 3 4 5 6 7 8 9'
>>> " ".join(map(str, lexical_order(25)))
'1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 3 4 5 6 7 8 9'
>>> list(lexical_order(12))
[1, 10, 11, 12, 2, 3, 4, 5, 6, 7, 8, 9]
"""
stack = [1]
while stack:
num = stack.pop()
if num > max_number:
continue
yield num
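        # Depth-first step: the next sibling (num + 1) is pushed first and the
        # first child (num * 10) last, so the child is popped before the
        # sibling, which produces lexicographic order.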
if (num % 10) != 9:
stack.append(num + 1)
stack.append(num * 10)
if __name__ == "__main__":
from doctest import testmod
testmod()
print(f"Numbers from 1 to 25 in lexical order: {list(lexical_order(26))}")
================================================
FILE: data_structures/stacks/next_greater_element.py
================================================
from __future__ import annotations
arr = [-10, -5, 0, 5, 5.1, 11, 13, 21, 3, 4, -21, -10, -5, -1, 0]
expect = [-5, 0, 5, 5.1, 11, 13, 21, -1, 4, -1, -10, -5, -1, 0, -1]
def next_greatest_element_slow(arr: list[float]) -> list[float]:
"""
Get the Next Greatest Element (NGE) for each element in the array
by checking all subsequent elements to find the next greater one.
This is a brute-force implementation, and it has a time complexity
of O(n^2), where n is the size of the array.
Args:
arr: List of numbers for which the NGE is calculated.
Returns:
List containing the next greatest elements. If no
greater element is found, -1 is placed in the result.
Example:
>>> next_greatest_element_slow(arr) == expect
True
"""
result = []
arr_size = len(arr)
for i in range(arr_size):
next_element: float = -1
for j in range(i + 1, arr_size):
if arr[i] < arr[j]:
next_element = arr[j]
break
result.append(next_element)
return result
def next_greatest_element_fast(arr: list[float]) -> list[float]:
"""
Find the Next Greatest Element (NGE) for each element in the array
using a more readable approach. This implementation utilizes
enumerate() for the outer loop and slicing for the inner loop.
While this improves readability over next_greatest_element_slow(),
it still has a time complexity of O(n^2).
Args:
arr: List of numbers for which the NGE is calculated.
Returns:
List containing the next greatest elements. If no
greater element is found, -1 is placed in the result.
Example:
>>> next_greatest_element_fast(arr) == expect
True
"""
result = []
for i, outer in enumerate(arr):
next_item: float = -1
for inner in arr[i + 1 :]:
if outer < inner:
next_item = inner
break
result.append(next_item)
return result
def next_greatest_element(arr: list[float]) -> list[float]:
"""
Efficient solution to find the Next Greatest Element (NGE) for all elements
using a stack. The time complexity is reduced to O(n), making it suitable
for larger arrays.
The stack keeps track of elements for which the next greater element hasn't
been found yet. By iterating through the array in reverse (from the last
element to the first), the stack is used to efficiently determine the next
greatest element for each element.
Args:
arr: List of numbers for which the NGE is calculated.
Returns:
List containing the next greatest elements. If no
greater element is found, -1 is placed in the result.
Example:
>>> next_greatest_element(arr) == expect
True
"""
arr_size = len(arr)
stack: list[float] = []
result: list[float] = [-1] * arr_size
for index in reversed(range(arr_size)):
if stack:
while stack[-1] <= arr[index]:
stack.pop()
if not stack:
break
if stack:
result[index] = stack[-1]
stack.append(arr[index])
return result
if __name__ == "__main__":
from doctest import testmod
from timeit import timeit
testmod()
print(next_greatest_element_slow(arr))
print(next_greatest_element_fast(arr))
print(next_greatest_element(arr))
setup = (
"from __main__ import arr, next_greatest_element_slow, "
"next_greatest_element_fast, next_greatest_element"
)
print(
"next_greatest_element_slow():",
timeit("next_greatest_element_slow(arr)", setup=setup),
)
print(
"next_greatest_element_fast():",
timeit("next_greatest_element_fast(arr)", setup=setup),
)
print(
" next_greatest_element():",
timeit("next_greatest_element(arr)", setup=setup),
)
================================================
FILE: data_structures/stacks/postfix_evaluation.py
================================================
"""
Reverse Polish notation is also known as Polish postfix notation or simply postfix
notation.
https://en.wikipedia.org/wiki/Reverse_Polish_notation
Classic examples of simple stack implementations.
Valid operators are ^, *, /, + and -.
Each operand may be an integer or another expression.
Output:
Enter a Postfix Equation (space separated) = 5 6 9 * +
Symbol | Action | Stack
-----------------------------------
5 | push(5) | 5
6 | push(6) | 5,6
9 | push(9) | 5,6,9
| pop(9) | 5,6
| pop(6) | 5
* | push(6*9) | 5,54
| pop(54) | 5
| pop(5) |
+ | push(5+54) | 59
Result = 59
"""
# Defining valid unary operator symbols
UNARY_OP_SYMBOLS = ("-", "+")
# operators & their respective operation
OPERATORS = {
"^": lambda p, q: p**q,
"*": lambda p, q: p * q,
"/": lambda p, q: p / q,
"+": lambda p, q: p + q,
"-": lambda p, q: p - q,
}
def parse_token(token: str | float) -> float | str:
"""
Converts the given token to a float if it is a number; if it is a valid operator,
the token is returned unchanged. A ValueError is raised when the token is neither
a number nor a valid operator.
Parameters
----------
token: The data that needs to be converted to the appropriate operator or number.
Returns
-------
float or str
Returns a float if `token` is a number or a str if `token` is an operator
"""
if token in OPERATORS:
return token
try:
return float(token)
except ValueError:
msg = f"{token} is neither a number nor a valid operator"
raise ValueError(msg)
def evaluate(post_fix: list[str], verbose: bool = False) -> float:
"""
Evaluate postfix expression using a stack.
>>> evaluate(["0"])
0.0
>>> evaluate(["-0"])
-0.0
>>> evaluate(["1"])
1.0
>>> evaluate(["-1"])
-1.0
>>> evaluate(["-1.1"])
-1.1
>>> evaluate(["2", "1", "+", "3", "*"])
9.0
>>> evaluate(["2", "1.9", "+", "3", "*"])
11.7
>>> evaluate(["2", "-1.9", "+", "3", "*"])
0.30000000000000027
>>> evaluate(["4", "13", "5", "/", "+"])
6.6
>>> evaluate(["2", "-", "3", "+"])
1.0
>>> evaluate(["-4", "5", "*", "6", "-"])
-26.0
>>> evaluate([])
0
>>> evaluate(["4", "-", "6", "7", "/", "9", "8"])
Traceback (most recent call last):
...
ArithmeticError: Input is not a valid postfix expression
Parameters
----------
post_fix:
The postfix expression is tokenized into operators and operands and stored
as a Python list
verbose:
Display stack contents while evaluating the expression if verbose is True
Returns
-------
float
The evaluated value
"""
if not post_fix:
return 0
# Checking the list to find out whether the postfix expression is valid
valid_expression = [parse_token(token) for token in post_fix]
if verbose:
# print table header
print("Symbol".center(8), "Action".center(12), "Stack", sep=" | ")
print("-" * (30 + len(post_fix)))
stack = []
for x in valid_expression:
if x not in OPERATORS:
stack.append(x) # append x to stack
if verbose:
# output in tabular format
print(
f"{x}".rjust(8),
f"push({x})".ljust(12),
stack,
sep=" | ",
)
continue
# If x is operator
# If only 1 value is inside the stack and + or - is encountered
# then this is unary + or - case
if x in UNARY_OP_SYMBOLS and len(stack) < 2:
b = stack.pop() # pop stack
if x == "-":
b *= -1 # negate b
stack.append(b)
if verbose:
# output in tabular format
print(
"".rjust(8),
f"pop({b})".ljust(12),
stack,
sep=" | ",
)
print(
str(x).rjust(8),
f"push({x}{b})".ljust(12),
stack,
sep=" | ",
)
continue
b = stack.pop() # pop stack
if verbose:
# output in tabular format
print(
"".rjust(8),
f"pop({b})".ljust(12),
stack,
sep=" | ",
)
a = stack.pop() # pop stack
if verbose:
# output in tabular format
print(
"".rjust(8),
f"pop({a})".ljust(12),
stack,
sep=" | ",
)
# evaluate the 2 values popped from stack & push result to stack
stack.append(OPERATORS[x](a, b)) # type: ignore[index]
if verbose:
# output in tabular format
print(
f"{x}".rjust(8),
f"push({a}{x}{b})".ljust(12),
stack,
sep=" | ",
)
# If everything is executed correctly, the stack will contain
# only one element which is the result
if len(stack) != 1:
raise ArithmeticError("Input is not a valid postfix expression")
return float(stack[0])
if __name__ == "__main__":
# Create a loop so that the user can evaluate postfix expressions multiple times
while True:
expression = input("Enter a Postfix Expression (space separated): ").split(" ")
prompt = "Do you want to see stack contents while evaluating? [y/N]: "
verbose = input(prompt).strip().lower() == "y"
output = evaluate(expression, verbose)
print("Result = ", output)
prompt = "Do you want to enter another expression? [y/N]: "
if input(prompt).strip().lower() != "y":
break
================================================
FILE: data_structures/stacks/prefix_evaluation.py
================================================
"""
Program to evaluate a prefix expression.
https://en.wikipedia.org/wiki/Polish_notation
"""
operators = {
"+": lambda x, y: x + y,
"-": lambda x, y: x - y,
"*": lambda x, y: x * y,
"/": lambda x, y: x / y,
}
def is_operand(c):
"""
Return True if the given char c is an operand, i.e. it is a digit
>>> is_operand("1")
True
>>> is_operand("+")
False
"""
return c.isdigit()
def evaluate(expression):
"""
Evaluate a given expression in prefix notation.
Asserts that the given expression is valid.
>>> evaluate("+ 9 * 2 6")
21
>>> evaluate("/ * 10 2 + 4 1 ")
4.0
>>> evaluate("2")
2
>>> evaluate("+ * 2 3 / 8 4")
8.0
"""
stack = []
# iterate over the string in reverse order
for c in expression.split()[::-1]:
# push operand to stack
if is_operand(c):
stack.append(int(c))
else:
# pop two values from the stack and calculate the result
# push the result onto the stack again
o1 = stack.pop()
o2 = stack.pop()
stack.append(operators[c](o1, o2))
return stack.pop()
def evaluate_recursive(expression: list[str]):
"""
Alternative recursive implementation
>>> evaluate_recursive(['2'])
2
>>> expression = ['+', '*', '2', '3', '/', '8', '4']
>>> evaluate_recursive(expression)
8.0
>>> expression
[]
>>> evaluate_recursive(['+', '9', '*', '2', '6'])
21
>>> evaluate_recursive(['/', '*', '10', '2', '+', '4', '1'])
4.0
"""
op = expression.pop(0)
if is_operand(op):
return int(op)
operation = operators[op]
a = evaluate_recursive(expression)
b = evaluate_recursive(expression)
return operation(a, b)
# Driver code
if __name__ == "__main__":
test_expression = "+ 9 * 2 6"
print(evaluate(test_expression))
test_expression = "/ * 10 2 + 4 1 "
print(evaluate(test_expression))
================================================
FILE: data_structures/stacks/stack.py
================================================
from __future__ import annotations
from typing import TypeVar
T = TypeVar("T")
class StackOverflowError(BaseException):
pass
class StackUnderflowError(BaseException):
pass
class Stack[T]:
"""A stack is an abstract data type that serves as a collection of
elements with two principal operations: push() and pop(). push() adds an
element to the top of the stack, and pop() removes an element from the top
of a stack. The order in which elements come off of a stack is
Last In, First Out (LIFO).
https://en.wikipedia.org/wiki/Stack_(abstract_data_type)
"""
def __init__(self, limit: int = 10):
self.stack: list[T] = []
self.limit = limit
def __bool__(self) -> bool:
return bool(self.stack)
def __str__(self) -> str:
return str(self.stack)
def push(self, data: T) -> None:
"""
Push an element to the top of the stack.
>>> S = Stack(2) # stack size = 2
>>> S.push(10)
>>> S.push(20)
>>> print(S)
[10, 20]
>>> S = Stack(1) # stack size = 1
>>> S.push(10)
>>> S.push(20)
Traceback (most recent call last):
...
data_structures.stacks.stack.StackOverflowError
"""
if len(self.stack) >= self.limit:
raise StackOverflowError
self.stack.append(data)
def pop(self) -> T:
"""
Pop an element off of the top of the stack.
>>> S = Stack()
>>> S.push(-5)
>>> S.push(10)
>>> S.pop()
10
>>> Stack().pop()
Traceback (most recent call last):
...
data_structures.stacks.stack.StackUnderflowError
"""
if not self.stack:
raise StackUnderflowError
return self.stack.pop()
def peek(self) -> T:
"""
Peek at the top-most element of the stack.
>>> S = Stack()
>>> S.push(-5)
>>> S.push(10)
>>> S.peek()
10
>>> Stack().peek()
Traceback (most recent call last):
...
data_structures.stacks.stack.StackUnderflowError
"""
if not self.stack:
raise StackUnderflowError
return self.stack[-1]
def is_empty(self) -> bool:
"""
Check if a stack is empty.
>>> S = Stack()
>>> S.is_empty()
True
>>> S = Stack()
>>> S.push(10)
>>> S.is_empty()
False
"""
return not bool(self.stack)
def is_full(self) -> bool:
"""
>>> S = Stack()
>>> S.is_full()
False
>>> S = Stack(1)
>>> S.push(10)
>>> S.is_full()
True
"""
return self.size() == self.limit
def size(self) -> int:
"""
Return the size of the stack.
>>> S = Stack(3)
>>> S.size()
0
>>> S = Stack(3)
>>> S.push(10)
>>> S.size()
1
>>> S = Stack(3)
>>> S.push(10)
>>> S.push(20)
>>> S.size()
2
"""
return len(self.stack)
def __contains__(self, item: T) -> bool:
"""
Check if item is in stack
>>> S = Stack(3)
>>> S.push(10)
>>> 10 in S
True
>>> S = Stack(3)
>>> S.push(10)
>>> 20 in S
False
"""
return item in self.stack
def test_stack() -> None:
"""
>>> test_stack()
"""
stack: Stack[int] = Stack(10)
assert bool(stack) is False
assert stack.is_empty() is True
assert stack.is_full() is False
assert str(stack) == "[]"
try:
_ = stack.pop()
raise AssertionError # This should not happen
except StackUnderflowError:
assert True # This should happen
try:
_ = stack.peek()
raise AssertionError # This should not happen
except StackUnderflowError:
assert True # This should happen
for i in range(10):
assert stack.size() == i
stack.push(i)
assert bool(stack)
assert not stack.is_empty()
assert stack.is_full()
assert str(stack) == str(list(range(10)))
assert stack.pop() == 9
assert stack.peek() == 8
stack.push(100)
assert str(stack) == str([0, 1, 2, 3, 4, 5, 6, 7, 8, 100])
try:
stack.push(200)
raise AssertionError # This should not happen
except StackOverflowError:
assert True # This should happen
assert not stack.is_empty()
assert stack.size() == 10
assert 5 in stack
assert 55 not in stack
if __name__ == "__main__":
test_stack()
import doctest
doctest.testmod()
================================================
FILE: data_structures/stacks/stack_using_two_queues.py
================================================
from __future__ import annotations
from collections import deque
from dataclasses import dataclass, field
@dataclass
class StackWithQueues:
"""
https://www.geeksforgeeks.org/implement-stack-using-queue/
>>> stack = StackWithQueues()
>>> stack.push(1)
>>> stack.push(2)
>>> stack.push(3)
>>> stack.peek()
3
>>> stack.pop()
3
>>> stack.peek()
2
>>> stack.pop()
2
>>> stack.pop()
1
>>> stack.peek() is None
True
>>> stack.pop()
Traceback (most recent call last):
...
IndexError: pop from an empty deque
"""
main_queue: deque[int] = field(default_factory=deque)
temp_queue: deque[int] = field(default_factory=deque)
def push(self, item: int) -> None:
self.temp_queue.append(item)
while self.main_queue:
self.temp_queue.append(self.main_queue.popleft())
self.main_queue, self.temp_queue = self.temp_queue, self.main_queue
def pop(self) -> int:
return self.main_queue.popleft()
def peek(self) -> int | None:
return self.main_queue[0] if self.main_queue else None
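# A minimal usage sketch (not part of the original module): after every push the
# two deques are swapped so that main_queue always holds the elements in LIFO
# order, which is why pop() and peek() can simply read from its left end.
# The helper name `_demo_lifo_order` is hypothetical.
def _demo_lifo_order() -> list[int]:
    """
    >>> _demo_lifo_order()
    [3, 2, 1]
    """
    stack = StackWithQueues()
    for item in (1, 2, 3):
        stack.push(item)
    return [stack.pop() for _ in range(3)]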
if __name__ == "__main__":
import doctest
doctest.testmod()
stack: StackWithQueues | None = StackWithQueues()
while stack:
print("\nChoose operation:")
print("1. Push")
print("2. Pop")
print("3. Peek")
print("4. Quit")
choice = input("Enter choice (1/2/3/4): ")
if choice == "1":
element = int(input("Enter an integer to push: ").strip())
stack.push(element)
print(f"{element} pushed onto the stack.")
        elif choice == "2":
            if stack.main_queue:
                print(f"Popped element: {stack.pop()}")
            else:
                print("Stack is empty.")
elif choice == "3":
peeked_element = stack.peek()
if peeked_element is not None:
print(f"Top element: {peeked_element}")
else:
print("Stack is empty.")
elif choice == "4":
del stack
stack = None
else:
print("Invalid choice. Please try again.")
================================================
FILE: data_structures/stacks/stack_with_doubly_linked_list.py
================================================
# A complete working Python program to demonstrate all
# stack operations using a doubly linked list
from __future__ import annotations
from typing import TypeVar
T = TypeVar("T")
class Node[T]:
def __init__(self, data: T):
self.data = data # Assign data
self.next: Node[T] | None = None # Initialize next as null
self.prev: Node[T] | None = None # Initialize prev as null
class Stack[T]:
"""
>>> stack = Stack()
>>> stack.is_empty()
True
>>> stack.print_stack()
stack elements are:
>>> for i in range(4):
... stack.push(i)
...
>>> stack.is_empty()
False
>>> stack.print_stack()
stack elements are:
3->2->1->0->
>>> stack.top()
3
>>> len(stack)
4
>>> stack.pop()
3
>>> stack.print_stack()
stack elements are:
2->1->0->
"""
def __init__(self) -> None:
self.head: Node[T] | None = None
def push(self, data: T) -> None:
"""add a Node to the stack"""
if self.head is None:
self.head = Node(data)
else:
new_node = Node(data)
self.head.prev = new_node
new_node.next = self.head
new_node.prev = None
self.head = new_node
def pop(self) -> T | None:
"""pop the top element off the stack"""
if self.head is None:
return None
else:
assert self.head is not None
temp = self.head.data
self.head = self.head.next
if self.head is not None:
self.head.prev = None
return temp
def top(self) -> T | None:
"""return the top element of the stack"""
return self.head.data if self.head is not None else None
def __len__(self) -> int:
temp = self.head
count = 0
while temp is not None:
count += 1
temp = temp.next
return count
def is_empty(self) -> bool:
return self.head is None
def print_stack(self) -> None:
print("stack elements are:")
temp = self.head
while temp is not None:
print(temp.data, end="->")
temp = temp.next
# Code execution starts here
if __name__ == "__main__":
# Start with the empty stack
stack: Stack[int] = Stack()
# Insert 4 at the beginning. So stack becomes 4->None
print("Stack operations using Doubly LinkedList")
stack.push(4)
    # Insert 5 at the beginning. So stack becomes 5->4->None
stack.push(5)
    # Insert 6 at the beginning. So stack becomes 6->5->4->None
stack.push(6)
    # Insert 7 at the beginning. So stack becomes 7->6->5->4->None
stack.push(7)
# Print the stack
stack.print_stack()
# Print the top element
print("\nTop element is ", stack.top())
# Print the stack size
print("Size of the stack is ", len(stack))
# pop the top element
stack.pop()
# pop the top element
stack.pop()
# two elements have now been popped off
stack.print_stack()
# Print True if the stack is empty else False
print("\nstack is empty:", stack.is_empty())
================================================
FILE: data_structures/stacks/stack_with_singly_linked_list.py
================================================
"""A Stack using a linked list like structure"""
from __future__ import annotations
from collections.abc import Iterator
from typing import TypeVar
T = TypeVar("T")
class Node[T]:
def __init__(self, data: T):
self.data = data
self.next: Node[T] | None = None
def __str__(self) -> str:
return f"{self.data}"
class LinkedStack[T]:
"""
Linked List Stack implementing push (to top),
pop (from top) and is_empty
>>> stack = LinkedStack()
>>> stack.is_empty()
True
>>> stack.push(5)
>>> stack.push(9)
>>> stack.push('python')
>>> stack.is_empty()
False
>>> stack.pop()
'python'
>>> stack.push('algorithms')
>>> stack.pop()
'algorithms'
>>> stack.pop()
9
>>> stack.pop()
5
>>> stack.is_empty()
True
>>> stack.pop()
Traceback (most recent call last):
...
IndexError: pop from empty stack
"""
def __init__(self) -> None:
self.top: Node[T] | None = None
def __iter__(self) -> Iterator[T]:
node = self.top
while node:
yield node.data
node = node.next
def __str__(self) -> str:
"""
>>> stack = LinkedStack()
>>> stack.push("c")
>>> stack.push("b")
>>> stack.push("a")
>>> str(stack)
'a->b->c'
"""
return "->".join([str(item) for item in self])
def __len__(self) -> int:
"""
>>> stack = LinkedStack()
>>> len(stack) == 0
True
>>> stack.push("c")
>>> stack.push("b")
>>> stack.push("a")
>>> len(stack) == 3
True
"""
return len(tuple(iter(self)))
def is_empty(self) -> bool:
"""
>>> stack = LinkedStack()
>>> stack.is_empty()
True
>>> stack.push(1)
>>> stack.is_empty()
False
"""
return self.top is None
def push(self, item: T) -> None:
"""
>>> stack = LinkedStack()
>>> stack.push("Python")
>>> stack.push("Java")
>>> stack.push("C")
>>> str(stack)
'C->Java->Python'
"""
node = Node(item)
if not self.is_empty():
node.next = self.top
self.top = node
def pop(self) -> T:
"""
>>> stack = LinkedStack()
>>> stack.pop()
Traceback (most recent call last):
...
IndexError: pop from empty stack
>>> stack.push("c")
>>> stack.push("b")
>>> stack.push("a")
>>> stack.pop() == 'a'
True
>>> stack.pop() == 'b'
True
>>> stack.pop() == 'c'
True
"""
if self.is_empty():
raise IndexError("pop from empty stack")
assert isinstance(self.top, Node)
pop_node = self.top
self.top = self.top.next
return pop_node.data
def peek(self) -> T:
"""
>>> stack = LinkedStack()
>>> stack.push("Java")
>>> stack.push("C")
>>> stack.push("Python")
>>> stack.peek()
'Python'
"""
if self.is_empty():
raise IndexError("peek from empty stack")
assert self.top is not None
return self.top.data
def clear(self) -> None:
"""
>>> stack = LinkedStack()
>>> stack.push("Java")
>>> stack.push("C")
>>> stack.push("Python")
>>> str(stack)
'Python->C->Java'
>>> stack.clear()
>>> len(stack) == 0
True
"""
self.top = None
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: data_structures/stacks/stock_span_problem.py
================================================
"""
The stock span problem is a financial problem in which we have a series of n daily
price quotes for a stock, and we need to calculate the span of the stock's price
for all n days.
The span Si of the stock's price on a given day i is defined as the maximum number
of consecutive days (ending on and including day i) for which the price of the
stock on each of those days is less than or equal to its price on day i.
"""
def calculate_span(price: list[int]) -> list[int]:
"""
Calculate the span values for a given list of stock prices.
Args:
price: List of stock prices.
Returns:
List of span values.
>>> calculate_span([10, 4, 5, 90, 120, 80])
[1, 1, 2, 4, 5, 1]
>>> calculate_span([100, 50, 60, 70, 80, 90])
[1, 1, 2, 3, 4, 5]
>>> calculate_span([5, 4, 3, 2, 1])
[1, 1, 1, 1, 1]
>>> calculate_span([1, 2, 3, 4, 5])
[1, 2, 3, 4, 5]
>>> calculate_span([10, 20, 30, 40, 50])
[1, 2, 3, 4, 5]
>>> calculate_span([100, 80, 60, 70, 60, 75, 85])
[1, 1, 1, 2, 1, 4, 6]
"""
n = len(price)
s = [0] * n
    # Create a stack and push the index of the first element onto it
st = []
st.append(0)
# Span value of first element is always 1
s[0] = 1
# Calculate span values for rest of the elements
for i in range(1, n):
        # Pop elements from the stack while it is not empty and the price at
        # the index on top of the stack is less than or equal to price[i]
while len(st) > 0 and price[st[-1]] <= price[i]:
st.pop()
        # If the stack becomes empty, then price[i] is greater than all
        # elements to its left, i.e. price[0], price[1], ..., price[i-1].
        # Otherwise, price[i] is greater than the elements after the index
        # at the top of the stack
s[i] = i + 1 if len(st) <= 0 else (i - st[-1])
# Push this element to stack
st.append(i)
return s
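# A brute-force reference (a sketch, not part of the original module): it
# recomputes every span directly from the definition in the module docstring and
# can be used to cross-check the stack-based calculate_span above. The name
# `naive_span` is hypothetical.
def naive_span(price: list[int]) -> list[int]:
    """
    >>> naive_span([10, 4, 5, 90, 120, 80])
    [1, 1, 2, 4, 5, 1]
    >>> naive_span([100, 80, 60, 70, 60, 75, 85]) == calculate_span(
    ...     [100, 80, 60, 70, 60, 75, 85])
    True
    """
    spans = []
    for i, today in enumerate(price):
        span = 1  # the day itself always counts
        j = i - 1
        while j >= 0 and price[j] <= today:
            span += 1
            j -= 1
        spans.append(span)
    return spans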
# A utility function to print elements of array
def print_array(arr, n):
for i in range(n):
print(arr[i], end=" ")
# Driver program to test above function
price = [10, 4, 5, 90, 120, 80]
# Calculate the span values
S = calculate_span(price)
# Print the calculated span values
print_array(S, len(price))
================================================
FILE: data_structures/suffix_tree/__init__.py
================================================
================================================
FILE: data_structures/suffix_tree/example/__init__.py
================================================
================================================
FILE: data_structures/suffix_tree/example/example_usage.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11554
# https://github.com/TheAlgorithms/Python/pull/11554
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
from data_structures.suffix_tree.suffix_tree import SuffixTree
def main() -> None:
"""
Demonstrate the usage of the SuffixTree class.
- Initializes a SuffixTree with a predefined text.
- Defines a list of patterns to search for within the suffix tree.
- Searches for each pattern in the suffix tree.
Patterns tested:
- "ana" (found) --> True
- "ban" (found) --> True
- "na" (found) --> True
- "xyz" (not found) --> False
- "mon" (found) --> True
"""
text = "monkey banana"
suffix_tree = SuffixTree(text)
patterns = ["ana", "ban", "na", "xyz", "mon"]
for pattern in patterns:
found = suffix_tree.search(pattern)
print(f"Pattern '{pattern}' found: {found}")
if __name__ == "__main__":
main()
================================================
FILE: data_structures/suffix_tree/suffix_tree.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11554
# https://github.com/TheAlgorithms/Python/pull/11554
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
from data_structures.suffix_tree.suffix_tree_node import SuffixTreeNode
class SuffixTree:
def __init__(self, text: str) -> None:
"""
Initializes the suffix tree with the given text.
Args:
text (str): The text for which the suffix tree is to be built.
"""
self.text: str = text
self.root: SuffixTreeNode = SuffixTreeNode()
self.build_suffix_tree()
def build_suffix_tree(self) -> None:
"""
Builds the suffix tree for the given text by adding all suffixes.
"""
text = self.text
n = len(text)
for i in range(n):
suffix = text[i:]
self._add_suffix(suffix, i)
def _add_suffix(self, suffix: str, index: int) -> None:
"""
Adds a suffix to the suffix tree.
Args:
suffix (str): The suffix to add.
index (int): The starting index of the suffix in the original text.
"""
node = self.root
for char in suffix:
if char not in node.children:
node.children[char] = SuffixTreeNode()
node = node.children[char]
node.is_end_of_string = True
node.start = index
node.end = index + len(suffix) - 1
def search(self, pattern: str) -> bool:
"""
Searches for a pattern in the suffix tree.
Args:
pattern (str): The pattern to search for.
Returns:
bool: True if the pattern is found, False otherwise.
"""
node = self.root
for char in pattern:
if char not in node.children:
return False
node = node.children[char]
return True
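# A minimal usage sketch (not part of the original module): because every
# substring of the text is a prefix of one of its suffixes, SuffixTree.search
# answers substring queries. The helper name `_substring_search_demo` is
# hypothetical.
def _substring_search_demo() -> list[bool]:
    """
    >>> _substring_search_demo()
    [True, True, False]
    """
    tree = SuffixTree("banana")
    return [tree.search("ban"), tree.search("ana"), tree.search("bandana")]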
================================================
FILE: data_structures/suffix_tree/suffix_tree_node.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11554
# https://github.com/TheAlgorithms/Python/pull/11554
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
from __future__ import annotations
class SuffixTreeNode:
def __init__(
self,
children: dict[str, SuffixTreeNode] | None = None,
is_end_of_string: bool = False,
start: int | None = None,
end: int | None = None,
suffix_link: SuffixTreeNode | None = None,
) -> None:
"""
Initializes a suffix tree node.
Parameters:
children (dict[str, SuffixTreeNode] | None): The children of this node.
is_end_of_string (bool): Indicates if this node represents
the end of a string.
start (int | None): The start index of the suffix in the text.
end (int | None): The end index of the suffix in the text.
suffix_link (SuffixTreeNode | None): Link to another suffix tree node.
"""
self.children = children or {}
self.is_end_of_string = is_end_of_string
self.start = start
self.end = end
self.suffix_link = suffix_link
================================================
FILE: data_structures/suffix_tree/tests/__init__.py
================================================
================================================
FILE: data_structures/suffix_tree/tests/test_suffix_tree.py
================================================
# Created by: Ramy-Badr-Ahmed (https://github.com/Ramy-Badr-Ahmed)
# in Pull Request: #11554
# https://github.com/TheAlgorithms/Python/pull/11554
#
# Please mention me (@Ramy-Badr-Ahmed) in any issue or pull request
# addressing bugs/corrections to this file.
# Thank you!
import unittest
from data_structures.suffix_tree.suffix_tree import SuffixTree
class TestSuffixTree(unittest.TestCase):
def setUp(self) -> None:
"""Set up the initial conditions for each test."""
self.text = "banana"
self.suffix_tree = SuffixTree(self.text)
def test_search_existing_patterns(self) -> None:
"""Test searching for patterns that exist in the suffix tree."""
patterns = ["ana", "ban", "na"]
for pattern in patterns:
with self.subTest(pattern=pattern):
assert self.suffix_tree.search(pattern), (
f"Pattern '{pattern}' should be found."
)
def test_search_non_existing_patterns(self) -> None:
"""Test searching for patterns that do not exist in the suffix tree."""
patterns = ["xyz", "apple", "cat"]
for pattern in patterns:
with self.subTest(pattern=pattern):
assert not self.suffix_tree.search(pattern), (
f"Pattern '{pattern}' should not be found."
)
def test_search_empty_pattern(self) -> None:
"""Test searching for an empty pattern."""
assert self.suffix_tree.search(""), "An empty pattern should be found."
def test_search_full_text(self) -> None:
"""Test searching for the full text."""
assert self.suffix_tree.search(self.text), (
"The full text should be found in the suffix tree."
)
def test_search_substrings(self) -> None:
"""Test searching for substrings of the full text."""
substrings = ["ban", "ana", "a", "na"]
for substring in substrings:
with self.subTest(substring=substring):
assert self.suffix_tree.search(substring), (
f"Substring '{substring}' should be found."
)
if __name__ == "__main__":
unittest.main()
================================================
FILE: data_structures/trie/__init__.py
================================================
================================================
FILE: data_structures/trie/radix_tree.py
================================================
"""
A Radix Tree is a data structure that represents a space-optimized
trie (prefix tree) in which each node that is the only child is merged
with its parent [https://en.wikipedia.org/wiki/Radix_tree]
"""
class RadixNode:
def __init__(self, prefix: str = "", is_leaf: bool = False) -> None:
# Mapping from the first character of the prefix of the node
self.nodes: dict[str, RadixNode] = {}
# A node will be a leaf if the tree contains its word
self.is_leaf = is_leaf
self.prefix = prefix
def match(self, word: str) -> tuple[str, str, str]:
"""Compute the common substring of the prefix of the node and a word
Args:
word (str): word to compare
Returns:
(str, str, str): common substring, remaining prefix, remaining word
>>> RadixNode("myprefix").match("mystring")
('my', 'prefix', 'string')
"""
x = 0
for q, w in zip(self.prefix, word):
if q != w:
break
x += 1
return self.prefix[:x], self.prefix[x:], word[x:]
def insert_many(self, words: list[str]) -> None:
"""Insert many words in the tree
Args:
words (list[str]): list of words
>>> RadixNode("myprefix").insert_many(["mystring", "hello"])
"""
for word in words:
self.insert(word)
def insert(self, word: str) -> None:
"""Insert a word into the tree
Args:
word (str): word to insert
>>> RadixNode("myprefix").insert("mystring")
>>> root = RadixNode()
>>> root.insert_many(['myprefix', 'myprefixA', 'myprefixAA'])
>>> root.print_tree()
- myprefix (leaf)
-- A (leaf)
--- A (leaf)
"""
# Case 1: If the word is the prefix of the node
# Solution: We set the current node as leaf
if self.prefix == word and not self.is_leaf:
self.is_leaf = True
# Case 2: The node has no edges that have a prefix to the word
# Solution: We create an edge from the current node to a new one
# containing the word
elif word[0] not in self.nodes:
self.nodes[word[0]] = RadixNode(prefix=word, is_leaf=True)
else:
incoming_node = self.nodes[word[0]]
matching_string, remaining_prefix, remaining_word = incoming_node.match(
word
)
            # Case 3: The node's prefix equals the matching string
            # Solution: We insert the remaining word into the next node
if remaining_prefix == "":
self.nodes[matching_string[0]].insert(remaining_word)
            # Case 4: The matching string is only part of the node's prefix
            # Solution: Create a node in between both nodes, change
            # prefixes and add the new node for the remaining word
else:
incoming_node.prefix = remaining_prefix
aux_node = self.nodes[matching_string[0]]
self.nodes[matching_string[0]] = RadixNode(matching_string, False)
self.nodes[matching_string[0]].nodes[remaining_prefix[0]] = aux_node
if remaining_word == "":
self.nodes[matching_string[0]].is_leaf = True
else:
self.nodes[matching_string[0]].insert(remaining_word)
def find(self, word: str) -> bool:
"""Returns if the word is on the tree
Args:
word (str): word to check
Returns:
bool: True if the word appears on the tree
>>> RadixNode("myprefix").find("mystring")
False
"""
incoming_node = self.nodes.get(word[0], None)
if not incoming_node:
return False
else:
_matching_string, remaining_prefix, remaining_word = incoming_node.match(
word
)
# If there is remaining prefix, the word can't be on the tree
if remaining_prefix != "":
return False
# This applies when the word and the prefix are equal
elif remaining_word == "":
return incoming_node.is_leaf
# We have word remaining so we check the next node
else:
return incoming_node.find(remaining_word)
def delete(self, word: str) -> bool:
"""Deletes a word from the tree if it exists
Args:
word (str): word to be deleted
Returns:
bool: True if the word was found and deleted. False if word is not found
>>> RadixNode("myprefix").delete("mystring")
False
"""
incoming_node = self.nodes.get(word[0], None)
if not incoming_node:
return False
else:
_matching_string, remaining_prefix, remaining_word = incoming_node.match(
word
)
# If there is remaining prefix, the word can't be on the tree
if remaining_prefix != "":
return False
# We have word remaining so we check the next node
elif remaining_word != "":
return incoming_node.delete(remaining_word)
# If it is not a leaf, we don't have to delete
elif not incoming_node.is_leaf:
return False
else:
                # We delete the node if no edges go out of it
if len(incoming_node.nodes) == 0:
del self.nodes[word[0]]
# We merge the current node with its only child
if len(self.nodes) == 1 and not self.is_leaf:
merging_node = next(iter(self.nodes.values()))
self.is_leaf = merging_node.is_leaf
self.prefix += merging_node.prefix
self.nodes = merging_node.nodes
# If there is more than 1 edge, we just mark it as non-leaf
elif len(incoming_node.nodes) > 1:
incoming_node.is_leaf = False
# If there is 1 edge, we merge it with its child
else:
merging_node = next(iter(incoming_node.nodes.values()))
incoming_node.is_leaf = merging_node.is_leaf
incoming_node.prefix += merging_node.prefix
incoming_node.nodes = merging_node.nodes
return True
def print_tree(self, height: int = 0) -> None:
"""Print the tree
Args:
height (int, optional): Height of the printed node
"""
if self.prefix != "":
print("-" * height, self.prefix, " (leaf)" if self.is_leaf else "")
for value in self.nodes.values():
value.print_tree(height + 1)
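# A minimal sketch (not part of the original module) of the prefix-splitting
# behaviour described in the insert() comments above: inserting "test" and then
# "team" splits the shared prefix "te" into its own internal (non-leaf) node.
# The helper name `_prefix_split_demo` is hypothetical.
def _prefix_split_demo() -> tuple[str, bool, bool, bool]:
    """
    >>> _prefix_split_demo()
    ('te', True, True, False)
    """
    root = RadixNode()
    root.insert_many(["test", "team"])
    return (
        root.nodes["t"].prefix,  # the shared prefix now has its own node
        root.find("test"),
        root.find("team"),
        root.find("te"),  # "te" was never inserted as a word
    )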
def test_trie() -> bool:
words = "banana bananas bandana band apple all beast".split()
root = RadixNode()
root.insert_many(words)
assert all(root.find(word) for word in words)
assert not root.find("bandanas")
assert not root.find("apps")
root.delete("all")
assert not root.find("all")
root.delete("banana")
assert not root.find("banana")
assert root.find("bananas")
return True
def pytests() -> None:
assert test_trie()
def main() -> None:
"""
>>> pytests()
"""
root = RadixNode()
words = "banana bananas bandanas bandana band apple all beast".split()
root.insert_many(words)
print("Words:", words)
print("Tree:")
root.print_tree()
if __name__ == "__main__":
main()
================================================
FILE: data_structures/trie/trie.py
================================================
"""
A Trie/Prefix Tree is a kind of search tree used to provide quick lookup
of words/patterns in a set of words. A basic Trie, however, has O(n^2) space
complexity, which can make it impractical for large sets of words. It provides
O(max(len(search_string), length of longest word)) lookup time, which makes it
an optimal approach when space is not an issue.
"""
class TrieNode:
def __init__(self) -> None:
self.nodes: dict[str, TrieNode] = {} # Mapping from char to TrieNode
self.is_leaf = False
def insert_many(self, words: list[str]) -> None:
"""
Inserts a list of words into the Trie
:param words: list of string words
:return: None
"""
for word in words:
self.insert(word)
def insert(self, word: str) -> None:
"""
Inserts a word into the Trie
:param word: word to be inserted
:return: None
"""
curr = self
for char in word:
if char not in curr.nodes:
curr.nodes[char] = TrieNode()
curr = curr.nodes[char]
curr.is_leaf = True
def find(self, word: str) -> bool:
"""
Tries to find word in a Trie
:param word: word to look for
:return: Returns True if word is found, False otherwise
"""
curr = self
for char in word:
if char not in curr.nodes:
return False
curr = curr.nodes[char]
return curr.is_leaf
def delete(self, word: str) -> None:
"""
Deletes a word in a Trie
:param word: word to delete
:return: None
"""
def _delete(curr: TrieNode, word: str, index: int) -> bool:
if index == len(word):
# If word does not exist
if not curr.is_leaf:
return False
curr.is_leaf = False
return len(curr.nodes) == 0
char = word[index]
char_node = curr.nodes.get(char)
# If char not in current trie node
if not char_node:
return False
# Flag to check if node can be deleted
delete_curr = _delete(char_node, word, index + 1)
if delete_curr:
del curr.nodes[char]
return len(curr.nodes) == 0
return delete_curr
_delete(self, word, 0)
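# A minimal usage sketch (not part of the original module): a prefix such as
# "ant" is only reported by find() once it has been inserted as a word in its
# own right, because find() checks the is_leaf flag of the final node.
# The helper name `_prefix_vs_word_demo` is hypothetical.
def _prefix_vs_word_demo() -> tuple[bool, bool]:
    """
    >>> _prefix_vs_word_demo()
    (False, True)
    """
    root = TrieNode()
    root.insert("anthem")
    found_before = root.find("ant")  # "ant" is only a prefix so far
    root.insert("ant")
    found_after = root.find("ant")  # now an explicitly stored word
    return found_before, found_after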
def print_words(node: TrieNode, word: str) -> None:
"""
Prints all the words in a Trie
:param node: root node of Trie
:param word: Word variable should be empty at start
:return: None
"""
if node.is_leaf:
print(word, end=" ")
for key, value in node.nodes.items():
print_words(value, word + key)
def test_trie() -> bool:
words = "banana bananas bandana band apple all beast".split()
root = TrieNode()
root.insert_many(words)
# print_words(root, "")
assert all(root.find(word) for word in words)
assert root.find("banana")
assert not root.find("bandanas")
assert not root.find("apps")
assert root.find("apple")
assert root.find("all")
root.delete("all")
assert not root.find("all")
root.delete("banana")
assert not root.find("banana")
assert root.find("bananas")
return True
def print_results(msg: str, passes: bool) -> None:
print(str(msg), "works!" if passes else "doesn't work :(")
def pytests() -> None:
assert test_trie()
def main() -> None:
"""
>>> pytests()
"""
print_results("Testing trie functionality", test_trie())
if __name__ == "__main__":
main()
================================================
FILE: digital_image_processing/__init__.py
================================================
================================================
FILE: digital_image_processing/change_brightness.py
================================================
from PIL import Image
def change_brightness(img: Image, level: float) -> Image:
"""
Change the brightness of a PIL Image to a given level.
"""
def brightness(c: int) -> float:
"""
        Fundamental transformation/operation that will be performed on
        every pixel value.
"""
return 128 + level + (c - 128)
if not -255.0 <= level <= 255.0:
raise ValueError("level must be between -255.0 (black) and 255.0 (white)")
return img.point(brightness)
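# A minimal sketch (assumes Pillow is installed; not part of the original
# module): since 128 + level + (c - 128) simplifies to c + level, every pixel
# value is shifted by exactly `level`. The helper name `_brightness_shift_demo`
# is hypothetical.
def _brightness_shift_demo() -> int:
    """
    >>> _brightness_shift_demo()
    150
    """
    grey = Image.new("L", (1, 1), color=100)
    return change_brightness(grey, 50).getpixel((0, 0))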
if __name__ == "__main__":
# Load image
with Image.open("image_data/lena.jpg") as img:
# Change brightness to 100
        bright_img = change_brightness(img, 100)
        bright_img.save("image_data/lena_brightness.png", format="png")
================================================
FILE: digital_image_processing/change_contrast.py
================================================
"""
Changing contrast with PIL
This algorithm is used in
https://noivce.pythonanywhere.com/ Python web app.
psf/black: True
ruff : True
"""
from PIL import Image
def change_contrast(img: Image, level: int) -> Image:
"""
Function to change contrast
"""
factor = (259 * (level + 255)) / (255 * (259 - level))
def contrast(c: int) -> int:
"""
        Fundamental transformation/operation that will be performed on
        every pixel value.
"""
return int(128 + factor * (c - 128))
return img.point(contrast)
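# A minimal sketch (assumes Pillow is installed; not part of the original
# module): 128 is the fixed point of the contrast transform, so a mid-grey
# pixel keeps its value regardless of the chosen level. The helper name
# `_midgrey_fixed_point_demo` is hypothetical.
def _midgrey_fixed_point_demo() -> int:
    """
    >>> _midgrey_fixed_point_demo()
    128
    """
    grey = Image.new("L", (1, 1), color=128)
    return change_contrast(grey, 170).getpixel((0, 0))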
if __name__ == "__main__":
# Load image
with Image.open("image_data/lena.jpg") as img:
# Change contrast to 170
cont_img = change_contrast(img, 170)
cont_img.save("image_data/lena_high_contrast.png", format="png")
================================================
FILE: digital_image_processing/convert_to_negative.py
================================================
"""
Implementation of an algorithm using OpenCV to convert a colored image into its negative
"""
from cv2 import destroyAllWindows, imread, imshow, waitKey
def convert_to_negative(img):
# getting number of pixels in the image
pixel_h, pixel_v = img.shape[0], img.shape[1]
# converting each pixel's color to its negative
for i in range(pixel_h):
for j in range(pixel_v):
img[i][j] = [255, 255, 255] - img[i][j]
return img
if __name__ == "__main__":
# read original image
img = imread("image_data/lena.jpg", 1)
# convert to its negative
neg = convert_to_negative(img)
# show result image
imshow("negative of original image", img)
waitKey(0)
destroyAllWindows()
================================================
FILE: digital_image_processing/dithering/__init__.py
================================================
================================================
FILE: digital_image_processing/dithering/burkes.py
================================================
"""
Implementation of the Burkes algorithm (dithering)
"""
import numpy as np
from cv2 import destroyAllWindows, imread, imshow, waitKey
class Burkes:
"""
    The Burkes algorithm is used for converting a grayscale image to a black and white version
    Source: https://en.wikipedia.org/wiki/Dither
    Note:
        * Best results are given with threshold = ~1/2 * max greyscale value.
        * This implementation takes an RGB image and converts it to greyscale at runtime.
"""
def __init__(self, input_img, threshold: int):
self.min_threshold = 0
# max greyscale value for #FFFFFF
self.max_threshold = int(self.get_greyscale(255, 255, 255))
if not self.min_threshold < threshold < self.max_threshold:
msg = f"Factor value should be from 0 to {self.max_threshold}"
raise ValueError(msg)
self.input_img = input_img
self.threshold = threshold
self.width, self.height = self.input_img.shape[1], self.input_img.shape[0]
# error table size (+4 columns and +1 row) greater than input image because of
# lack of if statements
        self.error_table = [
            [0 for _ in range(self.width + 4)] for __ in range(self.height + 1)
        ]
        self.output_img = np.ones((self.height, self.width, 3), np.uint8) * 255
@classmethod
def get_greyscale(cls, blue: int, green: int, red: int) -> float:
"""
>>> Burkes.get_greyscale(3, 4, 5)
4.185
>>> Burkes.get_greyscale(0, 0, 0)
0.0
>>> Burkes.get_greyscale(255, 255, 255)
        255.0

Formula from https://en.wikipedia.org/wiki/HSL_and_HSV
cf Lightness section, and Fig 13c.
We use the first of four possible.
"""
return 0.114 * blue + 0.587 * green + 0.299 * red
def process(self) -> None:
for y in range(self.height):
for x in range(self.width):
greyscale = int(self.get_greyscale(*self.input_img[y][x]))
if self.threshold > greyscale + self.error_table[y][x]:
self.output_img[y][x] = (0, 0, 0)
current_error = greyscale + self.error_table[y][x]
else:
self.output_img[y][x] = (255, 255, 255)
current_error = greyscale + self.error_table[y][x] - 255
"""
Burkes error propagation (`*` is current pixel):
* 8/32 4/32
2/32 4/32 8/32 4/32 2/32
"""
self.error_table[y][x + 1] += int(8 / 32 * current_error)
self.error_table[y][x + 2] += int(4 / 32 * current_error)
self.error_table[y + 1][x] += int(8 / 32 * current_error)
self.error_table[y + 1][x + 1] += int(4 / 32 * current_error)
self.error_table[y + 1][x + 2] += int(2 / 32 * current_error)
self.error_table[y + 1][x - 1] += int(4 / 32 * current_error)
self.error_table[y + 1][x - 2] += int(2 / 32 * current_error)
if __name__ == "__main__":
# create Burke's instances with original images in greyscale
burkes_instances = [
Burkes(imread("image_data/lena.jpg", 1), threshold)
for threshold in (1, 126, 130, 140)
]
for burkes in burkes_instances:
burkes.process()
for burkes in burkes_instances:
imshow(
f"Original image with dithering threshold: {burkes.threshold}",
burkes.output_img,
)
waitKey(0)
destroyAllWindows()
================================================
FILE: digital_image_processing/edge_detection/__init__.py
================================================
================================================
FILE: digital_image_processing/edge_detection/canny.py
================================================
import cv2
import numpy as np
from digital_image_processing.filters.convolve import img_convolve
from digital_image_processing.filters.sobel_filter import sobel_filter
PI = 180
def gen_gaussian_kernel(k_size, sigma):
center = k_size // 2
x, y = np.mgrid[0 - center : k_size - center, 0 - center : k_size - center]
g = (
1
/ (2 * np.pi * sigma)
* np.exp(-(np.square(x) + np.square(y)) / (2 * np.square(sigma)))
)
return g
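# A small property check (a sketch, not part of the original module): the
# generated Gaussian kernel is symmetric about its centre, which is what makes
# the smoothing step direction-independent. The helper name
# `_gaussian_kernel_is_symmetric` is hypothetical.
def _gaussian_kernel_is_symmetric() -> bool:
    """
    >>> _gaussian_kernel_is_symmetric()
    True
    """
    kernel = gen_gaussian_kernel(9, sigma=1.4)
    return bool(np.allclose(kernel, kernel[::-1, ::-1]))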
def suppress_non_maximum(image_shape, gradient_direction, sobel_grad):
"""
Non-maximum suppression. If the edge strength of the current pixel is the largest
compared to the other pixels in the mask with the same direction, the value will be
preserved. Otherwise, the value will be suppressed.
"""
destination = np.zeros(image_shape)
for row in range(1, image_shape[0] - 1):
for col in range(1, image_shape[1] - 1):
direction = gradient_direction[row, col]
if (
0 <= direction < PI / 8
or 15 * PI / 8 <= direction <= 2 * PI
or 7 * PI / 8 <= direction <= 9 * PI / 8
):
w = sobel_grad[row, col - 1]
e = sobel_grad[row, col + 1]
if sobel_grad[row, col] >= w and sobel_grad[row, col] >= e:
destination[row, col] = sobel_grad[row, col]
elif (
PI / 8 <= direction < 3 * PI / 8
or 9 * PI / 8 <= direction < 11 * PI / 8
):
sw = sobel_grad[row + 1, col - 1]
ne = sobel_grad[row - 1, col + 1]
if sobel_grad[row, col] >= sw and sobel_grad[row, col] >= ne:
destination[row, col] = sobel_grad[row, col]
elif (
3 * PI / 8 <= direction < 5 * PI / 8
or 11 * PI / 8 <= direction < 13 * PI / 8
):
n = sobel_grad[row - 1, col]
s = sobel_grad[row + 1, col]
if sobel_grad[row, col] >= n and sobel_grad[row, col] >= s:
destination[row, col] = sobel_grad[row, col]
elif (
5 * PI / 8 <= direction < 7 * PI / 8
or 13 * PI / 8 <= direction < 15 * PI / 8
):
nw = sobel_grad[row - 1, col - 1]
se = sobel_grad[row + 1, col + 1]
if sobel_grad[row, col] >= nw and sobel_grad[row, col] >= se:
destination[row, col] = sobel_grad[row, col]
return destination
def detect_high_low_threshold(
image_shape, destination, threshold_low, threshold_high, weak, strong
):
"""
High-Low threshold detection. If an edge pixel's gradient value is higher
than the high threshold value, it is marked as a strong edge pixel. If an
edge pixel's gradient value is smaller than the high threshold value and
larger than the low threshold value, it is marked as a weak edge pixel. If
an edge pixel's value is smaller than the low threshold value, it will be
suppressed.
"""
for row in range(1, image_shape[0] - 1):
for col in range(1, image_shape[1] - 1):
if destination[row, col] >= threshold_high:
destination[row, col] = strong
elif destination[row, col] <= threshold_low:
destination[row, col] = 0
else:
destination[row, col] = weak
def track_edge(image_shape, destination, weak, strong):
"""
Edge tracking. Usually a weak edge pixel caused from true edges will be connected
to a strong edge pixel while noise responses are unconnected. As long as there is
one strong edge pixel that is involved in its 8-connected neighborhood, that weak
edge point can be identified as one that should be preserved.
"""
for row in range(1, image_shape[0]):
for col in range(1, image_shape[1]):
if destination[row, col] == weak:
if 255 in (
destination[row, col + 1],
destination[row, col - 1],
destination[row - 1, col],
destination[row + 1, col],
destination[row - 1, col - 1],
destination[row + 1, col - 1],
destination[row - 1, col + 1],
destination[row + 1, col + 1],
):
destination[row, col] = strong
else:
destination[row, col] = 0
def canny(image, threshold_low=15, threshold_high=30, weak=128, strong=255):
# gaussian_filter
gaussian_out = img_convolve(image, gen_gaussian_kernel(9, sigma=1.4))
# get the gradient and degree by sobel_filter
sobel_grad, sobel_theta = sobel_filter(gaussian_out)
gradient_direction = PI + np.rad2deg(sobel_theta)
destination = suppress_non_maximum(image.shape, gradient_direction, sobel_grad)
detect_high_low_threshold(
image.shape, destination, threshold_low, threshold_high, weak, strong
)
track_edge(image.shape, destination, weak, strong)
return destination
if __name__ == "__main__":
# read original image in gray mode
lena = cv2.imread(r"../image_data/lena.jpg", 0)
# canny edge detection
canny_destination = canny(lena)
cv2.imshow("canny", canny_destination)
cv2.waitKey(0)
================================================
FILE: digital_image_processing/filters/__init__.py
================================================
================================================
FILE: digital_image_processing/filters/bilateral_filter.py
================================================
"""
Implementation of Bilateral filter
Inputs:
    img: A 2d image with values in between 0 and 1
    spatial_variance: variance in the space dimension
    intensity_variance: variance in intensity
    kernel_size: kernel size (must be an odd number)
Output:
    img: A 2d zero-padded image with values in between 0 and 1
"""
import math
import sys
import cv2
import numpy as np
def vec_gaussian(img: np.ndarray, variance: float) -> np.ndarray:
# For applying gaussian function for each element in matrix.
sigma = math.sqrt(variance)
cons = 1 / (sigma * math.sqrt(2 * math.pi))
return cons * np.exp(-((img / sigma) ** 2) * 0.5)
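# A small sanity check (a sketch, not part of the original module): the Gaussian
# weight is largest where the difference is zero and equals the normalisation
# constant 1 / (sigma * sqrt(2 * pi)). The helper name `_vec_gaussian_peak_demo`
# is hypothetical.
def _vec_gaussian_peak_demo() -> bool:
    """
    >>> _vec_gaussian_peak_demo()
    True
    """
    peak = vec_gaussian(np.zeros((1, 1)), variance=4.0)[0, 0]
    return math.isclose(peak, 1 / (2 * math.sqrt(2 * math.pi)))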
def get_slice(img: np.ndarray, x: int, y: int, kernel_size: int) -> np.ndarray:
half = kernel_size // 2
return img[x - half : x + half + 1, y - half : y + half + 1]
def get_gauss_kernel(kernel_size: int, spatial_variance: float) -> np.ndarray:
# Creates a gaussian kernel of given dimension.
arr = np.zeros((kernel_size, kernel_size))
for i in range(kernel_size):
for j in range(kernel_size):
arr[i, j] = math.sqrt(
abs(i - kernel_size // 2) ** 2 + abs(j - kernel_size // 2) ** 2
)
return vec_gaussian(arr, spatial_variance)
def bilateral_filter(
img: np.ndarray,
spatial_variance: float,
intensity_variance: float,
kernel_size: int,
) -> np.ndarray:
img2 = np.zeros(img.shape)
gauss_ker = get_gauss_kernel(kernel_size, spatial_variance)
size_x, size_y = img.shape
for i in range(kernel_size // 2, size_x - kernel_size // 2):
for j in range(kernel_size // 2, size_y - kernel_size // 2):
img_s = get_slice(img, i, j, kernel_size)
img_i = img_s - img_s[kernel_size // 2, kernel_size // 2]
img_ig = vec_gaussian(img_i, intensity_variance)
weights = np.multiply(gauss_ker, img_ig)
vals = np.multiply(img_s, weights)
val = np.sum(vals) / np.sum(weights)
img2[i, j] = val
return img2
def parse_args(args: list) -> tuple:
filename = args[1] if args[1:] else "../image_data/lena.jpg"
spatial_variance = float(args[2]) if args[2:] else 1.0
intensity_variance = float(args[3]) if args[3:] else 1.0
if args[4:]:
kernel_size = int(args[4])
kernel_size = kernel_size + abs(kernel_size % 2 - 1)
else:
kernel_size = 5
return filename, spatial_variance, intensity_variance, kernel_size
if __name__ == "__main__":
filename, spatial_variance, intensity_variance, kernel_size = parse_args(sys.argv)
img = cv2.imread(filename, 0)
cv2.imshow("input image", img)
out = img / 255
out = out.astype("float32")
out = bilateral_filter(out, spatial_variance, intensity_variance, kernel_size)
out = out * 255
out = np.uint8(out)
cv2.imshow("output image", out)
cv2.waitKey(0)
cv2.destroyAllWindows()
================================================
FILE: digital_image_processing/filters/convolve.py
================================================
# @Author : lightXu
# @File : convolve.py
# @Time : 2019/7/8 0008 16:13
from cv2 import COLOR_BGR2GRAY, cvtColor, imread, imshow, waitKey
from numpy import array, dot, pad, ravel, uint8, zeros
def im2col(image, block_size):
rows, cols = image.shape
dst_height = cols - block_size[1] + 1
dst_width = rows - block_size[0] + 1
image_array = zeros((dst_height * dst_width, block_size[1] * block_size[0]))
row = 0
for i in range(dst_height):
for j in range(dst_width):
window = ravel(image[i : i + block_size[0], j : j + block_size[1]])
image_array[row, :] = window
row += 1
return image_array
def img_convolve(image, filter_kernel):
height, width = image.shape[0], image.shape[1]
k_size = filter_kernel.shape[0]
pad_size = k_size // 2
# Pads image with the edge values of array.
image_tmp = pad(image, pad_size, mode="edge")
# im2col, turn the k_size*k_size pixels into a row and np.vstack all rows
image_array = im2col(image_tmp, (k_size, k_size))
# turn the kernel into shape(k*k, 1)
kernel_array = ravel(filter_kernel)
# reshape and get the dst image
dst = dot(image_array, kernel_array).reshape(height, width)
return dst
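# A small sanity check (a sketch, not part of the original module): convolving
# with a 3x3 identity kernel (1 at the centre, 0 elsewhere) reproduces the
# input, since the edge padding preserves the image shape. The helper name
# `_identity_kernel_demo` is hypothetical.
def _identity_kernel_demo() -> bool:
    """
    >>> _identity_kernel_demo()
    True
    """
    image = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    identity = array([[0, 0, 0], [0, 1, 0], [0, 0, 0]])
    return bool((img_convolve(image, identity) == image).all())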
if __name__ == "__main__":
# read original image
img = imread(r"../image_data/lena.jpg")
# turn image in gray scale value
gray = cvtColor(img, COLOR_BGR2GRAY)
# Laplace operator
Laplace_kernel = array([[0, 1, 0], [1, -4, 1], [0, 1, 0]])
out = img_convolve(gray, Laplace_kernel).astype(uint8)
imshow("Laplacian", out)
waitKey(0)
================================================
FILE: digital_image_processing/filters/gabor_filter.py
================================================
# Implementation of the Gaborfilter
# https://en.wikipedia.org/wiki/Gabor_filter
import numpy as np
from cv2 import COLOR_BGR2GRAY, CV_8UC3, cvtColor, filter2D, imread, imshow, waitKey
def gabor_filter_kernel(
ksize: int, sigma: int, theta: int, lambd: int, gamma: int, psi: int
) -> np.ndarray:
"""
    :param ksize: The kernel size of the convolutional filter (ksize x ksize)
:param sigma: standard deviation of the gaussian bell curve
:param theta: The orientation of the normal to the parallel stripes
of Gabor function.
:param lambd: Wavelength of the sinusoidal component.
    :param gamma: The spatial aspect ratio; it specifies the ellipticity
        of the support of the Gabor function.
:param psi: The phase offset of the sinusoidal function.
>>> gabor_filter_kernel(3, 8, 0, 10, 0, 0).tolist()
[[0.8027212023735046, 1.0, 0.8027212023735046], [0.8027212023735046, 1.0, \
0.8027212023735046], [0.8027212023735046, 1.0, 0.8027212023735046]]
"""
# prepare kernel
    # the kernel size has to be odd
if (ksize % 2) == 0:
ksize = ksize + 1
gabor = np.zeros((ksize, ksize), dtype=np.float32)
# each value
for y in range(ksize):
for x in range(ksize):
# distance from center
px = x - ksize // 2
py = y - ksize // 2
# degree to radiant
_theta = theta / 180 * np.pi
cos_theta = np.cos(_theta)
sin_theta = np.sin(_theta)
# get kernel x
_x = cos_theta * px + sin_theta * py
# get kernel y
_y = -sin_theta * px + cos_theta * py
# fill kernel
gabor[y, x] = np.exp(-(_x**2 + gamma**2 * _y**2) / (2 * sigma**2)) * np.cos(
2 * np.pi * _x / lambd + psi
)
return gabor
if __name__ == "__main__":
import doctest
doctest.testmod()
# read original image
img = imread("../image_data/lena.jpg")
# turn image in gray scale value
gray = cvtColor(img, COLOR_BGR2GRAY)
# Apply multiple Kernel to detect edges
out = np.zeros(gray.shape[:2])
for theta in [0, 30, 60, 90, 120, 150]:
"""
ksize = 10
sigma = 8
lambd = 10
gamma = 0
psi = 0
"""
kernel_10 = gabor_filter_kernel(10, 8, theta, 10, 0, 0)
out += filter2D(gray, CV_8UC3, kernel_10)
out = out / out.max() * 255
out = out.astype(np.uint8)
imshow("Original", gray)
    imshow("Gabor filter with 11x11 mask and 6 directions", out)
waitKey(0)
================================================
FILE: digital_image_processing/filters/gaussian_filter.py
================================================
"""
Implementation of gaussian filter algorithm
"""
from itertools import product
from cv2 import COLOR_BGR2GRAY, cvtColor, imread, imshow, waitKey
from numpy import dot, exp, mgrid, pi, ravel, square, uint8, zeros
def gen_gaussian_kernel(k_size, sigma):
center = k_size // 2
x, y = mgrid[0 - center : k_size - center, 0 - center : k_size - center]
g = 1 / (2 * pi * sigma) * exp(-(square(x) + square(y)) / (2 * square(sigma)))
return g
def gaussian_filter(image, k_size, sigma):
height, width = image.shape[0], image.shape[1]
# dst image height and width
dst_height = height - k_size + 1
dst_width = width - k_size + 1
# im2col, turn the k_size*k_size pixels into a row and np.vstack all rows
image_array = zeros((dst_height * dst_width, k_size * k_size))
for row, (i, j) in enumerate(product(range(dst_height), range(dst_width))):
window = ravel(image[i : i + k_size, j : j + k_size])
image_array[row, :] = window
# turn the kernel into shape(k*k, 1)
gaussian_kernel = gen_gaussian_kernel(k_size, sigma)
filter_array = ravel(gaussian_kernel)
# reshape and get the dst image
dst = dot(image_array, filter_array).reshape(dst_height, dst_width).astype(uint8)
return dst
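# A small sanity check (a sketch, not part of the original module): with no
# padding, a k x k kernel shrinks each image dimension by k - 1. The helper
# name `_output_shape_demo` is hypothetical.
def _output_shape_demo() -> tuple[int, int]:
    """
    >>> _output_shape_demo()
    (3, 3)
    """
    image = zeros((5, 5), dtype=uint8)
    return gaussian_filter(image, 3, sigma=1).shape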
if __name__ == "__main__":
# read original image
img = imread(r"../image_data/lena.jpg")
# turn image in gray scale value
gray = cvtColor(img, COLOR_BGR2GRAY)
# get values with two different mask size
gaussian3x3 = gaussian_filter(gray, 3, sigma=1)
gaussian5x5 = gaussian_filter(gray, 5, sigma=0.8)
# show result images
imshow("gaussian filter with 3x3 mask", gaussian3x3)
imshow("gaussian filter with 5x5 mask", gaussian5x5)
waitKey()
================================================
FILE: digital_image_processing/filters/laplacian_filter.py
================================================
# @Author : ojas-wani
# @File : laplacian_filter.py
# @Date : 10/04/2023
import numpy as np
from cv2 import (
BORDER_DEFAULT,
COLOR_BGR2GRAY,
CV_64F,
cvtColor,
filter2D,
imread,
imshow,
waitKey,
)
from digital_image_processing.filters.gaussian_filter import gaussian_filter
def my_laplacian(src: np.ndarray, ksize: int) -> np.ndarray:
"""
:param src: the source image, which should be a grayscale or color image.
:param ksize: the size of the kernel used to compute the Laplacian filter,
which can be 1, 3, 5, or 7.
>>> my_laplacian(src=np.array([]), ksize=0)
Traceback (most recent call last):
...
ValueError: ksize must be in (1, 3, 5, 7)
"""
kernels = {
1: np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]]),
3: np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]]),
5: np.array(
[
[0, 0, -1, 0, 0],
[0, -1, -2, -1, 0],
[-1, -2, 16, -2, -1],
[0, -1, -2, -1, 0],
[0, 0, -1, 0, 0],
]
),
7: np.array(
[
[0, 0, 0, -1, 0, 0, 0],
[0, 0, -2, -3, -2, 0, 0],
[0, -2, -7, -10, -7, -2, 0],
[-1, -3, -10, 68, -10, -3, -1],
[0, -2, -7, -10, -7, -2, 0],
[0, 0, -2, -3, -2, 0, 0],
[0, 0, 0, -1, 0, 0, 0],
]
),
}
if ksize not in kernels:
msg = f"ksize must be in {tuple(kernels)}"
raise ValueError(msg)
# Apply the Laplacian kernel using convolution
return filter2D(
src, CV_64F, kernels[ksize], 0, borderType=BORDER_DEFAULT, anchor=(0, 0)
)
if __name__ == "__main__":
# read original image
img = imread(r"../image_data/lena.jpg")
# turn image in gray scale value
gray = cvtColor(img, COLOR_BGR2GRAY)
# Applying gaussian filter
blur_image = gaussian_filter(gray, 3, sigma=1)
# Apply multiple Kernel to detect edges
laplacian_image = my_laplacian(ksize=3, src=blur_image)
imshow("Original image", img)
imshow("Detected edges using laplacian filter", laplacian_image)
waitKey(0)
================================================
FILE: digital_image_processing/filters/local_binary_pattern.py
================================================
import cv2
import numpy as np
def get_neighbors_pixel(
image: np.ndarray, x_coordinate: int, y_coordinate: int, center: int
) -> int:
"""
    Compare a local neighborhood pixel value with the value of the centre pixel.
    Exception handling is required when a neighbour of the centre pixel does not
    exist, i.e. for values at the boundaries.
:param image: The image we're working with
:param x_coordinate: x-coordinate of the pixel
:param y_coordinate: The y coordinate of the pixel
:param center: center pixel value
:return: The value of the pixel is being returned.
"""
try:
return int(image[x_coordinate][y_coordinate] >= center)
except (IndexError, TypeError):
return 0
def local_binary_value(image: np.ndarray, x_coordinate: int, y_coordinate: int) -> int:
"""
It takes an image, an x and y coordinate, and returns the
    decimal value of the local binary pattern of the pixel
at that coordinate
:param image: the image to be processed
:param x_coordinate: x coordinate of the pixel
:param y_coordinate: the y coordinate of the pixel
:return: The decimal value of the binary value of the pixels
around the center pixel.
"""
center = image[x_coordinate][y_coordinate]
powers = [1, 2, 4, 8, 16, 32, 64, 128]
# skip get_neighbors_pixel if center is null
if center is None:
return 0
    # Compare the eight neighbouring pixels with the centre pixel; the position
    # of each neighbour in this list fixes its weight in `powers`
binary_values = [
get_neighbors_pixel(image, x_coordinate - 1, y_coordinate + 1, center),
get_neighbors_pixel(image, x_coordinate, y_coordinate + 1, center),
get_neighbors_pixel(image, x_coordinate - 1, y_coordinate, center),
get_neighbors_pixel(image, x_coordinate + 1, y_coordinate + 1, center),
get_neighbors_pixel(image, x_coordinate + 1, y_coordinate, center),
get_neighbors_pixel(image, x_coordinate + 1, y_coordinate - 1, center),
get_neighbors_pixel(image, x_coordinate, y_coordinate - 1, center),
get_neighbors_pixel(image, x_coordinate - 1, y_coordinate - 1, center),
]
# Converting the binary value to decimal.
return sum(
binary_value * power for binary_value, power in zip(binary_values, powers)
)
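# A small worked example (a sketch, not part of the original module): in a
# constant patch every neighbour compares as >= the centre, so all eight
# comparisons yield 1 and the LBP value is 1+2+4+8+16+32+64+128 = 255.
# The helper name `_uniform_patch_demo` is hypothetical.
def _uniform_patch_demo() -> int:
    """
    >>> _uniform_patch_demo()
    255
    """
    patch = np.full((3, 3), 7)
    return local_binary_value(patch, 1, 1)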
if __name__ == "__main__":
# Reading the image and converting it to grayscale.
image = cv2.imread(
"digital_image_processing/image_data/lena.jpg", cv2.IMREAD_GRAYSCALE
)
# Create a numpy array as the same height and width of read image
lbp_image = np.zeros((image.shape[0], image.shape[1]))
# Iterating through the image and calculating the
# local binary pattern value for each pixel.
for i in range(image.shape[0]):
for j in range(image.shape[1]):
lbp_image[i][j] = local_binary_value(image, i, j)
cv2.imshow("local binary pattern", lbp_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
================================================
FILE: digital_image_processing/filters/median_filter.py
================================================
"""
Implementation of median filter algorithm
"""
from cv2 import COLOR_BGR2GRAY, cvtColor, imread, imshow, waitKey
from numpy import divide, int8, multiply, ravel, sort, zeros_like
def median_filter(gray_img, mask=3):
"""
:param gray_img: gray image
:param mask: mask size
:return: image with median filter
"""
# set image borders
bd = int(mask / 2)
# copy image size
median_img = zeros_like(gray_img)
for i in range(bd, gray_img.shape[0] - bd):
for j in range(bd, gray_img.shape[1] - bd):
            # get the pixel neighbourhood according to the mask size
kernel = ravel(gray_img[i - bd : i + bd + 1, j - bd : j + bd + 1])
# calculate mask median
            median = sort(kernel)[int8(divide(multiply(mask, mask), 2))]
median_img[i, j] = median
return median_img
if __name__ == "__main__":
# read original image
img = imread("../image_data/lena.jpg")
# turn image in gray scale value
gray = cvtColor(img, COLOR_BGR2GRAY)
# get values with two different mask size
median3x3 = median_filter(gray, 3)
median5x5 = median_filter(gray, 5)
# show result images
imshow("median filter with 3x3 mask", median3x3)
imshow("median filter with 5x5 mask", median5x5)
waitKey(0)
================================================
FILE: digital_image_processing/filters/sobel_filter.py
================================================
# @Author : lightXu
# @File : sobel_filter.py
# @Time : 2019/7/8 0008 16:26
import numpy as np
from cv2 import COLOR_BGR2GRAY, cvtColor, imread, imshow, waitKey
from digital_image_processing.filters.convolve import img_convolve
def sobel_filter(image):
kernel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
kernel_y = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
dst_x = np.abs(img_convolve(image, kernel_x))
dst_y = np.abs(img_convolve(image, kernel_y))
    # scale the pixel values into [0, 255]
dst_x = dst_x * 255 / np.max(dst_x)
dst_y = dst_y * 255 / np.max(dst_y)
dst_xy = np.sqrt((np.square(dst_x)) + (np.square(dst_y)))
dst_xy = dst_xy * 255 / np.max(dst_xy)
dst = dst_xy.astype(np.uint8)
theta = np.arctan2(dst_y, dst_x)
return dst, theta
if __name__ == "__main__":
# read original image
img = imread("../image_data/lena.jpg")
# turn image in gray scale value
gray = cvtColor(img, COLOR_BGR2GRAY)
sobel_grad, sobel_theta = sobel_filter(gray)
# show result images
imshow("sobel filter", sobel_grad)
imshow("sobel theta", sobel_theta)
waitKey(0)
================================================
FILE: digital_image_processing/histogram_equalization/__init__.py
================================================
================================================
FILE: digital_image_processing/histogram_equalization/histogram_stretch.py
================================================
"""
Created on Fri Sep 28 15:22:29 2018
@author: Binish125
"""
import copy
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
class ConstantStretch:
def __init__(self):
self.img = ""
self.original_image = ""
self.last_list = []
self.rem = 0
self.L = 256
self.sk = 0
self.k = 0
self.number_of_rows = 0
self.number_of_cols = 0
def stretch(self, input_image):
self.img = cv2.imread(input_image, 0)
self.original_image = copy.deepcopy(self.img)
x, _, _ = plt.hist(self.img.ravel(), 256, [0, 256], label="x")
self.k = np.sum(x)
for i in range(len(x)):
prk = x[i] / self.k
self.sk += prk
last = (self.L - 1) * self.sk
            self.rem = last % 1  # fractional part, used to round half up
            last = int(last) + 1 if self.rem >= 0.5 else int(last)
self.last_list.append(last)
self.number_of_rows = int(np.ma.count(self.img) / self.img[1].size)
self.number_of_cols = self.img[1].size
for i in range(self.number_of_cols):
for j in range(self.number_of_rows):
num = self.img[j][i]
if num != self.last_list[num]:
self.img[j][i] = self.last_list[num]
cv2.imwrite("output_data/output.jpg", self.img)
def plot_histogram(self):
plt.hist(self.img.ravel(), 256, [0, 256])
def show_image(self):
cv2.imshow("Output-Image", self.img)
cv2.imshow("Input-Image", self.original_image)
cv2.waitKey(5000)
cv2.destroyAllWindows()
if __name__ == "__main__":
    file_path = os.path.join(os.path.dirname(__file__), "image_data/input.jpg")
stretcher = ConstantStretch()
stretcher.stretch(file_path)
stretcher.plot_histogram()
stretcher.show_image()
================================================
FILE: digital_image_processing/histogram_equalization/image_data/__init__.py
================================================
================================================
FILE: digital_image_processing/histogram_equalization/output_data/__init__.py
================================================
================================================
FILE: digital_image_processing/image_data/__init__.py
================================================
================================================
FILE: digital_image_processing/index_calculation.py
================================================
# Author: João Gustavo A. Amorim
# Author email: joaogustavoamorim@gmail.com
# Coding date: jan 2019
# python/black: True
# Imports
import numpy as np
# Class implemented to calculate the index
class IndexCalculation:
"""
# Class Summary
    This algorithm consists of calculating vegetation indices; these
    indices can be used for precision agriculture, for example, or for remote
    sensing. There are functions to define the data and to calculate the
    implemented indices.
# Vegetation index
https://en.wikipedia.org/wiki/Vegetation_Index
A Vegetation Index (VI) is a spectral transformation of two or more bands
designed to enhance the contribution of vegetation properties and allow
reliable spatial and temporal inter-comparisons of terrestrial
photosynthetic activity and canopy structural variations
# Information about channels (Wavelength range for each)
* nir - near-infrared
https://www.malvernpanalytical.com/br/products/technology/near-infrared-spectroscopy
Wavelength Range 700 nm to 2500 nm
* Red Edge
https://en.wikipedia.org/wiki/Red_edge
Wavelength Range 680 nm to 730 nm
* red
https://en.wikipedia.org/wiki/Color
Wavelength Range 635 nm to 700 nm
* blue
https://en.wikipedia.org/wiki/Color
Wavelength Range 450 nm to 490 nm
* green
https://en.wikipedia.org/wiki/Color
Wavelength Range 520 nm to 560 nm
# Implemented index list
#"abbreviationOfIndexName" -- list of channels used
#"ARVI2" -- red, nir
#"CCCI" -- red, redEdge, nir
#"CVI" -- red, green, nir
#"GLI" -- red, green, blue
#"NDVI" -- red, nir
#"BNDVI" -- blue, nir
#"redEdgeNDVI" -- red, redEdge
#"GNDVI" -- green, nir
#"GBNDVI" -- green, blue, nir
#"GRNDVI" -- red, green, nir
#"RBNDVI" -- red, blue, nir
#"PNDVI" -- red, green, blue, nir
#"ATSAVI" -- red, nir
#"BWDRVI" -- blue, nir
#"CIgreen" -- green, nir
#"CIrededge" -- redEdge, nir
#"CI" -- red, blue
#"CTVI" -- red, nir
#"GDVI" -- green, nir
#"EVI" -- red, blue, nir
#"GEMI" -- red, nir
#"GOSAVI" -- green, nir
#"GSAVI" -- green, nir
#"Hue" -- red, green, blue
#"IVI" -- red, nir
#"IPVI" -- red, nir
#"I" -- red, green, blue
#"RVI" -- red, nir
#"MRVI" -- red, nir
#"MSAVI" -- red, nir
#"NormG" -- red, green, nir
#"NormNIR" -- red, green, nir
#"NormR" -- red, green, nir
#"NGRDI" -- red, green
#"RI" -- red, green
#"S" -- red, green, blue
#"IF" -- red, green, blue
#"DVI" -- red, nir
#"TVI" -- red, nir
#"NDRE" -- redEdge, nir
#list of all index implemented
#allIndex = ["ARVI2", "CCCI", "CVI", "GLI", "NDVI", "BNDVI", "redEdgeNDVI",
"GNDVI", "GBNDVI", "GRNDVI", "RBNDVI", "PNDVI", "ATSAVI",
"BWDRVI", "CIgreen", "CIrededge", "CI", "CTVI", "GDVI", "EVI",
"GEMI", "GOSAVI", "GSAVI", "Hue", "IVI", "IPVI", "I", "RVI",
"MRVI", "MSAVI", "NormG", "NormNIR", "NormR", "NGRDI", "RI",
"S", "IF", "DVI", "TVI", "NDRE"]
#list of index with not blue channel
#notBlueIndex = ["ARVI2", "CCCI", "CVI", "NDVI", "redEdgeNDVI", "GNDVI",
"GRNDVI", "ATSAVI", "CIgreen", "CIrededge", "CTVI", "GDVI",
"GEMI", "GOSAVI", "GSAVI", "IVI", "IPVI", "RVI", "MRVI",
"MSAVI", "NormG", "NormNIR", "NormR", "NGRDI", "RI", "DVI",
"TVI", "NDRE"]
#list of index just with RGB channels
#RGBIndex = ["GLI", "CI", "Hue", "I", "NGRDI", "RI", "S", "IF"]
"""
def __init__(self, red=None, green=None, blue=None, red_edge=None, nir=None):
self.set_matricies(red=red, green=green, blue=blue, red_edge=red_edge, nir=nir)
def set_matricies(self, red=None, green=None, blue=None, red_edge=None, nir=None):
if red is not None:
self.red = red
if green is not None:
self.green = green
if blue is not None:
self.blue = blue
if red_edge is not None:
self.redEdge = red_edge
if nir is not None:
self.nir = nir
return True
def calculation(
self, index="", red=None, green=None, blue=None, red_edge=None, nir=None
):
"""
Performs the calculation of the index with the values set in the class.
:str index: abbreviation of the index name to compute
"""
self.set_matricies(red=red, green=green, blue=blue, red_edge=red_edge, nir=nir)
funcs = {
"ARVI2": self.arv12,
"CCCI": self.ccci,
"CVI": self.cvi,
"GLI": self.gli,
"NDVI": self.ndvi,
"BNDVI": self.bndvi,
"redEdgeNDVI": self.red_edge_ndvi,
"GNDVI": self.gndvi,
"GBNDVI": self.gbndvi,
"GRNDVI": self.grndvi,
"RBNDVI": self.rbndvi,
"PNDVI": self.pndvi,
"ATSAVI": self.atsavi,
"BWDRVI": self.bwdrvi,
"CIgreen": self.ci_green,
"CIrededge": self.ci_rededge,
"CI": self.ci,
"CTVI": self.ctvi,
"GDVI": self.gdvi,
"EVI": self.evi,
"GEMI": self.gemi,
"GOSAVI": self.gosavi,
"GSAVI": self.gsavi,
"Hue": self.hue,
"IVI": self.ivi,
"IPVI": self.ipvi,
"I": self.i,
"RVI": self.rvi,
"MRVI": self.mrvi,
"MSAVI": self.m_savi,
"NormG": self.norm_g,
"NormNIR": self.norm_nir,
"NormR": self.norm_r,
"NGRDI": self.ngrdi,
"RI": self.ri,
"S": self.s,
"IF": self._if,
"DVI": self.dvi,
"TVI": self.tvi,
"NDRE": self.ndre,
}
try:
return funcs[index]()
except KeyError:
print("Index not in the list!")
return False
def arv12(self):
"""
Atmospherically Resistant Vegetation Index 2
https://www.indexdatabase.de/db/i-single.php?id=396
:return: index
-0.18+1.17*(self.nir-self.red)/(self.nir+self.red)
"""
return -0.18 + (1.17 * ((self.nir - self.red) / (self.nir + self.red)))
def ccci(self):
"""
Canopy Chlorophyll Content Index
https://www.indexdatabase.de/db/i-single.php?id=224
:return: index
"""
return ((self.nir - self.redEdge) / (self.nir + self.redEdge)) / (
(self.nir - self.red) / (self.nir + self.red)
)
def cvi(self):
"""
Chlorophyll vegetation index
https://www.indexdatabase.de/db/i-single.php?id=391
:return: index
"""
return self.nir * (self.red / (self.green**2))
def gli(self):
"""
Green Leaf Index (GLI)
https://www.indexdatabase.de/db/i-single.php?id=375
:return: index
"""
return (2 * self.green - self.red - self.blue) / (
2 * self.green + self.red + self.blue
)
def ndvi(self):
"""
Normalized Difference self.nir/self.red Normalized Difference Vegetation
Index, Calibrated NDVI - CDVI
https://www.indexdatabase.de/db/i-single.php?id=58
:return: index
"""
return (self.nir - self.red) / (self.nir + self.red)
def bndvi(self):
"""
Normalized Difference self.nir/self.blue self.blue-normalized difference
vegetation index
https://www.indexdatabase.de/db/i-single.php?id=135
:return: index
"""
return (self.nir - self.blue) / (self.nir + self.blue)
def red_edge_ndvi(self):
"""
Normalized Difference self.rededge/self.red
https://www.indexdatabase.de/db/i-single.php?id=235
:return: index
"""
return (self.redEdge - self.red) / (self.redEdge + self.red)
def gndvi(self):
"""
Normalized Difference self.nir/self.green self.green NDVI
https://www.indexdatabase.de/db/i-single.php?id=401
:return: index
"""
return (self.nir - self.green) / (self.nir + self.green)
def gbndvi(self):
"""
self.green-self.blue NDVI
https://www.indexdatabase.de/db/i-single.php?id=186
:return: index
"""
return (self.nir - (self.green + self.blue)) / (
self.nir + (self.green + self.blue)
)
def grndvi(self):
"""
self.green-self.red NDVI
https://www.indexdatabase.de/db/i-single.php?id=185
:return: index
"""
return (self.nir - (self.green + self.red)) / (
self.nir + (self.green + self.red)
)
def rbndvi(self):
"""
self.red-self.blue NDVI
https://www.indexdatabase.de/db/i-single.php?id=187
:return: index
"""
return (self.nir - (self.blue + self.red)) / (self.nir + (self.blue + self.red))
def pndvi(self):
"""
Pan NDVI
https://www.indexdatabase.de/db/i-single.php?id=188
:return: index
"""
return (self.nir - (self.green + self.red + self.blue)) / (
self.nir + (self.green + self.red + self.blue)
)
def atsavi(self, x=0.08, a=1.22, b=0.03):
"""
Adjusted transformed soil-adjusted VI
https://www.indexdatabase.de/db/i-single.php?id=209
:return: index
"""
return a * (
(self.nir - a * self.red - b)
/ (a * self.nir + self.red - a * b + x * (1 + a**2))
)
def bwdrvi(self):
"""
self.blue-wide dynamic range vegetation index
https://www.indexdatabase.de/db/i-single.php?id=136
:return: index
"""
return (0.1 * self.nir - self.blue) / (0.1 * self.nir + self.blue)
def ci_green(self):
"""
Chlorophyll Index self.green
https://www.indexdatabase.de/db/i-single.php?id=128
:return: index
"""
return (self.nir / self.green) - 1
def ci_rededge(self):
"""
Chlorophyll Index self.redEdge
https://www.indexdatabase.de/db/i-single.php?id=131
:return: index
"""
return (self.nir / self.redEdge) - 1
def ci(self):
"""
Coloration Index
https://www.indexdatabase.de/db/i-single.php?id=11
:return: index
"""
return (self.red - self.blue) / self.red
def ctvi(self):
"""
Corrected Transformed Vegetation Index
https://www.indexdatabase.de/db/i-single.php?id=244
:return: index
"""
ndvi = self.ndvi()
return ((ndvi + 0.5) / (abs(ndvi + 0.5))) * (abs(ndvi + 0.5) ** (1 / 2))
def gdvi(self):
"""
Difference self.nir/self.green self.green Difference Vegetation Index
https://www.indexdatabase.de/db/i-single.php?id=27
:return: index
"""
return self.nir - self.green
def evi(self):
"""
Enhanced Vegetation Index
https://www.indexdatabase.de/db/i-single.php?id=16
:return: index
"""
return 2.5 * (
(self.nir - self.red) / (self.nir + 6 * self.red - 7.5 * self.blue + 1)
)
def gemi(self):
"""
Global Environment Monitoring Index
https://www.indexdatabase.de/db/i-single.php?id=25
:return: index
"""
n = (2 * (self.nir**2 - self.red**2) + 1.5 * self.nir + 0.5 * self.red) / (
self.nir + self.red + 0.5
)
return n * (1 - 0.25 * n) - (self.red - 0.125) / (1 - self.red)
def gosavi(self, y=0.16):
"""
self.green Optimized Soil Adjusted Vegetation Index
https://www.indexdatabase.de/db/i-single.php?id=29
with Y = 0.16
:return: index
"""
return (self.nir - self.green) / (self.nir + self.green + y)
def gsavi(self, n=0.5):
"""
self.green Soil Adjusted Vegetation Index
https://www.indexdatabase.de/db/i-single.php?id=31
with N = 0.5
:return: index
"""
return ((self.nir - self.green) / (self.nir + self.green + n)) * (1 + n)
def hue(self):
"""
Hue
https://www.indexdatabase.de/db/i-single.php?id=34
:return: index
"""
return np.arctan(
((2 * self.red - self.green - self.blue) / 30.5) * (self.green - self.blue)
)
def ivi(self, a=None, b=None):
"""
Ideal vegetation index
https://www.indexdatabase.de/db/i-single.php?id=276
b=intercept of vegetation line
a=soil line slope
:return: index
"""
return (self.nir - b) / (a * self.red)
def ipvi(self):
"""
Infrared Percentage Vegetation Index (IPVI)
https://www.indexdatabase.de/db/i-single.php?id=35
:return: index
"""
return (self.nir / ((self.nir + self.red) / 2)) * (self.ndvi() + 1)
def i(self):
"""
Intensity
https://www.indexdatabase.de/db/i-single.php?id=36
:return: index
"""
return (self.red + self.green + self.blue) / 30.5
def rvi(self):
"""
Ratio-Vegetation-Index
http://www.seos-project.eu/modules/remotesensing/remotesensing-c03-s01-p01.html
:return: index
"""
return self.nir / self.red
def mrvi(self):
"""
Modified Normalized Difference Vegetation Index RVI
https://www.indexdatabase.de/db/i-single.php?id=275
:return: index
"""
return (self.rvi() - 1) / (self.rvi() + 1)
def m_savi(self):
"""
Modified Soil Adjusted Vegetation Index
https://www.indexdatabase.de/db/i-single.php?id=44
:return: index
"""
return (
(2 * self.nir + 1)
- ((2 * self.nir + 1) ** 2 - 8 * (self.nir - self.red)) ** (1 / 2)
) / 2
def norm_g(self):
"""
Norm G
https://www.indexdatabase.de/db/i-single.php?id=50
:return: index
"""
return self.green / (self.nir + self.red + self.green)
def norm_nir(self):
"""
Norm self.nir
https://www.indexdatabase.de/db/i-single.php?id=51
:return: index
"""
return self.nir / (self.nir + self.red + self.green)
def norm_r(self):
"""
Norm R
https://www.indexdatabase.de/db/i-single.php?id=52
:return: index
"""
return self.red / (self.nir + self.red + self.green)
def ngrdi(self):
"""
Normalized Difference Green/Red: normalized green-red difference index,
Visible Atmospherically Resistant Index Green (VIgreen)
https://www.indexdatabase.de/db/i-single.php?id=390
:return: index
"""
return (self.green - self.red) / (self.green + self.red)
def ri(self):
"""
Normalized Difference Red/Green: Redness Index (RI)
https://www.indexdatabase.de/db/i-single.php?id=74
:return: index
"""
return (self.red - self.green) / (self.red + self.green)
def s(self):
"""
Saturation
https://www.indexdatabase.de/db/i-single.php?id=77
:return: index
"""
max_value = np.max([np.max(self.red), np.max(self.green), np.max(self.blue)])
min_value = np.min([np.min(self.red), np.min(self.green), np.min(self.blue)])
return (max_value - min_value) / max_value
def _if(self):
"""
Shape Index
https://www.indexdatabase.de/db/i-single.php?id=79
:return: index
"""
return (2 * self.red - self.green - self.blue) / (self.green - self.blue)
def dvi(self):
"""
Simple Ratio self.nir/self.red Difference Vegetation Index, Vegetation Index
Number (VIN)
https://www.indexdatabase.de/db/i-single.php?id=12
:return: index
"""
return self.nir / self.red
def tvi(self):
"""
Transformed Vegetation Index
https://www.indexdatabase.de/db/i-single.php?id=98
:return: index
"""
return (self.ndvi() + 0.5) ** (1 / 2)
def ndre(self):
    """
    Normalized Difference Red Edge (NDRE)
    :return: index
    """
    return (self.nir - self.redEdge) / (self.nir + self.redEdge)
"""
# generating random matrices to test this class
red = np.ones((1000,1000, 1),dtype="float64") * 46787
green = np.ones((1000,1000, 1),dtype="float64") * 23487
blue = np.ones((1000,1000, 1),dtype="float64") * 14578
redEdge = np.ones((1000,1000, 1),dtype="float64") * 51045
nir = np.ones((1000,1000, 1),dtype="float64") * 52200
# Examples of how to use the class
# instantiating the class
cl = IndexCalculation()
# instantiating the class with the values
#cl = IndexCalculation(red=red, green=green, blue=blue, red_edge=redEdge, nir=nir)
# how to set the values after instantiating the class cl (to update the data, or
# when the class was not instantiated with the values)
cl.set_matricies(red=red, green=green, blue=blue, red_edge=redEdge, nir=nir)
# calculating the indices for the instantiated values in the class
# Note: the CCCI index can be changed to any index implemented in the class.
indexValue_form1 = cl.calculation("CCCI", red=red, green=green, blue=blue,
                                  red_edge=redEdge, nir=nir).astype(np.float64)
indexValue_form2 = cl.ccci()
# calculating the index by passing the values directly -- you can pass just the
# values you prefer; note: the *calculation* method calls *set_matricies* itself
indexValue_form3 = cl.calculation("CCCI", red=red, green=green, blue=blue,
                                  red_edge=redEdge, nir=nir).astype(np.float64)
print("Form 1: "+np.array2string(indexValue_form1, precision=20, separator=', ',
floatmode='maxprec_equal'))
print("Form 2: "+np.array2string(indexValue_form2, precision=20, separator=', ',
floatmode='maxprec_equal'))
print("Form 3: "+np.array2string(indexValue_form3, precision=20, separator=', ',
floatmode='maxprec_equal'))
# A list of examples results for different type of data at NDVI
# float16 -> 0.31567383 #NDVI (red = 50, nir = 100)
# float32 -> 0.31578946 #NDVI (red = 50, nir = 100)
# float64 -> 0.3157894736842105 #NDVI (red = 50, nir = 100)
# longdouble -> 0.3157894736842105 #NDVI (red = 50, nir = 100)
"""
================================================
FILE: digital_image_processing/morphological_operations/__init__.py
================================================
================================================
FILE: digital_image_processing/morphological_operations/dilation_operation.py
================================================
from pathlib import Path
import numpy as np
from PIL import Image
def rgb_to_gray(rgb: np.ndarray) -> np.ndarray:
"""
Return gray image from rgb image
>>> rgb_to_gray(np.array([[[127, 255, 0]]]))
array([[187.6453]])
>>> rgb_to_gray(np.array([[[0, 0, 0]]]))
array([[0.]])
>>> rgb_to_gray(np.array([[[2, 4, 1]]]))
array([[3.0598]])
>>> rgb_to_gray(np.array([[[26, 255, 14], [5, 147, 20], [1, 200, 0]]]))
array([[159.0524, 90.0635, 117.6989]])
"""
r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
return 0.2989 * r + 0.5870 * g + 0.1140 * b
def gray_to_binary(gray: np.ndarray) -> np.ndarray:
"""
Return binary image from gray image
>>> gray_to_binary(np.array([[127, 255, 0]]))
array([[False, True, False]])
>>> gray_to_binary(np.array([[0]]))
array([[False]])
>>> gray_to_binary(np.array([[26.2409, 4.9315, 1.4729]]))
array([[False, False, False]])
>>> gray_to_binary(np.array([[26, 255, 14], [5, 147, 20], [1, 200, 0]]))
array([[False, True, False],
[False, True, False],
[False, True, False]])
"""
return (gray > 127) & (gray <= 255)
def dilation(image: np.ndarray, kernel: np.ndarray) -> np.ndarray:
"""
Return dilated image
>>> dilation(np.array([[True, False, True]]), np.array([[0, 1, 0]]))
array([[False, False, False]])
>>> dilation(np.array([[False, False, True]]), np.array([[1, 0, 1]]))
array([[False, False, False]])
"""
output = np.zeros_like(image)
image_padded = np.zeros(
(image.shape[0] + kernel.shape[0] - 1, image.shape[1] + kernel.shape[1] - 1)
)
# Copy image to padded image
image_padded[kernel.shape[0] - 2 : -1 :, kernel.shape[1] - 2 : -1 :] = image
# Iterate over image & apply kernel
for x in range(image.shape[1]):
for y in range(image.shape[0]):
summation = (
kernel * image_padded[y : y + kernel.shape[0], x : x + kernel.shape[1]]
).sum()
output[y, x] = int(summation > 0)
return output
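# Illustrative sketch (not part of the original file): dilating a single set pixel
# with the 3x3 cross structuring element grows it into a cross, which is easy to
# verify by eye on a small array.
if __name__ == "__main__":
    tiny = np.zeros((5, 5), dtype=bool)
    tiny[2, 2] = True
    cross = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
    print(dilation(tiny, cross).astype(int))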
if __name__ == "__main__":
# read original image
lena_path = Path(__file__).resolve().parent / "image_data" / "lena.jpg"
lena = np.array(Image.open(lena_path))
# kernel to be applied
structuring_element = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
output = dilation(gray_to_binary(rgb_to_gray(lena)), structuring_element)
# Save the output image
pil_img = Image.fromarray(output).convert("RGB")
pil_img.save("result_dilation.png")
================================================
FILE: digital_image_processing/morphological_operations/erosion_operation.py
================================================
from pathlib import Path
import numpy as np
from PIL import Image
def rgb_to_gray(rgb: np.ndarray) -> np.ndarray:
"""
Return gray image from rgb image
>>> rgb_to_gray(np.array([[[127, 255, 0]]]))
array([[187.6453]])
>>> rgb_to_gray(np.array([[[0, 0, 0]]]))
array([[0.]])
>>> rgb_to_gray(np.array([[[2, 4, 1]]]))
array([[3.0598]])
>>> rgb_to_gray(np.array([[[26, 255, 14], [5, 147, 20], [1, 200, 0]]]))
array([[159.0524, 90.0635, 117.6989]])
"""
r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
return 0.2989 * r + 0.5870 * g + 0.1140 * b
def gray_to_binary(gray: np.ndarray) -> np.ndarray:
"""
Return binary image from gray image
>>> gray_to_binary(np.array([[127, 255, 0]]))
array([[False, True, False]])
>>> gray_to_binary(np.array([[0]]))
array([[False]])
>>> gray_to_binary(np.array([[26.2409, 4.9315, 1.4729]]))
array([[False, False, False]])
>>> gray_to_binary(np.array([[26, 255, 14], [5, 147, 20], [1, 200, 0]]))
array([[False, True, False],
[False, True, False],
[False, True, False]])
"""
return (gray > 127) & (gray <= 255)
def erosion(image: np.ndarray, kernel: np.ndarray) -> np.ndarray:
"""
Return eroded image
>>> erosion(np.array([[True, True, False]]), np.array([[0, 1, 0]]))
array([[False, False, False]])
>>> erosion(np.array([[True, False, False]]), np.array([[1, 1, 0]]))
array([[False, False, False]])
"""
output = np.zeros_like(image)
image_padded = np.zeros(
(image.shape[0] + kernel.shape[0] - 1, image.shape[1] + kernel.shape[1] - 1)
)
# Copy image to padded image
image_padded[kernel.shape[0] - 2 : -1 :, kernel.shape[1] - 2 : -1 :] = image
# Iterate over image & apply kernel
for x in range(image.shape[1]):
for y in range(image.shape[0]):
summation = (
kernel * image_padded[y : y + kernel.shape[0], x : x + kernel.shape[1]]
).sum()
output[y, x] = int(summation == kernel.sum())  # every pixel under a set kernel cell must be set
return output
if __name__ == "__main__":
# read original image
lena_path = Path(__file__).resolve().parent / "image_data" / "lena.jpg"
lena = np.array(Image.open(lena_path))
# kernel to be applied
structuring_element = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
# Apply erosion operation to a binary image
output = erosion(gray_to_binary(rgb_to_gray(lena)), structuring_element)
# Save the output image
pil_img = Image.fromarray(output).convert("RGB")
pil_img.save("result_erosion.png")
================================================
FILE: digital_image_processing/resize/__init__.py
================================================
================================================
FILE: digital_image_processing/resize/resize.py
================================================
"""Multiple image resizing techniques"""
import numpy as np
from cv2 import destroyAllWindows, imread, imshow, waitKey
class NearestNeighbour:
"""
Simplest and fastest version of image resizing.
Source: https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation
"""
def __init__(self, img, dst_width: int, dst_height: int):
if dst_width <= 0 or dst_height <= 0:
raise ValueError("Destination width/height should be > 0")
self.img = img
self.src_w = img.shape[1]
self.src_h = img.shape[0]
self.dst_w = dst_width
self.dst_h = dst_height
self.ratio_x = self.src_w / self.dst_w
self.ratio_y = self.src_h / self.dst_h
self.output = self.output_img = (
np.ones((self.dst_h, self.dst_w, 3), np.uint8) * 255
)
def process(self):
for i in range(self.dst_h):
for j in range(self.dst_w):
self.output[i][j] = self.img[self.get_y(i)][self.get_x(j)]
def get_x(self, x: int) -> int:
"""
Get parent X coordinate for destination X
:param x: Destination X coordinate
:return: Parent X coordinate based on `x ratio`
>>> nn = NearestNeighbour(imread("digital_image_processing/image_data/lena.jpg",
... 1), 100, 100)
>>> nn.ratio_x = 0.5
>>> nn.get_x(4)
2
"""
return int(self.ratio_x * x)
def get_y(self, y: int) -> int:
"""
Get parent Y coordinate for destination Y
:param y: Destination Y coordinate
:return: Parent Y coordinate based on `y ratio`
>>> nn = NearestNeighbour(imread("digital_image_processing/image_data/lena.jpg",
... 1), 100, 100)
>>> nn.ratio_y = 0.5
>>> nn.get_y(4)
2
"""
return int(self.ratio_y * y)
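# Illustrative sketch (not part of the original module): run NearestNeighbour on a
# tiny synthetic image so the coordinate mapping can be checked without reading an
# image file from disk.
if __name__ == "__main__":
    demo_img = np.arange(12, dtype=np.uint8).reshape((2, 2, 3))
    demo = NearestNeighbour(demo_img, 4, 4)
    demo.process()
    print(demo.output.shape)  # (4, 4, 3): every source pixel is repeated 2x2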
if __name__ == "__main__":
dst_w, dst_h = 800, 600
im = imread("image_data/lena.jpg", 1)
n = NearestNeighbour(im, dst_w, dst_h)
n.process()
imshow(
f"Image resized from: {im.shape[1]}x{im.shape[0]} to {dst_w}x{dst_h}", n.output
)
waitKey(0)
destroyAllWindows()
================================================
FILE: digital_image_processing/rotation/__init__.py
================================================
================================================
FILE: digital_image_processing/rotation/rotation.py
================================================
from pathlib import Path
import cv2
import numpy as np
from matplotlib import pyplot as plt
def get_rotation(
img: np.ndarray, pt1: np.ndarray, pt2: np.ndarray, rows: int, cols: int
) -> np.ndarray:
"""
Get image rotation
:param img: np.ndarray
:param pt1: 3x2 list of source points
:param pt2: 3x2 list of destination points
:param rows: number of rows (height) of the image
:param cols: number of columns (width) of the image
:return: np.ndarray
"""
matrix = cv2.getAffineTransform(pt1, pt2)
# cv2.warpAffine expects the output size as (width, height)
return cv2.warpAffine(img, matrix, (cols, rows))
if __name__ == "__main__":
# read original image
image = cv2.imread(
str(Path(__file__).resolve().parent.parent / "image_data" / "lena.jpg")
)
# turn image in gray scale value
gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# get image shape
img_rows, img_cols = gray_img.shape
# set different points to rotate image
pts1 = np.array([[50, 50], [200, 50], [50, 200]], np.float32)
pts2 = np.array([[10, 100], [200, 50], [100, 250]], np.float32)
pts3 = np.array([[50, 50], [150, 50], [120, 200]], np.float32)
pts4 = np.array([[10, 100], [80, 50], [180, 250]], np.float32)
# add all rotated images in a list
images = [
gray_img,
get_rotation(gray_img, pts1, pts2, img_rows, img_cols),
get_rotation(gray_img, pts2, pts3, img_rows, img_cols),
get_rotation(gray_img, pts2, pts4, img_rows, img_cols),
]
# plot different image rotations
fig = plt.figure(1)
titles = ["Original", "Rotation 1", "Rotation 2", "Rotation 3"]
for i, image in enumerate(images):
plt.subplot(2, 2, i + 1), plt.imshow(image, "gray")
plt.title(titles[i])
plt.axis("off")
plt.subplots_adjust(left=0.0, bottom=0.05, right=1.0, top=0.95)
plt.show()
================================================
FILE: digital_image_processing/sepia.py
================================================
"""
Implementation of an algorithm using OpenCV to tone an image with the sepia technique
"""
from cv2 import destroyAllWindows, imread, imshow, waitKey
def make_sepia(img, factor: int):
"""
Creates a sepia-toned version of the image.
Source: https://en.wikipedia.org/wiki/Sepia_(color)
"""
pixel_h, pixel_v = img.shape[0], img.shape[1]
def to_grayscale(blue, green, red):
"""
Helper function to create pixel's greyscale representation
Src: https://pl.wikipedia.org/wiki/YUV
"""
return 0.2126 * red + 0.587 * green + 0.114 * blue
def normalize(value):
"""Helper function to normalize R/G/B value -> return 255 if value > 255"""
return min(value, 255)
for i in range(pixel_h):
for j in range(pixel_v):
greyscale = int(to_grayscale(*img[i][j]))
img[i][j] = [
normalize(greyscale),
normalize(greyscale + factor),
normalize(greyscale + 2 * factor),
]
return img
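# Illustrative sketch (not part of the original module): apply make_sepia to a tiny
# uniform grey image so the greyscale-plus-factor arithmetic is visible without
# reading lena.jpg from disk.
if __name__ == "__main__":
    import numpy as np

    tiny = np.full((1, 2, 3), 100, dtype=np.uint8)  # two grey BGR pixels
    # greyscale = int(0.2126*100 + 0.587*100 + 0.114*100) = 91,
    # so each pixel becomes [91, 91 + 20, 91 + 40] = [91, 111, 131]
    print(make_sepia(tiny, factor=20))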
if __name__ == "__main__":
# read original image
images = {
percentage: imread("image_data/lena.jpg", 1) for percentage in (10, 20, 30, 40)
}
for percentage, img in images.items():
make_sepia(img, percentage)
for percentage, img in images.items():
imshow(f"Original image with sepia (factor: {percentage})", img)
waitKey(0)
destroyAllWindows()
================================================
FILE: digital_image_processing/test_digital_image_processing.py
================================================
"""
Pytest tests for Digital Image Processing
"""
import numpy as np
from cv2 import COLOR_BGR2GRAY, cvtColor, imread
from numpy import array, uint8
from PIL import Image
from digital_image_processing import change_contrast as cc
from digital_image_processing import convert_to_negative as cn
from digital_image_processing import sepia as sp
from digital_image_processing.dithering import burkes as bs
from digital_image_processing.edge_detection import canny
from digital_image_processing.filters import convolve as conv
from digital_image_processing.filters import gaussian_filter as gg
from digital_image_processing.filters import local_binary_pattern as lbp
from digital_image_processing.filters import median_filter as med
from digital_image_processing.filters import sobel_filter as sob
from digital_image_processing.resize import resize as rs
img = imread(r"digital_image_processing/image_data/lena_small.jpg")
gray = cvtColor(img, COLOR_BGR2GRAY)
# Test: convert_to_negative()
def test_convert_to_negative():
negative_img = cn.convert_to_negative(img)
# assert negative_img array for at least one True
assert negative_img.any()
# Test: change_contrast()
def test_change_contrast():
with Image.open("digital_image_processing/image_data/lena_small.jpg") as img:
# Work around assertion for response
assert str(cc.change_contrast(img, 110)).startswith(
    "<PIL.Image.Image image mode=RGB size=100x100 at"
)
================================================
FILE: divide_and_conquer/closest_pair_of_points.py
================================================
"""
The algorithm finds the distance between the closest pair of points
in the given n points.
Approach used -> Divide and conquer
The points are sorted based on Xco-ords and
then based on Yco-ords separately.
And by applying divide and conquer approach,
minimum distance is obtained recursively.
>> Closest points can lie on different sides of partition.
This case handled by forming a strip of points
whose Xco-ords distance is less than closest_pair_dis
from mid-point's Xco-ords. Points sorted based on Yco-ords
are used in this step to reduce sorting time.
Closest pair distance is found in the strip of points. (closest_in_strip)
min(closest_pair_dis, closest_in_strip) would be the final answer.
Time complexity: O(n * log n)
"""
def euclidean_distance_sqr(point1, point2):
"""
>>> euclidean_distance_sqr([1,2],[2,4])
5
"""
return (point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2
def column_based_sort(array, column=0):
"""
>>> column_based_sort([(5, 1), (4, 2), (3, 0)], 1)
[(3, 0), (5, 1), (4, 2)]
"""
return sorted(array, key=lambda x: x[column])
def dis_between_closest_pair(points, points_counts, min_dis=float("inf")):
"""
brute force approach to find distance between closest pair points
Parameters :
points, points_count, min_dis (list(tuple(int, int)), int, int)
Returns :
min_dis (float): distance between closest pair of points
>>> dis_between_closest_pair([[1,2],[2,4],[5,7],[8,9],[11,0]],5)
5
"""
for i in range(points_counts - 1):
for j in range(i + 1, points_counts):
current_dis = euclidean_distance_sqr(points[i], points[j])
min_dis = min(min_dis, current_dis)
return min_dis
def dis_between_closest_in_strip(points, points_counts, min_dis=float("inf")):
"""
closest pair of points in strip
Parameters :
points, points_count, min_dis (list(tuple(int, int)), int, int)
Returns :
min_dis (float): distance btw closest pair of points in the strip (< min_dis)
>>> dis_between_closest_in_strip([[1,2],[2,4],[5,7],[8,9],[11,0]],5)
85
"""
for i in range(min(6, points_counts - 1), points_counts):
for j in range(max(0, i - 6), i):
current_dis = euclidean_distance_sqr(points[i], points[j])
min_dis = min(min_dis, current_dis)
return min_dis
def closest_pair_of_points_sqr(points_sorted_on_x, points_sorted_on_y, points_counts):
"""divide and conquer approach
Parameters :
points, points_count (list(tuple(int, int)), int)
Returns :
(float): distance btw closest pair of points
>>> closest_pair_of_points_sqr([(1, 2), (3, 4)], [(5, 6), (7, 8)], 2)
8
"""
# base case
if points_counts <= 3:
return dis_between_closest_pair(points_sorted_on_x, points_counts)
# recursion
mid = points_counts // 2
closest_in_left = closest_pair_of_points_sqr(
points_sorted_on_x, points_sorted_on_y[:mid], mid
)
closest_in_right = closest_pair_of_points_sqr(
points_sorted_on_y, points_sorted_on_y[mid:], points_counts - mid
)
closest_pair_dis = min(closest_in_left, closest_in_right)
"""
cross_strip contains the points, whose Xcoords are at a
distance(< closest_pair_dis) from mid's Xcoord
"""
cross_strip = []
for point in points_sorted_on_x:
if abs(point[0] - points_sorted_on_x[mid][0]) < closest_pair_dis:
cross_strip.append(point)
closest_in_strip = dis_between_closest_in_strip(
cross_strip, len(cross_strip), closest_pair_dis
)
return min(closest_pair_dis, closest_in_strip)
def closest_pair_of_points(points, points_counts):
"""
>>> closest_pair_of_points([(2, 3), (12, 30)], len([(2, 3), (12, 30)]))
28.792360097775937
"""
points_sorted_on_x = column_based_sort(points, column=0)
points_sorted_on_y = column_based_sort(points, column=1)
return (
closest_pair_of_points_sqr(
points_sorted_on_x, points_sorted_on_y, points_counts
)
) ** 0.5
if __name__ == "__main__":
points = [(2, 3), (12, 30), (40, 50), (5, 1), (12, 10), (3, 4)]
print("Distance:", closest_pair_of_points(points, len(points)))
================================================
FILE: divide_and_conquer/convex_hull.py
================================================
"""
The convex hull problem is the problem of finding all the vertices of the convex
polygon P of a set of points in a plane, such that all the points are either on
the vertices of P or inside P. The convex hull problem has several applications
in geometrical problems, computer graphics and game development.
Two algorithms have been implemented for the convex hull problem here.
1. A brute-force algorithm which runs in O(n^3)
2. A divide-and-conquer algorithm which runs in O(n log(n))
There are several other algorithms for the convex hull problem
which have not been implemented here, yet.
"""
from __future__ import annotations
from collections.abc import Iterable
class Point:
"""
Defines a 2-d point for use by all convex-hull algorithms.
Parameters
----------
x: an int or a float, the x-coordinate of the 2-d point
y: an int or a float, the y-coordinate of the 2-d point
Examples
--------
>>> Point(1, 2)
(1.0, 2.0)
>>> Point("1", "2")
(1.0, 2.0)
>>> Point(1, 2) > Point(0, 1)
True
>>> Point(1, 1) == Point(1, 1)
True
>>> Point(-0.5, 1) == Point(0.5, 1)
False
>>> Point("pi", "e")
Traceback (most recent call last):
...
ValueError: could not convert string to float: 'pi'
"""
def __init__(self, x, y):
self.x, self.y = float(x), float(y)
def __eq__(self, other):
return self.x == other.x and self.y == other.y
def __ne__(self, other):
return not self == other
def __gt__(self, other):
if self.x > other.x:
return True
elif self.x == other.x:
return self.y > other.y
return False
def __lt__(self, other):
return not self > other
def __ge__(self, other):
if self.x > other.x:
return True
elif self.x == other.x:
return self.y >= other.y
return False
def __le__(self, other):
if self.x < other.x:
return True
elif self.x == other.x:
return self.y <= other.y
return False
def __repr__(self):
return f"({self.x}, {self.y})"
def __hash__(self):
return hash(self.x)
def _construct_points(
list_of_tuples: list[Point] | list[list[float]] | Iterable[list[float]],
) -> list[Point]:
"""
constructs a list of points from an array-like object of numbers
Arguments
---------
list_of_tuples: array-like object of type numbers. Acceptable types so far
are lists, tuples and sets.
Returns
--------
points: a list where each item is of type Point. This contains only objects
which can be converted into a Point.
Examples
-------
>>> _construct_points([[1, 1], [2, -1], [0.3, 4]])
[(1.0, 1.0), (2.0, -1.0), (0.3, 4.0)]
>>> _construct_points([1, 2])
Ignoring deformed point 1. All points must have at least 2 coordinates.
Ignoring deformed point 2. All points must have at least 2 coordinates.
[]
>>> _construct_points([])
[]
>>> _construct_points(None)
[]
"""
points: list[Point] = []
if list_of_tuples:
for p in list_of_tuples:
if isinstance(p, Point):
points.append(p)
else:
try:
points.append(Point(p[0], p[1]))
except (IndexError, TypeError):
print(
f"Ignoring deformed point {p}. All points"
" must have at least 2 coordinates."
)
return points
def _validate_input(points: list[Point] | list[list[float]]) -> list[Point]:
"""
validates an input instance before a convex-hull algorithms uses it
Parameters
---------
points: array-like, the 2d points to validate before using with
a convex-hull algorithm. The elements of points must be either lists, tuples or
Points.
Returns
-------
points: array_like, a list of all well-defined Points constructed from the input.
Exception
---------
ValueError: if points is empty or None, or if a wrong data structure like a scalar
is passed
TypeError: if an iterable but non-indexable object (e.g. a dictionary) is passed.
The exception to this is a set, which we'll convert to a list before using
Examples
-------
>>> _validate_input([[1, 2]])
[(1.0, 2.0)]
>>> _validate_input([(1, 2)])
[(1.0, 2.0)]
>>> _validate_input([Point(2, 1), Point(-1, 2)])
[(2.0, 1.0), (-1.0, 2.0)]
>>> _validate_input([])
Traceback (most recent call last):
...
ValueError: Expecting a list of points but got []
>>> _validate_input(1)
Traceback (most recent call last):
...
ValueError: Expecting an iterable object but got an non-iterable type 1
"""
if not hasattr(points, "__iter__"):
msg = f"Expecting an iterable object but got an non-iterable type {points}"
raise ValueError(msg)
if not points:
msg = f"Expecting a list of points but got {points}"
raise ValueError(msg)
return _construct_points(points)
def _det(a: Point, b: Point, c: Point) -> float:
"""
Computes the signed perpendicular distance of a 2d point c from the line segment
ab. The sign indicates the direction of c relative to ab.
A positive value means c is above ab (to the left), while a negative value
means c is below ab (to the right). 0 means all three points are on a straight line.
As a side note, 0.5 * abs(det) is the area of the triangle abc
Parameters
----------
a: point, the point on the left end of line segment ab
b: point, the point on the right end of line segment ab
c: point, the point for which the direction and location is desired.
Returns
--------
det: float, abs(det) is proportional to the distance of c from ab. The sign
indicates which side of the line segment ab the point c is on. det is computed
as (a.x * b.y + b.x * c.y + c.x * a.y) - (a.y * b.x + b.y * c.x + c.y * a.x)
Examples
----------
>>> _det(Point(1, 1), Point(1, 2), Point(1, 5))
0.0
>>> _det(Point(0, 0), Point(10, 0), Point(0, 10))
100.0
>>> _det(Point(0, 0), Point(10, 0), Point(0, -10))
-100.0
"""
det = (a.x * b.y + b.x * c.y + c.x * a.y) - (a.y * b.x + b.y * c.x + c.y * a.x)
return det
def convex_hull_bf(points: list[Point]) -> list[Point]:
"""
Constructs the convex hull of a set of 2D points using a brute force algorithm.
The algorithm basically considers all pairs of points (i, j) and uses the
definition of convexity to determine whether (i, j) is part of the convex hull.
(i, j) is part of the convex hull if and only if there are no points on both
sides of the line segment connecting i and j, and there is no collinear point k
that lies outside the segment ij.
Runtime: O(n^3) - definitely horrible
Parameters
---------
points: array-like of object of Points, lists or tuples.
The set of 2d points for which the convex-hull is needed
Returns
------
convex_set: list, the convex-hull of points sorted in non-decreasing order.
See Also
--------
convex_hull_recursive,
Examples
---------
>>> convex_hull_bf([[0, 0], [1, 0], [10, 1]])
[(0.0, 0.0), (1.0, 0.0), (10.0, 1.0)]
>>> convex_hull_bf([[0, 0], [1, 0], [10, 0]])
[(0.0, 0.0), (10.0, 0.0)]
>>> convex_hull_bf([[-1, 1],[-1, -1], [0, 0], [0.5, 0.5], [1, -1], [1, 1],
... [-0.75, 1]])
[(-1.0, -1.0), (-1.0, 1.0), (1.0, -1.0), (1.0, 1.0)]
>>> convex_hull_bf([(0, 3), (2, 2), (1, 1), (2, 1), (3, 0), (0, 0), (3, 3),
... (2, -1), (2, -4), (1, -3)])
[(0.0, 0.0), (0.0, 3.0), (1.0, -3.0), (2.0, -4.0), (3.0, 0.0), (3.0, 3.0)]
"""
points = sorted(_validate_input(points))
n = len(points)
convex_set = set()
for i in range(n - 1):
for j in range(i + 1, n):
points_left_of_ij = points_right_of_ij = False
ij_part_of_convex_hull = True
for k in range(n):
if k not in {i, j}:
det_k = _det(points[i], points[j], points[k])
if det_k > 0:
points_left_of_ij = True
elif det_k < 0:
points_right_of_ij = True
# point[i], point[j], point[k] all lie on a straight line
# if point[k] is to the left of point[i] or it's to the
# right of point[j], then point[i], point[j] cannot be
# part of the convex hull of A
elif points[k] < points[i] or points[k] > points[j]:
ij_part_of_convex_hull = False
break
if points_left_of_ij and points_right_of_ij:
ij_part_of_convex_hull = False
break
if ij_part_of_convex_hull:
convex_set.update([points[i], points[j]])
return sorted(convex_set)
def convex_hull_recursive(points: list[Point]) -> list[Point]:
"""
Constructs the convex hull of a set of 2D points using a divide-and-conquer strategy
The algorithm exploits the geometric properties of the problem by repeatedly
partitioning the set of points into smaller hulls, and finding the convex hull of
these smaller hulls. The union of the convex hull from smaller hulls is the
solution to the convex hull of the larger problem.
Parameter
---------
points: array-like of object of Points, lists or tuples.
The set of 2d points for which the convex-hull is needed
Runtime: O(n log n)
Returns
-------
convex_set: list, the convex-hull of points sorted in non-decreasing order.
Examples
---------
>>> convex_hull_recursive([[0, 0], [1, 0], [10, 1]])
[(0.0, 0.0), (1.0, 0.0), (10.0, 1.0)]
>>> convex_hull_recursive([[0, 0], [1, 0], [10, 0]])
[(0.0, 0.0), (10.0, 0.0)]
>>> convex_hull_recursive([[-1, 1],[-1, -1], [0, 0], [0.5, 0.5], [1, -1], [1, 1],
... [-0.75, 1]])
[(-1.0, -1.0), (-1.0, 1.0), (1.0, -1.0), (1.0, 1.0)]
>>> convex_hull_recursive([(0, 3), (2, 2), (1, 1), (2, 1), (3, 0), (0, 0), (3, 3),
... (2, -1), (2, -4), (1, -3)])
[(0.0, 0.0), (0.0, 3.0), (1.0, -3.0), (2.0, -4.0), (3.0, 0.0), (3.0, 3.0)]
"""
points = sorted(_validate_input(points))
n = len(points)
# divide all the points into an upper hull and a lower hull
# the left most point and the right most point are definitely
# members of the convex hull by definition.
# use these two anchors to divide all the points into two hulls,
# an upper hull and a lower hull.
# all points to the left (above) the line joining the extreme points belong to the
# upper hull
# all points to the right (below) the line joining the extreme points belong to the
# lower hull
# ignore all points on the line joining the extreme points since they cannot be
# part of the convex hull
left_most_point = points[0]
right_most_point = points[n - 1]
convex_set = {left_most_point, right_most_point}
upper_hull = []
lower_hull = []
for i in range(1, n - 1):
det = _det(left_most_point, right_most_point, points[i])
if det > 0:
upper_hull.append(points[i])
elif det < 0:
lower_hull.append(points[i])
_construct_hull(upper_hull, left_most_point, right_most_point, convex_set)
_construct_hull(lower_hull, right_most_point, left_most_point, convex_set)
return sorted(convex_set)
def _construct_hull(
points: list[Point], left: Point, right: Point, convex_set: set[Point]
) -> None:
"""
Parameters
---------
points: list or None, the hull of points from which to choose the next convex-hull
point
left: Point, the point to the left of line segment joining left and right
right: The point to the right of the line segment joining left and right
convex_set: set, the current convex-hull. The state of convex-set gets updated by
this function
Note
----
For the line segment 'ab', 'a' is on the left and 'b' on the right.
but the reverse is true for the line segment 'ba'.
Returns
-------
Nothing, only updates the state of convex-set
"""
if points:
extreme_point = None
extreme_point_distance = float("-inf")
candidate_points = []
for p in points:
det = _det(left, right, p)
if det > 0:
candidate_points.append(p)
if det > extreme_point_distance:
extreme_point_distance = det
extreme_point = p
if extreme_point:
_construct_hull(candidate_points, left, extreme_point, convex_set)
convex_set.add(extreme_point)
_construct_hull(candidate_points, extreme_point, right, convex_set)
def convex_hull_melkman(points: list[Point]) -> list[Point]:
"""
Constructs the convex hull of a set of 2D points using the Melkman algorithm.
The algorithm works by iteratively inserting points of a simple polygonal chain
(meaning that no line segments between two consecutive points cross each other).
Sorting the points yields such a polygonal chain.
For a detailed description, see http://cgm.cs.mcgill.ca/~athens/cs601/Melkman.html
Runtime: O(n log n) - O(n) if points are already sorted in the input
Parameters
---------
points: array-like of object of Points, lists or tuples.
The set of 2d points for which the convex-hull is needed
Returns
------
convex_set: list, the convex-hull of points sorted in non-decreasing order.
See Also
--------
Examples
---------
>>> convex_hull_melkman([[0, 0], [1, 0], [10, 1]])
[(0.0, 0.0), (1.0, 0.0), (10.0, 1.0)]
>>> convex_hull_melkman([[0, 0], [1, 0], [10, 0]])
[(0.0, 0.0), (10.0, 0.0)]
>>> convex_hull_melkman([[-1, 1],[-1, -1], [0, 0], [0.5, 0.5], [1, -1], [1, 1],
... [-0.75, 1]])
[(-1.0, -1.0), (-1.0, 1.0), (1.0, -1.0), (1.0, 1.0)]
>>> convex_hull_melkman([(0, 3), (2, 2), (1, 1), (2, 1), (3, 0), (0, 0), (3, 3),
... (2, -1), (2, -4), (1, -3)])
[(0.0, 0.0), (0.0, 3.0), (1.0, -3.0), (2.0, -4.0), (3.0, 0.0), (3.0, 3.0)]
"""
points = sorted(_validate_input(points))
n = len(points)
convex_hull = points[:2]
for i in range(2, n):
det = _det(convex_hull[1], convex_hull[0], points[i])
if det > 0:
convex_hull.insert(0, points[i])
break
elif det < 0:
convex_hull.append(points[i])
break
else:
convex_hull[1] = points[i]
i += 1
for j in range(i, n):
if (
_det(convex_hull[0], convex_hull[-1], points[j]) > 0
and _det(convex_hull[-1], convex_hull[0], points[1]) < 0
):
# The point lies within the convex hull
continue
convex_hull.insert(0, points[j])
convex_hull.append(points[j])
while _det(convex_hull[0], convex_hull[1], convex_hull[2]) >= 0:
del convex_hull[1]
while _det(convex_hull[-1], convex_hull[-2], convex_hull[-3]) <= 0:
del convex_hull[-2]
# `convex_hull` now contains the convex hull in circular order
return sorted(convex_hull[1:] if len(convex_hull) > 3 else convex_hull)
def main():
points = [
(0, 3),
(2, 2),
(1, 1),
(2, 1),
(3, 0),
(0, 0),
(3, 3),
(2, -1),
(2, -4),
(1, -3),
]
# the convex set of points is
# [(0, 0), (0, 3), (1, -3), (2, -4), (3, 0), (3, 3)]
results_bf = convex_hull_bf(points)
results_recursive = convex_hull_recursive(points)
assert results_bf == results_recursive
results_melkman = convex_hull_melkman(points)
assert results_bf == results_melkman
print(results_bf)
if __name__ == "__main__":
main()
================================================
FILE: divide_and_conquer/heaps_algorithm.py
================================================
"""
Heap's algorithm returns the list of all permutations possible from a list.
It minimizes movement by generating each permutation from the previous one
by swapping only two elements.
More information:
https://en.wikipedia.org/wiki/Heap%27s_algorithm.
"""
def heaps(arr: list) -> list:
"""
Pure python implementation of the Heap's algorithm (recursive version),
returning all permutations of a list.
>>> heaps([])
[()]
>>> heaps([0])
[(0,)]
>>> heaps([-1, 1])
[(-1, 1), (1, -1)]
>>> heaps([1, 2, 3])
[(1, 2, 3), (2, 1, 3), (3, 1, 2), (1, 3, 2), (2, 3, 1), (3, 2, 1)]
>>> from itertools import permutations
>>> sorted(heaps([1,2,3])) == sorted(permutations([1,2,3]))
True
>>> all(sorted(heaps(x)) == sorted(permutations(x))
... for x in ([], [0], [-1, 1], [1, 2, 3]))
True
"""
if len(arr) <= 1:
return [tuple(arr)]
res = []
def generate(k: int, arr: list):
if k == 1:
res.append(tuple(arr[:]))
return
generate(k - 1, arr)
for i in range(k - 1):
if k % 2 == 0: # k is even
arr[i], arr[k - 1] = arr[k - 1], arr[i]
else: # k is odd
arr[0], arr[k - 1] = arr[k - 1], arr[0]
generate(k - 1, arr)
generate(len(arr), arr)
return res
if __name__ == "__main__":
user_input = input("Enter numbers separated by a comma:\n").strip()
arr = [int(item) for item in user_input.split(",")]
print(heaps(arr))
================================================
FILE: divide_and_conquer/heaps_algorithm_iterative.py
================================================
"""
Heap's (iterative) algorithm returns the list of all permutations possible from a list.
It minimizes movement by generating each permutation from the previous one
by swapping only two elements.
More information:
https://en.wikipedia.org/wiki/Heap%27s_algorithm.
"""
def heaps(arr: list) -> list:
"""
Pure python implementation of the iterative Heap's algorithm,
returning all permutations of a list.
>>> heaps([])
[()]
>>> heaps([0])
[(0,)]
>>> heaps([-1, 1])
[(-1, 1), (1, -1)]
>>> heaps([1, 2, 3])
[(1, 2, 3), (2, 1, 3), (3, 1, 2), (1, 3, 2), (2, 3, 1), (3, 2, 1)]
>>> from itertools import permutations
>>> sorted(heaps([1,2,3])) == sorted(permutations([1,2,3]))
True
>>> all(sorted(heaps(x)) == sorted(permutations(x))
... for x in ([], [0], [-1, 1], [1, 2, 3]))
True
"""
if len(arr) <= 1:
return [tuple(arr)]
res = []
def generate(n: int, arr: list):
c = [0] * n
res.append(tuple(arr))
i = 0
while i < n:
if c[i] < i:
if i % 2 == 0:
arr[0], arr[i] = arr[i], arr[0]
else:
arr[c[i]], arr[i] = arr[i], arr[c[i]]
res.append(tuple(arr))
c[i] += 1
i = 0
else:
c[i] = 0
i += 1
generate(len(arr), arr)
return res
if __name__ == "__main__":
user_input = input("Enter numbers separated by a comma:\n").strip()
arr = [int(item) for item in user_input.split(",")]
print(heaps(arr))
================================================
FILE: divide_and_conquer/inversions.py
================================================
"""
Given an array-like data structure A[1..n], how many pairs
(i, j) with 1 <= i < j <= n are there such that A[i] > A[j]? These pairs are
called inversions. Counting the number of such inversions in an array-like
object is important: among other things, counting inversions can help
us determine how close a given array is to being sorted.
In this implementation, I provide two algorithms, a divide-and-conquer
algorithm which runs in O(n log n) and a brute-force O(n^2) algorithm.
"""
def count_inversions_bf(arr):
"""
Counts the number of inversions using a naive brute-force algorithm
Parameters
----------
arr: arr: array-like, the list containing the items for which the number
of inversions is desired. The elements of `arr` must be comparable.
Returns
-------
num_inversions: The total number of inversions in `arr`
Examples
---------
>>> count_inversions_bf([1, 4, 2, 4, 1])
4
>>> count_inversions_bf([1, 1, 2, 4, 4])
0
>>> count_inversions_bf([])
0
"""
num_inversions = 0
n = len(arr)
for i in range(n - 1):
for j in range(i + 1, n):
if arr[i] > arr[j]:
num_inversions += 1
return num_inversions
def count_inversions_recursive(arr):
"""
Counts the number of inversions using a divide-and-conquer algorithm
Parameters
-----------
arr: array-like, the list containing the items for which the number
of inversions is desired. The elements of `arr` must be comparable.
Returns
-------
C: a sorted copy of `arr`.
num_inversions: int, the total number of inversions in 'arr'
Examples
--------
>>> count_inversions_recursive([1, 4, 2, 4, 1])
([1, 1, 2, 4, 4], 4)
>>> count_inversions_recursive([1, 1, 2, 4, 4])
([1, 1, 2, 4, 4], 0)
>>> count_inversions_recursive([])
([], 0)
"""
if len(arr) <= 1:
return arr, 0
mid = len(arr) // 2
p = arr[0:mid]
q = arr[mid:]
a, inversion_p = count_inversions_recursive(p)
b, inversions_q = count_inversions_recursive(q)
c, cross_inversions = _count_cross_inversions(a, b)
num_inversions = inversion_p + inversions_q + cross_inversions
return c, num_inversions
def _count_cross_inversions(p, q):
"""
Counts the inversions across two sorted arrays and combines
the two arrays into one sorted array.
For all 1 <= i <= len(P) and for all 1 <= j <= len(Q),
if P[i] > Q[j], then (i, j) is a cross inversion
Parameters
----------
P: array-like, sorted in non-decreasing order
Q: array-like, sorted in non-decreasing order
Returns
------
R: array-like, a sorted array of the elements of `P` and `Q`
num_inversion: int, the number of inversions across `P` and `Q`
Examples
--------
>>> _count_cross_inversions([1, 2, 3], [0, 2, 5])
([0, 1, 2, 2, 3, 5], 4)
>>> _count_cross_inversions([1, 2, 3], [3, 4, 5])
([1, 2, 3, 3, 4, 5], 0)
"""
r = []
i = j = num_inversion = 0
while i < len(p) and j < len(q):
if p[i] > q[j]:
# if p[i] > q[j], then p[k] > q[j] for all i <= k < len(p)
# These are all inversions. The claim emerges from the
# property that P is sorted.
num_inversion += len(p) - i
r.append(q[j])
j += 1
else:
r.append(p[i])
i += 1
if i < len(p):
r.extend(p[i:])
else:
r.extend(q[j:])
return r, num_inversion
def main():
arr_1 = [10, 2, 1, 5, 5, 2, 11]
# this arr has 8 inversions:
# (10, 2), (10, 1), (10, 5), (10, 5), (10, 2), (2, 1), (5, 2), (5, 2)
num_inversions_bf = count_inversions_bf(arr_1)
_, num_inversions_recursive = count_inversions_recursive(arr_1)
assert num_inversions_bf == num_inversions_recursive == 8
print("number of inversions = ", num_inversions_bf)
# testing an array with zero inversion (a sorted arr_1)
arr_1.sort()
num_inversions_bf = count_inversions_bf(arr_1)
_, num_inversions_recursive = count_inversions_recursive(arr_1)
assert num_inversions_bf == num_inversions_recursive == 0
print("number of inversions = ", num_inversions_bf)
# an empty list should also have zero inversions
arr_1 = []
num_inversions_bf = count_inversions_bf(arr_1)
_, num_inversions_recursive = count_inversions_recursive(arr_1)
assert num_inversions_bf == num_inversions_recursive == 0
print("number of inversions = ", num_inversions_bf)
if __name__ == "__main__":
main()
================================================
FILE: divide_and_conquer/kth_order_statistic.py
================================================
"""
Find the kth smallest element in linear time using divide and conquer.
Recall that we can do this trivially in O(n log n) time: sort the list and
access the kth element in constant time.
This is a divide and conquer algorithm that can find a solution in O(n) time.
For more information of this algorithm:
https://web.stanford.edu/class/archive/cs/cs161/cs161.1138/lectures/08/Small08.pdf
"""
from __future__ import annotations
from random import choice
def random_pivot(lst):
"""
Choose a random pivot for the list.
We can use a more sophisticated algorithm here, such as the median-of-medians
algorithm.
"""
return choice(lst)
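# A minimal sketch (not part of the original module) of the median-of-medians idea
# mentioned in the docstring above: a deterministic pivot that is guaranteed to be
# neither too small nor too large, which keeps the selection recursion linear.
def median_of_medians_pivot(lst: list[int]) -> int:
    """Return a deterministic pivot: the median of the medians of groups of 5."""
    if len(lst) <= 5:
        return sorted(lst)[len(lst) // 2]
    groups = [lst[i : i + 5] for i in range(0, len(lst), 5)]
    medians = [sorted(group)[len(group) // 2] for group in groups]
    return median_of_medians_pivot(medians)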
def kth_number(lst: list[int], k: int) -> int:
"""
Return the kth smallest number in lst.
>>> kth_number([2, 1, 3, 4, 5], 3)
3
>>> kth_number([2, 1, 3, 4, 5], 1)
1
>>> kth_number([2, 1, 3, 4, 5], 5)
5
>>> kth_number([3, 2, 5, 6, 7, 8], 2)
3
>>> kth_number([25, 21, 98, 100, 76, 22, 43, 60, 89, 87], 4)
43
"""
# pick a pivot and separate into list based on pivot.
pivot = random_pivot(lst)
# partition based on pivot
# linear time
small = [e for e in lst if e < pivot]
big = [e for e in lst if e > pivot]
# if we get lucky, pivot might be the element we want.
# we can easily see this:
# small (elements smaller than k)
# + pivot (kth element)
# + big (elements larger than k)
if len(small) == k - 1:
return pivot
# pivot is in elements bigger than k
elif len(small) < k - 1:
return kth_number(big, k - len(small) - 1)
# pivot is in elements smaller than k
else:
return kth_number(small, k)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: divide_and_conquer/max_difference_pair.py
================================================
def max_difference(a: list[int]) -> tuple[int, int]:
"""
We are given an array A[1..n] of integers, n >= 1. We want to
find a pair of indices (i, j) such that
1 <= i <= j <= n and A[j] - A[i] is as large as possible.
Explanation:
https://www.geeksforgeeks.org/maximum-difference-between-two-elements/
>>> max_difference([5, 11, 2, 1, 7, 9, 0, 7])
(1, 9)
"""
# base case
if len(a) == 1:
return a[0], a[0]
else:
# split A into half.
first = a[: len(a) // 2]
second = a[len(a) // 2 :]
# 2 sub problems, 1/2 of original size.
small1, big1 = max_difference(first)
small2, big2 = max_difference(second)
# get min of first and max of second
# linear time
min_first = min(first)
max_second = max(second)
# 3 cases, either (small1, big1),
# (min_first, max_second), (small2, big2)
# constant comparisons
if big2 - small2 > max_second - min_first and big2 - small2 > big1 - small1:
return small2, big2
elif big1 - small1 > max_second - min_first:
return small1, big1
else:
return min_first, max_second
if __name__ == "__main__":
import doctest
doctest.testmod()
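# Illustrative cross-check (not in the original file): the divide-and-conquer result
# should match a brute-force scan over all ordered pairs (i <= j).
if __name__ == "__main__":
    nums = [5, 11, 2, 1, 7, 9, 0, 7]
    small, big = max_difference(nums)
    brute = max(nums[j] - nums[i] for i in range(len(nums)) for j in range(i, len(nums)))
    assert big - small == brute == 8
    print(f"max difference pair: ({small}, {big})")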
================================================
FILE: divide_and_conquer/max_subarray.py
================================================
"""
The maximum subarray problem is the task of finding the contiguous subarray that has the
maximum sum within a given array of numbers. For example, given the array
[-2, 1, -3, 4, -1, 2, 1, -5, 4], the contiguous subarray with the maximum sum is
[4, -1, 2, 1], which has a sum of 6.
This divide-and-conquer algorithm finds the maximum subarray in O(n log n) time.
"""
from __future__ import annotations
import time
from collections.abc import Sequence
from random import randint
from matplotlib import pyplot as plt
def max_subarray(
arr: Sequence[float], low: int, high: int
) -> tuple[int | None, int | None, float]:
"""
Solves the maximum subarray problem using divide and conquer.
:param arr: the given array of numbers
:param low: the start index
:param high: the end index
:return: the start index of the maximum subarray, the end index of the
maximum subarray, and the maximum subarray sum
>>> nums = [-2, 1, -3, 4, -1, 2, 1, -5, 4]
>>> max_subarray(nums, 0, len(nums) - 1)
(3, 6, 6)
>>> nums = [2, 8, 9]
>>> max_subarray(nums, 0, len(nums) - 1)
(0, 2, 19)
>>> nums = [0, 0]
>>> max_subarray(nums, 0, len(nums) - 1)
(0, 0, 0)
>>> nums = [-1.0, 0.0, 1.0]
>>> max_subarray(nums, 0, len(nums) - 1)
(2, 2, 1.0)
>>> nums = [-2, -3, -1, -4, -6]
>>> max_subarray(nums, 0, len(nums) - 1)
(2, 2, -1)
>>> max_subarray([], 0, 0)
(None, None, 0)
"""
if not arr:
return None, None, 0
if low == high:
return low, high, arr[low]
mid = (low + high) // 2
left_low, left_high, left_sum = max_subarray(arr, low, mid)
right_low, right_high, right_sum = max_subarray(arr, mid + 1, high)
cross_left, cross_right, cross_sum = max_cross_sum(arr, low, mid, high)
if left_sum >= right_sum and left_sum >= cross_sum:
return left_low, left_high, left_sum
elif right_sum >= left_sum and right_sum >= cross_sum:
return right_low, right_high, right_sum
return cross_left, cross_right, cross_sum
def max_cross_sum(
arr: Sequence[float], low: int, mid: int, high: int
) -> tuple[int, int, float]:
left_sum, max_left = float("-inf"), -1
right_sum, max_right = float("-inf"), -1
summ: int | float = 0
for i in range(mid, low - 1, -1):
summ += arr[i]
if summ > left_sum:
left_sum = summ
max_left = i
summ = 0
for i in range(mid + 1, high + 1):
summ += arr[i]
if summ > right_sum:
right_sum = summ
max_right = i
return max_left, max_right, (left_sum + right_sum)
def time_max_subarray(input_size: int) -> float:
arr = [randint(1, input_size) for _ in range(input_size)]
start = time.time()
max_subarray(arr, 0, input_size - 1)
end = time.time()
return end - start
def plot_runtimes() -> None:
input_sizes = [10, 100, 1000, 10000, 50000, 100000, 200000, 300000, 400000, 500000]
runtimes = [time_max_subarray(input_size) for input_size in input_sizes]
print("No of Inputs\t\tTime Taken")
for input_size, runtime in zip(input_sizes, runtimes):
print(input_size, "\t\t", runtime)
plt.plot(input_sizes, runtimes)
plt.xlabel("Number of Inputs")
plt.ylabel("Time taken in seconds")
plt.show()
if __name__ == "__main__":
"""
A random simulation of this algorithm.
"""
from doctest import testmod
testmod()
================================================
FILE: divide_and_conquer/mergesort.py
================================================
from __future__ import annotations
def merge(left_half: list, right_half: list) -> list:
"""Helper function for mergesort.
>>> left_half = [-2]
>>> right_half = [-1]
>>> merge(left_half, right_half)
[-2, -1]
>>> left_half = [1,2,3]
>>> right_half = [4,5,6]
>>> merge(left_half, right_half)
[1, 2, 3, 4, 5, 6]
>>> left_half = [-2]
>>> right_half = [-1]
>>> merge(left_half, right_half)
[-2, -1]
>>> left_half = [12, 15]
>>> right_half = [13, 14]
>>> merge(left_half, right_half)
[12, 13, 14, 15]
>>> left_half = []
>>> right_half = []
>>> merge(left_half, right_half)
[]
"""
sorted_array = [None] * (len(right_half) + len(left_half))
pointer1 = 0 # pointer to current index for left Half
pointer2 = 0 # pointer to current index for the right Half
index = 0 # pointer to current index for the sorted array Half
while pointer1 < len(left_half) and pointer2 < len(right_half):
if left_half[pointer1] < right_half[pointer2]:
sorted_array[index] = left_half[pointer1]
pointer1 += 1
index += 1
else:
sorted_array[index] = right_half[pointer2]
pointer2 += 1
index += 1
while pointer1 < len(left_half):
sorted_array[index] = left_half[pointer1]
pointer1 += 1
index += 1
while pointer2 < len(right_half):
sorted_array[index] = right_half[pointer2]
pointer2 += 1
index += 1
return sorted_array
def merge_sort(array: list) -> list:
"""Returns a list of sorted array elements using merge sort.
>>> from random import shuffle
>>> array = [-2, 3, -10, 11, 99, 100000, 100, -200]
>>> shuffle(array)
>>> merge_sort(array)
[-200, -10, -2, 3, 11, 99, 100, 100000]
>>> shuffle(array)
>>> merge_sort(array)
[-200, -10, -2, 3, 11, 99, 100, 100000]
>>> array = [-200]
>>> merge_sort(array)
[-200]
>>> array = [-2, 3, -10, 11, 99, 100000, 100, -200]
>>> shuffle(array)
>>> sorted(array) == merge_sort(array)
True
>>> array = [-2]
>>> merge_sort(array)
[-2]
>>> array = []
>>> merge_sort(array)
[]
>>> array = [10000000, 1, -1111111111, 101111111112, 9000002]
>>> sorted(array) == merge_sort(array)
True
"""
if len(array) <= 1:
return array
# the classic formula for the middle element is left + (right - left) // 2, which
# avoids integer overflow in languages with fixed-width integers; with left = 0
# it reduces to len(array) // 2
middle = 0 + (len(array) - 0) // 2
# Split the array into halves till the array length becomes equal to One
# merge the arrays of single length returned by mergeSort function and
# pass them into the merge arrays function which merges the array
left_half = array[:middle]
right_half = array[middle:]
return merge(merge_sort(left_half), merge_sort(right_half))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: divide_and_conquer/peak.py
================================================
"""
Finding the peak of a unimodal list using divide and conquer.
A unimodal array is defined as follows: array is increasing up to index p,
then decreasing afterwards. (for p >= 1)
An obvious solution can be performed in O(n),
to find the maximum of the array.
(From Kleinberg and Tardos. Algorithm Design.
Addison Wesley 2006: Chapter 5 Solved Exercise 1)
"""
from __future__ import annotations
def peak(lst: list[int]) -> int:
"""
Return the peak value of `lst`.
>>> peak([1, 2, 3, 4, 5, 4, 3, 2, 1])
5
>>> peak([1, 10, 9, 8, 7, 6, 5, 4])
10
>>> peak([1, 9, 8, 7])
9
>>> peak([1, 2, 3, 4, 5, 6, 7, 0])
7
>>> peak([1, 2, 3, 4, 3, 2, 1, 0, -1, -2])
4
"""
# middle index
m = len(lst) // 2
# choose the middle 3 elements
three = lst[m - 1 : m + 2]
# if middle element is peak
if three[1] > three[0] and three[1] > three[2]:
return three[1]
# if increasing, recurse on right
elif three[0] < three[2]:
if len(lst[:m]) == 2:
m -= 1
return peak(lst[m:])
# decreasing
else:
if len(lst[:m]) == 2:
m += 1
return peak(lst[:m])
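# Illustrative sketch (editorial addition, not part of the original module): the same
# divide and conquer idea written as an iterative binary search that only compares
# the middle element with its right neighbour.
def peak_binary_search(lst: list[int]) -> int:
    """
    >>> peak_binary_search([1, 2, 3, 4, 5, 4, 3, 2, 1])
    5
    >>> peak_binary_search([1, 10, 9, 8, 7, 6, 5, 4])
    10
    """
    low, high = 0, len(lst) - 1
    while low < high:
        mid = (low + high) // 2
        if lst[mid] < lst[mid + 1]:
            low = mid + 1  # still on the increasing slope, the peak lies to the right
        else:
            high = mid  # on the decreasing slope (or exactly at the peak)
    return lst[low]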
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: divide_and_conquer/power.py
================================================
def actual_power(a: int, b: int) -> int:
"""
Function using divide and conquer to calculate a^b.
    It only works for an integer base a and a non-negative integer exponent b.
:param a: The base of the power operation, an integer.
:param b: The exponent of the power operation, a non-negative integer.
:return: The result of a^b.
Examples:
>>> actual_power(3, 2)
9
>>> actual_power(5, 3)
125
>>> actual_power(2, 5)
32
>>> actual_power(7, 0)
1
"""
if b == 0:
return 1
half = actual_power(a, b // 2)
if (b % 2) == 0:
return half * half
else:
return a * half * half
def power(a: int, b: int) -> float:
"""
:param a: The base (integer).
:param b: The exponent (integer).
:return: The result of a^b, as a float for negative exponents.
>>> power(4,6)
4096
>>> power(2,3)
8
>>> power(-2,3)
-8
>>> power(2,-3)
0.125
>>> power(-2,-3)
-0.125
"""
if b < 0:
return 1 / actual_power(a, -b)
return actual_power(a, b)
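# Worked example (editorial note, not part of the original file):
# actual_power(2, 5) computes half = actual_power(2, 2) = 4 and, since 5 is odd,
# returns 2 * 4 * 4 = 32, so only O(log b) multiplications are performed.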
if __name__ == "__main__":
print(power(-2, -3)) # output -0.125
================================================
FILE: divide_and_conquer/strassen_matrix_multiplication.py
================================================
from __future__ import annotations
import math
def default_matrix_multiplication(a: list, b: list) -> list:
"""
Multiplication only for 2x2 matrices
"""
if len(a) != 2 or len(a[0]) != 2 or len(b) != 2 or len(b[0]) != 2:
raise Exception("Matrices are not 2x2")
new_matrix = [
[a[0][0] * b[0][0] + a[0][1] * b[1][0], a[0][0] * b[0][1] + a[0][1] * b[1][1]],
[a[1][0] * b[0][0] + a[1][1] * b[1][0], a[1][0] * b[0][1] + a[1][1] * b[1][1]],
]
return new_matrix
def matrix_addition(matrix_a: list, matrix_b: list):
return [
[matrix_a[row][col] + matrix_b[row][col] for col in range(len(matrix_a[row]))]
for row in range(len(matrix_a))
]
def matrix_subtraction(matrix_a: list, matrix_b: list):
return [
[matrix_a[row][col] - matrix_b[row][col] for col in range(len(matrix_a[row]))]
for row in range(len(matrix_a))
]
def split_matrix(a: list) -> tuple[list, list, list, list]:
"""
Given an even length matrix, returns the top_left, top_right, bot_left, bot_right
quadrant.
>>> split_matrix([[4,3,2,4],[2,3,1,1],[6,5,4,3],[8,4,1,6]])
([[4, 3], [2, 3]], [[2, 4], [1, 1]], [[6, 5], [8, 4]], [[4, 3], [1, 6]])
>>> split_matrix([
... [4,3,2,4,4,3,2,4],[2,3,1,1,2,3,1,1],[6,5,4,3,6,5,4,3],[8,4,1,6,8,4,1,6],
... [4,3,2,4,4,3,2,4],[2,3,1,1,2,3,1,1],[6,5,4,3,6,5,4,3],[8,4,1,6,8,4,1,6]
... ]) # doctest: +NORMALIZE_WHITESPACE
([[4, 3, 2, 4], [2, 3, 1, 1], [6, 5, 4, 3], [8, 4, 1, 6]], [[4, 3, 2, 4],
[2, 3, 1, 1], [6, 5, 4, 3], [8, 4, 1, 6]], [[4, 3, 2, 4], [2, 3, 1, 1],
[6, 5, 4, 3], [8, 4, 1, 6]], [[4, 3, 2, 4], [2, 3, 1, 1], [6, 5, 4, 3],
[8, 4, 1, 6]])
"""
if len(a) % 2 != 0 or len(a[0]) % 2 != 0:
raise Exception("Odd matrices are not supported!")
matrix_length = len(a)
mid = matrix_length // 2
top_right = [[a[i][j] for j in range(mid, matrix_length)] for i in range(mid)]
bot_right = [
[a[i][j] for j in range(mid, matrix_length)] for i in range(mid, matrix_length)
]
top_left = [[a[i][j] for j in range(mid)] for i in range(mid)]
bot_left = [[a[i][j] for j in range(mid)] for i in range(mid, matrix_length)]
return top_left, top_right, bot_left, bot_right
def matrix_dimensions(matrix: list) -> tuple[int, int]:
return len(matrix), len(matrix[0])
def print_matrix(matrix: list) -> None:
print("\n".join(str(line) for line in matrix))
def actual_strassen(matrix_a: list, matrix_b: list) -> list:
"""
Recursive function to calculate the product of two matrices, using the Strassen
Algorithm. It only supports square matrices of any size that is a power of 2.
"""
if matrix_dimensions(matrix_a) == (2, 2):
return default_matrix_multiplication(matrix_a, matrix_b)
a, b, c, d = split_matrix(matrix_a)
e, f, g, h = split_matrix(matrix_b)
t1 = actual_strassen(a, matrix_subtraction(f, h))
t2 = actual_strassen(matrix_addition(a, b), h)
t3 = actual_strassen(matrix_addition(c, d), e)
t4 = actual_strassen(d, matrix_subtraction(g, e))
t5 = actual_strassen(matrix_addition(a, d), matrix_addition(e, h))
t6 = actual_strassen(matrix_subtraction(b, d), matrix_addition(g, h))
t7 = actual_strassen(matrix_subtraction(a, c), matrix_addition(e, f))
top_left = matrix_addition(matrix_subtraction(matrix_addition(t5, t4), t2), t6)
top_right = matrix_addition(t1, t2)
bot_left = matrix_addition(t3, t4)
bot_right = matrix_subtraction(matrix_subtraction(matrix_addition(t1, t5), t3), t7)
# construct the new matrix from our 4 quadrants
new_matrix = []
for i in range(len(top_right)):
new_matrix.append(top_left[i] + top_right[i])
for i in range(len(bot_right)):
new_matrix.append(bot_left[i] + bot_right[i])
return new_matrix
def strassen(matrix1: list, matrix2: list) -> list:
"""
>>> strassen([[2,1,3],[3,4,6],[1,4,2],[7,6,7]], [[4,2,3,4],[2,1,1,1],[8,6,4,2]])
[[34, 23, 19, 15], [68, 46, 37, 28], [28, 18, 15, 12], [96, 62, 55, 48]]
>>> strassen([[3,7,5,6,9],[1,5,3,7,8],[1,4,4,5,7]], [[2,4],[5,2],[1,7],[5,5],[7,8]])
[[139, 163], [121, 134], [100, 121]]
"""
if matrix_dimensions(matrix1)[1] != matrix_dimensions(matrix2)[0]:
msg = (
"Unable to multiply these matrices, please check the dimensions.\n"
f"Matrix A: {matrix1}\n"
f"Matrix B: {matrix2}"
)
raise Exception(msg)
dimension1 = matrix_dimensions(matrix1)
dimension2 = matrix_dimensions(matrix2)
    maximum = max(*dimension1, *dimension2)
    maxim = int(math.pow(2, math.ceil(math.log2(maximum))))
    # pad copies of the inputs so that the callers' matrices are not mutated
    new_matrix1 = [row[:] for row in matrix1]
    new_matrix2 = [row[:] for row in matrix2]
# Adding zeros to the matrices to convert them both into square matrices of equal
# dimensions that are a power of 2
for i in range(maxim):
if i < dimension1[0]:
for _ in range(dimension1[1], maxim):
new_matrix1[i].append(0)
else:
new_matrix1.append([0] * maxim)
if i < dimension2[0]:
for _ in range(dimension2[1], maxim):
new_matrix2[i].append(0)
else:
new_matrix2.append([0] * maxim)
final_matrix = actual_strassen(new_matrix1, new_matrix2)
# Removing the additional zeros
for i in range(maxim):
if i < dimension1[0]:
for _ in range(dimension2[1], maxim):
final_matrix[i].pop()
else:
final_matrix.pop()
return final_matrix
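# Illustrative cross-check (editorial addition, not part of the original module): a
# naive O(n^3) multiplication that can be used to sanity-check strassen() on small
# inputs, e.g. by comparing naive_matrix_multiplication(a, b) with strassen(a, b).
def naive_matrix_multiplication(a: list, b: list) -> list:
    """
    >>> naive_matrix_multiplication([[1, 2], [3, 4]], [[5, 6], [7, 8]])
    [[19, 22], [43, 50]]
    """
    return [
        [sum(a[i][k] * b[k][j] for k in range(len(b))) for j in range(len(b[0]))]
        for i in range(len(a))
    ]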
if __name__ == "__main__":
matrix1 = [
[2, 3, 4, 5],
[6, 4, 3, 1],
[2, 3, 6, 7],
[3, 1, 2, 4],
[2, 3, 4, 5],
[6, 4, 3, 1],
[2, 3, 6, 7],
[3, 1, 2, 4],
[2, 3, 4, 5],
[6, 2, 3, 1],
]
matrix2 = [[0, 2, 1, 1], [16, 2, 3, 3], [2, 2, 7, 7], [13, 11, 22, 4]]
print(strassen(matrix1, matrix2))
================================================
FILE: docs/__init__.py
================================================
================================================
FILE: docs/conf.py
================================================
from sphinx_pyproject import SphinxConfig
project = SphinxConfig("../pyproject.toml", globalns=globals()).name
================================================
FILE: docs/source/__init__.py
================================================
================================================
FILE: dynamic_programming/__init__.py
================================================
================================================
FILE: dynamic_programming/abbreviation.py
================================================
"""
https://www.hackerrank.com/challenges/abbr/problem
You can perform the following operations on string a:
1. Capitalize zero or more of a's lowercase letters (i.e., make them uppercase).
2. Delete all of the remaining lowercase letters in a.
Determine whether a can be converted into b this way.
Example:
a=daBcd and b="ABC"
daBcd -> capitalize a and c(dABCd) -> remove d (ABC)
"""
def abbr(a: str, b: str) -> bool:
"""
>>> abbr("daBcd", "ABC")
True
>>> abbr("dBcd", "ABC")
False
"""
n = len(a)
m = len(b)
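    # dp[i][j] is True if the first i characters of `a` can be converted into the
    # first j characters of `b` using the allowed operations.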
dp = [[False for _ in range(m + 1)] for _ in range(n + 1)]
dp[0][0] = True
for i in range(n):
for j in range(m + 1):
if dp[i][j]:
if j < m and a[i].upper() == b[j]:
dp[i + 1][j + 1] = True
if a[i].islower():
dp[i + 1][j] = True
return dp[n][m]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/all_construct.py
================================================
"""
Program to list all the ways a target string can be
constructed from the given list of substrings
"""
from __future__ import annotations
def all_construct(target: str, word_bank: list[str] | None = None) -> list[list[str]]:
"""
returns the list containing all the possible
combinations a string(`target`) can be constructed from
the given list of substrings(`word_bank`)
>>> all_construct("hello", ["he", "l", "o"])
[['he', 'l', 'l', 'o']]
>>> all_construct("purple",["purp","p","ur","le","purpl"])
[['purp', 'le'], ['p', 'ur', 'p', 'le']]
"""
word_bank = word_bank or []
# create a table
table_size: int = len(target) + 1
table: list[list[list[str]]] = []
for _ in range(table_size):
table.append([])
# seed value
table[0] = [[]] # because empty string has empty combination
# iterate through the indices
for i in range(table_size):
# condition
if table[i] != []:
for word in word_bank:
# slice condition
if target[i : i + len(word)] == word:
new_combinations: list[list[str]] = [
[word, *way] for way in table[i]
]
# adds the word to every combination the current position holds
# now,push that combination to the table[i+len(word)]
table[i + len(word)] += new_combinations
# combinations are in reverse order so reverse for better output
for combination in table[len(target)]:
combination.reverse()
return table[len(target)]
if __name__ == "__main__":
print(all_construct("jwajalapa", ["jwa", "j", "w", "a", "la", "lapa"]))
print(all_construct("rajamati", ["s", "raj", "amat", "raja", "ma", "i", "t"]))
print(
all_construct(
"hexagonosaurus",
["h", "ex", "hex", "ag", "ago", "ru", "auru", "rus", "go", "no", "o", "s"],
)
)
================================================
FILE: dynamic_programming/bitmask.py
================================================
"""
This is a Python implementation for questions involving task assignments between people.
Here Bitmasking and DP are used for solving this.
Question :-
We have N tasks and M people. Each person in M can do only certain of these tasks. Also
a person can do only one task and a task is performed only by one person.
Find the total no of ways in which the tasks can be distributed.
"""
from collections import defaultdict
class AssignmentUsingBitmask:
def __init__(self, task_performed, total):
self.total_tasks = total # total no of tasks (N)
# DP table will have a dimension of (2^M)*N
# initially all values are set to -1
self.dp = [
[-1 for i in range(total + 1)] for j in range(2 ** len(task_performed))
]
self.task = defaultdict(list) # stores the list of persons for each task
# final_mask is used to check if all persons are included by setting all bits
# to 1
self.final_mask = (1 << len(task_performed)) - 1
def count_ways_until(self, mask, task_no):
        # if mask == self.final_mask, every person has been assigned a task, return 1
if mask == self.final_mask:
return 1
# if not everyone gets the task and no more tasks are available, return 0
if task_no > self.total_tasks:
return 0
# if case already considered
if self.dp[mask][task_no] != -1:
return self.dp[mask][task_no]
        # number of ways when we don't use this task in the arrangement
total_ways_until = self.count_ways_until(mask, task_no + 1)
# now assign the tasks one by one to all possible persons and recursively
# assign for the remaining tasks.
if task_no in self.task:
for p in self.task[task_no]:
# if p is already given a task
if mask & (1 << p):
continue
# assign this task to p and change the mask value. And recursively
# assign tasks with the new mask value.
total_ways_until += self.count_ways_until(mask | (1 << p), task_no + 1)
# save the value.
self.dp[mask][task_no] = total_ways_until
return self.dp[mask][task_no]
def count_no_of_ways(self, task_performed):
# Store the list of persons for each task
for i in range(len(task_performed)):
for j in task_performed[i]:
self.task[j].append(i)
# call the function to fill the DP table, final answer is stored in dp[0][1]
return self.count_ways_until(0, 1)
if __name__ == "__main__":
total_tasks = 5 # total no of tasks (the value of N)
# the list of tasks that can be done by M persons.
task_performed = [[1, 3, 4], [1, 2, 5], [3, 4]]
print(
AssignmentUsingBitmask(task_performed, total_tasks).count_no_of_ways(
task_performed
)
)
"""
For the particular example the tasks can be distributed as
(1,2,3), (1,2,4), (1,5,3), (1,5,4), (3,1,4),
(3,2,4), (3,5,4), (4,1,3), (4,2,3), (4,5,3)
total 10
"""
================================================
FILE: dynamic_programming/catalan_numbers.py
================================================
"""
Print all the Catalan numbers from 0 to n, n being the user input.
* The Catalan numbers are a sequence of positive integers that
* appear in many counting problems in combinatorics [1]. Such
* problems include counting [2]:
* - The number of Dyck words of length 2n
* - The number of well-formed expressions with n pairs of parentheses
* (e.g., `()()` is valid but `())(` is not)
* - The number of different ways n + 1 factors can be completely
* parenthesized (e.g., for n = 2, C(n) = 2 and (ab)c and a(bc)
* are the two valid ways to parenthesize).
* - The number of full binary trees with n + 1 leaves
* A Catalan number satisfies the following recurrence relation
* which we will use in this algorithm [1].
* C(0) = C(1) = 1
* C(n) = sum(C(i).C(n-i-1)), from i = 0 to n-1
* In addition, the n-th Catalan number can be calculated using
* the closed form formula below [1]:
* C(n) = (1 / (n + 1)) * (2n choose n)
* Sources:
* [1] https://brilliant.org/wiki/catalan-numbers/
* [2] https://en.wikipedia.org/wiki/Catalan_number
"""
def catalan_numbers(upper_limit: int) -> "list[int]":
"""
Return a list of the Catalan number sequence from 0 through `upper_limit`.
>>> catalan_numbers(5)
[1, 1, 2, 5, 14, 42]
>>> catalan_numbers(2)
[1, 1, 2]
>>> catalan_numbers(-1)
Traceback (most recent call last):
ValueError: Limit for the Catalan sequence must be ≥ 0
"""
if upper_limit < 0:
raise ValueError("Limit for the Catalan sequence must be ≥ 0")
catalan_list = [0] * (upper_limit + 1)
# Base case: C(0) = C(1) = 1
catalan_list[0] = 1
if upper_limit > 0:
catalan_list[1] = 1
    # Recurrence relation: C(i) = sum(C(j) * C(i-j-1)), for j from 0 to i-1
for i in range(2, upper_limit + 1):
for j in range(i):
catalan_list[i] += catalan_list[j] * catalan_list[i - j - 1]
return catalan_list
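# Illustrative sketch (editorial addition, not part of the original module): the
# closed form C(n) = (2n choose n) / (n + 1) mentioned above can be used to
# cross-check the dynamic-programming recurrence.
def catalan_closed_form(n: int) -> int:
    """
    >>> [catalan_closed_form(i) for i in range(6)]
    [1, 1, 2, 5, 14, 42]
    >>> catalan_numbers(20)[-1] == catalan_closed_form(20)
    True
    """
    from math import comb

    return comb(2 * n, n) // (n + 1)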
if __name__ == "__main__":
print("\n********* Catalan Numbers Using Dynamic Programming ************\n")
print("\n*** Enter -1 at any time to quit ***")
print("\nEnter the upper limit (≥ 0) for the Catalan number sequence: ", end="")
try:
while True:
N = int(input().strip())
if N < 0:
print("\n********* Goodbye!! ************")
break
else:
print(f"The Catalan numbers from 0 through {N} are:")
print(catalan_numbers(N))
print("Try another upper limit for the sequence: ", end="")
except (NameError, ValueError):
print("\n********* Invalid input, goodbye! ************\n")
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/climbing_stairs.py
================================================
#!/usr/bin/env python3
def climb_stairs(number_of_steps: int) -> int:
"""
    LeetCode No. 70: Climbing Stairs
Distinct ways to climb a number_of_steps staircase where each time you can either
climb 1 or 2 steps.
Args:
number_of_steps: number of steps on the staircase
Returns:
Distinct ways to climb a number_of_steps staircase
Raises:
AssertionError: number_of_steps not positive integer
>>> climb_stairs(3)
3
>>> climb_stairs(1)
1
>>> climb_stairs(-7) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
AssertionError: number_of_steps needs to be positive integer, your input -7
"""
assert isinstance(number_of_steps, int) and number_of_steps > 0, (
f"number_of_steps needs to be positive integer, your input {number_of_steps}"
)
if number_of_steps == 1:
return 1
previous, current = 1, 1
for _ in range(number_of_steps - 1):
current, previous = current + previous, current
return current
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/combination_sum_iv.py
================================================
"""
Question:
You are given an array of distinct integers and you have to tell how many
ways there are to select elements from the array (order matters and elements
may be reused) such that the sum of the chosen elements equals the target number.
Example
Input:
* N = 3
* target = 5
* array = [1, 2, 5]
Output:
9
Approach:
The basic idea is to count recursively: to reach a sum of `target`, pick any
element of the array as the next chosen element and recurse on the reduced
target (target - element). The answer is the sum of these counts, and a target
of exactly 0 contributes one way.
"""
def combination_sum_iv(array: list[int], target: int) -> int:
"""
    Checks all possible combinations recursively and returns the count of the
    possible combinations, in exponential time complexity.
>>> combination_sum_iv([1,2,5], 5)
9
"""
def count_of_possible_combinations(target: int) -> int:
if target < 0:
return 0
if target == 0:
return 1
return sum(count_of_possible_combinations(target - item) for item in array)
return count_of_possible_combinations(target)
def combination_sum_iv_dp_array(array: list[int], target: int) -> int:
"""
Function checks the all possible combinations, and returns the count
of possible combination in O(N^2) Time Complexity as we are using Dynamic
programming array here.
>>> combination_sum_iv_dp_array([1,2,5], 5)
9
"""
def count_of_possible_combinations_with_dp_array(
target: int, dp_array: list[int]
) -> int:
if target < 0:
return 0
if target == 0:
return 1
if dp_array[target] != -1:
return dp_array[target]
answer = sum(
count_of_possible_combinations_with_dp_array(target - item, dp_array)
for item in array
)
dp_array[target] = answer
return answer
dp_array = [-1] * (target + 1)
return count_of_possible_combinations_with_dp_array(target, dp_array)
def combination_sum_iv_bottom_up(n: int, array: list[int], target: int) -> int:
"""
Function checks the all possible combinations with using bottom up approach,
and returns the count of possible combination in O(N^2) Time Complexity
as we are using Dynamic programming array here.
>>> combination_sum_iv_bottom_up(3, [1,2,5], 5)
9
"""
dp_array = [0] * (target + 1)
dp_array[0] = 1
for i in range(1, target + 1):
for j in range(n):
if i - array[j] >= 0:
dp_array[i] += dp_array[i - array[j]]
return dp_array[target]
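# Illustrative sketch (editorial addition, not part of the original module): exposing
# the whole bottom-up table makes the recurrence visible; for array = [1, 2, 5] and
# target = 5 it is [1, 1, 2, 3, 5, 9], so there are 9 ordered selections.
def combination_sum_iv_table(array: list[int], target: int) -> list[int]:
    """
    >>> combination_sum_iv_table([1, 2, 5], 5)
    [1, 1, 2, 3, 5, 9]
    """
    dp_array = [0] * (target + 1)
    dp_array[0] = 1  # one way to reach a sum of zero: choose nothing
    for i in range(1, target + 1):
        dp_array[i] = sum(dp_array[i - item] for item in array if i - item >= 0)
    return dp_array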
if __name__ == "__main__":
import doctest
doctest.testmod()
target = 5
array = [1, 2, 5]
print(combination_sum_iv(array, target))
================================================
FILE: dynamic_programming/edit_distance.py
================================================
"""
Author : Turfa Auliarachman
Date : October 12, 2016
This is a pure Python implementation of Dynamic Programming solution to the edit
distance problem.
The problem is :
Given two strings A and B, find the minimum number of operations needed to convert
A into B. The permitted operations are removal, insertion, and substitution.
"""
class EditDistance:
"""
Use :
solver = EditDistance()
editDistanceResult = solver.solve(firstString, secondString)
"""
def __init__(self):
self.word1 = ""
self.word2 = ""
self.dp = []
def __min_dist_top_down_dp(self, m: int, n: int) -> int:
if m == -1:
return n + 1
elif n == -1:
return m + 1
elif self.dp[m][n] > -1:
return self.dp[m][n]
else:
if self.word1[m] == self.word2[n]:
self.dp[m][n] = self.__min_dist_top_down_dp(m - 1, n - 1)
else:
insert = self.__min_dist_top_down_dp(m, n - 1)
delete = self.__min_dist_top_down_dp(m - 1, n)
replace = self.__min_dist_top_down_dp(m - 1, n - 1)
self.dp[m][n] = 1 + min(insert, delete, replace)
return self.dp[m][n]
def min_dist_top_down(self, word1: str, word2: str) -> int:
"""
>>> EditDistance().min_dist_top_down("intention", "execution")
5
>>> EditDistance().min_dist_top_down("intention", "")
9
>>> EditDistance().min_dist_top_down("", "")
0
"""
self.word1 = word1
self.word2 = word2
self.dp = [[-1 for _ in range(len(word2))] for _ in range(len(word1))]
return self.__min_dist_top_down_dp(len(word1) - 1, len(word2) - 1)
def min_dist_bottom_up(self, word1: str, word2: str) -> int:
"""
>>> EditDistance().min_dist_bottom_up("intention", "execution")
5
>>> EditDistance().min_dist_bottom_up("intention", "")
9
>>> EditDistance().min_dist_bottom_up("", "")
0
"""
self.word1 = word1
self.word2 = word2
m = len(word1)
n = len(word2)
self.dp = [[0 for _ in range(n + 1)] for _ in range(m + 1)]
for i in range(m + 1):
for j in range(n + 1):
if i == 0: # first string is empty
self.dp[i][j] = j
elif j == 0: # second string is empty
self.dp[i][j] = i
elif word1[i - 1] == word2[j - 1]: # last characters are equal
self.dp[i][j] = self.dp[i - 1][j - 1]
else:
insert = self.dp[i][j - 1]
delete = self.dp[i - 1][j]
replace = self.dp[i - 1][j - 1]
self.dp[i][j] = 1 + min(insert, delete, replace)
return self.dp[m][n]
if __name__ == "__main__":
solver = EditDistance()
print("****************** Testing Edit Distance DP Algorithm ******************")
print()
S1 = input("Enter the first string: ").strip()
S2 = input("Enter the second string: ").strip()
print()
print(f"The minimum edit distance is: {solver.min_dist_top_down(S1, S2)}")
print(f"The minimum edit distance is: {solver.min_dist_bottom_up(S1, S2)}")
print()
print("*************** End of Testing Edit Distance DP Algorithm ***************")
================================================
FILE: dynamic_programming/factorial.py
================================================
# Factorial of a number using memoization
from functools import lru_cache
@lru_cache
def factorial(num: int) -> int:
"""
>>> factorial(7)
5040
>>> factorial(-1)
Traceback (most recent call last):
...
ValueError: Number should not be negative.
>>> [factorial(i) for i in range(10)]
[1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880]
"""
if num < 0:
raise ValueError("Number should not be negative.")
return 1 if num in (0, 1) else num * factorial(num - 1)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/fast_fibonacci.py
================================================
#!/usr/bin/env python3
"""
This program calculates the nth Fibonacci number in O(log(n)).
It's possible to calculate F(1_000_000) in less than a second.
"""
from __future__ import annotations
import sys
def fibonacci(n: int) -> int:
"""
return F(n)
>>> [fibonacci(i) for i in range(13)]
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144]
"""
if n < 0:
raise ValueError("Negative arguments are not supported")
return _fib(n)[0]
# returns (F(n), F(n-1))
def _fib(n: int) -> tuple[int, int]:
if n == 0: # (F(0), F(1))
return (0, 1)
# F(2n) = F(n)[2F(n+1) - F(n)]
# F(2n+1) = F(n+1)^2+F(n)^2
a, b = _fib(n // 2)
c = a * (b * 2 - a)
d = a * a + b * b
return (d, c + d) if n % 2 else (c, d)
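# Illustrative sketch (editorial addition, not part of the original module): the
# fast-doubling identities used in _fib() can be checked numerically against the
# plain iterative definition.
def _check_doubling_identities(n: int) -> bool:
    """
    >>> all(_check_doubling_identities(i) for i in range(10))
    True
    """
    seq = [0, 1]
    while len(seq) < 2 * n + 2:
        seq.append(seq[-1] + seq[-2])
    f_n, f_n1 = seq[n], seq[n + 1]
    return seq[2 * n] == f_n * (2 * f_n1 - f_n) and seq[2 * n + 1] == f_n**2 + f_n1**2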
if __name__ == "__main__":
n = int(sys.argv[1])
print(f"fibonacci({n}) is {fibonacci(n)}")
================================================
FILE: dynamic_programming/fibonacci.py
================================================
"""
This is a pure Python implementation of Dynamic Programming solution to the fibonacci
sequence problem.
"""
class Fibonacci:
def __init__(self) -> None:
self.sequence = [0, 1]
def get(self, index: int) -> list:
"""
        Get the Fibonacci sequence up to (but not including) `index`. Any terms
        that have not been calculated yet are computed and cached first.
>>> Fibonacci().get(10)
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
>>> Fibonacci().get(5)
[0, 1, 1, 2, 3]
"""
if (difference := index - (len(self.sequence) - 2)) >= 1:
for _ in range(difference):
self.sequence.append(self.sequence[-1] + self.sequence[-2])
return self.sequence[:index]
def main() -> None:
print(
"Fibonacci Series Using Dynamic Programming\n",
"Enter the index of the Fibonacci number you want to calculate ",
"in the prompt below. (To exit enter exit or Ctrl-C)\n",
sep="",
)
fibonacci = Fibonacci()
while True:
prompt: str = input(">> ")
if prompt in {"exit", "quit"}:
break
try:
index: int = int(prompt)
except ValueError:
print("Enter a number or 'exit'")
continue
print(fibonacci.get(index))
if __name__ == "__main__":
main()
================================================
FILE: dynamic_programming/fizz_buzz.py
================================================
# https://en.wikipedia.org/wiki/Fizz_buzz#Programming
def fizz_buzz(number: int, iterations: int) -> str:
"""
    | Plays FizzBuzz and returns the result as a string.
    | Appends Fizz if the number is a multiple of ``3``.
    | Appends Buzz if it is a multiple of ``5``.
    | Appends FizzBuzz if it is a multiple of both ``3`` and ``5`` (i.e. of ``15``).
    | Otherwise appends the number itself.
>>> fizz_buzz(1,7)
'1 2 Fizz 4 Buzz Fizz 7 '
>>> fizz_buzz(1,0)
Traceback (most recent call last):
...
ValueError: Iterations must be done more than 0 times to play FizzBuzz
>>> fizz_buzz(-5,5)
Traceback (most recent call last):
...
ValueError: starting number must be
    an integer and be more than 0
>>> fizz_buzz(10,-5)
Traceback (most recent call last):
...
ValueError: Iterations must be done more than 0 times to play FizzBuzz
>>> fizz_buzz(1.5,5)
Traceback (most recent call last):
...
ValueError: starting number must be
    an integer and be more than 0
>>> fizz_buzz(1,5.5)
Traceback (most recent call last):
...
ValueError: iterations must be defined as integers
"""
if not isinstance(iterations, int):
raise ValueError("iterations must be defined as integers")
if not isinstance(number, int) or not number >= 1:
raise ValueError(
"""starting number must be
an integer and be more than 0"""
)
if not iterations >= 1:
raise ValueError("Iterations must be done more than 0 times to play FizzBuzz")
out = ""
while number <= iterations:
if number % 3 == 0:
out += "Fizz"
if number % 5 == 0:
out += "Buzz"
if 0 not in (number % 3, number % 5):
out += str(number)
# print(out)
number += 1
out += " "
return out
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/floyd_warshall.py
================================================
import math
class Graph:
def __init__(self, n=0): # a graph with Node 0,1,...,N-1
self.n = n
self.w = [
[math.inf for j in range(n)] for i in range(n)
] # adjacency matrix for weight
self.dp = [
[math.inf for j in range(n)] for i in range(n)
] # dp[i][j] stores minimum distance from i to j
def add_edge(self, u, v, w):
"""
Adds a directed edge from node u
to node v with weight w.
>>> g = Graph(3)
>>> g.add_edge(0, 1, 5)
>>> g.dp[0][1]
5
"""
self.dp[u][v] = w
def floyd_warshall(self):
"""
Computes the shortest paths between all pairs of
nodes using the Floyd-Warshall algorithm.
>>> g = Graph(3)
>>> g.add_edge(0, 1, 1)
>>> g.add_edge(1, 2, 2)
>>> g.floyd_warshall()
>>> g.show_min(0, 2)
3
>>> g.show_min(2, 0)
inf
"""
for k in range(self.n):
for i in range(self.n):
for j in range(self.n):
self.dp[i][j] = min(self.dp[i][j], self.dp[i][k] + self.dp[k][j])
def show_min(self, u, v):
"""
Returns the minimum distance from node u to node v.
>>> g = Graph(3)
>>> g.add_edge(0, 1, 3)
>>> g.add_edge(1, 2, 4)
>>> g.floyd_warshall()
>>> g.show_min(0, 2)
7
>>> g.show_min(1, 0)
inf
"""
return self.dp[u][v]
if __name__ == "__main__":
import doctest
doctest.testmod()
# Example usage
graph = Graph(5)
graph.add_edge(0, 2, 9)
graph.add_edge(0, 4, 10)
graph.add_edge(1, 3, 5)
graph.add_edge(2, 3, 7)
graph.add_edge(3, 0, 10)
graph.add_edge(3, 1, 2)
graph.add_edge(3, 2, 1)
graph.add_edge(3, 4, 6)
graph.add_edge(4, 1, 3)
graph.add_edge(4, 2, 4)
graph.add_edge(4, 3, 9)
graph.floyd_warshall()
print(
graph.show_min(1, 4)
) # Should output the minimum distance from node 1 to node 4
print(
graph.show_min(0, 3)
) # Should output the minimum distance from node 0 to node 3
================================================
FILE: dynamic_programming/integer_partition.py
================================================
"""
The number of partitions of a number n into at least k parts equals the number of
partitions into exactly k parts plus the number of partitions into at least k-1 parts.
Subtracting 1 from each part of a partition of n into k parts gives a partition of n-k
into k parts. These two facts together are used for this algorithm.
* https://en.wikipedia.org/wiki/Partition_(number_theory)
* https://en.wikipedia.org/wiki/Partition_function_(number_theory)
"""
def partition(m: int) -> int:
"""
>>> partition(5)
7
>>> partition(7)
15
>>> partition(100)
190569292
>>> partition(1_000)
24061467864032622473692149727991
>>> partition(-7)
Traceback (most recent call last):
...
IndexError: list index out of range
>>> partition(0)
Traceback (most recent call last):
...
IndexError: list assignment index out of range
>>> partition(7.8)
Traceback (most recent call last):
...
TypeError: 'float' object cannot be interpreted as an integer
"""
memo: list[list[int]] = [[0 for _ in range(m)] for _ in range(m + 1)]
for i in range(m + 1):
memo[i][0] = 1
for n in range(m + 1):
for k in range(1, m):
memo[n][k] += memo[n][k - 1]
if n - k > 0:
memo[n][k] += memo[n - k - 1][k]
return memo[m][m - 1]
if __name__ == "__main__":
import sys
if len(sys.argv) == 1:
try:
n = int(input("Enter a number: ").strip())
print(partition(n))
except ValueError:
print("Please enter a number.")
else:
try:
n = int(sys.argv[1])
print(partition(n))
except ValueError:
print("Please pass a number.")
================================================
FILE: dynamic_programming/iterating_through_submasks.py
================================================
"""
Author : Syed Faizan (3rd Year Student IIIT Pune)
github : faizan2700
You are given a bitmask m and you want to efficiently iterate through all of
its submasks. A mask s is a submask of m if s only has bits set that are also
set in m.
"""
from __future__ import annotations
def list_of_submasks(mask: int) -> list[int]:
"""
Args:
    mask : the mask to enumerate (always an integer > 0; zero does not have any
    submasks)
    Returns:
    all_submasks : the list of submasks of mask (a mask s is called a submask of
    m if s only has bits set that are also set in m)
Raises:
AssertionError: mask not positive integer
>>> list_of_submasks(15)
[15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
>>> list_of_submasks(13)
[13, 12, 9, 8, 5, 4, 1]
>>> list_of_submasks(-7) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
AssertionError: mask needs to be positive integer, your input -7
>>> list_of_submasks(0) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
AssertionError: mask needs to be positive integer, your input 0
"""
assert isinstance(mask, int) and mask > 0, (
f"mask needs to be positive integer, your input {mask}"
)
"""
    The first submask iterated is the mask itself; then (submask - 1) & mask is
    applied repeatedly to get the remaining submasks, until we reach the empty
    submask zero (zero is not included in the final list).
"""
all_submasks = []
submask = mask
while submask:
all_submasks.append(submask)
submask = (submask - 1) & mask
return all_submasks
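# Illustrative sketch (editorial addition, not part of the original module): rendering
# the submasks of 13 (0b1101) as bit strings makes the (submask - 1) & mask step
# visible: every result only keeps bits that are set in the original mask.
def submasks_as_bits(mask: int) -> list[str]:
    """
    >>> submasks_as_bits(13)
    ['1101', '1100', '1001', '1000', '0101', '0100', '0001']
    """
    width = mask.bit_length()
    return [format(submask, f"0{width}b") for submask in list_of_submasks(mask)]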
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/k_means_clustering_tensorflow.py
================================================
from random import shuffle
# NOTE (editorial): this module targets the TensorFlow 1.x graph/session API; one
# common way to run it under TensorFlow 2.x is via the v1 compatibility layer.
import tensorflow.compat.v1 as tf
from numpy import array

tf.disable_v2_behavior()
def tf_k_means_cluster(vectors, noofclusters):
"""
K-Means Clustering using TensorFlow.
'vectors' should be a n*k 2-D NumPy array, where n is the number
of vectors of dimensionality k.
'noofclusters' should be an integer.
"""
noofclusters = int(noofclusters)
assert noofclusters < len(vectors)
# Find out the dimensionality
dim = len(vectors[0])
# Will help select random centroids from among the available vectors
vector_indices = list(range(len(vectors)))
shuffle(vector_indices)
# GRAPH OF COMPUTATION
# We initialize a new graph and set it as the default during each run
# of this algorithm. This ensures that as this function is called
# multiple times, the default graph doesn't keep getting crowded with
# unused ops and Variables from previous function calls.
graph = tf.Graph()
with graph.as_default():
# SESSION OF COMPUTATION
sess = tf.Session()
##CONSTRUCTING THE ELEMENTS OF COMPUTATION
##First lets ensure we have a Variable vector for each centroid,
##initialized to one of the vectors from the available data points
centroids = [
tf.Variable(vectors[vector_indices[i]]) for i in range(noofclusters)
]
##These nodes will assign the centroid Variables the appropriate
##values
centroid_value = tf.placeholder("float64", [dim])
cent_assigns = []
for centroid in centroids:
cent_assigns.append(tf.assign(centroid, centroid_value))
##Variables for cluster assignments of individual vectors(initialized
##to 0 at first)
assignments = [tf.Variable(0) for i in range(len(vectors))]
##These nodes will assign an assignment Variable the appropriate
##value
assignment_value = tf.placeholder("int32")
cluster_assigns = []
for assignment in assignments:
cluster_assigns.append(tf.assign(assignment, assignment_value))
##Now lets construct the node that will compute the mean
# The placeholder for the input
mean_input = tf.placeholder("float", [None, dim])
# The Node/op takes the input and computes a mean along the 0th
# dimension, i.e. the list of input vectors
mean_op = tf.reduce_mean(mean_input, 0)
##Node for computing Euclidean distances
# Placeholders for input
v1 = tf.placeholder("float", [dim])
v2 = tf.placeholder("float", [dim])
        euclid_dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2)))
##This node will figure out which cluster to assign a vector to,
##based on Euclidean distances of the vector from the centroids.
# Placeholder for input
centroid_distances = tf.placeholder("float", [noofclusters])
cluster_assignment = tf.argmin(centroid_distances, 0)
##INITIALIZING STATE VARIABLES
##This will help initialization of all Variables defined with respect
##to the graph. The Variable-initializer should be defined after
##all the Variables have been constructed, so that each of them
##will be included in the initialization.
        init_op = tf.global_variables_initializer()
# Initialize all variables
sess.run(init_op)
##CLUSTERING ITERATIONS
# Now perform the Expectation-Maximization steps of K-Means clustering
# iterations. To keep things simple, we will only do a set number of
# iterations, instead of using a Stopping Criterion.
noofiterations = 100
for _ in range(noofiterations):
##EXPECTATION STEP
##Based on the centroid locations till last iteration, compute
##the _expected_ centroid assignments.
# Iterate over each vector
for vector_n in range(len(vectors)):
vect = vectors[vector_n]
# Compute Euclidean distance between this vector and each
# centroid. Remember that this list cannot be named
#'centroid_distances', since that is the input to the
# cluster assignment node.
distances = [
sess.run(euclid_dist, feed_dict={v1: vect, v2: sess.run(centroid)})
for centroid in centroids
]
# Now use the cluster assignment node, with the distances
# as the input
assignment = sess.run(
cluster_assignment, feed_dict={centroid_distances: distances}
)
# Now assign the value to the appropriate state variable
sess.run(
cluster_assigns[vector_n], feed_dict={assignment_value: assignment}
)
##MAXIMIZATION STEP
# Based on the expected state computed from the Expectation Step,
# compute the locations of the centroids so as to maximize the
# overall objective of minimizing within-cluster Sum-of-Squares
for cluster_n in range(noofclusters):
# Collect all the vectors assigned to this cluster
assigned_vects = [
vectors[i]
for i in range(len(vectors))
if sess.run(assignments[i]) == cluster_n
]
# Compute new centroid location
new_location = sess.run(
mean_op, feed_dict={mean_input: array(assigned_vects)}
)
# Assign value to appropriate variable
sess.run(
cent_assigns[cluster_n], feed_dict={centroid_value: new_location}
)
# Return centroids and assignments
centroids = sess.run(centroids)
assignments = sess.run(assignments)
return centroids, assignments
================================================
FILE: dynamic_programming/knapsack.py
================================================
"""
Given weights and values of n items, put these items in a knapsack of
capacity W to get the maximum total value in the knapsack.
Note that only the integer weights 0-1 knapsack problem is solvable
using dynamic programming.
"""
def mf_knapsack(i, wt, val, j):
"""
    This code uses the memory function (memoisation) technique: only the subproblems
    that are actually needed are solved, unlike the bottom-up ``knapsack`` below.
    ``f`` is a global 2D table filled with ``-1`` s.
"""
global f # a global dp table for knapsack
if f[i][j] < 0:
if j < wt[i - 1]:
val = mf_knapsack(i - 1, wt, val, j)
else:
val = max(
mf_knapsack(i - 1, wt, val, j),
mf_knapsack(i - 1, wt, val, j - wt[i - 1]) + val[i - 1],
)
f[i][j] = val
return f[i][j]
def knapsack(w, wt, val, n):
dp = [[0] * (w + 1) for _ in range(n + 1)]
for i in range(1, n + 1):
for w_ in range(1, w + 1):
if wt[i - 1] <= w_:
dp[i][w_] = max(val[i - 1] + dp[i - 1][w_ - wt[i - 1]], dp[i - 1][w_])
else:
dp[i][w_] = dp[i - 1][w_]
    return dp[n][w], dp
def knapsack_with_example_solution(w: int, wt: list, val: list):
"""
Solves the integer weights knapsack problem returns one of
the several possible optimal subsets.
Parameters
----------
* `w`: int, the total maximum weight for the given knapsack problem.
* `wt`: list, the vector of weights for all items where ``wt[i]`` is the weight
of the ``i``-th item.
* `val`: list, the vector of values for all items where ``val[i]`` is the value
of the ``i``-th item
Returns
-------
* `optimal_val`: float, the optimal value for the given knapsack problem
* `example_optional_set`: set, the indices of one of the optimal subsets
which gave rise to the optimal value.
Examples
--------
>>> knapsack_with_example_solution(10, [1, 3, 5, 2], [10, 20, 100, 22])
(142, {2, 3, 4})
>>> knapsack_with_example_solution(6, [4, 3, 2, 3], [3, 2, 4, 4])
(8, {3, 4})
>>> knapsack_with_example_solution(6, [4, 3, 2, 3], [3, 2, 4])
Traceback (most recent call last):
...
ValueError: The number of weights must be the same as the number of values.
But got 4 weights and 3 values
"""
if not (isinstance(wt, (list, tuple)) and isinstance(val, (list, tuple))):
raise ValueError(
"Both the weights and values vectors must be either lists or tuples"
)
num_items = len(wt)
if num_items != len(val):
msg = (
"The number of weights must be the same as the number of values.\n"
f"But got {num_items} weights and {len(val)} values"
)
raise ValueError(msg)
for i in range(num_items):
if not isinstance(wt[i], int):
msg = (
"All weights must be integers but got weight of "
f"type {type(wt[i])} at index {i}"
)
raise TypeError(msg)
optimal_val, dp_table = knapsack(w, wt, val, num_items)
example_optional_set: set = set()
_construct_solution(dp_table, wt, num_items, w, example_optional_set)
return optimal_val, example_optional_set
def _construct_solution(dp: list, wt: list, i: int, j: int, optimal_set: set):
"""
Recursively reconstructs one of the optimal subsets given
a filled DP table and the vector of weights
Parameters
----------
* `dp`: list of list, the table of a solved integer weight dynamic programming
problem
* `wt`: list or tuple, the vector of weights of the items
* `i`: int, the index of the item under consideration
* `j`: int, the current possible maximum weight
* `optimal_set`: set, the optimal subset so far. This gets modified by the function.
Returns
-------
``None``
"""
# for the current item i at a maximum weight j to be part of an optimal subset,
# the optimal value at (i, j) must be greater than the optimal value at (i-1, j).
# where i - 1 means considering only the previous items at the given maximum weight
if i > 0 and j > 0:
if dp[i - 1][j] == dp[i][j]:
_construct_solution(dp, wt, i - 1, j, optimal_set)
else:
optimal_set.add(i)
_construct_solution(dp, wt, i - 1, j - wt[i - 1], optimal_set)
if __name__ == "__main__":
"""
Adding test case for knapsack
"""
val = [3, 2, 4, 4]
wt = [4, 3, 2, 3]
n = 4
w = 6
f = [[0] * (w + 1)] + [[0] + [-1] * (w + 1) for _ in range(n + 1)]
optimal_solution, _ = knapsack(w, wt, val, n)
print(optimal_solution)
print(mf_knapsack(n, wt, val, w)) # switched the n and w
# testing the dynamic programming problem with example
# the optimal subset for the above example are items 3 and 4
optimal_solution, optimal_subset = knapsack_with_example_solution(w, wt, val)
assert optimal_solution == 8
assert optimal_subset == {3, 4}
print("optimal_value = ", optimal_solution)
print("An optimal subset corresponding to the optimal value", optimal_subset)
================================================
FILE: dynamic_programming/largest_divisible_subset.py
================================================
from __future__ import annotations
def largest_divisible_subset(items: list[int]) -> list[int]:
"""
Algorithm to find the biggest subset in the given array such that for any 2 elements
x and y in the subset, either x divides y or y divides x.
>>> largest_divisible_subset([1, 16, 7, 8, 4])
[16, 8, 4, 1]
>>> largest_divisible_subset([1, 2, 3])
[2, 1]
>>> largest_divisible_subset([-1, -2, -3])
[-3]
>>> largest_divisible_subset([1, 2, 4, 8])
[8, 4, 2, 1]
>>> largest_divisible_subset((1, 2, 4, 8))
[8, 4, 2, 1]
>>> largest_divisible_subset([1, 1, 1])
[1, 1, 1]
>>> largest_divisible_subset([0, 0, 0])
[0, 0, 0]
>>> largest_divisible_subset([-1, -1, -1])
[-1, -1, -1]
>>> largest_divisible_subset([])
[]
"""
# Sort the array in ascending order as the sequence does not matter we only have to
# pick up a subset.
items = sorted(items)
number_of_items = len(items)
# Initialize memo with 1s and hash with increasing numbers
memo = [1] * number_of_items
hash_array = list(range(number_of_items))
# Iterate through the array
for i, item in enumerate(items):
for prev_index in range(i):
if ((items[prev_index] != 0 and item % items[prev_index]) == 0) and (
(1 + memo[prev_index]) > memo[i]
):
memo[i] = 1 + memo[prev_index]
hash_array[i] = prev_index
ans = -1
last_index = -1
# Find the maximum length and its corresponding index
for i, memo_item in enumerate(memo):
if memo_item > ans:
ans = memo_item
last_index = i
# Reconstruct the divisible subset
if last_index == -1:
return []
result = [items[last_index]]
while hash_array[last_index] != last_index:
last_index = hash_array[last_index]
result.append(items[last_index])
return result
if __name__ == "__main__":
from doctest import testmod
testmod()
items = [1, 16, 7, 8, 4]
print(
f"The longest divisible subset of {items} is {largest_divisible_subset(items)}."
)
================================================
FILE: dynamic_programming/longest_common_subsequence.py
================================================
"""
LCS Problem Statement: Given two sequences, find the length of longest subsequence
present in both of them. A subsequence is a sequence that appears in the same relative
order, but not necessarily continuous.
Example:"abc", "abg" are subsequences of "abcdefgh".
"""
def longest_common_subsequence(x: str, y: str):
"""
    Finds the longest common subsequence between two strings. Also returns
    the subsequence found.
Parameters
----------
x: str, one of the strings
y: str, the other string
Returns
-------
    dp[m][n]: int, the length of the longest common subsequence. Also equal to len(seq)
Seq: str, the subsequence found
>>> longest_common_subsequence("programming", "gaming")
(6, 'gaming')
>>> longest_common_subsequence("physics", "smartphone")
(2, 'ph')
>>> longest_common_subsequence("computer", "food")
(1, 'o')
>>> longest_common_subsequence("", "abc") # One string is empty
(0, '')
>>> longest_common_subsequence("abc", "") # Other string is empty
(0, '')
>>> longest_common_subsequence("", "") # Both strings are empty
(0, '')
>>> longest_common_subsequence("abc", "def") # No common subsequence
(0, '')
>>> longest_common_subsequence("abc", "abc") # Identical strings
(3, 'abc')
>>> longest_common_subsequence("a", "a") # Single character match
(1, 'a')
>>> longest_common_subsequence("a", "b") # Single character no match
(0, '')
>>> longest_common_subsequence("abcdef", "ace") # Interleaved subsequence
(3, 'ace')
>>> longest_common_subsequence("ABCD", "ACBD") # No repeated characters
(3, 'ABD')
"""
# find the length of strings
assert x is not None
assert y is not None
m = len(x)
n = len(y)
# declaring the array for storing the dp values
dp = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(1, m + 1):
for j in range(1, n + 1):
match = 1 if x[i - 1] == y[j - 1] else 0
dp[i][j] = max(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1] + match)
seq = ""
i, j = m, n
while i > 0 and j > 0:
match = 1 if x[i - 1] == y[j - 1] else 0
if dp[i][j] == dp[i - 1][j - 1] + match:
if match == 1:
seq = x[i - 1] + seq
i -= 1
j -= 1
elif dp[i][j] == dp[i - 1][j]:
i -= 1
else:
j -= 1
return dp[m][n], seq
if __name__ == "__main__":
a = "AGGTAB"
b = "GXTXAYB"
expected_ln = 4
expected_subseq = "GTAB"
ln, subseq = longest_common_subsequence(a, b)
print("len =", ln, ", sub-sequence =", subseq)
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/longest_common_substring.py
================================================
"""
Longest Common Substring Problem Statement:
Given two sequences, find the
longest common substring present in both of them. A substring is
necessarily continuous.
Example:
``abcdef`` and ``xabded`` have two longest common substrings, ``ab`` or ``de``.
Therefore, the algorithm should return any one of them.
"""
def longest_common_substring(text1: str, text2: str) -> str:
"""
Finds the longest common substring between two strings.
>>> longest_common_substring("", "")
''
>>> longest_common_substring("a","")
''
>>> longest_common_substring("", "a")
''
>>> longest_common_substring("a", "a")
'a'
>>> longest_common_substring("abcdef", "bcd")
'bcd'
>>> longest_common_substring("abcdef", "xabded")
'ab'
>>> longest_common_substring("GeeksforGeeks", "GeeksQuiz")
'Geeks'
>>> longest_common_substring("abcdxyz", "xyzabcd")
'abcd'
>>> longest_common_substring("zxabcdezy", "yzabcdezx")
'abcdez'
>>> longest_common_substring("OldSite:GeeksforGeeks.org", "NewSite:GeeksQuiz.com")
'Site:Geeks'
>>> longest_common_substring(1, 1)
Traceback (most recent call last):
...
ValueError: longest_common_substring() takes two strings for inputs
"""
if not (isinstance(text1, str) and isinstance(text2, str)):
raise ValueError("longest_common_substring() takes two strings for inputs")
if not text1 or not text2:
return ""
text1_length = len(text1)
text2_length = len(text2)
dp = [[0] * (text2_length + 1) for _ in range(text1_length + 1)]
end_pos = 0
max_length = 0
for i in range(1, text1_length + 1):
for j in range(1, text2_length + 1):
if text1[i - 1] == text2[j - 1]:
dp[i][j] = 1 + dp[i - 1][j - 1]
if dp[i][j] > max_length:
end_pos = i
max_length = dp[i][j]
return text1[end_pos - max_length : end_pos]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/longest_increasing_subsequence.py
================================================
"""
Author : Mehdi ALAOUI
This is a pure Python implementation of Dynamic Programming solution to the longest
increasing subsequence of a given sequence.
The problem is:
Given an array, find the longest increasing subsequence in that array (its
elements need not be contiguous) and return it.
Example:
``[10, 22, 9, 33, 21, 50, 41, 60, 80]`` as input will return
``[10, 22, 33, 41, 60, 80]`` as output
"""
from __future__ import annotations
def longest_subsequence(array: list[int]) -> list[int]: # This function is recursive
"""
Some examples
>>> longest_subsequence([10, 22, 9, 33, 21, 50, 41, 60, 80])
[10, 22, 33, 41, 60, 80]
>>> longest_subsequence([4, 8, 7, 5, 1, 12, 2, 3, 9])
[1, 2, 3, 9]
>>> longest_subsequence([28, 26, 12, 23, 35, 39])
[12, 23, 35, 39]
>>> longest_subsequence([9, 8, 7, 6, 5, 7])
[5, 7]
>>> longest_subsequence([1, 1, 1])
[1, 1, 1]
>>> longest_subsequence([])
[]
"""
array_length = len(array)
# If the array contains only one element, we return it (it's the stop condition of
# recursion)
if array_length <= 1:
return array
# Else
pivot = array[0]
is_found = False
i = 1
longest_subseq: list[int] = []
while not is_found and i < array_length:
if array[i] < pivot:
is_found = True
temp_array = array[i:]
temp_array = longest_subsequence(temp_array)
if len(temp_array) > len(longest_subseq):
longest_subseq = temp_array
else:
i += 1
temp_array = [element for element in array[1:] if element >= pivot]
temp_array = [pivot, *longest_subsequence(temp_array)]
if len(temp_array) > len(longest_subseq):
return temp_array
else:
return longest_subseq
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/longest_increasing_subsequence_iterative.py
================================================
"""
Author : Sanjay Muthu
This is a pure Python implementation of Dynamic Programming solution to the longest
increasing subsequence of a given sequence.
The problem is:
Given an array, find the longest increasing subsequence in that array (its
elements need not be contiguous) and return it.
Example:
``[10, 22, 9, 33, 21, 50, 41, 60, 80]`` as input will return
``[10, 22, 33, 50, 60, 80]`` as output
"""
from __future__ import annotations
import copy
def longest_subsequence(array: list[int]) -> list[int]:
"""
Some examples
>>> longest_subsequence([10, 22, 9, 33, 21, 50, 41, 60, 80])
[10, 22, 33, 50, 60, 80]
>>> longest_subsequence([4, 8, 7, 5, 1, 12, 2, 3, 9])
[1, 2, 3, 9]
>>> longest_subsequence([9, 8, 7, 6, 5, 7])
[7, 7]
>>> longest_subsequence([28, 26, 12, 23, 35, 39])
[12, 23, 35, 39]
>>> longest_subsequence([1, 1, 1])
[1, 1, 1]
>>> longest_subsequence([])
[]
"""
n = len(array)
# The longest increasing subsequence ending at array[i]
longest_increasing_subsequence = []
for i in range(n):
longest_increasing_subsequence.append([array[i]])
for i in range(1, n):
for prev in range(i):
# If array[prev] is less than or equal to array[i], then
# longest_increasing_subsequence[prev] + array[i]
# is a valid increasing subsequence
# longest_increasing_subsequence[i] is only set to
# longest_increasing_subsequence[prev] + array[i] if the length is longer.
if array[prev] <= array[i] and len(
longest_increasing_subsequence[prev]
) + 1 > len(longest_increasing_subsequence[i]):
longest_increasing_subsequence[i] = copy.copy(
longest_increasing_subsequence[prev]
)
longest_increasing_subsequence[i].append(array[i])
result: list[int] = []
for i in range(n):
if len(longest_increasing_subsequence[i]) > len(result):
result = longest_increasing_subsequence[i]
return result
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/longest_increasing_subsequence_o_nlogn.py
================================================
#############################
# Author: Aravind Kashyap
# File: lis.py
# comments: This program outputs the length of the Longest Strictly Increasing
#           Subsequence in O(N log N), where N is the number of elements in the list
#############################
from __future__ import annotations
def ceil_index(v, left, right, key):
while right - left > 1:
middle = (left + right) // 2
if v[middle] >= key:
right = middle
else:
left = middle
return right
def longest_increasing_subsequence_length(v: list[int]) -> int:
"""
>>> longest_increasing_subsequence_length([2, 5, 3, 7, 11, 8, 10, 13, 6])
6
>>> longest_increasing_subsequence_length([])
0
>>> longest_increasing_subsequence_length([0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13,
... 3, 11, 7, 15])
6
>>> longest_increasing_subsequence_length([5, 4, 3, 2, 1])
1
"""
if len(v) == 0:
return 0
tail = [0] * len(v)
length = 1
tail[0] = v[0]
for i in range(1, len(v)):
if v[i] < tail[0]:
tail[0] = v[i]
elif v[i] > tail[length - 1]:
tail[length] = v[i]
length += 1
else:
tail[ceil_index(tail, -1, length - 1, v[i])] = v[i]
return length
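# Illustrative trace (editorial note, not part of the original file): for
# [2, 5, 3, 7, 11, 8, 10, 13, 6] the active prefix of `tail` evolves as
# [2] -> [2, 5] -> [2, 3] -> [2, 3, 7] -> [2, 3, 7, 11] -> [2, 3, 7, 8]
# -> [2, 3, 7, 8, 10] -> [2, 3, 7, 8, 10, 13] -> [2, 3, 6, 8, 10, 13],
# giving the reported length 6. tail[k] always holds the smallest possible tail value
# of a strictly increasing subsequence of length k + 1 seen so far.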
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/longest_palindromic_subsequence.py
================================================
"""
author: Sanket Kittad
Given a string s, find the longest palindromic subsequence's length in s.
Input: s = "bbbab"
Output: 4
Explanation: One possible longest palindromic subsequence is "bbbb".
Leetcode link: https://leetcode.com/problems/longest-palindromic-subsequence/description/
"""
def longest_palindromic_subsequence(input_string: str) -> int:
"""
This function returns the longest palindromic subsequence in a string
>>> longest_palindromic_subsequence("bbbab")
4
>>> longest_palindromic_subsequence("bbabcbcab")
7
"""
n = len(input_string)
rev = input_string[::-1]
m = len(rev)
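    # The longest palindromic subsequence of a string is the longest common
    # subsequence of the string and its reverse, which is what the dp below computes.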
dp = [[-1] * (m + 1) for i in range(n + 1)]
for i in range(n + 1):
dp[i][0] = 0
for i in range(m + 1):
dp[0][i] = 0
# create and initialise dp array
for i in range(1, n + 1):
for j in range(1, m + 1):
# If characters at i and j are the same
# include them in the palindromic subsequence
if input_string[i - 1] == rev[j - 1]:
dp[i][j] = 1 + dp[i - 1][j - 1]
else:
dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
return dp[n][m]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/matrix_chain_multiplication.py
================================================
"""
| Find the minimum number of multiplications needed to multiply chain of matrices.
| Reference: https://www.geeksforgeeks.org/matrix-chain-multiplication-dp-8/
The algorithm has interesting real-world applications.
Example:
1. Image transformations in Computer Graphics as images are composed of matrices.
2. Solve complex polynomial equations in the field of algebra using least processing
power.
3. Calculate overall impact of macroeconomic decisions as economic equations involve a
number of variables.
4. Self-driving car navigation can be made more accurate as matrix multiplication can
accurately determine position and orientation of obstacles in short time.
Python doctests can be run with the following command::
    python -m doctest -v matrix_chain_multiplication.py
Given a sequence ``arr[]`` that represents chain of 2D matrices such that the dimension
of the ``i`` th matrix is ``arr[i-1]*arr[i]``.
So suppose ``arr = [40, 20, 30, 10, 30]`` means we have ``4`` matrices of dimensions
``40*20``, ``20*30``, ``30*10`` and ``10*30``.
``matrix_chain_multiply()`` returns an integer denoting minimum number of
multiplications to multiply the chain.
We do not need to perform actual multiplication here.
We only need to decide the order in which to perform the multiplication.
Hints:
1. Number of multiplications (ie cost) to multiply ``2`` matrices
of size ``m*p`` and ``p*n`` is ``m*p*n``.
2. The cost of matrix multiplication is not associative, ie the cost of
``(M1*M2)*M3`` generally differs from the cost of ``M1*(M2*M3)``, even though
the products themselves are equal.
3. Matrix multiplication is not commutative. So, ``M1*M2`` does not mean ``M2*M1``
can be done.
4. To determine the required order, we can try different combinations.
So, this problem has overlapping sub-problems and can be solved using recursion.
We use Dynamic Programming for optimal time complexity.
Example input:
``arr = [40, 20, 30, 10, 30]``
output:
``26000``
"""
from collections.abc import Iterator
from contextlib import contextmanager
from functools import cache
from sys import maxsize
def matrix_chain_multiply(arr: list[int]) -> int:
"""
    Find the minimum number of multiplications required to multiply the chain of matrices
Args:
`arr`: The input array of integers.
Returns:
Minimum number of multiplications needed to multiply the chain
Examples:
>>> matrix_chain_multiply([1, 2, 3, 4, 3])
30
>>> matrix_chain_multiply([10])
0
>>> matrix_chain_multiply([10, 20])
0
>>> matrix_chain_multiply([19, 2, 19])
722
>>> matrix_chain_multiply(list(range(1, 100)))
323398
>>> # matrix_chain_multiply(list(range(1, 251)))
# 2626798
"""
if len(arr) < 2:
return 0
# initialising 2D dp matrix
n = len(arr)
dp = [[maxsize for j in range(n)] for i in range(n)]
# we want minimum cost of multiplication of matrices
# of dimension (i*k) and (k*j). This cost is arr[i-1]*arr[k]*arr[j].
for i in range(n - 1, 0, -1):
for j in range(i, n):
if i == j:
dp[i][j] = 0
continue
for k in range(i, j):
dp[i][j] = min(
dp[i][j], dp[i][k] + dp[k + 1][j] + arr[i - 1] * arr[k] * arr[j]
)
return dp[1][n - 1]
def matrix_chain_order(dims: list[int]) -> int:
"""
Source: https://en.wikipedia.org/wiki/Matrix_chain_multiplication
    The dynamic programming solution is faster than the cached recursive solution and
can handle larger inputs.
>>> matrix_chain_order([1, 2, 3, 4, 3])
30
>>> matrix_chain_order([10])
0
>>> matrix_chain_order([10, 20])
0
>>> matrix_chain_order([19, 2, 19])
722
>>> matrix_chain_order(list(range(1, 100)))
323398
>>> # matrix_chain_order(list(range(1, 251))) # Max before RecursionError is raised
# 2626798
"""
@cache
def a(i: int, j: int) -> int:
return min(
(a(i, k) + dims[i] * dims[k] * dims[j] + a(k, j) for k in range(i + 1, j)),
default=0,
)
return a(0, len(dims) - 1)
@contextmanager
def elapsed_time(msg: str) -> Iterator:
# print(f"Starting: {msg}")
from time import perf_counter_ns
start = perf_counter_ns()
yield
print(f"Finished: {msg} in {(perf_counter_ns() - start) / 10**9} seconds.")
if __name__ == "__main__":
import doctest
doctest.testmod()
with elapsed_time("matrix_chain_order"):
print(f"{matrix_chain_order(list(range(1, 251))) = }")
with elapsed_time("matrix_chain_multiply"):
print(f"{matrix_chain_multiply(list(range(1, 251))) = }")
with elapsed_time("matrix_chain_order"):
print(f"{matrix_chain_order(list(range(1, 251))) = }")
with elapsed_time("matrix_chain_multiply"):
print(f"{matrix_chain_multiply(list(range(1, 251))) = }")
================================================
FILE: dynamic_programming/matrix_chain_order.py
================================================
"""
Dynamic Programming
Implementation of Matrix Chain Multiplication
Time Complexity: O(n^3)
Space Complexity: O(n^2)
Reference: https://en.wikipedia.org/wiki/Matrix_chain_multiplication
"""

import sys
def matrix_chain_order(array: list[int]) -> tuple[list[list[int]], list[list[int]]]:
"""
>>> matrix_chain_order([10, 30, 5])
([[0, 0, 0], [0, 0, 1500], [0, 0, 0]], [[0, 0, 0], [0, 0, 1], [0, 0, 0]])
"""
n = len(array)
matrix = [[0 for _ in range(n)] for _ in range(n)]
sol = [[0 for _ in range(n)] for _ in range(n)]
for chain_length in range(2, n):
for a in range(1, n - chain_length + 1):
b = a + chain_length - 1
matrix[a][b] = sys.maxsize
for c in range(a, b):
cost = (
matrix[a][c] + matrix[c + 1][b] + array[a - 1] * array[c] * array[b]
)
if cost < matrix[a][b]:
matrix[a][b] = cost
sol[a][b] = c
return matrix, sol
def print_optimal_solution(optimal_solution: list[list[int]], i: int, j: int):
"""
    Print the optimal parenthesization, where Ai denotes the i-th matrix in the chain.
"""
if i == j:
print("A" + str(i), end=" ")
else:
print("(", end=" ")
print_optimal_solution(optimal_solution, i, optimal_solution[i][j])
print_optimal_solution(optimal_solution, optimal_solution[i][j] + 1, j)
print(")", end=" ")
def main():
"""
Size of matrix created from array [30, 35, 15, 5, 10, 20, 25] will be:
30*35 35*15 15*5 5*10 10*20 20*25
"""
array = [30, 35, 15, 5, 10, 20, 25]
n = len(array)
matrix, optimal_solution = matrix_chain_order(array)
print("No. of Operation required: " + str(matrix[1][n - 1]))
print_optimal_solution(optimal_solution, 1, n - 1)
if __name__ == "__main__":
main()
================================================
FILE: dynamic_programming/max_non_adjacent_sum.py
================================================
# Video Explanation: https://www.youtube.com/watch?v=6w60Zi1NtL8&feature=emb_logo
from __future__ import annotations
def maximum_non_adjacent_sum(nums: list[int]) -> int:
"""
Find the maximum non-adjacent sum of the integers in the nums input list
>>> maximum_non_adjacent_sum([1, 2, 3])
4
>>> maximum_non_adjacent_sum([1, 5, 3, 7, 2, 2, 6])
18
>>> maximum_non_adjacent_sum([-1, -5, -3, -7, -2, -2, -6])
0
>>> maximum_non_adjacent_sum([499, 500, -3, -7, -2, -2, -6])
500
"""
if not nums:
return 0
max_including = nums[0]
max_excluding = 0
for num in nums[1:]:
max_including, max_excluding = (
max_excluding + num,
max(max_including, max_excluding),
)
return max(max_excluding, max_including)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/max_product_subarray.py
================================================
def max_product_subarray(numbers: list[int]) -> int:
"""
Returns the maximum product that can be obtained by multiplying a
contiguous subarray of the given integer list `numbers`.
Example:
>>> max_product_subarray([2, 3, -2, 4])
6
>>> max_product_subarray((-2, 0, -1))
0
>>> max_product_subarray([2, 3, -2, 4, -1])
48
>>> max_product_subarray([-1])
-1
>>> max_product_subarray([0])
0
>>> max_product_subarray([])
0
>>> max_product_subarray("")
0
>>> max_product_subarray(None)
0
>>> max_product_subarray([2, 3, -2, 4.5, -1])
Traceback (most recent call last):
...
ValueError: numbers must be an iterable of integers
>>> max_product_subarray("ABC")
Traceback (most recent call last):
...
ValueError: numbers must be an iterable of integers
"""
if not numbers:
return 0
if not isinstance(numbers, (list, tuple)) or not all(
isinstance(number, int) for number in numbers
):
raise ValueError("numbers must be an iterable of integers")
max_till_now = min_till_now = max_prod = numbers[0]
for i in range(1, len(numbers)):
# update the maximum and minimum subarray products
number = numbers[i]
if number < 0:
max_till_now, min_till_now = min_till_now, max_till_now
max_till_now = max(number, max_till_now * number)
min_till_now = min(number, min_till_now * number)
# update the maximum product found till now
max_prod = max(max_prod, max_till_now)
return max_prod
================================================
FILE: dynamic_programming/max_subarray_sum.py
================================================
"""
The maximum subarray sum problem is the task of finding the maximum sum that can be
obtained from a contiguous subarray within a given array of numbers. For example, given
the array [-2, 1, -3, 4, -1, 2, 1, -5, 4], the contiguous subarray with the maximum sum
is [4, -1, 2, 1], so the maximum subarray sum is 6.
Kadane's algorithm is a simple dynamic programming algorithm that solves the maximum
subarray sum problem in O(n) time and O(1) space.
Reference: https://en.wikipedia.org/wiki/Maximum_subarray_problem
"""
from collections.abc import Sequence
def max_subarray_sum(
arr: Sequence[float], allow_empty_subarrays: bool = False
) -> float:
"""
Solves the maximum subarray sum problem using Kadane's algorithm.
:param arr: the given array of numbers
:param allow_empty_subarrays: if True, then the algorithm considers empty subarrays
>>> max_subarray_sum([2, 8, 9])
19
>>> max_subarray_sum([0, 0])
0
>>> max_subarray_sum([-1.0, 0.0, 1.0])
1.0
>>> max_subarray_sum([1, 2, 3, 4, -2])
10
>>> max_subarray_sum([-2, 1, -3, 4, -1, 2, 1, -5, 4])
6
>>> max_subarray_sum([2, 3, -9, 8, -2])
8
>>> max_subarray_sum([-2, -3, -1, -4, -6])
-1
>>> max_subarray_sum([-2, -3, -1, -4, -6], allow_empty_subarrays=True)
0
>>> max_subarray_sum([])
0
"""
if not arr:
return 0
max_sum = 0 if allow_empty_subarrays else float("-inf")
curr_sum = 0.0
for num in arr:
curr_sum = max(0 if allow_empty_subarrays else num, curr_sum + num)
max_sum = max(max_sum, curr_sum)
return max_sum
if __name__ == "__main__":
from doctest import testmod
testmod()
nums = [-2, 1, -3, 4, -1, 2, 1, -5, 4]
print(f"{max_subarray_sum(nums) = }")
================================================
FILE: dynamic_programming/min_distance_up_bottom.py
================================================
"""
Author : Alexander Pantyukhin
Date : October 14, 2022
This is an implementation of the top-down approach to finding the edit
(Levenshtein) distance.
The implementation was tested on Leetcode: https://leetcode.com/problems/edit-distance/
Dynamic Programming: top-down (memoized recursion).
"""
import functools
def min_distance_up_bottom(word1: str, word2: str) -> int:
"""
>>> min_distance_up_bottom("intention", "execution")
5
>>> min_distance_up_bottom("intention", "")
9
>>> min_distance_up_bottom("", "")
0
>>> min_distance_up_bottom("zooicoarchaeologist", "zoologist")
10
"""
len_word1 = len(word1)
len_word2 = len(word2)
@functools.cache
def min_distance(index1: int, index2: int) -> int:
# if first word index overflows - delete all from the second word
if index1 >= len_word1:
return len_word2 - index2
# if second word index overflows - delete all from the first word
if index2 >= len_word2:
return len_word1 - index1
diff = int(word1[index1] != word2[index2]) # current letters not identical
return min(
1 + min_distance(index1 + 1, index2),
1 + min_distance(index1, index2 + 1),
diff + min_distance(index1 + 1, index2 + 1),
)
return min_distance(0, 0)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/minimum_coin_change.py
================================================
"""
You have m types of coins available in infinite quantities,
where the value of each coin is given in the array S=[S0,... Sm-1].
Can you determine the number of ways of making change for n units using
the given types of coins?
https://www.hackerrank.com/challenges/coin-change/problem
"""
def dp_count(s, n):
"""
>>> dp_count([1, 2, 3], 4)
4
>>> dp_count([1, 2, 3], 7)
8
>>> dp_count([2, 5, 3, 6], 10)
5
>>> dp_count([10], 99)
0
>>> dp_count([4, 5, 6], 0)
1
>>> dp_count([1, 2, 3], -5)
0
"""
if n < 0:
return 0
# table[i] represents the number of ways to get to amount i
table = [0] * (n + 1)
# There is exactly 1 way to get to zero(You pick no coins).
table[0] = 1
# Pick all coins one by one and update table[] values
# after the index greater than or equal to the value of the
# picked coin
for coin_val in s:
for j in range(coin_val, n + 1):
table[j] += table[j - coin_val]
return table[n]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/minimum_cost_path.py
================================================
# Youtube Explanation: https://www.youtube.com/watch?v=lBRtnuxg-gU
from __future__ import annotations
def minimum_cost_path(matrix: list[list[int]]) -> int:
"""
    Find the minimum cost of a path from the top left to the bottom right of a given
    matrix, moving only right or down.
>>> minimum_cost_path([[2, 1], [3, 1], [4, 2]])
6
>>> minimum_cost_path([[2, 1, 4], [2, 1, 3], [3, 2, 1]])
7
"""
# preprocessing the first row
for i in range(1, len(matrix[0])):
matrix[0][i] += matrix[0][i - 1]
# preprocessing the first column
for i in range(1, len(matrix)):
matrix[i][0] += matrix[i - 1][0]
# updating the path cost for current position
for i in range(1, len(matrix)):
for j in range(1, len(matrix[0])):
matrix[i][j] += min(matrix[i - 1][j], matrix[i][j - 1])
return matrix[-1][-1]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/minimum_partition.py
================================================
"""
Partition a set into two subsets such that the difference of subset sums is minimum
"""
def find_min(numbers: list[int]) -> int:
"""
>>> find_min([1, 2, 3, 4, 5])
1
>>> find_min([5, 5, 5, 5, 5])
5
>>> find_min([5, 5, 5, 5])
0
>>> find_min([3])
3
>>> find_min([])
0
>>> find_min([1, 2, 3, 4])
0
>>> find_min([0, 0, 0, 0])
0
>>> find_min([-1, -5, 5, 1])
0
>>> find_min([-1, -5, 5, 1])
0
>>> find_min([9, 9, 9, 9, 9])
9
>>> find_min([1, 5, 10, 3])
1
>>> find_min([-1, 0, 1])
0
>>> find_min(range(10, 0, -1))
1
>>> find_min([-1])
Traceback (most recent call last):
    ...
IndexError: list assignment index out of range
>>> find_min([0, 0, 0, 1, 2, -4])
Traceback (most recent call last):
...
IndexError: list assignment index out of range
>>> find_min([-1, -5, -10, -3])
Traceback (most recent call last):
...
IndexError: list assignment index out of range
"""
n = len(numbers)
s = sum(numbers)
dp = [[False for x in range(s + 1)] for y in range(n + 1)]
for i in range(n + 1):
dp[i][0] = True
for i in range(1, s + 1):
dp[0][i] = False
for i in range(1, n + 1):
for j in range(1, s + 1):
dp[i][j] = dp[i - 1][j]
if numbers[i - 1] <= j:
dp[i][j] = dp[i][j] or dp[i - 1][j - numbers[i - 1]]
for j in range(int(s / 2), -1, -1):
if dp[n][j] is True:
diff = s - 2 * j
break
return diff
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: dynamic_programming/minimum_size_subarray_sum.py
================================================
import sys
def minimum_subarray_sum(target: int, numbers: list[int]) -> int:
"""
Return the length of the shortest contiguous subarray in a list of numbers whose sum
is at least target. Reference: https://stackoverflow.com/questions/8269916
>>> minimum_subarray_sum(7, [2, 3, 1, 2, 4, 3])
2
>>> minimum_subarray_sum(7, [2, 3, -1, 2, 4, -3])
4
>>> minimum_subarray_sum(11, [1, 1, 1, 1, 1, 1, 1, 1])
0
>>> minimum_subarray_sum(10, [1, 2, 3, 4, 5, 6, 7])
2
>>> minimum_subarray_sum(5, [1, 1, 1, 1, 1, 5])
1
>>> minimum_subarray_sum(0, [])
0
>>> minimum_subarray_sum(0, [1, 2, 3])
1
>>> minimum_subarray_sum(10, [10, 20, 30])
1
>>> minimum_subarray_sum(7, [1, 1, 1, 1, 1, 1, 10])
1
>>> minimum_subarray_sum(6, [])
0
>>> minimum_subarray_sum(2, [1, 2, 3])
1
>>> minimum_subarray_sum(-6, [])
0
>>> minimum_subarray_sum(-6, [3, 4, 5])
1
>>> minimum_subarray_sum(8, None)
0
>>> minimum_subarray_sum(2, "ABC")
Traceback (most recent call last):
...
ValueError: numbers must be an iterable of integers
"""
if not numbers:
return 0
if target == 0 and target in numbers:
return 0
if not isinstance(numbers, (list, tuple)) or not all(
isinstance(number, int) for number in numbers
):
raise ValueError("numbers must be an iterable of integers")
left = right = curr_sum = 0
min_len = sys.maxsize
while right < len(numbers):
curr_sum += numbers[right]
while curr_sum >= target and left <= right:
min_len = min(min_len, right - left + 1)
curr_sum -= numbers[left]
left += 1
right += 1
return 0 if min_len == sys.maxsize else min_len
================================================
FILE: dynamic_programming/minimum_squares_to_represent_a_number.py
================================================
import math
import sys
def minimum_squares_to_represent_a_number(number: int) -> int:
"""
    Count the minimum number of squares needed to represent a number
>>> minimum_squares_to_represent_a_number(25)
1
>>> minimum_squares_to_represent_a_number(37)
2
>>> minimum_squares_to_represent_a_number(21)
3
>>> minimum_squares_to_represent_a_number(58)
2
>>> minimum_squares_to_represent_a_number(-1)
Traceback (most recent call last):
...
ValueError: the value of input must not be a negative number
>>> minimum_squares_to_represent_a_number(0)
1
>>> minimum_squares_to_represent_a_number(12.34)
Traceback (most recent call last):
...
ValueError: the value of input must be a natural number
"""
if number != int(number):
raise ValueError("the value of input must be a natural number")
if number < 0:
raise ValueError("the value of input must not be a negative number")
if number == 0:
return 1
answers = [-1] * (number + 1)
answers[0] = 0
for i in range(1, number + 1):
answer = sys.maxsize
root = int(math.sqrt(i))
for j in range(1, root + 1):
current_answer = 1 + answers[i - (j**2)]
answer = min(answer, current_answer)
answers[i] = answer
return answers[number]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/minimum_steps_to_one.py
================================================
"""
YouTube Explanation: https://www.youtube.com/watch?v=f2xi3c1S95M
Given an integer n, return the minimum steps from n to 1
AVAILABLE STEPS:
* Decrement by 1
* if n is divisible by 2, divide by 2
* if n is divisible by 3, divide by 3
Example 1: n = 10
10 -> 9 -> 3 -> 1
Result: 3 steps
Example 2: n = 15
15 -> 5 -> 4 -> 2 -> 1
Result: 4 steps
Example 3: n = 6
6 -> 2 -> 1
Result: 2 steps
"""
from __future__ import annotations
__author__ = "Alexander Joslin"
def min_steps_to_one(number: int) -> int:
"""
Minimum steps to 1 implemented using tabulation.
>>> min_steps_to_one(10)
3
>>> min_steps_to_one(15)
4
>>> min_steps_to_one(6)
2
:param number:
:return int:
"""
if number <= 0:
msg = f"n must be greater than 0. Got n = {number}"
raise ValueError(msg)
table = [number + 1] * (number + 1)
# starting position
table[1] = 0
for i in range(1, number):
table[i + 1] = min(table[i + 1], table[i] + 1)
# check if out of bounds
if i * 2 <= number:
table[i * 2] = min(table[i * 2], table[i] + 1)
# check if out of bounds
if i * 3 <= number:
table[i * 3] = min(table[i * 3], table[i] + 1)
return table[number]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/minimum_tickets_cost.py
================================================
"""
Author : Alexander Pantyukhin
Date : November 1, 2022
Task:
Given a list of days on which you need to travel. Each day is an integer from 1 to 365.
You can buy tickets valid for 1 day, 7 days or 30 days.
Each ticket has a cost.
Find the minimum cost needed to travel on every day in the given list.
Implementation notes:
Top-down (memoized) dynamic programming.
Runtime complexity: O(n)
The implementation was tested on
Leetcode: https://leetcode.com/problems/minimum-cost-for-tickets/
Minimum Cost For Tickets
"""
import functools
def mincost_tickets(days: list[int], costs: list[int]) -> int:
"""
>>> mincost_tickets([1, 4, 6, 7, 8, 20], [2, 7, 15])
11
>>> mincost_tickets([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 31], [2, 7, 15])
17
>>> mincost_tickets([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 31], [2, 90, 150])
24
>>> mincost_tickets([2], [2, 90, 150])
2
>>> mincost_tickets([], [2, 90, 150])
0
>>> mincost_tickets('hello', [2, 90, 150])
Traceback (most recent call last):
...
ValueError: The parameter days should be a list of integers
>>> mincost_tickets([], 'world')
Traceback (most recent call last):
...
ValueError: The parameter costs should be a list of three integers
>>> mincost_tickets([0.25, 2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 31], [2, 90, 150])
Traceback (most recent call last):
...
ValueError: The parameter days should be a list of integers
>>> mincost_tickets([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 31], [2, 0.9, 150])
Traceback (most recent call last):
...
ValueError: The parameter costs should be a list of three integers
>>> mincost_tickets([-1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 31], [2, 90, 150])
Traceback (most recent call last):
...
ValueError: All days elements should be greater than 0
>>> mincost_tickets([2, 367], [2, 90, 150])
Traceback (most recent call last):
...
ValueError: All days elements should be less than 366
>>> mincost_tickets([2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 31], [])
Traceback (most recent call last):
...
ValueError: The parameter costs should be a list of three integers
>>> mincost_tickets([], [])
Traceback (most recent call last):
...
ValueError: The parameter costs should be a list of three integers
>>> mincost_tickets([2, 3, 4, 5, 6, 7, 8, 9, 10, 30, 31], [1, 2, 3, 4])
Traceback (most recent call last):
...
ValueError: The parameter costs should be a list of three integers
"""
# Validation
if not isinstance(days, list) or not all(isinstance(day, int) for day in days):
raise ValueError("The parameter days should be a list of integers")
if len(costs) != 3 or not all(isinstance(cost, int) for cost in costs):
raise ValueError("The parameter costs should be a list of three integers")
if len(days) == 0:
return 0
if min(days) <= 0:
raise ValueError("All days elements should be greater than 0")
if max(days) >= 366:
raise ValueError("All days elements should be less than 366")
days_set = set(days)
@functools.cache
def dynamic_programming(index: int) -> int:
if index > 365:
return 0
if index not in days_set:
return dynamic_programming(index + 1)
return min(
costs[0] + dynamic_programming(index + 1),
costs[1] + dynamic_programming(index + 7),
costs[2] + dynamic_programming(index + 30),
)
return dynamic_programming(1)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/narcissistic_number.py
================================================
"""
Find all narcissistic numbers up to a given limit using dynamic programming.
A narcissistic number (also known as an Armstrong number or plus perfect number)
is a number that is the sum of its own digits each raised to the power of the
number of digits.
For example, 153 is a narcissistic number because 153 = 1^3 + 5^3 + 3^3.
This implementation uses dynamic programming with memoization to efficiently
compute digit powers and find all narcissistic numbers up to a specified limit.
The DP optimization caches digit^power calculations. When searching through many
numbers, the same digit power calculations occur repeatedly (e.g., 153, 351, 135
all need 1^3, 5^3, 3^3). Memoization avoids these redundant calculations.
Examples of narcissistic numbers:
Single digit: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
Three digit: 153, 370, 371, 407
Four digit: 1634, 8208, 9474
Five digit: 54748, 92727, 93084
Reference: https://en.wikipedia.org/wiki/Narcissistic_number
"""
def find_narcissistic_numbers(limit: int) -> list[int]:
"""
Find all narcissistic numbers up to the given limit using dynamic programming.
This function uses memoization to cache digit power calculations, avoiding
redundant computations across different numbers with the same digit count.
Args:
limit: The upper bound for searching narcissistic numbers (exclusive)
Returns:
list[int]: A sorted list of all narcissistic numbers below the limit
Examples:
>>> find_narcissistic_numbers(10)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> find_narcissistic_numbers(160)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 153]
>>> find_narcissistic_numbers(400)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 153, 370, 371]
>>> find_narcissistic_numbers(1000)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 153, 370, 371, 407]
>>> find_narcissistic_numbers(10000)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 153, 370, 371, 407, 1634, 8208, 9474]
>>> find_narcissistic_numbers(1)
[0]
>>> find_narcissistic_numbers(0)
[]
"""
if limit <= 0:
return []
narcissistic_nums = []
# Memoization: cache[(power, digit)] = digit^power
# This avoids recalculating the same power for different numbers
power_cache: dict[tuple[int, int], int] = {}
def get_digit_power(digit: int, power: int) -> int:
"""Get digit^power using memoization (DP optimization)."""
if (power, digit) not in power_cache:
power_cache[(power, digit)] = digit**power
return power_cache[(power, digit)]
# Check each number up to the limit
for number in range(limit):
# Count digits
num_digits = len(str(number))
# Calculate sum of powered digits using memoized powers
remaining = number
digit_sum = 0
while remaining > 0:
digit = remaining % 10
digit_sum += get_digit_power(digit, num_digits)
remaining //= 10
# Check if narcissistic
if digit_sum == number:
narcissistic_nums.append(number)
return narcissistic_nums
if __name__ == "__main__":
import doctest
doctest.testmod()
# Demonstrate the dynamic programming approach
print("Finding all narcissistic numbers up to 10000:")
print("(Using memoization to cache digit power calculations)")
print()
narcissistic_numbers = find_narcissistic_numbers(10000)
print(f"Found {len(narcissistic_numbers)} narcissistic numbers:")
print(narcissistic_numbers)
================================================
FILE: dynamic_programming/optimal_binary_search_tree.py
================================================
#!/usr/bin/env python3
# This Python program implements a dynamic programming algorithm that builds an
# optimal binary search tree (abbreviated BST) and runs in O(n^2) time.
#
# The goal of the optimal BST problem is to build a low-cost BST for a
# given set of nodes, each with its own key and frequency. The frequency
# of a node is defined as how many times the node is searched.
# The search cost of a binary search tree is given by this formula:
#
# cost(1, n) = sum{i = 1 to n}((depth(node_i) + 1) * node_i_freq)
#
# where n is number of nodes in the BST. The characteristic of low-cost
# BSTs is having a faster overall search time than other implementations.
# The reason for their fast search time is that the nodes with high
# frequencies will be placed near the root of the tree while the nodes
# with low frequencies will be placed near the leaves of the tree thus
# reducing search time in the most frequent instances.
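# A tiny worked instance of the cost formula above (illustration only): for two keys
# with frequencies 34 and 50, making the high-frequency key the root costs
# 50 * 1 + 34 * 2 = 118, while making the low-frequency key the root costs
# 34 * 1 + 50 * 2 = 134, so the optimal BST places the frequently searched key at the
# root.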
import sys
from random import randint
class Node:
"""Binary Search Tree Node"""
def __init__(self, key, freq):
self.key = key
self.freq = freq
def __str__(self):
"""
>>> str(Node(1, 2))
'Node(key=1, freq=2)'
"""
return f"Node(key={self.key}, freq={self.freq})"
def print_binary_search_tree(root, key, i, j, parent, is_left):
"""
Recursive function to print a BST from a root table.
>>> key = [3, 8, 9, 10, 17, 21]
>>> root = [[0, 1, 1, 1, 1, 1], [0, 1, 1, 1, 1, 3], [0, 0, 2, 3, 3, 3], \
[0, 0, 0, 3, 3, 3], [0, 0, 0, 0, 4, 5], [0, 0, 0, 0, 0, 5]]
>>> print_binary_search_tree(root, key, 0, 5, -1, False)
8 is the root of the binary search tree.
3 is the left child of key 8.
10 is the right child of key 8.
9 is the left child of key 10.
21 is the right child of key 10.
17 is the left child of key 21.
"""
if i > j or i < 0 or j > len(root) - 1:
return
node = root[i][j]
if parent == -1: # root does not have a parent
print(f"{key[node]} is the root of the binary search tree.")
elif is_left:
print(f"{key[node]} is the left child of key {parent}.")
else:
print(f"{key[node]} is the right child of key {parent}.")
print_binary_search_tree(root, key, i, node - 1, key[node], True)
print_binary_search_tree(root, key, node + 1, j, key[node], False)
def find_optimal_binary_search_tree(nodes):
"""
This function calculates and prints the optimal binary search tree.
The dynamic programming algorithm below runs in O(n^2) time.
Implemented from CLRS (Introduction to Algorithms) book.
https://en.wikipedia.org/wiki/Introduction_to_Algorithms
>>> find_optimal_binary_search_tree([Node(12, 8), Node(10, 34), Node(20, 50), \
Node(42, 3), Node(25, 40), Node(37, 30)])
Binary search tree nodes:
Node(key=10, freq=34)
Node(key=12, freq=8)
Node(key=20, freq=50)
Node(key=25, freq=40)
Node(key=37, freq=30)
Node(key=42, freq=3)
The cost of optimal BST for given tree nodes is 324.
20 is the root of the binary search tree.
10 is the left child of key 20.
12 is the right child of key 10.
25 is the right child of key 20.
37 is the right child of key 25.
42 is the right child of key 37.
"""
    # Tree nodes must be sorted first; the code below sorts the keys in
    # increasing order and rearranges their frequencies accordingly.
nodes.sort(key=lambda node: node.key)
n = len(nodes)
keys = [nodes[i].key for i in range(n)]
freqs = [nodes[i].freq for i in range(n)]
    # This 2D array stores the overall tree cost (minimized over all possible roots);
    # for a single key, the cost is equal to the frequency of the key.
dp = [[freqs[i] if i == j else 0 for j in range(n)] for i in range(n)]
# sum[i][j] stores the sum of key frequencies between i and j inclusive in nodes
# array
total = [[freqs[i] if i == j else 0 for j in range(n)] for i in range(n)]
# stores tree roots that will be used later for constructing binary search tree
root = [[i if i == j else 0 for j in range(n)] for i in range(n)]
for interval_length in range(2, n + 1):
for i in range(n - interval_length + 1):
j = i + interval_length - 1
dp[i][j] = sys.maxsize # set the value to "infinity"
total[i][j] = total[i][j - 1] + freqs[j]
# Apply Knuth's optimization
# Loop without optimization: for r in range(i, j + 1):
            for r in range(root[i][j - 1], root[i + 1][j] + 1):  # r is a candidate root
left = dp[i][r - 1] if r != i else 0 # optimal cost for left subtree
right = dp[r + 1][j] if r != j else 0 # optimal cost for right subtree
cost = left + total[i][j] + right
if dp[i][j] > cost:
dp[i][j] = cost
root[i][j] = r
print("Binary search tree nodes:")
for node in nodes:
print(node)
print(f"\nThe cost of optimal BST for given tree nodes is {dp[0][n - 1]}.")
print_binary_search_tree(root, keys, 0, n - 1, -1, False)
def main():
# A sample binary search tree
nodes = [Node(i, randint(1, 50)) for i in range(10, 0, -1)]
find_optimal_binary_search_tree(nodes)
if __name__ == "__main__":
main()
================================================
FILE: dynamic_programming/palindrome_partitioning.py
================================================
"""
Given a string s, partition s such that every substring of the
partition is a palindrome.
Find the minimum cuts needed for a palindrome partitioning of s.
Time Complexity: O(n^2)
Space Complexity: O(n^2)
For other explanations refer to: https://www.youtube.com/watch?v=_H8V5hJUGd0
"""
def find_minimum_partitions(string: str) -> int:
"""
Returns the minimum cuts needed for a palindrome partitioning of string
>>> find_minimum_partitions("aab")
1
>>> find_minimum_partitions("aaa")
0
>>> find_minimum_partitions("ababbbabbababa")
3
"""
length = len(string)
cut = [0] * length
is_palindromic = [[False for i in range(length)] for j in range(length)]
for i, c in enumerate(string):
mincut = i
for j in range(i + 1):
if c == string[j] and (i - j < 2 or is_palindromic[j + 1][i - 1]):
is_palindromic[j][i] = True
mincut = min(mincut, 0 if j == 0 else (cut[j - 1] + 1))
cut[i] = mincut
return cut[length - 1]
if __name__ == "__main__":
s = input("Enter the string: ").strip()
ans = find_minimum_partitions(s)
print(f"Minimum number of partitions required for the '{s}' is {ans}")
================================================
FILE: dynamic_programming/range_sum_query.py
================================================
"""
Author: Sanjay Muthu
This is an implementation of the Dynamic Programming solution to the Range Sum Query.
The problem statement is:
Given an array and q queries,
each query asking for the sum of the elements from index l to r (inclusive).
Example:
arr = [1, 4, 6, 2, 61, 12]
queries = 3
l_1 = 2, r_1 = 5
l_2 = 1, r_2 = 5
l_3 = 3, r_3 = 4
as input will return
[81, 85, 63]
as output
0-indexing:
NOTE: 0-indexing means the indexing of the array starts from 0
Example: a = [1, 2, 3, 4, 5, 6]
Here, the 0th index of a is 1,
the 1st index of a is 2,
and so forth
Time Complexity: O(N + Q)
* O(N) pre-calculation time to calculate the prefix sum array
* and O(1) time per each query = O(1 * Q) = O(Q) time
Space Complexity: O(N)
* O(N) to store the prefix sum
Algorithm:
So, first we calculate the prefix sum (dp) of the array.
The prefix sum of the index i is the sum of all elements indexed
from 0 to i (inclusive).
The prefix sum of the index i is the prefix sum of index (i - 1) + the current element.
So, the state of the dp is dp[i] = dp[i - 1] + a[i].
After we calculate the prefix sum,
for each query [l, r]
the answer is dp[r] - dp[l - 1] (we need to be careful because l might be 0).
For example take this array:
[4, 2, 1, 6, 3]
The prefix sum calculated for this array would be:
[4, 4 + 2, 4 + 2 + 1, 4 + 2 + 1 + 6, 4 + 2 + 1 + 6 + 3]
==> [4, 6, 7, 13, 16]
If the query was l = 3, r = 4,
the answer would be 6 + 3 = 9 but this would require O(r - l + 1) time ≈ O(N) time
If we use prefix sums we can find it in O(1) by using the formula
prefix[r] - prefix[l - 1].
This formula works because prefix[r] is the sum of elements from [0, r]
and prefix[l - 1] is the sum of elements from [0, l - 1],
so if we do prefix[r] - prefix[l - 1] it will be
[0, r] - [0, l - 1] = [0, l - 1] + [l, r] - [0, l - 1] = [l, r]
"""
def prefix_sum(array: list[int], queries: list[tuple[int, int]]) -> list[int]:
"""
>>> prefix_sum([1, 4, 6, 2, 61, 12], [(2, 5), (1, 5), (3, 4)])
[81, 85, 63]
>>> prefix_sum([4, 2, 1, 6, 3], [(3, 4), (1, 3), (0, 2)])
[9, 9, 7]
"""
# The prefix sum array
dp = [0] * len(array)
dp[0] = array[0]
for i in range(1, len(array)):
dp[i] = dp[i - 1] + array[i]
    # See the Algorithm section of the module docstring above
result = []
for query in queries:
left, right = query
res = dp[right]
if left > 0:
res -= dp[left - 1]
result.append(res)
return result
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/regex_match.py
================================================
"""
Regex matching check if a text matches pattern or not.
Pattern:
1. ``.`` Matches any single character.
2. ``*`` Matches zero or more of the preceding element.
More info:
https://medium.com/trick-the-interviwer/regular-expression-matching-9972eb74c03
"""
def recursive_match(text: str, pattern: str) -> bool:
r"""
Recursive matching algorithm.
| Time complexity: O(2^(\|text\| + \|pattern\|))
| Space complexity: Recursion depth is O(\|text\| + \|pattern\|).
:param text: Text to match.
:param pattern: Pattern to match.
:return: ``True`` if `text` matches `pattern`, ``False`` otherwise.
>>> recursive_match('abc', 'a.c')
True
>>> recursive_match('abc', 'af*.c')
True
>>> recursive_match('abc', 'a.c*')
True
>>> recursive_match('abc', 'a.c*d')
False
>>> recursive_match('aa', '.*')
True
"""
if not pattern:
return not text
if not text:
return pattern[-1] == "*" and recursive_match(text, pattern[:-2])
if text[-1] == pattern[-1] or pattern[-1] == ".":
return recursive_match(text[:-1], pattern[:-1])
if pattern[-1] == "*":
return recursive_match(text[:-1], pattern) or recursive_match(
text, pattern[:-2]
)
return False
def dp_match(text: str, pattern: str) -> bool:
r"""
Dynamic programming matching algorithm.
| Time complexity: O(\|text\| * \|pattern\|)
| Space complexity: O(\|text\| * \|pattern\|)
:param text: Text to match.
:param pattern: Pattern to match.
:return: ``True`` if `text` matches `pattern`, ``False`` otherwise.
>>> dp_match('abc', 'a.c')
True
>>> dp_match('abc', 'af*.c')
True
>>> dp_match('abc', 'a.c*')
True
>>> dp_match('abc', 'a.c*d')
False
>>> dp_match('aa', '.*')
True
"""
m = len(text)
n = len(pattern)
dp = [[False for _ in range(n + 1)] for _ in range(m + 1)]
dp[0][0] = True
for j in range(1, n + 1):
dp[0][j] = pattern[j - 1] == "*" and dp[0][j - 2]
for i in range(1, m + 1):
for j in range(1, n + 1):
if pattern[j - 1] in {".", text[i - 1]}:
dp[i][j] = dp[i - 1][j - 1]
elif pattern[j - 1] == "*":
dp[i][j] = dp[i][j - 2]
if pattern[j - 2] in {".", text[i - 1]}:
dp[i][j] |= dp[i - 1][j]
else:
dp[i][j] = False
return dp[m][n]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/rod_cutting.py
================================================
"""
This module provides two implementations for the rod-cutting problem:
1. A naive recursive implementation which has an exponential runtime
2. Two dynamic programming implementations which have quadratic runtime
The rod-cutting problem is the problem of finding the maximum possible revenue
obtainable from a rod of length ``n`` given a list of prices for each integral piece
of the rod. The maximum revenue can thus be obtained by cutting the rod and selling the
pieces separately or not cutting it at all if the price of it is the maximum obtainable.
"""
def naive_cut_rod_recursive(n: int, prices: list):
"""
    Solves the rod-cutting problem naively, without the benefit of dynamic
    programming. As a result, the same sub-problems are solved several times,
    leading to an exponential runtime.
Runtime: O(2^n)
Arguments
---------
* `n`: int, the length of the rod
    * `prices`: list, the prices for each piece of rod. ``p[i-1]`` is the
      price for a rod of length ``i``
Returns
-------
The maximum revenue obtainable for a rod of length `n` given the list of prices
for each piece.
Examples
--------
>>> naive_cut_rod_recursive(4, [1, 5, 8, 9])
10
>>> naive_cut_rod_recursive(10, [1, 5, 8, 9, 10, 17, 17, 20, 24, 30])
30
"""
_enforce_args(n, prices)
if n == 0:
return 0
    max_revenue = float("-inf")
    for i in range(1, n + 1):
        max_revenue = max(
            max_revenue, prices[i - 1] + naive_cut_rod_recursive(n - i, prices)
        )
    return max_revenue
def top_down_cut_rod(n: int, prices: list):
"""
Constructs a top-down dynamic programming solution for the rod-cutting
problem via memoization. This function serves as a wrapper for
``_top_down_cut_rod_recursive``
Runtime: O(n^2)
Arguments
---------
* `n`: int, the length of the rod
    * `prices`: list, the prices for each piece of rod. ``p[i-1]`` is the
      price for a rod of length ``i``
    .. note::
        Because Python's lists use 0-indexing, ``length(max_rev) = n + 1``, to
        accommodate the revenue obtainable from a rod of length ``0``.
Returns
-------
The maximum revenue obtainable for a rod of length `n` given the list of prices
for each piece.
Examples
--------
>>> top_down_cut_rod(4, [1, 5, 8, 9])
10
>>> top_down_cut_rod(10, [1, 5, 8, 9, 10, 17, 17, 20, 24, 30])
30
"""
_enforce_args(n, prices)
max_rev = [float("-inf") for _ in range(n + 1)]
return _top_down_cut_rod_recursive(n, prices, max_rev)
def _top_down_cut_rod_recursive(n: int, prices: list, max_rev: list):
"""
Constructs a top-down dynamic programming solution for the rod-cutting problem
via memoization.
Runtime: O(n^2)
Arguments
---------
* `n`: int, the length of the rod
    * `prices`: list, the prices for each piece of rod. ``p[i-1]`` is the
      price for a rod of length ``i``
* `max_rev`: list, the computed maximum revenue for a piece of rod.
``max_rev[i]`` is the maximum revenue obtainable for a rod of length ``i``
Returns
-------
The maximum revenue obtainable for a rod of length `n` given the list of prices
for each piece.
"""
if max_rev[n] >= 0:
return max_rev[n]
elif n == 0:
return 0
else:
max_revenue = float("-inf")
for i in range(1, n + 1):
max_revenue = max(
max_revenue,
prices[i - 1] + _top_down_cut_rod_recursive(n - i, prices, max_rev),
)
max_rev[n] = max_revenue
return max_rev[n]
def bottom_up_cut_rod(n: int, prices: list):
"""
Constructs a bottom-up dynamic programming solution for the rod-cutting problem
Runtime: O(n^2)
Arguments
---------
* `n`: int, the maximum length of the rod.
    * `prices`: list, the prices for each piece of rod. ``p[i-1]`` is the
      price for a rod of length ``i``
Returns
-------
The maximum revenue obtainable from cutting a rod of length `n` given
the prices for each piece of rod p.
Examples
--------
>>> bottom_up_cut_rod(4, [1, 5, 8, 9])
10
>>> bottom_up_cut_rod(10, [1, 5, 8, 9, 10, 17, 17, 20, 24, 30])
30
"""
_enforce_args(n, prices)
# length(max_rev) = n + 1, to accommodate for the revenue obtainable from a rod of
# length 0.
max_rev = [float("-inf") for _ in range(n + 1)]
max_rev[0] = 0
for i in range(1, n + 1):
max_revenue_i = max_rev[i]
for j in range(1, i + 1):
max_revenue_i = max(max_revenue_i, prices[j - 1] + max_rev[i - j])
max_rev[i] = max_revenue_i
return max_rev[n]
def _enforce_args(n: int, prices: list):
"""
Basic checks on the arguments to the rod-cutting algorithms
* `n`: int, the length of the rod
* `prices`: list, the price list for each piece of rod.
Throws ``ValueError``:
if `n` is negative or there are fewer items in the price list than the length of
the rod
"""
if n < 0:
msg = f"n must be greater than or equal to 0. Got n = {n}"
raise ValueError(msg)
if n > len(prices):
msg = (
"Each integral piece of rod must have a corresponding price. "
f"Got n = {n} but length of prices = {len(prices)}"
)
raise ValueError(msg)
def main():
prices = [6, 10, 12, 15, 20, 23]
n = len(prices)
# the best revenue comes from cutting the rod into 6 pieces, each
# of length 1 resulting in a revenue of 6 * 6 = 36.
expected_max_revenue = 36
max_rev_top_down = top_down_cut_rod(n, prices)
max_rev_bottom_up = bottom_up_cut_rod(n, prices)
max_rev_naive = naive_cut_rod_recursive(n, prices)
assert expected_max_revenue == max_rev_top_down
assert max_rev_top_down == max_rev_bottom_up
assert max_rev_bottom_up == max_rev_naive
if __name__ == "__main__":
main()
================================================
FILE: dynamic_programming/smith_waterman.py
================================================
"""
https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
The Smith-Waterman algorithm is a dynamic programming algorithm used for sequence
alignment. It is particularly useful for finding similarities between two sequences,
such as DNA or protein sequences. In this implementation, gaps are penalized
linearly, meaning that the score is reduced by a fixed amount for each gap introduced
in the alignment. However, it's important to note that the Smith-Waterman algorithm
supports other gap penalty methods as well.
"""
def score_function(
source_char: str,
target_char: str,
match: int = 1,
mismatch: int = -1,
gap: int = -2,
) -> int:
"""
Calculate the score for a character pair based on whether they match or mismatch.
Returns 1 if the characters match, -1 if they mismatch, and -2 if either of the
characters is a gap.
>>> score_function('A', 'A')
1
>>> score_function('A', 'C')
-1
>>> score_function('-', 'A')
-2
>>> score_function('A', '-')
-2
>>> score_function('-', '-')
-2
"""
if "-" in (source_char, target_char):
return gap
return match if source_char == target_char else mismatch
def smith_waterman(
query: str,
subject: str,
match: int = 1,
mismatch: int = -1,
gap: int = -2,
) -> list[list[int]]:
"""
Perform the Smith-Waterman local sequence alignment algorithm.
Returns a 2D list representing the score matrix. Each value in the matrix
corresponds to the score of the best local alignment ending at that point.
>>> smith_waterman('ACAC', 'CA')
[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
>>> smith_waterman('acac', 'ca')
[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
>>> smith_waterman('ACAC', 'ca')
[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
>>> smith_waterman('acac', 'CA')
[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
>>> smith_waterman('ACAC', '')
[[0], [0], [0], [0], [0]]
>>> smith_waterman('', 'CA')
[[0, 0, 0]]
>>> smith_waterman('ACAC', 'CA')
[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
>>> smith_waterman('acac', 'ca')
[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
>>> smith_waterman('ACAC', 'ca')
[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
>>> smith_waterman('acac', 'CA')
[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
>>> smith_waterman('ACAC', '')
[[0], [0], [0], [0], [0]]
>>> smith_waterman('', 'CA')
[[0, 0, 0]]
>>> smith_waterman('AGT', 'AGT')
[[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3]]
>>> smith_waterman('AGT', 'GTA')
[[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 2, 0]]
>>> smith_waterman('AGT', 'GTC')
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0]]
>>> smith_waterman('AGT', 'G')
[[0, 0], [0, 0], [0, 1], [0, 0]]
>>> smith_waterman('G', 'AGT')
[[0, 0, 0, 0], [0, 0, 1, 0]]
>>> smith_waterman('AGT', 'AGTCT')
[[0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 2, 0, 0, 0], [0, 0, 0, 3, 1, 1]]
>>> smith_waterman('AGTCT', 'AGT')
[[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3], [0, 0, 0, 1], [0, 0, 0, 1]]
>>> smith_waterman('AGTCT', 'GTC')
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3], [0, 0, 1, 1]]
"""
# make both query and subject uppercase
query = query.upper()
subject = subject.upper()
# Initialize score matrix
m = len(query)
n = len(subject)
score = [[0] * (n + 1) for _ in range(m + 1)]
kwargs = {"match": match, "mismatch": mismatch, "gap": gap}
for i in range(1, m + 1):
for j in range(1, n + 1):
# Calculate scores for each cell
match = score[i - 1][j - 1] + score_function(
query[i - 1], subject[j - 1], **kwargs
)
delete = score[i - 1][j] + gap
insert = score[i][j - 1] + gap
# Take maximum score
score[i][j] = max(0, match, delete, insert)
return score
def traceback(score: list[list[int]], query: str, subject: str) -> str:
r"""
Perform traceback to find the optimal local alignment.
Starts from the highest scoring cell in the matrix and traces back recursively
until a 0 score is found. Returns the alignment strings.
>>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'CA')
'CA\nCA'
>>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'ca')
'CA\nCA'
>>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'ca')
'CA\nCA'
>>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'CA')
'CA\nCA'
>>> traceback([[0, 0, 0]], 'ACAC', '')
''
"""
# make both query and subject uppercase
query = query.upper()
subject = subject.upper()
# find the indices of the maximum value in the score matrix
max_value = float("-inf")
i_max = j_max = 0
for i, row in enumerate(score):
for j, value in enumerate(row):
if value > max_value:
max_value = value
i_max, j_max = i, j
# Traceback logic to find optimal alignment
i = i_max
j = j_max
align1 = ""
align2 = ""
gap = score_function("-", "-")
# guard against empty query or subject
if i == 0 or j == 0:
return ""
while i > 0 and j > 0:
if score[i][j] == score[i - 1][j - 1] + score_function(
query[i - 1], subject[j - 1]
):
            # optimal path is diagonal; take both letters
align1 = query[i - 1] + align1
align2 = subject[j - 1] + align2
i -= 1
j -= 1
elif score[i][j] == score[i - 1][j] + gap:
# optimal path is a vertical
align1 = query[i - 1] + align1
align2 = f"-{align2}"
i -= 1
else:
# optimal path is a horizontal
align1 = f"-{align1}"
align2 = subject[j - 1] + align2
j -= 1
return f"{align1}\n{align2}"
if __name__ == "__main__":
query = "HEAGAWGHEE"
subject = "PAWHEAE"
score = smith_waterman(query, subject, match=1, mismatch=-1, gap=-2)
print(traceback(score, query, subject))
================================================
FILE: dynamic_programming/subset_generation.py
================================================
def subset_combinations(elements: list[int], n: int) -> list:
"""
Compute n-element combinations from a given list using dynamic programming.
Args:
* `elements`: The list of elements from which combinations will be generated.
* `n`: The number of elements in each combination.
Returns:
A list of tuples, each representing a combination of `n` elements.
>>> subset_combinations(elements=[10, 20, 30, 40], n=2)
[(10, 20), (10, 30), (10, 40), (20, 30), (20, 40), (30, 40)]
>>> subset_combinations(elements=[1, 2, 3], n=1)
[(1,), (2,), (3,)]
>>> subset_combinations(elements=[1, 2, 3], n=3)
[(1, 2, 3)]
>>> subset_combinations(elements=[42], n=1)
[(42,)]
>>> subset_combinations(elements=[6, 7, 8, 9], n=4)
[(6, 7, 8, 9)]
>>> subset_combinations(elements=[10, 20, 30, 40, 50], n=0)
[()]
>>> subset_combinations(elements=[1, 2, 3, 4], n=2)
[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]
>>> subset_combinations(elements=[1, 'apple', 3.14], n=2)
[(1, 'apple'), (1, 3.14), ('apple', 3.14)]
>>> subset_combinations(elements=['single'], n=0)
[()]
>>> subset_combinations(elements=[], n=9)
[]
>>> from itertools import combinations
>>> all(subset_combinations(items, n) == list(combinations(items, n))
... for items, n in (
... ([10, 20, 30, 40], 2), ([1, 2, 3], 1), ([1, 2, 3], 3), ([42], 1),
... ([6, 7, 8, 9], 4), ([10, 20, 30, 40, 50], 1), ([1, 2, 3, 4], 2),
... ([1, 'apple', 3.14], 2), (['single'], 0), ([], 9)))
True
"""
r = len(elements)
if n > r:
return []
dp: list[list[tuple]] = [[] for _ in range(r + 1)]
dp[0].append(())
for i in range(1, r + 1):
for j in range(i, 0, -1):
for prev_combination in dp[j - 1]:
dp[j].append((*prev_combination, elements[i - 1]))
try:
return sorted(dp[n])
except TypeError:
return dp[n]
if __name__ == "__main__":
from doctest import testmod
testmod()
print(f"{subset_combinations(elements=[10, 20, 30, 40], n=2) = }")
================================================
FILE: dynamic_programming/sum_of_subset.py
================================================
def is_sum_subset(arr: list[int], required_sum: int) -> bool:
"""
>>> is_sum_subset([2, 4, 6, 8], 5)
False
>>> is_sum_subset([2, 4, 6, 8], 14)
True
"""
    # subset[i][j] is True if a sum of j can be formed from the first i elements
    # initially no subset sums can be formed, hence False
arr_len = len(arr)
subset = [[False] * (required_sum + 1) for _ in range(arr_len + 1)]
# for each arr value, a sum of zero(0) can be formed by not taking any element
# hence True/1
for i in range(arr_len + 1):
subset[i][0] = True
    # a non-zero sum cannot be formed from an empty set, hence False
for i in range(1, required_sum + 1):
subset[0][i] = False
for i in range(1, arr_len + 1):
for j in range(1, required_sum + 1):
if arr[i - 1] > j:
subset[i][j] = subset[i - 1][j]
if arr[i - 1] <= j:
subset[i][j] = subset[i - 1][j] or subset[i - 1][j - arr[i - 1]]
return subset[arr_len][required_sum]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/trapped_water.py
================================================
"""
Given an array of non-negative integers representing an elevation map where the width
of each bar is 1, this program calculates how much rainwater can be trapped.
Example - height = (0, 1, 0, 2, 1, 0, 1, 3, 2, 1, 2, 1)
Output: 6
This problem can be solved using the concept of "DYNAMIC PROGRAMMING".
We calculate the maximum height of the bars to the left and to the right of every bar
in the array. Then, iterating over the width of the structure, the amount of water
stored at each index is the minimum of the two maximum heights on either side minus
the height of the bar at that position.
"""
def trapped_rainwater(heights: tuple[int, ...]) -> int:
"""
The trapped_rainwater function calculates the total amount of rainwater that can be
trapped given an array of bar heights.
It uses a dynamic programming approach, determining the maximum height of bars on
both sides for each bar, and then computing the trapped water above each bar.
The function returns the total trapped water.
>>> trapped_rainwater((0, 1, 0, 2, 1, 0, 1, 3, 2, 1, 2, 1))
6
>>> trapped_rainwater((7, 1, 5, 3, 6, 4))
9
>>> trapped_rainwater((7, 1, 5, 3, 6, -1))
Traceback (most recent call last):
...
ValueError: No height can be negative
"""
if not heights:
return 0
if any(h < 0 for h in heights):
raise ValueError("No height can be negative")
length = len(heights)
left_max = [0] * length
left_max[0] = heights[0]
for i, height in enumerate(heights[1:], start=1):
left_max[i] = max(height, left_max[i - 1])
right_max = [0] * length
right_max[-1] = heights[-1]
for i in range(length - 2, -1, -1):
right_max[i] = max(heights[i], right_max[i + 1])
return sum(
min(left, right) - height
for left, right, height in zip(left_max, right_max, heights)
)
if __name__ == "__main__":
import doctest
doctest.testmod()
print(f"{trapped_rainwater((0, 1, 0, 2, 1, 0, 1, 3, 2, 1, 2, 1)) = }")
print(f"{trapped_rainwater((7, 1, 5, 3, 6, 4)) = }")
================================================
FILE: dynamic_programming/tribonacci.py
================================================
# Tribonacci sequence using Dynamic Programming
def tribonacci(num: int) -> list[int]:
"""
    Given a number num, return the first num Tribonacci numbers.
>>> tribonacci(5)
[0, 0, 1, 1, 2]
>>> tribonacci(8)
[0, 0, 1, 1, 2, 4, 7, 13]
"""
    dp = [0] * num
    if num > 2:
        dp[2] = 1
for i in range(3, num):
dp[i] = dp[i - 1] + dp[i - 2] + dp[i - 3]
return dp
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: dynamic_programming/viterbi.py
================================================
from typing import Any
def viterbi(
observations_space: list,
states_space: list,
initial_probabilities: dict,
transition_probabilities: dict,
emission_probabilities: dict,
) -> list:
"""
    Viterbi Algorithm: finds the most likely sequence of hidden states
    given the sequence of observations.
https://en.wikipedia.org/wiki/Viterbi_algorithm
Wikipedia example
>>> observations = ["normal", "cold", "dizzy"]
>>> states = ["Healthy", "Fever"]
>>> start_p = {"Healthy": 0.6, "Fever": 0.4}
>>> trans_p = {
... "Healthy": {"Healthy": 0.7, "Fever": 0.3},
... "Fever": {"Healthy": 0.4, "Fever": 0.6},
... }
>>> emit_p = {
... "Healthy": {"normal": 0.5, "cold": 0.4, "dizzy": 0.1},
... "Fever": {"normal": 0.1, "cold": 0.3, "dizzy": 0.6},
... }
>>> viterbi(observations, states, start_p, trans_p, emit_p)
['Healthy', 'Healthy', 'Fever']
>>> viterbi((), states, start_p, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: There's an empty parameter
>>> viterbi(observations, (), start_p, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: There's an empty parameter
>>> viterbi(observations, states, {}, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: There's an empty parameter
>>> viterbi(observations, states, start_p, {}, emit_p)
Traceback (most recent call last):
...
ValueError: There's an empty parameter
>>> viterbi(observations, states, start_p, trans_p, {})
Traceback (most recent call last):
...
ValueError: There's an empty parameter
>>> viterbi("invalid", states, start_p, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: observations_space must be a list
>>> viterbi(["valid", 123], states, start_p, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: observations_space must be a list of strings
>>> viterbi(observations, "invalid", start_p, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: states_space must be a list
>>> viterbi(observations, ["valid", 123], start_p, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: states_space must be a list of strings
>>> viterbi(observations, states, "invalid", trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: initial_probabilities must be a dict
>>> viterbi(observations, states, {2:2}, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: initial_probabilities all keys must be strings
>>> viterbi(observations, states, {"a":2}, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: initial_probabilities all values must be float
>>> viterbi(observations, states, start_p, "invalid", emit_p)
Traceback (most recent call last):
...
ValueError: transition_probabilities must be a dict
>>> viterbi(observations, states, start_p, {"a":2}, emit_p)
Traceback (most recent call last):
...
ValueError: transition_probabilities all values must be dict
>>> viterbi(observations, states, start_p, {2:{2:2}}, emit_p)
Traceback (most recent call last):
...
ValueError: transition_probabilities all keys must be strings
>>> viterbi(observations, states, start_p, {"a":{2:2}}, emit_p)
Traceback (most recent call last):
...
ValueError: transition_probabilities all keys must be strings
>>> viterbi(observations, states, start_p, {"a":{"b":2}}, emit_p)
Traceback (most recent call last):
...
ValueError: transition_probabilities nested dictionary all values must be float
>>> viterbi(observations, states, start_p, trans_p, "invalid")
Traceback (most recent call last):
...
ValueError: emission_probabilities must be a dict
>>> viterbi(observations, states, start_p, trans_p, None)
Traceback (most recent call last):
...
ValueError: There's an empty parameter
"""
_validation(
observations_space,
states_space,
initial_probabilities,
transition_probabilities,
emission_probabilities,
)
# Creates data structures and fill initial step
probabilities: dict = {}
pointers: dict = {}
for state in states_space:
observation = observations_space[0]
probabilities[(state, observation)] = (
initial_probabilities[state] * emission_probabilities[state][observation]
)
pointers[(state, observation)] = None
# Fills the data structure with the probabilities of
# different transitions and pointers to previous states
for o in range(1, len(observations_space)):
observation = observations_space[o]
prior_observation = observations_space[o - 1]
for state in states_space:
# Calculates the argmax for probability function
arg_max = ""
max_probability = -1
for k_state in states_space:
probability = (
probabilities[(k_state, prior_observation)]
* transition_probabilities[k_state][state]
* emission_probabilities[state][observation]
)
if probability > max_probability:
max_probability = probability
arg_max = k_state
# Update probabilities and pointers dicts
probabilities[(state, observation)] = (
probabilities[(arg_max, prior_observation)]
* transition_probabilities[arg_max][state]
* emission_probabilities[state][observation]
)
pointers[(state, observation)] = arg_max
# The final observation
final_observation = observations_space[len(observations_space) - 1]
# argmax for given final observation
arg_max = ""
max_probability = -1
for k_state in states_space:
probability = probabilities[(k_state, final_observation)]
if probability > max_probability:
max_probability = probability
arg_max = k_state
last_state = arg_max
# Process pointers backwards
previous = last_state
result = []
for o in range(len(observations_space) - 1, -1, -1):
result.append(previous)
previous = pointers[previous, observations_space[o]]
result.reverse()
return result
def _validation(
observations_space: Any,
states_space: Any,
initial_probabilities: Any,
transition_probabilities: Any,
emission_probabilities: Any,
) -> None:
"""
>>> observations = ["normal", "cold", "dizzy"]
>>> states = ["Healthy", "Fever"]
>>> start_p = {"Healthy": 0.6, "Fever": 0.4}
>>> trans_p = {
... "Healthy": {"Healthy": 0.7, "Fever": 0.3},
... "Fever": {"Healthy": 0.4, "Fever": 0.6},
... }
>>> emit_p = {
... "Healthy": {"normal": 0.5, "cold": 0.4, "dizzy": 0.1},
... "Fever": {"normal": 0.1, "cold": 0.3, "dizzy": 0.6},
... }
>>> _validation(observations, states, start_p, trans_p, emit_p)
>>> _validation([], states, start_p, trans_p, emit_p)
Traceback (most recent call last):
...
ValueError: There's an empty parameter
"""
_validate_not_empty(
observations_space,
states_space,
initial_probabilities,
transition_probabilities,
emission_probabilities,
)
_validate_lists(observations_space, states_space)
_validate_dicts(
initial_probabilities, transition_probabilities, emission_probabilities
)
def _validate_not_empty(
observations_space: Any,
states_space: Any,
initial_probabilities: Any,
transition_probabilities: Any,
emission_probabilities: Any,
) -> None:
"""
>>> _validate_not_empty(["a"], ["b"], {"c":0.5},
... {"d": {"e": 0.6}}, {"f": {"g": 0.7}})
>>> _validate_not_empty(["a"], ["b"], {"c":0.5}, {}, {"f": {"g": 0.7}})
Traceback (most recent call last):
...
ValueError: There's an empty parameter
>>> _validate_not_empty(["a"], ["b"], None, {"d": {"e": 0.6}}, {"f": {"g": 0.7}})
Traceback (most recent call last):
...
ValueError: There's an empty parameter
"""
if not all(
[
observations_space,
states_space,
initial_probabilities,
transition_probabilities,
emission_probabilities,
]
):
raise ValueError("There's an empty parameter")
def _validate_lists(observations_space: Any, states_space: Any) -> None:
"""
>>> _validate_lists(["a"], ["b"])
>>> _validate_lists(1234, ["b"])
Traceback (most recent call last):
...
ValueError: observations_space must be a list
>>> _validate_lists(["a"], [3])
Traceback (most recent call last):
...
ValueError: states_space must be a list of strings
"""
_validate_list(observations_space, "observations_space")
_validate_list(states_space, "states_space")
def _validate_list(_object: Any, var_name: str) -> None:
"""
>>> _validate_list(["a"], "mock_name")
>>> _validate_list("a", "mock_name")
Traceback (most recent call last):
...
ValueError: mock_name must be a list
>>> _validate_list([0.5], "mock_name")
Traceback (most recent call last):
...
ValueError: mock_name must be a list of strings
"""
if not isinstance(_object, list):
msg = f"{var_name} must be a list"
raise ValueError(msg)
else:
for x in _object:
if not isinstance(x, str):
msg = f"{var_name} must be a list of strings"
raise ValueError(msg)
def _validate_dicts(
initial_probabilities: Any,
transition_probabilities: Any,
emission_probabilities: Any,
) -> None:
"""
>>> _validate_dicts({"c":0.5}, {"d": {"e": 0.6}}, {"f": {"g": 0.7}})
>>> _validate_dicts("invalid", {"d": {"e": 0.6}}, {"f": {"g": 0.7}})
Traceback (most recent call last):
...
ValueError: initial_probabilities must be a dict
>>> _validate_dicts({"c":0.5}, {2: {"e": 0.6}}, {"f": {"g": 0.7}})
Traceback (most recent call last):
...
ValueError: transition_probabilities all keys must be strings
>>> _validate_dicts({"c":0.5}, {"d": {"e": 0.6}}, {"f": {2: 0.7}})
Traceback (most recent call last):
...
ValueError: emission_probabilities all keys must be strings
>>> _validate_dicts({"c":0.5}, {"d": {"e": 0.6}}, {"f": {"g": "h"}})
Traceback (most recent call last):
...
ValueError: emission_probabilities nested dictionary all values must be float
"""
_validate_dict(initial_probabilities, "initial_probabilities", float)
_validate_nested_dict(transition_probabilities, "transition_probabilities")
_validate_nested_dict(emission_probabilities, "emission_probabilities")
def _validate_nested_dict(_object: Any, var_name: str) -> None:
"""
>>> _validate_nested_dict({"a":{"b": 0.5}}, "mock_name")
>>> _validate_nested_dict("invalid", "mock_name")
Traceback (most recent call last):
...
ValueError: mock_name must be a dict
>>> _validate_nested_dict({"a": 8}, "mock_name")
Traceback (most recent call last):
...
ValueError: mock_name all values must be dict
>>> _validate_nested_dict({"a":{2: 0.5}}, "mock_name")
Traceback (most recent call last):
...
ValueError: mock_name all keys must be strings
>>> _validate_nested_dict({"a":{"b": 4}}, "mock_name")
Traceback (most recent call last):
...
ValueError: mock_name nested dictionary all values must be float
"""
_validate_dict(_object, var_name, dict)
for x in _object.values():
_validate_dict(x, var_name, float, True)
def _validate_dict(
_object: Any, var_name: str, value_type: type, nested: bool = False
) -> None:
"""
>>> _validate_dict({"b": 0.5}, "mock_name", float)
>>> _validate_dict("invalid", "mock_name", float)
Traceback (most recent call last):
...
ValueError: mock_name must be a dict
>>> _validate_dict({"a": 8}, "mock_name", dict)
Traceback (most recent call last):
...
ValueError: mock_name all values must be dict
>>> _validate_dict({2: 0.5}, "mock_name",float, True)
Traceback (most recent call last):
...
ValueError: mock_name all keys must be strings
>>> _validate_dict({"b": 4}, "mock_name", float,True)
Traceback (most recent call last):
...
ValueError: mock_name nested dictionary all values must be float
"""
if not isinstance(_object, dict):
msg = f"{var_name} must be a dict"
raise ValueError(msg)
if not all(isinstance(x, str) for x in _object):
msg = f"{var_name} all keys must be strings"
raise ValueError(msg)
if not all(isinstance(x, value_type) for x in _object.values()):
nested_text = "nested dictionary " if nested else ""
msg = f"{var_name} {nested_text}all values must be {value_type.__name__}"
raise ValueError(msg)
if __name__ == "__main__":
from doctest import testmod
testmod()
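    # Illustrative demo (values taken from the doctests above): find the most
    # likely sequence of hidden states for the classic Healthy/Fever example.
    observations = ["normal", "cold", "dizzy"]
    states = ["Healthy", "Fever"]
    start_p = {"Healthy": 0.6, "Fever": 0.4}
    trans_p = {
        "Healthy": {"Healthy": 0.7, "Fever": 0.3},
        "Fever": {"Healthy": 0.4, "Fever": 0.6},
    }
    emit_p = {
        "Healthy": {"normal": 0.5, "cold": 0.4, "dizzy": 0.1},
        "Fever": {"normal": 0.1, "cold": 0.3, "dizzy": 0.6},
    }
    print(f"{viterbi(observations, states, start_p, trans_p, emit_p) = }")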
================================================
FILE: dynamic_programming/wildcard_matching.py
================================================
"""
Author : ilyas dahhou
Date : Oct 7, 2023
Task:
Given an input string and a pattern, implement wildcard pattern matching with support
for '?' and '*' where:
'?' matches any single character.
'*' matches any sequence of characters (including the empty sequence).
The matching should cover the entire input string (not partial).
Runtime complexity: O(m * n)
The implementation was tested on the
leetcode: https://leetcode.com/problems/wildcard-matching/
"""
def is_match(string: str, pattern: str) -> bool:
"""
>>> is_match("", "")
True
>>> is_match("aa", "a")
False
>>> is_match("abc", "abc")
True
>>> is_match("abc", "*c")
True
>>> is_match("abc", "a*")
True
>>> is_match("abc", "*a*")
True
>>> is_match("abc", "?b?")
True
>>> is_match("abc", "*?")
True
>>> is_match("abc", "a*d")
False
>>> is_match("abc", "a*c?")
False
>>> is_match('baaabab','*****ba*****ba')
False
>>> is_match('baaabab','*****ba*****ab')
True
>>> is_match('aa','*')
True
"""
    dp = [[False] * (len(pattern) + 1) for _ in range(len(string) + 1)]
dp[0][0] = True
# Fill in the first row
for j, char in enumerate(pattern, 1):
if char == "*":
dp[0][j] = dp[0][j - 1]
# Fill in the rest of the DP table
for i, s_char in enumerate(string, 1):
for j, p_char in enumerate(pattern, 1):
if p_char in (s_char, "?"):
dp[i][j] = dp[i - 1][j - 1]
elif pattern[j - 1] == "*":
dp[i][j] = dp[i - 1][j] or dp[i][j - 1]
return dp[len(string)][len(pattern)]
if __name__ == "__main__":
import doctest
doctest.testmod()
print(f"{is_match('baaabab','*****ba*****ab') = }")
================================================
FILE: dynamic_programming/word_break.py
================================================
"""
Author : Alexander Pantyukhin
Date : December 12, 2022
Task:
Given a string and a list of words, return true if the string can be
segmented into a space-separated sequence of one or more words.
Note that the same word may be reused
multiple times in the segmentation.
Implementation notes: Trie + Dynamic programming up -> down.
The Trie will be used to store the words. It will be useful for scanning
available words for the current position in the string.
Leetcode:
https://leetcode.com/problems/word-break/description/
Runtime: O(n * n)
Space: O(n)
"""
import functools
from typing import Any
def word_break(string: str, words: list[str]) -> bool:
"""
    Return True if the string can be segmented into a space-separated sequence of
    one or more words from the given list, False otherwise.
>>> word_break("applepenapple", ["apple","pen"])
True
>>> word_break("catsandog", ["cats","dog","sand","and","cat"])
False
>>> word_break("cars", ["car","ca","rs"])
True
>>> word_break('abc', [])
False
>>> word_break(123, ['a'])
Traceback (most recent call last):
...
    ValueError: the string should be a non-empty string
>>> word_break('', ['a'])
Traceback (most recent call last):
...
    ValueError: the string should be a non-empty string
>>> word_break('abc', [123])
Traceback (most recent call last):
...
ValueError: the words should be a list of non-empty strings
>>> word_break('abc', [''])
Traceback (most recent call last):
...
ValueError: the words should be a list of non-empty strings
"""
# Validation
if not isinstance(string, str) or len(string) == 0:
raise ValueError("the string should be not empty string")
if not isinstance(words, list) or not all(
isinstance(item, str) and len(item) > 0 for item in words
):
raise ValueError("the words should be a list of non-empty strings")
# Build trie
trie: dict[str, Any] = {}
word_keeper_key = "WORD_KEEPER"
for word in words:
trie_node = trie
for c in word:
if c not in trie_node:
trie_node[c] = {}
trie_node = trie_node[c]
trie_node[word_keeper_key] = True
len_string = len(string)
# Dynamic programming method
@functools.cache
def is_breakable(index: int) -> bool:
"""
>>> string = 'a'
>>> is_breakable(1)
True
"""
if index == len_string:
return True
trie_node: Any = trie
for i in range(index, len_string):
trie_node = trie_node.get(string[i], None)
if trie_node is None:
return False
if trie_node.get(word_keeper_key, False) and is_breakable(i + 1):
return True
return False
return is_breakable(0)
if __name__ == "__main__":
import doctest
doctest.testmod()
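    # Illustrative demo (same inputs as the doctests above): the string can be
    # segmented into words from the list, reusing "apple".
    print(f"{word_break('applepenapple', ['apple', 'pen']) = }")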
================================================
FILE: electronics/__init__.py
================================================
================================================
FILE: electronics/apparent_power.py
================================================
import cmath
import math
def apparent_power(
voltage: float, current: float, voltage_angle: float, current_angle: float
) -> complex:
"""
Calculate the apparent power in a single-phase AC circuit.
Reference: https://en.wikipedia.org/wiki/AC_power#Apparent_power
>>> apparent_power(100, 5, 0, 0)
(500+0j)
>>> apparent_power(100, 5, 90, 0)
(3.061616997868383e-14+500j)
>>> apparent_power(100, 5, -45, -60)
(-129.40952255126027-482.9629131445341j)
>>> apparent_power(200, 10, -30, -90)
(-999.9999999999998-1732.0508075688776j)
"""
# Convert angles from degrees to radians
voltage_angle_rad = math.radians(voltage_angle)
current_angle_rad = math.radians(current_angle)
# Convert voltage and current to rectangular form
voltage_rect = cmath.rect(voltage, voltage_angle_rad)
current_rect = cmath.rect(current, current_angle_rad)
# Calculate apparent power
return voltage_rect * current_rect
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: electronics/builtin_voltage.py
================================================
from math import log
from scipy.constants import Boltzmann, physical_constants
T = 300 # TEMPERATURE (unit = K)
def builtin_voltage(
donor_conc: float, # donor concentration
acceptor_conc: float, # acceptor concentration
intrinsic_conc: float, # intrinsic concentration
) -> float:
"""
This function can calculate the Builtin Voltage of a pn junction diode.
This is calculated from the given three values.
Examples -
>>> builtin_voltage(donor_conc=1e17, acceptor_conc=1e17, intrinsic_conc=1e10)
0.833370010652644
>>> builtin_voltage(donor_conc=0, acceptor_conc=1600, intrinsic_conc=200)
Traceback (most recent call last):
...
ValueError: Donor concentration should be positive
>>> builtin_voltage(donor_conc=1000, acceptor_conc=0, intrinsic_conc=1200)
Traceback (most recent call last):
...
ValueError: Acceptor concentration should be positive
>>> builtin_voltage(donor_conc=1000, acceptor_conc=1000, intrinsic_conc=0)
Traceback (most recent call last):
...
ValueError: Intrinsic concentration should be positive
>>> builtin_voltage(donor_conc=1000, acceptor_conc=3000, intrinsic_conc=2000)
Traceback (most recent call last):
...
ValueError: Donor concentration should be greater than intrinsic concentration
>>> builtin_voltage(donor_conc=3000, acceptor_conc=1000, intrinsic_conc=2000)
Traceback (most recent call last):
...
ValueError: Acceptor concentration should be greater than intrinsic concentration
"""
if donor_conc <= 0:
raise ValueError("Donor concentration should be positive")
elif acceptor_conc <= 0:
raise ValueError("Acceptor concentration should be positive")
elif intrinsic_conc <= 0:
raise ValueError("Intrinsic concentration should be positive")
elif donor_conc <= intrinsic_conc:
raise ValueError(
"Donor concentration should be greater than intrinsic concentration"
)
elif acceptor_conc <= intrinsic_conc:
raise ValueError(
"Acceptor concentration should be greater than intrinsic concentration"
)
else:
return (
Boltzmann
* T
* log((donor_conc * acceptor_conc) / intrinsic_conc**2)
/ physical_constants["electron volt"][0]
)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: electronics/capacitor_equivalence.py
================================================
# https://farside.ph.utexas.edu/teaching/316/lectures/node46.html
from __future__ import annotations
def capacitor_parallel(capacitors: list[float]) -> float:
"""
Ceq = C1 + C2 + ... + Cn
    Calculate the equivalent capacitance for any number of capacitors in parallel.
>>> capacitor_parallel([5.71389, 12, 3])
20.71389
>>> capacitor_parallel([5.71389, 12, -3])
Traceback (most recent call last):
...
ValueError: Capacitor at index 2 has a negative value!
"""
sum_c = 0.0
for index, capacitor in enumerate(capacitors):
if capacitor < 0:
msg = f"Capacitor at index {index} has a negative value!"
raise ValueError(msg)
sum_c += capacitor
return sum_c
def capacitor_series(capacitors: list[float]) -> float:
"""
Ceq = 1/ (1/C1 + 1/C2 + ... + 1/Cn)
>>> capacitor_series([5.71389, 12, 3])
1.6901062252507735
>>> capacitor_series([5.71389, 12, -3])
Traceback (most recent call last):
...
ValueError: Capacitor at index 2 has a negative or zero value!
>>> capacitor_series([5.71389, 12, 0.000])
Traceback (most recent call last):
...
ValueError: Capacitor at index 2 has a negative or zero value!
"""
first_sum = 0.0
for index, capacitor in enumerate(capacitors):
if capacitor <= 0:
msg = f"Capacitor at index {index} has a negative or zero value!"
raise ValueError(msg)
first_sum += 1 / capacitor
return 1 / first_sum
if __name__ == "__main__":
import doctest
doctest.testmod()
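    # Illustrative demo with capacitor values (in farads) taken from the
    # doctests above, combined in parallel and in series.
    print(f"{capacitor_parallel([5.71389, 12, 3]) = }")
    print(f"{capacitor_series([5.71389, 12, 3]) = }")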
================================================
FILE: electronics/carrier_concentration.py
================================================
# https://en.wikipedia.org/wiki/Charge_carrier_density
# https://www.pveducation.org/pvcdrom/pn-junctions/equilibrium-carrier-concentration
# http://www.ece.utep.edu/courses/ee3329/ee3329/Studyguide/ToC/Fundamentals/Carriers/concentrations.html
from __future__ import annotations
def carrier_concentration(
electron_conc: float,
hole_conc: float,
intrinsic_conc: float,
) -> tuple:
"""
This function can calculate any one of the three -
1. Electron Concentration
    2. Hole Concentration
3. Intrinsic Concentration
given the other two.
Examples -
>>> carrier_concentration(electron_conc=25, hole_conc=100, intrinsic_conc=0)
('intrinsic_conc', 50.0)
>>> carrier_concentration(electron_conc=0, hole_conc=1600, intrinsic_conc=200)
('electron_conc', 25.0)
>>> carrier_concentration(electron_conc=1000, hole_conc=0, intrinsic_conc=1200)
('hole_conc', 1440.0)
>>> carrier_concentration(electron_conc=1000, hole_conc=400, intrinsic_conc=1200)
Traceback (most recent call last):
...
ValueError: You cannot supply more or less than 2 values
>>> carrier_concentration(electron_conc=-1000, hole_conc=0, intrinsic_conc=1200)
Traceback (most recent call last):
...
ValueError: Electron concentration cannot be negative in a semiconductor
>>> carrier_concentration(electron_conc=0, hole_conc=-400, intrinsic_conc=1200)
Traceback (most recent call last):
...
ValueError: Hole concentration cannot be negative in a semiconductor
>>> carrier_concentration(electron_conc=0, hole_conc=400, intrinsic_conc=-1200)
Traceback (most recent call last):
...
ValueError: Intrinsic concentration cannot be negative in a semiconductor
"""
if (electron_conc, hole_conc, intrinsic_conc).count(0) != 1:
raise ValueError("You cannot supply more or less than 2 values")
elif electron_conc < 0:
raise ValueError("Electron concentration cannot be negative in a semiconductor")
elif hole_conc < 0:
raise ValueError("Hole concentration cannot be negative in a semiconductor")
elif intrinsic_conc < 0:
raise ValueError(
"Intrinsic concentration cannot be negative in a semiconductor"
)
elif electron_conc == 0:
return (
"electron_conc",
intrinsic_conc**2 / hole_conc,
)
elif hole_conc == 0:
return (
"hole_conc",
intrinsic_conc**2 / electron_conc,
)
elif intrinsic_conc == 0:
return (
"intrinsic_conc",
(electron_conc * hole_conc) ** 0.5,
)
else:
return (-1, -1)
if __name__ == "__main__":
import doctest
doctest.testmod()
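    # Illustrative demo (values from the doctests above): with electron_conc set
    # to 0, the function solves for it from the other two concentrations.
    result = carrier_concentration(electron_conc=0, hole_conc=1600, intrinsic_conc=200)
    print(f"{result = }")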
================================================
FILE: electronics/charging_capacitor.py
================================================
# source - The ARRL Handbook for Radio Communications
# https://en.wikipedia.org/wiki/RC_time_constant
"""
Description
-----------
When a capacitor is connected to a potential source (AC or DC), it begins to charge.
If a resistor is connected in series with the capacitor, the charging slows down and
takes longer than it otherwise would. While the capacitor is charging, its voltage
follows an exponential function of time. The product 'resistance (ohms) *
capacitance (farads)' is called the RC time constant, also written as τ (tau).
Using this time constant we can find the capacitor voltage at any time 't' after
charging begins; the same exponential behaviour applies to both charging and
discharging of a capacitor.
"""
from math import exp # value of exp = 2.718281828459…
def charging_capacitor(
source_voltage: float, # voltage in volts.
resistance: float, # resistance in ohms.
capacitance: float, # capacitance in farads.
time_sec: float, # time in seconds after charging initiation of capacitor.
) -> float:
"""
Find capacitor voltage at any nth second after initiating its charging.
Examples
--------
>>> charging_capacitor(source_voltage=.2,resistance=.9,capacitance=8.4,time_sec=.5)
0.013
>>> charging_capacitor(source_voltage=2.2,resistance=3.5,capacitance=2.4,time_sec=9)
1.446
>>> charging_capacitor(source_voltage=15,resistance=200,capacitance=20,time_sec=2)
0.007
>>> charging_capacitor(20, 2000, 30*pow(10,-5), 4)
19.975
>>> charging_capacitor(source_voltage=0,resistance=10.0,capacitance=.30,time_sec=3)
Traceback (most recent call last):
...
ValueError: Source voltage must be positive.
>>> charging_capacitor(source_voltage=20,resistance=-2000,capacitance=30,time_sec=4)
Traceback (most recent call last):
...
ValueError: Resistance must be positive.
>>> charging_capacitor(source_voltage=30,resistance=1500,capacitance=0,time_sec=4)
Traceback (most recent call last):
...
ValueError: Capacitance must be positive.
"""
if source_voltage <= 0:
raise ValueError("Source voltage must be positive.")
if resistance <= 0:
raise ValueError("Resistance must be positive.")
if capacitance <= 0:
raise ValueError("Capacitance must be positive.")
return round(source_voltage * (1 - exp(-time_sec / (resistance * capacitance))), 3)
if __name__ == "__main__":
import doctest
doctest.testmod()
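    # Illustrative demo with assumed component values: a 9 V source charging a
    # 1000 uF capacitor through a 10 kOhm resistor, sampled 5 s after switch-on.
    print(f"{charging_capacitor(9, 10_000, 1e-3, 5) = }")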
================================================
FILE: electronics/charging_inductor.py
================================================
# source - The ARRL Handbook for Radio Communications
# https://en.wikipedia.org/wiki/RL_circuit
"""
Description
-----------
An inductor is a passive electronic device that stores energy, but unlike a
capacitor it stores that energy in its magnetic field (magnetostatic field).
When an inductor carries a steady DC current it behaves essentially like a plain
wire; its effect is only visible while the current through it is changing.
When the applied voltage steps from zero to a finite value, a voltage is induced
across the inductor that opposes the change in current, so the current initially
rises slowly. The rise would only stop if the current stopped changing; with zero
resistance the current would keep rising indefinitely.
'Inductance (henrys) / Resistance (ohms)' is known as the RL time constant, also
written as τ (tau), and the current in an RL charging circuit follows an
exponential function of time. Using this RL time constant we can find the current
in the inductor at any time while it is charging.
"""
from math import exp # value of exp = 2.718281828459…
def charging_inductor(
source_voltage: float, # source_voltage should be in volts.
resistance: float, # resistance should be in ohms.
inductance: float, # inductance should be in henrys.
time: float, # time should in seconds.
) -> float:
"""
Find inductor current at any nth second after initiating its charging.
Examples
--------
>>> charging_inductor(source_voltage=5.8,resistance=1.5,inductance=2.3,time=2)
2.817
>>> charging_inductor(source_voltage=8,resistance=5,inductance=3,time=2)
1.543
>>> charging_inductor(source_voltage=8,resistance=5*pow(10,2),inductance=3,time=2)
0.016
>>> charging_inductor(source_voltage=-8,resistance=100,inductance=15,time=12)
Traceback (most recent call last):
...
ValueError: Source voltage must be positive.
>>> charging_inductor(source_voltage=80,resistance=-15,inductance=100,time=5)
Traceback (most recent call last):
...
ValueError: Resistance must be positive.
>>> charging_inductor(source_voltage=12,resistance=200,inductance=-20,time=5)
Traceback (most recent call last):
...
ValueError: Inductance must be positive.
>>> charging_inductor(source_voltage=0,resistance=200,inductance=20,time=5)
Traceback (most recent call last):
...
ValueError: Source voltage must be positive.
>>> charging_inductor(source_voltage=10,resistance=0,inductance=20,time=5)
Traceback (most recent call last):
...
ValueError: Resistance must be positive.
>>> charging_inductor(source_voltage=15, resistance=25, inductance=0, time=5)
Traceback (most recent call last):
...
ValueError: Inductance must be positive.
"""
if source_voltage <= 0:
raise ValueError("Source voltage must be positive.")
if resistance <= 0:
raise ValueError("Resistance must be positive.")
if inductance <= 0:
raise ValueError("Inductance must be positive.")
return round(
source_voltage / resistance * (1 - exp((-time * resistance) / inductance)), 3
)
if __name__ == "__main__":
import doctest
doctest.testmod()
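    # Illustrative demo (values from the doctests above): an 8 V source, a 5 ohm
    # series resistance and a 3 H inductor, sampled 2 s after switch-on.
    print(f"{charging_inductor(8, 5, 3, 2) = }")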
================================================
FILE: electronics/circular_convolution.py
================================================
# https://en.wikipedia.org/wiki/Circular_convolution
"""
Circular convolution, also known as cyclic convolution,
is a special case of periodic convolution, which is the convolution of two
periodic functions that have the same period. Periodic convolution arises,
for example, in the context of the discrete-time Fourier transform (DTFT).
In particular, the DTFT of the product of two discrete sequences is the periodic
convolution of the DTFTs of the individual sequences. And each DTFT is a periodic
summation of a continuous Fourier transform function.
Source: https://en.wikipedia.org/wiki/Circular_convolution
"""
import doctest
from collections import deque
import numpy as np
class CircularConvolution:
"""
This class stores the first and second signal and performs the circular convolution
"""
def __init__(self) -> None:
"""
First signal and second signal are stored as 1-D array
"""
self.first_signal = [2, 1, 2, -1]
self.second_signal = [1, 2, 3, 4]
def circular_convolution(self) -> list[float]:
"""
        This function performs the circular convolution of the first and second signal
        using the matrix method
Usage:
>>> convolution = CircularConvolution()
>>> convolution.circular_convolution()
[10.0, 10.0, 6.0, 14.0]
>>> convolution.first_signal = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6]
>>> convolution.second_signal = [0.1, 0.3, 0.5, 0.7, 0.9, 1.1, 1.3, 1.5]
>>> convolution.circular_convolution()
[5.2, 6.0, 6.48, 6.64, 6.48, 6.0, 5.2, 4.08]
>>> convolution.first_signal = [-1, 1, 2, -2]
>>> convolution.second_signal = [0.5, 1, -1, 2, 0.75]
>>> convolution.circular_convolution()
[6.25, -3.0, 1.5, -2.0, -2.75]
>>> convolution.first_signal = [1, -1, 2, 3, -1]
>>> convolution.second_signal = [1, 2, 3]
>>> convolution.circular_convolution()
[8.0, -2.0, 3.0, 4.0, 11.0]
"""
length_first_signal = len(self.first_signal)
length_second_signal = len(self.second_signal)
max_length = max(length_first_signal, length_second_signal)
# create a zero matrix of max_length x max_length
matrix = [[0] * max_length for i in range(max_length)]
# fills the smaller signal with zeros to make both signals of same length
if length_first_signal < length_second_signal:
self.first_signal += [0] * (max_length - length_first_signal)
elif length_first_signal > length_second_signal:
self.second_signal += [0] * (max_length - length_second_signal)
"""
Fills the matrix in the following way assuming 'x' is the signal of length 4
[
[x[0], x[3], x[2], x[1]],
[x[1], x[0], x[3], x[2]],
[x[2], x[1], x[0], x[3]],
[x[3], x[2], x[1], x[0]]
]
"""
for i in range(max_length):
rotated_signal = deque(self.second_signal)
rotated_signal.rotate(i)
for j, item in enumerate(rotated_signal):
matrix[i][j] += item
# multiply the matrix with the first signal
final_signal = np.matmul(np.transpose(matrix), np.transpose(self.first_signal))
# rounding-off to two decimal places
return [float(round(i, 2)) for i in final_signal]
if __name__ == "__main__":
doctest.testmod()
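    # Illustrative demo: convolve the two default signals stored on the class
    # (see __init__ above) using the matrix method.
    convolution = CircularConvolution()
    print(f"{convolution.circular_convolution() = }")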
================================================
FILE: electronics/coulombs_law.py
================================================
# https://en.wikipedia.org/wiki/Coulomb%27s_law
from __future__ import annotations
COULOMBS_CONSTANT = 8.988e9  # units = N * m^2 * C^-2
def couloumbs_law(
force: float, charge1: float, charge2: float, distance: float
) -> dict[str, float]:
"""
Apply Coulomb's Law on any three given values. These can be force, charge1,
charge2, or distance, and then in a Python dict return name/value pair of
the zero value.
Coulomb's Law states that the magnitude of the electrostatic force of
attraction or repulsion between two point charges is directly proportional
to the product of the magnitudes of charges and inversely proportional to
the square of the distance between them.
Reference
----------
Coulomb (1785) "Premier mémoire sur l'électricité et le magnétisme,"
Histoire de l'Académie Royale des Sciences, pp. 569-577.
Parameters
----------
force : float with units in Newtons
charge1 : float with units in Coulombs
charge2 : float with units in Coulombs
distance : float with units in meters
Returns
-------
result : dict name/value pair of the zero value
>>> couloumbs_law(force=0, charge1=3, charge2=5, distance=2000)
{'force': 33705.0}
>>> couloumbs_law(force=10, charge1=3, charge2=5, distance=0)
{'distance': 116112.01488218177}
>>> couloumbs_law(force=10, charge1=0, charge2=5, distance=2000)
{'charge1': 0.0008900756564307966}
>>> couloumbs_law(force=0, charge1=0, charge2=5, distance=2000)
Traceback (most recent call last):
...
ValueError: One and only one argument must be 0
>>> couloumbs_law(force=0, charge1=3, charge2=5, distance=-2000)
Traceback (most recent call last):
...
ValueError: Distance cannot be negative
"""
charge_product = abs(charge1 * charge2)
if (force, charge1, charge2, distance).count(0) != 1:
raise ValueError("One and only one argument must be 0")
if distance < 0:
raise ValueError("Distance cannot be negative")
if force == 0:
force = COULOMBS_CONSTANT * charge_product / (distance**2)
return {"force": force}
elif charge1 == 0:
charge1 = abs(force) * (distance**2) / (COULOMBS_CONSTANT * charge2)
return {"charge1": charge1}
elif charge2 == 0:
charge2 = abs(force) * (distance**2) / (COULOMBS_CONSTANT * charge1)
return {"charge2": charge2}
elif distance == 0:
distance = (COULOMBS_CONSTANT * charge_product / abs(force)) ** 0.5
return {"distance": distance}
raise ValueError("Exactly one argument must be 0")
if __name__ == "__main__":
import doctest
doctest.testmod()
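    # Illustrative demo (values from the doctests above): with force set to 0,
    # the function solves for the force between the two charges.
    print(f"{couloumbs_law(force=0, charge1=3, charge2=5, distance=2000) = }")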
================================================
FILE: electronics/electric_conductivity.py
================================================
from __future__ import annotations
ELECTRON_CHARGE = 1.6021e-19 # units = C
def electric_conductivity(
conductivity: float,
electron_conc: float,
mobility: float,
) -> tuple[str, float]:
"""
This function can calculate any one of the three -
1. Conductivity
2. Electron Concentration
3. Electron Mobility
This is calculated from the other two provided values
Examples -
>>> electric_conductivity(conductivity=25, electron_conc=100, mobility=0)
('mobility', 1.5604519068722301e+18)
>>> electric_conductivity(conductivity=0, electron_conc=1600, mobility=200)
('conductivity', 5.12672e-14)
>>> electric_conductivity(conductivity=1000, electron_conc=0, mobility=1200)
('electron_conc', 5.201506356240767e+18)
>>> electric_conductivity(conductivity=-10, electron_conc=100, mobility=0)
Traceback (most recent call last):
...
ValueError: Conductivity cannot be negative
>>> electric_conductivity(conductivity=50, electron_conc=-10, mobility=0)
Traceback (most recent call last):
...
ValueError: Electron concentration cannot be negative
>>> electric_conductivity(conductivity=50, electron_conc=0, mobility=-10)
Traceback (most recent call last):
...
ValueError: mobility cannot be negative
>>> electric_conductivity(conductivity=50, electron_conc=0, mobility=0)
Traceback (most recent call last):
...
ValueError: You cannot supply more or less than 2 values
>>> electric_conductivity(conductivity=50, electron_conc=200, mobility=300)
Traceback (most recent call last):
...
ValueError: You cannot supply more or less than 2 values
"""
if (conductivity, electron_conc, mobility).count(0) != 1:
raise ValueError("You cannot supply more or less than 2 values")
elif conductivity < 0:
raise ValueError("Conductivity cannot be negative")
elif electron_conc < 0:
raise ValueError("Electron concentration cannot be negative")
elif mobility < 0:
raise ValueError("mobility cannot be negative")
elif conductivity == 0:
return (
"conductivity",
mobility * electron_conc * ELECTRON_CHARGE,
)
elif electron_conc == 0:
return (
"electron_conc",
conductivity / (mobility * ELECTRON_CHARGE),
)
else:
return (
"mobility",
conductivity / (electron_conc * ELECTRON_CHARGE),
)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: electronics/electric_power.py
================================================
# https://en.m.wikipedia.org/wiki/Electric_power
from __future__ import annotations
from typing import NamedTuple
class Result(NamedTuple):
name: str
value: float
def electric_power(voltage: float, current: float, power: float) -> tuple:
"""
    This function can calculate any one of the three fundamental electrical values
    (voltage, current, power) given the other two.
    Examples are below:
>>> electric_power(voltage=0, current=2, power=5)
Result(name='voltage', value=2.5)
>>> electric_power(voltage=2, current=2, power=0)
Result(name='power', value=4.0)
>>> electric_power(voltage=-2, current=3, power=0)
Result(name='power', value=6.0)
>>> electric_power(voltage=2, current=4, power=2)
Traceback (most recent call last):
...
ValueError: Exactly one argument must be 0
>>> electric_power(voltage=0, current=0, power=2)
Traceback (most recent call last):
...
ValueError: Exactly one argument must be 0
>>> electric_power(voltage=0, current=2, power=-4)
Traceback (most recent call last):
...
ValueError: Power cannot be negative in any electrical/electronics system
>>> electric_power(voltage=2.2, current=2.2, power=0)
Result(name='power', value=4.84)
>>> electric_power(current=0, power=6, voltage=2)
Result(name='current', value=3.0)
"""
if (voltage, current, power).count(0) != 1:
raise ValueError("Exactly one argument must be 0")
elif power < 0:
raise ValueError(
"Power cannot be negative in any electrical/electronics system"
)
elif voltage == 0:
return Result("voltage", power / current)
elif current == 0:
return Result("current", power / voltage)
elif power == 0:
return Result("power", float(round(abs(voltage * current), 2)))
else:
raise AssertionError
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: electronics/electrical_impedance.py
================================================
"""Electrical impedance is the measure of the opposition that a
circuit presents to a current when a voltage is applied.
Impedance extends the concept of resistance to alternating current (AC) circuits.
Source: https://en.wikipedia.org/wiki/Electrical_impedance
"""
from __future__ import annotations
from math import pow, sqrt # noqa: A004
def electrical_impedance(
resistance: float, reactance: float, impedance: float
) -> dict[str, float]:
"""
Apply Electrical Impedance formula, on any two given electrical values,
which can be resistance, reactance, and impedance, and then in a Python dict
return name/value pair of the zero value.
>>> electrical_impedance(3,4,0)
{'impedance': 5.0}
>>> electrical_impedance(0,4,5)
{'resistance': 3.0}
>>> electrical_impedance(3,0,5)
{'reactance': 4.0}
>>> electrical_impedance(3,4,5)
Traceback (most recent call last):
...
ValueError: One and only one argument must be 0
"""
if (resistance, reactance, impedance).count(0) != 1:
raise ValueError("One and only one argument must be 0")
if resistance == 0:
return {"resistance": sqrt(pow(impedance, 2) - pow(reactance, 2))}
elif reactance == 0:
return {"reactance": sqrt(pow(impedance, 2) - pow(resistance, 2))}
elif impedance == 0:
return {"impedance": sqrt(pow(resistance, 2) + pow(reactance, 2))}
else:
raise ValueError("Exactly one argument must be 0")
if __name__ == "__main__":
import doctest
doctest.testmod()
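    # Illustrative demo (values from the doctests above): a 3 ohm resistance and
    # a 4 ohm reactance give a 5 ohm impedance (a 3-4-5 triangle).
    print(f"{electrical_impedance(3, 4, 0) = }")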
================================================
FILE: electronics/ic_555_timer.py
================================================
from __future__ import annotations
"""
Calculate the frequency and/or duty cycle of an astable 555 timer.
* https://en.wikipedia.org/wiki/555_timer_IC#Astable
These functions take in the values of the external resistances (in ohms)
and the capacitance (in microfarads), and calculate the following:
-------------------------------------
| Freq = 1.44 /[( R1+ 2 x R2) x C1] | ... in Hz
-------------------------------------
where Freq is the frequency,
R1 is the first resistance in ohms,
R2 is the second resistance in ohms,
C1 is the capacitance in Microfarads.
------------------------------------------------
| Duty Cycle = (R1 + R2) / (R1 + 2 x R2) x 100 | ... in %
------------------------------------------------
where R1 is the first resistance in ohms,
R2 is the second resistance in ohms.
"""
def astable_frequency(
resistance_1: float, resistance_2: float, capacitance: float
) -> float:
"""
Usage examples:
>>> astable_frequency(resistance_1=45, resistance_2=45, capacitance=7)
1523.8095238095239
>>> astable_frequency(resistance_1=356, resistance_2=234, capacitance=976)
1.7905459175553078
>>> astable_frequency(resistance_1=2, resistance_2=-1, capacitance=2)
Traceback (most recent call last):
...
ValueError: All values must be positive
>>> astable_frequency(resistance_1=45, resistance_2=45, capacitance=0)
Traceback (most recent call last):
...
ValueError: All values must be positive
"""
if resistance_1 <= 0 or resistance_2 <= 0 or capacitance <= 0:
raise ValueError("All values must be positive")
return (1.44 / ((resistance_1 + 2 * resistance_2) * capacitance)) * 10**6
def astable_duty_cycle(resistance_1: float, resistance_2: float) -> float:
"""
Usage examples:
>>> astable_duty_cycle(resistance_1=45, resistance_2=45)
66.66666666666666
>>> astable_duty_cycle(resistance_1=356, resistance_2=234)
71.60194174757282
>>> astable_duty_cycle(resistance_1=2, resistance_2=-1)
Traceback (most recent call last):
...
ValueError: All values must be positive
>>> astable_duty_cycle(resistance_1=0, resistance_2=0)
Traceback (most recent call last):
...
ValueError: All values must be positive
"""
if resistance_1 <= 0 or resistance_2 <= 0:
raise ValueError("All values must be positive")
return (resistance_1 + resistance_2) / (resistance_1 + 2 * resistance_2) * 100
if __name__ == "__main__":
import doctest
doctest.testmod()
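    # Illustrative demo (values from the doctests above): R1 = R2 = 45 ohms and
    # C1 = 7 microfarads.
    print(f"{astable_frequency(45, 45, 7) = }")
    print(f"{astable_duty_cycle(45, 45) = }")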
================================================
FILE: electronics/ind_reactance.py
================================================
# https://en.wikipedia.org/wiki/Electrical_reactance#Inductive_reactance
from __future__ import annotations
from math import pi
def ind_reactance(
inductance: float, frequency: float, reactance: float
) -> dict[str, float]:
"""
Calculate inductive reactance, frequency or inductance from two given electrical
properties then return name/value pair of the zero value in a Python dict.
Parameters
----------
inductance : float with units in Henries
frequency : float with units in Hertz
reactance : float with units in Ohms
>>> ind_reactance(-35e-6, 1e3, 0)
Traceback (most recent call last):
...
ValueError: Inductance cannot be negative
>>> ind_reactance(35e-6, -1e3, 0)
Traceback (most recent call last):
...
ValueError: Frequency cannot be negative
>>> ind_reactance(35e-6, 0, -1)
Traceback (most recent call last):
...
ValueError: Inductive reactance cannot be negative
>>> ind_reactance(0, 10e3, 50)
{'inductance': 0.0007957747154594767}
>>> ind_reactance(35e-3, 0, 50)
{'frequency': 227.36420441699332}
>>> ind_reactance(35e-6, 1e3, 0)
{'reactance': 0.2199114857512855}
"""
if (inductance, frequency, reactance).count(0) != 1:
raise ValueError("One and only one argument must be 0")
if inductance < 0:
raise ValueError("Inductance cannot be negative")
if frequency < 0:
raise ValueError("Frequency cannot be negative")
if reactance < 0:
raise ValueError("Inductive reactance cannot be negative")
if inductance == 0:
return {"inductance": reactance / (2 * pi * frequency)}
elif frequency == 0:
return {"frequency": reactance / (2 * pi * inductance)}
elif reactance == 0:
return {"reactance": 2 * pi * frequency * inductance}
else:
raise ValueError("Exactly one argument must be 0")
if __name__ == "__main__":
import doctest
doctest.testmod()
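    # Illustrative demo (values from the doctests above): solve for the inductive
    # reactance of a 35 uH inductor at 1 kHz.
    print(f"{ind_reactance(35e-6, 1e3, 0) = }")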
================================================
FILE: electronics/ohms_law.py
================================================
# https://en.wikipedia.org/wiki/Ohm%27s_law
from __future__ import annotations
def ohms_law(voltage: float, current: float, resistance: float) -> dict[str, float]:
"""
Apply Ohm's Law, on any two given electrical values, which can be voltage, current,
and resistance, and then in a Python dict return name/value pair of the zero value.
>>> ohms_law(voltage=10, resistance=5, current=0)
{'current': 2.0}
>>> ohms_law(voltage=0, current=0, resistance=10)
Traceback (most recent call last):
...
ValueError: One and only one argument must be 0
>>> ohms_law(voltage=0, current=1, resistance=-2)
Traceback (most recent call last):
...
ValueError: Resistance cannot be negative
>>> ohms_law(resistance=0, voltage=-10, current=1)
{'resistance': -10.0}
>>> ohms_law(voltage=0, current=-1.5, resistance=2)
{'voltage': -3.0}
"""
if (voltage, current, resistance).count(0) != 1:
raise ValueError("One and only one argument must be 0")
if resistance < 0:
raise ValueError("Resistance cannot be negative")
if voltage == 0:
return {"voltage": float(current * resistance)}
elif current == 0:
return {"current": voltage / resistance}
elif resistance == 0:
return {"resistance": voltage / current}
else:
raise ValueError("Exactly one argument must be 0")
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: electronics/real_and_reactive_power.py
================================================
import math
def real_power(apparent_power: float, power_factor: float) -> float:
"""
Calculate real power from apparent power and power factor.
Examples:
>>> real_power(100, 0.9)
90.0
>>> real_power(0, 0.8)
0.0
>>> real_power(100, -0.9)
-90.0
"""
if (
not isinstance(power_factor, (int, float))
or power_factor < -1
or power_factor > 1
):
raise ValueError("power_factor must be a valid float value between -1 and 1.")
return apparent_power * power_factor
def reactive_power(apparent_power: float, power_factor: float) -> float:
"""
Calculate reactive power from apparent power and power factor.
Examples:
>>> reactive_power(100, 0.9)
43.58898943540673
>>> reactive_power(0, 0.8)
0.0
>>> reactive_power(100, -0.9)
43.58898943540673
"""
if (
not isinstance(power_factor, (int, float))
or power_factor < -1
or power_factor > 1
):
raise ValueError("power_factor must be a valid float value between -1 and 1.")
return apparent_power * math.sqrt(1 - power_factor**2)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: electronics/resistor_color_code.py
================================================
"""
Title : Calculating the resistance of a n band resistor using the color codes
Description :
Resistors resist the flow of electrical current. Each one has a value that tells how
strongly it resists current flow. This value's unit is the ohm, often noted with the
Greek letter omega: Ω.
The colored bands on a resistor can tell you everything you need to know about its
value and tolerance, as long as you understand how to read them. The order in which
the colors are arranged is very important, and each value of resistor has its own
unique combination.
The color coding for resistors is an international standard that is defined in IEC
60062.
The number of bands present in a resistor varies from three to six. These represent
significant figures, multiplier, tolerance, reliability, and temperature coefficient
Each color used for a type of band has a value assigned to it. It is read from left
to right.
All resistors will have significant figure and multiplier bands. In a three band
resistor, the first two bands from the left represent significant figures and the
third represents the multiplier band.
Significant figures - The number of significant figure bands in a resistor can vary
from two to three.
Colors and values associated with significant figure bands -
(Black = 0, Brown = 1, Red = 2, Orange = 3, Yellow = 4, Green = 5, Blue = 6,
Violet = 7, Grey = 8, White = 9)
Multiplier - There will be one multiplier band in a resistor. It is multiplied with
the significant figures obtained from previous bands.
Colors and values associated with multiplier band -
(Black = 10^0, Brown = 10^1, Red = 10^2, Orange = 10^3, Yellow = 10^4, Green = 10^5,
Blue = 10^6, Violet = 10^7, Grey = 10^8, White = 10^9, Gold = 10^-1, Silver = 10^-2)
Note that multiplier bands use Gold and Silver which are not used for significant
figure bands.
Tolerance - The tolerance band is not always present. It can be seen in four band
resistors and above. This is a percentage by which the resistor value can vary.
Colors and values associated with tolerance band -
(Brown = 1%, Red = 2%, Orange = 0.05%, Yellow = 0.02%, Green = 0.5%,Blue = 0.25%,
Violet = 0.1%, Grey = 0.01%, Gold = 5%, Silver = 10%)
If no color is mentioned then by default tolerance is 20%
Note that tolerance band does not use Black and White colors.
Temperature Coefficient - Indicates the change in resistance of the component as
a function of ambient temperature in terms of ppm/K.
It is present in six band resistors.
Colors and values associated with the temperature coefficient band -
(Black = 250 ppm/K, Brown = 100 ppm/K, Red = 50 ppm/K, Orange = 15 ppm/K,
Yellow = 25 ppm/K, Green = 20 ppm/K, Blue = 10 ppm/K, Violet = 5 ppm/K,
Grey = 1 ppm/K)
Note that the temperature coefficient band does not use White, Gold, Silver colors.
Sources :
https://www.calculator.net/resistor-calculator.html
https://learn.parallax.com/support/reference/resistor-color-codes
https://byjus.com/physics/resistor-colour-codes/
"""
valid_colors: list = [
"Black",
"Brown",
"Red",
"Orange",
"Yellow",
"Green",
"Blue",
"Violet",
"Grey",
"White",
"Gold",
"Silver",
]
significant_figures_color_values: dict[str, int] = {
"Black": 0,
"Brown": 1,
"Red": 2,
"Orange": 3,
"Yellow": 4,
"Green": 5,
"Blue": 6,
"Violet": 7,
"Grey": 8,
"White": 9,
}
multiplier_color_values: dict[str, float] = {
"Black": 10**0,
"Brown": 10**1,
"Red": 10**2,
"Orange": 10**3,
"Yellow": 10**4,
"Green": 10**5,
"Blue": 10**6,
"Violet": 10**7,
"Grey": 10**8,
"White": 10**9,
"Gold": 10**-1,
"Silver": 10**-2,
}
tolerance_color_values: dict[str, float] = {
"Brown": 1,
"Red": 2,
"Orange": 0.05,
"Yellow": 0.02,
"Green": 0.5,
"Blue": 0.25,
"Violet": 0.1,
"Grey": 0.01,
"Gold": 5,
"Silver": 10,
}
temperature_coeffecient_color_values: dict[str, int] = {
"Black": 250,
"Brown": 100,
"Red": 50,
"Orange": 15,
"Yellow": 25,
"Green": 20,
"Blue": 10,
"Violet": 5,
"Grey": 1,
}
band_types: dict[int, dict[str, int]] = {
3: {"significant": 2, "multiplier": 1},
4: {"significant": 2, "multiplier": 1, "tolerance": 1},
5: {"significant": 3, "multiplier": 1, "tolerance": 1},
6: {"significant": 3, "multiplier": 1, "tolerance": 1, "temp_coeffecient": 1},
}
def get_significant_digits(colors: list) -> str:
"""
Function returns the digit associated with the color. Function takes a
list containing colors as input and returns digits as string
>>> get_significant_digits(['Black','Blue'])
'06'
>>> get_significant_digits(['Aqua','Blue'])
Traceback (most recent call last):
...
ValueError: Aqua is not a valid color for significant figure bands
"""
digit = ""
for color in colors:
if color not in significant_figures_color_values:
msg = f"{color} is not a valid color for significant figure bands"
raise ValueError(msg)
digit = digit + str(significant_figures_color_values[color])
return str(digit)
def get_multiplier(color: str) -> float:
"""
Function returns the multiplier value associated with the color.
Function takes color as input and returns multiplier value
>>> get_multiplier('Gold')
0.1
>>> get_multiplier('Ivory')
Traceback (most recent call last):
...
ValueError: Ivory is not a valid color for multiplier band
"""
if color not in multiplier_color_values:
msg = f"{color} is not a valid color for multiplier band"
raise ValueError(msg)
return multiplier_color_values[color]
def get_tolerance(color: str) -> float:
"""
Function returns the tolerance value associated with the color.
Function takes color as input and returns tolerance value.
>>> get_tolerance('Green')
0.5
>>> get_tolerance('Indigo')
Traceback (most recent call last):
...
ValueError: Indigo is not a valid color for tolerance band
"""
if color not in tolerance_color_values:
msg = f"{color} is not a valid color for tolerance band"
raise ValueError(msg)
return tolerance_color_values[color]
def get_temperature_coeffecient(color: str) -> int:
"""
Function returns the temperature coeffecient value associated with the color.
Function takes color as input and returns temperature coeffecient value.
>>> get_temperature_coeffecient('Yellow')
25
>>> get_temperature_coeffecient('Cyan')
Traceback (most recent call last):
...
ValueError: Cyan is not a valid color for temperature coeffecient band
"""
if color not in temperature_coeffecient_color_values:
msg = f"{color} is not a valid color for temperature coeffecient band"
raise ValueError(msg)
return temperature_coeffecient_color_values[color]
def get_band_type_count(total_number_of_bands: int, type_of_band: str) -> int:
"""
Function returns the number of bands of a given type in a resistor with n bands
Function takes total_number_of_bands and type_of_band as input and returns
number of bands belonging to that type in the given resistor
>>> get_band_type_count(3,'significant')
2
>>> get_band_type_count(2,'significant')
Traceback (most recent call last):
...
ValueError: 2 is not a valid number of bands
>>> get_band_type_count(3,'sign')
Traceback (most recent call last):
...
ValueError: sign is not valid for a 3 band resistor
>>> get_band_type_count(3,'tolerance')
Traceback (most recent call last):
...
ValueError: tolerance is not valid for a 3 band resistor
>>> get_band_type_count(5,'temp_coeffecient')
Traceback (most recent call last):
...
ValueError: temp_coeffecient is not valid for a 5 band resistor
"""
if total_number_of_bands not in band_types:
msg = f"{total_number_of_bands} is not a valid number of bands"
raise ValueError(msg)
if type_of_band not in band_types[total_number_of_bands]:
msg = f"{type_of_band} is not valid for a {total_number_of_bands} band resistor"
raise ValueError(msg)
return band_types[total_number_of_bands][type_of_band]
def check_validity(number_of_bands: int, colors: list) -> bool:
"""
Function checks if the input provided is valid or not.
Function takes number_of_bands and colors as input and returns
True if it is valid
>>> check_validity(3, ["Black","Blue","Orange"])
True
>>> check_validity(4, ["Black","Blue","Orange"])
Traceback (most recent call last):
...
ValueError: Expecting 4 colors, provided 3 colors
>>> check_validity(3, ["Cyan","Red","Yellow"])
Traceback (most recent call last):
...
ValueError: Cyan is not a valid color
"""
if number_of_bands >= 3 and number_of_bands <= 6:
if number_of_bands == len(colors):
for color in colors:
if color not in valid_colors:
msg = f"{color} is not a valid color"
raise ValueError(msg)
return True
else:
msg = f"Expecting {number_of_bands} colors, provided {len(colors)} colors"
raise ValueError(msg)
else:
msg = "Invalid number of bands. Resistor bands must be 3 to 6"
raise ValueError(msg)
def calculate_resistance(number_of_bands: int, color_code_list: list) -> dict:
"""
Function calculates the total resistance of the resistor using the color codes.
Function takes number_of_bands, color_code_list as input and returns
resistance
>>> calculate_resistance(3, ["Black","Blue","Orange"])
{'resistance': '6000Ω ±20% '}
>>> calculate_resistance(4, ["Orange","Green","Blue","Gold"])
{'resistance': '35000000Ω ±5% '}
>>> calculate_resistance(5, ["Violet","Brown","Grey","Silver","Green"])
{'resistance': '7.18Ω ±0.5% '}
>>> calculate_resistance(6, ["Red","Green","Blue","Yellow","Orange","Grey"])
{'resistance': '2560000Ω ±0.05% 1 ppm/K'}
>>> calculate_resistance(0, ["Violet","Brown","Grey","Silver","Green"])
Traceback (most recent call last):
...
ValueError: Invalid number of bands. Resistor bands must be 3 to 6
>>> calculate_resistance(4, ["Violet","Brown","Grey","Silver","Green"])
Traceback (most recent call last):
...
ValueError: Expecting 4 colors, provided 5 colors
>>> calculate_resistance(4, ["Violet","Silver","Brown","Grey"])
Traceback (most recent call last):
...
ValueError: Silver is not a valid color for significant figure bands
>>> calculate_resistance(4, ["Violet","Blue","Lime","Grey"])
Traceback (most recent call last):
...
ValueError: Lime is not a valid color
"""
is_valid = check_validity(number_of_bands, color_code_list)
if is_valid:
number_of_significant_bands = get_band_type_count(
number_of_bands, "significant"
)
significant_colors = color_code_list[:number_of_significant_bands]
significant_digits = int(get_significant_digits(significant_colors))
multiplier_color = color_code_list[number_of_significant_bands]
multiplier = get_multiplier(multiplier_color)
if number_of_bands == 3:
tolerance_color = None
else:
tolerance_color = color_code_list[number_of_significant_bands + 1]
tolerance = (
20 if tolerance_color is None else get_tolerance(str(tolerance_color))
)
if number_of_bands != 6:
temperature_coeffecient_color = None
else:
temperature_coeffecient_color = color_code_list[
number_of_significant_bands + 2
]
temperature_coeffecient = (
0
if temperature_coeffecient_color is None
else get_temperature_coeffecient(str(temperature_coeffecient_color))
)
        resistance = significant_digits * multiplier
        if temperature_coeffecient == 0:
            answer = f"{resistance}Ω ±{tolerance}% "
        else:
            answer = f"{resistance}Ω ±{tolerance}% {temperature_coeffecient} ppm/K"
return {"resistance": answer}
else:
raise ValueError("Input is invalid")
if __name__ == "__main__":
import doctest
doctest.testmod()
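    # Illustrative demo (band colors from the doctests above): a three band
    # resistor with Black, Blue and Orange bands.
    print(f"{calculate_resistance(3, ['Black', 'Blue', 'Orange']) = }")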
================================================
FILE: electronics/resistor_equivalence.py
================================================
# https://byjus.com/equivalent-resistance-formula/
from __future__ import annotations
def resistor_parallel(resistors: list[float]) -> float:
"""
Req = 1/ (1/R1 + 1/R2 + ... + 1/Rn)
>>> resistor_parallel([3.21389, 2, 3])
0.8737571620498019
>>> resistor_parallel([3.21389, 2, -3])
Traceback (most recent call last):
...
ValueError: Resistor at index 2 has a negative or zero value!
>>> resistor_parallel([3.21389, 2, 0.000])
Traceback (most recent call last):
...
ValueError: Resistor at index 2 has a negative or zero value!
"""
first_sum = 0.00
for index, resistor in enumerate(resistors):
if resistor <= 0:
msg = f"Resistor at index {index} has a negative or zero value!"
raise ValueError(msg)
first_sum += 1 / float(resistor)
return 1 / first_sum
def resistor_series(resistors: list[float]) -> float:
"""
Req = R1 + R2 + ... + Rn
    Calculate the equivalent resistance for any number of resistors in series.
>>> resistor_series([3.21389, 2, 3])
8.21389
>>> resistor_series([3.21389, 2, -3])
Traceback (most recent call last):
...
ValueError: Resistor at index 2 has a negative value!
"""
sum_r = 0.00
for index, resistor in enumerate(resistors):
sum_r += resistor
if resistor < 0:
msg = f"Resistor at index {index} has a negative value!"
raise ValueError(msg)
return sum_r
if __name__ == "__main__":
import doctest
doctest.testmod()
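    # Illustrative demo with resistor values (in ohms) taken from the doctests
    # above, combined in parallel and in series.
    print(f"{resistor_parallel([3.21389, 2, 3]) = }")
    print(f"{resistor_series([3.21389, 2, 3]) = }")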
================================================
FILE: electronics/resonant_frequency.py
================================================
# https://en.wikipedia.org/wiki/LC_circuit
"""An LC circuit, also called a resonant circuit, tank circuit, or tuned circuit,
is an electric circuit consisting of an inductor, represented by the letter L,
and a capacitor, represented by the letter C, connected together.
The circuit can act as an electrical resonator, an electrical analogue of a
tuning fork, storing energy oscillating at the circuit's resonant frequency.
Source: https://en.wikipedia.org/wiki/LC_circuit
"""
from __future__ import annotations
from math import pi, sqrt
def resonant_frequency(inductance: float, capacitance: float) -> tuple:
"""
    This function can calculate the resonant frequency of an LC circuit,
    for the given values of inductance and capacitance.
    Examples are given below:
>>> resonant_frequency(inductance=10, capacitance=5)
('Resonant frequency', 0.022507907903927652)
>>> resonant_frequency(inductance=0, capacitance=5)
Traceback (most recent call last):
...
ValueError: Inductance cannot be 0 or negative
>>> resonant_frequency(inductance=10, capacitance=0)
Traceback (most recent call last):
...
ValueError: Capacitance cannot be 0 or negative
"""
if inductance <= 0:
raise ValueError("Inductance cannot be 0 or negative")
elif capacitance <= 0:
raise ValueError("Capacitance cannot be 0 or negative")
else:
return (
"Resonant frequency",
float(1 / (2 * pi * (sqrt(inductance * capacitance)))),
)
if __name__ == "__main__":
import doctest
doctest.testmod()
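    # Illustrative demo (values from the doctests above): resonant frequency of
    # an LC circuit with a 10 H inductor and a 5 F capacitor.
    print(f"{resonant_frequency(inductance=10, capacitance=5) = }")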
================================================
FILE: electronics/wheatstone_bridge.py
================================================
# https://en.wikipedia.org/wiki/Wheatstone_bridge
from __future__ import annotations
def wheatstone_solver(
resistance_1: float, resistance_2: float, resistance_3: float
) -> float:
"""
    This function can calculate the unknown resistance in a Wheatstone network,
    given that the three other resistances in the network are known.
The formula to calculate the same is:
---------------
|Rx=(R2/R1)*R3|
---------------
Usage examples:
>>> wheatstone_solver(resistance_1=2, resistance_2=4, resistance_3=5)
10.0
>>> wheatstone_solver(resistance_1=356, resistance_2=234, resistance_3=976)
641.5280898876405
>>> wheatstone_solver(resistance_1=2, resistance_2=-1, resistance_3=2)
Traceback (most recent call last):
...
ValueError: All resistance values must be positive
>>> wheatstone_solver(resistance_1=0, resistance_2=0, resistance_3=2)
Traceback (most recent call last):
...
ValueError: All resistance values must be positive
"""
if resistance_1 <= 0 or resistance_2 <= 0 or resistance_3 <= 0:
raise ValueError("All resistance values must be positive")
else:
return float((resistance_2 / resistance_1) * resistance_3)
if __name__ == "__main__":
import doctest
doctest.testmod()
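    # Illustrative demo (values from the doctests above): the unknown resistance
    # Rx = (R2 / R1) * R3 for R1 = 2, R2 = 4 and R3 = 5 ohms.
    print(f"{wheatstone_solver(resistance_1=2, resistance_2=4, resistance_3=5) = }")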
================================================
FILE: file_transfer/__init__.py
================================================
================================================
FILE: file_transfer/mytext.txt
================================================
Hello
This is sample data
«küßî»
“ЌύБЇ”
😀😉
😋
================================================
FILE: file_transfer/receive_file.py
================================================
import socket
def main():
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host = socket.gethostname()
port = 12312
sock.connect((host, port))
sock.send(b"Hello server!")
with open("Received_file", "wb") as out_file:
print("File opened")
print("Receiving data...")
while True:
data = sock.recv(1024)
if not data:
break
out_file.write(data)
print("Successfully received the file")
sock.close()
print("Connection closed")
if __name__ == "__main__":
main()
================================================
FILE: file_transfer/send_file.py
================================================
def send_file(filename: str = "mytext.txt", testing: bool = False) -> None:
import socket
port = 12312 # Reserve a port for your service.
sock = socket.socket() # Create a socket object
host = socket.gethostname() # Get local machine name
sock.bind((host, port)) # Bind to the port
sock.listen(5) # Now wait for client connection.
print("Server listening....")
while True:
conn, addr = sock.accept() # Establish connection with client.
print(f"Got connection from {addr}")
data = conn.recv(1024)
print(f"Server received: {data = }")
with open(filename, "rb") as in_file:
data = in_file.read(1024)
while data:
conn.send(data)
print(f"Sent {data!r}")
data = in_file.read(1024)
print("Done sending")
conn.close()
if testing: # Allow the test to complete
break
sock.shutdown(1)
sock.close()
if __name__ == "__main__":
send_file()
================================================
FILE: file_transfer/tests/__init__.py
================================================
================================================
FILE: file_transfer/tests/test_send_file.py
================================================
from unittest.mock import Mock, patch
from file_transfer.send_file import send_file
@patch("socket.socket")
@patch("builtins.open")
def test_send_file_running_as_expected(file, sock):
# ===== initialization =====
conn = Mock()
sock.return_value.accept.return_value = conn, Mock()
f = iter([1, None])
file.return_value.__enter__.return_value.read.side_effect = lambda _: next(f)
# ===== invoke =====
send_file(filename="mytext.txt", testing=True)
# ===== ensurance =====
sock.assert_called_once()
sock.return_value.bind.assert_called_once()
sock.return_value.listen.assert_called_once()
sock.return_value.accept.assert_called_once()
conn.recv.assert_called_once()
file.return_value.__enter__.assert_called_once()
file.return_value.__enter__.return_value.read.assert_called()
conn.send.assert_called_once()
conn.close.assert_called_once()
sock.return_value.shutdown.assert_called_once()
sock.return_value.close.assert_called_once()
================================================
FILE: financial/README.md
================================================
# Interest
* Compound Interest: "Compound interest is calculated by multiplying the initial principal amount by one plus the annual interest rate raised to the number of compound periods minus one." [Compound Interest](https://www.investopedia.com/)
* Simple Interest: "Simple interest paid or received over a certain period is a fixed percentage of the principal amount that was borrowed or lent. " [Simple Interest](https://www.investopedia.com/)
================================================
FILE: financial/__init__.py
================================================
================================================
FILE: financial/equated_monthly_installments.py
================================================
"""
Program to calculate the amortization amount per month, given
- Principal borrowed
- Rate of interest per annum
- Years to repay the loan
Wikipedia Reference: https://en.wikipedia.org/wiki/Equated_monthly_installment
"""
def equated_monthly_installments(
principal: float, rate_per_annum: float, years_to_repay: int
) -> float:
"""
Formula for amortization amount per month:
A = p * r * (1 + r)^n / ((1 + r)^n - 1)
where p is the principal, r is the rate of interest per month
and n is the number of payments
>>> equated_monthly_installments(25000, 0.12, 3)
830.3577453212793
>>> equated_monthly_installments(25000, 0.12, 10)
358.67737100646826
>>> equated_monthly_installments(0, 0.12, 3)
Traceback (most recent call last):
...
Exception: Principal borrowed must be > 0
>>> equated_monthly_installments(25000, -1, 3)
Traceback (most recent call last):
...
Exception: Rate of interest must be >= 0
>>> equated_monthly_installments(25000, 0.12, 0)
Traceback (most recent call last):
...
Exception: Years to repay must be an integer > 0
"""
if principal <= 0:
raise Exception("Principal borrowed must be > 0")
if rate_per_annum < 0:
raise Exception("Rate of interest must be >= 0")
if years_to_repay <= 0 or not isinstance(years_to_repay, int):
raise Exception("Years to repay must be an integer > 0")
# Yearly rate is divided by 12 to get monthly rate
rate_per_month = rate_per_annum / 12
# Years to repay is multiplied by 12 to get number of payments as payment is monthly
number_of_payments = years_to_repay * 12
return (
principal
* rate_per_month
* (1 + rate_per_month) ** number_of_payments
/ ((1 + rate_per_month) ** number_of_payments - 1)
)
if __name__ == "__main__":
import doctest
doctest.testmod()
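    # Hedged usage sketch (added example, not part of the original file): 25000
    # borrowed at 12% per annum over 3 years gives r = 0.01 per month and n = 36
    # payments, so A = 25000 * 0.01 * 1.01**36 / (1.01**36 - 1) ~= 830.36.
    print(f"Monthly installment: {equated_monthly_installments(25000, 0.12, 3):.2f}")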
================================================
FILE: financial/exponential_moving_average.py
================================================
"""
Calculate the exponential moving average (EMA) on the series of stock prices.
Wikipedia Reference: https://en.wikipedia.org/wiki/Exponential_smoothing
https://www.investopedia.com/terms/e/ema.asp#toc-what-is-an-exponential
-moving-average-ema
Exponential moving average is used in finance to analyze changes in stock prices.
EMA is used in conjunction with the simple moving average (SMA); EMA reacts to
changes in value more quickly than SMA, which is one of the advantages of using EMA.
"""
from collections.abc import Iterator
def exponential_moving_average(
stock_prices: Iterator[float], window_size: int
) -> Iterator[float]:
"""
Yields exponential moving averages of the given stock prices.
>>> tuple(exponential_moving_average(iter([2, 5, 3, 8.2, 6, 9, 10]), 3))
(2, 3.5, 3.25, 5.725, 5.8625, 7.43125, 8.715625)
:param stock_prices: A stream of stock prices
:param window_size: The number of stock prices that will trigger a new calculation
of the exponential average (window_size > 0)
:return: Yields a sequence of exponential moving averages
Formula:
st = alpha * xt + (1 - alpha) * st_prev
Where,
st : Exponential moving average at timestamp t
    xt : stock price from the stock prices at timestamp t
st_prev : Exponential moving average at timestamp t-1
alpha : 2/(1 + window_size) - smoothing factor
Exponential moving average (EMA) is a rule of thumb technique for
smoothing time series data using an exponential window function.
"""
if window_size <= 0:
raise ValueError("window_size must be > 0")
# Calculating smoothing factor
alpha = 2 / (1 + window_size)
# Exponential average at timestamp t
moving_average = 0.0
for i, stock_price in enumerate(stock_prices):
if i <= window_size:
            # Use a simple running average until the window_size is reached for
            # the first time
moving_average = (moving_average + stock_price) * 0.5 if i else stock_price
else:
# Calculating exponential moving average based on current timestamp data
# point and previous exponential average value
moving_average = (alpha * stock_price) + ((1 - alpha) * moving_average)
yield moving_average
if __name__ == "__main__":
import doctest
doctest.testmod()
stock_prices = [2.0, 5, 3, 8.2, 6, 9, 10]
window_size = 3
result = tuple(exponential_moving_average(iter(stock_prices), window_size))
print(f"{stock_prices = }")
print(f"{window_size = }")
print(f"{result = }")
================================================
FILE: financial/interest.py
================================================
# https://www.investopedia.com
from __future__ import annotations
def simple_interest(
principal: float, daily_interest_rate: float, days_between_payments: float
) -> float:
"""
>>> simple_interest(18000.0, 0.06, 3)
3240.0
>>> simple_interest(0.5, 0.06, 3)
0.09
>>> simple_interest(18000.0, 0.01, 10)
1800.0
>>> simple_interest(18000.0, 0.0, 3)
0.0
>>> simple_interest(5500.0, 0.01, 100)
5500.0
>>> simple_interest(10000.0, -0.06, 3)
Traceback (most recent call last):
...
ValueError: daily_interest_rate must be >= 0
>>> simple_interest(-10000.0, 0.06, 3)
Traceback (most recent call last):
...
ValueError: principal must be > 0
>>> simple_interest(5500.0, 0.01, -5)
Traceback (most recent call last):
...
ValueError: days_between_payments must be > 0
"""
if days_between_payments <= 0:
raise ValueError("days_between_payments must be > 0")
if daily_interest_rate < 0:
raise ValueError("daily_interest_rate must be >= 0")
if principal <= 0:
raise ValueError("principal must be > 0")
return principal * daily_interest_rate * days_between_payments
def compound_interest(
principal: float,
nominal_annual_interest_rate_percentage: float,
number_of_compounding_periods: float,
) -> float:
"""
>>> compound_interest(10000.0, 0.05, 3)
1576.2500000000014
>>> compound_interest(10000.0, 0.05, 1)
500.00000000000045
>>> compound_interest(0.5, 0.05, 3)
0.07881250000000006
>>> compound_interest(10000.0, 0.06, -4)
Traceback (most recent call last):
...
ValueError: number_of_compounding_periods must be > 0
>>> compound_interest(10000.0, -3.5, 3.0)
Traceback (most recent call last):
...
ValueError: nominal_annual_interest_rate_percentage must be >= 0
>>> compound_interest(-5500.0, 0.01, 5)
Traceback (most recent call last):
...
ValueError: principal must be > 0
"""
if number_of_compounding_periods <= 0:
raise ValueError("number_of_compounding_periods must be > 0")
if nominal_annual_interest_rate_percentage < 0:
raise ValueError("nominal_annual_interest_rate_percentage must be >= 0")
if principal <= 0:
raise ValueError("principal must be > 0")
return principal * (
(1 + nominal_annual_interest_rate_percentage) ** number_of_compounding_periods
- 1
)
def apr_interest(
principal: float,
nominal_annual_percentage_rate: float,
number_of_years: float,
) -> float:
"""
>>> apr_interest(10000.0, 0.05, 3)
1618.223072263547
>>> apr_interest(10000.0, 0.05, 1)
512.6749646744732
>>> apr_interest(0.5, 0.05, 3)
0.08091115361317736
>>> apr_interest(10000.0, 0.06, -4)
Traceback (most recent call last):
...
ValueError: number_of_years must be > 0
>>> apr_interest(10000.0, -3.5, 3.0)
Traceback (most recent call last):
...
ValueError: nominal_annual_percentage_rate must be >= 0
>>> apr_interest(-5500.0, 0.01, 5)
Traceback (most recent call last):
...
ValueError: principal must be > 0
"""
if number_of_years <= 0:
raise ValueError("number_of_years must be > 0")
if nominal_annual_percentage_rate < 0:
raise ValueError("nominal_annual_percentage_rate must be >= 0")
if principal <= 0:
raise ValueError("principal must be > 0")
return compound_interest(
principal, nominal_annual_percentage_rate / 365, number_of_years * 365
)
if __name__ == "__main__":
import doctest
doctest.testmod()
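    # Hedged usage sketch (added example, not part of the original file): 10000.0
    # at a 5% rate earns 1500.0 of simple interest over 3 periods versus
    # 10000.0 * (1.05 ** 3 - 1) ~= 1576.25 when compounded.
    print(f"{simple_interest(10000.0, 0.05, 3) = }")
    print(f"{compound_interest(10000.0, 0.05, 3) = }")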
================================================
FILE: financial/present_value.py
================================================
"""
Reference: https://www.investopedia.com/terms/p/presentvalue.asp
An algorithm that calculates the present value of a stream of yearly cash flows given...
1. The discount rate (as a decimal, not a percent)
2. An array of cash flows, with the index of the cash flow being the associated year
Note: This algorithm assumes that cash flows are paid at the end of the specified year
"""
def present_value(discount_rate: float, cash_flows: list[float]) -> float:
"""
>>> present_value(0.13, [10, 20.70, -293, 297])
4.69
>>> present_value(0.07, [-109129.39, 30923.23, 15098.93, 29734,39])
-42739.63
>>> present_value(0.07, [109129.39, 30923.23, 15098.93, 29734,39])
175519.15
>>> present_value(-1, [109129.39, 30923.23, 15098.93, 29734,39])
Traceback (most recent call last):
...
ValueError: Discount rate cannot be negative
>>> present_value(0.03, [])
Traceback (most recent call last):
...
ValueError: Cash flows list cannot be empty
"""
if discount_rate < 0:
raise ValueError("Discount rate cannot be negative")
if not cash_flows:
raise ValueError("Cash flows list cannot be empty")
present_value = sum(
cash_flow / ((1 + discount_rate) ** i) for i, cash_flow in enumerate(cash_flows)
)
return round(present_value, ndigits=2)
if __name__ == "__main__":
import doctest
doctest.testmod()
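    # Hedged usage sketch (added example, not part of the original file): each
    # cash flow is discounted by (1 + discount_rate) ** year, so the stream
    # [10, 20.70, -293, 297] at a 13% rate nets out to 4.69.
    print(f"{present_value(0.13, [10, 20.70, -293, 297]) = }")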
================================================
FILE: financial/price_plus_tax.py
================================================
"""
Calculate price plus tax of a good or service given its price and a tax rate.
"""
def price_plus_tax(price: float, tax_rate: float) -> float:
"""
>>> price_plus_tax(100, 0.25)
125.0
>>> price_plus_tax(125.50, 0.05)
131.775
"""
return price * (1 + tax_rate)
if __name__ == "__main__":
print(f"{price_plus_tax(100, 0.25) = }")
print(f"{price_plus_tax(125.50, 0.05) = }")
================================================
FILE: financial/simple_moving_average.py
================================================
"""
The Simple Moving Average (SMA) is a statistical calculation used to analyze data points
by creating a constantly updated average price over a specific time period.
In finance, SMA is often used in time series analysis to smooth out price data
and identify trends.
Reference: https://en.wikipedia.org/wiki/Moving_average
"""
from collections.abc import Sequence
def simple_moving_average(
data: Sequence[float], window_size: int
) -> list[float | None]:
"""
Calculate the simple moving average (SMA) for some given time series data.
:param data: A list of numerical data points.
:param window_size: An integer representing the size of the SMA window.
:return: A list of SMA values with the same length as the input data.
Examples:
>>> sma = simple_moving_average([10, 12, 15, 13, 14, 16, 18, 17, 19, 21], 3)
>>> [round(value, 2) if value is not None else None for value in sma]
[None, None, 12.33, 13.33, 14.0, 14.33, 16.0, 17.0, 18.0, 19.0]
>>> simple_moving_average([10, 12, 15], 5)
[None, None, None]
>>> simple_moving_average([10, 12, 15, 13, 14, 16, 18, 17, 19, 21], 0)
Traceback (most recent call last):
...
ValueError: Window size must be a positive integer
"""
if window_size < 1:
raise ValueError("Window size must be a positive integer")
sma: list[float | None] = []
for i in range(len(data)):
if i < window_size - 1:
sma.append(None) # SMA not available for early data points
else:
window = data[i - window_size + 1 : i + 1]
sma_value = sum(window) / window_size
sma.append(sma_value)
return sma
if __name__ == "__main__":
import doctest
doctest.testmod()
# Example data (replace with your own time series data)
data = [10, 12, 15, 13, 14, 16, 18, 17, 19, 21]
# Specify the window size for the SMA
window_size = 3
# Calculate the Simple Moving Average
sma_values = simple_moving_average(data, window_size)
# Print the SMA values
print("Simple Moving Average (SMA) Values:")
for i, value in enumerate(sma_values):
if value is not None:
print(f"Day {i + 1}: {value:.2f}")
else:
print(f"Day {i + 1}: Not enough data for SMA")
================================================
FILE: financial/straight_line_depreciation.py
================================================
"""
In accounting, depreciation refers to the decreases in the value
of a fixed asset during the asset's useful life.
When an organization purchases a fixed asset,
the purchase expenditure is not recognized as an expense immediately.
Instead, the decreases in the asset's value are recognized as expenses
over the years during which the asset is used.
The following methods are widely used
for depreciation calculation in accounting:
- Straight-line method
- Diminishing balance method
- Units-of-production method
The straight-line method is the simplest and most widely used.
This method calculates depreciation by spreading the cost evenly
over the asset's useful life.
The following formula shows how to calculate the yearly depreciation expense:
- annual depreciation expense =
(purchase cost of asset - residual value) / useful life of asset(years)
Further information on:
https://en.wikipedia.org/wiki/Depreciation
The function, straight_line_depreciation, returns a list of
the depreciation expenses over the given period.
"""
def straight_line_depreciation(
useful_years: int,
purchase_value: float,
residual_value: float = 0.0,
) -> list[float]:
"""
Calculate the depreciation expenses over the given period
:param useful_years: Number of years the asset will be used
:param purchase_value: Purchase expenditure for the asset
:param residual_value: Residual value of the asset at the end of its useful life
:return: A list of annual depreciation expenses over the asset's useful life
>>> straight_line_depreciation(10, 1100.0, 100.0)
[100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
>>> straight_line_depreciation(6, 1250.0, 50.0)
[200.0, 200.0, 200.0, 200.0, 200.0, 200.0]
>>> straight_line_depreciation(4, 1001.0)
[250.25, 250.25, 250.25, 250.25]
>>> straight_line_depreciation(11, 380.0, 50.0)
[30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0]
>>> straight_line_depreciation(1, 4985, 100)
[4885.0]
"""
if not isinstance(useful_years, int):
raise TypeError("Useful years must be an integer")
if useful_years < 1:
raise ValueError("Useful years cannot be less than 1")
if not isinstance(purchase_value, (float, int)):
raise TypeError("Purchase value must be numeric")
if not isinstance(residual_value, (float, int)):
raise TypeError("Residual value must be numeric")
if purchase_value < 0.0:
raise ValueError("Purchase value cannot be less than zero")
if purchase_value < residual_value:
raise ValueError("Purchase value cannot be less than residual value")
# Calculate annual depreciation expense
depreciable_cost = purchase_value - residual_value
annual_depreciation_expense = depreciable_cost / useful_years
# List of annual depreciation expenses
list_of_depreciation_expenses = []
accumulated_depreciation_expense = 0.0
for period in range(useful_years):
if period != useful_years - 1:
accumulated_depreciation_expense += annual_depreciation_expense
list_of_depreciation_expenses.append(annual_depreciation_expense)
else:
depreciation_expense_in_end_year = (
depreciable_cost - accumulated_depreciation_expense
)
list_of_depreciation_expenses.append(depreciation_expense_in_end_year)
return list_of_depreciation_expenses
if __name__ == "__main__":
user_input_useful_years = int(input("Please Enter Useful Years:\n > "))
user_input_purchase_value = float(input("Please Enter Purchase Value:\n > "))
user_input_residual_value = float(input("Please Enter Residual Value:\n > "))
print(
straight_line_depreciation(
user_input_useful_years,
user_input_purchase_value,
user_input_residual_value,
)
)
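    # Hedged extra example (added, not part of the original file): a 1100.0
    # purchase with a 100.0 residual value over 10 years depreciates by
    # (1100.0 - 100.0) / 10 = 100.0 per year.
    print(f"{straight_line_depreciation(10, 1100.0, 100.0) = }")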
================================================
FILE: financial/time_and_half_pay.py
================================================
"""
Calculate time and a half pay
"""
def pay(hours_worked: float, pay_rate: float, hours: float = 40) -> float:
"""
hours_worked = The total hours worked
pay_rate = Amount of money per hour
hours = Number of hours that must be worked before you receive time and a half
>>> pay(41, 1)
41.5
>>> pay(65, 19)
1472.5
>>> pay(10, 1)
10.0
"""
# Check that all input parameters are float or integer
assert isinstance(hours_worked, (float, int)), (
"Parameter 'hours_worked' must be of type 'int' or 'float'"
)
assert isinstance(pay_rate, (float, int)), (
"Parameter 'pay_rate' must be of type 'int' or 'float'"
)
assert isinstance(hours, (float, int)), (
"Parameter 'hours' must be of type 'int' or 'float'"
)
normal_pay = hours_worked * pay_rate
over_time = max(0, hours_worked - hours)
over_time_pay = over_time * pay_rate / 2
return normal_pay + over_time_pay
if __name__ == "__main__":
# Test
import doctest
doctest.testmod()
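    # Hedged usage sketch (added example, not part of the original file): 45 hours
    # at 10 per hour with the default 40-hour threshold pays 45 * 10 straight time
    # plus 5 * 10 / 2 for the extra half rate, i.e. 475.0 in total.
    print(f"{pay(45, 10) = }")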
================================================
FILE: fractals/__init__.py
================================================
================================================
FILE: fractals/julia_sets.py
================================================
"""Author Alexandre De Zotti
Draws Julia sets of quadratic polynomials and exponential maps.
More specifically, this iterates the function a fixed number of times
then plots whether the absolute value of the last iterate is greater than
a fixed threshold (named "escape radius"). For the exponential map this is not
really an escape radius but rather a convenient way to approximate the Julia
set with bounded orbits.
The examples presented here are:
- The Cauliflower Julia set, see e.g.
https://en.wikipedia.org/wiki/File:Julia_z2%2B0,25.png
- Other examples from https://en.wikipedia.org/wiki/Julia_set
- An exponential map Julia set, ambiently homeomorphic to the examples in
https://www.math.univ-toulouse.fr/~cheritat/GalII/galery.html
and
https://ddd.uab.cat/pub/pubmat/02141493v43n1/02141493v43n1p27.pdf
Remark: Some overflow runtime warnings are suppressed. This is because of the
way the iteration loop is implemented, using numpy's efficient computations.
Overflows and infinites are replaced after each step by a large number.
"""
import warnings
from collections.abc import Callable
from typing import Any
import matplotlib.pyplot as plt
import numpy as np
c_cauliflower = 0.25 + 0.0j
c_polynomial_1 = -0.4 + 0.6j
c_polynomial_2 = -0.1 + 0.651j
c_exponential = -2.0
nb_iterations = 56
window_size = 2.0
nb_pixels = 666
def eval_exponential(c_parameter: complex, z_values: np.ndarray) -> np.ndarray:
"""
Evaluate $e^z + c$.
>>> float(eval_exponential(0, 0))
1.0
>>> bool(abs(eval_exponential(1, np.pi*1.j)) < 1e-15)
True
>>> bool(abs(eval_exponential(1.j, 0)-1-1.j) < 1e-15)
True
"""
return np.exp(z_values) + c_parameter
def eval_quadratic_polynomial(c_parameter: complex, z_values: np.ndarray) -> np.ndarray:
"""
>>> eval_quadratic_polynomial(0, 2)
4
>>> eval_quadratic_polynomial(-1, 1)
0
>>> round(eval_quadratic_polynomial(1.j, 0).imag)
1
>>> round(eval_quadratic_polynomial(1.j, 0).real)
0
"""
return z_values * z_values + c_parameter
def prepare_grid(window_size: float, nb_pixels: int) -> np.ndarray:
"""
Create a grid of complex values of size nb_pixels*nb_pixels with real and
imaginary parts ranging from -window_size to window_size (inclusive).
Returns a numpy array.
>>> prepare_grid(1,3)
array([[-1.-1.j, -1.+0.j, -1.+1.j],
[ 0.-1.j, 0.+0.j, 0.+1.j],
[ 1.-1.j, 1.+0.j, 1.+1.j]])
"""
x = np.linspace(-window_size, window_size, nb_pixels)
x = x.reshape((nb_pixels, 1))
y = np.linspace(-window_size, window_size, nb_pixels)
y = y.reshape((1, nb_pixels))
return x + 1.0j * y
def iterate_function(
eval_function: Callable[[Any, np.ndarray], np.ndarray],
function_params: Any,
nb_iterations: int,
z_0: np.ndarray,
infinity: float | None = None,
) -> np.ndarray:
"""
Iterate the function "eval_function" exactly nb_iterations times.
The first argument of the function is a parameter which is contained in
function_params. The variable z_0 is an array that contains the initial
values to iterate from.
This function returns the final iterates.
>>> iterate_function(eval_quadratic_polynomial, 0, 3, np.array([0,1,2])).shape
(3,)
>>> complex(np.round(iterate_function(eval_quadratic_polynomial,
... 0,
... 3,
... np.array([0,1,2]))[0]))
0j
>>> complex(np.round(iterate_function(eval_quadratic_polynomial,
... 0,
... 3,
... np.array([0,1,2]))[1]))
(1+0j)
>>> complex(np.round(iterate_function(eval_quadratic_polynomial,
... 0,
... 3,
... np.array([0,1,2]))[2]))
(256+0j)
"""
z_n = z_0.astype("complex64")
for _ in range(nb_iterations):
z_n = eval_function(function_params, z_n)
if infinity is not None:
np.nan_to_num(z_n, copy=False, nan=infinity)
z_n[abs(z_n) == np.inf] = infinity
return z_n
def show_results(
function_label: str,
function_params: Any,
escape_radius: float,
z_final: np.ndarray,
) -> None:
"""
Plots of whether the absolute value of z_final is greater than
the value of escape_radius. Adds the function_label and function_params to
the title.
>>> show_results('80', 0, 1, np.array([[0,1,.5],[.4,2,1.1],[.2,1,1.3]]))
"""
abs_z_final = (abs(z_final)).transpose()
abs_z_final[:, :] = abs_z_final[::-1, :]
plt.matshow(abs_z_final < escape_radius)
plt.title(f"Julia set of ${function_label}$, $c={function_params}$")
plt.show()
def ignore_overflow_warnings() -> None:
"""
Ignore some overflow and invalid value warnings.
>>> ignore_overflow_warnings()
"""
warnings.filterwarnings(
"ignore", category=RuntimeWarning, message="overflow encountered in multiply"
)
warnings.filterwarnings(
"ignore",
category=RuntimeWarning,
message="invalid value encountered in multiply",
)
warnings.filterwarnings(
"ignore", category=RuntimeWarning, message="overflow encountered in absolute"
)
warnings.filterwarnings(
"ignore", category=RuntimeWarning, message="overflow encountered in exp"
)
if __name__ == "__main__":
z_0 = prepare_grid(window_size, nb_pixels)
ignore_overflow_warnings() # See file header for explanations
nb_iterations = 24
escape_radius = 2 * abs(c_cauliflower) + 1
z_final = iterate_function(
eval_quadratic_polynomial,
c_cauliflower,
nb_iterations,
z_0,
infinity=1.1 * escape_radius,
)
show_results("z^2+c", c_cauliflower, escape_radius, z_final)
nb_iterations = 64
escape_radius = 2 * abs(c_polynomial_1) + 1
z_final = iterate_function(
eval_quadratic_polynomial,
c_polynomial_1,
nb_iterations,
z_0,
infinity=1.1 * escape_radius,
)
show_results("z^2+c", c_polynomial_1, escape_radius, z_final)
nb_iterations = 161
escape_radius = 2 * abs(c_polynomial_2) + 1
z_final = iterate_function(
eval_quadratic_polynomial,
c_polynomial_2,
nb_iterations,
z_0,
infinity=1.1 * escape_radius,
)
show_results("z^2+c", c_polynomial_2, escape_radius, z_final)
nb_iterations = 12
escape_radius = 10000.0
z_final = iterate_function(
eval_exponential,
c_exponential,
nb_iterations,
z_0 + 2,
infinity=1.0e10,
)
show_results("e^z+c", c_exponential, escape_radius, z_final)
================================================
FILE: fractals/koch_snowflake.py
================================================
"""
Description
The Koch snowflake is a fractal curve and one of the earliest fractals to
have been described. The Koch snowflake can be built up iteratively, in a
sequence of stages. The first stage is an equilateral triangle, and each
successive stage is formed by adding outward bends to each side of the
previous stage, making smaller equilateral triangles.
This can be achieved through the following steps for each line:
1. divide the line segment into three segments of equal length.
2. draw an equilateral triangle that has the middle segment from step 1
as its base and points outward.
3. remove the line segment that is the base of the triangle from step 2.
(description adapted from https://en.wikipedia.org/wiki/Koch_snowflake )
(for a more detailed explanation and an implementation in the
Processing language, see https://natureofcode.com/book/chapter-8-fractals/
#84-the-koch-curve-and-the-arraylist-technique )
Requirements (pip):
- matplotlib
- numpy
"""
from __future__ import annotations
import matplotlib.pyplot as plt
import numpy as np
# initial triangle of Koch snowflake
VECTOR_1 = np.array([0, 0])
VECTOR_2 = np.array([0.5, 0.8660254])
VECTOR_3 = np.array([1, 0])
INITIAL_VECTORS = [VECTOR_1, VECTOR_2, VECTOR_3, VECTOR_1]
# uncomment for simple Koch curve instead of Koch snowflake
# INITIAL_VECTORS = [VECTOR_1, VECTOR_3]
def iterate(initial_vectors: list[np.ndarray], steps: int) -> list[np.ndarray]:
"""
Go through the number of iterations determined by the argument "steps".
Be careful with high values (above 5) since the time to calculate increases
exponentially.
>>> iterate([np.array([0, 0]), np.array([1, 0])], 1)
[array([0, 0]), array([0.33333333, 0. ]), array([0.5 , \
0.28867513]), array([0.66666667, 0. ]), array([1, 0])]
"""
vectors = initial_vectors
for _ in range(steps):
vectors = iteration_step(vectors)
return vectors
def iteration_step(vectors: list[np.ndarray]) -> list[np.ndarray]:
"""
Loops through each pair of adjacent vectors. Each line between two adjacent
vectors is divided into 4 segments by adding 3 additional vectors in-between
the original two vectors. The vector in the middle is constructed through a
60 degree rotation so it is bent outwards.
>>> iteration_step([np.array([0, 0]), np.array([1, 0])])
[array([0, 0]), array([0.33333333, 0. ]), array([0.5 , \
0.28867513]), array([0.66666667, 0. ]), array([1, 0])]
"""
new_vectors = []
for i, start_vector in enumerate(vectors[:-1]):
end_vector = vectors[i + 1]
new_vectors.append(start_vector)
difference_vector = end_vector - start_vector
new_vectors.append(start_vector + difference_vector / 3)
new_vectors.append(
start_vector + difference_vector / 3 + rotate(difference_vector / 3, 60)
)
new_vectors.append(start_vector + difference_vector * 2 / 3)
new_vectors.append(vectors[-1])
return new_vectors
def rotate(vector: np.ndarray, angle_in_degrees: float) -> np.ndarray:
"""
Standard rotation of a 2D vector with a rotation matrix
(see https://en.wikipedia.org/wiki/Rotation_matrix )
>>> rotate(np.array([1, 0]), 60)
array([0.5 , 0.8660254])
>>> rotate(np.array([1, 0]), 90)
array([6.123234e-17, 1.000000e+00])
"""
theta = np.radians(angle_in_degrees)
c, s = np.cos(theta), np.sin(theta)
rotation_matrix = np.array(((c, -s), (s, c)))
return np.dot(rotation_matrix, vector)
def plot(vectors: list[np.ndarray]) -> None:
"""
Utility function to plot the vectors using matplotlib.pyplot
No doctest was implemented since this function does not have a return value
"""
# avoid stretched display of graph
axes = plt.gca()
axes.set_aspect("equal")
# matplotlib.pyplot.plot takes a list of all x-coordinates and a list of all
# y-coordinates as inputs, which are constructed from the vector-list using
# zip()
x_coordinates, y_coordinates = zip(*vectors)
plt.plot(x_coordinates, y_coordinates)
plt.show()
if __name__ == "__main__":
import doctest
doctest.testmod()
processed_vectors = iterate(INITIAL_VECTORS, 5)
plot(processed_vectors)
================================================
FILE: fractals/mandelbrot.py
================================================
"""
The Mandelbrot set is the set of complex numbers "c" for which the series
"z_(n+1) = z_n * z_n + c" does not diverge, i.e. remains bounded. Thus, a
complex number "c" is a member of the Mandelbrot set if, when starting with
"z_0 = 0" and applying the iteration repeatedly, the absolute value of
"z_n" remains bounded for all "n > 0". Complex numbers can be written as
"a + b*i": "a" is the real component, usually drawn on the x-axis, and "b*i"
is the imaginary component, usually drawn on the y-axis. Most visualizations
of the Mandelbrot set use a color-coding to indicate after how many steps in
the series the numbers outside the set diverge. Images of the Mandelbrot set
exhibit an elaborate and infinitely complicated boundary that reveals
progressively ever-finer recursive detail at increasing magnifications, making
the boundary of the Mandelbrot set a fractal curve.
(description adapted from https://en.wikipedia.org/wiki/Mandelbrot_set )
(see also https://en.wikipedia.org/wiki/Plotting_algorithms_for_the_Mandelbrot_set )
"""
import colorsys
from PIL import Image
def get_distance(x: float, y: float, max_step: int) -> float:
"""
Return the relative distance (= step/max_step) after which the complex number
constituted by this x-y-pair diverges. Members of the Mandelbrot set do not
diverge so their distance is 1.
>>> get_distance(0, 0, 50)
1.0
>>> get_distance(0.5, 0.5, 50)
0.061224489795918366
>>> get_distance(2, 0, 50)
0.0
"""
a = x
b = y
for step in range(max_step): # noqa: B007
a_new = a * a - b * b + x
b = 2 * a * b + y
a = a_new
        # divergence happens for all complex numbers whose absolute value
        # exceeds 2 (i.e. a*a + b*b > 4)
if a * a + b * b > 4:
break
return step / (max_step - 1)
def get_black_and_white_rgb(distance: float) -> tuple:
"""
Black&white color-coding that ignores the relative distance. The Mandelbrot
set is black, everything else is white.
>>> get_black_and_white_rgb(0)
(255, 255, 255)
>>> get_black_and_white_rgb(0.5)
(255, 255, 255)
>>> get_black_and_white_rgb(1)
(0, 0, 0)
"""
if distance == 1:
return (0, 0, 0)
else:
return (255, 255, 255)
def get_color_coded_rgb(distance: float) -> tuple:
"""
Color-coding taking the relative distance into account. The Mandelbrot set
is black.
>>> get_color_coded_rgb(0)
(255, 0, 0)
>>> get_color_coded_rgb(0.5)
(0, 255, 255)
>>> get_color_coded_rgb(1)
(0, 0, 0)
"""
if distance == 1:
return (0, 0, 0)
else:
return tuple(round(i * 255) for i in colorsys.hsv_to_rgb(distance, 1, 1))
def get_image(
image_width: int = 800,
image_height: int = 600,
figure_center_x: float = -0.6,
figure_center_y: float = 0,
figure_width: float = 3.2,
max_step: int = 50,
use_distance_color_coding: bool = True,
) -> Image.Image:
"""
Function to generate the image of the Mandelbrot set. Two types of coordinates
are used: image-coordinates that refer to the pixels and figure-coordinates
that refer to the complex numbers inside and outside the Mandelbrot set. The
figure-coordinates in the arguments of this function determine which section
of the Mandelbrot set is viewed. The main area of the Mandelbrot set is
roughly between "-1.5 < x < 0.5" and "-1 < y < 1" in the figure-coordinates.
Commenting out tests that slow down pytest...
# 13.35s call fractals/mandelbrot.py::mandelbrot.get_image
# >>> get_image().load()[0,0]
(255, 0, 0)
# >>> get_image(use_distance_color_coding = False).load()[0,0]
(255, 255, 255)
"""
img = Image.new("RGB", (image_width, image_height))
pixels = img.load()
# loop through the image-coordinates
for image_x in range(image_width):
for image_y in range(image_height):
# determine the figure-coordinates based on the image-coordinates
figure_height = figure_width / image_width * image_height
figure_x = figure_center_x + (image_x / image_width - 0.5) * figure_width
figure_y = figure_center_y + (image_y / image_height - 0.5) * figure_height
distance = get_distance(figure_x, figure_y, max_step)
# color the corresponding pixel based on the selected coloring-function
if use_distance_color_coding:
pixels[image_x, image_y] = get_color_coded_rgb(distance)
else:
pixels[image_x, image_y] = get_black_and_white_rgb(distance)
return img
if __name__ == "__main__":
import doctest
doctest.testmod()
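    # Hedged quick check (added, not part of the original file): c = 0 never
    # diverges under z -> z*z + c, so get_distance reports the maximum relative
    # distance of 1.0.
    assert get_distance(0, 0, max_step=50) == 1.0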
# colored version, full figure
img = get_image()
# uncomment for colored version, different section, zoomed in
# img = get_image(figure_center_x = -0.6, figure_center_y = -0.4,
# figure_width = 0.8)
# uncomment for black and white version, full figure
# img = get_image(use_distance_color_coding = False)
# uncomment to save the image
# img.save("mandelbrot.png")
img.show()
================================================
FILE: fractals/sierpinski_triangle.py
================================================
"""
Author Anurag Kumar | anuragkumarak95@gmail.com | git/anuragkumarak95
Simple example of fractal generation using recursion.
What is the Sierpiński Triangle?
The Sierpiński triangle (sometimes spelled Sierpinski), also called the
Sierpiński gasket or Sierpiński sieve, is a fractal attractive fixed set with
the overall shape of an equilateral triangle, subdivided recursively into
smaller equilateral triangles. Originally constructed as a curve, this is one of
the basic examples of self-similar sets—that is, it is a mathematically
generated pattern that is reproducible at any magnification or reduction. It is
named after the Polish mathematician Wacław Sierpiński, but appeared as a
decorative pattern many centuries before the work of Sierpiński.
Usage: python sierpinski_triangle.py
Credits:
The above description is taken from
https://en.wikipedia.org/wiki/Sierpi%C5%84ski_triangle
This code was written by editing the code from
https://www.riannetrujillo.com/blog/python-fractal/
"""
import sys
import turtle
def get_mid(p1: tuple[float, float], p2: tuple[float, float]) -> tuple[float, float]:
"""
Find the midpoint of two points
>>> get_mid((0, 0), (2, 2))
(1.0, 1.0)
>>> get_mid((-3, -3), (3, 3))
(0.0, 0.0)
>>> get_mid((1, 0), (3, 2))
(2.0, 1.0)
>>> get_mid((0, 0), (1, 1))
(0.5, 0.5)
>>> get_mid((0, 0), (0, 0))
(0.0, 0.0)
"""
return (p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2
def triangle(
vertex1: tuple[float, float],
vertex2: tuple[float, float],
vertex3: tuple[float, float],
depth: int,
) -> None:
"""
Recursively draw the Sierpinski triangle given the vertices of the triangle
and the recursion depth
"""
my_pen.up()
my_pen.goto(vertex1[0], vertex1[1])
my_pen.down()
my_pen.goto(vertex2[0], vertex2[1])
my_pen.goto(vertex3[0], vertex3[1])
my_pen.goto(vertex1[0], vertex1[1])
if depth == 0:
return
triangle(vertex1, get_mid(vertex1, vertex2), get_mid(vertex1, vertex3), depth - 1)
triangle(vertex2, get_mid(vertex1, vertex2), get_mid(vertex2, vertex3), depth - 1)
triangle(vertex3, get_mid(vertex3, vertex2), get_mid(vertex1, vertex3), depth - 1)
if __name__ == "__main__":
if len(sys.argv) != 2:
raise ValueError(
"Correct format for using this script: "
"python fractals.py "
)
my_pen = turtle.Turtle()
my_pen.ht()
my_pen.speed(5)
my_pen.pencolor("red")
vertices = [(-175, -125), (0, 175), (175, -125)] # vertices of triangle
triangle(vertices[0], vertices[1], vertices[2], int(sys.argv[1]))
turtle.Screen().exitonclick()
================================================
FILE: fractals/vicsek.py
================================================
"""Authors Bastien Capiaux & Mehdi Oudghiri
The Vicsek fractal algorithm is a recursive algorithm that creates a
pattern known as the Vicsek fractal or the Vicsek square.
It is based on the concept of self-similarity, where the pattern at each
level of recursion resembles the overall pattern.
The algorithm involves dividing a square into 9 equal smaller squares,
removing the center square, and then repeating this process on the remaining 8 squares.
This results in a pattern that exhibits self-similarity and has a
square-shaped outline with smaller squares within it.
Source: https://en.wikipedia.org/wiki/Vicsek_fractal
"""
import turtle
def draw_cross(x: float, y: float, length: float):
"""
Draw a cross at the specified position and with the specified length.
"""
turtle.up()
turtle.goto(x - length / 2, y - length / 6)
turtle.down()
turtle.seth(0)
turtle.begin_fill()
for _ in range(4):
turtle.fd(length / 3)
turtle.right(90)
turtle.fd(length / 3)
turtle.left(90)
turtle.fd(length / 3)
turtle.left(90)
turtle.end_fill()
def draw_fractal_recursive(x: float, y: float, length: float, depth: float):
"""
Recursively draw the Vicsek fractal at the specified position, with the
specified length and depth.
"""
if depth == 0:
draw_cross(x, y, length)
return
draw_fractal_recursive(x, y, length / 3, depth - 1)
draw_fractal_recursive(x + length / 3, y, length / 3, depth - 1)
draw_fractal_recursive(x - length / 3, y, length / 3, depth - 1)
draw_fractal_recursive(x, y + length / 3, length / 3, depth - 1)
draw_fractal_recursive(x, y - length / 3, length / 3, depth - 1)
def set_color(rgb: str):
turtle.color(rgb)
def draw_vicsek_fractal(x: float, y: float, length: float, depth: float, color="blue"):
"""
Draw the Vicsek fractal at the specified position, with the specified
length and depth.
"""
turtle.speed(0)
turtle.hideturtle()
set_color(color)
draw_fractal_recursive(x, y, length, depth)
turtle.Screen().update()
def main():
draw_vicsek_fractal(0, 0, 800, 4)
turtle.done()
if __name__ == "__main__":
main()
================================================
FILE: fuzzy_logic/__init__.py
================================================
================================================
FILE: fuzzy_logic/fuzzy_operations.py
================================================
"""
By @Shreya123714
https://en.wikipedia.org/wiki/Fuzzy_set
"""
from __future__ import annotations
from dataclasses import dataclass
import matplotlib.pyplot as plt
import numpy as np
@dataclass
class FuzzySet:
"""
A class for representing and manipulating triangular fuzzy sets.
Attributes:
name: The name or label of the fuzzy set.
left_boundary: The left boundary of the fuzzy set.
peak: The peak (central) value of the fuzzy set.
right_boundary: The right boundary of the fuzzy set.
Methods:
membership(x): Calculate the membership value of an input 'x' in the fuzzy set.
union(other): Calculate the union of this fuzzy set with another fuzzy set.
intersection(other): Calculate the intersection of this fuzzy set with another.
complement(): Calculate the complement (negation) of this fuzzy set.
plot(): Plot the membership function of the fuzzy set.
>>> sheru = FuzzySet("Sheru", 0.4, 1, 0.6)
>>> sheru
FuzzySet(name='Sheru', left_boundary=0.4, peak=1, right_boundary=0.6)
>>> str(sheru)
'Sheru: [0.4, 1, 0.6]'
>>> siya = FuzzySet("Siya", 0.5, 1, 0.7)
>>> siya
FuzzySet(name='Siya', left_boundary=0.5, peak=1, right_boundary=0.7)
# Complement Operation
>>> sheru.complement()
FuzzySet(name='¬Sheru', left_boundary=0.4, peak=0.6, right_boundary=0)
>>> siya.complement() # doctest: +NORMALIZE_WHITESPACE
FuzzySet(name='¬Siya', left_boundary=0.30000000000000004, peak=0.5,
right_boundary=0)
# Intersection Operation
>>> siya.intersection(sheru)
FuzzySet(name='Siya ∩ Sheru', left_boundary=0.5, peak=0.6, right_boundary=1.0)
# Membership Operation
>>> sheru.membership(0.5)
0.16666666666666663
>>> sheru.membership(0.6)
0.0
# Union Operations
>>> siya.union(sheru)
FuzzySet(name='Siya U Sheru', left_boundary=0.4, peak=0.7, right_boundary=1.0)
"""
name: str
left_boundary: float
peak: float
right_boundary: float
def __str__(self) -> str:
"""
>>> FuzzySet("fuzzy_set", 0.1, 0.2, 0.3)
FuzzySet(name='fuzzy_set', left_boundary=0.1, peak=0.2, right_boundary=0.3)
"""
return (
f"{self.name}: [{self.left_boundary}, {self.peak}, {self.right_boundary}]"
)
def complement(self) -> FuzzySet:
"""
Calculate the complement (negation) of this fuzzy set.
Returns:
FuzzySet: A new fuzzy set representing the complement.
>>> FuzzySet("fuzzy_set", 0.1, 0.2, 0.3).complement()
FuzzySet(name='¬fuzzy_set', left_boundary=0.7, peak=0.9, right_boundary=0.8)
"""
return FuzzySet(
f"¬{self.name}",
1 - self.right_boundary,
1 - self.left_boundary,
1 - self.peak,
)
def intersection(self, other) -> FuzzySet:
"""
Calculate the intersection of this fuzzy set
with another fuzzy set.
Args:
other: Another fuzzy set to intersect with.
Returns:
A new fuzzy set representing the intersection.
>>> FuzzySet("a", 0.1, 0.2, 0.3).intersection(FuzzySet("b", 0.4, 0.5, 0.6))
FuzzySet(name='a ∩ b', left_boundary=0.4, peak=0.3, right_boundary=0.35)
"""
return FuzzySet(
f"{self.name} ∩ {other.name}",
max(self.left_boundary, other.left_boundary),
min(self.right_boundary, other.right_boundary),
(self.peak + other.peak) / 2,
)
def membership(self, x: float) -> float:
"""
Calculate the membership value of an input 'x' in the fuzzy set.
Returns:
The membership value of 'x' in the fuzzy set.
>>> a = FuzzySet("a", 0.1, 0.2, 0.3)
>>> a.membership(0.09)
0.0
>>> a.membership(0.1)
0.0
>>> a.membership(0.11)
0.09999999999999995
>>> a.membership(0.4)
0.0
>>> FuzzySet("A", 0, 0.5, 1).membership(0.1)
0.2
>>> FuzzySet("B", 0.2, 0.7, 1).membership(0.6)
0.8
"""
if x <= self.left_boundary or x >= self.right_boundary:
return 0.0
elif self.left_boundary < x <= self.peak:
return (x - self.left_boundary) / (self.peak - self.left_boundary)
elif self.peak < x < self.right_boundary:
return (self.right_boundary - x) / (self.right_boundary - self.peak)
msg = f"Invalid value {x} for fuzzy set {self}"
raise ValueError(msg)
def union(self, other) -> FuzzySet:
"""
Calculate the union of this fuzzy set with another fuzzy set.
Args:
other (FuzzySet): Another fuzzy set to union with.
Returns:
FuzzySet: A new fuzzy set representing the union.
>>> FuzzySet("a", 0.1, 0.2, 0.3).union(FuzzySet("b", 0.4, 0.5, 0.6))
FuzzySet(name='a U b', left_boundary=0.1, peak=0.6, right_boundary=0.35)
"""
return FuzzySet(
f"{self.name} U {other.name}",
min(self.left_boundary, other.left_boundary),
max(self.right_boundary, other.right_boundary),
(self.peak + other.peak) / 2,
)
def plot(self):
"""
Plot the membership function of the fuzzy set.
"""
x = np.linspace(0, 1, 1000)
y = [self.membership(xi) for xi in x]
plt.plot(x, y, label=self.name)
if __name__ == "__main__":
from doctest import testmod
testmod()
a = FuzzySet("A", 0, 0.5, 1)
b = FuzzySet("B", 0.2, 0.7, 1)
a.plot()
b.plot()
plt.xlabel("x")
plt.ylabel("Membership")
plt.legend()
plt.show()
union_ab = a.union(b)
intersection_ab = a.intersection(b)
complement_a = a.complement()
union_ab.plot()
intersection_ab.plot()
complement_a.plot()
plt.xlabel("x")
plt.ylabel("Membership")
plt.legend()
plt.show()
================================================
FILE: fuzzy_logic/fuzzy_operations.py.DISABLED.txt
================================================
"""
README, Author - Jigyasa Gandhi(mailto:jigsgandhi97@gmail.com)
Requirements:
- scikit-fuzzy
- numpy
- matplotlib
Python:
- 3.5
"""
import numpy as np
import skfuzzy as fuzz
if __name__ == "__main__":
# Create universe of discourse in Python using linspace ()
X = np.linspace(start=0, stop=75, num=75, endpoint=True, retstep=False)
# Create two fuzzy sets by defining any membership function
# (trapmf(), gbellmf(), gaussmf(), etc).
abc1 = [0, 25, 50]
abc2 = [25, 50, 75]
young = fuzz.membership.trimf(X, abc1)
middle_aged = fuzz.membership.trimf(X, abc2)
# Compute the different operations using inbuilt functions.
one = np.ones(75)
zero = np.zeros((75,))
# 1. Union = max(µA(x), µB(x))
union = fuzz.fuzzy_or(X, young, X, middle_aged)[1]
# 2. Intersection = min(µA(x), µB(x))
intersection = fuzz.fuzzy_and(X, young, X, middle_aged)[1]
# 3. Complement (A) = (1 - min(µA(x)))
complement_a = fuzz.fuzzy_not(young)
# 4. Difference (A/B) = min(µA(x),(1- µB(x)))
difference = fuzz.fuzzy_and(X, young, X, fuzz.fuzzy_not(middle_aged)[1])[1]
# 5. Algebraic Sum = [µA(x) + µB(x) – (µA(x) * µB(x))]
alg_sum = young + middle_aged - (young * middle_aged)
# 6. Algebraic Product = (µA(x) * µB(x))
alg_product = young * middle_aged
# 7. Bounded Sum = min[1,(µA(x), µB(x))]
bdd_sum = fuzz.fuzzy_and(X, one, X, young + middle_aged)[1]
# 8. Bounded difference = min[0,(µA(x), µB(x))]
bdd_difference = fuzz.fuzzy_or(X, zero, X, young - middle_aged)[1]
# max-min composition
# max-product composition
# Plot each set A, set B and each operation result using plot() and subplot().
from matplotlib import pyplot as plt
plt.figure()
plt.subplot(4, 3, 1)
plt.plot(X, young)
plt.title("Young")
plt.grid(True)
plt.subplot(4, 3, 2)
plt.plot(X, middle_aged)
plt.title("Middle aged")
plt.grid(True)
plt.subplot(4, 3, 3)
plt.plot(X, union)
plt.title("union")
plt.grid(True)
plt.subplot(4, 3, 4)
plt.plot(X, intersection)
plt.title("intersection")
plt.grid(True)
plt.subplot(4, 3, 5)
plt.plot(X, complement_a)
plt.title("complement_a")
plt.grid(True)
plt.subplot(4, 3, 6)
plt.plot(X, difference)
plt.title("difference a/b")
plt.grid(True)
plt.subplot(4, 3, 7)
plt.plot(X, alg_sum)
plt.title("alg_sum")
plt.grid(True)
plt.subplot(4, 3, 8)
plt.plot(X, alg_product)
plt.title("alg_product")
plt.grid(True)
plt.subplot(4, 3, 9)
plt.plot(X, bdd_sum)
plt.title("bdd_sum")
plt.grid(True)
plt.subplot(4, 3, 10)
plt.plot(X, bdd_difference)
plt.title("bdd_difference")
plt.grid(True)
plt.subplots_adjust(hspace=0.5)
plt.show()
================================================
FILE: genetic_algorithm/__init__.py
================================================
================================================
FILE: genetic_algorithm/basic_string.py
================================================
"""
Simple multithreaded algorithm to show how the 4 phases of a genetic algorithm work
(Evaluation, Selection, Crossover and Mutation)
https://en.wikipedia.org/wiki/Genetic_algorithm
Author: D4rkia
"""
from __future__ import annotations
import random
# Maximum size of the population. Bigger could be faster but is more memory expensive.
N_POPULATION = 200
# Number of elements selected in every generation of evolution. The selection takes
# place from best to worst of that generation and must be smaller than N_POPULATION.
N_SELECTED = 50
# Probability that an element of a generation can mutate, changing one of its genes.
# This will guarantee that all genes will be used during evolution.
MUTATION_PROBABILITY = 0.4
# Just a seed to improve randomness required by the algorithm.
random.seed(random.randint(0, 1000))
def evaluate(item: str, main_target: str) -> tuple[str, float]:
"""
Evaluate how similar the item is with the target by just
counting each char in the right position
>>> evaluate("Helxo Worlx", "Hello World")
('Helxo Worlx', 9.0)
"""
score = len([g for position, g in enumerate(item) if g == main_target[position]])
return (item, float(score))
def crossover(parent_1: str, parent_2: str) -> tuple[str, str]:
"""
Slice and combine two strings at a random point.
>>> random.seed(42)
>>> crossover("123456", "abcdef")
('12345f', 'abcde6')
"""
random_slice = random.randint(0, len(parent_1) - 1)
child_1 = parent_1[:random_slice] + parent_2[random_slice:]
child_2 = parent_2[:random_slice] + parent_1[random_slice:]
return (child_1, child_2)
def mutate(child: str, genes: list[str]) -> str:
"""
Mutate a random gene of a child with another one from the list.
>>> random.seed(123)
>>> mutate("123456", list("ABCDEF"))
'12345A'
"""
child_list = list(child)
if random.uniform(0, 1) < MUTATION_PROBABILITY:
child_list[random.randint(0, len(child)) - 1] = random.choice(genes)
return "".join(child_list)
# Select, crossover and mutate a new population.
def select(
parent_1: tuple[str, float],
population_score: list[tuple[str, float]],
genes: list[str],
) -> list[str]:
"""
Select the second parent and generate new population
>>> random.seed(42)
>>> parent_1 = ("123456", 8.0)
>>> population_score = [("abcdef", 4.0), ("ghijkl", 5.0), ("mnopqr", 7.0)]
>>> genes = list("ABCDEF")
>>> child_n = int(min(parent_1[1] + 1, 10))
>>> population = []
>>> for _ in range(child_n):
... parent_2 = population_score[random.randrange(len(population_score))][0]
... child_1, child_2 = crossover(parent_1[0], parent_2)
... population.extend((mutate(child_1, genes), mutate(child_2, genes)))
>>> len(population) == (int(parent_1[1]) + 1) * 2
True
"""
pop = []
# Generate more children proportionally to the fitness score.
child_n = int(parent_1[1] * 100) + 1
child_n = 10 if child_n >= 10 else child_n
for _ in range(child_n):
parent_2 = population_score[random.randint(0, N_SELECTED)][0]
child_1, child_2 = crossover(parent_1[0], parent_2)
# Append new string to the population list.
pop.append(mutate(child_1, genes))
pop.append(mutate(child_2, genes))
return pop
def basic(target: str, genes: list[str], debug: bool = True) -> tuple[int, int, str]:
"""
Verify that the target contains no genes besides the ones inside genes variable.
>>> from string import ascii_lowercase
>>> basic("doctest", ascii_lowercase, debug=False)[2]
'doctest'
>>> genes = list(ascii_lowercase)
>>> genes.remove("e")
>>> basic("test", genes)
Traceback (most recent call last):
...
ValueError: ['e'] is not in genes list, evolution cannot converge
>>> genes.remove("s")
>>> basic("test", genes)
Traceback (most recent call last):
...
ValueError: ['e', 's'] is not in genes list, evolution cannot converge
>>> genes.remove("t")
>>> basic("test", genes)
Traceback (most recent call last):
...
ValueError: ['e', 's', 't'] is not in genes list, evolution cannot converge
"""
# Verify if N_POPULATION is bigger than N_SELECTED
if N_POPULATION < N_SELECTED:
msg = f"{N_POPULATION} must be bigger than {N_SELECTED}"
raise ValueError(msg)
# Verify that the target contains no genes besides the ones inside genes variable.
not_in_genes_list = sorted({c for c in target if c not in genes})
if not_in_genes_list:
msg = f"{not_in_genes_list} is not in genes list, evolution cannot converge"
raise ValueError(msg)
# Generate random starting population.
population = []
for _ in range(N_POPULATION):
population.append("".join([random.choice(genes) for i in range(len(target))]))
    # Just some logs to know what the algorithm is doing.
generation, total_population = 0, 0
# This loop will end when we find a perfect match for our target.
while True:
generation += 1
total_population += len(population)
# Random population created. Now it's time to evaluate.
# (Option 1) Adding a bit of concurrency can make everything faster,
#
# import concurrent.futures
# population_score: list[tuple[str, float]] = []
# with concurrent.futures.ThreadPoolExecutor(
# max_workers=NUM_WORKERS) as executor:
# futures = {executor.submit(evaluate, item, target) for item in population}
# concurrent.futures.wait(futures)
# population_score = [item.result() for item in futures]
#
# but with a simple algorithm like this, it will probably be slower.
# (Option 2) We just need to call evaluate for every item inside the population.
population_score = [evaluate(item, target) for item in population]
# Check if there is a matching evolution.
population_score = sorted(population_score, key=lambda x: x[1], reverse=True)
if population_score[0][0] == target:
return (generation, total_population, population_score[0][0])
        # Print the best result every 10 generations.
# Just to know that the algorithm is working.
if debug and generation % 10 == 0:
print(
f"\nGeneration: {generation}"
f"\nTotal Population:{total_population}"
f"\nBest score: {population_score[0][1]}"
f"\nBest string: {population_score[0][0]}"
)
# Flush the old population, keeping some of the best evolutions.
        # Keeping this avoids regression of evolution.
population_best = population[: int(N_POPULATION / 3)]
population.clear()
population.extend(population_best)
# Normalize population score to be between 0 and 1.
population_score = [
(item, score / len(target)) for item, score in population_score
]
# This is selection
for i in range(N_SELECTED):
population.extend(select(population_score[int(i)], population_score, genes))
# Check if the population has already reached the maximum value and if so,
# break the cycle. If this check is disabled, the algorithm will take
# forever to compute large strings, but will also calculate small strings in
        # far fewer generations.
if len(population) > N_POPULATION:
break
if __name__ == "__main__":
target_str = (
"This is a genetic algorithm to evaluate, combine, evolve, and mutate a string!"
)
genes_list = list(
" ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"
"nopqrstuvwxyz.,;!?+-*#@^'èéòà€ù=)(&%$£/\\"
)
generation, population, target = basic(target_str, genes_list)
print(
f"\nGeneration: {generation}\nTotal Population: {population}\nTarget: {target}"
)
================================================
FILE: geodesy/__init__.py
================================================
================================================
FILE: geodesy/haversine_distance.py
================================================
from math import asin, atan, cos, radians, sin, sqrt, tan
AXIS_A = 6378137.0
AXIS_B = 6356752.314245
RADIUS = 6378137
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""
Calculate great circle distance between two points in a sphere,
given longitudes and latitudes https://en.wikipedia.org/wiki/Haversine_formula
We know that the globe is "sort of" spherical, so a path between two points
isn't exactly a straight line. We need to account for the Earth's curvature
when calculating distance from point A to B. This effect is negligible for
small distances but adds up as distance increases. The Haversine method treats
the earth as a sphere which allows us to "project" the two points A and B
onto the surface of that sphere and approximate the spherical distance between
them. Since the Earth is not a perfect sphere, other methods which model the
Earth's ellipsoidal nature are more accurate but a quick and modifiable
computation like Haversine can be handy for shorter range distances.
Args:
* `lat1`, `lon1`: latitude and longitude of coordinate 1
* `lat2`, `lon2`: latitude and longitude of coordinate 2
Returns:
geographical distance between two points in metres
>>> from collections import namedtuple
>>> point_2d = namedtuple("point_2d", "lat lon")
>>> SAN_FRANCISCO = point_2d(37.774856, -122.424227)
>>> YOSEMITE = point_2d(37.864742, -119.537521)
>>> f"{haversine_distance(*SAN_FRANCISCO, *YOSEMITE):0,.0f} meters"
'254,352 meters'
"""
# CONSTANTS per WGS84 https://en.wikipedia.org/wiki/World_Geodetic_System
# Distance in metres(m)
# Equation parameters
# Equation https://en.wikipedia.org/wiki/Haversine_formula#Formulation
flattening = (AXIS_A - AXIS_B) / AXIS_A
phi_1 = atan((1 - flattening) * tan(radians(lat1)))
phi_2 = atan((1 - flattening) * tan(radians(lat2)))
lambda_1 = radians(lon1)
lambda_2 = radians(lon2)
# Equation
sin_sq_phi = sin((phi_2 - phi_1) / 2)
sin_sq_lambda = sin((lambda_2 - lambda_1) / 2)
# Square both values
sin_sq_phi *= sin_sq_phi
sin_sq_lambda *= sin_sq_lambda
h_value = sqrt(sin_sq_phi + (cos(phi_1) * cos(phi_2) * sin_sq_lambda))
return 2 * RADIUS * asin(h_value)
if __name__ == "__main__":
import doctest
doctest.testmod()
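# A minimal usage sketch (editor's addition) mirroring the doctest above:
# the San Francisco -> Yosemite great-circle estimate prints as roughly
# 254,352 meters.
san_francisco = (37.774856, -122.424227)
yosemite = (37.864742, -119.537521)
print(f"{haversine_distance(*san_francisco, *yosemite):0,.0f} meters")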
================================================
FILE: geodesy/lamberts_ellipsoidal_distance.py
================================================
from math import atan, cos, radians, sin, tan
from .haversine_distance import haversine_distance
AXIS_A = 6378137.0
AXIS_B = 6356752.314245
EQUATORIAL_RADIUS = 6378137
def lamberts_ellipsoidal_distance(
lat1: float, lon1: float, lat2: float, lon2: float
) -> float:
"""
Calculate the shortest distance along the surface of an ellipsoid between
two points on the surface of earth given longitudes and latitudes
https://en.wikipedia.org/wiki/Geographical_distance#Lambert's_formula_for_long_lines
NOTE: This algorithm uses geodesy/haversine_distance.py to compute central angle,
sigma
Representing the earth as an ellipsoid allows us to approximate distances between
points on the surface much better than a sphere. Ellipsoidal formulas treat the
Earth as an oblate ellipsoid which means accounting for the flattening that happens
at the North and South poles. Lambert's formulae provide accuracy on the order of
10 metres over thousands of kilometres. Other methods can provide
millimeter-level accuracy but this is a simpler method to calculate long range
distances without increasing computational intensity.
Args:
lat1, lon1: latitude and longitude of coordinate 1
lat2, lon2: latitude and longitude of coordinate 2
Returns:
geographical distance between two points in metres
>>> lamberts_ellipsoidal_distance(100, 0, 0, 0)
Traceback (most recent call last):
...
ValueError: Latitude must be between -90 and 90 degrees
>>> lamberts_ellipsoidal_distance(0, 0, -100, 0)
Traceback (most recent call last):
...
ValueError: Latitude must be between -90 and 90 degrees
>>> lamberts_ellipsoidal_distance(0, 200, 0, 0)
Traceback (most recent call last):
...
ValueError: Longitude must be between -180 and 180 degrees
>>> lamberts_ellipsoidal_distance(0, 0, 0, -200)
Traceback (most recent call last):
...
ValueError: Longitude must be between -180 and 180 degrees
>>> from collections import namedtuple
>>> point_2d = namedtuple("point_2d", "lat lon")
>>> SAN_FRANCISCO = point_2d(37.774856, -122.424227)
>>> YOSEMITE = point_2d(37.864742, -119.537521)
>>> NEW_YORK = point_2d(40.713019, -74.012647)
>>> VENICE = point_2d(45.443012, 12.313071)
>>> f"{lamberts_ellipsoidal_distance(*SAN_FRANCISCO, *YOSEMITE):0,.0f} meters"
'254,351 meters'
>>> f"{lamberts_ellipsoidal_distance(*SAN_FRANCISCO, *NEW_YORK):0,.0f} meters"
'4,138,992 meters'
>>> f"{lamberts_ellipsoidal_distance(*SAN_FRANCISCO, *VENICE):0,.0f} meters"
'9,737,326 meters'
"""
# Validate latitude values
if not -90 <= lat1 <= 90 or not -90 <= lat2 <= 90:
raise ValueError("Latitude must be between -90 and 90 degrees")
# Validate longitude values
if not -180 <= lon1 <= 180 or not -180 <= lon2 <= 180:
raise ValueError("Longitude must be between -180 and 180 degrees")
# CONSTANTS per WGS84 https://en.wikipedia.org/wiki/World_Geodetic_System
# Distance in metres(m)
# Equation Parameters
# https://en.wikipedia.org/wiki/Geographical_distance#Lambert's_formula_for_long_lines
flattening = (AXIS_A - AXIS_B) / AXIS_A
# Parametric latitudes
# https://en.wikipedia.org/wiki/Latitude#Parametric_(or_reduced)_latitude
b_lat1 = atan((1 - flattening) * tan(radians(lat1)))
b_lat2 = atan((1 - flattening) * tan(radians(lat2)))
# Compute central angle between two points
# using haversine theta. sigma = haversine_distance / equatorial radius
sigma = haversine_distance(lat1, lon1, lat2, lon2) / EQUATORIAL_RADIUS
# Intermediate P and Q values
p_value = (b_lat1 + b_lat2) / 2
q_value = (b_lat2 - b_lat1) / 2
# Intermediate X value
# X = (sigma - sin(sigma)) * sin^2(P) * cos^2(Q) / cos^2(sigma/2)
x_numerator = (sin(p_value) ** 2) * (cos(q_value) ** 2)
x_denominator = cos(sigma / 2) ** 2
x_value = (sigma - sin(sigma)) * (x_numerator / x_denominator)
# Intermediate Y value
# Y = (sigma + sin(sigma)) * cos^2(P) * sin^2(Q) / sin^2(sigma/2)
y_numerator = (cos(p_value) ** 2) * (sin(q_value) ** 2)
y_denominator = sin(sigma / 2) ** 2
y_value = (sigma + sin(sigma)) * (y_numerator / y_denominator)
return EQUATORIAL_RADIUS * (sigma - ((flattening / 2) * (x_value + y_value)))
if __name__ == "__main__":
import doctest
doctest.testmod()
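# A minimal comparison sketch (editor's addition): per the doctests above, the
# spherical haversine estimate (~254,352 m) and Lambert's ellipsoidal estimate
# (~254,351 m) differ by about a metre for San Francisco -> Yosemite.
san_francisco = (37.774856, -122.424227)
yosemite = (37.864742, -119.537521)
print(f"haversine: {haversine_distance(*san_francisco, *yosemite):0,.0f} m")
print(f"lamberts:  {lamberts_ellipsoidal_distance(*san_francisco, *yosemite):0,.0f} m")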
================================================
FILE: geometry/__init__.py
================================================
================================================
FILE: geometry/geometry.py
================================================
from __future__ import annotations
import math
from dataclasses import dataclass, field
from types import NoneType
from typing import Self
# Building block classes
@dataclass
class Angle:
"""
An Angle in degrees (unit of measurement)
>>> Angle()
Angle(degrees=90)
>>> Angle(45.5)
Angle(degrees=45.5)
>>> Angle(-1)
Traceback (most recent call last):
...
TypeError: degrees must be a numeric value between 0 and 360.
>>> Angle(361)
Traceback (most recent call last):
...
TypeError: degrees must be a numeric value between 0 and 360.
"""
degrees: float = 90
def __post_init__(self) -> None:
if not isinstance(self.degrees, (int, float)) or not 0 <= self.degrees <= 360:
raise TypeError("degrees must be a numeric value between 0 and 360.")
@dataclass
class Side:
"""
A side of a two-dimensional Shape such as a Polygon, etc.
length: the length of the current side in meters
angle: the angle in degrees between this side and the next one
next_side: the next adjacent Side, if any
>>> Side(5)
Side(length=5, angle=Angle(degrees=90), next_side=None)
>>> Side(5, Angle(45.6))
Side(length=5, angle=Angle(degrees=45.6), next_side=None)
>>> Side(5, Angle(45.6), Side(1, Angle(2))) # doctest: +ELLIPSIS
Side(length=5, angle=Angle(degrees=45.6), next_side=Side(length=1, angle=Angle(d...
>>> Side(-1)
Traceback (most recent call last):
...
TypeError: length must be a positive numeric value.
>>> Side(5, None)
Traceback (most recent call last):
...
TypeError: angle must be an Angle object.
>>> Side(5, Angle(90), "Invalid next_side")
Traceback (most recent call last):
...
TypeError: next_side must be a Side or None.
"""
length: float
angle: Angle = field(default_factory=Angle)
next_side: Side | None = None
def __post_init__(self) -> None:
if not isinstance(self.length, (int, float)) or self.length <= 0:
raise TypeError("length must be a positive numeric value.")
if not isinstance(self.angle, Angle):
raise TypeError("angle must be an Angle object.")
if not isinstance(self.next_side, (Side, NoneType)):
raise TypeError("next_side must be a Side or None.")
@dataclass
class Ellipse:
"""
A geometric Ellipse on a 2D surface
>>> Ellipse(5, 10)
Ellipse(major_radius=5, minor_radius=10)
>>> Ellipse(5, 10) is Ellipse(5, 10)
False
>>> Ellipse(5, 10) == Ellipse(5, 10)
True
"""
major_radius: float
minor_radius: float
@property
def area(self) -> float:
"""
>>> Ellipse(5, 10).area
157.07963267948966
"""
return math.pi * self.major_radius * self.minor_radius
@property
def perimeter(self) -> float:
"""
>>> Ellipse(5, 10).perimeter
47.12388980384689
"""
return math.pi * (self.major_radius + self.minor_radius)
class Circle(Ellipse):
"""
A geometric Circle on a 2D surface
>>> Circle(5)
Circle(radius=5)
>>> Circle(5) is Circle(5)
False
>>> Circle(5) == Circle(5)
True
>>> Circle(5).area
78.53981633974483
>>> Circle(5).perimeter
31.41592653589793
"""
def __init__(self, radius: float) -> None:
super().__init__(radius, radius)
self.radius = radius
def __repr__(self) -> str:
return f"Circle(radius={self.radius})"
@property
def diameter(self) -> float:
"""
>>> Circle(5).diameter
10
"""
return self.radius * 2
def max_parts(self, num_cuts: float) -> float:
"""
Return the maximum number of parts that a circle can be divided into if cut
'num_cuts' times.
>>> circle = Circle(5)
>>> circle.max_parts(0)
1.0
>>> circle.max_parts(7)
29.0
>>> circle.max_parts(54)
1486.0
>>> circle.max_parts(22.5)
265.375
>>> circle.max_parts(-222)
Traceback (most recent call last):
...
TypeError: num_cuts must be a positive numeric value.
>>> circle.max_parts("-222")
Traceback (most recent call last):
...
TypeError: num_cuts must be a positive numeric value.
"""
if not isinstance(num_cuts, (int, float)) or num_cuts < 0:
raise TypeError("num_cuts must be a positive numeric value.")
return (num_cuts + 2 + num_cuts**2) * 0.5
@dataclass
class Polygon:
"""
An abstract class which represents a Polygon on a 2D surface.
>>> Polygon()
Polygon(sides=[])
>>> polygon = Polygon()
>>> polygon.add_side(Side(5)).get_side(0)
Side(length=5, angle=Angle(degrees=90), next_side=None)
>>> polygon.get_side(1)
Traceback (most recent call last):
...
IndexError: list index out of range
>>> polygon.set_side(0, Side(10)).get_side(0)
Side(length=10, angle=Angle(degrees=90), next_side=None)
>>> polygon.set_side(1, Side(10))
Traceback (most recent call last):
...
IndexError: list assignment index out of range
"""
sides: list[Side] = field(default_factory=list)
def add_side(self, side: Side) -> Self:
"""
>>> Polygon().add_side(Side(5))
Polygon(sides=[Side(length=5, angle=Angle(degrees=90), next_side=None)])
"""
self.sides.append(side)
return self
def get_side(self, index: int) -> Side:
"""
>>> Polygon().get_side(0)
Traceback (most recent call last):
...
IndexError: list index out of range
>>> Polygon().add_side(Side(5)).get_side(-1)
Side(length=5, angle=Angle(degrees=90), next_side=None)
"""
return self.sides[index]
def set_side(self, index: int, side: Side) -> Self:
"""
>>> Polygon().set_side(0, Side(5))
Traceback (most recent call last):
...
IndexError: list assignment index out of range
>>> Polygon().add_side(Side(5)).set_side(0, Side(10))
Polygon(sides=[Side(length=10, angle=Angle(degrees=90), next_side=None)])
"""
self.sides[index] = side
return self
class Rectangle(Polygon):
"""
A geometric rectangle on a 2D surface.
>>> rectangle_one = Rectangle(5, 10)
>>> rectangle_one.perimeter()
30
>>> rectangle_one.area()
50
>>> Rectangle(-5, 10)
Traceback (most recent call last):
...
TypeError: length must be a positive numeric value.
"""
def __init__(self, short_side_length: float, long_side_length: float) -> None:
super().__init__()
self.short_side_length = short_side_length
self.long_side_length = long_side_length
self.post_init()
def post_init(self) -> None:
"""
>>> Rectangle(5, 10) # doctest: +NORMALIZE_WHITESPACE
Rectangle(sides=[Side(length=5, angle=Angle(degrees=90), next_side=None),
Side(length=10, angle=Angle(degrees=90), next_side=None)])
"""
self.short_side = Side(self.short_side_length)
self.long_side = Side(self.long_side_length)
super().add_side(self.short_side)
super().add_side(self.long_side)
def perimeter(self) -> float:
return (self.short_side.length + self.long_side.length) * 2
def area(self) -> float:
return self.short_side.length * self.long_side.length
@dataclass
class Square(Rectangle):
"""
A structure which represents a
geometrical Square on a 2D surface
>>> square_one = Square(5)
>>> square_one.perimeter()
20
>>> square_one.area()
25
"""
def __init__(self, side_length: float) -> None:
super().__init__(side_length, side_length)
def perimeter(self) -> float:
return super().perimeter()
def area(self) -> float:
return super().area()
if __name__ == "__main__":
__import__("doctest").testmod()
================================================
FILE: geometry/graham_scan.py
================================================
"""
Graham Scan algorithm for finding the convex hull of a set of points.
The Graham scan is a method of computing the convex hull of a finite set of points
in the plane with time complexity O(n log n). It is named after Ronald Graham, who
published the original algorithm in 1972.
The algorithm finds all vertices of the convex hull ordered along its boundary.
It uses a stack to efficiently identify and remove points that would create
non-convex angles.
References:
- https://en.wikipedia.org/wiki/Graham_scan
- Graham, R.L. (1972). "An Efficient Algorithm for Determining the Convex Hull of a
Finite Planar Set"
"""
from __future__ import annotations
from collections.abc import Sequence
from dataclasses import dataclass
from typing import TypeVar
T = TypeVar("T", bound="Point")
@dataclass
class Point:
"""
A point in 2D space.
>>> Point(0, 0)
Point(x=0.0, y=0.0)
>>> Point(1.5, 2.5)
Point(x=1.5, y=2.5)
"""
x: float
y: float
def __init__(self, x_coordinate: float, y_coordinate: float) -> None:
"""
Initialize a 2D point.
Args:
x_coordinate: The x-coordinate (horizontal position) of the point
y_coordinate: The y-coordinate (vertical position) of the point
"""
self.x = float(x_coordinate)
self.y = float(y_coordinate)
def __eq__(self, other: object) -> bool:
"""
Check if two points are equal.
>>> Point(1, 2) == Point(1, 2)
True
>>> Point(1, 2) == Point(2, 1)
False
"""
if not isinstance(other, Point):
return NotImplemented
return self.x == other.x and self.y == other.y
def __lt__(self, other: Point) -> bool:
"""
Compare two points for sorting (bottom-most, then left-most).
>>> Point(1, 2) < Point(1, 3)
True
>>> Point(1, 2) < Point(2, 2)
True
>>> Point(2, 2) < Point(1, 2)
False
"""
if self.y == other.y:
return self.x < other.x
return self.y < other.y
def euclidean_distance(self, other: Point) -> float:
"""
Calculate Euclidean distance between two points.
>>> Point(0, 0).euclidean_distance(Point(3, 4))
5.0
>>> Point(1, 1).euclidean_distance(Point(4, 5))
5.0
"""
return ((self.x - other.x) ** 2 + (self.y - other.y) ** 2) ** 0.5
def consecutive_orientation(self, point_a: Point, point_b: Point) -> float:
"""
Calculate the cross product of vectors (self -> point_a) and
(point_a -> point_b).
Returns:
- Positive value: counter-clockwise turn
- Negative value: clockwise turn
- Zero: collinear points
>>> Point(0, 0).consecutive_orientation(Point(1, 0), Point(1, 1))
1.0
>>> Point(0, 0).consecutive_orientation(Point(1, 0), Point(1, -1))
-1.0
>>> Point(0, 0).consecutive_orientation(Point(1, 0), Point(2, 0))
0.0
"""
return (point_a.x - self.x) * (point_b.y - point_a.y) - (point_a.y - self.y) * (
point_b.x - point_a.x
)
def graham_scan(points: Sequence[Point]) -> list[Point]:
"""
Find the convex hull of a set of points using the Graham scan algorithm.
The algorithm works as follows:
1. Find the bottom-most point (or left-most in case of tie)
2. Sort all other points by polar angle with respect to the bottom-most point
3. Process points in order, maintaining a stack of hull candidates
4. Remove points that would create a clockwise turn
Args:
points: A sequence of Point objects
Returns:
A list of Point objects representing the convex hull in counter-clockwise order.
Returns an empty list if there are fewer than 3 distinct points or if all
points are collinear.
Time Complexity: O(n log n) due to sorting
Space Complexity: O(n) for the output hull
>>> graham_scan([])
[]
>>> graham_scan([Point(0, 0)])
[]
>>> graham_scan([Point(0, 0), Point(1, 1)])
[]
>>> hull = graham_scan([Point(0, 0), Point(1, 0), Point(0.5, 1)])
>>> len(hull)
3
>>> Point(0, 0) in hull and Point(1, 0) in hull and Point(0.5, 1) in hull
True
"""
if len(points) <= 2:
return []
# Find the bottom-most point (left-most in case of tie)
min_point = min(points)
# Remove the min_point from the list
points_list = [p for p in points if p != min_point]
if not points_list:
# Edge case where all points are the same
return []
def polar_angle_key(point: Point) -> tuple[float, float, float]:
"""
Alternative sort key relative to min_point (unused below; the actual sort
relies on the cross-product comparator compare_points defined next).
Returns the point's offsets from min_point plus its negated distance.
"""
dx = point.x - min_point.x
dy = point.y - min_point.y
distance = min_point.euclidean_distance(point)
return (dx, dy, -distance)  # Negative distance to sort farther points first
# Sort by polar angle using a comparison based on cross product
def compare_points(point_a: Point, point_b: Point) -> int:
"""Compare two points by polar angle relative to min_point."""
orientation = min_point.consecutive_orientation(point_a, point_b)
if orientation < 0.0:
return 1 # point_a comes after point_b (clockwise)
elif orientation > 0.0:
return -1 # point_a comes before point_b (counter-clockwise)
else:
# Collinear: farther point should come first
dist_a = min_point.euclidean_distance(point_a)
dist_b = min_point.euclidean_distance(point_b)
if dist_b < dist_a:
return -1
elif dist_b > dist_a:
return 1
else:
return 0
from functools import cmp_to_key
points_list.sort(key=cmp_to_key(compare_points))
# Build the convex hull
convex_hull: list[Point] = [min_point, points_list[0]]
for point in points_list[1:]:
# Skip consecutive points with the same angle (collinear with min_point)
if min_point.consecutive_orientation(point, convex_hull[-1]) == 0.0:
continue
# Remove points that create a clockwise turn (or are collinear)
while len(convex_hull) >= 2:
orientation = convex_hull[-2].consecutive_orientation(
convex_hull[-1], point
)
if orientation <= 0.0:
convex_hull.pop()
else:
break
convex_hull.append(point)
# Need at least 3 points for a valid convex hull
if len(convex_hull) <= 2:
return []
return convex_hull
if __name__ == "__main__":
import doctest
doctest.testmod()
# Example usage
points = [
Point(0, 0),
Point(1, 0),
Point(2, 0),
Point(2, 1),
Point(2, 2),
Point(1, 2),
Point(0, 2),
Point(0, 1),
Point(1, 1), # Interior point
]
hull = graham_scan(points)
print("Convex hull vertices:")
for point in hull:
print(f" ({point.x}, {point.y})")
================================================
FILE: geometry/jarvis_march.py
================================================
"""
Jarvis March (Gift Wrapping) algorithm for finding the convex hull of a set of points.
The convex hull is the smallest convex polygon that contains all the points.
Time Complexity: O(n*h) where n is the number of points and h is the number of
hull points.
Space Complexity: O(h) where h is the number of hull points.
USAGE:
-> Import this file into your project.
-> Use the jarvis_march() function to find the convex hull of a set of points.
-> Parameters:
-> points: A list of Point objects representing 2D coordinates
REFERENCES:
-> Wikipedia reference: https://en.wikipedia.org/wiki/Gift_wrapping_algorithm
-> GeeksforGeeks:
https://www.geeksforgeeks.org/convex-hull-set-1-jarviss-algorithm-or-wrapping/
"""
from __future__ import annotations
class Point:
"""Represents a 2D point with x and y coordinates."""
def __init__(self, x_coordinate: float, y_coordinate: float) -> None:
self.x = x_coordinate
self.y = y_coordinate
def __eq__(self, other: object) -> bool:
if not isinstance(other, Point):
return NotImplemented
return self.x == other.x and self.y == other.y
def __repr__(self) -> str:
return f"Point({self.x}, {self.y})"
def __hash__(self) -> int:
return hash((self.x, self.y))
def _cross_product(origin: Point, point_a: Point, point_b: Point) -> float:
"""
Calculate the cross product of vectors OA and OB.
Returns:
> 0: Counter-clockwise turn (left turn)
= 0: Collinear
< 0: Clockwise turn (right turn)
"""
return (point_a.x - origin.x) * (point_b.y - origin.y) - (point_a.y - origin.y) * (
point_b.x - origin.x
)
def _is_point_on_segment(p1: Point, p2: Point, point: Point) -> bool:
"""Check if a point lies on the line segment between p1 and p2."""
# Check if point is collinear with segment endpoints
cross = (point.y - p1.y) * (p2.x - p1.x) - (point.x - p1.x) * (p2.y - p1.y)
if abs(cross) > 1e-9:
return False
# Check if point is within the bounding box of the segment
return min(p1.x, p2.x) <= point.x <= max(p1.x, p2.x) and min(
p1.y, p2.y
) <= point.y <= max(p1.y, p2.y)
def _find_leftmost_point(points: list[Point]) -> int:
"""Find index of leftmost point (and bottom-most in case of tie)."""
left_idx = 0
for i in range(1, len(points)):
if points[i].x < points[left_idx].x or (
points[i].x == points[left_idx].x and points[i].y < points[left_idx].y
):
left_idx = i
return left_idx
def _find_next_hull_point(points: list[Point], current_idx: int) -> int:
"""Find the next point on the convex hull."""
next_idx = (current_idx + 1) % len(points)
# Ensure next_idx is not the same as current_idx
while next_idx == current_idx:
next_idx = (next_idx + 1) % len(points)
for i in range(len(points)):
if i == current_idx:
continue
cross = _cross_product(points[current_idx], points[i], points[next_idx])
if cross > 0:
next_idx = i
return next_idx
def _is_valid_polygon(hull: list[Point]) -> bool:
"""Check if hull forms a valid polygon (has at least one non-collinear turn)."""
for i in range(len(hull)):
p1 = hull[i]
p2 = hull[(i + 1) % len(hull)]
p3 = hull[(i + 2) % len(hull)]
if abs(_cross_product(p1, p2, p3)) > 1e-9:
return True
return False
def _add_point_to_hull(hull: list[Point], point: Point) -> None:
"""Add a point to hull, removing collinear intermediate points."""
last = len(hull) - 1
if len(hull) > 1 and _is_point_on_segment(hull[last - 1], hull[last], point):
hull[last] = Point(point.x, point.y)
else:
hull.append(Point(point.x, point.y))
def jarvis_march(points: list[Point]) -> list[Point]:
"""
Find the convex hull of a set of points using the Jarvis March algorithm.
The algorithm starts with the leftmost point and wraps around the set of
points, selecting the most counter-clockwise point at each step.
Args:
points: List of Point objects representing 2D coordinates
Returns:
List of Points that form the convex hull in counter-clockwise order.
Returns empty list if there are fewer than 3 non-collinear points.
"""
if len(points) <= 2:
return []
# Remove duplicate points to avoid infinite loops
unique_points = list(set(points))
if len(unique_points) <= 2:
return []
convex_hull: list[Point] = []
# Find the leftmost point
left_point_idx = _find_leftmost_point(unique_points)
convex_hull.append(
Point(unique_points[left_point_idx].x, unique_points[left_point_idx].y)
)
current_idx = left_point_idx
while True:
# Find the next counter-clockwise point
next_idx = _find_next_hull_point(unique_points, current_idx)
if next_idx == left_point_idx:
break
if next_idx == current_idx:
break
current_idx = next_idx
_add_point_to_hull(convex_hull, unique_points[current_idx])
# Check for degenerate cases
if len(convex_hull) <= 2:
return []
# Check if last point is collinear with first and second-to-last
last = len(convex_hull) - 1
if _is_point_on_segment(convex_hull[last - 1], convex_hull[last], convex_hull[0]):
convex_hull.pop()
if len(convex_hull) == 2:
return []
# Verify the hull forms a valid polygon
if not _is_valid_polygon(convex_hull):
return []
return convex_hull
if __name__ == "__main__":
# Example usage
points = [Point(0, 0), Point(1, 1), Point(0, 1), Point(1, 0), Point(0.5, 0.5)]
hull = jarvis_march(points)
print(f"Convex hull: {hull}")
================================================
FILE: geometry/tests/__init__.py
================================================
================================================
FILE: geometry/tests/test_graham_scan.py
================================================
"""
Tests for the Graham scan convex hull algorithm.
"""
from geometry.graham_scan import Point, graham_scan
def test_empty_points() -> None:
"""Test with no points."""
assert graham_scan([]) == []
def test_single_point() -> None:
"""Test with a single point."""
assert graham_scan([Point(0, 0)]) == []
def test_two_points() -> None:
"""Test with two points."""
assert graham_scan([Point(0, 0), Point(1, 1)]) == []
def test_duplicate_points() -> None:
"""Test with all duplicate points."""
p = Point(0, 0)
points = [p, Point(0, 0), Point(0, 0), Point(0, 0), Point(0, 0)]
assert graham_scan(points) == []
def test_collinear_points() -> None:
"""Test with all points on the same line."""
points = [
Point(1, 0),
Point(2, 0),
Point(3, 0),
Point(4, 0),
Point(5, 0),
]
assert graham_scan(points) == []
def test_triangle() -> None:
"""Test with a triangle (3 points)."""
p1 = Point(1, 1)
p2 = Point(2, 1)
p3 = Point(1.5, 2)
points = [p1, p2, p3]
hull = graham_scan(points)
assert len(hull) == 3
assert p1 in hull
assert p2 in hull
assert p3 in hull
def test_rectangle() -> None:
"""Test with a rectangle (4 points)."""
p1 = Point(1, 1)
p2 = Point(2, 1)
p3 = Point(2, 2)
p4 = Point(1, 2)
points = [p1, p2, p3, p4]
hull = graham_scan(points)
assert len(hull) == 4
assert all(p in hull for p in points)
def test_triangle_with_interior_points() -> None:
"""Test triangle with points inside."""
p1 = Point(1, 1)
p2 = Point(2, 1)
p3 = Point(1.5, 2)
p4 = Point(1.5, 1.5) # Interior
p5 = Point(1.2, 1.3) # Interior
p6 = Point(1.8, 1.2) # Interior
p7 = Point(1.5, 1.9) # Interior
hull_points = [p1, p2, p3]
interior_points = [p4, p5, p6, p7]
all_points = hull_points + interior_points
hull = graham_scan(all_points)
# All hull points should be in the result
for p in hull_points:
assert p in hull
# No interior points should be in the result
for p in interior_points:
assert p not in hull
def test_rectangle_with_interior_points() -> None:
"""Test rectangle with points inside."""
p1 = Point(1, 1)
p2 = Point(2, 1)
p3 = Point(2, 2)
p4 = Point(1, 2)
p5 = Point(1.5, 1.5) # Interior
p6 = Point(1.2, 1.3) # Interior
p7 = Point(1.8, 1.2) # Interior
p8 = Point(1.9, 1.7) # Interior
p9 = Point(1.4, 1.9) # Interior
hull_points = [p1, p2, p3, p4]
interior_points = [p5, p6, p7, p8, p9]
all_points = hull_points + interior_points
hull = graham_scan(all_points)
# All hull points should be in the result
for p in hull_points:
assert p in hull
# No interior points should be in the result
for p in interior_points:
assert p not in hull
def test_star_shape() -> None:
"""Test with a star shape where only tips are on the convex hull."""
# Tips of the star (on convex hull)
p1 = Point(-5, 6)
p2 = Point(-11, 0)
p3 = Point(-9, -8)
p4 = Point(4, 4)
p5 = Point(6, -7)
# Interior points (not on convex hull)
p6 = Point(-7, -2)
p7 = Point(-2, -4)
p8 = Point(0, 1)
p9 = Point(1, 0)
p10 = Point(-6, 1)
hull_points = [p1, p2, p3, p4, p5]
interior_points = [p6, p7, p8, p9, p10]
all_points = hull_points + interior_points
hull = graham_scan(all_points)
# All hull points should be in the result
for p in hull_points:
assert p in hull
# No interior points should be in the result
for p in interior_points:
assert p not in hull
def test_rectangle_with_collinear_points() -> None:
"""Test rectangle with points on the edges (collinear with vertices)."""
p1 = Point(1, 1)
p2 = Point(2, 1)
p3 = Point(2, 2)
p4 = Point(1, 2)
p5 = Point(1.5, 1) # On edge p1-p2
p6 = Point(1, 1.5) # On edge p1-p4
p7 = Point(2, 1.5) # On edge p2-p3
p8 = Point(1.5, 2) # On edge p3-p4
hull_points = [p1, p2, p3, p4]
edge_points = [p5, p6, p7, p8]
all_points = hull_points + edge_points
hull = graham_scan(all_points)
# All corner points should be in the result
for p in hull_points:
assert p in hull
# Edge points should not be in the result (only corners)
for p in edge_points:
assert p not in hull
def test_point_equality() -> None:
"""Test Point equality."""
p1 = Point(1, 2)
p2 = Point(1, 2)
p3 = Point(2, 1)
assert p1 == p2
assert p1 != p3
def test_point_comparison() -> None:
"""Test Point comparison for sorting."""
p1 = Point(1, 2)
p2 = Point(1, 3)
p3 = Point(2, 2)
assert p1 < p2 # Lower y value
assert p1 < p3 # Same y, lower x
assert not p2 < p1
def test_euclidean_distance() -> None:
"""Test Euclidean distance calculation."""
p1 = Point(0, 0)
p2 = Point(3, 4)
assert p1.euclidean_distance(p2) == 5.0
def test_consecutive_orientation() -> None:
"""Test orientation calculation."""
p1 = Point(0, 0)
p2 = Point(1, 0)
p3_ccw = Point(1, 1) # Counter-clockwise
p3_cw = Point(1, -1) # Clockwise
p3_collinear = Point(2, 0) # Collinear
assert p1.consecutive_orientation(p2, p3_ccw) > 0 # Counter-clockwise
assert p1.consecutive_orientation(p2, p3_cw) < 0 # Clockwise
assert p1.consecutive_orientation(p2, p3_collinear) == 0 # Collinear
def test_large_hull() -> None:
"""Test with a larger set of points."""
# Create a circle of points
import math
points = []
for i in range(20):
angle = 2 * math.pi * i / 20
x = math.cos(angle)
y = math.sin(angle)
points.append(Point(x, y))
# Add some interior points
points.append(Point(0, 0))
points.append(Point(0.5, 0.5))
points.append(Point(-0.3, 0.2))
hull = graham_scan(points)
# The hull should contain the circle points but not the interior points
assert len(hull) >= 3
assert Point(0, 0) not in hull
assert Point(0.5, 0.5) not in hull
assert Point(-0.3, 0.2) not in hull
def test_random_order() -> None:
"""Test that point order doesn't affect the result."""
p1 = Point(0, 0)
p2 = Point(4, 0)
p3 = Point(4, 3)
p4 = Point(0, 3)
p5 = Point(2, 1.5) # Interior
# Try different orderings
order1 = [p1, p2, p3, p4, p5]
order2 = [p5, p4, p3, p2, p1]
order3 = [p3, p5, p1, p4, p2]
hull1 = graham_scan(order1)
hull2 = graham_scan(order2)
hull3 = graham_scan(order3)
# All should have the same points (though possibly in different order)
assert len(hull1) == len(hull2) == len(hull3) == 4
assert {(p.x, p.y) for p in hull1} == {(p.x, p.y) for p in hull2}
assert {(p.x, p.y) for p in hull2} == {(p.x, p.y) for p in hull3}
================================================
FILE: geometry/tests/test_jarvis_march.py
================================================
"""
Unit tests for Jarvis March (Gift Wrapping) algorithm.
"""
from geometry.jarvis_march import Point, jarvis_march
class TestPoint:
"""Tests for the Point class."""
def test_point_creation(self) -> None:
"""Test Point initialization."""
p = Point(1.0, 2.0)
assert p.x == 1.0
assert p.y == 2.0
def test_point_equality(self) -> None:
"""Test Point equality comparison."""
p1 = Point(1.0, 2.0)
p2 = Point(1.0, 2.0)
p3 = Point(2.0, 1.0)
assert p1 == p2
assert p1 != p3
def test_point_repr(self) -> None:
"""Test Point string representation."""
p = Point(1.5, 2.5)
assert repr(p) == "Point(1.5, 2.5)"
def test_point_hash(self) -> None:
"""Test Point hashing."""
p1 = Point(1.0, 2.0)
p2 = Point(1.0, 2.0)
assert hash(p1) == hash(p2)
class TestJarvisMarch:
"""Tests for the jarvis_march function."""
def test_triangle(self) -> None:
"""Test convex hull of a triangle."""
p1, p2, p3 = Point(1, 1), Point(2, 1), Point(1.5, 2)
hull = jarvis_march([p1, p2, p3])
assert len(hull) == 3
assert all(p in hull for p in [p1, p2, p3])
def test_collinear_points(self) -> None:
"""Test that collinear points return empty hull."""
points = [Point(i, 0) for i in range(5)]
hull = jarvis_march(points)
assert hull == []
def test_rectangle_with_interior_point(self) -> None:
"""Test rectangle with interior point - interior point excluded."""
p1, p2 = Point(1, 1), Point(2, 1)
p3, p4 = Point(2, 2), Point(1, 2)
p5 = Point(1.5, 1.5)
hull = jarvis_march([p1, p2, p3, p4, p5])
assert len(hull) == 4
assert p5 not in hull
def test_star_shape(self) -> None:
"""Test star shape - only tips are in hull."""
tips = [
Point(-5, 6),
Point(-11, 0),
Point(-9, -8),
Point(4, 4),
Point(6, -7),
]
interior = [Point(-7, -2), Point(-2, -4), Point(0, 1)]
hull = jarvis_march(tips + interior)
assert len(hull) == 5
assert all(p in hull for p in tips)
assert not any(p in hull for p in interior)
def test_empty_list(self) -> None:
"""Test empty list returns empty hull."""
assert jarvis_march([]) == []
def test_single_point(self) -> None:
"""Test single point returns empty hull."""
assert jarvis_march([Point(0, 0)]) == []
def test_two_points(self) -> None:
"""Test two points return empty hull."""
assert jarvis_march([Point(0, 0), Point(1, 1)]) == []
def test_square(self) -> None:
"""Test convex hull of a square."""
p1, p2 = Point(0, 0), Point(1, 0)
p3, p4 = Point(1, 1), Point(0, 1)
hull = jarvis_march([p1, p2, p3, p4])
assert len(hull) == 4
assert all(p in hull for p in [p1, p2, p3, p4])
def test_duplicate_points(self) -> None:
"""Test handling of duplicate points."""
p1, p2, p3 = Point(0, 0), Point(1, 0), Point(0, 1)
points = [p1, p2, p3, p1, p2] # Include duplicates
hull = jarvis_march(points)
assert len(hull) == 3
def test_pentagon(self) -> None:
"""Test convex hull of a pentagon."""
points = [
Point(0, 1),
Point(1, 2),
Point(2, 1),
Point(1.5, 0),
Point(0.5, 0),
]
hull = jarvis_march(points)
assert len(hull) == 5
assert all(p in hull for p in points)
================================================
FILE: graphics/__init__.py
================================================
================================================
FILE: graphics/bezier_curve.py
================================================
# https://en.wikipedia.org/wiki/B%C3%A9zier_curve
# https://www.tutorialspoint.com/computer_graphics/computer_graphics_curves.htm
from __future__ import annotations
from scipy.special import comb
class BezierCurve:
"""
Bezier curve is a weighted sum of a set of control points.
Generate Bezier curves from a given set of control points.
This implementation works only for 2d coordinates in the xy plane.
"""
def __init__(self, list_of_points: list[tuple[float, float]]):
"""
list_of_points: Control points in the xy plane on which to interpolate. These
points control the behavior (shape) of the Bezier curve.
"""
self.list_of_points = list_of_points
# Degree determines the flexibility of the curve.
# Degree = 1 will produce a straight line.
self.degree = len(list_of_points) - 1
def basis_function(self, t: float) -> list[float]:
"""
The basis function determines the weight of each control point at time t.
t: time value between 0 and 1 inclusive at which to evaluate the basis of
the curve.
returns a list of weights, one for each control point, at time t
>>> curve = BezierCurve([(1,1), (1,2)])
>>> [float(x) for x in curve.basis_function(0)]
[1.0, 0.0]
>>> [float(x) for x in curve.basis_function(1)]
[0.0, 1.0]
"""
assert 0 <= t <= 1, "Time t must be between 0 and 1."
output_values: list[float] = []
for i in range(len(self.list_of_points)):
# basis function for each i
output_values.append(
comb(self.degree, i) * ((1 - t) ** (self.degree - i)) * (t**i)
)
# the basis must sum up to 1 for it to produce a valid Bezier curve.
assert round(sum(output_values), 5) == 1
return output_values
def bezier_curve_function(self, t: float) -> tuple[float, float]:
"""
The function to produce the values of the Bezier curve at time t.
t: the value of time t at which to evaluate the Bezier function
Returns the x, y coordinates of the Bezier curve at time t.
The first point in the curve is when t = 0.
The last point in the curve is when t = 1.
>>> curve = BezierCurve([(1,1), (1,2)])
>>> tuple(float(x) for x in curve.bezier_curve_function(0))
(1.0, 1.0)
>>> tuple(float(x) for x in curve.bezier_curve_function(1))
(1.0, 2.0)
"""
assert 0 <= t <= 1, "Time t must be between 0 and 1."
basis_function = self.basis_function(t)
x = 0.0
y = 0.0
for i in range(len(self.list_of_points)):
# For all points, sum up the product of i-th basis function and i-th point.
x += basis_function[i] * self.list_of_points[i][0]
y += basis_function[i] * self.list_of_points[i][1]
return (x, y)
def plot_curve(self, step_size: float = 0.01):
"""
Plots the Bezier curve using matplotlib plotting capabilities.
step_size: defines the step(s) at which to evaluate the Bezier curve.
The smaller the step size, the finer the curve produced.
"""
from matplotlib import pyplot as plt
to_plot_x: list[float] = [] # x coordinates of points to plot
to_plot_y: list[float] = [] # y coordinates of points to plot
t = 0.0
while t <= 1:
value = self.bezier_curve_function(t)
to_plot_x.append(value[0])
to_plot_y.append(value[1])
t += step_size
x = [i[0] for i in self.list_of_points]
y = [i[1] for i in self.list_of_points]
plt.plot(
to_plot_x,
to_plot_y,
color="blue",
label="Curve of Degree " + str(self.degree),
)
plt.scatter(x, y, color="red", label="Control Points")
plt.legend()
plt.show()
if __name__ == "__main__":
import doctest
doctest.testmod()
BezierCurve([(1, 2), (3, 5)]).plot_curve() # degree 1
BezierCurve([(0, 0), (5, 5), (5, 0)]).plot_curve() # degree 2
BezierCurve([(0, 0), (5, 5), (5, 0), (2.5, -2.5)]).plot_curve() # degree 3
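# Midpoint sketch (editor's addition): for the degree-1 curve through (1, 1)
# and (1, 2) used in the doctests, both basis weights equal 0.5 at t = 0.5,
# so the curve evaluates to (1.0, 1.5) there.
line = BezierCurve([(1, 1), (1, 2)])
print([float(b) for b in line.basis_function(0.5)])  # [0.5, 0.5]
print(tuple(float(v) for v in line.bezier_curve_function(0.5)))  # (1.0, 1.5)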
================================================
FILE: graphics/butterfly_pattern.py
================================================
def butterfly_pattern(n: int) -> str:
"""
Creates a butterfly pattern of size n and returns it as a string.
>>> print(butterfly_pattern(3))
* *
** **
*****
** **
* *
>>> print(butterfly_pattern(5))
* *
** **
*** ***
**** ****
*********
**** ****
*** ***
** **
* *
"""
result = []
# Upper part
for i in range(1, n):
left_stars = "*" * i
spaces = " " * (2 * (n - i) - 1)
right_stars = "*" * i
result.append(left_stars + spaces + right_stars)
# Middle part
result.append("*" * (2 * n - 1))
# Lower part
for i in range(n - 1, 0, -1):
left_stars = "*" * i
spaces = " " * (2 * (n - i) - 1)
right_stars = "*" * i
result.append(left_stars + spaces + right_stars)
return "\n".join(result)
if __name__ == "__main__":
n = int(input("Enter the size of the butterfly pattern: "))
print(butterfly_pattern(n))
================================================
FILE: graphics/digital_differential_analyzer_line.py
================================================
import matplotlib.pyplot as plt
def digital_differential_analyzer_line(
p1: tuple[int, int], p2: tuple[int, int]
) -> list[tuple[int, int]]:
"""
Draws a line between two points using the DDA algorithm.
Args:
- p1: Coordinates of the starting point.
- p2: Coordinates of the ending point.
Returns:
- List of coordinate points that form the line.
>>> digital_differential_analyzer_line((1, 1), (4, 4))
[(2, 2), (3, 3), (4, 4)]
"""
x1, y1 = p1
x2, y2 = p2
dx = x2 - x1
dy = y2 - y1
steps = max(abs(dx), abs(dy))
x_increment = dx / float(steps)
y_increment = dy / float(steps)
coordinates = []
x: float = x1
y: float = y1
for _ in range(steps):
x += x_increment
y += y_increment
coordinates.append((round(x), round(y)))
return coordinates
if __name__ == "__main__":
import doctest
doctest.testmod()
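# Non-interactive sketch (editor's addition) mirroring the doctest: the DDA
# walk from (1, 1) to (4, 4) is [(2, 2), (3, 3), (4, 4)] -- note that the
# starting point itself is not part of the returned list.
print(digital_differential_analyzer_line((1, 1), (4, 4)))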
x1 = int(input("Enter the x-coordinate of the starting point: "))
y1 = int(input("Enter the y-coordinate of the starting point: "))
x2 = int(input("Enter the x-coordinate of the ending point: "))
y2 = int(input("Enter the y-coordinate of the ending point: "))
coordinates = digital_differential_analyzer_line((x1, y1), (x2, y2))
x_points, y_points = zip(*coordinates)
plt.plot(x_points, y_points, marker="o")
plt.title("Digital Differential Analyzer Line Drawing Algorithm")
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.grid()
plt.show()
================================================
FILE: graphics/vector3_for_2d_rendering.py
================================================
"""
Render 3D points onto 2D surfaces.
"""
from __future__ import annotations
import math
__version__ = "2020.9.26"
__author__ = "xcodz-dot, cclaus, dhruvmanila"
def convert_to_2d(
x: float, y: float, z: float, scale: float, distance: float
) -> tuple[float, float]:
"""
Converts 3d point to a 2d drawable point
>>> convert_to_2d(1.0, 2.0, 3.0, 10.0, 10.0)
(7.6923076923076925, 15.384615384615385)
>>> convert_to_2d(1, 2, 3, 10, 10)
(7.6923076923076925, 15.384615384615385)
>>> convert_to_2d("1", 2, 3, 10, 10) # '1' is str
Traceback (most recent call last):
...
TypeError: Input values must either be float or int: ['1', 2, 3, 10, 10]
"""
if not all(isinstance(val, (float, int)) for val in locals().values()):
msg = f"Input values must either be float or int: {list(locals().values())}"
raise TypeError(msg)
projected_x = ((x * distance) / (z + distance)) * scale
projected_y = ((y * distance) / (z + distance)) * scale
return projected_x, projected_y
def rotate(
x: float, y: float, z: float, axis: str, angle: float
) -> tuple[float, float, float]:
"""
Rotate a point around a given axis by a given angle (in degrees).
The angle wraps around modulo 360 and the axis can be any one of
'x', 'y', 'z'
>>> rotate(1.0, 2.0, 3.0, 'y', 90.0)
(3.130524675073759, 2.0, 0.4470070007889556)
>>> rotate(1, 2, 3, "z", 180)
(0.999736015495891, -2.0001319704760485, 3)
>>> rotate('1', 2, 3, "z", 90.0) # '1' is str
Traceback (most recent call last):
...
TypeError: Input values except axis must either be float or int: ['1', 2, 3, 90.0]
>>> rotate(1, 2, 3, "n", 90) # 'n' is not a valid axis
Traceback (most recent call last):
...
ValueError: not a valid axis, choose one of 'x', 'y', 'z'
>>> rotate(1, 2, 3, "x", -90)
(1, -2.5049096187183877, -2.5933429780983657)
>>> rotate(1, 2, 3, "x", 450) # 450 wrap around to 90
(1, 3.5776792428178217, -0.44744970165427644)
"""
if not isinstance(axis, str):
raise TypeError("Axis must be a str")
input_variables = locals()
del input_variables["axis"]
if not all(isinstance(val, (float, int)) for val in input_variables.values()):
msg = (
"Input values except axis must either be float or int: "
f"{list(input_variables.values())}"
)
raise TypeError(msg)
angle = (angle % 360) / 450 * 180 / math.pi
if axis == "z":
new_x = x * math.cos(angle) - y * math.sin(angle)
new_y = y * math.cos(angle) + x * math.sin(angle)
new_z = z
elif axis == "x":
new_y = y * math.cos(angle) - z * math.sin(angle)
new_z = z * math.cos(angle) + y * math.sin(angle)
new_x = x
elif axis == "y":
new_x = x * math.cos(angle) - z * math.sin(angle)
new_z = z * math.cos(angle) + x * math.sin(angle)
new_y = y
else:
raise ValueError("not a valid axis, choose one of 'x', 'y', 'z'")
return new_x, new_y, new_z
if __name__ == "__main__":
import doctest
doctest.testmod()
print(f"{convert_to_2d(1.0, 2.0, 3.0, 10.0, 10.0) = }")
print(f"{rotate(1.0, 2.0, 3.0, 'y', 90.0) = }")
================================================
FILE: graphs/__init__.py
================================================
================================================
FILE: graphs/a_star.py
================================================
from __future__ import annotations
DIRECTIONS = [
[-1, 0], # up (previous row)
[0, -1], # left (previous column)
[1, 0], # down (next row)
[0, 1], # right (next column)
]
# function to search the path
def search(
grid: list[list[int]],
init: list[int],
goal: list[int],
cost: int,
heuristic: list[list[int]],
) -> tuple[list[list[int]], list[list[int]]]:
"""
Search for a path on a grid avoiding obstacles.
>>> grid = [[0, 1, 0, 0, 0, 0],
... [0, 1, 0, 0, 0, 0],
... [0, 1, 0, 0, 0, 0],
... [0, 1, 0, 0, 1, 0],
... [0, 0, 0, 0, 1, 0]]
>>> init = [0, 0]
>>> goal = [len(grid) - 1, len(grid[0]) - 1]
>>> cost = 1
>>> heuristic = [[0 for row in range(len(grid[0]))] for col in range(len(grid))]
>>> for i in range(len(grid)):
... for j in range(len(grid[0])):
... heuristic[i][j] = abs(i - goal[0]) + abs(j - goal[1])
... if grid[i][j] == 1:
... heuristic[i][j] = 99
>>> path, action = search(grid, init, goal, cost, heuristic)
>>> path # doctest: +NORMALIZE_WHITESPACE
[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0], [4, 1], [4, 2], [4, 3], [3, 3],
[2, 3], [2, 4], [2, 5], [3, 5], [4, 5]]
>>> action # doctest: +NORMALIZE_WHITESPACE
[[0, 0, 0, 0, 0, 0], [2, 0, 0, 0, 0, 0], [2, 0, 0, 0, 3, 3],
[2, 0, 0, 0, 0, 2], [2, 3, 3, 3, 0, 2]]
"""
closed = [
[0 for col in range(len(grid[0]))] for row in range(len(grid))
] # the reference grid
closed[init[0]][init[1]] = 1
action = [
[0 for col in range(len(grid[0]))] for row in range(len(grid))
] # the action grid
x = init[0]
y = init[1]
g = 0
f = g + heuristic[x][y] # cost from starting cell to destination cell
cell = [[f, g, x, y]]
found = False # flag that is set when search is complete
resign = False # flag set if we can't expand any further
while not found and not resign:
if len(cell) == 0:
raise ValueError("Algorithm is unable to find solution")
else: # choose the least costly action so as to move closer to the goal
cell.sort()
cell.reverse()
next_cell = cell.pop()
x = next_cell[2]
y = next_cell[3]
g = next_cell[1]
if x == goal[0] and y == goal[1]:
found = True
else:
for i in range(len(DIRECTIONS)): # to try out different valid actions
x2 = x + DIRECTIONS[i][0]
y2 = y + DIRECTIONS[i][1]
if (
x2 >= 0
and x2 < len(grid)
and y2 >= 0
and y2 < len(grid[0])
and closed[x2][y2] == 0
and grid[x2][y2] == 0
):
g2 = g + cost
f2 = g2 + heuristic[x2][y2]
cell.append([f2, g2, x2, y2])
closed[x2][y2] = 1
action[x2][y2] = i
invpath = []
x = goal[0]
y = goal[1]
invpath.append([x, y]) # we get the reverse path from here
while x != init[0] or y != init[1]:
x2 = x - DIRECTIONS[action[x][y]][0]
y2 = y - DIRECTIONS[action[x][y]][1]
x = x2
y = y2
invpath.append([x, y])
path = []
for i in range(len(invpath)):
path.append(invpath[len(invpath) - 1 - i])
return path, action
if __name__ == "__main__":
grid = [
[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0], # 0s are free cells whereas 1s are obstacles
[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 0],
]
init = [0, 0]
# all coordinates are given in format [y,x]
goal = [len(grid) - 1, len(grid[0]) - 1]
cost = 1
# the cost map which pushes the path closer to the goal
heuristic = [[0 for row in range(len(grid[0]))] for col in range(len(grid))]
for i in range(len(grid)):
for j in range(len(grid[0])):
heuristic[i][j] = abs(i - goal[0]) + abs(j - goal[1])
if grid[i][j] == 1:
# added extra penalty in the heuristic map
heuristic[i][j] = 99
path, action = search(grid, init, goal, cost, heuristic)
print("ACTION MAP")
for i in range(len(action)):
print(action[i])
for i in range(len(path)):
print(path[i])
================================================
FILE: graphs/ant_colony_optimization_algorithms.py
================================================
"""
Use an ant colony optimization algorithm to solve the travelling salesman problem (TSP)
which asks the following question:
"Given a list of cities and the distances between each pair of cities, what is the
shortest possible route that visits each city exactly once and returns to the origin
city?"
https://en.wikipedia.org/wiki/Ant_colony_optimization_algorithms
https://en.wikipedia.org/wiki/Travelling_salesman_problem
Author: Clark
"""
import copy
import random
cities = {
0: [0, 0],
1: [0, 5],
2: [3, 8],
3: [8, 10],
4: [12, 8],
5: [12, 4],
6: [8, 0],
7: [6, 2],
}
def main(
cities: dict[int, list[int]],
ants_num: int,
iterations_num: int,
pheromone_evaporation: float,
alpha: float,
beta: float,
q: float, # Pheromone system parameters Q, which is a constant
) -> tuple[list[int], float]:
"""
Ant colony algorithm main function
>>> main(cities=cities, ants_num=10, iterations_num=20,
... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10)
([0, 1, 2, 3, 4, 5, 6, 7, 0], 37.909778143828696)
>>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=5,
... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10)
([0, 1, 0], 5.656854249492381)
>>> main(cities={0: [0, 0], 1: [2, 2], 4: [4, 4]}, ants_num=5, iterations_num=5,
... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10)
Traceback (most recent call last):
...
IndexError: list index out of range
>>> main(cities={}, ants_num=5, iterations_num=5,
... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10)
Traceback (most recent call last):
...
StopIteration
>>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=0, iterations_num=5,
... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10)
([], inf)
>>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=0,
... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10)
([], inf)
>>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=5,
... pheromone_evaporation=1, alpha=1.0, beta=5.0, q=10)
([0, 1, 0], 5.656854249492381)
>>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=5,
... pheromone_evaporation=0, alpha=1.0, beta=5.0, q=10)
([0, 1, 0], 5.656854249492381)
"""
# Initialize the pheromone matrix
cities_num = len(cities)
pheromone = [[1.0] * cities_num for _ in range(cities_num)]  # one independent row per city
best_path: list[int] = []
best_distance = float("inf")
for _ in range(iterations_num):
ants_route = []
for _ in range(ants_num):
unvisited_cities = copy.deepcopy(cities)
current_city = {next(iter(cities.keys())): next(iter(cities.values()))}
del unvisited_cities[next(iter(current_city.keys()))]
ant_route = [next(iter(current_city.keys()))]
while unvisited_cities:
current_city, unvisited_cities = city_select(
pheromone, current_city, unvisited_cities, alpha, beta
)
ant_route.append(next(iter(current_city.keys())))
ant_route.append(0)
ants_route.append(ant_route)
pheromone, best_path, best_distance = pheromone_update(
pheromone,
cities,
pheromone_evaporation,
ants_route,
q,
best_path,
best_distance,
)
return best_path, best_distance
def distance(city1: list[int], city2: list[int]) -> float:
"""
Calculate the distance between two coordinate points
>>> distance([0, 0], [3, 4] )
5.0
>>> distance([0, 0], [-3, 4] )
5.0
>>> distance([0, 0], [-3, -4] )
5.0
"""
return (((city1[0] - city2[0]) ** 2) + ((city1[1] - city2[1]) ** 2)) ** 0.5
def pheromone_update(
pheromone: list[list[float]],
cities: dict[int, list[int]],
pheromone_evaporation: float,
ants_route: list[list[int]],
q: float, # Pheromone system parameters Q, which is a constant
best_path: list[int],
best_distance: float,
) -> tuple[list[list[float]], list[int], float]:
"""
Update pheromones on the route and update the best route
>>>
>>> pheromone_update(pheromone=[[1.0, 1.0], [1.0, 1.0]],
... cities={0: [0,0], 1: [2,2]}, pheromone_evaporation=0.7,
... ants_route=[[0, 1, 0]], q=10, best_path=[],
... best_distance=float("inf"))
([[0.7, 4.235533905932737], [4.235533905932737, 0.7]], [0, 1, 0], 5.656854249492381)
>>> pheromone_update(pheromone=[],
... cities={0: [0,0], 1: [2,2]}, pheromone_evaporation=0.7,
... ants_route=[[0, 1, 0]], q=10, best_path=[],
... best_distance=float("inf"))
Traceback (most recent call last):
...
IndexError: list index out of range
>>> pheromone_update(pheromone=[[1.0, 1.0], [1.0, 1.0]],
... cities={}, pheromone_evaporation=0.7,
... ants_route=[[0, 1, 0]], q=10, best_path=[],
... best_distance=float("inf"))
Traceback (most recent call last):
...
KeyError: 0
"""
for a in range(len(cities)): # Evaporate pheromone on all routes
for b in range(len(cities)):
pheromone[a][b] *= pheromone_evaporation
for ant_route in ants_route:
total_distance = 0.0
for i in range(len(ant_route) - 1): # Calculate total distance
total_distance += distance(cities[ant_route[i]], cities[ant_route[i + 1]])
delta_pheromone = q / total_distance
for i in range(len(ant_route) - 1): # Update pheromones
pheromone[ant_route[i]][ant_route[i + 1]] += delta_pheromone
pheromone[ant_route[i + 1]][ant_route[i]] = pheromone[ant_route[i]][
ant_route[i + 1]
]
if total_distance < best_distance:
best_path = ant_route
best_distance = total_distance
return pheromone, best_path, best_distance
def city_select(
pheromone: list[list[float]],
current_city: dict[int, list[int]],
unvisited_cities: dict[int, list[int]],
alpha: float,
beta: float,
) -> tuple[dict[int, list[int]], dict[int, list[int]]]:
"""
Choose the next city for ants
>>> city_select(pheromone=[[1.0, 1.0], [1.0, 1.0]], current_city={0: [0, 0]},
... unvisited_cities={1: [2, 2]}, alpha=1.0, beta=5.0)
({1: [2, 2]}, {})
>>> city_select(pheromone=[], current_city={0: [0,0]},
... unvisited_cities={1: [2, 2]}, alpha=1.0, beta=5.0)
Traceback (most recent call last):
...
IndexError: list index out of range
>>> city_select(pheromone=[[1.0, 1.0], [1.0, 1.0]], current_city={},
... unvisited_cities={1: [2, 2]}, alpha=1.0, beta=5.0)
Traceback (most recent call last):
...
StopIteration
>>> city_select(pheromone=[[1.0, 1.0], [1.0, 1.0]], current_city={0: [0, 0]},
... unvisited_cities={}, alpha=1.0, beta=5.0)
Traceback (most recent call last):
...
IndexError: list index out of range
"""
probabilities = []
for city, value in unvisited_cities.items():
city_distance = distance(value, next(iter(current_city.values())))
probability = (pheromone[city][next(iter(current_city.keys()))] ** alpha) * (
(1 / city_distance) ** beta
)
probabilities.append(probability)
chosen_city_i = random.choices(
list(unvisited_cities.keys()), weights=probabilities
)[0]
chosen_city = {chosen_city_i: unvisited_cities[chosen_city_i]}
del unvisited_cities[next(iter(chosen_city.keys()))]
return chosen_city, unvisited_cities
if __name__ == "__main__":
best_path, best_distance = main(
cities=cities,
ants_num=10,
iterations_num=20,
pheromone_evaporation=0.7,
alpha=1.0,
beta=5.0,
q=10,
)
print(f"{best_path = }")
print(f"{best_distance = }")
================================================
FILE: graphs/articulation_points.py
================================================
# Finding Articulation Points in Undirected Graph
def compute_ap(graph):
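# (Editor's note) Sketch of the idea, as implemented below: low[v] tracks the
# smallest vertex label reachable from the DFS subtree rooted at v (via at most
# one back edge). A non-root vertex "at" is an articulation point when some
# child "to" cannot reach below it, i.e. low[to] >= at; a DFS root is one when
# it has more than one outgoing tree edge.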
n = len(graph)
out_edge_count = 0
low = [0] * n
visited = [False] * n
is_art = [False] * n
def dfs(root, at, parent, out_edge_count):
if parent == root:
out_edge_count += 1
visited[at] = True
low[at] = at
for to in graph[at]:
if to == parent:
pass
elif not visited[to]:
out_edge_count = dfs(root, to, at, out_edge_count)
low[at] = min(low[at], low[to])
# AP found via bridge
if at < low[to]:
is_art[at] = True
# AP found via cycle
if at == low[to]:
is_art[at] = True
else:
low[at] = min(low[at], to)
return out_edge_count
for i in range(n):
if not visited[i]:
out_edge_count = 0
out_edge_count = dfs(i, i, -1, out_edge_count)
is_art[i] = out_edge_count > 1
for x in range(len(is_art)):
if is_art[x] is True:
print(x)
# Adjacency list of graph
graph = {
0: [1, 2],
1: [0, 2],
2: [0, 1, 3, 5],
3: [2, 4],
4: [3],
5: [2, 6, 8],
6: [5, 7],
7: [6, 8],
8: [5, 7],
}
compute_ap(graph)
================================================
FILE: graphs/basic_graphs.py
================================================
from collections import deque
def _input(message):
return input(message).strip().split(" ")
def initialize_unweighted_directed_graph(
node_count: int, edge_count: int
) -> dict[int, list[int]]:
graph: dict[int, list[int]] = {}
for i in range(node_count):
graph[i + 1] = []
for e in range(edge_count):
x, y = (int(i) for i in _input(f"Edge {e + 1}: "))
graph[x].append(y)
return graph
def initialize_unweighted_undirected_graph(
node_count: int, edge_count: int
) -> dict[int, list[int]]:
graph: dict[int, list[int]] = {}
for i in range(node_count):
graph[i + 1] = []
for e in range(edge_count):
x, y = (int(i) for i in _input(f"Edge {e + 1}: "))
graph[x].append(y)
graph[y].append(x)
return graph
def initialize_weighted_undirected_graph(
node_count: int, edge_count: int
) -> dict[int, list[tuple[int, int]]]:
graph: dict[int, list[tuple[int, int]]] = {}
for i in range(node_count):
graph[i + 1] = []
for e in range(edge_count):
x, y, w = (int(i) for i in _input(f"Edge {e + 1}: "))
graph[x].append((y, w))
graph[y].append((x, w))
return graph
if __name__ == "__main__":
n, m = (int(i) for i in _input("Number of nodes and edges: "))
graph_choice = int(
_input(
"Press 1 or 2 or 3 \n"
"1. Unweighted directed \n"
"2. Unweighted undirected \n"
"3. Weighted undirected \n"
)[0]
)
g = {
1: initialize_unweighted_directed_graph,
2: initialize_unweighted_undirected_graph,
3: initialize_weighted_undirected_graph,
}[graph_choice](n, m)
"""
--------------------------------------------------------------------------------
Depth First Search.
Args : G - Dictionary of edges
s - Starting Node
Vars : vis - Set of visited nodes
S - Traversal Stack
--------------------------------------------------------------------------------
"""
def dfs(g, s):
"""
>>> dfs({1: [2, 3], 2: [4, 5], 3: [], 4: [], 5: []}, 1)
1
2
4
5
3
"""
vis, _s = {s}, [s]
print(s)
while _s:
flag = 0
for i in g[_s[-1]]:
if i not in vis:
_s.append(i)
vis.add(i)
flag = 1
print(i)
break
if not flag:
_s.pop()
"""
--------------------------------------------------------------------------------
Breadth First Search.
Args : G - Dictionary of edges
s - Starting Node
Vars : vis - Set of visited nodes
Q - Traversal Queue
--------------------------------------------------------------------------------
"""
def bfs(g, s):
"""
>>> bfs({1: [2, 3], 2: [4, 5], 3: [6, 7], 4: [], 5: [8], 6: [], 7: [], 8: []}, 1)
1
2
3
4
5
6
7
8
"""
vis, q = {s}, deque([s])
print(s)
while q:
u = q.popleft()
for v in g[u]:
if v not in vis:
vis.add(v)
q.append(v)
print(v)
"""
--------------------------------------------------------------------------------
Dijkstra's shortest path Algorithm
Args : G - Dictionary of edges
s - Starting Node
Vars : dist - Dictionary storing shortest distance from s to every other node
known - Set of known nodes
path - Preceding node in path
--------------------------------------------------------------------------------
"""
def dijk(g, s):
"""
>>> dijk({1: [(2, 7), (3, 9), (6, 14)],
...       2: [(1, 7), (3, 10), (4, 15)],
...       3: [(1, 9), (2, 10), (4, 11), (6, 2)],
...       4: [(2, 15), (3, 11), (5, 6)],
...       5: [(4, 6), (6, 9)],
...       6: [(1, 14), (3, 2), (5, 9)]}, 1)
7
9
11
20
20
"""
dist, known, path = {s: 0}, set(), {s: 0}
while True:
if len(known) == len(g) - 1:
break
mini = 100000
for key, value in dist.items():
if key not in known and value < mini:
mini = value
u = key
known.add(u)
for v in g[u]:
if v[0] not in known and dist[u] + v[1] < dist.get(v[0], 100000):
dist[v[0]] = dist[u] + v[1]
path[v[0]] = u
for key, value in dist.items():
if key != s:
print(value)
"""
--------------------------------------------------------------------------------
Topological Sort
--------------------------------------------------------------------------------
"""
def topo(g, ind=None, q=None):
if q is None:
q = [1]
if ind is None:
ind = [0] * (len(g) + 1)  # Since the 0th index is ignored
for u in g:
for v in g[u]:
ind[v] += 1
q = deque()
for i in g:
if ind[i] == 0:
q.append(i)
if len(q) == 0:
return
v = q.popleft()
print(v)
for w in g[v]:
ind[w] -= 1
if ind[w] == 0:
q.append(w)
topo(g, ind, q)
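# Usage sketch (a hypothetical DAG, not part of the original module):
#     topo({1: [2, 3], 2: [4], 3: [4], 4: []})
# prints one node per line in a valid topological order (1, 2, 3, 4 here).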
"""
--------------------------------------------------------------------------------
Reading an Adjacency matrix
--------------------------------------------------------------------------------
"""
def adjm():
r"""
Reading an Adjacency matrix
Parameters:
None
Returns:
tuple: A tuple containing the adjacency matrix (a list of row tuples) and the number of nodes
Example:
>>> # Simulate user input for a 4-node adjacency matrix
>>> input_data = "4\n0 1 0 1\n1 0 1 0\n0 1 0 1\n1 0 1 0\n"
>>> import sys,io
>>> original_input = sys.stdin
>>> sys.stdin = io.StringIO(input_data) # Redirect stdin for testing
>>> adjm()
([(0, 1, 0, 1), (1, 0, 1, 0), (0, 1, 0, 1), (1, 0, 1, 0)], 4)
>>> sys.stdin = original_input # Restore original stdin
"""
n = int(input().strip())
a = []
for _ in range(n):
a.append(tuple(map(int, input().strip().split())))
return a, n
"""
--------------------------------------------------------------------------------
Floyd Warshall's algorithm
Args : G - Dictionary of edges
s - Starting Node
Vars : dist - Dictionary storing shortest distance from s to every other node
known - Set of knows nodes
path - Preceding node in path
--------------------------------------------------------------------------------
"""
def floy(a_and_n):
(a, n) = a_and_n
dist = [list(row) for row in a]  # mutable copy (adjm() returns a list of tuples)
path = [[0] * n for i in range(n)]
for k in range(n):
for i in range(n):
for j in range(n):
if dist[i][j] > dist[i][k] + dist[k][j]:
dist[i][j] = dist[i][k] + dist[k][j]
path[i][j] = k
print(dist)
"""
--------------------------------------------------------------------------------
Prim's MST Algorithm
Args : G - Dictionary of edges
s - Starting Node
Vars : dist - Dictionary storing shortest distance from s to nearest node
known - Set of known nodes
path - Preceding node in path
--------------------------------------------------------------------------------
"""
def prim(g, s):
dist, known, path = {s: 0}, set(), {s: 0}
while True:
if len(known) == len(g) - 1:
break
mini = 100000
for key, value in dist.items():
if key not in known and value < mini:
mini = value
u = key
known.add(u)
for v in g[u]:
if v[0] not in known and v[1] < dist.get(v[0], 100000):
dist[v[0]] = v[1]
path[v[0]] = u
return dist
"""
--------------------------------------------------------------------------------
Accepting Edge list
Vars : n - Number of nodes
m - Number of edges
Returns : l - Edge list
n - Number of Nodes
--------------------------------------------------------------------------------
"""
def edglist():
r"""
Get the edges and number of edges from the user
Parameters:
None
Returns:
tuple: A tuple containing a list of edges and the number of nodes
Example:
>>> # Simulate user input for 3 edges and 4 vertices: (1, 2), (2, 3), (3, 4)
>>> input_data = "4 3\n1 2\n2 3\n3 4\n"
>>> import sys,io
>>> original_input = sys.stdin
>>> sys.stdin = io.StringIO(input_data) # Redirect stdin for testing
>>> edglist()
([(1, 2), (2, 3), (3, 4)], 4)
>>> sys.stdin = original_input # Restore original stdin
"""
n, m = tuple(map(int, input().split(" ")))
edges = []
for _ in range(m):
edges.append(tuple(map(int, input().split(" "))))
return edges, n
"""
--------------------------------------------------------------------------------
Kruskal's MST Algorithm
Args : E - Edge list
n - Number of Nodes
Vars : s - Set of all nodes as unique disjoint sets (initially)
--------------------------------------------------------------------------------
"""
def krusk(e_and_n):
"""
Sort edges on the basis of distance
"""
(e, n) = e_and_n
e.sort(reverse=True, key=lambda x: x[2])
s = [{i} for i in range(1, n + 1)]
while True:
if len(s) == 1:
break
print(s)
x = e.pop()
for i in range(len(s)):
if x[0] in s[i]:
break
for j in range(len(s)):
if x[1] in s[j]:
if i == j:
break
s[j].update(s[i])
s.pop(i)
break
def find_isolated_nodes(graph):
"""
Find the isolated node in the graph
Parameters:
graph (dict): A dictionary representing a graph.
Returns:
list: A list of isolated nodes.
Examples:
>>> graph1 = {1: [2, 3], 2: [1, 3], 3: [1, 2], 4: []}
>>> find_isolated_nodes(graph1)
[4]
>>> graph2 = {'A': ['B', 'C'], 'B': ['A'], 'C': ['A'], 'D': []}
>>> find_isolated_nodes(graph2)
['D']
>>> graph3 = {'X': [], 'Y': [], 'Z': []}
>>> find_isolated_nodes(graph3)
['X', 'Y', 'Z']
>>> graph4 = {1: [2, 3], 2: [1, 3], 3: [1, 2]}
>>> find_isolated_nodes(graph4)
[]
>>> graph5 = {}
>>> find_isolated_nodes(graph5)
[]
"""
isolated = []
for node in graph:
if not graph[node]:
isolated.append(node)
return isolated
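# Interactive usage sketch (nothing here runs on import). The weighted graph is
# the same example shown in dijk()'s docstring above:
#     example = {1: [(2, 7), (3, 9), (6, 14)], 2: [(1, 7), (3, 10), (4, 15)],
#                3: [(1, 9), (2, 10), (4, 11), (6, 2)], 4: [(2, 15), (3, 11), (5, 6)],
#                5: [(4, 6), (6, 9)], 6: [(1, 14), (3, 2), (5, 9)]}
#     dijk(example, 1)  # prints the shortest distances 7, 9, 11, 20, 20
#     prim(example, 1)  # returns each node's attachment cost to the MST (total 33)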
================================================
FILE: graphs/bellman_ford.py
================================================
from __future__ import annotations
def print_distance(distance: list[float], src):
print(f"Vertex\tShortest Distance from vertex {src}")
for i, d in enumerate(distance):
print(f"{i}\t\t{d}")
def check_negative_cycle(
graph: list[dict[str, int]], distance: list[float], edge_count: int
):
for j in range(edge_count):
u, v, w = (graph[j][k] for k in ["src", "dst", "weight"])
if distance[u] != float("inf") and distance[u] + w < distance[v]:
return True
return False
def bellman_ford(
graph: list[dict[str, int]], vertex_count: int, edge_count: int, src: int
) -> list[float]:
"""
Returns shortest paths from a vertex src to all
other vertices.
>>> edges = [(2, 1, -10), (3, 2, 3), (0, 3, 5), (0, 1, 4)]
>>> g = [{"src": s, "dst": d, "weight": w} for s, d, w in edges]
>>> bellman_ford(g, 4, 4, 0)
[0.0, -2.0, 8.0, 5.0]
>>> g = [{"src": s, "dst": d, "weight": w} for s, d, w in edges + [(1, 3, 5)]]
>>> bellman_ford(g, 4, 5, 0)
Traceback (most recent call last):
...
Exception: Negative cycle found
"""
distance = [float("inf")] * vertex_count
distance[src] = 0.0
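# Relax every edge vertex_count - 1 times: after pass k, every shortest path
# that uses at most k edges already has its final distance.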
for _ in range(vertex_count - 1):
for j in range(edge_count):
u, v, w = (graph[j][k] for k in ["src", "dst", "weight"])
if distance[u] != float("inf") and distance[u] + w < distance[v]:
distance[v] = distance[u] + w
negative_cycle_exists = check_negative_cycle(graph, distance, edge_count)
if negative_cycle_exists:
raise Exception("Negative cycle found")
return distance
if __name__ == "__main__":
import doctest
doctest.testmod()
V = int(input("Enter number of vertices: ").strip())
E = int(input("Enter number of edges: ").strip())
graph: list[dict[str, int]] = [{} for _ in range(E)]
for i in range(E):
print("Edge ", i + 1)
src, dest, weight = (
int(x)
for x in input("Enter source, destination, weight: ").strip().split(" ")
)
graph[i] = {"src": src, "dst": dest, "weight": weight}
source = int(input("\nEnter shortest path source:").strip())
shortest_distance = bellman_ford(graph, V, E, source)
print_distance(shortest_distance, source)
================================================
FILE: graphs/bi_directional_dijkstra.py
================================================
"""
Bi-directional Dijkstra's algorithm.
A bi-directional approach is an efficient and
less time-consuming optimization of Dijkstra's
search algorithm.
Reference: shorturl.at/exHM7
"""
# Author: Swayam Singh (https://github.com/practice404)
from queue import PriorityQueue
from typing import Any
import numpy as np
def pass_and_relaxation(
graph: dict,
v: str,
visited_forward: set,
visited_backward: set,
cst_fwd: dict,
cst_bwd: dict,
queue: PriorityQueue,
parent: dict,
shortest_distance: float,
) -> float:
for nxt, d in graph[v]:
if nxt in visited_forward:
continue
old_cost_f = cst_fwd.get(nxt, np.inf)
new_cost_f = cst_fwd[v] + d
if new_cost_f < old_cost_f:
queue.put((new_cost_f, nxt))
cst_fwd[nxt] = new_cost_f
parent[nxt] = v
if (
nxt in visited_backward
and cst_fwd[v] + d + cst_bwd[nxt] < shortest_distance
):
shortest_distance = cst_fwd[v] + d + cst_bwd[nxt]
return shortest_distance
def bidirectional_dij(
source: str, destination: str, graph_forward: dict, graph_backward: dict
) -> int:
"""
Bi-directional Dijkstra's algorithm.
Returns:
shortest_path_distance (int): length of the shortest path.
Warnings:
If the destination is not reachable, the function returns -1.
>>> bidirectional_dij("E", "F", graph_fwd, graph_bwd)
3
"""
shortest_path_distance = -1
visited_forward = set()
visited_backward = set()
cst_fwd = {source: 0}
cst_bwd = {destination: 0}
parent_forward = {source: None}
parent_backward = {destination: None}
queue_forward: PriorityQueue[Any] = PriorityQueue()
queue_backward: PriorityQueue[Any] = PriorityQueue()
shortest_distance = np.inf
queue_forward.put((0, source))
queue_backward.put((0, destination))
if source == destination:
return 0
while not queue_forward.empty() and not queue_backward.empty():
_, v_fwd = queue_forward.get()
visited_forward.add(v_fwd)
_, v_bwd = queue_backward.get()
visited_backward.add(v_bwd)
shortest_distance = pass_and_relaxation(
graph_forward,
v_fwd,
visited_forward,
visited_backward,
cst_fwd,
cst_bwd,
queue_forward,
parent_forward,
shortest_distance,
)
shortest_distance = pass_and_relaxation(
graph_backward,
v_bwd,
visited_backward,
visited_forward,
cst_bwd,
cst_fwd,
queue_backward,
parent_backward,
shortest_distance,
)
if cst_fwd[v_fwd] + cst_bwd[v_bwd] >= shortest_distance:
break
if shortest_distance != np.inf:
shortest_path_distance = shortest_distance
return shortest_path_distance
graph_fwd = {
"B": [["C", 1]],
"C": [["D", 1]],
"D": [["F", 1]],
"E": [["B", 1], ["G", 2]],
"F": [],
"G": [["F", 1]],
}
graph_bwd = {
"B": [["E", 1]],
"C": [["B", 1]],
"D": [["C", 1]],
"F": [["D", 1], ["G", 1]],
"E": [[None, np.inf]],
"G": [["E", 2]],
}
if __name__ == "__main__":
import doctest
doctest.testmod()
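# Usage sketch (mirrors the doctest above, using the module's own example
# adjacency lists graph_fwd and graph_bwd):
#     bidirectional_dij("E", "F", graph_fwd, graph_bwd)  # -> 3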
================================================
FILE: graphs/bidirectional_a_star.py
================================================
"""
https://en.wikipedia.org/wiki/Bidirectional_search
"""
from __future__ import annotations
import time
from math import sqrt
# 1 for manhattan, 0 for euclidean
HEURISTIC = 0
grid = [
[0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0], # 0 are free path whereas 1's are obstacles
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
]
delta = [[-1, 0], [0, -1], [1, 0], [0, 1]] # up, left, down, right
TPosition = tuple[int, int]
class Node:
"""
>>> k = Node(0, 0, 4, 3, 0, None)
>>> k.calculate_heuristic()
5.0
>>> n = Node(1, 4, 3, 4, 2, None)
>>> n.calculate_heuristic()
2.0
>>> l = [k, n]
>>> n == l[0]
False
>>> l.sort()
>>> n == l[0]
True
"""
def __init__(
self,
pos_x: int,
pos_y: int,
goal_x: int,
goal_y: int,
g_cost: int,
parent: Node | None,
) -> None:
self.pos_x = pos_x
self.pos_y = pos_y
self.pos = (pos_y, pos_x)
self.goal_x = goal_x
self.goal_y = goal_y
self.g_cost = g_cost
self.parent = parent
self.h_cost = self.calculate_heuristic()
self.f_cost = self.g_cost + self.h_cost
def calculate_heuristic(self) -> float:
"""
Heuristic for the A*
"""
dy = self.pos_x - self.goal_x
dx = self.pos_y - self.goal_y
if HEURISTIC == 1:
return abs(dx) + abs(dy)
else:
return sqrt(dy**2 + dx**2)
def __lt__(self, other: Node) -> bool:
return self.f_cost < other.f_cost
class AStar:
"""
>>> astar = AStar((0, 0), (len(grid) - 1, len(grid[0]) - 1))
>>> (astar.start.pos_y + delta[3][0], astar.start.pos_x + delta[3][1])
(0, 1)
>>> [x.pos for x in astar.get_successors(astar.start)]
[(1, 0), (0, 1)]
>>> (astar.start.pos_y + delta[2][0], astar.start.pos_x + delta[2][1])
(1, 0)
>>> astar.retrace_path(astar.start)
[(0, 0)]
>>> astar.search() # doctest: +NORMALIZE_WHITESPACE
[(0, 0), (1, 0), (2, 0), (2, 1), (2, 2), (2, 3), (3, 3),
(4, 3), (4, 4), (5, 4), (5, 5), (6, 5), (6, 6)]
"""
def __init__(self, start: TPosition, goal: TPosition):
self.start = Node(start[1], start[0], goal[1], goal[0], 0, None)
self.target = Node(goal[1], goal[0], goal[1], goal[0], 99999, None)
self.open_nodes = [self.start]
self.closed_nodes: list[Node] = []
self.reached = False
def search(self) -> list[TPosition]:
while self.open_nodes:
# Open Nodes are sorted using __lt__
self.open_nodes.sort()
current_node = self.open_nodes.pop(0)
if current_node.pos == self.target.pos:
return self.retrace_path(current_node)
self.closed_nodes.append(current_node)
successors = self.get_successors(current_node)
for child_node in successors:
if child_node in self.closed_nodes:
continue
if child_node not in self.open_nodes:
self.open_nodes.append(child_node)
else:
# retrieve the best current path
better_node = self.open_nodes.pop(self.open_nodes.index(child_node))
if child_node.g_cost < better_node.g_cost:
self.open_nodes.append(child_node)
else:
self.open_nodes.append(better_node)
return [self.start.pos]
def get_successors(self, parent: Node) -> list[Node]:
"""
Returns a list of valid successors (positions inside the grid that are free spaces)
"""
successors = []
for action in delta:
pos_x = parent.pos_x + action[1]
pos_y = parent.pos_y + action[0]
if not (0 <= pos_x <= len(grid[0]) - 1 and 0 <= pos_y <= len(grid) - 1):
continue
if grid[pos_y][pos_x] != 0:
continue
successors.append(
Node(
pos_x,
pos_y,
self.target.pos_y,
self.target.pos_x,
parent.g_cost + 1,
parent,
)
)
return successors
def retrace_path(self, node: Node | None) -> list[TPosition]:
"""
Retrace the path by following parent links back to the start node
"""
current_node = node
path = []
while current_node is not None:
path.append((current_node.pos_y, current_node.pos_x))
current_node = current_node.parent
path.reverse()
return path
class BidirectionalAStar:
"""
>>> bd_astar = BidirectionalAStar((0, 0), (len(grid) - 1, len(grid[0]) - 1))
>>> bd_astar.fwd_astar.start.pos == bd_astar.bwd_astar.target.pos
True
>>> bd_astar.retrace_bidirectional_path(bd_astar.fwd_astar.start,
... bd_astar.bwd_astar.start)
[(0, 0)]
>>> bd_astar.search() # doctest: +NORMALIZE_WHITESPACE
[(0, 0), (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (2, 4),
(2, 5), (3, 5), (4, 5), (5, 5), (5, 6), (6, 6)]
"""
def __init__(self, start: TPosition, goal: TPosition) -> None:
self.fwd_astar = AStar(start, goal)
self.bwd_astar = AStar(goal, start)
self.reached = False
def search(self) -> list[TPosition]:
while self.fwd_astar.open_nodes or self.bwd_astar.open_nodes:
self.fwd_astar.open_nodes.sort()
self.bwd_astar.open_nodes.sort()
current_fwd_node = self.fwd_astar.open_nodes.pop(0)
current_bwd_node = self.bwd_astar.open_nodes.pop(0)
if current_bwd_node.pos == current_fwd_node.pos:
return self.retrace_bidirectional_path(
current_fwd_node, current_bwd_node
)
self.fwd_astar.closed_nodes.append(current_fwd_node)
self.bwd_astar.closed_nodes.append(current_bwd_node)
self.fwd_astar.target = current_bwd_node
self.bwd_astar.target = current_fwd_node
successors = {
self.fwd_astar: self.fwd_astar.get_successors(current_fwd_node),
self.bwd_astar: self.bwd_astar.get_successors(current_bwd_node),
}
for astar in [self.fwd_astar, self.bwd_astar]:
for child_node in successors[astar]:
if child_node in astar.closed_nodes:
continue
if child_node not in astar.open_nodes:
astar.open_nodes.append(child_node)
else:
# retrieve the best current path
better_node = astar.open_nodes.pop(
astar.open_nodes.index(child_node)
)
if child_node.g_cost < better_node.g_cost:
astar.open_nodes.append(child_node)
else:
astar.open_nodes.append(better_node)
return [self.fwd_astar.start.pos]
def retrace_bidirectional_path(
self, fwd_node: Node, bwd_node: Node
) -> list[TPosition]:
fwd_path = self.fwd_astar.retrace_path(fwd_node)
bwd_path = self.bwd_astar.retrace_path(bwd_node)
bwd_path.pop()
bwd_path.reverse()
path = fwd_path + bwd_path
return path
if __name__ == "__main__":
# all coordinates are given in format [y,x]
init = (0, 0)
goal = (len(grid) - 1, len(grid[0]) - 1)
for elem in grid:
print(elem)
start_time = time.time()
a_star = AStar(init, goal)
path = a_star.search()
end_time = time.time() - start_time
print(f"AStar execution time = {end_time:f} seconds")
bd_start_time = time.time()
bidir_astar = BidirectionalAStar(init, goal)
bd_path = bidir_astar.search()
bd_end_time = time.time() - bd_start_time
print(f"BidirectionalAStar execution time = {bd_end_time:f} seconds")
================================================
FILE: graphs/bidirectional_breadth_first_search.py
================================================
"""
https://en.wikipedia.org/wiki/Bidirectional_search
"""
from __future__ import annotations
import time
Path = list[tuple[int, int]]
grid = [
[0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0], # 0 are free path whereas 1's are obstacles
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
]
delta = [[-1, 0], [0, -1], [1, 0], [0, 1]] # up, left, down, right
class Node:
def __init__(
self, pos_x: int, pos_y: int, goal_x: int, goal_y: int, parent: Node | None
):
self.pos_x = pos_x
self.pos_y = pos_y
self.pos = (pos_y, pos_x)
self.goal_x = goal_x
self.goal_y = goal_y
self.parent = parent
class BreadthFirstSearch:
"""
# Comment out slow pytests...
# 9.15s call graphs/bidirectional_breadth_first_search.py:: \
# graphs.bidirectional_breadth_first_search.BreadthFirstSearch
# >>> bfs = BreadthFirstSearch((0, 0), (len(grid) - 1, len(grid[0]) - 1))
# >>> (bfs.start.pos_y + delta[3][0], bfs.start.pos_x + delta[3][1])
(0, 1)
# >>> [x.pos for x in bfs.get_successors(bfs.start)]
[(1, 0), (0, 1)]
# >>> (bfs.start.pos_y + delta[2][0], bfs.start.pos_x + delta[2][1])
(1, 0)
# >>> bfs.retrace_path(bfs.start)
[(0, 0)]
# >>> bfs.search() # doctest: +NORMALIZE_WHITESPACE
[(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 1),
(5, 1), (5, 2), (5, 3), (5, 4), (5, 5), (6, 5), (6, 6)]
"""
def __init__(self, start: tuple[int, int], goal: tuple[int, int]):
self.start = Node(start[1], start[0], goal[1], goal[0], None)
self.target = Node(goal[1], goal[0], goal[1], goal[0], None)
self.node_queue = [self.start]
self.reached = False
def search(self) -> Path | None:
while self.node_queue:
current_node = self.node_queue.pop(0)
if current_node.pos == self.target.pos:
self.reached = True
return self.retrace_path(current_node)
successors = self.get_successors(current_node)
for node in successors:
self.node_queue.append(node)
if not self.reached:
return [self.start.pos]
return None
def get_successors(self, parent: Node) -> list[Node]:
"""
Returns a list of valid successors (positions inside the grid that are free spaces)
"""
successors = []
for action in delta:
pos_x = parent.pos_x + action[1]
pos_y = parent.pos_y + action[0]
if not (0 <= pos_x <= len(grid[0]) - 1 and 0 <= pos_y <= len(grid) - 1):
continue
if grid[pos_y][pos_x] != 0:
continue
successors.append(
Node(pos_x, pos_y, self.target.pos_y, self.target.pos_x, parent)
)
return successors
def retrace_path(self, node: Node | None) -> Path:
"""
Retrace the path by following parent links back to the start node
"""
current_node = node
path = []
while current_node is not None:
path.append((current_node.pos_y, current_node.pos_x))
current_node = current_node.parent
path.reverse()
return path
class BidirectionalBreadthFirstSearch:
"""
>>> bd_bfs = BidirectionalBreadthFirstSearch((0, 0), (len(grid) - 1,
... len(grid[0]) - 1))
>>> bd_bfs.fwd_bfs.start.pos == bd_bfs.bwd_bfs.target.pos
True
>>> bd_bfs.retrace_bidirectional_path(bd_bfs.fwd_bfs.start,
... bd_bfs.bwd_bfs.start)
[(0, 0)]
>>> bd_bfs.search() # doctest: +NORMALIZE_WHITESPACE
[(0, 0), (0, 1), (0, 2), (1, 2), (2, 2), (2, 3),
(2, 4), (3, 4), (3, 5), (3, 6), (4, 6), (5, 6), (6, 6)]
"""
def __init__(self, start, goal):
self.fwd_bfs = BreadthFirstSearch(start, goal)
self.bwd_bfs = BreadthFirstSearch(goal, start)
self.reached = False
def search(self) -> Path | None:
while self.fwd_bfs.node_queue or self.bwd_bfs.node_queue:
current_fwd_node = self.fwd_bfs.node_queue.pop(0)
current_bwd_node = self.bwd_bfs.node_queue.pop(0)
if current_bwd_node.pos == current_fwd_node.pos:
self.reached = True
return self.retrace_bidirectional_path(
current_fwd_node, current_bwd_node
)
self.fwd_bfs.target = current_bwd_node
self.bwd_bfs.target = current_fwd_node
successors = {
self.fwd_bfs: self.fwd_bfs.get_successors(current_fwd_node),
self.bwd_bfs: self.bwd_bfs.get_successors(current_bwd_node),
}
for bfs in [self.fwd_bfs, self.bwd_bfs]:
for node in successors[bfs]:
bfs.node_queue.append(node)
if not self.reached:
return [self.fwd_bfs.start.pos]
return None
def retrace_bidirectional_path(self, fwd_node: Node, bwd_node: Node) -> Path:
fwd_path = self.fwd_bfs.retrace_path(fwd_node)
bwd_path = self.bwd_bfs.retrace_path(bwd_node)
bwd_path.pop()
bwd_path.reverse()
path = fwd_path + bwd_path
return path
if __name__ == "__main__":
# all coordinates are given in format [y,x]
import doctest
doctest.testmod()
init = (0, 0)
goal = (len(grid) - 1, len(grid[0]) - 1)
for elem in grid:
print(elem)
start_bfs_time = time.time()
bfs = BreadthFirstSearch(init, goal)
path = bfs.search()
bfs_time = time.time() - start_bfs_time
print("Unidirectional BFS computation time : ", bfs_time)
start_bd_bfs_time = time.time()
bd_bfs = BidirectionalBreadthFirstSearch(init, goal)
bd_path = bd_bfs.search()
bd_bfs_time = time.time() - start_bd_bfs_time
print("Bidirectional BFS computation time : ", bd_bfs_time)
================================================
FILE: graphs/bidirectional_search.py
================================================
"""
Bidirectional Search Algorithm.
This algorithm searches from both the source and target nodes simultaneously,
meeting somewhere in the middle. This approach can significantly reduce the
search space compared to a traditional one-directional search.
Time Complexity: O(b^(d/2)) where b is the branching factor and d is the depth
Space Complexity: O(b^(d/2))
https://en.wikipedia.org/wiki/Bidirectional_search
"""
from collections import deque
def expand_search(
graph: dict[int, list[int]],
queue: deque[int],
parents: dict[int, int | None],
opposite_direction_parents: dict[int, int | None],
) -> int | None:
if not queue:
return None
current = queue.popleft()
for neighbor in graph[current]:
if neighbor in parents:
continue
parents[neighbor] = current
queue.append(neighbor)
# Check if this creates an intersection
if neighbor in opposite_direction_parents:
return neighbor
return None
def construct_path(current: int | None, parents: dict[int, int | None]) -> list[int]:
path: list[int] = []
while current is not None:
path.append(current)
current = parents[current]
return path
def bidirectional_search(
graph: dict[int, list[int]], start: int, goal: int
) -> list[int] | None:
"""
Perform bidirectional search on a graph to find the shortest path.
Args:
graph: A dictionary where keys are nodes and values are lists of adjacent nodes
start: The starting node
goal: The target node
Returns:
A list representing the path from start to goal, or None if no path exists
Examples:
>>> graph = {
... 0: [1, 2],
... 1: [0, 3, 4],
... 2: [0, 5, 6],
... 3: [1, 7],
... 4: [1, 8],
... 5: [2, 9],
... 6: [2, 10],
... 7: [3, 11],
... 8: [4, 11],
... 9: [5, 11],
... 10: [6, 11],
... 11: [7, 8, 9, 10],
... }
>>> bidirectional_search(graph=graph, start=0, goal=11)
[0, 1, 3, 7, 11]
>>> bidirectional_search(graph=graph, start=5, goal=5)
[5]
>>> disconnected_graph = {
... 0: [1, 2],
... 1: [0],
... 2: [0],
... 3: [4],
... 4: [3],
... }
>>> bidirectional_search(graph=disconnected_graph, start=0, goal=3) is None
True
"""
if start == goal:
return [start]
# Check if start and goal are in the graph
if start not in graph or goal not in graph:
return None
# Initialize forward and backward search dictionaries
# Each maps a node to its parent in the search
forward_parents: dict[int, int | None] = {start: None}
backward_parents: dict[int, int | None] = {goal: None}
# Initialize forward and backward search queues
forward_queue = deque([start])
backward_queue = deque([goal])
# Intersection node (where the two searches meet)
intersection = None
# Continue until both queues are empty or an intersection is found
while forward_queue and backward_queue and intersection is None:
# Expand forward search
intersection = expand_search(
graph=graph,
queue=forward_queue,
parents=forward_parents,
opposite_direction_parents=backward_parents,
)
# If no intersection found, expand backward search
if intersection is not None:
break
intersection = expand_search(
graph=graph,
queue=backward_queue,
parents=backward_parents,
opposite_direction_parents=forward_parents,
)
# If no intersection found, there's no path
if intersection is None:
return None
# Construct path from start to intersection
forward_path: list[int] = construct_path(
current=intersection, parents=forward_parents
)
forward_path.reverse()
# Construct path from intersection to goal
backward_path: list[int] = construct_path(
current=backward_parents[intersection], parents=backward_parents
)
# Return the complete path
return forward_path + backward_path
def main() -> None:
"""
Run example of bidirectional search algorithm.
Examples:
>>> main() # doctest: +NORMALIZE_WHITESPACE
Path from 0 to 11: [0, 1, 3, 7, 11]
Path from 5 to 5: [5]
Path from 0 to 3: None
"""
# Example graph represented as an adjacency list
example_graph = {
0: [1, 2],
1: [0, 3, 4],
2: [0, 5, 6],
3: [1, 7],
4: [1, 8],
5: [2, 9],
6: [2, 10],
7: [3, 11],
8: [4, 11],
9: [5, 11],
10: [6, 11],
11: [7, 8, 9, 10],
}
# Test case 1: Path exists
start, goal = 0, 11
path = bidirectional_search(graph=example_graph, start=start, goal=goal)
print(f"Path from {start} to {goal}: {path}")
# Test case 2: Start and goal are the same
start, goal = 5, 5
path = bidirectional_search(graph=example_graph, start=start, goal=goal)
print(f"Path from {start} to {goal}: {path}")
# Test case 3: No path exists (disconnected graph)
disconnected_graph = {
0: [1, 2],
1: [0],
2: [0],
3: [4],
4: [3],
}
start, goal = 0, 3
path = bidirectional_search(graph=disconnected_graph, start=start, goal=goal)
print(f"Path from {start} to {goal}: {path}")
if __name__ == "__main__":
main()
================================================
FILE: graphs/boruvka.py
================================================
"""Borůvka's algorithm.
Determines the minimum spanning tree (MST) of a graph using Borůvka's algorithm.
Borůvka's algorithm is a greedy algorithm for finding a minimum spanning tree in a
connected graph, or a minimum spanning forest of a graph that is not connected.
The time complexity of this algorithm is O(ELogV), where E represents the number
of edges, while V represents the number of nodes.
O(number_of_edges Log number_of_nodes)
The space complexity of this algorithm is O(V + E), since we have to keep a couple
of lists whose sizes are equal to the number of nodes, as well as keep all the
edges of a graph inside of the data structure itself.
Borůvka's algorithm gives us pretty much the same result as other MST Algorithms -
they all find the minimum spanning tree, and the time complexity is approximately
the same.
One advantage that Borůvka's algorithm has compared to the alternatives is that it
doesn't need to presort the edges or maintain a priority queue in order to find the
minimum spanning tree.
Even though that does not improve its asymptotic complexity, since it still passes
over the edges about log E times, it is a bit simpler to code.
Details: https://en.wikipedia.org/wiki/Bor%C5%AFvka%27s_algorithm
"""
from __future__ import annotations
from typing import Any
class Graph:
def __init__(self, num_of_nodes: int) -> None:
"""
Arguments:
num_of_nodes - the number of nodes in the graph
Attributes:
m_num_of_nodes - the number of nodes in the graph.
m_edges - the list of edges.
m_component - the dictionary which stores the index of the component which
a node belongs to.
"""
self.m_num_of_nodes = num_of_nodes
self.m_edges: list[list[int]] = []
self.m_component: dict[int, int] = {}
def add_edge(self, u_node: int, v_node: int, weight: int) -> None:
"""Adds an edge in the format [first, second, edge weight] to graph."""
self.m_edges.append([u_node, v_node, weight])
def find_component(self, u_node: int) -> int:
"""Finds the component (root node) that a given node belongs to."""
if self.m_component[u_node] == u_node:
return u_node
return self.find_component(self.m_component[u_node])
def set_component(self, u_node: int) -> None:
"""Propagates the root component found for a node to the whole component."""
if self.m_component[u_node] != u_node:
for k in self.m_component:
self.m_component[k] = self.find_component(k)
def union(self, component_size: list[int], u_node: int, v_node: int) -> None:
"""Union finds the roots of components for two nodes, compares the components
in terms of size, and attaches the smaller one to the larger one to form
a single component."""
if component_size[u_node] <= component_size[v_node]:
self.m_component[u_node] = v_node
component_size[v_node] += component_size[u_node]
self.set_component(u_node)
elif component_size[u_node] >= component_size[v_node]:
self.m_component[v_node] = self.find_component(u_node)
component_size[u_node] += component_size[v_node]
self.set_component(v_node)
def boruvka(self) -> None:
"""Performs Borůvka's algorithm to find MST."""
# Initialize additional lists required by the algorithm.
component_size = []
mst_weight = 0
minimum_weight_edge: list[Any] = [-1] * self.m_num_of_nodes
# A list of components (initialized to all of the nodes)
for node in range(self.m_num_of_nodes):
self.m_component.update({node: node})
component_size.append(1)
num_of_components = self.m_num_of_nodes
while num_of_components > 1:
for edge in self.m_edges:
u, v, w = edge
u_component = self.m_component[u]
v_component = self.m_component[v]
if u_component != v_component:
"""If the current minimum weight edge of component u doesn't
exist (is -1), or if it's greater than the edge we're
observing right now, we will assign the value of the edge
we're observing to it.
If the current minimum weight edge of component v doesn't
exist (is -1), or if it's greater than the edge we're
observing right now, we will assign the value of the edge
we're observing to it"""
for component in (u_component, v_component):
if (
minimum_weight_edge[component] == -1
or minimum_weight_edge[component][2] > w
):
minimum_weight_edge[component] = [u, v, w]
for edge in minimum_weight_edge:
if isinstance(edge, list):
u, v, w = edge
u_component = self.m_component[u]
v_component = self.m_component[v]
if u_component != v_component:
mst_weight += w
self.union(component_size, u_component, v_component)
print(f"Added edge [{u} - {v}]\nAdded weight: {w}\n")
num_of_components -= 1
minimum_weight_edge = [-1] * self.m_num_of_nodes
print(f"The total weight of the minimal spanning tree is: {mst_weight}")
def test_vector() -> None:
"""
>>> g = Graph(8)
>>> for u_v_w in ((0, 1, 10), (0, 2, 6), (0, 3, 5), (1, 3, 15), (2, 3, 4),
... (3, 4, 8), (4, 5, 10), (4, 6, 6), (4, 7, 5), (5, 7, 15), (6, 7, 4)):
... g.add_edge(*u_v_w)
>>> g.boruvka()
Added edge [0 - 3]
Added weight: 5
Added edge [0 - 1]
Added weight: 10
Added edge [2 - 3]
Added weight: 4
Added edge [4 - 7]
Added weight: 5
Added edge [4 - 5]
Added weight: 10
Added edge [6 - 7]
Added weight: 4
Added edge [3 - 4]
Added weight: 8
The total weight of the minimal spanning tree is: 46
"""
if __name__ == "__main__":
import doctest
doctest.testmod()
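# Usage sketch (same edges as the test_vector doctest above):
#     g = Graph(8)
#     for u_v_w in ((0, 1, 10), (0, 2, 6), (0, 3, 5), (1, 3, 15), (2, 3, 4),
#                   (3, 4, 8), (4, 5, 10), (4, 6, 6), (4, 7, 5), (5, 7, 15),
#                   (6, 7, 4)):
#         g.add_edge(*u_v_w)
#     g.boruvka()  # prints the chosen edges and a total MST weight of 46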
================================================
FILE: graphs/breadth_first_search.py
================================================
#!/usr/bin/python
"""Author: OMKAR PATHAK"""
from __future__ import annotations
from queue import Queue
class Graph:
def __init__(self) -> None:
self.vertices: dict[int, list[int]] = {}
def print_graph(self) -> None:
"""
prints adjacency list representation of graph
>>> g = Graph()
>>> g.print_graph()
>>> g.add_edge(0, 1)
>>> g.print_graph()
0 : 1
"""
for i in self.vertices:
print(i, " : ", " -> ".join([str(j) for j in self.vertices[i]]))
def add_edge(self, from_vertex: int, to_vertex: int) -> None:
"""
adding the edge between two vertices
>>> g = Graph()
>>> g.print_graph()
>>> g.add_edge(0, 1)
>>> g.print_graph()
0 : 1
"""
if from_vertex in self.vertices:
self.vertices[from_vertex].append(to_vertex)
else:
self.vertices[from_vertex] = [to_vertex]
def bfs(self, start_vertex: int) -> set[int]:
"""
>>> g = Graph()
>>> g.add_edge(0, 1)
>>> g.add_edge(0, 1)
>>> g.add_edge(0, 2)
>>> g.add_edge(1, 2)
>>> g.add_edge(2, 0)
>>> g.add_edge(2, 3)
>>> g.add_edge(3, 3)
>>> sorted(g.bfs(2))
[0, 1, 2, 3]
"""
# initialize set for storing already visited vertices
visited = set()
# create a first in first out queue to store all the vertices for BFS
queue: Queue = Queue()
# mark the source node as visited and enqueue it
visited.add(start_vertex)
queue.put(start_vertex)
while not queue.empty():
vertex = queue.get()
# loop through all adjacent vertex and enqueue it if not yet visited
for adjacent_vertex in self.vertices[vertex]:
if adjacent_vertex not in visited:
queue.put(adjacent_vertex)
visited.add(adjacent_vertex)
return visited
if __name__ == "__main__":
from doctest import testmod
testmod(verbose=True)
g = Graph()
g.add_edge(0, 1)
g.add_edge(0, 2)
g.add_edge(1, 2)
g.add_edge(2, 0)
g.add_edge(2, 3)
g.add_edge(3, 3)
g.print_graph()
# 0 : 1 -> 2
# 1 : 2
# 2 : 0 -> 3
# 3 : 3
assert sorted(g.bfs(2)) == [0, 1, 2, 3]
================================================
FILE: graphs/breadth_first_search_2.py
================================================
"""
https://en.wikipedia.org/wiki/Breadth-first_search
pseudo-code:
breadth_first_search(graph G, start vertex s):
// all nodes initially unexplored
mark s as explored
let Q = queue data structure, initialized with s
while Q is non-empty:
remove the first node of Q, call it v
for each edge(v, w): // for w in graph[v]
if w unexplored:
mark w as explored
add w to Q (at the end)
"""
from __future__ import annotations
from collections import deque
from queue import Queue
from timeit import timeit
G = {
"A": ["B", "C"],
"B": ["A", "D", "E"],
"C": ["A", "F"],
"D": ["B"],
"E": ["B", "F"],
"F": ["C", "E"],
}
def breadth_first_search(graph: dict, start: str) -> list[str]:
"""
Implementation of breadth first search using queue.Queue.
>>> ''.join(breadth_first_search(G, 'A'))
'ABCDEF'
"""
explored = {start}
result = [start]
queue: Queue = Queue()
queue.put(start)
while not queue.empty():
v = queue.get()
for w in graph[v]:
if w not in explored:
explored.add(w)
result.append(w)
queue.put(w)
return result
def breadth_first_search_with_deque(graph: dict, start: str) -> list[str]:
"""
Implementation of breadth first search using collections.deque.
>>> ''.join(breadth_first_search_with_deque(G, 'A'))
'ABCDEF'
"""
visited = {start}
result = [start]
queue = deque([start])
while queue:
v = queue.popleft()
for child in graph[v]:
if child not in visited:
visited.add(child)
result.append(child)
queue.append(child)
return result
def benchmark_function(name: str) -> None:
setup = f"from __main__ import G, {name}"
number = 10000
res = timeit(f"{name}(G, 'A')", setup=setup, number=number)
print(f"{name:<35} finished {number} runs in {res:.5f} seconds")
if __name__ == "__main__":
import doctest
doctest.testmod()
benchmark_function("breadth_first_search")
benchmark_function("breadth_first_search_with_deque")
# breadth_first_search finished 10000 runs in 0.20999 seconds
# breadth_first_search_with_deque finished 10000 runs in 0.01421 seconds
================================================
FILE: graphs/breadth_first_search_shortest_path.py
================================================
"""Breath First Search (BFS) can be used when finding the shortest path
from a given source node to a target node in an unweighted graph.
"""
from __future__ import annotations
graph = {
"A": ["B", "C", "E"],
"B": ["A", "D", "E"],
"C": ["A", "F", "G"],
"D": ["B"],
"E": ["A", "B", "D"],
"F": ["C"],
"G": ["C"],
}
class Graph:
def __init__(self, graph: dict[str, list[str]], source_vertex: str) -> None:
"""
The graph is implemented as a dictionary of adjacency lists. The
source vertex has to be defined upon initialization.
"""
self.graph = graph
# mapping node to its parent in resulting breadth first tree
self.parent: dict[str, str | None] = {}
self.source_vertex = source_vertex
def breath_first_search(self) -> None:
"""
This function is a helper for running breadth first search on this graph.
>>> g = Graph(graph, "G")
>>> g.breath_first_search()
>>> g.parent
{'G': None, 'C': 'G', 'A': 'C', 'F': 'C', 'B': 'A', 'E': 'A', 'D': 'B'}
"""
visited = {self.source_vertex}
self.parent[self.source_vertex] = None
queue = [self.source_vertex] # first in first out queue
while queue:
vertex = queue.pop(0)
for adjacent_vertex in self.graph[vertex]:
if adjacent_vertex not in visited:
visited.add(adjacent_vertex)
self.parent[adjacent_vertex] = vertex
queue.append(adjacent_vertex)
def shortest_path(self, target_vertex: str) -> str:
"""
This shortest path function describes the result in one of two ways:
1.) No path is found: a ValueError with a human readable message is raised.
2.) The shortest path is found: a string of the form
`v1(->v2->v3->...->vn)` is returned, where v1 is the source vertex and vn is
the target vertex.
>>> g = Graph(graph, "G")
>>> g.breath_first_search()
Case 1 - No path is found.
>>> g.shortest_path("Foo")
Traceback (most recent call last):
...
ValueError: No path from vertex: G to vertex: Foo
Case 2 - The path is found.
>>> g.shortest_path("D")
'G->C->A->B->D'
>>> g.shortest_path("G")
'G'
"""
if target_vertex == self.source_vertex:
return self.source_vertex
target_vertex_parent = self.parent.get(target_vertex)
if target_vertex_parent is None:
msg = (
f"No path from vertex: {self.source_vertex} to vertex: {target_vertex}"
)
raise ValueError(msg)
return self.shortest_path(target_vertex_parent) + f"->{target_vertex}"
if __name__ == "__main__":
g = Graph(graph, "G")
g.breath_first_search()
print(g.shortest_path("D"))
print(g.shortest_path("G"))
print(g.shortest_path("Foo"))
================================================
FILE: graphs/breadth_first_search_shortest_path_2.py
================================================
"""Breadth-first search the shortest path implementations.
doctest:
python -m doctest -v breadth_first_search_shortest_path_2.py
Manual test:
python breadth_first_search_shortest_path_2.py
"""
from collections import deque
demo_graph = {
"A": ["B", "C", "E"],
"B": ["A", "D", "E"],
"C": ["A", "F", "G"],
"D": ["B"],
"E": ["A", "B", "D"],
"F": ["C"],
"G": ["C"],
}
def bfs_shortest_path(graph: dict, start, goal) -> list[str]:
"""Find the shortest path between `start` and `goal` nodes.
Args:
graph (dict): node/list of neighboring nodes key/value pairs.
start: start node.
goal: target node.
Returns:
Shortest path between `start` and `goal` nodes as a string of nodes.
'Not found' string if no path found.
Example:
>>> bfs_shortest_path(demo_graph, "G", "D")
['G', 'C', 'A', 'B', 'D']
>>> bfs_shortest_path(demo_graph, "G", "G")
['G']
>>> bfs_shortest_path(demo_graph, "G", "Unknown")
[]
"""
# keep track of explored nodes
explored = set()
# keep track of all the paths to be checked
queue = deque([[start]])
# return path if start is goal
if start == goal:
return [start]
# keeps looping until all possible paths have been checked
while queue:
# pop the first path from the queue
path = queue.popleft()
# get the last node from the path
node = path[-1]
if node not in explored:
neighbours = graph[node]
# go through all neighbour nodes, construct a new path and
# push it into the queue
for neighbour in neighbours:
new_path = list(path)
new_path.append(neighbour)
queue.append(new_path)
# return path if neighbour is goal
if neighbour == goal:
return new_path
# mark node as explored
explored.add(node)
# in case there's no path between the 2 nodes
return []
def bfs_shortest_path_distance(graph: dict, start, target) -> int:
"""Find the shortest path distance between `start` and `target` nodes.
Args:
graph: node/list of neighboring nodes key/value pairs.
start: node to start search from.
target: node to search for.
Returns:
Number of edges in the shortest path between `start` and `target` nodes.
-1 if no path exists.
Example:
>>> bfs_shortest_path_distance(demo_graph, "G", "D")
4
>>> bfs_shortest_path_distance(demo_graph, "A", "A")
0
>>> bfs_shortest_path_distance(demo_graph, "A", "Unknown")
-1
"""
if not graph or start not in graph or target not in graph:
return -1
if start == target:
return 0
queue = deque([start])
visited = {start}
# Keep tab on distances from `start` node.
dist = {start: 0, target: -1}
while queue:
node = queue.popleft()
if node == target:
dist[target] = (
dist[node] if dist[target] == -1 else min(dist[target], dist[node])
)
for adjacent in graph[node]:
if adjacent not in visited:
visited.add(adjacent)
queue.append(adjacent)
dist[adjacent] = dist[node] + 1
return dist[target]
if __name__ == "__main__":
print(bfs_shortest_path(demo_graph, "G", "D")) # returns ['G', 'C', 'A', 'B', 'D']
print(bfs_shortest_path_distance(demo_graph, "G", "D")) # returns 4
================================================
FILE: graphs/breadth_first_search_zero_one_shortest_path.py
================================================
"""
Finding the shortest path in a 0-1 graph in O(E + V), which is faster than Dijkstra.
A 0-1 graph is a weighted graph whose edge weights are all either 0 or 1.
Link: https://codeforces.com/blog/entry/22276
"""
from __future__ import annotations
from collections import deque
from collections.abc import Iterator
from dataclasses import dataclass
@dataclass
class Edge:
"""Weighted directed graph edge."""
destination_vertex: int
weight: int
class AdjacencyList:
"""Graph adjacency list."""
def __init__(self, size: int):
self._graph: list[list[Edge]] = [[] for _ in range(size)]
self._size = size
def __getitem__(self, vertex: int) -> Iterator[Edge]:
"""Get all the vertices adjacent to the given one."""
return iter(self._graph[vertex])
@property
def size(self):
return self._size
def add_edge(self, from_vertex: int, to_vertex: int, weight: int):
"""
>>> g = AdjacencyList(2)
>>> g.add_edge(0, 1, 0)
>>> g.add_edge(1, 0, 1)
>>> list(g[0])
[Edge(destination_vertex=1, weight=0)]
>>> list(g[1])
[Edge(destination_vertex=0, weight=1)]
>>> g.add_edge(0, 1, 2)
Traceback (most recent call last):
...
ValueError: Edge weight must be either 0 or 1.
>>> g.add_edge(0, 2, 1)
Traceback (most recent call last):
...
ValueError: Vertex indexes must be in [0; size).
"""
if weight not in (0, 1):
raise ValueError("Edge weight must be either 0 or 1.")
if to_vertex < 0 or to_vertex >= self.size:
raise ValueError("Vertex indexes must be in [0; size).")
self._graph[from_vertex].append(Edge(to_vertex, weight))
def get_shortest_path(self, start_vertex: int, finish_vertex: int) -> int | None:
"""
Return the shortest distance from start_vertex to finish_vertex in 0-1-graph.
1 1 1
0--------->3 6--------7>------->8
| ^ ^ ^ |1
| | | |0 v
0| |0 1| 9-------->10
| | | ^ 1
v | | |0
1--------->2<-------4------->5
0 1 1
>>> g = AdjacencyList(11)
>>> g.add_edge(0, 1, 0)
>>> g.add_edge(0, 3, 1)
>>> g.add_edge(1, 2, 0)
>>> g.add_edge(2, 3, 0)
>>> g.add_edge(4, 2, 1)
>>> g.add_edge(4, 5, 1)
>>> g.add_edge(4, 6, 1)
>>> g.add_edge(5, 9, 0)
>>> g.add_edge(6, 7, 1)
>>> g.add_edge(7, 8, 1)
>>> g.add_edge(8, 10, 1)
>>> g.add_edge(9, 7, 0)
>>> g.add_edge(9, 10, 1)
>>> g.add_edge(1, 2, 2)
Traceback (most recent call last):
...
ValueError: Edge weight must be either 0 or 1.
>>> g.get_shortest_path(0, 3)
0
>>> g.get_shortest_path(0, 4)
Traceback (most recent call last):
...
ValueError: No path from start_vertex to finish_vertex.
>>> g.get_shortest_path(4, 10)
2
>>> g.get_shortest_path(4, 8)
2
>>> g.get_shortest_path(0, 1)
0
>>> g.get_shortest_path(1, 0)
Traceback (most recent call last):
...
ValueError: No path from start_vertex to finish_vertex.
"""
queue = deque([start_vertex])
distances: list[int | None] = [None] * self.size
distances[start_vertex] = 0
while queue:
current_vertex = queue.popleft()
current_distance = distances[current_vertex]
if current_distance is None:
continue
for edge in self[current_vertex]:
new_distance = current_distance + edge.weight
dest_vertex_distance = distances[edge.destination_vertex]
if (
isinstance(dest_vertex_distance, int)
and new_distance >= dest_vertex_distance
):
continue
distances[edge.destination_vertex] = new_distance
if edge.weight == 0:
queue.appendleft(edge.destination_vertex)
else:
queue.append(edge.destination_vertex)
if distances[finish_vertex] is None:
raise ValueError("No path from start_vertex to finish_vertex.")
return distances[finish_vertex]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: graphs/check_bipatrite.py
================================================
from collections import defaultdict, deque
def is_bipartite_dfs(graph: dict[int, list[int]]) -> bool:
"""
Check if a graph is bipartite using depth-first search (DFS).
Args:
`graph`: Adjacency list representing the graph.
Returns:
``True`` if bipartite, ``False`` otherwise.
Checks if the graph can be divided into two sets of vertices, such that no two
vertices within the same set are connected by an edge.
Examples:
>>> is_bipartite_dfs({0: [1, 2], 1: [0, 3], 2: [0, 4]})
True
>>> is_bipartite_dfs({0: [1, 2], 1: [0, 3], 2: [0, 1]})
False
>>> is_bipartite_dfs({})
True
>>> is_bipartite_dfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2]})
True
>>> is_bipartite_dfs({0: [1, 2, 3], 1: [0, 2], 2: [0, 1, 3], 3: [0, 2]})
False
>>> is_bipartite_dfs({0: [4], 1: [], 2: [4], 3: [4], 4: [0, 2, 3]})
True
>>> is_bipartite_dfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 4: [0]})
False
>>> is_bipartite_dfs({7: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 4: [0]})
False
>>> # FIXME: This test should fail with KeyError: 4.
>>> is_bipartite_dfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 9: [0]})
False
>>> is_bipartite_dfs({0: [-1, 3], 1: [0, -2]})
False
>>> is_bipartite_dfs({-1: [0, 2], 0: [-1, 1], 1: [0, 2], 2: [-1, 1]})
True
>>> is_bipartite_dfs({0.9: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2]})
True
>>> # FIXME: This test should fail with
>>> # TypeError: list indices must be integers or...
>>> is_bipartite_dfs({0: [1.0, 3.0], 1.0: [0, 2.0], 2.0: [1.0, 3.0], 3.0: [0, 2.0]})
True
>>> is_bipartite_dfs({"a": [1, 3], "b": [0, 2], "c": [1, 3], "d": [0, 2]})
True
>>> is_bipartite_dfs({0: ["b", "d"], 1: ["a", "c"], 2: ["b", "d"], 3: ["a", "c"]})
True
"""
def depth_first_search(node: int, color: int) -> bool:
"""
Perform Depth-First Search (DFS) on the graph starting from a node.
Args:
node: The current node being visited.
color: The color assigned to the current node.
Returns:
True if the graph is bipartite starting from the current node,
False otherwise.
"""
if visited[node] == -1:
visited[node] = color
if node not in graph:
return True
for neighbor in graph[node]:
if not depth_first_search(neighbor, 1 - color):
return False
return visited[node] == color
visited: defaultdict[int, int] = defaultdict(lambda: -1)
for node in graph:
if visited[node] == -1 and not depth_first_search(node, 0):
return False
return True
def is_bipartite_bfs(graph: dict[int, list[int]]) -> bool:
"""
Check if a graph is bipartite using a breadth-first search (BFS).
Args:
`graph`: Adjacency list representing the graph.
Returns:
``True`` if bipartite, ``False`` otherwise.
Check if the graph can be divided into two sets of vertices, such that no two
vertices within the same set are connected by an edge.
Examples:
>>> is_bipartite_bfs({0: [1, 2], 1: [0, 3], 2: [0, 4]})
True
>>> is_bipartite_bfs({0: [1, 2], 1: [0, 2], 2: [0, 1]})
False
>>> is_bipartite_bfs({})
True
>>> is_bipartite_bfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2]})
True
>>> is_bipartite_bfs({0: [1, 2, 3], 1: [0, 2], 2: [0, 1, 3], 3: [0, 2]})
False
>>> is_bipartite_bfs({0: [4], 1: [], 2: [4], 3: [4], 4: [0, 2, 3]})
True
>>> is_bipartite_bfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 4: [0]})
False
>>> is_bipartite_bfs({7: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 4: [0]})
False
>>> # FIXME: This test should fail with KeyError: 4.
>>> is_bipartite_bfs({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2], 9: [0]})
False
>>> is_bipartite_bfs({0: [-1, 3], 1: [0, -2]})
False
>>> is_bipartite_bfs({-1: [0, 2], 0: [-1, 1], 1: [0, 2], 2: [-1, 1]})
True
>>> is_bipartite_bfs({0.9: [1, 3], 1: [0, 2], 2: [1, 3], 3: [0, 2]})
True
>>> # FIXME: This test should fail with
>>> # TypeError: list indices must be integers or...
>>> is_bipartite_bfs({0: [1.0, 3.0], 1.0: [0, 2.0], 2.0: [1.0, 3.0], 3.0: [0, 2.0]})
True
>>> is_bipartite_bfs({"a": [1, 3], "b": [0, 2], "c": [1, 3], "d": [0, 2]})
True
>>> is_bipartite_bfs({0: ["b", "d"], 1: ["a", "c"], 2: ["b", "d"], 3: ["a", "c"]})
True
"""
visited: defaultdict[int, int] = defaultdict(lambda: -1)
for node in graph:
if visited[node] == -1:
queue: deque[int] = deque()
queue.append(node)
visited[node] = 0
while queue:
curr_node = queue.popleft()
if curr_node not in graph:
continue
for neighbor in graph[curr_node]:
if visited[neighbor] == -1:
visited[neighbor] = 1 - visited[curr_node]
queue.append(neighbor)
elif visited[neighbor] == visited[curr_node]:
return False
return True
if __name__ == "__main__":
import doctest
result = doctest.testmod()
if result.failed:
print(f"{result.failed} test(s) failed.")
else:
print("All tests passed!")
================================================
FILE: graphs/check_cycle.py
================================================
"""
Program to check if a cycle is present in a given graph
"""
def check_cycle(graph: dict) -> bool:
"""
Returns True if graph is cyclic else False
>>> check_cycle(graph={0:[], 1:[0, 3], 2:[0, 4], 3:[5], 4:[5], 5:[]})
False
>>> check_cycle(graph={0:[1, 2], 1:[2], 2:[0, 3], 3:[3]})
True
"""
# Keep track of visited nodes
visited: set[int] = set()
# To detect a back edge, keep track of vertices currently in the recursion stack
rec_stk: set[int] = set()
return any(
node not in visited and depth_first_search(graph, node, visited, rec_stk)
for node in graph
)
def depth_first_search(graph: dict, vertex: int, visited: set, rec_stk: set) -> bool:
"""
Recur for all neighbours.
If any neighbour is visited and in rec_stk then graph is cyclic.
>>> graph = {0:[], 1:[0, 3], 2:[0, 4], 3:[5], 4:[5], 5:[]}
>>> vertex, visited, rec_stk = 0, set(), set()
>>> depth_first_search(graph, vertex, visited, rec_stk)
False
"""
# Mark current node as visited and add to recursion stack
visited.add(vertex)
rec_stk.add(vertex)
for node in graph[vertex]:
if node not in visited:
if depth_first_search(graph, node, visited, rec_stk):
return True
elif node in rec_stk:
return True
# The node needs to be removed from recursion stack before function ends
rec_stk.remove(vertex)
return False
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: graphs/connected_components.py
================================================
"""
https://en.wikipedia.org/wiki/Component_(graph_theory)
Finding connected components in graph
"""
test_graph_1 = {0: [1, 2], 1: [0, 3], 2: [0], 3: [1], 4: [5, 6], 5: [4, 6], 6: [4, 5]}
test_graph_2 = {0: [1, 2, 3], 1: [0, 3], 2: [0], 3: [0, 1], 4: [], 5: []}
def dfs(graph: dict, vert: int, visited: list) -> list:
"""
Use depth first search to find all vertices
being in the same component as initial vertex
>>> dfs(test_graph_1, 0, 5 * [False])
[0, 1, 3, 2]
>>> dfs(test_graph_2, 0, 6 * [False])
[0, 1, 3, 2]
"""
visited[vert] = True
connected_verts = []
for neighbour in graph[vert]:
if not visited[neighbour]:
connected_verts += dfs(graph, neighbour, visited)
return [vert, *connected_verts]
def connected_components(graph: dict) -> list:
"""
This function takes graph as a parameter
and then returns the list of connected components
>>> connected_components(test_graph_1)
[[0, 1, 3, 2], [4, 5, 6]]
>>> connected_components(test_graph_2)
[[0, 1, 3, 2], [4], [5]]
"""
graph_size = len(graph)
visited = graph_size * [False]
components_list = []
for i in range(graph_size):
if not visited[i]:
i_connected = dfs(graph, i, visited)
components_list.append(i_connected)
return components_list
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: graphs/deep_clone_graph.py
================================================
"""
LeetCode 133. Clone Graph
https://leetcode.com/problems/clone-graph/
Given a reference of a node in a connected undirected graph.
Return a deep copy (clone) of the graph.
Each node in the graph contains a value (int) and a list (List[Node]) of its
neighbors.
"""
from dataclasses import dataclass
@dataclass
class Node:
value: int = 0
neighbors: list["Node"] | None = None
def __post_init__(self) -> None:
"""
>>> Node(3).neighbors
[]
"""
self.neighbors = self.neighbors or []
def __hash__(self) -> int:
"""
>>> hash(Node(3)) != 0
True
"""
return id(self)
def clone_graph(node: Node | None) -> Node | None:
"""
This function returns a clone of a connected undirected graph.
>>> clone_graph(Node(1))
Node(value=1, neighbors=[])
>>> clone_graph(Node(1, [Node(2)]))
Node(value=1, neighbors=[Node(value=2, neighbors=[])])
>>> clone_graph(None) is None
True
"""
if not node:
return None
originals_to_clones = {} # map nodes to clones
stack = [node]
while stack:
original = stack.pop()
if original in originals_to_clones:
continue
originals_to_clones[original] = Node(original.value)
stack.extend(original.neighbors or [])
for original, clone in originals_to_clones.items():
for neighbor in original.neighbors or []:
cloned_neighbor = originals_to_clones[neighbor]
if not clone.neighbors:
clone.neighbors = []
clone.neighbors.append(cloned_neighbor)
return originals_to_clones[node]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: graphs/depth_first_search.py
================================================
"""Non recursive implementation of a DFS algorithm."""
from __future__ import annotations
def depth_first_search(graph: dict, start: str) -> set[str]:
"""Depth First Search on Graph
:param graph: directed graph in dictionary format
:param start: starting vertex as a string
:returns: the trace of the search
>>> input_G = { "A": ["B", "C", "D"], "B": ["A", "D", "E"],
... "C": ["A", "F"], "D": ["B", "D"], "E": ["B", "F"],
... "F": ["C", "E", "G"], "G": ["F"] }
>>> output_G = list({'A', 'B', 'C', 'D', 'E', 'F', 'G'})
>>> all(x in output_G for x in list(depth_first_search(input_G, "A")))
True
>>> all(x in output_G for x in list(depth_first_search(input_G, "G")))
True
"""
explored, stack = {start}, [start]
while stack:
v = stack.pop()
explored.add(v)
# Differences from BFS:
# 1) pop last element instead of first one
# 2) add adjacent elements to stack without exploring them
for adj in reversed(graph[v]):
if adj not in explored:
stack.append(adj)
return explored
G = {
"A": ["B", "C", "D"],
"B": ["A", "D", "E"],
"C": ["A", "F"],
"D": ["B", "D"],
"E": ["B", "F"],
"F": ["C", "E", "G"],
"G": ["F"],
}
if __name__ == "__main__":
import doctest
doctest.testmod()
print(depth_first_search(G, "A"))
================================================
FILE: graphs/depth_first_search_2.py
================================================
#!/usr/bin/python
"""Author: OMKAR PATHAK"""
class Graph:
def __init__(self):
self.vertex = {}
# for printing the Graph vertices
def print_graph(self) -> None:
"""
Print the graph vertices.
Example:
>>> g = Graph()
>>> g.add_edge(0, 1)
>>> g.add_edge(0, 2)
>>> g.add_edge(1, 2)
>>> g.add_edge(2, 0)
>>> g.add_edge(2, 3)
>>> g.add_edge(3, 3)
>>> g.print_graph()
{0: [1, 2], 1: [2], 2: [0, 3], 3: [3]}
0 -> 1 -> 2
1 -> 2
2 -> 0 -> 3
3 -> 3
"""
print(self.vertex)
for i in self.vertex:
print(i, " -> ", " -> ".join([str(j) for j in self.vertex[i]]))
# for adding the edge between two vertices
def add_edge(self, from_vertex: int, to_vertex: int) -> None:
"""
Add an edge between two vertices.
:param from_vertex: The source vertex.
:param to_vertex: The destination vertex.
Example:
>>> g = Graph()
>>> g.add_edge(0, 1)
>>> g.add_edge(0, 2)
>>> g.print_graph()
{0: [1, 2]}
0 -> 1 -> 2
"""
# check if vertex is already present,
if from_vertex in self.vertex:
self.vertex[from_vertex].append(to_vertex)
else:
# else make a new vertex
self.vertex[from_vertex] = [to_vertex]
def dfs(self) -> None:
"""
Perform depth-first search (DFS) traversal on the graph
and print the visited vertices.
Example:
>>> g = Graph()
>>> g.add_edge(0, 1)
>>> g.add_edge(0, 2)
>>> g.add_edge(1, 2)
>>> g.add_edge(2, 0)
>>> g.add_edge(2, 3)
>>> g.add_edge(3, 3)
>>> g.dfs()
0 1 2 3
"""
# visited array for storing already visited nodes
visited = [False] * len(self.vertex)
# call the recursive helper function
for i in range(len(self.vertex)):
if not visited[i]:
self.dfs_recursive(i, visited)
def dfs_recursive(self, start_vertex: int, visited: list) -> None:
"""
Perform a recursive depth-first search (DFS) traversal on the graph.
:param start_vertex: The starting vertex for the traversal.
:param visited: A list to track visited vertices.
Example:
>>> g = Graph()
>>> g.add_edge(0, 1)
>>> g.add_edge(0, 2)
>>> g.add_edge(1, 2)
>>> g.add_edge(2, 0)
>>> g.add_edge(2, 3)
>>> g.add_edge(3, 3)
>>> visited = [False] * len(g.vertex)
>>> g.dfs_recursive(0, visited)
0 1 2 3
"""
# mark start vertex as visited
visited[start_vertex] = True
print(start_vertex, end="")
# Recur for all the vertices that are adjacent to this node
for i in self.vertex:
if not visited[i]:
print(" ", end="")
self.dfs_recursive(i, visited)
if __name__ == "__main__":
import doctest
doctest.testmod()
g = Graph()
g.add_edge(0, 1)
g.add_edge(0, 2)
g.add_edge(1, 2)
g.add_edge(2, 0)
g.add_edge(2, 3)
g.add_edge(3, 3)
g.print_graph()
print("DFS:")
g.dfs()
================================================
FILE: graphs/dijkstra.py
================================================
"""
pseudo-code
DIJKSTRA(graph G, start vertex s, destination vertex d):
//all nodes initially unexplored
1 - let H = min heap data structure, initialized with 0 and s [here 0 indicates
the distance from start vertex s]
2 - while H is non-empty:
3 - remove the first node and its cost from H, call them U and cost
4 - if U has been previously explored:
5 - go to the while loop, line 2 //Once a node is explored there is no need
to visit it again
6 - mark U as explored
7 - if U is d:
8 - return cost // total cost from start to destination vertex
9 - for each edge(U, V): c=cost of edge(U,V) // for V in graph[U]
10 - if V explored:
11 - go to next V in line 9
12 - total_cost = cost + c
13 - add (total_cost,V) to H
You can think of cost as a distance: Dijkstra finds the shortest distance
between the start vertex s and the destination vertex d in a graph G. Using a
min heap as H guarantees that once a vertex has been explored, no other path to
it can be shorter. This holds because heapq.heappop always returns the entry
with the smallest accumulated cost, and each heap entry stores not just the
weight of the last edge but the total distance of the path from the start
vertex to that vertex.
"""
import heapq
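# A minimal sketch of the heap property the pseudo-code above relies on
# (illustrative only; the demo list below is an added example and is not used
# by dijkstra()): heappop always returns the entry with the smallest
# accumulated cost, so the first time a vertex is popped its cost is already
# the shortest distance to it.
_demo_heap = [(4, "B"), (1, "C"), (2, "A")]
heapq.heapify(_demo_heap)
assert heapq.heappop(_demo_heap) == (1, "C")  # smallest total cost comes out first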
def dijkstra(graph, start, end):
"""Return the cost of the shortest path between vertices start and end.
>>> dijkstra(G, "E", "C")
6
>>> dijkstra(G2, "E", "F")
3
>>> dijkstra(G3, "E", "F")
3
"""
heap = [(0, start)] # cost from start node,end node
visited = set()
while heap:
(cost, u) = heapq.heappop(heap)
if u in visited:
continue
visited.add(u)
if u == end:
return cost
for v, c in graph[u]:
if v in visited:
continue
next_item = cost + c
heapq.heappush(heap, (next_item, v))
return -1
G = {
"A": [["B", 2], ["C", 5]],
"B": [["A", 2], ["D", 3], ["E", 1], ["F", 1]],
"C": [["A", 5], ["F", 3]],
"D": [["B", 3]],
"E": [["B", 4], ["F", 3]],
"F": [["C", 3], ["E", 3]],
}
r"""
Layout of G2:
E -- 1 --> B -- 1 --> C -- 1 --> D -- 1 --> F
\ /\
\ ||
----------------- 3 --------------------
"""
G2 = {
"B": [["C", 1]],
"C": [["D", 1]],
"D": [["F", 1]],
"E": [["B", 1], ["F", 3]],
"F": [],
}
r"""
Layout of G3:
E -- 1 --> B -- 1 --> C -- 1 --> D -- 1 --> F
\ /\
\ ||
-------- 2 ---------> G ------- 1 ------
"""
G3 = {
"B": [["C", 1]],
"C": [["D", 1]],
"D": [["F", 1]],
"E": [["B", 1], ["G", 2]],
"F": [],
"G": [["F", 1]],
}
short_distance = dijkstra(G, "E", "C")
print(short_distance) # E -- 3 --> F -- 3 --> C == 6
short_distance = dijkstra(G2, "E", "F")
print(short_distance) # E -- 3 --> F == 3
short_distance = dijkstra(G3, "E", "F")
print(short_distance) # E -- 2 --> G -- 1 --> F == 3
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: graphs/dijkstra_2.py
================================================
def print_dist(dist, v):
print("\nVertex Distance")
for i in range(v):
if dist[i] != float("inf"):
print(i, "\t", int(dist[i]), end="\t")
else:
print(i, "\t", "INF", end="\t")
print()
def min_dist(mdist, vset, v):
min_val = float("inf")
min_ind = -1
for i in range(v):
if (not vset[i]) and mdist[i] < min_val:
min_ind = i
min_val = mdist[i]
return min_ind
def dijkstra(graph, v, src):
mdist = [float("inf") for _ in range(v)]
vset = [False for _ in range(v)]
mdist[src] = 0.0
for _ in range(v - 1):
u = min_dist(mdist, vset, v)
vset[u] = True
for i in range(v):
if (
(not vset[i])
and graph[u][i] != float("inf")
and mdist[u] + graph[u][i] < mdist[i]
):
mdist[i] = mdist[u] + graph[u][i]
print_dist(mdist, v)
if __name__ == "__main__":
V = int(input("Enter number of vertices: ").strip())
E = int(input("Enter number of edges: ").strip())
graph = [[float("inf") for i in range(V)] for j in range(V)]
for i in range(V):
graph[i][i] = 0.0
for i in range(E):
print("\nEdge ", i + 1)
src = int(input("Enter source:").strip())
dst = int(input("Enter destination:").strip())
weight = float(input("Enter weight:").strip())
graph[src][dst] = weight
gsrc = int(input("\nEnter shortest path source:").strip())
dijkstra(graph, V, gsrc)
================================================
FILE: graphs/dijkstra_algorithm.py
================================================
# Title: Dijkstra's Algorithm for finding single source shortest path from scratch
# Author: Shubham Malik
# References: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
import math
import sys
# For storing the vertex set to retrieve node with the lowest distance
class PriorityQueue:
# Based on Min Heap
def __init__(self):
"""
Priority queue class constructor method.
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.cur_size
0
>>> priority_queue_test.array
[]
>>> priority_queue_test.pos
{}
"""
self.cur_size = 0
self.array = []
self.pos = {} # To store the pos of node in array
def is_empty(self):
"""
Conditional boolean method to determine if the priority queue is empty or not.
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.is_empty()
True
>>> priority_queue_test.insert((2, 'A'))
>>> priority_queue_test.is_empty()
False
"""
return self.cur_size == 0
def min_heapify(self, idx):
"""
Sorts the queue array so that the minimum element is root.
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.cur_size = 3
>>> priority_queue_test.pos = {'A': 0, 'B': 1, 'C': 2}
>>> priority_queue_test.array = [(5, 'A'), (10, 'B'), (15, 'C')]
>>> priority_queue_test.min_heapify(0)
>>> priority_queue_test.array
[(5, 'A'), (10, 'B'), (15, 'C')]
>>> priority_queue_test.array = [(10, 'A'), (5, 'B'), (15, 'C')]
>>> priority_queue_test.min_heapify(0)
>>> priority_queue_test.array
[(5, 'B'), (10, 'A'), (15, 'C')]
>>> priority_queue_test.array = [(10, 'A'), (15, 'B'), (5, 'C')]
>>> priority_queue_test.min_heapify(0)
>>> priority_queue_test.array
[(5, 'C'), (15, 'B'), (10, 'A')]
>>> priority_queue_test.array = [(10, 'A'), (5, 'B')]
>>> priority_queue_test.cur_size = len(priority_queue_test.array)
>>> priority_queue_test.pos = {'A': 0, 'B': 1}
>>> priority_queue_test.min_heapify(0)
>>> priority_queue_test.array
[(5, 'B'), (10, 'A')]
"""
lc = self.left(idx)
rc = self.right(idx)
if lc < self.cur_size and self.array[lc][0] < self.array[idx][0]:
smallest = lc
else:
smallest = idx
if rc < self.cur_size and self.array[rc][0] < self.array[smallest][0]:
smallest = rc
if smallest != idx:
self.swap(idx, smallest)
self.min_heapify(smallest)
def insert(self, tup):
"""
Inserts a node into the Priority Queue.
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.insert((10, 'A'))
>>> priority_queue_test.array
[(10, 'A')]
>>> priority_queue_test.insert((15, 'B'))
>>> priority_queue_test.array
[(10, 'A'), (15, 'B')]
>>> priority_queue_test.insert((5, 'C'))
>>> priority_queue_test.array
[(5, 'C'), (10, 'A'), (15, 'B')]
"""
self.pos[tup[1]] = self.cur_size
self.cur_size += 1
self.array.append((sys.maxsize, tup[1]))
self.decrease_key((sys.maxsize, tup[1]), tup[0])
def extract_min(self):
"""
Removes and returns the min element at top of priority queue.
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.array = [(10, 'A'), (15, 'B')]
>>> priority_queue_test.cur_size = len(priority_queue_test.array)
>>> priority_queue_test.pos = {'A': 0, 'B': 1}
>>> priority_queue_test.insert((5, 'C'))
>>> priority_queue_test.extract_min()
'C'
>>> priority_queue_test.array[0]
(10, 'A')
"""
min_node = self.array[0][1]
self.array[0] = self.array[self.cur_size - 1]
self.cur_size -= 1
self.min_heapify(0)
del self.pos[min_node]
return min_node
def left(self, i):
"""
Returns the index of left child
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.left(0)
1
>>> priority_queue_test.left(1)
3
"""
return 2 * i + 1
def right(self, i):
"""
Returns the index of right child
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.right(0)
2
>>> priority_queue_test.right(1)
4
"""
return 2 * i + 2
def par(self, i):
"""
Returns the index of parent
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.par(1)
0
>>> priority_queue_test.par(2)
1
>>> priority_queue_test.par(4)
2
"""
return math.floor(i / 2)
def swap(self, i, j):
"""
Swaps the array elements at indices i and j and updates pos{} accordingly
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.array = [(10, 'A'), (15, 'B')]
>>> priority_queue_test.cur_size = len(priority_queue_test.array)
>>> priority_queue_test.pos = {'A': 0, 'B': 1}
>>> priority_queue_test.swap(0, 1)
>>> priority_queue_test.array
[(15, 'B'), (10, 'A')]
>>> priority_queue_test.pos
{'A': 1, 'B': 0}
"""
self.pos[self.array[i][1]] = j
self.pos[self.array[j][1]] = i
temp = self.array[i]
self.array[i] = self.array[j]
self.array[j] = temp
def decrease_key(self, tup, new_d):
"""
Decrease the key value for a given tuple, assuming the new_d is at most old_d.
Examples:
>>> priority_queue_test = PriorityQueue()
>>> priority_queue_test.array = [(10, 'A'), (15, 'B')]
>>> priority_queue_test.cur_size = len(priority_queue_test.array)
>>> priority_queue_test.pos = {'A': 0, 'B': 1}
>>> priority_queue_test.decrease_key((10, 'A'), 5)
>>> priority_queue_test.array
[(5, 'A'), (15, 'B')]
"""
idx = self.pos[tup[1]]
# assuming the new_d is at most old_d
self.array[idx] = (new_d, tup[1])
while idx > 0 and self.array[self.par(idx)][0] > self.array[idx][0]:
self.swap(idx, self.par(idx))
idx = self.par(idx)
class Graph:
def __init__(self, num):
"""
Graph class constructor
Examples:
>>> graph_test = Graph(1)
>>> graph_test.num_nodes
1
>>> graph_test.dist
[0]
>>> graph_test.par
[-1]
>>> graph_test.adjList
{}
"""
self.adjList = {} # To store graph: u -> (v,w)
self.num_nodes = num # Number of nodes in graph
# To store the distance from source vertex
self.dist = [0] * self.num_nodes
self.par = [-1] * self.num_nodes # To store the path
def add_edge(self, u, v, w):
"""
Add edge going from node u to v and v to u with weight w: u (w)-> v, v (w) -> u
Examples:
>>> graph_test = Graph(1)
>>> graph_test.add_edge(1, 2, 1)
>>> graph_test.add_edge(2, 3, 2)
>>> graph_test.adjList
{1: [(2, 1)], 2: [(1, 1), (3, 2)], 3: [(2, 2)]}
"""
# Check if u already in graph
if u in self.adjList:
self.adjList[u].append((v, w))
else:
self.adjList[u] = [(v, w)]
# Assuming undirected graph
if v in self.adjList:
self.adjList[v].append((u, w))
else:
self.adjList[v] = [(u, w)]
def show_graph(self):
"""
Show the graph: u -> v(w)
Examples:
>>> graph_test = Graph(1)
>>> graph_test.add_edge(1, 2, 1)
>>> graph_test.show_graph()
1 -> 2(1)
2 -> 1(1)
>>> graph_test.add_edge(2, 3, 2)
>>> graph_test.show_graph()
1 -> 2(1)
2 -> 1(1) -> 3(2)
3 -> 2(2)
"""
for u in self.adjList:
print(u, "->", " -> ".join(str(f"{v}({w})") for v, w in self.adjList[u]))
def dijkstra(self, src):
"""
Dijkstra algorithm
Examples:
>>> graph_test = Graph(3)
>>> graph_test.add_edge(0, 1, 2)
>>> graph_test.add_edge(1, 2, 2)
>>> graph_test.dijkstra(0)
Distance from node: 0
Node 0 has distance: 0
Node 1 has distance: 2
Node 2 has distance: 4
>>> graph_test.dist
[0, 2, 4]
>>> graph_test = Graph(2)
>>> graph_test.add_edge(0, 1, 2)
>>> graph_test.dijkstra(0)
Distance from node: 0
Node 0 has distance: 0
Node 1 has distance: 2
>>> graph_test.dist
[0, 2]
>>> graph_test = Graph(3)
>>> graph_test.add_edge(0, 1, 2)
>>> graph_test.dijkstra(0)
Distance from node: 0
Node 0 has distance: 0
Node 1 has distance: 2
Node 2 has distance: 0
>>> graph_test.dist
[0, 2, 0]
>>> graph_test = Graph(3)
>>> graph_test.add_edge(0, 1, 2)
>>> graph_test.add_edge(1, 2, 2)
>>> graph_test.add_edge(0, 2, 1)
>>> graph_test.dijkstra(0)
Distance from node: 0
Node 0 has distance: 0
Node 1 has distance: 2
Node 2 has distance: 1
>>> graph_test.dist
[0, 2, 1]
>>> graph_test = Graph(4)
>>> graph_test.add_edge(0, 1, 4)
>>> graph_test.add_edge(1, 2, 2)
>>> graph_test.add_edge(2, 3, 1)
>>> graph_test.add_edge(0, 2, 3)
>>> graph_test.dijkstra(0)
Distance from node: 0
Node 0 has distance: 0
Node 1 has distance: 4
Node 2 has distance: 3
Node 3 has distance: 4
>>> graph_test.dist
[0, 4, 3, 4]
>>> graph_test = Graph(4)
>>> graph_test.add_edge(0, 1, 4)
>>> graph_test.add_edge(1, 2, 2)
>>> graph_test.add_edge(2, 3, 1)
>>> graph_test.add_edge(0, 2, 7)
>>> graph_test.dijkstra(0)
Distance from node: 0
Node 0 has distance: 0
Node 1 has distance: 4
Node 2 has distance: 6
Node 3 has distance: 7
>>> graph_test.dist
[0, 4, 6, 7]
"""
# Flush old junk values in par[]
self.par = [-1] * self.num_nodes
# src is the source node
self.dist[src] = 0
q = PriorityQueue()
q.insert((0, src)) # (dist from src, node)
for u in self.adjList:
if u != src:
self.dist[u] = sys.maxsize # Infinity
self.par[u] = -1
while not q.is_empty():
u = q.extract_min() # Returns node with the min dist from source
# Update the distance of all the neighbours of u and
# if their prev dist was INFINITY then push them in Q
for v, w in self.adjList[u]:
new_dist = self.dist[u] + w
if self.dist[v] > new_dist:
if self.dist[v] == sys.maxsize:
q.insert((new_dist, v))
else:
q.decrease_key((self.dist[v], v), new_dist)
self.dist[v] = new_dist
self.par[v] = u
# Show the shortest distances from src
self.show_distances(src)
def show_distances(self, src):
"""
Show the distances from src to all other nodes in a graph
Examples:
>>> graph_test = Graph(1)
>>> graph_test.show_distances(0)
Distance from node: 0
Node 0 has distance: 0
"""
print(f"Distance from node: {src}")
for u in range(self.num_nodes):
print(f"Node {u} has distance: {self.dist[u]}")
def show_path(self, src, dest):
"""
Shows the shortest path from src to dest.
WARNING: Use it *after* calling dijkstra.
Examples:
>>> graph_test = Graph(4)
>>> graph_test.add_edge(0, 1, 1)
>>> graph_test.add_edge(1, 2, 2)
>>> graph_test.add_edge(2, 3, 3)
>>> graph_test.dijkstra(0)
Distance from node: 0
Node 0 has distance: 0
Node 1 has distance: 1
Node 2 has distance: 3
Node 3 has distance: 6
>>> graph_test.show_path(0, 3) # doctest: +NORMALIZE_WHITESPACE
----Path to reach 3 from 0----
0 -> 1 -> 2 -> 3
Total cost of path: 6
"""
path = []
cost = 0
temp = dest
# Backtracking from dest to src
while self.par[temp] != -1:
path.append(temp)
if temp != src:
for v, w in self.adjList[temp]:
if v == self.par[temp]:
cost += w
break
temp = self.par[temp]
path.append(src)
path.reverse()
print(f"----Path to reach {dest} from {src}----")
for u in path:
print(f"{u}", end=" ")
if u != dest:
print("-> ", end="")
print("\nTotal cost of path: ", cost)
if __name__ == "__main__":
from doctest import testmod
testmod()
graph = Graph(9)
graph.add_edge(0, 1, 4)
graph.add_edge(0, 7, 8)
graph.add_edge(1, 2, 8)
graph.add_edge(1, 7, 11)
graph.add_edge(2, 3, 7)
graph.add_edge(2, 8, 2)
graph.add_edge(2, 5, 4)
graph.add_edge(3, 4, 9)
graph.add_edge(3, 5, 14)
graph.add_edge(4, 5, 10)
graph.add_edge(5, 6, 2)
graph.add_edge(6, 7, 1)
graph.add_edge(6, 8, 6)
graph.add_edge(7, 8, 7)
graph.show_graph()
graph.dijkstra(0)
graph.show_path(0, 4)
# OUTPUT
# 0 -> 1(4) -> 7(8)
# 1 -> 0(4) -> 2(8) -> 7(11)
# 7 -> 0(8) -> 1(11) -> 6(1) -> 8(7)
# 2 -> 1(8) -> 3(7) -> 8(2) -> 5(4)
# 3 -> 2(7) -> 4(9) -> 5(14)
# 8 -> 2(2) -> 6(6) -> 7(7)
# 5 -> 2(4) -> 3(14) -> 4(10) -> 6(2)
# 4 -> 3(9) -> 5(10)
# 6 -> 5(2) -> 7(1) -> 8(6)
# Distance from node: 0
# Node 0 has distance: 0
# Node 1 has distance: 4
# Node 2 has distance: 12
# Node 3 has distance: 19
# Node 4 has distance: 21
# Node 5 has distance: 11
# Node 6 has distance: 9
# Node 7 has distance: 8
# Node 8 has distance: 14
# ----Path to reach 4 from 0----
# 0 -> 7 -> 6 -> 5 -> 4
# Total cost of path: 21
================================================
FILE: graphs/dijkstra_alternate.py
================================================
from __future__ import annotations
class Graph:
def __init__(self, vertices: int) -> None:
"""
>>> graph = Graph(2)
>>> graph.vertices
2
>>> len(graph.graph)
2
>>> len(graph.graph[0])
2
"""
self.vertices = vertices
self.graph = [[0] * vertices for _ in range(vertices)]
def print_solution(self, distances_from_source: list[int]) -> None:
"""
>>> Graph(0).print_solution([]) # doctest: +NORMALIZE_WHITESPACE
Vertex Distance from Source
"""
print("Vertex \t Distance from Source")
for vertex in range(self.vertices):
print(vertex, "\t\t", distances_from_source[vertex])
def minimum_distance(
self, distances_from_source: list[int], visited: list[bool]
) -> int:
"""
A utility function to find the vertex with minimum distance value, from the set
of vertices not yet included in shortest path tree.
>>> Graph(3).minimum_distance([1, 2, 3], [False, False, True])
0
"""
# Initialize minimum distance for next node
minimum = 1e7
min_index = 0
# Search for the nearest vertex not in the shortest path tree
for vertex in range(self.vertices):
if distances_from_source[vertex] < minimum and visited[vertex] is False:
minimum = distances_from_source[vertex]
min_index = vertex
return min_index
def dijkstra(self, source: int) -> None:
"""
Function that implements Dijkstra's single source shortest path algorithm for a
graph represented using an adjacency matrix.
>>> Graph(4).dijkstra(1) # doctest: +NORMALIZE_WHITESPACE
Vertex Distance from Source
0 10000000
1 0
2 10000000
3 10000000
"""
distances = [int(1e7)] * self.vertices # distances from the source
distances[source] = 0
visited = [False] * self.vertices
for _ in range(self.vertices):
u = self.minimum_distance(distances, visited)
visited[u] = True
# Update dist value of the adjacent vertices
# of the picked vertex only if the current
# distance is greater than new distance and
# the vertex is not in the shortest path tree
for v in range(self.vertices):
if (
self.graph[u][v] > 0
and visited[v] is False
and distances[v] > distances[u] + self.graph[u][v]
):
distances[v] = distances[u] + self.graph[u][v]
self.print_solution(distances)
if __name__ == "__main__":
graph = Graph(9)
graph.graph = [
[0, 4, 0, 0, 0, 0, 0, 8, 0],
[4, 0, 8, 0, 0, 0, 0, 11, 0],
[0, 8, 0, 7, 0, 4, 0, 0, 2],
[0, 0, 7, 0, 9, 14, 0, 0, 0],
[0, 0, 0, 9, 0, 10, 0, 0, 0],
[0, 0, 4, 14, 10, 0, 2, 0, 0],
[0, 0, 0, 0, 0, 2, 0, 1, 6],
[8, 11, 0, 0, 0, 0, 1, 0, 7],
[0, 0, 2, 0, 0, 0, 6, 7, 0],
]
graph.dijkstra(0)
================================================
FILE: graphs/dijkstra_binary_grid.py
================================================
"""
This script implements the Dijkstra algorithm on a binary grid.
The grid consists of 0s and 1s, where 1 represents
a walkable node and 0 represents an obstacle.
The algorithm finds the shortest path from a start node to a destination node.
Diagonal movement can be allowed or disallowed.
"""
from heapq import heappop, heappush
import numpy as np
def dijkstra(
grid: np.ndarray,
source: tuple[int, int],
destination: tuple[int, int],
allow_diagonal: bool,
) -> tuple[float | int, list[tuple[int, int]]]:
"""
Implements Dijkstra's algorithm on a binary grid.
Args:
grid (np.ndarray): A 2D numpy array representing the grid.
1 represents a walkable node and 0 represents an obstacle.
source (Tuple[int, int]): A tuple representing the start node.
destination (Tuple[int, int]): A tuple representing the
destination node.
allow_diagonal (bool): A boolean determining whether
diagonal movements are allowed.
Returns:
Tuple[Union[float, int], List[Tuple[int, int]]]:
The shortest distance from the start node to the destination node
and the shortest path as a list of nodes.
>>> dijkstra(np.array([[1, 1, 1], [0, 1, 0], [0, 1, 1]]), (0, 0), (2, 2), False)
(4.0, [(0, 0), (0, 1), (1, 1), (2, 1), (2, 2)])
>>> dijkstra(np.array([[1, 1, 1], [0, 1, 0], [0, 1, 1]]), (0, 0), (2, 2), True)
(2.0, [(0, 0), (1, 1), (2, 2)])
>>> dijkstra(np.array([[1, 1, 1], [0, 0, 1], [0, 1, 1]]), (0, 0), (2, 2), False)
(4.0, [(0, 0), (0, 1), (0, 2), (1, 2), (2, 2)])
"""
rows, cols = grid.shape
dx = [-1, 1, 0, 0]
dy = [0, 0, -1, 1]
if allow_diagonal:
dx += [-1, -1, 1, 1]
dy += [-1, 1, -1, 1]
queue, visited = [(0, source)], set()
matrix = np.full((rows, cols), np.inf)
matrix[source] = 0
predecessors = np.empty((rows, cols), dtype=object)
predecessors[source] = None
while queue:
(dist, (x, y)) = heappop(queue)
if (x, y) in visited:
continue
visited.add((x, y))
if (x, y) == destination:
path = []
while (x, y) != source:
path.append((x, y))
x, y = predecessors[x, y]
path.append(source) # add the source manually
path.reverse()
return float(matrix[destination]), path
for i in range(len(dx)):
nx, ny = x + dx[i], y + dy[i]
if 0 <= nx < rows and 0 <= ny < cols:
next_node = grid[nx][ny]
if next_node == 1 and matrix[nx, ny] > dist + 1:
heappush(queue, (dist + 1, (nx, ny)))
matrix[nx, ny] = dist + 1
predecessors[nx, ny] = (x, y)
return np.inf, []
if __name__ == "__main__":
import doctest
doctest.testmod()
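# A small illustrative run (the 3x3 all-walkable grid below is a hypothetical
# example added here, not part of the original file): with diagonal moves
# allowed the path can cut straight across the grid via the centre cell, so
# the reported distance should be 2.0.
demo_grid = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
print(dijkstra(demo_grid, (0, 0), (2, 2), allow_diagonal=True))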
================================================
FILE: graphs/dinic.py
================================================
INF = float("inf")
class Dinic:
def __init__(self, n):
self.lvl = [0] * n
self.ptr = [0] * n
self.q = [0] * n
self.adj = [[] for _ in range(n)]
"""
Here we will add our edges containing with the following parameters:
vertex closest to source, vertex closest to sink and flow capacity
through that edge ...
"""
def add_edge(self, a, b, c, rcap=0):
self.adj[a].append([b, len(self.adj[b]), c, 0])
self.adj[b].append([a, len(self.adj[a]) - 1, rcap, 0])
# This is a sample depth first search to be used in max_flow
def depth_first_search(self, vertex, sink, flow):
if vertex == sink or not flow:
return flow
for i in range(self.ptr[vertex], len(self.adj[vertex])):
e = self.adj[vertex][i]
if self.lvl[e[0]] == self.lvl[vertex] + 1:
p = self.depth_first_search(e[0], sink, min(flow, e[2] - e[3]))
if p:
self.adj[vertex][i][3] += p
self.adj[e[0]][e[1]][3] -= p
return p
self.ptr[vertex] = self.ptr[vertex] + 1
return 0
# Here we calculate the flow that reaches the sink
def max_flow(self, source, sink):
flow, self.q[0] = 0, source
for l in range(31): # l = 30 maybe faster for random data # noqa: E741
while True:
self.lvl, self.ptr = [0] * len(self.q), [0] * len(self.q)
qi, qe, self.lvl[source] = 0, 1, 1
while qi < qe and not self.lvl[sink]:
v = self.q[qi]
qi += 1
for e in self.adj[v]:
if not self.lvl[e[0]] and (e[2] - e[3]) >> (30 - l):
self.q[qe] = e[0]
qe += 1
self.lvl[e[0]] = self.lvl[v] + 1
p = self.depth_first_search(source, sink, INF)
while p:
flow += p
p = self.depth_first_search(source, sink, INF)
if not self.lvl[sink]:
break
return flow
# Example usage
"""
The example below is a bipartite graph: it has 4 vertices near the source
and 4 vertices near the sink.
"""
# Here we make a graph with 10 vertices (source and sink included)
graph = Dinic(10)
source = 0
sink = 9
"""
Now we add the vertices next to the font in the font with 1 capacity in this edge
(source -> source vertices)
"""
for vertex in range(1, 5):
graph.add_edge(source, vertex, 1)
"""
We will do the same thing for the vertices near the sink, but from vertex to sink
(sink vertices -> sink)
"""
for vertex in range(5, 9):
graph.add_edge(vertex, sink, 1)
"""
Finally we add the verices near the sink to the vertices near the source.
(source vertices -> sink vertices)
"""
for vertex in range(1, 5):
graph.add_edge(vertex, vertex + 4, 1)
# Now we can compute the maximum flow (source -> sink)
print(graph.max_flow(source, sink))
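# A minimal sanity sketch (illustrative; the names below are introduced here
# and are not part of the original file): rebuilding the same bipartite demo
# on a fresh network should carry a maximum flow of 4, since each of the four
# source-side vertices can be matched with exactly one sink-side vertex.
fresh_network = Dinic(10)
for vertex in range(1, 5):
    fresh_network.add_edge(0, vertex, 1)  # source -> source-side vertex
    fresh_network.add_edge(vertex + 4, 9, 1)  # sink-side vertex -> sink
    fresh_network.add_edge(vertex, vertex + 4, 1)  # source side -> sink side
print(fresh_network.max_flow(0, 9))  # expected: 4, one unit per matched pair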
================================================
FILE: graphs/directed_and_undirected_weighted_graph.py
================================================
from collections import deque
from math import floor
from random import random
from time import time
# the default weight is 1 if not assigned, but the whole implementation is weighted
class DirectedGraph:
def __init__(self):
self.graph = {}
# adding vertices and edges
# adding the weight is optional
# handles repetition
def add_pair(self, u, v, w=1):
if self.graph.get(u):
if self.graph[u].count([w, v]) == 0:
self.graph[u].append([w, v])
else:
self.graph[u] = [[w, v]]
if not self.graph.get(v):
self.graph[v] = []
def all_nodes(self):
return list(self.graph)
# handles if the input does not exist
def remove_pair(self, u, v):
if self.graph.get(u):
for _ in self.graph[u]:
if _[1] == v:
self.graph[u].remove(_)
# if no destination is given, the default value is -1
def dfs(self, s=-2, d=-1):
if s == d:
return []
stack = []
visited = []
if s == -2:
s = next(iter(self.graph))
stack.append(s)
visited.append(s)
ss = s
while True:
# check if there are any non-isolated nodes
if len(self.graph[s]) != 0:
ss = s
for node in self.graph[s]:
if visited.count(node[1]) < 1:
if node[1] == d:
visited.append(d)
return visited
else:
stack.append(node[1])
visited.append(node[1])
ss = node[1]
break
# check if all the children are visited
if s == ss:
stack.pop()
if len(stack) != 0:
s = stack[len(stack) - 1]
else:
s = ss
# check if we have reached the starting point
if len(stack) == 0:
return visited
# c is the count of nodes you want and if you leave it or pass -1 to the function
# the count will be random from 10 to 10000
def fill_graph_randomly(self, c=-1):
if c == -1:
c = floor(random() * 10000) + 10
for i in range(c):
# every vertex has max 100 edges
for _ in range(floor(random() * 102) + 1):
n = floor(random() * c) + 1
if n != i:
self.add_pair(i, n, 1)
def bfs(self, s=-2):
d = deque()
visited = []
if s == -2:
s = next(iter(self.graph))
d.append(s)
visited.append(s)
while d:
s = d.popleft()
if len(self.graph[s]) != 0:
for node in self.graph[s]:
if visited.count(node[1]) < 1:
d.append(node[1])
visited.append(node[1])
return visited
def in_degree(self, u):
count = 0
for x in self.graph:
for y in self.graph[x]:
if y[1] == u:
count += 1
return count
def out_degree(self, u):
return len(self.graph[u])
def topological_sort(self, s=-2):
stack = []
visited = []
if s == -2:
s = next(iter(self.graph))
stack.append(s)
visited.append(s)
ss = s
sorted_nodes = []
while True:
# check if there are any non-isolated nodes
if len(self.graph[s]) != 0:
ss = s
for node in self.graph[s]:
if visited.count(node[1]) < 1:
stack.append(node[1])
visited.append(node[1])
ss = node[1]
break
# check if all the children are visited
if s == ss:
sorted_nodes.append(stack.pop())
if len(stack) != 0:
s = stack[len(stack) - 1]
else:
s = ss
# check if we have reached the starting point
if len(stack) == 0:
return sorted_nodes
def cycle_nodes(self):
stack = []
visited = []
s = next(iter(self.graph))
stack.append(s)
visited.append(s)
parent = -2
indirect_parents = []
ss = s
on_the_way_back = False
anticipating_nodes = set()
while True:
# check if there are any non-isolated nodes
if len(self.graph[s]) != 0:
ss = s
for node in self.graph[s]:
if (
visited.count(node[1]) > 0
and node[1] != parent
and indirect_parents.count(node[1]) > 0
and not on_the_way_back
):
len_stack = len(stack) - 1
while len_stack >= 0:
if stack[len_stack] == node[1]:
anticipating_nodes.add(node[1])
break
else:
anticipating_nodes.add(stack[len_stack])
len_stack -= 1
if visited.count(node[1]) < 1:
stack.append(node[1])
visited.append(node[1])
ss = node[1]
break
# check if all the children are visited
if s == ss:
stack.pop()
on_the_way_back = True
if len(stack) != 0:
s = stack[len(stack) - 1]
else:
on_the_way_back = False
indirect_parents.append(parent)
parent = s
s = ss
# check if we have reached the starting point
if len(stack) == 0:
return list(anticipating_nodes)
def has_cycle(self):
stack = []
visited = []
s = next(iter(self.graph))
stack.append(s)
visited.append(s)
parent = -2
indirect_parents = []
ss = s
on_the_way_back = False
anticipating_nodes = set()
while True:
# check if there are any non-isolated nodes
if len(self.graph[s]) != 0:
ss = s
for node in self.graph[s]:
if (
visited.count(node[1]) > 0
and node[1] != parent
and indirect_parents.count(node[1]) > 0
and not on_the_way_back
):
len_stack_minus_one = len(stack) - 1
while len_stack_minus_one >= 0:
if stack[len_stack_minus_one] == node[1]:
anticipating_nodes.add(node[1])
break
else:
return True
if visited.count(node[1]) < 1:
stack.append(node[1])
visited.append(node[1])
ss = node[1]
break
# check if all the children are visited
if s == ss:
stack.pop()
on_the_way_back = True
if len(stack) != 0:
s = stack[len(stack) - 1]
else:
on_the_way_back = False
indirect_parents.append(parent)
parent = s
s = ss
# check if we have reached the starting point
if len(stack) == 0:
return False
def dfs_time(self, s=-2, e=-1):
begin = time()
self.dfs(s, e)
end = time()
return end - begin
def bfs_time(self, s=-2):
begin = time()
self.bfs(s)
end = time()
return end - begin
class Graph:
def __init__(self):
self.graph = {}
# adding vertices and edges
# adding the weight is optional
# handles repetition
def add_pair(self, u, v, w=1):
# check if the u exists
if self.graph.get(u):
# if there already is an edge
if self.graph[u].count([w, v]) == 0:
self.graph[u].append([w, v])
else:
# if u does not exist
self.graph[u] = [[w, v]]
# add the other way
if self.graph.get(v):
# if there already is an edge
if self.graph[v].count([w, u]) == 0:
self.graph[v].append([w, u])
else:
# if v does not exist
self.graph[v] = [[w, u]]
# handles if the input does not exist
def remove_pair(self, u, v):
if self.graph.get(u):
for _ in self.graph[u]:
if _[1] == v:
self.graph[u].remove(_)
# the other way round
if self.graph.get(v):
for _ in self.graph[v]:
if _[1] == u:
self.graph[v].remove(_)
# if no destination is given, the default value is -1
def dfs(self, s=-2, d=-1):
if s == d:
return []
stack = []
visited = []
if s == -2:
s = next(iter(self.graph))
stack.append(s)
visited.append(s)
ss = s
while True:
# check if there are any non-isolated nodes
if len(self.graph[s]) != 0:
ss = s
for node in self.graph[s]:
if visited.count(node[1]) < 1:
if node[1] == d:
visited.append(d)
return visited
else:
stack.append(node[1])
visited.append(node[1])
ss = node[1]
break
# check if all the children are visited
if s == ss:
stack.pop()
if len(stack) != 0:
s = stack[len(stack) - 1]
else:
s = ss
# check if we have reached the starting point
if len(stack) == 0:
return visited
# c is the count of nodes you want and if you leave it or pass -1 to the function
# the count will be random from 10 to 10000
def fill_graph_randomly(self, c=-1):
if c == -1:
c = floor(random() * 10000) + 10
for i in range(c):
# every vertex has max 100 edges
for _ in range(floor(random() * 102) + 1):
n = floor(random() * c) + 1
if n != i:
self.add_pair(i, n, 1)
def bfs(self, s=-2):
d = deque()
visited = []
if s == -2:
s = next(iter(self.graph))
d.append(s)
visited.append(s)
while d:
s = d.popleft()
if len(self.graph[s]) != 0:
for node in self.graph[s]:
if visited.count(node[1]) < 1:
d.append(node[1])
visited.append(node[1])
return visited
def degree(self, u):
return len(self.graph[u])
def cycle_nodes(self):
stack = []
visited = []
s = next(iter(self.graph))
stack.append(s)
visited.append(s)
parent = -2
indirect_parents = []
ss = s
on_the_way_back = False
anticipating_nodes = set()
while True:
# check if there are any non-isolated nodes
if len(self.graph[s]) != 0:
ss = s
for node in self.graph[s]:
if (
visited.count(node[1]) > 0
and node[1] != parent
and indirect_parents.count(node[1]) > 0
and not on_the_way_back
):
len_stack = len(stack) - 1
while len_stack >= 0:
if stack[len_stack] == node[1]:
anticipating_nodes.add(node[1])
break
else:
anticipating_nodes.add(stack[len_stack])
len_stack -= 1
if visited.count(node[1]) < 1:
stack.append(node[1])
visited.append(node[1])
ss = node[1]
break
# check if all the children are visited
if s == ss:
stack.pop()
on_the_way_back = True
if len(stack) != 0:
s = stack[len(stack) - 1]
else:
on_the_way_back = False
indirect_parents.append(parent)
parent = s
s = ss
# check if we have reached the starting point
if len(stack) == 0:
return list(anticipating_nodes)
def has_cycle(self):
stack = []
visited = []
s = next(iter(self.graph))
stack.append(s)
visited.append(s)
parent = -2
indirect_parents = []
ss = s
on_the_way_back = False
anticipating_nodes = set()
while True:
# check if there are any non-isolated nodes
if len(self.graph[s]) != 0:
ss = s
for node in self.graph[s]:
if (
visited.count(node[1]) > 0
and node[1] != parent
and indirect_parents.count(node[1]) > 0
and not on_the_way_back
):
len_stack_minus_one = len(stack) - 1
while len_stack_minus_one >= 0:
if stack[len_stack_minus_one] == node[1]:
anticipating_nodes.add(node[1])
break
else:
return True
if visited.count(node[1]) < 1:
stack.append(node[1])
visited.append(node[1])
ss = node[1]
break
# check if all the children are visited
if s == ss:
stack.pop()
on_the_way_back = True
if len(stack) != 0:
s = stack[len(stack) - 1]
else:
on_the_way_back = False
indirect_parents.append(parent)
parent = s
s = ss
# check if we have reached the starting point
if len(stack) == 0:
return False
def all_nodes(self):
return list(self.graph)
def dfs_time(self, s=-2, e=-1):
begin = time()
self.dfs(s, e)
end = time()
return end - begin
def bfs_time(self, s=-2):
begin = time()
self.bfs(s)
end = time()
return end - begin
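# A short usage sketch (the tiny demo graph below is a hypothetical example
# added here, not part of the original file): build a small directed graph and
# run the traversals defined above.
demo = DirectedGraph()
demo.add_pair("a", "b")
demo.add_pair("b", "c")
demo.add_pair("a", "c")
assert demo.dfs("a") == ["a", "b", "c"]
assert demo.bfs("a") == ["a", "b", "c"]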
================================================
FILE: graphs/edmonds_karp_multiple_source_and_sink.py
================================================
class FlowNetwork:
def __init__(self, graph, sources, sinks):
self.source_index = None
self.sink_index = None
self.graph = graph
self._normalize_graph(sources, sinks)
self.vertices_count = len(graph)
self.maximum_flow_algorithm = None
# make only one source and one sink
def _normalize_graph(self, sources, sinks):
if isinstance(sources, int):
sources = [sources]
if isinstance(sinks, int):
sinks = [sinks]
if len(sources) == 0 or len(sinks) == 0:
return
self.source_index = sources[0]
self.sink_index = sinks[0]
# make a fake vertex if there are more
# than one source or sink
if len(sources) > 1 or len(sinks) > 1:
max_input_flow = 0
for i in sources:
max_input_flow += sum(self.graph[i])
size = len(self.graph) + 1
for room in self.graph:
room.insert(0, 0)
self.graph.insert(0, [0] * size)
for i in sources:
self.graph[0][i + 1] = max_input_flow
self.source_index = 0
size = len(self.graph) + 1
for room in self.graph:
room.append(0)
self.graph.append([0] * size)
for i in sinks:
self.graph[i + 1][size - 1] = max_input_flow
self.sink_index = size - 1
def find_maximum_flow(self):
if self.maximum_flow_algorithm is None:
raise Exception("You need to set maximum flow algorithm before.")
if self.source_index is None or self.sink_index is None:
return 0
self.maximum_flow_algorithm.execute()
return self.maximum_flow_algorithm.get_maximum_flow()
def set_maximum_flow_algorithm(self, algorithm):
self.maximum_flow_algorithm = algorithm(self)
class FlowNetworkAlgorithmExecutor:
def __init__(self, flow_network):
self.flow_network = flow_network
self.verticies_count = flow_network.vertices_count
self.source_index = flow_network.source_index
self.sink_index = flow_network.sink_index
# it's just a reference, so you shouldn't change
# it in your algorithms, use deep copy before doing that
self.graph = flow_network.graph
self.executed = False
def execute(self):
if not self.executed:
self._algorithm()
self.executed = True
# You should override it
def _algorithm(self):
pass
class MaximumFlowAlgorithmExecutor(FlowNetworkAlgorithmExecutor):
def __init__(self, flow_network):
super().__init__(flow_network)
# use this to save your result
self.maximum_flow = -1
def get_maximum_flow(self):
if not self.executed:
raise Exception("You should execute algorithm before using its result!")
return self.maximum_flow
class PushRelabelExecutor(MaximumFlowAlgorithmExecutor):
def __init__(self, flow_network):
super().__init__(flow_network)
self.preflow = [[0] * self.verticies_count for i in range(self.verticies_count)]
self.heights = [0] * self.verticies_count
self.excesses = [0] * self.verticies_count
def _algorithm(self):
self.heights[self.source_index] = self.verticies_count
# push some substance to graph
for nextvertex_index, bandwidth in enumerate(self.graph[self.source_index]):
self.preflow[self.source_index][nextvertex_index] += bandwidth
self.preflow[nextvertex_index][self.source_index] -= bandwidth
self.excesses[nextvertex_index] += bandwidth
# Relabel-to-front selection rule
vertices_list = [
i
for i in range(self.verticies_count)
if i not in {self.source_index, self.sink_index}
]
# move through list
i = 0
while i < len(vertices_list):
vertex_index = vertices_list[i]
previous_height = self.heights[vertex_index]
self.process_vertex(vertex_index)
if self.heights[vertex_index] > previous_height:
# if it was relabeled, swap elements
# and start from 0 index
vertices_list.insert(0, vertices_list.pop(i))
i = 0
else:
i += 1
self.maximum_flow = sum(self.preflow[self.source_index])
def process_vertex(self, vertex_index):
while self.excesses[vertex_index] > 0:
for neighbour_index in range(self.verticies_count):
# if it's neighbour and current vertex is higher
if (
self.graph[vertex_index][neighbour_index]
- self.preflow[vertex_index][neighbour_index]
> 0
and self.heights[vertex_index] > self.heights[neighbour_index]
):
self.push(vertex_index, neighbour_index)
self.relabel(vertex_index)
def push(self, from_index, to_index):
preflow_delta = min(
self.excesses[from_index],
self.graph[from_index][to_index] - self.preflow[from_index][to_index],
)
self.preflow[from_index][to_index] += preflow_delta
self.preflow[to_index][from_index] -= preflow_delta
self.excesses[from_index] -= preflow_delta
self.excesses[to_index] += preflow_delta
def relabel(self, vertex_index):
min_height = None
for to_index in range(self.verticies_count):
if (
self.graph[vertex_index][to_index]
- self.preflow[vertex_index][to_index]
> 0
) and (min_height is None or self.heights[to_index] < min_height):
min_height = self.heights[to_index]
if min_height is not None:
self.heights[vertex_index] = min_height + 1
if __name__ == "__main__":
entrances = [0]
exits = [3]
# graph = [
# [0, 0, 4, 6, 0, 0],
# [0, 0, 5, 2, 0, 0],
# [0, 0, 0, 0, 4, 4],
# [0, 0, 0, 0, 6, 6],
# [0, 0, 0, 0, 0, 0],
# [0, 0, 0, 0, 0, 0],
# ]
graph = [[0, 7, 0, 0], [0, 0, 6, 0], [0, 0, 0, 8], [9, 0, 0, 0]]
# prepare our network
flow_network = FlowNetwork(graph, entrances, exits)
# set algorithm
flow_network.set_maximum_flow_algorithm(PushRelabelExecutor)
# and calculate
maximum_flow = flow_network.find_maximum_flow()
print(f"maximum flow is {maximum_flow}")
================================================
FILE: graphs/eulerian_path_and_circuit_for_undirected_graph.py
================================================
# An Eulerian path is a path in a graph that visits every edge exactly once.
# An Eulerian circuit is an Eulerian path which starts and ends on the same
# vertex.
# time complexity is O(V+E)
# space complexity is O(VE)
# using dfs for finding eulerian path traversal
def dfs(u, graph, visited_edge, path=None):
path = (path or []) + [u]
for v in graph[u]:
if visited_edge[u][v] is False:
visited_edge[u][v], visited_edge[v][u] = True, True
path = dfs(v, graph, visited_edge, path)
return path
# for checking if the graph has an Euler path or circuit
def check_circuit_or_path(graph, max_node):
odd_degree_nodes = 0
odd_node = -1
for i in range(max_node):
if i not in graph:
continue
if len(graph[i]) % 2 == 1:
odd_degree_nodes += 1
odd_node = i
if odd_degree_nodes == 0:
return 1, odd_node
if odd_degree_nodes == 2:
return 2, odd_node
return 3, odd_node
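# A minimal check of the helper above (a hypothetical triangle graph, not part
# of the demo graphs below): every vertex of a triangle has even degree, so the
# helper reports case 1 (Euler circuit) and no odd-degree start vertex (-1).
assert check_circuit_or_path({1: [2, 3], 2: [1, 3], 3: [1, 2]}, 10) == (1, -1)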
def check_euler(graph, max_node):
visited_edge = [[False for _ in range(max_node + 1)] for _ in range(max_node + 1)]
check, odd_node = check_circuit_or_path(graph, max_node)
if check == 3:
print("graph is not Eulerian")
print("no path")
return
start_node = 1
if check == 2:
start_node = odd_node
print("graph has a Euler path")
if check == 1:
print("graph has a Euler cycle")
path = dfs(start_node, graph, visited_edge)
print(path)
def main():
g1 = {1: [2, 3, 4], 2: [1, 3], 3: [1, 2], 4: [1, 5], 5: [4]}
g2 = {1: [2, 3, 4, 5], 2: [1, 3], 3: [1, 2], 4: [1, 5], 5: [1, 4]}
g3 = {1: [2, 3, 4], 2: [1, 3, 4], 3: [1, 2], 4: [1, 2, 5], 5: [4]}
g4 = {1: [2, 3], 2: [1, 3], 3: [1, 2]}
g5 = {
1: [],
2: [],
# all degree is zero
}
max_node = 10
check_euler(g1, max_node)
check_euler(g2, max_node)
check_euler(g3, max_node)
check_euler(g4, max_node)
check_euler(g5, max_node)
if __name__ == "__main__":
main()
================================================
FILE: graphs/even_tree.py
================================================
"""
You are given a tree (a simple connected graph with no cycles). The tree has N
nodes numbered from 1 to N and is rooted at node 1.
Find the maximum number of edges you can remove from the tree to get a forest
such that each connected component of the forest contains an even number of
nodes.
Constraints
2 <= N <= 100
Note: The tree input will be such that it can always be decomposed into
components containing an even number of nodes.
"""
# pylint: disable=invalid-name
from collections import defaultdict
def dfs(start: int) -> int:
"""DFS traversal"""
# pylint: disable=redefined-outer-name
ret = 1
visited[start] = True
for v in tree[start]:
if v not in visited:
ret += dfs(v)
if ret % 2 == 0:
cuts.append(start)
return ret
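# Note on the approach (added for clarity): a subtree can be split off from its
# parent exactly when it contains an even number of nodes, so dfs() records one
# entry in `cuts` per even-sized subtree, including the whole tree rooted at
# node 1 -- hence the answer printed in __main__ is len(cuts) - 1.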
def even_tree():
"""
2 1
3 1
4 3
5 2
6 1
7 2
8 6
9 8
10 8
On removing edges (1,3) and (1,6), we can get the desired result 2.
"""
dfs(1)
if __name__ == "__main__":
n, m = 10, 9
tree = defaultdict(list)
visited: dict[int, bool] = {}
cuts: list[int] = []
count = 0
edges = [(2, 1), (3, 1), (4, 3), (5, 2), (6, 1), (7, 2), (8, 6), (9, 8), (10, 8)]
for u, v in edges:
tree[u].append(v)
tree[v].append(u)
even_tree()
print(len(cuts) - 1)
================================================
FILE: graphs/finding_bridges.py
================================================
"""
An edge is a bridge if removing it increases the number of connected components in
the graph by one. Bridges represent vulnerabilities in a connected network and are
useful for designing reliable networks. For example, in a wired computer network, an
articulation point indicates the critical computers and a bridge indicates the critical
wires or connections.
For more details, refer this article:
https://www.geeksforgeeks.org/bridge-in-a-graph/
"""
def __get_demo_graph(index):
return [
{
0: [1, 2],
1: [0, 2],
2: [0, 1, 3, 5],
3: [2, 4],
4: [3],
5: [2, 6, 8],
6: [5, 7],
7: [6, 8],
8: [5, 7],
},
{
0: [6],
1: [9],
2: [4, 5],
3: [4],
4: [2, 3],
5: [2],
6: [0, 7],
7: [6],
8: [],
9: [1],
},
{
0: [4],
1: [6],
2: [],
3: [5, 6, 7],
4: [0, 6],
5: [3, 8, 9],
6: [1, 3, 4, 7],
7: [3, 6, 8, 9],
8: [5, 7],
9: [5, 7],
},
{
0: [1, 3],
1: [0, 2, 4],
2: [1, 3, 4],
3: [0, 2, 4],
4: [1, 2, 3],
},
][index]
def compute_bridges(graph: dict[int, list[int]]) -> list[tuple[int, int]]:
"""
Return the list of undirected graph bridges [(a1, b1), ..., (ak, bk)]; ai <= bi
>>> compute_bridges(__get_demo_graph(0))
[(3, 4), (2, 3), (2, 5)]
>>> compute_bridges(__get_demo_graph(1))
[(6, 7), (0, 6), (1, 9), (3, 4), (2, 4), (2, 5)]
>>> compute_bridges(__get_demo_graph(2))
[(1, 6), (4, 6), (0, 4)]
>>> compute_bridges(__get_demo_graph(3))
[]
>>> compute_bridges({})
[]
"""
id_ = 0
n = len(graph) # No of vertices in graph
low = [0] * n
visited = [False] * n
def dfs(at, parent, bridges, id_):
visited[at] = True
low[at] = id_
id_ += 1
for to in graph[at]:
if to == parent:
pass
elif not visited[to]:
dfs(to, at, bridges, id_)
low[at] = min(low[at], low[to])
if id_ <= low[to]:
bridges.append((at, to) if at < to else (to, at))
else:
# This edge is a back edge and cannot be a bridge
low[at] = min(low[at], low[to])
bridges: list[tuple[int, int]] = []
for i in range(n):
if not visited[i]:
dfs(i, -1, bridges, id_)
return bridges
if __name__ == "__main__":
import doctest
doctest.testmod()
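# A tiny extra illustration (a hypothetical path graph, not one of the demo
# graphs above): in the path 0 - 1 - 2 every edge is a bridge, since removing
# either one disconnects the graph.
print(compute_bridges({0: [1], 1: [0, 2], 2: [1]}))  # both edges are reported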
================================================
FILE: graphs/frequent_pattern_graph_miner.py
================================================
"""
FP-GraphMiner - A Fast Frequent Pattern Mining Algorithm for Network Graphs
A novel Frequent Pattern Graph Mining algorithm, FP-GraphMiner, that compactly
represents a set of network graphs as a Frequent Pattern Graph (or FP-Graph).
This graph can be used to efficiently mine frequent subgraphs including maximal
frequent subgraphs and maximum common subgraphs.
URL: https://www.researchgate.net/publication/235255851
"""
# fmt: off
edge_array = [
['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12', 'cd-e2', 'ce-e4',
'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3', 'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3'],
['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'cd-e2', 'de-e1', 'df-e8',
'ef-e3', 'eg-e2', 'fg-e6'],
['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'de-e1', 'df-e8', 'dg-e5', 'ef-e3', 'eg-e2',
'eh-e12', 'fg-e6', 'fh-e10', 'gh-e6'],
['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'bh-e12', 'cd-e2', 'df-e8', 'dh-e10'],
['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'cd-e2', 'ce-e4', 'de-e1', 'df-e8',
'dg-e5', 'ef-e3', 'eg-e2', 'fg-e6']
]
# fmt: on
def get_distinct_edge(edge_array):
"""
Return Distinct edges from edge array of multiple graphs
>>> sorted(get_distinct_edge(edge_array))
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
"""
distinct_edge = set()
for row in edge_array:
for item in row:
distinct_edge.add(item[0])
return list(distinct_edge)
def get_bitcode(edge_array, distinct_edge):
"""
Return bitcode of distinct_edge
"""
bitcode = ["0"] * len(edge_array)
for i, row in enumerate(edge_array):
for item in row:
if distinct_edge in item[0]:
bitcode[i] = "1"
break
return "".join(bitcode)
def get_frequency_table(edge_array):
"""
Returns Frequency Table
"""
distinct_edge = get_distinct_edge(edge_array)
frequency_table = {}
for item in distinct_edge:
bit = get_bitcode(edge_array, item)
# print('bit',bit)
# bt=''.join(bit)
s = bit.count("1")
frequency_table[item] = [s, bit]
# Store [Distinct edge, WT(Bitcode), Bitcode] in descending order
sorted_frequency_table = [
[k, v[0], v[1]]
for k, v in sorted(frequency_table.items(), key=lambda v: v[1][0], reverse=True)
]
return sorted_frequency_table
def get_nodes(frequency_table):
"""
Returns nodes
format nodes={bitcode:edges that represent the bitcode}
>>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'],
... ['bd', 5, '11111'], ['bc', 5, '11111']])
{'11111': ['ab', 'ac', 'df', 'bd', 'bc']}
"""
nodes = {}
for _, item in enumerate(frequency_table):
nodes.setdefault(item[2], []).append(item[0])
return nodes
def get_cluster(nodes):
"""
Returns cluster
format cluster:{WT(bitcode):nodes with same WT}
"""
cluster = {}
for key, value in nodes.items():
cluster.setdefault(key.count("1"), {})[key] = value
return cluster
def get_support(cluster):
"""
Returns support
>>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']},
... 4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']},
... 3: {'11001': ['ad'], '10101': ['dg']},
... 2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'],
... '10001': ['ce']},
... 1: {'00100': ['fh', 'eh'], '10000': ['hi']}})
[100.0, 80.0, 60.0, 40.0, 20.0]
"""
return [i * 100 / len(cluster) for i in cluster]
def print_all() -> None:
print("\nNodes\n")
for key, value in nodes.items():
print(key, value)
print("\nSupport\n")
print(support)
print("\n Cluster \n")
for key, value in sorted(cluster.items(), reverse=True):
print(key, value)
print("\n Graph\n")
for key, value in graph.items():
print(key, value)
print("\n Edge List of Frequent subgraphs \n")
for edge_list in freq_subgraph_edge_list:
print(edge_list)
def create_edge(nodes, graph, cluster, c1):
"""
create edge between the nodes
"""
for i in cluster[c1]:
count = 0
c2 = c1 + 1
while c2 < max(cluster.keys()):
for j in cluster[c2]:
"""
creates an edge only if the condition is satisfied
"""
if int(i, 2) & int(j, 2) == int(i, 2):
if tuple(nodes[i]) in graph:
graph[tuple(nodes[i])].append(nodes[j])
else:
graph[tuple(nodes[i])] = [nodes[j]]
count += 1
if count == 0:
c2 = c2 + 1
else:
break
def construct_graph(cluster, nodes):
x = cluster[max(cluster.keys())]
cluster[max(cluster.keys()) + 1] = "Header"
graph = {}
for i in x:
if (["Header"],) in graph:
graph[(["Header"],)].append(x[i])
else:
graph[(["Header"],)] = [x[i]]
for i in x:
graph[(x[i],)] = [["Header"]]
i = 1
while i < max(cluster) - 1:
create_edge(nodes, graph, cluster, i)
i = i + 1
return graph
def my_dfs(graph, start, end, path=None):
"""
find different DFS walk from given node to Header node
"""
path = (path or []) + [start]
if start == end:
paths.append(path)
for node in graph[start]:
if tuple(node) not in path:
my_dfs(graph, tuple(node), end, path)
def find_freq_subgraph_given_support(s, cluster, graph):
"""
find edges of multiple frequent subgraphs
"""
k = int(s / 100 * (len(cluster) - 1))
for i in cluster[k]:
my_dfs(graph, tuple(cluster[k][i]), (["Header"],))
def freq_subgraphs_edge_list(paths):
"""
returns Edge list for frequent subgraphs
"""
freq_sub_el = []
for edges in paths:
el = []
for j in range(len(edges) - 1):
temp = list(edges[j])
for e in temp:
edge = (e[0], e[1])
el.append(edge)
freq_sub_el.append(el)
return freq_sub_el
def preprocess(edge_array):
"""
Preprocess the edge array
>>> preprocess([['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12',
... 'cd-e2', 'ce-e4', 'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3',
... 'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3']])
"""
for i in range(len(edge_array)):
for j in range(len(edge_array[i])):
t = edge_array[i][j].split("-")
edge_array[i][j] = t
if __name__ == "__main__":
preprocess(edge_array)
frequency_table = get_frequency_table(edge_array)
nodes = get_nodes(frequency_table)
cluster = get_cluster(nodes)
support = get_support(cluster)
graph = construct_graph(cluster, nodes)
find_freq_subgraph_given_support(60, cluster, graph)
paths: list = []
freq_subgraph_edge_list = freq_subgraphs_edge_list(paths)
print_all()
================================================
FILE: graphs/g_topological_sort.py
================================================
# Author: Phyllipe Bezerra (https://github.com/pmba)
clothes = {
0: "underwear",
1: "pants",
2: "belt",
3: "suit",
4: "shoe",
5: "socks",
6: "shirt",
7: "tie",
8: "watch",
}
graph = [[1, 4], [2, 4], [3], [], [], [4], [2, 7], [3], []]
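# graph[i] lists the garments that can only be put on after garment i,
# e.g. underwear (0) must go on before pants (1) and shoe (4), and the
# shirt (6) before the belt (2) and tie (7).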
visited = [0 for x in range(len(graph))]
stack = []
def print_stack(stack, clothes):
order = 1
while stack:
current_clothing = stack.pop()
print(order, clothes[current_clothing])
order += 1
def depth_first_search(u, visited, graph):
visited[u] = 1
for v in graph[u]:
if not visited[v]:
depth_first_search(v, visited, graph)
stack.append(u)
def topological_sort(graph, visited):
for v in range(len(graph)):
if not visited[v]:
depth_first_search(v, visited, graph)
if __name__ == "__main__":
topological_sort(graph, visited)
print(stack)
print_stack(stack, clothes)
================================================
FILE: graphs/gale_shapley_bigraph.py
================================================
from __future__ import annotations
def stable_matching(
donor_pref: list[list[int]], recipient_pref: list[list[int]]
) -> list[int]:
"""
Finds the stable match in any bipartite graph, i.e. a pairing where no 2 objects
prefer each other over their partner. The function accepts the preferences of
organ donors and recipients (where both are assigned numbers from 0 to n-1) and
returns a list where the index position corresponds to the donor and the value
at the index is the organ recipient.
To better understand the algorithm, see also:
https://github.com/akashvshroff/Gale_Shapley_Stable_Matching (README).
https://www.youtube.com/watch?v=Qcv1IqHWAzg&t=13s (Numberphile YouTube).
>>> donor_pref = [[0, 1, 3, 2], [0, 2, 3, 1], [1, 0, 2, 3], [0, 3, 1, 2]]
>>> recipient_pref = [[3, 1, 2, 0], [3, 1, 0, 2], [0, 3, 1, 2], [1, 0, 3, 2]]
>>> stable_matching(donor_pref, recipient_pref)
[1, 2, 3, 0]
"""
assert len(donor_pref) == len(recipient_pref)
n = len(donor_pref)
unmatched_donors = list(range(n))
donor_record = [-1] * n # who the donor has donated to
rec_record = [-1] * n # who the recipient has received from
num_donations = [0] * n
while unmatched_donors:
donor = unmatched_donors[0]
donor_preference = donor_pref[donor]
recipient = donor_preference[num_donations[donor]]
num_donations[donor] += 1
rec_preference = recipient_pref[recipient]
prev_donor = rec_record[recipient]
if prev_donor != -1:
if rec_preference.index(prev_donor) > rec_preference.index(donor):
rec_record[recipient] = donor
donor_record[donor] = recipient
unmatched_donors.append(prev_donor)
unmatched_donors.remove(donor)
else:
rec_record[recipient] = donor
donor_record[donor] = recipient
unmatched_donors.remove(donor)
return donor_record
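# A second, smaller sketch (hypothetical preferences added here, not from the
# original file): with two donors and two recipients whose first choices do not
# clash, everyone simply gets their top pick.
assert stable_matching([[0, 1], [1, 0]], [[0, 1], [1, 0]]) == [0, 1]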
================================================
FILE: graphs/graph_adjacency_list.py
================================================
#!/usr/bin/env python3
"""
Author: Vikram Nithyanandam
Description:
The following implementation is a robust unweighted Graph data structure
implemented using an adjacency list. The vertices and edges of this graph can be
effectively initialized and modified while storing your chosen generic
value in each vertex.
Adjacency List: https://en.wikipedia.org/wiki/Adjacency_list
Potential Future Ideas:
- Add a flag to set edge weights on and set edge weights
- Make edge weights and vertex values customizable to store whatever the client wants
- Support multigraph functionality if the client wants it
"""
from __future__ import annotations
import random
import unittest
from pprint import pformat
from typing import TypeVar
import pytest
T = TypeVar("T")
class GraphAdjacencyList[T]:
def __init__(
self, vertices: list[T], edges: list[list[T]], directed: bool = True
) -> None:
"""
Parameters:
- vertices: (list[T]) The list of vertex names the client wants to
pass in. Default is empty.
- edges: (list[list[T]]) The list of edges the client wants to
pass in. Each edge is a 2-element list. Default is empty.
- directed: (bool) Indicates if graph is directed or undirected.
Default is True.
"""
self.adj_list: dict[T, list[T]] = {} # dictionary of lists of T
self.directed = directed
# Falsey checks
edges = edges or []
vertices = vertices or []
for vertex in vertices:
self.add_vertex(vertex)
for edge in edges:
if len(edge) != 2:
msg = f"Invalid input: {edge} is the wrong length."
raise ValueError(msg)
self.add_edge(edge[0], edge[1])
def add_vertex(self, vertex: T) -> None:
"""
Adds a vertex to the graph. If the given vertex already exists,
a ValueError will be thrown.
>>> g = GraphAdjacencyList(vertices=[], edges=[], directed=False)
>>> g.add_vertex("A")
>>> g.adj_list
{'A': []}
>>> g.add_vertex("A")
Traceback (most recent call last):
...
ValueError: Incorrect input: A is already in the graph.
"""
if self.contains_vertex(vertex):
msg = f"Incorrect input: {vertex} is already in the graph."
raise ValueError(msg)
self.adj_list[vertex] = []
def add_edge(self, source_vertex: T, destination_vertex: T) -> None:
"""
Creates an edge from source vertex to destination vertex. If any
given vertex doesn't exist or the edge already exists, a ValueError
will be thrown.
"""
if not (
self.contains_vertex(source_vertex)
and self.contains_vertex(destination_vertex)
):
msg = (
f"Incorrect input: Either {source_vertex} or "
f"{destination_vertex} does not exist"
)
raise ValueError(msg)
if self.contains_edge(source_vertex, destination_vertex):
msg = (
"Incorrect input: The edge already exists between "
f"{source_vertex} and {destination_vertex}"
)
raise ValueError(msg)
# add the destination vertex to the list associated with the source vertex
# and vice versa if not directed
self.adj_list[source_vertex].append(destination_vertex)
if not self.directed:
self.adj_list[destination_vertex].append(source_vertex)
def remove_vertex(self, vertex: T) -> None:
"""
Removes the given vertex from the graph and deletes all incoming and
outgoing edges from the given vertex as well. If the given vertex
does not exist, a ValueError will be thrown.
"""
if not self.contains_vertex(vertex):
msg = f"Incorrect input: {vertex} does not exist in this graph."
raise ValueError(msg)
if not self.directed:
# If not directed, find all neighboring vertices and delete all references
# of edges connecting to the given vertex
for neighbor in self.adj_list[vertex]:
self.adj_list[neighbor].remove(vertex)
else:
# If directed, search all neighbors of all vertices and delete all
# references of edges connecting to the given vertex
for edge_list in self.adj_list.values():
if vertex in edge_list:
edge_list.remove(vertex)
# Finally, delete the given vertex and all of its outgoing edge references
self.adj_list.pop(vertex)
def remove_edge(self, source_vertex: T, destination_vertex: T) -> None:
"""
Removes the edge between the two vertices. If any given vertex
doesn't exist or the edge does not exist, a ValueError will be thrown.
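Example (illustrative usage):
>>> g = GraphAdjacencyList(vertices=["A", "B"], edges=[["A", "B"]], directed=True)
>>> g.remove_edge("A", "B")
>>> g.adj_list
{'A': [], 'B': []}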
"""
if not (
self.contains_vertex(source_vertex)
and self.contains_vertex(destination_vertex)
):
msg = (
f"Incorrect input: Either {source_vertex} or "
f"{destination_vertex} does not exist"
)
raise ValueError(msg)
if not self.contains_edge(source_vertex, destination_vertex):
msg = (
"Incorrect input: The edge does NOT exist between "
f"{source_vertex} and {destination_vertex}"
)
raise ValueError(msg)
# remove the destination vertex from the list associated with the source
# vertex and vice versa if not directed
self.adj_list[source_vertex].remove(destination_vertex)
if not self.directed:
self.adj_list[destination_vertex].remove(source_vertex)
def contains_vertex(self, vertex: T) -> bool:
"""
Returns True if the graph contains the vertex, False otherwise.
"""
return vertex in self.adj_list
def contains_edge(self, source_vertex: T, destination_vertex: T) -> bool:
"""
Returns True if the graph contains the edge from the source_vertex to the
destination_vertex, False otherwise. If any given vertex doesn't exist, a
ValueError will be thrown.
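Example (illustrative usage):
>>> g = GraphAdjacencyList(vertices=["A", "B", "C"], edges=[["A", "B"]], directed=True)
>>> g.contains_edge("A", "B")
True
>>> g.contains_edge("B", "A")
False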
"""
if not (
self.contains_vertex(source_vertex)
and self.contains_vertex(destination_vertex)
):
msg = (
f"Incorrect input: Either {source_vertex} "
f"or {destination_vertex} does not exist."
)
raise ValueError(msg)
return destination_vertex in self.adj_list[source_vertex]
def clear_graph(self) -> None:
"""
Clears all vertices and edges.
"""
self.adj_list = {}
def __repr__(self) -> str:
return pformat(self.adj_list)
class TestGraphAdjacencyList(unittest.TestCase):
def __assert_graph_edge_exists_check(
self,
undirected_graph: GraphAdjacencyList,
directed_graph: GraphAdjacencyList,
edge: list[int],
) -> None:
assert undirected_graph.contains_edge(edge[0], edge[1])
assert undirected_graph.contains_edge(edge[1], edge[0])
assert directed_graph.contains_edge(edge[0], edge[1])
def __assert_graph_edge_does_not_exist_check(
self,
undirected_graph: GraphAdjacencyList,
directed_graph: GraphAdjacencyList,
edge: list[int],
) -> None:
assert not undirected_graph.contains_edge(edge[0], edge[1])
assert not undirected_graph.contains_edge(edge[1], edge[0])
assert not directed_graph.contains_edge(edge[0], edge[1])
def __assert_graph_vertex_exists_check(
self,
undirected_graph: GraphAdjacencyList,
directed_graph: GraphAdjacencyList,
vertex: int,
) -> None:
assert undirected_graph.contains_vertex(vertex)
assert directed_graph.contains_vertex(vertex)
def __assert_graph_vertex_does_not_exist_check(
self,
undirected_graph: GraphAdjacencyList,
directed_graph: GraphAdjacencyList,
vertex: int,
) -> None:
assert not undirected_graph.contains_vertex(vertex)
assert not directed_graph.contains_vertex(vertex)
def __generate_random_edges(
self, vertices: list[int], edge_pick_count: int
) -> list[list[int]]:
assert edge_pick_count <= len(vertices)
random_source_vertices: list[int] = random.sample(
vertices[0 : int(len(vertices) / 2)], edge_pick_count
)
random_destination_vertices: list[int] = random.sample(
vertices[int(len(vertices) / 2) :], edge_pick_count
)
random_edges: list[list[int]] = []
for source in random_source_vertices:
for dest in random_destination_vertices:
random_edges.append([source, dest])
return random_edges
def __generate_graphs(
self, vertex_count: int, min_val: int, max_val: int, edge_pick_count: int
) -> tuple[GraphAdjacencyList, GraphAdjacencyList, list[int], list[list[int]]]:
if max_val - min_val + 1 < vertex_count:
raise ValueError(
"Will result in duplicate vertices. Either increase range "
"between min_val and max_val or decrease vertex count."
)
# generate graph input
random_vertices: list[int] = random.sample(
range(min_val, max_val + 1), vertex_count
)
random_edges: list[list[int]] = self.__generate_random_edges(
random_vertices, edge_pick_count
)
# build graphs
undirected_graph = GraphAdjacencyList(
vertices=random_vertices, edges=random_edges, directed=False
)
directed_graph = GraphAdjacencyList(
vertices=random_vertices, edges=random_edges, directed=True
)
return undirected_graph, directed_graph, random_vertices, random_edges
def test_init_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
# test graph initialization with vertices and edges
for num in random_vertices:
self.__assert_graph_vertex_exists_check(
undirected_graph, directed_graph, num
)
for edge in random_edges:
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, edge
)
assert not undirected_graph.directed
assert directed_graph.directed
def test_contains_vertex(self) -> None:
random_vertices: list[int] = random.sample(range(101), 20)
# Build graphs WITHOUT edges
undirected_graph = GraphAdjacencyList(
vertices=random_vertices, edges=[], directed=False
)
directed_graph = GraphAdjacencyList(
vertices=random_vertices, edges=[], directed=True
)
# Test contains_vertex
for num in range(101):
assert (num in random_vertices) == undirected_graph.contains_vertex(num)
assert (num in random_vertices) == directed_graph.contains_vertex(num)
def test_add_vertices(self) -> None:
random_vertices: list[int] = random.sample(range(101), 20)
# build empty graphs
undirected_graph: GraphAdjacencyList = GraphAdjacencyList(
vertices=[], edges=[], directed=False
)
directed_graph: GraphAdjacencyList = GraphAdjacencyList(
vertices=[], edges=[], directed=True
)
# run add_vertex
for num in random_vertices:
undirected_graph.add_vertex(num)
for num in random_vertices:
directed_graph.add_vertex(num)
# test add_vertex worked
for num in random_vertices:
self.__assert_graph_vertex_exists_check(
undirected_graph, directed_graph, num
)
def test_remove_vertices(self) -> None:
random_vertices: list[int] = random.sample(range(101), 20)
# build graphs WITHOUT edges
undirected_graph = GraphAdjacencyList(
vertices=random_vertices, edges=[], directed=False
)
directed_graph = GraphAdjacencyList(
vertices=random_vertices, edges=[], directed=True
)
# test remove_vertex worked
for num in random_vertices:
self.__assert_graph_vertex_exists_check(
undirected_graph, directed_graph, num
)
undirected_graph.remove_vertex(num)
directed_graph.remove_vertex(num)
self.__assert_graph_vertex_does_not_exist_check(
undirected_graph, directed_graph, num
)
def test_add_and_remove_vertices_repeatedly(self) -> None:
random_vertices1: list[int] = random.sample(range(51), 20)
random_vertices2: list[int] = random.sample(range(51, 101), 20)
# build graphs WITHOUT edges
undirected_graph = GraphAdjacencyList(
vertices=random_vertices1, edges=[], directed=False
)
directed_graph = GraphAdjacencyList(
vertices=random_vertices1, edges=[], directed=True
)
# test adding and removing vertices
for i, _ in enumerate(random_vertices1):
undirected_graph.add_vertex(random_vertices2[i])
directed_graph.add_vertex(random_vertices2[i])
self.__assert_graph_vertex_exists_check(
undirected_graph, directed_graph, random_vertices2[i]
)
undirected_graph.remove_vertex(random_vertices1[i])
directed_graph.remove_vertex(random_vertices1[i])
self.__assert_graph_vertex_does_not_exist_check(
undirected_graph, directed_graph, random_vertices1[i]
)
# remove all vertices
for i, _ in enumerate(random_vertices1):
undirected_graph.remove_vertex(random_vertices2[i])
directed_graph.remove_vertex(random_vertices2[i])
self.__assert_graph_vertex_does_not_exist_check(
undirected_graph, directed_graph, random_vertices2[i]
)
def test_contains_edge(self) -> None:
# generate graphs and graph input
vertex_count = 20
(
undirected_graph,
directed_graph,
random_vertices,
random_edges,
) = self.__generate_graphs(vertex_count, 0, 100, 4)
# generate all possible edges for testing
all_possible_edges: list[list[int]] = []
for i in range(vertex_count - 1):
for j in range(i + 1, vertex_count):
all_possible_edges.append([random_vertices[i], random_vertices[j]])
all_possible_edges.append([random_vertices[j], random_vertices[i]])
# test contains_edge function
for edge in all_possible_edges:
if edge in random_edges:
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, edge
)
elif [edge[1], edge[0]] in random_edges:
# since this edge exists for undirected but the reverse
# may not exist for directed
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, [edge[1], edge[0]]
)
else:
self.__assert_graph_edge_does_not_exist_check(
undirected_graph, directed_graph, edge
)
def test_add_edge(self) -> None:
# generate graph input
random_vertices: list[int] = random.sample(range(101), 15)
random_edges: list[list[int]] = self.__generate_random_edges(random_vertices, 4)
# build graphs WITHOUT edges
undirected_graph = GraphAdjacencyList(
vertices=random_vertices, edges=[], directed=False
)
directed_graph = GraphAdjacencyList(
vertices=random_vertices, edges=[], directed=True
)
# run and test add_edge
for edge in random_edges:
undirected_graph.add_edge(edge[0], edge[1])
directed_graph.add_edge(edge[0], edge[1])
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, edge
)
def test_remove_edge(self) -> None:
# generate graph input and graphs
(
undirected_graph,
directed_graph,
_random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
# run and test remove_edge
for edge in random_edges:
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, edge
)
undirected_graph.remove_edge(edge[0], edge[1])
directed_graph.remove_edge(edge[0], edge[1])
self.__assert_graph_edge_does_not_exist_check(
undirected_graph, directed_graph, edge
)
def test_add_and_remove_edges_repeatedly(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
# make some more edge options!
more_random_edges: list[list[int]] = []
while len(more_random_edges) != len(random_edges):
edges: list[list[int]] = self.__generate_random_edges(random_vertices, 4)
for edge in edges:
if len(more_random_edges) == len(random_edges):
break
elif edge not in more_random_edges and edge not in random_edges:
more_random_edges.append(edge)
for i, _ in enumerate(random_edges):
undirected_graph.add_edge(more_random_edges[i][0], more_random_edges[i][1])
directed_graph.add_edge(more_random_edges[i][0], more_random_edges[i][1])
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, more_random_edges[i]
)
undirected_graph.remove_edge(random_edges[i][0], random_edges[i][1])
directed_graph.remove_edge(random_edges[i][0], random_edges[i][1])
self.__assert_graph_edge_does_not_exist_check(
undirected_graph, directed_graph, random_edges[i]
)
def test_add_vertex_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
_random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for vertex in random_vertices:
with pytest.raises(ValueError):
undirected_graph.add_vertex(vertex)
with pytest.raises(ValueError):
directed_graph.add_vertex(vertex)
def test_remove_vertex_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
_random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for i in range(101):
if i not in random_vertices:
with pytest.raises(ValueError):
undirected_graph.remove_vertex(i)
with pytest.raises(ValueError):
directed_graph.remove_vertex(i)
def test_add_edge_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
_random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for edge in random_edges:
with pytest.raises(ValueError):
undirected_graph.add_edge(edge[0], edge[1])
with pytest.raises(ValueError):
directed_graph.add_edge(edge[0], edge[1])
def test_remove_edge_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
more_random_edges: list[list[int]] = []
while len(more_random_edges) != len(random_edges):
edges: list[list[int]] = self.__generate_random_edges(random_vertices, 4)
for edge in edges:
if len(more_random_edges) == len(random_edges):
break
elif edge not in more_random_edges and edge not in random_edges:
more_random_edges.append(edge)
for edge in more_random_edges:
with pytest.raises(ValueError):
undirected_graph.remove_edge(edge[0], edge[1])
with pytest.raises(ValueError):
directed_graph.remove_edge(edge[0], edge[1])
def test_contains_edge_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
_random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for vertex in random_vertices:
with pytest.raises(ValueError):
undirected_graph.contains_edge(vertex, 102)
with pytest.raises(ValueError):
directed_graph.contains_edge(vertex, 102)
with pytest.raises(ValueError):
undirected_graph.contains_edge(103, 102)
with pytest.raises(ValueError):
directed_graph.contains_edge(103, 102)
if __name__ == "__main__":
unittest.main()
================================================
FILE: graphs/graph_adjacency_matrix.py
================================================
#!/usr/bin/env python3
"""
Author: Vikram Nithyanandam
Description:
The following implementation is a robust unweighted Graph data structure
implemented using an adjacency matrix. The vertices and edges of this graph can be
effectively initialized and modified while storing your chosen generic
value in each vertex.
Adjacency Matrix: https://mathworld.wolfram.com/AdjacencyMatrix.html
Potential Future Ideas:
- Add a flag to enable edge weights and a way to set each edge's weight
- Make edge weights and vertex values customizable to store whatever the client wants
- Support multigraph functionality if the client wants it
"""
from __future__ import annotations
import random
import unittest
from pprint import pformat
from typing import TypeVar
import pytest
T = TypeVar("T")
class GraphAdjacencyMatrix[T]:
def __init__(
self, vertices: list[T], edges: list[list[T]], directed: bool = True
) -> None:
"""
Parameters:
- vertices: (list[T]) The list of vertex names the client wants to
pass in. Default is empty.
- edges: (list[list[T]]) The list of edges the client wants to
pass in. Each edge is a 2-element list. Default is empty.
- directed: (bool) Indicates if graph is directed or undirected.
Default is True.
"""
self.directed = directed
self.vertex_to_index: dict[T, int] = {}
self.adj_matrix: list[list[int]] = []
# Falsey checks
edges = edges or []
vertices = vertices or []
for vertex in vertices:
self.add_vertex(vertex)
for edge in edges:
if len(edge) != 2:
msg = f"Invalid input: {edge} must have length 2."
raise ValueError(msg)
self.add_edge(edge[0], edge[1])
def add_edge(self, source_vertex: T, destination_vertex: T) -> None:
"""
Creates an edge from source vertex to destination vertex. If any
given vertex doesn't exist or the edge already exists, a ValueError
will be thrown.
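Example (illustrative usage):
>>> g = GraphAdjacencyMatrix(vertices=["A", "B"], edges=[], directed=True)
>>> g.add_edge("A", "B")
>>> g.adj_matrix
[[0, 1], [0, 0]]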
"""
if not (
self.contains_vertex(source_vertex)
and self.contains_vertex(destination_vertex)
):
msg = (
f"Incorrect input: Either {source_vertex} or "
f"{destination_vertex} does not exist"
)
raise ValueError(msg)
if self.contains_edge(source_vertex, destination_vertex):
msg = (
"Incorrect input: The edge already exists between "
f"{source_vertex} and {destination_vertex}"
)
raise ValueError(msg)
# Get the indices of the corresponding vertices and set their edge value to 1.
u: int = self.vertex_to_index[source_vertex]
v: int = self.vertex_to_index[destination_vertex]
self.adj_matrix[u][v] = 1
if not self.directed:
self.adj_matrix[v][u] = 1
def remove_edge(self, source_vertex: T, destination_vertex: T) -> None:
"""
Removes the edge between the two vertices. If any given vertex
doesn't exist or the edge does not exist, a ValueError will be thrown.
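Example (illustrative usage):
>>> g = GraphAdjacencyMatrix(vertices=["A", "B"], edges=[["A", "B"]], directed=False)
>>> g.remove_edge("A", "B")
>>> g.adj_matrix
[[0, 0], [0, 0]]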
"""
if not (
self.contains_vertex(source_vertex)
and self.contains_vertex(destination_vertex)
):
msg = (
f"Incorrect input: Either {source_vertex} or "
f"{destination_vertex} does not exist"
)
raise ValueError(msg)
if not self.contains_edge(source_vertex, destination_vertex):
msg = (
"Incorrect input: The edge does NOT exist between "
f"{source_vertex} and {destination_vertex}"
)
raise ValueError(msg)
# Get the indices of the corresponding vertices and set their edge value to 0.
u: int = self.vertex_to_index[source_vertex]
v: int = self.vertex_to_index[destination_vertex]
self.adj_matrix[u][v] = 0
if not self.directed:
self.adj_matrix[v][u] = 0
def add_vertex(self, vertex: T) -> None:
"""
Adds a vertex to the graph. If the given vertex already exists,
a ValueError will be thrown.
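Example (illustrative usage):
>>> g = GraphAdjacencyMatrix(vertices=[], edges=[], directed=False)
>>> g.add_vertex("A")
>>> g.vertex_to_index
{'A': 0}
>>> g.adj_matrix
[[0]]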
"""
if self.contains_vertex(vertex):
msg = f"Incorrect input: {vertex} already exists in this graph."
raise ValueError(msg)
# build column for vertex
for row in self.adj_matrix:
row.append(0)
# build row for vertex and update other data structures
self.adj_matrix.append([0] * (len(self.adj_matrix) + 1))
self.vertex_to_index[vertex] = len(self.adj_matrix) - 1
def remove_vertex(self, vertex: T) -> None:
"""
Removes the given vertex from the graph and deletes all incoming and
outgoing edges from the given vertex as well. If the given vertex
does not exist, a ValueError will be thrown.
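Example (illustrative usage):
>>> g = GraphAdjacencyMatrix(vertices=["A", "B", "C"], edges=[["A", "C"]], directed=True)
>>> g.remove_vertex("B")
>>> g.vertex_to_index
{'A': 0, 'C': 1}
>>> g.adj_matrix
[[0, 1], [0, 0]]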
"""
if not self.contains_vertex(vertex):
msg = f"Incorrect input: {vertex} does not exist in this graph."
raise ValueError(msg)
# first slide up the rows by deleting the row corresponding to
# the vertex being deleted.
start_index = self.vertex_to_index[vertex]
self.adj_matrix.pop(start_index)
# next, slide the columns to the left by deleting the values in
# the column corresponding to the vertex being deleted
for lst in self.adj_matrix:
lst.pop(start_index)
# final clean up
self.vertex_to_index.pop(vertex)
# decrement indices for vertices shifted by the deleted vertex in the adj matrix
for inner_vertex in self.vertex_to_index:
if self.vertex_to_index[inner_vertex] >= start_index:
self.vertex_to_index[inner_vertex] = (
self.vertex_to_index[inner_vertex] - 1
)
def contains_vertex(self, vertex: T) -> bool:
"""
Returns True if the graph contains the vertex, False otherwise.
"""
return vertex in self.vertex_to_index
def contains_edge(self, source_vertex: T, destination_vertex: T) -> bool:
"""
Returns True if the graph contains the edge from the source_vertex to the
destination_vertex, False otherwise. If any given vertex doesn't exist, a
ValueError will be thrown.
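Example (illustrative usage):
>>> g = GraphAdjacencyMatrix(vertices=["A", "B"], edges=[["A", "B"]], directed=True)
>>> g.contains_edge("A", "B")
True
>>> g.contains_edge("B", "A")
False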
"""
if not (
self.contains_vertex(source_vertex)
and self.contains_vertex(destination_vertex)
):
msg = (
f"Incorrect input: Either {source_vertex} "
f"or {destination_vertex} does not exist."
)
raise ValueError(msg)
u = self.vertex_to_index[source_vertex]
v = self.vertex_to_index[destination_vertex]
return self.adj_matrix[u][v] == 1
def clear_graph(self) -> None:
"""
Clears all vertices and edges.
"""
self.vertex_to_index = {}
self.adj_matrix = []
def __repr__(self) -> str:
first = "Adj Matrix:\n" + pformat(self.adj_matrix)
second = "\nVertex to index mapping:\n" + pformat(self.vertex_to_index)
return first + second
class TestGraphMatrix(unittest.TestCase):
def __assert_graph_edge_exists_check(
self,
undirected_graph: GraphAdjacencyMatrix,
directed_graph: GraphAdjacencyMatrix,
edge: list[int],
) -> None:
assert undirected_graph.contains_edge(edge[0], edge[1])
assert undirected_graph.contains_edge(edge[1], edge[0])
assert directed_graph.contains_edge(edge[0], edge[1])
def __assert_graph_edge_does_not_exist_check(
self,
undirected_graph: GraphAdjacencyMatrix,
directed_graph: GraphAdjacencyMatrix,
edge: list[int],
) -> None:
assert not undirected_graph.contains_edge(edge[0], edge[1])
assert not undirected_graph.contains_edge(edge[1], edge[0])
assert not directed_graph.contains_edge(edge[0], edge[1])
def __assert_graph_vertex_exists_check(
self,
undirected_graph: GraphAdjacencyMatrix,
directed_graph: GraphAdjacencyMatrix,
vertex: int,
) -> None:
assert undirected_graph.contains_vertex(vertex)
assert directed_graph.contains_vertex(vertex)
def __assert_graph_vertex_does_not_exist_check(
self,
undirected_graph: GraphAdjacencyMatrix,
directed_graph: GraphAdjacencyMatrix,
vertex: int,
) -> None:
assert not undirected_graph.contains_vertex(vertex)
assert not directed_graph.contains_vertex(vertex)
def __generate_random_edges(
self, vertices: list[int], edge_pick_count: int
) -> list[list[int]]:
assert edge_pick_count <= len(vertices)
random_source_vertices: list[int] = random.sample(
vertices[0 : int(len(vertices) / 2)], edge_pick_count
)
random_destination_vertices: list[int] = random.sample(
vertices[int(len(vertices) / 2) :], edge_pick_count
)
random_edges: list[list[int]] = []
for source in random_source_vertices:
for dest in random_destination_vertices:
random_edges.append([source, dest])
return random_edges
def __generate_graphs(
self, vertex_count: int, min_val: int, max_val: int, edge_pick_count: int
) -> tuple[GraphAdjacencyMatrix, GraphAdjacencyMatrix, list[int], list[list[int]]]:
if max_val - min_val + 1 < vertex_count:
raise ValueError(
"Will result in duplicate vertices. Either increase "
"range between min_val and max_val or decrease vertex count"
)
# generate graph input
random_vertices: list[int] = random.sample(
range(min_val, max_val + 1), vertex_count
)
random_edges: list[list[int]] = self.__generate_random_edges(
random_vertices, edge_pick_count
)
# build graphs
undirected_graph = GraphAdjacencyMatrix(
vertices=random_vertices, edges=random_edges, directed=False
)
directed_graph = GraphAdjacencyMatrix(
vertices=random_vertices, edges=random_edges, directed=True
)
return undirected_graph, directed_graph, random_vertices, random_edges
def test_init_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
# test graph initialization with vertices and edges
for num in random_vertices:
self.__assert_graph_vertex_exists_check(
undirected_graph, directed_graph, num
)
for edge in random_edges:
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, edge
)
assert not undirected_graph.directed
assert directed_graph.directed
def test_contains_vertex(self) -> None:
random_vertices: list[int] = random.sample(range(101), 20)
# Build graphs WITHOUT edges
undirected_graph = GraphAdjacencyMatrix(
vertices=random_vertices, edges=[], directed=False
)
directed_graph = GraphAdjacencyMatrix(
vertices=random_vertices, edges=[], directed=True
)
# Test contains_vertex
for num in range(101):
assert (num in random_vertices) == undirected_graph.contains_vertex(num)
assert (num in random_vertices) == directed_graph.contains_vertex(num)
def test_add_vertices(self) -> None:
random_vertices: list[int] = random.sample(range(101), 20)
# build empty graphs
undirected_graph: GraphAdjacencyMatrix = GraphAdjacencyMatrix(
vertices=[], edges=[], directed=False
)
directed_graph: GraphAdjacencyMatrix = GraphAdjacencyMatrix(
vertices=[], edges=[], directed=True
)
# run add_vertex
for num in random_vertices:
undirected_graph.add_vertex(num)
for num in random_vertices:
directed_graph.add_vertex(num)
# test add_vertex worked
for num in random_vertices:
self.__assert_graph_vertex_exists_check(
undirected_graph, directed_graph, num
)
def test_remove_vertices(self) -> None:
random_vertices: list[int] = random.sample(range(101), 20)
# build graphs WITHOUT edges
undirected_graph = GraphAdjacencyMatrix(
vertices=random_vertices, edges=[], directed=False
)
directed_graph = GraphAdjacencyMatrix(
vertices=random_vertices, edges=[], directed=True
)
# test remove_vertex worked
for num in random_vertices:
self.__assert_graph_vertex_exists_check(
undirected_graph, directed_graph, num
)
undirected_graph.remove_vertex(num)
directed_graph.remove_vertex(num)
self.__assert_graph_vertex_does_not_exist_check(
undirected_graph, directed_graph, num
)
def test_add_and_remove_vertices_repeatedly(self) -> None:
random_vertices1: list[int] = random.sample(range(51), 20)
random_vertices2: list[int] = random.sample(range(51, 101), 20)
# build graphs WITHOUT edges
undirected_graph = GraphAdjacencyMatrix(
vertices=random_vertices1, edges=[], directed=False
)
directed_graph = GraphAdjacencyMatrix(
vertices=random_vertices1, edges=[], directed=True
)
# test adding and removing vertices
for i, _ in enumerate(random_vertices1):
undirected_graph.add_vertex(random_vertices2[i])
directed_graph.add_vertex(random_vertices2[i])
self.__assert_graph_vertex_exists_check(
undirected_graph, directed_graph, random_vertices2[i]
)
undirected_graph.remove_vertex(random_vertices1[i])
directed_graph.remove_vertex(random_vertices1[i])
self.__assert_graph_vertex_does_not_exist_check(
undirected_graph, directed_graph, random_vertices1[i]
)
# remove all vertices
for i, _ in enumerate(random_vertices1):
undirected_graph.remove_vertex(random_vertices2[i])
directed_graph.remove_vertex(random_vertices2[i])
self.__assert_graph_vertex_does_not_exist_check(
undirected_graph, directed_graph, random_vertices2[i]
)
def test_contains_edge(self) -> None:
# generate graphs and graph input
vertex_count = 20
(
undirected_graph,
directed_graph,
random_vertices,
random_edges,
) = self.__generate_graphs(vertex_count, 0, 100, 4)
# generate all possible edges for testing
all_possible_edges: list[list[int]] = []
for i in range(vertex_count - 1):
for j in range(i + 1, vertex_count):
all_possible_edges.append([random_vertices[i], random_vertices[j]])
all_possible_edges.append([random_vertices[j], random_vertices[i]])
# test contains_edge function
for edge in all_possible_edges:
if edge in random_edges:
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, edge
)
elif [edge[1], edge[0]] in random_edges:
# since this edge exists for undirected but the reverse may
# not exist for directed
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, [edge[1], edge[0]]
)
else:
self.__assert_graph_edge_does_not_exist_check(
undirected_graph, directed_graph, edge
)
def test_add_edge(self) -> None:
# generate graph input
random_vertices: list[int] = random.sample(range(101), 15)
random_edges: list[list[int]] = self.__generate_random_edges(random_vertices, 4)
# build graphs WITHOUT edges
undirected_graph = GraphAdjacencyMatrix(
vertices=random_vertices, edges=[], directed=False
)
directed_graph = GraphAdjacencyMatrix(
vertices=random_vertices, edges=[], directed=True
)
# run and test add_edge
for edge in random_edges:
undirected_graph.add_edge(edge[0], edge[1])
directed_graph.add_edge(edge[0], edge[1])
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, edge
)
def test_remove_edge(self) -> None:
# generate graph input and graphs
(
undirected_graph,
directed_graph,
_random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
# run and test remove_edge
for edge in random_edges:
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, edge
)
undirected_graph.remove_edge(edge[0], edge[1])
directed_graph.remove_edge(edge[0], edge[1])
self.__assert_graph_edge_does_not_exist_check(
undirected_graph, directed_graph, edge
)
def test_add_and_remove_edges_repeatedly(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
# make some more edge options!
more_random_edges: list[list[int]] = []
while len(more_random_edges) != len(random_edges):
edges: list[list[int]] = self.__generate_random_edges(random_vertices, 4)
for edge in edges:
if len(more_random_edges) == len(random_edges):
break
elif edge not in more_random_edges and edge not in random_edges:
more_random_edges.append(edge)
for i, _ in enumerate(random_edges):
undirected_graph.add_edge(more_random_edges[i][0], more_random_edges[i][1])
directed_graph.add_edge(more_random_edges[i][0], more_random_edges[i][1])
self.__assert_graph_edge_exists_check(
undirected_graph, directed_graph, more_random_edges[i]
)
undirected_graph.remove_edge(random_edges[i][0], random_edges[i][1])
directed_graph.remove_edge(random_edges[i][0], random_edges[i][1])
self.__assert_graph_edge_does_not_exist_check(
undirected_graph, directed_graph, random_edges[i]
)
def test_add_vertex_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
_random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for vertex in random_vertices:
with pytest.raises(ValueError):
undirected_graph.add_vertex(vertex)
with pytest.raises(ValueError):
directed_graph.add_vertex(vertex)
def test_remove_vertex_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
_random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for i in range(101):
if i not in random_vertices:
with pytest.raises(ValueError):
undirected_graph.remove_vertex(i)
with pytest.raises(ValueError):
directed_graph.remove_vertex(i)
def test_add_edge_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
_random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for edge in random_edges:
with pytest.raises(ValueError):
undirected_graph.add_edge(edge[0], edge[1])
with pytest.raises(ValueError):
directed_graph.add_edge(edge[0], edge[1])
def test_remove_edge_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
more_random_edges: list[list[int]] = []
while len(more_random_edges) != len(random_edges):
edges: list[list[int]] = self.__generate_random_edges(random_vertices, 4)
for edge in edges:
if len(more_random_edges) == len(random_edges):
break
elif edge not in more_random_edges and edge not in random_edges:
more_random_edges.append(edge)
for edge in more_random_edges:
with pytest.raises(ValueError):
undirected_graph.remove_edge(edge[0], edge[1])
with pytest.raises(ValueError):
directed_graph.remove_edge(edge[0], edge[1])
def test_contains_edge_exception_check(self) -> None:
(
undirected_graph,
directed_graph,
random_vertices,
_random_edges,
) = self.__generate_graphs(20, 0, 100, 4)
for vertex in random_vertices:
with pytest.raises(ValueError):
undirected_graph.contains_edge(vertex, 102)
with pytest.raises(ValueError):
directed_graph.contains_edge(vertex, 102)
with pytest.raises(ValueError):
undirected_graph.contains_edge(103, 102)
with pytest.raises(ValueError):
directed_graph.contains_edge(103, 102)
if __name__ == "__main__":
unittest.main()
================================================
FILE: graphs/graph_list.py
================================================
#!/usr/bin/env python3
# Author: OMKAR PATHAK, Nwachukwu Chidiebere
# Use a Python dictionary to construct the graph.
from __future__ import annotations
from pprint import pformat
from typing import TypeVar
T = TypeVar("T")
class GraphAdjacencyList[T]:
"""
Adjacency List type Graph Data Structure that accounts for directed and undirected
Graphs. Initialize graph object indicating whether it's directed or undirected.
Directed graph example:
>>> d_graph = GraphAdjacencyList()
>>> print(d_graph)
{}
>>> d_graph.add_edge(0, 1)
{0: [1], 1: []}
>>> d_graph.add_edge(1, 2).add_edge(1, 4).add_edge(1, 5)
{0: [1], 1: [2, 4, 5], 2: [], 4: [], 5: []}
>>> d_graph.add_edge(2, 0).add_edge(2, 6).add_edge(2, 7)
{0: [1], 1: [2, 4, 5], 2: [0, 6, 7], 4: [], 5: [], 6: [], 7: []}
>>> d_graph
{0: [1], 1: [2, 4, 5], 2: [0, 6, 7], 4: [], 5: [], 6: [], 7: []}
>>> print(repr(d_graph))
{0: [1], 1: [2, 4, 5], 2: [0, 6, 7], 4: [], 5: [], 6: [], 7: []}
Undirected graph example:
>>> u_graph = GraphAdjacencyList(directed=False)
>>> u_graph.add_edge(0, 1)
{0: [1], 1: [0]}
>>> u_graph.add_edge(1, 2).add_edge(1, 4).add_edge(1, 5)
{0: [1], 1: [0, 2, 4, 5], 2: [1], 4: [1], 5: [1]}
>>> u_graph.add_edge(2, 0).add_edge(2, 6).add_edge(2, 7)
{0: [1, 2], 1: [0, 2, 4, 5], 2: [1, 0, 6, 7], 4: [1], 5: [1], 6: [2], 7: [2]}
>>> u_graph.add_edge(4, 5)
{0: [1, 2],
1: [0, 2, 4, 5],
2: [1, 0, 6, 7],
4: [1, 5],
5: [1, 4],
6: [2],
7: [2]}
>>> print(u_graph)
{0: [1, 2],
1: [0, 2, 4, 5],
2: [1, 0, 6, 7],
4: [1, 5],
5: [1, 4],
6: [2],
7: [2]}
>>> print(repr(u_graph))
{0: [1, 2],
1: [0, 2, 4, 5],
2: [1, 0, 6, 7],
4: [1, 5],
5: [1, 4],
6: [2],
7: [2]}
>>> char_graph = GraphAdjacencyList(directed=False)
>>> char_graph.add_edge('a', 'b')
{'a': ['b'], 'b': ['a']}
>>> char_graph.add_edge('b', 'c').add_edge('b', 'e').add_edge('b', 'f')
{'a': ['b'], 'b': ['a', 'c', 'e', 'f'], 'c': ['b'], 'e': ['b'], 'f': ['b']}
>>> char_graph
{'a': ['b'], 'b': ['a', 'c', 'e', 'f'], 'c': ['b'], 'e': ['b'], 'f': ['b']}
"""
def __init__(self, directed: bool = True) -> None:
"""
Parameters:
directed: (bool) Indicates if graph is directed or undirected. Default is True.
"""
self.adj_list: dict[T, list[T]] = {} # dictionary of lists
self.directed = directed
def add_edge(
self, source_vertex: T, destination_vertex: T
) -> GraphAdjacencyList[T]:
"""
Connects vertices together. Creates an edge from the source vertex to the
destination vertex.
Vertices will be created if they are not already in the graph.
"""
if not self.directed: # For undirected graphs
# if both the source vertex and the destination vertex are present in the
# adjacency list, add the destination vertex to the source vertex's list of
# adjacent vertices and add the source vertex to the destination vertex's
# list of adjacent vertices.
if source_vertex in self.adj_list and destination_vertex in self.adj_list:
self.adj_list[source_vertex].append(destination_vertex)
self.adj_list[destination_vertex].append(source_vertex)
# if only the source vertex is present in the adjacency list, add the
# destination vertex to the source vertex's list of adjacent vertices, then
# create a new entry for the destination vertex whose list contains the
# source vertex as its first adjacent vertex.
elif source_vertex in self.adj_list:
self.adj_list[source_vertex].append(destination_vertex)
self.adj_list[destination_vertex] = [source_vertex]
# if only the destination vertex is present in the adjacency list, add the
# source vertex to the destination vertex's list of adjacent vertices, then
# create a new entry for the source vertex whose list contains the
# destination vertex as its first adjacent vertex.
elif destination_vertex in self.adj_list:
self.adj_list[destination_vertex].append(source_vertex)
self.adj_list[source_vertex] = [destination_vertex]
# if neither the source vertex nor the destination vertex is present in the
# adjacency list, create a new entry for the source vertex whose list
# contains the destination vertex as its first adjacent vertex, and also
# create a new entry for the destination vertex whose list contains the
# source vertex as its first adjacent vertex.
else:
self.adj_list[source_vertex] = [destination_vertex]
self.adj_list[destination_vertex] = [source_vertex]
# For directed graphs
# if both source vertex and destination vertex are present in adjacency
# list, add destination vertex to source vertex list of adjacent vertices.
elif source_vertex in self.adj_list and destination_vertex in self.adj_list:
self.adj_list[source_vertex].append(destination_vertex)
# if only source vertex is present in adjacency list, add destination
# vertex to source vertex list of adjacent vertices and create a new vertex
# with destination vertex as key, which has no adjacent vertex
elif source_vertex in self.adj_list:
self.adj_list[source_vertex].append(destination_vertex)
self.adj_list[destination_vertex] = []
# if only destination vertex is present in adjacency list, create a new
# vertex with source vertex as key and assign a list containing destination
# vertex as first adjacent vertex
elif destination_vertex in self.adj_list:
self.adj_list[source_vertex] = [destination_vertex]
# if neither the source vertex nor the destination vertex is present in the
# adjacency list, create a new entry for the source vertex whose list
# contains the destination vertex as its first adjacent vertex, then create
# a new entry for the destination vertex with no adjacent vertices.
else:
self.adj_list[source_vertex] = [destination_vertex]
self.adj_list[destination_vertex] = []
return self
def __repr__(self) -> str:
return pformat(self.adj_list)
================================================
FILE: graphs/graphs_floyd_warshall.py
================================================
# floyd_warshall.py
"""
The problem is to find the shortest distance between all pairs of vertices in a
weighted directed graph that can have negative edge weights.
"""
def _print_dist(dist, v):
print("\nThe shortest path matrix using Floyd Warshall algorithm\n")
for i in range(v):
for j in range(v):
if dist[i][j] != float("inf"):
print(int(dist[i][j]), end="\t")
else:
print("INF", end="\t")
print()
def floyd_warshall(graph, v):
"""
:param graph: 2D array calculated from weight[edge[i, j]]
:type graph: List[List[float]]
:param v: number of vertices
:type v: int
:return: shortest distance between all vertex pairs
distance[u][v] will contain the shortest distance from vertex u to v.
1. Initialize distance[i][j] = weight(edge(i, j)) for every edge (i, j) in the graph
(and infinity where no edge exists).
2. The algorithm then performs distance[i][j] = min(distance[i][j], distance[i][k] +
distance[k][j]) for each possible pair i, j of vertices.
3. The above is repeated for each intermediate vertex k in the graph.
4. Whenever distance[i][j] is given a new minimum value, the next vertex[i][j] is
updated to the next vertex[i][k].
"""
dist = [[float("inf") for _ in range(v)] for _ in range(v)]
for i in range(v):
for j in range(v):
dist[i][j] = graph[i][j]
# check vertex k against all other vertices (i, j)
for k in range(v):
# looping through rows of graph array
for i in range(v):
# looping through columns of graph array
for j in range(v):
if (
dist[i][k] != float("inf")
and dist[k][j] != float("inf")
and dist[i][k] + dist[k][j] < dist[i][j]
):
dist[i][j] = dist[i][k] + dist[k][j]
_print_dist(dist, v)
return dist, v
if __name__ == "__main__":
v = int(input("Enter number of vertices: "))
e = int(input("Enter number of edges: "))
graph = [[float("inf") for i in range(v)] for j in range(v)]
for i in range(v):
graph[i][i] = 0.0
# src and dst are 0-based vertex indices and must be in the range [0, v);
# indices outside this range will raise an IndexError
for i in range(e):
print("\nEdge ", i + 1)
src = int(input("Enter source:"))
dst = int(input("Enter destination:"))
weight = float(input("Enter weight:"))
graph[src][dst] = weight
floyd_warshall(graph, v)
# Example Input
# Enter number of vertices: 3
# Enter number of edges: 2
# # generated graph from vertex and edge inputs
# [[inf, inf, inf], [inf, inf, inf], [inf, inf, inf]]
# [[0.0, inf, inf], [inf, 0.0, inf], [inf, inf, 0.0]]
# specify source, destination and weight for edge #1
# Edge 1
# Enter source:1
# Enter destination:2
# Enter weight:2
# specify source, destination and weight for edge #2
# Edge 2
# Enter source:2
# Enter destination:1
# Enter weight:1
# # Expected output from the vertex, edge and src, dst, weight inputs
# 0 INF INF
# INF 0 2
# INF 1 0
================================================
FILE: graphs/greedy_best_first.py
================================================
"""
https://en.wikipedia.org/wiki/Best-first_search#Greedy_BFS
"""
from __future__ import annotations
Path = list[tuple[int, int]]
# 0's are free path whereas 1's are obstacles
TEST_GRIDS = [
[
[0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
],
[
[0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 1, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0],
[1, 0, 0, 1, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0],
],
[
[0, 0, 1, 0, 0],
[0, 1, 0, 0, 0],
[0, 0, 1, 0, 1],
[1, 0, 0, 1, 1],
[0, 0, 0, 0, 0],
],
]
delta = ([-1, 0], [0, -1], [1, 0], [0, 1]) # up, left, down, right
class Node:
"""
>>> k = Node(0, 0, 4, 5, 0, None)
>>> k.calculate_heuristic()
9
>>> n = Node(1, 4, 3, 4, 2, None)
>>> n.calculate_heuristic()
2
>>> l = [k, n]
>>> n == l[0]
False
>>> l.sort()
>>> n == l[0]
True
"""
def __init__(
self,
pos_x: int,
pos_y: int,
goal_x: int,
goal_y: int,
g_cost: float,
parent: Node | None,
):
self.pos_x = pos_x
self.pos_y = pos_y
self.pos = (pos_y, pos_x)
self.goal_x = goal_x
self.goal_y = goal_y
self.g_cost = g_cost
self.parent = parent
self.f_cost = self.calculate_heuristic()
def calculate_heuristic(self) -> float:
"""
The heuristic here is the Manhattan Distance
Could elaborate to offer more than one choice
"""
dx = abs(self.pos_x - self.goal_x)
dy = abs(self.pos_y - self.goal_y)
return dx + dy
def __lt__(self, other) -> bool:
return self.f_cost < other.f_cost
def __eq__(self, other) -> bool:
return self.pos == other.pos
class GreedyBestFirst:
"""
>>> grid = TEST_GRIDS[2]
>>> gbf = GreedyBestFirst(grid, (0, 0), (len(grid) - 1, len(grid[0]) - 1))
>>> [x.pos for x in gbf.get_successors(gbf.start)]
[(1, 0), (0, 1)]
>>> (gbf.start.pos_y + delta[3][0], gbf.start.pos_x + delta[3][1])
(0, 1)
>>> (gbf.start.pos_y + delta[2][0], gbf.start.pos_x + delta[2][1])
(1, 0)
>>> gbf.retrace_path(gbf.start)
[(0, 0)]
>>> gbf.search() # doctest: +NORMALIZE_WHITESPACE
[(0, 0), (1, 0), (2, 0), (2, 1), (3, 1), (4, 1), (4, 2), (4, 3),
(4, 4)]
"""
def __init__(
self, grid: list[list[int]], start: tuple[int, int], goal: tuple[int, int]
):
self.grid = grid
self.start = Node(start[1], start[0], goal[1], goal[0], 0, None)
self.target = Node(goal[1], goal[0], goal[1], goal[0], 99999, None)
self.open_nodes = [self.start]
self.closed_nodes: list[Node] = []
self.reached = False
def search(self) -> Path | None:
"""
Search for the path;
if a path is not found, only the starting position is returned
"""
while self.open_nodes:
# Open Nodes are sorted using __lt__
self.open_nodes.sort()
current_node = self.open_nodes.pop(0)
if current_node.pos == self.target.pos:
self.reached = True
return self.retrace_path(current_node)
self.closed_nodes.append(current_node)
successors = self.get_successors(current_node)
for child_node in successors:
if child_node in self.closed_nodes:
continue
if child_node not in self.open_nodes:
self.open_nodes.append(child_node)
if not self.reached:
return [self.start.pos]
return None
def get_successors(self, parent: Node) -> list[Node]:
"""
Returns a list of successor nodes (neighboring positions that are inside the grid and are free spaces)
"""
return [
Node(
pos_x,
pos_y,
self.target.pos_x,
self.target.pos_y,
parent.g_cost + 1,
parent,
)
for action in delta
if (
0 <= (pos_x := parent.pos_x + action[1]) < len(self.grid[0])
and 0 <= (pos_y := parent.pos_y + action[0]) < len(self.grid)
and self.grid[pos_y][pos_x] == 0
)
]
def retrace_path(self, node: Node | None) -> Path:
"""
Retrace the path by following parent links back to the start node
"""
current_node = node
path = []
while current_node is not None:
path.append((current_node.pos_y, current_node.pos_x))
current_node = current_node.parent
path.reverse()
return path
if __name__ == "__main__":
for idx, grid in enumerate(TEST_GRIDS):
print(f"==grid-{idx + 1}==")
init = (0, 0)
goal = (len(grid) - 1, len(grid[0]) - 1)
for elem in grid:
print(elem)
print("------")
greedy_bf = GreedyBestFirst(grid, init, goal)
path = greedy_bf.search()
if path:
for pos_x, pos_y in path:
grid[pos_x][pos_y] = 2
for elem in grid:
print(elem)
================================================
FILE: graphs/greedy_min_vertex_cover.py
================================================
"""
* Author: Manuel Di Lullo (https://github.com/manueldilullo)
* Description: Approximation algorithm for the minimum vertex cover problem.
Greedy Approach. Uses graphs represented with an adjacency list
URL: https://mathworld.wolfram.com/MinimumVertexCover.html
URL: https://cs.stackexchange.com/questions/129017/greedy-algorithm-for-vertex-cover
"""
import heapq
def greedy_min_vertex_cover(graph: dict) -> set[int]:
"""
Greedy APX Algorithm for min Vertex Cover
@input: graph (graph stored in an adjacency list where each vertex
is represented with an integer)
@example:
>>> graph = {0: [1, 3], 1: [0, 3], 2: [0, 3, 4], 3: [0, 1, 2], 4: [2, 3]}
>>> greedy_min_vertex_cover(graph)
{0, 1, 2, 4}
"""
# queue used to store nodes and their rank
queue: list[list] = []
# for each node and its adjacency list, push the node's rank and the
# (node, adjacency list) pair onto the queue; using the heapq module the
# queue is filled like a priority queue. heapq implements a min-priority
# queue, so -1 * len(value) is used to get max-priority behaviour
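# e.g. a vertex with 3 neighbors is pushed as [-3, (key, neighbors)] and will
# pop before a vertex with 2 neighbors pushed as [-2, (key, neighbors)]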
for key, value in graph.items():
# O(log(n))
heapq.heappush(queue, [-1 * len(value), (key, value)])
# chosen_vertices = set of chosen vertices
chosen_vertices = set()
# while queue isn't empty and there are still edges
# (queue[0][0] is the rank of the node with max rank)
while queue and queue[0][0] != 0:
# extract vertex with max rank from queue and add it to chosen_vertices
argmax = heapq.heappop(queue)[1][0]
chosen_vertices.add(argmax)
# Remove all arcs adjacent to argmax
for elem in queue:
# if the vertex has no adjacent nodes, skip it
if elem[0] == 0:
continue
# if argmax is reachable from elem, remove argmax from elem's
# adjacency list and update elem's rank
if argmax in elem[1][1]:
index = elem[1][1].index(argmax)
del elem[1][1][index]
elem[0] += 1
# re-order the queue
heapq.heapify(queue)
return chosen_vertices
if __name__ == "__main__":
import doctest
doctest.testmod()
graph = {0: [1, 3], 1: [0, 3], 2: [0, 3, 4], 3: [0, 1, 2], 4: [2, 3]}
print(f"Minimum vertex cover:\n{greedy_min_vertex_cover(graph)}")
================================================
FILE: graphs/kahns_algorithm_long.py
================================================
# Finding the longest distance in a Directed Acyclic Graph (DAG) using Kahn's Algorithm
def longest_distance(graph):
indegree = [0] * len(graph)
queue = []
long_dist = [1] * len(graph)
for values in graph.values():
for i in values:
indegree[i] += 1
for i in range(len(indegree)):
if indegree[i] == 0:
queue.append(i)
while queue:
vertex = queue.pop(0)
for x in graph[vertex]:
indegree[x] -= 1
long_dist[x] = max(long_dist[x], long_dist[vertex] + 1)
if indegree[x] == 0:
queue.append(x)
print(max(long_dist))
# Adjacency list of Graph
graph = {0: [2, 3, 4], 1: [2, 7], 2: [5], 3: [5, 7], 4: [7], 5: [6], 6: [7], 7: []}
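# The longest path in this DAG visits 5 vertices (e.g. 0 -> 2 -> 5 -> 6 -> 7),
# so the call below should print 5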
longest_distance(graph)
================================================
FILE: graphs/kahns_algorithm_topo.py
================================================
def topological_sort(graph: dict[int, list[int]]) -> list[int] | None:
"""
Perform topological sorting of a Directed Acyclic Graph (DAG)
using Kahn's Algorithm via Breadth-First Search (BFS).
Topological sorting is a linear ordering of vertices in a graph such that for
every directed edge u → v, vertex u comes before vertex v in the ordering.
Parameters:
graph: Adjacency list representing the directed graph where keys are
vertices, and values are lists of adjacent vertices.
Returns:
The topologically sorted order of vertices if the graph is a DAG.
Returns None if the graph contains a cycle.
Example:
>>> graph = {0: [1, 2], 1: [3], 2: [3], 3: [4, 5], 4: [], 5: []}
>>> topological_sort(graph)
[0, 1, 2, 3, 4, 5]
>>> graph_with_cycle = {0: [1], 1: [2], 2: [0]}
>>> topological_sort(graph_with_cycle)
"""
indegree = [0] * len(graph)
queue = []
topo_order = []
processed_vertices_count = 0
# Calculate the indegree of each vertex
for values in graph.values():
for i in values:
indegree[i] += 1
# Add all vertices with 0 indegree to the queue
for i in range(len(indegree)):
if indegree[i] == 0:
queue.append(i)
# Perform BFS
while queue:
vertex = queue.pop(0)
processed_vertices_count += 1
topo_order.append(vertex)
# Traverse neighbors
for neighbor in graph[vertex]:
indegree[neighbor] -= 1
if indegree[neighbor] == 0:
queue.append(neighbor)
if processed_vertices_count != len(graph):
return None # no topological ordering exists due to cycle
return topo_order # valid topological ordering
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: graphs/karger.py
================================================
"""
An implementation of Karger's Algorithm for partitioning a graph.
"""
from __future__ import annotations
import random
# Adjacency list representation of this graph:
# https://en.wikipedia.org/wiki/File:Single_run_of_Karger%E2%80%99s_Mincut_algorithm.svg
TEST_GRAPH = {
"1": ["2", "3", "4", "5"],
"2": ["1", "3", "4", "5"],
"3": ["1", "2", "4", "5", "10"],
"4": ["1", "2", "3", "5", "6"],
"5": ["1", "2", "3", "4", "7"],
"6": ["7", "8", "9", "10", "4"],
"7": ["6", "8", "9", "10", "5"],
"8": ["6", "7", "9", "10"],
"9": ["6", "7", "8", "10"],
"10": ["6", "7", "8", "9", "3"],
}
def partition_graph(graph: dict[str, list[str]]) -> set[tuple[str, str]]:
"""
Partitions a graph using Karger's Algorithm. Implemented from
pseudocode found here:
https://en.wikipedia.org/wiki/Karger%27s_algorithm.
This function involves random choices, meaning it will not give
consistent outputs.
Args:
graph: A dictionary containing adjacency lists for the graph.
Nodes must be strings.
Returns:
The cutset of the cut found by Karger's Algorithm.
>>> graph = {'0':['1'], '1':['0']}
>>> partition_graph(graph)
{('0', '1')}
"""
# Dict that maps contracted nodes to a list of all the nodes it "contains."
contracted_nodes = {node: {node} for node in graph}
graph_copy = {node: graph[node][:] for node in graph}
while len(graph_copy) > 2:
# Choose a random edge.
u = random.choice(list(graph_copy.keys()))
v = random.choice(graph_copy[u])
# Contract edge (u, v) to new node uv
uv = u + v
uv_neighbors = list(set(graph_copy[u] + graph_copy[v]))
uv_neighbors.remove(u)
uv_neighbors.remove(v)
graph_copy[uv] = uv_neighbors
for neighbor in uv_neighbors:
graph_copy[neighbor].append(uv)
contracted_nodes[uv] = set(contracted_nodes[u].union(contracted_nodes[v]))
# Remove nodes u and v.
del graph_copy[u]
del graph_copy[v]
for neighbor in uv_neighbors:
if u in graph_copy[neighbor]:
graph_copy[neighbor].remove(u)
if v in graph_copy[neighbor]:
graph_copy[neighbor].remove(v)
# Find cutset.
groups = [contracted_nodes[node] for node in graph_copy]
return {
(node, neighbor)
for node in groups[0]
for neighbor in graph[node]
if neighbor in groups[1]
}
if __name__ == "__main__":
print(partition_graph(TEST_GRAPH))
================================================
FILE: graphs/lanczos_eigenvectors.py
================================================
"""
Lanczos Method for Finding Eigenvalues and Eigenvectors of a Graph.
This module demonstrates the Lanczos method to approximate the largest eigenvalues
and corresponding eigenvectors of a symmetric matrix represented as a graph's
adjacency list. The method efficiently handles large, sparse matrices by converting
the graph to a tridiagonal matrix, whose eigenvalues and eigenvectors are then
computed.
Key Functions:
- `find_lanczos_eigenvectors`: Computes the k largest eigenvalues and vectors.
- `lanczos_iteration`: Constructs the tridiagonal matrix and orthonormal basis vectors.
- `multiply_matrix_vector`: Multiplies an adjacency list graph with a vector.
Complexity:
- Time: O(k * n), where k is the number of eigenvalues and n is the matrix size.
- Space: O(n), due to sparse representation and tridiagonal matrix structure.
Further Reading:
- Lanczos Algorithm: https://en.wikipedia.org/wiki/Lanczos_algorithm
- Eigenvector Centrality: https://en.wikipedia.org/wiki/Eigenvector_centrality
Example Usage:
Given a graph represented by an adjacency list, the `find_lanczos_eigenvectors`
function returns the largest eigenvalues and eigenvectors. This can be used to
analyze graph centrality.
"""
import numpy as np
def validate_adjacency_list(graph: list[list[int | None]]) -> None:
"""Validates the adjacency list format for the graph.
Args:
graph: A list of lists where each sublist contains the neighbors of a node.
Raises:
ValueError: If the graph is not a list of lists, or if any node has
invalid neighbors (e.g., out-of-range or non-integer values).
>>> validate_adjacency_list([[1, 2], [0], [0, 1]])
>>> validate_adjacency_list([[]]) # No neighbors, valid case
>>> validate_adjacency_list([[1], [2], [-1]]) # Invalid neighbor
Traceback (most recent call last):
...
ValueError: Invalid neighbor -1 in node 2 adjacency list.
"""
if not isinstance(graph, list):
raise ValueError("Graph should be a list of lists.")
for node_index, neighbors in enumerate(graph):
if not isinstance(neighbors, list):
no_neighbors_message: str = (
f"Node {node_index} should have a list of neighbors."
)
raise ValueError(no_neighbors_message)
for neighbor_index in neighbors:
if (
not isinstance(neighbor_index, int)
or neighbor_index < 0
or neighbor_index >= len(graph)
):
invalid_neighbor_message: str = (
f"Invalid neighbor {neighbor_index} in node {node_index} "
f"adjacency list."
)
raise ValueError(invalid_neighbor_message)
def lanczos_iteration(
graph: list[list[int | None]], num_eigenvectors: int
) -> tuple[np.ndarray, np.ndarray]:
"""Constructs the tridiagonal matrix and orthonormal basis vectors using the
Lanczos method.
Args:
graph: The graph represented as a list of adjacency lists.
num_eigenvectors: The number of largest eigenvalues and eigenvectors
to approximate.
Returns:
A tuple containing:
- tridiagonal_matrix: A (num_eigenvectors x num_eigenvectors) symmetric
matrix.
- orthonormal_basis: A (num_nodes x num_eigenvectors) matrix of orthonormal
basis vectors.
Raises:
ValueError: If num_eigenvectors is less than 1 or greater than the number of
nodes.
>>> graph = [[1, 2], [0, 2], [0, 1]]
>>> T, Q = lanczos_iteration(graph, 2)
>>> T.shape == (2, 2) and Q.shape == (3, 2)
True
"""
num_nodes: int = len(graph)
if not (1 <= num_eigenvectors <= num_nodes):
raise ValueError(
"Number of eigenvectors must be between 1 and the number of "
"nodes in the graph."
)
orthonormal_basis: np.ndarray = np.zeros((num_nodes, num_eigenvectors))
tridiagonal_matrix: np.ndarray = np.zeros((num_eigenvectors, num_eigenvectors))
rng = np.random.default_rng()
initial_vector: np.ndarray = rng.random(num_nodes)
initial_vector /= np.sqrt(np.dot(initial_vector, initial_vector))
orthonormal_basis[:, 0] = initial_vector
prev_beta: float = 0.0
for iter_index in range(num_eigenvectors):
result_vector: np.ndarray = multiply_matrix_vector(
graph, orthonormal_basis[:, iter_index]
)
if iter_index > 0:
result_vector -= prev_beta * orthonormal_basis[:, iter_index - 1]
alpha_value: float = np.dot(orthonormal_basis[:, iter_index], result_vector)
result_vector -= alpha_value * orthonormal_basis[:, iter_index]
prev_beta = np.sqrt(np.dot(result_vector, result_vector))
if iter_index < num_eigenvectors - 1 and prev_beta > 1e-10:
orthonormal_basis[:, iter_index + 1] = result_vector / prev_beta
tridiagonal_matrix[iter_index, iter_index] = alpha_value
if iter_index < num_eigenvectors - 1:
tridiagonal_matrix[iter_index, iter_index + 1] = prev_beta
tridiagonal_matrix[iter_index + 1, iter_index] = prev_beta
return tridiagonal_matrix, orthonormal_basis
def multiply_matrix_vector(
graph: list[list[int | None]], vector: np.ndarray
) -> np.ndarray:
"""Performs multiplication of a graph's adjacency list representation with a vector.
Args:
graph: The adjacency list of the graph.
vector: A 1D numpy array representing the vector to multiply.
Returns:
A numpy array representing the product of the adjacency list and the vector.
Raises:
ValueError: If the vector's length does not match the number of nodes in the
graph.
>>> multiply_matrix_vector([[1, 2], [0, 2], [0, 1]], np.array([1, 1, 1]))
array([2., 2., 2.])
>>> multiply_matrix_vector([[1, 2], [0, 2], [0, 1]], np.array([0, 1, 0]))
array([1., 0., 1.])
"""
num_nodes: int = len(graph)
if vector.shape[0] != num_nodes:
raise ValueError("Vector length must match the number of nodes in the graph.")
result: np.ndarray = np.zeros(num_nodes)
for node_index, neighbors in enumerate(graph):
for neighbor_index in neighbors:
result[node_index] += vector[neighbor_index]
return result
def find_lanczos_eigenvectors(
graph: list[list[int | None]], num_eigenvectors: int
) -> tuple[np.ndarray, np.ndarray]:
"""Computes the largest eigenvalues and their corresponding eigenvectors using the
Lanczos method.
Args:
graph: The graph as a list of adjacency lists.
num_eigenvectors: Number of largest eigenvalues and eigenvectors to compute.
Returns:
A tuple containing:
- eigenvalues: 1D array of the largest eigenvalues in descending order.
- eigenvectors: 2D array where each column is an eigenvector corresponding
to an eigenvalue.
Raises:
ValueError: If the graph format is invalid or num_eigenvectors is out of bounds.
>>> eigenvalues, eigenvectors = find_lanczos_eigenvectors(
... [[1, 2], [0, 2], [0, 1]], 2
... )
>>> len(eigenvalues) == 2 and eigenvectors.shape[1] == 2
True
"""
validate_adjacency_list(graph)
tridiagonal_matrix, orthonormal_basis = lanczos_iteration(graph, num_eigenvectors)
eigenvalues, eigenvectors = np.linalg.eigh(tridiagonal_matrix)
return eigenvalues[::-1], np.dot(orthonormal_basis, eigenvectors[:, ::-1])
def main() -> None:
"""
Main driver function for testing the implementation with doctests.
"""
import doctest
doctest.testmod()
if __name__ == "__main__":
main()
================================================
FILE: graphs/markov_chain.py
================================================
from __future__ import annotations
from collections import Counter
from random import random
class MarkovChainGraphUndirectedUnweighted:
"""
Undirected Unweighted Graph for running Markov Chain Algorithm
"""
def __init__(self):
self.connections = {}
def add_node(self, node: str) -> None:
self.connections[node] = {}
def add_transition_probability(
self, node1: str, node2: str, probability: float
) -> None:
if node1 not in self.connections:
self.add_node(node1)
if node2 not in self.connections:
self.add_node(node2)
self.connections[node1][node2] = probability
def get_nodes(self) -> list[str]:
return list(self.connections)
def transition(self, node: str) -> str:
current_probability = 0
random_value = random()
for dest in self.connections[node]:
current_probability += self.connections[node][dest]
if current_probability > random_value:
return dest
return ""
def get_transitions(
start: str, transitions: list[tuple[str, str, float]], steps: int
) -> dict[str, int]:
"""
Running Markov Chain algorithm and calculating the number of times each node is
visited
>>> transitions = [
... ('a', 'a', 0.9),
... ('a', 'b', 0.075),
... ('a', 'c', 0.025),
... ('b', 'a', 0.15),
... ('b', 'b', 0.8),
... ('b', 'c', 0.05),
... ('c', 'a', 0.25),
... ('c', 'b', 0.25),
... ('c', 'c', 0.5)
... ]
>>> result = get_transitions('a', transitions, 5000)
>>> result['a'] > result['b'] > result['c']
True
"""
graph = MarkovChainGraphUndirectedUnweighted()
for node1, node2, probability in transitions:
graph.add_transition_probability(node1, node2, probability)
visited = Counter(graph.get_nodes())
node = start
for _ in range(steps):
node = graph.transition(node)
visited[node] += 1
return visited
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: graphs/matching_min_vertex_cover.py
================================================
"""
* Author: Manuel Di Lullo (https://github.com/manueldilullo)
* Description: Approximation algorithm for the minimum vertex cover problem.
Matching Approach. Uses graphs represented with an adjacency list
URL: https://mathworld.wolfram.com/MinimumVertexCover.html
URL: https://www.princeton.edu/~aaa/Public/Teaching/ORF523/ORF523_Lec6.pdf
"""
def matching_min_vertex_cover(graph: dict) -> set:
"""
APX Algorithm for min Vertex Cover using Matching Approach
@input: graph (graph stored in an adjacency list where each vertex
is represented as an integer)
@example:
>>> graph = {0: [1, 3], 1: [0, 3], 2: [0, 3, 4], 3: [0, 1, 2], 4: [2, 3]}
>>> matching_min_vertex_cover(graph)
{0, 1, 2, 4}
"""
# chosen_vertices = set of chosen vertices
chosen_vertices = set()
# edges = list of graph's edges
edges = get_edges(graph)
    # While there are still edges left, take an arbitrary edge
    # (from_node, to_node), add both of its endpoints to chosen_vertices and
    # then remove all edges incident to from_node or to_node
while edges:
from_node, to_node = edges.pop()
chosen_vertices.add(from_node)
chosen_vertices.add(to_node)
for edge in edges.copy():
if from_node in edge or to_node in edge:
edges.discard(edge)
return chosen_vertices
def get_edges(graph: dict) -> set:
"""
    Return a set of pairs representing all of the edges.
@input: graph (graph stored in an adjacency list where each vertex is
represented as an integer)
@example:
>>> graph = {0: [1, 3], 1: [0, 3], 2: [0, 3], 3: [0, 1, 2]}
>>> get_edges(graph)
{(0, 1), (3, 1), (0, 3), (2, 0), (3, 0), (2, 3), (1, 0), (3, 2), (1, 3)}
"""
edges = set()
for from_node, to_nodes in graph.items():
for to_node in to_nodes:
edges.add((from_node, to_node))
return edges
if __name__ == "__main__":
import doctest
doctest.testmod()
# graph = {0: [1, 3], 1: [0, 3], 2: [0, 3, 4], 3: [0, 1, 2], 4: [2, 3]}
# print(f"Matching vertex cover:\n{matching_min_vertex_cover(graph)}")
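# Illustrative check (added for exposition): assuming matching_min_vertex_cover
# and get_edges behave as documented above, every edge of the sample graph has
# at least one endpoint in the returned set, which is what makes it a vertex
# cover.
if __name__ == "__main__":
    sample_graph = {0: [1, 3], 1: [0, 3], 2: [0, 3, 4], 3: [0, 1, 2], 4: [2, 3]}
    sample_cover = matching_min_vertex_cover(sample_graph)
    sample_edges = get_edges(sample_graph)
    assert all(u in sample_cover or v in sample_cover for u, v in sample_edges)
    print(f"Vertex cover found: {sample_cover}")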
================================================
FILE: graphs/minimum_path_sum.py
================================================
def min_path_sum(grid: list) -> int:
"""
    Find the path from the top left to the bottom right of a grid of numbers
    with the lowest possible sum and return the sum along this path.
>>> min_path_sum([
... [1, 3, 1],
... [1, 5, 1],
... [4, 2, 1],
... ])
7
>>> min_path_sum([
... [1, 0, 5, 6, 7],
... [8, 9, 0, 4, 2],
... [4, 4, 4, 5, 1],
... [9, 6, 3, 1, 0],
... [8, 4, 3, 2, 7],
... ])
20
>>> min_path_sum(None)
Traceback (most recent call last):
...
TypeError: The grid does not contain the appropriate information
>>> min_path_sum([[]])
Traceback (most recent call last):
...
TypeError: The grid does not contain the appropriate information
"""
if not grid or not grid[0]:
raise TypeError("The grid does not contain the appropriate information")
for cell_n in range(1, len(grid[0])):
grid[0][cell_n] += grid[0][cell_n - 1]
row_above = grid[0]
for row_n in range(1, len(grid)):
current_row = grid[row_n]
grid[row_n] = fill_row(current_row, row_above)
row_above = grid[row_n]
return grid[-1][-1]
def fill_row(current_row: list, row_above: list) -> list:
"""
>>> fill_row([2, 2, 2], [1, 2, 3])
[3, 4, 5]
"""
current_row[0] += row_above[0]
for cell_n in range(1, len(current_row)):
current_row[cell_n] += min(current_row[cell_n - 1], row_above[cell_n])
return current_row
if __name__ == "__main__":
import doctest
doctest.testmod()
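# Worked trace (added for exposition, assuming min_path_sum and fill_row behave
# as their doctests above describe) for the grid [[1, 3, 1], [1, 5, 1], [4, 2, 1]]:
#   after the first row is accumulated:  [1, 4, 5]
#   after fill_row on the second row:    [2, 7, 6]
#   after fill_row on the third row:     [6, 8, 7]
# so min_path_sum returns grid[-1][-1] == 7 (the path 1 -> 3 -> 1 -> 1 -> 1).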
================================================
FILE: graphs/minimum_spanning_tree_boruvka.py
================================================
class Graph:
"""
Data structure to store graphs (based on adjacency lists)
"""
def __init__(self):
self.num_vertices = 0
self.num_edges = 0
self.adjacency = {}
def add_vertex(self, vertex):
"""
Adds a vertex to the graph
"""
if vertex not in self.adjacency:
self.adjacency[vertex] = {}
self.num_vertices += 1
def add_edge(self, head, tail, weight):
"""
Adds an edge to the graph
"""
self.add_vertex(head)
self.add_vertex(tail)
if head == tail:
return
self.adjacency[head][tail] = weight
self.adjacency[tail][head] = weight
def distinct_weight(self):
"""
        For Boruvka's algorithm the weights should be distinct
Converts the weights to be distinct
"""
edges = self.get_edges()
for edge in edges:
head, tail, weight = edge
edges.remove((tail, head, weight))
for i in range(len(edges)):
edges[i] = list(edges[i])
edges.sort(key=lambda e: e[2])
for i in range(len(edges) - 1):
if edges[i][2] >= edges[i + 1][2]:
edges[i + 1][2] = edges[i][2] + 1
for edge in edges:
head, tail, weight = edge
self.adjacency[head][tail] = weight
self.adjacency[tail][head] = weight
def __str__(self):
"""
Returns string representation of the graph
"""
string = ""
for tail in self.adjacency:
for head in self.adjacency[tail]:
weight = self.adjacency[head][tail]
string += f"{head} -> {tail} == {weight}\n"
return string.rstrip("\n")
def get_edges(self):
"""
        Returns all edges in the graph
"""
output = []
for tail in self.adjacency:
for head in self.adjacency[tail]:
output.append((tail, head, self.adjacency[head][tail]))
return output
def get_vertices(self):
"""
Returns all vertices in the graph
"""
return self.adjacency.keys()
@staticmethod
def build(vertices=None, edges=None):
"""
Builds a graph from the given set of vertices and edges
"""
g = Graph()
if vertices is None:
vertices = []
if edges is None:
            edges = []
for vertex in vertices:
g.add_vertex(vertex)
for edge in edges:
g.add_edge(*edge)
return g
class UnionFind:
"""
Disjoint set Union and Find for Boruvka's algorithm
"""
def __init__(self):
self.parent = {}
self.rank = {}
def __len__(self):
return len(self.parent)
def make_set(self, item):
if item in self.parent:
return self.find(item)
self.parent[item] = item
self.rank[item] = 0
return item
def find(self, item):
if item not in self.parent:
return self.make_set(item)
if item != self.parent[item]:
self.parent[item] = self.find(self.parent[item])
return self.parent[item]
def union(self, item1, item2):
root1 = self.find(item1)
root2 = self.find(item2)
if root1 == root2:
return root1
if self.rank[root1] > self.rank[root2]:
self.parent[root2] = root1
return root1
if self.rank[root1] < self.rank[root2]:
self.parent[root1] = root2
return root2
if self.rank[root1] == self.rank[root2]:
self.rank[root1] += 1
self.parent[root2] = root1
return root1
return None
@staticmethod
def boruvka_mst(graph):
"""
Implementation of Boruvka's algorithm
>>> g = Graph()
>>> g = Graph.build([0, 1, 2, 3], [[0, 1, 1], [0, 2, 1],[2, 3, 1]])
>>> g.distinct_weight()
>>> bg = Graph.boruvka_mst(g)
>>> print(bg)
1 -> 0 == 1
2 -> 0 == 2
0 -> 1 == 1
0 -> 2 == 2
3 -> 2 == 3
2 -> 3 == 3
"""
num_components = graph.num_vertices
union_find = Graph.UnionFind()
mst_edges = []
while num_components > 1:
cheap_edge = {}
for vertex in graph.get_vertices():
cheap_edge[vertex] = -1
edges = graph.get_edges()
for edge in edges:
head, tail, weight = edge
edges.remove((tail, head, weight))
for edge in edges:
head, tail, weight = edge
set1 = union_find.find(head)
set2 = union_find.find(tail)
if set1 != set2:
if cheap_edge[set1] == -1 or cheap_edge[set1][2] > weight:
cheap_edge[set1] = [head, tail, weight]
if cheap_edge[set2] == -1 or cheap_edge[set2][2] > weight:
cheap_edge[set2] = [head, tail, weight]
for head_tail_weight in cheap_edge.values():
if head_tail_weight != -1:
head, tail, weight = head_tail_weight
if union_find.find(head) != union_find.find(tail):
union_find.union(head, tail)
mst_edges.append(head_tail_weight)
num_components = num_components - 1
mst = Graph.build(edges=mst_edges)
return mst
================================================
FILE: graphs/minimum_spanning_tree_kruskal.py
================================================
def kruskal(
num_nodes: int, edges: list[tuple[int, int, int]]
) -> list[tuple[int, int, int]]:
"""
>>> kruskal(4, [(0, 1, 3), (1, 2, 5), (2, 3, 1)])
[(2, 3, 1), (0, 1, 3), (1, 2, 5)]
>>> kruskal(4, [(0, 1, 3), (1, 2, 5), (2, 3, 1), (0, 2, 1), (0, 3, 2)])
[(2, 3, 1), (0, 2, 1), (0, 1, 3)]
>>> kruskal(4, [(0, 1, 3), (1, 2, 5), (2, 3, 1), (0, 2, 1), (0, 3, 2),
... (2, 1, 1)])
[(2, 3, 1), (0, 2, 1), (2, 1, 1)]
"""
edges = sorted(edges, key=lambda edge: edge[2])
parent = list(range(num_nodes))
def find_parent(i):
if i != parent[i]:
parent[i] = find_parent(parent[i])
return parent[i]
minimum_spanning_tree_cost = 0
minimum_spanning_tree = []
for edge in edges:
parent_a = find_parent(edge[0])
parent_b = find_parent(edge[1])
if parent_a != parent_b:
minimum_spanning_tree_cost += edge[2]
minimum_spanning_tree.append(edge)
parent[parent_a] = parent_b
return minimum_spanning_tree
if __name__ == "__main__": # pragma: no cover
num_nodes, num_edges = list(map(int, input().strip().split()))
edges = []
for _ in range(num_edges):
node1, node2, cost = (int(x) for x in input().strip().split())
edges.append((node1, node2, cost))
kruskal(num_nodes, edges)
================================================
FILE: graphs/minimum_spanning_tree_kruskal2.py
================================================
from __future__ import annotations
from typing import TypeVar
T = TypeVar("T")
class DisjointSetTreeNode[T]:
# Disjoint Set Node to store the parent and rank
def __init__(self, data: T) -> None:
self.data = data
self.parent = self
self.rank = 0
class DisjointSetTree[T]:
# Disjoint Set DataStructure
def __init__(self) -> None:
# map from node name to the node object
self.map: dict[T, DisjointSetTreeNode[T]] = {}
def make_set(self, data: T) -> None:
# create a new set with x as its member
self.map[data] = DisjointSetTreeNode(data)
def find_set(self, data: T) -> DisjointSetTreeNode[T]:
# find the set x belongs to (with path-compression)
elem_ref = self.map[data]
if elem_ref != elem_ref.parent:
elem_ref.parent = self.find_set(elem_ref.parent.data)
return elem_ref.parent
def link(
self, node1: DisjointSetTreeNode[T], node2: DisjointSetTreeNode[T]
) -> None:
# helper function for union operation
if node1.rank > node2.rank:
node2.parent = node1
else:
node1.parent = node2
if node1.rank == node2.rank:
node2.rank += 1
def union(self, data1: T, data2: T) -> None:
# merge 2 disjoint sets
self.link(self.find_set(data1), self.find_set(data2))
class GraphUndirectedWeighted[T]:
def __init__(self) -> None:
# connections: map from the node to the neighbouring nodes (with weights)
self.connections: dict[T, dict[T, int]] = {}
def add_node(self, node: T) -> None:
        # add a node ONLY if it's not present in the graph
if node not in self.connections:
self.connections[node] = {}
def add_edge(self, node1: T, node2: T, weight: int) -> None:
# add an edge with the given weight
self.add_node(node1)
self.add_node(node2)
self.connections[node1][node2] = weight
self.connections[node2][node1] = weight
def kruskal(self) -> GraphUndirectedWeighted[T]:
# Kruskal's Algorithm to generate a Minimum Spanning Tree (MST) of a graph
"""
Details: https://en.wikipedia.org/wiki/Kruskal%27s_algorithm
Example:
>>> g1 = GraphUndirectedWeighted[int]()
>>> g1.add_edge(1, 2, 1)
>>> g1.add_edge(2, 3, 2)
>>> g1.add_edge(3, 4, 1)
>>> g1.add_edge(3, 5, 100) # Removed in MST
>>> g1.add_edge(4, 5, 5)
>>> assert 5 in g1.connections[3]
>>> mst = g1.kruskal()
>>> assert 5 not in mst.connections[3]
>>> g2 = GraphUndirectedWeighted[str]()
>>> g2.add_edge('A', 'B', 1)
>>> g2.add_edge('B', 'C', 2)
>>> g2.add_edge('C', 'D', 1)
>>> g2.add_edge('C', 'E', 100) # Removed in MST
>>> g2.add_edge('D', 'E', 5)
>>> assert 'E' in g2.connections["C"]
>>> mst = g2.kruskal()
>>> assert 'E' not in mst.connections['C']
"""
# getting the edges in ascending order of weights
edges = []
seen = set()
for start in self.connections:
for end in self.connections[start]:
if (start, end) not in seen:
seen.add((end, start))
edges.append((start, end, self.connections[start][end]))
edges.sort(key=lambda x: x[2])
# creating the disjoint set
disjoint_set = DisjointSetTree[T]()
for node in self.connections:
disjoint_set.make_set(node)
# MST generation
num_edges = 0
index = 0
graph = GraphUndirectedWeighted[T]()
while num_edges < len(self.connections) - 1:
u, v, w = edges[index]
index += 1
parent_u = disjoint_set.find_set(u)
parent_v = disjoint_set.find_set(v)
if parent_u != parent_v:
num_edges += 1
graph.add_edge(u, v, w)
disjoint_set.union(u, v)
return graph
================================================
FILE: graphs/minimum_spanning_tree_prims.py
================================================
import sys
from collections import defaultdict
class Heap:
def __init__(self):
self.node_position = []
def get_position(self, vertex):
return self.node_position[vertex]
def set_position(self, vertex, pos):
self.node_position[vertex] = pos
def top_to_bottom(self, heap, start, size, positions):
if start > size // 2 - 1:
return
else:
if 2 * start + 2 >= size: # noqa: SIM114
smallest_child = 2 * start + 1
elif heap[2 * start + 1] < heap[2 * start + 2]:
smallest_child = 2 * start + 1
else:
smallest_child = 2 * start + 2
if heap[smallest_child] < heap[start]:
temp, temp1 = heap[smallest_child], positions[smallest_child]
heap[smallest_child], positions[smallest_child] = (
heap[start],
positions[start],
)
heap[start], positions[start] = temp, temp1
temp = self.get_position(positions[smallest_child])
self.set_position(
positions[smallest_child], self.get_position(positions[start])
)
self.set_position(positions[start], temp)
self.top_to_bottom(heap, smallest_child, size, positions)
# Update function if value of any node in min-heap decreases
def bottom_to_top(self, val, index, heap, position):
temp = position[index]
while index != 0:
parent = int((index - 2) / 2) if index % 2 == 0 else int((index - 1) / 2)
if val < heap[parent]:
heap[index] = heap[parent]
position[index] = position[parent]
self.set_position(position[parent], index)
else:
heap[index] = val
position[index] = temp
self.set_position(temp, index)
break
index = parent
else:
heap[0] = val
position[0] = temp
self.set_position(temp, 0)
def heapify(self, heap, positions):
start = len(heap) // 2 - 1
for i in range(start, -1, -1):
self.top_to_bottom(heap, i, len(heap), positions)
def delete_minimum(self, heap, positions):
temp = positions[0]
heap[0] = sys.maxsize
self.top_to_bottom(heap, 0, len(heap), positions)
return temp
def prisms_algorithm(adjacency_list):
"""
>>> adjacency_list = {0: [[1, 1], [3, 3]],
... 1: [[0, 1], [2, 6], [3, 5], [4, 1]],
... 2: [[1, 6], [4, 5], [5, 2]],
... 3: [[0, 3], [1, 5], [4, 1]],
... 4: [[1, 1], [2, 5], [3, 1], [5, 4]],
... 5: [[2, 2], [4, 4]]}
>>> prisms_algorithm(adjacency_list)
[(0, 1), (1, 4), (4, 3), (4, 5), (5, 2)]
"""
heap = Heap()
visited = [0] * len(adjacency_list)
nbr_tv = [-1] * len(adjacency_list) # Neighboring Tree Vertex of selected vertex
# Minimum Distance of explored vertex with neighboring vertex of partial tree
# formed in graph
distance_tv = [] # Heap of Distance of vertices from their neighboring vertex
positions = []
for vertex in range(len(adjacency_list)):
distance_tv.append(sys.maxsize)
positions.append(vertex)
heap.node_position.append(vertex)
tree_edges = []
visited[0] = 1
distance_tv[0] = sys.maxsize
for neighbor, distance in adjacency_list[0]:
nbr_tv[neighbor] = 0
distance_tv[neighbor] = distance
heap.heapify(distance_tv, positions)
for _ in range(1, len(adjacency_list)):
vertex = heap.delete_minimum(distance_tv, positions)
if visited[vertex] == 0:
tree_edges.append((nbr_tv[vertex], vertex))
visited[vertex] = 1
for neighbor, distance in adjacency_list[vertex]:
if (
visited[neighbor] == 0
and distance < distance_tv[heap.get_position(neighbor)]
):
distance_tv[heap.get_position(neighbor)] = distance
heap.bottom_to_top(
distance, heap.get_position(neighbor), distance_tv, positions
)
nbr_tv[neighbor] = vertex
return tree_edges
if __name__ == "__main__": # pragma: no cover
# < --------- Prims Algorithm --------- >
edges_number = int(input("Enter number of edges: ").strip())
adjacency_list = defaultdict(list)
for _ in range(edges_number):
edge = [int(x) for x in input().strip().split()]
adjacency_list[edge[0]].append([edge[1], edge[2]])
adjacency_list[edge[1]].append([edge[0], edge[2]])
print(prisms_algorithm(adjacency_list))
================================================
FILE: graphs/minimum_spanning_tree_prims2.py
================================================
"""
Prim's (also known as Jarník's) algorithm is a greedy algorithm that finds a minimum
spanning tree for a weighted undirected graph. This means it finds a subset of the
edges that forms a tree that includes every vertex, where the total weight of all the
edges in the tree is minimized. The algorithm operates by building this tree one vertex
at a time, from an arbitrary starting vertex, at each step adding the cheapest possible
connection from the tree to another vertex.
"""
from __future__ import annotations
from sys import maxsize
from typing import TypeVar
T = TypeVar("T")
def get_parent_position(position: int) -> int:
"""
    heap helper function to get the position of the parent of the current node
>>> get_parent_position(1)
0
>>> get_parent_position(2)
0
"""
return (position - 1) // 2
def get_child_left_position(position: int) -> int:
"""
    heap helper function to get the position of the left child of the current node
>>> get_child_left_position(0)
1
"""
return (2 * position) + 1
def get_child_right_position(position: int) -> int:
"""
    heap helper function to get the position of the right child of the current node
>>> get_child_right_position(0)
2
"""
return (2 * position) + 2
class MinPriorityQueue[T]:
"""
Minimum Priority Queue Class
Functions:
is_empty: function to check if the priority queue is empty
push: function to add an element with given priority to the queue
extract_min: function to remove and return the element with lowest weight (highest
priority)
update_key: function to update the weight of the given key
_bubble_up: helper function to place a node at the proper position (upward
movement)
_bubble_down: helper function to place a node at the proper position (downward
movement)
_swap_nodes: helper function to swap the nodes at the given positions
>>> queue = MinPriorityQueue()
>>> queue.push(1, 1000)
>>> queue.push(2, 100)
>>> queue.push(3, 4000)
>>> queue.push(4, 3000)
>>> queue.extract_min()
2
>>> queue.update_key(4, 50)
>>> queue.extract_min()
4
>>> queue.extract_min()
1
>>> queue.extract_min()
3
"""
def __init__(self) -> None:
self.heap: list[tuple[T, int]] = []
self.position_map: dict[T, int] = {}
self.elements: int = 0
def __len__(self) -> int:
return self.elements
def __repr__(self) -> str:
return str(self.heap)
def is_empty(self) -> bool:
# Check if the priority queue is empty
return self.elements == 0
def push(self, elem: T, weight: int) -> None:
# Add an element with given priority to the queue
self.heap.append((elem, weight))
self.position_map[elem] = self.elements
self.elements += 1
self._bubble_up(elem)
def extract_min(self) -> T:
# Remove and return the element with lowest weight (highest priority)
if self.elements > 1:
self._swap_nodes(0, self.elements - 1)
elem, _ = self.heap.pop()
del self.position_map[elem]
self.elements -= 1
if self.elements > 0:
bubble_down_elem, _ = self.heap[0]
self._bubble_down(bubble_down_elem)
return elem
def update_key(self, elem: T, weight: int) -> None:
# Update the weight of the given key
position = self.position_map[elem]
self.heap[position] = (elem, weight)
if position > 0:
parent_position = get_parent_position(position)
_, parent_weight = self.heap[parent_position]
if parent_weight > weight:
self._bubble_up(elem)
else:
self._bubble_down(elem)
else:
self._bubble_down(elem)
def _bubble_up(self, elem: T) -> None:
# Place a node at the proper position (upward movement) [to be used internally
# only]
curr_pos = self.position_map[elem]
if curr_pos == 0:
return None
parent_position = get_parent_position(curr_pos)
_, weight = self.heap[curr_pos]
_, parent_weight = self.heap[parent_position]
if parent_weight > weight:
self._swap_nodes(parent_position, curr_pos)
return self._bubble_up(elem)
return None
def _bubble_down(self, elem: T) -> None:
# Place a node at the proper position (downward movement) [to be used
# internally only]
curr_pos = self.position_map[elem]
_, weight = self.heap[curr_pos]
child_left_position = get_child_left_position(curr_pos)
child_right_position = get_child_right_position(curr_pos)
if child_left_position < self.elements and child_right_position < self.elements:
_, child_left_weight = self.heap[child_left_position]
_, child_right_weight = self.heap[child_right_position]
if child_right_weight < child_left_weight and child_right_weight < weight:
self._swap_nodes(child_right_position, curr_pos)
return self._bubble_down(elem)
if child_left_position < self.elements:
_, child_left_weight = self.heap[child_left_position]
if child_left_weight < weight:
self._swap_nodes(child_left_position, curr_pos)
return self._bubble_down(elem)
else:
return None
if child_right_position < self.elements:
_, child_right_weight = self.heap[child_right_position]
if child_right_weight < weight:
self._swap_nodes(child_right_position, curr_pos)
return self._bubble_down(elem)
return None
def _swap_nodes(self, node1_pos: int, node2_pos: int) -> None:
# Swap the nodes at the given positions
node1_elem = self.heap[node1_pos][0]
node2_elem = self.heap[node2_pos][0]
self.heap[node1_pos], self.heap[node2_pos] = (
self.heap[node2_pos],
self.heap[node1_pos],
)
self.position_map[node1_elem] = node2_pos
self.position_map[node2_elem] = node1_pos
class GraphUndirectedWeighted[T]:
"""
Graph Undirected Weighted Class
Functions:
add_node: function to add a node in the graph
add_edge: function to add an edge between 2 nodes in the graph
"""
def __init__(self) -> None:
self.connections: dict[T, dict[T, int]] = {}
self.nodes: int = 0
def __repr__(self) -> str:
return str(self.connections)
def __len__(self) -> int:
return self.nodes
def add_node(self, node: T) -> None:
# Add a node in the graph if it is not in the graph
if node not in self.connections:
self.connections[node] = {}
self.nodes += 1
def add_edge(self, node1: T, node2: T, weight: int) -> None:
# Add an edge between 2 nodes in the graph
self.add_node(node1)
self.add_node(node2)
self.connections[node1][node2] = weight
self.connections[node2][node1] = weight
def prims_algo[T](
graph: GraphUndirectedWeighted[T],
) -> tuple[dict[T, int], dict[T, T | None]]:
"""
>>> graph = GraphUndirectedWeighted()
>>> graph.add_edge("a", "b", 3)
>>> graph.add_edge("b", "c", 10)
>>> graph.add_edge("c", "d", 5)
>>> graph.add_edge("a", "c", 15)
>>> graph.add_edge("b", "d", 100)
>>> dist, parent = prims_algo(graph)
>>> abs(dist["a"] - dist["b"])
3
>>> abs(dist["d"] - dist["b"])
15
>>> abs(dist["a"] - dist["c"])
13
"""
# prim's algorithm for minimum spanning tree
dist: dict[T, int] = dict.fromkeys(graph.connections, maxsize)
parent: dict[T, T | None] = dict.fromkeys(graph.connections)
priority_queue: MinPriorityQueue[T] = MinPriorityQueue()
for node, weight in dist.items():
priority_queue.push(node, weight)
if priority_queue.is_empty():
return dist, parent
# initialization
node = priority_queue.extract_min()
dist[node] = 0
for neighbour in graph.connections[node]:
if dist[neighbour] > dist[node] + graph.connections[node][neighbour]:
dist[neighbour] = dist[node] + graph.connections[node][neighbour]
priority_queue.update_key(neighbour, dist[neighbour])
parent[neighbour] = node
# running prim's algorithm
while not priority_queue.is_empty():
node = priority_queue.extract_min()
for neighbour in graph.connections[node]:
if dist[neighbour] > dist[node] + graph.connections[node][neighbour]:
dist[neighbour] = dist[node] + graph.connections[node][neighbour]
priority_queue.update_key(neighbour, dist[neighbour])
parent[neighbour] = node
return dist, parent
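# Illustrative usage sketch (added for exposition): it assumes
# GraphUndirectedWeighted and prims_algo behave exactly as the doctest above
# describes. The parent map encodes the MST edges and dist holds each node's
# connection cost into the partial tree.
if __name__ == "__main__":
    demo_graph = GraphUndirectedWeighted()
    demo_graph.add_edge("a", "b", 3)
    demo_graph.add_edge("b", "c", 10)
    demo_graph.add_edge("c", "d", 5)
    demo_graph.add_edge("a", "c", 15)
    demo_graph.add_edge("b", "d", 100)
    demo_dist, demo_parent = prims_algo(demo_graph)
    print(f"dist: {demo_dist}")
    print(f"parent: {demo_parent}")  # expected MST edges: b-a, c-b, d-c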
================================================
FILE: graphs/multi_heuristic_astar.py
================================================
import heapq
import sys
import numpy as np
TPos = tuple[int, int]
class PriorityQueue:
def __init__(self):
self.elements = []
self.set = set()
def minkey(self):
if not self.empty():
return self.elements[0][0]
else:
return float("inf")
def empty(self):
return len(self.elements) == 0
def put(self, item, priority):
if item not in self.set:
heapq.heappush(self.elements, (priority, item))
self.set.add(item)
else:
# update
# print("update", item)
temp = []
(pri, x) = heapq.heappop(self.elements)
while x != item:
temp.append((pri, x))
(pri, x) = heapq.heappop(self.elements)
temp.append((priority, item))
for pro, xxx in temp:
heapq.heappush(self.elements, (pro, xxx))
def remove_element(self, item):
if item in self.set:
self.set.remove(item)
temp = []
(pro, x) = heapq.heappop(self.elements)
while x != item:
temp.append((pro, x))
(pro, x) = heapq.heappop(self.elements)
for prito, yyy in temp:
heapq.heappush(self.elements, (prito, yyy))
def top_show(self):
return self.elements[0][1]
def get(self):
(priority, item) = heapq.heappop(self.elements)
self.set.remove(item)
return (priority, item)
def consistent_heuristic(p: TPos, goal: TPos):
# euclidean distance
a = np.array(p)
b = np.array(goal)
return np.linalg.norm(a - b)
def heuristic_2(p: TPos, goal: TPos):
# integer division by time variable
return consistent_heuristic(p, goal) // t
def heuristic_1(p: TPos, goal: TPos):
# manhattan distance
return abs(p[0] - goal[0]) + abs(p[1] - goal[1])
def key(start: TPos, i: int, goal: TPos, g_function: dict[TPos, float]):
ans = g_function[start] + W1 * heuristics[i](start, goal)
return ans
def do_something(back_pointer, goal, start):
grid = np.char.chararray((n, n))
for i in range(n):
for j in range(n):
grid[i][j] = "*"
for i in range(n):
for j in range(n):
if (j, (n - 1) - i) in blocks:
grid[i][j] = "#"
grid[0][(n - 1)] = "-"
x = back_pointer[goal]
while x != start:
(x_c, y_c) = x
# print(x)
grid[(n - 1) - y_c][x_c] = "-"
x = back_pointer[x]
grid[(n - 1)][0] = "-"
for i in range(n):
for j in range(n):
if (i, j) == (0, n - 1):
print(grid[i][j], end=" ")
print("<-- End position", end=" ")
else:
print(grid[i][j], end=" ")
print()
print("^")
print("Start position")
print()
print("# is an obstacle")
print("- is the path taken by algorithm")
print("PATH TAKEN BY THE ALGORITHM IS:-")
x = back_pointer[goal]
while x != start:
print(x, end=" ")
x = back_pointer[x]
print(x)
sys.exit()
def valid(p: TPos):
if p[0] < 0 or p[0] > n - 1:
return False
return not (p[1] < 0 or p[1] > n - 1)
def expand_state(
s,
j,
visited,
g_function,
close_list_anchor,
close_list_inad,
open_list,
back_pointer,
):
for itera in range(n_heuristic):
open_list[itera].remove_element(s)
# print("s", s)
# print("j", j)
(x, y) = s
left = (x - 1, y)
right = (x + 1, y)
up = (x, y + 1)
down = (x, y - 1)
for neighbours in [left, right, up, down]:
if neighbours not in blocks:
if valid(neighbours) and neighbours not in visited:
# print("neighbour", neighbours)
visited.add(neighbours)
back_pointer[neighbours] = -1
g_function[neighbours] = float("inf")
if valid(neighbours) and g_function[neighbours] > g_function[s] + 1:
g_function[neighbours] = g_function[s] + 1
back_pointer[neighbours] = s
if neighbours not in close_list_anchor:
open_list[0].put(neighbours, key(neighbours, 0, goal, g_function))
if neighbours not in close_list_inad:
for var in range(1, n_heuristic):
if key(neighbours, var, goal, g_function) <= W2 * key(
neighbours, 0, goal, g_function
):
open_list[j].put(
neighbours, key(neighbours, var, goal, g_function)
)
def make_common_ground():
some_list = []
for x in range(1, 5):
for y in range(1, 6):
some_list.append((x, y))
for x in range(15, 20):
some_list.append((x, 17))
for x in range(10, 19):
for y in range(1, 15):
some_list.append((x, y))
# L block
for x in range(1, 4):
for y in range(12, 19):
some_list.append((x, y))
for x in range(3, 13):
for y in range(16, 19):
some_list.append((x, y))
return some_list
heuristics = {0: consistent_heuristic, 1: heuristic_1, 2: heuristic_2}
blocks_blk = [
(0, 1),
(1, 1),
(2, 1),
(3, 1),
(4, 1),
(5, 1),
(6, 1),
(7, 1),
(8, 1),
(9, 1),
(10, 1),
(11, 1),
(12, 1),
(13, 1),
(14, 1),
(15, 1),
(16, 1),
(17, 1),
(18, 1),
(19, 1),
]
blocks_all = make_common_ground()
blocks = blocks_blk
# hyper parameters
W1 = 1
W2 = 1
n = 20
n_heuristic = 3 # one consistent and two other inconsistent
# start and end destination
start = (0, 0)
goal = (n - 1, n - 1)
t = 1
def multi_a_star(start: TPos, goal: TPos, n_heuristic: int):
g_function = {start: 0, goal: float("inf")}
back_pointer = {start: -1, goal: -1}
open_list = []
visited = set()
for i in range(n_heuristic):
open_list.append(PriorityQueue())
open_list[i].put(start, key(start, i, goal, g_function))
close_list_anchor: list[int] = []
close_list_inad: list[int] = []
while open_list[0].minkey() < float("inf"):
for i in range(1, n_heuristic):
# print(open_list[0].minkey(), open_list[i].minkey())
if open_list[i].minkey() <= W2 * open_list[0].minkey():
global t
t += 1
if g_function[goal] <= open_list[i].minkey():
if g_function[goal] < float("inf"):
do_something(back_pointer, goal, start)
else:
                    get_s = open_list[i].top_show()
visited.add(get_s)
expand_state(
get_s,
i,
visited,
g_function,
close_list_anchor,
close_list_inad,
open_list,
back_pointer,
)
close_list_inad.append(get_s)
elif g_function[goal] <= open_list[0].minkey():
if g_function[goal] < float("inf"):
do_something(back_pointer, goal, start)
else:
get_s = open_list[0].top_show()
visited.add(get_s)
expand_state(
get_s,
0,
visited,
g_function,
close_list_anchor,
close_list_inad,
open_list,
back_pointer,
)
close_list_anchor.append(get_s)
print("No path found to goal")
print()
for i in range(n - 1, -1, -1):
for j in range(n):
if (j, i) in blocks:
print("#", end=" ")
elif (j, i) in back_pointer:
if (j, i) == (n - 1, n - 1):
print("*", end=" ")
else:
print("-", end=" ")
else:
print("*", end=" ")
if (j, i) == (n - 1, n - 1):
print("<-- End position", end=" ")
print()
print("^")
print("Start position")
print()
print("# is an obstacle")
print("- is the path taken by algorithm")
if __name__ == "__main__":
multi_a_star(start, goal, n_heuristic)
================================================
FILE: graphs/page_rank.py
================================================
"""
Author: https://github.com/bhushan-borole
"""
"""
The input graph for the algorithm is:
A B C
A 0 1 1
B 0 0 1
C 1 0 0
"""
graph = [[0, 1, 1], [0, 0, 1], [1, 0, 0]]
class Node:
def __init__(self, name):
self.name = name
self.inbound = []
self.outbound = []
def add_inbound(self, node):
self.inbound.append(node)
def add_outbound(self, node):
self.outbound.append(node)
def __repr__(self):
        return f"<node={self.name} inbound={self.inbound} outbound={self.outbound}>"
def page_rank(nodes, limit=3, d=0.85):
ranks = {}
for node in nodes:
ranks[node.name] = 1
outbounds = {}
for node in nodes:
outbounds[node.name] = len(node.outbound)
for i in range(limit):
print(f"======= Iteration {i + 1} =======")
for _, node in enumerate(nodes):
ranks[node.name] = (1 - d) + d * sum(
ranks[ib] / outbounds[ib] for ib in node.inbound
)
print(ranks)
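# Note on the update rule above (added for exposition): with damping factor d,
# a node's rank is (1 - d) plus d times the rank flowing in from its inbound
# neighbours, where each neighbour splits its own rank evenly over its outbound
# links. For example, a node with a single inbound neighbour of rank 1 that has
# 2 outbound links receives (1 - 0.85) + 0.85 * (1 / 2) = 0.575 on the first
# iteration.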
def main():
names = list(input("Enter Names of the Nodes: ").split())
nodes = [Node(name) for name in names]
for ri, row in enumerate(graph):
for ci, col in enumerate(row):
if col == 1:
nodes[ci].add_inbound(names[ri])
nodes[ri].add_outbound(names[ci])
print("======= Nodes =======")
for node in nodes:
print(node)
page_rank(nodes)
if __name__ == "__main__":
main()
================================================
FILE: graphs/prim.py
================================================
"""Prim's Algorithm.
Determines the minimum spanning tree (MST) of a graph using Prim's Algorithm.
Details: https://en.wikipedia.org/wiki/Prim%27s_algorithm
"""
import heapq as hq
import math
from collections.abc import Iterator
class Vertex:
"""Class Vertex."""
def __init__(self, id_):
"""
Arguments:
id - input an id to identify the vertex
Attributes:
neighbors - a list of the vertices it is linked to
            edges - a dict to store the edges' weights
"""
self.id = str(id_)
self.key = None
self.pi = None
self.neighbors = []
self.edges = {} # {vertex:distance}
def __lt__(self, other):
"""Comparison rule to < operator."""
return self.key < other.key
def __repr__(self):
"""Return the vertex id."""
return self.id
def add_neighbor(self, vertex):
"""Add a pointer to a vertex at neighbor's list."""
self.neighbors.append(vertex)
def add_edge(self, vertex, weight):
"""Destination vertex and weight."""
self.edges[vertex.id] = weight
def connect(graph, a, b, edge):
# add the neighbors:
graph[a - 1].add_neighbor(graph[b - 1])
graph[b - 1].add_neighbor(graph[a - 1])
# add the edges:
graph[a - 1].add_edge(graph[b - 1], edge)
graph[b - 1].add_edge(graph[a - 1], edge)
def prim(graph: list, root: Vertex) -> list:
"""Prim's Algorithm.
Runtime:
O(mn) with `m` edges and `n` vertices
Return:
List with the edges of a Minimum Spanning Tree
Usage:
prim(graph, graph[0])
"""
a = []
for u in graph:
u.key = math.inf
u.pi = None
root.key = 0
q = graph[:]
while q:
u = min(q)
q.remove(u)
for v in u.neighbors:
if (v in q) and (u.edges[v.id] < v.key):
v.pi = u
v.key = u.edges[v.id]
for i in range(1, len(graph)):
a.append((int(graph[i].id) + 1, int(graph[i].pi.id) + 1))
return a
def prim_heap(graph: list, root: Vertex) -> Iterator[tuple]:
"""Prim's Algorithm with min heap.
Runtime:
O((m + n)log n) with `m` edges and `n` vertices
Yield:
Edges of a Minimum Spanning Tree
Usage:
        prim_heap(graph, graph[0])
"""
for u in graph:
u.key = math.inf
u.pi = None
root.key = 0
h = list(graph)
hq.heapify(h)
while h:
u = hq.heappop(h)
for v in u.neighbors:
if (v in h) and (u.edges[v.id] < v.key):
v.pi = u
v.key = u.edges[v.id]
hq.heapify(h)
for i in range(1, len(graph)):
yield (int(graph[i].id) + 1, int(graph[i].pi.id) + 1)
def test_vector() -> None:
"""
# Creates a list to store x vertices.
>>> x = 5
>>> G = [Vertex(n) for n in range(x)]
>>> connect(G, 1, 2, 15)
>>> connect(G, 1, 3, 12)
>>> connect(G, 2, 4, 13)
>>> connect(G, 2, 5, 5)
>>> connect(G, 3, 2, 6)
>>> connect(G, 3, 4, 6)
>>> connect(G, 0, 0, 0) # Generate the minimum spanning tree:
>>> G_heap = G[:]
>>> MST = prim(G, G[0])
>>> MST_heap = prim_heap(G, G[0])
>>> for i in MST:
... print(i)
(2, 3)
(3, 1)
(4, 3)
(5, 2)
>>> for i in MST_heap:
... print(i)
(2, 3)
(3, 1)
(4, 3)
(5, 2)
"""
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: graphs/random_graph_generator.py
================================================
"""
* Author: Manuel Di Lullo (https://github.com/manueldilullo)
* Description: Random graphs generator.
Uses graphs represented with an adjacency list.
URL: https://en.wikipedia.org/wiki/Random_graph
"""
import random
def random_graph(
vertices_number: int, probability: float, directed: bool = False
) -> dict:
"""
Generate a random graph
@input: vertices_number (number of vertices),
probability (probability that a generic edge (u,v) exists),
directed (if True: graph will be a directed graph,
otherwise it will be an undirected graph)
@examples:
>>> random.seed(1)
>>> random_graph(4, 0.5)
{0: [1], 1: [0, 2, 3], 2: [1, 3], 3: [1, 2]}
>>> random.seed(1)
>>> random_graph(4, 0.5, True)
{0: [1], 1: [2, 3], 2: [3], 3: []}
"""
graph: dict = {i: [] for i in range(vertices_number)}
    # if probability is greater than or equal to 1, generate a complete graph
if probability >= 1:
return complete_graph(vertices_number)
    # if probability is less than or equal to 0, return a graph without edges
if probability <= 0:
return graph
    # for each pair of nodes, add an edge from u to v
    # if the randomly generated number is less than probability
for i in range(vertices_number):
for j in range(i + 1, vertices_number):
if random.random() < probability:
graph[i].append(j)
if not directed:
                    # if the graph is undirected, also add the reverse edge from j to i
graph[j].append(i)
return graph
def complete_graph(vertices_number: int) -> dict:
"""
Generate a complete graph with vertices_number vertices.
    @input: vertices_number (number of vertices)
@example:
>>> complete_graph(3)
{0: [1, 2], 1: [0, 2], 2: [0, 1]}
"""
return {
i: [j for j in range(vertices_number) if i != j] for i in range(vertices_number)
}
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: graphs/scc_kosaraju.py
================================================
from __future__ import annotations
def dfs(u):
global graph, reversed_graph, scc, component, visit, stack
if visit[u]:
return
visit[u] = True
for v in graph[u]:
dfs(v)
stack.append(u)
def dfs2(u):
global graph, reversed_graph, scc, component, visit, stack
if visit[u]:
return
visit[u] = True
component.append(u)
for v in reversed_graph[u]:
dfs2(v)
def kosaraju():
global graph, reversed_graph, scc, component, visit, stack
for i in range(n):
dfs(i)
visit = [False] * n
for i in stack[::-1]:
if visit[i]:
continue
component = []
dfs2(i)
scc.append(component)
return scc
if __name__ == "__main__":
# n - no of nodes, m - no of edges
n, m = list(map(int, input().strip().split()))
graph: list[list[int]] = [[] for _ in range(n)] # graph
reversed_graph: list[list[int]] = [[] for i in range(n)] # reversed graph
# input graph data (edges)
for _ in range(m):
u, v = list(map(int, input().strip().split()))
graph[u].append(v)
reversed_graph[v].append(u)
stack: list[int] = []
visit: list[bool] = [False] * n
    scc: list[list[int]] = []
component: list[int] = []
print(kosaraju())
================================================
FILE: graphs/strongly_connected_components.py
================================================
"""
https://en.wikipedia.org/wiki/Strongly_connected_component
Finding strongly connected components in directed graph
"""
test_graph_1 = {0: [2, 3], 1: [0], 2: [1], 3: [4], 4: []}
test_graph_2 = {0: [1, 2, 3], 1: [2], 2: [0], 3: [4], 4: [5], 5: [3]}
def topology_sort(
graph: dict[int, list[int]], vert: int, visited: list[bool]
) -> list[int]:
"""
    Use depth-first search to order the vertices by finish time.
    At this stage the graph is still the original (not reversed) graph.
>>> topology_sort(test_graph_1, 0, 5 * [False])
[1, 2, 4, 3, 0]
>>> topology_sort(test_graph_2, 0, 6 * [False])
[2, 1, 5, 4, 3, 0]
"""
visited[vert] = True
order = []
for neighbour in graph[vert]:
if not visited[neighbour]:
order += topology_sort(graph, neighbour, visited)
order.append(vert)
return order
def find_components(
reversed_graph: dict[int, list[int]], vert: int, visited: list[bool]
) -> list[int]:
"""
    Use depth-first search to find the strongly connected
    vertices. At this stage the graph has been reversed.
>>> find_components({0: [1], 1: [2], 2: [0]}, 0, 5 * [False])
[0, 1, 2]
>>> find_components({0: [2], 1: [0], 2: [0, 1]}, 0, 6 * [False])
[0, 2, 1]
"""
visited[vert] = True
component = [vert]
for neighbour in reversed_graph[vert]:
if not visited[neighbour]:
component += find_components(reversed_graph, neighbour, visited)
return component
def strongly_connected_components(graph: dict[int, list[int]]) -> list[list[int]]:
"""
This function takes graph as a parameter
and then returns the list of strongly connected components
>>> strongly_connected_components(test_graph_1)
[[0, 1, 2], [3], [4]]
>>> strongly_connected_components(test_graph_2)
[[0, 2, 1], [3, 5, 4]]
"""
visited = len(graph) * [False]
reversed_graph: dict[int, list[int]] = {vert: [] for vert in range(len(graph))}
for vert, neighbours in graph.items():
for neighbour in neighbours:
reversed_graph[neighbour].append(vert)
order = []
for i, was_visited in enumerate(visited):
if not was_visited:
order += topology_sort(graph, i, visited)
components_list = []
visited = len(graph) * [False]
for i in range(len(graph)):
vert = order[len(graph) - i - 1]
if not visited[vert]:
component = find_components(reversed_graph, vert, visited)
components_list.append(component)
return components_list
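# Illustrative usage sketch (added for exposition): assuming the functions above
# behave as their doctests describe, this prints the strongly connected
# components of the two test graphs defined at the top of the file.
if __name__ == "__main__":
    print(strongly_connected_components(test_graph_1))  # [[0, 1, 2], [3], [4]]
    print(strongly_connected_components(test_graph_2))  # [[0, 2, 1], [3, 5, 4]]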
================================================
FILE: graphs/tarjans_scc.py
================================================
from collections import deque
def tarjan(g: list[list[int]]) -> list[list[int]]:
"""
Tarjan's algo for finding strongly connected components in a directed graph
Uses two main attributes of each node to track reachability, the index of that node
within a component(index), and the lowest index reachable from that node(lowlink).
    We then perform a dfs of each component, making sure to update these parameters
for each node and saving the nodes we visit on the way.
If ever we find that the lowest reachable node from a current node is equal to the
index of the current node then it must be the root of a strongly connected
    component and so we save it and its equireachable vertices as a strongly
connected component.
Complexity: strong_connect() is called at most once for each node and has a
complexity of O(|E|) as it is DFS.
Therefore this has complexity O(|V| + |E|) for a graph G = (V, E)
>>> tarjan([[2, 3, 4], [2, 3, 4], [0, 1, 3], [0, 1, 2], [1]])
[[4, 3, 1, 2, 0]]
>>> tarjan([[], [], [], []])
[[0], [1], [2], [3]]
>>> a = [0, 1, 2, 3, 4, 5, 4]
>>> b = [1, 0, 3, 2, 5, 4, 0]
>>> n = 7
>>> sorted(tarjan(create_graph(n, list(zip(a, b))))) == sorted(
... tarjan(create_graph(n, list(zip(a[::-1], b[::-1])))))
True
>>> a = [0, 1, 2, 3, 4, 5, 6]
>>> b = [0, 1, 2, 3, 4, 5, 6]
>>> sorted(tarjan(create_graph(n, list(zip(a, b)))))
[[0], [1], [2], [3], [4], [5], [6]]
"""
n = len(g)
stack: deque[int] = deque()
on_stack = [False for _ in range(n)]
index_of = [-1 for _ in range(n)]
lowlink_of = index_of[:]
def strong_connect(v: int, index: int, components: list[list[int]]) -> int:
index_of[v] = index # the number when this node is seen
lowlink_of[v] = index # lowest rank node reachable from here
index += 1
stack.append(v)
on_stack[v] = True
for w in g[v]:
if index_of[w] == -1:
index = strong_connect(w, index, components)
lowlink_of[v] = (
lowlink_of[w] if lowlink_of[w] < lowlink_of[v] else lowlink_of[v]
)
elif on_stack[w]:
lowlink_of[v] = (
lowlink_of[w] if lowlink_of[w] < lowlink_of[v] else lowlink_of[v]
)
if lowlink_of[v] == index_of[v]:
component = []
w = stack.pop()
on_stack[w] = False
component.append(w)
while w != v:
w = stack.pop()
on_stack[w] = False
component.append(w)
components.append(component)
return index
components: list[list[int]] = []
for v in range(n):
if index_of[v] == -1:
strong_connect(v, 0, components)
return components
def create_graph(n: int, edges: list[tuple[int, int]]) -> list[list[int]]:
"""
>>> n = 7
>>> source = [0, 0, 1, 2, 3, 3, 4, 4, 6]
>>> target = [1, 3, 2, 0, 1, 4, 5, 6, 5]
>>> edges = list(zip(source, target))
>>> create_graph(n, edges)
[[1, 3], [2], [0], [1, 4], [5, 6], [], [5]]
"""
g: list[list[int]] = [[] for _ in range(n)]
for u, v in edges:
g[u].append(v)
return g
if __name__ == "__main__":
# Test
n_vertices = 7
source = [0, 0, 1, 2, 3, 3, 4, 4, 6]
target = [1, 3, 2, 0, 1, 4, 5, 6, 5]
edges = list(zip(source, target))
g = create_graph(n_vertices, edges)
assert tarjan(g) == [[5], [6], [4], [3, 2, 1, 0]]
================================================
FILE: graphs/tests/__init__.py
================================================
================================================
FILE: graphs/tests/test_min_spanning_tree_kruskal.py
================================================
from graphs.minimum_spanning_tree_kruskal import kruskal
def test_kruskal_successful_result():
num_nodes = 9
edges = [
[0, 1, 4],
[0, 7, 8],
[1, 2, 8],
[7, 8, 7],
[7, 6, 1],
[2, 8, 2],
[8, 6, 6],
[2, 3, 7],
[2, 5, 4],
[6, 5, 2],
[3, 5, 14],
[3, 4, 9],
[5, 4, 10],
[1, 7, 11],
]
result = kruskal(num_nodes, edges)
expected = [
[7, 6, 1],
[2, 8, 2],
[6, 5, 2],
[0, 1, 4],
[2, 5, 4],
[2, 3, 7],
[0, 7, 8],
[3, 4, 9],
]
assert sorted(expected) == sorted(result)
================================================
FILE: graphs/tests/test_min_spanning_tree_prim.py
================================================
from collections import defaultdict
from graphs.minimum_spanning_tree_prims import prisms_algorithm as mst
def test_prim_successful_result():
num_nodes, num_edges = 9, 14 # noqa: F841
edges = [
[0, 1, 4],
[0, 7, 8],
[1, 2, 8],
[7, 8, 7],
[7, 6, 1],
[2, 8, 2],
[8, 6, 6],
[2, 3, 7],
[2, 5, 4],
[6, 5, 2],
[3, 5, 14],
[3, 4, 9],
[5, 4, 10],
[1, 7, 11],
]
adjacency = defaultdict(list)
for node1, node2, cost in edges:
adjacency[node1].append([node2, cost])
adjacency[node2].append([node1, cost])
result = mst(adjacency)
expected = [
[7, 6, 1],
[2, 8, 2],
[6, 5, 2],
[0, 1, 4],
[2, 5, 4],
[2, 3, 7],
[0, 7, 8],
[3, 4, 9],
]
for answer in expected:
edge = tuple(answer[:2])
reverse = tuple(edge[::-1])
assert edge in result or reverse in result
================================================
FILE: greedy_methods/__init__.py
================================================
================================================
FILE: greedy_methods/best_time_to_buy_and_sell_stock.py
================================================
"""
Given a list of stock prices calculate the maximum profit that can be made from a
single buy and sell of one share of stock. We are only allowed to complete one buy
transaction and one sell transaction but must buy before we sell.
Example : prices = [7, 1, 5, 3, 6, 4]
max_profit will return 5 - which is by buying at price 1 and selling at price 6.
This problem can be solved using the concept of "GREEDY ALGORITHM".
We iterate over the price array once, keeping track of the lowest price point
(buy) and the maximum profit we can get at each point. The greedy choice at each point
is to either buy at the current price if it's less than our current buying price, or
sell at the current price if the profit is more than our current maximum profit.
"""
def max_profit(prices: list[int]) -> int:
"""
>>> max_profit([7, 1, 5, 3, 6, 4])
5
>>> max_profit([7, 6, 4, 3, 1])
0
"""
if not prices:
return 0
min_price = prices[0]
max_profit: int = 0
for price in prices:
min_price = min(price, min_price)
max_profit = max(price - min_price, max_profit)
return max_profit
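# Worked trace (added for exposition) for prices [7, 1, 5, 3, 6, 4]: the running
# minimum price evolves as 7, 1, 1, 1, 1, 1 and the running best profit as
# 0, 0, 4, 4, 5, 5, so max_profit returns 5 (buy at 1, sell at 6).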
if __name__ == "__main__":
import doctest
doctest.testmod()
print(max_profit([7, 1, 5, 3, 6, 4]))
================================================
FILE: greedy_methods/fractional_cover_problem.py
================================================
# https://en.wikipedia.org/wiki/Set_cover_problem
from dataclasses import dataclass
from operator import attrgetter
@dataclass
class Item:
weight: int
value: int
@property
def ratio(self) -> float:
"""
Return the value-to-weight ratio for the item.
Returns:
float: The value-to-weight ratio for the item.
Examples:
>>> Item(10, 65).ratio
6.5
>>> Item(20, 100).ratio
5.0
>>> Item(30, 120).ratio
4.0
"""
return self.value / self.weight
def fractional_cover(items: list[Item], capacity: int) -> float:
"""
Solve the Fractional Cover Problem.
Args:
items: A list of items, where each item has weight and value attributes.
capacity: The maximum weight capacity of the knapsack.
Returns:
The maximum value that can be obtained by selecting fractions of items to cover
the knapsack's capacity.
Raises:
ValueError: If capacity is negative.
Examples:
>>> fractional_cover((Item(10, 60), Item(20, 100), Item(30, 120)), capacity=50)
240.0
>>> fractional_cover([Item(20, 100), Item(30, 120), Item(10, 60)], capacity=25)
135.0
>>> fractional_cover([Item(10, 60), Item(20, 100), Item(30, 120)], capacity=60)
280.0
>>> fractional_cover(items=[Item(5, 30), Item(10, 60), Item(15, 90)], capacity=30)
180.0
>>> fractional_cover(items=[], capacity=50)
0.0
>>> fractional_cover(items=[Item(10, 60)], capacity=5)
30.0
>>> fractional_cover(items=[Item(10, 60)], capacity=1)
6.0
>>> fractional_cover(items=[Item(10, 60)], capacity=0)
0.0
>>> fractional_cover(items=[Item(10, 60)], capacity=-1)
Traceback (most recent call last):
...
ValueError: Capacity cannot be negative
"""
if capacity < 0:
raise ValueError("Capacity cannot be negative")
total_value = 0.0
remaining_capacity = capacity
# Sort the items by their value-to-weight ratio in descending order
for item in sorted(items, key=attrgetter("ratio"), reverse=True):
if remaining_capacity == 0:
break
weight_taken = min(item.weight, remaining_capacity)
total_value += weight_taken * item.ratio
remaining_capacity -= weight_taken
return total_value
if __name__ == "__main__":
import doctest
if result := doctest.testmod().failed:
print(f"{result} test(s) failed")
else:
print("All tests passed")
================================================
FILE: greedy_methods/fractional_knapsack.py
================================================
from bisect import bisect
from itertools import accumulate
def frac_knapsack(vl, wt, w, n):
"""
>>> frac_knapsack([60, 100, 120], [10, 20, 30], 50, 3)
240.0
>>> frac_knapsack([10, 40, 30, 50], [5, 4, 6, 3], 10, 4)
105.0
>>> frac_knapsack([10, 40, 30, 50], [5, 4, 6, 3], 8, 4)
95.0
>>> frac_knapsack([10, 40, 30, 50], [5, 4, 6], 8, 4)
60.0
>>> frac_knapsack([10, 40, 30], [5, 4, 6, 3], 8, 4)
60.0
>>> frac_knapsack([10, 40, 30, 50], [5, 4, 6, 3], 0, 4)
0
>>> frac_knapsack([10, 40, 30, 50], [5, 4, 6, 3], 8, 0)
95.0
>>> frac_knapsack([10, 40, 30, 50], [5, 4, 6, 3], -8, 4)
0
>>> frac_knapsack([10, 40, 30, 50], [5, 4, 6, 3], 8, -4)
95.0
>>> frac_knapsack([10, 40, 30, 50], [5, 4, 6, 3], 800, 4)
130
>>> frac_knapsack([10, 40, 30, 50], [5, 4, 6, 3], 8, 400)
95.0
>>> frac_knapsack("ABCD", [5, 4, 6, 3], 8, 400)
Traceback (most recent call last):
...
TypeError: unsupported operand type(s) for /: 'str' and 'int'
"""
r = sorted(zip(vl, wt), key=lambda x: x[0] / x[1], reverse=True)
vl, wt = [i[0] for i in r], [i[1] for i in r]
acc = list(accumulate(wt))
k = bisect(acc, w)
return (
0
if k == 0
else sum(vl[:k]) + (w - acc[k - 1]) * (vl[k]) / (wt[k])
if k != n
else sum(vl[:k])
)
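# How the return expression above works (an explanatory note added for
# exposition, based on the doctests above): items are sorted by value/weight
# ratio, `acc` holds the cumulative weights, and bisect(acc, w) counts how many
# whole items fit within capacity w. The result is the value of those whole
# items plus a proportional share of the next item, which is the classic
# fractional-knapsack greedy rule. For the first doctest, two whole items
# (60 + 100) fit within weight 30, and (50 - 30) * 120 / 30 = 80 of the next
# item is added, giving 240.0.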
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: greedy_methods/fractional_knapsack_2.py
================================================
# https://en.wikipedia.org/wiki/Continuous_knapsack_problem
# https://www.guru99.com/fractional-knapsack-problem-greedy.html
# https://medium.com/walkinthecode/greedy-algorithm-fractional-knapsack-problem-9aba1daecc93
from __future__ import annotations
def fractional_knapsack(
value: list[int], weight: list[int], capacity: int
) -> tuple[float, list[float]]:
"""
>>> value = [1, 3, 5, 7, 9]
>>> weight = [0.9, 0.7, 0.5, 0.3, 0.1]
>>> fractional_knapsack(value, weight, 5)
(25, [1, 1, 1, 1, 1])
>>> fractional_knapsack(value, weight, 15)
(25, [1, 1, 1, 1, 1])
>>> fractional_knapsack(value, weight, 25)
(25, [1, 1, 1, 1, 1])
>>> fractional_knapsack(value, weight, 26)
(25, [1, 1, 1, 1, 1])
>>> fractional_knapsack(value, weight, -1)
(-90.0, [0, 0, 0, 0, -10.0])
>>> fractional_knapsack([1, 3, 5, 7], weight, 30)
(16, [1, 1, 1, 1])
>>> fractional_knapsack(value, [0.9, 0.7, 0.5, 0.3, 0.1], 30)
(25, [1, 1, 1, 1, 1])
>>> fractional_knapsack([], [], 30)
(0, [])
"""
index = list(range(len(value)))
ratio = [v / w for v, w in zip(value, weight)]
index.sort(key=lambda i: ratio[i], reverse=True)
max_value: float = 0
fractions: list[float] = [0] * len(value)
for i in index:
if weight[i] <= capacity:
fractions[i] = 1
max_value += value[i]
capacity -= weight[i]
else:
fractions[i] = capacity / weight[i]
max_value += value[i] * capacity / weight[i]
break
return max_value, fractions
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: greedy_methods/gas_station.py
================================================
"""
Task:
There are n gas stations along a circular route, where the amount of gas
at the ith station is gas_quantities[i].
You have a car with an unlimited gas tank and it costs costs[i] of gas
to travel from the ith station to its next (i + 1)th station.
You begin the journey with an empty tank at one of the gas stations.
Given two integer arrays gas_quantities and costs, return the starting
gas station's index if you can travel around the circuit once
in the clockwise direction; otherwise, return -1.
If there exists a solution, it is guaranteed to be unique
Reference: https://leetcode.com/problems/gas-station/description
Implementation notes:
First, check whether the total gas is enough to complete the journey. If not, return -1.
However, if there is enough gas, it is guaranteed that there is a valid
starting index to reach the end of the journey.
Greedily calculate the net gain (gas_quantity - cost) at each station.
If the net gain ever goes below 0 while iterating through the stations,
start checking from the next station.
"""
from dataclasses import dataclass
@dataclass
class GasStation:
gas_quantity: int
cost: int
def get_gas_stations(
gas_quantities: list[int], costs: list[int]
) -> tuple[GasStation, ...]:
"""
This function returns a tuple of gas stations.
Args:
gas_quantities: Amount of gas available at each station
costs: The cost of gas required to move from one station to the next
Returns:
A tuple of gas stations
>>> gas_stations = get_gas_stations([1, 2, 3, 4, 5], [3, 4, 5, 1, 2])
>>> len(gas_stations)
5
>>> gas_stations[0]
GasStation(gas_quantity=1, cost=3)
>>> gas_stations[-1]
GasStation(gas_quantity=5, cost=2)
"""
return tuple(
GasStation(quantity, cost) for quantity, cost in zip(gas_quantities, costs)
)
def can_complete_journey(gas_stations: tuple[GasStation, ...]) -> int:
"""
This function returns the index from which to start the journey
in order to reach the end.
Args:
        gas_stations: A tuple of GasStation objects, each holding the gas
            available at that station and the cost to travel to the next one
Returns:
start [int]: start index needed to complete the journey
Examples:
>>> can_complete_journey(get_gas_stations([1, 2, 3, 4, 5], [3, 4, 5, 1, 2]))
3
>>> can_complete_journey(get_gas_stations([2, 3, 4], [3, 4, 3]))
-1
"""
total_gas = sum(gas_station.gas_quantity for gas_station in gas_stations)
total_cost = sum(gas_station.cost for gas_station in gas_stations)
if total_gas < total_cost:
return -1
start = 0
net = 0
for i, gas_station in enumerate(gas_stations):
net += gas_station.gas_quantity - gas_station.cost
if net < 0:
start = i + 1
net = 0
return start
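# Worked trace (added for exposition) for the first doctest above: the
# per-station net gains are [-2, -2, -2, 3, 3]. The running net drops below
# zero at stations 0, 1 and 2, so the candidate start index moves forward to 3;
# from index 3 the running net stays non-negative (3, then 6), and because
# total gas (15) is not less than total cost (15), the returned start index 3
# is valid.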
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: greedy_methods/minimum_coin_change.py
================================================
"""
Test cases:
Do you want to enter your denominations ? (Y/N) :N
Enter the change you want to make in Indian Currency: 987
Following is minimal change for 987 :
500 100 100 100 100 50 20 10 5 2
Do you want to enter your denominations ? (Y/N) :Y
Enter number of denomination:10
1
5
10
20
50
100
200
500
1000
2000
Enter the change you want to make: 18745
Following is minimal change for 18745 :
2000 2000 2000 2000 2000 2000 2000 2000 2000 500 200 20 20 5
Do you want to enter your denominations ? (Y/N) :N
Enter the change you want to make: 0
The total value cannot be zero or negative.
Do you want to enter your denominations ? (Y/N) :N
Enter the change you want to make: -98
The total value cannot be zero or negative.
Do you want to enter your denominations ? (Y/N) :Y
Enter number of denomination:5
1
5
100
500
1000
Enter the change you want to make: 456
Following is minimal change for 456 :
100 100 100 100 5 5 5 5 5 5 5 5 5 5 5 1
"""
def find_minimum_change(denominations: list[int], value: str) -> list[int]:
"""
Find the minimum change from the given denominations and value
>>> find_minimum_change([1, 5, 10, 20, 50, 100, 200, 500, 1000,2000], 18745)
[2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 500, 200, 20, 20, 5]
>>> find_minimum_change([1, 2, 5, 10, 20, 50, 100, 500, 2000], 987)
[500, 100, 100, 100, 100, 50, 20, 10, 5, 2]
>>> find_minimum_change([1, 2, 5, 10, 20, 50, 100, 500, 2000], 0)
[]
>>> find_minimum_change([1, 2, 5, 10, 20, 50, 100, 500, 2000], -98)
[]
>>> find_minimum_change([1, 5, 100, 500, 1000], 456)
[100, 100, 100, 100, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1]
"""
total_value = int(value)
# Initialize Result
answer = []
# Traverse through all denomination
for denomination in reversed(denominations):
# Find denominations
while int(total_value) >= int(denomination):
total_value -= int(denomination)
answer.append(denomination) # Append the "answers" array
return answer
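# Note (added for clarity): this greedy strategy is only guaranteed to be optimal
# for canonical coin systems such as the Indian denominations used here. For a
# hypothetical system like [1, 3, 4] and a value of 6, it returns [4, 1, 1] even
# though [3, 3] uses fewer coins.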
# Driver Code
if __name__ == "__main__":
denominations = []
value = "0"
if (
input("Do you want to enter your denominations ? (yY/n): ").strip().lower()
== "y"
):
n = int(input("Enter the number of denominations you want to add: ").strip())
for i in range(n):
denominations.append(int(input(f"Denomination {i}: ").strip()))
value = input("Enter the change you want to make in Indian Currency: ").strip()
else:
# All denominations of Indian Currency if user does not enter
denominations = [1, 2, 5, 10, 20, 50, 100, 500, 2000]
value = input("Enter the change you want to make: ").strip()
    if int(value) <= 0:
print("The total value cannot be zero or negative.")
else:
print(f"Following is minimal change for {value}: ")
answer = find_minimum_change(denominations, value)
# Print result
for i in range(len(answer)):
print(answer[i], end=" ")
================================================
FILE: greedy_methods/minimum_waiting_time.py
================================================
"""
Calculate the minimum waiting time using a greedy algorithm.
reference: https://www.youtube.com/watch?v=Sf3eiO12eJs
For doctests run following command:
python -m doctest -v minimum_waiting_time.py
The minimum_waiting_time function uses a greedy algorithm to calculate the minimum
total waiting time for all queries to complete. It sorts the queries in
non-decreasing order, multiplies each query's duration by the number of queries
that still have to wait behind it, and returns the sum of these products. Doctests
ensure that the function produces the correct output.
"""
def minimum_waiting_time(queries: list[int]) -> int:
"""
This function takes a list of query times and returns the minimum waiting time
for all queries to be completed.
Args:
queries: A list of queries measured in picoseconds
Returns:
total_waiting_time: Minimum waiting time measured in picoseconds
Examples:
>>> minimum_waiting_time([3, 2, 1, 2, 6])
17
>>> minimum_waiting_time([3, 2, 1])
4
>>> minimum_waiting_time([1, 2, 3, 4])
10
>>> minimum_waiting_time([5, 5, 5, 5])
30
>>> minimum_waiting_time([])
0
"""
n = len(queries)
if n in (0, 1):
return 0
return sum(query * (n - i - 1) for i, query in enumerate(sorted(queries)))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: greedy_methods/optimal_merge_pattern.py
================================================
"""
This is a pure Python implementation of the greedy optimal-merge-pattern algorithm
reference: https://www.geeksforgeeks.org/optimal-file-merge-patterns/
For doctests run the following command:
python3 -m doctest -v optimal_merge_pattern.py
Objective
Merge a set of sorted files of different lengths into a single sorted file.
We need to find an optimal solution, where the resultant file
will be generated in minimum time.
Approach
Given a number of sorted files, there are many ways
to merge them into a single sorted file.
The merge can be performed pairwise.
Merging an m-record file and an n-record file requires m+n record moves,
the optimal choice being to
merge the two smallest files together at each step (greedy approach).
"""
def optimal_merge_pattern(files: list) -> float:
"""Function to merge all the files with optimum cost
Args:
files [list]: A list of sizes of different files to be merged
Returns:
optimal_merge_cost [int]: Optimal cost to merge all those files
Examples:
>>> optimal_merge_pattern([2, 3, 4])
14
>>> optimal_merge_pattern([5, 10, 20, 30, 30])
205
>>> optimal_merge_pattern([8, 8, 8, 8, 8])
96
"""
optimal_merge_cost = 0
while len(files) > 1:
temp = 0
# Consider two files with minimum cost to be merged
for _ in range(2):
min_index = files.index(min(files))
temp += files[min_index]
files.pop(min_index)
files.append(temp)
optimal_merge_cost += temp
return optimal_merge_cost
if __name__ == "__main__":
import doctest
doctest.testmod()
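# Illustrative alternative (not part of the original module): the same greedy rule
# can be expressed with a min-heap, which avoids the repeated linear scans used in
# optimal_merge_pattern above. This is only a sketch of the idea.
def optimal_merge_pattern_heap(files: list) -> int:
    import heapq

    heap = list(files)
    heapq.heapify(heap)
    total_cost = 0
    while len(heap) > 1:
        # repeatedly merge the two smallest files and pay their combined size
        merged = heapq.heappop(heap) + heapq.heappop(heap)
        total_cost += merged
        heapq.heappush(heap, merged)
    return total_cost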
================================================
FILE: greedy_methods/smallest_range.py
================================================
"""
smallest_range function takes a list of sorted integer lists and finds the smallest
range that includes at least one number from each list, using a min heap for efficiency.
"""
from heapq import heappop, heappush
from sys import maxsize
def smallest_range(nums: list[list[int]]) -> list[int]:
"""
Find the smallest range from each list in nums.
Uses min heap for efficiency. The range includes at least one number from each list.
Args:
`nums`: List of k sorted integer lists.
Returns:
list: Smallest range as a two-element list.
Examples:
>>> smallest_range([[4, 10, 15, 24, 26], [0, 9, 12, 20], [5, 18, 22, 30]])
[20, 24]
>>> smallest_range([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
[1, 1]
>>> smallest_range(((1, 2, 3), (1, 2, 3), (1, 2, 3)))
[1, 1]
>>> smallest_range(((-3, -2, -1), (0, 0, 0), (1, 2, 3)))
[-1, 1]
>>> smallest_range([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
[3, 7]
>>> smallest_range([[0, 0, 0], [0, 0, 0], [0, 0, 0]])
[0, 0]
>>> smallest_range([[], [], []])
Traceback (most recent call last):
...
IndexError: list index out of range
"""
min_heap: list[tuple[int, int, int]] = []
current_max = -maxsize - 1
for i, items in enumerate(nums):
heappush(min_heap, (items[0], i, 0))
current_max = max(current_max, items[0])
# Initialize smallest_range with large integer values
smallest_range = [-maxsize - 1, maxsize]
while min_heap:
current_min, list_index, element_index = heappop(min_heap)
if current_max - current_min < smallest_range[1] - smallest_range[0]:
smallest_range = [current_min, current_max]
if element_index == len(nums[list_index]) - 1:
break
next_element = nums[list_index][element_index + 1]
heappush(min_heap, (next_element, list_index, element_index + 1))
current_max = max(current_max, next_element)
return smallest_range
if __name__ == "__main__":
from doctest import testmod
testmod()
print(f"{smallest_range([[1, 2, 3], [1, 2, 3], [1, 2, 3]])}") # Output: [1, 1]
================================================
FILE: hashes/README.md
================================================
# Hashes
Hashing is the process of mapping any amount of data to a fixed-size output using an algorithm. The output is known as a hash value (or, if you're feeling fancy, a hash code, hash sum, or even a hash digest). Hashing is a one-way function, whereas encryption is a two-way function. While it is theoretically conceivable to reverse a hash, the required computing power makes it impractical. Hashing is a one-way street.
Unlike encryption, which is intended to protect data in transit, hashing is intended to authenticate that a file or piece of data has not been altered—that it is authentic. In other words, it functions as a checksum.
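For example, Python's built-in `hashlib` module can be used as a quick integrity check: hash the data once, store the digest, and later re-hash and compare.

```python
import hashlib

data = b"The quick brown fox jumps over the lazy dog"
digest = hashlib.sha256(data).hexdigest()
print(digest)  # changing even a single byte of data gives a completely different digest
```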
## Common hashing algorithms
### MD5
This is one of the first algorithms to gain widespread acceptance. MD5 is a hashing algorithm made by Ron Rivest that is known to suffer from vulnerabilities. It was created in 1992 as the successor to MD4. MD6 was later developed as a candidate successor, but as of 2009 Rivest had withdrawn it from NIST consideration for SHA-3.
### SHA
SHA stands for Secure Hash Algorithm and it's probably best known as the hashing algorithm used in most SSL/TLS cipher suites. A cipher suite is a collection of ciphers and algorithms that are used for SSL/TLS connections. SHA handles the hashing aspects. SHA-1 is now deprecated and SHA-2 is now required. SHA-2 is often referred to by its most common variant, SHA-256, though variants with longer bit lengths are also available.
### SHA256
SHA-256 is a member of the SHA-2 algorithm family, in which SHA stands for Secure Hash Algorithm. It was a collaborative effort between the NSA and NIST to implement a successor to the SHA-1 family, which was beginning to lose potency against brute-force attacks. It was published in 2001.
The 256 in the name refers to the size of the final hash digest, i.e. the hash value will remain 256 bits regardless of the size of the plaintext/cleartext. Other algorithms in the SHA family are similar to SHA-256 in some ways.
### Luhn
The Luhn algorithm, also known as the modulus 10 or mod 10 algorithm, is a straightforward checksum formula used to validate a wide range of identification numbers, including credit card numbers, IMEI numbers, and Canadian Social Insurance Numbers. The formula was developed by the IBM scientist Hans Peter Luhn, and companies offering credit cards quickly adopted it. Since the algorithm is in the public domain, anyone can use it. The algorithm is used by most credit cards and many government identification numbers as a simple method of distinguishing valid numbers from mistyped or otherwise incorrect ones. It was created to guard against unintentional errors, not malicious attacks.
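As a worked example, take the test number 79927398713 (also used in `hashes/luhn.py`). Starting from the digit to the left of the check digit and moving left, double every second digit (1→2, 8→16→7, 3→6, 2→4, 9→18→9), subtracting 9 whenever a doubled value exceeds 9. Adding these to the untouched digits (7 + 9 + 7 + 9 + 7) and the check digit 3 gives 70, which is divisible by 10, so the number passes the check.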
================================================
FILE: hashes/__init__.py
================================================
================================================
FILE: hashes/adler32.py
================================================
"""
Adler-32 is a checksum algorithm which was invented by Mark Adler in 1995.
Compared to a cyclic redundancy check of the same length, it trades reliability for
speed (preferring the latter).
Adler-32 is more reliable than Fletcher-16, and slightly less reliable than
Fletcher-32.[2]
source: https://en.wikipedia.org/wiki/Adler-32
"""
MOD_ADLER = 65521
def adler32(plain_text: str) -> int:
"""
Function implements adler-32 hash.
Iterates and evaluates a new value for each character
>>> adler32('Algorithms')
363791387
>>> adler32('go adler em all')
708642122
"""
a = 1
b = 0
for plain_chr in plain_text:
a = (a + ord(plain_chr)) % MOD_ADLER
b = (b + a) % MOD_ADLER
return (b << 16) | a
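# Illustrative cross-check (added for exposition, not part of the original module):
# the standard library's zlib.adler32 computes the same checksum, so the doctest
# values above should agree with it.
if __name__ == "__main__":
    import zlib

    assert adler32("Algorithms") == zlib.adler32(b"Algorithms")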
================================================
FILE: hashes/chaos_machine.py
================================================
"""example of simple chaos machine"""
# Chaos Machine (K, t, m)
K = [0.33, 0.44, 0.55, 0.44, 0.33]
t = 3
m = 5
# Buffer Space (with Parameters Space)
buffer_space: list[float] = []
params_space: list[float] = []
# Machine Time
machine_time = 0
def push(seed):
global buffer_space, params_space, machine_time, K, m, t
# Choosing Dynamical Systems (All)
for key, value in enumerate(buffer_space):
# Evolution Parameter
e = float(seed / value)
# Control Theory: Orbit Change
value = (buffer_space[(key + 1) % m] + e) % 1
# Control Theory: Trajectory Change
r = (params_space[key] + e) % 1 + 3
# Modification (Transition Function) - Jumps
buffer_space[key] = round(float(r * value * (1 - value)), 10)
params_space[key] = r # Saving to Parameters Space
# Logistic Map
assert max(buffer_space) < 1
assert max(params_space) < 4
# Machine Time
machine_time += 1
def pull():
global buffer_space, params_space, machine_time, K, m, t
# PRNG (Xorshift by George Marsaglia)
def xorshift(x, y):
x ^= y >> 13
y ^= x << 17
x ^= y >> 5
return x
# Choosing Dynamical Systems (Increment)
key = machine_time % m
# Evolution (Time Length)
for _ in range(t):
# Variables (Position + Parameters)
r = params_space[key]
value = buffer_space[key]
# Modification (Transition Function) - Flow
buffer_space[key] = round(float(r * value * (1 - value)), 10)
params_space[key] = (machine_time * 0.01 + r * 1.01) % 1 + 3
# Choosing Chaotic Data
x = int(buffer_space[(key + 2) % m] * (10**10))
y = int(buffer_space[(key - 2) % m] * (10**10))
# Machine Time
machine_time += 1
return xorshift(x, y) % 0xFFFFFFFF
def reset():
global buffer_space, params_space, machine_time, K, m, t
buffer_space = K
params_space = [0] * m
machine_time = 0
if __name__ == "__main__":
# Initialization
reset()
# Pushing Data (Input)
import random
message = random.sample(range(0xFFFFFFFF), 100)
for chunk in message:
push(chunk)
# for controlling
inp = ""
# Pulling Data (Output)
    while inp not in ("e", "E"):
print(f"{format(pull(), '#04x')}")
print(buffer_space)
print(params_space)
inp = input("(e)exit? ").strip()
================================================
FILE: hashes/djb2.py
================================================
"""
This algorithm (k=33) was first reported by Dan Bernstein many years ago in comp.lang.c
Another version of this algorithm (now favored by Bernstein) uses xor:
hash(i) = hash(i - 1) * 33 ^ str[i];
First Magic constant 33:
It has never been adequately explained.
It's magic because it works better than many other constants, prime or not.
Second Magic Constant 5381:
1. odd number
2. prime number
3. deficient number
4. 1010100000101 in binary (001 010 100 000 101 grouped in threes)
source: http://www.cse.yorku.ca/~oz/hash.html
"""
def djb2(s: str) -> int:
"""
    Implementation of the djb2 hash algorithm, which
    is popular because of its magic constants.
>>> djb2('Algorithms')
3782405311
>>> djb2('scramble bits')
1609059040
"""
hash_value = 5381
for x in s:
hash_value = ((hash_value << 5) + hash_value) + ord(x)
return hash_value & 0xFFFFFFFF
================================================
FILE: hashes/elf.py
================================================
def elf_hash(data: str) -> int:
"""
Implementation of ElfHash Algorithm, a variant of PJW hash function.
>>> elf_hash('lorem ipsum')
253956621
"""
hash_ = x = 0
for letter in data:
hash_ = (hash_ << 4) + ord(letter)
x = hash_ & 0xF0000000
if x != 0:
hash_ ^= x >> 24
hash_ &= ~x
return hash_
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: hashes/enigma_machine.py
================================================
alphabets = [chr(i) for i in range(32, 126)]
gear_one = list(range(len(alphabets)))
gear_two = list(range(len(alphabets)))
gear_three = list(range(len(alphabets)))
reflector = list(reversed(range(len(alphabets))))
code = []
gear_one_pos = gear_two_pos = gear_three_pos = 0
def rotator():
global gear_one_pos
global gear_two_pos
global gear_three_pos
i = gear_one[0]
gear_one.append(i)
del gear_one[0]
gear_one_pos += 1
if gear_one_pos % len(alphabets) == 0:
i = gear_two[0]
gear_two.append(i)
del gear_two[0]
gear_two_pos += 1
if gear_two_pos % len(alphabets) == 0:
i = gear_three[0]
gear_three.append(i)
del gear_three[0]
gear_three_pos += 1
def engine(input_character):
target = alphabets.index(input_character)
target = gear_one[target]
target = gear_two[target]
target = gear_three[target]
target = reflector[target]
target = gear_three.index(target)
target = gear_two.index(target)
target = gear_one.index(target)
code.append(alphabets[target])
rotator()
if __name__ == "__main__":
decode = list(input("Type your message:\n"))
while True:
try:
token = int(input("Please set token:(must be only digits)\n"))
break
except Exception as error:
print(error)
for _ in range(token):
rotator()
for j in decode:
engine(j)
print("\n" + "".join(code))
print(
f"\nYour Token is {token} please write it down.\nIf you want to decode "
"this message again you should input same digits as token!"
)
================================================
FILE: hashes/fletcher16.py
================================================
"""
The Fletcher checksum is an algorithm for computing a position-dependent
checksum devised by John G. Fletcher (1934-2012) at Lawrence Livermore Labs
in the late 1970s.[1] The objective of the Fletcher checksum was to
provide error-detection properties approaching those of a cyclic
redundancy check but with the lower computational effort associated
with summation techniques.
Source: https://en.wikipedia.org/wiki/Fletcher%27s_checksum
"""
def fletcher16(text: str) -> int:
"""
Loop through every character in the data and add to two sums.
>>> fletcher16('hello world')
6752
>>> fletcher16('onethousandfourhundredthirtyfour')
28347
>>> fletcher16('The quick brown fox jumps over the lazy dog.')
5655
"""
data = bytes(text, "ascii")
sum1 = 0
sum2 = 0
for character in data:
sum1 = (sum1 + character) % 255
sum2 = (sum1 + sum2) % 255
return (sum2 << 8) | sum1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: hashes/hamming_code.py
================================================
# Author: João Gustavo A. Amorim & Gabriel Kunz
# Author email: joaogustavoamorim@gmail.com and gabriel-kunz@uergs.edu.br
# Coding date: apr 2019
# Black: True
"""
* This code implements the Hamming code:
https://en.wikipedia.org/wiki/Hamming_code - In telecommunication,
Hamming codes are a family of linear error-correcting codes. Hamming
codes can detect up to two-bit errors or correct one-bit errors
without detection of uncorrected errors. By contrast, the simple
parity code cannot correct errors, and can detect only an odd number
of bits in error. Hamming codes are perfect codes, that is, they
achieve the highest possible rate for codes with their block length
and minimum distance of three.
* the implemented code consists of:
    * a function responsible for encoding the message (emitterConverter)
        * returns the encoded message
    * a function responsible for decoding the message (receptorConverter)
        * returns the decoded message and an ack of data integrity
* how to use:
    declare how many parity bits (sizePari) you want to include in the message.
    For test purposes you can also select a bit to be flipped as an error (be);
    this serves to check whether the code is working correctly.
    Lastly, set the message/word that is to be encoded (text).
* how this works:
    declaration of variables (sizePari, be, text)
    converts the message/word (text) to binary using the
    text_to_bits function
    encodes the message using the rules of Hamming encoding
    decodes the message using the rules of Hamming encoding
    prints the original message, the encoded message and the
    decoded message
    forces an error in the coded text variable
    decodes the message containing the forced error
    prints the original message, the encoded message, the bit-changed
    message and the decoded message
"""
# Imports
import numpy as np
# Functions of binary conversion--------------------------------------
def text_to_bits(text, encoding="utf-8", errors="surrogatepass"):
"""
>>> text_to_bits("msg")
'011011010111001101100111'
"""
bits = bin(int.from_bytes(text.encode(encoding, errors), "big"))[2:]
return bits.zfill(8 * ((len(bits) + 7) // 8))
def text_from_bits(bits, encoding="utf-8", errors="surrogatepass"):
"""
>>> text_from_bits('011011010111001101100111')
'msg'
"""
n = int(bits, 2)
return n.to_bytes((n.bit_length() + 7) // 8, "big").decode(encoding, errors) or "\0"
# Functions of hamming code-------------------------------------------
def emitter_converter(size_par, data):
"""
:param size_par: how many parity bits the message must have
:param data: information bits
:return: message to be transmitted by unreliable medium
- bits of information merged with parity bits
>>> emitter_converter(4, "101010111111")
['1', '1', '1', '1', '0', '1', '0', '0', '1', '0', '1', '1', '1', '1', '1', '1']
>>> emitter_converter(5, "101010111111")
Traceback (most recent call last):
...
ValueError: size of parity don't match with size of data
"""
if size_par + len(data) <= 2**size_par - (len(data) - 1):
raise ValueError("size of parity don't match with size of data")
data_out = []
parity = []
bin_pos = [bin(x)[2:] for x in range(1, size_par + len(data) + 1)]
# sorted information data for the size of the output data
data_ord = []
# data position template + parity
data_out_gab = []
# parity bit counter
qtd_bp = 0
# counter position of data bits
cont_data = 0
for x in range(1, size_par + len(data) + 1):
        # Performs a template of bit positions - which ones hold data
        # and which ones hold parity
if qtd_bp < size_par:
if (np.log(x) / np.log(2)).is_integer():
data_out_gab.append("P")
qtd_bp = qtd_bp + 1
else:
data_out_gab.append("D")
else:
data_out_gab.append("D")
# Sorts the data to the new output size
if data_out_gab[-1] == "D":
data_ord.append(data[cont_data])
cont_data += 1
else:
data_ord.append(None)
# Calculates parity
qtd_bp = 0 # parity bit counter
for bp in range(1, size_par + 1):
# Bit counter one for a given parity
cont_bo = 0
# counter to control the loop reading
for cont_loop, x in enumerate(data_ord):
if x is not None:
try:
aux = (bin_pos[cont_loop])[-1 * (bp)]
except IndexError:
aux = "0"
if aux == "1" and x == "1":
cont_bo += 1
parity.append(cont_bo % 2)
qtd_bp += 1
# Mount the message
cont_bp = 0 # parity bit counter
for x in range(size_par + len(data)):
if data_ord[x] is None:
data_out.append(str(parity[cont_bp]))
cont_bp += 1
else:
data_out.append(data_ord[x])
return data_out
def receptor_converter(size_par, data):
"""
>>> receptor_converter(4, "1111010010111111")
(['1', '0', '1', '0', '1', '0', '1', '1', '1', '1', '1', '1'], True)
"""
# data position template + parity
data_out_gab = []
# Parity bit counter
qtd_bp = 0
# Counter p data bit reading
cont_data = 0
# list of parity received
parity_received = []
data_output = []
for i, item in enumerate(data, 1):
        # Performs a template of bit positions - which ones hold data
        # and which ones hold parity
if qtd_bp < size_par and (np.log(i) / np.log(2)).is_integer():
data_out_gab.append("P")
qtd_bp = qtd_bp + 1
else:
data_out_gab.append("D")
# Sorts the data to the new output size
if data_out_gab[-1] == "D":
data_output.append(item)
else:
parity_received.append(item)
# -----------calculates the parity with the data
data_out = []
parity = []
bin_pos = [bin(x)[2:] for x in range(1, size_par + len(data_output) + 1)]
# sorted information data for the size of the output data
data_ord = []
# Data position feedback + parity
data_out_gab = []
# Parity bit counter
qtd_bp = 0
# Counter p data bit reading
cont_data = 0
for x in range(1, size_par + len(data_output) + 1):
        # Performs a template of bit positions - which ones hold data
        # and which ones hold parity
if qtd_bp < size_par and (np.log(x) / np.log(2)).is_integer():
data_out_gab.append("P")
qtd_bp = qtd_bp + 1
else:
data_out_gab.append("D")
# Sorts the data to the new output size
if data_out_gab[-1] == "D":
data_ord.append(data_output[cont_data])
cont_data += 1
else:
data_ord.append(None)
# Calculates parity
qtd_bp = 0 # parity bit counter
for bp in range(1, size_par + 1):
# Bit counter one for a certain parity
cont_bo = 0
for cont_loop, x in enumerate(data_ord):
if x is not None:
try:
aux = (bin_pos[cont_loop])[-1 * (bp)]
except IndexError:
aux = "0"
if aux == "1" and x == "1":
cont_bo += 1
parity.append(str(cont_bo % 2))
qtd_bp += 1
# Mount the message
cont_bp = 0 # Parity bit counter
for x in range(size_par + len(data_output)):
if data_ord[x] is None:
data_out.append(str(parity[cont_bp]))
cont_bp += 1
else:
data_out.append(data_ord[x])
ack = parity_received == parity
return data_output, ack
# ---------------------------------------------------------------------
"""
# Example how to use
# number of parity bits
sizePari = 4
# location of the bit that will be forced an error
be = 2
# Message/word to be encoded and decoded with hamming
# text = input("Enter the word to be read: ")
text = "Message01"
# Convert the message to binary
binaryText = text_to_bits(text)
# Prints the binary of the string
print("Text input in binary is '" + binaryText + "'")
# total transmitted bits
totalBits = len(binaryText) + sizePari
print("Size of data is " + str(totalBits))
print("\n --Message exchange--")
print("Data to send ------------> " + binaryText)
dataOut = emitterConverter(sizePari, binaryText)
print("Data converted ----------> " + "".join(dataOut))
dataReceiv, ack = receptorConverter(sizePari, dataOut)
print(
"Data receive ------------> "
+ "".join(dataReceiv)
+ "\t\t -- Data integrity: "
+ str(ack)
)
print("\n --Force error--")
print("Data to send ------------> " + binaryText)
dataOut = emitterConverter(sizePari, binaryText)
print("Data converted ----------> " + "".join(dataOut))
# forces error
dataOut[-be] = "1" * (dataOut[-be] == "0") + "0" * (dataOut[-be] == "1")
print("Data after transmission -> " + "".join(dataOut))
dataReceiv, ack = receptorConverter(sizePari, dataOut)
print(
"Data receive ------------> "
+ "".join(dataReceiv)
+ "\t\t -- Data integrity: "
+ str(ack)
)
"""
================================================
FILE: hashes/luhn.py
================================================
"""Luhn Algorithm"""
from __future__ import annotations
def is_luhn(string: str) -> bool:
"""
Perform Luhn validation on an input string
    Algorithm:
    * Double every other digit, starting from the 2nd last digit.
    * Subtract 9 from any result greater than 9.
    * Sum all the digits.
    * The number is valid if the sum is a multiple of 10.
>>> test_cases = (79927398710, 79927398711, 79927398712, 79927398713,
... 79927398714, 79927398715, 79927398716, 79927398717, 79927398718,
... 79927398719)
>>> [is_luhn(str(test_case)) for test_case in test_cases]
[False, False, False, True, False, False, False, False, False, False]
"""
check_digit: int
_vector: list[str] = list(string)
__vector, check_digit = _vector[:-1], int(_vector[-1])
vector: list[int] = [int(digit) for digit in __vector]
vector.reverse()
for i, digit in enumerate(vector):
if i & 1 == 0:
doubled: int = digit * 2
if doubled > 9:
doubled -= 9
check_digit += doubled
else:
check_digit += digit
return check_digit % 10 == 0
if __name__ == "__main__":
import doctest
doctest.testmod()
assert is_luhn("79927398713")
assert not is_luhn("79927398714")
================================================
FILE: hashes/md5.py
================================================
"""
The MD5 algorithm is a hash function that's commonly used as a checksum to
detect data corruption. The algorithm works by processing a given message in
blocks of 512 bits, padding the message as needed. Each block is used to update
a 128-bit state through a total of 64 operations per block. Note that all values
are little-endian, so inputs are converted as needed.
Although MD5 was used as a cryptographic hash function in the past, it's since
been cracked, so it shouldn't be used for security purposes.
For more info, see https://en.wikipedia.org/wiki/MD5
"""
from collections.abc import Generator
from math import sin
def to_little_endian(string_32: bytes) -> bytes:
"""
Converts the given string to little-endian in groups of 8 chars.
Arguments:
string_32 {[string]} -- [32-char string]
Raises:
ValueError -- [input is not 32 char]
Returns:
32-char little-endian string
>>> to_little_endian(b'1234567890abcdfghijklmnopqrstuvw')
b'pqrstuvwhijklmno90abcdfg12345678'
>>> to_little_endian(b'1234567890')
Traceback (most recent call last):
...
ValueError: Input must be of length 32
"""
if len(string_32) != 32:
raise ValueError("Input must be of length 32")
little_endian = b""
for i in [3, 2, 1, 0]:
little_endian += string_32[8 * i : 8 * i + 8]
return little_endian
def reformat_hex(i: int) -> bytes:
"""
Converts the given non-negative integer to hex string.
Example: Suppose the input is the following:
i = 1234
The input is 0x000004d2 in hex, so the little-endian hex string is
"d2040000".
Arguments:
i {[int]} -- [integer]
Raises:
ValueError -- [input is negative]
Returns:
8-char little-endian hex string
>>> reformat_hex(1234)
b'd2040000'
>>> reformat_hex(666)
b'9a020000'
>>> reformat_hex(0)
b'00000000'
>>> reformat_hex(1234567890)
b'd2029649'
>>> reformat_hex(1234567890987654321)
b'b11c6cb1'
>>> reformat_hex(-1)
Traceback (most recent call last):
...
ValueError: Input must be non-negative
"""
if i < 0:
raise ValueError("Input must be non-negative")
hex_rep = format(i, "08x")[-8:]
little_endian_hex = b""
for j in [3, 2, 1, 0]:
little_endian_hex += hex_rep[2 * j : 2 * j + 2].encode("utf-8")
return little_endian_hex
def preprocess(message: bytes) -> bytes:
"""
Preprocesses the message string:
- Convert message to bit string
- Pad bit string to a multiple of 512 chars:
- Append a 1
- Append 0's until length = 448 (mod 512)
- Append length of original message (64 chars)
Example: Suppose the input is the following:
message = "a"
The message bit string is "01100001", which is 8 bits long. Thus, the
bit string needs 439 bits of padding so that
(bit_string + "1" + padding) = 448 (mod 512).
The message length is "000010000...0" in 64-bit little-endian binary.
The combined bit string is then 512 bits long.
Arguments:
message {[string]} -- [message string]
Returns:
processed bit string padded to a multiple of 512 chars
>>> preprocess(b"a") == (b"01100001" + b"1" +
... (b"0" * 439) + b"00001000" + (b"0" * 56))
True
>>> preprocess(b"") == b"1" + (b"0" * 447) + (b"0" * 64)
True
"""
bit_string = b""
for char in message:
bit_string += format(char, "08b").encode("utf-8")
start_len = format(len(bit_string), "064b").encode("utf-8")
# Pad bit_string to a multiple of 512 chars
bit_string += b"1"
while len(bit_string) % 512 != 448:
bit_string += b"0"
bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32])
return bit_string
def get_block_words(bit_string: bytes) -> Generator[list[int]]:
"""
Splits bit string into blocks of 512 chars and yields each block as a list
of 32-bit words
Example: Suppose the input is the following:
bit_string =
"000000000...0" + # 0x00 (32 bits, padded to the right)
"000000010...0" + # 0x01 (32 bits, padded to the right)
"000000100...0" + # 0x02 (32 bits, padded to the right)
"000000110...0" + # 0x03 (32 bits, padded to the right)
...
"000011110...0" # 0x0a (32 bits, padded to the right)
Then len(bit_string) == 512, so there'll be 1 block. The block is split
into 32-bit words, and each word is converted to little endian. The
first word is interpreted as 0 in decimal, the second word is
interpreted as 1 in decimal, etc.
Thus, block_words == [[0, 1, 2, 3, ..., 15]].
Arguments:
bit_string {[string]} -- [bit string with multiple of 512 as length]
Raises:
ValueError -- [length of bit string isn't multiple of 512]
Yields:
a list of 16 32-bit words
>>> test_string = ("".join(format(n << 24, "032b") for n in range(16))
... .encode("utf-8"))
>>> list(get_block_words(test_string))
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
>>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4
True
>>> list(get_block_words(b"1" * 512)) == [[4294967295] * 16]
True
>>> list(get_block_words(b""))
[]
>>> list(get_block_words(b"1111"))
Traceback (most recent call last):
...
ValueError: Input must have length that's a multiple of 512
"""
if len(bit_string) % 512 != 0:
raise ValueError("Input must have length that's a multiple of 512")
for pos in range(0, len(bit_string), 512):
block = bit_string[pos : pos + 512]
block_words = []
for i in range(0, 512, 32):
block_words.append(int(to_little_endian(block[i : i + 32]), 2))
yield block_words
def not_32(i: int) -> int:
"""
Perform bitwise NOT on given int.
Arguments:
i {[int]} -- [given int]
Raises:
ValueError -- [input is negative]
Returns:
Result of bitwise NOT on i
>>> not_32(34)
4294967261
>>> not_32(1234)
4294966061
>>> not_32(4294966061)
1234
>>> not_32(0)
4294967295
>>> not_32(1)
4294967294
>>> not_32(-1)
Traceback (most recent call last):
...
ValueError: Input must be non-negative
"""
if i < 0:
raise ValueError("Input must be non-negative")
i_str = format(i, "032b")
new_str = ""
for c in i_str:
new_str += "1" if c == "0" else "0"
return int(new_str, 2)
def sum_32(a: int, b: int) -> int:
"""
Add two numbers as 32-bit ints.
Arguments:
a {[int]} -- [first given int]
b {[int]} -- [second given int]
Returns:
(a + b) as an unsigned 32-bit int
>>> sum_32(1, 1)
2
>>> sum_32(2, 3)
5
>>> sum_32(0, 0)
0
>>> sum_32(-1, -1)
4294967294
>>> sum_32(4294967295, 1)
0
"""
return (a + b) % 2**32
def left_rotate_32(i: int, shift: int) -> int:
"""
Rotate the bits of a given int left by a given amount.
Arguments:
i {[int]} -- [given int]
shift {[int]} -- [shift amount]
Raises:
ValueError -- [either given int or shift is negative]
Returns:
`i` rotated to the left by `shift` bits
>>> left_rotate_32(1234, 1)
2468
>>> left_rotate_32(1111, 4)
17776
>>> left_rotate_32(2147483648, 1)
1
>>> left_rotate_32(2147483648, 3)
4
>>> left_rotate_32(4294967295, 4)
4294967295
>>> left_rotate_32(1234, 0)
1234
>>> left_rotate_32(0, 0)
0
>>> left_rotate_32(-1, 0)
Traceback (most recent call last):
...
ValueError: Input must be non-negative
>>> left_rotate_32(0, -1)
Traceback (most recent call last):
...
ValueError: Shift must be non-negative
"""
if i < 0:
raise ValueError("Input must be non-negative")
if shift < 0:
raise ValueError("Shift must be non-negative")
return ((i << shift) ^ (i >> (32 - shift))) % 2**32
def md5_me(message: bytes) -> bytes:
"""
Returns the 32-char MD5 hash of a given message.
Reference: https://en.wikipedia.org/wiki/MD5#Algorithm
Arguments:
message {[string]} -- [message]
Returns:
32-char MD5 hash string
>>> md5_me(b"")
b'd41d8cd98f00b204e9800998ecf8427e'
>>> md5_me(b"The quick brown fox jumps over the lazy dog")
b'9e107d9d372bb6826bd81d3542a419d6'
>>> md5_me(b"The quick brown fox jumps over the lazy dog.")
b'e4d909c290d0fb1ca068ffaddf22cbd0'
>>> import hashlib
>>> from string import ascii_letters
>>> msgs = [b"", ascii_letters.encode("utf-8"), "Üñîçø∂é".encode("utf-8"),
... b"The quick brown fox jumps over the lazy dog."]
>>> all(md5_me(msg) == hashlib.md5(msg).hexdigest().encode("utf-8") for msg in msgs)
True
"""
# Convert to bit string, add padding and append message length
bit_string = preprocess(message)
added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)]
# Starting states
a0 = 0x67452301
b0 = 0xEFCDAB89
c0 = 0x98BADCFE
d0 = 0x10325476
shift_amounts = [
7,
12,
17,
22,
7,
12,
17,
22,
7,
12,
17,
22,
7,
12,
17,
22,
5,
9,
14,
20,
5,
9,
14,
20,
5,
9,
14,
20,
5,
9,
14,
20,
4,
11,
16,
23,
4,
11,
16,
23,
4,
11,
16,
23,
4,
11,
16,
23,
6,
10,
15,
21,
6,
10,
15,
21,
6,
10,
15,
21,
6,
10,
15,
21,
]
# Process bit string in chunks, each with 16 32-char words
for block_words in get_block_words(bit_string):
a = a0
b = b0
c = c0
d = d0
# Hash current chunk
for i in range(64):
if i <= 15:
# f = (b & c) | (not_32(b) & d) # Alternate definition for f
f = d ^ (b & (c ^ d))
g = i
elif i <= 31:
# f = (d & b) | (not_32(d) & c) # Alternate definition for f
f = c ^ (d & (b ^ c))
g = (5 * i + 1) % 16
elif i <= 47:
f = b ^ c ^ d
g = (3 * i + 5) % 16
else:
f = c ^ (b | not_32(d))
g = (7 * i) % 16
f = (f + a + added_consts[i] + block_words[g]) % 2**32
a = d
d = c
c = b
b = sum_32(b, left_rotate_32(f, shift_amounts[i]))
# Add hashed chunk to running total
a0 = sum_32(a0, a)
b0 = sum_32(b0, b)
c0 = sum_32(c0, c)
d0 = sum_32(d0, d)
digest = reformat_hex(a0) + reformat_hex(b0) + reformat_hex(c0) + reformat_hex(d0)
return digest
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: hashes/sdbm.py
================================================
"""
This algorithm was created for sdbm (a public-domain reimplementation of ndbm)
database library.
It was found to do well in scrambling bits, causing better distribution of the keys
and fewer splits.
It also happens to be a good general hashing function with good distribution.
The actual function (pseudo code) is:
for i in i..len(str):
hash(i) = hash(i - 1) * 65599 + str[i];
What is included below is the faster version used in gawk. [there is even a faster,
duff-device version]
The magic constant 65599 was picked out of thin air while experimenting with
different constants.
It turns out to be a prime.
This is one of the algorithms used in berkeley db (see sleepycat) and elsewhere.
source: http://www.cse.yorku.ca/~oz/hash.html
"""
def sdbm(plain_text: str) -> int:
"""
    Function implements the sdbm hash: easy to use, great for scrambling bits.
    Iterates over each character in the given string and applies the function to
    each of them.
>>> sdbm('Algorithms')
1462174910723540325254304520539387479031000036
>>> sdbm('scramble bits')
730247649148944819640658295400555317318720608290373040936089
"""
hash_value = 0
for plain_chr in plain_text:
hash_value = (
ord(plain_chr) + (hash_value << 6) + (hash_value << 16) - hash_value
)
return hash_value
================================================
FILE: hashes/sha1.py
================================================
"""
Implementation of the SHA1 hash function, with utilities to find the hash of a string
or of text from a file. Also contains a test function to verify that the generated hash
matches what is returned by the hashlib library.
Usage: python sha1.py --string "Hello World!!"
       python sha1.py --file "hello_world.txt"
When run without any arguments, it prints the hash of the string "Hello World!!
Welcome to Cryptography"
SHA1 is a cryptographic hash function, which means it is easy to calculate forwards
but extremely difficult to calculate backwards. In other words, you can easily compute
the hash of a string, but it is extremely difficult to recover the original string if
you only have its hash. This property is useful for communicating securely, sending
encrypted messages, and is widely used in payment systems, blockchain, cryptocurrency,
etc.
The algorithm as described in the reference:
First we start with a message. The message is padded and the length of the message
is added to the end. It is then split into blocks of 512 bits or 64 bytes. The blocks
are then processed one at a time. Each block must be expanded and compressed.
The value after each compression is added to a 160-bit buffer called the current hash
state. After the last block is processed, the current hash state is returned as
the final hash.
Reference: https://deadhacker.com/2006/02/21/sha-1-illustrated/
"""
import argparse
import hashlib  # hashlib is only used inside the test function
import struct
class SHA1Hash:
"""
Class to contain the entire pipeline for SHA1 hashing algorithm
>>> SHA1Hash(bytes('Allan', 'utf-8')).final_hash()
'872af2d8ac3d8695387e7c804bf0e02c18df9e6e'
"""
def __init__(self, data):
"""
Initiates the variables data and h. h is a list of 5 8-digit hexadecimal
numbers corresponding to
(1732584193, 4023233417, 2562383102, 271733878, 3285377520)
respectively. We will start with this as a message digest. 0x is how you write
hexadecimal numbers in Python
"""
self.data = data
self.h = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0]
@staticmethod
def rotate(n, b):
"""
Static method to be used inside other methods. Left rotates n by b.
>>> SHA1Hash('').rotate(12,2)
48
"""
return ((n << b) | (n >> (32 - b))) & 0xFFFFFFFF
def padding(self):
"""
        Pads the input message so that the length of padded_data is a multiple of
        64 bytes (512 bits)
"""
padding = b"\x80" + b"\x00" * (63 - (len(self.data) + 8) % 64)
padded_data = self.data + padding + struct.pack(">Q", 8 * len(self.data))
return padded_data
def split_blocks(self):
"""
Returns a list of bytestrings each of length 64
"""
return [
self.padded_data[i : i + 64] for i in range(0, len(self.padded_data), 64)
]
# @staticmethod
def expand_block(self, block):
"""
Takes a bytestring-block of length 64, unpacks it to a list of integers and
returns a list of 80 integers after some bit operations
"""
w = list(struct.unpack(">16L", block)) + [0] * 64
for i in range(16, 80):
w[i] = self.rotate((w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16]), 1)
return w
def final_hash(self):
"""
Calls all the other methods to process the input. Pads the data, then splits
into blocks and then does a series of operations for each block (including
expansion).
For each block, the variable h that was initialized is copied to a,b,c,d,e
and these 5 variables a,b,c,d,e undergo several changes. After all the blocks
        are processed, these 5 variables are pairwise added to h, i.e. a to h[0], b to h[1],
and so on. This h becomes our final hash which is returned.
"""
self.padded_data = self.padding()
self.blocks = self.split_blocks()
for block in self.blocks:
expanded_block = self.expand_block(block)
a, b, c, d, e = self.h
for i in range(80):
if 0 <= i < 20:
f = (b & c) | ((~b) & d)
k = 0x5A827999
elif 20 <= i < 40:
f = b ^ c ^ d
k = 0x6ED9EBA1
elif 40 <= i < 60:
f = (b & c) | (b & d) | (c & d)
k = 0x8F1BBCDC
elif 60 <= i < 80:
f = b ^ c ^ d
k = 0xCA62C1D6
a, b, c, d, e = (
self.rotate(a, 5) + f + e + k + expanded_block[i] & 0xFFFFFFFF,
a,
self.rotate(b, 30),
c,
d,
)
self.h = (
self.h[0] + a & 0xFFFFFFFF,
self.h[1] + b & 0xFFFFFFFF,
self.h[2] + c & 0xFFFFFFFF,
self.h[3] + d & 0xFFFFFFFF,
self.h[4] + e & 0xFFFFFFFF,
)
return ("{:08x}" * 5).format(*self.h)
def test_sha1_hash():
msg = b"Test String"
assert SHA1Hash(msg).final_hash() == hashlib.sha1(msg).hexdigest() # noqa: S324
def main():
"""
Provides option 'string' or 'file' to take input and prints the calculated SHA1
hash. unittest.main() has been commented out because we probably don't want to run
the test each time.
"""
# unittest.main()
parser = argparse.ArgumentParser(description="Process some strings or files")
parser.add_argument(
"--string",
dest="input_string",
default="Hello World!! Welcome to Cryptography",
help="Hash the string",
)
parser.add_argument("--file", dest="input_file", help="Hash contents of a file")
args = parser.parse_args()
input_string = args.input_string
# In any case hash input should be a bytestring
if args.input_file:
with open(args.input_file, "rb") as f:
hash_input = f.read()
else:
hash_input = bytes(input_string, "utf-8")
print(SHA1Hash(hash_input).final_hash())
if __name__ == "__main__":
main()
import doctest
doctest.testmod()
================================================
FILE: hashes/sha256.py
================================================
# Author: M. Yathurshan
# Black Formatter: True
"""
Implementation of the SHA256 hash function in a Python class, with utilities
to find the hash of a string or of text from a file.
Usage: python sha256.py --string "Hello World!!"
python sha256.py --file "hello_world.txt"
When run without any arguments,
it prints the hash of the string "Hello World!! Welcome to Cryptography"
References:
https://qvault.io/cryptography/how-sha-2-works-step-by-step-sha-256/
https://en.wikipedia.org/wiki/SHA-2
"""
import argparse
import struct
import unittest
class SHA256:
"""
    Class to contain the entire pipeline for the SHA-256 hashing algorithm
>>> SHA256(b'Python').hash
'18885f27b5af9012df19e496460f9294d5ab76128824c6f993787004f6d9a7db'
>>> SHA256(b'hello world').hash
'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9'
"""
def __init__(self, data: bytes) -> None:
self.data = data
# Initialize hash values
self.hashes = [
0x6A09E667,
0xBB67AE85,
0x3C6EF372,
0xA54FF53A,
0x510E527F,
0x9B05688C,
0x1F83D9AB,
0x5BE0CD19,
]
# Initialize round constants
self.round_constants = [
0x428A2F98,
0x71374491,
0xB5C0FBCF,
0xE9B5DBA5,
0x3956C25B,
0x59F111F1,
0x923F82A4,
0xAB1C5ED5,
0xD807AA98,
0x12835B01,
0x243185BE,
0x550C7DC3,
0x72BE5D74,
0x80DEB1FE,
0x9BDC06A7,
0xC19BF174,
0xE49B69C1,
0xEFBE4786,
0x0FC19DC6,
0x240CA1CC,
0x2DE92C6F,
0x4A7484AA,
0x5CB0A9DC,
0x76F988DA,
0x983E5152,
0xA831C66D,
0xB00327C8,
0xBF597FC7,
0xC6E00BF3,
0xD5A79147,
0x06CA6351,
0x14292967,
0x27B70A85,
0x2E1B2138,
0x4D2C6DFC,
0x53380D13,
0x650A7354,
0x766A0ABB,
0x81C2C92E,
0x92722C85,
0xA2BFE8A1,
0xA81A664B,
0xC24B8B70,
0xC76C51A3,
0xD192E819,
0xD6990624,
0xF40E3585,
0x106AA070,
0x19A4C116,
0x1E376C08,
0x2748774C,
0x34B0BCB5,
0x391C0CB3,
0x4ED8AA4A,
0x5B9CCA4F,
0x682E6FF3,
0x748F82EE,
0x78A5636F,
0x84C87814,
0x8CC70208,
0x90BEFFFA,
0xA4506CEB,
0xBEF9A3F7,
0xC67178F2,
]
self.preprocessed_data = self.preprocessing(self.data)
self.final_hash()
@staticmethod
def preprocessing(data: bytes) -> bytes:
padding = b"\x80" + (b"\x00" * (63 - (len(data) + 8) % 64))
big_endian_integer = struct.pack(">Q", (len(data) * 8))
return data + padding + big_endian_integer
def final_hash(self) -> None:
# Convert into blocks of 64 bytes
self.blocks = [
self.preprocessed_data[x : x + 64]
for x in range(0, len(self.preprocessed_data), 64)
]
for block in self.blocks:
# Convert the given block into a list of 4 byte integers
words = list(struct.unpack(">16L", block))
# add 48 0-ed integers
words += [0] * 48
a, b, c, d, e, f, g, h = self.hashes
for index in range(64):
if index > 15:
# modify the zero-ed indexes at the end of the array
s0 = (
self.ror(words[index - 15], 7)
^ self.ror(words[index - 15], 18)
^ (words[index - 15] >> 3)
)
s1 = (
self.ror(words[index - 2], 17)
^ self.ror(words[index - 2], 19)
^ (words[index - 2] >> 10)
)
words[index] = (
words[index - 16] + s0 + words[index - 7] + s1
) % 0x100000000
# Compression
s1 = self.ror(e, 6) ^ self.ror(e, 11) ^ self.ror(e, 25)
ch = (e & f) ^ ((~e & (0xFFFFFFFF)) & g)
temp1 = (
h + s1 + ch + self.round_constants[index] + words[index]
) % 0x100000000
s0 = self.ror(a, 2) ^ self.ror(a, 13) ^ self.ror(a, 22)
maj = (a & b) ^ (a & c) ^ (b & c)
temp2 = (s0 + maj) % 0x100000000
h, g, f, e, d, c, b, a = (
g,
f,
e,
((d + temp1) % 0x100000000),
c,
b,
a,
((temp1 + temp2) % 0x100000000),
)
mutated_hash_values = [a, b, c, d, e, f, g, h]
# Modify final values
self.hashes = [
((element + mutated_hash_values[index]) % 0x100000000)
for index, element in enumerate(self.hashes)
]
self.hash = "".join([hex(value)[2:].zfill(8) for value in self.hashes])
def ror(self, value: int, rotations: int) -> int:
"""
Right rotate a given unsigned number by a certain amount of rotations
"""
return 0xFFFFFFFF & (value << (32 - rotations)) | (value >> rotations)
class SHA256HashTest(unittest.TestCase):
"""
Test class for the SHA256 class. Inherits the TestCase class from unittest
"""
def test_match_hashes(self) -> None:
import hashlib
msg = bytes("Test String", "utf-8")
assert SHA256(msg).hash == hashlib.sha256(msg).hexdigest()
def main() -> None:
"""
Provides option 'string' or 'file' to take input
and prints the calculated SHA-256 hash
"""
# unittest.main()
import doctest
doctest.testmod()
parser = argparse.ArgumentParser()
parser.add_argument(
"-s",
"--string",
dest="input_string",
default="Hello World!! Welcome to Cryptography",
help="Hash the string",
)
parser.add_argument(
"-f", "--file", dest="input_file", help="Hash contents of a file"
)
args = parser.parse_args()
input_string = args.input_string
# hash input should be a bytestring
if args.input_file:
with open(args.input_file, "rb") as f:
hash_input = f.read()
else:
hash_input = bytes(input_string, "utf-8")
print(SHA256(hash_input).hash)
if __name__ == "__main__":
main()
================================================
FILE: index.md
================================================
# TheAlgorithms/Python
```{toctree}
:maxdepth: 2
:caption: index.md
CONTRIBUTING.md
README.md
LICENSE.md
```
================================================
FILE: knapsack/README.md
================================================
# A recursive implementation of 0-N Knapsack Problem
This overview is taken from:
https://en.wikipedia.org/wiki/Knapsack_problem
---
## Overview
The knapsack problem is a problem in combinatorial optimization: Given a set of items, each with a weight and a value, determine the number of each item to include in a collection so that the total weight is less than or equal to a given limit and the total value is as large as possible. It derives its name from the problem faced by someone who is constrained by a fixed-size knapsack and must fill it with the most valuable items. The problem often arises in resource allocation where the decision makers have to choose from a set of non-divisible projects or tasks under a fixed budget or time constraint, respectively.
The knapsack problem has been studied for more than a century, with early works dating as far back as 1897. The name "knapsack problem" dates back to the early works of the mathematician Tobias Dantzig (1884–1956), and refers to the commonplace problem of packing the most valuable or useful items without overloading the luggage.
---
## Documentation
This module uses docstrings to enable the use of Python's in-built `help(...)` function.
For instance, try `help(knapsack)` after importing the module, or `help(knapsack.knapsack)` for the main function.
---
## Usage
Import the module `knapsack.py` from this directory into your project.
---
## Tests
The `tests/` directory contains Python unit tests which can be run with `python3 -m unittest -v`.
================================================
FILE: knapsack/__init__.py
================================================
================================================
FILE: knapsack/greedy_knapsack.py
================================================
# To get an insight into Greedy Algorithm through the Knapsack problem
"""
A shopkeeper has bags of wheat that each have different weights and different profits.
eg.
profit 5 8 7 1 12 3 4
weight 2 7 1 6 4 2 5
max_weight 100
Constraints:
max_weight > 0
profit[i] >= 0
weight[i] >= 0
Calculate the maximum profit that the shopkeeper can make given the maximum weight
that can be carried.
"""
def calc_profit(profit: list, weight: list, max_weight: int) -> int:
"""
Function description is as follows-
:param profit: Take a list of profits
    :param weight: Take a list of weights of bags corresponding to the profits
:param max_weight: Maximum weight that could be carried
:return: Maximum expected gain
>>> calc_profit([1, 2, 3], [3, 4, 5], 15)
6
>>> calc_profit([10, 9 , 8], [3 ,4 , 5], 25)
27
"""
if len(profit) != len(weight):
raise ValueError("The length of profit and weight must be same.")
if max_weight <= 0:
raise ValueError("max_weight must greater than zero.")
if any(p < 0 for p in profit):
raise ValueError("Profit can not be negative.")
if any(w < 0 for w in weight):
raise ValueError("Weight can not be negative.")
# List created to store profit gained for the 1kg in case of each weight
# respectively. Calculate and append profit/weight for each element.
profit_by_weight = [p / w for p, w in zip(profit, weight)]
# Creating a copy of the list and sorting profit/weight in ascending order
sorted_profit_by_weight = sorted(profit_by_weight)
# declaring useful variables
length = len(sorted_profit_by_weight)
limit = 0
gain = 0
i = 0
    # loop until the total weight reaches the max limit or all items are used
    while limit <= max_weight and i < length:
        # pick the largest remaining profit/weight ratio
        biggest_profit_by_weight = sorted_profit_by_weight[length - i - 1]
        # find its index in the original ratio list and mark it as used
        index = profit_by_weight.index(biggest_profit_by_weight)
        profit_by_weight[index] = -1
        # check whether the whole bag fits within the remaining capacity
        if max_weight - limit >= weight[index]:
limit += weight[index]
# Adding profit gained for the given weight 1 ===
# weight[index]/weight[index]
gain += 1 * profit[index]
else:
# Since the weight encountered is greater than limit, therefore take the
# required number of remaining kgs and calculate profit for it.
# weight remaining / weight[index]
gain += (max_weight - limit) / weight[index] * profit[index]
break
i += 1
return gain
if __name__ == "__main__":
print(
"Input profits, weights, and then max_weight (all positive ints) separated by "
"spaces."
)
profit = [int(x) for x in input("Input profits separated by spaces: ").split()]
weight = [int(x) for x in input("Input weights separated by spaces: ").split()]
max_weight = int(input("Max weight allowed: "))
# Function Call
calc_profit(profit, weight, max_weight)
================================================
FILE: knapsack/knapsack.py
================================================
"""A recursive implementation of 0-N Knapsack Problem
https://en.wikipedia.org/wiki/Knapsack_problem
"""
from __future__ import annotations
from functools import lru_cache
def knapsack(
capacity: int,
weights: list[int],
values: list[int],
counter: int,
allow_repetition=False,
) -> int:
"""
Returns the maximum value that can be put in a knapsack of a capacity cap,
whereby each weight w has a specific value val
with option to allow repetitive selection of items
>>> cap = 50
>>> val = [60, 100, 120]
>>> w = [10, 20, 30]
>>> c = len(val)
>>> knapsack(cap, w, val, c)
220
    Given that repetition is NOT allowed,
    the result is 220 because the items with values 100 and 120 have a combined
    weight of 50, which is the capacity limit.
    >>> knapsack(cap, w, val, c, True)
    300
    Given that repetition is allowed,
    the result is 300 because the item with value 60 can be picked 5 times,
    for a combined weight of 10*5 = 50, which is the capacity limit.
"""
@lru_cache
def knapsack_recur(capacity: int, counter: int) -> int:
# Base Case
if counter == 0 or capacity == 0:
return 0
# If weight of the nth item is more than Knapsack of capacity,
# then this item cannot be included in the optimal solution,
# else return the maximum of two cases:
# (1) nth item included only once (0-1), if allow_repetition is False
# nth item included one or more times (0-N), if allow_repetition is True
# (2) not included
if weights[counter - 1] > capacity:
return knapsack_recur(capacity, counter - 1)
else:
left_capacity = capacity - weights[counter - 1]
new_value_included = values[counter - 1] + knapsack_recur(
left_capacity, counter - 1 if not allow_repetition else counter
)
without_new_value = knapsack_recur(capacity, counter - 1)
return max(new_value_included, without_new_value)
return knapsack_recur(capacity, counter)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: knapsack/recursive_approach_knapsack.py
================================================
# To get an insight into naive recursive way to solve the Knapsack problem
"""
A shopkeeper has bags of wheat that each have different weights and different profits.
eg.
no_of_items 4
profit 5 4 8 6
weight 1 2 4 5
max_weight 5
Constraints:
max_weight > 0
profit[i] >= 0
weight[i] >= 0
Calculate the maximum profit that the shopkeeper can make given the maximum weight
that can be carried.
"""
def knapsack(
weights: list, values: list, number_of_items: int, max_weight: int, index: int
) -> int:
"""
Function description is as follows-
:param weights: Take a list of weights
:param values: Take a list of profits corresponding to the weights
:param number_of_items: number of items available to pick from
:param max_weight: Maximum weight that could be carried
:param index: the element we are looking at
:return: Maximum expected gain
>>> knapsack([1, 2, 4, 5], [5, 4, 8, 6], 4, 5, 0)
13
>>> knapsack([3 ,4 , 5], [10, 9 , 8], 3, 25, 0)
27
"""
if index == number_of_items:
return 0
ans1 = 0
ans2 = 0
ans1 = knapsack(weights, values, number_of_items, max_weight, index + 1)
if weights[index] <= max_weight:
ans2 = values[index] + knapsack(
weights, values, number_of_items, max_weight - weights[index], index + 1
)
return max(ans1, ans2)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: knapsack/tests/__init__.py
================================================
================================================
FILE: knapsack/tests/test_greedy_knapsack.py
================================================
import unittest
import pytest
from knapsack import greedy_knapsack as kp
class TestClass(unittest.TestCase):
"""
Test cases for knapsack
"""
def test_sorted(self):
"""
kp.calc_profit takes the required argument (profit, weight, max_weight)
and returns whether the answer matches to the expected ones
"""
profit = [10, 20, 30, 40, 50, 60]
weight = [2, 4, 6, 8, 10, 12]
max_weight = 100
assert kp.calc_profit(profit, weight, max_weight) == 210
    def test_negative_max_weight(self):
        """
        Raises ValueError for any negative max_weight value
        :return: ValueError
        """
        profit = [10, 20, 30, 40, 50, 60]
        weight = [2, 4, 6, 8, 10, 12]
        max_weight = -15
        with pytest.raises(ValueError, match=r"max_weight must greater than zero."):
            kp.calc_profit(profit, weight, max_weight)

    def test_negative_profit_value(self):
        """
        Raises ValueError for any negative profit value in the list
        :return: ValueError
        """
        profit = [10, -20, 30, 40, 50, 60]
        weight = [2, 4, 6, 8, 10, 12]
        max_weight = 15
        with pytest.raises(ValueError, match=r"Profit can not be negative."):
            kp.calc_profit(profit, weight, max_weight)

    def test_negative_weight_value(self):
        """
        Raises ValueError for any negative weight value in the list
        :return: ValueError
        """
        profit = [10, 20, 30, 40, 50, 60]
        weight = [2, -4, 6, -8, 10, 12]
        max_weight = 15
        with pytest.raises(ValueError, match=r"Weight can not be negative."):
            kp.calc_profit(profit, weight, max_weight)

    def test_null_max_weight(self):
        """
        Raises ValueError for a zero max_weight value
        :return: ValueError
        """
        profit = [10, 20, 30, 40, 50, 60]
        weight = [2, 4, 6, 8, 10, 12]
        max_weight = 0
        with pytest.raises(ValueError, match=r"max_weight must greater than zero."):
            kp.calc_profit(profit, weight, max_weight)

    def test_unequal_list_length(self):
        """
        Raises ValueError if the lengths of the profit and weight lists are unequal.
        :return: ValueError
        """
        profit = [10, 20, 30, 40, 50]
        weight = [2, 4, 6, 8, 10, 12]
        max_weight = 100
        with pytest.raises(
            ValueError, match=r"The length of profit and weight must be same."
        ):
            kp.calc_profit(profit, weight, max_weight)
if __name__ == "__main__":
unittest.main()
================================================
FILE: knapsack/tests/test_knapsack.py
================================================
"""
Created on Fri Oct 16 09:31:07 2020
@author: Dr. Tobias Schröder
@license: MIT-license
This file contains the test-suite for the knapsack problem.
"""
import unittest
from knapsack import knapsack as k
class Test(unittest.TestCase):
def test_base_case(self):
"""
test for the base case
"""
cap = 0
val = [0]
w = [0]
c = len(val)
assert k.knapsack(cap, w, val, c) == 0
val = [60]
w = [10]
c = len(val)
assert k.knapsack(cap, w, val, c) == 0
def test_easy_case(self):
"""
test for the easy case
"""
cap = 3
val = [1, 2, 3]
w = [3, 2, 1]
c = len(val)
assert k.knapsack(cap, w, val, c) == 5
def test_knapsack(self):
"""
test for the knapsack
"""
cap = 50
val = [60, 100, 120]
w = [10, 20, 30]
c = len(val)
assert k.knapsack(cap, w, val, c) == 220
def test_knapsack_repetition(self):
"""
test for the knapsack repetition
"""
cap = 50
val = [60, 100, 120]
w = [10, 20, 30]
c = len(val)
assert k.knapsack(cap, w, val, c, True) == 300
if __name__ == "__main__":
unittest.main()
================================================
FILE: linear_algebra/README.md
================================================
# Linear algebra library for Python
This module contains classes and functions for doing linear algebra.
---
## Overview
### class Vector
- This class represents a vector of arbitrary size and related operations.
**Overview of the methods:**
- constructor(components) : init the vector
- set(components) : changes the vector components.
- \_\_str\_\_() : toString method
- component(i): gets the i-th component (0-indexed)
- \_\_len\_\_() : gets the size / length of the vector (number of components)
- euclidean_length() : returns the euclidean length of the vector
- operator + : vector addition
- operator - : vector subtraction
- operator * : scalar multiplication and dot product
- copy() : copies this vector and returns it
- change_component(pos,value) : changes the specified component
- function zero_vector(dimension)
- returns a zero vector of 'dimension'
- function unit_basis_vector(dimension, pos)
- returns a unit basis vector with a one at index 'pos' (0-indexed)
- function axpy(scalar, vector1, vector2)
- computes the axpy operation
- function random_vector(N, a, b)
- returns a random vector of size N, with random integer components between 'a' and 'b' inclusive
### class Matrix
- This class represents a matrix of arbitrary size and operations on it.
**Overview of the methods:**
- \_\_str\_\_() : returns a string representation
- operator * : implements matrix-vector and matrix-scalar multiplication
- change_component(x, y, value) : changes the specified component.
- component(x, y) : returns the specified component.
- width() : returns the width of the matrix
- height() : returns the height of the matrix
- determinant() : returns the determinant of the matrix if it is square
- operator + : implements the matrix-addition.
- operator - : implements the matrix-subtraction
- function square_zero_matrix(N)
- returns a square zero-matrix of dimension NxN
- function random_matrix(W, H, a, b)
- returns a random matrix WxH with integer components between 'a' and 'b' inclusive
---
## Documentation
This module uses docstrings to enable the use of Python's in-built `help(...)` function.
For instance, try `help(Vector)`, `help(unit_basis_vector)`, and `help(CLASSNAME.METHODNAME)`.
---
## Usage
Import the module `lib.py` from the **src** directory into your project.
Alternatively, you can directly use the Python bytecode file `lib.pyc`.
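A minimal usage sketch (assuming the repository root is on `PYTHONPATH`, so that `lib.py` is importable as `linear_algebra.src.lib`):

```python
from linear_algebra.src.lib import Matrix, Vector, unit_basis_vector

v = Vector([1, 2, 3])
w = Vector([1, 1, 1])
print(v + w)                    # (2,3,4)
print(v * w)                    # dot product -> 6
print(v.euclidean_length())     # 3.7416573867739413
print(unit_basis_vector(3, 1))  # (0,1,0)

a = Matrix([[1, 2], [3, 4]], 2, 2)
print(a * Vector([1, 1]))       # matrix-vector product -> (3,7)
print(a.determinant())          # -2
```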
---
## Tests
`src/tests.py` contains Python unit tests which can be run with `python3 -m unittest -v`.
================================================
FILE: linear_algebra/__init__.py
================================================
================================================
FILE: linear_algebra/gaussian_elimination.py
================================================
"""
| Gaussian elimination method for solving a system of linear equations.
| Gaussian elimination - https://en.wikipedia.org/wiki/Gaussian_elimination
"""
import numpy as np
from numpy import float64
from numpy.typing import NDArray
def retroactive_resolution(
coefficients: NDArray[float64], vector: NDArray[float64]
) -> NDArray[float64]:
"""
This function performs a retroactive linear system resolution
for triangular matrix
Examples:
1.
* 2x1 + 2x2 - 1x3 = 5
* 0x1 - 2x2 - 1x3 = -7
* 0x1 + 0x2 + 5x3 = 15
2.
* 2x1 + 2x2 = -1
* 0x1 - 2x2 = -1
>>> gaussian_elimination([[2, 2, -1], [0, -2, -1], [0, 0, 5]], [[5], [-7], [15]])
array([[2.],
[2.],
[3.]])
>>> gaussian_elimination([[2, 2], [0, -2]], [[-1], [-1]])
array([[-1. ],
[ 0.5]])
"""
rows, _columns = np.shape(coefficients)
x: NDArray[float64] = np.zeros((rows, 1), dtype=float)
for row in reversed(range(rows)):
total = np.dot(coefficients[row, row + 1 :], x[row + 1 :])
x[row, 0] = (vector[row][0] - total[0]) / coefficients[row, row]
return x
def gaussian_elimination(
coefficients: NDArray[float64], vector: NDArray[float64]
) -> NDArray[float64]:
"""
This function performs Gaussian elimination method
Examples:
1.
* 1x1 - 4x2 - 2x3 = -2
* 5x1 + 2x2 - 2x3 = -3
* 1x1 - 1x2 + 0x3 = 4
2.
* 1x1 + 2x2 = 5
* 5x1 + 2x2 = 5
>>> gaussian_elimination([[1, -4, -2], [5, 2, -2], [1, -1, 0]], [[-2], [-3], [4]])
array([[ 2.3 ],
[-1.7 ],
[ 5.55]])
>>> gaussian_elimination([[1, 2], [5, 2]], [[5], [5]])
array([[0. ],
[2.5]])
"""
    # coefficients must be a square matrix, so check that first
rows, columns = np.shape(coefficients)
if rows != columns:
return np.array((), dtype=float)
# augmented matrix
augmented_mat: NDArray[float64] = np.concatenate((coefficients, vector), axis=1)
augmented_mat = augmented_mat.astype("float64")
# scale the matrix leaving it triangular
for row in range(rows - 1):
pivot = augmented_mat[row, row]
for col in range(row + 1, columns):
factor = augmented_mat[col, row] / pivot
augmented_mat[col, :] -= factor * augmented_mat[row, :]
x = retroactive_resolution(
augmented_mat[:, 0:columns], augmented_mat[:, columns : columns + 1]
)
return x
if __name__ == "__main__":
import doctest
doctest.testmod()
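# A quick sanity check (a sketch, assuming gaussian_elimination from above is
# in scope): the 3x3 system from its docstring, cross-checked against NumPy's
# direct solver.
import numpy as np
coefficients = np.array([[1, -4, -2], [5, 2, -2], [1, -1, 0]], dtype=float)
vector = np.array([[-2], [-3], [4]], dtype=float)
ours = gaussian_elimination(coefficients, vector)
reference = np.linalg.solve(coefficients, vector)  # [2.3, -1.7, 5.55]
assert np.allclose(ours, reference)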
================================================
FILE: linear_algebra/jacobi_iteration_method.py
================================================
"""
Jacobi Iteration Method - https://en.wikipedia.org/wiki/Jacobi_method
"""
from __future__ import annotations
import numpy as np
from numpy import float64
from numpy.typing import NDArray
# Method to find solution of system of linear equations
def jacobi_iteration_method(
coefficient_matrix: NDArray[float64],
constant_matrix: NDArray[float64],
init_val: list[float],
iterations: int,
) -> list[float]:
"""
Jacobi Iteration Method:
An iterative algorithm to determine the solutions of strictly diagonally dominant
system of linear equations
4x1 + x2 + x3 = 2
x1 + 5x2 + 2x3 = -6
x1 + 2x2 + 4x3 = -4
x_init = [0.5, -0.5 , -0.5]
Examples:
>>> coefficient = np.array([[4, 1, 1], [1, 5, 2], [1, 2, 4]])
>>> constant = np.array([[2], [-6], [-4]])
>>> init_val = [0.5, -0.5, -0.5]
>>> iterations = 3
>>> jacobi_iteration_method(coefficient, constant, init_val, iterations)
[0.909375, -1.14375, -0.7484375]
>>> coefficient = np.array([[4, 1, 1], [1, 5, 2]])
>>> constant = np.array([[2], [-6], [-4]])
>>> init_val = [0.5, -0.5, -0.5]
>>> iterations = 3
>>> jacobi_iteration_method(coefficient, constant, init_val, iterations)
Traceback (most recent call last):
...
ValueError: Coefficient matrix dimensions must be nxn but received 2x3
>>> coefficient = np.array([[4, 1, 1], [1, 5, 2], [1, 2, 4]])
>>> constant = np.array([[2], [-6]])
>>> init_val = [0.5, -0.5, -0.5]
>>> iterations = 3
>>> jacobi_iteration_method(
... coefficient, constant, init_val, iterations
... ) # doctest: +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
ValueError: Coefficient and constant matrices dimensions must be nxn and nx1 but
received 3x3 and 2x1
>>> coefficient = np.array([[4, 1, 1], [1, 5, 2], [1, 2, 4]])
>>> constant = np.array([[2], [-6], [-4]])
>>> init_val = [0.5, -0.5]
>>> iterations = 3
>>> jacobi_iteration_method(
... coefficient, constant, init_val, iterations
... ) # doctest: +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
ValueError: Number of initial values must be equal to number of rows in coefficient
matrix but received 2 and 3
>>> coefficient = np.array([[4, 1, 1], [1, 5, 2], [1, 2, 4]])
>>> constant = np.array([[2], [-6], [-4]])
>>> init_val = [0.5, -0.5, -0.5]
>>> iterations = 0
>>> jacobi_iteration_method(coefficient, constant, init_val, iterations)
Traceback (most recent call last):
...
ValueError: Iterations must be at least 1
"""
rows1, cols1 = coefficient_matrix.shape
rows2, cols2 = constant_matrix.shape
if rows1 != cols1:
msg = f"Coefficient matrix dimensions must be nxn but received {rows1}x{cols1}"
raise ValueError(msg)
if cols2 != 1:
msg = f"Constant matrix must be nx1 but received {rows2}x{cols2}"
raise ValueError(msg)
if rows1 != rows2:
msg = (
"Coefficient and constant matrices dimensions must be nxn and nx1 but "
f"received {rows1}x{cols1} and {rows2}x{cols2}"
)
raise ValueError(msg)
if len(init_val) != rows1:
msg = (
"Number of initial values must be equal to number of rows in coefficient "
f"matrix but received {len(init_val)} and {rows1}"
)
raise ValueError(msg)
if iterations <= 0:
raise ValueError("Iterations must be at least 1")
table: NDArray[float64] = np.concatenate(
(coefficient_matrix, constant_matrix), axis=1
)
rows, _cols = table.shape
strictly_diagonally_dominant(table)
"""
# Iterates the whole matrix for given number of times
for _ in range(iterations):
new_val = []
for row in range(rows):
temp = 0
for col in range(cols):
if col == row:
denom = table[row][col]
elif col == cols - 1:
val = table[row][col]
else:
temp += (-1) * table[row][col] * init_val[col]
temp = (temp + val) / denom
new_val.append(temp)
init_val = new_val
"""
# denominator - a list of values along the diagonal
denominator = np.diag(coefficient_matrix)
# val_last - values of the last column of the table array
val_last = table[:, -1]
    # masks - boolean mask selecting all off-diagonal
    # elements of the coefficient_matrix array
masks = ~np.eye(coefficient_matrix.shape[0], dtype=bool)
# no_diagonals - coefficient_matrix array values without diagonal elements
no_diagonals = coefficient_matrix[masks].reshape(-1, rows - 1)
    # 'i_col' holds the column indices of the off-diagonal
    # elements in each row of coefficient_matrix.
_i_row, i_col = np.where(masks)
ind = i_col.reshape(-1, rows - 1)
    # 'i_col' is reshaped into the two-dimensional array 'ind', which is used
    # to gather values from 'init_val' (the 'arr' array below).
# Iterates the whole matrix for given number of times
for _ in range(iterations):
arr = np.take(init_val, ind)
sum_product_rows = np.sum((-1) * no_diagonals * arr, axis=1)
new_val = (sum_product_rows + val_last) / denominator
init_val = new_val
return new_val.tolist()
# Checks if the given matrix is strictly diagonally dominant
def strictly_diagonally_dominant(table: NDArray[float64]) -> bool:
"""
>>> table = np.array([[4, 1, 1, 2], [1, 5, 2, -6], [1, 2, 4, -4]])
>>> strictly_diagonally_dominant(table)
True
>>> table = np.array([[4, 1, 1, 2], [1, 5, 2, -6], [1, 2, 3, -4]])
>>> strictly_diagonally_dominant(table)
Traceback (most recent call last):
...
ValueError: Coefficient matrix is not strictly diagonally dominant
"""
rows, cols = table.shape
is_diagonally_dominant = True
for i in range(rows):
total = 0
for j in range(cols - 1):
            if i == j:
                continue
            total += abs(table[i][j])
        if abs(table[i][i]) <= total:
raise ValueError("Coefficient matrix is not strictly diagonally dominant")
return is_diagonally_dominant
# Test Cases
if __name__ == "__main__":
import doctest
doctest.testmod()
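# A convergence sketch (assumes jacobi_iteration_method from above is in
# scope): for this strictly diagonally dominant system, a larger iteration
# count agrees with NumPy's direct solver to high precision.
import numpy as np
coefficient = np.array([[4, 1, 1], [1, 5, 2], [1, 2, 4]])
constant = np.array([[2], [-6], [-4]])
approx = jacobi_iteration_method(coefficient, constant, [0.5, -0.5, -0.5], 100)
exact = np.linalg.solve(coefficient, constant).flatten()
assert np.allclose(approx, exact, atol=1e-8)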
================================================
FILE: linear_algebra/lu_decomposition.py
================================================
"""
Lower-upper (LU) decomposition factors a matrix as a product of a lower
triangular matrix and an upper triangular matrix. A square matrix has an LU
decomposition under the following conditions:
- If the matrix is invertible, then it has an LU decomposition if and only
if all of its leading principal minors are non-zero (see
https://en.wikipedia.org/wiki/Minor_(linear_algebra) for an explanation of
leading principal minors of a matrix).
- If the matrix is singular (i.e., not invertible) and it has a rank of k
(i.e., it has k linearly independent columns), then it has an LU
decomposition if its first k leading principal minors are non-zero.
This algorithm will simply attempt to perform LU decomposition on any square
matrix and raise an error if no such decomposition exists.
Reference: https://en.wikipedia.org/wiki/LU_decomposition
"""
from __future__ import annotations
import numpy as np
def lower_upper_decomposition(table: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
"""
Perform LU decomposition on a given matrix and raises an error if the matrix
isn't square or if no such decomposition exists
>>> matrix = np.array([[2, -2, 1], [0, 1, 2], [5, 3, 1]])
>>> lower_mat, upper_mat = lower_upper_decomposition(matrix)
>>> lower_mat
array([[1. , 0. , 0. ],
[0. , 1. , 0. ],
[2.5, 8. , 1. ]])
>>> upper_mat
array([[ 2. , -2. , 1. ],
[ 0. , 1. , 2. ],
[ 0. , 0. , -17.5]])
>>> matrix = np.array([[4, 3], [6, 3]])
>>> lower_mat, upper_mat = lower_upper_decomposition(matrix)
>>> lower_mat
array([[1. , 0. ],
[1.5, 1. ]])
>>> upper_mat
array([[ 4. , 3. ],
[ 0. , -1.5]])
>>> # Matrix is not square
>>> matrix = np.array([[2, -2, 1], [0, 1, 2]])
>>> lower_mat, upper_mat = lower_upper_decomposition(matrix)
Traceback (most recent call last):
...
ValueError: 'table' has to be of square shaped array but got a 2x3 array:
[[ 2 -2 1]
[ 0 1 2]]
>>> # Matrix is invertible, but its first leading principal minor is 0
>>> matrix = np.array([[0, 1], [1, 0]])
>>> lower_mat, upper_mat = lower_upper_decomposition(matrix)
Traceback (most recent call last):
...
ArithmeticError: No LU decomposition exists
>>> # Matrix is singular, but its first leading principal minor is 1
>>> matrix = np.array([[1, 0], [1, 0]])
>>> lower_mat, upper_mat = lower_upper_decomposition(matrix)
>>> lower_mat
array([[1., 0.],
[1., 1.]])
>>> upper_mat
array([[1., 0.],
[0., 0.]])
>>> # Matrix is singular, but its first leading principal minor is 0
>>> matrix = np.array([[0, 1], [0, 1]])
>>> lower_mat, upper_mat = lower_upper_decomposition(matrix)
Traceback (most recent call last):
...
ArithmeticError: No LU decomposition exists
"""
# Ensure that table is a square array
rows, columns = np.shape(table)
if rows != columns:
msg = (
"'table' has to be of square shaped array but got a "
f"{rows}x{columns} array:\n{table}"
)
raise ValueError(msg)
lower = np.zeros((rows, columns))
upper = np.zeros((rows, columns))
# in 'total', the necessary data is extracted through slices
# and the sum of the products is obtained.
for i in range(columns):
for j in range(i):
total = np.sum(lower[i, :i] * upper[:i, j])
if upper[j][j] == 0:
raise ArithmeticError("No LU decomposition exists")
lower[i][j] = (table[i][j] - total) / upper[j][j]
lower[i][i] = 1
for j in range(i, columns):
total = np.sum(lower[i, :i] * upper[:i, j])
upper[i][j] = table[i][j] - total
return lower, upper
if __name__ == "__main__":
import doctest
doctest.testmod()
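# A verification sketch (assumes lower_upper_decomposition from above is in
# scope): L is unit lower triangular, U is upper triangular, and their product
# reproduces the original matrix.
import numpy as np
matrix = np.array([[2, -2, 1], [0, 1, 2], [5, 3, 1]])
lower, upper = lower_upper_decomposition(matrix)
assert np.allclose(lower @ upper, matrix)
assert np.allclose(np.tril(lower), lower)  # L has no entries above the diagonal
assert np.allclose(np.triu(upper), upper)  # U has no entries below the diagonal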
================================================
FILE: linear_algebra/matrix_inversion.py
================================================
import numpy as np
def invert_matrix(matrix: list[list[float]]) -> list[list[float]]:
"""
Returns the inverse of a square matrix using NumPy.
Parameters:
matrix (list[list[float]]): A square matrix.
Returns:
list[list[float]]: Inverted matrix if invertible, else raises error.
>>> invert_matrix([[4.0, 7.0], [2.0, 6.0]])
[[0.6000000000000001, -0.7000000000000001], [-0.2, 0.4]]
>>> invert_matrix([[1.0, 2.0], [0.0, 0.0]])
Traceback (most recent call last):
...
ValueError: Matrix is not invertible
"""
np_matrix = np.array(matrix)
try:
inv_matrix = np.linalg.inv(np_matrix)
except np.linalg.LinAlgError:
raise ValueError("Matrix is not invertible")
return inv_matrix.tolist()
if __name__ == "__main__":
mat = [[4.0, 7.0], [2.0, 6.0]]
print("Original Matrix:")
print(mat)
print("Inverted Matrix:")
print(invert_matrix(mat))
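# A verification sketch (assumes invert_matrix from above is in scope):
# multiplying a matrix by its inverse recovers the identity, up to rounding.
import numpy as np
mat_check = [[4.0, 7.0], [2.0, 6.0]]
inv_check = invert_matrix(mat_check)
assert np.allclose(np.array(mat_check) @ np.array(inv_check), np.eye(2))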
================================================
FILE: linear_algebra/src/__init__.py
================================================
================================================
FILE: linear_algebra/src/conjugate_gradient.py
================================================
"""
Resources:
- https://en.wikipedia.org/wiki/Conjugate_gradient_method
- https://en.wikipedia.org/wiki/Definite_symmetric_matrix
"""
from typing import Any
import numpy as np
def _is_matrix_spd(matrix: np.ndarray) -> bool:
"""
Returns True if input matrix is symmetric positive definite.
Returns False otherwise.
For a matrix to be SPD, all eigenvalues must be positive.
>>> import numpy as np
>>> matrix = np.array([
... [4.12401784, -5.01453636, -0.63865857],
... [-5.01453636, 12.33347422, -3.40493586],
... [-0.63865857, -3.40493586, 5.78591885]])
>>> _is_matrix_spd(matrix)
True
>>> matrix = np.array([
... [0.34634879, 1.96165514, 2.18277744],
... [0.74074469, -1.19648894, -1.34223498],
... [-0.7687067 , 0.06018373, -1.16315631]])
>>> _is_matrix_spd(matrix)
False
"""
# Ensure matrix is square.
assert np.shape(matrix)[0] == np.shape(matrix)[1]
# If matrix not symmetric, exit right away.
    if not np.allclose(matrix, matrix.T):
return False
    # Get eigenvalues and eigenvectors for a symmetric matrix.
eigen_values, _ = np.linalg.eigh(matrix)
# Check sign of all eigenvalues.
# np.all returns a value of type np.bool_
return bool(np.all(eigen_values > 0))
def _create_spd_matrix(dimension: int) -> Any:
"""
Returns a symmetric positive definite matrix given a dimension.
Input:
dimension gives the square matrix dimension.
Output:
    spd_matrix is a dimension x dimension symmetric positive definite (SPD) matrix.
>>> import numpy as np
>>> dimension = 3
>>> spd_matrix = _create_spd_matrix(dimension)
>>> _is_matrix_spd(spd_matrix)
True
"""
rng = np.random.default_rng()
random_matrix = rng.normal(size=(dimension, dimension))
spd_matrix = np.dot(random_matrix, random_matrix.T)
assert _is_matrix_spd(spd_matrix)
return spd_matrix
def conjugate_gradient(
spd_matrix: np.ndarray,
load_vector: np.ndarray,
max_iterations: int = 1000,
tol: float = 1e-8,
) -> Any:
"""
Returns solution to the linear system np.dot(spd_matrix, x) = b.
Input:
spd_matrix is an NxN Symmetric Positive Definite (SPD) matrix.
load_vector is an Nx1 vector.
Output:
x is an Nx1 vector that is the solution vector.
>>> import numpy as np
>>> spd_matrix = np.array([
... [8.73256573, -5.02034289, -2.68709226],
... [-5.02034289, 3.78188322, 0.91980451],
... [-2.68709226, 0.91980451, 1.94746467]])
>>> b = np.array([
... [-5.80872761],
... [ 3.23807431],
... [ 1.95381422]])
>>> conjugate_gradient(spd_matrix, b)
array([[-0.63114139],
[-0.01561498],
[ 0.13979294]])
"""
# Ensure proper dimensionality.
assert np.shape(spd_matrix)[0] == np.shape(spd_matrix)[1]
assert np.shape(load_vector)[0] == np.shape(spd_matrix)[0]
assert _is_matrix_spd(spd_matrix)
# Initialize solution guess, residual, search direction.
x0 = np.zeros((np.shape(load_vector)[0], 1))
r0 = np.copy(load_vector)
p0 = np.copy(r0)
# Set initial errors in solution guess and residual.
error_residual = 1e9
error_x_solution = 1e9
error = 1e9
# Set iteration counter to threshold number of iterations.
iterations = 0
while error > tol:
# Save this value so we only calculate the matrix-vector product once.
w = np.dot(spd_matrix, p0)
# The main algorithm.
# Update search direction magnitude.
alpha = np.dot(r0.T, r0) / np.dot(p0.T, w)
# Update solution guess.
x = x0 + alpha * p0
# Calculate new residual.
r = r0 - alpha * w
# Calculate new Krylov subspace scale.
beta = np.dot(r.T, r) / np.dot(r0.T, r0)
        # Calculate new A-conjugate search direction.
p = r + beta * p0
# Calculate errors.
error_residual = np.linalg.norm(r - r0)
error_x_solution = np.linalg.norm(x - x0)
error = np.maximum(error_residual, error_x_solution)
# Update variables.
x0 = np.copy(x)
r0 = np.copy(r)
p0 = np.copy(p)
# Update number of iterations.
iterations += 1
if iterations > max_iterations:
break
return x
def test_conjugate_gradient() -> None:
"""
>>> test_conjugate_gradient() # self running tests
"""
# Create linear system with SPD matrix and known solution x_true.
dimension = 3
spd_matrix = _create_spd_matrix(dimension)
rng = np.random.default_rng()
x_true = rng.normal(size=(dimension, 1))
b = np.dot(spd_matrix, x_true)
# Numpy solution.
x_numpy = np.linalg.solve(spd_matrix, b)
# Our implementation.
x_conjugate_gradient = conjugate_gradient(spd_matrix, b)
# Ensure both solutions are close to x_true (and therefore one another).
assert np.linalg.norm(x_numpy - x_true) <= 1e-6
assert np.linalg.norm(x_conjugate_gradient - x_true) <= 1e-6
if __name__ == "__main__":
import doctest
doctest.testmod()
test_conjugate_gradient()
================================================
FILE: linear_algebra/src/gaussian_elimination_pivoting.py
================================================
import numpy as np
def solve_linear_system(matrix: np.ndarray) -> np.ndarray:
"""
Solve a linear system of equations using Gaussian elimination with partial pivoting
Args:
- `matrix`: Coefficient matrix with the last column representing the constants.
Returns:
- Solution vector.
Raises:
- ``ValueError``: If the matrix is not correct (i.e., singular).
https://courses.engr.illinois.edu/cs357/su2013/lect.htm Lecture 7
Example:
>>> A = np.array([[2, 1, -1], [-3, -1, 2], [-2, 1, 2]], dtype=float)
>>> B = np.array([8, -11, -3], dtype=float)
>>> solution = solve_linear_system(np.column_stack((A, B)))
>>> np.allclose(solution, np.array([2., 3., -1.]))
True
>>> solve_linear_system(np.array([[0, 0, 0]], dtype=float))
Traceback (most recent call last):
...
ValueError: Matrix is not square
>>> solve_linear_system(np.array([[0, 0, 0], [0, 0, 0]], dtype=float))
Traceback (most recent call last):
...
ValueError: Matrix is singular
"""
ab = np.copy(matrix)
num_of_rows = ab.shape[0]
num_of_columns = ab.shape[1] - 1
x_lst: list[float] = []
if num_of_rows != num_of_columns:
raise ValueError("Matrix is not square")
for column_num in range(num_of_rows):
# Lead element search
for i in range(column_num, num_of_columns):
if abs(ab[i][column_num]) > abs(ab[column_num][column_num]):
ab[[column_num, i]] = ab[[i, column_num]]
# Upper triangular matrix
if abs(ab[column_num, column_num]) < 1e-8:
raise ValueError("Matrix is singular")
if column_num != 0:
for i in range(column_num, num_of_rows):
ab[i, :] -= (
ab[i, column_num - 1]
/ ab[column_num - 1, column_num - 1]
* ab[column_num - 1, :]
)
# Find x vector (Back Substitution)
for column_num in range(num_of_rows - 1, -1, -1):
x = ab[column_num, -1] / ab[column_num, column_num]
x_lst.insert(0, x)
for i in range(column_num - 1, -1, -1):
ab[i, -1] -= ab[i, column_num] * x
# Return the solution vector
return np.asarray(x_lst)
if __name__ == "__main__":
from doctest import testmod
testmod()
example_matrix = np.array(
[
[5.0, -5.0, -3.0, 4.0, -11.0],
[1.0, -4.0, 6.0, -4.0, -10.0],
[-2.0, -5.0, 4.0, -5.0, -12.0],
[-3.0, -3.0, 5.0, -5.0, 8.0],
],
dtype=float,
)
print(f"Matrix:\n{example_matrix}")
print(f"{solve_linear_system(example_matrix) = }")
================================================
FILE: linear_algebra/src/lib.py
================================================
"""
Created on Mon Feb 26 14:29:11 2018
@author: Christian Bender
@license: MIT-license
This module contains some useful classes and functions for dealing
with linear algebra in python.
Overview:
- class Vector
- function zero_vector(dimension)
- function unit_basis_vector(dimension, pos)
- function axpy(scalar, vector1, vector2)
- function random_vector(N, a, b)
- class Matrix
- function square_zero_matrix(N)
- function random_matrix(W, H, a, b)
"""
from __future__ import annotations
import math
import random
from collections.abc import Collection
from typing import overload
class Vector:
"""
This class represents a vector of arbitrary size.
You need to give the vector components.
Overview of the methods:
__init__(components: Collection[float] | None): init the vector
__len__(): gets the size of the vector (number of components)
__str__(): returns a string representation
__add__(other: Vector): vector addition
__sub__(other: Vector): vector subtraction
__mul__(other: float): scalar multiplication
__mul__(other: Vector): dot product
copy(): copies this vector and returns it
component(i): gets the i-th component (0-indexed)
change_component(pos: int, value: float): changes specified component
euclidean_length(): returns the euclidean length of the vector
angle(other: Vector, deg: bool): returns the angle between two vectors
"""
def __init__(self, components: Collection[float] | None = None) -> None:
"""
input: components or nothing
simple constructor for init the vector
"""
if components is None:
components = []
self.__components = list(components)
def __len__(self) -> int:
"""
returns the size of the vector
"""
return len(self.__components)
def __str__(self) -> str:
"""
returns a string representation of the vector
"""
return "(" + ",".join(map(str, self.__components)) + ")"
def __add__(self, other: Vector) -> Vector:
"""
input: other vector
assumes: other vector has the same size
returns a new vector that represents the sum.
"""
size = len(self)
if size == len(other):
result = [self.__components[i] + other.component(i) for i in range(size)]
return Vector(result)
else:
raise Exception("must have the same size")
def __sub__(self, other: Vector) -> Vector:
"""
input: other vector
assumes: other vector has the same size
returns a new vector that represents the difference.
"""
size = len(self)
if size == len(other):
result = [self.__components[i] - other.component(i) for i in range(size)]
return Vector(result)
else: # error case
raise Exception("must have the same size")
def __eq__(self, other: object) -> bool:
"""
performs the comparison between two vectors
"""
if not isinstance(other, Vector):
return NotImplemented
if len(self) != len(other):
return False
return all(self.component(i) == other.component(i) for i in range(len(self)))
@overload
def __mul__(self, other: float) -> Vector: ...
@overload
def __mul__(self, other: Vector) -> float: ...
def __mul__(self, other: float | Vector) -> float | Vector:
"""
mul implements the scalar multiplication
and the dot-product
"""
if isinstance(other, (float, int)):
ans = [c * other for c in self.__components]
return Vector(ans)
elif isinstance(other, Vector) and len(self) == len(other):
size = len(self)
prods = [self.__components[i] * other.component(i) for i in range(size)]
return sum(prods)
else: # error case
raise Exception("invalid operand!")
def copy(self) -> Vector:
"""
copies this vector and returns it.
"""
return Vector(self.__components)
def component(self, i: int) -> float:
"""
input: index (0-indexed)
output: the i-th component of the vector.
"""
if isinstance(i, int) and -len(self.__components) <= i < len(self.__components):
return self.__components[i]
else:
raise Exception("index out of range")
def change_component(self, pos: int, value: float) -> None:
"""
input: an index (pos) and a value
changes the specified component (pos) with the
'value'
"""
# precondition
assert -len(self.__components) <= pos < len(self.__components)
self.__components[pos] = value
def euclidean_length(self) -> float:
"""
returns the euclidean length of the vector
>>> Vector([2, 3, 4]).euclidean_length()
5.385164807134504
>>> Vector([1]).euclidean_length()
1.0
>>> Vector([0, -1, -2, -3, 4, 5, 6]).euclidean_length()
9.539392014169456
>>> Vector([]).euclidean_length()
Traceback (most recent call last):
...
Exception: Vector is empty
"""
if len(self.__components) == 0:
raise Exception("Vector is empty")
squares = [c**2 for c in self.__components]
return math.sqrt(sum(squares))
def angle(self, other: Vector, deg: bool = False) -> float:
"""
find angle between two Vector (self, Vector)
>>> Vector([3, 4, -1]).angle(Vector([2, -1, 1]))
1.4906464636572374
>>> Vector([3, 4, -1]).angle(Vector([2, -1, 1]), deg = True)
85.40775111366095
>>> Vector([3, 4, -1]).angle(Vector([2, -1]))
Traceback (most recent call last):
...
Exception: invalid operand!
"""
num = self * other
den = self.euclidean_length() * other.euclidean_length()
if deg:
return math.degrees(math.acos(num / den))
else:
return math.acos(num / den)
def zero_vector(dimension: int) -> Vector:
"""
returns a zero-vector of size 'dimension'
"""
# precondition
assert isinstance(dimension, int)
return Vector([0] * dimension)
def unit_basis_vector(dimension: int, pos: int) -> Vector:
"""
returns a unit basis vector with a One
at index 'pos' (indexing at 0)
"""
# precondition
assert isinstance(dimension, int)
assert isinstance(pos, int)
ans = [0] * dimension
ans[pos] = 1
return Vector(ans)
def axpy(scalar: float, x: Vector, y: Vector) -> Vector:
"""
input: a 'scalar' and two vectors 'x' and 'y'
output: a vector
computes the axpy operation
"""
# precondition
assert isinstance(x, Vector)
assert isinstance(y, Vector)
assert isinstance(scalar, (int, float))
return x * scalar + y
def random_vector(n: int, a: int, b: int) -> Vector:
"""
input: size (N) of the vector.
random range (a,b)
output: returns a random vector of size N, with
random integer components between 'a' and 'b'.
"""
random.seed(None)
ans = [random.randint(a, b) for _ in range(n)]
return Vector(ans)
class Matrix:
"""
class: Matrix
This class represents an arbitrary matrix.
Overview of the methods:
__init__():
__str__(): returns a string representation
__add__(other: Matrix): matrix addition
__sub__(other: Matrix): matrix subtraction
__mul__(other: float): scalar multiplication
__mul__(other: Vector): vector multiplication
height() : returns height
width() : returns width
component(x: int, y: int): returns specified component
change_component(x: int, y: int, value: float): changes specified component
minor(x: int, y: int): returns minor along (x, y)
cofactor(x: int, y: int): returns cofactor along (x, y)
determinant() : returns determinant
"""
def __init__(self, matrix: list[list[float]], w: int, h: int) -> None:
"""
simple constructor for initializing the matrix with components.
"""
self.__matrix = matrix
self.__width = w
self.__height = h
def __str__(self) -> str:
"""
returns a string representation of this matrix.
"""
ans = ""
for i in range(self.__height):
ans += "|"
for j in range(self.__width):
if j < self.__width - 1:
ans += str(self.__matrix[i][j]) + ","
else:
ans += str(self.__matrix[i][j]) + "|\n"
return ans
def __add__(self, other: Matrix) -> Matrix:
"""
implements matrix addition.
"""
if self.__width == other.width() and self.__height == other.height():
matrix = []
for i in range(self.__height):
row = [
self.__matrix[i][j] + other.component(i, j)
for j in range(self.__width)
]
matrix.append(row)
return Matrix(matrix, self.__width, self.__height)
else:
raise Exception("matrix must have the same dimension!")
def __sub__(self, other: Matrix) -> Matrix:
"""
implements matrix subtraction.
"""
if self.__width == other.width() and self.__height == other.height():
matrix = []
for i in range(self.__height):
row = [
self.__matrix[i][j] - other.component(i, j)
for j in range(self.__width)
]
matrix.append(row)
return Matrix(matrix, self.__width, self.__height)
else:
raise Exception("matrices must have the same dimension!")
@overload
def __mul__(self, other: float) -> Matrix: ...
@overload
def __mul__(self, other: Vector) -> Vector: ...
def __mul__(self, other: float | Vector) -> Vector | Matrix:
"""
implements the matrix-vector multiplication.
implements the matrix-scalar multiplication
"""
if isinstance(other, Vector): # matrix-vector
if len(other) == self.__width:
ans = zero_vector(self.__height)
for i in range(self.__height):
prods = [
self.__matrix[i][j] * other.component(j)
for j in range(self.__width)
]
ans.change_component(i, sum(prods))
return ans
else:
raise Exception(
"vector must have the same size as the "
"number of columns of the matrix!"
)
elif isinstance(other, (int, float)): # matrix-scalar
matrix = [
[self.__matrix[i][j] * other for j in range(self.__width)]
for i in range(self.__height)
]
return Matrix(matrix, self.__width, self.__height)
        raise Exception("invalid operand!")
def height(self) -> int:
"""
getter for the height
"""
return self.__height
def width(self) -> int:
"""
getter for the width
"""
return self.__width
def component(self, x: int, y: int) -> float:
"""
returns the specified (x,y) component
"""
if 0 <= x < self.__height and 0 <= y < self.__width:
return self.__matrix[x][y]
else:
raise Exception("change_component: indices out of bounds")
def change_component(self, x: int, y: int, value: float) -> None:
"""
changes the x-y component of this matrix
"""
if 0 <= x < self.__height and 0 <= y < self.__width:
self.__matrix[x][y] = value
else:
raise Exception("change_component: indices out of bounds")
def minor(self, x: int, y: int) -> float:
"""
returns the minor along (x, y)
"""
if self.__height != self.__width:
raise Exception("Matrix is not square")
minor = self.__matrix[:x] + self.__matrix[x + 1 :]
for i in range(len(minor)):
minor[i] = minor[i][:y] + minor[i][y + 1 :]
return Matrix(minor, self.__width - 1, self.__height - 1).determinant()
def cofactor(self, x: int, y: int) -> float:
"""
returns the cofactor (signed minor) along (x, y)
"""
if self.__height != self.__width:
raise Exception("Matrix is not square")
if 0 <= x < self.__height and 0 <= y < self.__width:
return (-1) ** (x + y) * self.minor(x, y)
else:
raise Exception("Indices out of bounds")
def determinant(self) -> float:
"""
returns the determinant of an nxn matrix using Laplace expansion
"""
if self.__height != self.__width:
raise Exception("Matrix is not square")
if self.__height < 1:
raise Exception("Matrix has no element")
elif self.__height == 1:
return self.__matrix[0][0]
elif self.__height == 2:
return (
self.__matrix[0][0] * self.__matrix[1][1]
- self.__matrix[0][1] * self.__matrix[1][0]
)
else:
cofactor_prods = [
self.__matrix[0][y] * self.cofactor(0, y) for y in range(self.__width)
]
return sum(cofactor_prods)
def square_zero_matrix(n: int) -> Matrix:
"""
returns a square zero-matrix of dimension NxN
"""
ans: list[list[float]] = [[0] * n for _ in range(n)]
return Matrix(ans, n, n)
def random_matrix(width: int, height: int, a: int, b: int) -> Matrix:
"""
returns a random matrix WxH with integer components
between 'a' and 'b'
"""
random.seed(None)
matrix: list[list[float]] = [
[random.randint(a, b) for _ in range(width)] for _ in range(height)
]
return Matrix(matrix, width, height)
================================================
FILE: linear_algebra/src/polynom_for_points.py
================================================
def points_to_polynomial(coordinates: list[list[int]]) -> str:
"""
    coordinates is a two dimensional matrix of points: [[x, y], [x, y], ...]
    The function fits a polynomial through all of the given points and
    returns it as a string.
>>> points_to_polynomial([])
Traceback (most recent call last):
...
ValueError: The program cannot work out a fitting polynomial.
>>> points_to_polynomial([[]])
Traceback (most recent call last):
...
ValueError: The program cannot work out a fitting polynomial.
>>> points_to_polynomial([[1, 0], [2, 0], [3, 0]])
'f(x)=x^2*0.0+x^1*-0.0+x^0*0.0'
>>> points_to_polynomial([[1, 1], [2, 1], [3, 1]])
'f(x)=x^2*0.0+x^1*-0.0+x^0*1.0'
>>> points_to_polynomial([[1, 3], [2, 3], [3, 3]])
'f(x)=x^2*0.0+x^1*-0.0+x^0*3.0'
>>> points_to_polynomial([[1, 1], [2, 2], [3, 3]])
'f(x)=x^2*0.0+x^1*1.0+x^0*0.0'
>>> points_to_polynomial([[1, 1], [2, 4], [3, 9]])
'f(x)=x^2*1.0+x^1*-0.0+x^0*0.0'
>>> points_to_polynomial([[1, 3], [2, 6], [3, 11]])
'f(x)=x^2*1.0+x^1*-0.0+x^0*2.0'
>>> points_to_polynomial([[1, -3], [2, -6], [3, -11]])
'f(x)=x^2*-1.0+x^1*-0.0+x^0*-2.0'
>>> points_to_polynomial([[1, 5], [2, 2], [3, 9]])
'f(x)=x^2*5.0+x^1*-18.0+x^0*18.0'
>>> points_to_polynomial([[1, 1], [1, 2], [1, 3]])
'x=1'
>>> points_to_polynomial([[1, 1], [2, 2], [2, 2]])
Traceback (most recent call last):
...
ValueError: The program cannot work out a fitting polynomial.
"""
if len(coordinates) == 0 or not all(len(pair) == 2 for pair in coordinates):
raise ValueError("The program cannot work out a fitting polynomial.")
if len({tuple(pair) for pair in coordinates}) != len(coordinates):
raise ValueError("The program cannot work out a fitting polynomial.")
set_x = {x for x, _ in coordinates}
if len(set_x) == 1:
return f"x={coordinates[0][0]}"
if len(set_x) != len(coordinates):
raise ValueError("The program cannot work out a fitting polynomial.")
x = len(coordinates)
# put the x and x to the power values in a matrix
matrix: list[list[float]] = [
[
coordinates[count_of_line][0] ** (x - (count_in_line + 1))
for count_in_line in range(x)
]
for count_of_line in range(x)
]
# put the y values into a vector
vector: list[float] = [coordinates[count_of_line][1] for count_of_line in range(x)]
for count in range(x):
for number in range(x):
if count == number:
continue
fraction = matrix[number][count] / matrix[count][count]
for counting_columns, item in enumerate(matrix[count]):
# manipulating all the values in the matrix
matrix[number][counting_columns] -= item * fraction
# manipulating the values in the vector
vector[number] -= vector[count] * fraction
# make solutions
solution: list[str] = [
str(vector[count] / matrix[count][count]) for count in range(x)
]
solved = "f(x)="
for count in range(x):
remove_e: list[str] = solution[count].split("E")
if len(remove_e) > 1:
solution[count] = f"{remove_e[0]}*10^{remove_e[1]}"
solved += f"x^{x - (count + 1)}*{solution[count]}"
if count + 1 != x:
solved += "+"
return solved
if __name__ == "__main__":
print(points_to_polynomial([]))
print(points_to_polynomial([[]]))
print(points_to_polynomial([[1, 0], [2, 0], [3, 0]]))
print(points_to_polynomial([[1, 1], [2, 1], [3, 1]]))
print(points_to_polynomial([[1, 3], [2, 3], [3, 3]]))
print(points_to_polynomial([[1, 1], [2, 2], [3, 3]]))
print(points_to_polynomial([[1, 1], [2, 4], [3, 9]]))
print(points_to_polynomial([[1, 3], [2, 6], [3, 11]]))
print(points_to_polynomial([[1, -3], [2, -6], [3, -11]]))
print(points_to_polynomial([[1, 5], [2, 2], [3, 9]]))
================================================
FILE: linear_algebra/src/power_iteration.py
================================================
import numpy as np
def power_iteration(
input_matrix: np.ndarray,
vector: np.ndarray,
error_tol: float = 1e-12,
max_iterations: int = 100,
) -> tuple[float, np.ndarray]:
"""
Power Iteration.
Find the largest eigenvalue and corresponding eigenvector
of matrix input_matrix given a random vector in the same space.
    Works as long as the initial vector has a nonzero component
    along the eigenvector with the largest eigenvalue.
input_matrix must be either real or Hermitian.
Input
input_matrix: input matrix whose largest eigenvalue we will find.
Numpy array. np.shape(input_matrix) == (N,N).
vector: random initial vector in same space as matrix.
Numpy array. np.shape(vector) == (N,) or (N,1)
Output
largest_eigenvalue: largest eigenvalue of the matrix input_matrix.
Float. Scalar.
largest_eigenvector: eigenvector corresponding to largest_eigenvalue.
Numpy array. np.shape(largest_eigenvector) == (N,) or (N,1).
>>> import numpy as np
>>> input_matrix = np.array([
... [41, 4, 20],
... [ 4, 26, 30],
... [20, 30, 50]
... ])
>>> vector = np.array([41,4,20])
>>> power_iteration(input_matrix,vector)
(79.66086378788381, array([0.44472726, 0.46209842, 0.76725662]))
"""
# Ensure matrix is square.
assert np.shape(input_matrix)[0] == np.shape(input_matrix)[1]
# Ensure proper dimensionality.
assert np.shape(input_matrix)[0] == np.shape(vector)[0]
# Ensure inputs are either both complex or both real
assert np.iscomplexobj(input_matrix) == np.iscomplexobj(vector)
is_complex = np.iscomplexobj(input_matrix)
if is_complex:
# Ensure complex input_matrix is Hermitian
assert np.array_equal(input_matrix, input_matrix.conj().T)
# Set convergence to False. Will define convergence when we exceed max_iterations
# or when we have small changes from one iteration to next.
convergence = False
lambda_previous = 0
iterations = 0
error = 1e12
while not convergence:
# Multiple matrix by the vector.
w = np.dot(input_matrix, vector)
# Normalize the resulting output vector.
vector = w / np.linalg.norm(w)
# Find rayleigh quotient
# (faster than usual b/c we know vector is normalized already)
vector_h = vector.conj().T if is_complex else vector.T
lambda_ = np.dot(vector_h, np.dot(input_matrix, vector))
# Check convergence.
error = np.abs(lambda_ - lambda_previous) / lambda_
iterations += 1
if error <= error_tol or iterations >= max_iterations:
convergence = True
lambda_previous = lambda_
if is_complex:
lambda_ = np.real(lambda_)
return float(lambda_), vector
def test_power_iteration() -> None:
"""
>>> test_power_iteration() # self running tests
"""
real_input_matrix = np.array([[41, 4, 20], [4, 26, 30], [20, 30, 50]])
real_vector = np.array([41, 4, 20])
complex_input_matrix = real_input_matrix.astype(np.complex128)
imag_matrix = np.triu(1j * complex_input_matrix, 1)
complex_input_matrix += imag_matrix
complex_input_matrix += -1 * imag_matrix.T
complex_vector = np.array([41, 4, 20]).astype(np.complex128)
for problem_type in ["real", "complex"]:
if problem_type == "real":
input_matrix = real_input_matrix
vector = real_vector
elif problem_type == "complex":
input_matrix = complex_input_matrix
vector = complex_vector
# Our implementation.
eigen_value, eigen_vector = power_iteration(input_matrix, vector)
# Numpy implementation.
# Get eigenvalues and eigenvectors using built-in numpy
        # eigh (eigh used for symmetric or Hermitian matrices).
eigen_values, eigen_vectors = np.linalg.eigh(input_matrix)
# Last eigenvalue is the maximum one.
eigen_value_max = eigen_values[-1]
# Last column in this matrix is eigenvector corresponding to largest eigenvalue.
eigen_vector_max = eigen_vectors[:, -1]
# Check our implementation and numpy gives close answers.
assert np.abs(eigen_value - eigen_value_max) <= 1e-6
# Take absolute values element wise of each eigenvector.
# as they are only unique to a minus sign.
assert np.linalg.norm(np.abs(eigen_vector) - np.abs(eigen_vector_max)) <= 1e-6
if __name__ == "__main__":
import doctest
doctest.testmod()
test_power_iteration()
================================================
FILE: linear_algebra/src/rank_of_matrix.py
================================================
"""
Calculate the rank of a matrix.
See: https://en.wikipedia.org/wiki/Rank_(linear_algebra)
"""
def rank_of_matrix(matrix: list[list[int | float]]) -> int:
"""
Finds the rank of a matrix.
Args:
`matrix`: The matrix as a list of lists.
Returns:
The rank of the matrix.
Example:
>>> matrix1 = [[1, 2, 3],
... [4, 5, 6],
... [7, 8, 9]]
>>> rank_of_matrix(matrix1)
2
>>> matrix2 = [[1, 0, 0],
... [0, 1, 0],
... [0, 0, 0]]
>>> rank_of_matrix(matrix2)
2
>>> matrix3 = [[1, 2, 3, 4],
... [5, 6, 7, 8],
... [9, 10, 11, 12]]
>>> rank_of_matrix(matrix3)
2
>>> rank_of_matrix([[2,3,-1,-1],
... [1,-1,-2,4],
... [3,1,3,-2],
... [6,3,0,-7]])
4
>>> rank_of_matrix([[2,1,-3,-6],
... [3,-3,1,2],
... [1,1,1,2]])
3
>>> rank_of_matrix([[2,-1,0],
... [1,3,4],
... [4,1,-3]])
3
>>> rank_of_matrix([[3,2,1],
... [-6,-4,-2]])
1
>>> rank_of_matrix([[],[]])
0
>>> rank_of_matrix([[1]])
1
>>> rank_of_matrix([[]])
0
"""
rows = len(matrix)
columns = len(matrix[0])
rank = min(rows, columns)
for row in range(rank):
# Check if diagonal element is not zero
if matrix[row][row] != 0:
# Eliminate all the elements below the diagonal
for col in range(row + 1, rows):
multiplier = matrix[col][row] / matrix[row][row]
for i in range(row, columns):
matrix[col][i] -= multiplier * matrix[row][i]
else:
# Find a non-zero diagonal element to swap rows
reduce = True
for i in range(row + 1, rows):
if matrix[i][row] != 0:
matrix[row], matrix[i] = matrix[i], matrix[row]
reduce = False
break
if reduce:
rank -= 1
for i in range(rows):
matrix[i][row] = matrix[i][rank]
# Reduce the row pointer by one to stay on the same row
row -= 1
return rank
if __name__ == "__main__":
import doctest
doctest.testmod()
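# A cross-check sketch (assumes rank_of_matrix from above is in scope) against
# NumPy's reference implementation. Note that rank_of_matrix mutates the
# matrix it is given, so pass it a throwaway copy.
import numpy as np
sample = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
assert rank_of_matrix([row[:] for row in sample]) == 2
assert int(np.linalg.matrix_rank(np.array(sample))) == 2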
================================================
FILE: linear_algebra/src/rayleigh_quotient.py
================================================
"""
https://en.wikipedia.org/wiki/Rayleigh_quotient
"""
from typing import Any
import numpy as np
def is_hermitian(matrix: np.ndarray) -> bool:
"""
Checks if a matrix is Hermitian.
>>> import numpy as np
>>> A = np.array([
... [2, 2+1j, 4],
... [2-1j, 3, 1j],
... [4, -1j, 1]])
>>> is_hermitian(A)
True
>>> A = np.array([
... [2, 2+1j, 4+1j],
... [2-1j, 3, 1j],
... [4, -1j, 1]])
>>> is_hermitian(A)
False
"""
return np.array_equal(matrix, matrix.conjugate().T)
def rayleigh_quotient(a: np.ndarray, v: np.ndarray) -> Any:
"""
Returns the Rayleigh quotient of a Hermitian matrix A and
vector v.
>>> import numpy as np
>>> A = np.array([
... [1, 2, 4],
... [2, 3, -1],
... [4, -1, 1]
... ])
>>> v = np.array([
... [1],
... [2],
... [3]
... ])
>>> rayleigh_quotient(A, v)
array([[3.]])
"""
v_star = v.conjugate().T
v_star_dot = v_star.dot(a)
assert isinstance(v_star_dot, np.ndarray)
return (v_star_dot.dot(v)) / (v_star.dot(v))
def tests() -> None:
a = np.array([[2, 2 + 1j, 4], [2 - 1j, 3, 1j], [4, -1j, 1]])
v = np.array([[1], [2], [3]])
assert is_hermitian(a), f"{a} is not hermitian."
print(rayleigh_quotient(a, v))
a = np.array([[1, 2, 4], [2, 3, -1], [4, -1, 1]])
assert is_hermitian(a), f"{a} is not hermitian."
assert rayleigh_quotient(a, v) == float(3)
if __name__ == "__main__":
import doctest
doctest.testmod()
tests()
================================================
FILE: linear_algebra/src/schur_complement.py
================================================
import unittest
import numpy as np
import pytest
def schur_complement(
mat_a: np.ndarray,
mat_b: np.ndarray,
mat_c: np.ndarray,
pseudo_inv: np.ndarray | None = None,
) -> np.ndarray:
"""
Schur complement of a symmetric matrix X given as a 2x2 block matrix
consisting of matrices `A`, `B` and `C`.
    Matrix `A` must be square and non-singular.
In case `A` is singular, a pseudo-inverse may be provided using
the `pseudo_inv` argument.
| Link to Wiki: https://en.wikipedia.org/wiki/Schur_complement
| See also Convex Optimization - Boyd and Vandenberghe, A.5.5
>>> import numpy as np
>>> a = np.array([[1, 2], [2, 1]])
>>> b = np.array([[0, 3], [3, 0]])
>>> c = np.array([[2, 1], [6, 3]])
>>> schur_complement(a, b, c)
array([[ 5., -5.],
[ 0., 6.]])
"""
shape_a = np.shape(mat_a)
shape_b = np.shape(mat_b)
shape_c = np.shape(mat_c)
if shape_a[0] != shape_b[0]:
msg = (
"Expected the same number of rows for A and B. "
f"Instead found A of size {shape_a} and B of size {shape_b}"
)
raise ValueError(msg)
if shape_b[1] != shape_c[1]:
msg = (
"Expected the same number of columns for B and C. "
f"Instead found B of size {shape_b} and C of size {shape_c}"
)
raise ValueError(msg)
a_inv = pseudo_inv
if a_inv is None:
try:
a_inv = np.linalg.inv(mat_a)
except np.linalg.LinAlgError:
raise ValueError(
"Input matrix A is not invertible. Cannot compute Schur complement."
)
return mat_c - mat_b.T @ a_inv @ mat_b
class TestSchurComplement(unittest.TestCase):
def test_schur_complement(self) -> None:
a = np.array([[1, 2, 1], [2, 1, 2], [3, 2, 4]])
b = np.array([[0, 3], [3, 0], [2, 3]])
c = np.array([[2, 1], [6, 3]])
s = schur_complement(a, b, c)
input_matrix = np.block([[a, b], [b.T, c]])
det_x = np.linalg.det(input_matrix)
det_a = np.linalg.det(a)
det_s = np.linalg.det(s)
        assert np.isclose(det_x, det_a * det_s)
def test_improper_a_b_dimensions(self) -> None:
        a = np.array([[1, 2], [2, 1]])
b = np.array([[0, 3], [3, 0], [2, 3]])
c = np.array([[2, 1], [6, 3]])
with pytest.raises(ValueError):
schur_complement(a, b, c)
def test_improper_b_c_dimensions(self) -> None:
a = np.array([[1, 2, 1], [2, 1, 2], [3, 2, 4]])
b = np.array([[0, 3], [3, 0], [2, 3]])
c = np.array([[2, 1, 3], [6, 3, 5]])
with pytest.raises(ValueError):
schur_complement(a, b, c)
if __name__ == "__main__":
import doctest
doctest.testmod()
unittest.main()
================================================
FILE: linear_algebra/src/test_linear_algebra.py
================================================
"""
Created on Mon Feb 26 15:40:07 2018
@author: Christian Bender
@license: MIT-license
This file contains the test-suite for the linear algebra library.
"""
import unittest
import pytest
from .lib import (
Matrix,
Vector,
axpy,
square_zero_matrix,
unit_basis_vector,
zero_vector,
)
class Test(unittest.TestCase):
def test_component(self) -> None:
"""
test for method component()
"""
x = Vector([1, 2, 3])
assert x.component(0) == 1
assert x.component(2) == 3
_ = Vector()
def test_str(self) -> None:
"""
test for method toString()
"""
x = Vector([0, 0, 0, 0, 0, 1])
assert str(x) == "(0,0,0,0,0,1)"
def test_size(self) -> None:
"""
test for method size()
"""
x = Vector([1, 2, 3, 4])
assert len(x) == 4
def test_euclidean_length(self) -> None:
"""
test for method euclidean_length()
"""
x = Vector([1, 2])
y = Vector([1, 2, 3, 4, 5])
z = Vector([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
w = Vector([1, -1, 1, -1, 2, -3, 4, -5])
assert x.euclidean_length() == pytest.approx(2.236, abs=1e-3)
assert y.euclidean_length() == pytest.approx(7.416, abs=1e-3)
assert z.euclidean_length() == 0
assert w.euclidean_length() == pytest.approx(7.616, abs=1e-3)
def test_add(self) -> None:
"""
test for + operator
"""
x = Vector([1, 2, 3])
y = Vector([1, 1, 1])
assert (x + y).component(0) == 2
assert (x + y).component(1) == 3
assert (x + y).component(2) == 4
def test_sub(self) -> None:
"""
test for - operator
"""
x = Vector([1, 2, 3])
y = Vector([1, 1, 1])
assert (x - y).component(0) == 0
assert (x - y).component(1) == 1
assert (x - y).component(2) == 2
def test_mul(self) -> None:
"""
test for * operator
"""
x = Vector([1, 2, 3])
a = Vector([2, -1, 4]) # for test of dot product
b = Vector([1, -2, -1])
assert str(x * 3.0) == "(3.0,6.0,9.0)"
assert a * b == 0
def test_zero_vector(self) -> None:
"""
test for global function zero_vector()
"""
assert str(zero_vector(10)).count("0") == 10
def test_unit_basis_vector(self) -> None:
"""
test for global function unit_basis_vector()
"""
assert str(unit_basis_vector(3, 1)) == "(0,1,0)"
def test_axpy(self) -> None:
"""
test for global function axpy() (operation)
"""
x = Vector([1, 2, 3])
y = Vector([1, 0, 1])
assert str(axpy(2, x, y)) == "(3,4,7)"
def test_copy(self) -> None:
"""
test for method copy()
"""
x = Vector([1, 0, 0, 0, 0, 0])
y = x.copy()
assert str(x) == str(y)
def test_change_component(self) -> None:
"""
test for method change_component()
"""
x = Vector([1, 0, 0])
x.change_component(0, 0)
x.change_component(1, 1)
assert str(x) == "(0,1,0)"
def test_str_matrix(self) -> None:
"""
test for Matrix method str()
"""
a = Matrix([[1, 2, 3], [2, 4, 5], [6, 7, 8]], 3, 3)
assert str(a) == "|1,2,3|\n|2,4,5|\n|6,7,8|\n"
def test_minor(self) -> None:
"""
test for Matrix method minor()
"""
a = Matrix([[1, 2, 3], [2, 4, 5], [6, 7, 8]], 3, 3)
minors = [[-3, -14, -10], [-5, -10, -5], [-2, -1, 0]]
for x in range(a.height()):
for y in range(a.width()):
assert minors[x][y] == a.minor(x, y)
def test_cofactor(self) -> None:
"""
test for Matrix method cofactor()
"""
a = Matrix([[1, 2, 3], [2, 4, 5], [6, 7, 8]], 3, 3)
cofactors = [[-3, 14, -10], [5, -10, 5], [-2, 1, 0]]
for x in range(a.height()):
for y in range(a.width()):
assert cofactors[x][y] == a.cofactor(x, y)
def test_determinant(self) -> None:
"""
test for Matrix method determinant()
"""
a = Matrix([[1, 2, 3], [2, 4, 5], [6, 7, 8]], 3, 3)
assert a.determinant() == -5
def test__mul__matrix(self) -> None:
"""
test for Matrix * operator
"""
a = Matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]], 3, 3)
x = Vector([1, 2, 3])
assert str(a * x) == "(14,32,50)"
assert str(a * 2) == "|2,4,6|\n|8,10,12|\n|14,16,18|\n"
def test_change_component_matrix(self) -> None:
"""
test for Matrix method change_component()
"""
a = Matrix([[1, 2, 3], [2, 4, 5], [6, 7, 8]], 3, 3)
a.change_component(0, 2, 5)
assert str(a) == "|1,2,5|\n|2,4,5|\n|6,7,8|\n"
def test_component_matrix(self) -> None:
"""
test for Matrix method component()
"""
a = Matrix([[1, 2, 3], [2, 4, 5], [6, 7, 8]], 3, 3)
        assert a.component(2, 1) == 7
def test__add__matrix(self) -> None:
"""
test for Matrix + operator
"""
a = Matrix([[1, 2, 3], [2, 4, 5], [6, 7, 8]], 3, 3)
b = Matrix([[1, 2, 7], [2, 4, 5], [6, 7, 10]], 3, 3)
assert str(a + b) == "|2,4,10|\n|4,8,10|\n|12,14,18|\n"
def test__sub__matrix(self) -> None:
"""
test for Matrix - operator
"""
a = Matrix([[1, 2, 3], [2, 4, 5], [6, 7, 8]], 3, 3)
b = Matrix([[1, 2, 7], [2, 4, 5], [6, 7, 10]], 3, 3)
assert str(a - b) == "|0,0,-4|\n|0,0,0|\n|0,0,-2|\n"
def test_square_zero_matrix(self) -> None:
"""
test for global function square_zero_matrix()
"""
assert str(square_zero_matrix(5)) == (
"|0,0,0,0,0|\n|0,0,0,0,0|\n|0,0,0,0,0|\n|0,0,0,0,0|\n|0,0,0,0,0|\n"
)
if __name__ == "__main__":
unittest.main()
================================================
FILE: linear_algebra/src/transformations_2d.py
================================================
"""
2D Transformations are regularly used in Linear Algebra.
This module provides matrices for 2D reflection, projection, scaling and rotation
(angle arguments are interpreted in radians).
.. code-block:: python
scaling(5) = [[5.0, 0.0], [0.0, 5.0]]
rotation(45) = [[0.5253219888177297, -0.8509035245341184],
[0.8509035245341184, 0.5253219888177297]]
projection(45) = [[0.27596319193541496, 0.446998331800279],
[0.446998331800279, 0.7240368080645851]]
reflection(45) = [[0.05064397763545947, 0.893996663600558],
[0.893996663600558, 0.7018070490682369]]
"""
from math import cos, sin
def scaling(scaling_factor: float) -> list[list[float]]:
"""
>>> scaling(5)
[[5.0, 0.0], [0.0, 5.0]]
"""
scaling_factor = float(scaling_factor)
return [[scaling_factor * int(x == y) for x in range(2)] for y in range(2)]
def rotation(angle: float) -> list[list[float]]:
"""
>>> rotation(45) # doctest: +NORMALIZE_WHITESPACE
[[0.5253219888177297, -0.8509035245341184],
[0.8509035245341184, 0.5253219888177297]]
"""
c, s = cos(angle), sin(angle)
return [[c, -s], [s, c]]
def projection(angle: float) -> list[list[float]]:
"""
>>> projection(45) # doctest: +NORMALIZE_WHITESPACE
[[0.27596319193541496, 0.446998331800279],
[0.446998331800279, 0.7240368080645851]]
"""
c, s = cos(angle), sin(angle)
cs = c * s
return [[c * c, cs], [cs, s * s]]
def reflection(angle: float) -> list[list[float]]:
"""
>>> reflection(45) # doctest: +NORMALIZE_WHITESPACE
[[0.05064397763545947, 0.893996663600558],
[0.893996663600558, 0.7018070490682369]]
"""
c, s = cos(angle), sin(angle)
cs = c * s
return [[2 * c - 1, 2 * cs], [2 * cs, 2 * s - 1]]
print(f" {scaling(5) = }")
print(f" {rotation(45) = }")
print(f"{projection(45) = }")
print(f"{reflection(45) = }")
================================================
FILE: linear_programming/__init__.py
================================================
================================================
FILE: linear_programming/simplex.py
================================================
"""
Python implementation of the simplex algorithm for solving linear programs in
tabular form with
- `>=`, `<=`, and `=` constraints and
- each variable `x1, x2, ...>= 0`.
See https://gist.github.com/imengus/f9619a568f7da5bc74eaf20169a24d98 for how to
convert linear programs to simplex tableaus, and the steps taken in the simplex
algorithm.
Resources:
https://en.wikipedia.org/wiki/Simplex_algorithm
https://tinyurl.com/simplex4beginners
"""
from typing import Any
import numpy as np
class Tableau:
"""Operate on simplex tableaus
>>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4]]), 2, 2)
Traceback (most recent call last):
...
TypeError: Tableau must have type float64
>>> Tableau(np.array([[-1,-1,0,0,-1],[1,3,1,0,4],[3,1,0,1,4.]]), 2, 2)
Traceback (most recent call last):
...
ValueError: RHS must be > 0
>>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4.]]), -2, 2)
Traceback (most recent call last):
...
ValueError: number of (artificial) variables must be a natural number
"""
# Max iteration number to prevent cycling
maxiter = 100
def __init__(
self, tableau: np.ndarray, n_vars: int, n_artificial_vars: int
) -> None:
if tableau.dtype != "float64":
raise TypeError("Tableau must have type float64")
# Check if RHS is negative
if not (tableau[:, -1] >= 0).all():
raise ValueError("RHS must be > 0")
if n_vars < 2 or n_artificial_vars < 0:
raise ValueError(
"number of (artificial) variables must be a natural number"
)
self.tableau = tableau
self.n_rows, n_cols = tableau.shape
# Number of decision variables x1, x2, x3...
self.n_vars, self.n_artificial_vars = n_vars, n_artificial_vars
# 2 if there are >= or == constraints (nonstandard), 1 otherwise (std)
self.n_stages = (self.n_artificial_vars > 0) + 1
# Number of slack variables added to make inequalities into equalities
self.n_slack = n_cols - self.n_vars - self.n_artificial_vars - 1
# Objectives for each stage
self.objectives = ["max"]
# In two stage simplex, first minimise then maximise
if self.n_artificial_vars:
self.objectives.append("min")
self.col_titles = self.generate_col_titles()
# Index of current pivot row and column
self.row_idx = None
self.col_idx = None
# Does objective row only contain (non)-negative values?
self.stop_iter = False
def generate_col_titles(self) -> list[str]:
"""Generate column titles for tableau of specific dimensions
>>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4.]]),
... 2, 0).generate_col_titles()
['x1', 'x2', 's1', 's2', 'RHS']
>>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4.]]),
... 2, 2).generate_col_titles()
['x1', 'x2', 'RHS']
"""
args = (self.n_vars, self.n_slack)
# decision | slack
string_starts = ["x", "s"]
titles = []
for i in range(2):
for j in range(args[i]):
titles.append(string_starts[i] + str(j + 1))
titles.append("RHS")
return titles
def find_pivot(self) -> tuple[Any, Any]:
"""Finds the pivot row and column.
>>> tuple(int(x) for x in Tableau(np.array([[-2,1,0,0,0], [3,1,1,0,6],
... [1,2,0,1,7.]]), 2, 0).find_pivot())
(1, 0)
"""
objective = self.objectives[-1]
# Find entries of highest magnitude in objective rows
sign = (objective == "min") - (objective == "max")
col_idx = np.argmax(sign * self.tableau[0, :-1])
# Choice is only valid if below 0 for maximise, and above for minimise
if sign * self.tableau[0, col_idx] <= 0:
self.stop_iter = True
return 0, 0
# Pivot row is chosen as having the lowest quotient when elements of
# the pivot column divide the right-hand side
# Slice excluding the objective rows
s = slice(self.n_stages, self.n_rows)
# RHS
dividend = self.tableau[s, -1]
# Elements of pivot column within slice
divisor = self.tableau[s, col_idx]
# Array filled with nans
nans = np.full(self.n_rows - self.n_stages, np.nan)
# If element in pivot column is greater than zero, return
# quotient or nan otherwise
quotients = np.divide(dividend, divisor, out=nans, where=divisor > 0)
# Arg of minimum quotient excluding the nan values. n_stages is added
# to compensate for earlier exclusion of objective columns
row_idx = np.nanargmin(quotients) + self.n_stages
return row_idx, col_idx
def pivot(self, row_idx: int, col_idx: int) -> np.ndarray:
"""Pivots on value on the intersection of pivot row and column.
>>> Tableau(np.array([[-2,-3,0,0,0],[1,3,1,0,4],[3,1,0,1,4.]]),
... 2, 2).pivot(1, 0).tolist()
... # doctest: +NORMALIZE_WHITESPACE
[[0.0, 3.0, 2.0, 0.0, 8.0],
[1.0, 3.0, 1.0, 0.0, 4.0],
[0.0, -8.0, -3.0, 1.0, -8.0]]
"""
# Avoid changes to original tableau
piv_row = self.tableau[row_idx].copy()
piv_val = piv_row[col_idx]
# Entry becomes 1
piv_row *= 1 / piv_val
# Variable in pivot column becomes basic, ie the only non-zero entry
for idx, coeff in enumerate(self.tableau[:, col_idx]):
self.tableau[idx] += -coeff * piv_row
self.tableau[row_idx] = piv_row
return self.tableau
def change_stage(self) -> np.ndarray:
"""Exits first phase of the two-stage method by deleting artificial
rows and columns, or completes the algorithm if exiting the standard
case.
>>> Tableau(np.array([
... [3, 3, -1, -1, 0, 0, 4],
... [2, 1, 0, 0, 0, 0, 0.],
... [1, 2, -1, 0, 1, 0, 2],
... [2, 1, 0, -1, 0, 1, 2]
... ]), 2, 2).change_stage().tolist()
... # doctest: +NORMALIZE_WHITESPACE
[[2.0, 1.0, 0.0, 0.0, 0.0],
[1.0, 2.0, -1.0, 0.0, 2.0],
[2.0, 1.0, 0.0, -1.0, 2.0]]
"""
# Objective of original objective row remains
self.objectives.pop()
if not self.objectives:
return self.tableau
# Slice containing ids for artificial columns
s = slice(-self.n_artificial_vars - 1, -1)
# Delete the artificial variable columns
self.tableau = np.delete(self.tableau, s, axis=1)
# Delete the objective row of the first stage
self.tableau = np.delete(self.tableau, 0, axis=0)
self.n_stages = 1
self.n_rows -= 1
self.n_artificial_vars = 0
self.stop_iter = False
return self.tableau
def run_simplex(self) -> dict[Any, Any]:
"""Operate on tableau until objective function cannot be
improved further.
# Standard linear program:
Max: x1 + x2
ST: x1 + 3x2 <= 4
3x1 + x2 <= 4
>>> {key: float(value) for key, value in Tableau(np.array([[-1,-1,0,0,0],
... [1,3,1,0,4],[3,1,0,1,4.]]), 2, 0).run_simplex().items()}
{'P': 2.0, 'x1': 1.0, 'x2': 1.0}
# Standard linear program with 3 variables:
Max: 3x1 + x2 + 3x3
ST: 2x1 + x2 + x3 ≤ 2
x1 + 2x2 + 3x3 ≤ 5
2x1 + 2x2 + x3 ≤ 6
>>> {key: float(value) for key, value in Tableau(np.array([
... [-3,-1,-3,0,0,0,0],
... [2,1,1,1,0,0,2],
... [1,2,3,0,1,0,5],
... [2,2,1,0,0,1,6.]
... ]),3,0).run_simplex().items()} # doctest: +ELLIPSIS
{'P': 5.4, 'x1': 0.199..., 'x3': 1.6}
# Optimal tableau input:
>>> {key: float(value) for key, value in Tableau(np.array([
... [0, 0, 0.25, 0.25, 2],
... [0, 1, 0.375, -0.125, 1],
... [1, 0, -0.125, 0.375, 1]
... ]), 2, 0).run_simplex().items()}
{'P': 2.0, 'x1': 1.0, 'x2': 1.0}
# Non-standard: >= constraints
Max: 2x1 + 3x2 + x3
ST: x1 + x2 + x3 <= 40
2x1 + x2 - x3 >= 10
- x2 + x3 >= 10
>>> {key: float(value) for key, value in Tableau(np.array([
... [2, 0, 0, 0, -1, -1, 0, 0, 20],
... [-2, -3, -1, 0, 0, 0, 0, 0, 0],
... [1, 1, 1, 1, 0, 0, 0, 0, 40],
... [2, 1, -1, 0, -1, 0, 1, 0, 10],
... [0, -1, 1, 0, 0, -1, 0, 1, 10.]
... ]), 3, 2).run_simplex().items()}
{'P': 70.0, 'x1': 10.0, 'x2': 10.0, 'x3': 20.0}
# Non standard: minimisation and equalities
Min: x1 + x2
ST: 2x1 + x2 = 12
6x1 + 5x2 = 40
>>> {key: float(value) for key, value in Tableau(np.array([
... [8, 6, 0, 0, 52],
... [1, 1, 0, 0, 0],
... [2, 1, 1, 0, 12],
... [6, 5, 0, 1, 40.],
... ]), 2, 2).run_simplex().items()}
{'P': 7.0, 'x1': 5.0, 'x2': 2.0}
# Pivot on slack variables
Max: 8x1 + 6x2
ST: x1 + 3x2 <= 33
4x1 + 2x2 <= 48
2x1 + 4x2 <= 48
x1 + x2 >= 10
x1 >= 2
>>> {key: float(value) for key, value in Tableau(np.array([
... [2, 1, 0, 0, 0, -1, -1, 0, 0, 12.0],
... [-8, -6, 0, 0, 0, 0, 0, 0, 0, 0.0],
... [1, 3, 1, 0, 0, 0, 0, 0, 0, 33.0],
... [4, 2, 0, 1, 0, 0, 0, 0, 0, 60.0],
... [2, 4, 0, 0, 1, 0, 0, 0, 0, 48.0],
... [1, 1, 0, 0, 0, -1, 0, 1, 0, 10.0],
... [1, 0, 0, 0, 0, 0, -1, 0, 1, 2.0]
... ]), 2, 2).run_simplex().items()} # doctest: +ELLIPSIS
{'P': 132.0, 'x1': 12.000... 'x2': 5.999...}
"""
# Stop simplex algorithm from cycling.
for _ in range(Tableau.maxiter):
# Completion of each stage removes an objective. If both stages
# are complete, then no objectives are left
if not self.objectives:
# Find the values of each variable at optimal solution
return self.interpret_tableau()
row_idx, col_idx = self.find_pivot()
# If there are no more negative values in objective row
if self.stop_iter:
# Delete artificial variable columns and rows. Update attributes
self.tableau = self.change_stage()
else:
self.tableau = self.pivot(row_idx, col_idx)
return {}
def interpret_tableau(self) -> dict[str, float]:
"""Given the final tableau, add the corresponding values of the basic
decision variables to the `output_dict`
>>> {key: float(value) for key, value in Tableau(np.array([
... [0,0,0.875,0.375,5],
... [0,1,0.375,-0.125,1],
... [1,0,-0.125,0.375,1]
... ]),2, 0).interpret_tableau().items()}
{'P': 5.0, 'x1': 1.0, 'x2': 1.0}
"""
# P = RHS of final tableau
output_dict = {"P": abs(self.tableau[0, -1])}
for i in range(self.n_vars):
# Gives indices of nonzero entries in the ith column
nonzero = np.nonzero(self.tableau[:, i])
n_nonzero = len(nonzero[0])
# First entry in the nonzero indices
nonzero_rowidx = nonzero[0][0]
nonzero_val = self.tableau[nonzero_rowidx, i]
# If there is only one nonzero value in column, which is one
if n_nonzero == 1 and nonzero_val == 1:
rhs_val = self.tableau[nonzero_rowidx, -1]
output_dict[self.col_titles[i]] = rhs_val
return output_dict
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: machine_learning/__init__.py
================================================
================================================
FILE: machine_learning/apriori_algorithm.py
================================================
"""
The Apriori algorithm is an association rule mining technique, also known as market
basket analysis, that aims to discover interesting relationships or associations among
a set of items in a transactional or relational database.
For example, the Apriori algorithm may state: "If a customer buys item A and item B,
then they are likely to buy item C." This rule suggests a relationship between items
A, B, and C, indicating that customers who purchased A and B are more likely to also
purchase item C.
WIKI: https://en.wikipedia.org/wiki/Apriori_algorithm
Examples: https://www.kaggle.com/code/earthian/apriori-association-rules-mining
"""
from collections import Counter
from itertools import combinations
def load_data() -> list[list[str]]:
"""
Returns a sample transaction dataset.
>>> load_data()
[['milk'], ['milk', 'butter'], ['milk', 'bread'], ['milk', 'bread', 'chips']]
"""
return [["milk"], ["milk", "butter"], ["milk", "bread"], ["milk", "bread", "chips"]]
def prune(itemset: list, candidates: list, length: int) -> list:
"""
Prune candidate itemsets that are not frequent.
The goal of pruning is to filter out candidate itemsets that are not frequent. This
is done by checking if all the (k-1) subsets of a candidate itemset are present in
the frequent itemsets of the previous iteration (valid subsequences of the frequent
itemsets from the previous iteration).
>>> itemset = ['X', 'Y', 'Z']
>>> candidates = [['X', 'Y'], ['X', 'Z'], ['Y', 'Z']]
>>> prune(itemset, candidates, 2)
[['X', 'Y'], ['X', 'Z'], ['Y', 'Z']]
>>> itemset = ['1', '2', '3', '4']
>>> candidates = ['1', '2', '4']
>>> prune(itemset, candidates, 3)
[]
"""
itemset_counter = Counter(tuple(item) for item in itemset)
pruned = []
for candidate in candidates:
is_subsequence = True
for item in candidate:
item_tuple = tuple(item)
if (
item_tuple not in itemset_counter
or itemset_counter[item_tuple] < length - 1
):
is_subsequence = False
break
if is_subsequence:
pruned.append(candidate)
return pruned
def apriori(data: list[list[str]], min_support: int) -> list[tuple[list[str], int]]:
"""
Returns a list of frequent itemsets and their support counts.
>>> data = [['A', 'B', 'C'], ['A', 'B'], ['A', 'C'], ['A', 'D'], ['B', 'C']]
>>> apriori(data, 2)
[(['A', 'B'], 1), (['A', 'C'], 2), (['B', 'C'], 2)]
>>> data = [['1', '2', '3'], ['1', '2'], ['1', '3'], ['1', '4'], ['2', '3']]
>>> apriori(data, 3)
[]
"""
itemset = [list(transaction) for transaction in data]
frequent_itemsets = []
length = 1
while itemset:
# Count itemset support
counts = [0] * len(itemset)
for transaction in data:
for j, candidate in enumerate(itemset):
if all(item in transaction for item in candidate):
counts[j] += 1
# Prune infrequent itemsets
itemset = [item for i, item in enumerate(itemset) if counts[i] >= min_support]
# Append frequent itemsets (as a list to maintain order)
for i, item in enumerate(itemset):
frequent_itemsets.append((sorted(item), counts[i]))
length += 1
itemset = prune(itemset, list(combinations(itemset, length)), length)
return frequent_itemsets
if __name__ == "__main__":
"""
Apriori algorithm for finding frequent itemsets.
Args:
data: A list of transactions, where each transaction is a list of items.
min_support: The minimum support threshold for frequent itemsets.
Returns:
A list of frequent itemsets along with their support counts.
"""
import doctest
doctest.testmod()
# user-defined threshold or minimum support level
frequent_itemsets = apriori(data=load_data(), min_support=2)
print("\n".join(f"{itemset}: {support}" for itemset, support in frequent_itemsets))
================================================
FILE: machine_learning/astar.py
================================================
"""
The A* algorithm combines features of uniform-cost search and pure heuristic search to
efficiently compute optimal solutions.
The A* algorithm is a best-first search algorithm in which the cost associated with a
node is f(n) = g(n) + h(n), where g(n) is the cost of the path from the initial state to
node n and h(n) is the heuristic estimate of the cost of a path from node n to a goal.
The A* algorithm introduces a heuristic into a regular graph-searching algorithm,
essentially planning ahead at each step so a more optimal decision is made. For this
reason, A* is known as an algorithm with brains.
https://en.wikipedia.org/wiki/A*_search_algorithm
"""
import numpy as np
class Cell:
"""
Class Cell represents a cell in the world, which has the properties:
position: represented by tuple of x and y coordinates initially set to (0,0).
parent: Contains the parent cell object visited before we arrived at this cell.
g, h, f: Parameters used when calling our heuristic function.
"""
def __init__(self):
self.position = (0, 0)
self.parent = None
self.g = 0
self.h = 0
self.f = 0
"""
Overrides the equality method because otherwise cell comparison would give
wrong results.
"""
def __eq__(self, cell):
return self.position == cell.position
def showcell(self):
print(self.position)
class Gridworld:
"""
Gridworld class represents the external world as an M*M grid
matrix.
world_size: creates a numpy array of the given world_size; default is (5, 5).
"""
def __init__(self, world_size=(5, 5)):
self.w = np.zeros(world_size)
self.world_x_limit = world_size[0]
self.world_y_limit = world_size[1]
def show(self):
print(self.w)
def get_neighbours(self, cell):
"""
Return the neighbours of cell
"""
neighbour_cord = [
(-1, -1),
(-1, 0),
(-1, 1),
(0, -1),
(0, 1),
(1, -1),
(1, 0),
(1, 1),
]
current_x = cell.position[0]
current_y = cell.position[1]
neighbours = []
for n in neighbour_cord:
x = current_x + n[0]
y = current_y + n[1]
if 0 <= x < self.world_x_limit and 0 <= y < self.world_y_limit:
c = Cell()
c.position = (x, y)
c.parent = cell
neighbours.append(c)
return neighbours
def astar(world, start, goal):
"""
Implementation of the A* algorithm.
world : Gridworld object representing the world.
start : Cell object for the start position.
goal  : Cell object for the goal position.
>>> p = Gridworld()
>>> start = Cell()
>>> start.position = (0,0)
>>> goal = Cell()
>>> goal.position = (4,4)
>>> astar(p, start, goal)
[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]
"""
_open = []
_closed = []
_open.append(start)
while _open:
min_f = np.argmin([n.f for n in _open])
current = _open[min_f]
_closed.append(_open.pop(min_f))
if current == goal:
break
for n in world.get_neighbours(current):
for c in _closed:
if c == n:
continue
n.g = current.g + 1
x1, y1 = n.position
x2, y2 = goal.position
n.h = (y2 - y1) ** 2 + (x2 - x1) ** 2
n.f = n.h + n.g
for c in _open:
if c == n and c.f < n.f:
continue
_open.append(n)
path = []
while current.parent is not None:
path.append(current.position)
current = current.parent
path.append(current.position)
return path[::-1]
if __name__ == "__main__":
world = Gridworld()
# Start position and goal
start = Cell()
start.position = (0, 0)
goal = Cell()
goal.position = (4, 4)
print(f"path from {start.position} to {goal.position}")
s = astar(world, start, goal)
# Just for visual reasons.
for i in s:
world.w[i] = 1
print(world.w)
================================================
FILE: machine_learning/automatic_differentiation.py
================================================
"""
Demonstration of the Automatic Differentiation (Reverse mode).
Reference: https://en.wikipedia.org/wiki/Automatic_differentiation
Author: Poojan Smart
Email: smrtpoojan@gmail.com
"""
from __future__ import annotations
from collections import defaultdict
from enum import Enum
from types import TracebackType
from typing import Any
import numpy as np
from typing_extensions import Self # noqa: UP035
class OpType(Enum):
"""
Class represents list of supported operations on Variable for gradient calculation.
"""
ADD = 0
SUB = 1
MUL = 2
DIV = 3
MATMUL = 4
POWER = 5
NOOP = 6
class Variable:
"""
Class representing an n-dimensional object that wraps a numpy array, on which
operations will be performed and for which the gradient will be calculated.
Examples:
>>> Variable(5.0)
Variable(5.0)
>>> Variable([5.0, 2.9])
Variable([5. 2.9])
>>> Variable([5.0, 2.9]) + Variable([1.0, 5.5])
Variable([6. 8.4])
>>> Variable([[8.0, 10.0]])
Variable([[ 8. 10.]])
"""
def __init__(self, value: Any) -> None:
self.value = np.array(value)
# pointers to the operations to which the Variable is input
self.param_to: list[Operation] = []
# pointer to the operation of which the Variable is output of
self.result_of: Operation = Operation(OpType.NOOP)
def __repr__(self) -> str:
return f"Variable({self.value})"
def to_ndarray(self) -> np.ndarray:
return self.value
def __add__(self, other: Variable) -> Variable:
result = Variable(self.value + other.value)
with GradientTracker() as tracker:
# if tracker is enabled, computation graph will be updated
if tracker.enabled:
tracker.append(OpType.ADD, params=[self, other], output=result)
return result
def __sub__(self, other: Variable) -> Variable:
result = Variable(self.value - other.value)
with GradientTracker() as tracker:
# if tracker is enabled, computation graph will be updated
if tracker.enabled:
tracker.append(OpType.SUB, params=[self, other], output=result)
return result
def __mul__(self, other: Variable) -> Variable:
result = Variable(self.value * other.value)
with GradientTracker() as tracker:
# if tracker is enabled, computation graph will be updated
if tracker.enabled:
tracker.append(OpType.MUL, params=[self, other], output=result)
return result
def __truediv__(self, other: Variable) -> Variable:
result = Variable(self.value / other.value)
with GradientTracker() as tracker:
# if tracker is enabled, computation graph will be updated
if tracker.enabled:
tracker.append(OpType.DIV, params=[self, other], output=result)
return result
def __matmul__(self, other: Variable) -> Variable:
result = Variable(self.value @ other.value)
with GradientTracker() as tracker:
# if tracker is enabled, computation graph will be updated
if tracker.enabled:
tracker.append(OpType.MATMUL, params=[self, other], output=result)
return result
def __pow__(self, power: int) -> Variable:
result = Variable(self.value**power)
with GradientTracker() as tracker:
# if tracker is enabled, computation graph will be updated
if tracker.enabled:
tracker.append(
OpType.POWER,
params=[self],
output=result,
other_params={"power": power},
)
return result
def add_param_to(self, param_to: Operation) -> None:
self.param_to.append(param_to)
def add_result_of(self, result_of: Operation) -> None:
self.result_of = result_of
class Operation:
"""
Class representing an operation on one or two Variable objects.
Operation objects contain the type of operation, pointers to the input Variable
objects and a pointer to the Variable resulting from the operation.
"""
def __init__(
self,
op_type: OpType,
other_params: dict | None = None,
) -> None:
self.op_type = op_type
self.other_params = {} if other_params is None else other_params
def add_params(self, params: list[Variable]) -> None:
self.params = params
def add_output(self, output: Variable) -> None:
self.output = output
def __eq__(self, value) -> bool:
return self.op_type == value if isinstance(value, OpType) else False
class GradientTracker:
"""
Class contains methods to compute partial derivatives of Variable
based on the computation graph.
Examples:
>>> with GradientTracker() as tracker:
... a = Variable([2.0, 5.0])
... b = Variable([1.0, 2.0])
... m = Variable([1.0, 2.0])
... c = a + b
... d = a * b
... e = c / d
>>> tracker.gradient(e, a)
array([-0.25, -0.04])
>>> tracker.gradient(e, b)
array([-1. , -0.25])
>>> tracker.gradient(e, m) is None
True
>>> with GradientTracker() as tracker:
... a = Variable([[2.0, 5.0]])
... b = Variable([[1.0], [2.0]])
... c = a @ b
>>> tracker.gradient(c, a)
array([[1., 2.]])
>>> tracker.gradient(c, b)
array([[2.],
[5.]])
>>> with GradientTracker() as tracker:
... a = Variable([[2.0, 5.0]])
... b = a ** 3
>>> tracker.gradient(b, a)
array([[12., 75.]])
"""
instance = None
def __new__(cls) -> Self:
"""
Executed when a class object is created; returns the existing
instance if one has already been created. This class follows the
singleton design pattern.
"""
if cls.instance is None:
cls.instance = super().__new__(cls)
return cls.instance
def __init__(self) -> None:
self.enabled = False
def __enter__(self) -> Self:
self.enabled = True
return self
def __exit__(
self,
exc_type: type[BaseException] | None,
exc: BaseException | None,
traceback: TracebackType | None,
) -> None:
self.enabled = False
def append(
self,
op_type: OpType,
params: list[Variable],
output: Variable,
other_params: dict | None = None,
) -> None:
"""
Adds Operation object to the related Variable objects for
creating computational graph for calculating gradients.
Args:
op_type: Operation type
params: Input parameters to the operation
output: Output variable of the operation
"""
operation = Operation(op_type, other_params=other_params)
param_nodes = []
for param in params:
param.add_param_to(operation)
param_nodes.append(param)
output.add_result_of(operation)
operation.add_params(param_nodes)
operation.add_output(output)
def gradient(self, target: Variable, source: Variable) -> np.ndarray | None:
"""
Reverse accumulation of partial derivatives to calculate gradients
of target variable with respect to source variable.
Args:
target: target variable for which gradients are calculated.
source: source variable with respect to which the gradients are
calculated.
Returns:
Gradient of the target variable with respect to the source variable
"""
# partial derivatives with respect to target
partial_deriv = defaultdict(lambda: 0)
partial_deriv[target] = np.ones_like(target.to_ndarray())
# iterate through each operation in the computation graph
operation_queue = [target.result_of]
while len(operation_queue) > 0:
operation = operation_queue.pop()
for param in operation.params:
# as per the chain rule, multiplying partial derivatives
# of variables with respect to the target
dparam_doutput = self.derivative(param, operation)
dparam_dtarget = dparam_doutput * partial_deriv[operation.output]
partial_deriv[param] += dparam_dtarget
if param.result_of and param.result_of != OpType.NOOP:
operation_queue.append(param.result_of)
return partial_deriv.get(source)
def derivative(self, param: Variable, operation: Operation) -> np.ndarray:
"""
Compute the derivative of given operation/function
Args:
param: variable to be differentiated
operation: function performed on the input variable
Returns:
Derivative of the output of the operation with respect to the
input variable
"""
params = operation.params
if operation == OpType.ADD:
return np.ones_like(params[0].to_ndarray(), dtype=np.float64)
if operation == OpType.SUB:
if params[0] == param:
return np.ones_like(params[0].to_ndarray(), dtype=np.float64)
return -np.ones_like(params[1].to_ndarray(), dtype=np.float64)
if operation == OpType.MUL:
return (
params[1].to_ndarray().T
if params[0] == param
else params[0].to_ndarray().T
)
if operation == OpType.DIV:
if params[0] == param:
return 1 / params[1].to_ndarray()
return -params[0].to_ndarray() / (params[1].to_ndarray() ** 2)
if operation == OpType.MATMUL:
return (
params[1].to_ndarray().T
if params[0] == param
else params[0].to_ndarray().T
)
if operation == OpType.POWER:
power = operation.other_params["power"]
return power * (params[0].to_ndarray() ** (power - 1))
err_msg = f"invalid operation type: {operation.op_type}"
raise ValueError(err_msg)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: machine_learning/data_transformations.py
================================================
"""
Normalization.
Wikipedia: https://en.wikipedia.org/wiki/Normalization
Normalization is the process of converting numerical data to a standard range of values.
This range is typically between [0, 1] or [-1, 1]. The equation for normalization is
x_norm = (x - x_min)/(x_max - x_min) where x_norm is the normalized value, x is the
value, x_min is the minimum value within the column or list of data, and x_max is the
maximum value within the column or list of data. Normalization is used to speed up
model training and put all of the data on a similar scale. This is useful because
variance in the range of values of a dataset can heavily impact optimization
(particularly Gradient Descent).
Standardization Wikipedia: https://en.wikipedia.org/wiki/Standardization
Standardization is the process of converting numerical data to a normally distributed
range of values. This range will have a mean of 0 and standard deviation of 1. This is
also known as z-score normalization. The equation for standardization is
x_std = (x - mu)/(sigma) where mu is the mean of the column or list of values and sigma
is the standard deviation of the column or list of values.
Choosing between Normalization & Standardization is more of an art than a science, but
it is often recommended to run experiments with both to see which performs better.
Additionally, a few rules of thumb are:
1. gaussian (normal) distributions work better with standardization
2. non-gaussian (non-normal) distributions work better with normalization
3. If a column or list of values has extreme values / outliers, use standardization
"""
from statistics import mean, stdev
def normalization(data: list, ndigits: int = 3) -> list:
"""
Return a normalized list of values.
@params: data, a list of values to normalize
@returns: a list of normalized values (rounded to ndigits decimal places)
@examples:
>>> normalization([2, 7, 10, 20, 30, 50])
[0.0, 0.104, 0.167, 0.375, 0.583, 1.0]
>>> normalization([5, 10, 15, 20, 25])
[0.0, 0.25, 0.5, 0.75, 1.0]
"""
# variables for calculation
x_min = min(data)
x_max = max(data)
# normalize data
return [round((x - x_min) / (x_max - x_min), ndigits) for x in data]
def standardization(data: list, ndigits: int = 3) -> list:
"""
Return a standardized list of values.
@params: data, a list of values to standardize
@returns: a list of standardized values (rounded to ndigits decimal places)
@examples:
>>> standardization([2, 7, 10, 20, 30, 50])
[-0.999, -0.719, -0.551, 0.009, 0.57, 1.69]
>>> standardization([5, 10, 15, 20, 25])
[-1.265, -0.632, 0.0, 0.632, 1.265]
"""
# variables for calculation
mu = mean(data)
sigma = stdev(data)
# standardize data
return [round((x - mu) / (sigma), ndigits) for x in data]
================================================
FILE: machine_learning/decision_tree.py
================================================
"""
Implementation of a basic regression decision tree.
Input data set: The input data set must be 1-dimensional with continuous labels.
Output: The decision tree maps a real number input to a real number output.
"""
import numpy as np
class DecisionTree:
def __init__(self, depth=5, min_leaf_size=5):
self.depth = depth
self.decision_boundary = 0
self.left = None
self.right = None
self.min_leaf_size = min_leaf_size
self.prediction = None
def mean_squared_error(self, labels, prediction):
"""
mean_squared_error:
@param labels: a one-dimensional numpy array
@param prediction: a floating point value
return value: mean_squared_error calculates the error if prediction is used to
estimate the labels
>>> tester = DecisionTree()
>>> test_labels = np.array([1,2,3,4,5,6,7,8,9,10])
>>> test_prediction = float(6)
>>> bool(tester.mean_squared_error(test_labels, test_prediction) == (
... TestDecisionTree.helper_mean_squared_error_test(test_labels,
... test_prediction)))
True
>>> test_labels = np.array([1,2,3])
>>> test_prediction = float(2)
>>> bool(tester.mean_squared_error(test_labels, test_prediction) == (
... TestDecisionTree.helper_mean_squared_error_test(test_labels,
... test_prediction)))
True
"""
if labels.ndim != 1:
print("Error: Input labels must be one dimensional")
return np.mean((labels - prediction) ** 2)
def train(self, x, y):
"""
train:
@param x: a one-dimensional numpy array
@param y: a one-dimensional numpy array.
The contents of y are the labels for the corresponding X values
train() does not have a return value
Examples:
1. Try to train when x & y are of same length & 1 dimensions (No errors)
>>> dt = DecisionTree()
>>> dt.train(np.array([10,20,30,40,50]),np.array([0,0,0,1,1]))
2. Try to train when x is 2 dimensions
>>> dt = DecisionTree()
>>> dt.train(np.array([[1,2,3,4,5],[1,2,3,4,5]]),np.array([0,0,0,1,1]))
Traceback (most recent call last):
...
ValueError: Input data set must be one-dimensional
3. Try to train when x and y are not of the same length
>>> dt = DecisionTree()
>>> dt.train(np.array([1,2,3,4,5]),np.array([[0,0,0,1,1],[0,0,0,1,1]]))
Traceback (most recent call last):
...
ValueError: x and y have different lengths
4. Try to train when x & y are of the same length but different dimensions
>>> dt = DecisionTree()
>>> dt.train(np.array([1,2,3,4,5]),np.array([[1],[2],[3],[4],[5]]))
Traceback (most recent call last):
...
ValueError: Data set labels must be one-dimensional
This section is to check that the inputs conform to our dimensionality
constraints
"""
if x.ndim != 1:
raise ValueError("Input data set must be one-dimensional")
if len(x) != len(y):
raise ValueError("x and y have different lengths")
if y.ndim != 1:
raise ValueError("Data set labels must be one-dimensional")
if len(x) < 2 * self.min_leaf_size:
self.prediction = np.mean(y)
return
if self.depth == 1:
self.prediction = np.mean(y)
return
best_split = 0
min_error = self.mean_squared_error(x, np.mean(y)) * 2
"""
loop over all possible splits for the decision tree. find the best split.
if no split exists that is less than 2 * error for the entire array
then the data set is not split and the average for the entire array is used as
the predictor
"""
for i in range(len(x)):
if len(x[:i]) < self.min_leaf_size: # noqa: SIM114
continue
elif len(x[i:]) < self.min_leaf_size:
continue
else:
error_left = self.mean_squared_error(x[:i], np.mean(y[:i]))
error_right = self.mean_squared_error(x[i:], np.mean(y[i:]))
error = error_left + error_right
if error < min_error:
best_split = i
min_error = error
if best_split != 0:
left_x = x[:best_split]
left_y = y[:best_split]
right_x = x[best_split:]
right_y = y[best_split:]
self.decision_boundary = x[best_split]
self.left = DecisionTree(
depth=self.depth - 1, min_leaf_size=self.min_leaf_size
)
self.right = DecisionTree(
depth=self.depth - 1, min_leaf_size=self.min_leaf_size
)
self.left.train(left_x, left_y)
self.right.train(right_x, right_y)
else:
self.prediction = np.mean(y)
return
def predict(self, x):
"""
predict:
@param x: a floating point value to predict the label of
the prediction function works by recursively calling the predict function
of the appropriate subtrees based on the tree's decision boundary
"""
if self.prediction is not None:
return self.prediction
elif self.left is not None and self.right is not None:
if x >= self.decision_boundary:
return self.right.predict(x)
else:
return self.left.predict(x)
else:
raise ValueError("Decision tree not yet trained")
class TestDecisionTree:
"""Decision Tres test class"""
@staticmethod
def helper_mean_squared_error_test(labels, prediction):
"""
helper_mean_squared_error_test:
@param labels: a one dimensional numpy array
@param prediction: a floating point value
return value: helper_mean_squared_error_test calculates the mean squared error
"""
squared_error_sum = float(0)
for label in labels:
squared_error_sum += (label - prediction) ** 2
return float(squared_error_sum / labels.size)
def main():
"""
In this demonstration we're generating a sample data set from the sin function in
numpy. We then train a decision tree on the data set and use the decision tree to
predict the label of 10 different test values. Then the mean squared error over
this test is displayed.
"""
x = np.arange(-1.0, 1.0, 0.005)
y = np.sin(x)
tree = DecisionTree(depth=10, min_leaf_size=10)
tree.train(x, y)
rng = np.random.default_rng()
test_cases = (rng.random(10) * 2) - 1
predictions = np.array([tree.predict(x) for x in test_cases])
avg_error = np.mean((predictions - test_cases) ** 2)
print("Test values: " + str(test_cases))
print("Predictions: " + str(predictions))
print("Average error: " + str(avg_error))
if __name__ == "__main__":
main()
import doctest
doctest.testmod(name="mean_squared_error", verbose=True)
================================================
FILE: machine_learning/dimensionality_reduction.py
================================================
# Copyright (c) 2023 Diego Gasco (diego.gasco99@gmail.com), Diegomangasco on GitHub
"""
Requirements:
- numpy version 1.21
- scipy version 1.3.3
Notes:
- Each column of the features matrix corresponds to one sample (an item of a class)
"""
import logging
import numpy as np
import pytest
from scipy.linalg import eigh
logging.basicConfig(level=logging.INFO, format="%(message)s")
def column_reshape(input_array: np.ndarray) -> np.ndarray:
"""Function to reshape a row Numpy array into a column Numpy array
>>> input_array = np.array([1, 2, 3])
>>> column_reshape(input_array)
array([[1],
[2],
[3]])
"""
return input_array.reshape((input_array.size, 1))
def covariance_within_classes(
features: np.ndarray, labels: np.ndarray, classes: int
) -> np.ndarray:
"""Function to compute the covariance matrix inside each class.
>>> features = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
>>> labels = np.array([0, 1, 0])
>>> covariance_within_classes(features, labels, 2)
array([[0.66666667, 0.66666667, 0.66666667],
[0.66666667, 0.66666667, 0.66666667],
[0.66666667, 0.66666667, 0.66666667]])
"""
covariance_sum = np.nan
for i in range(classes):
data = features[:, labels == i]
data_mean = data.mean(1)
# Centralize the data of class i
centered_data = data - column_reshape(data_mean)
if i > 0:
# If covariance_sum is not None
covariance_sum += np.dot(centered_data, centered_data.T)
else:
# If covariance_sum is np.nan (i.e. first loop)
covariance_sum = np.dot(centered_data, centered_data.T)
return covariance_sum / features.shape[1]
def covariance_between_classes(
features: np.ndarray, labels: np.ndarray, classes: int
) -> np.ndarray:
"""Function to compute the covariance matrix between multiple classes
>>> features = np.array([[9, 2, 3], [4, 3, 6], [1, 8, 9]])
>>> labels = np.array([0, 1, 0])
>>> covariance_between_classes(features, labels, 2)
array([[ 3.55555556, 1.77777778, -2.66666667],
[ 1.77777778, 0.88888889, -1.33333333],
[-2.66666667, -1.33333333, 2. ]])
"""
general_data_mean = features.mean(1)
covariance_sum = np.nan
for i in range(classes):
data = features[:, labels == i]
device_data = data.shape[1]
data_mean = data.mean(1)
if i > 0:
# If covariance_sum is not None
covariance_sum += device_data * np.dot(
column_reshape(data_mean) - column_reshape(general_data_mean),
(column_reshape(data_mean) - column_reshape(general_data_mean)).T,
)
else:
# If covariance_sum is np.nan (i.e. first loop)
covariance_sum = device_data * np.dot(
column_reshape(data_mean) - column_reshape(general_data_mean),
(column_reshape(data_mean) - column_reshape(general_data_mean)).T,
)
return covariance_sum / features.shape[1]
def principal_component_analysis(features: np.ndarray, dimensions: int) -> np.ndarray:
"""
Principal Component Analysis.
For more details, see: https://en.wikipedia.org/wiki/Principal_component_analysis.
Parameters:
* features: the features extracted from the dataset
* dimensions: to filter the projected data for the desired dimension
>>> test_principal_component_analysis()
"""
# Check if the features have been loaded
if features.any():
data_mean = features.mean(1)
# Center the dataset
centered_data = features - np.reshape(data_mean, (data_mean.size, 1))
covariance_matrix = np.dot(centered_data, centered_data.T) / features.shape[1]
_, eigenvectors = np.linalg.eigh(covariance_matrix)
# Take all the columns in reverse order (-1), then keep only the first `dimensions` columns
filtered_eigenvectors = eigenvectors[:, ::-1][:, 0:dimensions]
# Project the database on the new space
projected_data = np.dot(filtered_eigenvectors.T, features)
logging.info("Principal Component Analysis computed")
return projected_data
else:
logging.basicConfig(level=logging.ERROR, format="%(message)s", force=True)
logging.error("Dataset empty")
raise AssertionError
def linear_discriminant_analysis(
features: np.ndarray, labels: np.ndarray, classes: int, dimensions: int
) -> np.ndarray:
"""
Linear Discriminant Analysis.
For more details, see: https://en.wikipedia.org/wiki/Linear_discriminant_analysis.
Parameters:
* features: the features extracted from the dataset
* labels: the class labels of the features
* classes: the number of classes present in the dataset
* dimensions: to filter the projected data for the desired dimension
>>> test_linear_discriminant_analysis()
"""
# Check if the dimension desired is less than the number of classes
assert classes > dimensions
# Check if features have been already loaded
if features.any:
_, eigenvectors = eigh(
covariance_between_classes(features, labels, classes),
covariance_within_classes(features, labels, classes),
)
filtered_eigenvectors = eigenvectors[:, ::-1][:, :dimensions]
svd_matrix, _, _ = np.linalg.svd(filtered_eigenvectors)
filtered_svd_matrix = svd_matrix[:, 0:dimensions]
projected_data = np.dot(filtered_svd_matrix.T, features)
logging.info("Linear Discriminant Analysis computed")
return projected_data
else:
logging.basicConfig(level=logging.ERROR, format="%(message)s", force=True)
logging.error("Dataset empty")
raise AssertionError
def test_linear_discriminant_analysis() -> None:
# Create dummy dataset with 2 classes and 3 features
features = np.array([[1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [3, 4, 5, 6, 7]])
labels = np.array([0, 0, 0, 1, 1])
classes = 2
dimensions = 2
# Assert that the function raises an AssertionError if dimensions > classes
with pytest.raises(AssertionError) as error_info: # noqa: PT012
projected_data = linear_discriminant_analysis(
features, labels, classes, dimensions
)
if isinstance(projected_data, np.ndarray):
raise AssertionError(
"Did not raise AssertionError for dimensions > classes"
)
assert error_info.type is AssertionError
def test_principal_component_analysis() -> None:
features = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
dimensions = 2
expected_output = np.array([[6.92820323, 8.66025404, 10.39230485], [3.0, 3.0, 3.0]])
with pytest.raises(AssertionError) as error_info: # noqa: PT012
output = principal_component_analysis(features, dimensions)
if not np.allclose(expected_output, output):
raise AssertionError
assert error_info.type is AssertionError
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: machine_learning/forecasting/__init__.py
================================================
================================================
FILE: machine_learning/forecasting/ex_data.csv
================================================
total_users,total_events,days
18231,0.0,1
22621,1.0,2
15675,0.0,3
23583,1.0,4
68351,5.0,5
34338,3.0,6
19238,0.0,0
24192,0.0,1
70349,0.0,2
103510,0.0,3
128355,1.0,4
148484,6.0,5
153489,3.0,6
162667,1.0,0
311430,3.0,1
435663,7.0,2
273526,0.0,3
628588,2.0,4
454989,13.0,5
539040,3.0,6
52974,1.0,0
103451,2.0,1
810020,5.0,2
580982,3.0,3
216515,0.0,4
134694,10.0,5
93563,1.0,6
55432,1.0,0
169634,1.0,1
254908,4.0,2
315285,3.0,3
191764,0.0,4
514284,7.0,5
181214,4.0,6
78459,2.0,0
161620,3.0,1
245610,4.0,2
326722,5.0,3
214578,0.0,4
312365,5.0,5
232454,4.0,6
178368,1.0,0
97152,1.0,1
222813,4.0,2
285852,4.0,3
192149,1.0,4
142241,1.0,5
173011,2.0,6
56488,3.0,0
89572,2.0,1
356082,2.0,2
172799,0.0,3
142300,1.0,4
78432,2.0,5
539023,9.0,6
62389,1.0,0
70247,1.0,1
89229,0.0,2
94583,1.0,3
102455,0.0,4
129270,0.0,5
311409,1.0,6
1837026,0.0,0
361824,0.0,1
111379,2.0,2
76337,2.0,3
96747,0.0,4
92058,0.0,5
81929,2.0,6
143423,0.0,0
82939,0.0,1
74403,1.0,2
68234,0.0,3
94556,1.0,4
80311,0.0,5
75283,3.0,6
77724,0.0,0
49229,2.0,1
65708,2.0,2
273864,1.0,3
1711281,0.0,4
1900253,5.0,5
343071,1.0,6
1551326,0.0,0
56636,1.0,1
272782,2.0,2
1785678,0.0,3
241866,0.0,4
461904,0.0,5
2191901,2.0,6
102925,0.0,0
242778,1.0,1
298608,0.0,2
322458,10.0,3
216027,9.0,4
916052,12.0,5
193278,12.0,6
263207,8.0,0
672948,10.0,1
281909,1.0,2
384562,1.0,3
1027375,2.0,4
828905,9.0,5
624188,22.0,6
392218,8.0,0
292581,10.0,1
299869,12.0,2
769455,20.0,3
316443,8.0,4
1212864,24.0,5
1397338,28.0,6
223249,8.0,0
191264,14.0,1
================================================
FILE: machine_learning/forecasting/run.py
================================================
"""
this is code for forecasting
but I modified it and used it for safety checker of data
for ex: you have an online shop and for some reason some data are
missing (the amount of data that u expected are not supposed to be)
then we can use it
*ps : 1. ofc we can use normal statistic method but in this case
the data is quite absurd and only a little^^
2. ofc u can use this and modified it for forecasting purpose
for the next 3 months sales or something,
u can just adjust it for ur own purpose
"""
from warnings import simplefilter
import numpy as np
import pandas as pd
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVR
from statsmodels.tsa.statespace.sarimax import SARIMAX
def linear_regression_prediction(
train_dt: list, train_usr: list, train_mtch: list, test_dt: list, test_mtch: list
) -> float:
"""
First method: linear regression
input : training data (date, total_user, total_event) in list of float
output : total user prediction in float
>>> n = linear_regression_prediction([2,3,4,5], [5,3,4,6], [3,1,2,4], [2,1], [2,2])
>>> bool(abs(n - 5.0) < 1e-6) # Checking precision because of floating point errors
True
"""
x = np.array([[1, item, train_mtch[i]] for i, item in enumerate(train_dt)])
y = np.array(train_usr)
beta = np.dot(np.dot(np.linalg.inv(np.dot(x.transpose(), x)), x.transpose()), y)
return abs(beta[0] + test_dt[0] * beta[1] + test_mtch[0] + beta[2])
def sarimax_predictor(train_user: list, train_match: list, test_match: list) -> float:
"""
Second method: SARIMAX
SARIMAX is a statistical method that uses previous inputs
and learns their pattern to predict future data.
input : training data (total_user, with exog data = total_event) in list of float
output : total user prediction in float
>>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
6.6666671111109626
"""
# Suppress the User Warning raised by SARIMAX due to insufficient observations
simplefilter("ignore", UserWarning)
order = (1, 2, 1)
seasonal_order = (1, 1, 1, 7)
model = SARIMAX(
train_user, exog=train_match, order=order, seasonal_order=seasonal_order
)
model_fit = model.fit(disp=False, maxiter=600, method="nm")
result = model_fit.predict(1, len(test_match), exog=[test_match])
return float(result[0])
def support_vector_regressor(x_train: list, x_test: list, train_user: list) -> float:
"""
Third method: Support vector regressor
SVR is very similar to SVM (support vector machine):
it uses the same principles as the SVM for classification,
with only a few minor differences; the main one is that
it is better suited to regression.
input : training data (date, total_user, total_event) in list of float
where x = list of pairs (date and total event)
output : total user prediction in float
>>> support_vector_regressor([[5,2],[1,5],[6,2]], [[3,2]], [2,1,4])
1.634932078116079
"""
regressor = SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1)
regressor.fit(x_train, train_user)
y_pred = regressor.predict(x_test)
return float(y_pred[0])
def interquartile_range_checker(train_user: list) -> float:
"""
Optional method: interquartile range
input : list of total user in float
output : low limit of input in float
this method can be used to check whether some data point is an outlier or not
>>> interquartile_range_checker([1,2,3,4,5,6,7,8,9,10])
2.8
"""
train_user.sort()
q1 = np.percentile(train_user, 25)
q3 = np.percentile(train_user, 75)
iqr = q3 - q1
low_lim = q1 - (iqr * 0.1)
return float(low_lim)
def data_safety_checker(list_vote: list, actual_result: float) -> bool:
"""
Used to review all the votes (list of result predictions)
and compare them to the actual result.
input : list of predictions
output : True if the data is considered safe, False otherwise
>>> data_safety_checker([2, 3, 4], 5.0)
False
"""
safe = 0
not_safe = 0
if not isinstance(actual_result, float):
raise TypeError("Actual result should be float. Value passed is a list")
for i in list_vote:
if i > actual_result:
safe = not_safe + 1
elif abs(abs(i) - abs(actual_result)) <= 0.1:
safe += 1
else:
not_safe += 1
return safe > not_safe
if __name__ == "__main__":
"""
data columns = total users in a day, how many online events were held in one day,
and which day of the week it is (Sunday-Saturday)
"""
data_input_df = pd.read_csv("ex_data.csv")
# start normalization
normalize_df = Normalizer().fit_transform(data_input_df.values)
# split data
total_date = normalize_df[:, 2].tolist()
total_user = normalize_df[:, 0].tolist()
total_match = normalize_df[:, 1].tolist()
# for svr (input variable = total date and total match)
x = normalize_df[:, [1, 2]].tolist()
x_train = x[: len(x) - 1]
x_test = x[len(x) - 1 :]
# for linear regression & sarimax
train_date = total_date[: len(total_date) - 1]
train_user = total_user[: len(total_user) - 1]
train_match = total_match[: len(total_match) - 1]
test_date = total_date[len(total_date) - 1 :]
test_user = total_user[len(total_user) - 1 :]
test_match = total_match[len(total_match) - 1 :]
# voting system with forecasting
res_vote = [
linear_regression_prediction(
train_date, train_user, train_match, test_date, test_match
),
sarimax_predictor(train_user, train_match, test_match),
support_vector_regressor(x_train, x_test, train_user),
]
# check the safety of today's data
not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
print(f"Today's data is {not_str}safe.")
================================================
FILE: machine_learning/frequent_pattern_growth.py
================================================
"""
The Frequent Pattern Growth algorithm (FP-Growth) is a widely used data mining
technique for discovering frequent itemsets in large transaction databases.
It overcomes some of the limitations of traditional methods such as Apriori by
efficiently constructing the FP-Tree (frequent pattern tree).
WIKI: https://athena.ecs.csus.edu/~mei/associationcw/FpGrowth.html
Examples: https://www.javatpoint.com/fp-growth-algorithm-in-data-mining
"""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class TreeNode:
"""
A node in a Frequent Pattern tree.
Args:
name: The name of this node.
num_occur: The number of occurrences of the node.
parent_node: The parent node.
Example:
>>> parent = TreeNode("Parent", 1, None)
>>> child = TreeNode("Child", 2, parent)
>>> child.name
'Child'
>>> child.count
2
"""
name: str
count: int
parent: TreeNode | None = None
children: dict[str, TreeNode] = field(default_factory=dict)
node_link: TreeNode | None = None
def __repr__(self) -> str:
return f"TreeNode({self.name!r}, {self.count!r}, {self.parent!r})"
def inc(self, num_occur: int) -> None:
self.count += num_occur
def disp(self, ind: int = 1) -> None:
print(f"{' ' * ind} {self.name} {self.count}")
for child in self.children.values():
child.disp(ind + 1)
def create_tree(data_set: list, min_sup: int = 1) -> tuple[TreeNode, dict]:
"""
Create Frequent Pattern tree
Args:
data_set: A list of transactions, where each transaction is a list of items.
min_sup: The minimum support threshold.
Items with support less than this will be pruned. Default is 1.
Returns:
The root of the FP-Tree.
header_table: The header table dictionary with item information.
Example:
>>> data_set = [
... ['A', 'B', 'C'],
... ['A', 'C'],
... ['A', 'B', 'E'],
... ['A', 'B', 'C', 'E'],
... ['B', 'E']
... ]
>>> min_sup = 2
>>> fp_tree, header_table = create_tree(data_set, min_sup)
>>> fp_tree
TreeNode('Null Set', 1, None)
>>> len(header_table)
4
>>> header_table["A"]
[[4, None], TreeNode('A', 4, TreeNode('Null Set', 1, None))]
>>> header_table["E"][1] # doctest: +NORMALIZE_WHITESPACE
TreeNode('E', 1, TreeNode('B', 3, TreeNode('A', 4, TreeNode('Null Set', 1, None))))
>>> sorted(header_table)
['A', 'B', 'C', 'E']
>>> fp_tree.name
'Null Set'
>>> sorted(fp_tree.children)
['A', 'B']
>>> fp_tree.children['A'].name
'A'
>>> sorted(fp_tree.children['A'].children)
['B', 'C']
"""
header_table: dict = {}
for trans in data_set:
for item in trans:
header_table[item] = header_table.get(item, [0, None])
header_table[item][0] += 1
for k in list(header_table):
if header_table[k][0] < min_sup:
del header_table[k]
if not (freq_item_set := set(header_table)):
return TreeNode("Null Set", 1, None), {}
for key, value in header_table.items():
header_table[key] = [value, None]
fp_tree = TreeNode("Null Set", 1, None) # Parent is None for the root node
for tran_set in data_set:
local_d = {
item: header_table[item][0] for item in tran_set if item in freq_item_set
}
if local_d:
sorted_items = sorted(
local_d.items(), key=lambda item_info: item_info[1], reverse=True
)
ordered_items = [item[0] for item in sorted_items]
update_tree(ordered_items, fp_tree, header_table, 1)
return fp_tree, header_table
def update_tree(items: list, in_tree: TreeNode, header_table: dict, count: int) -> None:
"""
Update the FP-Tree with a transaction.
Args:
items: List of items in the transaction.
in_tree: The current node in the FP-Tree.
header_table: The header table dictionary with item information.
count: The count of the transaction.
Example:
>>> data_set = [
... ['A', 'B', 'C'],
... ['A', 'C'],
... ['A', 'B', 'E'],
... ['A', 'B', 'C', 'E'],
... ['B', 'E']
... ]
>>> min_sup = 2
>>> fp_tree, header_table = create_tree(data_set, min_sup)
>>> fp_tree
TreeNode('Null Set', 1, None)
>>> transaction = ['A', 'B', 'E']
>>> update_tree(transaction, fp_tree, header_table, 1)
>>> fp_tree
TreeNode('Null Set', 1, None)
>>> fp_tree.children['A'].children['B'].children['E'].children
{}
>>> fp_tree.children['A'].children['B'].children['E'].count
2
>>> header_table['E'][1].name
'E'
"""
if items[0] in in_tree.children:
in_tree.children[items[0]].inc(count)
else:
in_tree.children[items[0]] = TreeNode(items[0], count, in_tree)
if header_table[items[0]][1] is None:
header_table[items[0]][1] = in_tree.children[items[0]]
else:
update_header(header_table[items[0]][1], in_tree.children[items[0]])
if len(items) > 1:
update_tree(items[1:], in_tree.children[items[0]], header_table, count)
def update_header(node_to_test: TreeNode, target_node: TreeNode) -> TreeNode:
"""
Update the header table with a node link.
Args:
node_to_test: The node to be updated in the header table.
target_node: The node to link to.
Example:
>>> data_set = [
... ['A', 'B', 'C'],
... ['A', 'C'],
... ['A', 'B', 'E'],
... ['A', 'B', 'C', 'E'],
... ['B', 'E']
... ]
>>> min_sup = 2
>>> fp_tree, header_table = create_tree(data_set, min_sup)
>>> fp_tree
TreeNode('Null Set', 1, None)
>>> node1 = TreeNode("A", 3, None)
>>> node2 = TreeNode("B", 4, None)
>>> node1
TreeNode('A', 3, None)
>>> node1 = update_header(node1, node2)
>>> node1
TreeNode('A', 3, None)
>>> node1.node_link
TreeNode('B', 4, None)
>>> node2.node_link is None
True
"""
while node_to_test.node_link is not None:
node_to_test = node_to_test.node_link
if node_to_test.node_link is None:
node_to_test.node_link = target_node
# Return the updated node
return node_to_test
def ascend_tree(leaf_node: TreeNode, prefix_path: list[str]) -> None:
"""
Ascend the FP-Tree from a leaf node to its root, adding item names to the prefix
path.
Args:
leaf_node: The leaf node to start ascending from.
prefix_path: A list to store the items as they are ascended.
Example:
>>> data_set = [
... ['A', 'B', 'C'],
... ['A', 'C'],
... ['A', 'B', 'E'],
... ['A', 'B', 'C', 'E'],
... ['B', 'E']
... ]
>>> min_sup = 2
>>> fp_tree, header_table = create_tree(data_set, min_sup)
>>> path = []
>>> ascend_tree(fp_tree.children['A'], path)
>>> path # ascending from a leaf node 'A'
['A']
"""
if leaf_node.parent is not None:
prefix_path.append(leaf_node.name)
ascend_tree(leaf_node.parent, prefix_path)
def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict: # noqa: ARG001
"""
Find the conditional pattern base for a given base pattern.
Args:
base_pat: The base pattern for which to find the conditional pattern base.
tree_node: The node in the FP-Tree.
Example:
>>> data_set = [
... ['A', 'B', 'C'],
... ['A', 'C'],
... ['A', 'B', 'E'],
... ['A', 'B', 'C', 'E'],
... ['B', 'E']
... ]
>>> min_sup = 2
>>> fp_tree, header_table = create_tree(data_set, min_sup)
>>> fp_tree
TreeNode('Null Set', 1, None)
>>> len(header_table)
4
>>> base_pattern = frozenset(['A'])
>>> sorted(find_prefix_path(base_pattern, fp_tree.children['A']))
[]
"""
cond_pats: dict = {}
while tree_node is not None:
prefix_path: list = []
ascend_tree(tree_node, prefix_path)
if len(prefix_path) > 1:
cond_pats[frozenset(prefix_path[1:])] = tree_node.count
tree_node = tree_node.node_link
return cond_pats
def mine_tree(
in_tree: TreeNode, # noqa: ARG001
header_table: dict,
min_sup: int,
pre_fix: set,
freq_item_list: list,
) -> None:
"""
Mine the FP-Tree recursively to discover frequent itemsets.
Args:
in_tree: The FP-Tree to mine.
header_table: The header table dictionary with item information.
min_sup: The minimum support threshold.
pre_fix: A set of items as a prefix for the itemsets being mined.
freq_item_list: A list to store the frequent itemsets.
Example:
>>> data_set = [
... ['A', 'B', 'C'],
... ['A', 'C'],
... ['A', 'B', 'E'],
... ['A', 'B', 'C', 'E'],
... ['B', 'E']
... ]
>>> min_sup = 2
>>> fp_tree, header_table = create_tree(data_set, min_sup)
>>> fp_tree
TreeNode('Null Set', 1, None)
>>> frequent_itemsets = []
>>> mine_tree(fp_tree, header_table, min_sup, set([]), frequent_itemsets)
>>> expe_itm = [{'C'}, {'C', 'A'}, {'E'}, {'A', 'E'}, {'E', 'B'}, {'A'}, {'B'}]
>>> all(expected in frequent_itemsets for expected in expe_itm)
True
"""
sorted_items = sorted(header_table.items(), key=lambda item_info: item_info[1][0])
big_l = [item[0] for item in sorted_items]
for base_pat in big_l:
new_freq_set = pre_fix.copy()
new_freq_set.add(base_pat)
freq_item_list.append(new_freq_set)
cond_patt_bases = find_prefix_path(base_pat, header_table[base_pat][1])
my_cond_tree, my_head = create_tree(list(cond_patt_bases), min_sup)
if my_head is not None:
# Pass header_table[base_pat][1] as node_to_test to update_header
header_table[base_pat][1] = update_header(
header_table[base_pat][1], my_cond_tree
)
mine_tree(my_cond_tree, my_head, min_sup, new_freq_set, freq_item_list)
if __name__ == "__main__":
from doctest import testmod
testmod()
data_set: list[frozenset] = [
frozenset(["bread", "milk", "cheese"]),
frozenset(["bread", "milk"]),
frozenset(["bread", "diapers"]),
frozenset(["bread", "milk", "diapers"]),
frozenset(["milk", "diapers"]),
frozenset(["milk", "cheese"]),
frozenset(["diapers", "cheese"]),
frozenset(["bread", "milk", "cheese", "diapers"]),
]
print(f"{len(data_set) = }")
fp_tree, header_table = create_tree(data_set, min_sup=3)
print(f"{fp_tree = }")
print(f"{len(header_table) = }")
freq_items: list = []
mine_tree(fp_tree, header_table, 3, set(), freq_items)
print(f"{freq_items = }")
================================================
FILE: machine_learning/gaussian_naive_bayes.py.broken.txt
================================================
# Gaussian Naive Bayes Example
import time
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
def main():
"""
Gaussian Naive Bayes Example using sklearn function.
Iris type dataset is used to demonstrate algorithm.
"""
# Load Iris dataset
iris = load_iris()
# Split dataset into train and test data
x = iris["data"] # features
y = iris["target"]
x_train, x_test, y_train, y_test = train_test_split(
x, y, test_size=0.3, random_state=1
)
# Gaussian Naive Bayes
nb_model = GaussianNB()
time.sleep(2.9)
model_fit = nb_model.fit(x_train, y_train)
y_pred = model_fit.predict(x_test) # Predictions on the test set
# Display Confusion Matrix
ConfusionMatrixDisplay.from_estimator(
nb_model,
x_test,
y_test,
display_labels=iris["target_names"],
cmap="Blues", # although, Greys_r has a better contrast...
normalize="true",
)
plt.title("Normalized Confusion Matrix - IRIS Dataset")
plt.show()
time.sleep(1.8)
final_accuracy = 100 * accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"The overall accuracy of the model is: {round(final_accuracy, 2)}%")
if __name__ == "__main__":
main()
================================================
FILE: machine_learning/gradient_boosting_classifier.py
================================================
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
class GradientBoostingClassifier:
def __init__(self, n_estimators: int = 100, learning_rate: float = 0.1) -> None:
"""
Initialize a GradientBoostingClassifier.
Parameters:
- n_estimators (int): The number of weak learners to train.
- learning_rate (float): The learning rate for updating the model.
Attributes:
- n_estimators (int): The number of weak learners.
- learning_rate (float): The learning rate.
- models (list): A list to store the trained weak learners.
"""
self.n_estimators = n_estimators
self.learning_rate = learning_rate
self.models: list[tuple[DecisionTreeRegressor, float]] = []
def fit(self, features: np.ndarray, target: np.ndarray) -> None:
"""
Fit the GradientBoostingClassifier to the training data.
Parameters:
- features (np.ndarray): The training features.
- target (np.ndarray): The target values.
Returns:
None
>>> import numpy as np
>>> from sklearn.datasets import load_iris
>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
>>> iris = load_iris()
>>> X, y = iris.data, iris.target
>>> clf.fit(X, y)
>>> # Check if the model is trained
>>> len(clf.models) == 100
True
"""
for _ in range(self.n_estimators):
# Calculate the pseudo-residuals
residuals = -self.gradient(target, self.predict(features))
# Fit a weak learner (e.g., decision tree) to the residuals
model = DecisionTreeRegressor(max_depth=1)
model.fit(features, residuals)
# Update the model by adding the weak learner with a learning rate
self.models.append((model, self.learning_rate))
def predict(self, features: np.ndarray) -> np.ndarray:
"""
Make predictions on input data.
Parameters:
- features (np.ndarray): The input data for making predictions.
Returns:
- np.ndarray: An array of binary predictions (-1 or 1).
>>> import numpy as np
>>> from sklearn.datasets import load_iris
>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
>>> iris = load_iris()
>>> X, y = iris.data, iris.target
>>> clf.fit(X, y)
>>> y_pred = clf.predict(X)
>>> # Check if the predictions have the correct shape
>>> y_pred.shape == y.shape
True
"""
# Initialize predictions with zeros
predictions = np.zeros(features.shape[0])
for model, learning_rate in self.models:
predictions += learning_rate * model.predict(features)
return np.sign(predictions) # Convert to binary predictions (-1 or 1)
def gradient(self, target: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
"""
        Calculate the gradient of the logistic loss; its negation, taken in ``fit``,
        gives the pseudo-residuals that the next weak learner is fitted to.
        Parameters:
        - target (np.ndarray): The target values.
        - y_pred (np.ndarray): The predicted values.
        Returns:
        - np.ndarray: An array of logistic-loss gradient values.
>>> import numpy as np
>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
>>> target = np.array([0, 1, 0, 1])
>>> y_pred = np.array([0.2, 0.8, 0.3, 0.7])
>>> residuals = clf.gradient(target, y_pred)
>>> # Check if residuals have the correct shape
>>> residuals.shape == target.shape
True
"""
return -target / (1 + np.exp(target * y_pred))
if __name__ == "__main__":
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
================================================
FILE: machine_learning/gradient_boosting_regressor.py.broken.txt
================================================
"""Implementation of GradientBoostingRegressor in sklearn using the
boston dataset which is very popular for regression problem to
predict house price.
"""
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
def main():
    # load the dataset from sklearn; load_boston was removed from scikit-learn,
    # so the California housing dataset is used instead
    df = fetch_california_housing()
print(df.keys())
    # construct a data frame
    df_housing = pd.DataFrame(df.data, columns=df.feature_names)
    # add the target to the dataframe
    df_housing["Price"] = df.target
    # print the first five rows using the head function
    print(df_housing.head())
    # Summary statistics
    print(df_housing.describe().T)
    # Feature selection
    x = df_housing.iloc[:, :-1]
    y = df_housing.iloc[:, -1]  # target variable
# split the data with 75% train and 25% test sets.
x_train, x_test, y_train, y_test = train_test_split(
x, y, random_state=0, test_size=0.25
)
model = GradientBoostingRegressor(
n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
)
# training the model
model.fit(x_train, y_train)
# to see how good the model fit the data
training_score = model.score(x_train, y_train).round(3)
test_score = model.score(x_test, y_test).round(3)
print("Training score of GradientBoosting is :", training_score)
print("The test score of GradientBoosting is :", test_score)
    # Evaluate the model by computing the errors
y_pred = model.predict(x_test)
# The mean squared error
print(f"Mean squared error: {mean_squared_error(y_test, y_pred):.2f}")
# Explained variance score: 1 is perfect prediction
print(f"Test Variance score: {r2_score(y_test, y_pred):.2f}")
# So let's run the model against the test data
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], "k--", lw=4)
ax.set_xlabel("Actual")
ax.set_ylabel("Predicted")
ax.set_title("Truth vs Predicted")
    # display the plot
plt.show()
if __name__ == "__main__":
main()
================================================
FILE: machine_learning/gradient_descent.py
================================================
"""
Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis
function.
"""
import numpy as np
# List of input, output pairs
train_data = (
((5, 2, 3), 15),
((6, 5, 9), 25),
((11, 12, 13), 41),
((1, 1, 1), 8),
((11, 12, 13), 41),
)
test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
parameter_vector = [2, 4, 1, 5]
m = len(train_data)
LEARNING_RATE = 0.009
def _error(example_no, data_set="train"):
"""
:param data_set: train data or test data
:param example_no: example number whose error has to be checked
:return: error in example pointed by example number.
"""
return calculate_hypothesis_value(example_no, data_set) - output(
example_no, data_set
)
def _hypothesis_value(data_input_tuple):
"""
Calculates hypothesis function value for a given input
:param data_input_tuple: Input tuple of a particular example
:return: Value of hypothesis function at that point.
    Note that there is a bias input whose value is fixed as 1.
    It is not explicitly included in the input data, but the hypothesis function
    uses it, so it is handled separately by adding parameter_vector[0] at the end:
    h(x) = parameter_vector[0] + sum(parameter_vector[i + 1] * x[i]).
"""
hyp_val = 0
for i in range(len(parameter_vector) - 1):
hyp_val += data_input_tuple[i] * parameter_vector[i + 1]
hyp_val += parameter_vector[0]
return hyp_val
def output(example_no, data_set):
"""
:param data_set: test data or train data
:param example_no: example whose output is to be fetched
:return: output for that example
"""
if data_set == "train":
return train_data[example_no][1]
elif data_set == "test":
return test_data[example_no][1]
return None
def calculate_hypothesis_value(example_no, data_set):
"""
Calculates hypothesis value for a given example
:param data_set: test data or train_data
:param example_no: example whose hypothesis value is to be calculated
:return: hypothesis value for that example
"""
if data_set == "train":
return _hypothesis_value(train_data[example_no][0])
elif data_set == "test":
return _hypothesis_value(test_data[example_no][0])
return None
def summation_of_cost_derivative(index, end=m):
"""
    Calculates the sum of the cost function derivative
    :param index: index of the parameter with respect to which the derivative is taken
    :param end: value where summation ends, default is m, the number of examples
    :return: the summation of the cost derivative
    Note: If index is -1, the summation is taken with respect to the bias
    parameter.
"""
summation_value = 0
for i in range(end):
if index == -1:
summation_value += _error(i)
else:
summation_value += _error(i) * train_data[i][0][index]
return summation_value
def get_cost_derivative(index):
"""
    :param index: index of the parameter vector with respect to which the derivative
        is to be calculated
    :return: derivative with respect to that index
    Note: If index is -1, the derivative is taken with respect to the bias
    parameter.
"""
cost_derivative_value = summation_of_cost_derivative(index, m) / m
return cost_derivative_value
def run_gradient_descent():
global parameter_vector
# Tune these values to set a tolerance value for predicted output
absolute_error_limit = 0.000002
relative_error_limit = 0
j = 0
while True:
j += 1
temp_parameter_vector = [0, 0, 0, 0]
for i in range(len(parameter_vector)):
cost_derivative = get_cost_derivative(i - 1)
temp_parameter_vector[i] = (
parameter_vector[i] - LEARNING_RATE * cost_derivative
)
if np.allclose(
parameter_vector,
temp_parameter_vector,
atol=absolute_error_limit,
rtol=relative_error_limit,
):
break
parameter_vector = temp_parameter_vector
print(("Number of iterations:", j))
def test_gradient_descent():
for i in range(len(test_data)):
print(("Actual output value:", output(i, "test")))
print(("Hypothesis output:", calculate_hypothesis_value(i, "test")))
if __name__ == "__main__":
run_gradient_descent()
print("\nTesting gradient descent for a linear hypothesis function.\n")
test_gradient_descent()
================================================
FILE: machine_learning/k_means_clust.py
================================================
"""README, Author - Anurag Kumar(mailto:anuragkumarak95@gmail.com)
Requirements:
- sklearn
- numpy
- matplotlib
Python:
- 3.5
Inputs:
- X , a 2D numpy array of features.
- k , number of clusters to create.
- initial_centroids , initial centroid values generated by utility function(mentioned
in usage).
- maxiter , maximum number of iterations to process.
- heterogeneity , empty list that will be filled with heterogeneity values if passed
to kmeans func.
Usage:
1. define 'k' value, 'X' features array and 'heterogeneity' empty list
2. create initial_centroids,
initial_centroids = get_initial_centroids(
X,
k,
seed=0 # seed value for initial centroid generation,
# None for randomness(default=None)
)
3. find centroids and clusters using kmeans function.
centroids, cluster_assignment = kmeans(
X,
k,
initial_centroids,
maxiter=400,
record_heterogeneity=heterogeneity,
verbose=True # whether to print logs in console or not.(default=False)
)
4. Plot the loss function and heterogeneity values for every iteration saved in
heterogeneity list.
plot_heterogeneity(
heterogeneity,
k
)
5. Plot the labeled 3D data points with centroids.
plot_kmeans(
X,
centroids,
cluster_assignment
)
6. Create a cluster report (e.g. for export to Excel) with report_generator;
   the DataFrame passed in must have a column called 'Cluster' containing the
   k-means cluster labels.
"""
import warnings
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import pairwise_distances
warnings.filterwarnings("ignore")
TAG = "K-MEANS-CLUST/ "
def get_initial_centroids(data, k, seed=None):
"""Randomly choose k data points as initial centroids"""
# useful for obtaining consistent results
rng = np.random.default_rng(seed)
n = data.shape[0] # number of data points
# Pick K indices from range [0, N).
rand_indices = rng.integers(0, n, k)
# Keep centroids as dense format, as many entries will be nonzero due to averaging.
# As long as at least one document in a cluster contains a word,
# it will carry a nonzero weight in the TF-IDF vector of the centroid.
centroids = data[rand_indices, :]
return centroids
def centroid_pairwise_dist(x, centroids):
return pairwise_distances(x, centroids, metric="euclidean")
def assign_clusters(data, centroids):
# Compute distances between each data point and the set of centroids:
# Fill in the blank (RHS only)
distances_from_centroids = centroid_pairwise_dist(data, centroids)
# Compute cluster assignments for each data point:
# Fill in the blank (RHS only)
cluster_assignment = np.argmin(distances_from_centroids, axis=1)
return cluster_assignment
def revise_centroids(data, k, cluster_assignment):
new_centroids = []
for i in range(k):
# Select all data points that belong to cluster i. Fill in the blank (RHS only)
member_data_points = data[cluster_assignment == i]
# Compute the mean of the data points. Fill in the blank (RHS only)
centroid = member_data_points.mean(axis=0)
new_centroids.append(centroid)
new_centroids = np.array(new_centroids)
return new_centroids
def compute_heterogeneity(data, k, centroids, cluster_assignment):
heterogeneity = 0.0
for i in range(k):
# Select all data points that belong to cluster i. Fill in the blank (RHS only)
member_data_points = data[cluster_assignment == i, :]
if member_data_points.shape[0] > 0: # check if i-th cluster is non-empty
# Compute distances from centroid to data points (RHS only)
distances = pairwise_distances(
member_data_points, [centroids[i]], metric="euclidean"
)
squared_distances = distances**2
heterogeneity += np.sum(squared_distances)
return heterogeneity
def plot_heterogeneity(heterogeneity, k):
plt.figure(figsize=(7, 4))
plt.plot(heterogeneity, linewidth=4)
plt.xlabel("# Iterations")
plt.ylabel("Heterogeneity")
plt.title(f"Heterogeneity of clustering over time, K={k:d}")
plt.rcParams.update({"font.size": 16})
plt.show()
def plot_kmeans(data, centroids, cluster_assignment):
ax = plt.axes(projection="3d")
ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=cluster_assignment, cmap="viridis")
ax.scatter(
centroids[:, 0], centroids[:, 1], centroids[:, 2], c="red", s=100, marker="x"
)
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
ax.set_title("3D K-Means Clustering Visualization")
plt.show()
def kmeans(
data, k, initial_centroids, maxiter=500, record_heterogeneity=None, verbose=False
):
"""Runs k-means on given data and initial set of centroids.
maxiter: maximum number of iterations to run.(default=500)
record_heterogeneity: (optional) a list, to store the history of heterogeneity
as function of iterations
if None, do not store the history.
verbose: if True, print how many data points changed their cluster labels in
each iteration"""
centroids = initial_centroids[:]
prev_cluster_assignment = None
for itr in range(maxiter):
if verbose:
print(itr, end="")
# 1. Make cluster assignments using nearest centroids
cluster_assignment = assign_clusters(data, centroids)
# 2. Compute a new centroid for each of the k clusters, averaging all data
# points assigned to that cluster.
centroids = revise_centroids(data, k, cluster_assignment)
# Check for convergence: if none of the assignments changed, stop
if (
prev_cluster_assignment is not None
and (prev_cluster_assignment == cluster_assignment).all()
):
break
# Print number of new assignments
if prev_cluster_assignment is not None:
num_changed = np.sum(prev_cluster_assignment != cluster_assignment)
if verbose:
print(
f" {num_changed:5d} elements changed their cluster assignment."
)
# Record heterogeneity convergence metric
if record_heterogeneity is not None:
# YOUR CODE HERE
score = compute_heterogeneity(data, k, centroids, cluster_assignment)
record_heterogeneity.append(score)
prev_cluster_assignment = cluster_assignment[:]
return centroids, cluster_assignment
# Mock test below
if False: # change to true to run this test case.
from sklearn import datasets as ds
dataset = ds.load_iris()
k = 3
heterogeneity = []
initial_centroids = get_initial_centroids(dataset["data"], k, seed=0)
centroids, cluster_assignment = kmeans(
dataset["data"],
k,
initial_centroids,
maxiter=400,
record_heterogeneity=heterogeneity,
verbose=True,
)
plot_heterogeneity(heterogeneity, k)
plot_kmeans(dataset["data"], centroids, cluster_assignment)
def report_generator(
predicted: pd.DataFrame, clustering_variables: np.ndarray, fill_missing_report=None
) -> pd.DataFrame:
"""
    Generate a clustering report given these arguments:
    predicted - dataframe with a predicted cluster column
    clustering_variables - names of the columns used as clustering features
    fill_missing_report - dictionary of rules on how we are going to fill in missing
    values for final generated report (not included in modelling);
>>> predicted = pd.DataFrame()
>>> predicted['numbers'] = [1, 2, 3]
>>> predicted['col1'] = [0.5, 2.5, 4.5]
>>> predicted['col2'] = [100, 200, 300]
>>> predicted['col3'] = [10, 20, 30]
>>> predicted['Cluster'] = [1, 1, 2]
>>> report_generator(predicted, ['col1', 'col2'], 0)
Features Type Mark 1 2
0 # of Customers ClusterSize False 2.000000 1.000000
1 % of Customers ClusterProportion False 0.666667 0.333333
2 col1 mean_with_zeros True 1.500000 4.500000
3 col2 mean_with_zeros True 150.000000 300.000000
4 numbers mean_with_zeros False 1.500000 3.000000
.. ... ... ... ... ...
99 dummy 5% False 1.000000 1.000000
100 dummy 95% False 1.000000 1.000000
101 dummy stdev False 0.000000 NaN
102 dummy mode False 1.000000 1.000000
103 dummy median False 1.000000 1.000000
[104 rows x 5 columns]
"""
# Fill missing values with given rules
if fill_missing_report:
predicted = predicted.fillna(value=fill_missing_report)
predicted["dummy"] = 1
numeric_cols = predicted.select_dtypes(np.number).columns
report = (
predicted.groupby(["Cluster"])[ # construct report dataframe
numeric_cols
] # group by cluster number
.agg(
[
("sum", "sum"),
("mean_with_zeros", lambda x: np.mean(np.nan_to_num(x))),
("mean_without_zeros", lambda x: x.replace(0, np.nan).mean()),
(
"mean_25-75",
lambda x: np.mean(
np.nan_to_num(
sorted(x)[
round(len(x) * 25 / 100) : round(len(x) * 75 / 100)
]
)
),
),
("mean_with_na", "mean"),
("min", lambda x: x.min()),
("5%", lambda x: x.quantile(0.05)),
("25%", lambda x: x.quantile(0.25)),
("50%", lambda x: x.quantile(0.50)),
("75%", lambda x: x.quantile(0.75)),
("95%", lambda x: x.quantile(0.95)),
("max", lambda x: x.max()),
("count", lambda x: x.count()),
("stdev", lambda x: x.std()),
("mode", lambda x: x.mode()[0]),
("median", lambda x: x.median()),
("# > 0", lambda x: (x > 0).sum()),
]
)
.T.reset_index()
.rename(index=str, columns={"level_0": "Features", "level_1": "Type"})
) # rename columns
# calculate the size of cluster(count of clientID's)
# avoid SettingWithCopyWarning
clustersize = report[
(report["Features"] == "dummy") & (report["Type"] == "count")
].copy()
# rename created predicted cluster to match report column names
clustersize.Type = "ClusterSize"
clustersize.Features = "# of Customers"
# calculating the proportion of cluster
clusterproportion = pd.DataFrame(
clustersize.iloc[:, 2:].to_numpy() / clustersize.iloc[:, 2:].to_numpy().sum()
)
# rename created predicted cluster to match report column names
clusterproportion["Type"] = "% of Customers"
clusterproportion["Features"] = "ClusterProportion"
cols = clusterproportion.columns.tolist()
cols = cols[-2:] + cols[:-2]
clusterproportion = clusterproportion[cols] # rearrange columns to match report
clusterproportion.columns = report.columns
# generating dataframe with count of nan values
a = pd.DataFrame(
abs(
report[report["Type"] == "count"].iloc[:, 2:].to_numpy()
- clustersize.iloc[:, 2:].to_numpy()
)
)
a["Features"] = 0
a["Type"] = "# of nan"
# filling values in order to match report
a.Features = report[report["Type"] == "count"].Features.tolist()
cols = a.columns.tolist()
cols = cols[-2:] + cols[:-2]
a = a[cols] # rearrange columns to match report
a.columns = report.columns # rename columns to match report
# drop count values except for cluster size
report = report.drop(report[report.Type == "count"].index)
# concat report with cluster size and nan values
report = pd.concat([report, a, clustersize, clusterproportion], axis=0)
report["Mark"] = report["Features"].isin(clustering_variables)
cols = report.columns.tolist()
cols = cols[0:2] + cols[-1:] + cols[2:-1]
report = report[cols]
sorter1 = {
"ClusterSize": 9,
"ClusterProportion": 8,
"mean_with_zeros": 7,
"mean_with_na": 6,
"max": 5,
"50%": 4,
"min": 3,
"25%": 2,
"75%": 1,
"# of nan": 0,
"# > 0": -1,
"sum_with_na": -2,
}
report = (
report.assign(
Sorter1=lambda x: x.Type.map(sorter1),
Sorter2=lambda x: list(reversed(range(len(x)))),
)
.sort_values(["Sorter1", "Mark", "Sorter2"], ascending=False)
.drop(["Sorter1", "Sorter2"], axis=1)
)
report.columns.name = ""
report = report.reset_index()
report = report.drop(columns=["index"])
return report
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: machine_learning/k_nearest_neighbours.py
================================================
"""
k-Nearest Neighbours (kNN) is a simple non-parametric supervised learning
algorithm used for classification. Given some labelled training data, a given
point is classified using its k nearest neighbours according to some distance
metric. The most commonly occurring label among the neighbours becomes the label
of the given point. In effect, the label of the given point is decided by a
majority vote.
This implementation uses the commonly used Euclidean distance metric, but other
distance metrics can also be used.
Reference: https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm
"""
from collections import Counter
from heapq import nsmallest
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
class KNN:
def __init__(
self,
train_data: np.ndarray[float],
train_target: np.ndarray[int],
class_labels: list[str],
) -> None:
"""
Create a kNN classifier using the given training data and class labels
"""
        # Store as a list rather than a one-shot zip iterator so that classify()
        # can be called more than once on the same classifier.
        self.data = list(zip(train_data, train_target))
self.labels = class_labels
@staticmethod
def _euclidean_distance(a: np.ndarray[float], b: np.ndarray[float]) -> float:
"""
Calculate the Euclidean distance between two points
>>> KNN._euclidean_distance(np.array([0, 0]), np.array([3, 4]))
5.0
>>> KNN._euclidean_distance(np.array([1, 2, 3]), np.array([1, 8, 11]))
10.0
"""
return float(np.linalg.norm(a - b))
def classify(self, pred_point: np.ndarray[float], k: int = 5) -> str:
"""
Classify a given point using the kNN algorithm
>>> train_X = np.array(
... [[0, 0], [1, 0], [0, 1], [0.5, 0.5], [3, 3], [2, 3], [3, 2]]
... )
>>> train_y = np.array([0, 0, 0, 0, 1, 1, 1])
>>> classes = ['A', 'B']
>>> knn = KNN(train_X, train_y, classes)
>>> point = np.array([1.2, 1.2])
>>> knn.classify(point)
'A'
"""
# Distances of all points from the point to be classified
distances = (
(self._euclidean_distance(data_point[0], pred_point), data_point[1])
for data_point in self.data
)
# Choosing k points with the shortest distances
votes = (i[1] for i in nsmallest(k, distances))
# Most commonly occurring class is the one into which the point is classified
result = Counter(votes).most_common(1)[0][0]
return self.labels[result]
if __name__ == "__main__":
import doctest
doctest.testmod()
iris = datasets.load_iris()
X = np.array(iris["data"])
y = np.array(iris["target"])
iris_classes = iris["target_names"]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
iris_point = np.array([4.4, 3.1, 1.3, 1.4])
classifier = KNN(X_train, y_train, iris_classes)
print(classifier.classify(iris_point, k=3))
================================================
FILE: machine_learning/linear_discriminant_analysis.py
================================================
"""
Linear Discriminant Analysis
Assumptions About Data :
    1. The input variables have a Gaussian distribution.
    2. The variance calculated for each input variable by class grouping is the
    same.
    3. The mix of classes in your training set is representative of the problem.
Learning The Model :
    The LDA model requires the estimation of statistics from the training data :
        1. Mean of each input value for each class.
        2. Probability of an instance belonging to each class.
        3. Covariance of the input data for each class
Calculate the class means :
mean(x) = 1/n ( for i = 1 to i = n --> sum(xi))
Calculate the class probabilities :
P(y = 0) = count(y = 0) / (count(y = 0) + count(y = 1))
P(y = 1) = count(y = 1) / (count(y = 0) + count(y = 1))
Calculate the variance :
We can calculate the variance for dataset in two steps :
1. Calculate the squared difference for each input variable from the
group mean.
2. Calculate the mean of the squared difference.
------------------------------------------------
Squared_Difference = (x - mean(k)) ** 2
Variance = (1 / (count(x) - count(classes))) *
(for i = 1 to i = n --> sum(Squared_Difference(xi)))
Making Predictions :
discriminant(x) = x * (mean / variance) -
((mean ** 2) / (2 * variance)) + Ln(probability)
---------------------------------------------------------------------------
After calculating the discriminant value for each class, the class with the
largest discriminant value is taken as the prediction.
Author: @EverLookNeverSee
"""
from collections.abc import Callable
from math import log
from os import name, system
from random import gauss, seed
# Make a training dataset drawn from a gaussian distribution
def gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> list:
"""
Generate gaussian distribution instances based-on given mean and standard deviation
:param mean: mean value of class
    :param std_dev: value of standard deviation entered by user or its default value
:param instance_count: instance number of class
:return: a list containing generated values based-on given mean, std_dev and
instance_count
>>> gaussian_distribution(5.0, 1.0, 20) # doctest: +NORMALIZE_WHITESPACE
[6.288184753155463, 6.4494456086997705, 5.066335808938262, 4.235456349028368,
3.9078267848958586, 5.031334516831717, 3.977896829989127, 3.56317055489747,
5.199311976483754, 5.133374604658605, 5.546468300338232, 4.086029056264687,
5.005005283626573, 4.935258239627312, 3.494170998739258, 5.537997178661033,
5.320711100998849, 7.3891120432406865, 5.202969177309964, 4.855297691835079]
"""
seed(1)
return [gauss(mean, std_dev) for _ in range(instance_count)]
# Make corresponding Y flags to detect classes
def y_generator(class_count: int, instance_count: list) -> list:
"""
Generate y values for corresponding classes
:param class_count: Number of classes(data groupings) in dataset
:param instance_count: number of instances in class
:return: corresponding values for data groupings in dataset
>>> y_generator(1, [10])
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
>>> y_generator(2, [5, 10])
[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> y_generator(4, [10, 5, 15, 20]) # doctest: +NORMALIZE_WHITESPACE
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
"""
return [k for k in range(class_count) for _ in range(instance_count[k])]
# Calculate the class means
def calculate_mean(instance_count: int, items: list) -> float:
"""
Calculate given class mean
:param instance_count: Number of instances in class
:param items: items that related to specific class(data grouping)
:return: calculated actual mean of considered class
>>> items = gaussian_distribution(5.0, 1.0, 20)
>>> calculate_mean(len(items), items)
5.011267842911003
"""
# the sum of all items divided by number of instances
return sum(items) / instance_count
# Calculate the class probabilities
def calculate_probabilities(instance_count: int, total_count: int) -> float:
"""
    Calculate the probability that a given instance belongs to a particular class
:param instance_count: number of instances in class
:param total_count: the number of all instances
:return: value of probability for considered class
>>> calculate_probabilities(20, 60)
0.3333333333333333
>>> calculate_probabilities(30, 100)
0.3
"""
# number of instances in specific class divided by number of all instances
return instance_count / total_count
# Calculate the variance
def calculate_variance(items: list, means: list, total_count: int) -> float:
"""
Calculate the variance
:param items: a list containing all items(gaussian distribution of all classes)
:param means: a list containing real mean values of each class
:param total_count: the number of all instances
:return: calculated variance for considered dataset
>>> items = gaussian_distribution(5.0, 1.0, 20)
>>> means = [5.011267842911003]
>>> total_count = 20
>>> calculate_variance([items], means, total_count)
0.9618530973487491
"""
squared_diff = [] # An empty list to store all squared differences
# iterate over number of elements in items
for i in range(len(items)):
# for loop iterates over number of elements in inner layer of items
for j in range(len(items[i])):
# appending squared differences to 'squared_diff' list
squared_diff.append((items[i][j] - means[i]) ** 2)
# one divided by (the number of all instances - number of classes) multiplied by
# sum of all squared differences
n_classes = len(means) # Number of classes in dataset
return 1 / (total_count - n_classes) * sum(squared_diff)
# Making predictions
def predict_y_values(
x_items: list, means: list, variance: float, probabilities: list
) -> list:
"""This function predicts new indexes(groups for our data)
:param x_items: a list containing all items(gaussian distribution of all classes)
:param means: a list containing real mean values of each class
:param variance: calculated value of variance by calculate_variance function
:param probabilities: a list containing all probabilities of classes
:return: a list containing predicted Y values
>>> x_items = [[6.288184753155463, 6.4494456086997705, 5.066335808938262,
... 4.235456349028368, 3.9078267848958586, 5.031334516831717,
... 3.977896829989127, 3.56317055489747, 5.199311976483754,
... 5.133374604658605, 5.546468300338232, 4.086029056264687,
... 5.005005283626573, 4.935258239627312, 3.494170998739258,
... 5.537997178661033, 5.320711100998849, 7.3891120432406865,
... 5.202969177309964, 4.855297691835079], [11.288184753155463,
... 11.44944560869977, 10.066335808938263, 9.235456349028368,
... 8.907826784895859, 10.031334516831716, 8.977896829989128,
... 8.56317055489747, 10.199311976483754, 10.133374604658606,
... 10.546468300338232, 9.086029056264687, 10.005005283626572,
... 9.935258239627313, 8.494170998739259, 10.537997178661033,
... 10.320711100998848, 12.389112043240686, 10.202969177309964,
... 9.85529769183508], [16.288184753155463, 16.449445608699772,
... 15.066335808938263, 14.235456349028368, 13.907826784895859,
... 15.031334516831716, 13.977896829989128, 13.56317055489747,
... 15.199311976483754, 15.133374604658606, 15.546468300338232,
... 14.086029056264687, 15.005005283626572, 14.935258239627313,
... 13.494170998739259, 15.537997178661033, 15.320711100998848,
... 17.389112043240686, 15.202969177309964, 14.85529769183508]]
>>> means = [5.011267842911003, 10.011267842911003, 15.011267842911002]
>>> variance = 0.9618530973487494
>>> probabilities = [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
>>> predict_y_values(x_items, means, variance,
... probabilities) # doctest: +NORMALIZE_WHITESPACE
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2]
"""
# An empty list to store generated discriminant values of all items in dataset for
# each class
results = []
# for loop iterates over number of elements in list
for i in range(len(x_items)):
# for loop iterates over number of inner items of each element
for j in range(len(x_items[i])):
temp = [] # to store all discriminant values of each item as a list
# for loop iterates over number of classes we have in our dataset
for k in range(len(x_items)):
# appending values of discriminants for each class to 'temp' list
temp.append(
x_items[i][j] * (means[k] / variance)
- (means[k] ** 2 / (2 * variance))
+ log(probabilities[k])
)
# appending discriminant values of each item to 'results' list
results.append(temp)
return [result.index(max(result)) for result in results]
# Calculating Accuracy
def accuracy(actual_y: list, predicted_y: list) -> float:
"""
Calculate the value of accuracy based-on predictions
:param actual_y:a list containing initial Y values generated by 'y_generator'
function
:param predicted_y: a list containing predicted Y values generated by
'predict_y_values' function
:return: percentage of accuracy
>>> actual_y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
... 1, 1 ,1 ,1 ,1 ,1 ,1]
>>> predicted_y = [0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0,
... 0, 0, 1, 1, 1, 0, 1, 1, 1]
>>> accuracy(actual_y, predicted_y)
50.0
>>> actual_y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
... 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
>>> predicted_y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
... 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
>>> accuracy(actual_y, predicted_y)
100.0
"""
# iterate over one element of each list at a time (zip mode)
# prediction is correct if actual Y value equals to predicted Y value
correct = sum(1 for i, j in zip(actual_y, predicted_y) if i == j)
# percentage of accuracy equals to number of correct predictions divided by number
# of all data and multiplied by 100
return (correct / len(actual_y)) * 100
def valid_input[num](
input_type: Callable[[object], num], # Usually float or int
input_msg: str,
err_msg: str,
condition: Callable[[num], bool] = lambda _: True,
default: str | None = None,
) -> num:
"""
    Ask for a user value and validate that it fulfills a condition.
:input_type: user input expected type of value
:input_msg: message to show user in the screen
:err_msg: message to show in the screen in case of error
:condition: function that represents the condition that user input is valid.
:default: Default value in case the user does not type anything
:return: user's input
"""
    while True:
        raw_value = input(input_msg).strip() or default
        try:
            user_input = input_type(raw_value)
            if condition(user_input):
                return user_input
            print(f"{user_input}: {err_msg}")
        except ValueError:
            # Use the raw string here: user_input is unbound when the cast fails
            print(
                f"{raw_value}: Incorrect input type, expected {input_type.__name__!r}"
            )
# Main Function
def main():
"""This function starts execution phase"""
while True:
print(" Linear Discriminant Analysis ".center(50, "*"))
print("*" * 50, "\n")
print("First of all we should specify the number of classes that")
print("we want to generate as training dataset")
# Trying to get number of classes
n_classes = valid_input(
input_type=int,
condition=lambda x: x > 0,
input_msg="Enter the number of classes (Data Groupings): ",
err_msg="Number of classes should be positive!",
)
print("-" * 100)
# Trying to get the value of standard deviation
std_dev = valid_input(
input_type=float,
condition=lambda x: x >= 0,
input_msg=(
"Enter the value of standard deviation"
"(Default value is 1.0 for all classes): "
),
err_msg="Standard deviation should not be negative!",
default="1.0",
)
print("-" * 100)
# Trying to get number of instances in classes and theirs means to generate
# dataset
counts = [] # An empty list to store instance counts of classes in dataset
for i in range(n_classes):
user_count = valid_input(
input_type=int,
condition=lambda x: x > 0,
input_msg=(f"Enter The number of instances for class_{i + 1}: "),
err_msg="Number of instances should be positive!",
)
counts.append(user_count)
print("-" * 100)
# An empty list to store values of user-entered means of classes
user_means = []
for a in range(n_classes):
user_mean = valid_input(
input_type=float,
input_msg=(f"Enter the value of mean for class_{a + 1}: "),
err_msg="This is an invalid value.",
)
user_means.append(user_mean)
print("-" * 100)
print("Standard deviation: ", std_dev)
# print out the number of instances in classes in separated line
for i, count in enumerate(counts, 1):
print(f"Number of instances in class_{i} is: {count}")
print("-" * 100)
# print out mean values of classes separated line
for i, user_mean in enumerate(user_means, 1):
print(f"Mean of class_{i} is: {user_mean}")
print("-" * 100)
# Generating training dataset drawn from gaussian distribution
x = [
gaussian_distribution(user_means[j], std_dev, counts[j])
for j in range(n_classes)
]
print("Generated Normal Distribution: \n", x)
print("-" * 100)
        # Generating Ys to detect corresponding classes
y = y_generator(n_classes, counts)
print("Generated Corresponding Ys: \n", y)
print("-" * 100)
# Calculating the value of actual mean for each class
actual_means = [calculate_mean(counts[k], x[k]) for k in range(n_classes)]
# for loop iterates over number of elements in 'actual_means' list and print
# out them in separated line
for i, actual_mean in enumerate(actual_means, 1):
print(f"Actual(Real) mean of class_{i} is: {actual_mean}")
print("-" * 100)
# Calculating the value of probabilities for each class
probabilities = [
calculate_probabilities(counts[i], sum(counts)) for i in range(n_classes)
]
# for loop iterates over number of elements in 'probabilities' list and print
# out them in separated line
for i, probability in enumerate(probabilities, 1):
print(f"Probability of class_{i} is: {probability}")
print("-" * 100)
# Calculating the values of variance for each class
variance = calculate_variance(x, actual_means, sum(counts))
print("Variance: ", variance)
print("-" * 100)
# Predicting Y values
# storing predicted Y values in 'pre_indexes' variable
pre_indexes = predict_y_values(x, actual_means, variance, probabilities)
print("-" * 100)
# Calculating Accuracy of the model
print(f"Accuracy: {accuracy(y, pre_indexes)}")
print("-" * 100)
print(" DONE ".center(100, "+"))
if input("Press any key to restart or 'q' for quit: ").strip().lower() == "q":
print("\n" + "GoodBye!".center(100, "-") + "\n")
break
system("cls" if name == "nt" else "clear") # noqa: S605
if __name__ == "__main__":
main()
================================================
FILE: machine_learning/linear_regression.py
================================================
"""
Linear regression is the most basic type of regression commonly used for
predictive analysis. The idea is pretty simple: we have a dataset and we have
features associated with it. Features should be chosen very cautiously
as they determine how much our model will be able to make future predictions.
We try to set the weights of these features, over many iterations, so that they
best fit our dataset. This particular code uses a CSGO dataset (ADR vs
Rating). We try to fit a line through the dataset and estimate the parameters.
"""
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "httpx",
# "numpy",
# ]
# ///
import httpx
import numpy as np
def collect_dataset():
"""Collect dataset of CSGO
The dataset contains ADR vs Rating of a Player
:return : dataset obtained from the link, as matrix
"""
response = httpx.get(
"https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
"master/Week1/ADRvsRating.csv",
timeout=10,
)
lines = response.text.splitlines()
data = []
for item in lines:
item = item.split(",")
data.append(item)
data.pop(0) # This is for removing the labels from the list
dataset = np.matrix(data)
return dataset
def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
"""Run steep gradient descent and updates the Feature vector accordingly_
:param data_x : contains the dataset
:param data_y : contains the output associated with each data-entry
:param len_data : length of the data_
:param alpha : Learning rate of the model
:param theta : Feature vector (weight's for our model)
;param return : Updated Feature's, using
curr_features - alpha_ * gradient(w.r.t. feature)
>>> import numpy as np
>>> data_x = np.array([[1, 2], [3, 4]])
>>> data_y = np.array([5, 6])
>>> len_data = len(data_x)
>>> alpha = 0.01
>>> theta = np.array([0.1, 0.2])
>>> run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
array([0.196, 0.343])
"""
n = len_data
prod = np.dot(theta, data_x.transpose())
prod -= data_y.transpose()
sum_grad = np.dot(prod, data_x)
theta = theta - (alpha / n) * sum_grad
return theta
def sum_of_square_error(data_x, data_y, len_data, theta):
"""Return sum of square error for error calculation
:param data_x : contains our dataset
:param data_y : contains the output (result vector)
:param len_data : len of the dataset
:param theta : contains the feature vector
:return : sum of square error computed from given feature's
Example:
>>> vc_x = np.array([[1.1], [2.1], [3.1]])
>>> vc_y = np.array([1.2, 2.2, 3.2])
>>> round(sum_of_square_error(vc_x, vc_y, 3, np.array([1])),3)
np.float64(0.005)
"""
prod = np.dot(theta, data_x.transpose())
prod -= data_y.transpose()
sum_elem = np.sum(np.square(prod))
error = sum_elem / (2 * len_data)
return error
def run_linear_regression(data_x, data_y):
"""Implement Linear regression over the dataset
:param data_x : contains our dataset
:param data_y : contains the output (result vector)
:return : feature for line of best fit (Feature vector)
"""
iterations = 100000
alpha = 0.0001550
no_features = data_x.shape[1]
len_data = data_x.shape[0] - 1
theta = np.zeros((1, no_features))
for i in range(iterations):
theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
error = sum_of_square_error(data_x, data_y, len_data, theta)
print(f"At Iteration {i + 1} - Error is {error:.5f}")
return theta
def mean_absolute_error(predicted_y, original_y):
"""Return sum of square error for error calculation
:param predicted_y : contains the output of prediction (result vector)
:param original_y : contains values of expected outcome
:return : mean absolute error computed from given feature's
>>> predicted_y = [3, -0.5, 2, 7]
>>> original_y = [2.5, 0.0, 2, 8]
>>> mean_absolute_error(predicted_y, original_y)
0.5
"""
total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
return total / len(original_y)
def main():
"""Driver function"""
data = collect_dataset()
len_data = data.shape[0]
data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
data_y = data[:, -1].astype(float)
theta = run_linear_regression(data_x, data_y)
len_result = theta.shape[1]
print("Resultant Feature vector : ")
for i in range(len_result):
print(f"{theta[0, i]:.5f}")
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: machine_learning/local_weighted_learning/README.md
================================================
# Locally Weighted Linear Regression
It is a non-parametric ML algorithm that, unlike **linear regression**, does not learn a fixed set of parameters. \
So what exactly is *linear regression*? \
**Linear regression** is a supervised learning algorithm used for computing linear relationships between input (X) and output (Y). \
### Terminology Involved
number_of_features(i) = Number of features involved. \
number_of_training_examples(m) = Number of training examples. \
output_sequence(y) = Output Sequence. \
$\theta$ $^T$ x = predicted point. \
J($\theta$) = Cost function of point.
The steps involved in ordinary linear regression are:
Training phase: Compute $\theta$ to minimize the cost. \
J($\theta$) = $\sum_{i=1}^m$ (($\theta$)$^T$ $x^i$ - $y^i$)$^2$
Predict output: for given query point x, \
return: ($\theta$)$^T$ x
This training phase works well when the relationship between the data points is linear, but it raises a question: can we also predict a non-linear relationship between x and y?
This is where a non-parametric algorithm comes in: it does not compute predictions from a fixed set of parameters. Instead, the parameters $\theta$ are computed individually for each query point x.
While computing $\theta$, a higher preference is given to the points in the vicinity of x than to the points farther from x.
Cost Function J($\theta$) = $\sum_{i=1}^m$ $w^i$ (($\theta$)$^T$ $x^i$ - $y^i$)$^2$
$w^i$ is a non-negative weight associated with the training point $x^i$. \
$w^i$ is large for $x^i$'s lying closer to the query point x. \
$w^i$ is small for $x^i$'s lying farther from the query point x.
A Typical weight can be computed using \
$w^i$ = $\exp$(-$\frac{(x^i-x)(x^i-x)^T}{2\tau^2}$)
Where $\tau$ is the bandwidth parameter that controls $w^i$ distance from x.
Let's look at an example:
Suppose we have a query point x=5.0 and training points $x^1$=4.9 and $x^2$=3.0; then we can calculate the weights as:
$w^i$ = $\exp$(-$\frac{(x^i-x)(x^i-x)^T}{2\tau^2}$) with $\tau$=0.5
$w^1$ = $\exp$(-$\frac{(4.9-5)^2}{2(0.5)^2}$) = 0.9802
$w^2$ = $\exp$(-$\frac{(3-5)^2}{2(0.5)^2}$) = 0.000335
So, J($\theta$) = 0.9802*($\theta$ $^T$ $x^1$ - $y^1$)$^2$ + 0.000335*($\theta$ $^T$ $x^2$ - $y^2$)$^2$
From this we can conclude that the weights fall exponentially as the distance between x and $x^i$ increases, and so does the contribution of the error in the prediction for $x^i$ to the cost.
Steps involved in LWL are : \
Compute $\theta$ to minimize the cost.
J($\theta$) = $\sum_{i=1}^m$ $w^i$ (($\theta$)$^T$ $x^i$ - $y^i$)$^2$ \
Predict Output: for given query point x, \
return : $\theta$ $^T$ x
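
Minimizing this weighted cost has the standard weighted least squares solution $\theta$ = $(X^TWX)^{-1}X^TWy$ (see `local_weighted_learning.py` in this directory for the full implementation). Below is a minimal NumPy sketch for one-dimensional data; the helper name `predict_one` and the sample values are illustrative only, not part of the repository code:

```python
import numpy as np


def predict_one(x0: float, x: np.ndarray, y: np.ndarray, tau: float) -> float:
    """Predict y at the query point x0 with locally weighted linear regression."""
    x_aug = np.column_stack((np.ones(len(x)), x))  # design matrix with a bias column
    # Gaussian weights: w_i = exp(-(x_i - x0)^2 / (2 * tau^2))
    weights = np.diag(np.exp(-((x - x0) ** 2) / (2 * tau**2)))
    # Weighted least squares: theta = (X^T W X)^-1 (X^T W y)
    theta = np.linalg.pinv(x_aug.T @ weights @ x_aug) @ (x_aug.T @ weights @ y)
    return float(np.array([1.0, x0]) @ theta)


# With tau = 0.5 and training points x = [4.9, 3.0], the query point x0 = 5.0 gets
# weights of roughly 0.9802 and 0.000335, matching the worked example above.
print(predict_one(5.0, np.array([4.9, 3.0]), np.array([2.0, 1.0]), tau=0.5))
```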
================================================
FILE: machine_learning/local_weighted_learning/__init__.py
================================================
================================================
FILE: machine_learning/local_weighted_learning/local_weighted_learning.py
================================================
"""
Locally weighted linear regression, also called local regression, is a type of
non-parametric linear regression that prioritizes data closest to a given
prediction point. The algorithm estimates the vector of model coefficients β
using weighted least squares regression:
β = (XᵀWX)⁻¹(XᵀWy),
where X is the design matrix, y is the response vector, and W is the diagonal
weight matrix.
This implementation calculates wᵢ, the weight of the ith training sample, using
the Gaussian weight:
wᵢ = exp(-‖xᵢ - x‖²/(2τ²)),
where xᵢ is the ith training sample, x is the prediction point, τ is the
"bandwidth", and ‖x‖ is the Euclidean norm (also called the 2-norm or the L²
norm). The bandwidth τ controls how quickly the weight of a training sample
decreases as its distance from the prediction point increases. One can think of
the Gaussian weight as a bell curve centered around the prediction point: a
training sample is weighted lower if it's farther from the center, and τ
controls the spread of the bell curve.
Other types of locally weighted regression such as locally estimated scatterplot
smoothing (LOESS) typically use different weight functions.
References:
- https://en.wikipedia.org/wiki/Local_regression
- https://en.wikipedia.org/wiki/Weighted_least_squares
- https://cs229.stanford.edu/notes2022fall/main_notes.pdf
"""
import matplotlib.pyplot as plt
import numpy as np
def weight_matrix(point: np.ndarray, x_train: np.ndarray, tau: float) -> np.ndarray:
"""
Calculate the weight of every point in the training data around a given
prediction point
Args:
point: x-value at which the prediction is being made
x_train: ndarray of x-values for training
tau: bandwidth value, controls how quickly the weight of training values
decreases as the distance from the prediction point increases
Returns:
m x m weight matrix around the prediction point, where m is the size of
the training set
>>> weight_matrix(
... np.array([1., 1.]),
... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
... 0.6
... )
array([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
"""
m = len(x_train) # Number of training samples
weights = np.eye(m) # Initialize weights as identity matrix
for j in range(m):
diff = point - x_train[j]
weights[j, j] = np.exp(diff @ diff.T / (-2.0 * tau**2))
return weights
def local_weight(
point: np.ndarray, x_train: np.ndarray, y_train: np.ndarray, tau: float
) -> np.ndarray:
"""
Calculate the local weights at a given prediction point using the weight
matrix for that point
Args:
point: x-value at which the prediction is being made
x_train: ndarray of x-values for training
y_train: ndarray of y-values for training
tau: bandwidth value, controls how quickly the weight of training values
decreases as the distance from the prediction point increases
Returns:
ndarray of local weights
>>> local_weight(
... np.array([1., 1.]),
... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
... np.array([[1.01, 1.66, 3.5]]),
... 0.6
... )
array([[0.00873174],
[0.08272556]])
"""
weight_mat = weight_matrix(point, x_train, tau)
weight = np.linalg.inv(x_train.T @ weight_mat @ x_train) @ (
x_train.T @ weight_mat @ y_train.T
)
return weight
def local_weight_regression(
x_train: np.ndarray, y_train: np.ndarray, tau: float
) -> np.ndarray:
"""
Calculate predictions for each point in the training data
Args:
x_train: ndarray of x-values for training
y_train: ndarray of y-values for training
tau: bandwidth value, controls how quickly the weight of training values
decreases as the distance from the prediction point increases
Returns:
ndarray of predictions
>>> local_weight_regression(
... np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
... np.array([[1.01, 1.66, 3.5]]),
... 0.6
... )
array([1.07173261, 1.65970737, 3.50160179])
"""
y_pred = np.zeros(len(x_train)) # Initialize array of predictions
for i, item in enumerate(x_train):
y_pred[i] = np.dot(item, local_weight(item, x_train, y_train, tau)).item()
return y_pred
def load_data(
dataset_name: str, x_name: str, y_name: str
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Load data from seaborn and split it into x and y points
>>> pass # No doctests, function is for demo purposes only
"""
import seaborn as sns
data = sns.load_dataset(dataset_name)
x_data = np.array(data[x_name])
y_data = np.array(data[y_name])
one = np.ones(len(y_data))
# pairing elements of one and x_data
x_train = np.column_stack((one, x_data))
return x_train, x_data, y_data
def plot_preds(
x_train: np.ndarray,
preds: np.ndarray,
x_data: np.ndarray,
y_data: np.ndarray,
x_name: str,
y_name: str,
) -> None:
"""
Plot predictions and display the graph
>>> pass # No doctests, function is for demo purposes only
"""
x_train_sorted = np.sort(x_train, axis=0)
plt.scatter(x_data, y_data, color="blue")
plt.plot(
x_train_sorted[:, 1],
preds[x_train[:, 1].argsort(0)],
color="yellow",
linewidth=5,
)
plt.title("Local Weighted Regression")
plt.xlabel(x_name)
plt.ylabel(y_name)
plt.show()
if __name__ == "__main__":
import doctest
doctest.testmod()
# Demo with a dataset from the seaborn module
training_data_x, total_bill, tip = load_data("tips", "total_bill", "tip")
predictions = local_weight_regression(training_data_x, tip, 5)
plot_preds(training_data_x, predictions, total_bill, tip, "total_bill", "tip")
================================================
FILE: machine_learning/logistic_regression.py
================================================
#!/usr/bin/python
# Logistic Regression from scratch
# importing all the required libraries
"""
Implementing logistic regression for classification problem
Helpful resources:
Coursera ML course
https://medium.com/@martinpella/logistic-regression-from-scratch-in-python-124c5636b8ac
"""
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets
# sigmoid function or logistic function is used as a hypothesis function in
# classification problems
def sigmoid_function(z: float | np.ndarray) -> float | np.ndarray:
"""
Also known as Logistic Function.
1
f(x) = -------
1 + e⁻ˣ
    The sigmoid function approaches a value of 1 as its input 'x' becomes
    increasingly positive, and approaches 0 as 'x' becomes increasingly negative.
Reference: https://en.wikipedia.org/wiki/Sigmoid_function
@param z: input to the function
@returns: returns value in the range 0 to 1
Examples:
>>> float(sigmoid_function(4))
0.9820137900379085
>>> sigmoid_function(np.array([-3, 3]))
array([0.04742587, 0.95257413])
>>> sigmoid_function(np.array([-3, 3, 1]))
array([0.04742587, 0.95257413, 0.73105858])
>>> sigmoid_function(np.array([-0.01, -2, -1.9]))
array([0.49750002, 0.11920292, 0.13010847])
>>> sigmoid_function(np.array([-1.3, 5.3, 12]))
array([0.21416502, 0.9950332 , 0.99999386])
>>> sigmoid_function(np.array([0.01, 0.02, 4.1]))
array([0.50249998, 0.50499983, 0.9836975 ])
>>> sigmoid_function(np.array([0.8]))
array([0.68997448])
"""
return 1 / (1 + np.exp(-z))
def cost_function(h: np.ndarray, y: np.ndarray) -> float:
"""
Cost function quantifies the error between predicted and expected values.
The cost function used in Logistic Regression is called Log Loss
or Cross Entropy Function.
J(θ) = (1/m) * Σ [ -y * log(hθ(x)) - (1 - y) * log(1 - hθ(x)) ]
Where:
- J(θ) is the cost that we want to minimize during training
- m is the number of training examples
- Σ represents the summation over all training examples
- y is the actual binary label (0 or 1) for a given example
- hθ(x) is the predicted probability that x belongs to the positive class
@param h: the output of sigmoid function. It is the estimated probability
that the input example 'x' belongs to the positive class
@param y: the actual binary label associated with input example 'x'
Examples:
>>> estimations = sigmoid_function(np.array([0.3, -4.3, 8.1]))
>>> cost_function(h=estimations,y=np.array([1, 0, 1]))
0.18937868932131605
>>> estimations = sigmoid_function(np.array([4, 3, 1]))
>>> cost_function(h=estimations,y=np.array([1, 0, 0]))
1.459999655669926
>>> estimations = sigmoid_function(np.array([4, -3, -1]))
>>> cost_function(h=estimations,y=np.array([1,0,0]))
0.1266663223365915
>>> estimations = sigmoid_function(0)
>>> cost_function(h=estimations,y=np.array([1]))
0.6931471805599453
References:
- https://en.wikipedia.org/wiki/Logistic_regression
"""
return float((-y * np.log(h) - (1 - y) * np.log(1 - h)).mean())
def log_likelihood(x, y, weights):
scores = np.dot(x, weights)
return np.sum(y * scores - np.log(1 + np.exp(scores)))
# here alpha is the learning rate, x is the feature matrix, y is the target vector
def logistic_reg(alpha, x, y, max_iterations=70000):
theta = np.zeros(x.shape[1])
for iterations in range(max_iterations):
z = np.dot(x, theta)
h = sigmoid_function(z)
gradient = np.dot(x.T, h - y) / y.size
theta = theta - alpha * gradient # updating the weights
z = np.dot(x, theta)
h = sigmoid_function(z)
j = cost_function(h, y)
if iterations % 100 == 0:
print(f"loss: {j} \t") # printing the loss after every 100 iterations
return theta
if __name__ == "__main__":
import doctest
doctest.testmod()
iris = datasets.load_iris()
x = iris.data[:, :2]
y = (iris.target != 0) * 1
alpha = 0.1
theta = logistic_reg(alpha, x, y, max_iterations=70000)
print("theta: ", theta) # printing the theta i.e our weights vector
def predict_prob(x):
return sigmoid_function(
np.dot(x, theta)
) # predicting the value of probability from the logistic regression algorithm
plt.figure(figsize=(10, 6))
plt.scatter(x[y == 0][:, 0], x[y == 0][:, 1], color="b", label="0")
plt.scatter(x[y == 1][:, 0], x[y == 1][:, 1], color="r", label="1")
(x1_min, x1_max) = (x[:, 0].min(), x[:, 0].max())
(x2_min, x2_max) = (x[:, 1].min(), x[:, 1].max())
(xx1, xx2) = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
grid = np.c_[xx1.ravel(), xx2.ravel()]
probs = predict_prob(grid).reshape(xx1.shape)
plt.contour(xx1, xx2, probs, [0.5], linewidths=1, colors="black")
plt.legend()
plt.show()
================================================
FILE: machine_learning/loss_functions.py
================================================
import numpy as np
def binary_cross_entropy(
y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
) -> float:
"""
Calculate the mean binary cross-entropy (BCE) loss between true labels and predicted
probabilities.
BCE loss quantifies dissimilarity between true labels (0 or 1) and predicted
probabilities. It's widely used in binary classification tasks.
    BCE = -(1/n) * Σ(y_true * ln(y_pred) + (1 - y_true) * ln(1 - y_pred))
Reference: https://en.wikipedia.org/wiki/Cross_entropy
Parameters:
- y_true: True binary labels (0 or 1)
- y_pred: Predicted probabilities for class 1
- epsilon: Small constant to avoid numerical instability
>>> true_labels = np.array([0, 1, 1, 0, 1])
>>> predicted_probs = np.array([0.2, 0.7, 0.9, 0.3, 0.8])
>>> float(binary_cross_entropy(true_labels, predicted_probs))
0.2529995012327421
>>> true_labels = np.array([0, 1, 1, 0, 1])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
>>> binary_cross_entropy(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
y_pred = np.clip(y_pred, epsilon, 1 - epsilon) # Clip predictions to avoid log(0)
bce_loss = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
return np.mean(bce_loss)
def binary_focal_cross_entropy(
y_true: np.ndarray,
y_pred: np.ndarray,
gamma: float = 2.0,
alpha: float = 0.25,
epsilon: float = 1e-15,
) -> float:
"""
Calculate the mean binary focal cross-entropy (BFCE) loss between true labels
and predicted probabilities.
BFCE loss quantifies dissimilarity between true labels (0 or 1) and predicted
probabilities. It's a variation of binary cross-entropy that addresses class
imbalance by focusing on hard examples.
    BFCE = -(1/n) * Σ(alpha * (1 - y_pred)**gamma * y_true * log(y_pred)
                      + (1 - alpha) * y_pred**gamma * (1 - y_true) * log(1 - y_pred))
Reference: [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf)
Parameters:
- y_true: True binary labels (0 or 1).
- y_pred: Predicted probabilities for class 1.
- gamma: Focusing parameter for modulating the loss (default: 2.0).
- alpha: Weighting factor for class 1 (default: 0.25).
- epsilon: Small constant to avoid numerical instability.
>>> true_labels = np.array([0, 1, 1, 0, 1])
>>> predicted_probs = np.array([0.2, 0.7, 0.9, 0.3, 0.8])
>>> float(binary_focal_cross_entropy(true_labels, predicted_probs))
0.008257977659239775
>>> true_labels = np.array([0, 1, 1, 0, 1])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
>>> binary_focal_cross_entropy(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
# Clip predicted probabilities to avoid log(0)
y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
bcfe_loss = -(
alpha * (1 - y_pred) ** gamma * y_true * np.log(y_pred)
+ (1 - alpha) * y_pred**gamma * (1 - y_true) * np.log(1 - y_pred)
)
return np.mean(bcfe_loss)
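# Relationship sketch (pure NumPy, no extra dependencies): with gamma=0 the
# focal factors vanish and alpha=0.5 weights both classes equally, so the
# result is exactly half the plain binary cross-entropy.
#   labels = np.array([0, 1, 1, 0, 1])
#   probs = np.array([0.2, 0.7, 0.9, 0.3, 0.8])
#   assert np.isclose(binary_focal_cross_entropy(labels, probs, gamma=0.0, alpha=0.5),
#                     0.5 * binary_cross_entropy(labels, probs))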
def categorical_cross_entropy(
y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
) -> float:
"""
Calculate categorical cross-entropy (CCE) loss between true class labels and
predicted class probabilities.
CCE = -Σ(y_true * ln(y_pred))
Reference: https://en.wikipedia.org/wiki/Cross_entropy
Parameters:
- y_true: True class labels (one-hot encoded)
- y_pred: Predicted class probabilities
- epsilon: Small constant to avoid numerical instability
>>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
>>> float(categorical_cross_entropy(true_labels, pred_probs))
0.567395975254385
>>> true_labels = np.array([[1, 0], [0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same shape.
>>> true_labels = np.array([[2, 0, 1], [1, 0, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: y_true must be one-hot encoded.
>>> true_labels = np.array([[1, 0, 1], [1, 0, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: y_true must be one-hot encoded.
>>> true_labels = np.array([[1, 0, 0], [0, 1, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]])
>>> categorical_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: Predicted probabilities must sum to approximately 1.
"""
if y_true.shape != y_pred.shape:
raise ValueError("Input arrays must have the same shape.")
if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1):
raise ValueError("y_true must be one-hot encoded.")
if not np.all(np.isclose(np.sum(y_pred, axis=1), 1, rtol=epsilon, atol=epsilon)):
raise ValueError("Predicted probabilities must sum to approximately 1.")
y_pred = np.clip(y_pred, epsilon, 1) # Clip predictions to avoid log(0)
return -np.sum(y_true * np.log(y_pred))
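# Usage sketch: integer class labels can be turned into the required one-hot
# form with np.eye. Note that this function returns the total loss over all
# samples (a plain sum, not a mean).
#   labels = np.array([0, 1, 2])
#   one_hot = np.eye(3)[labels]
#   pred = np.array([[0.9, 0.05, 0.05], [0.1, 0.8, 0.1], [0.2, 0.2, 0.6]])
#   total = categorical_cross_entropy(one_hot, pred)  # == -(ln 0.9 + ln 0.8 + ln 0.6)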
def categorical_focal_cross_entropy(
y_true: np.ndarray,
y_pred: np.ndarray,
alpha: np.ndarray | None = None,
gamma: float = 2.0,
epsilon: float = 1e-15,
) -> float:
"""
Calculate the mean categorical focal cross-entropy (CFCE) loss between true
labels and predicted probabilities for multi-class classification.
CFCE loss is a generalization of binary focal cross-entropy for multi-class
classification. It addresses class imbalance by focusing on hard examples.
CFCE = -Σ alpha * (1 - y_pred)**gamma * y_true * log(y_pred)
Reference: [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf)
Parameters:
- y_true: True labels in one-hot encoded form.
- y_pred: Predicted probabilities for each class.
- alpha: Array of weighting factors for each class.
- gamma: Focusing parameter for modulating the loss (default: 2.0).
- epsilon: Small constant to avoid numerical instability.
Returns:
- The mean categorical focal cross-entropy loss.
>>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
>>> alpha = np.array([0.6, 0.2, 0.7])
>>> float(categorical_focal_cross_entropy(true_labels, pred_probs, alpha))
0.0025966118981496423
>>> true_labels = np.array([[0, 1, 0], [0, 0, 1]])
>>> pred_probs = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]])
>>> alpha = np.array([0.25, 0.25, 0.25])
>>> float(categorical_focal_cross_entropy(true_labels, pred_probs, alpha))
0.23315276982014324
>>> true_labels = np.array([[1, 0], [0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_focal_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: Shape of y_true and y_pred must be the same.
>>> true_labels = np.array([[2, 0, 1], [1, 0, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_focal_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: y_true must be one-hot encoded.
>>> true_labels = np.array([[1, 0, 1], [1, 0, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_focal_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: y_true must be one-hot encoded.
>>> true_labels = np.array([[1, 0, 0], [0, 1, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]])
>>> categorical_focal_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: Predicted probabilities must sum to approximately 1.
>>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
>>> alpha = np.array([0.6, 0.2])
>>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
Traceback (most recent call last):
...
ValueError: Length of alpha must match the number of classes.
"""
if y_true.shape != y_pred.shape:
raise ValueError("Shape of y_true and y_pred must be the same.")
if alpha is None:
alpha = np.ones(y_true.shape[1])
if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1):
raise ValueError("y_true must be one-hot encoded.")
if len(alpha) != y_true.shape[1]:
raise ValueError("Length of alpha must match the number of classes.")
if not np.all(np.isclose(np.sum(y_pred, axis=1), 1, rtol=epsilon, atol=epsilon)):
raise ValueError("Predicted probabilities must sum to approximately 1.")
# Clip predicted probabilities to avoid log(0)
y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
# Calculate loss for each class and sum across classes
cfce_loss = -np.sum(
alpha * np.power(1 - y_pred, gamma) * y_true * np.log(y_pred), axis=1
)
return np.mean(cfce_loss)
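# Intuition sketch for the focusing parameter: with gamma=2, a sample whose true
# class already gets probability 0.9 keeps only (1 - 0.9)**2 = 0.01 of its
# cross-entropy contribution, while one at probability 0.1 keeps
# (1 - 0.1)**2 = 0.81 of it, so hard examples dominate the mean loss.
#   easy_weight, hard_weight = (1 - 0.9) ** 2, (1 - 0.1) ** 2  # 0.01 vs 0.81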
def hinge_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculate the mean hinge loss between true labels and predicted values, as used
for training support vector machines (SVMs).
Hinge loss = max(0, 1 - true * pred)
Reference: https://en.wikipedia.org/wiki/Hinge_loss
Args:
- y_true: actual values (ground truth) encoded as -1 or 1
- y_pred: predicted values
>>> true_labels = np.array([-1, 1, 1, -1, 1])
>>> pred = np.array([-4, -0.3, 0.7, 5, 10])
>>> float(hinge_loss(true_labels, pred))
1.52
>>> true_labels = np.array([-1, 1, 1, -1, 1, 1])
>>> pred = np.array([-4, -0.3, 0.7, 5, 10])
>>> hinge_loss(true_labels, pred)
Traceback (most recent call last):
...
ValueError: Length of predicted and actual array must be same.
>>> true_labels = np.array([-1, 1, 10, -1, 1])
>>> pred = np.array([-4, -0.3, 0.7, 5, 10])
>>> hinge_loss(true_labels, pred)
Traceback (most recent call last):
...
ValueError: y_true can have values -1 or 1 only.
"""
if len(y_true) != len(y_pred):
raise ValueError("Length of predicted and actual array must be same.")
if np.any((y_true != -1) & (y_true != 1)):
raise ValueError("y_true can have values -1 or 1 only.")
hinge_losses = np.maximum(0, 1.0 - (y_true * y_pred))
return np.mean(hinge_losses)
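# Cross-check sketch (assumes scikit-learn is available; illustrative only): for
# labels encoded as -1/1, sklearn.metrics.hinge_loss computes the same mean.
#   from sklearn.metrics import hinge_loss as sk_hinge_loss
#   labels = np.array([-1, 1, 1, -1, 1])
#   decisions = np.array([-4, -0.3, 0.7, 5, 10])
#   assert np.isclose(hinge_loss(labels, decisions), sk_hinge_loss(labels, decisions))  # 1.52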
def huber_loss(y_true: np.ndarray, y_pred: np.ndarray, delta: float) -> float:
"""
Calculate the mean Huber loss between the given ground truth and predicted values.
The Huber loss describes the penalty incurred by an estimation procedure, and it
serves as a measure of accuracy for regression models.
Huber loss =
0.5 * (y_true - y_pred)^2 if |y_true - y_pred| <= delta
delta * |y_true - y_pred| - 0.5 * delta^2 otherwise
Reference: https://en.wikipedia.org/wiki/Huber_loss
Parameters:
- y_true: The true values (ground truth)
- y_pred: The predicted values
>>> true_values = np.array([0.9, 10.0, 2.0, 1.0, 5.2])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> bool(np.isclose(huber_loss(true_values, predicted_values, 1.0), 2.102))
True
>>> true_labels = np.array([11.0, 21.0, 3.32, 4.0, 5.0])
>>> predicted_probs = np.array([8.3, 20.8, 2.9, 11.2, 5.0])
>>> bool(np.isclose(huber_loss(true_labels, predicted_probs, 1.0), 1.80164))
True
>>> true_labels = np.array([11.0, 21.0, 3.32, 4.0])
>>> predicted_probs = np.array([8.3, 20.8, 2.9, 11.2, 5.0])
>>> huber_loss(true_labels, predicted_probs, 1.0)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
huber_mse = 0.5 * (y_true - y_pred) ** 2
huber_mae = delta * (np.abs(y_true - y_pred) - 0.5 * delta)
return np.where(np.abs(y_true - y_pred) <= delta, huber_mse, huber_mae).mean()
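# Cross-check sketch (assumes SciPy is available; illustrative only):
# scipy.special.huber(delta, r) applies the same per-residual penalty, so its
# mean over the residuals should agree with this function.
#   from scipy.special import huber
#   y_t = np.array([0.9, 10.0, 2.0, 1.0, 5.2])
#   y_p = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
#   assert np.isclose(huber_loss(y_t, y_p, 1.0), np.mean(huber(1.0, y_t - y_p)))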
def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculate the mean squared error (MSE) between ground truth and predicted values.
MSE measures the squared difference between true values and predicted values, and it
serves as a measure of accuracy for regression models.
MSE = (1/n) * Σ(y_true - y_pred)^2
Reference: https://en.wikipedia.org/wiki/Mean_squared_error
Parameters:
- y_true: The true values (ground truth)
- y_pred: The predicted values
>>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> bool(np.isclose(mean_squared_error(true_values, predicted_values), 0.028))
True
>>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
>>> mean_squared_error(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
squared_errors = (y_true - y_pred) ** 2
return np.mean(squared_errors)
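# Cross-check sketch (assumes scikit-learn is available; illustrative only):
#   from sklearn.metrics import mean_squared_error as sk_mse
#   y_t = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
#   y_p = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
#   assert np.isclose(mean_squared_error(y_t, y_p), sk_mse(y_t, y_p))  # ~0.028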
def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculates the Mean Absolute Error (MAE) between ground truth (observed)
and predicted values.
MAE measures the absolute difference between true values and predicted values.
Equation:
MAE = (1/n) * Σ(abs(y_true - y_pred))
Reference: https://en.wikipedia.org/wiki/Mean_absolute_error
Parameters:
- y_true: The true values (ground truth)
- y_pred: The predicted values
>>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> bool(np.isclose(mean_absolute_error(true_values, predicted_values), 0.16))
True
>>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> bool(np.isclose(mean_absolute_error(true_values, predicted_values), 2.16))
False
>>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 5.2])
>>> mean_absolute_error(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
return np.mean(abs(y_true - y_pred))
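# Cross-check sketch (assumes scikit-learn is available; illustrative only):
#   from sklearn.metrics import mean_absolute_error as sk_mae
#   y_t = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
#   y_p = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
#   assert np.isclose(mean_absolute_error(y_t, y_p), sk_mae(y_t, y_p))  # ~0.16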
def mean_squared_logarithmic_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculate the mean squared logarithmic error (MSLE) between ground truth and
predicted values.
MSLE measures the squared logarithmic difference between true values and predicted
values for regression models. It's particularly useful for dealing with skewed or
large-value data, and it's often used when the relative differences between
predicted and true values are more important than absolute differences.
MSLE = (1/n) * Σ(log(1 + y_true) - log(1 + y_pred))^2
Reference: https://insideaiml.com/blog/MeanSquared-Logarithmic-Error-Loss-1035
Parameters:
- y_true: The true values (ground truth)
- y_pred: The predicted values
>>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> float(mean_squared_logarithmic_error(true_values, predicted_values))
0.0030860877925181344
>>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
>>> mean_squared_logarithmic_error(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
squared_logarithmic_errors = (np.log1p(y_true) - np.log1p(y_pred)) ** 2
return np.mean(squared_logarithmic_errors)
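# Cross-check sketch (assumes scikit-learn is available; illustrative only):
# sklearn's mean_squared_log_error uses the same log1p-based formula.
#   from sklearn.metrics import mean_squared_log_error as sk_msle
#   y_t = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
#   y_p = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
#   assert np.isclose(mean_squared_logarithmic_error(y_t, y_p), sk_msle(y_t, y_p))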
def mean_absolute_percentage_error(
y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
) -> float:
"""
Calculate the Mean Absolute Percentage Error between y_true and y_pred.
Mean Absolute Percentage Error calculates the average of the absolute
percentage differences between the predicted and true values.
MAPE = (1/n) * Σ|(y_true[i] - y_pred[i]) / y_true[i]|
Source: https://stephenallwright.com/good-mape-score/
Parameters:
y_true (np.ndarray): Numpy array containing true/target values.
y_pred (np.ndarray): Numpy array containing predicted values.
Returns:
float: The Mean Absolute Percentage error between y_true and y_pred.
Examples:
>>> y_true = np.array([10, 20, 30, 40])
>>> y_pred = np.array([12, 18, 33, 45])
>>> float(mean_absolute_percentage_error(y_true, y_pred))
0.13125
>>> y_true = np.array([1, 2, 3, 4])
>>> y_pred = np.array([2, 3, 4, 5])
>>> float(mean_absolute_percentage_error(y_true, y_pred))
0.5208333333333333
>>> y_true = np.array([34, 37, 44, 47, 48, 48, 46, 43, 32, 27, 26, 24])
>>> y_pred = np.array([37, 40, 46, 44, 46, 50, 45, 44, 34, 30, 22, 23])
>>> float(mean_absolute_percentage_error(y_true, y_pred))
0.064671076436071
"""
if len(y_true) != len(y_pred):
raise ValueError("The length of the two arrays should be the same.")
y_true = np.where(y_true == 0, epsilon, y_true)
absolute_percentage_diff = np.abs((y_true - y_pred) / y_true)
return np.mean(absolute_percentage_diff)
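# Cross-check sketch (assumes scikit-learn >= 0.24 is available; illustrative
# only): for strictly non-zero targets the result matches sklearn's MAPE.
#   from sklearn.metrics import mean_absolute_percentage_error as sk_mape
#   y_t = np.array([10, 20, 30, 40])
#   y_p = np.array([12, 18, 33, 45])
#   assert np.isclose(mean_absolute_percentage_error(y_t, y_p), sk_mape(y_t, y_p))  # 0.13125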
def perplexity_loss(
y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-7
) -> float:
"""
Calculate the perplexity for the y_true and y_pred.
Perplexity is useful for evaluating language models in
Natural Language Processing (NLP).
It measures how uncertain (perplexed) the model is about its predictions:
the lower the perplexity, the better the model.
Perplexity = exp(-(1/N) * Σ ln(p(x)))
Reference:
https://en.wikipedia.org/wiki/Perplexity
Args:
y_true: Actual label encoded sentences of shape (batch_size, sentence_length)
y_pred: Predicted sentences of shape (batch_size, sentence_length, vocab_size)
epsilon: Small floating point number to avoid getting inf for log(0)
Returns:
Perplexity loss between y_true and y_pred.
>>> y_true = np.array([[1, 4], [2, 3]])
>>> y_pred = np.array(
... [[[0.28, 0.19, 0.21 , 0.15, 0.15],
... [0.24, 0.19, 0.09, 0.18, 0.27]],
... [[0.03, 0.26, 0.21, 0.18, 0.30],
... [0.28, 0.10, 0.33, 0.15, 0.12]]]
... )
>>> float(perplexity_loss(y_true, y_pred))
5.0247347775367945
>>> y_true = np.array([[1, 4], [2, 3]])
>>> y_pred = np.array(
... [[[0.28, 0.19, 0.21 , 0.15, 0.15],
... [0.24, 0.19, 0.09, 0.18, 0.27],
... [0.30, 0.10, 0.20, 0.15, 0.25]],
... [[0.03, 0.26, 0.21, 0.18, 0.30],
... [0.28, 0.10, 0.33, 0.15, 0.12],
... [0.30, 0.10, 0.20, 0.15, 0.25]],]
... )
>>> perplexity_loss(y_true, y_pred)
Traceback (most recent call last):
...
ValueError: Sentence length of y_true and y_pred must be equal.
>>> y_true = np.array([[1, 4], [2, 11]])
>>> y_pred = np.array(
... [[[0.28, 0.19, 0.21 , 0.15, 0.15],
... [0.24, 0.19, 0.09, 0.18, 0.27]],
... [[0.03, 0.26, 0.21, 0.18, 0.30],
... [0.28, 0.10, 0.33, 0.15, 0.12]]]
... )
>>> perplexity_loss(y_true, y_pred)
Traceback (most recent call last):
...
ValueError: Label value must be less than vocabulary size.
>>> y_true = np.array([[1, 4]])
>>> y_pred = np.array(
... [[[0.28, 0.19, 0.21 , 0.15, 0.15],
... [0.24, 0.19, 0.09, 0.18, 0.27]],
... [[0.03, 0.26, 0.21, 0.18, 0.30],
... [0.28, 0.10, 0.33, 0.15, 0.12]]]
... )
>>> perplexity_loss(y_true, y_pred)
Traceback (most recent call last):
...
ValueError: Batch size of y_true and y_pred must be equal.
"""
vocab_size = y_pred.shape[2]
if y_true.shape[0] != y_pred.shape[0]:
raise ValueError("Batch size of y_true and y_pred must be equal.")
if y_true.shape[1] != y_pred.shape[1]:
raise ValueError("Sentence length of y_true and y_pred must be equal.")
if np.max(y_true) >= vocab_size:
raise ValueError("Label value must be less than vocabulary size.")
# Matrix to select prediction value only for true class
filter_matrix = np.array(
[[list(np.eye(vocab_size)[word]) for word in sentence] for sentence in y_true]
)
# Getting the matrix containing prediction for only true class
true_class_pred = np.sum(y_pred * filter_matrix, axis=2).clip(epsilon, 1)
# Calculating perplexity for each sentence
perp_losses = np.exp(np.negative(np.mean(np.log(true_class_pred), axis=1)))
return np.mean(perp_losses)
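# Intuition sketch (pure NumPy): perplexity is the exponential of the mean
# per-token cross-entropy, so a model that assigns a uniform 1/4 probability to
# every token over a 4-word vocabulary has a perplexity of exactly 4.
#   uniform_true = np.array([[0, 0]])
#   uniform_pred = np.full((1, 2, 4), 0.25)
#   assert np.isclose(perplexity_loss(uniform_true, uniform_pred), 4.0)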
def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) -> float:
"""
Calculate the Smooth L1 Loss between y_true and y_pred.
The Smooth L1 Loss is less sensitive to outliers than the L2 Loss and is often used
in regression problems, such as object detection.
Smooth L1 Loss =
0.5 * (x - y)^2 / beta, if |x - y| < beta
|x - y| - 0.5 * beta, otherwise
Reference:
https://pytorch.org/docs/stable/generated/torch.nn.SmoothL1Loss.html
Args:
y_true: Array of true values.
y_pred: Array of predicted values.
beta: Specifies the threshold at which to change between L1 and L2 loss.
Returns:
The calculated Smooth L1 Loss between y_true and y_pred.
Raises:
ValueError: If the length of the two arrays is not the same.
>>> y_true = np.array([3, 5, 2, 7])
>>> y_pred = np.array([2.9, 4.8, 2.1, 7.2])
>>> float(smooth_l1_loss(y_true, y_pred, 1.0))
0.012500000000000022
>>> y_true = np.array([2, 4, 6])
>>> y_pred = np.array([1, 5, 7])
>>> float(smooth_l1_loss(y_true, y_pred, 1.0))
0.5
>>> y_true = np.array([1, 3, 5, 7])
>>> y_pred = np.array([1, 3, 5, 7])
>>> float(smooth_l1_loss(y_true, y_pred, 1.0))
0.0
>>> y_true = np.array([1, 3, 5])
>>> y_pred = np.array([1, 3, 5, 7])
>>> smooth_l1_loss(y_true, y_pred, 1.0)
Traceback (most recent call last):
...
ValueError: The length of the two arrays should be the same.
"""
if len(y_true) != len(y_pred):
raise ValueError("The length of the two arrays should be the same.")
diff = np.abs(y_true - y_pred)
loss = np.where(diff < beta, 0.5 * diff**2 / beta, diff - 0.5 * beta)
return np.mean(loss)
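# Relationship sketch (pure NumPy): Smooth L1 is a rescaled Huber loss, so for
# any beta > 0 this function equals huber_loss(y_true, y_pred, beta) / beta.
#   y_t = np.array([3, 5, 2, 7])
#   y_p = np.array([2.9, 4.8, 2.1, 7.2])
#   assert np.isclose(smooth_l1_loss(y_t, y_p, 2.0), huber_loss(y_t, y_p, 2.0) / 2.0)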
def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
and predicted probabilities.
KL divergence loss quantifies dissimilarity between true labels and predicted
probabilities. It's often used in training generative models.
KL = Σ(y_true * ln(y_true / y_pred))
Reference: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
Parameters:
- y_true: True class probabilities
- y_pred: Predicted class probabilities
>>> true_labels = np.array([0.2, 0.3, 0.5])
>>> predicted_probs = np.array([0.3, 0.3, 0.4])
>>> float(kullback_leibler_divergence(true_labels, predicted_probs))
0.030478754035472025
>>> true_labels = np.array([0.2, 0.3, 0.5])
>>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
>>> kullback_leibler_divergence(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
kl_loss = y_true * np.log(y_true / y_pred)
return np.sum(kl_loss)
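# Cross-check sketch (assumes SciPy is available; illustrative only): for two
# proper probability vectors, scipy.stats.entropy(pk, qk) computes the same
# KL divergence.
#   from scipy.stats import entropy
#   p = np.array([0.2, 0.3, 0.5])
#   q = np.array([0.3, 0.3, 0.4])
#   assert np.isclose(kullback_leibler_divergence(p, q), entropy(p, q))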
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: machine_learning/lstm/__init__.py
================================================
================================================
FILE: machine_learning/lstm/lstm_prediction.py
================================================
"""
Create a Long Short Term Memory (LSTM) network model
An LSTM is a type of Recurrent Neural Network (RNN) as discussed at:
* https://colah.github.io/posts/2015-08-Understanding-LSTMs
* https://en.wikipedia.org/wiki/Long_short-term_memory
"""
import numpy as np
import pandas as pd
from keras.layers import LSTM, Dense
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
if __name__ == "__main__":
"""
The first part of building a model is to get the data and prepare it for our
model. You can use any dataset for stock prediction; just make sure you set the
price column on line number 21. Here we use a dataset which has the price in
the 3rd column.
"""
sample_data = pd.read_csv("sample_data.csv", header=None)
len_data = sample_data.shape[0]
# If you're using some other dataset, set the target column here
actual_data = sample_data.iloc[:, 1:2]
actual_data = actual_data.to_numpy().reshape(len_data, 1)
actual_data = MinMaxScaler().fit_transform(actual_data)
look_back = 10
forward_days = 5
periods = 20
division = len_data - periods * look_back
train_data = actual_data[:division]
test_data = actual_data[division - look_back :]
train_x, train_y = [], []
test_x, test_y = [], []
for i in range(len(train_data) - forward_days - look_back + 1):
train_x.append(train_data[i : i + look_back])
train_y.append(train_data[i + look_back : i + look_back + forward_days])
for i in range(len(test_data) - forward_days - look_back + 1):
test_x.append(test_data[i : i + look_back])
test_y.append(test_data[i + look_back : i + look_back + forward_days])
x_train = np.array(train_x)
x_test = np.array(test_x)
y_train = np.array([list(i.ravel()) for i in train_y])
y_test = np.array([list(i.ravel()) for i in test_y])
model = Sequential()
model.add(LSTM(128, input_shape=(look_back, 1), return_sequences=True))
model.add(LSTM(64, input_shape=(128, 1)))
model.add(Dense(forward_days))
model.compile(loss="mean_squared_error", optimizer="adam")
history = model.fit(
x_train, y_train, epochs=150, verbose=1, shuffle=True, batch_size=4
)
pred = model.predict(x_test)
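# A small follow-up sketch (not part of the original script): y_test is in the
# scaler's [0, 1] range and pred approximates it, so a quick error summary on
# the test windows would be
#   rmse = float(np.sqrt(np.mean((pred - y_test) ** 2)))
# Reporting errors in the original price scale would require keeping the fitted
# MinMaxScaler in a variable above and applying its inverse_transform first.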
================================================
FILE: machine_learning/lstm/sample_data.csv
================================================
04/24/2020, 1279.31, 1640394, 1261.17, 1280.4, 1249.45
04/23/2020, 1276.31, 1566203, 1271.55, 1293.31, 1265.67
04/22/2020, 1263.21, 2093140, 1245.54, 1285.6133, 1242
04/21/2020, 1216.34, 2153003, 1247, 1254.27, 1209.71
04/20/2020, 1266.61, 1695488, 1271, 1281.6, 1261.37
04/17/2020, 1283.25, 1949042, 1284.85, 1294.43, 1271.23
04/16/2020, 1263.47, 2518099, 1274.1, 1279, 1242.62
04/15/2020, 1262.47, 1671703, 1245.61, 1280.46, 1240.4
04/14/2020, 1269.23, 2470353, 1245.09, 1282.07, 1236.93
04/13/2020, 1217.56, 1739828, 1209.18, 1220.51, 1187.5984
04/09/2020, 1211.45, 2175421, 1224.08, 1225.57, 1196.7351
04/08/2020, 1210.28, 1975135, 1206.5, 1219.07, 1188.16
04/07/2020, 1186.51, 2387329, 1221, 1225, 1182.23
04/06/2020, 1186.92, 2664723, 1138, 1194.66, 1130.94
04/03/2020, 1097.88, 2313400, 1119.015, 1123.54, 1079.81
04/02/2020, 1120.84, 1964881, 1098.26, 1126.86, 1096.4
04/01/2020, 1105.62, 2344173, 1122, 1129.69, 1097.45
03/31/2020, 1162.81, 2487983, 1147.3, 1175.31, 1138.14
03/30/2020, 1146.82, 2574061, 1125.04, 1151.63, 1096.48
03/27/2020, 1110.71, 3208495, 1125.67, 1150.6702, 1105.91
03/26/2020, 1161.75, 3573755, 1111.8, 1169.97, 1093.53
03/25/2020, 1102.49, 4081528, 1126.47, 1148.9, 1086.01
03/24/2020, 1134.46, 3344450, 1103.77, 1135, 1090.62
03/23/2020, 1056.62, 4044137, 1061.32, 1071.32, 1013.5361
03/20/2020, 1072.32, 3601750, 1135.72, 1143.99, 1065.49
03/19/2020, 1115.29, 3651106, 1093.05, 1157.9699, 1060.1075
03/18/2020, 1096.8, 4233435, 1056.51, 1106.5, 1037.28
03/17/2020, 1119.8, 3861489, 1093.11, 1130.86, 1056.01
03/16/2020, 1084.33, 4252365, 1096, 1152.2665, 1074.44
03/13/2020, 1219.73, 3700125, 1179, 1219.76, 1117.1432
03/12/2020, 1114.91, 4226748, 1126, 1193.87, 1113.3
03/11/2020, 1215.41, 2611229, 1249.7, 1260.96, 1196.07
03/10/2020, 1280.39, 2611373, 1260, 1281.15, 1218.77
03/09/2020, 1215.56, 3365365, 1205.3, 1254.7599, 1200
03/06/2020, 1298.41, 2660628, 1277.06, 1306.22, 1261.05
03/05/2020, 1319.04, 2561288, 1350.2, 1358.91, 1305.1
03/04/2020, 1386.52, 1913315, 1359.23, 1388.09, 1343.11
03/03/2020, 1341.39, 2402326, 1399.42, 1410.15, 1332
03/02/2020, 1389.11, 2431468, 1351.61, 1390.87, 1326.815
02/28/2020, 1339.33, 3790618, 1277.5, 1341.14, 1271
02/27/2020, 1318.09, 2978300, 1362.06, 1371.7037, 1317.17
02/26/2020, 1393.18, 2204037, 1396.14, 1415.7, 1379
02/25/2020, 1388.45, 2478278, 1433, 1438.14, 1382.4
02/24/2020, 1421.59, 2867053, 1426.11, 1436.97, 1411.39
02/21/2020, 1485.11, 1732273, 1508.03, 1512.215, 1480.44
02/20/2020, 1518.15, 1096552, 1522, 1529.64, 1506.82
02/19/2020, 1526.69, 949268, 1525.07, 1532.1063, 1521.4
02/18/2020, 1519.67, 1121140, 1515, 1531.63, 1512.59
02/14/2020, 1520.74, 1197836, 1515.6, 1520.74, 1507.34
02/13/2020, 1514.66, 929730, 1512.69, 1527.18, 1504.6
02/12/2020, 1518.27, 1167565, 1514.48, 1520.695, 1508.11
02/11/2020, 1508.79, 1344633, 1511.81, 1529.63, 1505.6378
02/10/2020, 1508.68, 1419876, 1474.32, 1509.5, 1474.32
02/07/2020, 1479.23, 1172270, 1467.3, 1485.84, 1466.35
02/06/2020, 1476.23, 1679384, 1450.33, 1481.9997, 1449.57
02/05/2020, 1448.23, 1986157, 1462.42, 1463.84, 1430.56
02/04/2020, 1447.07, 3932954, 1457.07, 1469.5, 1426.3
02/03/2020, 1485.94, 3055216, 1462, 1490, 1458.99
01/31/2020, 1434.23, 2417214, 1468.9, 1470.13, 1428.53
01/30/2020, 1455.84, 1339421, 1439.96, 1457.28, 1436.4
01/29/2020, 1458.63, 1078667, 1458.8, 1465.43, 1446.74
01/28/2020, 1452.56, 1577422, 1443, 1456, 1432.47
01/27/2020, 1433.9, 1755201, 1431, 1438.07, 1421.2
01/24/2020, 1466.71, 1784644, 1493.59, 1495.495, 1465.25
01/23/2020, 1486.65, 1351354, 1487.64, 1495.52, 1482.1
01/22/2020, 1485.95, 1610846, 1491, 1503.2143, 1484.93
01/21/2020, 1484.4, 2036780, 1479.12, 1491.85, 1471.2
01/17/2020, 1480.39, 2396215, 1462.91, 1481.2954, 1458.22
01/16/2020, 1451.7, 1173688, 1447.44, 1451.99, 1440.92
01/15/2020, 1439.2, 1282685, 1430.21, 1441.395, 1430.21
01/14/2020, 1430.88, 1560453, 1439.01, 1441.8, 1428.37
01/13/2020, 1439.23, 1653482, 1436.13, 1440.52, 1426.02
01/10/2020, 1429.73, 1821566, 1427.56, 1434.9292, 1418.35
01/09/2020, 1419.83, 1502664, 1420.57, 1427.33, 1410.27
01/08/2020, 1404.32, 1529177, 1392.08, 1411.58, 1390.84
01/07/2020, 1393.34, 1511693, 1397.94, 1402.99, 1390.38
01/06/2020, 1394.21, 1733149, 1350, 1396.5, 1350
01/03/2020, 1360.66, 1187006, 1347.86, 1372.5, 1345.5436
01/02/2020, 1367.37, 1406731, 1341.55, 1368.14, 1341.55
12/31/2019, 1337.02, 962468, 1330.11, 1338, 1329.085
12/30/2019, 1336.14, 1051323, 1350, 1353, 1334.02
12/27/2019, 1351.89, 1038718, 1362.99, 1364.53, 1349.31
12/26/2019, 1360.4, 667754, 1346.17, 1361.3269, 1344.47
12/24/2019, 1343.56, 347518, 1348.5, 1350.26, 1342.78
12/23/2019, 1348.84, 883200, 1355.87, 1359.7999, 1346.51
12/20/2019, 1349.59, 3316905, 1363.35, 1363.64, 1349
12/19/2019, 1356.04, 1470112, 1351.82, 1358.1, 1348.985
12/18/2019, 1352.62, 1657069, 1356.6, 1360.47, 1351
12/17/2019, 1355.12, 1855259, 1362.89, 1365, 1351.3231
12/16/2019, 1361.17, 1397451, 1356.5, 1364.68, 1352.67
12/13/2019, 1347.83, 1550028, 1347.95, 1353.0931, 1343.87
12/12/2019, 1350.27, 1281722, 1345.94, 1355.775, 1340.5
12/11/2019, 1345.02, 850796, 1350.84, 1351.2, 1342.67
12/10/2019, 1344.66, 1094653, 1341.5, 1349.975, 1336.04
12/09/2019, 1343.56, 1355795, 1338.04, 1359.45, 1337.84
12/06/2019, 1340.62, 1315510, 1333.44, 1344, 1333.44
12/05/2019, 1328.13, 1212818, 1328, 1329.3579, 1316.44
12/04/2019, 1320.54, 1538110, 1307.01, 1325.8, 1304.87
12/03/2019, 1295.28, 1268647, 1279.57, 1298.461, 1279
12/02/2019, 1289.92, 1511851, 1301, 1305.83, 1281
11/29/2019, 1304.96, 586981, 1307.12, 1310.205, 1303.97
11/27/2019, 1312.99, 996329, 1315, 1318.36, 1309.63
11/26/2019, 1313.55, 1069795, 1309.86, 1314.8, 1305.09
11/25/2019, 1306.69, 1036487, 1299.18, 1311.31, 1298.13
11/22/2019, 1295.34, 1386506, 1305.62, 1308.73, 1291.41
11/21/2019, 1301.35, 995499, 1301.48, 1312.59, 1293
11/20/2019, 1303.05, 1309835, 1311.74, 1315, 1291.15
11/19/2019, 1315.46, 1269372, 1327.7, 1327.7, 1312.8
11/18/2019, 1320.7, 1488083, 1332.22, 1335.5288, 1317.5
11/15/2019, 1334.87, 1782955, 1318.94, 1334.88, 1314.2796
11/14/2019, 1311.46, 1194305, 1297.5, 1317, 1295.65
11/13/2019, 1298, 853861, 1294.07, 1304.3, 1293.51
11/12/2019, 1298.8, 1085859, 1300, 1310, 1295.77
11/11/2019, 1299.19, 1012429, 1303.18, 1306.425, 1297.41
11/08/2019, 1311.37, 1251916, 1305.28, 1318, 1304.365
11/07/2019, 1308.86, 2029970, 1294.28, 1323.74, 1294.245
11/06/2019, 1291.8, 1152977, 1289.46, 1293.73, 1282.5
11/05/2019, 1292.03, 1282711, 1292.89, 1298.93, 1291.2289
11/04/2019, 1291.37, 1500964, 1276.45, 1294.13, 1276.355
11/01/2019, 1273.74, 1670072, 1265, 1274.62, 1260.5
10/31/2019, 1260.11, 1455651, 1261.28, 1267.67, 1250.8428
10/30/2019, 1261.29, 1408851, 1252.97, 1269.36, 1252
10/29/2019, 1262.62, 1886380, 1276.23, 1281.59, 1257.2119
10/28/2019, 1290, 2613237, 1275.45, 1299.31, 1272.54
10/25/2019, 1265.13, 1213051, 1251.03, 1269.6, 1250.01
10/24/2019, 1260.99, 1039868, 1260.9, 1264, 1253.715
10/23/2019, 1259.13, 928595, 1242.36, 1259.89, 1242.36
10/22/2019, 1242.8, 1047851, 1247.85, 1250.6, 1241.38
10/21/2019, 1246.15, 1038042, 1252.26, 1254.6287, 1240.6
10/18/2019, 1245.49, 1352839, 1253.46, 1258.89, 1241.08
10/17/2019, 1253.07, 980510, 1250.93, 1263.325, 1249.94
10/16/2019, 1243.64, 1168174, 1241.17, 1254.74, 1238.45
10/15/2019, 1243.01, 1395259, 1220.4, 1247.33, 1220.4
10/14/2019, 1217.14, 882039, 1212.34, 1226.33, 1211.76
10/11/2019, 1215.45, 1277144, 1222.21, 1228.39, 1213.74
10/10/2019, 1208.67, 932531, 1198.58, 1215, 1197.34
10/09/2019, 1202.31, 876632, 1199.35, 1208.35, 1197.63
10/08/2019, 1189.13, 1141784, 1197.59, 1206.08, 1189.01
10/07/2019, 1207.68, 867149, 1204.4, 1218.2036, 1203.75
10/04/2019, 1209, 1183264, 1191.89, 1211.44, 1189.17
10/03/2019, 1187.83, 1663656, 1180, 1189.06, 1162.43
10/02/2019, 1176.63, 1639237, 1196.98, 1196.99, 1171.29
10/01/2019, 1205.1, 1358279, 1219, 1231.23, 1203.58
09/30/2019, 1219, 1419676, 1220.97, 1226, 1212.3
09/27/2019, 1225.09, 1354432, 1243.01, 1244.02, 1214.45
09/26/2019, 1241.39, 1561882, 1241.96, 1245, 1232.268
09/25/2019, 1246.52, 1593875, 1215.82, 1248.3, 1210.09
09/24/2019, 1218.76, 1591786, 1240, 1246.74, 1210.68
09/23/2019, 1234.03, 1075253, 1226, 1239.09, 1224.17
09/20/2019, 1229.93, 2337269, 1233.12, 1243.32, 1223.08
09/19/2019, 1238.71, 1000155, 1232.06, 1244.44, 1232.02
09/18/2019, 1232.41, 1144333, 1227.51, 1235.61, 1216.53
09/17/2019, 1229.15, 958112, 1230.4, 1235, 1223.69
09/16/2019, 1231.3, 1053299, 1229.52, 1239.56, 1225.61
09/13/2019, 1239.56, 1301350, 1231.35, 1240.88, 1227.01
09/12/2019, 1234.25, 1725908, 1224.3, 1241.86, 1223.02
09/11/2019, 1220.17, 1307033, 1203.41, 1222.6, 1202.2
09/10/2019, 1206, 1260115, 1195.15, 1210, 1194.58
09/09/2019, 1204.41, 1471880, 1204, 1220, 1192.62
09/06/2019, 1204.93, 1072143, 1208.13, 1212.015, 1202.5222
09/05/2019, 1211.38, 1408601, 1191.53, 1213.04, 1191.53
09/04/2019, 1181.41, 1068968, 1176.71, 1183.48, 1171
09/03/2019, 1168.39, 1480420, 1177.03, 1186.89, 1163.2
08/30/2019, 1188.1, 1129959, 1198.5, 1198.5, 1183.8026
08/29/2019, 1192.85, 1088858, 1181.12, 1196.06, 1181.12
08/28/2019, 1171.02, 802243, 1161.71, 1176.4199, 1157.3
08/27/2019, 1167.84, 1077452, 1180.53, 1182.4, 1161.45
08/26/2019, 1168.89, 1226441, 1157.26, 1169.47, 1152.96
08/23/2019, 1151.29, 1688271, 1181.99, 1194.08, 1147.75
08/22/2019, 1189.53, 947906, 1194.07, 1198.0115, 1178.58
08/21/2019, 1191.25, 741053, 1193.15, 1199, 1187.43
08/20/2019, 1182.69, 915605, 1195.25, 1196.06, 1182.11
08/19/2019, 1198.45, 1232517, 1190.09, 1206.99, 1190.09
08/16/2019, 1177.6, 1349436, 1179.55, 1182.72, 1171.81
08/15/2019, 1167.26, 1224739, 1163.5, 1175.84, 1162.11
08/14/2019, 1164.29, 1578668, 1176.31, 1182.3, 1160.54
08/13/2019, 1197.27, 1318009, 1171.46, 1204.78, 1171.46
08/12/2019, 1174.71, 1003187, 1179.21, 1184.96, 1167.6723
08/09/2019, 1188.01, 1065658, 1197.99, 1203.88, 1183.603
08/08/2019, 1204.8, 1467997, 1182.83, 1205.01, 1173.02
08/07/2019, 1173.99, 1444324, 1156, 1178.4451, 1149.6239
08/06/2019, 1169.95, 1709374, 1163.31, 1179.96, 1160
08/05/2019, 1152.32, 2597455, 1170.04, 1175.24, 1140.14
08/02/2019, 1193.99, 1645067, 1200.74, 1206.9, 1188.94
08/01/2019, 1209.01, 1698510, 1214.03, 1234.11, 1205.72
07/31/2019, 1216.68, 1725454, 1223, 1234, 1207.7635
07/30/2019, 1225.14, 1453263, 1225.41, 1234.87, 1223.3
07/29/2019, 1239.41, 2223731, 1241.05, 1247.37, 1228.23
07/26/2019, 1250.41, 4805752, 1224.04, 1265.5499, 1224
07/25/2019, 1132.12, 2209823, 1137.82, 1141.7, 1120.92
07/24/2019, 1137.81, 1590101, 1131.9, 1144, 1126.99
07/23/2019, 1146.21, 1093688, 1144, 1146.9, 1131.8
07/22/2019, 1138.07, 1301846, 1133.45, 1139.25, 1124.24
07/19/2019, 1130.1, 1647245, 1148.19, 1151.14, 1129.62
07/18/2019, 1146.33, 1291281, 1141.74, 1147.605, 1132.73
07/17/2019, 1146.35, 1170047, 1150.97, 1158.36, 1145.77
07/16/2019, 1153.58, 1238807, 1146, 1158.58, 1145
07/15/2019, 1150.34, 903780, 1146.86, 1150.82, 1139.4
07/12/2019, 1144.9, 863973, 1143.99, 1147.34, 1138.78
07/11/2019, 1144.21, 1195569, 1143.25, 1153.07, 1139.58
07/10/2019, 1140.48, 1209466, 1131.22, 1142.05, 1130.97
07/09/2019, 1124.83, 1330370, 1111.8, 1128.025, 1107.17
07/08/2019, 1116.35, 1236419, 1125.17, 1125.98, 1111.21
07/05/2019, 1131.59, 1264540, 1117.8, 1132.88, 1116.14
07/03/2019, 1121.58, 767011, 1117.41, 1126.76, 1113.86
07/02/2019, 1111.25, 991755, 1102.24, 1111.77, 1098.17
07/01/2019, 1097.95, 1438504, 1098, 1107.58, 1093.703
06/28/2019, 1080.91, 1693450, 1076.39, 1081, 1073.37
06/27/2019, 1076.01, 1004477, 1084, 1087.1, 1075.29
06/26/2019, 1079.8, 1810869, 1086.5, 1092.97, 1072.24
06/25/2019, 1086.35, 1546913, 1112.66, 1114.35, 1083.8
06/24/2019, 1115.52, 1395696, 1119.61, 1122, 1111.01
06/21/2019, 1121.88, 1947591, 1109.24, 1124.11, 1108.08
06/20/2019, 1111.42, 1262011, 1119.99, 1120.12, 1104.74
06/19/2019, 1102.33, 1339218, 1105.6, 1107, 1093.48
06/18/2019, 1103.6, 1386684, 1109.69, 1116.39, 1098.99
06/17/2019, 1092.5, 941602, 1086.28, 1099.18, 1086.28
06/14/2019, 1085.35, 1111643, 1086.42, 1092.69, 1080.1721
06/13/2019, 1088.77, 1058000, 1083.64, 1094.17, 1080.15
06/12/2019, 1077.03, 1061255, 1078, 1080.93, 1067.54
06/11/2019, 1078.72, 1437063, 1093.98, 1101.99, 1077.6025
06/10/2019, 1080.38, 1464248, 1072.98, 1092.66, 1072.3216
06/07/2019, 1066.04, 1802370, 1050.63, 1070.92, 1048.4
06/06/2019, 1044.34, 1703244, 1044.99, 1047.49, 1033.7
06/05/2019, 1042.22, 2168439, 1051.54, 1053.55, 1030.49
06/04/2019, 1053.05, 2833483, 1042.9, 1056.05, 1033.69
06/03/2019, 1036.23, 5130576, 1065.5, 1065.5, 1025
05/31/2019, 1103.63, 1508203, 1101.29, 1109.6, 1100.18
05/30/2019, 1117.95, 951873, 1115.54, 1123.13, 1112.12
05/29/2019, 1116.46, 1538212, 1127.52, 1129.1, 1108.2201
05/28/2019, 1134.15, 1365166, 1134, 1151.5871, 1133.12
05/24/2019, 1133.47, 1112341, 1147.36, 1149.765, 1131.66
05/23/2019, 1140.77, 1199300, 1140.5, 1145.9725, 1129.224
05/22/2019, 1151.42, 914839, 1146.75, 1158.52, 1145.89
05/21/2019, 1149.63, 1160158, 1148.49, 1152.7077, 1137.94
05/20/2019, 1138.85, 1353292, 1144.5, 1146.7967, 1131.4425
05/17/2019, 1162.3, 1208623, 1168.47, 1180.15, 1160.01
05/16/2019, 1178.98, 1531404, 1164.51, 1188.16, 1162.84
05/15/2019, 1164.21, 2289302, 1117.87, 1171.33, 1116.6657
05/14/2019, 1120.44, 1836604, 1137.21, 1140.42, 1119.55
05/13/2019, 1132.03, 1860648, 1141.96, 1147.94, 1122.11
05/10/2019, 1164.27, 1314546, 1163.59, 1172.6, 1142.5
05/09/2019, 1162.38, 1185973, 1159.03, 1169.66, 1150.85
05/08/2019, 1166.27, 1309514, 1172.01, 1180.4243, 1165.74
05/07/2019, 1174.1, 1551368, 1180.47, 1190.44, 1161.04
05/06/2019, 1189.39, 1563943, 1166.26, 1190.85, 1166.26
05/03/2019, 1185.4, 1980653, 1173.65, 1186.8, 1169
05/02/2019, 1162.61, 1944817, 1167.76, 1174.1895, 1155.0018
05/01/2019, 1168.08, 2642983, 1188.05, 1188.05, 1167.18
04/30/2019, 1188.48, 6194691, 1185, 1192.81, 1175
04/29/2019, 1287.58, 2412788, 1274, 1289.27, 1266.2949
04/26/2019, 1272.18, 1228276, 1269, 1273.07, 1260.32
04/25/2019, 1263.45, 1099614, 1264.77, 1267.4083, 1252.03
04/24/2019, 1256, 1015006, 1264.12, 1268.01, 1255
04/23/2019, 1264.55, 1271195, 1250.69, 1269, 1246.38
04/22/2019, 1248.84, 806577, 1235.99, 1249.09, 1228.31
04/18/2019, 1236.37, 1315676, 1239.18, 1242, 1234.61
04/17/2019, 1236.34, 1211866, 1233, 1240.56, 1227.82
04/16/2019, 1227.13, 855258, 1225, 1230.82, 1220.12
04/15/2019, 1221.1, 1187353, 1218, 1224.2, 1209.1101
04/12/2019, 1217.87, 926799, 1210, 1218.35, 1208.11
04/11/2019, 1204.62, 709417, 1203.96, 1207.96, 1200.13
04/10/2019, 1202.16, 724524, 1200.68, 1203.785, 1196.435
04/09/2019, 1197.25, 865416, 1196, 1202.29, 1193.08
04/08/2019, 1203.84, 859969, 1207.89, 1208.69, 1199.86
04/05/2019, 1207.15, 900950, 1214.99, 1216.22, 1205.03
04/04/2019, 1215, 949962, 1205.94, 1215.67, 1204.13
04/03/2019, 1205.92, 1014195, 1207.48, 1216.3, 1200.5
04/02/2019, 1200.49, 800820, 1195.32, 1201.35, 1185.71
04/01/2019, 1194.43, 1188235, 1184.1, 1196.66, 1182
03/29/2019, 1173.31, 1269573, 1174.9, 1178.99, 1162.88
03/28/2019, 1168.49, 966843, 1171.54, 1171.565, 1159.4312
03/27/2019, 1173.02, 1362217, 1185.5, 1187.559, 1159.37
03/26/2019, 1184.62, 1894639, 1198.53, 1202.83, 1176.72
03/25/2019, 1193, 1493841, 1196.93, 1206.3975, 1187.04
03/22/2019, 1205.5, 1668910, 1226.32, 1230, 1202.825
03/21/2019, 1231.54, 1195899, 1216, 1231.79, 1213.15
03/20/2019, 1223.97, 2089367, 1197.35, 1227.14, 1196.17
03/19/2019, 1198.85, 1404863, 1188.81, 1200, 1185.87
03/18/2019, 1184.26, 1212506, 1183.3, 1190, 1177.4211
03/15/2019, 1184.46, 2457597, 1193.38, 1196.57, 1182.61
03/14/2019, 1185.55, 1150950, 1194.51, 1197.88, 1184.48
03/13/2019, 1193.32, 1434816, 1200.645, 1200.93, 1191.94
03/12/2019, 1193.2, 2012306, 1178.26, 1200, 1178.26
03/11/2019, 1175.76, 1569332, 1144.45, 1176.19, 1144.45
03/08/2019, 1142.32, 1212271, 1126.73, 1147.08, 1123.3
03/07/2019, 1143.3, 1166076, 1155.72, 1156.755, 1134.91
03/06/2019, 1157.86, 1094100, 1162.49, 1167.5658, 1155.49
03/05/2019, 1162.03, 1422357, 1150.06, 1169.61, 1146.195
03/04/2019, 1147.8, 1444774, 1146.99, 1158.2804, 1130.69
03/01/2019, 1140.99, 1447454, 1124.9, 1142.97, 1124.75
02/28/2019, 1119.92, 1541068, 1111.3, 1127.65, 1111.01
02/27/2019, 1116.05, 968362, 1106.95, 1117.98, 1101
02/26/2019, 1115.13, 1469761, 1105.75, 1119.51, 1099.92
02/25/2019, 1109.4, 1395281, 1116, 1118.54, 1107.27
02/22/2019, 1110.37, 1048361, 1100.9, 1111.24, 1095.6
02/21/2019, 1096.97, 1414744, 1110.84, 1111.94, 1092.52
02/20/2019, 1113.8, 1080144, 1119.99, 1123.41, 1105.28
02/19/2019, 1118.56, 1046315, 1110, 1121.89, 1110
02/15/2019, 1113.65, 1442461, 1130.08, 1131.67, 1110.65
02/14/2019, 1121.67, 941678, 1118.05, 1128.23, 1110.445
02/13/2019, 1120.16, 1048630, 1124.99, 1134.73, 1118.5
02/12/2019, 1121.37, 1608658, 1106.8, 1125.295, 1105.85
02/11/2019, 1095.01, 1063825, 1096.95, 1105.945, 1092.86
02/08/2019, 1095.06, 1072031, 1087, 1098.91, 1086.55
02/07/2019, 1098.71, 2040615, 1104.16, 1104.84, 1086
02/06/2019, 1115.23, 2101674, 1139.57, 1147, 1112.77
02/05/2019, 1145.99, 3529974, 1124.84, 1146.85, 1117.248
02/04/2019, 1132.8, 2518184, 1112.66, 1132.8, 1109.02
02/01/2019, 1110.75, 1455609, 1112.4, 1125, 1104.89
01/31/2019, 1116.37, 1531463, 1103, 1117.33, 1095.41
01/30/2019, 1089.06, 1241760, 1068.43, 1091, 1066.85
01/29/2019, 1060.62, 1006731, 1072.68, 1075.15, 1055.8647
01/28/2019, 1070.08, 1277745, 1080.11, 1083, 1063.8
01/25/2019, 1090.99, 1114785, 1085, 1094, 1081.82
01/24/2019, 1073.9, 1317718, 1076.48, 1079.475, 1060.7
01/23/2019, 1075.57, 956526, 1077.35, 1084.93, 1059.75
01/22/2019, 1070.52, 1607398, 1088, 1091.51, 1063.47
01/18/2019, 1098.26, 1933754, 1100, 1108.352, 1090.9
01/17/2019, 1089.9, 1223674, 1079.47, 1091.8, 1073.5
01/16/2019, 1080.97, 1320530, 1080, 1092.375, 1079.34
01/15/2019, 1077.15, 1452238, 1050.17, 1080.05, 1047.34
01/14/2019, 1044.69, 1127417, 1046.92, 1051.53, 1041.255
01/11/2019, 1057.19, 1512651, 1063.18, 1063.775, 1048.48
01/10/2019, 1070.33, 1444976, 1067.66, 1071.15, 1057.71
01/09/2019, 1074.66, 1198369, 1081.65, 1082.63, 1066.4
01/08/2019, 1076.28, 1748371, 1076.11, 1084.56, 1060.53
01/07/2019, 1068.39, 1978077, 1071.5, 1073.9999, 1054.76
01/04/2019, 1070.71, 2080144, 1032.59, 1070.84, 1027.4179
01/03/2019, 1016.06, 1829379, 1041, 1056.98, 1014.07
01/02/2019, 1045.85, 1516681, 1016.57, 1052.32, 1015.71
12/31/2018, 1035.61, 1492541, 1050.96, 1052.7, 1023.59
12/28/2018, 1037.08, 1399218, 1049.62, 1055.56, 1033.1
12/27/2018, 1043.88, 2102069, 1017.15, 1043.89, 997
12/26/2018, 1039.46, 2337212, 989.01, 1040, 983
12/24/2018, 976.22, 1590328, 973.9, 1003.54, 970.11
12/21/2018, 979.54, 4560424, 1015.3, 1024.02, 973.69
12/20/2018, 1009.41, 2659047, 1018.13, 1034.22, 996.36
12/19/2018, 1023.01, 2419322, 1033.99, 1062, 1008.05
12/18/2018, 1028.71, 2101854, 1026.09, 1049.48, 1021.44
12/17/2018, 1016.53, 2337631, 1037.51, 1053.15, 1007.9
12/14/2018, 1042.1, 1685802, 1049.98, 1062.6, 1040.79
12/13/2018, 1061.9, 1329198, 1068.07, 1079.7597, 1053.93
12/12/2018, 1063.68, 1523276, 1068, 1081.65, 1062.79
12/11/2018, 1051.75, 1354751, 1056.49, 1060.6, 1039.84
12/10/2018, 1039.55, 1793465, 1035.05, 1048.45, 1023.29
12/07/2018, 1036.58, 2098526, 1060.01, 1075.26, 1028.5
12/06/2018, 1068.73, 2758098, 1034.26, 1071.2, 1030.7701
12/04/2018, 1050.82, 2278200, 1103.12, 1104.42, 1049.98
12/03/2018, 1106.43, 1900355, 1123.14, 1124.65, 1103.6645
11/30/2018, 1094.43, 2554416, 1089.07, 1095.57, 1077.88
11/29/2018, 1088.3, 1403540, 1076.08, 1094.245, 1076
11/28/2018, 1086.23, 2399374, 1048.76, 1086.84, 1035.76
11/27/2018, 1044.41, 1801334, 1041, 1057.58, 1038.49
11/26/2018, 1048.62, 1846430, 1038.35, 1049.31, 1033.91
11/23/2018, 1023.88, 691462, 1030, 1037.59, 1022.3992
11/21/2018, 1037.61, 1531676, 1036.76, 1048.56, 1033.47
11/20/2018, 1025.76, 2447254, 1000, 1031.74, 996.02
11/19/2018, 1020, 1837207, 1057.2, 1060.79, 1016.2601
11/16/2018, 1061.49, 1641232, 1059.41, 1067, 1048.98
11/15/2018, 1064.71, 1819132, 1044.71, 1071.85, 1031.78
11/14/2018, 1043.66, 1561656, 1050, 1054.5643, 1031
11/13/2018, 1036.05, 1496534, 1043.29, 1056.605, 1031.15
11/12/2018, 1038.63, 1429319, 1061.39, 1062.12, 1031
11/09/2018, 1066.15, 1343154, 1073.99, 1075.56, 1053.11
11/08/2018, 1082.4, 1463022, 1091.38, 1093.27, 1072.2048
11/07/2018, 1093.39, 2057155, 1069, 1095.46, 1065.9
11/06/2018, 1055.81, 1225197, 1039.48, 1064.345, 1038.07
11/05/2018, 1040.09, 2436742, 1055, 1058.47, 1021.24
11/02/2018, 1057.79, 1829295, 1073.73, 1082.975, 1054.61
11/01/2018, 1070, 1456222, 1075.8, 1083.975, 1062.46
10/31/2018, 1076.77, 2528584, 1059.81, 1091.94, 1057
10/30/2018, 1036.21, 3209126, 1008.46, 1037.49, 1000.75
10/29/2018, 1020.08, 3873644, 1082.47, 1097.04, 995.83
10/26/2018, 1071.47, 4185201, 1037.03, 1106.53, 1034.09
10/25/2018, 1095.57, 2511884, 1071.79, 1110.98, 1069.55
10/24/2018, 1050.71, 1910060, 1104.25, 1106.12, 1048.74
10/23/2018, 1103.69, 1847798, 1080.89, 1107.89, 1070
10/22/2018, 1101.16, 1494285, 1103.06, 1112.23, 1091
10/19/2018, 1096.46, 1264605, 1093.37, 1110.36, 1087.75
10/18/2018, 1087.97, 2056606, 1121.84, 1121.84, 1077.09
10/17/2018, 1115.69, 1397613, 1126.46, 1128.99, 1102.19
10/16/2018, 1121.28, 1845491, 1104.59, 1124.22, 1102.5
10/15/2018, 1092.25, 1343231, 1108.91, 1113.4464, 1089
10/12/2018, 1110.08, 2029872, 1108, 1115, 1086.402
10/11/2018, 1079.32, 2939514, 1072.94, 1106.4, 1068.27
10/10/2018, 1081.22, 2574985, 1131.08, 1132.17, 1081.13
10/09/2018, 1138.82, 1308706, 1146.15, 1154.35, 1137.572
10/08/2018, 1148.97, 1877142, 1150.11, 1168, 1127.3636
10/05/2018, 1157.35, 1184245, 1167.5, 1173.4999, 1145.12
10/04/2018, 1168.19, 2151762, 1195.33, 1197.51, 1155.576
10/03/2018, 1202.95, 1207280, 1205, 1206.41, 1193.83
10/02/2018, 1200.11, 1655602, 1190.96, 1209.96, 1186.63
10/01/2018, 1195.31, 1345250, 1199.89, 1209.9, 1190.3
09/28/2018, 1193.47, 1306822, 1191.87, 1195.41, 1184.5
09/27/2018, 1194.64, 1244278, 1186.73, 1202.1, 1183.63
09/26/2018, 1180.49, 1346434, 1185.15, 1194.23, 1174.765
09/25/2018, 1184.65, 937577, 1176.15, 1186.88, 1168
09/24/2018, 1173.37, 1218532, 1157.17, 1178, 1146.91
09/21/2018, 1166.09, 4363929, 1192, 1192.21, 1166.04
09/20/2018, 1186.87, 1209855, 1179.99, 1189.89, 1173.36
09/19/2018, 1171.09, 1185321, 1164.98, 1173.21, 1154.58
09/18/2018, 1161.22, 1184407, 1157.09, 1176.08, 1157.09
09/17/2018, 1156.05, 1279147, 1170.14, 1177.24, 1154.03
09/14/2018, 1172.53, 934300, 1179.1, 1180.425, 1168.3295
09/13/2018, 1175.33, 1402005, 1170.74, 1178.61, 1162.85
09/12/2018, 1162.82, 1291304, 1172.72, 1178.61, 1158.36
09/11/2018, 1177.36, 1209171, 1161.63, 1178.68, 1156.24
09/10/2018, 1164.64, 1115259, 1172.19, 1174.54, 1160.11
09/07/2018, 1164.83, 1401034, 1158.67, 1175.26, 1157.215
09/06/2018, 1171.44, 1886690, 1186.3, 1186.3, 1152
09/05/2018, 1186.48, 2043732, 1193.8, 1199.0096, 1162
09/04/2018, 1197, 1800509, 1204.27, 1212.99, 1192.5
08/31/2018, 1218.19, 1812366, 1234.98, 1238.66, 1211.2854
08/30/2018, 1239.12, 1320261, 1244.23, 1253.635, 1232.59
08/29/2018, 1249.3, 1295939, 1237.45, 1250.66, 1236.3588
08/28/2018, 1231.15, 1296532, 1241.29, 1242.545, 1228.69
08/27/2018, 1241.82, 1154962, 1227.6, 1243.09, 1225.716
08/24/2018, 1220.65, 946529, 1208.82, 1221.65, 1206.3588
08/23/2018, 1205.38, 988509, 1207.14, 1221.28, 1204.24
08/22/2018, 1207.33, 881463, 1200, 1211.84, 1199
08/21/2018, 1201.62, 1187884, 1208, 1217.26, 1200.3537
08/20/2018, 1207.77, 864462, 1205.02, 1211, 1194.6264
08/17/2018, 1200.96, 1381724, 1202.03, 1209.02, 1188.24
08/16/2018, 1206.49, 1319985, 1224.73, 1225.9999, 1202.55
08/15/2018, 1214.38, 1815642, 1229.26, 1235.24, 1209.51
08/14/2018, 1242.1, 1342534, 1235.19, 1245.8695, 1225.11
08/13/2018, 1235.01, 957153, 1236.98, 1249.2728, 1233.6405
08/10/2018, 1237.61, 1107323, 1243, 1245.695, 1232
08/09/2018, 1249.1, 805227, 1249.9, 1255.542, 1246.01
08/08/2018, 1245.61, 1369650, 1240.47, 1256.5, 1238.0083
08/07/2018, 1242.22, 1493073, 1237, 1251.17, 1236.17
08/06/2018, 1224.77, 1080923, 1225, 1226.0876, 1215.7965
08/03/2018, 1223.71, 1072524, 1229.62, 1230, 1215.06
08/02/2018, 1226.15, 1520488, 1205.9, 1229.88, 1204.79
08/01/2018, 1220.01, 1567142, 1228, 1233.47, 1210.21
07/31/2018, 1217.26, 1632823, 1220.01, 1227.5877, 1205.6
07/30/2018, 1219.74, 1822782, 1228.01, 1234.916, 1211.47
07/27/2018, 1238.5, 2115802, 1271, 1273.89, 1231
07/26/2018, 1268.33, 2334881, 1251, 1269.7707, 1249.02
07/25/2018, 1263.7, 2115890, 1239.13, 1265.86, 1239.13
07/24/2018, 1248.08, 3303268, 1262.59, 1266, 1235.56
07/23/2018, 1205.5, 2584034, 1181.01, 1206.49, 1181
07/20/2018, 1184.91, 1246898, 1186.96, 1196.86, 1184.22
07/19/2018, 1186.96, 1256113, 1191, 1200, 1183.32
07/18/2018, 1195.88, 1391232, 1196.56, 1204.5, 1190.34
07/17/2018, 1198.8, 1585091, 1172.22, 1203.04, 1170.6
07/16/2018, 1183.86, 1049560, 1189.39, 1191, 1179.28
07/13/2018, 1188.82, 1221687, 1185, 1195.4173, 1180
07/12/2018, 1183.48, 1251083, 1159.89, 1184.41, 1155.935
07/11/2018, 1153.9, 1094301, 1144.59, 1164.29, 1141.0003
07/10/2018, 1152.84, 789249, 1156.98, 1159.59, 1149.59
07/09/2018, 1154.05, 906073, 1148.48, 1154.67, 1143.42
07/06/2018, 1140.17, 966155, 1123.58, 1140.93, 1120.7371
07/05/2018, 1124.27, 1060752, 1110.53, 1127.5, 1108.48
07/03/2018, 1102.89, 679034, 1135.82, 1135.82, 1100.02
07/02/2018, 1127.46, 1188616, 1099, 1128, 1093.8
06/29/2018, 1115.65, 1275979, 1120, 1128.2265, 1115
06/28/2018, 1114.22, 1072438, 1102.09, 1122.31, 1096.01
06/27/2018, 1103.98, 1287698, 1121.34, 1131.8362, 1103.62
06/26/2018, 1118.46, 1559791, 1128, 1133.21, 1116.6589
06/25/2018, 1124.81, 2155276, 1143.6, 1143.91, 1112.78
06/22/2018, 1155.48, 1310164, 1159.14, 1162.4965, 1147.26
06/21/2018, 1157.66, 1232352, 1174.85, 1177.295, 1152.232
06/20/2018, 1169.84, 1648248, 1175.31, 1186.2856, 1169.16
06/19/2018, 1168.06, 1616125, 1158.5, 1171.27, 1154.01
06/18/2018, 1173.46, 1400641, 1143.65, 1174.31, 1143.59
06/15/2018, 1152.26, 2119134, 1148.86, 1153.42, 1143.485
06/14/2018, 1152.12, 1350085, 1143.85, 1155.47, 1140.64
06/13/2018, 1134.79, 1490017, 1141.12, 1146.5, 1133.38
06/12/2018, 1139.32, 899231, 1131.07, 1139.79, 1130.735
06/11/2018, 1129.99, 1071114, 1118.6, 1137.26, 1118.6
06/08/2018, 1120.87, 1289859, 1118.18, 1126.67, 1112.15
06/07/2018, 1123.86, 1519860, 1131.32, 1135.82, 1116.52
06/06/2018, 1136.88, 1697489, 1142.17, 1143, 1125.7429
06/05/2018, 1139.66, 1538169, 1140.99, 1145.738, 1133.19
06/04/2018, 1139.29, 1881046, 1122.33, 1141.89, 1122.005
06/01/2018, 1119.5, 2416755, 1099.35, 1120, 1098.5
05/31/2018, 1084.99, 3085325, 1067.56, 1097.19, 1067.56
05/30/2018, 1067.8, 1129958, 1063.03, 1069.21, 1056.83
05/29/2018, 1060.32, 1858676, 1064.89, 1073.37, 1055.22
05/25/2018, 1075.66, 878903, 1079.02, 1082.56, 1073.775
05/24/2018, 1079.24, 757752, 1079, 1080.47, 1066.15
05/23/2018, 1079.69, 1057712, 1065.13, 1080.78, 1061.71
05/22/2018, 1069.73, 1088700, 1083.56, 1086.59, 1066.69
05/21/2018, 1079.58, 1012258, 1074.06, 1088, 1073.65
05/18/2018, 1066.36, 1496448, 1061.86, 1069.94, 1060.68
05/17/2018, 1078.59, 1031190, 1079.89, 1086.87, 1073.5
05/16/2018, 1081.77, 989819, 1077.31, 1089.27, 1076.26
05/15/2018, 1079.23, 1494306, 1090, 1090.05, 1073.47
05/14/2018, 1100.2, 1450140, 1100, 1110.75, 1099.11
05/11/2018, 1098.26, 1253205, 1093.6, 1101.3295, 1090.91
05/10/2018, 1097.57, 1441456, 1086.03, 1100.44, 1085.64
05/09/2018, 1082.76, 2032319, 1058.1, 1085.44, 1056.365
05/08/2018, 1053.91, 1217260, 1058.54, 1060.55, 1047.145
05/07/2018, 1054.79, 1464008, 1049.23, 1061.68, 1047.1
05/04/2018, 1048.21, 1936797, 1016.9, 1048.51, 1016.9
05/03/2018, 1023.72, 1813623, 1019, 1029.675, 1006.29
05/02/2018, 1024.38, 1534094, 1028.1, 1040.389, 1022.87
05/01/2018, 1037.31, 1427171, 1013.66, 1038.47, 1008.21
04/30/2018, 1017.33, 1664084, 1030.01, 1037, 1016.85
04/27/2018, 1030.05, 1617452, 1046, 1049.5, 1025.59
04/26/2018, 1040.04, 1984448, 1029.51, 1047.98, 1018.19
04/25/2018, 1021.18, 2225495, 1025.52, 1032.49, 1015.31
04/24/2018, 1019.98, 4750851, 1052, 1057, 1010.59
04/23/2018, 1067.45, 2278846, 1077.86, 1082.72, 1060.7
04/20/2018, 1072.96, 1887698, 1082, 1092.35, 1069.57
04/19/2018, 1087.7, 1741907, 1069.4, 1094.165, 1068.18
04/18/2018, 1072.08, 1336678, 1077.43, 1077.43, 1066.225
04/17/2018, 1074.16, 2311903, 1051.37, 1077.88, 1048.26
04/16/2018, 1037.98, 1194144, 1037, 1043.24, 1026.74
04/13/2018, 1029.27, 1175754, 1040.88, 1046.42, 1022.98
04/12/2018, 1032.51, 1357599, 1025.04, 1040.69, 1021.4347
04/11/2018, 1019.97, 1476133, 1027.99, 1031.3641, 1015.87
04/10/2018, 1031.64, 1983510, 1026.44, 1036.28, 1011.34
04/09/2018, 1015.45, 1738682, 1016.8, 1039.6, 1014.08
04/06/2018, 1007.04, 1740896, 1020, 1031.42, 1003.03
04/05/2018, 1027.81, 1345681, 1041.33, 1042.79, 1020.1311
04/04/2018, 1025.14, 2464418, 993.41, 1028.7175, 993
04/03/2018, 1013.41, 2271858, 1013.91, 1020.99, 994.07
04/02/2018, 1006.47, 2679214, 1022.82, 1034.8, 990.37
03/29/2018, 1031.79, 2714402, 1011.63, 1043, 1002.9
03/28/2018, 1004.56, 3345046, 998, 1024.23, 980.64
03/27/2018, 1005.1, 3081612, 1063, 1064.8393, 996.92
03/26/2018, 1053.21, 2593808, 1046, 1055.63, 1008.4
03/23/2018, 1021.57, 2147097, 1047.03, 1063.36, 1021.22
03/22/2018, 1049.08, 2584639, 1081.88, 1082.9, 1045.91
03/21/2018, 1090.88, 1878294, 1092.74, 1106.2999, 1085.15
03/20/2018, 1097.71, 1802209, 1099, 1105.2, 1083.46
03/19/2018, 1099.82, 2355186, 1120.01, 1121.99, 1089.01
03/16/2018, 1135.73, 2614871, 1154.14, 1155.88, 1131.96
03/15/2018, 1149.58, 1397767, 1149.96, 1161.08, 1134.54
03/14/2018, 1149.49, 1290638, 1145.21, 1158.59, 1141.44
03/13/2018, 1138.17, 1874176, 1170, 1176.76, 1133.33
03/12/2018, 1164.5, 2106548, 1163.85, 1177.05, 1157.42
03/09/2018, 1160.04, 2121425, 1136, 1160.8, 1132.4606
03/08/2018, 1126, 1393529, 1115.32, 1127.6, 1112.8
03/07/2018, 1109.64, 1277439, 1089.19, 1112.22, 1085.4823
03/06/2018, 1095.06, 1497087, 1099.22, 1101.85, 1089.775
03/05/2018, 1090.93, 1141932, 1075.14, 1097.1, 1069.0001
03/02/2018, 1078.92, 2271394, 1053.08, 1081.9986, 1048.115
03/01/2018, 1069.52, 2511872, 1107.87, 1110.12, 1067.001
02/28/2018, 1104.73, 1873737, 1123.03, 1127.53, 1103.24
02/27/2018, 1118.29, 1772866, 1141.24, 1144.04, 1118
02/26/2018, 1143.75, 1514920, 1127.8, 1143.96, 1126.695
02/23/2018, 1126.79, 1190432, 1112.64, 1127.28, 1104.7135
02/22/2018, 1106.63, 1309536, 1116.19, 1122.82, 1102.59
02/21/2018, 1111.34, 1507152, 1106.47, 1133.97, 1106.33
02/20/2018, 1102.46, 1389491, 1090.57, 1113.95, 1088.52
02/16/2018, 1094.8, 1680283, 1088.41, 1104.67, 1088.3134
02/15/2018, 1089.52, 1785552, 1079.07, 1091.4794, 1064.34
02/14/2018, 1069.7, 1547665, 1048.95, 1071.72, 1046.75
02/13/2018, 1052.1, 1213800, 1045, 1058.37, 1044.0872
02/12/2018, 1051.94, 2054002, 1048, 1061.5, 1040.928
02/09/2018, 1037.78, 3503970, 1017.25, 1043.97, 992.56
02/08/2018, 1001.52, 2809890, 1055.41, 1058.62, 1000.66
02/07/2018, 1048.58, 2353003, 1081.54, 1081.78, 1048.26
02/06/2018, 1080.6, 3432313, 1027.18, 1081.71, 1023.1367
02/05/2018, 1055.8, 3769453, 1090.6, 1110, 1052.03
02/02/2018, 1111.9, 4837979, 1122, 1123.07, 1107.2779
02/01/2018, 1167.7, 2380221, 1162.61, 1174, 1157.52
01/31/2018, 1169.94, 1523820, 1170.57, 1173, 1159.13
01/30/2018, 1163.69, 1541771, 1167.83, 1176.52, 1163.52
01/29/2018, 1175.58, 1337324, 1176.48, 1186.89, 1171.98
01/26/2018, 1175.84, 1981173, 1175.08, 1175.84, 1158.11
01/25/2018, 1170.37, 1461518, 1172.53, 1175.94, 1162.76
01/24/2018, 1164.24, 1382904, 1177.33, 1179.86, 1161.05
01/23/2018, 1169.97, 1309862, 1159.85, 1171.6266, 1158.75
01/22/2018, 1155.81, 1616120, 1137.49, 1159.88, 1135.1101
01/19/2018, 1137.51, 1390118, 1131.83, 1137.86, 1128.3
01/18/2018, 1129.79, 1194943, 1131.41, 1132.51, 1117.5
01/17/2018, 1131.98, 1200476, 1126.22, 1132.6, 1117.01
01/16/2018, 1121.76, 1566662, 1132.51, 1139.91, 1117.8316
01/12/2018, 1122.26, 1718491, 1102.41, 1124.29, 1101.15
01/11/2018, 1105.52, 977727, 1106.3, 1106.525, 1099.59
01/10/2018, 1102.61, 1042273, 1097.1, 1104.6, 1096.11
01/09/2018, 1106.26, 900089, 1109.4, 1110.57, 1101.2307
01/08/2018, 1106.94, 1046767, 1102.23, 1111.27, 1101.62
01/05/2018, 1102.23, 1279990, 1094, 1104.25, 1092
01/04/2018, 1086.4, 1002945, 1088, 1093.5699, 1084.0017
01/03/2018, 1082.48, 1429757, 1064.31, 1086.29, 1063.21
01/02/2018, 1065, 1236401, 1048.34, 1066.94, 1045.23
12/29/2017, 1046.4, 886845, 1046.72, 1049.7, 1044.9
12/28/2017, 1048.14, 833011, 1051.6, 1054.75, 1044.77
12/27/2017, 1049.37, 1271780, 1057.39, 1058.37, 1048.05
12/26/2017, 1056.74, 761097, 1058.07, 1060.12, 1050.2
12/22/2017, 1060.12, 755089, 1061.11, 1064.2, 1059.44
12/21/2017, 1063.63, 986548, 1064.95, 1069.33, 1061.7938
12/20/2017, 1064.95, 1268285, 1071.78, 1073.38, 1061.52
12/19/2017, 1070.68, 1307894, 1075.2, 1076.84, 1063.55
12/18/2017, 1077.14, 1552016, 1066.08, 1078.49, 1062
12/15/2017, 1064.19, 3275091, 1054.61, 1067.62, 1049.5
12/14/2017, 1049.15, 1558684, 1045, 1058.5, 1043.11
12/13/2017, 1040.61, 1220364, 1046.12, 1046.665, 1038.38
12/12/2017, 1040.48, 1279511, 1039.63, 1050.31, 1033.6897
12/11/2017, 1041.1, 1190527, 1035.5, 1043.8, 1032.0504
12/08/2017, 1037.05, 1288419, 1037.49, 1042.05, 1032.5222
12/07/2017, 1030.93, 1458145, 1020.43, 1034.24, 1018.071
12/06/2017, 1018.38, 1258496, 1001.5, 1024.97, 1001.14
12/05/2017, 1005.15, 2066247, 995.94, 1020.61, 988.28
12/04/2017, 998.68, 1906058, 1012.66, 1016.1, 995.57
12/01/2017, 1010.17, 1908962, 1015.8, 1022.4897, 1002.02
11/30/2017, 1021.41, 1723003, 1022.37, 1028.4899, 1015
11/29/2017, 1021.66, 2442974, 1042.68, 1044.08, 1015.65
11/28/2017, 1047.41, 1421027, 1055.09, 1062.375, 1040
11/27/2017, 1054.21, 1307471, 1040, 1055.46, 1038.44
11/24/2017, 1040.61, 536996, 1035.87, 1043.178, 1035
11/22/2017, 1035.96, 746351, 1035, 1039.706, 1031.43
11/21/2017, 1034.49, 1096161, 1023.31, 1035.11, 1022.655
11/20/2017, 1018.38, 898389, 1020.26, 1022.61, 1017.5
11/17/2017, 1019.09, 1366936, 1034.01, 1034.42, 1017.75
11/16/2017, 1032.5, 1129424, 1022.52, 1035.92, 1022.52
11/15/2017, 1020.91, 847932, 1019.21, 1024.09, 1015.42
11/14/2017, 1026, 958708, 1022.59, 1026.81, 1014.15
11/13/2017, 1025.75, 885565, 1023.42, 1031.58, 1022.57
11/10/2017, 1028.07, 720674, 1026.46, 1030.76, 1025.28
11/09/2017, 1031.26, 1244701, 1033.99, 1033.99, 1019.6656
11/08/2017, 1039.85, 1088395, 1030.52, 1043.522, 1028.45
11/07/2017, 1033.33, 1112123, 1027.27, 1033.97, 1025.13
11/06/2017, 1025.9, 1124757, 1028.99, 1034.87, 1025
11/03/2017, 1032.48, 1075134, 1022.11, 1032.65, 1020.31
11/02/2017, 1025.58, 1048584, 1021.76, 1028.09, 1013.01
11/01/2017, 1025.5, 1371619, 1017.21, 1029.67, 1016.95
10/31/2017, 1016.64, 1331265, 1015.22, 1024, 1010.42
10/30/2017, 1017.11, 2083490, 1014, 1024.97, 1007.5
10/27/2017, 1019.27, 5165922, 1009.19, 1048.39, 1008.2
10/26/2017, 972.56, 2027218, 980, 987.6, 972.2
10/25/2017, 973.33, 1210368, 968.37, 976.09, 960.5201
10/24/2017, 970.54, 1206074, 970, 972.23, 961
10/23/2017, 968.45, 1471544, 989.52, 989.52, 966.12
10/20/2017, 988.2, 1176177, 989.44, 991, 984.58
10/19/2017, 984.45, 1312706, 986, 988.88, 978.39
10/18/2017, 992.81, 1057285, 991.77, 996.72, 986.9747
10/17/2017, 992.18, 1290152, 990.29, 996.44, 988.59
10/16/2017, 992, 910246, 992.1, 993.9065, 984
10/13/2017, 989.68, 1169584, 992, 997.21, 989
10/12/2017, 987.83, 1278357, 987.45, 994.12, 985
10/11/2017, 989.25, 1692843, 973.72, 990.71, 972.25
10/10/2017, 972.6, 968113, 980, 981.57, 966.0801
10/09/2017, 977, 890620, 980, 985.425, 976.11
10/06/2017, 978.89, 1146207, 966.7, 979.46, 963.36
10/05/2017, 969.96, 1210427, 955.49, 970.91, 955.18
10/04/2017, 951.68, 951766, 957, 960.39, 950.69
10/03/2017, 957.79, 888303, 954, 958, 949.14
10/02/2017, 953.27, 1282850, 959.98, 962.54, 947.84
09/29/2017, 959.11, 1576365, 952, 959.7864, 951.51
09/28/2017, 949.5, 997036, 941.36, 950.69, 940.55
09/27/2017, 944.49, 2237538, 927.74, 949.9, 927.74
09/26/2017, 924.86, 1666749, 923.72, 930.82, 921.14
09/25/2017, 920.97, 1855742, 925.45, 926.4, 909.7
09/22/2017, 928.53, 1052170, 927.75, 934.73, 926.48
09/21/2017, 932.45, 1227059, 933, 936.53, 923.83
09/20/2017, 931.58, 1535626, 922.98, 933.88, 922
09/19/2017, 921.81, 912967, 917.42, 922.4199, 912.55
09/18/2017, 915, 1300759, 920.01, 922.08, 910.6
09/15/2017, 920.29, 2499466, 924.66, 926.49, 916.36
09/14/2017, 925.11, 1395497, 931.25, 932.77, 924
09/13/2017, 935.09, 1101145, 930.66, 937.25, 929.86
09/12/2017, 932.07, 1133638, 932.59, 933.48, 923.861
09/11/2017, 929.08, 1266020, 934.25, 938.38, 926.92
09/08/2017, 926.5, 997699, 936.49, 936.99, 924.88
09/07/2017, 935.95, 1211472, 931.73, 936.41, 923.62
09/06/2017, 927.81, 1526209, 930.15, 930.915, 919.27
09/05/2017, 928.45, 1346791, 933.08, 937, 921.96
09/01/2017, 937.34, 943657, 941.13, 942.48, 935.15
08/31/2017, 939.33, 1566888, 931.76, 941.98, 931.76
08/30/2017, 929.57, 1300616, 920.05, 930.819, 919.65
08/29/2017, 921.29, 1181391, 905.1, 923.33, 905
08/28/2017, 913.81, 1085014, 916, 919.245, 911.87
08/25/2017, 915.89, 1052764, 923.49, 925.555, 915.5
08/24/2017, 921.28, 1266191, 928.66, 930.84, 915.5
08/23/2017, 927, 1088575, 921.93, 929.93, 919.36
08/22/2017, 924.69, 1166320, 912.72, 925.86, 911.4751
08/21/2017, 906.66, 942328, 910, 913, 903.4
08/18/2017, 910.67, 1341990, 910.31, 915.275, 907.1543
08/17/2017, 910.98, 1241782, 925.78, 926.86, 910.98
08/16/2017, 926.96, 1005261, 925.29, 932.7, 923.445
08/15/2017, 922.22, 882479, 924.23, 926.5499, 919.82
08/14/2017, 922.67, 1063404, 922.53, 924.668, 918.19
08/11/2017, 914.39, 1205652, 907.97, 917.78, 905.58
08/10/2017, 907.24, 1755521, 917.55, 919.26, 906.13
08/09/2017, 922.9, 1191332, 920.61, 925.98, 917.2501
08/08/2017, 926.79, 1057351, 927.09, 935.814, 925.6095
08/07/2017, 929.36, 1031710, 929.06, 931.7, 926.5
08/04/2017, 927.96, 1081814, 926.75, 930.3068, 923.03
08/03/2017, 923.65, 1201519, 930.34, 932.24, 922.24
08/02/2017, 930.39, 1822272, 928.61, 932.6, 916.68
08/01/2017, 930.83, 1234612, 932.38, 937.447, 929.26
07/31/2017, 930.5, 1964748, 941.89, 943.59, 926.04
07/28/2017, 941.53, 1802343, 929.4, 943.83, 927.5
07/27/2017, 934.09, 3128819, 951.78, 951.78, 920
07/26/2017, 947.8, 2069349, 954.68, 955, 942.2788
07/25/2017, 950.7, 4656609, 953.81, 959.7, 945.4
07/24/2017, 980.34, 3205374, 972.22, 986.2, 970.77
07/21/2017, 972.92, 1697190, 962.25, 973.23, 960.15
07/20/2017, 968.15, 1620636, 975, 975.9, 961.51
07/19/2017, 970.89, 1221155, 967.84, 973.04, 964.03
07/18/2017, 965.4, 1152741, 953, 968.04, 950.6
07/17/2017, 953.42, 1164141, 957, 960.74, 949.2407
07/14/2017, 955.99, 1052855, 952, 956.91, 948.005
07/13/2017, 947.16, 1294674, 946.29, 954.45, 943.01
07/12/2017, 943.83, 1517168, 938.68, 946.3, 934.47
07/11/2017, 930.09, 1112417, 929.54, 931.43, 922
07/10/2017, 928.8, 1190237, 921.77, 930.38, 919.59
07/07/2017, 918.59, 1590456, 908.85, 921.54, 908.85
07/06/2017, 906.69, 1424290, 904.12, 914.9444, 899.7
07/05/2017, 911.71, 1813309, 901.76, 914.51, 898.5
07/03/2017, 898.7, 1710373, 912.18, 913.94, 894.79
06/30/2017, 908.73, 2086340, 926.05, 926.05, 908.31
06/29/2017, 917.79, 3287991, 929.92, 931.26, 910.62
06/28/2017, 940.49, 2719213, 929, 942.75, 916
06/27/2017, 927.33, 2566047, 942.46, 948.29, 926.85
06/26/2017, 952.27, 1596664, 969.9, 973.31, 950.79
06/23/2017, 965.59, 1527513, 956.83, 966, 954.2
06/22/2017, 957.09, 941639, 958.7, 960.72, 954.55
06/21/2017, 959.45, 1201971, 953.64, 960.1, 950.76
06/20/2017, 950.63, 1125520, 957.52, 961.62, 950.01
06/19/2017, 957.37, 1520715, 949.96, 959.99, 949.05
06/16/2017, 939.78, 3061794, 940, 942.04, 931.595
06/15/2017, 942.31, 2065271, 933.97, 943.339, 924.44
06/14/2017, 950.76, 1487378, 959.92, 961.15, 942.25
06/13/2017, 953.4, 2012980, 951.91, 959.98, 944.09
06/12/2017, 942.9, 3762434, 939.56, 949.355, 915.2328
06/09/2017, 949.83, 3305545, 984.5, 984.5, 935.63
06/08/2017, 983.41, 1477151, 982.35, 984.57, 977.2
06/07/2017, 981.08, 1447172, 979.65, 984.15, 975.77
06/06/2017, 976.57, 1814323, 983.16, 988.25, 975.14
06/05/2017, 983.68, 1251903, 976.55, 986.91, 975.1
06/02/2017, 975.6, 1750723, 969.46, 975.88, 966
06/01/2017, 966.95, 1408958, 968.95, 971.5, 960.01
05/31/2017, 964.86, 2447176, 975.02, 979.27, 960.18
05/30/2017, 975.88, 1466288, 970.31, 976.2, 969.49
05/26/2017, 971.47, 1251425, 969.7, 974.98, 965.03
05/25/2017, 969.54, 1659422, 957.33, 972.629, 955.47
05/24/2017, 954.96, 1031408, 952.98, 955.09, 949.5
05/23/2017, 948.82, 1269438, 947.92, 951.4666, 942.575
05/22/2017, 941.86, 1118456, 935, 941.8828, 935
05/19/2017, 934.01, 1389848, 931.47, 937.755, 931
05/18/2017, 930.24, 1596058, 921, 933.17, 918.75
05/17/2017, 919.62, 2357922, 935.67, 939.3325, 918.14
05/16/2017, 943, 968288, 940, 943.11, 937.58
05/15/2017, 937.08, 1104595, 932.95, 938.25, 929.34
05/12/2017, 932.22, 1050377, 931.53, 933.44, 927.85
05/11/2017, 930.6, 834997, 925.32, 932.53, 923.0301
05/10/2017, 928.78, 1173887, 931.98, 932, 925.16
05/09/2017, 932.17, 1581236, 936.95, 937.5, 929.53
05/08/2017, 934.3, 1328885, 926.12, 936.925, 925.26
05/05/2017, 927.13, 1910317, 933.54, 934.9, 925.2
05/04/2017, 931.66, 1421938, 926.07, 935.93, 924.59
05/03/2017, 927.04, 1497565, 914.86, 928.1, 912.5426
05/02/2017, 916.44, 1543696, 909.62, 920.77, 909.4526
05/01/2017, 912.57, 2114629, 901.94, 915.68, 901.45
04/28/2017, 905.96, 3223850, 910.66, 916.85, 905.77
04/27/2017, 874.25, 2009509, 873.6, 875.4, 870.38
04/26/2017, 871.73, 1233724, 874.23, 876.05, 867.7481
04/25/2017, 872.3, 1670095, 865, 875, 862.81
04/24/2017, 862.76, 1371722, 851.2, 863.45, 849.86
04/21/2017, 843.19, 1323364, 842.88, 843.88, 840.6
04/20/2017, 841.65, 957994, 841.44, 845.2, 839.32
04/19/2017, 838.21, 954324, 839.79, 842.22, 836.29
04/18/2017, 836.82, 835433, 834.22, 838.93, 832.71
04/17/2017, 837.17, 894540, 825.01, 837.75, 824.47
04/13/2017, 823.56, 1118221, 822.14, 826.38, 821.44
04/12/2017, 824.32, 900059, 821.93, 826.66, 821.02
04/11/2017, 823.35, 1078951, 824.71, 827.4267, 817.0201
04/10/2017, 824.73, 978825, 825.39, 829.35, 823.77
04/07/2017, 824.67, 1056692, 827.96, 828.485, 820.5127
04/06/2017, 827.88, 1254235, 832.4, 836.39, 826.46
04/05/2017, 831.41, 1553163, 835.51, 842.45, 830.72
04/04/2017, 834.57, 1044455, 831.36, 835.18, 829.0363
04/03/2017, 838.55, 1670349, 829.22, 840.85, 829.22
03/31/2017, 829.56, 1401756, 828.97, 831.64, 827.39
03/30/2017, 831.5, 1055263, 833.5, 833.68, 829
03/29/2017, 831.41, 1785006, 825, 832.765, 822.3801
03/28/2017, 820.92, 1620532, 820.41, 825.99, 814.027
03/27/2017, 819.51, 1894735, 806.95, 821.63, 803.37
03/24/2017, 814.43, 1980415, 820.08, 821.93, 808.89
03/23/2017, 817.58, 3485390, 821, 822.57, 812.257
03/22/2017, 829.59, 1399409, 831.91, 835.55, 827.1801
03/21/2017, 830.46, 2461375, 851.4, 853.5, 829.02
03/20/2017, 848.4, 1217560, 850.01, 850.22, 845.15
03/17/2017, 852.12, 1712397, 851.61, 853.4, 847.11
03/16/2017, 848.78, 977384, 849.03, 850.85, 846.13
03/15/2017, 847.2, 1381328, 847.59, 848.63, 840.77
03/14/2017, 845.62, 779920, 843.64, 847.24, 840.8
03/13/2017, 845.54, 1149928, 844, 848.685, 843.25
03/10/2017, 843.25, 1702731, 843.28, 844.91, 839.5
03/09/2017, 838.68, 1261393, 836, 842, 834.21
03/08/2017, 835.37, 988900, 833.51, 838.15, 831.79
03/07/2017, 831.91, 1037573, 827.4, 833.41, 826.52
03/06/2017, 827.78, 1108799, 826.95, 828.88, 822.4
03/03/2017, 829.08, 890640, 830.56, 831.36, 825.751
03/02/2017, 830.63, 937824, 833.85, 834.51, 829.64
03/01/2017, 835.24, 1495934, 828.85, 836.255, 827.26
02/28/2017, 823.21, 2258695, 825.61, 828.54, 820.2
02/27/2017, 829.28, 1101120, 824.55, 830.5, 824
02/24/2017, 828.64, 1392039, 827.73, 829, 824.2
02/23/2017, 831.33, 1471342, 830.12, 832.46, 822.88
02/22/2017, 830.76, 983058, 828.66, 833.25, 828.64
02/21/2017, 831.66, 1259841, 828.66, 833.45, 828.35
02/17/2017, 828.07, 1602549, 823.02, 828.07, 821.655
02/16/2017, 824.16, 1285919, 819.93, 824.4, 818.98
02/15/2017, 818.98, 1311316, 819.36, 823, 818.47
02/14/2017, 820.45, 1054472, 819, 823, 816
02/13/2017, 819.24, 1205835, 816, 820.959, 815.49
02/10/2017, 813.67, 1134701, 811.7, 815.25, 809.78
02/09/2017, 809.56, 990260, 809.51, 810.66, 804.54
02/08/2017, 808.38, 1155892, 807, 811.84, 803.1903
02/07/2017, 806.97, 1240257, 803.99, 810.5, 801.78
02/06/2017, 801.34, 1182882, 799.7, 801.67, 795.2501
02/03/2017, 801.49, 1461217, 802.99, 806, 800.37
02/02/2017, 798.53, 1530827, 793.8, 802.7, 792
02/01/2017, 795.695, 2027708, 799.68, 801.19, 791.19
01/31/2017, 796.79, 2153957, 796.86, 801.25, 790.52
01/30/2017, 802.32, 3243568, 814.66, 815.84, 799.8
01/27/2017, 823.31, 2964989, 834.71, 841.95, 820.44
01/26/2017, 832.15, 2944642, 837.81, 838, 827.01
01/25/2017, 835.67, 1612854, 829.62, 835.77, 825.06
01/24/2017, 823.87, 1472228, 822.3, 825.9, 817.821
01/23/2017, 819.31, 1962506, 807.25, 820.87, 803.74
01/20/2017, 805.02, 1668638, 806.91, 806.91, 801.69
01/19/2017, 802.175, 917085, 805.12, 809.48, 801.8
01/18/2017, 806.07, 1293893, 805.81, 806.205, 800.99
01/17/2017, 804.61, 1361935, 807.08, 807.14, 800.37
01/13/2017, 807.88, 1098154, 807.48, 811.2244, 806.69
01/12/2017, 806.36, 1352872, 807.14, 807.39, 799.17
01/11/2017, 807.91, 1065360, 805, 808.15, 801.37
01/10/2017, 804.79, 1176637, 807.86, 809.1299, 803.51
01/09/2017, 806.65, 1274318, 806.4, 809.9664, 802.83
01/06/2017, 806.15, 1639246, 795.26, 807.9, 792.2041
01/05/2017, 794.02, 1334028, 786.08, 794.48, 785.02
01/04/2017, 786.9, 1071198, 788.36, 791.34, 783.16
01/03/2017, 786.14, 1657291, 778.81, 789.63, 775.8
12/30/2016, 771.82, 1769809, 782.75, 782.78, 770.41
12/29/2016, 782.79, 743808, 783.33, 785.93, 778.92
12/28/2016, 785.05, 1142148, 793.7, 794.23, 783.2
12/27/2016, 791.55, 789151, 790.68, 797.86, 787.657
12/23/2016, 789.91, 623682, 790.9, 792.74, 787.28
12/22/2016, 791.26, 972147, 792.36, 793.32, 788.58
12/21/2016, 794.56, 1208770, 795.84, 796.6757, 787.1
12/20/2016, 796.42, 950345, 796.76, 798.65, 793.27
12/19/2016, 794.2, 1231966, 790.22, 797.66, 786.27
12/16/2016, 790.8, 2435100, 800.4, 800.8558, 790.29
12/15/2016, 797.85, 1623709, 797.34, 803, 792.92
12/14/2016, 797.07, 1700875, 797.4, 804, 794.01
12/13/2016, 796.1, 2122735, 793.9, 804.3799, 793.34
12/12/2016, 789.27, 2102288, 785.04, 791.25, 784.3554
12/09/2016, 789.29, 1821146, 780, 789.43, 779.021
12/08/2016, 776.42, 1487517, 772.48, 778.18, 767.23
12/07/2016, 771.19, 1757710, 761, 771.36, 755.8
12/06/2016, 759.11, 1690365, 764.73, 768.83, 757.34
12/05/2016, 762.52, 1393566, 757.71, 763.9, 752.9
12/02/2016, 750.5, 1452181, 744.59, 754, 743.1
12/01/2016, 747.92, 3017001, 757.44, 759.85, 737.0245
11/30/2016, 758.04, 2386628, 770.07, 772.99, 754.83
11/29/2016, 770.84, 1616427, 771.53, 778.5, 768.24
11/28/2016, 768.24, 2177039, 760, 779.53, 759.8
11/25/2016, 761.68, 587421, 764.26, 765, 760.52
11/23/2016, 760.99, 1477501, 767.73, 768.2825, 755.25
11/22/2016, 768.27, 1592372, 772.63, 776.96, 767
11/21/2016, 769.2, 1324431, 762.61, 769.7, 760.6
11/18/2016, 760.54, 1528555, 771.37, 775, 760
11/17/2016, 771.23, 1298484, 766.92, 772.7, 764.23
11/16/2016, 764.48, 1468196, 755.2, 766.36, 750.51
11/15/2016, 758.49, 2375056, 746.97, 764.4162, 746.97
11/14/2016, 736.08, 3644965, 755.6, 757.85, 727.54
11/11/2016, 754.02, 2421889, 756.54, 760.78, 750.38
11/10/2016, 762.56, 4733916, 791.17, 791.17, 752.18
11/09/2016, 785.31, 2603860, 779.94, 791.2265, 771.67
11/08/2016, 790.51, 1361472, 783.4, 795.633, 780.19
11/07/2016, 782.52, 1574426, 774.5, 785.19, 772.55
11/04/2016, 762.02, 2131948, 750.66, 770.36, 750.5611
11/03/2016, 762.13, 1933937, 767.25, 769.95, 759.03
11/02/2016, 768.7, 1905814, 778.2, 781.65, 763.4496
11/01/2016, 783.61, 2404898, 782.89, 789.49, 775.54
10/31/2016, 784.54, 2420892, 795.47, 796.86, 784
10/28/2016, 795.37, 4261912, 808.35, 815.49, 793.59
10/27/2016, 795.35, 2723097, 801, 803.49, 791.5
10/26/2016, 799.07, 1645403, 806.34, 806.98, 796.32
10/25/2016, 807.67, 1575020, 816.68, 816.68, 805.14
10/24/2016, 813.11, 1693162, 804.9, 815.18, 804.82
10/21/2016, 799.37, 1262042, 795, 799.5, 794
10/20/2016, 796.97, 1755546, 803.3, 803.97, 796.03
10/19/2016, 801.56, 1762990, 798.86, 804.63, 797.635
10/18/2016, 795.26, 2046338, 787.85, 801.61, 785.565
10/17/2016, 779.96, 1091524, 779.8, 785.85, 777.5
10/14/2016, 778.53, 851512, 781.65, 783.95, 776
10/13/2016, 778.19, 1360619, 781.22, 781.22, 773
10/12/2016, 786.14, 935138, 783.76, 788.13, 782.06
10/11/2016, 783.07, 1371461, 786.66, 792.28, 780.58
10/10/2016, 785.94, 1161410, 777.71, 789.38, 775.87
10/07/2016, 775.08, 932444, 779.66, 779.66, 770.75
10/06/2016, 776.86, 1066910, 779, 780.48, 775.54
10/05/2016, 776.47, 1457661, 779.31, 782.07, 775.65
10/04/2016, 776.43, 1198361, 776.03, 778.71, 772.89
10/03/2016, 772.56, 1276614, 774.25, 776.065, 769.5
09/30/2016, 777.29, 1583293, 776.33, 780.94, 774.09
09/29/2016, 775.01, 1310252, 781.44, 785.8, 774.232
09/28/2016, 781.56, 1108249, 777.85, 781.81, 774.97
09/27/2016, 783.01, 1152760, 775.5, 785.9899, 774.308
09/26/2016, 774.21, 1531788, 782.74, 782.74, 773.07
09/23/2016, 786.9, 1411439, 786.59, 788.93, 784.15
09/22/2016, 787.21, 1483899, 780, 789.85, 778.44
09/21/2016, 776.22, 1166290, 772.66, 777.16, 768.301
09/20/2016, 771.41, 975434, 769, 773.33, 768.53
09/19/2016, 765.7, 1171969, 772.42, 774, 764.4406
09/16/2016, 768.88, 2047036, 769.75, 769.75, 764.66
09/15/2016, 771.76, 1344945, 762.89, 773.8, 759.96
09/14/2016, 762.49, 1093723, 759.61, 767.68, 759.11
09/13/2016, 759.69, 1394158, 764.48, 766.2195, 755.8
09/12/2016, 769.02, 1310493, 755.13, 770.29, 754.0001
09/09/2016, 759.66, 1879903, 770.1, 773.245, 759.66
09/08/2016, 775.32, 1268663, 778.59, 780.35, 773.58
09/07/2016, 780.35, 893874, 780, 782.73, 776.2
09/06/2016, 780.08, 1441864, 773.45, 782, 771
09/02/2016, 771.46, 1070725, 773.01, 773.9199, 768.41
09/01/2016, 768.78, 925019, 769.25, 771.02, 764.3
08/31/2016, 767.05, 1247937, 767.01, 769.09, 765.38
08/30/2016, 769.09, 1129932, 769.33, 774.466, 766.84
08/29/2016, 772.15, 847537, 768.74, 774.99, 766.615
08/26/2016, 769.54, 1164713, 769, 776.0799, 765.85
08/25/2016, 769.41, 926856, 767, 771.89, 763.1846
08/24/2016, 769.64, 1071569, 770.58, 774.5, 767.07
08/23/2016, 772.08, 925356, 775.48, 776.44, 771.785
08/22/2016, 772.15, 950417, 773.27, 774.54, 770.0502
08/19/2016, 775.42, 860899, 775, 777.1, 773.13
08/18/2016, 777.5, 718882, 780.01, 782.86, 777
08/17/2016, 779.91, 921666, 777.32, 780.81, 773.53
08/16/2016, 777.14, 1027836, 780.3, 780.98, 773.444
08/15/2016, 782.44, 938183, 783.75, 787.49, 780.11
08/12/2016, 783.22, 739761, 781.5, 783.395, 780.4
08/11/2016, 784.85, 971742, 785, 789.75, 782.97
08/10/2016, 784.68, 784559, 783.75, 786.8123, 782.778
08/09/2016, 784.26, 1318457, 781.1, 788.94, 780.57
08/08/2016, 781.76, 1106693, 782, 782.63, 778.091
08/05/2016, 782.22, 1799478, 773.78, 783.04, 772.34
08/04/2016, 771.61, 1139972, 772.22, 774.07, 768.795
08/03/2016, 773.18, 1283186, 767.18, 773.21, 766.82
08/02/2016, 771.07, 1782822, 768.69, 775.84, 767.85
08/01/2016, 772.88, 2697699, 761.09, 780.43, 761.09
07/29/2016, 768.79, 3830103, 772.71, 778.55, 766.77
07/28/2016, 745.91, 3473040, 747.04, 748.65, 739.3
07/27/2016, 741.77, 1509133, 738.28, 744.46, 737
07/26/2016, 738.42, 1182993, 739.04, 741.69, 734.27
07/25/2016, 739.77, 1031643, 740.67, 742.61, 737.5
07/22/2016, 742.74, 1256741, 741.86, 743.24, 736.56
07/21/2016, 738.63, 1022229, 740.36, 741.69, 735.831
07/20/2016, 741.19, 1283931, 737.33, 742.13, 737.1
07/19/2016, 736.96, 1225467, 729.89, 736.99, 729
07/18/2016, 733.78, 1284740, 722.71, 736.13, 721.19
07/15/2016, 719.85, 1277514, 725.73, 725.74, 719.055
07/14/2016, 720.95, 949456, 721.58, 722.21, 718.03
07/13/2016, 716.98, 933352, 723.62, 724, 716.85
07/12/2016, 720.64, 1336112, 719.12, 722.94, 715.91
07/11/2016, 715.09, 1107039, 708.05, 716.51, 707.24
07/08/2016, 705.63, 1573909, 699.5, 705.71, 696.435
07/07/2016, 695.36, 1303661, 698.08, 698.2, 688.215
07/06/2016, 697.77, 1411080, 689.98, 701.68, 689.09
07/05/2016, 694.49, 1462879, 696.06, 696.94, 688.88
07/01/2016, 699.21, 1344387, 692.2, 700.65, 692.1301
06/30/2016, 692.1, 1597298, 685.47, 692.32, 683.65
06/29/2016, 684.11, 1931436, 683, 687.4292, 681.41
06/28/2016, 680.04, 2169704, 678.97, 680.33, 673
06/27/2016, 668.26, 2632011, 671, 672.3, 663.284
06/24/2016, 675.22, 4442943, 675.17, 689.4, 673.45
06/23/2016, 701.87, 2166183, 697.45, 701.95, 687
06/22/2016, 697.46, 1182161, 699.06, 700.86, 693.0819
06/21/2016, 695.94, 1464836, 698.4, 702.77, 692.01
06/20/2016, 693.71, 2080645, 698.77, 702.48, 693.41
06/17/2016, 691.72, 3397720, 708.65, 708.82, 688.4515
06/16/2016, 710.36, 1981657, 714.91, 716.65, 703.26
06/15/2016, 718.92, 1213386, 719, 722.98, 717.31
06/14/2016, 718.27, 1303808, 716.48, 722.47, 713.12
06/13/2016, 718.36, 1255199, 716.51, 725.44, 716.51
06/10/2016, 719.41, 1213989, 719.47, 725.89, 716.43
06/09/2016, 728.58, 987635, 722.87, 729.54, 722.3361
06/08/2016, 728.28, 1583325, 723.96, 728.57, 720.58
06/07/2016, 716.65, 1336348, 719.84, 721.98, 716.55
06/06/2016, 716.55, 1565955, 724.91, 724.91, 714.61
06/03/2016, 722.34, 1225924, 729.27, 729.49, 720.56
06/02/2016, 730.4, 1340664, 732.5, 733.02, 724.17
06/01/2016, 734.15, 1251468, 734.53, 737.21, 730.66
05/31/2016, 735.72, 2128358, 731.74, 739.73, 731.26
05/27/2016, 732.66, 1974425, 724.01, 733.936, 724
05/26/2016, 724.12, 1573635, 722.87, 728.33, 720.28
05/25/2016, 725.27, 1629790, 720.76, 727.51, 719.7047
05/24/2016, 720.09, 1926828, 706.86, 720.97, 706.86
05/23/2016, 704.24, 1326386, 706.53, 711.4781, 704.18
05/20/2016, 709.74, 1825830, 701.62, 714.58, 700.52
05/19/2016, 700.32, 1668887, 702.36, 706, 696.8
05/18/2016, 706.63, 1765632, 703.67, 711.6, 700.63
05/17/2016, 706.23, 1999883, 715.99, 721.52, 704.11
05/16/2016, 716.49, 1316719, 709.13, 718.48, 705.65
05/13/2016, 710.83, 1307559, 711.93, 716.6619, 709.26
05/12/2016, 713.31, 1361170, 717.06, 719.25, 709
05/11/2016, 715.29, 1690862, 723.41, 724.48, 712.8
05/10/2016, 723.18, 1568621, 716.75, 723.5, 715.72
05/09/2016, 712.9, 1509892, 712, 718.71, 710
05/06/2016, 711.12, 1828508, 698.38, 711.86, 698.1067
05/05/2016, 701.43, 1680220, 697.7, 702.3199, 695.72
05/04/2016, 695.7, 1692757, 690.49, 699.75, 689.01
05/03/2016, 692.36, 1541297, 696.87, 697.84, 692
05/02/2016, 698.21, 1645013, 697.63, 700.64, 691
04/29/2016, 693.01, 2486584, 690.7, 697.62, 689
04/28/2016, 691.02, 2859790, 708.26, 714.17, 689.55
04/27/2016, 705.84, 3094905, 707.29, 708.98, 692.3651
04/26/2016, 708.14, 2739133, 725.42, 725.766, 703.0264
04/25/2016, 723.15, 1956956, 716.1, 723.93, 715.59
04/22/2016, 718.77, 5949699, 726.3, 736.12, 713.61
04/21/2016, 759.14, 2995094, 755.38, 760.45, 749.55
04/20/2016, 752.67, 1526776, 758, 758.1315, 750.01
04/19/2016, 753.93, 2027962, 769.51, 769.9, 749.33
04/18/2016, 766.61, 1557199, 760.46, 768.05, 757.3
04/15/2016, 759, 1807062, 753.98, 761, 752.6938
04/14/2016, 753.2, 1134056, 754.01, 757.31, 752.705
04/13/2016, 751.72, 1707397, 749.16, 754.38, 744.261
04/12/2016, 743.09, 1349780, 738, 743.83, 731.01
04/11/2016, 736.1, 1218789, 743.02, 745, 736.05
04/08/2016, 739.15, 1289869, 743.97, 745.45, 735.55
04/07/2016, 740.28, 1452369, 745.37, 746.9999, 736.28
04/06/2016, 745.69, 1052171, 735.77, 746.24, 735.56
04/05/2016, 737.8, 1130817, 738, 742.8, 735.37
04/04/2016, 745.29, 1134214, 750.06, 752.8, 742.43
04/01/2016, 749.91, 1576240, 738.6, 750.34, 737
03/31/2016, 744.95, 1718638, 749.25, 750.85, 740.94
03/30/2016, 750.53, 1782278, 750.1, 757.88, 748.74
03/29/2016, 744.77, 1902254, 734.59, 747.25, 728.76
03/28/2016, 733.53, 1300817, 736.79, 738.99, 732.5
03/24/2016, 735.3, 1570474, 732.01, 737.747, 731
03/23/2016, 738.06, 1431130, 742.36, 745.7199, 736.15
03/22/2016, 740.75, 1269263, 737.46, 745, 737.46
03/21/2016, 742.09, 1835963, 736.5, 742.5, 733.5157
03/18/2016, 737.6, 2982194, 741.86, 742, 731.83
03/17/2016, 737.78, 1859562, 736.45, 743.07, 736
03/16/2016, 736.09, 1621412, 726.37, 737.47, 724.51
03/15/2016, 728.33, 1720790, 726.92, 732.29, 724.77
03/14/2016, 730.49, 1717002, 726.81, 735.5, 725.15
03/11/2016, 726.82, 1968164, 720, 726.92, 717.125
03/10/2016, 712.82, 2830630, 708.12, 716.44, 703.36
03/09/2016, 705.24, 1419661, 698.47, 705.68, 694
03/08/2016, 693.97, 2075305, 688.59, 703.79, 685.34
03/07/2016, 695.16, 2986064, 706.9, 708.0912, 686.9
03/04/2016, 710.89, 1971379, 714.99, 716.49, 706.02
03/03/2016, 712.42, 1956958, 718.68, 719.45, 706.02
03/02/2016, 718.85, 1629501, 719, 720, 712
03/01/2016, 718.81, 2148608, 703.62, 718.81, 699.77
02/29/2016, 697.77, 2478214, 700.32, 710.89, 697.68
02/26/2016, 705.07, 2241785, 708.58, 713.43, 700.86
02/25/2016, 705.75, 1640430, 700.01, 705.98, 690.585
02/24/2016, 699.56, 1961258, 688.92, 700, 680.78
02/23/2016, 695.85, 2006572, 701.45, 708.4, 693.58
02/22/2016, 706.46, 1949046, 707.45, 713.24, 702.51
02/19/2016, 700.91, 1585152, 695.03, 703.0805, 694.05
02/18/2016, 697.35, 1880306, 710, 712.35, 696.03
02/17/2016, 708.4, 2490021, 699, 709.75, 691.38
02/16/2016, 691, 2517324, 692.98, 698, 685.05
02/12/2016, 682.4, 2138937, 690.26, 693.75, 678.6
02/11/2016, 683.11, 3021587, 675, 689.35, 668.8675
02/10/2016, 684.12, 2629130, 686.86, 701.31, 682.13
02/09/2016, 678.11, 3605792, 672.32, 699.9, 668.77
02/08/2016, 682.74, 4241416, 667.85, 684.03, 663.06
02/05/2016, 683.57, 5098357, 703.87, 703.99, 680.15
02/04/2016, 708.01, 5157988, 722.81, 727, 701.86
02/03/2016, 726.95, 6166731, 770.22, 774.5, 720.5
02/02/2016, 764.65, 6340548, 784.5, 789.8699, 764.65
02/01/2016, 752, 5065235, 750.46, 757.86, 743.27
01/29/2016, 742.95, 3464432, 731.53, 744.9899, 726.8
01/28/2016, 730.96, 2664956, 722.22, 733.69, 712.35
01/27/2016, 699.99, 2175913, 713.67, 718.235, 694.39
01/26/2016, 713.04, 1329141, 713.85, 718.28, 706.48
01/25/2016, 711.67, 1709777, 723.58, 729.68, 710.01
01/22/2016, 725.25, 2009951, 723.6, 728.13, 720.121
01/21/2016, 706.59, 2411079, 702.18, 719.19, 694.46
01/20/2016, 698.45, 3441642, 688.61, 706.85, 673.26
01/19/2016, 701.79, 2264747, 703.3, 709.98, 693.4101
01/15/2016, 694.45, 3604137, 692.29, 706.74, 685.37
01/14/2016, 714.72, 2225495, 705.38, 721.925, 689.1
01/13/2016, 700.56, 2497086, 730.85, 734.74, 698.61
01/12/2016, 726.07, 2010026, 721.68, 728.75, 717.3165
01/11/2016, 716.03, 2089495, 716.61, 718.855, 703.54
01/08/2016, 714.47, 2449420, 731.45, 733.23, 713
01/07/2016, 726.39, 2960578, 730.31, 738.5, 719.06
01/06/2016, 743.62, 1943685, 730, 747.18, 728.92
01/05/2016, 742.58, 1949386, 746.45, 752, 738.64
01/04/2016, 741.84, 3271348, 743, 744.06, 731.2577
12/31/2015, 758.88, 1500129, 769.5, 769.5, 758.34
12/30/2015, 771, 1293514, 776.6, 777.6, 766.9
12/29/2015, 776.6, 1764044, 766.69, 779.98, 766.43
12/28/2015, 762.51, 1515574, 752.92, 762.99, 749.52
12/24/2015, 748.4, 527223, 749.55, 751.35, 746.62
12/23/2015, 750.31, 1566723, 753.47, 754.21, 744
12/22/2015, 750, 1365420, 751.65, 754.85, 745.53
12/21/2015, 747.77, 1524535, 746.13, 750, 740
12/18/2015, 739.31, 3140906, 746.51, 754.13, 738.15
12/17/2015, 749.43, 1551087, 762.42, 762.68, 749
12/16/2015, 758.09, 1986319, 750, 760.59, 739.435
12/15/2015, 743.4, 2661199, 753, 758.08, 743.01
12/14/2015, 747.77, 2417778, 741.79, 748.73, 724.17
12/11/2015, 738.87, 2223284, 741.16, 745.71, 736.75
12/10/2015, 749.46, 1988035, 752.85, 755.85, 743.83
12/09/2015, 751.61, 2697978, 759.17, 764.23, 737.001
12/08/2015, 762.37, 1829004, 757.89, 764.8, 754.2
12/07/2015, 763.25, 1811336, 767.77, 768.73, 755.09
12/04/2015, 766.81, 2756194, 753.1, 768.49, 750
12/03/2015, 752.54, 2589641, 766.01, 768.995, 745.63
12/02/2015, 762.38, 2196721, 768.9, 775.955, 758.96
12/01/2015, 767.04, 2131827, 747.11, 768.95, 746.7
11/30/2015, 742.6, 2045584, 748.81, 754.93, 741.27
11/27/2015, 750.26, 838528, 748.46, 753.41, 747.49
11/25/2015, 748.15, 1122224, 748.14, 752, 746.06
11/24/2015, 748.28, 2333700, 752, 755.279, 737.63
11/23/2015, 755.98, 1414640, 757.45, 762.7075, 751.82
11/20/2015, 756.6, 2212934, 746.53, 757.92, 743
11/19/2015, 738.41, 1327265, 738.74, 742, 737.43
11/18/2015, 740, 1683978, 727.58, 741.41, 727
11/17/2015, 725.3, 1507449, 729.29, 731.845, 723.027
11/16/2015, 728.96, 1904395, 715.6, 729.49, 711.33
11/13/2015, 717, 2072392, 729.17, 731.15, 716.73
11/12/2015, 731.23, 1836567, 731, 737.8, 728.645
11/11/2015, 735.4, 1366611, 732.46, 741, 730.23
11/10/2015, 728.32, 1606499, 724.4, 730.59, 718.5001
11/09/2015, 724.89, 2068920, 730.2, 734.71, 719.43
11/06/2015, 733.76, 1510586, 731.5, 735.41, 727.01
11/05/2015, 731.25, 1861100, 729.47, 739.48, 729.47
11/04/2015, 728.11, 1705745, 722, 733.1, 721.9
11/03/2015, 722.16, 1565355, 718.86, 724.65, 714.72
11/02/2015, 721.11, 1885155, 711.06, 721.62, 705.85
10/30/2015, 710.81, 1907732, 715.73, 718, 710.05
10/29/2015, 716.92, 1455508, 710.5, 718.26, 710.01
10/28/2015, 712.95, 2178841, 707.33, 712.98, 703.08
10/27/2015, 708.49, 2232183, 707.38, 713.62, 704.55
10/26/2015, 712.78, 2709292, 701.55, 719.15, 701.26
10/23/2015, 702, 6651909, 727.5, 730, 701.5
10/22/2015, 651.79, 3994360, 646.7, 657.8, 644.01
10/21/2015, 642.61, 1792869, 654.15, 655.87, 641.73
10/20/2015, 650.28, 2498077, 664.04, 664.7197, 644.195
10/19/2015, 666.1, 1465691, 661.18, 666.82, 659.58
10/16/2015, 662.2, 1610712, 664.11, 664.97, 657.2
10/15/2015, 661.74, 1832832, 654.66, 663.13, 654.46
10/14/2015, 651.16, 1413798, 653.21, 659.39, 648.85
10/13/2015, 652.3, 1806003, 643.15, 657.8125, 643.15
10/12/2015, 646.67, 1275565, 642.09, 648.5, 639.01
10/09/2015, 643.61, 1648656, 640, 645.99, 635.318
10/08/2015, 639.16, 2181990, 641.36, 644.45, 625.56
10/07/2015, 642.36, 2092536, 649.24, 650.609, 632.15
10/06/2015, 645.44, 2235078, 638.84, 649.25, 636.5295
10/05/2015, 641.47, 1802263, 632, 643.01, 627
10/02/2015, 626.91, 2681241, 607.2, 627.34, 603.13
10/01/2015, 611.29, 1866223, 608.37, 612.09, 599.85
09/30/2015, 608.42, 2412754, 603.28, 608.76, 600.73
09/29/2015, 594.97, 2310065, 597.28, 605, 590.22
09/28/2015, 594.89, 3118693, 610.34, 614.605, 589.38
09/25/2015, 611.97, 2173134, 629.77, 629.77, 611
09/24/2015, 625.8, 2238097, 616.64, 627.32, 612.4
09/23/2015, 622.36, 1470633, 622.05, 628.93, 620
09/22/2015, 622.69, 2561551, 627, 627.55, 615.43
09/21/2015, 635.44, 1786543, 634.4, 636.49, 625.94
09/18/2015, 629.25, 5123314, 636.79, 640, 627.02
09/17/2015, 642.9, 2259404, 637.79, 650.9, 635.02
09/16/2015, 635.98, 1276250, 635.47, 637.95, 632.32
09/15/2015, 635.14, 2082426, 626.7, 638.7, 623.78
09/14/2015, 623.24, 1701618, 625.7, 625.86, 619.43
09/11/2015, 625.77, 1372803, 619.75, 625.78, 617.42
09/10/2015, 621.35, 1903334, 613.1, 624.16, 611.43
09/09/2015, 612.72, 1699686, 621.22, 626.52, 609.6
09/08/2015, 614.66, 2277487, 612.49, 616.31, 604.12
09/04/2015, 600.7, 2087028, 600, 603.47, 595.25
09/03/2015, 606.25, 1757851, 617, 619.71, 602.8213
09/02/2015, 614.34, 2573982, 605.59, 614.34, 599.71
09/01/2015, 597.79, 3699844, 602.36, 612.86, 594.1
08/31/2015, 618.25, 2172168, 627.54, 635.8, 617.68
08/28/2015, 630.38, 1975818, 632.82, 636.88, 624.56
08/27/2015, 637.61, 3485906, 639.4, 643.59, 622
08/26/2015, 628.62, 4187276, 610.35, 631.71, 599.05
08/25/2015, 582.06, 3521916, 614.91, 617.45, 581.11
08/24/2015, 589.61, 5727282, 573, 614, 565.05
08/21/2015, 612.48, 4261666, 639.78, 640.05, 612.33
08/20/2015, 646.83, 2854028, 655.46, 662.99, 642.9
08/19/2015, 660.9, 2132265, 656.6, 667, 654.19
08/18/2015, 656.13, 1455664, 661.9, 664, 653.46
08/17/2015, 660.87, 1050553, 656.8, 661.38, 651.24
08/14/2015, 657.12, 1071333, 655.01, 659.855, 652.66
08/13/2015, 656.45, 1807182, 659.323, 664.5, 651.661
08/12/2015, 659.56, 2938651, 663.08, 665, 652.29
08/11/2015, 660.78, 5016425, 669.2, 674.9, 654.27
08/10/2015, 633.73, 1653836, 639.48, 643.44, 631.249
08/07/2015, 635.3, 1403441, 640.23, 642.68, 629.71
08/06/2015, 642.68, 1572150, 645, 645.379, 632.25
08/05/2015, 643.78, 2331720, 634.33, 647.86, 633.16
08/04/2015, 629.25, 1486858, 628.42, 634.81, 627.16
08/03/2015, 631.21, 1301439, 625.34, 633.0556, 625.34
07/31/2015, 625.61, 1705286, 631.38, 632.91, 625.5
07/30/2015, 632.59, 1472286, 630, 635.22, 622.05
07/29/2015, 631.93, 1573146, 628.8, 633.36, 622.65
07/28/2015, 628, 1713684, 632.83, 632.83, 623.31
07/27/2015, 627.26, 2673801, 621, 634.3, 620.5
07/24/2015, 623.56, 3622089, 647, 648.17, 622.52
07/23/2015, 644.28, 3014035, 661.27, 663.63, 641
07/22/2015, 662.1, 3707818, 660.89, 678.64, 659
07/21/2015, 662.3, 3363342, 655.21, 673, 654.3
07/20/2015, 663.02, 5857092, 659.24, 668.88, 653.01
07/17/2015, 672.93, 11153500, 649, 674.468, 645
07/16/2015, 579.85, 4559712, 565.12, 580.68, 565
07/15/2015, 560.22, 1782264, 560.13, 566.5029, 556.79
07/14/2015, 561.1, 3231284, 546.76, 565.8487, 546.71
07/13/2015, 546.55, 2204610, 532.88, 547.11, 532.4001
07/10/2015, 530.13, 1954951, 526.29, 532.56, 525.55
07/09/2015, 520.68, 1840155, 523.12, 523.77, 520.35
07/08/2015, 516.83, 1293372, 521.05, 522.734, 516.11
07/07/2015, 525.02, 1595672, 523.13, 526.18, 515.18
07/06/2015, 522.86, 1278587, 519.5, 525.25, 519
07/02/2015, 523.4, 1235773, 521.08, 524.65, 521.08
07/01/2015, 521.84, 1961197, 524.73, 525.69, 518.2305
06/30/2015, 520.51, 2234284, 526.02, 526.25, 520.5
06/29/2015, 521.52, 1935361, 525.01, 528.61, 520.54
06/26/2015, 531.69, 2108629, 537.26, 537.76, 531.35
06/25/2015, 535.23, 1332412, 538.87, 540.9, 535.23
06/24/2015, 537.84, 1286576, 540, 540, 535.66
06/23/2015, 540.48, 1196115, 539.64, 541.499, 535.25
06/22/2015, 538.19, 1243535, 539.59, 543.74, 537.53
06/19/2015, 536.69, 1890916, 537.21, 538.25, 533.01
06/18/2015, 536.73, 1832450, 531, 538.15, 530.79
06/17/2015, 529.26, 1269113, 529.37, 530.98, 525.1
06/16/2015, 528.15, 1071728, 528.4, 529.6399, 525.56
06/15/2015, 527.2, 1632675, 528, 528.3, 524
06/12/2015, 532.33, 955489, 531.6, 533.12, 530.16
06/11/2015, 534.61, 1208632, 538.425, 538.98, 533.02
06/10/2015, 536.69, 1813775, 529.36, 538.36, 529.35
06/09/2015, 526.69, 1454172, 527.56, 529.2, 523.01
06/08/2015, 526.83, 1523960, 533.31, 534.12, 526.24
06/05/2015, 533.33, 1375008, 536.35, 537.2, 532.52
06/04/2015, 536.7, 1346044, 537.76, 540.59, 534.32
06/03/2015, 540.31, 1716836, 539.91, 543.5, 537.11
06/02/2015, 539.18, 1936721, 532.93, 543, 531.33
06/01/2015, 533.99, 1900257, 536.79, 536.79, 529.76
05/29/2015, 532.11, 2590445, 537.37, 538.63, 531.45
05/28/2015, 539.78, 1029764, 538.01, 540.61, 536.25
05/27/2015, 539.79, 1524783, 532.8, 540.55, 531.71
05/26/2015, 532.32, 2404462, 538.12, 539, 529.88
05/22/2015, 540.11, 1175065, 540.15, 544.19, 539.51
05/21/2015, 542.51, 1461431, 537.95, 543.8399, 535.98
05/20/2015, 539.27, 1430565, 538.49, 542.92, 532.972
05/19/2015, 537.36, 1964037, 533.98, 540.66, 533.04
05/18/2015, 532.3, 2001117, 532.01, 534.82, 528.85
05/15/2015, 533.85, 1965088, 539.18, 539.2743, 530.38
05/14/2015, 538.4, 1401005, 533.77, 539, 532.41
05/13/2015, 529.62, 1253005, 530.56, 534.3215, 528.655
05/12/2015, 529.04, 1633180, 531.6, 533.2089, 525.26
05/11/2015, 535.7, 904465, 538.37, 541.98, 535.4
05/08/2015, 538.22, 1527181, 536.65, 541.15, 536
05/07/2015, 530.7, 1543986, 523.99, 533.46, 521.75
05/06/2015, 524.22, 1566865, 531.24, 532.38, 521.085
05/05/2015, 530.8, 1380519, 538.21, 539.74, 530.3906
05/04/2015, 540.78, 1303830, 538.53, 544.07, 535.06
05/01/2015, 537.9, 1758085, 538.43, 539.54, 532.1
04/30/2015, 537.34, 2080834, 547.87, 548.59, 535.05
04/29/2015, 549.08, 1696886, 550.47, 553.68, 546.905
04/28/2015, 553.68, 1490735, 554.64, 556.02, 550.366
04/27/2015, 555.37, 2390696, 563.39, 565.95, 553.2001
================================================
FILE: machine_learning/mfcc.py
================================================
"""
Mel Frequency Cepstral Coefficients (MFCC) Calculation
MFCC is an algorithm widely used in audio and speech processing to represent the
short-term power spectrum of a sound signal in a more compact and
discriminative way. It is particularly popular in speech and audio processing
tasks such as speech recognition and speaker identification.
How Mel Frequency Cepstral Coefficients are Calculated:
1. Preprocessing:
- Load an audio signal and normalize it to ensure that the values fall
within a specific range (e.g., between -1 and 1).
- Frame the audio signal into overlapping, fixed-length segments, typically
using a technique like windowing to reduce spectral leakage.
2. Fourier Transform:
- Apply a Fast Fourier Transform (FFT) to each audio frame to convert it
from the time domain to the frequency domain. This results in a
representation of the audio frame as a sequence of frequency components.
3. Power Spectrum:
- Calculate the power spectrum by taking the squared magnitude of each
frequency component obtained from the FFT. This step measures the energy
distribution across different frequency bands.
4. Mel Filterbank:
- Apply a set of triangular filterbanks spaced in the Mel frequency scale
to the power spectrum. These filters mimic the human auditory system's
frequency response. Each filterbank sums the power spectrum values within
its band.
5. Logarithmic Compression:
- Take the logarithm (typically base 10) of the filterbank values to
compress the dynamic range. This step mimics the logarithmic response of
the human ear to sound intensity.
6. Discrete Cosine Transform (DCT):
- Apply the Discrete Cosine Transform to the log filterbank energies to
obtain the MFCC coefficients. This transformation helps decorrelate the
filterbank energies and captures the most important features of the audio
signal.
7. Feature Extraction:
- Select a subset of the DCT coefficients to form the feature vector.
Often, the first few coefficients (e.g., 12-13) are used for most
applications.
References:
- Mel-Frequency Cepstral Coefficients (MFCCs):
https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
- Speech and Language Processing by Daniel Jurafsky & James H. Martin:
https://web.stanford.edu/~jurafsky/slp3/
- Mel Frequency Cepstral Coefficient (MFCC) tutorial
http://practicalcryptography.com/miscellaneous/machine-learning
/guide-mel-frequency-cepstral-coefficients-mfccs/
Author: Amir Lavasani
"""
import logging
import numpy as np
import scipy.fftpack as fft
from scipy.signal import get_window
logging.basicConfig(filename=f"{__file__}.log", level=logging.INFO)
def mfcc(
audio: np.ndarray,
sample_rate: int,
ftt_size: int = 1024,
hop_length: int = 20,
mel_filter_num: int = 10,
dct_filter_num: int = 40,
) -> np.ndarray:
"""
Calculate Mel Frequency Cepstral Coefficients (MFCCs) from an audio signal.
Args:
audio: The input audio signal.
sample_rate: The sample rate of the audio signal (in Hz).
ftt_size: The size of the FFT window (default is 1024).
hop_length: The hop length for frame creation (default is 20ms).
mel_filter_num: The number of Mel filters (default is 10).
dct_filter_num: The number of DCT filters (default is 40).
Returns:
A matrix of MFCCs for the input audio.
Raises:
ValueError: If the input audio is empty.
Example:
>>> sample_rate = 44100 # Sample rate of 44.1 kHz
>>> duration = 2.0  # Duration of 2 seconds
>>> t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
>>> audio = 0.5 * np.sin(2 * np.pi * 440.0 * t) # Generate a 440 Hz sine wave
>>> mfccs = mfcc(audio, sample_rate)
>>> mfccs.shape
(40, 101)
"""
logging.info(f"Sample rate: {sample_rate}Hz")
logging.info(f"Audio duration: {len(audio) / sample_rate}s")
logging.info(f"Audio min: {np.min(audio)}")
logging.info(f"Audio max: {np.max(audio)}")
# normalize audio
audio_normalized = normalize(audio)
logging.info(f"Normalized audio min: {np.min(audio_normalized)}")
logging.info(f"Normalized audio max: {np.max(audio_normalized)}")
# frame audio into overlapping frames
audio_framed = audio_frames(
audio_normalized, sample_rate, ftt_size=ftt_size, hop_length=hop_length
)
logging.info(f"Framed audio shape: {audio_framed.shape}")
logging.info(f"First frame: {audio_framed[0]}")
# convert to frequency domain
# For simplicity we will choose the Hanning window.
window = get_window("hann", ftt_size, fftbins=True)
audio_windowed = audio_framed * window
logging.info(f"Windowed audio shape: {audio_windowed.shape}")
logging.info(f"First frame: {audio_windowed[0]}")
audio_fft = calculate_fft(audio_windowed, ftt_size)
logging.info(f"fft audio shape: {audio_fft.shape}")
logging.info(f"First frame: {audio_fft[0]}")
audio_power = calculate_signal_power(audio_fft)
logging.info(f"power audio shape: {audio_power.shape}")
logging.info(f"First frame: {audio_power[0]}")
filters = mel_spaced_filterbank(sample_rate, mel_filter_num, ftt_size)
logging.info(f"filters shape: {filters.shape}")
audio_filtered = np.dot(filters, np.transpose(audio_power))
audio_log = 10.0 * np.log10(audio_filtered)
logging.info(f"audio_log shape: {audio_log.shape}")
dct_filters = discrete_cosine_transform(dct_filter_num, mel_filter_num)
cepstral_coefficents = np.dot(dct_filters, audio_log)
logging.info(f"cepstral_coefficents shape: {cepstral_coefficents.shape}")
return cepstral_coefficents
def normalize(audio: np.ndarray) -> np.ndarray:
"""
Normalize an audio signal by scaling it to have values between -1 and 1.
Args:
audio: The input audio signal.
Returns:
The normalized audio signal.
Examples:
>>> audio = np.array([1, 2, 3, 4, 5])
>>> normalized_audio = normalize(audio)
>>> float(np.max(normalized_audio))
1.0
>>> float(np.min(normalized_audio))
0.2
"""
# Divide the entire audio signal by the maximum absolute value
return audio / np.max(np.abs(audio))
def audio_frames(
audio: np.ndarray,
sample_rate: int,
hop_length: int = 20,
ftt_size: int = 1024,
) -> np.ndarray:
"""
Split an audio signal into overlapping frames.
Args:
audio: The input audio signal.
sample_rate: The sample rate of the audio signal.
hop_length: The length of the hopping (default is 20ms).
ftt_size: The size of the FFT window (default is 1024).
Returns:
An array of overlapping frames.
Examples:
>>> audio = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]*1000)
>>> sample_rate = 8000
>>> frames = audio_frames(audio, sample_rate, hop_length=10, ftt_size=512)
>>> frames.shape
(126, 512)
"""
hop_size = np.round(sample_rate * hop_length / 1000).astype(int)
# Pad the audio signal to handle edge cases
audio = np.pad(audio, int(ftt_size / 2), mode="reflect")
# Calculate the number of frames
frame_count = int((len(audio) - ftt_size) / hop_size) + 1
# Initialize an array to store the frames
frames = np.zeros((frame_count, ftt_size))
# Split the audio signal into frames
for n in range(frame_count):
frames[n] = audio[n * hop_size : n * hop_size + ftt_size]
return frames
def calculate_fft(audio_windowed: np.ndarray, ftt_size: int = 1024) -> np.ndarray:
"""
Calculate the Fast Fourier Transform (FFT) of windowed audio data.
Args:
audio_windowed: The windowed audio signal.
ftt_size: The size of the FFT (default is 1024).
Returns:
The FFT of the audio data.
Examples:
>>> audio_windowed = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
>>> audio_fft = calculate_fft(audio_windowed, ftt_size=4)
>>> bool(np.allclose(audio_fft[0], np.array([6.0+0.j, -1.5+0.8660254j,
... -1.5-0.8660254j])))
True
"""
# Transpose the audio data to have time in rows and channels in columns
audio_transposed = np.transpose(audio_windowed)
# Initialize an array to store the FFT results
audio_fft = np.empty(
(int(1 + ftt_size // 2), audio_transposed.shape[1]),
dtype=np.complex64,
order="F",
)
# Compute FFT for each channel
for n in range(audio_fft.shape[1]):
audio_fft[:, n] = fft.fft(audio_transposed[:, n], axis=0)[: audio_fft.shape[0]]
# Transpose the FFT results back to the original shape
return np.transpose(audio_fft)
def calculate_signal_power(audio_fft: np.ndarray) -> np.ndarray:
"""
Calculate the power of the audio signal from its FFT.
Args:
audio_fft: The FFT of the audio signal.
Returns:
The power of the audio signal.
Examples:
>>> audio_fft = np.array([1+2j, 2+3j, 3+4j, 4+5j])
>>> power = calculate_signal_power(audio_fft)
>>> np.allclose(power, np.array([5, 13, 25, 41]))
True
"""
# Calculate the power by squaring the absolute values of the FFT coefficients
return np.square(np.abs(audio_fft))
def freq_to_mel(freq: float) -> float:
"""
Convert a frequency in Hertz to the mel scale.
Args:
freq: The frequency in Hertz.
Returns:
The frequency in mel scale.
Examples:
>>> float(round(freq_to_mel(1000), 2))
999.99
"""
# Use the formula to convert frequency to the mel scale
return 2595.0 * np.log10(1.0 + freq / 700.0)
def mel_to_freq(mels: float) -> float:
"""
Convert a frequency in the mel scale to Hertz.
Args:
mels: The frequency in mel scale.
Returns:
The frequency in Hertz.
Examples:
>>> round(mel_to_freq(999.99), 2)
1000.01
"""
# Use the formula to convert mel scale to frequency
return 700.0 * (10.0 ** (mels / 2595.0) - 1.0)
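# Illustrative sketch (not part of the original module): freq_to_mel and
# mel_to_freq are exact inverses, so a frequency should survive a round trip
# Hz -> mel -> Hz. The helper name below is made up for this example.
def _mel_round_trip_demo(freq: float = 440.0) -> float:
    """
    Convert a frequency to the mel scale and straight back to Hertz.
    Examples:
        >>> round(_mel_round_trip_demo(440.0), 6)
        440.0
    """
    return float(mel_to_freq(freq_to_mel(freq)))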
def mel_spaced_filterbank(
sample_rate: int, mel_filter_num: int = 10, ftt_size: int = 1024
) -> np.ndarray:
"""
Create a Mel-spaced filter bank for audio processing.
Args:
sample_rate: The sample rate of the audio.
mel_filter_num: The number of mel filters (default is 10).
ftt_size: The size of the FFT (default is 1024).
Returns:
Mel-spaced filter bank.
Examples:
>>> float(round(mel_spaced_filterbank(8000, 10, 1024)[0][1], 10))
0.0004603981
"""
freq_min = 0
freq_high = sample_rate // 2
logging.info(f"Minimum frequency: {freq_min}")
logging.info(f"Maximum frequency: {freq_high}")
# Calculate filter points and mel frequencies
filter_points, mel_freqs = get_filter_points(
sample_rate,
freq_min,
freq_high,
mel_filter_num,
ftt_size,
)
filters = get_filters(filter_points, ftt_size)
# normalize filters
# taken from the librosa library
enorm = 2.0 / (mel_freqs[2 : mel_filter_num + 2] - mel_freqs[:mel_filter_num])
return filters * enorm[:, np.newaxis]
def get_filters(filter_points: np.ndarray, ftt_size: int) -> np.ndarray:
"""
Generate filters for audio processing.
Args:
filter_points: A list of filter points.
ftt_size: The size of the FFT.
Returns:
A matrix of filters.
Examples:
>>> get_filters(np.array([0, 20, 51, 95, 161, 256], dtype=int), 512).shape
(4, 257)
"""
num_filters = len(filter_points) - 2
filters = np.zeros((num_filters, int(ftt_size / 2) + 1))
for n in range(num_filters):
start = filter_points[n]
mid = filter_points[n + 1]
end = filter_points[n + 2]
# Linearly increase values from 0 to 1
filters[n, start:mid] = np.linspace(0, 1, mid - start)
# Linearly decrease values from 1 to 0
filters[n, mid:end] = np.linspace(1, 0, end - mid)
return filters
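# Illustrative sketch (not part of the original module): every triangular
# filter built by get_filters ramps up to 1.0 at its midpoint bin and back
# down to 0, so its peak value should be exactly 1.0. The helper name below
# is made up for this example.
def _triangular_filter_peak_demo() -> float:
    """
    Examples:
        >>> _triangular_filter_peak_demo()
        1.0
    """
    demo_filters = get_filters(np.array([0, 20, 51, 95, 161, 256], dtype=int), 512)
    return float(np.max(demo_filters[0]))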
def get_filter_points(
sample_rate: int,
freq_min: int,
freq_high: int,
mel_filter_num: int = 10,
ftt_size: int = 1024,
) -> tuple[np.ndarray, np.ndarray]:
"""
Calculate the filter points and frequencies for mel frequency filters.
Args:
sample_rate: The sample rate of the audio.
freq_min: The minimum frequency in Hertz.
freq_high: The maximum frequency in Hertz.
mel_filter_num: The number of mel filters (default is 10).
ftt_size: The size of the FFT (default is 1024).
Returns:
Filter points and corresponding frequencies.
Examples:
>>> filter_points = get_filter_points(8000, 0, 4000, mel_filter_num=4, ftt_size=512)
>>> filter_points[0]
array([ 0, 20, 51, 95, 161, 256])
>>> filter_points[1]
array([ 0. , 324.46707094, 799.33254207, 1494.30973963,
2511.42581671, 4000. ])
"""
# Convert minimum and maximum frequencies to mel scale
fmin_mel = freq_to_mel(freq_min)
fmax_mel = freq_to_mel(freq_high)
logging.info(f"MEL min: {fmin_mel}")
logging.info(f"MEL max: {fmax_mel}")
# Generate equally spaced mel frequencies
mels = np.linspace(fmin_mel, fmax_mel, num=mel_filter_num + 2)
# Convert mel frequencies back to Hertz
freqs = mel_to_freq(mels)
# Calculate filter points as integer values
filter_points = np.floor((ftt_size + 1) / sample_rate * freqs).astype(int)
return filter_points, freqs
def discrete_cosine_transform(dct_filter_num: int, filter_num: int) -> np.ndarray:
"""
Compute the Discrete Cosine Transform (DCT) basis matrix.
Args:
dct_filter_num: The number of DCT filters to generate.
filter_num: The number of the fbank filters.
Returns:
The DCT basis matrix.
Examples:
>>> float(round(discrete_cosine_transform(3, 5)[0][0], 5))
0.44721
"""
basis = np.empty((dct_filter_num, filter_num))
basis[0, :] = 1.0 / np.sqrt(filter_num)
samples = np.arange(1, 2 * filter_num, 2) * np.pi / (2.0 * filter_num)
for i in range(1, dct_filter_num):
basis[i, :] = np.cos(i * samples) * np.sqrt(2.0 / filter_num)
return basis
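# Illustrative sketch (not part of the original module): with the scaling used
# above, the DCT basis rows are orthonormal, so basis @ basis.T should equal
# the identity matrix whenever dct_filter_num <= filter_num. The helper name
# below is made up for this example.
def _dct_orthonormality_demo() -> bool:
    """
    Examples:
        >>> _dct_orthonormality_demo()
        True
    """
    basis = discrete_cosine_transform(3, 5)
    return bool(np.allclose(basis @ basis.T, np.eye(3)))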
def example(wav_file_path: str = "./path-to-file/sample.wav") -> np.ndarray:
"""
Example function to calculate Mel Frequency Cepstral Coefficients
(MFCCs) from an audio file.
Args:
wav_file_path: The path to the WAV audio file.
Returns:
np.ndarray: The computed MFCCs for the audio.
"""
from scipy.io import wavfile
# Load the audio from the WAV file
sample_rate, audio = wavfile.read(wav_file_path)
# Calculate MFCCs
return mfcc(audio, sample_rate)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: machine_learning/multilayer_perceptron_classifier.py
================================================
from sklearn.neural_network import MLPClassifier
X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
y = [0, 1, 0, 0]
clf = MLPClassifier(
solver="lbfgs", alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1
)
clf.fit(X, y)
test = [[0.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
Y = clf.predict(test)
def wrapper(y):
"""
>>> [int(x) for x in wrapper(Y)]
[0, 0, 1]
"""
return list(y)
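# Illustrative sketch (not in the original file): MLPClassifier also provides
# score(), which returns the mean accuracy, so evaluating it on the training
# points above is a quick sanity check on the fit. The helper name below is
# made up for this example.
def training_accuracy() -> float:
    """
    >>> 0.0 <= training_accuracy() <= 1.0
    True
    """
    return float(clf.score(X, y))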
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: machine_learning/polynomial_regression.py
================================================
"""
Polynomial regression is a type of regression analysis that models the relationship
between a predictor x and the response y as an mth-degree polynomial:
y = β₀ + β₁x + β₂x² + ... + βₘxᵐ + ε
By treating x, x², ..., xᵐ as distinct variables, we see that polynomial regression is a
special case of multiple linear regression. Therefore, we can use ordinary least squares
(OLS) estimation to estimate the vector of model parameters β = (β₀, β₁, β₂, ..., βₘ)
for polynomial regression:
β = (XᵀX)⁻¹Xᵀy = X⁺y
where X is the design matrix, y is the response vector, and X⁺ denotes the Moore-Penrose
pseudoinverse of X. In the case of polynomial regression, the design matrix is
|1 x₁ x₁² ⋯ x₁ᵐ|
X = |1 x₂ x₂² ⋯ x₂ᵐ|
|⋮ ⋮ ⋮ ⋱ ⋮ |
|1 xₙ xₙ² ⋯ xₙᵐ|
In OLS estimation, inverting XᵀX to compute X⁺ can be very numerically unstable. This
implementation sidesteps this need to invert XᵀX by computing X⁺ using singular value
decomposition (SVD):
β = VΣ⁺Uᵀy
where UΣVᵀ is an SVD of X.
References:
- https://en.wikipedia.org/wiki/Polynomial_regression
- https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse
- https://en.wikipedia.org/wiki/Numerical_methods_for_linear_least_squares
- https://en.wikipedia.org/wiki/Singular_value_decomposition
"""
import matplotlib.pyplot as plt
import numpy as np
class PolynomialRegression:
__slots__ = "degree", "params"
def __init__(self, degree: int) -> None:
"""
@raises ValueError: if the polynomial degree is negative
"""
if degree < 0:
raise ValueError("Polynomial degree must be non-negative")
self.degree = degree
self.params = None
@staticmethod
def _design_matrix(data: np.ndarray, degree: int) -> np.ndarray:
"""
Constructs a polynomial regression design matrix for the given input data. For
input data x = (x₁, x₂, ..., xₙ) and polynomial degree m, the design matrix is
the Vandermonde matrix
|1 x₁ x₁² ⋯ x₁ᵐ|
X = |1 x₂ x₂² ⋯ x₂ᵐ|
|⋮ ⋮ ⋮ ⋱ ⋮ |
|1 xₙ xₙ² ⋯ xₙᵐ|
Reference: https://en.wikipedia.org/wiki/Vandermonde_matrix
@param data: the input predictor values x, either for model fitting or for
prediction
@param degree: the polynomial degree m
@returns: the Vandermonde matrix X (see above)
@raises ValueError: if input data is not N x 1
>>> x = np.array([0, 1, 2])
>>> PolynomialRegression._design_matrix(x, degree=0)
array([[1],
[1],
[1]])
>>> PolynomialRegression._design_matrix(x, degree=1)
array([[1, 0],
[1, 1],
[1, 2]])
>>> PolynomialRegression._design_matrix(x, degree=2)
array([[1, 0, 0],
[1, 1, 1],
[1, 2, 4]])
>>> PolynomialRegression._design_matrix(x, degree=3)
array([[1, 0, 0, 0],
[1, 1, 1, 1],
[1, 2, 4, 8]])
>>> PolynomialRegression._design_matrix(np.array([[0, 0], [0 , 0]]), degree=3)
Traceback (most recent call last):
...
ValueError: Data must have dimensions N x 1
"""
_rows, *remaining = data.shape
if remaining:
raise ValueError("Data must have dimensions N x 1")
return np.vander(data, N=degree + 1, increasing=True)
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
"""
Computes the polynomial regression model parameters using ordinary least squares
(OLS) estimation:
β = (XᵀX)⁻¹Xᵀy = X⁺y
where X⁺ denotes the Moore-Penrose pseudoinverse of the design matrix X. This
function computes X⁺ using singular value decomposition (SVD).
References:
- https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse
- https://en.wikipedia.org/wiki/Singular_value_decomposition
- https://en.wikipedia.org/wiki/Multicollinearity
@param x_train: the predictor values x for model fitting
@param y_train: the response values y for model fitting
@raises ArithmeticError: if X isn't full rank, then XᵀX is singular and β
doesn't exist
>>> x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
>>> y = x**3 - 2 * x**2 + 3 * x - 5
>>> poly_reg = PolynomialRegression(degree=3)
>>> poly_reg.fit(x, y)
>>> poly_reg.params
array([-5., 3., -2., 1.])
>>> poly_reg = PolynomialRegression(degree=20)
>>> poly_reg.fit(x, y)
Traceback (most recent call last):
...
ArithmeticError: Design matrix is not full rank, can't compute coefficients
Make sure errors don't grow too large:
>>> coefs = np.array([-250, 50, -2, 36, 20, -12, 10, 2, -1, -15, 1])
>>> y = PolynomialRegression._design_matrix(x, len(coefs) - 1) @ coefs
>>> poly_reg = PolynomialRegression(degree=len(coefs) - 1)
>>> poly_reg.fit(x, y)
>>> np.allclose(poly_reg.params, coefs, atol=10e-3)
True
"""
X = PolynomialRegression._design_matrix(x_train, self.degree) # noqa: N806
_, cols = X.shape
if np.linalg.matrix_rank(X) < cols:
raise ArithmeticError(
"Design matrix is not full rank, can't compute coefficients"
)
# np.linalg.pinv() computes the Moore-Penrose pseudoinverse using SVD
self.params = np.linalg.pinv(X) @ y_train
def predict(self, data: np.ndarray) -> np.ndarray:
"""
Computes the predicted response values y for the given input data by
constructing the design matrix X and evaluating y = Xβ.
@param data: the predictor values x for prediction
@returns: the predicted response values y = Xβ
@raises ArithmeticError: if this function is called before the model
parameters are fit
>>> x = np.array([0, 1, 2, 3, 4])
>>> y = x**3 - 2 * x**2 + 3 * x - 5
>>> poly_reg = PolynomialRegression(degree=3)
>>> poly_reg.fit(x, y)
>>> poly_reg.predict(np.array([-1]))
array([-11.])
>>> poly_reg.predict(np.array([-2]))
array([-27.])
>>> poly_reg.predict(np.array([6]))
array([157.])
>>> PolynomialRegression(degree=3).predict(x)
Traceback (most recent call last):
...
ArithmeticError: Predictor hasn't been fit yet
"""
if self.params is None:
raise ArithmeticError("Predictor hasn't been fit yet")
return PolynomialRegression._design_matrix(data, self.degree) @ self.params
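# Illustrative sketch (not part of the original module): fit() above relies on
# np.linalg.pinv, which computes the Moore-Penrose pseudoinverse via SVD. The
# helper below spells that route out explicitly to show that
# beta = V Sigma^+ U^T y matches the pinv-based estimate; its name and the toy
# data are made up for this example.
def _svd_ols_demo() -> bool:
    """
    >>> _svd_ols_demo()
    True
    """
    x = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
    y = 2.0 + 3.0 * x + 0.5 * x**2
    design = np.vander(x, N=3, increasing=True)  # degree-2 Vandermonde matrix
    u, sigma, vt = np.linalg.svd(design, full_matrices=False)  # thin SVD of X
    beta_svd = vt.T @ np.diag(1.0 / sigma) @ u.T @ y  # V Sigma^+ U^T y
    beta_pinv = np.linalg.pinv(design) @ y
    return bool(np.allclose(beta_svd, beta_pinv))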
def main() -> None:
"""
Fit a polynomial regression model to predict fuel efficiency using seaborn's mpg
dataset
>>> pass # Placeholder, function is only for demo purposes
"""
import seaborn as sns
mpg_data = sns.load_dataset("mpg")
poly_reg = PolynomialRegression(degree=2)
poly_reg.fit(mpg_data.weight, mpg_data.mpg)
weight_sorted = np.sort(mpg_data.weight)
predictions = poly_reg.predict(weight_sorted)
plt.scatter(mpg_data.weight, mpg_data.mpg, color="gray", alpha=0.5)
plt.plot(weight_sorted, predictions, color="red", linewidth=3)
plt.title("Predicting Fuel Efficiency Using Polynomial Regression")
plt.xlabel("Weight (lbs)")
plt.ylabel("Fuel Efficiency (mpg)")
plt.show()
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: machine_learning/principle_component_analysis.py
================================================
"""
Principal Component Analysis (PCA) is a dimensionality reduction technique
used in machine learning. It transforms high-dimensional data into a lower-dimensional
representation while retaining as much variance as possible.
This implementation follows best practices, including:
- Standardizing the dataset.
- Computing principal components using Singular Value Decomposition (SVD).
- Returning transformed data and explained variance ratio.
"""
import doctest
import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
def collect_dataset() -> tuple[np.ndarray, np.ndarray]:
"""
Collects the dataset (Iris dataset) and returns feature matrix and target values.
:return: Tuple containing feature matrix (X) and target labels (y)
Example:
>>> X, y = collect_dataset()
>>> X.shape
(150, 4)
>>> y.shape
(150,)
"""
data = load_iris()
return np.array(data.data), np.array(data.target)
def apply_pca(data_x: np.ndarray, n_components: int) -> tuple[np.ndarray, np.ndarray]:
"""
Applies Principal Component Analysis (PCA) to reduce dimensionality.
:param data_x: Original dataset (features)
:param n_components: Number of principal components to retain
:return: Tuple containing transformed dataset and explained variance ratio
Example:
>>> X, _ = collect_dataset()
>>> transformed_X, variance = apply_pca(X, 2)
>>> transformed_X.shape
(150, 2)
>>> len(variance) == 2
True
"""
# Standardizing the dataset
scaler = StandardScaler()
data_x_scaled = scaler.fit_transform(data_x)
# Applying PCA
pca = PCA(n_components=n_components)
principal_components = pca.fit_transform(data_x_scaled)
return principal_components, pca.explained_variance_ratio_
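# Illustrative sketch (not part of the original module): the same explained
# variance ratios can be recovered from a plain SVD of the standardized data,
# which is essentially what sklearn's PCA does internally. The helper name
# below is made up for this example.
def _pca_via_svd_demo(n_components: int = 2) -> bool:
    """
    >>> _pca_via_svd_demo()
    True
    """
    data_x, _ = collect_dataset()
    standardized = StandardScaler().fit_transform(data_x)
    _, singular_values, _ = np.linalg.svd(standardized, full_matrices=False)
    component_variance = singular_values**2 / (standardized.shape[0] - 1)
    ratio_svd = component_variance[:n_components] / component_variance.sum()
    _, ratio_sklearn = apply_pca(data_x, n_components)
    return bool(np.allclose(ratio_svd, ratio_sklearn))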
def main() -> None:
"""
Driver function to execute PCA and display results.
"""
data_x, _data_y = collect_dataset()
# Number of principal components to retain
n_components = 2
# Apply PCA
transformed_data, variance_ratio = apply_pca(data_x, n_components)
print("Transformed Dataset (First 5 rows):")
print(transformed_data[:5])
print("\nExplained Variance Ratio:")
print(variance_ratio)
if __name__ == "__main__":
doctest.testmod()
main()
================================================
FILE: machine_learning/random_forest_classifier.py.broken.txt
================================================
# Random Forest Classifier Example
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
def main():
"""
Random Forest Classifier Example using sklearn function.
Iris type dataset is used to demonstrate algorithm.
"""
# Load Iris dataset
iris = load_iris()
# Split dataset into train and test data
x = iris["data"] # features
y = iris["target"]
x_train, x_test, y_train, y_test = train_test_split(
x, y, test_size=0.3, random_state=1
)
# Random Forest Classifier
rand_for = RandomForestClassifier(random_state=42, n_estimators=100)
rand_for.fit(x_train, y_train)
# Display confusion matrix of the classifier
# (ConfusionMatrixDisplay.from_estimator replaces the removed plot_confusion_matrix)
ConfusionMatrixDisplay.from_estimator(
rand_for,
x_test,
y_test,
display_labels=iris["target_names"],
cmap="Blues",
normalize="true",
)
plt.title("Normalized Confusion Matrix - IRIS Dataset")
plt.show()
if __name__ == "__main__":
main()
================================================
FILE: machine_learning/random_forest_regressor.py.broken.txt
================================================
# Random Forest Regressor Example
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
def main():
"""
Random Forest Regressor Example using sklearn function.
The California housing dataset is used to demonstrate the algorithm
(the Boston house price dataset was removed from scikit-learn).
"""
# Load the California housing dataset
# (the Boston dataset was removed from scikit-learn)
housing = fetch_california_housing()
print(housing.keys())
# Split dataset into train and test data
x = housing["data"]  # features
y = housing["target"]
x_train, x_test, y_train, y_test = train_test_split(
x, y, test_size=0.3, random_state=1
)
# Random Forest Regressor
rand_for = RandomForestRegressor(random_state=42, n_estimators=300)
rand_for.fit(x_train, y_train)
# Predict target for test data
predictions = rand_for.predict(x_test)
predictions = predictions.reshape(len(predictions), 1)
# Error printing
print(f"Mean Absolute Error:\t {mean_absolute_error(y_test, predictions)}")
print(f"Mean Square Error :\t {mean_squared_error(y_test, predictions)}")
if __name__ == "__main__":
main()
================================================
FILE: machine_learning/scoring_functions.py
================================================
import numpy as np
""" Here I implemented the scoring functions.
MAE, MSE, RMSE, RMSLE are included.
Those are used for calculating differences between
predicted values and actual values.
Metrics are slightly differentiated. Sometimes squared, rooted,
even log is used.
Using log and roots can be perceived as tools for penalizing big
errors. However, using appropriate metrics depends on the situations,
and types of data
"""
# Mean Absolute Error
def mae(predict, actual):
"""
Examples(rounded for precision):
>>> actual = [1,2,3];predict = [1,4,3]
>>> float(np.around(mae(predict,actual),decimals = 2))
0.67
>>> actual = [1,1,1];predict = [1,1,1]
>>> float(mae(predict,actual))
0.0
"""
predict = np.array(predict)
actual = np.array(actual)
difference = abs(predict - actual)
score = difference.mean()
return score
# Mean Squared Error
def mse(predict, actual):
"""
Examples(rounded for precision):
>>> actual = [1,2,3];predict = [1,4,3]
>>> float(np.around(mse(predict,actual),decimals = 2))
1.33
>>> actual = [1,1,1];predict = [1,1,1]
>>> float(mse(predict,actual))
0.0
"""
predict = np.array(predict)
actual = np.array(actual)
difference = predict - actual
square_diff = np.square(difference)
score = square_diff.mean()
return score
# Root Mean Squared Error
def rmse(predict, actual):
"""
Examples(rounded for precision):
>>> actual = [1,2,3];predict = [1,4,3]
>>> float(np.around(rmse(predict,actual),decimals = 2))
1.15
>>> actual = [1,1,1];predict = [1,1,1]
>>> float(rmse(predict,actual))
0.0
"""
predict = np.array(predict)
actual = np.array(actual)
difference = predict - actual
square_diff = np.square(difference)
mean_square_diff = square_diff.mean()
score = np.sqrt(mean_square_diff)
return score
# Root Mean Square Logarithmic Error
def rmsle(predict, actual):
"""
Examples(rounded for precision):
>>> float(np.around(rmsle(predict=[10, 2, 30], actual=[10, 10, 30]), decimals=2))
0.75
>>> float(rmsle(predict=[1, 1, 1], actual=[1, 1, 1]))
0.0
"""
predict = np.array(predict)
actual = np.array(actual)
log_predict = np.log(predict + 1)
log_actual = np.log(actual + 1)
difference = log_predict - log_actual
square_diff = np.square(difference)
mean_square_diff = square_diff.mean()
score = np.sqrt(mean_square_diff)
return score
# Mean Bias Deviation
def mbd(predict, actual):
"""
    This value is negative if the model underpredicts
    and positive if it overpredicts.
Example(rounded for precision):
Here the model overpredicts
>>> actual = [1,2,3];predict = [2,3,4]
>>> float(np.around(mbd(predict,actual),decimals = 2))
50.0
Here the model underpredicts
>>> actual = [1,2,3];predict = [0,1,1]
>>> float(np.around(mbd(predict,actual),decimals = 2))
-66.67
"""
predict = np.array(predict)
actual = np.array(actual)
difference = predict - actual
numerator = np.sum(difference) / len(predict)
    denominator = np.sum(actual) / len(predict)
    score = float(numerator) / denominator * 100
return score
def manual_accuracy(predict, actual):
return np.mean(np.array(actual) == np.array(predict))
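# The module docstring notes that these metrics weight large errors
# differently. A minimal comparison sketch (the sample values below are
# illustrative only):
if __name__ == "__main__":
    import doctest

    doctest.testmod()

    actual_values = [1, 2, 3, 4, 5]
    predicted_values = [1, 2, 3, 4, 15]  # one large error
    print(f"MAE  : {mae(predicted_values, actual_values)}")
    print(f"MSE  : {mse(predicted_values, actual_values)}")
    print(f"RMSE : {rmse(predicted_values, actual_values)}")
    print(f"RMSLE: {rmsle(predicted_values, actual_values)}")
    print(f"MBD  : {mbd(predicted_values, actual_values)}")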
================================================
FILE: machine_learning/self_organizing_map.py
================================================
"""
https://en.wikipedia.org/wiki/Self-organizing_map
"""
import math
class SelfOrganizingMap:
def get_winner(self, weights: list[list[float]], sample: list[int]) -> int:
"""
Compute the winning vector by Euclidean distance
>>> SelfOrganizingMap().get_winner([[1, 2, 3], [4, 5, 6]], [1, 2, 3])
1
"""
d0 = 0.0
d1 = 0.0
        for i in range(len(sample)):
            d0 += math.pow((sample[i] - weights[0][i]), 2)
            d1 += math.pow((sample[i] - weights[1][i]), 2)
        return 0 if d0 > d1 else 1
def update(
self, weights: list[list[int | float]], sample: list[int], j: int, alpha: float
) -> list[list[int | float]]:
"""
Update the winning vector.
>>> SelfOrganizingMap().update([[1, 2, 3], [4, 5, 6]], [1, 2, 3], 1, 0.1)
[[1, 2, 3], [3.7, 4.7, 6]]
"""
for i in range(len(weights)):
weights[j][i] += alpha * (sample[i] - weights[j][i])
return weights
# Driver code
def main() -> None:
# Training Examples ( m, n )
training_samples = [[1, 1, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [0, 0, 1, 1]]
# weight initialization ( n, C )
weights = [[0.2, 0.6, 0.5, 0.9], [0.8, 0.4, 0.7, 0.3]]
# training
self_organizing_map = SelfOrganizingMap()
epochs = 3
alpha = 0.5
for _ in range(epochs):
for j in range(len(training_samples)):
# training sample
sample = training_samples[j]
# Compute the winning vector
winner = self_organizing_map.get_winner(weights, sample)
# Update the winning vector
weights = self_organizing_map.update(weights, sample, winner, alpha)
# classify test sample
sample = [0, 0, 0, 1]
winner = self_organizing_map.get_winner(weights, sample)
# results
print(f"Clusters that the test sample belongs to : {winner}")
print(f"Weights that have been trained : {weights}")
# running the main() function
if __name__ == "__main__":
main()
================================================
FILE: machine_learning/sequential_minimum_optimization.py
================================================
"""
Sequential minimal optimization (SMO) for support vector machines (SVM)
Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
programming (QP) problem that arises during the training of SVMs. It was invented by
John Platt in 1998.
Input:
    type: numpy.ndarray.
    Each row of the ndarray represents one sample.
    The first column must contain the sample tags, which must be 1 or -1.
Usage:
Command:
python3 sequential_minimum_optimization.py
Code:
from sequential_minimum_optimization import SmoSVM, Kernel
kernel = Kernel(kernel='poly', degree=3., coef0=1., gamma=0.5)
init_alphas = np.zeros(train.shape[0])
SVM = SmoSVM(train=train, alpha_list=init_alphas, kernel_func=kernel, cost=0.4,
b=0.0, tolerance=0.001)
SVM.fit()
predict = SVM.predict(test_samples)
Reference:
https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/smo-book.pdf
https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-98-14.pdf
"""
import os
import sys
import urllib.request
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs, make_circles
from sklearn.preprocessing import StandardScaler
CANCER_DATASET_URL = (
"https://archive.ics.uci.edu/ml/machine-learning-databases/"
"breast-cancer-wisconsin/wdbc.data"
)
class SmoSVM:
def __init__(
self,
train,
kernel_func,
alpha_list=None,
cost=0.4,
b=0.0,
tolerance=0.001,
auto_norm=True,
):
self._init = True
self._auto_norm = auto_norm
self._c = np.float64(cost)
self._b = np.float64(b)
self._tol = np.float64(tolerance) if tolerance > 0.0001 else np.float64(0.001)
self.tags = train[:, 0]
self.samples = self._norm(train[:, 1:]) if self._auto_norm else train[:, 1:]
self.alphas = alpha_list if alpha_list is not None else np.zeros(train.shape[0])
self.Kernel = kernel_func
self._eps = 0.001
self._all_samples = list(range(self.length))
self._K_matrix = self._calculate_k_matrix()
self._error = np.zeros(self.length)
self._unbound = []
self.choose_alpha = self._choose_alphas()
# Calculate alphas using SMO algorithm
def fit(self):
k = self._k
state = None
while True:
# 1: Find alpha1, alpha2
try:
i1, i2 = self.choose_alpha.send(state)
state = None
except StopIteration:
print("Optimization done!\nEvery sample satisfy the KKT condition!")
break
# 2: calculate new alpha2 and new alpha1
y1, y2 = self.tags[i1], self.tags[i2]
a1, a2 = self.alphas[i1].copy(), self.alphas[i2].copy()
e1, e2 = self._e(i1), self._e(i2)
args = (i1, i2, a1, a2, e1, e2, y1, y2)
a1_new, a2_new = self._get_new_alpha(*args)
if not a1_new and not a2_new:
state = False
continue
self.alphas[i1], self.alphas[i2] = a1_new, a2_new
# 3: update threshold(b)
b1_new = np.float64(
-e1
- y1 * k(i1, i1) * (a1_new - a1)
- y2 * k(i2, i1) * (a2_new - a2)
+ self._b
)
b2_new = np.float64(
-e2
- y2 * k(i2, i2) * (a2_new - a2)
- y1 * k(i1, i2) * (a1_new - a1)
+ self._b
)
if 0.0 < a1_new < self._c:
b = b1_new
if 0.0 < a2_new < self._c:
b = b2_new
if not (np.float64(0) < a2_new < self._c) and not (
np.float64(0) < a1_new < self._c
):
b = (b1_new + b2_new) / 2.0
b_old = self._b
self._b = b
# 4: update error, here we only calculate the error for non-bound samples
self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
for s in self.unbound:
if s in (i1, i2):
continue
self._error[s] += (
y1 * (a1_new - a1) * k(i1, s)
+ y2 * (a2_new - a2) * k(i2, s)
+ (self._b - b_old)
)
# if i1 or i2 is non-bound, update their error value to zero
if self._is_unbound(i1):
self._error[i1] = 0
if self._is_unbound(i2):
self._error[i2] = 0
# Predict test samples
def predict(self, test_samples, classify=True):
if test_samples.shape[1] > self.samples.shape[1]:
raise ValueError(
"Test samples' feature length does not equal to that of train samples"
)
if self._auto_norm:
test_samples = self._norm(test_samples)
results = []
for test_sample in test_samples:
result = self._predict(test_sample)
if classify:
results.append(1 if result > 0 else -1)
else:
results.append(result)
return np.array(results)
# Check if alpha violates the KKT condition
def _check_obey_kkt(self, index):
alphas = self.alphas
tol = self._tol
r = self._e(index) * self.tags[index]
c = self._c
return (r < -tol and alphas[index] < c) or (r > tol and alphas[index] > 0.0)
# Get value calculated from kernel function
def _k(self, i1, i2):
# for test samples, use kernel function
if isinstance(i2, np.ndarray):
return self.Kernel(self.samples[i1], i2)
# for training samples, kernel values have been saved in matrix
else:
return self._K_matrix[i1, i2]
# Get error for sample
def _e(self, index):
"""
Two cases:
1: Sample[index] is non-bound, fetch error from list: _error
2: sample[index] is bound, use predicted value minus true value: g(xi) - yi
"""
# get from error data
if self._is_unbound(index):
return self._error[index]
# get by g(xi) - yi
else:
gx = np.dot(self.alphas * self.tags, self._K_matrix[:, index]) + self._b
yi = self.tags[index]
return gx - yi
# Calculate kernel matrix of all possible i1, i2, saving time
def _calculate_k_matrix(self):
k_matrix = np.zeros([self.length, self.length])
for i in self._all_samples:
for j in self._all_samples:
k_matrix[i, j] = np.float64(
self.Kernel(self.samples[i, :], self.samples[j, :])
)
return k_matrix
# Predict tag for test sample
def _predict(self, sample):
k = self._k
predicted_value = (
np.sum(
[
self.alphas[i1] * self.tags[i1] * k(i1, sample)
for i1 in self._all_samples
]
)
+ self._b
)
return predicted_value
# Choose alpha1 and alpha2
def _choose_alphas(self):
loci = yield from self._choose_a1()
if not loci:
return None
return loci
def _choose_a1(self):
"""
Choose first alpha
Steps:
1: First loop over all samples
2: Second loop over all non-bound samples until no non-bound samples violate
the KKT condition.
3: Repeat these two processes until no samples violate the KKT condition
after the first loop.
"""
while True:
all_not_obey = True
# all sample
print("Scanning all samples!")
for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]:
all_not_obey = False
yield from self._choose_a2(i1)
# non-bound sample
print("Scanning non-bound samples!")
while True:
not_obey = True
for i1 in [
i
for i in self._all_samples
if self._check_obey_kkt(i) and self._is_unbound(i)
]:
not_obey = False
yield from self._choose_a2(i1)
if not_obey:
print("All non-bound samples satisfy the KKT condition!")
break
if all_not_obey:
print("All samples satisfy the KKT condition!")
break
return False
def _choose_a2(self, i1):
"""
Choose the second alpha using a heuristic algorithm
Steps:
1: Choose alpha2 that maximizes the step size (|E1 - E2|).
2: Start in a random point, loop over all non-bound samples till alpha1 and
alpha2 are optimized.
3: Start in a random point, loop over all samples till alpha1 and alpha2 are
optimized.
"""
self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
if len(self.unbound) > 0:
tmp_error = self._error.copy().tolist()
tmp_error_dict = {
index: value
for index, value in enumerate(tmp_error)
if self._is_unbound(index)
}
if self._e(i1) >= 0:
i2 = min(tmp_error_dict, key=lambda index: tmp_error_dict[index])
else:
i2 = max(tmp_error_dict, key=lambda index: tmp_error_dict[index])
cmd = yield i1, i2
if cmd is None:
return
rng = np.random.default_rng()
for i2 in np.roll(self.unbound, rng.choice(self.length)):
cmd = yield i1, i2
if cmd is None:
return
for i2 in np.roll(self._all_samples, rng.choice(self.length)):
cmd = yield i1, i2
if cmd is None:
return
# Get the new alpha2 and new alpha1
def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
k = self._k
if i1 == i2:
return None, None
# calculate L and H which bound the new alpha2
s = y1 * y2
if s == -1:
l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1) # noqa: E741
else:
l, h = max(0.0, a2 + a1 - self._c), min(self._c, a2 + a1) # noqa: E741
if l == h:
return None, None
# calculate eta
k11 = k(i1, i1)
k22 = k(i2, i2)
k12 = k(i1, i2)
# select the new alpha2 which could achieve the minimal objectives
if (eta := k11 + k22 - 2.0 * k12) > 0.0:
a2_new_unc = a2 + (y2 * (e1 - e2)) / eta
# a2_new has a boundary
if a2_new_unc >= h:
a2_new = h
elif a2_new_unc <= l:
a2_new = l
else:
a2_new = a2_new_unc
else:
b = self._b
l1 = a1 + s * (a2 - l)
h1 = a1 + s * (a2 - h)
# Method 1
f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2)
f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2)
ol = (
l1 * f1
+ l * f2
+ 1 / 2 * l1**2 * k(i1, i1)
+ 1 / 2 * l**2 * k(i2, i2)
+ s * l * l1 * k(i1, i2)
)
oh = (
h1 * f1
+ h * f2
+ 1 / 2 * h1**2 * k(i1, i1)
+ 1 / 2 * h**2 * k(i2, i2)
+ s * h * h1 * k(i1, i2)
)
"""
Method 2: Use objective function to check which alpha2_new could achieve the
minimal objectives
"""
if ol < (oh - self._eps):
a2_new = l
elif ol > oh + self._eps:
a2_new = h
else:
a2_new = a2
# a1_new has a boundary too
a1_new = a1 + s * (a2 - a2_new)
if a1_new < 0:
a2_new += s * a1_new
a1_new = 0
if a1_new > self._c:
a2_new += s * (a1_new - self._c)
a1_new = self._c
return a1_new, a2_new
# Normalize data using min-max method
def _norm(self, data):
if self._init:
self._min = np.min(data, axis=0)
self._max = np.max(data, axis=0)
self._init = False
return (data - self._min) / (self._max - self._min)
else:
return (data - self._min) / (self._max - self._min)
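    # A sample is "unbound" (non-bound) when 0 < alpha < C, i.e. it lies on the
    # margin; only unbound samples keep a cached error value in self._error.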
def _is_unbound(self, index):
return bool(0.0 < self.alphas[index] < self._c)
def _is_support(self, index):
return bool(self.alphas[index] > 0)
@property
def unbound(self):
return self._unbound
@property
def support(self):
return [i for i in range(self.length) if self._is_support(i)]
@property
def length(self):
return self.samples.shape[0]
class Kernel:
def __init__(self, kernel, degree=1.0, coef0=0.0, gamma=1.0):
self.degree = np.float64(degree)
self.coef0 = np.float64(coef0)
self.gamma = np.float64(gamma)
self._kernel_name = kernel
self._kernel = self._get_kernel(kernel_name=kernel)
self._check()
def _polynomial(self, v1, v2):
return (self.gamma * np.inner(v1, v2) + self.coef0) ** self.degree
def _linear(self, v1, v2):
return np.inner(v1, v2) + self.coef0
def _rbf(self, v1, v2):
return np.exp(-1 * (self.gamma * np.linalg.norm(v1 - v2) ** 2))
def _check(self):
if self._kernel == self._rbf and self.gamma < 0:
raise ValueError("gamma value must be non-negative")
def _get_kernel(self, kernel_name):
maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf}
return maps[kernel_name]
def __call__(self, v1, v2):
return self._kernel(v1, v2)
def __repr__(self):
return self._kernel_name
def count_time(func):
def call_func(*args, **kwargs):
import time
start_time = time.time()
func(*args, **kwargs)
end_time = time.time()
print(f"SMO algorithm cost {end_time - start_time} seconds")
return call_func
@count_time
def test_cancer_data():
print("Hello!\nStart test SVM using the SMO algorithm!")
# 0: download dataset and load into pandas' dataframe
if not os.path.exists(r"cancer_data.csv"):
request = urllib.request.Request( # noqa: S310
CANCER_DATASET_URL,
headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"},
)
response = urllib.request.urlopen(request) # noqa: S310
content = response.read().decode("utf-8")
with open(r"cancer_data.csv", "w") as f:
f.write(content)
data = pd.read_csv(
"cancer_data.csv",
header=None,
dtype={0: str}, # Assuming the first column contains string data
)
# 1: pre-processing data
del data[data.columns.tolist()[0]]
data = data.dropna(axis=0)
data = data.replace({"M": np.float64(1), "B": np.float64(-1)})
samples = np.array(data)[:, :]
# 2: dividing data into train_data data and test_data data
train_data, test_data = samples[:328, :], samples[328:, :]
test_tags, test_samples = test_data[:, 0], test_data[:, 1:]
# 3: choose kernel function, and set initial alphas to zero (optional)
my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
al = np.zeros(train_data.shape[0])
# 4: calculating best alphas using SMO algorithm and predict test_data samples
mysvm = SmoSVM(
train=train_data,
kernel_func=my_kernel,
alpha_list=al,
cost=0.4,
b=0.0,
tolerance=0.001,
)
mysvm.fit()
predict = mysvm.predict(test_samples)
# 5: check accuracy
score = 0
test_num = test_tags.shape[0]
for i in range(test_tags.shape[0]):
if test_tags[i] == predict[i]:
score += 1
print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}")
print(f"Rough Accuracy: {score / test_tags.shape[0]}")
def test_demonstration():
# change stdout
print("\nStarting plot, please wait!")
sys.stdout = open(os.devnull, "w")
ax1 = plt.subplot2grid((2, 2), (0, 0))
ax2 = plt.subplot2grid((2, 2), (0, 1))
ax3 = plt.subplot2grid((2, 2), (1, 0))
ax4 = plt.subplot2grid((2, 2), (1, 1))
ax1.set_title("Linear SVM, cost = 0.1")
test_linear_kernel(ax1, cost=0.1)
ax2.set_title("Linear SVM, cost = 500")
test_linear_kernel(ax2, cost=500)
ax3.set_title("RBF kernel SVM, cost = 0.1")
test_rbf_kernel(ax3, cost=0.1)
ax4.set_title("RBF kernel SVM, cost = 500")
test_rbf_kernel(ax4, cost=500)
sys.stdout = sys.__stdout__
print("Plot done!")
def test_linear_kernel(ax, cost):
train_x, train_y = make_blobs(
n_samples=500, centers=2, n_features=2, random_state=1
)
train_y[train_y == 0] = -1
scaler = StandardScaler()
train_x_scaled = scaler.fit_transform(train_x, train_y)
train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
mysvm = SmoSVM(
train=train_data,
kernel_func=my_kernel,
cost=cost,
tolerance=0.001,
auto_norm=False,
)
mysvm.fit()
plot_partition_boundary(mysvm, train_data, ax=ax)
def test_rbf_kernel(ax, cost):
train_x, train_y = make_circles(
n_samples=500, noise=0.1, factor=0.1, random_state=1
)
train_y[train_y == 0] = -1
scaler = StandardScaler()
train_x_scaled = scaler.fit_transform(train_x, train_y)
train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
mysvm = SmoSVM(
train=train_data,
kernel_func=my_kernel,
cost=cost,
tolerance=0.001,
auto_norm=False,
)
mysvm.fit()
plot_partition_boundary(mysvm, train_data, ax=ax)
def plot_partition_boundary(
model, train_data, ax, resolution=100, colors=("b", "k", "r")
):
"""
    Unlike a linear SVM, we cannot recover an explicit optimal w for a kernel SVM
    model. For this reason, we generate a dense grid of points, calculate their
    predicted values with the trained model, and draw a contour map of those
    values. The contour map represents the SVM's partition boundary.
"""
train_data_x = train_data[:, 1]
train_data_y = train_data[:, 2]
train_data_tags = train_data[:, 0]
xrange = np.linspace(train_data_x.min(), train_data_x.max(), resolution)
yrange = np.linspace(train_data_y.min(), train_data_y.max(), resolution)
test_samples = np.array([(x, y) for x in xrange for y in yrange]).reshape(
resolution * resolution, 2
)
test_tags = model.predict(test_samples, classify=False)
grid = test_tags.reshape((len(xrange), len(yrange)))
# Plot contour map which represents the partition boundary
ax.contour(
xrange,
yrange,
np.asmatrix(grid).T,
levels=(-1, 0, 1),
linestyles=("--", "-", "--"),
linewidths=(1, 1, 1),
colors=colors,
)
# Plot all train samples
ax.scatter(
train_data_x,
train_data_y,
c=train_data_tags,
cmap=plt.cm.Dark2,
lw=0,
alpha=0.5,
)
# Plot support vectors
support = model.support
ax.scatter(
train_data_x[support],
train_data_y[support],
c=train_data_tags[support],
cmap=plt.cm.Dark2,
)
if __name__ == "__main__":
test_cancer_data()
test_demonstration()
plt.show()
================================================
FILE: machine_learning/similarity_search.py
================================================
"""
Similarity Search : https://en.wikipedia.org/wiki/Similarity_search
Similarity search is an algorithm for finding the vector in a collection that is
nearest to a given query vector; it is commonly used in natural language processing.
This implementation measures distance with the Euclidean distance and
returns, for each query vector, a list containing two items:
1. the nearest vector
2. the distance between the query vector and the nearest vector (float)
"""
from __future__ import annotations
import math
import numpy as np
from numpy.linalg import norm
def euclidean(input_a: np.ndarray, input_b: np.ndarray) -> float:
"""
    Calculates the Euclidean distance between two vectors.
:param input_a: ndarray of first vector.
:param input_b: ndarray of second vector.
:return: Euclidean distance of input_a and input_b. By using math.sqrt(),
result will be float.
>>> euclidean(np.array([0]), np.array([1]))
1.0
>>> euclidean(np.array([0, 1]), np.array([1, 1]))
1.0
>>> euclidean(np.array([0, 0, 0]), np.array([0, 0, 1]))
1.0
"""
return math.sqrt(sum(pow(a - b, 2) for a, b in zip(input_a, input_b)))
def similarity_search(
dataset: np.ndarray, value_array: np.ndarray
) -> list[list[list[float] | float]]:
"""
:param dataset: Set containing the vectors. Should be ndarray.
:param value_array: vector/vectors we want to know the nearest vector from dataset.
:return: Result will be a list containing
1. the nearest vector
2. distance from the vector
>>> dataset = np.array([[0], [1], [2]])
>>> value_array = np.array([[0]])
>>> similarity_search(dataset, value_array)
[[[0], 0.0]]
>>> dataset = np.array([[0, 0], [1, 1], [2, 2]])
>>> value_array = np.array([[0, 1]])
>>> similarity_search(dataset, value_array)
[[[0, 0], 1.0]]
>>> dataset = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]])
>>> value_array = np.array([[0, 0, 1]])
>>> similarity_search(dataset, value_array)
[[[0, 0, 0], 1.0]]
>>> dataset = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]])
>>> value_array = np.array([[0, 0, 0], [0, 0, 1]])
>>> similarity_search(dataset, value_array)
[[[0, 0, 0], 0.0], [[0, 0, 0], 1.0]]
These are the errors that might occur:
1. If dimensions are different.
For example, dataset has 2d array and value_array has 1d array:
>>> dataset = np.array([[1]])
>>> value_array = np.array([1])
>>> similarity_search(dataset, value_array)
Traceback (most recent call last):
...
ValueError: Wrong input data's dimensions... dataset : 2, value_array : 1
2. If data's shapes are different.
For example, dataset has shape of (3, 2) and value_array has (2, 3).
We are expecting same shapes of two arrays, so it is wrong.
>>> dataset = np.array([[0, 0], [1, 1], [2, 2]])
>>> value_array = np.array([[0, 0, 0], [0, 0, 1]])
>>> similarity_search(dataset, value_array)
Traceback (most recent call last):
...
ValueError: Wrong input data's shape... dataset : 2, value_array : 3
3. If data types are different.
When trying to compare, we are expecting same types so they should be same.
If not, it'll come up with errors.
>>> dataset = np.array([[0, 0], [1, 1], [2, 2]], dtype=np.float32)
>>> value_array = np.array([[0, 0], [0, 1]], dtype=np.int32)
>>> similarity_search(dataset, value_array) # doctest: +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
TypeError: Input data have different datatype...
dataset : float32, value_array : int32
"""
if dataset.ndim != value_array.ndim:
msg = (
"Wrong input data's dimensions... "
f"dataset : {dataset.ndim}, value_array : {value_array.ndim}"
)
raise ValueError(msg)
try:
if dataset.shape[1] != value_array.shape[1]:
msg = (
"Wrong input data's shape... "
f"dataset : {dataset.shape[1]}, value_array : {value_array.shape[1]}"
)
raise ValueError(msg)
except IndexError:
if dataset.ndim != value_array.ndim:
raise TypeError("Wrong shape")
if dataset.dtype != value_array.dtype:
msg = (
"Input data have different datatype... "
f"dataset : {dataset.dtype}, value_array : {value_array.dtype}"
)
raise TypeError(msg)
answer = []
for value in value_array:
dist = euclidean(value, dataset[0])
vector = dataset[0].tolist()
for dataset_value in dataset[1:]:
temp_dist = euclidean(value, dataset_value)
if dist > temp_dist:
dist = temp_dist
vector = dataset_value.tolist()
answer.append([vector, dist])
return answer
def cosine_similarity(input_a: np.ndarray, input_b: np.ndarray) -> float:
"""
    Calculates the cosine similarity between two vectors.
    :param input_a: ndarray of first vector.
    :param input_b: ndarray of second vector.
    :return: Cosine similarity of input_a and input_b, as a float.
>>> cosine_similarity(np.array([1]), np.array([1]))
1.0
>>> cosine_similarity(np.array([1, 2]), np.array([6, 32]))
0.9615239476408232
"""
return float(np.dot(input_a, input_b) / (norm(input_a) * norm(input_b)))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: machine_learning/support_vector_machines.py
================================================
import numpy as np
from numpy import ndarray
from scipy.optimize import Bounds, LinearConstraint, minimize
def norm_squared(vector: ndarray) -> float:
"""
Return the squared second norm of vector
norm_squared(v) = sum(x * x for x in v)
Args:
vector (ndarray): input vector
Returns:
float: squared second norm of vector
>>> int(norm_squared([1, 2]))
5
>>> int(norm_squared(np.asarray([1, 2])))
5
>>> int(norm_squared([0, 0]))
0
"""
return np.dot(vector, vector)
class SVC:
"""
Support Vector Classifier
Args:
kernel (str): kernel to use. Default: linear
Possible choices:
- linear
regularization: constraint for soft margin (data not linearly separable)
Default: unbound
>>> SVC(kernel="asdf")
Traceback (most recent call last):
...
ValueError: Unknown kernel: asdf
>>> SVC(kernel="rbf")
Traceback (most recent call last):
...
ValueError: rbf kernel requires gamma
>>> SVC(kernel="rbf", gamma=-1)
Traceback (most recent call last):
...
ValueError: gamma must be > 0
"""
def __init__(
self,
*,
regularization: float = np.inf,
kernel: str = "linear",
gamma: float = 0.0,
) -> None:
self.regularization = regularization
self.gamma = gamma
if kernel == "linear":
self.kernel = self.__linear
elif kernel == "rbf":
if self.gamma == 0:
raise ValueError("rbf kernel requires gamma")
if not isinstance(self.gamma, (float, int)):
raise ValueError("gamma must be float or int")
if not self.gamma > 0:
raise ValueError("gamma must be > 0")
self.kernel = self.__rbf
# in the future, there could be a default value like in sklearn
            # sklearn: def_gamma = 1 / (n_features * X.var()) (wiki)
# previously it was 1/(n_features)
else:
msg = f"Unknown kernel: {kernel}"
raise ValueError(msg)
# kernels
def __linear(self, vector1: ndarray, vector2: ndarray) -> float:
"""Linear kernel (as if no kernel used at all)"""
return np.dot(vector1, vector2)
def __rbf(self, vector1: ndarray, vector2: ndarray) -> float:
"""
RBF: Radial Basis Function Kernel
Note: for more information see:
https://en.wikipedia.org/wiki/Radial_basis_function_kernel
Args:
vector1 (ndarray): first vector
            vector2 (ndarray): second vector
Returns:
float: exp(-(gamma * norm_squared(vector1 - vector2)))
"""
return np.exp(-(self.gamma * norm_squared(vector1 - vector2)))
def fit(self, observations: list[ndarray], classes: ndarray) -> None:
"""
Fits the SVC with a set of observations.
Args:
observations (list[ndarray]): list of observations
classes (ndarray): classification of each observation (in {1, -1})
"""
self.observations = observations
self.classes = classes
# using Wolfe's Dual to calculate w.
# Primal problem: minimize 1/2*norm_squared(w)
# constraint: yn(w . xn + b) >= 1
#
# With l a vector
# Dual problem: maximize sum_n(ln) -
# 1/2 * sum_n(sum_m(ln*lm*yn*ym*xn . xm))
# constraint: self.C >= ln >= 0
# and sum_n(ln*yn) = 0
# Then we get w using w = sum_n(ln*yn*xn)
# At the end we can get b ~= mean(yn - w . xn)
#
# Since we use kernels, we only need l_star to calculate b
# and to classify observations
(n,) = np.shape(classes)
def to_minimize(candidate: ndarray) -> float:
"""
Opposite of the function to maximize
Args:
candidate (ndarray): candidate array to test
Return:
float: Wolfe's Dual result to minimize
"""
s = 0
(n,) = np.shape(candidate)
for i in range(n):
for j in range(n):
s += (
candidate[i]
* candidate[j]
* classes[i]
* classes[j]
* self.kernel(observations[i], observations[j])
)
return 1 / 2 * s - sum(candidate)
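        # Constraints of the dual problem stated above: sum_n(ln * yn) = 0 and
        # 0 <= ln <= regularization (the soft-margin box constraint).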
        ly_constraint = LinearConstraint(classes, 0, 0)
        l_bounds = Bounds(0, self.regularization)
        l_star = minimize(
            to_minimize, np.ones(n), bounds=l_bounds, constraints=[ly_constraint]
        ).x
self.optimum = l_star
# calculating mean offset of separation plane to points
s = 0
for i in range(n):
for j in range(n):
s += classes[i] - classes[i] * self.optimum[i] * self.kernel(
observations[i], observations[j]
)
self.offset = s / n
def predict(self, observation: ndarray) -> int:
"""
Get the expected class of an observation
Args:
observation (Vector): observation
Returns:
int {1, -1}: expected class
>>> xs = [
... np.asarray([0, 1]), np.asarray([0, 2]),
... np.asarray([1, 1]), np.asarray([1, 2])
... ]
>>> y = np.asarray([1, 1, -1, -1])
>>> s = SVC()
>>> s.fit(xs, y)
>>> s.predict(np.asarray([0, 1]))
1
>>> s.predict(np.asarray([1, 1]))
-1
>>> s.predict(np.asarray([2, 2]))
-1
"""
s = sum(
self.optimum[n]
* self.classes[n]
* self.kernel(self.observations[n], observation)
for n in range(len(self.classes))
)
return 1 if s + self.offset >= 0 else -1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: machine_learning/t_stochastic_neighbour_embedding.py
================================================
"""
t-distributed stochastic neighbor embedding (t-SNE)
For more details, see:
https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
"""
import doctest
import numpy as np
from numpy import ndarray
from sklearn.datasets import load_iris
def collect_dataset() -> tuple[ndarray, ndarray]:
"""
Load the Iris dataset and return features and labels.
Returns:
tuple[ndarray, ndarray]: Feature matrix and target labels.
>>> features, targets = collect_dataset()
>>> features.shape
(150, 4)
>>> targets.shape
(150,)
"""
iris_dataset = load_iris()
return np.array(iris_dataset.data), np.array(iris_dataset.target)
def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> ndarray:
"""
Compute high-dimensional affinities (P matrix) using a Gaussian kernel.
Args:
data_matrix: Input data of shape (n_samples, n_features).
sigma: Gaussian kernel bandwidth.
Returns:
ndarray: Symmetrized probability matrix.
>>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
>>> probabilities = compute_pairwise_affinities(x)
>>> float(round(probabilities[0, 1], 3))
0.25
"""
n_samples = data_matrix.shape[0]
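    # Pairwise squared Euclidean distances for all sample pairs, using the
    # identity ||a - b||^2 = ||a||^2 - 2 * a.b + ||b||^2.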
squared_sum = np.sum(np.square(data_matrix), axis=1)
squared_distance = np.add(
np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum
)
affinity_matrix = np.exp(-squared_distance / (2 * sigma**2))
np.fill_diagonal(affinity_matrix, 0)
affinity_matrix /= np.sum(affinity_matrix)
return (affinity_matrix + affinity_matrix.T) / (2 * n_samples)
def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndarray]:
"""
Compute low-dimensional affinities (Q matrix) using a Student-t distribution.
Args:
embedding_matrix: Low-dimensional embedding of shape (n_samples, n_components).
Returns:
tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix).
>>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
>>> q_matrix, numerators = compute_low_dim_affinities(y)
>>> q_matrix.shape
(2, 2)
"""
squared_sum = np.sum(np.square(embedding_matrix), axis=1)
numerator_matrix = 1 / (
1
+ np.add(
np.add(-2 * np.dot(embedding_matrix, embedding_matrix.T), squared_sum).T,
squared_sum,
)
)
np.fill_diagonal(numerator_matrix, 0)
q_matrix = numerator_matrix / np.sum(numerator_matrix)
return q_matrix, numerator_matrix
def apply_tsne(
data_matrix: ndarray,
n_components: int = 2,
learning_rate: float = 200.0,
n_iter: int = 500,
) -> ndarray:
"""
Apply t-SNE for dimensionality reduction.
Args:
data_matrix: Original dataset (features).
n_components: Target dimension (2D or 3D).
learning_rate: Step size for gradient descent.
n_iter: Number of iterations.
Returns:
ndarray: Low-dimensional embedding of the data.
>>> features, _ = collect_dataset()
>>> embedding = apply_tsne(features, n_components=2, n_iter=50)
>>> embedding.shape
(150, 2)
"""
if n_components < 1 or n_iter < 1:
raise ValueError("n_components and n_iter must be >= 1")
n_samples = data_matrix.shape[0]
rng = np.random.default_rng()
embedding = rng.standard_normal((n_samples, n_components)) * 1e-4
high_dim_affinities = compute_pairwise_affinities(data_matrix)
high_dim_affinities = np.maximum(high_dim_affinities, 1e-12)
embedding_increment = np.zeros_like(embedding)
momentum = 0.5
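    # Gradient descent on the KL divergence between P and Q. The t-SNE gradient
    # is dC/dy_i = 4 * sum_j (p_ij - q_ij) * (y_i - y_j) / (1 + ||y_i - y_j||^2),
    # evaluated below in matrix form; momentum is increased partway through
    # training, as is conventional for t-SNE.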
for iteration in range(n_iter):
low_dim_affinities, numerator_matrix = compute_low_dim_affinities(embedding)
low_dim_affinities = np.maximum(low_dim_affinities, 1e-12)
affinity_diff = high_dim_affinities - low_dim_affinities
        gradient = 4 * (
            np.multiply(
                np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis],
                embedding,
            )
            - np.dot((affinity_diff * numerator_matrix), embedding)
        )
embedding_increment = momentum * embedding_increment - learning_rate * gradient
embedding += embedding_increment
if iteration == int(n_iter / 4):
momentum = 0.8
return embedding
def main() -> None:
"""
Run t-SNE on the Iris dataset and display the first 5 embeddings.
>>> main() # doctest: +ELLIPSIS
t-SNE embedding (first 5 points):
[[...
"""
features, _labels = collect_dataset()
embedding = apply_tsne(features, n_components=2, n_iter=300)
if not isinstance(embedding, np.ndarray):
raise TypeError("t-SNE embedding must be an ndarray")
print("t-SNE embedding (first 5 points):")
print(embedding[:5])
# Optional visualization (Ruff/mypy compliant)
# import matplotlib.pyplot as plt
# plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap="viridis")
# plt.title("t-SNE Visualization of the Iris Dataset")
# plt.xlabel("Dimension 1")
# plt.ylabel("Dimension 2")
# plt.show()
if __name__ == "__main__":
doctest.testmod()
main()
================================================
FILE: machine_learning/word_frequency_functions.py
================================================
import string
from math import log10
"""
tf-idf Wikipedia: https://en.wikipedia.org/wiki/Tf%E2%80%93idf
tf-idf and other word frequency algorithms are often used
as a weighting factor in information retrieval and text
mining. 83% of text-based recommender systems use
tf-idf for term weighting. In Layman's terms, tf-idf
is a statistic intended to reflect how important a word
is to a document in a corpus (a collection of documents)
Here I've implemented several word frequency algorithms
that are commonly used in information retrieval: Term Frequency,
Document Frequency, and TF-IDF (Term-Frequency*Inverse-Document-Frequency)
are included.
Term Frequency is a statistical function that
returns a number representing how frequently
an expression occurs in a document. This
indicates how significant a particular term is in
a given document.
Document Frequency is a statistical function that returns
an integer representing the number of documents in a
corpus that a term occurs in (where the max number returned
would be the number of documents in the corpus).
Inverse Document Frequency is mathematically written as
log10(N/df), where N is the number of documents in your
corpus and df is the Document Frequency. If df is 0, a
ZeroDivisionError will be thrown.
Term-Frequency*Inverse-Document-Frequency is a measure
of the originality of a term. It is mathematically written
as tf*log10(N/df). It compares the number of times
a term appears in a document with the number of documents
the term appears in. If df is 0, a ZeroDivisionError will be thrown.
"""
def term_frequency(term: str, document: str) -> int:
"""
Return the number of times a term occurs within
a given document.
@params: term, the term to search a document for, and document,
the document to search within
@returns: an integer representing the number of times a term is
found within the document
@examples:
>>> term_frequency("to", "To be, or not to be")
2
"""
# strip all punctuation and newlines and replace it with ''
document_without_punctuation = document.translate(
str.maketrans("", "", string.punctuation)
).replace("\n", "")
tokenize_document = document_without_punctuation.split(" ") # word tokenization
return len([word for word in tokenize_document if word.lower() == term.lower()])
def document_frequency(term: str, corpus: str) -> tuple[int, int]:
"""
Calculate the number of documents in a corpus that contain a
given term
@params : term, the term to search each document for, and corpus, a collection of
documents. Each document should be separated by a newline.
@returns : the number of documents in the corpus that contain the term you are
searching for and the number of documents in the corpus
@examples :
>>> document_frequency("first", "This is the first document in the corpus.\\nThIs\
is the second document in the corpus.\\nTHIS is \
the third document in the corpus.")
(1, 3)
"""
corpus_without_punctuation = corpus.lower().translate(
str.maketrans("", "", string.punctuation)
) # strip all punctuation and replace it with ''
docs = corpus_without_punctuation.split("\n")
term = term.lower()
return (len([doc for doc in docs if term in doc]), len(docs))
def inverse_document_frequency(df: int, n: int, smoothing: bool = False) -> float:
"""
    Return a float denoting the importance
of a word. This measure of importance is
calculated by log10(N/df), where N is the
number of documents and df is
the Document Frequency.
@params : df, the Document Frequency, N,
the number of documents in the corpus and
smoothing, if True return the idf-smooth
    @returns : log10(N/df), or 1 + log10(N/(1 + df)) if smoothing is True
@examples :
>>> inverse_document_frequency(3, 0)
Traceback (most recent call last):
...
ValueError: log10(0) is undefined.
>>> inverse_document_frequency(1, 3)
0.477
>>> inverse_document_frequency(0, 3)
Traceback (most recent call last):
...
ZeroDivisionError: df must be > 0
>>> inverse_document_frequency(0, 3,True)
1.477
"""
if smoothing:
if n == 0:
raise ValueError("log10(0) is undefined.")
return round(1 + log10(n / (1 + df)), 3)
if df == 0:
raise ZeroDivisionError("df must be > 0")
elif n == 0:
raise ValueError("log10(0) is undefined.")
return round(log10(n / df), 3)
def tf_idf(tf: int, idf: float) -> float:
"""
Combine the term frequency
and inverse document frequency functions to
calculate the originality of a term. This
'originality' is calculated by multiplying
the term frequency and the inverse document
frequency : tf-idf = TF * IDF
@params : tf, the term frequency, and idf, the inverse document
frequency
@examples :
>>> tf_idf(2, 0.477)
0.954
"""
return round(tf * idf, 3)
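# A minimal end-to-end sketch of the tf-idf pipeline described in the module
# docstring (the corpus below is illustrative only):
if __name__ == "__main__":
    import doctest

    doctest.testmod()

    example_corpus = (
        "This is the first document in the corpus.\n"
        "ThIs is the second document in the corpus.\n"
        "THIS is the third document in the corpus."
    )
    tf = term_frequency("first", "This is the first document in the corpus.")
    df, n_docs = document_frequency("first", example_corpus)
    idf = inverse_document_frequency(df, n_docs)
    print(f"tf-idf of 'first' in the first document: {tf_idf(tf, idf)}")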
================================================
FILE: machine_learning/xgboost_classifier.py
================================================
# XGBoost Classifier Example
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
def data_handling(data: dict) -> tuple:
# Split dataset into features and target
# data is features
"""
>>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])}))
('[5.1, 3.5, 1.4, 0.2]', [0])
>>> data_handling(
... {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])}
... )
('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
"""
return (data["data"], data["target"])
def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
"""
# THIS TEST IS BROKEN!! >>> xgboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0]))
XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
early_stopping_rounds=None, enable_categorical=False,
eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
importance_type=None, interaction_constraints='',
learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
missing=nan, monotone_constraints='()', n_estimators=100,
n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
reg_alpha=0, reg_lambda=1, ...)
"""
classifier = XGBClassifier()
classifier.fit(features, target)
return classifier
def main() -> None:
"""
Url for the algorithm:
https://xgboost.readthedocs.io/en/stable/
Iris type dataset is used to demonstrate algorithm.
"""
# Load Iris dataset
iris = load_iris()
features, targets = data_handling(iris)
x_train, x_test, y_train, y_test = train_test_split(
features, targets, test_size=0.25
)
names = iris["target_names"]
# Create an XGBoost Classifier from the training data
xgboost_classifier = xgboost(x_train, y_train)
# Display the confusion matrix of the classifier with both training and test sets
ConfusionMatrixDisplay.from_estimator(
xgboost_classifier,
x_test,
y_test,
display_labels=names,
cmap="Blues",
normalize="true",
)
plt.title("Normalized Confusion Matrix - IRIS Dataset")
plt.show()
if __name__ == "__main__":
import doctest
doctest.testmod(verbose=True)
main()
================================================
FILE: machine_learning/xgboost_regressor.py
================================================
# XGBoost Regressor Example
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
def data_handling(data: dict) -> tuple:
# Split dataset into features and target. Data is features.
"""
>>> data_handling((
... {'data':'[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]'
... ,'target':([4.526])}))
('[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]', [4.526])
"""
return (data["data"], data["target"])
def xgboost(
features: np.ndarray, target: np.ndarray, test_features: np.ndarray
) -> np.ndarray:
"""
>>> xgboost(np.array([[ 2.3571 , 52. , 6.00813008, 1.06775068,
... 907. , 2.45799458, 40.58 , -124.26]]),np.array([1.114]),
... np.array([[1.97840000e+00, 3.70000000e+01, 4.98858447e+00, 1.03881279e+00,
... 1.14300000e+03, 2.60958904e+00, 3.67800000e+01, -1.19780000e+02]]))
array([[1.1139996]], dtype=float32)
"""
xgb = XGBRegressor(
verbosity=0, random_state=42, tree_method="exact", base_score=0.5
)
xgb.fit(features, target)
# Predict target for test data
predictions = xgb.predict(test_features)
predictions = predictions.reshape(len(predictions), 1)
return predictions
def main() -> None:
"""
The URL for this algorithm
https://xgboost.readthedocs.io/en/stable/
California house price dataset is used to demonstrate the algorithm.
Expected error values:
Mean Absolute Error: 0.30957163379906033
Mean Square Error: 0.22611560196662744
"""
# Load California house price dataset
california = fetch_california_housing()
data, target = data_handling(california)
x_train, x_test, y_train, y_test = train_test_split(
data, target, test_size=0.25, random_state=1
)
predictions = xgboost(x_train, y_train, x_test)
# Error printing
print(f"Mean Absolute Error: {mean_absolute_error(y_test, predictions)}")
print(f"Mean Square Error: {mean_squared_error(y_test, predictions)}")
if __name__ == "__main__":
import doctest
doctest.testmod(verbose=True)
main()
================================================
FILE: maths/__init__.py
================================================
================================================
FILE: maths/abs.py
================================================
"""Absolute Value."""
def abs_val(num: float) -> float:
"""
Find the absolute value of a number.
>>> abs_val(-5.1)
5.1
>>> abs_val(-5) == abs_val(5)
True
>>> abs_val(0)
0
"""
return -num if num < 0 else num
def abs_min(x: list[int]) -> int:
"""
>>> abs_min([0,5,1,11])
0
>>> abs_min([3,-10,-2])
-2
>>> abs_min([])
Traceback (most recent call last):
...
ValueError: abs_min() arg is an empty sequence
"""
if len(x) == 0:
raise ValueError("abs_min() arg is an empty sequence")
j = x[0]
for i in x:
if abs_val(i) < abs_val(j):
j = i
return j
def abs_max(x: list[int]) -> int:
"""
>>> abs_max([0,5,1,11])
11
>>> abs_max([3,-10,-2])
-10
>>> abs_max([])
Traceback (most recent call last):
...
ValueError: abs_max() arg is an empty sequence
"""
if len(x) == 0:
raise ValueError("abs_max() arg is an empty sequence")
j = x[0]
for i in x:
if abs(i) > abs(j):
j = i
return j
def abs_max_sort(x: list[int]) -> int:
"""
>>> abs_max_sort([0,5,1,11])
11
>>> abs_max_sort([3,-10,-2])
-10
>>> abs_max_sort([])
Traceback (most recent call last):
...
ValueError: abs_max_sort() arg is an empty sequence
"""
if len(x) == 0:
raise ValueError("abs_max_sort() arg is an empty sequence")
return sorted(x, key=abs)[-1]
def test_abs_val():
"""
>>> test_abs_val()
"""
assert abs_val(0) == 0
assert abs_val(34) == 34
assert abs_val(-100000000000) == 100000000000
a = [-3, -1, 2, -11]
assert abs_max(a) == -11
assert abs_max_sort(a) == -11
assert abs_min(a) == -1
if __name__ == "__main__":
import doctest
doctest.testmod()
test_abs_val()
print(abs_val(-34)) # --> 34
================================================
FILE: maths/addition_without_arithmetic.py
================================================
"""
Illustrate how to add two integers without using arithmetic operators
Author: suraj Kumar
Time Complexity: O(log(max(abs(first), abs(second)))) iterations of the carry loop
https://en.wikipedia.org/wiki/Bitwise_operation
"""
def add(first: int, second: int) -> int:
"""
    Implementation of integer addition using bitwise operations
Examples:
>>> add(3, 5)
8
>>> add(13, 5)
18
>>> add(-7, 2)
-5
>>> add(0, -7)
-7
>>> add(-321, 0)
-321
"""
while second != 0:
c = first & second
first ^= second
second = c << 1
return first
if __name__ == "__main__":
import doctest
doctest.testmod()
first = int(input("Enter the first number: ").strip())
second = int(input("Enter the second number: ").strip())
print(f"{add(first, second) = }")
================================================
FILE: maths/aliquot_sum.py
================================================
def aliquot_sum(input_num: int) -> int:
"""
Finds the aliquot sum of an input integer, where the
aliquot sum of a number n is defined as the sum of all
natural numbers less than n that divide n evenly. For
example, the aliquot sum of 15 is 1 + 3 + 5 = 9. This is
a simple O(n) implementation.
@param input_num: a positive integer whose aliquot sum is to be found
@return: the aliquot sum of input_num, if input_num is positive.
Otherwise, raise a ValueError
Wikipedia Explanation: https://en.wikipedia.org/wiki/Aliquot_sum
>>> aliquot_sum(15)
9
>>> aliquot_sum(6)
6
>>> aliquot_sum(-1)
Traceback (most recent call last):
...
ValueError: Input must be positive
>>> aliquot_sum(0)
Traceback (most recent call last):
...
ValueError: Input must be positive
>>> aliquot_sum(1.6)
Traceback (most recent call last):
...
ValueError: Input must be an integer
>>> aliquot_sum(12)
16
>>> aliquot_sum(1)
0
>>> aliquot_sum(19)
1
"""
if not isinstance(input_num, int):
raise ValueError("Input must be an integer")
if input_num <= 0:
raise ValueError("Input must be positive")
return sum(
divisor for divisor in range(1, input_num // 2 + 1) if input_num % divisor == 0
)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/allocation_number.py
================================================
"""
In a multi-threaded download, this algorithm could be used to provide
each worker thread with a block of non-overlapping bytes to download.
For example:
for i in allocation_list:
requests.get(url,headers={'Range':f'bytes={i}'})
"""
from __future__ import annotations
def allocation_num(number_of_bytes: int, partitions: int) -> list[str]:
"""
Divide a number of bytes into x partitions.
    :param number_of_bytes: the total number of bytes.
    :param partitions: the number of partitions to be allocated.
:return: list of bytes to be assigned to each worker thread
>>> allocation_num(16647, 4)
['1-4161', '4162-8322', '8323-12483', '12484-16647']
>>> allocation_num(50000, 5)
['1-10000', '10001-20000', '20001-30000', '30001-40000', '40001-50000']
>>> allocation_num(888, 999)
Traceback (most recent call last):
...
ValueError: partitions can not > number_of_bytes!
>>> allocation_num(888, -4)
Traceback (most recent call last):
...
ValueError: partitions must be a positive number!
"""
if partitions <= 0:
raise ValueError("partitions must be a positive number!")
if partitions > number_of_bytes:
raise ValueError("partitions can not > number_of_bytes!")
bytes_per_partition = number_of_bytes // partitions
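    # Each partition gets an equal share of bytes; the final partition absorbs
    # any remainder so the ranges cover every byte exactly once.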
allocation_list = []
for i in range(partitions):
start_bytes = i * bytes_per_partition + 1
end_bytes = (
number_of_bytes if i == partitions - 1 else (i + 1) * bytes_per_partition
)
allocation_list.append(f"{start_bytes}-{end_bytes}")
return allocation_list
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/arc_length.py
================================================
from math import pi
def arc_length(angle: int, radius: int) -> float:
"""
>>> arc_length(45, 5)
3.9269908169872414
>>> arc_length(120, 15)
31.415926535897928
>>> arc_length(90, 10)
15.707963267948966
"""
return 2 * pi * radius * (angle / 360)
if __name__ == "__main__":
print(arc_length(90, 10))
================================================
FILE: maths/area.py
================================================
"""
Find the area of various geometric shapes
Wikipedia reference: https://en.wikipedia.org/wiki/Area
"""
from math import pi, sqrt, tan
def surface_area_cube(side_length: float) -> float:
"""
Calculate the Surface Area of a Cube.
>>> surface_area_cube(1)
6
>>> surface_area_cube(1.6)
15.360000000000003
>>> surface_area_cube(0)
0
>>> surface_area_cube(3)
54
>>> surface_area_cube(-1)
Traceback (most recent call last):
...
ValueError: surface_area_cube() only accepts non-negative values
"""
if side_length < 0:
raise ValueError("surface_area_cube() only accepts non-negative values")
return 6 * side_length**2
def surface_area_cuboid(length: float, breadth: float, height: float) -> float:
"""
Calculate the Surface Area of a Cuboid.
>>> surface_area_cuboid(1, 2, 3)
22
>>> surface_area_cuboid(0, 0, 0)
0
>>> surface_area_cuboid(1.6, 2.6, 3.6)
38.56
>>> surface_area_cuboid(-1, 2, 3)
Traceback (most recent call last):
...
ValueError: surface_area_cuboid() only accepts non-negative values
>>> surface_area_cuboid(1, -2, 3)
Traceback (most recent call last):
...
ValueError: surface_area_cuboid() only accepts non-negative values
>>> surface_area_cuboid(1, 2, -3)
Traceback (most recent call last):
...
ValueError: surface_area_cuboid() only accepts non-negative values
"""
if length < 0 or breadth < 0 or height < 0:
raise ValueError("surface_area_cuboid() only accepts non-negative values")
return 2 * ((length * breadth) + (breadth * height) + (length * height))
def surface_area_sphere(radius: float) -> float:
"""
Calculate the Surface Area of a Sphere.
Wikipedia reference: https://en.wikipedia.org/wiki/Sphere
Formula: 4 * pi * r^2
>>> surface_area_sphere(5)
314.1592653589793
>>> surface_area_sphere(1)
12.566370614359172
>>> surface_area_sphere(1.6)
32.169908772759484
>>> surface_area_sphere(0)
0.0
>>> surface_area_sphere(-1)
Traceback (most recent call last):
...
ValueError: surface_area_sphere() only accepts non-negative values
"""
if radius < 0:
raise ValueError("surface_area_sphere() only accepts non-negative values")
return 4 * pi * radius**2
def surface_area_hemisphere(radius: float) -> float:
"""
Calculate the Surface Area of a Hemisphere.
Formula: 3 * pi * r^2
>>> surface_area_hemisphere(5)
235.61944901923448
>>> surface_area_hemisphere(1)
9.42477796076938
>>> surface_area_hemisphere(0)
0.0
>>> surface_area_hemisphere(1.1)
11.40398133253095
>>> surface_area_hemisphere(-1)
Traceback (most recent call last):
...
ValueError: surface_area_hemisphere() only accepts non-negative values
"""
if radius < 0:
raise ValueError("surface_area_hemisphere() only accepts non-negative values")
return 3 * pi * radius**2
def surface_area_cone(radius: float, height: float) -> float:
"""
Calculate the Surface Area of a Cone.
Wikipedia reference: https://en.wikipedia.org/wiki/Cone
Formula: pi * r * (r + (h ** 2 + r ** 2) ** 0.5)
>>> surface_area_cone(10, 24)
1130.9733552923256
>>> surface_area_cone(6, 8)
301.59289474462014
>>> surface_area_cone(1.6, 2.6)
23.387862992395807
>>> surface_area_cone(0, 0)
0.0
>>> surface_area_cone(-1, -2)
Traceback (most recent call last):
...
ValueError: surface_area_cone() only accepts non-negative values
>>> surface_area_cone(1, -2)
Traceback (most recent call last):
...
ValueError: surface_area_cone() only accepts non-negative values
>>> surface_area_cone(-1, 2)
Traceback (most recent call last):
...
ValueError: surface_area_cone() only accepts non-negative values
"""
if radius < 0 or height < 0:
raise ValueError("surface_area_cone() only accepts non-negative values")
return pi * radius * (radius + (height**2 + radius**2) ** 0.5)
def surface_area_conical_frustum(
radius_1: float, radius_2: float, height: float
) -> float:
"""
Calculate the Surface Area of a Conical Frustum.
>>> surface_area_conical_frustum(1, 2, 3)
45.511728065337266
>>> surface_area_conical_frustum(4, 5, 6)
300.7913575056268
>>> surface_area_conical_frustum(0, 0, 0)
0.0
>>> surface_area_conical_frustum(1.6, 2.6, 3.6)
78.57907060751548
>>> surface_area_conical_frustum(-1, 2, 3)
Traceback (most recent call last):
...
ValueError: surface_area_conical_frustum() only accepts non-negative values
>>> surface_area_conical_frustum(1, -2, 3)
Traceback (most recent call last):
...
ValueError: surface_area_conical_frustum() only accepts non-negative values
>>> surface_area_conical_frustum(1, 2, -3)
Traceback (most recent call last):
...
ValueError: surface_area_conical_frustum() only accepts non-negative values
"""
if radius_1 < 0 or radius_2 < 0 or height < 0:
raise ValueError(
"surface_area_conical_frustum() only accepts non-negative values"
)
slant_height = (height**2 + (radius_1 - radius_2) ** 2) ** 0.5
return pi * ((slant_height * (radius_1 + radius_2)) + radius_1**2 + radius_2**2)
def surface_area_cylinder(radius: float, height: float) -> float:
"""
Calculate the Surface Area of a Cylinder.
Wikipedia reference: https://en.wikipedia.org/wiki/Cylinder
Formula: 2 * pi * r * (h + r)
>>> surface_area_cylinder(7, 10)
747.6990515543707
>>> surface_area_cylinder(1.6, 2.6)
42.22300526424682
>>> surface_area_cylinder(0, 0)
0.0
>>> surface_area_cylinder(6, 8)
527.7875658030853
>>> surface_area_cylinder(-1, -2)
Traceback (most recent call last):
...
ValueError: surface_area_cylinder() only accepts non-negative values
>>> surface_area_cylinder(1, -2)
Traceback (most recent call last):
...
ValueError: surface_area_cylinder() only accepts non-negative values
>>> surface_area_cylinder(-1, 2)
Traceback (most recent call last):
...
ValueError: surface_area_cylinder() only accepts non-negative values
"""
if radius < 0 or height < 0:
raise ValueError("surface_area_cylinder() only accepts non-negative values")
return 2 * pi * radius * (height + radius)
def surface_area_torus(torus_radius: float, tube_radius: float) -> float:
"""Calculate the Area of a Torus.
Wikipedia reference: https://en.wikipedia.org/wiki/Torus
:return 4pi^2 * torus_radius * tube_radius
>>> surface_area_torus(1, 1)
39.47841760435743
>>> surface_area_torus(4, 3)
473.7410112522892
>>> surface_area_torus(3, 4)
Traceback (most recent call last):
...
ValueError: surface_area_torus() does not support spindle or self intersecting tori
>>> surface_area_torus(1.6, 1.6)
101.06474906715503
>>> surface_area_torus(0, 0)
0.0
>>> surface_area_torus(-1, 1)
Traceback (most recent call last):
...
ValueError: surface_area_torus() only accepts non-negative values
>>> surface_area_torus(1, -1)
Traceback (most recent call last):
...
ValueError: surface_area_torus() only accepts non-negative values
"""
if torus_radius < 0 or tube_radius < 0:
raise ValueError("surface_area_torus() only accepts non-negative values")
if torus_radius < tube_radius:
raise ValueError(
"surface_area_torus() does not support spindle or self intersecting tori"
)
return 4 * pow(pi, 2) * torus_radius * tube_radius
def area_rectangle(length: float, width: float) -> float:
"""
Calculate the area of a rectangle.
>>> area_rectangle(10, 20)
200
>>> area_rectangle(1.6, 2.6)
4.16
>>> area_rectangle(0, 0)
0
>>> area_rectangle(-1, -2)
Traceback (most recent call last):
...
ValueError: area_rectangle() only accepts non-negative values
>>> area_rectangle(1, -2)
Traceback (most recent call last):
...
ValueError: area_rectangle() only accepts non-negative values
>>> area_rectangle(-1, 2)
Traceback (most recent call last):
...
ValueError: area_rectangle() only accepts non-negative values
"""
if length < 0 or width < 0:
raise ValueError("area_rectangle() only accepts non-negative values")
return length * width
def area_square(side_length: float) -> float:
"""
Calculate the area of a square.
>>> area_square(10)
100
>>> area_square(0)
0
>>> area_square(1.6)
2.5600000000000005
>>> area_square(-1)
Traceback (most recent call last):
...
ValueError: area_square() only accepts non-negative values
"""
if side_length < 0:
raise ValueError("area_square() only accepts non-negative values")
return side_length**2
def area_triangle(base: float, height: float) -> float:
"""
Calculate the area of a triangle given the base and height.
>>> area_triangle(10, 10)
50.0
>>> area_triangle(1.6, 2.6)
2.08
>>> area_triangle(0, 0)
0.0
>>> area_triangle(-1, -2)
Traceback (most recent call last):
...
ValueError: area_triangle() only accepts non-negative values
>>> area_triangle(1, -2)
Traceback (most recent call last):
...
ValueError: area_triangle() only accepts non-negative values
>>> area_triangle(-1, 2)
Traceback (most recent call last):
...
ValueError: area_triangle() only accepts non-negative values
"""
if base < 0 or height < 0:
raise ValueError("area_triangle() only accepts non-negative values")
return (base * height) / 2
def area_triangle_three_sides(side1: float, side2: float, side3: float) -> float:
"""
Calculate area of triangle when the length of 3 sides are known.
This function uses Heron's formula: https://en.wikipedia.org/wiki/Heron%27s_formula
>>> area_triangle_three_sides(5, 12, 13)
30.0
>>> area_triangle_three_sides(10, 11, 12)
51.521233486786784
>>> area_triangle_three_sides(0, 0, 0)
0.0
>>> area_triangle_three_sides(1.6, 2.6, 3.6)
1.8703742940919619
>>> area_triangle_three_sides(-1, -2, -1)
Traceback (most recent call last):
...
ValueError: area_triangle_three_sides() only accepts non-negative values
>>> area_triangle_three_sides(1, -2, 1)
Traceback (most recent call last):
...
ValueError: area_triangle_three_sides() only accepts non-negative values
>>> area_triangle_three_sides(2, 4, 7)
Traceback (most recent call last):
...
ValueError: Given three sides do not form a triangle
>>> area_triangle_three_sides(2, 7, 4)
Traceback (most recent call last):
...
ValueError: Given three sides do not form a triangle
>>> area_triangle_three_sides(7, 2, 4)
Traceback (most recent call last):
...
ValueError: Given three sides do not form a triangle
"""
if side1 < 0 or side2 < 0 or side3 < 0:
raise ValueError("area_triangle_three_sides() only accepts non-negative values")
elif side1 + side2 < side3 or side1 + side3 < side2 or side2 + side3 < side1:
raise ValueError("Given three sides do not form a triangle")
semi_perimeter = (side1 + side2 + side3) / 2
area = sqrt(
semi_perimeter
* (semi_perimeter - side1)
* (semi_perimeter - side2)
* (semi_perimeter - side3)
)
return area
def area_parallelogram(base: float, height: float) -> float:
"""
Calculate the area of a parallelogram.
>>> area_parallelogram(10, 20)
200
>>> area_parallelogram(1.6, 2.6)
4.16
>>> area_parallelogram(0, 0)
0
>>> area_parallelogram(-1, -2)
Traceback (most recent call last):
...
ValueError: area_parallelogram() only accepts non-negative values
>>> area_parallelogram(1, -2)
Traceback (most recent call last):
...
ValueError: area_parallelogram() only accepts non-negative values
>>> area_parallelogram(-1, 2)
Traceback (most recent call last):
...
ValueError: area_parallelogram() only accepts non-negative values
"""
if base < 0 or height < 0:
raise ValueError("area_parallelogram() only accepts non-negative values")
return base * height
def area_trapezium(base1: float, base2: float, height: float) -> float:
"""
Calculate the area of a trapezium.
>>> area_trapezium(10, 20, 30)
450.0
>>> area_trapezium(1.6, 2.6, 3.6)
7.5600000000000005
>>> area_trapezium(0, 0, 0)
0.0
>>> area_trapezium(-1, -2, -3)
Traceback (most recent call last):
...
ValueError: area_trapezium() only accepts non-negative values
>>> area_trapezium(-1, 2, 3)
Traceback (most recent call last):
...
ValueError: area_trapezium() only accepts non-negative values
>>> area_trapezium(1, -2, 3)
Traceback (most recent call last):
...
ValueError: area_trapezium() only accepts non-negative values
>>> area_trapezium(1, 2, -3)
Traceback (most recent call last):
...
ValueError: area_trapezium() only accepts non-negative values
>>> area_trapezium(-1, -2, 3)
Traceback (most recent call last):
...
ValueError: area_trapezium() only accepts non-negative values
>>> area_trapezium(1, -2, -3)
Traceback (most recent call last):
...
ValueError: area_trapezium() only accepts non-negative values
>>> area_trapezium(-1, 2, -3)
Traceback (most recent call last):
...
ValueError: area_trapezium() only accepts non-negative values
"""
if base1 < 0 or base2 < 0 or height < 0:
raise ValueError("area_trapezium() only accepts non-negative values")
return 1 / 2 * (base1 + base2) * height
def area_circle(radius: float) -> float:
"""
Calculate the area of a circle.
>>> area_circle(20)
1256.6370614359173
>>> area_circle(1.6)
8.042477193189871
>>> area_circle(0)
0.0
>>> area_circle(-1)
Traceback (most recent call last):
...
ValueError: area_circle() only accepts non-negative values
"""
if radius < 0:
raise ValueError("area_circle() only accepts non-negative values")
return pi * radius**2
def area_ellipse(radius_x: float, radius_y: float) -> float:
"""
Calculate the area of an ellipse.
>>> area_ellipse(10, 10)
314.1592653589793
>>> area_ellipse(10, 20)
628.3185307179587
>>> area_ellipse(0, 0)
0.0
>>> area_ellipse(1.6, 2.6)
13.06902543893354
>>> area_ellipse(-10, 20)
Traceback (most recent call last):
...
ValueError: area_ellipse() only accepts non-negative values
>>> area_ellipse(10, -20)
Traceback (most recent call last):
...
ValueError: area_ellipse() only accepts non-negative values
>>> area_ellipse(-10, -20)
Traceback (most recent call last):
...
ValueError: area_ellipse() only accepts non-negative values
"""
if radius_x < 0 or radius_y < 0:
raise ValueError("area_ellipse() only accepts non-negative values")
return pi * radius_x * radius_y
def area_rhombus(diagonal_1: float, diagonal_2: float) -> float:
"""
Calculate the area of a rhombus.
>>> area_rhombus(10, 20)
100.0
>>> area_rhombus(1.6, 2.6)
2.08
>>> area_rhombus(0, 0)
0.0
>>> area_rhombus(-1, -2)
Traceback (most recent call last):
...
ValueError: area_rhombus() only accepts non-negative values
>>> area_rhombus(1, -2)
Traceback (most recent call last):
...
ValueError: area_rhombus() only accepts non-negative values
>>> area_rhombus(-1, 2)
Traceback (most recent call last):
...
ValueError: area_rhombus() only accepts non-negative values
"""
if diagonal_1 < 0 or diagonal_2 < 0:
raise ValueError("area_rhombus() only accepts non-negative values")
return 1 / 2 * diagonal_1 * diagonal_2
def area_reg_polygon(sides: int, length: float) -> float:
"""
Calculate the area of a regular polygon.
Wikipedia reference: https://en.wikipedia.org/wiki/Polygon#Regular_polygons
Formula: (n*s^2*cot(pi/n))/4
>>> area_reg_polygon(3, 10)
43.301270189221945
>>> area_reg_polygon(4, 10)
100.00000000000001
>>> area_reg_polygon(0, 0)
Traceback (most recent call last):
...
ValueError: area_reg_polygon() only accepts integers greater than or equal to \
three as number of sides
>>> area_reg_polygon(-1, -2)
Traceback (most recent call last):
...
ValueError: area_reg_polygon() only accepts integers greater than or equal to \
three as number of sides
>>> area_reg_polygon(5, -2)
Traceback (most recent call last):
...
ValueError: area_reg_polygon() only accepts non-negative values as \
length of a side
>>> area_reg_polygon(-1, 2)
Traceback (most recent call last):
...
ValueError: area_reg_polygon() only accepts integers greater than or equal to \
three as number of sides
"""
if not isinstance(sides, int) or sides < 3:
raise ValueError(
"area_reg_polygon() only accepts integers greater than or \
equal to three as number of sides"
)
elif length < 0:
raise ValueError(
"area_reg_polygon() only accepts non-negative values as \
length of a side"
)
return (sides * length**2) / (4 * tan(pi / sides))
if __name__ == "__main__":
import doctest
doctest.testmod(verbose=True) # verbose so we can see methods missing tests
print("[DEMO] Areas of various geometric shapes: \n")
print(f"Rectangle: {area_rectangle(10, 20) = }")
print(f"Square: {area_square(10) = }")
print(f"Triangle: {area_triangle(10, 10) = }")
print(f"Triangle: {area_triangle_three_sides(5, 12, 13) = }")
print(f"Parallelogram: {area_parallelogram(10, 20) = }")
print(f"Rhombus: {area_rhombus(10, 20) = }")
print(f"Trapezium: {area_trapezium(10, 20, 30) = }")
print(f"Circle: {area_circle(20) = }")
print(f"Ellipse: {area_ellipse(10, 20) = }")
print("\nSurface Areas of various geometric shapes: \n")
print(f"Cube: {surface_area_cube(20) = }")
print(f"Cuboid: {surface_area_cuboid(10, 20, 30) = }")
print(f"Sphere: {surface_area_sphere(20) = }")
print(f"Hemisphere: {surface_area_hemisphere(20) = }")
print(f"Cone: {surface_area_cone(10, 20) = }")
print(f"Conical Frustum: {surface_area_conical_frustum(10, 20, 30) = }")
print(f"Cylinder: {surface_area_cylinder(10, 20) = }")
print(f"Torus: {surface_area_torus(20, 10) = }")
print(f"Equilateral Triangle: {area_reg_polygon(3, 10) = }")
print(f"Square: {area_reg_polygon(4, 10) = }")
print(f"Reqular Pentagon: {area_reg_polygon(5, 10) = }")
================================================
FILE: maths/area_under_curve.py
================================================
"""
Approximates the area under the curve using the trapezoidal rule
"""
from __future__ import annotations
from collections.abc import Callable
def trapezoidal_area(
fnc: Callable[[float], float],
x_start: float,
x_end: float,
steps: int = 100,
) -> float:
"""
Treats the curve as a collection of straight line segments and sums the area
of the trapezium shapes they form
:param fnc: a function which defines a curve
:param x_start: left end point to indicate the start of line segment
:param x_end: right end point to indicate end of line segment
:param steps: an accuracy gauge; more steps increases the accuracy
:return: a float representing the approximate area under the curve
>>> def f(x):
... return 5
>>> f"{trapezoidal_area(f, 12.0, 14.0, 1000):.3f}"
'10.000'
>>> def f(x):
... return 9*x**2
>>> f"{trapezoidal_area(f, -4.0, 0, 10000):.4f}"
'192.0000'
>>> f"{trapezoidal_area(f, -4.0, 4.0, 10000):.4f}"
'384.0000'
"""
x1 = x_start
fx1 = fnc(x_start)
area = 0.0
for _ in range(steps):
# Approximate each small segment of the curve as a straight line and
# compute its trapezoidal area
x2 = (x_end - x_start) / steps + x1
fx2 = fnc(x2)
area += abs(fx2 + fx1) * (x2 - x1) / 2
# Increment step
x1 = x2
fx1 = fx2
return area
if __name__ == "__main__":
def f(x):
return x**3 + x**2
print("f(x) = x^3 + x^2")
print("The area between the curve, x = -5, x = 5 and the x axis is:")
i = 10
while i <= 100000:
print(f"with {i} steps: {trapezoidal_area(f, -5, 5, i)}")
i *= 10
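# Convergence check (a sketch): the estimate for the integral of x**2 over
# [0, 1] should approach the exact value 1/3 as the number of steps grows.
estimate = trapezoidal_area(lambda x: x**2, 0.0, 1.0, 10_000)
print(f"x^2 over [0, 1] with 10000 steps: {estimate:.6f} (exact: {1 / 3:.6f})")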
================================================
FILE: maths/average_absolute_deviation.py
================================================
def average_absolute_deviation(nums: list[int]) -> float:
"""
Return the average absolute deviation of a list of numbers.
Wiki: https://en.wikipedia.org/wiki/Average_absolute_deviation
>>> average_absolute_deviation([0])
0.0
>>> average_absolute_deviation([4, 1, 3, 2])
1.0
>>> average_absolute_deviation([2, 70, 6, 50, 20, 8, 4, 0])
20.0
>>> average_absolute_deviation([-20, 0, 30, 15])
16.25
>>> average_absolute_deviation([])
Traceback (most recent call last):
...
ValueError: List is empty
"""
if not nums: # Makes sure that the list is not empty
raise ValueError("List is empty")
average = sum(nums) / len(nums) # Calculate the average
return sum(abs(x - average) for x in nums) / len(nums)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/average_mean.py
================================================
from __future__ import annotations
def mean(nums: list) -> float:
"""
Find mean of a list of numbers.
Wiki: https://en.wikipedia.org/wiki/Mean
>>> mean([3, 6, 9, 12, 15, 18, 21])
12.0
>>> mean([5, 10, 15, 20, 25, 30, 35])
20.0
>>> mean([1, 2, 3, 4, 5, 6, 7, 8])
4.5
>>> mean([])
Traceback (most recent call last):
...
ValueError: List is empty
"""
if not nums:
raise ValueError("List is empty")
return sum(nums) / len(nums)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/average_median.py
================================================
from __future__ import annotations
def median(nums: list) -> int | float:
"""
Find median of a list of numbers.
Wiki: https://en.wikipedia.org/wiki/Median
>>> median([0])
0
>>> median([4, 1, 3, 2])
2.5
>>> median([2, 70, 6, 50, 20, 8, 4])
8
Args:
nums: List of nums
Returns:
Median.
"""
# The sorted function returns list[SupportsRichComparisonT@sorted]
# which does not support `+`
sorted_list: list[int] = sorted(nums)
length = len(sorted_list)
mid_index = length >> 1
return (
(sorted_list[mid_index] + sorted_list[mid_index - 1]) / 2
if length % 2 == 0
else sorted_list[mid_index]
)
def main():
import doctest
doctest.testmod()
if __name__ == "__main__":
main()
================================================
FILE: maths/average_mode.py
================================================
from typing import Any
def mode(input_list: list) -> list[Any]:
"""This function returns the mode(Mode as in the measures of
central tendency) of the input data.
The input list may contain any Datastructure or any Datatype.
>>> mode([2, 3, 4, 5, 3, 4, 2, 5, 2, 2, 4, 2, 2, 2])
[2]
>>> mode([3, 4, 5, 3, 4, 2, 5, 2, 2, 4, 4, 2, 2, 2])
[2]
>>> mode([3, 4, 5, 3, 4, 2, 5, 2, 2, 4, 4, 4, 2, 2, 4, 2])
[2, 4]
>>> mode(["x", "y", "y", "z"])
['y']
>>> mode(["x", "x" , "y", "y", "z"])
['x', 'y']
"""
if not input_list:
return []
result = [input_list.count(value) for value in input_list]
y = max(result) # Gets the maximum count in the input list.
# Gets values of modes
return sorted({input_list[i] for i, value in enumerate(result) if value == y})
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/bailey_borwein_plouffe.py
================================================
def bailey_borwein_plouffe(digit_position: int, precision: int = 1000) -> str:
"""
Implement a popular pi-digit-extraction algorithm known as the
Bailey-Borwein-Plouffe (BBP) formula to calculate the nth hex digit of pi.
Wikipedia page:
https://en.wikipedia.org/wiki/Bailey%E2%80%93Borwein%E2%80%93Plouffe_formula
@param digit_position: a positive integer representing the position of the digit to
extract.
The digit immediately after the decimal point is located at position 1.
@param precision: number of terms in the second summation to calculate.
A higher number reduces the chance of an error but increases the runtime.
@return: a hexadecimal digit representing the digit at the nth position
in pi's hexadecimal expansion.
>>> "".join(bailey_borwein_plouffe(i) for i in range(1, 11))
'243f6a8885'
>>> bailey_borwein_plouffe(5, 10000)
'6'
>>> bailey_borwein_plouffe(-10)
Traceback (most recent call last):
...
ValueError: Digit position must be a positive integer
>>> bailey_borwein_plouffe(0)
Traceback (most recent call last):
...
ValueError: Digit position must be a positive integer
>>> bailey_borwein_plouffe(1.7)
Traceback (most recent call last):
...
ValueError: Digit position must be a positive integer
>>> bailey_borwein_plouffe(2, -10)
Traceback (most recent call last):
...
ValueError: Precision must be a nonnegative integer
>>> bailey_borwein_plouffe(2, 1.6)
Traceback (most recent call last):
...
ValueError: Precision must be a nonnegative integer
"""
if (not isinstance(digit_position, int)) or (digit_position <= 0):
raise ValueError("Digit position must be a positive integer")
elif (not isinstance(precision, int)) or (precision < 0):
raise ValueError("Precision must be a nonnegative integer")
# compute an approximation of (16 ** (n - 1)) * pi whose fractional part is mostly
# accurate
sum_result = (
4 * _subsum(digit_position, 1, precision)
- 2 * _subsum(digit_position, 4, precision)
- _subsum(digit_position, 5, precision)
- _subsum(digit_position, 6, precision)
)
# return the first hex digit of the fractional part of the result
return hex(int((sum_result % 1) * 16))[2:]
def _subsum(
digit_pos_to_extract: int, denominator_addend: int, precision: int
) -> float:
# only care about first digit of fractional part; don't need decimal
"""
Private helper function to implement the summation
functionality.
@param digit_pos_to_extract: digit position to extract
@param denominator_addend: added to denominator of fractions in the formula
@param precision: same as precision in main function
@return: floating-point number whose integer part is not important
"""
total = 0.0
for sum_index in range(digit_pos_to_extract + precision):
denominator = 8 * sum_index + denominator_addend
if sum_index < digit_pos_to_extract:
# if the exponential term is an integer and we mod it by the denominator
# before dividing, only the integer part of the sum will change;
# the fractional part will not
exponential_term = pow(
16, digit_pos_to_extract - 1 - sum_index, denominator
)
else:
exponential_term = pow(16, digit_pos_to_extract - 1 - sum_index)
total += exponential_term / denominator
return total
if __name__ == "__main__":
import doctest
doctest.testmod()
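# Sanity check (a sketch): interpreting "3." followed by the first ten extracted
# hexadecimal digits as a base 16 fraction should reproduce pi very closely.
import math
hex_digits = "".join(bailey_borwein_plouffe(i) for i in range(1, 11))
print(math.isclose(int("3" + hex_digits, 16) / 16**10, math.pi, rel_tol=1e-10))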
================================================
FILE: maths/base_neg2_conversion.py
================================================
def decimal_to_negative_base_2(num: int) -> int:
"""
Convert a decimal (base 10) integer into its negative base 2 (base -2)
representation.
Args:
num (int): The decimal number to convert.
Returns:
int: The negative base 2 number.
Examples:
>>> decimal_to_negative_base_2(0)
0
>>> decimal_to_negative_base_2(-19)
111101
>>> decimal_to_negative_base_2(4)
100
>>> decimal_to_negative_base_2(7)
11011
"""
if num == 0:
return 0
ans = ""
while num != 0:
num, rem = divmod(num, -2)
if rem < 0:
rem += 2
num += 1
ans = str(rem) + ans
return int(ans)
if __name__ == "__main__":
import doctest
doctest.testmod()
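# Round-trip check (a sketch): re-evaluating the returned digits in base -2
# should recover the original input value.
value = -19
digits = str(decimal_to_negative_base_2(value))
print(sum(int(d) * (-2) ** i for i, d in enumerate(reversed(digits))) == value)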
================================================
FILE: maths/basic_maths.py
================================================
"""Implementation of Basic Math in Python."""
import math
def prime_factors(n: int) -> list:
"""Find Prime Factors.
>>> prime_factors(100)
[2, 2, 5, 5]
>>> prime_factors(0)
Traceback (most recent call last):
...
ValueError: Only positive integers have prime factors
>>> prime_factors(-10)
Traceback (most recent call last):
...
ValueError: Only positive integers have prime factors
"""
if n <= 0:
raise ValueError("Only positive integers have prime factors")
pf = []
while n % 2 == 0:
pf.append(2)
n = int(n / 2)
for i in range(3, int(math.sqrt(n)) + 1, 2):
while n % i == 0:
pf.append(i)
n = int(n / i)
if n > 2:
pf.append(n)
return pf
def number_of_divisors(n: int) -> int:
"""Calculate Number of Divisors of an Integer.
>>> number_of_divisors(100)
9
>>> number_of_divisors(0)
Traceback (most recent call last):
...
ValueError: Only positive numbers are accepted
>>> number_of_divisors(-10)
Traceback (most recent call last):
...
ValueError: Only positive numbers are accepted
"""
if n <= 0:
raise ValueError("Only positive numbers are accepted")
div = 1
temp = 1
while n % 2 == 0:
temp += 1
n = int(n / 2)
div *= temp
for i in range(3, int(math.sqrt(n)) + 1, 2):
temp = 1
while n % i == 0:
temp += 1
n = int(n / i)
div *= temp
if n > 1:
div *= 2
return div
def sum_of_divisors(n: int) -> int:
"""Calculate Sum of Divisors.
>>> sum_of_divisors(100)
217
>>> sum_of_divisors(0)
Traceback (most recent call last):
...
ValueError: Only positive numbers are accepted
>>> sum_of_divisors(-10)
Traceback (most recent call last):
...
ValueError: Only positive numbers are accepted
"""
if n <= 0:
raise ValueError("Only positive numbers are accepted")
s = 1
temp = 1
while n % 2 == 0:
temp += 1
n = int(n / 2)
if temp > 1:
s *= (2**temp - 1) / (2 - 1)
for i in range(3, int(math.sqrt(n)) + 1, 2):
temp = 1
while n % i == 0:
temp += 1
n = int(n / i)
if temp > 1:
s *= (i**temp - 1) / (i - 1)
return int(s)
def euler_phi(n: int) -> int:
"""Calculate Euler's Phi Function.
>>> euler_phi(100)
40
>>> euler_phi(0)
Traceback (most recent call last):
...
ValueError: Only positive numbers are accepted
>>> euler_phi(-10)
Traceback (most recent call last):
...
ValueError: Only positive numbers are accepted
"""
if n <= 0:
raise ValueError("Only positive numbers are accepted")
s = n
for x in set(prime_factors(n)):
s *= (x - 1) / x
return int(s)
if __name__ == "__main__":
import doctest
doctest.testmod()
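# Brute-force cross-checks (a sketch) for one small value.
n = 100
print(number_of_divisors(n) == sum(1 for d in range(1, n + 1) if n % d == 0))
print(sum_of_divisors(n) == sum(d for d in range(1, n + 1) if n % d == 0))
print(euler_phi(n) == sum(1 for k in range(1, n + 1) if math.gcd(k, n) == 1))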
================================================
FILE: maths/binary_exponentiation.py
================================================
"""
Binary Exponentiation
This is a method to find a^b in O(log b) time complexity and is one of the most commonly
used methods of exponentiation. The method is also useful for modular exponentiation,
when the solution to (a^b) % c is required.
To calculate a^b:
- If b is even, then a^b = (a * a)^(b / 2)
- If b is odd, then a^b = a * a^(b - 1)
Repeat until b = 1 or b = 0
For modular exponentiation, we use the fact that (a * b) % c = ((a % c) * (b % c)) % c
"""
def binary_exp_recursive(base: float, exponent: int) -> float:
"""
Computes a^b recursively, where a is the base and b is the exponent
>>> binary_exp_recursive(3, 5)
243
>>> binary_exp_recursive(11, 13)
34522712143931
>>> binary_exp_recursive(-1, 3)
-1
>>> binary_exp_recursive(0, 5)
0
>>> binary_exp_recursive(3, 1)
3
>>> binary_exp_recursive(3, 0)
1
>>> binary_exp_recursive(1.5, 4)
5.0625
>>> binary_exp_recursive(3, -1)
Traceback (most recent call last):
...
ValueError: Exponent must be a non-negative integer
"""
if exponent < 0:
raise ValueError("Exponent must be a non-negative integer")
if exponent == 0:
return 1
if exponent % 2 == 1:
return binary_exp_recursive(base, exponent - 1) * base
b = binary_exp_recursive(base, exponent // 2)
return b * b
def binary_exp_iterative(base: float, exponent: int) -> float:
"""
Computes a^b iteratively, where a is the base and b is the exponent
>>> binary_exp_iterative(3, 5)
243
>>> binary_exp_iterative(11, 13)
34522712143931
>>> binary_exp_iterative(-1, 3)
-1
>>> binary_exp_iterative(0, 5)
0
>>> binary_exp_iterative(3, 1)
3
>>> binary_exp_iterative(3, 0)
1
>>> binary_exp_iterative(1.5, 4)
5.0625
>>> binary_exp_iterative(3, -1)
Traceback (most recent call last):
...
ValueError: Exponent must be a non-negative integer
"""
if exponent < 0:
raise ValueError("Exponent must be a non-negative integer")
res: int | float = 1
while exponent > 0:
if exponent & 1:
res *= base
base *= base
exponent >>= 1
return res
def binary_exp_mod_recursive(base: float, exponent: int, modulus: int) -> float:
"""
Computes a^b % c recursively, where a is the base, b is the exponent, and c is the
modulus
>>> binary_exp_mod_recursive(3, 4, 5)
1
>>> binary_exp_mod_recursive(11, 13, 7)
4
>>> binary_exp_mod_recursive(1.5, 4, 3)
2.0625
>>> binary_exp_mod_recursive(7, -1, 10)
Traceback (most recent call last):
...
ValueError: Exponent must be a non-negative integer
>>> binary_exp_mod_recursive(7, 13, 0)
Traceback (most recent call last):
...
ValueError: Modulus must be a positive integer
"""
if exponent < 0:
raise ValueError("Exponent must be a non-negative integer")
if modulus <= 0:
raise ValueError("Modulus must be a positive integer")
if exponent == 0:
return 1
if exponent % 2 == 1:
return (binary_exp_mod_recursive(base, exponent - 1, modulus) * base) % modulus
r = binary_exp_mod_recursive(base, exponent // 2, modulus)
return (r * r) % modulus
def binary_exp_mod_iterative(base: float, exponent: int, modulus: int) -> float:
"""
Computes a^b % c iteratively, where a is the base, b is the exponent, and c is the
modulus
>>> binary_exp_mod_iterative(3, 4, 5)
1
>>> binary_exp_mod_iterative(11, 13, 7)
4
>>> binary_exp_mod_iterative(1.5, 4, 3)
2.0625
>>> binary_exp_mod_iterative(7, -1, 10)
Traceback (most recent call last):
...
ValueError: Exponent must be a non-negative integer
>>> binary_exp_mod_iterative(7, 13, 0)
Traceback (most recent call last):
...
ValueError: Modulus must be a positive integer
"""
if exponent < 0:
raise ValueError("Exponent must be a non-negative integer")
if modulus <= 0:
raise ValueError("Modulus must be a positive integer")
res: int | float = 1
while exponent > 0:
if exponent & 1:
res = ((res % modulus) * (base % modulus)) % modulus
base *= base
exponent >>= 1
return res
if __name__ == "__main__":
from timeit import timeit
a = 1269380576
b = 374
c = 34
runs = 100_000
print(
timeit(
f"binary_exp_recursive({a}, {b})",
setup="from __main__ import binary_exp_recursive",
number=runs,
)
)
print(
timeit(
f"binary_exp_iterative({a}, {b})",
setup="from __main__ import binary_exp_iterative",
number=runs,
)
)
print(
timeit(
f"binary_exp_mod_recursive({a}, {b}, {c})",
setup="from __main__ import binary_exp_mod_recursive",
number=runs,
)
)
print(
timeit(
f"binary_exp_mod_iterative({a}, {b}, {c})",
setup="from __main__ import binary_exp_mod_iterative",
number=runs,
)
)
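# Cross-check (a sketch): both the plain and the modular versions should agree
# with Python's built-in pow for the benchmark inputs used above.
print(binary_exp_iterative(a, b) == binary_exp_recursive(a, b) == pow(a, b))
print(binary_exp_mod_iterative(a, b, c) == binary_exp_mod_recursive(a, b, c) == pow(a, b, c))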
================================================
FILE: maths/binary_multiplication.py
================================================
"""
Binary Multiplication
This is a method to find a*b in a time complexity of O(log b)
This is one of the most commonly used methods of computing a product.
It is also useful when the value of (a*b) % c is required,
where a, b, c can be numbers beyond the computer's usual calculation limits.
The implementation below uses iteration; it can also be done using recursion.
Let's say you need to calculate a * b
RULE 1 : a * b = (a+a) * (b/2) ---- example : 4 * 4 = (4+4) * (4/2) = 8 * 2
RULE 2 : IF b is odd, then ---- a * b = a + (a * (b - 1)), where (b - 1) is even.
Once b is even, repeat the process to get a * b
Repeat the process until b = 1 or b = 0, because a*1 = a and a*0 = 0
For the modular version, use the fact that
(a + b) % c = ((a % c) + (b % c)) % c
and then apply RULE 1 or RULE 2, whichever is required.
@author chinmoy159
"""
def binary_multiply(a: int, b: int) -> int:
"""
Multiply 'a' and 'b' using bitwise multiplication.
Parameters:
a (int): The first number.
b (int): The second number.
Returns:
int: a * b
Examples:
>>> binary_multiply(2, 3)
6
>>> binary_multiply(5, 0)
0
>>> binary_multiply(3, 4)
12
>>> binary_multiply(10, 5)
50
>>> binary_multiply(0, 5)
0
>>> binary_multiply(2, 1)
2
>>> binary_multiply(1, 10)
10
"""
res = 0
while b > 0:
if b & 1:
res += a
a += a
b >>= 1
return res
def binary_mod_multiply(a: int, b: int, modulus: int) -> int:
"""
Calculate (a * b) % c using binary multiplication and modular arithmetic.
Parameters:
a (int): The first number.
b (int): The second number.
modulus (int): The modulus.
Returns:
int: (a * b) % modulus.
Examples:
>>> binary_mod_multiply(2, 3, 5)
1
>>> binary_mod_multiply(5, 0, 7)
0
>>> binary_mod_multiply(3, 4, 6)
0
>>> binary_mod_multiply(10, 5, 13)
11
>>> binary_mod_multiply(2, 1, 5)
2
>>> binary_mod_multiply(1, 10, 3)
1
"""
res = 0
while b > 0:
if b & 1:
res = ((res % modulus) + (a % modulus)) % modulus
a += a
b >>= 1
return res
if __name__ == "__main__":
import doctest
doctest.testmod()
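# Cross-check (a sketch): agree with the built-in * and % operators on large values.
x, y, m = 10**18 + 9, 123_456_789, 97
print(binary_multiply(x, y) == x * y)
print(binary_mod_multiply(x, y, m) == (x * y) % m)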
================================================
FILE: maths/binomial_coefficient.py
================================================
def binomial_coefficient(n: int, r: int) -> int:
"""
Find the binomial coefficient C(n, r) using Pascal's triangle.
:param n: The total number of items.
:param r: The number of items to choose.
:return: The binomial coefficient C(n, r).
>>> binomial_coefficient(10, 5)
252
>>> binomial_coefficient(10, 0)
1
>>> binomial_coefficient(0, 10)
1
>>> binomial_coefficient(10, 10)
1
>>> binomial_coefficient(5, 2)
10
>>> binomial_coefficient(5, 6)
0
>>> binomial_coefficient(3, 5)
0
>>> binomial_coefficient(-2, 3)
Traceback (most recent call last):
...
ValueError: n and r must be non-negative integers
>>> binomial_coefficient(5, -1)
Traceback (most recent call last):
...
ValueError: n and r must be non-negative integers
>>> binomial_coefficient(10.1, 5)
Traceback (most recent call last):
...
TypeError: 'float' object cannot be interpreted as an integer
>>> binomial_coefficient(10, 5.1)
Traceback (most recent call last):
...
TypeError: 'float' object cannot be interpreted as an integer
"""
if n < 0 or r < 0:
raise ValueError("n and r must be non-negative integers")
if 0 in (n, r):
return 1
c = [0 for i in range(r + 1)]
# nc0 = 1
c[0] = 1
for i in range(1, n + 1):
# to compute current row from previous row.
j = min(i, r)
while j > 0:
c[j] += c[j - 1]
j -= 1
return c[r]
if __name__ == "__main__":
from doctest import testmod
testmod()
print(binomial_coefficient(n=10, r=5))
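# Cross-check (a sketch): agree with math.comb for small inputs with n >= 1.
from math import comb
print(all(binomial_coefficient(n, r) == comb(n, r) for n in range(1, 8) for r in range(n + 1)))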
================================================
FILE: maths/binomial_distribution.py
================================================
"""For more information about the Binomial Distribution -
https://en.wikipedia.org/wiki/Binomial_distribution"""
from math import factorial
def binomial_distribution(successes: int, trials: int, prob: float) -> float:
"""
Return probability of k successes out of n tries, with p probability for one
success
The function uses the factorial function in order to calculate the binomial
coefficient
>>> binomial_distribution(3, 5, 0.7)
0.30870000000000003
>>> binomial_distribution (2, 4, 0.5)
0.375
"""
if successes > trials:
raise ValueError("""successes must be lower or equal to trials""")
if trials < 0 or successes < 0:
raise ValueError("the function is defined for non-negative integers")
if not isinstance(successes, int) or not isinstance(trials, int):
raise ValueError("the function is defined for non-negative integers")
if not 0 < prob < 1:
raise ValueError("prob has to be in range of 1 - 0")
probability = (prob**successes) * ((1 - prob) ** (trials - successes))
# Calculate the binomial coefficient: n! / k!(n-k)!
coefficient = float(factorial(trials))
coefficient /= factorial(successes) * factorial(trials - successes)
return probability * coefficient
if __name__ == "__main__":
from doctest import testmod
testmod()
print("Probability of 2 successes out of 4 trails")
print("with probability of 0.75 is:", end=" ")
print(binomial_distribution(2, 4, 0.75))
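# Sanity check (a sketch): the probabilities of all possible outcome counts sum to 1.
from math import isclose
print(isclose(sum(binomial_distribution(k, 4, 0.75) for k in range(5)), 1.0))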
================================================
FILE: maths/ceil.py
================================================
"""
https://en.wikipedia.org/wiki/Floor_and_ceiling_functions
"""
def ceil(x: float) -> int:
"""
Return the ceiling of x as an Integral.
:param x: the number
:return: the smallest integer >= x.
>>> import math
>>> all(ceil(n) == math.ceil(n) for n
... in (1, -1, 0, -0, 1.1, -1.1, 1.0, -1.0, 1_000_000_000))
True
"""
return int(x) if x - int(x) <= 0 else int(x) + 1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/chebyshev_distance.py
================================================
def chebyshev_distance(point_a: list[float], point_b: list[float]) -> float:
"""
This function calculates the Chebyshev distance (also known as the
Chessboard distance) between two n-dimensional points represented as lists.
https://en.wikipedia.org/wiki/Chebyshev_distance
>>> chebyshev_distance([1.0, 1.0], [2.0, 2.0])
1.0
>>> chebyshev_distance([1.0, 1.0, 9.0], [2.0, 2.0, -5.2])
14.2
>>> chebyshev_distance([1.0], [2.0, 2.0])
Traceback (most recent call last):
...
ValueError: Both points must have the same dimension.
"""
if len(point_a) != len(point_b):
raise ValueError("Both points must have the same dimension.")
return max(abs(a - b) for a, b in zip(point_a, point_b))
================================================
FILE: maths/check_polygon.py
================================================
from __future__ import annotations
def check_polygon(nums: list[float]) -> bool:
"""
Takes list of possible side lengths and determines whether a
two-dimensional polygon with such side lengths can exist.
Returns a boolean value for the < comparison
of the largest side length with sum of the rest.
Wiki: https://en.wikipedia.org/wiki/Triangle_inequality
>>> check_polygon([6, 10, 5])
True
>>> check_polygon([3, 7, 13, 2])
False
>>> check_polygon([1, 4.3, 5.2, 12.2])
False
>>> nums = [3, 7, 13, 2]
>>> _ = check_polygon(nums) # Run function, do not show answer in output
>>> nums # Check numbers are not reordered
[3, 7, 13, 2]
>>> check_polygon([])
Traceback (most recent call last):
...
ValueError: Monogons and Digons are not polygons in the Euclidean space
>>> check_polygon([-2, 5, 6])
Traceback (most recent call last):
...
ValueError: All values must be greater than 0
"""
if len(nums) < 3:
raise ValueError("Monogons and Digons are not polygons in the Euclidean space")
if any(i <= 0 for i in nums):
raise ValueError("All values must be greater than 0")
copy_nums = nums.copy()
copy_nums.sort()
return copy_nums[-1] < sum(copy_nums[:-1])
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/chinese_remainder_theorem.py
================================================
"""
Chinese Remainder Theorem:
GCD ( Greatest Common Divisor ) or HCF ( Highest Common Factor )
If GCD(a, b) = 1, then for any remainder ra modulo a and any remainder rb modulo b
there exists an integer n such that n = ra (mod a) and n = rb (mod b). If n1 and n2
are two such integers, then n1 = n2 (mod a*b).
Algorithm:
1. Use the extended Euclidean algorithm to find x, y such that a*x + b*y = 1
2. Take n = ra*b*y + rb*a*x
"""
from __future__ import annotations
# Extended Euclid
def extended_euclid(a: int, b: int) -> tuple[int, int]:
"""
>>> extended_euclid(10, 6)
(-1, 2)
>>> extended_euclid(7, 5)
(-2, 3)
"""
if b == 0:
return (1, 0)
(x, y) = extended_euclid(b, a % b)
k = a // b
return (y, x - k * y)
# Uses ExtendedEuclid to find inverses
def chinese_remainder_theorem(n1: int, r1: int, n2: int, r2: int) -> int:
"""
>>> chinese_remainder_theorem(5,1,7,3)
31
Explanation : 31 is the smallest number such that
(i) When we divide it by 5, we get remainder 1
(ii) When we divide it by 7, we get remainder 3
>>> chinese_remainder_theorem(6,1,4,3)
14
"""
(x, y) = extended_euclid(n1, n2)
m = n1 * n2
n = r2 * x * n1 + r1 * y * n2
return (n % m + m) % m
# ---------- SAME SOLUTION USING invert_modulo INSTEAD OF extended_euclid ----------
# This function finds the inverse of a, i.e., a^(-1) (mod n)
def invert_modulo(a: int, n: int) -> int:
"""
>>> invert_modulo(2, 5)
3
>>> invert_modulo(8,7)
1
"""
(b, _x) = extended_euclid(a, n)
if b < 0:
b = (b % n + n) % n
return b
# Same as above, using invert_modulo
def chinese_remainder_theorem2(n1: int, r1: int, n2: int, r2: int) -> int:
"""
>>> chinese_remainder_theorem2(5,1,7,3)
31
>>> chinese_remainder_theorem2(6,1,4,3)
14
"""
x, y = invert_modulo(n1, n2), invert_modulo(n2, n1)
m = n1 * n2
n = r2 * x * n1 + r1 * y * n2
return (n % m + m) % m
if __name__ == "__main__":
from doctest import testmod
testmod(name="chinese_remainder_theorem", verbose=True)
testmod(name="chinese_remainder_theorem2", verbose=True)
testmod(name="invert_modulo", verbose=True)
testmod(name="extended_euclid", verbose=True)
================================================
FILE: maths/chudnovsky_algorithm.py
================================================
from decimal import Decimal, getcontext
from math import ceil, factorial
def pi(precision: int) -> str:
"""
The Chudnovsky algorithm is a fast method for calculating the digits of PI,
based on Ramanujan's PI formulae.
https://en.wikipedia.org/wiki/Chudnovsky_algorithm
PI = constant_term / ((multinomial_term * linear_term) / exponential_term)
where constant_term = 426880 * sqrt(10005)
The linear_term and the exponential_term can be defined iteratively as follows:
L_k+1 = L_k + 545140134 where L_0 = 13591409
X_k+1 = X_k * -262537412640768000 where X_0 = 1
The multinomial_term is defined as follows:
6k! / ((3k)! * (k!) ^ 3)
where k is the k_th iteration.
This algorithm correctly calculates around 14 digits of PI per iteration
>>> pi(10)
'3.14159265'
>>> pi(100)
'3.14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706'
>>> pi('hello')
Traceback (most recent call last):
...
TypeError: Undefined for non-integers
>>> pi(-1)
Traceback (most recent call last):
...
ValueError: Undefined for non-natural numbers
"""
if not isinstance(precision, int):
raise TypeError("Undefined for non-integers")
elif precision < 1:
raise ValueError("Undefined for non-natural numbers")
getcontext().prec = precision
num_iterations = ceil(precision / 14)
constant_term = 426880 * Decimal(10005).sqrt()
exponential_term = 1
linear_term = 13591409
partial_sum = Decimal(linear_term)
for k in range(1, num_iterations):
multinomial_term = factorial(6 * k) // (factorial(3 * k) * factorial(k) ** 3)
linear_term += 545140134
exponential_term *= -262537412640768000
partial_sum += Decimal(multinomial_term * linear_term) / exponential_term
return str(constant_term / partial_sum)[:-1]
if __name__ == "__main__":
n = 50
print(f"The first {n} digits of pi is: {pi(n)}")
================================================
FILE: maths/collatz_sequence.py
================================================
"""
The Collatz conjecture is a famous unsolved problem in mathematics. Given a starting
positive integer, define the following sequence:
- If the current term n is even, then the next term is n/2.
- If the current term n is odd, then the next term is 3n + 1.
The conjecture claims that this sequence will always reach 1 for any starting number.
Other names for this problem include the 3n + 1 problem, the Ulam conjecture, Kakutani's
problem, the Thwaites conjecture, Hasse's algorithm, the Syracuse problem, and the
hailstone sequence.
Reference: https://en.wikipedia.org/wiki/Collatz_conjecture
"""
from __future__ import annotations
from collections.abc import Generator
def collatz_sequence(n: int) -> Generator[int]:
"""
Generate the Collatz sequence starting at n.
>>> tuple(collatz_sequence(2.1))
Traceback (most recent call last):
...
Exception: Sequence only defined for positive integers
>>> tuple(collatz_sequence(0))
Traceback (most recent call last):
...
Exception: Sequence only defined for positive integers
>>> tuple(collatz_sequence(4))
(4, 2, 1)
>>> tuple(collatz_sequence(11))
(11, 34, 17, 52, 26, 13, 40, 20, 10, 5, 16, 8, 4, 2, 1)
>>> tuple(collatz_sequence(31)) # doctest: +NORMALIZE_WHITESPACE
(31, 94, 47, 142, 71, 214, 107, 322, 161, 484, 242, 121, 364, 182, 91, 274, 137,
412, 206, 103, 310, 155, 466, 233, 700, 350, 175, 526, 263, 790, 395, 1186, 593,
1780, 890, 445, 1336, 668, 334, 167, 502, 251, 754, 377, 1132, 566, 283, 850, 425,
1276, 638, 319, 958, 479, 1438, 719, 2158, 1079, 3238, 1619, 4858, 2429, 7288, 3644,
1822, 911, 2734, 1367, 4102, 2051, 6154, 3077, 9232, 4616, 2308, 1154, 577, 1732,
866, 433, 1300, 650, 325, 976, 488, 244, 122, 61, 184, 92, 46, 23, 70, 35, 106, 53,
160, 80, 40, 20, 10, 5, 16, 8, 4, 2, 1)
>>> tuple(collatz_sequence(43)) # doctest: +NORMALIZE_WHITESPACE
(43, 130, 65, 196, 98, 49, 148, 74, 37, 112, 56, 28, 14, 7, 22, 11, 34, 17, 52, 26,
13, 40, 20, 10, 5, 16, 8, 4, 2, 1)
"""
if not isinstance(n, int) or n < 1:
raise Exception("Sequence only defined for positive integers")
yield n
while n != 1:
if n % 2 == 0:
n //= 2
else:
n = 3 * n + 1
yield n
def main():
n = int(input("Your number: "))
sequence = tuple(collatz_sequence(n))
print(sequence)
print(f"Collatz sequence from {n} took {len(sequence)} steps.")
if __name__ == "__main__":
main()
================================================
FILE: maths/combinations.py
================================================
"""
https://en.wikipedia.org/wiki/Combination
"""
def combinations(n: int, k: int) -> int:
"""
Returns the number of different combinations of length k that can
be made from n values, where n >= k.
Examples:
>>> combinations(10,5)
252
>>> combinations(6,3)
20
>>> combinations(20,5)
15504
>>> combinations(52, 5)
2598960
>>> combinations(0, 0)
1
>>> combinations(-4, -5)
Traceback (most recent call last):
...
ValueError: Please enter positive integers for n and k where n >= k
"""
# If either condition is true, the requested combination does not exist
# (more items chosen than available, or a negative selection size)
if n < k or k < 0:
raise ValueError("Please enter positive integers for n and k where n >= k")
res = 1
for i in range(k):
res *= n - i
res //= i + 1
return res
if __name__ == "__main__":
print(
"The number of five-card hands possible from a standard",
f"fifty-two card deck is: {combinations(52, 5)}\n",
)
print(
"If a class of 40 students must be arranged into groups of",
f"4 for group projects, there are {combinations(40, 4)} ways",
"to arrange them.\n",
)
print(
"If 10 teams are competing in a Formula One race, there",
f"are {combinations(10, 3)} ways that first, second and",
"third place can be awarded.",
)
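# Symmetry check (a sketch): C(n, k) equals C(n, n - k).
print(f"Symmetry check C(52, 5) == C(52, 47): {combinations(52, 5) == combinations(52, 47)}")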
================================================
FILE: maths/continued_fraction.py
================================================
"""
Finding the continued fraction for a rational number using Python
https://en.wikipedia.org/wiki/Continued_fraction
"""
from fractions import Fraction
from math import floor
def continued_fraction(num: Fraction) -> list[int]:
"""
:param num:
Fraction of the number whose continued fractions to be found.
Use Fraction(str(number)) for more accurate results due to
float inaccuracies.
:return:
The continued fraction of the rational number,
i.e. the list of coefficients [a0; a1, a2, ...] in continued fraction notation.
>>> continued_fraction(Fraction(2))
[2]
>>> continued_fraction(Fraction("3.245"))
[3, 4, 12, 4]
>>> continued_fraction(Fraction("2.25"))
[2, 4]
>>> continued_fraction(1/Fraction("2.25"))
[0, 2, 4]
>>> continued_fraction(Fraction("415/93"))
[4, 2, 6, 7]
>>> continued_fraction(Fraction(0))
[0]
>>> continued_fraction(Fraction(0.75))
[0, 1, 3]
>>> continued_fraction(Fraction("-2.25")) # -2.25 = -3 + 0.75
[-3, 1, 3]
"""
numerator, denominator = num.as_integer_ratio()
continued_fraction_list: list[int] = []
while True:
integer_part = floor(numerator / denominator)
continued_fraction_list.append(integer_part)
numerator -= integer_part * denominator
if numerator == 0:
break
numerator, denominator = denominator, numerator
return continued_fraction_list
if __name__ == "__main__":
import doctest
doctest.testmod()
print("Continued Fraction of 0.84375 is: ", continued_fraction(Fraction("0.84375")))
================================================
FILE: maths/decimal_isolate.py
================================================
"""
Isolate the Decimal part of a Number
https://stackoverflow.com/questions/3886402/how-to-get-numbers-after-decimal-point
"""
def decimal_isolate(number: float, digit_amount: int) -> float:
"""
Isolates the decimal part of a number.
If digit_amount > 0, round to that number of decimal places; otherwise return the entire decimal part.
>>> decimal_isolate(1.53, 0)
0.53
>>> decimal_isolate(35.345, 1)
0.3
>>> decimal_isolate(35.345, 2)
0.34
>>> decimal_isolate(35.345, 3)
0.345
>>> decimal_isolate(-14.789, 3)
-0.789
>>> decimal_isolate(0, 2)
0
>>> decimal_isolate(-14.123, 1)
-0.1
>>> decimal_isolate(-14.123, 2)
-0.12
>>> decimal_isolate(-14.123, 3)
-0.123
"""
if digit_amount > 0:
return round(number - int(number), digit_amount)
return number - int(number)
if __name__ == "__main__":
print(decimal_isolate(1.53, 0))
print(decimal_isolate(35.345, 1))
print(decimal_isolate(35.345, 2))
print(decimal_isolate(35.345, 3))
print(decimal_isolate(-14.789, 3))
print(decimal_isolate(0, 2))
print(decimal_isolate(-14.123, 1))
print(decimal_isolate(-14.123, 2))
print(decimal_isolate(-14.123, 3))
================================================
FILE: maths/decimal_to_fraction.py
================================================
def decimal_to_fraction(decimal: float | str) -> tuple[int, int]:
"""
Return a decimal number in its simplest fraction form
>>> decimal_to_fraction(2)
(2, 1)
>>> decimal_to_fraction(89.)
(89, 1)
>>> decimal_to_fraction("67")
(67, 1)
>>> decimal_to_fraction("45.0")
(45, 1)
>>> decimal_to_fraction(1.5)
(3, 2)
>>> decimal_to_fraction("6.25")
(25, 4)
>>> decimal_to_fraction("78td")
Traceback (most recent call last):
ValueError: Please enter a valid number
>>> decimal_to_fraction(0)
(0, 1)
>>> decimal_to_fraction(-2.5)
(-5, 2)
>>> decimal_to_fraction(0.125)
(1, 8)
>>> decimal_to_fraction(1000000.25)
(4000001, 4)
>>> decimal_to_fraction(1.3333)
(13333, 10000)
>>> decimal_to_fraction("1.23e2")
(123, 1)
>>> decimal_to_fraction("0.500")
(1, 2)
"""
try:
decimal = float(decimal)
except ValueError:
raise ValueError("Please enter a valid number")
fractional_part = decimal - int(decimal)
if fractional_part == 0:
return int(decimal), 1
else:
number_of_frac_digits = len(str(decimal).split(".")[1])
numerator = int(decimal * (10**number_of_frac_digits))
denominator = 10**number_of_frac_digits
divisor, dividend = denominator, numerator
while True:
remainder = dividend % divisor
if remainder == 0:
break
dividend, divisor = divisor, remainder
numerator, denominator = numerator // divisor, denominator // divisor
return numerator, denominator
if __name__ == "__main__":
print(f"{decimal_to_fraction(2) = }")
print(f"{decimal_to_fraction(89.0) = }")
print(f"{decimal_to_fraction('67') = }")
print(f"{decimal_to_fraction('45.0') = }")
print(f"{decimal_to_fraction(1.5) = }")
print(f"{decimal_to_fraction('6.25') = }")
print(f"{decimal_to_fraction('78td') = }")
================================================
FILE: maths/dodecahedron.py
================================================
# dodecahedron.py
"""
A regular dodecahedron is a three-dimensional figure made up of
12 regular pentagonal faces of equal size.
"""
def dodecahedron_surface_area(edge: float) -> float:
"""
Calculates the surface area of a regular dodecahedron
a = 3 * ((25 + 10 * (5** (1 / 2))) ** (1 / 2 )) * (e**2)
where:
a --> is the area of the dodecahedron
e --> is the length of the edge
reference-->"Dodecahedron" Study.com
:param edge: length of the edge of the dodecahedron
:type edge: float
:return: the surface area of the dodecahedron as a float
Tests:
>>> dodecahedron_surface_area(5)
516.1432201766901
>>> dodecahedron_surface_area(10)
2064.5728807067603
>>> dodecahedron_surface_area(-1)
Traceback (most recent call last):
...
ValueError: Length must be positive.
"""
if edge <= 0 or not isinstance(edge, (int, float)):
raise ValueError("Length must be positive.")
return 3 * ((25 + 10 * (5 ** (1 / 2))) ** (1 / 2)) * (edge**2)
def dodecahedron_volume(edge: float) -> float:
"""
Calculates the volume of a regular dodecahedron
v = ((15 + (7 * (5** (1 / 2)))) / 4) * (e**3)
where:
v --> is the volume of the dodecahedron
e --> is the length of the edge
reference-->"Dodecahedron" Study.com
:param edge: length of the edge of the dodecahedron
:type edge: float
:return: the volume of the dodecahedron as a float
Tests:
>>> dodecahedron_volume(5)
957.8898700780791
>>> dodecahedron_volume(10)
7663.118960624633
>>> dodecahedron_volume(-1)
Traceback (most recent call last):
...
ValueError: Length must be positive.
"""
if edge <= 0 or not isinstance(edge, (int, float)):
raise ValueError("Length must be positive.")
return ((15 + (7 * (5 ** (1 / 2)))) / 4) * (edge**3)
if __name__ == "__main__":
import doctest
doctest.testmod()
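# Consistency check (a sketch): the surface area should equal the area of
# 12 regular pentagonal faces with the same edge length.
import math
edge_length = 5
pentagon_area = (math.sqrt(5 * (5 + 2 * math.sqrt(5))) / 4) * edge_length**2
print(math.isclose(dodecahedron_surface_area(edge_length), 12 * pentagon_area))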
================================================
FILE: maths/double_factorial.py
================================================
def double_factorial_recursive(n: int) -> int:
"""
Compute double factorial using recursive method.
Recursion can be costly for large numbers.
To learn about the theory behind this algorithm:
https://en.wikipedia.org/wiki/Double_factorial
>>> from math import prod
>>> all(double_factorial_recursive(i) == prod(range(i, 0, -2)) for i in range(20))
True
>>> double_factorial_recursive(0.1)
Traceback (most recent call last):
...
ValueError: double_factorial_recursive() only accepts integral values
>>> double_factorial_recursive(-1)
Traceback (most recent call last):
...
ValueError: double_factorial_recursive() not defined for negative values
"""
if not isinstance(n, int):
raise ValueError("double_factorial_recursive() only accepts integral values")
if n < 0:
raise ValueError("double_factorial_recursive() not defined for negative values")
return 1 if n <= 1 else n * double_factorial_recursive(n - 2)
def double_factorial_iterative(num: int) -> int:
"""
Compute double factorial using iterative method.
To learn about the theory behind this algorithm:
https://en.wikipedia.org/wiki/Double_factorial
>>> from math import prod
>>> all(double_factorial_iterative(i) == prod(range(i, 0, -2)) for i in range(20))
True
>>> double_factorial_iterative(0.1)
Traceback (most recent call last):
...
ValueError: double_factorial_iterative() only accepts integral values
>>> double_factorial_iterative(-1)
Traceback (most recent call last):
...
ValueError: double_factorial_iterative() not defined for negative values
"""
if not isinstance(num, int):
raise ValueError("double_factorial_iterative() only accepts integral values")
if num < 0:
raise ValueError("double_factorial_iterative() not defined for negative values")
value = 1
for i in range(num, 0, -2):
value *= i
return value
if __name__ == "__main__":
import doctest
doctest.testmod()
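# Identity check (a sketch): n! == n!! * (n - 1)!! for small positive n.
from math import factorial
print(all(factorial(n) == double_factorial_iterative(n) * double_factorial_iterative(n - 1) for n in range(1, 15)))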
================================================
FILE: maths/dual_number_automatic_differentiation.py
================================================
from math import factorial
"""
https://en.wikipedia.org/wiki/Automatic_differentiation#Automatic_differentiation_using_dual_numbers
https://blog.jliszka.org/2013/10/24/exact-numeric-nth-derivatives.html
Note: this only works for basic functions f(x) in which the powers of x are positive integers.
"""
class Dual:
def __init__(self, real, rank):
self.real = real
if isinstance(rank, int):
self.duals = [1] * rank
else:
self.duals = rank
def __repr__(self):
s = "+".join(f"{dual}E{n}" for n, dual in enumerate(self.duals, 1))
return f"{self.real}+{s}"
def reduce(self):
cur = self.duals.copy()
while cur[-1] == 0:
cur.pop(-1)
return Dual(self.real, cur)
def __add__(self, other):
if not isinstance(other, Dual):
return Dual(self.real + other, self.duals)
s_dual = self.duals.copy()
o_dual = other.duals.copy()
# Pad the shorter list of dual parts with zeros: absent higher-order
# coefficients are zero, so padding with ones would add spurious terms.
if len(s_dual) > len(o_dual):
o_dual.extend([0] * (len(s_dual) - len(o_dual)))
elif len(s_dual) < len(o_dual):
s_dual.extend([0] * (len(o_dual) - len(s_dual)))
new_duals = []
for i in range(len(s_dual)):
new_duals.append(s_dual[i] + o_dual[i])
return Dual(self.real + other.real, new_duals)
__radd__ = __add__
def __sub__(self, other):
return self + other * -1
def __mul__(self, other):
if not isinstance(other, Dual):
new_duals = []
for i in self.duals:
new_duals.append(i * other)
return Dual(self.real * other, new_duals)
new_duals = [0] * (len(self.duals) + len(other.duals) + 1)
for i, item in enumerate(self.duals):
for j, jtem in enumerate(other.duals):
new_duals[i + j + 1] += item * jtem
for k in range(len(self.duals)):
new_duals[k] += self.duals[k] * other.real
for index in range(len(other.duals)):
new_duals[index] += other.duals[index] * self.real
return Dual(self.real * other.real, new_duals)
__rmul__ = __mul__
def __truediv__(self, other):
if not isinstance(other, Dual):
new_duals = []
for i in self.duals:
new_duals.append(i / other)
return Dual(self.real / other, new_duals)
raise ValueError
def __floordiv__(self, other):
if not isinstance(other, Dual):
new_duals = []
for i in self.duals:
new_duals.append(i // other)
return Dual(self.real // other, new_duals)
raise ValueError
def __pow__(self, n):
if n < 0 or isinstance(n, float):
raise ValueError("power must be a positive integer")
if n == 0:
return 1
if n == 1:
return self
x = self
for _ in range(n - 1):
x *= self
return x
def differentiate(func, position, order):
"""
>>> differentiate(lambda x: x**2, 2, 2)
2
>>> differentiate(lambda x: x**2 * x**4, 9, 2)
196830
>>> differentiate(lambda y: 0.5 * (y + 3) ** 6, 3.5, 4)
7605.0
>>> differentiate(lambda y: y ** 2, 4, 3)
0
>>> differentiate(8, 8, 8)
Traceback (most recent call last):
...
ValueError: differentiate() requires a function as input for func
>>> differentiate(lambda x: x **2, "", 1)
Traceback (most recent call last):
...
ValueError: differentiate() requires a float as input for position
>>> differentiate(lambda x: x**2, 3, "")
Traceback (most recent call last):
...
ValueError: differentiate() requires an int as input for order
"""
if not callable(func):
raise ValueError("differentiate() requires a function as input for func")
if not isinstance(position, (float, int)):
raise ValueError("differentiate() requires a float as input for position")
if not isinstance(order, int):
raise ValueError("differentiate() requires an int as input for order")
d = Dual(position, 1)
result = func(d)
if order == 0:
return result.real
return result.duals[order - 1] * factorial(order)
if __name__ == "__main__":
import doctest
doctest.testmod()
def f(y):
return y**2 * y**4
print(differentiate(f, 9, 2))
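# Analytic cross-check (a sketch): d/dy (y**2 * y**4) = 6 * y**5, so the first
# derivative at y = 9 should be 6 * 9**5.
print(differentiate(f, 9, 1) == 6 * 9**5)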
================================================
FILE: maths/entropy.py
================================================
#!/usr/bin/env python3
"""
Implementation of entropy of information
https://en.wikipedia.org/wiki/Entropy_(information_theory)
"""
from __future__ import annotations
import math
from collections import Counter
from string import ascii_lowercase
def calculate_prob(text: str) -> None:
"""
This method takes a string of text and prints three entropy values.
:param text: the text to analyze
:return: None. It prints
1) Entropy of information based on single characters
2) Entropy of information based on pairs of characters
3) Entropy of H(X_n | X_(n-1)), i.e. the difference between the two
The doctests below use text from random books and random quotes.
>>> text = ("Behind Winston's back the voice "
... "from the telescreen was still "
... "babbling and the overfulfilment")
>>> calculate_prob(text)
4.0
6.0
2.0
>>> text = ("The Ministry of Truth—Minitrue, in Newspeak [Newspeak was the official"
... "face in elegant lettering, the three")
>>> calculate_prob(text)
4.0
5.0
1.0
>>> text = ("Had repulsive dashwoods suspicion sincerity but advantage now him. "
... "Remark easily garret nor nay. Civil those mrs enjoy shy fat merry. "
... "You greatest jointure saw horrible. He private he on be imagine "
... "suppose. Fertile beloved evident through no service elderly is. Blind "
... "there if every no so at. Own neglected you preferred way sincerity "
... "delivered his attempted. To of message cottage windows do besides "
... "against uncivil. Delightful unreserved impossible few estimating "
... "men favourable see entreaties. She propriety immediate was improving. "
... "He or entrance humoured likewise moderate. Much nor game son say "
... "feel. Fat make met can must form into gate. Me we offending prevailed "
... "discovery.")
>>> calculate_prob(text)
4.0
7.0
3.0
"""
single_char_strings, two_char_strings = analyze_text(text)
my_alphas = list(" " + ascii_lowercase)
# what is our total sum of probabilities.
all_sum = sum(single_char_strings.values())
# one length string
my_fir_sum = 0
# for each alpha we go in our dict and if it is in it we calculate entropy
for ch in my_alphas:
if ch in single_char_strings:
my_str = single_char_strings[ch]
prob = my_str / all_sum
my_fir_sum += prob * math.log2(prob) # entropy formula.
# print entropy
print(f"{round(-1 * my_fir_sum):.1f}")
# two len string
all_sum = sum(two_char_strings.values())
my_sec_sum = 0
# for each alpha (two in size) calculate entropy.
for ch0 in my_alphas:
for ch1 in my_alphas:
sequence = ch0 + ch1
if sequence in two_char_strings:
my_str = two_char_strings[sequence]
prob = int(my_str) / all_sum
my_sec_sum += prob * math.log2(prob)
# print second entropy
print(f"{round(-1 * my_sec_sum):.1f}")
# print the difference between them
print(f"{round((-1 * my_sec_sum) - (-1 * my_fir_sum)):.1f}")
def analyze_text(text: str) -> tuple[dict, dict]:
"""
Convert text input into two dicts of counts.
The first dictionary stores the frequency of single character strings.
The second dictionary stores the frequency of two character strings.
"""
single_char_strings = Counter() # type: ignore[var-annotated]
two_char_strings = Counter() # type: ignore[var-annotated]
single_char_strings[text[-1]] += 1
# treat the text as if preceded by a space: count the pair (" ", first character)
two_char_strings[" " + text[0]] += 1
for i in range(len(text) - 1):
single_char_strings[text[i]] += 1
two_char_strings[text[i : i + 2]] += 1
return single_char_strings, two_char_strings
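# Hedged illustrative addition (not part of the original file): `shannon_entropy`
# is a new helper name used here only to show the unrounded single-character
# entropy H(X) = -sum(p * log2(p)) that calculate_prob() rounds before printing.
def shannon_entropy(text: str) -> float:
    """
    >>> round(shannon_entropy("aab"), 4)
    0.9183
    """
    counts = Counter(text)
    total = sum(counts.values())
    return -sum((c / total) * math.log2(c / total) for c in counts.values())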
def main():
import doctest
doctest.testmod()
# text = (
# "Had repulsive dashwoods suspicion sincerity but advantage now him. Remark "
# "easily garret nor nay. Civil those mrs enjoy shy fat merry. You greatest "
# "jointure saw horrible. He private he on be imagine suppose. Fertile "
# "beloved evident through no service elderly is. Blind there if every no so "
# "at. Own neglected you preferred way sincerity delivered his attempted. To "
# "of message cottage windows do besides against uncivil. Delightful "
# "unreserved impossible few estimating men favourable see entreaties. She "
# "propriety immediate was improving. He or entrance humoured likewise "
# "moderate. Much nor game son say feel. Fat make met can must form into "
# "gate. Me we offending prevailed discovery. "
# )
# calculate_prob(text)
if __name__ == "__main__":
main()
================================================
FILE: maths/euclidean_distance.py
================================================
from __future__ import annotations
import typing
from collections.abc import Iterable
import numpy as np
Vector = typing.Union[Iterable[float], Iterable[int], np.ndarray] # noqa: UP007
VectorOut = typing.Union[np.float64, int, float] # noqa: UP007
def euclidean_distance(vector_1: Vector, vector_2: Vector) -> VectorOut:
"""
Calculate the distance between the two endpoints of two vectors.
A vector is defined as a list, tuple, or numpy 1D array.
>>> float(euclidean_distance((0, 0), (2, 2)))
2.8284271247461903
>>> float(euclidean_distance(np.array([0, 0, 0]), np.array([2, 2, 2])))
3.4641016151377544
>>> float(euclidean_distance(np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8])))
8.0
>>> float(euclidean_distance([1, 2, 3, 4], [5, 6, 7, 8]))
8.0
"""
return np.sqrt(np.sum((np.asarray(vector_1) - np.asarray(vector_2)) ** 2))
def euclidean_distance_no_np(vector_1: Vector, vector_2: Vector) -> VectorOut:
"""
Calculate the distance between the two endpoints of two vectors without numpy.
A vector is defined as a list, tuple, or numpy 1D array.
>>> euclidean_distance_no_np((0, 0), (2, 2))
2.8284271247461903
>>> euclidean_distance_no_np([1, 2, 3, 4], [5, 6, 7, 8])
8.0
"""
return sum((v1 - v2) ** 2 for v1, v2 in zip(vector_1, vector_2)) ** (1 / 2)
if __name__ == "__main__":
def benchmark() -> None:
"""
Benchmarks
"""
from timeit import timeit
print("Without Numpy")
print(
timeit(
"euclidean_distance_no_np([1, 2, 3], [4, 5, 6])",
number=10000,
globals=globals(),
)
)
print("With Numpy")
print(
timeit(
"euclidean_distance([1, 2, 3], [4, 5, 6])",
number=10000,
globals=globals(),
)
)
benchmark()
================================================
FILE: maths/euler_method.py
================================================
from collections.abc import Callable
import numpy as np
def explicit_euler(
ode_func: Callable, y0: float, x0: float, step_size: float, x_end: float
) -> np.ndarray:
"""Calculate numeric solution at each step to an ODE using Euler's Method
For reference to Euler's method refer to https://en.wikipedia.org/wiki/Euler_method.
Args:
ode_func (Callable): The ordinary differential equation
as a function of x and y.
y0 (float): The initial value for y.
x0 (float): The initial value for x.
step_size (float): The increment value for x.
x_end (float): The final value of x to be calculated.
Returns:
np.ndarray: Solution of y for every step in x.
>>> # the exact solution is math.exp(x)
>>> def f(x, y):
... return y
>>> y0 = 1
>>> y = explicit_euler(f, y0, 0.0, 0.01, 5)
>>> float(y[-1])
144.77277243257308
"""
n = int(np.ceil((x_end - x0) / step_size))
y = np.zeros((n + 1,))
y[0] = y0
x = x0
for k in range(n):
y[k + 1] = y[k] + step_size * ode_func(x, y[k])
x += step_size
return y
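# Hedged illustrative addition (not part of the original file): Euler's update is
# y[k + 1] = y[k] + h * f(x[k], y[k]).  A single hand-computed step for y' = y,
# y(0) = 1 with h = 0.5 gives 1 + 0.5 * 1 = 1.5, which the function reproduces.
def _euler_single_step_demo() -> float:
    """
    >>> _euler_single_step_demo()
    1.5
    """
    return float(explicit_euler(lambda x, y: y, 1.0, 0.0, 0.5, 0.5)[-1])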
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/euler_modified.py
================================================
from collections.abc import Callable
import numpy as np
def euler_modified(
ode_func: Callable, y0: float, x0: float, step_size: float, x_end: float
) -> np.ndarray:
"""
Calculate solution at each step to an ODE using Euler's Modified Method
The basic Euler method is straightforward to implement, but its accuracy is limited.
The modified Euler (Heun's) method improves on it by averaging the slope at the
start of the step with the slope at the predicted endpoint.
https://en.wikipedia.org/wiki/Euler_method
Arguments:
ode_func -- The ODE as a function of x and y
y0 -- the initial value for y
x0 -- the initial value for x
step_size -- the increment value for x
x_end -- the end value for x
>>> # the exact solution is math.exp(x)
>>> def f1(x, y):
... return -2*x*(y**2)
>>> y = euler_modified(f1, 1.0, 0.0, 0.2, 1.0)
>>> float(y[-1])
0.503338255442106
>>> import math
>>> def f2(x, y):
... return -2*y + (x**3)*math.exp(-2*x)
>>> y = euler_modified(f2, 1.0, 0.0, 0.1, 0.3)
>>> float(y[-1])
0.5525976431951775
"""
n = int(np.ceil((x_end - x0) / step_size))
y = np.zeros((n + 1,))
y[0] = y0
x = x0
for k in range(n):
y_get = y[k] + step_size * ode_func(x, y[k])
y[k + 1] = y[k] + (
(step_size / 2) * (ode_func(x, y[k]) + ode_func(x + step_size, y_get))
)
x += step_size
return y
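# Hedged illustrative addition (not part of the original file): the modified
# (Heun's) method first predicts y* = y + h * f(x, y) and then corrects with
# y_next = y + h / 2 * (f(x, y) + f(x + h, y*)).  For y' = y, y(0) = 1, h = 0.5:
# y* = 1.5 and y_next = 1 + 0.25 * (1 + 1.5) = 1.625, which the function reproduces.
def _heun_single_step_demo() -> float:
    """
    >>> _heun_single_step_demo()
    1.625
    """
    return float(euler_modified(lambda x, y: y, 1.0, 0.0, 0.5, 0.5)[-1])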
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/eulers_totient.py
================================================
# Euler's totient function phi(k) counts the integers in [1, k] that are coprime to k.
# totient(n) computes phi(k) for every k up to n using a linear sieve.
def totient(n: int) -> list:
"""
>>> n = 10
>>> totient_calculation = totient(n)
>>> for i in range(1, n):
... print(f"{i} has {totient_calculation[i]} relative primes.")
1 has 0 relative primes.
2 has 1 relative primes.
3 has 2 relative primes.
4 has 2 relative primes.
5 has 4 relative primes.
6 has 2 relative primes.
7 has 6 relative primes.
8 has 4 relative primes.
9 has 6 relative primes.
"""
is_prime = [True for i in range(n + 1)]
totients = [i - 1 for i in range(n + 1)]
primes = []
for i in range(2, n + 1):
if is_prime[i]:
primes.append(i)
for j in range(len(primes)):
if i * primes[j] > n:  # > n (not >= n) so that index n itself is also sieved
break
is_prime[i * primes[j]] = False
if i % primes[j] == 0:
totients[i * primes[j]] = totients[i] * primes[j]
break
totients[i * primes[j]] = totients[i] * (primes[j] - 1)
return totients
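# Hedged illustrative cross-check (not part of the original file): `_totient_naive`
# is a new helper that counts phi(k) directly as |{m in [1, k) : gcd(m, k) == 1}|
# (matching this module's convention that totients[1] == 0); it should agree with
# the sieve above for every index below n.
def _totient_naive(k: int) -> int:
    """
    >>> totient(10)[1:10] == [_totient_naive(k) for k in range(1, 10)]
    True
    """
    from math import gcd
    return sum(1 for m in range(1, k) if gcd(m, k) == 1)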
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/extended_euclidean_algorithm.py
================================================
"""
Extended Euclidean Algorithm.
Finds integer coefficients x and y that satisfy
the equation ax + by = gcd(a, b) (a.k.a Bezout's Identity)
https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm
"""
# @Author: S. Sharma
# @Date: 2019-02-25T12:08:53-06:00
# @Email: silentcat@protonmail.com
# @Last modified by: pikulet
# @Last modified time: 2020-10-02
from __future__ import annotations
import sys
def extended_euclidean_algorithm(a: int, b: int) -> tuple[int, int]:
"""
Extended Euclidean Algorithm.
Finds integer coefficients (x, y) that satisfy
the equation ax + by = gcd(a, b) (a.k.a Bezout's Identity)
>>> extended_euclidean_algorithm(1, 24)
(1, 0)
>>> extended_euclidean_algorithm(8, 14)
(2, -1)
>>> extended_euclidean_algorithm(240, 46)
(-9, 47)
>>> extended_euclidean_algorithm(1, -4)
(1, 0)
>>> extended_euclidean_algorithm(-2, -4)
(-1, 0)
>>> extended_euclidean_algorithm(0, -4)
(0, -1)
>>> extended_euclidean_algorithm(2, 0)
(1, 0)
"""
# base cases
if abs(a) == 1:
return a, 0
elif abs(b) == 1:
return 0, b
old_remainder, remainder = a, b
old_coeff_a, coeff_a = 1, 0
old_coeff_b, coeff_b = 0, 1
while remainder != 0:
quotient = old_remainder // remainder
old_remainder, remainder = remainder, old_remainder - quotient * remainder
old_coeff_a, coeff_a = coeff_a, old_coeff_a - quotient * coeff_a
old_coeff_b, coeff_b = coeff_b, old_coeff_b - quotient * coeff_b
# sign correction for negative numbers
if a < 0:
old_coeff_a = -old_coeff_a
if b < 0:
old_coeff_b = -old_coeff_b
return old_coeff_a, old_coeff_b
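# Hedged illustrative addition (not part of the original file): `_bezout_holds`
# is a new helper checking that the returned pair (x, y) satisfies Bezout's
# identity a * x + b * y == gcd(a, b).
def _bezout_holds(a: int, b: int) -> bool:
    """
    >>> all(_bezout_holds(a, b) for a, b in [(240, 46), (8, 14), (2, 0)])
    True
    """
    from math import gcd
    x, y = extended_euclidean_algorithm(a, b)
    return a * x + b * y == gcd(a, b)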
def main():
"""Call Extended Euclidean Algorithm."""
if len(sys.argv) < 3:
print("2 integer arguments required")
return 1
a = int(sys.argv[1])
b = int(sys.argv[2])
print(extended_euclidean_algorithm(a, b))
return 0
if __name__ == "__main__":
raise SystemExit(main())
================================================
FILE: maths/factorial.py
================================================
"""
Factorial of a positive integer -- https://en.wikipedia.org/wiki/Factorial
"""
def factorial(number: int) -> int:
"""
Calculate the factorial of the specified number (n!).
>>> import math
>>> all(factorial(i) == math.factorial(i) for i in range(20))
True
>>> factorial(0.1)
Traceback (most recent call last):
...
ValueError: factorial() only accepts integral values
>>> factorial(-1)
Traceback (most recent call last):
...
ValueError: factorial() not defined for negative values
>>> factorial(1)
1
>>> factorial(6)
720
>>> factorial(0)
1
"""
if number != int(number):
raise ValueError("factorial() only accepts integral values")
if number < 0:
raise ValueError("factorial() not defined for negative values")
value = 1
for i in range(1, number + 1):
value *= i
return value
def factorial_recursive(n: int) -> int:
"""
Calculate the factorial of a positive integer
https://en.wikipedia.org/wiki/Factorial
>>> import math
>>> all(factorial_recursive(i) == math.factorial(i) for i in range(20))
True
>>> factorial_recursive(0.1)
Traceback (most recent call last):
...
ValueError: factorial_recursive() only accepts integral values
>>> factorial_recursive(-1)
Traceback (most recent call last):
...
ValueError: factorial_recursive() not defined for negative values
"""
if not isinstance(n, int):
raise ValueError("factorial_recursive() only accepts integral values")
if n < 0:
raise ValueError("factorial_recursive() not defined for negative values")
return 1 if n in {0, 1} else n * factorial_recursive(n - 1)
if __name__ == "__main__":
import doctest
doctest.testmod()
n = int(input("Enter a positive integer: ").strip() or 0)
print(f"factorial{n} is {factorial(n)}")
================================================
FILE: maths/factors.py
================================================
from doctest import testmod
from math import sqrt
def factors_of_a_number(num: int) -> list:
"""
>>> factors_of_a_number(1)
[1]
>>> factors_of_a_number(5)
[1, 5]
>>> factors_of_a_number(24)
[1, 2, 3, 4, 6, 8, 12, 24]
>>> factors_of_a_number(-24)
[]
"""
facs: list[int] = []
if num < 1:
return facs
facs.append(1)
if num == 1:
return facs
facs.append(num)
for i in range(2, int(sqrt(num)) + 1):
if num % i == 0: # If i is a factor of num
facs.append(i)
d = num // i # num//i is the other factor of num
if d != i: # If d and i are distinct
facs.append(d) # we have found another factor
facs.sort()
return facs
if __name__ == "__main__":
testmod(name="factors_of_a_number", verbose=True)
================================================
FILE: maths/fast_inverse_sqrt.py
================================================
"""
Fast inverse square root (1/sqrt(x)) using the Quake III algorithm.
Reference: https://en.wikipedia.org/wiki/Fast_inverse_square_root
Accuracy: https://en.wikipedia.org/wiki/Fast_inverse_square_root#Accuracy
"""
import struct
def fast_inverse_sqrt(number: float) -> float:
"""
Compute the fast inverse square root of a floating-point number using the famous
Quake III algorithm.
:param float number: Input number for which to calculate the inverse square root.
:return float: The fast inverse square root of the input number.
Example:
>>> fast_inverse_sqrt(10)
0.3156857923527257
>>> fast_inverse_sqrt(4)
0.49915357479239103
>>> fast_inverse_sqrt(4.1)
0.4932849504615651
>>> fast_inverse_sqrt(0)
Traceback (most recent call last):
...
ValueError: Input must be a positive number.
>>> fast_inverse_sqrt(-1)
Traceback (most recent call last):
...
ValueError: Input must be a positive number.
>>> from math import isclose, sqrt
>>> all(isclose(fast_inverse_sqrt(i), 1 / sqrt(i), rel_tol=0.00132)
... for i in range(50, 60))
True
"""
if number <= 0:
raise ValueError("Input must be a positive number.")
i = struct.unpack(">i", struct.pack(">f", number))[0]
i = 0x5F3759DF - (i >> 1)
y = struct.unpack(">f", struct.pack(">i", i))[0]
return y * (1.5 - 0.5 * number * y * y)
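# Hedged illustrative addition (not part of the original file): applying one more
# Newton-Raphson step, y = y * (1.5 - 0.5 * x * y * y), tightens the estimate
# further; `_two_newton_steps` is a new helper name, not an existing API.
def _two_newton_steps(number: float) -> float:
    """
    >>> from math import isclose, sqrt
    >>> all(isclose(_two_newton_steps(i), 1 / sqrt(i), rel_tol=1e-5)
    ...     for i in range(1, 60))
    True
    """
    y = fast_inverse_sqrt(number)
    return y * (1.5 - 0.5 * number * y * y)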
if __name__ == "__main__":
from doctest import testmod
testmod()
# https://en.wikipedia.org/wiki/Fast_inverse_square_root#Accuracy
from math import sqrt
for i in range(5, 101, 5):
print(f"{i:>3}: {(1 / sqrt(i)) - fast_inverse_sqrt(i):.5f}")
================================================
FILE: maths/fermat_little_theorem.py
================================================
# Python program to show the usage of Fermat's little theorem in a division
# According to Fermat's little theorem, (a / b) mod p equals
# (a * b ^ (p - 2)) mod p
# provided that p is a prime number, b divides a, and p doesn't divide b
# Wikipedia reference: https://en.wikipedia.org/wiki/Fermat%27s_little_theorem
def binary_exponentiation(a: int, n: int, mod: int) -> int:
    """Compute (a ** n) % mod in O(log n) multiplications by repeated squaring."""
    if n == 0:
        return 1
    elif n % 2 == 1:
        return (binary_exponentiation(a, n - 1, mod) * a) % mod
    else:
        b = binary_exponentiation(a, n // 2, mod)
        return (b * b) % mod
# a prime number
p = 701
a = 1000000000
b = 10
# using binary exponentiation function, O(log(p)):
print((a / b) % p == (a * binary_exponentiation(b, p - 2, p)) % p)
# using Python operators:
print((a / b) % p == (a * b ** (p - 2)) % p)
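# Hedged illustrative addition (not part of the original script): Python's built-in
# three-argument pow() performs the same modular exponentiation, so it can serve
# as a cross-check for binary_exponentiation().
print(binary_exponentiation(b, p - 2, p) == pow(b, p - 2, p))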
================================================
FILE: maths/fibonacci.py
================================================
"""
Calculates the Fibonacci sequence using iteration, recursion, memoization,
and a simplified form of Binet's formula
NOTE 1: the iterative, recursive, memoization functions are more accurate than
the Binet's formula function because the Binet formula function uses floats
NOTE 2: the Binet's formula function is much more limited in the size of inputs
that it can handle due to the size limitations of Python floats
NOTE 3: the matrix function is the fastest and most memory efficient for large n
See benchmark numbers in __main__ for performance comparisons.
https://en.wikipedia.org/wiki/Fibonacci_number for more information
"""
import functools
from collections.abc import Iterator
from math import sqrt
from time import time
import numpy as np
from numpy import ndarray
def time_func(func, *args, **kwargs):
"""
Times the execution of a function with parameters
"""
start = time()
output = func(*args, **kwargs)
end = time()
if int(end - start) > 0:
print(f"{func.__name__} runtime: {(end - start):0.4f} s")
else:
print(f"{func.__name__} runtime: {(end - start) * 1000:0.4f} ms")
return output
def fib_iterative_yield(n: int) -> Iterator[int]:
"""
Yields the Fibonacci numbers F(0) through F(n) using iteration with yield
>>> list(fib_iterative_yield(0))
[0]
>>> tuple(fib_iterative_yield(1))
(0, 1)
>>> tuple(fib_iterative_yield(5))
(0, 1, 1, 2, 3, 5)
>>> tuple(fib_iterative_yield(10))
(0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55)
>>> tuple(fib_iterative_yield(-1))
Traceback (most recent call last):
...
ValueError: n is negative
"""
if n < 0:
raise ValueError("n is negative")
a, b = 0, 1
yield a
for _ in range(n):
yield b
a, b = b, a + b
def fib_iterative(n: int) -> list[int]:
"""
Calculates the first n (0-indexed) Fibonacci numbers using iteration
>>> fib_iterative(0)
[0]
>>> fib_iterative(1)
[0, 1]
>>> fib_iterative(5)
[0, 1, 1, 2, 3, 5]
>>> fib_iterative(10)
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
>>> fib_iterative(-1)
Traceback (most recent call last):
...
ValueError: n is negative
"""
if n < 0:
raise ValueError("n is negative")
if n == 0:
return [0]
fib = [0, 1]
for _ in range(n - 1):
fib.append(fib[-1] + fib[-2])
return fib
def fib_recursive(n: int) -> list[int]:
"""
Calculates the first n (0-indexed) Fibonacci numbers using recursion
>>> fib_recursive(0)
[0]
>>> fib_recursive(1)
[0, 1]
>>> fib_recursive(5)
[0, 1, 1, 2, 3, 5]
>>> fib_recursive(10)
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
>>> fib_recursive(-1)
Traceback (most recent call last):
...
ValueError: n is negative
"""
def fib_recursive_term(i: int) -> int:
"""
Calculates the i-th (0-indexed) Fibonacci number using recursion
>>> fib_recursive_term(0)
0
>>> fib_recursive_term(1)
1
>>> fib_recursive_term(5)
5
>>> fib_recursive_term(10)
55
>>> fib_recursive_term(-1)
Traceback (most recent call last):
...
ValueError: n is negative
"""
if i < 0:
raise ValueError("n is negative")
if i < 2:
return i
return fib_recursive_term(i - 1) + fib_recursive_term(i - 2)
if n < 0:
raise ValueError("n is negative")
return [fib_recursive_term(i) for i in range(n + 1)]
def fib_recursive_cached(n: int) -> list[int]:
"""
Calculates the first n (0-indexed) Fibonacci numbers using recursion
>>> fib_recursive_cached(0)
[0]
>>> fib_recursive_cached(1)
[0, 1]
>>> fib_recursive_cached(5)
[0, 1, 1, 2, 3, 5]
>>> fib_recursive_cached(10)
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
>>> fib_recursive_cached(-1)
Traceback (most recent call last):
...
ValueError: n is negative
"""
@functools.cache
def fib_recursive_term(i: int) -> int:
"""
Calculates the i-th (0-indexed) Fibonacci number using recursion
"""
if i < 0:
raise ValueError("n is negative")
if i < 2:
return i
return fib_recursive_term(i - 1) + fib_recursive_term(i - 2)
if n < 0:
raise ValueError("n is negative")
return [fib_recursive_term(i) for i in range(n + 1)]
def fib_memoization(n: int) -> list[int]:
"""
Calculates the first n (0-indexed) Fibonacci numbers using memoization
>>> fib_memoization(0)
[0]
>>> fib_memoization(1)
[0, 1]
>>> fib_memoization(5)
[0, 1, 1, 2, 3, 5]
>>> fib_memoization(10)
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
>>> fib_memoization(-1)
Traceback (most recent call last):
...
ValueError: n is negative
"""
if n < 0:
raise ValueError("n is negative")
# Cache must be outside the recursive function,
# otherwise it will reset every time the function calls itself.
cache: dict[int, int] = {0: 0, 1: 1, 2: 1} # Prefilled cache
def rec_fn_memoized(num: int) -> int:
if num in cache:
return cache[num]
value = rec_fn_memoized(num - 1) + rec_fn_memoized(num - 2)
cache[num] = value
return value
return [rec_fn_memoized(i) for i in range(n + 1)]
def fib_binet(n: int) -> list[int]:
"""
Calculates the first n (0-indexed) Fibonacci numbers using a simplified form
of Binet's formula:
https://en.m.wikipedia.org/wiki/Fibonacci_number#Computation_by_rounding
NOTE 1: this function diverges from fib_iterative at around n = 71, likely
due to compounding floating-point arithmetic errors
NOTE 2: this function doesn't accept n >= 1475 because it overflows
thereafter due to the size limitations of Python floats
>>> fib_binet(0)
[0]
>>> fib_binet(1)
[0, 1]
>>> fib_binet(5)
[0, 1, 1, 2, 3, 5]
>>> fib_binet(10)
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
>>> fib_binet(-1)
Traceback (most recent call last):
...
ValueError: n is negative
>>> fib_binet(1475)
Traceback (most recent call last):
...
ValueError: n is too large
"""
if n < 0:
raise ValueError("n is negative")
if n >= 1475:
raise ValueError("n is too large")
sqrt_5 = sqrt(5)
phi = (1 + sqrt_5) / 2
return [round(phi**i / sqrt_5) for i in range(n + 1)]
def matrix_pow_np(m: ndarray, power: int) -> ndarray:
"""
Raises a matrix to the power of 'power' using binary exponentiation.
Args:
m: Matrix as a numpy array.
power: The power to which the matrix is to be raised.
Returns:
The matrix raised to the power.
Raises:
ValueError: If power is negative.
>>> m = np.array([[1, 1], [1, 0]], dtype=int)
>>> matrix_pow_np(m, 0) # Identity matrix when raised to the power of 0
array([[1, 0],
[0, 1]])
>>> matrix_pow_np(m, 1) # Same matrix when raised to the power of 1
array([[1, 1],
[1, 0]])
>>> matrix_pow_np(m, 5)
array([[8, 5],
[5, 3]])
>>> matrix_pow_np(m, -1)
Traceback (most recent call last):
...
ValueError: power is negative
"""
result = np.array([[1, 0], [0, 1]], dtype=int) # Identity Matrix
base = m
if power < 0: # Negative power is not allowed
raise ValueError("power is negative")
while power:
if power % 2 == 1:
result = np.dot(result, base)
base = np.dot(base, base)
power //= 2
return result
def fib_matrix_np(n: int) -> int:
"""
Calculates the n-th Fibonacci number using matrix exponentiation.
https://www.nayuki.io/page/fast-fibonacci-algorithms#:~:text=
Summary:%20The%20two%20fast%20Fibonacci%20algorithms%20are%20matrix
Args:
n: Fibonacci sequence index
Returns:
The n-th Fibonacci number.
Raises:
ValueError: If n is negative.
>>> fib_matrix_np(0)
0
>>> fib_matrix_np(1)
1
>>> fib_matrix_np(5)
5
>>> fib_matrix_np(10)
55
>>> fib_matrix_np(-1)
Traceback (most recent call last):
...
ValueError: n is negative
"""
if n < 0:
raise ValueError("n is negative")
if n == 0:
return 0
m = np.array([[1, 1], [1, 0]], dtype=int)
result = matrix_pow_np(m, n - 1)
return int(result[0, 0])
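# Hedged illustrative cross-check (not part of the original module): the matrix
# form should agree with the iterative list, since [[1, 1], [1, 0]] ** (n - 1)
# has F(n) in its top-left entry; `_matrix_matches_iterative` is a new helper.
def _matrix_matches_iterative(limit: int = 20) -> bool:
    """
    >>> _matrix_matches_iterative()
    True
    """
    return fib_iterative(limit) == [fib_matrix_np(i) for i in range(limit + 1)]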
if __name__ == "__main__":
from doctest import testmod
testmod()
# Time on an M1 MacBook Pro -- Fastest to slowest
num = 30
time_func(fib_iterative_yield, num) # 0.0012 ms
time_func(fib_iterative, num) # 0.0031 ms
time_func(fib_binet, num) # 0.0062 ms
time_func(fib_memoization, num) # 0.0100 ms
time_func(fib_recursive_cached, num) # 0.0153 ms
time_func(fib_recursive, num) # 257.0910 ms
time_func(fib_matrix_np, num) # 0.0000 ms
================================================
FILE: maths/find_max.py
================================================
from __future__ import annotations
def find_max_iterative(nums: list[int | float]) -> int | float:
"""
>>> for nums in ([3, 2, 1], [-3, -2, -1], [3, -3, 0], [3.0, 3.1, 2.9]):
... find_max_iterative(nums) == max(nums)
True
True
True
True
>>> find_max_iterative([2, 4, 9, 7, 19, 94, 5])
94
>>> find_max_iterative([])
Traceback (most recent call last):
...
ValueError: find_max_iterative() arg is an empty sequence
"""
if len(nums) == 0:
raise ValueError("find_max_iterative() arg is an empty sequence")
max_num = nums[0]
for x in nums:
if x > max_num: # noqa: PLR1730
max_num = x
return max_num
# Divide and Conquer algorithm
def find_max_recursive(nums: list[int | float], left: int, right: int) -> int | float:
"""
find max value in list
:param nums: contains elements
:param left: index of first element
:param right: index of last element
:return: max in nums
>>> for nums in ([3, 2, 1], [-3, -2, -1], [3, -3, 0], [3.0, 3.1, 2.9]):
... find_max_recursive(nums, 0, len(nums) - 1) == max(nums)
True
True
True
True
>>> nums = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]
>>> find_max_recursive(nums, 0, len(nums) - 1) == max(nums)
True
>>> find_max_recursive([], 0, 0)
Traceback (most recent call last):
...
ValueError: find_max_recursive() arg is an empty sequence
>>> find_max_recursive(nums, 0, len(nums)) == max(nums)
Traceback (most recent call last):
...
IndexError: list index out of range
>>> find_max_recursive(nums, -len(nums), -1) == max(nums)
True
>>> find_max_recursive(nums, -len(nums) - 1, -1) == max(nums)
Traceback (most recent call last):
...
IndexError: list index out of range
"""
if len(nums) == 0:
raise ValueError("find_max_recursive() arg is an empty sequence")
if (
left >= len(nums)
or left < -len(nums)
or right >= len(nums)
or right < -len(nums)
):
raise IndexError("list index out of range")
if left == right:
return nums[left]
mid = (left + right) >> 1 # the middle
left_max = find_max_recursive(nums, left, mid) # find max in range[left, mid]
right_max = find_max_recursive(
nums, mid + 1, right
) # find max in range[mid + 1, right]
return left_max if left_max >= right_max else right_max
if __name__ == "__main__":
import doctest
doctest.testmod(verbose=True)
================================================
FILE: maths/find_min.py
================================================
from __future__ import annotations
def find_min_iterative(nums: list[int | float]) -> int | float:
"""
Find Minimum Number in a List
:param nums: contains elements
:return: min number in list
>>> for nums in ([3, 2, 1], [-3, -2, -1], [3, -3, 0], [3.0, 3.1, 2.9]):
... find_min_iterative(nums) == min(nums)
True
True
True
True
>>> find_min_iterative([0, 1, 2, 3, 4, 5, -3, 24, -56])
-56
>>> find_min_iterative([])
Traceback (most recent call last):
...
ValueError: find_min_iterative() arg is an empty sequence
"""
if len(nums) == 0:
raise ValueError("find_min_iterative() arg is an empty sequence")
min_num = nums[0]
for num in nums:
min_num = min(min_num, num)
return min_num
# Divide and Conquer algorithm
def find_min_recursive(nums: list[int | float], left: int, right: int) -> int | float:
"""
find min value in list
:param nums: contains elements
:param left: index of first element
:param right: index of last element
:return: min in nums
>>> for nums in ([3, 2, 1], [-3, -2, -1], [3, -3, 0], [3.0, 3.1, 2.9]):
... find_min_recursive(nums, 0, len(nums) - 1) == min(nums)
True
True
True
True
>>> nums = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]
>>> find_min_recursive(nums, 0, len(nums) - 1) == min(nums)
True
>>> find_min_recursive([], 0, 0)
Traceback (most recent call last):
...
ValueError: find_min_recursive() arg is an empty sequence
>>> find_min_recursive(nums, 0, len(nums)) == min(nums)
Traceback (most recent call last):
...
IndexError: list index out of range
>>> find_min_recursive(nums, -len(nums), -1) == min(nums)
True
>>> find_min_recursive(nums, -len(nums) - 1, -1) == min(nums)
Traceback (most recent call last):
...
IndexError: list index out of range
"""
if len(nums) == 0:
raise ValueError("find_min_recursive() arg is an empty sequence")
if (
left >= len(nums)
or left < -len(nums)
or right >= len(nums)
or right < -len(nums)
):
raise IndexError("list index out of range")
if left == right:
return nums[left]
mid = (left + right) >> 1 # the middle
left_min = find_min_recursive(nums, left, mid) # find min in range[left, mid]
right_min = find_min_recursive(
nums, mid + 1, right
) # find min in range[mid + 1, right]
return left_min if left_min <= right_min else right_min
if __name__ == "__main__":
import doctest
doctest.testmod(verbose=True)
================================================
FILE: maths/floor.py
================================================
"""
https://en.wikipedia.org/wiki/Floor_and_ceiling_functions
"""
def floor(x: float) -> int:
"""
Return the floor of x as an Integral.
:param x: the number
:return: the largest integer <= x.
>>> import math
>>> all(floor(n) == math.floor(n) for n
... in (1, -1, 0, -0, 1.1, -1.1, 1.0, -1.0, 1_000_000_000))
True
"""
return int(x) if x - int(x) >= 0 else int(x) - 1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/gamma.py
================================================
"""
The gamma function is a very useful tool in math and physics.
It helps to evaluate complicated integrals in a convenient way.
For more info: https://en.wikipedia.org/wiki/Gamma_function
In mathematics, the gamma function is a commonly
used extension of the factorial function to complex numbers.
The gamma function is defined for all complex numbers except
the non-positive integers.
Python's Standard Library math.gamma() function overflows around gamma(171.624).
"""
import math
from numpy import inf
from scipy.integrate import quad
def gamma_iterative(num: float) -> float:
"""
Calculates the value of Gamma function of num
where num is either an integer (1, 2, 3..) or a half-integer (0.5, 1.5, 2.5 ...).
>>> gamma_iterative(-1)
Traceback (most recent call last):
...
ValueError: math domain error
>>> gamma_iterative(0)
Traceback (most recent call last):
...
ValueError: math domain error
>>> gamma_iterative(9)
40320.0
>>> from math import gamma as math_gamma
>>> all(.99999999 < gamma_iterative(i) / math_gamma(i) <= 1.000000001
... for i in range(1, 50))
True
>>> gamma_iterative(-1)/math_gamma(-1) <= 1.000000001
Traceback (most recent call last):
...
ValueError: math domain error
>>> gamma_iterative(3.3) - math_gamma(3.3) <= 0.00000001
True
"""
if num <= 0:
raise ValueError("math domain error")
return quad(integrand, 0, inf, args=(num))[0]
def integrand(x: float, z: float) -> float:
return math.pow(x, z - 1) * math.exp(-x)
def gamma_recursive(num: float) -> float:
"""
Calculates the value of Gamma function of num
where num is either an integer (1, 2, 3..) or a half-integer (0.5, 1.5, 2.5 ...).
Implemented using recursion
Examples:
>>> from math import isclose, gamma as math_gamma
>>> gamma_recursive(0.5)
1.7724538509055159
>>> gamma_recursive(1)
1.0
>>> gamma_recursive(2)
1.0
>>> gamma_recursive(3.5)
3.3233509704478426
>>> gamma_recursive(171.5)
9.483367566824795e+307
>>> all(isclose(gamma_recursive(num), math_gamma(num))
... for num in (0.5, 2, 3.5, 171.5))
True
>>> gamma_recursive(0)
Traceback (most recent call last):
...
ValueError: math domain error
>>> gamma_recursive(-1.1)
Traceback (most recent call last):
...
ValueError: math domain error
>>> gamma_recursive(-4)
Traceback (most recent call last):
...
ValueError: math domain error
>>> gamma_recursive(172)
Traceback (most recent call last):
...
OverflowError: math range error
>>> gamma_recursive(1.1)
Traceback (most recent call last):
...
NotImplementedError: num must be an integer or a half-integer
"""
if num <= 0:
raise ValueError("math domain error")
if num > 171.5:
raise OverflowError("math range error")
elif num - int(num) not in (0, 0.5):
raise NotImplementedError("num must be an integer or a half-integer")
elif num == 0.5:
return math.sqrt(math.pi)
else:
return 1.0 if num == 1 else (num - 1) * gamma_recursive(num - 1)
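# Hedged illustrative addition (not part of the original file): for positive
# integers the gamma function reduces to a factorial, gamma(n) == (n - 1)!;
# `_matches_factorial` is a new helper used only for this cross-check.
def _matches_factorial() -> bool:
    """
    >>> _matches_factorial()
    True
    """
    return all(gamma_recursive(n) == math.factorial(n - 1) for n in range(1, 11))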
if __name__ == "__main__":
from doctest import testmod
testmod()
num = 1.0
while num:
num = float(input("Gamma of: "))
print(f"gamma_iterative({num}) = {gamma_iterative(num)}")
print(f"gamma_recursive({num}) = {gamma_recursive(num)}")
print("\nEnter 0 to exit...")
================================================
FILE: maths/gaussian.py
================================================
"""
Reference: https://en.wikipedia.org/wiki/Gaussian_function
"""
from numpy import exp, pi, sqrt
def gaussian(x, mu: float = 0.0, sigma: float = 1.0) -> float:
"""
>>> float(gaussian(1))
0.24197072451914337
>>> float(gaussian(24))
3.342714441794458e-126
>>> float(gaussian(1, 4, 2))
0.06475879783294587
>>> float(gaussian(1, 5, 3))
0.05467002489199788
Supports NumPy Arrays
Use numpy.meshgrid with this to generate gaussian blur on images.
>>> import numpy as np
>>> x = np.arange(15)
>>> gaussian(x)
array([3.98942280e-01, 2.41970725e-01, 5.39909665e-02, 4.43184841e-03,
1.33830226e-04, 1.48671951e-06, 6.07588285e-09, 9.13472041e-12,
5.05227108e-15, 1.02797736e-18, 7.69459863e-23, 2.11881925e-27,
2.14638374e-32, 7.99882776e-38, 1.09660656e-43])
>>> float(gaussian(15))
5.530709549844416e-50
>>> gaussian([1,2, 'string'])
Traceback (most recent call last):
...
TypeError: unsupported operand type(s) for -: 'list' and 'float'
>>> gaussian('hello world')
Traceback (most recent call last):
...
TypeError: unsupported operand type(s) for -: 'str' and 'float'
>>> gaussian(10**234) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
OverflowError: (34, 'Result too large')
>>> float(gaussian(10**-326))
0.3989422804014327
>>> float(gaussian(2523, mu=234234, sigma=3425))
0.0
"""
return 1 / sqrt(2 * pi * sigma**2) * exp(-((x - mu) ** 2) / (2 * sigma**2))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/gcd_of_n_numbers.py
================================================
"""
Gcd of N Numbers
Reference: https://en.wikipedia.org/wiki/Greatest_common_divisor
"""
from collections import Counter
def get_factors(
number: int, factors: Counter | None = None, factor: int = 2
) -> Counter:
"""
This is a recursive function that collects all prime factors of a number
as a Counter mapping each prime to its exponent.
>>> get_factors(45)
Counter({3: 2, 5: 1})
>>> get_factors(2520)
Counter({2: 3, 3: 2, 5: 1, 7: 1})
>>> get_factors(23)
Counter({23: 1})
>>> get_factors(0)
Traceback (most recent call last):
...
TypeError: number must be integer and greater than zero
>>> get_factors(-1)
Traceback (most recent call last):
...
TypeError: number must be integer and greater than zero
>>> get_factors(1.5)
Traceback (most recent call last):
...
TypeError: number must be integer and greater than zero
factor runs over the candidates from 2 up to number; if number % factor == 0,
we record the factor and recurse on number // factor,
otherwise we increase factor by one
"""
match number:
case int(number) if number == 1:
return Counter({1: 1})
case int(num) if number > 0:
number = num
case _:
raise TypeError("number must be integer and greater than zero")
factors = factors or Counter()
if number == factor: # break condition
# all numbers are factors of itself
factors[factor] += 1
return factors
if number % factor > 0:
# if it is greater than zero
# so it is not a factor of number and we check next number
return get_factors(number, factors, factor + 1)
factors[factor] += 1
# else we update factors (that is Counter(dict-like) type) and check again
return get_factors(number // factor, factors, factor)
def get_greatest_common_divisor(*numbers: int) -> int:
"""
get gcd of n numbers:
>>> get_greatest_common_divisor(18, 45)
9
>>> get_greatest_common_divisor(23, 37)
1
>>> get_greatest_common_divisor(2520, 8350)
10
>>> get_greatest_common_divisor(-10, 20)
Traceback (most recent call last):
...
Exception: numbers must be integer and greater than zero
>>> get_greatest_common_divisor(1.5, 2)
Traceback (most recent call last):
...
Exception: numbers must be integer and greater than zero
>>> get_greatest_common_divisor(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
1
>>> get_greatest_common_divisor("1", 2, 3, 4, 5, 6, 7, 8, 9, 10)
Traceback (most recent call last):
...
Exception: numbers must be integer and greater than zero
"""
# we just need the prime factors, not the numbers themselves
try:
same_factors, *factors = map(get_factors, numbers)
except TypeError as e:
raise Exception("numbers must be integer and greater than zero") from e
for factor in factors:
same_factors &= factor
# get common factor between all
# `&` return common elements with smaller value (for Counter type)
# now, same_factors is something like {2: 2, 3: 4} that means 2 * 2 * 3 * 3 * 3 * 3
mult = 1
# power each factor and multiply
# for {2: 2, 3: 4}, it is [4, 81] and then 324
for m in [factor**power for factor, power in same_factors.items()]:
mult *= m
return mult
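# Hedged illustrative cross-check (not part of the original file): the factor-based
# result should agree with folding math.gcd over the same inputs;
# `_matches_math_gcd` is a new helper name introduced only for this check.
def _matches_math_gcd(*numbers: int) -> bool:
    """
    >>> _matches_math_gcd(18, 45) and _matches_math_gcd(2520, 8350, 1000)
    True
    """
    from functools import reduce
    from math import gcd
    return get_greatest_common_divisor(*numbers) == reduce(gcd, numbers)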
if __name__ == "__main__":
print(get_greatest_common_divisor(18, 45)) # 9
================================================
FILE: maths/geometric_mean.py
================================================
"""
The Geometric Mean of n numbers is defined as the n-th root of the product
of those numbers. It is used to measure the central tendency of the numbers.
https://en.wikipedia.org/wiki/Geometric_mean
"""
def compute_geometric_mean(*args: int) -> float:
"""
Return the geometric mean of the argument numbers.
>>> compute_geometric_mean(2,8)
4.0
>>> compute_geometric_mean('a', 4)
Traceback (most recent call last):
...
TypeError: Not a Number
>>> compute_geometric_mean(5, 125)
25.0
>>> compute_geometric_mean(1, 0)
0.0
>>> compute_geometric_mean(1, 5, 25, 5)
5.0
>>> compute_geometric_mean(2, -2)
Traceback (most recent call last):
...
ArithmeticError: Cannot Compute Geometric Mean for these numbers.
>>> compute_geometric_mean(-5, 25, 1)
-5.0
"""
product = 1
for number in args:
if not isinstance(number, int) and not isinstance(number, float):
raise TypeError("Not a Number")
product *= number
# An even root of a negative product is not a real number,
# so geometric means are usually restricted to positive numbers.
if product < 0 and len(args) % 2 == 0:
raise ArithmeticError("Cannot Compute Geometric Mean for these numbers.")
mean = abs(product) ** (1 / len(args))
# Python would otherwise return a complex root for a negative product,
# so take the odd root of the absolute value and restore the sign.
if product < 0:
mean = -mean
# Floating point arithmetic can return e.g. 64 ** (1 / 3) as 3.9999999999999996, so round and verify below.
possible_mean = float(round(mean))
# To check if the rounded number is actually the mean.
if possible_mean ** len(args) == product:
mean = possible_mean
return mean
if __name__ == "__main__":
from doctest import testmod
testmod(name="compute_geometric_mean")
print(compute_geometric_mean(-3, -27))
================================================
FILE: maths/germain_primes.py
================================================
"""
A Sophie Germain prime is any prime p, where 2p + 1 is also prime.
The second number, 2p + 1 is called a safe prime.
Examples of Germain primes include: 2, 3, 5, 11, 23
Their corresponding safe primes: 5, 7, 11, 23, 47
https://en.wikipedia.org/wiki/Safe_and_Sophie_Germain_primes
"""
from maths.prime_check import is_prime
def is_germain_prime(number: int) -> bool:
"""Checks if input number and 2*number + 1 are prime.
>>> is_germain_prime(3)
True
>>> is_germain_prime(11)
True
>>> is_germain_prime(4)
False
>>> is_germain_prime(23)
True
>>> is_germain_prime(13)
False
>>> is_germain_prime(20)
False
>>> is_germain_prime('abc')
Traceback (most recent call last):
...
TypeError: Input value must be a positive integer. Input value: abc
"""
if not isinstance(number, int) or number < 1:
msg = f"Input value must be a positive integer. Input value: {number}"
raise TypeError(msg)
return is_prime(number) and is_prime(2 * number + 1)
def is_safe_prime(number: int) -> bool:
"""Checks if input number and (number - 1)/2 are prime.
The smallest safe prime is 5, whose corresponding Germain prime is 2.
>>> is_safe_prime(5)
True
>>> is_safe_prime(11)
True
>>> is_safe_prime(1)
False
>>> is_safe_prime(2)
False
>>> is_safe_prime(3)
False
>>> is_safe_prime(47)
True
>>> is_safe_prime('abc')
Traceback (most recent call last):
...
TypeError: Input value must be a positive integer. Input value: abc
"""
if not isinstance(number, int) or number < 1:
msg = f"Input value must be a positive integer. Input value: {number}"
raise TypeError(msg)
return (number - 1) % 2 == 0 and is_prime(number) and is_prime((number - 1) // 2)
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: maths/greatest_common_divisor.py
================================================
"""
Greatest Common Divisor.
Wikipedia reference: https://en.wikipedia.org/wiki/Greatest_common_divisor
gcd(a, b) = gcd(a, -b) = gcd(-a, b) = gcd(-a, -b) by definition of divisibility
"""
def greatest_common_divisor(a: int, b: int) -> int:
"""
Calculate Greatest Common Divisor (GCD).
>>> greatest_common_divisor(24, 40)
8
>>> greatest_common_divisor(1, 1)
1
>>> greatest_common_divisor(1, 800)
1
>>> greatest_common_divisor(11, 37)
1
>>> greatest_common_divisor(3, 5)
1
>>> greatest_common_divisor(16, 4)
4
>>> greatest_common_divisor(-3, 9)
3
>>> greatest_common_divisor(9, -3)
3
>>> greatest_common_divisor(3, -9)
3
>>> greatest_common_divisor(-3, -9)
3
>>> greatest_common_divisor(0, 0)
0
"""
return abs(b) if a == 0 else greatest_common_divisor(b % a, a)
def gcd_by_iterative(x: int, y: int) -> int:
"""
The iterative method below is more memory efficient because it does not create
additional stack frames for recursive function calls (as the method above does).
>>> gcd_by_iterative(24, 40)
8
>>> greatest_common_divisor(24, 40) == gcd_by_iterative(24, 40)
True
>>> gcd_by_iterative(-3, -9)
3
>>> gcd_by_iterative(3, -9)
3
>>> gcd_by_iterative(1, -800)
1
>>> gcd_by_iterative(11, 37)
1
>>> gcd_by_iterative(0, 0)
0
"""
while y:  # when y == 0 the loop terminates and x is returned as the final GCD
x, y = y, x % y
return abs(x)
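# Hedged illustrative cross-check (not part of the original file): both variants
# should agree with math.gcd on small positive and negative inputs;
# `_agrees_with_math_gcd` is a new helper introduced only for this check.
def _agrees_with_math_gcd() -> bool:
    """
    >>> _agrees_with_math_gcd()
    True
    """
    from math import gcd
    return all(
        greatest_common_divisor(a, b) == gcd_by_iterative(a, b) == gcd(a, b)
        for a in range(-12, 13)
        for b in range(-12, 13)
    )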
def main():
"""
Call Greatest Common Divisor function.
"""
try:
nums = input("Enter two integers separated by comma (,): ").split(",")
num_1 = int(nums[0])
num_2 = int(nums[1])
print(
f"greatest_common_divisor({num_1}, {num_2}) = "
f"{greatest_common_divisor(num_1, num_2)}"
)
print(f"By iterative gcd({num_1}, {num_2}) = {gcd_by_iterative(num_1, num_2)}")
except (IndexError, UnboundLocalError, ValueError):
print("Wrong input")
if __name__ == "__main__":
main()
================================================
FILE: maths/hardy_ramanujanalgo.py
================================================
# The Hardy-Ramanujan theorem states that the number of distinct prime factors of n
# is approximately log(log(n)) for almost all natural numbers n
import math
def exact_prime_factor_count(n: int) -> int:
"""
>>> exact_prime_factor_count(51242183)
3
"""
count = 0
if n % 2 == 0:
count += 1
while n % 2 == 0:
n = int(n / 2)
# the n input value must be odd so that
# we can skip one element (ie i += 2)
i = 3
while i <= int(math.sqrt(n)):
if n % i == 0:
count += 1
while n % i == 0:
n = int(n / i)
i = i + 2
# if the remaining n is greater than 2,
# it is itself a prime factor
if n > 2:
count += 1
return count
if __name__ == "__main__":
n = 51242183
print(f"The number of distinct prime factors is/are {exact_prime_factor_count(n)}")
print(f"The value of log(log(n)) is {math.log(math.log(n)):.4f}")
"""
The number of distinct prime factors is/are 3
The value of log(log(n)) is 2.8765
"""
================================================
FILE: maths/images/__init__.py
================================================
================================================
FILE: maths/integer_square_root.py
================================================
"""
Integer Square Root Algorithm -- An efficient method to calculate the square root of a
non-negative integer 'num' rounded down to the nearest integer. It uses a binary search
approach to find the integer square root without using any built-in exponent functions
or operators.
* https://en.wikipedia.org/wiki/Integer_square_root
* https://docs.python.org/3/library/math.html#math.isqrt
Note:
- This algorithm is designed for non-negative integers only.
- The result is rounded down to the nearest integer.
- The algorithm has a time complexity of O(log(x)).
- Original algorithm idea based on binary search.
"""
def integer_square_root(num: int) -> int:
"""
Returns the integer square root of a non-negative integer num.
Args:
num: A non-negative integer.
Returns:
The integer square root of num.
Raises:
ValueError: If num is not an integer or is negative.
>>> [integer_square_root(i) for i in range(18)]
[0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4]
>>> integer_square_root(625)
25
>>> integer_square_root(2_147_483_647)
46340
>>> from math import isqrt
>>> all(integer_square_root(i) == isqrt(i) for i in range(20))
True
>>> integer_square_root(-1)
Traceback (most recent call last):
...
ValueError: num must be non-negative integer
>>> integer_square_root(1.5)
Traceback (most recent call last):
...
ValueError: num must be non-negative integer
>>> integer_square_root("0")
Traceback (most recent call last):
...
ValueError: num must be non-negative integer
"""
if not isinstance(num, int) or num < 0:
raise ValueError("num must be non-negative integer")
if num < 2:
return num
left_bound = 0
right_bound = num // 2
while left_bound <= right_bound:
mid = left_bound + (right_bound - left_bound) // 2
mid_squared = mid * mid
if mid_squared == num:
return mid
if mid_squared < num:
left_bound = mid + 1
else:
right_bound = mid - 1
return right_bound
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/interquartile_range.py
================================================
"""
An implementation of interquartile range (IQR) which is a measure of statistical
dispersion, which is the spread of the data.
The function takes the list of numeric values as input and returns the IQR.
Script inspired by this Wikipedia article:
https://en.wikipedia.org/wiki/Interquartile_range
"""
from __future__ import annotations
def find_median(nums: list[int | float]) -> float:
"""
This is the implementation of the median
(nums should be sorted, as done in interquartile_range below).
:param nums: The list of numeric values
:return: Median of the list
>>> find_median(nums=([1, 2, 2, 3, 4]))
2
>>> find_median(nums=([1, 2, 2, 3, 4, 4]))
2.5
>>> find_median(nums=([-1, 2, 0, 3, 4, -4]))
1.5
>>> find_median(nums=([1.1, 2.2, 2, 3.3, 4.4, 4]))
2.65
"""
div, mod = divmod(len(nums), 2)
if mod:
return nums[div]
return (nums[div] + nums[(div) - 1]) / 2
def interquartile_range(nums: list[int | float]) -> float:
"""
Return the interquartile range for a list of numeric values.
:param nums: The list of numeric values.
:return: interquartile range
>>> interquartile_range(nums=[4, 1, 2, 3, 2])
2.0
>>> interquartile_range(nums = [-2, -7, -10, 9, 8, 4, -67, 45])
17.0
>>> interquartile_range(nums = [-2.1, -7.1, -10.1, 9.1, 8.1, 4.1, -67.1, 45.1])
17.2
>>> interquartile_range(nums = [0, 0, 0, 0, 0])
0.0
>>> interquartile_range(nums=[])
Traceback (most recent call last):
...
ValueError: The list is empty. Provide a non-empty list.
"""
if not nums:
raise ValueError("The list is empty. Provide a non-empty list.")
nums.sort()
length = len(nums)
div, mod = divmod(length, 2)
q1 = find_median(nums[:div])
half_length = sum((div, mod))
q3 = find_median(nums[half_length:length])
return q3 - q1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/is_int_palindrome.py
================================================
def is_int_palindrome(num: int) -> bool:
"""
Returns whether `num` is a palindrome or not
(see for reference https://en.wikipedia.org/wiki/Palindromic_number).
>>> is_int_palindrome(-121)
False
>>> is_int_palindrome(0)
True
>>> is_int_palindrome(10)
False
>>> is_int_palindrome(11)
True
>>> is_int_palindrome(101)
True
>>> is_int_palindrome(120)
False
"""
if num < 0:
return False
num_copy: int = num
rev_num: int = 0
while num > 0:
rev_num = rev_num * 10 + (num % 10)
num //= 10
return num_copy == rev_num
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/is_ip_v4_address_valid.py
================================================
"""
wiki: https://en.wikipedia.org/wiki/IPv4
Is IP v4 address valid?
A valid IP address must be four octets in the form of A.B.C.D,
where A, B, C and D are numbers from 0-255
for example: 192.168.23.1, 172.255.255.255 are valid IP address
192.168.256.0, 256.192.3.121 are invalid IP address
"""
def is_ip_v4_address_valid(ip: str) -> bool:
"""
print "Valid IP address" If IP is valid.
or
print "Invalid IP address" If IP is invalid.
>>> is_ip_v4_address_valid("192.168.0.23")
True
>>> is_ip_v4_address_valid("192.256.15.8")
False
>>> is_ip_v4_address_valid("172.100.0.8")
True
>>> is_ip_v4_address_valid("255.256.0.256")
False
>>> is_ip_v4_address_valid("1.2.33333333.4")
False
>>> is_ip_v4_address_valid("1.2.-3.4")
False
>>> is_ip_v4_address_valid("1.2.3")
False
>>> is_ip_v4_address_valid("1.2.3.4.5")
False
>>> is_ip_v4_address_valid("1.2.A.4")
False
>>> is_ip_v4_address_valid("0.0.0.0")
True
>>> is_ip_v4_address_valid("1.2.3.")
False
>>> is_ip_v4_address_valid("1.2.3.05")
False
"""
octets = ip.split(".")
if len(octets) != 4:
return False
for octet in octets:
if not octet.isdigit():
return False
number = int(octet)
if len(str(number)) != len(octet):
return False
if not 0 <= number <= 255:
return False
return True
if __name__ == "__main__":
ip = input().strip()
valid_or_invalid = "valid" if is_ip_v4_address_valid(ip) else "invalid"
print(f"{ip} is a {valid_or_invalid} IPv4 address.")
================================================
FILE: maths/is_square_free.py
================================================
"""
References: https://en.wikipedia.org/wiki/Square-free_integer
"""
from __future__ import annotations
def is_square_free(factors: list[int]) -> bool:
"""
This function takes a list of prime factors as input
and returns True if the factors are square free (i.e. no factor repeats).
>>> is_square_free([1, 1, 2, 3, 4])
False
The following inputs are not lists of prime factors, but the function still
returns a value because it simply checks for repeated elements:
>>> is_square_free([1, 3, 4, 'sd', 0.0])
True
>>> is_square_free([1, 0.5, 2, 0.0])
True
>>> is_square_free([1, 2, 2, 5])
False
>>> is_square_free('asd')
True
>>> is_square_free(24)
Traceback (most recent call last):
...
TypeError: 'int' object is not iterable
"""
return len(set(factors)) == len(factors)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/jaccard_similarity.py
================================================
"""
The Jaccard similarity coefficient is a commonly used indicator of the
similarity between two sets. Let U be a set and A and B be subsets of U,
then the Jaccard index/similarity is defined to be the ratio of the number
of elements of their intersection and the number of elements of their union.
Inspired from Wikipedia and
the book Mining of Massive Datasets [MMDS 2nd Edition, Chapter 3]
https://en.wikipedia.org/wiki/Jaccard_index
https://mmds.org
Jaccard similarity is widely used with MinHashing.
"""
def jaccard_similarity(
set_a: set[str] | list[str] | tuple[str],
set_b: set[str] | list[str] | tuple[str],
alternative_union=False,
):
"""
Finds the jaccard similarity between two sets.
Essentially, it's the size of the intersection divided by the size of the union.
The alternative way to calculate the union is to take the sum of the
number of items in the two sets. This leads to the jaccard similarity
of a set with itself being 1/2 instead of 1. [MMDS 2nd Edition, Page 77]
Parameters:
:set_a (set,list,tuple): A non-empty set/list
:set_b (set,list,tuple): A non-empty set/list
:alternative_union (boolean): If True, use the sum of the number of
items as the union
Output:
(float) The jaccard similarity between the two sets.
Examples:
>>> set_a = {'a', 'b', 'c', 'd', 'e'}
>>> set_b = {'c', 'd', 'e', 'f', 'h', 'i'}
>>> jaccard_similarity(set_a, set_b)
0.375
>>> jaccard_similarity(set_a, set_a)
1.0
>>> jaccard_similarity(set_a, set_a, True)
0.5
>>> set_a = ['a', 'b', 'c', 'd', 'e']
>>> set_b = ('c', 'd', 'e', 'f', 'h', 'i')
>>> jaccard_similarity(set_a, set_b)
0.375
>>> set_a = ('c', 'd', 'e', 'f', 'h', 'i')
>>> set_b = ['a', 'b', 'c', 'd', 'e']
>>> jaccard_similarity(set_a, set_b)
0.375
>>> set_a = ('c', 'd', 'e', 'f', 'h', 'i')
>>> set_b = ['a', 'b', 'c', 'd']
>>> jaccard_similarity(set_a, set_b, True)
0.2
>>> set_a = {'a', 'b'}
>>> set_b = ['c', 'd']
>>> jaccard_similarity(set_a, set_b)
Traceback (most recent call last):
...
ValueError: Set a and b must either both be sets or be either a list or a tuple.
"""
if isinstance(set_a, set) and isinstance(set_b, set):
intersection_length = len(set_a.intersection(set_b))
if alternative_union:
union_length = len(set_a) + len(set_b)
else:
union_length = len(set_a.union(set_b))
return intersection_length / union_length
elif isinstance(set_a, (list, tuple)) and isinstance(set_b, (list, tuple)):
intersection = [element for element in set_a if element in set_b]
if alternative_union:
return len(intersection) / (len(set_a) + len(set_b))
else:
# Cast set_a to list because tuples cannot be mutated
union = list(set_a) + [element for element in set_b if element not in set_a]
return len(intersection) / len(union)
raise ValueError(
"Set a and b must either both be sets or be either a list or a tuple."
)
if __name__ == "__main__":
set_a = {"a", "b", "c", "d", "e"}
set_b = {"c", "d", "e", "f", "h", "i"}
print(jaccard_similarity(set_a, set_b))
================================================
FILE: maths/joint_probability_distribution.py
================================================
"""
Calculate joint probability distribution
https://en.wikipedia.org/wiki/Joint_probability_distribution
"""
def joint_probability_distribution(
x_values: list[int],
y_values: list[int],
x_probabilities: list[float],
y_probabilities: list[float],
) -> dict:
"""
>>> joint_distribution = joint_probability_distribution(
... [1, 2], [-2, 5, 8], [0.7, 0.3], [0.3, 0.5, 0.2]
... )
>>> from math import isclose
>>> isclose(joint_distribution.pop((1, 8)), 0.14)
True
>>> joint_distribution
{(1, -2): 0.21, (1, 5): 0.35, (2, -2): 0.09, (2, 5): 0.15, (2, 8): 0.06}
"""
return {
(x, y): x_prob * y_prob
for x, x_prob in zip(x_values, x_probabilities)
for y, y_prob in zip(y_values, y_probabilities)
}
# Function to calculate the expectation (mean)
def expectation(values: list, probabilities: list) -> float:
"""
>>> from math import isclose
>>> isclose(expectation([1, 2], [0.7, 0.3]), 1.3)
True
"""
return sum(x * p for x, p in zip(values, probabilities))
# Function to calculate the variance
def variance(values: list[int], probabilities: list[float]) -> float:
"""
>>> from math import isclose
>>> isclose(variance([1,2],[0.7,0.3]), 0.21)
True
"""
mean = expectation(values, probabilities)
return sum((x - mean) ** 2 * p for x, p in zip(values, probabilities))
# Function to calculate the covariance
def covariance(
x_values: list[int],
y_values: list[int],
x_probabilities: list[float],
y_probabilities: list[float],
) -> float:
"""
>>> covariance([1, 2], [-2, 5, 8], [0.7, 0.3], [0.3, 0.5, 0.2])
-2.7755575615628914e-17
"""
mean_x = expectation(x_values, x_probabilities)
mean_y = expectation(y_values, y_probabilities)
return sum(
(x - mean_x) * (y - mean_y) * px * py
for x, px in zip(x_values, x_probabilities)
for y, py in zip(y_values, y_probabilities)
)
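# Hedged illustrative note (not part of the original file): since the joint
# distribution above is built as the product of the marginals, X and Y are
# independent by construction, so their covariance is numerically zero.
def _independent_covariance_is_zero() -> bool:
    """
    >>> _independent_covariance_is_zero()
    True
    """
    return abs(covariance([1, 2], [-2, 5, 8], [0.7, 0.3], [0.3, 0.5, 0.2])) < 1e-12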
# Function to calculate the standard deviation
def standard_deviation(variance: float) -> float:
"""
>>> standard_deviation(0.21)
0.458257569495584
"""
return variance**0.5
if __name__ == "__main__":
from doctest import testmod
testmod()
# Input values for X and Y
x_vals = input("Enter values of X separated by spaces: ").split()
y_vals = input("Enter values of Y separated by spaces: ").split()
# Convert input values to integers
x_values = [int(x) for x in x_vals]
y_values = [int(y) for y in y_vals]
# Input probabilities for X and Y
x_probs = input("Enter probabilities for X separated by spaces: ").split()
y_probs = input("Enter probabilities for Y separated by spaces: ").split()
assert len(x_values) == len(x_probs)
assert len(y_values) == len(y_probs)
# Convert input probabilities to floats
x_probabilities = [float(p) for p in x_probs]
y_probabilities = [float(p) for p in y_probs]
# Calculate the joint probability distribution
jpd = joint_probability_distribution(
x_values, y_values, x_probabilities, y_probabilities
)
# Print the joint probability distribution
print(
"\n".join(
f"P(X={x}, Y={y}) = {probability}" for (x, y), probability in jpd.items()
)
)
mean_xy = expectation(
[x * y for x in x_values for y in y_values],
[px * py for px in x_probabilities for py in y_probabilities],
)
print(f"x mean: {expectation(x_values, x_probabilities) = }")
print(f"y mean: {expectation(y_values, y_probabilities) = }")
print(f"xy mean: {mean_xy}")
print(f"x: {variance(x_values, x_probabilities) = }")
print(f"y: {variance(y_values, y_probabilities) = }")
print(f"{covariance(x_values, y_values, x_probabilities, y_probabilities) = }")
print(f"x: {standard_deviation(variance(x_values, x_probabilities)) = }")
print(f"y: {standard_deviation(variance(y_values, y_probabilities)) = }")
================================================
FILE: maths/josephus_problem.py
================================================
"""
The Josephus problem is a famous theoretical problem related to a certain
counting-out game. This module provides functions to solve the Josephus problem
for num_people and a step_size.
The Josephus problem is defined as follows:
- num_people are standing in a circle.
- Starting with a specified person, you count around the circle,
skipping a fixed number of people (step_size).
- The person at which you stop counting is eliminated from the circle.
- The counting continues until only one person remains.
For more information about the Josephus problem, refer to:
https://en.wikipedia.org/wiki/Josephus_problem
"""
def josephus_recursive(num_people: int, step_size: int) -> int:
"""
Solve the Josephus problem for num_people and a step_size recursively.
Args:
num_people: A positive integer representing the number of people.
step_size: A positive integer representing the step size for elimination.
Returns:
The position of the last person remaining.
Raises:
ValueError: If num_people or step_size is not a positive integer.
Examples:
>>> josephus_recursive(7, 3)
3
>>> josephus_recursive(10, 2)
4
>>> josephus_recursive(0, 2)
Traceback (most recent call last):
...
ValueError: num_people or step_size is not a positive integer.
>>> josephus_recursive(1.9, 2)
Traceback (most recent call last):
...
ValueError: num_people or step_size is not a positive integer.
>>> josephus_recursive(-2, 2)
Traceback (most recent call last):
...
ValueError: num_people or step_size is not a positive integer.
>>> josephus_recursive(7, 0)
Traceback (most recent call last):
...
ValueError: num_people or step_size is not a positive integer.
>>> josephus_recursive(7, -2)
Traceback (most recent call last):
...
ValueError: num_people or step_size is not a positive integer.
>>> josephus_recursive(1_000, 0.01)
Traceback (most recent call last):
...
ValueError: num_people or step_size is not a positive integer.
>>> josephus_recursive("cat", "dog")
Traceback (most recent call last):
...
ValueError: num_people or step_size is not a positive integer.
"""
if (
not isinstance(num_people, int)
or not isinstance(step_size, int)
or num_people <= 0
or step_size <= 0
):
raise ValueError("num_people or step_size is not a positive integer.")
if num_people == 1:
return 0
return (josephus_recursive(num_people - 1, step_size) + step_size) % num_people
def find_winner(num_people: int, step_size: int) -> int:
"""
Find the winner of the Josephus problem for num_people and a step_size.
Args:
num_people (int): Number of people.
step_size (int): Step size for elimination.
Returns:
int: The position of the last person remaining (1-based index).
Examples:
>>> find_winner(7, 3)
4
>>> find_winner(10, 2)
5
"""
return josephus_recursive(num_people, step_size) + 1
def josephus_iterative(num_people: int, step_size: int) -> int:
"""
Solve the Josephus problem for num_people and a step_size iteratively.
Args:
num_people (int): The number of people in the circle.
step_size (int): The number of steps to take before eliminating someone.
Returns:
int: The position of the last person standing.
Examples:
>>> josephus_iterative(5, 2)
3
>>> josephus_iterative(7, 3)
4
"""
circle = list(range(1, num_people + 1))
current = 0
while len(circle) > 1:
current = (current + step_size - 1) % len(circle)
circle.pop(current)
return circle[0]
if __name__ == "__main__":
import doctest
doctest.testmod()
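# A minimal cross-check sketch (not part of the original module; the helper name
# below is illustrative): a direct simulation of the counting-out game should agree
# with the recurrence-based solver above.
def _josephus_brute_force(num_people: int, step_size: int) -> int:
    """Simulate the eliminations directly and return the 0-based survivor index."""
    people = list(range(num_people))
    index = 0
    while len(people) > 1:
        index = (index + step_size - 1) % len(people)
        people.pop(index)
    return people[0]
# Example: _josephus_brute_force(7, 3) == josephus_recursive(7, 3) == 3, while
# find_winner(7, 3) == josephus_iterative(7, 3) == 4 (the 1-based position).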
================================================
FILE: maths/juggler_sequence.py
================================================
"""
== Juggler Sequence ==
A juggler sequence starts with any positive integer n. The next term is
obtained as follows:
If n is even, the next term is the floor of the square root of n.
If n is odd, the next term is the floor of n raised to the power 3/2
(i.e. the floor of n * sqrt(n)).
https://en.wikipedia.org/wiki/Juggler_sequence
"""
# Author : Akshay Dubey (https://github.com/itsAkshayDubey)
import math
def juggler_sequence(number: int) -> list[int]:
"""
>>> juggler_sequence(0)
Traceback (most recent call last):
...
ValueError: Input value of [number=0] must be a positive integer
>>> juggler_sequence(1)
[1]
>>> juggler_sequence(2)
[2, 1]
>>> juggler_sequence(3)
[3, 5, 11, 36, 6, 2, 1]
>>> juggler_sequence(5)
[5, 11, 36, 6, 2, 1]
>>> juggler_sequence(10)
[10, 3, 5, 11, 36, 6, 2, 1]
>>> juggler_sequence(25)
[25, 125, 1397, 52214, 228, 15, 58, 7, 18, 4, 2, 1]
>>> juggler_sequence(6.0)
Traceback (most recent call last):
...
TypeError: Input value of [number=6.0] must be an integer
>>> juggler_sequence(-1)
Traceback (most recent call last):
...
ValueError: Input value of [number=-1] must be a positive integer
"""
if not isinstance(number, int):
msg = f"Input value of [number={number}] must be an integer"
raise TypeError(msg)
if number < 1:
msg = f"Input value of [number={number}] must be a positive integer"
raise ValueError(msg)
sequence = [number]
while number != 1:
if number % 2 == 0:
number = math.floor(math.sqrt(number))
else:
number = math.floor(
math.sqrt(number) * math.sqrt(number) * math.sqrt(number)
)
sequence.append(number)
return sequence
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/karatsuba.py
================================================
"""Multiply two numbers using Karatsuba algorithm"""
def karatsuba(a: int, b: int) -> int:
"""
>>> karatsuba(15463, 23489) == 15463 * 23489
True
>>> karatsuba(3, 9) == 3 * 9
True
"""
if len(str(a)) == 1 or len(str(b)) == 1:
return a * b
m1 = max(len(str(a)), len(str(b)))
m2 = m1 // 2
a1, a2 = divmod(a, 10**m2)
b1, b2 = divmod(b, 10**m2)
x = karatsuba(a2, b2)
y = karatsuba((a1 + a2), (b1 + b2))
z = karatsuba(a1, b1)
return (z * 10 ** (2 * m2)) + ((y - z - x) * 10 ** (m2)) + (x)
def main():
print(karatsuba(15463, 23489))
if __name__ == "__main__":
main()
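# A worked trace of the recursion above (illustrative comments only):
#   karatsuba(15463, 23489) with m1 = 5, m2 = 2 splits the operands as
#     a1, a2 = divmod(15463, 10**2) = (154, 63)
#     b1, b2 = divmod(23489, 10**2) = (234, 89)
#   x = karatsuba(63, 89)    = 5607    # low * low
#   z = karatsuba(154, 234)  = 36036   # high * high
#   y = karatsuba(217, 323)  = 70091   # (a1 + a2) * (b1 + b2)
#   result = 36036 * 10**4 + (70091 - 36036 - 5607) * 10**2 + 5607 = 363210407
# which equals 15463 * 23489, using three sub-multiplications instead of four.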
================================================
FILE: maths/kth_lexicographic_permutation.py
================================================
def kth_permutation(k, n):
"""
Finds k'th lexicographic permutation (in increasing order) of
0,1,2,...n-1 in O(n^2) time.
Examples:
The first permutation is always 0,1,2,...,n-1
>>> kth_permutation(0,5)
[0, 1, 2, 3, 4]
The order of permutation of 0,1,2,3 is [0,1,2,3], [0,1,3,2], [0,2,1,3],
[0,2,3,1], [0,3,1,2], [0,3,2,1], [1,0,2,3], [1,0,3,2], [1,2,0,3],
[1,2,3,0], [1,3,0,2]
>>> kth_permutation(10,4)
[1, 3, 0, 2]
"""
# Factorials from 1! to (n-1)!
factorials = [1]
for i in range(2, n):
factorials.append(factorials[-1] * i)
assert 0 <= k < factorials[-1] * n, "k out of bounds"
permutation = []
elements = list(range(n))
# Find permutation
while factorials:
factorial = factorials.pop()
number, k = divmod(k, factorial)
permutation.append(elements[number])
elements.remove(elements[number])
permutation.append(elements[0])
return permutation
if __name__ == "__main__":
import doctest
doctest.testmod()
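# A worked trace of kth_permutation(10, 4) (illustrative comments only):
#   factorials = [1, 2, 6], elements = [0, 1, 2, 3]
#   pop 6: 10 = 1 * 6 + 4  -> pick elements[1] = 1, remaining [0, 2, 3], k = 4
#   pop 2:  4 = 2 * 2 + 0  -> pick elements[2] = 3, remaining [0, 2],    k = 0
#   pop 1:  0 = 0 * 1 + 0  -> pick elements[0] = 0, remaining [2]
#   append the last remaining element 2 -> permutation [1, 3, 0, 2]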
================================================
FILE: maths/largest_of_very_large_numbers.py
================================================
# Author: Abhijeeth S
import math
def res(x, y):
"""
Reduces a large power x^y to a more manageable number (its base-10 logarithm)
so that powers can be compared without being computed
>>> res(5, 7)
4.892790030352132
>>> res(0, 5)
0
>>> res(3, 0)
1
>>> res(-1, 5)
Traceback (most recent call last):
...
ValueError: expected a positive input
"""
if 0 not in (x, y):
# We use the relation log10(x^y) = y*log10(x) to compare magnitudes.
return y * math.log10(x)
elif x == 0: # 0 raised to any number is 0
return 0
elif y == 0:
return 1 # any number raised to 0 is 1
raise AssertionError("This should never happen")
if __name__ == "__main__": # Main function
# Read two numbers from input and typecast them to int using map function.
# Here x is the base and y is the power.
prompt = "Enter the base and the power separated by a comma: "
x1, y1 = map(int, input(prompt).split(","))
x2, y2 = map(int, input(prompt).split(","))
# We find the log of each number, using the function res(), which takes two
# arguments.
res1 = res(x1, y1)
res2 = res(x2, y2)
# We check for the largest number
if res1 > res2:
print("Largest number is", x1, "^", y1)
elif res2 > res1:
print("Largest number is", x2, "^", y2)
else:
print("Both are equal")
================================================
FILE: maths/least_common_multiple.py
================================================
import unittest
from timeit import timeit
from maths.greatest_common_divisor import greatest_common_divisor
def least_common_multiple_slow(first_num: int, second_num: int) -> int:
"""
Find the least common multiple of two numbers.
Learn more: https://en.wikipedia.org/wiki/Least_common_multiple
>>> least_common_multiple_slow(5, 2)
10
>>> least_common_multiple_slow(12, 76)
228
"""
max_num = first_num if first_num >= second_num else second_num
common_mult = max_num
while (common_mult % first_num > 0) or (common_mult % second_num > 0):
common_mult += max_num
return common_mult
def least_common_multiple_fast(first_num: int, second_num: int) -> int:
"""
Find the least common multiple of two numbers.
https://en.wikipedia.org/wiki/Least_common_multiple#Using_the_greatest_common_divisor
>>> least_common_multiple_fast(5,2)
10
>>> least_common_multiple_fast(12,76)
228
"""
return first_num // greatest_common_divisor(first_num, second_num) * second_num
def benchmark():
setup = (
"from __main__ import least_common_multiple_slow, least_common_multiple_fast"
)
print(
"least_common_multiple_slow():",
timeit("least_common_multiple_slow(1000, 999)", setup=setup),
)
print(
"least_common_multiple_fast():",
timeit("least_common_multiple_fast(1000, 999)", setup=setup),
)
class TestLeastCommonMultiple(unittest.TestCase):
test_inputs = (
(10, 20),
(13, 15),
(4, 31),
(10, 42),
(43, 34),
(5, 12),
(12, 25),
(10, 25),
(6, 9),
)
expected_results = (20, 195, 124, 210, 1462, 60, 300, 50, 18)
def test_lcm_function(self):
for i, (first_num, second_num) in enumerate(self.test_inputs):
slow_result = least_common_multiple_slow(first_num, second_num)
fast_result = least_common_multiple_fast(first_num, second_num)
with self.subTest(i=i):
assert slow_result == self.expected_results[i]
assert fast_result == self.expected_results[i]
if __name__ == "__main__":
benchmark()
unittest.main()
================================================
FILE: maths/line_length.py
================================================
from __future__ import annotations
import math
from collections.abc import Callable
def line_length(
fnc: Callable[[float], float],
x_start: float,
x_end: float,
steps: int = 100,
) -> float:
"""
Approximates the arc length of a curve by treating it as a sequence of
straight line segments and summing their lengths
:param fnc: a function which defines a curve
:param x_start: left end point to indicate the start of line segment
:param x_end: right end point to indicate end of line segment
:param steps: an accuracy gauge; more steps increases accuracy
:return: a float representing the length of the curve
>>> def f(x):
... return x
>>> f"{line_length(f, 0, 1, 10):.6f}"
'1.414214'
>>> def f(x):
... return 1
>>> f"{line_length(f, -5.5, 4.5):.6f}"
'10.000000'
>>> def f(x):
... return math.sin(5 * x) + math.cos(10 * x) + x * x/10
>>> f"{line_length(f, 0.0, 10.0, 10000):.6f}"
'69.534930'
"""
x1 = x_start
fx1 = fnc(x_start)
length = 0.0
for _ in range(steps):
# Approximate the curve by straight line segments and sum their lengths
x2 = (x_end - x_start) / steps + x1
fx2 = fnc(x2)
length += math.hypot(x2 - x1, fx2 - fx1)
# Increment step
x1 = x2
fx1 = fx2
return length
if __name__ == "__main__":
def f(x):
return math.sin(10 * x)
print("f(x) = sin(10 * x)")
print("The length of the curve from x = -10 to x = 10 is:")
i = 10
while i <= 100000:
print(f"With {i} steps: {line_length(f, -10, 10, i)}")
i *= 10
================================================
FILE: maths/liouville_lambda.py
================================================
"""
== Liouville Lambda Function ==
The Liouville Lambda function, denoted by λ(n), is 1 if n is the product of an
even number of prime factors (counted with multiplicity), and -1 if it is the
product of an odd number of prime factors.
https://en.wikipedia.org/wiki/Liouville_function
"""
# Author : Akshay Dubey (https://github.com/itsAkshayDubey)
from maths.prime_factors import prime_factors
def liouville_lambda(number: int) -> int:
"""
This function takes an integer number as input and
returns 1 if n has an even number of prime factors and -1 otherwise.
>>> liouville_lambda(10)
1
>>> liouville_lambda(11)
-1
>>> liouville_lambda(0)
Traceback (most recent call last):
...
ValueError: Input must be a positive integer
>>> liouville_lambda(-1)
Traceback (most recent call last):
...
ValueError: Input must be a positive integer
>>> liouville_lambda(11.0)
Traceback (most recent call last):
...
TypeError: Input value of [number=11.0] must be an integer
"""
if not isinstance(number, int):
msg = f"Input value of [number={number}] must be an integer"
raise TypeError(msg)
if number < 1:
raise ValueError("Input must be a positive integer")
return -1 if len(prime_factors(number)) % 2 else 1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/lucas_lehmer_primality_test.py
================================================
"""
In mathematics, the Lucas-Lehmer test (LLT) is a primality test for Mersenne
numbers. https://en.wikipedia.org/wiki/Lucas%E2%80%93Lehmer_primality_test
A Mersenne number is a number that is one less than a power of two.
That is M_p = 2^p - 1
https://en.wikipedia.org/wiki/Mersenne_prime
The Lucas-Lehmer test is the primality test used by the
Great Internet Mersenne Prime Search (GIMPS) to locate large primes.
"""
# Primality test 2^p - 1
# Return true if 2^p - 1 is prime
def lucas_lehmer_test(p: int) -> bool:
"""
>>> lucas_lehmer_test(p=7)
True
>>> lucas_lehmer_test(p=11)
False
# M_11 = 2^11 - 1 = 2047 = 23 * 89
"""
if p < 2:
raise ValueError("p should not be less than 2!")
elif p == 2:
return True
s = 4
m = (1 << p) - 1
for _ in range(p - 2):
s = ((s * s) - 2) % m
return s == 0
if __name__ == "__main__":
print(lucas_lehmer_test(7))
print(lucas_lehmer_test(11))
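# A worked trace for p = 5 (M_5 = 2**5 - 1 = 31), illustrative comments only:
#   s = 4
#   s = (4**2  - 2) % 31 = 14
#   s = (14**2 - 2) % 31 = 194 % 31 = 8
#   s = (8**2  - 2) % 31 =  62 % 31 = 0
# After p - 2 = 3 iterations s == 0, so M_5 = 31 is prime.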
================================================
FILE: maths/lucas_series.py
================================================
"""
https://en.wikipedia.org/wiki/Lucas_number
"""
def recursive_lucas_number(n_th_number: int) -> int:
"""
Returns the nth lucas number
>>> recursive_lucas_number(1)
1
>>> recursive_lucas_number(20)
15127
>>> recursive_lucas_number(0)
2
>>> recursive_lucas_number(25)
167761
>>> recursive_lucas_number(-1.5)
Traceback (most recent call last):
...
TypeError: recursive_lucas_number accepts only integer arguments.
"""
if not isinstance(n_th_number, int):
raise TypeError("recursive_lucas_number accepts only integer arguments.")
if n_th_number == 0:
return 2
if n_th_number == 1:
return 1
return recursive_lucas_number(n_th_number - 1) + recursive_lucas_number(
n_th_number - 2
)
def dynamic_lucas_number(n_th_number: int) -> int:
"""
Returns the nth lucas number
>>> dynamic_lucas_number(1)
1
>>> dynamic_lucas_number(20)
15127
>>> dynamic_lucas_number(0)
2
>>> dynamic_lucas_number(25)
167761
>>> dynamic_lucas_number(-1.5)
Traceback (most recent call last):
...
TypeError: dynamic_lucas_number accepts only integer arguments.
"""
if not isinstance(n_th_number, int):
raise TypeError("dynamic_lucas_number accepts only integer arguments.")
a, b = 2, 1
for _ in range(n_th_number):
a, b = b, a + b
return a
if __name__ == "__main__":
from doctest import testmod
testmod()
n = int(input("Enter the number of terms in lucas series:\n").strip())
print("Using recursive function to calculate lucas series:")
print(" ".join(str(recursive_lucas_number(i)) for i in range(n)))
print("\nUsing dynamic function to calculate lucas series:")
print(" ".join(str(dynamic_lucas_number(i)) for i in range(n)))
================================================
FILE: maths/maclaurin_series.py
================================================
"""
https://en.wikipedia.org/wiki/Taylor_series#Trigonometric_functions
"""
from math import factorial, pi
def maclaurin_sin(theta: float, accuracy: int = 30) -> float:
"""
Finds the Maclaurin approximation of sin
:param theta: the angle in radians at which sin is approximated
:param accuracy: the number of series terms to sum; higher means more accurate
:return: the approximate value of sin(theta)
>>> from math import isclose, sin
>>> all(isclose(maclaurin_sin(x, 50), sin(x)) for x in range(-25, 25))
True
>>> maclaurin_sin(10)
-0.5440211108893691
>>> maclaurin_sin(-10)
0.5440211108893704
>>> maclaurin_sin(10, 15)
-0.544021110889369
>>> maclaurin_sin(-10, 15)
0.5440211108893704
>>> maclaurin_sin("10")
Traceback (most recent call last):
...
ValueError: maclaurin_sin() requires either an int or float for theta
>>> maclaurin_sin(10, -30)
Traceback (most recent call last):
...
ValueError: maclaurin_sin() requires a positive int for accuracy
>>> maclaurin_sin(10, 30.5)
Traceback (most recent call last):
...
ValueError: maclaurin_sin() requires a positive int for accuracy
>>> maclaurin_sin(10, "30")
Traceback (most recent call last):
...
ValueError: maclaurin_sin() requires a positive int for accuracy
"""
if not isinstance(theta, (int, float)):
raise ValueError("maclaurin_sin() requires either an int or float for theta")
if not isinstance(accuracy, int) or accuracy <= 0:
raise ValueError("maclaurin_sin() requires a positive int for accuracy")
theta = float(theta)
div = theta // (2 * pi)
theta -= 2 * div * pi
return sum(
(-1) ** r * theta ** (2 * r + 1) / factorial(2 * r + 1) for r in range(accuracy)
)
def maclaurin_cos(theta: float, accuracy: int = 30) -> float:
"""
Finds the Maclaurin approximation of cos
:param theta: the angle in radians at which cos is approximated
:param accuracy: the number of series terms to sum; higher means more accurate
:return: the approximate value of cos(theta)
>>> from math import isclose, cos
>>> all(isclose(maclaurin_cos(x, 50), cos(x)) for x in range(-25, 25))
True
>>> maclaurin_cos(5)
0.2836621854632268
>>> maclaurin_cos(-5)
0.2836621854632265
>>> maclaurin_cos(10, 15)
-0.8390715290764524
>>> maclaurin_cos(-10, 15)
-0.8390715290764521
>>> maclaurin_cos("10")
Traceback (most recent call last):
...
ValueError: maclaurin_cos() requires either an int or float for theta
>>> maclaurin_cos(10, -30)
Traceback (most recent call last):
...
ValueError: maclaurin_cos() requires a positive int for accuracy
>>> maclaurin_cos(10, 30.5)
Traceback (most recent call last):
...
ValueError: maclaurin_cos() requires a positive int for accuracy
>>> maclaurin_cos(10, "30")
Traceback (most recent call last):
...
ValueError: maclaurin_cos() requires a positive int for accuracy
"""
if not isinstance(theta, (int, float)):
raise ValueError("maclaurin_cos() requires either an int or float for theta")
if not isinstance(accuracy, int) or accuracy <= 0:
raise ValueError("maclaurin_cos() requires a positive int for accuracy")
theta = float(theta)
div = theta // (2 * pi)
theta -= 2 * div * pi
return sum((-1) ** r * theta ** (2 * r) / factorial(2 * r) for r in range(accuracy))
if __name__ == "__main__":
import doctest
doctest.testmod()
print(maclaurin_sin(10))
print(maclaurin_sin(-10))
print(maclaurin_sin(10, 15))
print(maclaurin_sin(-10, 15))
print(maclaurin_cos(5))
print(maclaurin_cos(-5))
print(maclaurin_cos(10, 15))
print(maclaurin_cos(-10, 15))
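# A brief convergence sketch (illustrative values only): the partial sums of the
# sin series at theta = 1 approach math.sin(1) ~= 0.8414709848 term by term:
#   accuracy=1: 1.0
#   accuracy=2: 1 - 1/3!                ~= 0.8333333
#   accuracy=3: 1 - 1/3! + 1/5!         ~= 0.8416667
#   accuracy=4: 1 - 1/3! + 1/5! - 1/7!  ~= 0.8414683
# e.g. maclaurin_sin(1, 4) returns the four-term value above.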
================================================
FILE: maths/manhattan_distance.py
================================================
def manhattan_distance(point_a: list, point_b: list) -> float:
"""
Expects two lists of numbers representing two points in the same
n-dimensional space
https://en.wikipedia.org/wiki/Taxicab_geometry
>>> manhattan_distance([1,1], [2,2])
2.0
>>> manhattan_distance([1.5,1.5], [2,2])
1.0
>>> manhattan_distance([1.5,1.5], [2.5,2])
1.5
>>> manhattan_distance([-3, -3, -3], [0, 0, 0])
9.0
>>> manhattan_distance([1,1], None)
Traceback (most recent call last):
...
ValueError: Missing an input
>>> manhattan_distance([1,1], [2, 2, 2])
Traceback (most recent call last):
...
ValueError: Both points must be in the same n-dimensional space
>>> manhattan_distance([1,"one"], [2, 2, 2])
Traceback (most recent call last):
...
TypeError: Expected a list of numbers as input, found str
>>> manhattan_distance(1, [2, 2, 2])
Traceback (most recent call last):
...
TypeError: Expected a list of numbers as input, found int
>>> manhattan_distance([1,1], "not_a_list")
Traceback (most recent call last):
...
TypeError: Expected a list of numbers as input, found str
"""
_validate_point(point_a)
_validate_point(point_b)
if len(point_a) != len(point_b):
raise ValueError("Both points must be in the same n-dimensional space")
return float(sum(abs(a - b) for a, b in zip(point_a, point_b)))
def _validate_point(point: list[float]) -> None:
"""
>>> _validate_point(None)
Traceback (most recent call last):
...
ValueError: Missing an input
>>> _validate_point([1,"one"])
Traceback (most recent call last):
...
TypeError: Expected a list of numbers as input, found str
>>> _validate_point(1)
Traceback (most recent call last):
...
TypeError: Expected a list of numbers as input, found int
>>> _validate_point("not_a_list")
Traceback (most recent call last):
...
TypeError: Expected a list of numbers as input, found str
"""
if point:
if isinstance(point, list):
for item in point:
if not isinstance(item, (int, float)):
msg = (
"Expected a list of numbers as input, found "
f"{type(item).__name__}"
)
raise TypeError(msg)
else:
msg = f"Expected a list of numbers as input, found {type(point).__name__}"
raise TypeError(msg)
else:
raise ValueError("Missing an input")
def manhattan_distance_one_liner(point_a: list, point_b: list) -> float:
"""
Version with one liner
>>> manhattan_distance_one_liner([1,1], [2,2])
2.0
>>> manhattan_distance_one_liner([1.5,1.5], [2,2])
1.0
>>> manhattan_distance_one_liner([1.5,1.5], [2.5,2])
1.5
>>> manhattan_distance_one_liner([-3, -3, -3], [0, 0, 0])
9.0
>>> manhattan_distance_one_liner([1,1], None)
Traceback (most recent call last):
...
ValueError: Missing an input
>>> manhattan_distance_one_liner([1,1], [2, 2, 2])
Traceback (most recent call last):
...
ValueError: Both points must be in the same n-dimensional space
>>> manhattan_distance_one_liner([1,"one"], [2, 2, 2])
Traceback (most recent call last):
...
TypeError: Expected a list of numbers as input, found str
>>> manhattan_distance_one_liner(1, [2, 2, 2])
Traceback (most recent call last):
...
TypeError: Expected a list of numbers as input, found int
>>> manhattan_distance_one_liner([1,1], "not_a_list")
Traceback (most recent call last):
...
TypeError: Expected a list of numbers as input, found str
"""
_validate_point(point_a)
_validate_point(point_b)
if len(point_a) != len(point_b):
raise ValueError("Both points must be in the same n-dimensional space")
return float(sum(abs(x - y) for x, y in zip(point_a, point_b)))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/matrix_exponentiation.py
================================================
"""Matrix Exponentiation"""
import timeit
"""
Matrix Exponentiation is a technique to solve linear recurrences in logarithmic time.
You can read more about it here:
https://zobayer.blogspot.com/2010/11/matrix-exponentiation.html
https://www.hackerearth.com/practice/notes/matrix-exponentiation-1/
"""
class Matrix:
def __init__(self, arg: list[list] | int) -> None:
if isinstance(arg, list): # Initializes a matrix identical to the one provided.
self.t = arg
self.n = len(arg)
else: # Initializes a square matrix of the given size and set values to zero.
self.n = arg
self.t = [[0 for _ in range(self.n)] for _ in range(self.n)]
def __mul__(self, b: Matrix) -> Matrix:
matrix = Matrix(self.n)
for i in range(self.n):
for j in range(self.n):
for k in range(self.n):
matrix.t[i][j] += self.t[i][k] * b.t[k][j]
return matrix
def modular_exponentiation(a: Matrix, b: int) -> Matrix:
matrix = Matrix([[1, 0], [0, 1]])
while b > 0:
if b & 1:
matrix *= a
a *= a
b >>= 1
return matrix
def fibonacci_with_matrix_exponentiation(n: int, f1: int, f2: int) -> int:
"""
Returns the nth number of the Fibonacci sequence that
starts with f1 and f2
Uses the matrix exponentiation
>>> fibonacci_with_matrix_exponentiation(1, 5, 6)
5
>>> fibonacci_with_matrix_exponentiation(2, 10, 11)
11
>>> fibonacci_with_matrix_exponentiation(13, 0, 1)
144
>>> fibonacci_with_matrix_exponentiation(10, 5, 9)
411
>>> fibonacci_with_matrix_exponentiation(9, 2, 3)
89
"""
# Trivial Cases
if n == 1:
return f1
elif n == 2:
return f2
matrix = Matrix([[1, 1], [1, 0]])
matrix = modular_exponentiation(matrix, n - 2)
return f2 * matrix.t[0][0] + f1 * matrix.t[0][1]
def simple_fibonacci(n: int, f1: int, f2: int) -> int:
"""
Returns the nth number of the Fibonacci sequence that
starts with f1 and f2
Uses the definition
>>> simple_fibonacci(1, 5, 6)
5
>>> simple_fibonacci(2, 10, 11)
11
>>> simple_fibonacci(13, 0, 1)
144
>>> simple_fibonacci(10, 5, 9)
411
>>> simple_fibonacci(9, 2, 3)
89
"""
# Trivial Cases
if n == 1:
return f1
elif n == 2:
return f2
n -= 2
while n > 0:
f2, f1 = f1 + f2, f2
n -= 1
return f2
def matrix_exponentiation_time() -> float:
setup = """
from random import randint
from __main__ import fibonacci_with_matrix_exponentiation
"""
code = "fibonacci_with_matrix_exponentiation(randint(1,70000), 1, 1)"
exec_time = timeit.timeit(setup=setup, stmt=code, number=100)
print("With matrix exponentiation the average execution time is ", exec_time / 100)
return exec_time
def simple_fibonacci_time() -> float:
setup = """
from random import randint
from __main__ import simple_fibonacci
"""
code = "simple_fibonacci(randint(1,70000), 1, 1)"
exec_time = timeit.timeit(setup=setup, stmt=code, number=100)
print(
"Without matrix exponentiation the average execution time is ", exec_time / 100
)
return exec_time
def main() -> None:
matrix_exponentiation_time()
simple_fibonacci_time()
if __name__ == "__main__":
main()
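# An illustrative identity behind fibonacci_with_matrix_exponentiation
# (comments only, for the standard sequence with F(1) = F(2) = 1):
#   [[1, 1],            [[F(n+1), F(n)  ],
#    [1, 0]] ** n   =    [F(n),   F(n-1)]]
# so after modular_exponentiation(Matrix([[1, 1], [1, 0]]), n - 2) the top row
# holds the coefficients that combine the two seed values f1 and f2, e.g.
# fibonacci_with_matrix_exponentiation(13, 0, 1) == 144 as in the doctest above.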
================================================
FILE: maths/max_sum_sliding_window.py
================================================
"""
Given an array of integer elements and an integer 'k', we are required to find the
maximum sum of 'k' consecutive elements in the array.
Instead of the nested loops of a brute-force approach, we use the 'sliding window'
technique, which converts the nested loops into a single loop and reduces the
time complexity.
"""
from __future__ import annotations
def max_sum_in_array(array: list[int], k: int) -> int:
"""
Returns the maximum sum of k consecutive elements
>>> arr = [1, 4, 2, 10, 2, 3, 1, 0, 20]
>>> k = 4
>>> max_sum_in_array(arr, k)
24
>>> k = 10
>>> max_sum_in_array(arr,k)
Traceback (most recent call last):
...
ValueError: Invalid Input
>>> arr = [1, 4, 2, 10, 2, 13, 1, 0, 2]
>>> k = 4
>>> max_sum_in_array(arr, k)
27
"""
if len(array) < k or k < 0:
raise ValueError("Invalid Input")
max_sum = current_sum = sum(array[:k])
for i in range(len(array) - k):
current_sum = current_sum - array[i] + array[i + k]
max_sum = max(max_sum, current_sum)
return max_sum
if __name__ == "__main__":
from doctest import testmod
from random import randint
testmod()
array = [randint(-1000, 1000) for i in range(100)]
k = randint(0, 110)
print(
f"The maximum sum of {k} consecutive elements is {max_sum_in_array(array, k)}"
)
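# A short trace of the sliding window for arr = [1, 4, 2, 10, 2, 3, 1, 0, 20]
# and k = 4 (illustrative comments only):
#   initial window [1, 4, 2, 10]        -> sum 17
#   slide: - 1 + 2  -> [4, 2, 10, 2]    -> sum 18
#   slide: - 4 + 3  -> [2, 10, 2, 3]    -> sum 17
#   slide: - 2 + 1  -> [10, 2, 3, 1]    -> sum 16
#   slide: -10 + 0  -> [2, 3, 1, 0]     -> sum 6
#   slide: - 2 + 20 -> [3, 1, 0, 20]    -> sum 24   <- maximum, as in the doctest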
================================================
FILE: maths/minkowski_distance.py
================================================
def minkowski_distance(
point_a: list[float],
point_b: list[float],
order: int,
) -> float:
"""
This function calculates the Minkowski distance for a given order between
two n-dimensional points represented as lists. For the case of order = 1,
the Minkowski distance degenerates to the Manhattan distance. For
order = 2, the usual Euclidean distance is obtained.
https://en.wikipedia.org/wiki/Minkowski_distance
Note: due to floating point calculation errors the output of this
function may be inaccurate.
>>> minkowski_distance([1.0, 1.0], [2.0, 2.0], 1)
2.0
>>> minkowski_distance([1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], 2)
8.0
>>> import numpy as np
>>> bool(np.isclose(5.0, minkowski_distance([5.0], [0.0], 3)))
True
>>> minkowski_distance([1.0], [2.0], -1)
Traceback (most recent call last):
...
ValueError: The order must be greater than or equal to 1.
>>> minkowski_distance([1.0], [1.0, 2.0], 1)
Traceback (most recent call last):
...
ValueError: Both points must have the same dimension.
"""
if order < 1:
raise ValueError("The order must be greater than or equal to 1.")
if len(point_a) != len(point_b):
raise ValueError("Both points must have the same dimension.")
return sum(abs(a - b) ** order for a, b in zip(point_a, point_b)) ** (1 / order)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/mobius_function.py
================================================
"""
References: https://en.wikipedia.org/wiki/M%C3%B6bius_function
References: wikipedia:square free number
psf/black : True
ruff : True
"""
from maths.is_square_free import is_square_free
from maths.prime_factors import prime_factors
def mobius(n: int) -> int:
"""
Mobius function
>>> mobius(24)
0
>>> mobius(-1)
1
>>> mobius('asd')
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'int' and 'str'
>>> mobius(10**400)
0
>>> mobius(10**-400)
1
>>> mobius(-1424)
1
>>> mobius([1, '2', 2.0])
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'int' and 'list'
"""
factors = prime_factors(n)
if is_square_free(factors):
return -1 if len(factors) % 2 else 1
return 0
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/modular_division.py
================================================
from __future__ import annotations
def modular_division(a: int, b: int, n: int) -> int:
"""
Modular Division :
An efficient algorithm for dividing b by a modulo n.
GCD ( Greatest Common Divisor ) or HCF ( Highest Common Factor )
Given three integers a, b, and n, such that gcd(a,n)=1 and n>1, the algorithm should
return an integer x such that 0 ≤ x ≤ n-1, and b/a = x (mod n) (that is, b = ax (mod n)).
Theorem:
a has a multiplicative inverse modulo n iff gcd(a,n) = 1
This finds x = b * a^(-1) mod n
Uses ExtendedEuclid to find the inverse of a
>>> modular_division(4,8,5)
2
>>> modular_division(3,8,5)
1
>>> modular_division(4, 11, 5)
4
"""
if n <= 1:
raise ValueError("Modulus n must be greater than 1")
if a <= 0:
raise ValueError("Divisor a must be a positive integer")
if greatest_common_divisor(a, n) != 1:
raise ValueError("a and n must be coprime (gcd(a, n) = 1)")
(_d, _t, s) = extended_gcd(n, a) # Implemented below
x = (b * s) % n
return x
def invert_modulo(a: int, n: int) -> int:
"""
This function finds the inverse of a modulo n, i.e., a^(-1)
>>> invert_modulo(2, 5)
3
>>> invert_modulo(8,7)
1
"""
(b, _x) = extended_euclid(a, n) # Implemented below
if b < 0:
b = (b % n + n) % n
return b
# ------------------ Finding Modular division using invert_modulo -------------------
def modular_division2(a: int, b: int, n: int) -> int:
"""
This function uses the above inversion of a to find x = (b * a^(-1)) mod n
>>> modular_division2(4,8,5)
2
>>> modular_division2(3,8,5)
1
>>> modular_division2(4, 11, 5)
4
"""
s = invert_modulo(a, n)
x = (b * s) % n
return x
def extended_gcd(a: int, b: int) -> tuple[int, int, int]:
"""
Extended Euclid's Algorithm : If d divides a and b and d = a*x + b*y for integers x
and y, then d = gcd(a,b)
>>> extended_gcd(10, 6)
(2, -1, 2)
>>> extended_gcd(7, 5)
(1, -2, 3)
** extended_gcd function is used when d = gcd(a,b) is required in output
"""
assert a >= 0
assert b >= 0
if b == 0:
d, x, y = a, 1, 0
else:
(d, p, q) = extended_gcd(b, a % b)
x = q
y = p - q * (a // b)
assert a % d == 0
assert b % d == 0
assert d == a * x + b * y
return (d, x, y)
def extended_euclid(a: int, b: int) -> tuple[int, int]:
"""
Extended Euclid
>>> extended_euclid(10, 6)
(-1, 2)
>>> extended_euclid(7, 5)
(-2, 3)
"""
if b == 0:
return (1, 0)
(x, y) = extended_euclid(b, a % b)
k = a // b
return (y, x - k * y)
def greatest_common_divisor(a: int, b: int) -> int:
"""
Euclid's Lemma : d divides a and b, if and only if d divides a-b and b
Euclid's Algorithm
>>> greatest_common_divisor(7,5)
1
Note : In number theory, two integers a and b are said to be relatively prime,
mutually prime, or co-prime if the only positive integer (factor) that divides
both of them is 1 i.e., gcd(a,b) = 1.
>>> greatest_common_divisor(121, 11)
11
"""
if a < b:
a, b = b, a
while a % b != 0:
a, b = b, a % b
return b
if __name__ == "__main__":
from doctest import testmod
testmod(name="modular_division", verbose=True)
testmod(name="modular_division2", verbose=True)
testmod(name="invert_modulo", verbose=True)
testmod(name="extended_gcd", verbose=True)
testmod(name="extended_euclid", verbose=True)
testmod(name="greatest_common_divisor", verbose=True)
================================================
FILE: maths/modular_exponential.py
================================================
"""
Modular Exponential.
Modular exponentiation is a type of exponentiation performed over a modulus.
For more explanation, please check
https://en.wikipedia.org/wiki/Modular_exponentiation
"""
"""Calculate Modular Exponential."""
def modular_exponential(base: int, power: int, mod: int):
"""
>>> modular_exponential(5, 0, 10)
1
>>> modular_exponential(2, 8, 7)
4
>>> modular_exponential(3, -2, 9)
-1
"""
if power < 0:
return -1
base %= mod
result = 1
while power > 0:
if power & 1:
result = (result * base) % mod
power = power >> 1
base = (base * base) % mod
return result
def main():
"""Call Modular Exponential Function."""
print(modular_exponential(3, 200, 13))
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
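# A worked square-and-multiply trace for modular_exponential(3, 13, 7)
# (illustrative comments only; 13 = 0b1101, bits processed least significant first):
#   result = 1, base = 3
#   bit 1: result = 1 * 3 % 7 = 3;  base = 3**2 % 7 = 2
#   bit 0:                          base = 2**2 % 7 = 4
#   bit 1: result = 3 * 4 % 7 = 5;  base = 4**2 % 7 = 2
#   bit 1: result = 5 * 2 % 7 = 3
# so 3**13 % 7 == 3, computed with O(log power) multiplications.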
================================================
FILE: maths/monte_carlo.py
================================================
"""
@author: MatteoRaso
"""
from collections.abc import Callable
from math import pi, sqrt
from random import uniform
from statistics import mean
def pi_estimator(iterations: int) -> None:
"""
An implementation of the Monte Carlo method used to find pi.
1. Draw a 2x2 square centred at (0,0).
2. Inscribe a circle within the square.
3. For each iteration, place a dot anywhere in the square.
a. Record the number of dots within the circle.
4. After all the dots are placed, divide the dots in the circle by the total.
5. Multiply this value by 4 to get your estimate of pi.
6. Print the estimated value of pi and the value of math.pi
"""
# A local function to see if a dot lands in the circle.
def is_in_circle(x: float, y: float) -> bool:
distance_from_centre = sqrt((x**2) + (y**2))
# Our circle has a radius of 1, so a distance
# greater than 1 would land outside the circle.
return distance_from_centre <= 1
# The proportion of guesses that landed in the circle
proportion = mean(
int(is_in_circle(uniform(-1.0, 1.0), uniform(-1.0, 1.0)))
for _ in range(iterations)
)
# The ratio of the area for circle to square is pi/4.
pi_estimate = proportion * 4
print(f"The estimated value of pi is {pi_estimate}")
print(f"The numpy value of pi is {pi}")
print(f"The total error is {abs(pi - pi_estimate)}")
def area_under_curve_estimator(
iterations: int,
function_to_integrate: Callable[[float], float],
min_value: float = 0.0,
max_value: float = 1.0,
) -> float:
"""
An implementation of the Monte Carlo method to find area under
a single variable non-negative real-valued continuous function,
say f(x), where x lies within a continuous bounded interval,
say [min_value, max_value], where min_value and max_value are
finite numbers
1. Let x be a uniformly distributed random variable between min_value and
max_value
2. Expected value of f(x) =
(integrate f(x) from min_value to max_value)/(max_value - min_value)
3. Finding expected value of f(x):
a. Repeatedly draw x from uniform distribution
b. Evaluate f(x) at each of the drawn x values
c. Expected value = average of the function evaluations
4. Estimated value of integral = Expected value * (max_value - min_value)
5. Returns estimated value
"""
return mean(
function_to_integrate(uniform(min_value, max_value)) for _ in range(iterations)
) * (max_value - min_value)
def area_under_line_estimator_check(
iterations: int, min_value: float = 0.0, max_value: float = 1.0
) -> None:
"""
Checks estimation error for area_under_curve_estimator function
for f(x) = x where x lies within min_value to max_value
1. Calls "area_under_curve_estimator" function
2. Compares with the expected value
3. Prints estimated, expected and error value
"""
def identity_function(x: float) -> float:
"""
Represents identity function
>>> [identity_function(x) for x in [-2.0, -1.0, 0.0, 1.0, 2.0]]
[-2.0, -1.0, 0.0, 1.0, 2.0]
"""
return x
estimated_value = area_under_curve_estimator(
iterations, identity_function, min_value, max_value
)
expected_value = (max_value * max_value - min_value * min_value) / 2
print("******************")
print(f"Estimating area under y=x where x varies from {min_value} to {max_value}")
print(f"Estimated value is {estimated_value}")
print(f"Expected value is {expected_value}")
print(f"Total error is {abs(estimated_value - expected_value)}")
print("******************")
def pi_estimator_using_area_under_curve(iterations: int) -> None:
"""
Area under curve y = sqrt(4 - x^2) where x lies in 0 to 2 is equal to pi
"""
def function_to_integrate(x: float) -> float:
"""
Represents semi-circle with radius 2
>>> [function_to_integrate(x) for x in [-2.0, 0.0, 2.0]]
[0.0, 2.0, 0.0]
"""
return sqrt(4.0 - x * x)
estimated_value = area_under_curve_estimator(
iterations, function_to_integrate, 0.0, 2.0
)
print("******************")
print("Estimating pi using area_under_curve_estimator")
print(f"Estimated value is {estimated_value}")
print(f"Expected value is {pi}")
print(f"Total error is {abs(estimated_value - pi)}")
print("******************")
if __name__ == "__main__":
import doctest
doctest.testmod()
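# A minimal usage sketch (results vary from run to run since sampling is random):
#   pi_estimator(100_000)                          # prints an estimate near 3.14
#   pi_estimator_using_area_under_curve(100_000)
#   area_under_line_estimator_check(100_000)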
================================================
FILE: maths/monte_carlo_dice.py
================================================
from __future__ import annotations
import random
class Dice:
NUM_SIDES = 6
def __init__(self):
"""Initialize a six sided dice"""
self.sides = list(range(1, Dice.NUM_SIDES + 1))
def roll(self):
return random.choice(self.sides)
def throw_dice(num_throws: int, num_dice: int = 2) -> list[float]:
"""
Return probability list of all possible sums when throwing dice.
>>> random.seed(0)
>>> throw_dice(10, 1)
[10.0, 0.0, 30.0, 50.0, 10.0, 0.0]
>>> throw_dice(100, 1)
[19.0, 17.0, 17.0, 11.0, 23.0, 13.0]
>>> throw_dice(1000, 1)
[18.8, 15.5, 16.3, 17.6, 14.2, 17.6]
>>> throw_dice(10000, 1)
[16.35, 16.89, 16.93, 16.6, 16.52, 16.71]
>>> throw_dice(10000, 2)
[2.74, 5.6, 7.99, 11.26, 13.92, 16.7, 14.44, 10.63, 8.05, 5.92, 2.75]
"""
dices = [Dice() for _ in range(num_dice)]
count_of_sum = [0] * (len(dices) * Dice.NUM_SIDES + 1)
for _ in range(num_throws):
count_of_sum[sum(dice.roll() for dice in dices)] += 1
probability = [round((count * 100) / num_throws, 2) for count in count_of_sum]
return probability[num_dice:] # remove probability of sums that never appear
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/number_of_digits.py
================================================
import math
from timeit import timeit
def num_digits(n: int) -> int:
"""
Find the number of digits in a number.
>>> num_digits(12345)
5
>>> num_digits(123)
3
>>> num_digits(0)
1
>>> num_digits(-1)
1
>>> num_digits(-123456)
6
>>> num_digits('123') # Raises a TypeError for non-integer input
Traceback (most recent call last):
...
TypeError: Input must be an integer
"""
if not isinstance(n, int):
raise TypeError("Input must be an integer")
digits = 0
n = abs(n)
while True:
n = n // 10
digits += 1
if n == 0:
break
return digits
def num_digits_fast(n: int) -> int:
"""
Find the number of digits in a number.
abs() is used because the logarithm of a negative number is not defined.
>>> num_digits_fast(12345)
5
>>> num_digits_fast(123)
3
>>> num_digits_fast(0)
1
>>> num_digits_fast(-1)
1
>>> num_digits_fast(-123456)
6
>>> num_digits_fast('123') # Raises a TypeError for non-integer input
Traceback (most recent call last):
...
TypeError: Input must be an integer
"""
if not isinstance(n, int):
raise TypeError("Input must be an integer")
return 1 if n == 0 else math.floor(math.log(abs(n), 10) + 1)
def num_digits_faster(n: int) -> int:
"""
Find the number of digits in a number.
abs() is used for negative numbers
>>> num_digits_faster(12345)
5
>>> num_digits_faster(123)
3
>>> num_digits_faster(0)
1
>>> num_digits_faster(-1)
1
>>> num_digits_faster(-123456)
6
>>> num_digits_faster('123') # Raises a TypeError for non-integer input
Traceback (most recent call last):
...
TypeError: Input must be an integer
"""
if not isinstance(n, int):
raise TypeError("Input must be an integer")
return len(str(abs(n)))
def benchmark() -> None:
"""
Benchmark multiple functions, with three different length int values.
"""
from collections.abc import Callable
def benchmark_a_function(func: Callable, value: int) -> None:
call = f"{func.__name__}({value})"
timing = timeit(f"__main__.{call}", setup="import __main__")
print(f"{call}: {func(value)} -- {timing} seconds")
for value in (262144, 1125899906842624, 1267650600228229401496703205376):
for func in (num_digits, num_digits_fast, num_digits_faster):
benchmark_a_function(func, value)
print()
if __name__ == "__main__":
import doctest
doctest.testmod()
benchmark()
================================================
FILE: maths/numerical_analysis/__init__.py
================================================
================================================
FILE: maths/numerical_analysis/adams_bashforth.py
================================================
"""
Use the Adams-Bashforth methods to solve Ordinary Differential Equations.
https://en.wikipedia.org/wiki/Linear_multistep_method
Author : Ravi Kumar
"""
from collections.abc import Callable
from dataclasses import dataclass
import numpy as np
@dataclass
class AdamsBashforth:
"""
args:
func: An ordinary differential equation (ODE) as function of x and y.
x_initials: List containing initial required values of x.
y_initials: List containing initial required values of y.
step_size: The increment value of x.
x_final: The final value of x.
Returns: Solution of y at each nodal point
>>> def f(x, y):
... return x + y
>>> AdamsBashforth(f, [0, 0.2, 0.4], [0, 0.2, 1], 0.2, 1) # doctest: +ELLIPSIS
AdamsBashforth(func=..., x_initials=[0, 0.2, 0.4], y_initials=[0, 0.2, 1], step...)
>>> AdamsBashforth(f, [0, 0.2, 1], [0, 0, 0.04], 0.2, 1).step_2()
Traceback (most recent call last):
...
ValueError: The final value of x must be greater than the initial values of x.
>>> AdamsBashforth(f, [0, 0.2, 0.3], [0, 0, 0.04], 0.2, 1).step_3()
Traceback (most recent call last):
...
ValueError: x-values must be equally spaced according to step size.
>>> AdamsBashforth(f,[0,0.2,0.4,0.6,0.8],[0,0,0.04,0.128,0.307],-0.2,1).step_5()
Traceback (most recent call last):
...
ValueError: Step size must be positive.
"""
func: Callable[[float, float], float]
x_initials: list[float]
y_initials: list[float]
step_size: float
x_final: float
def __post_init__(self) -> None:
if self.x_initials[-1] >= self.x_final:
raise ValueError(
"The final value of x must be greater than the initial values of x."
)
if self.step_size <= 0:
raise ValueError("Step size must be positive.")
if not all(
round(x1 - x0, 10) == self.step_size
for x0, x1 in zip(self.x_initials, self.x_initials[1:])
):
raise ValueError("x-values must be equally spaced according to step size.")
def step_2(self) -> np.ndarray:
"""
>>> def f(x, y):
... return x
>>> AdamsBashforth(f, [0, 0.2], [0, 0], 0.2, 1).step_2()
array([0. , 0. , 0.06, 0.16, 0.3 , 0.48])
>>> AdamsBashforth(f, [0, 0.2, 0.4], [0, 0, 0.04], 0.2, 1).step_2()
Traceback (most recent call last):
...
ValueError: Insufficient initial points information.
"""
if len(self.x_initials) != 2 or len(self.y_initials) != 2:
raise ValueError("Insufficient initial points information.")
x_0, x_1 = self.x_initials[:2]
y_0, y_1 = self.y_initials[:2]
n = int((self.x_final - x_1) / self.step_size)
y = np.zeros(n + 2)
y[0] = y_0
y[1] = y_1
for i in range(n):
y[i + 2] = y[i + 1] + (self.step_size / 2) * (
3 * self.func(x_1, y[i + 1]) - self.func(x_0, y[i])
)
x_0 = x_1
x_1 += self.step_size
return y
def step_3(self) -> np.ndarray:
"""
>>> def f(x, y):
... return x + y
>>> y = AdamsBashforth(f, [0, 0.2, 0.4], [0, 0, 0.04], 0.2, 1).step_3()
>>> float(y[3])
0.15533333333333332
>>> AdamsBashforth(f, [0, 0.2], [0, 0], 0.2, 1).step_3()
Traceback (most recent call last):
...
ValueError: Insufficient initial points information.
"""
if len(self.x_initials) != 3 or len(self.y_initials) != 3:
raise ValueError("Insufficient initial points information.")
x_0, x_1, x_2 = self.x_initials[:3]
y_0, y_1, y_2 = self.y_initials[:3]
n = int((self.x_final - x_2) / self.step_size)
y = np.zeros(n + 4)
y[0] = y_0
y[1] = y_1
y[2] = y_2
for i in range(n + 1):
y[i + 3] = y[i + 2] + (self.step_size / 12) * (
23 * self.func(x_2, y[i + 2])
- 16 * self.func(x_1, y[i + 1])
+ 5 * self.func(x_0, y[i])
)
x_0 = x_1
x_1 = x_2
x_2 += self.step_size
return y
def step_4(self) -> np.ndarray:
"""
>>> def f(x,y):
... return x + y
>>> y = AdamsBashforth(
... f, [0, 0.2, 0.4, 0.6], [0, 0, 0.04, 0.128], 0.2, 1).step_4()
>>> float(y[4])
0.30699999999999994
>>> float(y[5])
0.5771083333333333
>>> AdamsBashforth(f, [0, 0.2, 0.4], [0, 0, 0.04], 0.2, 1).step_4()
Traceback (most recent call last):
...
ValueError: Insufficient initial points information.
"""
if len(self.x_initials) != 4 or len(self.y_initials) != 4:
raise ValueError("Insufficient initial points information.")
x_0, x_1, x_2, x_3 = self.x_initials[:4]
y_0, y_1, y_2, y_3 = self.y_initials[:4]
n = int((self.x_final - x_3) / self.step_size)
y = np.zeros(n + 4)
y[0] = y_0
y[1] = y_1
y[2] = y_2
y[3] = y_3
for i in range(n):
y[i + 4] = y[i + 3] + (self.step_size / 24) * (
55 * self.func(x_3, y[i + 3])
- 59 * self.func(x_2, y[i + 2])
+ 37 * self.func(x_1, y[i + 1])
- 9 * self.func(x_0, y[i])
)
x_0 = x_1
x_1 = x_2
x_2 = x_3
x_3 += self.step_size
return y
def step_5(self) -> np.ndarray:
"""
>>> def f(x,y):
... return x + y
>>> y = AdamsBashforth(
... f, [0, 0.2, 0.4, 0.6, 0.8], [0, 0.02140, 0.02140, 0.22211, 0.42536],
... 0.2, 1).step_5()
>>> float(y[-1])
0.05436839444444452
>>> AdamsBashforth(f, [0, 0.2, 0.4], [0, 0, 0.04], 0.2, 1).step_5()
Traceback (most recent call last):
...
ValueError: Insufficient initial points information.
"""
if len(self.x_initials) != 5 or len(self.y_initials) != 5:
raise ValueError("Insufficient initial points information.")
x_0, x_1, x_2, x_3, x_4 = self.x_initials[:5]
y_0, y_1, y_2, y_3, y_4 = self.y_initials[:5]
n = int((self.x_final - x_4) / self.step_size)
y = np.zeros(n + 6)
y[0] = y_0
y[1] = y_1
y[2] = y_2
y[3] = y_3
y[4] = y_4
for i in range(n + 1):
y[i + 5] = y[i + 4] + (self.step_size / 720) * (
1901 * self.func(x_4, y[i + 4])
- 2774 * self.func(x_3, y[i + 3])
- 2616 * self.func(x_2, y[i + 2])
- 1274 * self.func(x_1, y[i + 1])
+ 251 * self.func(x_0, y[i])
)
x_0 = x_1
x_1 = x_2
x_2 = x_3
x_3 = x_4
x_4 += self.step_size
return y
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/numerical_analysis/bisection.py
================================================
from collections.abc import Callable
def bisection(function: Callable[[float], float], a: float, b: float) -> float:
"""
finds where the function becomes 0 in [a, b] using Bolzano's theorem (bisection)
>>> bisection(lambda x: x ** 3 - 1, -5, 5)
1.0000000149011612
>>> bisection(lambda x: x ** 3 - 1, 2, 1000)
Traceback (most recent call last):
...
ValueError: could not find root in given interval.
>>> bisection(lambda x: x ** 2 - 4 * x + 3, 0, 2)
1.0
>>> bisection(lambda x: x ** 2 - 4 * x + 3, 2, 4)
3.0
>>> bisection(lambda x: x ** 2 - 4 * x + 3, 4, 1000)
Traceback (most recent call last):
...
ValueError: could not find root in given interval.
"""
start: float = a
end: float = b
if function(a) == 0: # one of the a or b is a root for the function
return a
elif function(b) == 0:
return b
elif (
function(a) * function(b) > 0
): # if none of these are root and they are both positive or negative,
# then this algorithm can't find the root
raise ValueError("could not find root in given interval.")
else:
mid: float = start + (end - start) / 2.0
while abs(start - mid) > 10**-7: # until the interval is within the 10^-7 tolerance
if function(mid) == 0:
return mid
elif function(mid) * function(start) < 0:
end = mid
else:
start = mid
mid = start + (end - start) / 2.0
return mid
def f(x: float) -> float:
return x**3 - 2 * x - 5
if __name__ == "__main__":
print(bisection(f, 1, 1000))
import doctest
doctest.testmod()
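# A minimal extra usage sketch (illustrative comments only):
#   bisection(lambda x: x**2 - 2, 1, 2) converges to ~1.41421356 (sqrt(2)),
#   since f(1) = -1 and f(2) = 2 bracket a sign change on [1, 2].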
================================================
FILE: maths/numerical_analysis/bisection_2.py
================================================
"""
Given a function on floating number f(x) and two floating numbers `a` and `b` such that
f(a) * f(b) < 0 and f(x) is continuous in [a, b].
Here f(x) represents algebraic or transcendental equation.
Find root of function in interval [a, b] (Or find a value of x such that f(x) is 0)
https://en.wikipedia.org/wiki/Bisection_method
"""
def equation(x: float) -> float:
"""
>>> equation(5)
-15
>>> equation(0)
10
>>> equation(-5)
-15
>>> equation(0.1)
9.99
>>> equation(-0.1)
9.99
"""
return 10 - x * x
def bisection(a: float, b: float) -> float:
"""
>>> bisection(-2, 5)
3.1611328125
>>> bisection(0, 6)
3.158203125
>>> bisection(2, 3)
Traceback (most recent call last):
...
ValueError: Wrong space!
"""
# Bolzano theory in order to find if there is a root between a and b
if equation(a) * equation(b) >= 0:
raise ValueError("Wrong space!")
c = a
while (b - a) >= 0.01:
# Find middle point
c = (a + b) / 2
# Check if middle point is root
if equation(c) == 0.0:
break
# Decide the side to repeat the steps
if equation(c) * equation(a) < 0:
b = c
else:
a = c
return c
if __name__ == "__main__":
import doctest
doctest.testmod()
print(bisection(-2, 5))
print(bisection(0, 6))
================================================
FILE: maths/numerical_analysis/integration_by_simpson_approx.py
================================================
"""
Author : Syed Faizan ( 3rd Year IIIT Pune )
Github : faizan2700
Purpose : You have a function f(x) which takes a float and returns a float;
you have to integrate the function over the limits a to b.
The approximation proposed by Thomas Simpson in 1743 is one way to calculate
integration.
( read article : https://cp-algorithms.com/num_methods/simpson-integration.html )
simpson_integration() takes a function, lower_limit=a, upper_limit=b, precision and
returns the integral of the function over the given limits.
"""
# constants
# the more the number of steps the more accurate
N_STEPS = 1000
def f(x: float) -> float:
return x * x
"""
Summary of Simpson Approximation :
By Simpson's rule, the integral of f(x) dx over the limits a to b is approximately
(h / 3) * [f(x0) + 4 * f(x1) + 2 * f(x2) + 4 * f(x3) + 2 * f(x4) + ... + f(xn)]
where x0 = a
      xi = a + i * h
      xn = b
      h  = (b - a) / N_STEPS  (N_STEPS is an even number of subintervals)
"""
def simpson_integration(function, a: float, b: float, precision: int = 4) -> float:
"""
Args:
function : the function whose integral is desired
a : the lower limit of integration
b : the upper limit of integration
precision : the number of decimal places in the result (default is 4)
Returns:
result : the value of the approximated integration of function in range a to b
Raises:
AssertionError: function is not callable
AssertionError: a is not float or integer
AssertionError: function should return float or integer
AssertionError: b is not float or integer
AssertionError: precision is not positive integer
>>> simpson_integration(lambda x : x*x,1,2,3)
2.333
>>> simpson_integration(lambda x : x*x,'wrong_input',2,3)
Traceback (most recent call last):
...
AssertionError: a should be float or integer your input : wrong_input
>>> simpson_integration(lambda x : x*x,1,'wrong_input',3)
Traceback (most recent call last):
...
AssertionError: b should be float or integer your input : wrong_input
>>> simpson_integration(lambda x : x*x,1,2,'wrong_input')
Traceback (most recent call last):
...
AssertionError: precision should be positive integer your input : wrong_input
>>> simpson_integration('wrong_input',2,3,4)
Traceback (most recent call last):
...
AssertionError: the function(object) passed should be callable your input : ...
>>> simpson_integration(lambda x : x*x,3.45,3.2,1)
-2.8
>>> simpson_integration(lambda x : x*x,3.45,3.2,0)
Traceback (most recent call last):
...
AssertionError: precision should be positive integer your input : 0
>>> simpson_integration(lambda x : x*x,3.45,3.2,-1)
Traceback (most recent call last):
...
AssertionError: precision should be positive integer your input : -1
"""
assert callable(function), (
f"the function(object) passed should be callable your input : {function}"
)
assert isinstance(a, (float, int)), f"a should be float or integer your input : {a}"
assert isinstance(function(a), (float, int)), (
"the function should return integer or float return type of your function, "
f"{type(a)}"
)
assert isinstance(b, (float, int)), f"b should be float or integer your input : {b}"
assert isinstance(precision, int) and precision > 0, (
f"precision should be positive integer your input : {precision}"
)
# just applying the formula of simpson for approximate integration written in
# mentioned article in first comment of this file and above this function
h = (b - a) / N_STEPS
result = function(a) + function(b)
for i in range(1, N_STEPS):
a1 = a + h * i
result += function(a1) * (4 if i % 2 else 2)
result *= h / 3
return round(result, precision)
if __name__ == "__main__":
import doctest
doctest.testmod()
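# A minimal usage sketch (illustrative): the exact integral of x**2 on [0, 1]
# is 1/3, and with N_STEPS = 1000 the approximation agrees to four decimals:
#   simpson_integration(lambda x: x * x, 0, 1, 4)  # -> 0.3333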
================================================
FILE: maths/numerical_analysis/intersection.py
================================================
import math
from collections.abc import Callable
def intersection(function: Callable[[float], float], x0: float, x1: float) -> float:
"""
function is the f we want to find its root
x0 and x1 are two random starting points
>>> intersection(lambda x: x ** 3 - 1, -5, 5)
0.9999999999954654
>>> intersection(lambda x: x ** 3 - 1, 5, 5)
Traceback (most recent call last):
...
ZeroDivisionError: float division by zero, could not find root
>>> intersection(lambda x: x ** 3 - 1, 100, 200)
1.0000000000003888
>>> intersection(lambda x: x ** 2 - 4 * x + 3, 0, 2)
0.9999999998088019
>>> intersection(lambda x: x ** 2 - 4 * x + 3, 2, 4)
2.9999999998088023
>>> intersection(lambda x: x ** 2 - 4 * x + 3, 4, 1000)
3.0000000001786042
>>> intersection(math.sin, -math.pi, math.pi)
0.0
>>> intersection(math.cos, -math.pi, math.pi)
Traceback (most recent call last):
...
ZeroDivisionError: float division by zero, could not find root
"""
x_n: float = x0
x_n1: float = x1
while True:
if x_n == x_n1 or function(x_n1) == function(x_n):
raise ZeroDivisionError("float division by zero, could not find root")
x_n2: float = x_n1 - (
function(x_n1) / ((function(x_n1) - function(x_n)) / (x_n1 - x_n))
)
if abs(x_n2 - x_n1) < 10**-5:
return x_n2
x_n = x_n1
x_n1 = x_n2
def f(x: float) -> float:
"""
function is f(x) = x^3 - 2x - 5
>>> f(2)
-1.0
"""
return math.pow(x, 3) - (2 * x) - 5
if __name__ == "__main__":
print(intersection(f, 3, 3.5))
================================================
FILE: maths/numerical_analysis/nevilles_method.py
================================================
"""
Python program to show how to interpolate and evaluate a polynomial
using Neville's method.
Neville's method evaluates a polynomial that passes through a
given set of x and y points for a particular x value (x0) using the
Newton polynomial form.
Reference:
https://rpubs.com/aaronsc32/nevilles-method-polynomial-interpolation
"""
def neville_interpolate(x_points: list, y_points: list, x0: int) -> list:
"""
Interpolate and evaluate a polynomial using Neville's method.
Arguments:
x_points, y_points: Iterables of x and corresponding y points through
which the polynomial passes.
x0: The value of x to evaluate the polynomial for.
Return Value: A list of the approximated value and the Neville iterations
table respectively.
>>> import pprint
>>> neville_interpolate((1,2,3,4,6), (6,7,8,9,11), 5)[0]
10.0
>>> pprint.pprint(neville_interpolate((1,2,3,4,6), (6,7,8,9,11), 99)[1])
[[0, 6, 0, 0, 0],
[0, 7, 0, 0, 0],
[0, 8, 104.0, 0, 0],
[0, 9, 104.0, 104.0, 0],
[0, 11, 104.0, 104.0, 104.0]]
>>> neville_interpolate((1,2,3,4,6), (6,7,8,9,11), 99)[0]
104.0
>>> neville_interpolate((1,2,3,4,6), (6,7,8,9,11), '')
Traceback (most recent call last):
...
TypeError: unsupported operand type(s) for -: 'str' and 'int'
"""
n = len(x_points)
q = [[0] * n for i in range(n)]
for i in range(n):
q[i][1] = y_points[i]
for i in range(2, n):
for j in range(i, n):
q[j][i] = (
(x0 - x_points[j - i + 1]) * q[j][i - 1]
- (x0 - x_points[j]) * q[j - 1][i - 1]
) / (x_points[j] - x_points[j - i + 1])
return [q[n - 1][n - 1], q]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/numerical_analysis/newton_forward_interpolation.py
================================================
# https://www.geeksforgeeks.org/newton-forward-backward-interpolation/
from __future__ import annotations
import math
# for calculating u value
def ucal(u: float, p: int) -> float:
"""
>>> ucal(1, 2)
0
>>> ucal(1.1, 2)
0.11000000000000011
>>> ucal(1.2, 2)
0.23999999999999994
"""
temp = u
for i in range(1, p):
temp = temp * (u - i)
return temp
def main() -> None:
n = int(input("enter the numbers of values: "))
y: list[list[float]] = []
for _ in range(n):
y.append([])
for i in range(n):
for j in range(n):
y[i].append(j)
y[i][j] = 0
print("enter the values of parameters in a list: ")
x = list(map(int, input().split()))
print("enter the values of corresponding parameters: ")
for i in range(n):
y[i][0] = float(input())
value = int(input("enter the value to interpolate: "))
u = (value - x[0]) / (x[1] - x[0])
# for calculating forward difference table
for i in range(1, n):
for j in range(n - i):
y[j][i] = y[j + 1][i - 1] - y[j][i - 1]
summ = y[0][0]
for i in range(1, n):
summ += (ucal(u, i) * y[0][i]) / math.factorial(i)
print(f"the value at {value} is {summ}")
if __name__ == "__main__":
main()
================================================
FILE: maths/numerical_analysis/newton_raphson.py
================================================
"""
The Newton-Raphson method (aka the Newton method) is a root-finding algorithm that
approximates a root of a given real-valued function f(x). It is an iterative method
given by the formula
x_{n + 1} = x_n - f(x_n) / f'(x_n)
with the precision of the approximation increasing as the number of iterations increases.
Reference: https://en.wikipedia.org/wiki/Newton%27s_method
"""
from collections.abc import Callable
RealFunc = Callable[[float], float]
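# calc_derivative below approximates f'(x) with a symmetric (central) difference,
#   f'(x) ~= (f(x + delta_x / 2) - f(x - delta_x / 2)) / delta_x,
# whose truncation error shrinks quadratically as delta_x decreases.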
def calc_derivative(f: RealFunc, x: float, delta_x: float = 1e-3) -> float:
"""
Approximate the derivative of a function f(x) at a point x using the finite
difference method
>>> import math
>>> tolerance = 1e-5
>>> derivative = calc_derivative(lambda x: x**2, 2)
>>> math.isclose(derivative, 4, abs_tol=tolerance)
True
>>> derivative = calc_derivative(math.sin, 0)
>>> math.isclose(derivative, 1, abs_tol=tolerance)
True
"""
return (f(x + delta_x / 2) - f(x - delta_x / 2)) / delta_x
def newton_raphson(
f: RealFunc,
x0: float = 0,
max_iter: int = 100,
step: float = 1e-6,
max_error: float = 1e-6,
log_steps: bool = False,
) -> tuple[float, float, list[float]]:
"""
Find a root of the given function f using the Newton-Raphson method.
:param f: A real-valued single-variable function
:param x0: Initial guess
:param max_iter: Maximum number of iterations
:param step: Step size of x, used to approximate f'(x)
:param max_error: Maximum approximation error
:param log_steps: bool denoting whether to log intermediate steps
:return: A tuple containing the approximation, the error, and the intermediate
steps. If log_steps is False, then an empty list is returned for the third
element of the tuple.
:raises ZeroDivisionError: The derivative approaches 0.
:raises ArithmeticError: No solution exists, or the solution isn't found before the
iteration limit is reached.
>>> import math
>>> tolerance = 1e-15
>>> root, *_ = newton_raphson(lambda x: x**2 - 5*x + 2, 0.4, max_error=tolerance)
>>> math.isclose(root, (5 - math.sqrt(17)) / 2, abs_tol=tolerance)
True
>>> root, *_ = newton_raphson(lambda x: math.log(x) - 1, 2, max_error=tolerance)
>>> math.isclose(root, math.e, abs_tol=tolerance)
True
>>> root, *_ = newton_raphson(math.sin, 1, max_error=tolerance)
>>> math.isclose(root, 0, abs_tol=tolerance)
True
>>> newton_raphson(math.cos, 0)
Traceback (most recent call last):
...
ZeroDivisionError: No converging solution found, zero derivative
>>> newton_raphson(lambda x: x**2 + 1, 2)
Traceback (most recent call last):
...
ArithmeticError: No converging solution found, iteration limit reached
"""
def f_derivative(x: float) -> float:
return calc_derivative(f, x, step)
a = x0 # Set initial guess
steps = []
for _ in range(max_iter):
if log_steps: # Log intermediate steps
steps.append(a)
error = abs(f(a))
if error < max_error:
return a, error, steps
if f_derivative(a) == 0:
raise ZeroDivisionError("No converging solution found, zero derivative")
a -= f(a) / f_derivative(a) # Calculate next estimate
raise ArithmeticError("No converging solution found, iteration limit reached")
if __name__ == "__main__":
import doctest
from math import exp, tanh
doctest.testmod()
def func(x: float) -> float:
return tanh(x) ** 2 - exp(3 * x)
solution, err, steps = newton_raphson(
func, x0=10, max_iter=100, step=1e-6, log_steps=True
)
print(f"{solution=}, {err=}")
print("\n".join(str(x) for x in steps))
================================================
FILE: maths/numerical_analysis/numerical_integration.py
================================================
"""
Approximates the area under the curve using the trapezoidal rule
"""
from __future__ import annotations
from collections.abc import Callable
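# Each iteration below approximates one strip of the region with a trapezoid:
#   strip area ~= abs(f(x1) + f(x2)) / 2 * (x2 - x1)
# and the strips are summed across [x_start, x_end].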
def trapezoidal_area(
fnc: Callable[[float], float],
x_start: float,
x_end: float,
steps: int = 100,
) -> float:
"""
    Treats the curve as a collection of line segments and sums the areas of the
    trapezoids they form
:param fnc: a function which defines a curve
:param x_start: left end point to indicate the start of line segment
:param x_end: right end point to indicate end of line segment
:param steps: an accuracy gauge; more steps increases the accuracy
    :return: a float representing the approximate area under the curve
>>> def f(x):
... return 5
>>> '%.3f' % trapezoidal_area(f, 12.0, 14.0, 1000)
'10.000'
>>> def f(x):
... return 9*x**2
>>> '%.4f' % trapezoidal_area(f, -4.0, 0, 10000)
'192.0000'
>>> '%.4f' % trapezoidal_area(f, -4.0, 4.0, 10000)
'384.0000'
"""
x1 = x_start
fx1 = fnc(x_start)
area = 0.0
for _ in range(steps):
# Approximates small segments of curve as linear and solve
# for trapezoidal area
x2 = (x_end - x_start) / steps + x1
fx2 = fnc(x2)
area += abs(fx2 + fx1) * (x2 - x1) / 2
# Increment step
x1 = x2
fx1 = fx2
return area
if __name__ == "__main__":
def f(x):
return x**3
print("f(x) = x^3")
print("The area between the curve, x = -10, x = 10 and the x axis is:")
i = 10
while i <= 100000:
area = trapezoidal_area(f, -5, 5, i)
print(f"with {i} steps: {area}")
i *= 10
================================================
FILE: maths/numerical_analysis/proper_fractions.py
================================================
from math import gcd
def proper_fractions(denominator: int) -> list[str]:
"""
this algorithm returns a list of proper fractions, in the
range between 0 and 1, which can be formed with the given denominator
https://en.wikipedia.org/wiki/Fraction#Proper_and_improper_fractions
>>> proper_fractions(10)
['1/10', '3/10', '7/10', '9/10']
>>> proper_fractions(5)
['1/5', '2/5', '3/5', '4/5']
>>> proper_fractions(-15)
Traceback (most recent call last):
...
ValueError: The Denominator Cannot be less than 0
>>> proper_fractions(0)
[]
>>> proper_fractions(1.2)
Traceback (most recent call last):
...
ValueError: The Denominator must be an integer
"""
if denominator < 0:
raise ValueError("The Denominator Cannot be less than 0")
elif isinstance(denominator, float):
raise ValueError("The Denominator must be an integer")
return [
f"{numerator}/{denominator}"
for numerator in range(1, denominator)
if gcd(numerator, denominator) == 1
]
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: maths/numerical_analysis/runge_kutta.py
================================================
import numpy as np
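# Classical fourth-order Runge-Kutta (RK4) update implemented below:
#   y[k + 1] = y[k] + (h / 6) * (k1 + 2 * k2 + 2 * k3 + k4)
# where k1..k4 are slope samples taken at the start, middle (twice) and end of
# each step of size h.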
def runge_kutta(f, y0, x0, h, x_end):
"""
Calculate the numeric solution at each step to the ODE f(x, y) using RK4
https://en.wikipedia.org/wiki/Runge-Kutta_methods
Arguments:
f -- The ode as a function of x and y
y0 -- the initial value for y
x0 -- the initial value for x
h -- the stepsize
x_end -- the end value for x
>>> # the exact solution is math.exp(x)
>>> def f(x, y):
... return y
>>> y0 = 1
>>> y = runge_kutta(f, y0, 0.0, 0.01, 5)
>>> float(y[-1])
148.41315904125113
"""
n = int(np.ceil((x_end - x0) / h))
y = np.zeros((n + 1,))
y[0] = y0
x = x0
for k in range(n):
k1 = f(x, y[k])
k2 = f(x + 0.5 * h, y[k] + 0.5 * h * k1)
k3 = f(x + 0.5 * h, y[k] + 0.5 * h * k2)
k4 = f(x + h, y[k] + h * k3)
y[k + 1] = y[k] + (1 / 6) * h * (k1 + 2 * k2 + 2 * k3 + k4)
x += h
return y
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/numerical_analysis/runge_kutta_fehlberg_45.py
================================================
"""
Use the Runge-Kutta-Fehlberg method to solve Ordinary Differential Equations.
"""
from collections.abc import Callable
import numpy as np
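# Note: this implementation advances the solution with the fifth-order Fehlberg
# combination (weights 16/135, 6656/12825, 28561/56430, -9/50, 2/55) at a fixed
# step size; the embedded fourth-order estimate used for adaptive step control in
# the full RKF45 scheme is not computed here.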
def runge_kutta_fehlberg_45(
func: Callable,
x_initial: float,
y_initial: float,
step_size: float,
x_final: float,
) -> np.ndarray:
"""
    Solve an ordinary differential equation using the Runge-Kutta-Fehlberg method
    (rkf45) of order 5.
https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method
args:
func: An ordinary differential equation (ODE) as function of x and y.
x_initial: The initial value of x.
y_initial: The initial value of y.
step_size: The increment value of x.
x_final: The final value of x.
Returns:
Solution of y at each nodal point
# exact value of y[1] is tan(0.2) = 0.2027100937470787
>>> def f(x, y):
... return 1 + y**2
>>> y = runge_kutta_fehlberg_45(f, 0, 0, 0.2, 1)
>>> float(y[1])
0.2027100937470787
>>> def f(x,y):
... return x
>>> y = runge_kutta_fehlberg_45(f, -1, 0, 0.2, 0)
>>> float(y[1])
-0.18000000000000002
>>> y = runge_kutta_fehlberg_45(5, 0, 0, 0.1, 1)
Traceback (most recent call last):
...
TypeError: 'int' object is not callable
>>> def f(x, y):
... return x + y
>>> y = runge_kutta_fehlberg_45(f, 0, 0, 0.2, -1)
Traceback (most recent call last):
...
ValueError: The final value of x must be greater than initial value of x.
>>> def f(x, y):
... return x
>>> y = runge_kutta_fehlberg_45(f, -1, 0, -0.2, 0)
Traceback (most recent call last):
...
ValueError: Step size must be positive.
"""
if x_initial >= x_final:
raise ValueError(
"The final value of x must be greater than initial value of x."
)
if step_size <= 0:
raise ValueError("Step size must be positive.")
n = int((x_final - x_initial) / step_size)
y = np.zeros(
(n + 1),
)
x = np.zeros(n + 1)
y[0] = y_initial
x[0] = x_initial
for i in range(n):
k1 = step_size * func(x[i], y[i])
k2 = step_size * func(x[i] + step_size / 4, y[i] + k1 / 4)
k3 = step_size * func(
x[i] + (3 / 8) * step_size, y[i] + (3 / 32) * k1 + (9 / 32) * k2
)
k4 = step_size * func(
x[i] + (12 / 13) * step_size,
y[i] + (1932 / 2197) * k1 - (7200 / 2197) * k2 + (7296 / 2197) * k3,
)
k5 = step_size * func(
x[i] + step_size,
y[i] + (439 / 216) * k1 - 8 * k2 + (3680 / 513) * k3 - (845 / 4104) * k4,
)
k6 = step_size * func(
x[i] + step_size / 2,
y[i]
- (8 / 27) * k1
+ 2 * k2
- (3544 / 2565) * k3
+ (1859 / 4104) * k4
- (11 / 40) * k5,
)
y[i + 1] = (
y[i]
+ (16 / 135) * k1
+ (6656 / 12825) * k3
+ (28561 / 56430) * k4
- (9 / 50) * k5
+ (2 / 55) * k6
)
x[i + 1] = step_size + x[i]
return y
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/numerical_analysis/runge_kutta_gills.py
================================================
"""
Use the Runge-Kutta-Gill's method of order 4 to solve Ordinary Differential Equations.
https://www.geeksforgeeks.org/gills-4th-order-method-to-solve-differential-equations/
Author : Ravi Kumar
"""
from collections.abc import Callable
from math import sqrt
import numpy as np
def runge_kutta_gills(
func: Callable[[float, float], float],
x_initial: float,
y_initial: float,
step_size: float,
x_final: float,
) -> np.ndarray:
"""
    Solve an ordinary differential equation using Runge-Kutta-Gill's method of order 4.
args:
func: An ordinary differential equation (ODE) as function of x and y.
x_initial: The initial value of x.
y_initial: The initial value of y.
step_size: The increment value of x.
x_final: The final value of x.
Returns:
Solution of y at each nodal point
>>> def f(x, y):
... return (x-y)/2
>>> y = runge_kutta_gills(f, 0, 3, 0.2, 5)
>>> float(y[-1])
3.4104259225717537
>>> def f(x,y):
... return x
>>> y = runge_kutta_gills(f, -1, 0, 0.2, 0)
>>> y
array([ 0. , -0.18, -0.32, -0.42, -0.48, -0.5 ])
>>> def f(x, y):
... return x + y
>>> y = runge_kutta_gills(f, 0, 0, 0.2, -1)
Traceback (most recent call last):
...
ValueError: The final value of x must be greater than initial value of x.
>>> def f(x, y):
... return x
>>> y = runge_kutta_gills(f, -1, 0, -0.2, 0)
Traceback (most recent call last):
...
ValueError: Step size must be positive.
"""
if x_initial >= x_final:
raise ValueError(
"The final value of x must be greater than initial value of x."
)
if step_size <= 0:
raise ValueError("Step size must be positive.")
n = int((x_final - x_initial) / step_size)
y = np.zeros(n + 1)
y[0] = y_initial
for i in range(n):
k1 = step_size * func(x_initial, y[i])
k2 = step_size * func(x_initial + step_size / 2, y[i] + k1 / 2)
k3 = step_size * func(
x_initial + step_size / 2,
y[i] + (-0.5 + 1 / sqrt(2)) * k1 + (1 - 1 / sqrt(2)) * k2,
)
k4 = step_size * func(
x_initial + step_size, y[i] - (1 / sqrt(2)) * k2 + (1 + 1 / sqrt(2)) * k3
)
y[i + 1] = y[i] + (k1 + (2 - sqrt(2)) * k2 + (2 + sqrt(2)) * k3 + k4) / 6
x_initial += step_size
return y
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/numerical_analysis/secant_method.py
================================================
"""
Implementing Secant method in Python
Author: dimgrichr
"""
from math import exp
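# The secant update used below replaces the derivative in Newton's method with a
# finite difference through the two most recent iterates:
#   x_{n+1} = x_n - f(x_n) * (x_n - x_{n-1}) / (f(x_n) - f(x_{n-1}))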
def f(x: float) -> float:
"""
>>> f(5)
39.98652410600183
"""
return 8 * x - 2 * exp(-x)
def secant_method(lower_bound: float, upper_bound: float, repeats: int) -> float:
"""
>>> secant_method(1, 3, 2)
0.2139409276214589
"""
x0 = lower_bound
x1 = upper_bound
for _ in range(repeats):
x0, x1 = x1, x1 - (f(x1) * (x1 - x0)) / (f(x1) - f(x0))
return x1
if __name__ == "__main__":
print(f"Example: {secant_method(1, 3, 2)}")
================================================
FILE: maths/numerical_analysis/simpson_rule.py
================================================
"""
Numerical integration or quadrature for a smooth function f with known values at x_i
This method is the classical approach of summing 'Equally Spaced Abscissas'
method 2:
"Simpson Rule"
"""
def method_2(boundary: list[int], steps: int) -> float:
# "Simpson Rule"
# int(f) = delta_x/2 * (b-a)/3*(f1 + 4f2 + 2f_3 + ... + fn)
"""
Calculate the definite integral of a function using Simpson's Rule.
:param boundary: A list containing the lower and upper bounds of integration.
:param steps: The number of steps or resolution for the integration.
:return: The approximate integral value.
>>> round(method_2([0, 2, 4], 10), 10)
2.6666666667
>>> round(method_2([2, 0], 10), 10)
-0.2666666667
>>> round(method_2([-2, -1], 10), 10)
2.172
>>> round(method_2([0, 1], 10), 10)
0.3333333333
>>> round(method_2([0, 2], 10), 10)
2.6666666667
>>> round(method_2([0, 2], 100), 10)
2.5621226667
>>> round(method_2([0, 1], 1000), 10)
0.3320026653
>>> round(method_2([0, 2], 0), 10)
Traceback (most recent call last):
...
ZeroDivisionError: Number of steps must be greater than zero
>>> round(method_2([0, 2], -10), 10)
Traceback (most recent call last):
...
ZeroDivisionError: Number of steps must be greater than zero
"""
if steps <= 0:
raise ZeroDivisionError("Number of steps must be greater than zero")
h = (boundary[1] - boundary[0]) / steps
a = boundary[0]
b = boundary[1]
x_i = make_points(a, b, h)
y = 0.0
y += (h / 3.0) * f(a)
cnt = 2
for i in x_i:
y += (h / 3) * (4 - 2 * (cnt % 2)) * f(i)
cnt += 1
y += (h / 3.0) * f(b)
return y
def make_points(a, b, h):
x = a + h
while x < (b - h):
yield x
x = x + h
def f(x): # enter your function here
y = (x - 0) * (x - 0)
return y
def main():
a = 0.0 # Lower bound of integration
b = 1.0 # Upper bound of integration
    steps = 10 # number of steps or resolution
boundary = [a, b] # boundary of integration
y = method_2(boundary, steps)
print(f"y = {y}")
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: maths/numerical_analysis/square_root.py
================================================
import math
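# Newton's method applied to f(x) = x^2 - a gives the classic Heron iteration:
#   x_{n+1} = x_n - (x_n^2 - a) / (2 * x_n) = (x_n + a / x_n) / 2
# which square_root_iterative() below performs until successive values agree
# within the requested tolerance.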
def fx(x: float, a: float) -> float:
return math.pow(x, 2) - a
def fx_derivative(x: float) -> float:
return 2 * x
def get_initial_point(a: float) -> float:
start = 2.0
while start <= a:
start = math.pow(start, 2)
return start
def square_root_iterative(
a: float, max_iter: int = 9999, tolerance: float = 1e-14
) -> float:
"""
Square root approximated using Newton's method.
https://en.wikipedia.org/wiki/Newton%27s_method
>>> all(abs(square_root_iterative(i) - math.sqrt(i)) <= 1e-14 for i in range(500))
True
>>> square_root_iterative(-1)
Traceback (most recent call last):
...
ValueError: math domain error
>>> square_root_iterative(4)
2.0
>>> square_root_iterative(3.2)
1.788854381999832
>>> square_root_iterative(140)
11.832159566199232
"""
if a < 0:
raise ValueError("math domain error")
value = get_initial_point(a)
for _ in range(max_iter):
prev_value = value
value = value - fx(value, a) / fx_derivative(value)
if abs(prev_value - value) < tolerance:
return value
return value
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: maths/numerical_analysis/weierstrass_method.py
================================================
from collections.abc import Callable
import numpy as np
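# Durand-Kerner update applied simultaneously to every root estimate w_i:
#   w_i <- w_i - p(w_i) / prod_{j != i} (w_i - w_j)
# The loop below evaluates the numerator p(w_i) and the product denominator for
# all estimates at once using NumPy arrays.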
def weierstrass_method(
polynomial: Callable[[np.ndarray], np.ndarray],
degree: int,
roots: np.ndarray | None = None,
max_iter: int = 100,
) -> np.ndarray:
"""
Approximates all complex roots of a polynomial using the
Weierstrass (Durand-Kerner) method.
Args:
polynomial: A function that takes a NumPy array of complex numbers and returns
the polynomial values at those points.
degree: Degree of the polynomial (number of roots to find). Must be ≥ 1.
roots: Optional initial guess as a NumPy array of complex numbers.
Must have length equal to 'degree'.
If None, perturbed complex roots of unity are used.
max_iter: Number of iterations to perform (default: 100).
Returns:
np.ndarray: Array of approximated complex roots.
Raises:
ValueError: If degree < 1, or if initial roots length doesn't match the degree.
Note:
- Root updates are clipped to prevent numerical overflow.
Example:
>>> import numpy as np
>>> def check(poly, degree, expected):
... roots = weierstrass_method(poly, degree)
... return np.allclose(np.sort(roots), np.sort(expected))
>>> check(
... lambda x: x**2 - 1,
... 2,
... np.array([-1, 1]))
True
>>> check(
... lambda x: x**3 - 4.5*x**2 + 5.75*x - 1.875,
... 3,
... np.array([1.5, 0.5, 2.5])
... )
True
See Also:
https://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method
"""
if degree < 1:
raise ValueError("Degree of the polynomial must be at least 1.")
if roots is None:
# Use perturbed complex roots of unity as initial guesses
rng = np.random.default_rng()
roots = np.array(
[
np.exp(2j * np.pi * i / degree) * (1 + 1e-3 * rng.random())
for i in range(degree)
],
dtype=np.complex128,
)
else:
roots = np.asarray(roots, dtype=np.complex128)
if roots.shape[0] != degree:
raise ValueError(
"Length of initial roots must match the degree of the polynomial."
)
for _ in range(max_iter):
# Construct the product denominator for each root
denominator = np.array([root - roots for root in roots], dtype=np.complex128)
np.fill_diagonal(denominator, 1.0) # Avoid zero in diagonal
denominator = np.prod(denominator, axis=1)
# Evaluate polynomial at each root
numerator = polynomial(roots).astype(np.complex128)
# Compute update and clip to prevent overflow
delta = numerator / denominator
delta = np.clip(delta, -1e10, 1e10)
roots -= delta
return roots
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/odd_sieve.py
================================================
from itertools import compress, repeat
from math import ceil, sqrt
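# Index mapping used by the sieve below: entry k of the bytearray represents the
# odd number 2 * k + 3, so the odd number m lives at index (m >> 1) - 1 and the
# sieve only needs (num >> 1) - 1 entries to cover the odd numbers in [3, num).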
def odd_sieve(num: int) -> list[int]:
"""
Returns the prime numbers < `num`. The prime numbers are calculated using an
odd sieve implementation of the Sieve of Eratosthenes algorithm
(see for reference https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes).
>>> odd_sieve(2)
[]
>>> odd_sieve(3)
[2]
>>> odd_sieve(10)
[2, 3, 5, 7]
>>> odd_sieve(20)
[2, 3, 5, 7, 11, 13, 17, 19]
"""
if num <= 2:
return []
if num == 3:
return [2]
# Odd sieve for numbers in range [3, num - 1]
sieve = bytearray(b"\x01") * ((num >> 1) - 1)
for i in range(3, int(sqrt(num)) + 1, 2):
if sieve[(i >> 1) - 1]:
i_squared = i**2
sieve[(i_squared >> 1) - 1 :: i] = repeat(
0, ceil((num - i_squared) / (i << 1))
)
return [2, *list(compress(range(3, num, 2), sieve))]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/perfect_cube.py
================================================
def perfect_cube(n: int) -> bool:
"""
Check if a number is a perfect cube or not.
>>> perfect_cube(27)
True
>>> perfect_cube(4)
False
"""
val = n ** (1 / 3)
return (val * val * val) == n
def perfect_cube_binary_search(n: int) -> bool:
"""
Check if a number is a perfect cube or not using binary search.
Time complexity : O(Log(n))
Space complexity: O(1)
>>> perfect_cube_binary_search(27)
True
>>> perfect_cube_binary_search(64)
True
>>> perfect_cube_binary_search(4)
False
>>> perfect_cube_binary_search("a")
Traceback (most recent call last):
...
TypeError: perfect_cube_binary_search() only accepts integers
>>> perfect_cube_binary_search(0.1)
Traceback (most recent call last):
...
TypeError: perfect_cube_binary_search() only accepts integers
"""
if not isinstance(n, int):
raise TypeError("perfect_cube_binary_search() only accepts integers")
if n < 0:
n = -n
left = 0
right = n
while left <= right:
mid = left + (right - left) // 2
if mid * mid * mid == n:
return True
elif mid * mid * mid < n:
left = mid + 1
else:
right = mid - 1
return False
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/perfect_number.py
================================================
"""
== Perfect Number ==
In number theory, a perfect number is a positive integer that is equal to the sum of
its positive divisors, excluding the number itself.
For example: 6 ==> divisors[1, 2, 3, 6]
Excluding 6, the sum(divisors) is 1 + 2 + 3 = 6
So, 6 is a Perfect Number
Other examples of Perfect Numbers: 28, 496, 8128, ...
https://en.wikipedia.org/wiki/Perfect_number
"""
def perfect(number: int) -> bool:
"""
Check if a number is a perfect number.
A perfect number is a positive integer that is equal to the sum of its proper
divisors (excluding itself).
Args:
number: The number to be checked.
Returns:
    True if the number is a perfect number; otherwise, False.
    Start from 1 because dividing by 0 will raise ZeroDivisionError.
    Apart from the number itself, no divisor of a number can exceed half of it.
    For example, the largest proper divisor of 6 is 3.
Examples:
>>> perfect(27)
False
>>> perfect(28)
True
>>> perfect(29)
False
>>> perfect(6)
True
>>> perfect(12)
False
>>> perfect(496)
True
>>> perfect(8128)
True
>>> perfect(0)
False
>>> perfect(-1)
False
>>> perfect(33550336) # Large perfect number
True
>>> perfect(33550337) # Just above a large perfect number
False
>>> perfect(1) # Edge case: 1 is not a perfect number
False
>>> perfect("123") # String representation of a number
Traceback (most recent call last):
...
ValueError: number must be an integer
>>> perfect(12.34)
Traceback (most recent call last):
...
ValueError: number must be an integer
>>> perfect("Hello")
Traceback (most recent call last):
...
ValueError: number must be an integer
"""
if not isinstance(number, int):
raise ValueError("number must be an integer")
if number <= 0:
return False
return sum(i for i in range(1, number // 2 + 1) if number % i == 0) == number
if __name__ == "__main__":
from doctest import testmod
testmod()
print("Program to check whether a number is a Perfect number or not...")
try:
number = int(input("Enter a positive integer: ").strip())
except ValueError:
msg = "number must be an integer"
raise ValueError(msg)
print(f"{number} is {'' if perfect(number) else 'not '}a Perfect Number.")
================================================
FILE: maths/perfect_square.py
================================================
import math
def perfect_square(num: int) -> bool:
"""
Check if a number is perfect square number or not
:param num: the number to be checked
:return: True if number is square number, otherwise False
>>> perfect_square(9)
True
>>> perfect_square(16)
True
>>> perfect_square(1)
True
>>> perfect_square(0)
True
>>> perfect_square(10)
False
"""
return math.sqrt(num) * math.sqrt(num) == num
def perfect_square_binary_search(n: int) -> bool:
"""
Check if a number is perfect square using binary search.
Time complexity : O(Log(n))
Space complexity: O(1)
>>> perfect_square_binary_search(9)
True
>>> perfect_square_binary_search(16)
True
>>> perfect_square_binary_search(1)
True
>>> perfect_square_binary_search(0)
True
>>> perfect_square_binary_search(10)
False
>>> perfect_square_binary_search(-1)
False
>>> perfect_square_binary_search(1.1)
False
>>> perfect_square_binary_search("a")
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'int' and 'str'
>>> perfect_square_binary_search(None)
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'int' and 'NoneType'
>>> perfect_square_binary_search([])
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'int' and 'list'
"""
left = 0
right = n
while left <= right:
mid = (left + right) // 2
if mid**2 == n:
return True
elif mid**2 > n:
right = mid - 1
else:
left = mid + 1
return False
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/persistence.py
================================================
def multiplicative_persistence(num: int) -> int:
"""
    Return the multiplicative persistence of a given number.
https://en.wikipedia.org/wiki/Persistence_of_a_number
>>> multiplicative_persistence(217)
2
>>> multiplicative_persistence(-1)
Traceback (most recent call last):
...
ValueError: multiplicative_persistence() does not accept negative values
>>> multiplicative_persistence("long number")
Traceback (most recent call last):
...
ValueError: multiplicative_persistence() only accepts integral values
"""
if not isinstance(num, int):
raise ValueError("multiplicative_persistence() only accepts integral values")
if num < 0:
raise ValueError("multiplicative_persistence() does not accept negative values")
steps = 0
num_string = str(num)
while len(num_string) != 1:
numbers = [int(i) for i in num_string]
total = 1
for i in range(len(numbers)):
total *= numbers[i]
num_string = str(total)
steps += 1
return steps
def additive_persistence(num: int) -> int:
"""
    Return the additive persistence of a given number.
https://en.wikipedia.org/wiki/Persistence_of_a_number
>>> additive_persistence(199)
3
>>> additive_persistence(-1)
Traceback (most recent call last):
...
ValueError: additive_persistence() does not accept negative values
>>> additive_persistence("long number")
Traceback (most recent call last):
...
ValueError: additive_persistence() only accepts integral values
"""
if not isinstance(num, int):
raise ValueError("additive_persistence() only accepts integral values")
if num < 0:
raise ValueError("additive_persistence() does not accept negative values")
steps = 0
num_string = str(num)
while len(num_string) != 1:
numbers = [int(i) for i in num_string]
total = 0
for i in range(len(numbers)):
total += numbers[i]
num_string = str(total)
steps += 1
return steps
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/pi_generator.py
================================================
def calculate_pi(limit: int) -> str:
"""
https://en.wikipedia.org/wiki/Leibniz_formula_for_%CF%80
Leibniz Formula for Pi
The Leibniz formula is the special case arctan(1) = pi / 4.
Leibniz's formula converges extremely slowly: it exhibits sublinear convergence.
Convergence (https://en.wikipedia.org/wiki/Leibniz_formula_for_%CF%80#Convergence)
    We cannot test the result of an interrupted, incomplete generation.
    https://en.wikipedia.org/wiki/Leibniz_formula_for_%CF%80#Unusual_behaviour
    The errors can in fact be predicted, but computing them to arbitrary accuracy
    would itself require unbounded work.
    The output is a string so that every generated digit can be stored exactly.
>>> import math
>>> float(calculate_pi(15)) == math.pi
True
    Since we cannot predict the errors of a truncated series, nor generate an
    infinite alternating series in full, we'll need math.isclose()
>>> math.isclose(float(calculate_pi(50)), math.pi)
True
>>> math.isclose(float(calculate_pi(100)), math.pi)
True
Since math.pi contains only 16 digits, here are some tests with known values:
>>> calculate_pi(50)
'3.14159265358979323846264338327950288419716939937510'
>>> calculate_pi(80)
'3.14159265358979323846264338327950288419716939937510582097494459230781640628620899'
"""
# Variables used for the iteration process
q = 1
r = 0
t = 1
k = 1
n = 3
m = 3
decimal = limit
counter = 0
result = ""
# We can't compare against anything if we make a generator,
# so we'll stick with plain return logic
while counter != decimal + 1:
if 4 * q + r - t < n * t:
result += str(n)
if counter == 0:
result += "."
if decimal == counter:
break
counter += 1
nr = 10 * (r - n * t)
n = ((10 * (3 * q + r)) // t) - 10 * n
q *= 10
r = nr
else:
nr = (2 * q + r) * m
nn = (q * (7 * k) + 2 + (r * m)) // (t * m)
q *= k
t *= m
m += 2
k += 1
n = nn
r = nr
return result
def main() -> None:
print(f"{calculate_pi(50) = }")
import doctest
doctest.testmod()
if __name__ == "__main__":
main()
================================================
FILE: maths/pi_monte_carlo_estimation.py
================================================
import random
class Point:
def __init__(self, x: float, y: float) -> None:
self.x = x
self.y = y
def is_in_unit_circle(self) -> bool:
"""
True, if the point lies in the unit circle
False, otherwise
"""
return (self.x**2 + self.y**2) <= 1
@classmethod
def random_unit_square(cls):
"""
Generates a point randomly drawn from the unit square [0, 1) x [0, 1).
"""
return cls(x=random.random(), y=random.random())
def estimate_pi(number_of_simulations: int) -> float:
"""
Generates an estimate of the mathematical constant PI.
See https://en.wikipedia.org/wiki/Monte_Carlo_method#Overview
The estimate is generated by Monte Carlo simulations. Let U be uniformly drawn from
the unit square [0, 1) x [0, 1). The probability that U lies in the unit circle is:
P[U in unit circle] = 1/4 PI
and therefore
PI = 4 * P[U in unit circle]
We can get an estimate of the probability P[U in unit circle].
See https://en.wikipedia.org/wiki/Empirical_probability by:
1. Draw a point uniformly from the unit square.
2. Repeat the first step n times and count the number of points in the unit
circle, which is called m.
3. An estimate of P[U in unit circle] is m/n
"""
if number_of_simulations < 1:
raise ValueError("At least one simulation is necessary to estimate PI.")
number_in_unit_circle = 0
for _ in range(number_of_simulations):
random_point = Point.random_unit_square()
if random_point.is_in_unit_circle():
number_in_unit_circle += 1
return 4 * number_in_unit_circle / number_of_simulations
if __name__ == "__main__":
# import doctest
# doctest.testmod()
from math import pi
prompt = "Please enter the desired number of Monte Carlo simulations: "
my_pi = estimate_pi(int(input(prompt).strip()))
print(f"An estimate of PI is {my_pi} with an error of {abs(my_pi - pi)}")
================================================
FILE: maths/points_are_collinear_3d.py
================================================
"""
Check if three points are collinear in 3D.
In short, the idea is that we are able to create a triangle using three points,
and the area of that triangle can determine if the three points are collinear or not.
First, we create two vectors with the same initial point from the three points,
then we will calculate the cross-product of them.
The length of the cross vector is numerically equal to the area of a parallelogram.
Finally, the area of the triangle is equal to half of the area of the parallelogram.
Since we are only differentiating between zero and anything else,
we can get rid of the square root when calculating the length of the vector,
and also the division by two at the end.
From a second perspective, if the two vectors are parallel and overlapping,
we can't get a nonzero perpendicular vector,
since there will be an infinite number of orthogonal vectors.
To simplify the solution we will not calculate the length,
but we will decide directly from the vector whether it is equal to (0, 0, 0) or not.
Read More:
https://math.stackexchange.com/a/1951650
"""
Vector3d = tuple[float, float, float]
Point3d = tuple[float, float, float]
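# Worked example of the idea above: for A = (0, 0, 0), B = (1, 1, 1), C = (2, 2, 2)
# the vectors AB = (1, 1, 1) and AC = (2, 2, 2) are parallel, their cross product
# is (0, 0, 0), and the three points are therefore collinear.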
def create_vector(end_point1: Point3d, end_point2: Point3d) -> Vector3d:
"""
Pass two points to get the vector from them in the form (x, y, z).
>>> create_vector((0, 0, 0), (1, 1, 1))
(1, 1, 1)
>>> create_vector((45, 70, 24), (47, 32, 1))
(2, -38, -23)
>>> create_vector((-14, -1, -8), (-7, 6, 4))
(7, 7, 12)
"""
x = end_point2[0] - end_point1[0]
y = end_point2[1] - end_point1[1]
z = end_point2[2] - end_point1[2]
return (x, y, z)
def get_3d_vectors_cross(ab: Vector3d, ac: Vector3d) -> Vector3d:
"""
Get the cross of the two vectors AB and AC.
I used determinant of 2x2 to get the determinant of the 3x3 matrix in the process.
Read More:
https://en.wikipedia.org/wiki/Cross_product
https://en.wikipedia.org/wiki/Determinant
>>> get_3d_vectors_cross((3, 4, 7), (4, 9, 2))
(-55, 22, 11)
>>> get_3d_vectors_cross((1, 1, 1), (1, 1, 1))
(0, 0, 0)
>>> get_3d_vectors_cross((-4, 3, 0), (3, -9, -12))
(-36, -48, 27)
>>> get_3d_vectors_cross((17.67, 4.7, 6.78), (-9.5, 4.78, -19.33))
(-123.2594, 277.15110000000004, 129.11260000000001)
"""
x = ab[1] * ac[2] - ab[2] * ac[1] # *i
y = (ab[0] * ac[2] - ab[2] * ac[0]) * -1 # *j
z = ab[0] * ac[1] - ab[1] * ac[0] # *k
return (x, y, z)
def is_zero_vector(vector: Vector3d, accuracy: int) -> bool:
"""
Check if vector is equal to (0, 0, 0) or not.
    Because of floating-point rounding we will almost never get an exact zero
    vector, so we round each component of the vector to the given accuracy,
    because we want a result that is either True or False.
In other applications, we can return a float that represents the collinearity ratio.
>>> is_zero_vector((0, 0, 0), accuracy=10)
True
>>> is_zero_vector((15, 74, 32), accuracy=10)
False
>>> is_zero_vector((-15, -74, -32), accuracy=10)
False
"""
return tuple(round(x, accuracy) for x in vector) == (0, 0, 0)
def are_collinear(a: Point3d, b: Point3d, c: Point3d, accuracy: int = 10) -> bool:
"""
Check if three points are collinear or not.
1- Create two vectors AB and AC.
2- Get the cross vector of the two vectors.
3- Calculate the length of the cross vector.
4- If the length is zero then the points are collinear, else they are not.
The use of the accuracy parameter is explained in is_zero_vector docstring.
>>> are_collinear((4.802293498137402, 3.536233125455244, 0),
... (-2.186788107953106, -9.24561398001649, 7.141509524846482),
... (1.530169574640268, -2.447927606600034, 3.343487096469054))
True
>>> are_collinear((-6, -2, 6),
... (6.200213806439997, -4.930157614926678, -4.482371908289856),
... (-4.085171149525941, -2.459889509029438, 4.354787180795383))
True
>>> are_collinear((2.399001826862445, -2.452009976680793, 4.464656666157666),
... (-3.682816335934376, 5.753788986533145, 9.490993909044244),
... (1.962903518985307, 3.741415730125627, 7))
False
>>> are_collinear((1.875375340689544, -7.268426006071538, 7.358196269835993),
... (-3.546599383667157, -4.630005261513976, 3.208784032924246),
... (-2.564606140206386, 3.937845170672183, 7))
False
"""
ab = create_vector(a, b)
ac = create_vector(a, c)
return is_zero_vector(get_3d_vectors_cross(ab, ac), accuracy)
================================================
FILE: maths/pollard_rho.py
================================================
from __future__ import annotations
from math import gcd
def pollard_rho(
num: int,
seed: int = 2,
step: int = 1,
attempts: int = 3,
) -> int | None:
"""
Use Pollard's Rho algorithm to return a nontrivial factor of ``num``.
The returned factor may be composite and require further factorization.
    The algorithm will return None if it fails to find a factor within
    the specified number of attempts.
If ``num`` is prime, this algorithm is guaranteed to return None.
https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm
>>> pollard_rho(18446744073709551617)
274177
>>> pollard_rho(97546105601219326301)
9876543191
>>> pollard_rho(100)
2
>>> pollard_rho(17)
>>> pollard_rho(17**3)
17
>>> pollard_rho(17**3, attempts=1)
>>> pollard_rho(3*5*7)
21
>>> pollard_rho(1)
Traceback (most recent call last):
...
ValueError: The input value cannot be less than 2
"""
# A value less than 2 can cause an infinite loop in the algorithm.
if num < 2:
raise ValueError("The input value cannot be less than 2")
# Because of the relationship between ``f(f(x))`` and ``f(x)``, this
# algorithm struggles to find factors that are divisible by two.
# As a workaround, we specifically check for two and even inputs.
# See: https://math.stackexchange.com/a/2856214/165820
if num > 2 and num % 2 == 0:
return 2
# Pollard's Rho algorithm requires a function that returns pseudorandom
# values between 0 <= X < ``num``. It doesn't need to be random in the
# sense that the output value is cryptographically secure or difficult
# to calculate, it only needs to be random in the sense that all output
# values should be equally likely to appear.
# For this reason, Pollard suggested using ``f(x) = (x**2 - 1) % num``
# However, the success of Pollard's algorithm isn't guaranteed and is
# determined in part by the initial seed and the chosen random function.
# To make retries easier, we will instead use ``f(x) = (x**2 + C) % num``
# where ``C`` is a value that we can modify between each attempt.
def rand_fn(value: int, step: int, modulus: int) -> int:
"""
Returns a pseudorandom value modulo ``modulus`` based on the
input ``value`` and attempt-specific ``step`` size.
>>> rand_fn(0, 0, 0)
Traceback (most recent call last):
...
ZeroDivisionError: integer division or modulo by zero
>>> rand_fn(1, 2, 3)
0
>>> rand_fn(0, 10, 7)
3
>>> rand_fn(1234, 1, 17)
16
"""
return (pow(value, 2) + step) % modulus
for _ in range(attempts):
# These track the position within the cycle detection logic.
tortoise = seed
hare = seed
while True:
# At each iteration, the tortoise moves one step and the hare moves two.
tortoise = rand_fn(tortoise, step, num)
hare = rand_fn(hare, step, num)
hare = rand_fn(hare, step, num)
# At some point both the tortoise and the hare will enter a cycle whose
# length ``p`` is a divisor of ``num``. Once in that cycle, at some point
# the tortoise and hare will end up on the same value modulo ``p``.
# We can detect when this happens because the position difference between
# the tortoise and the hare will share a common divisor with ``num``.
divisor = gcd(hare - tortoise, num)
if divisor == 1:
# No common divisor yet, just keep searching.
continue
# We found a common divisor!
elif divisor == num:
# Unfortunately, the divisor is ``num`` itself and is useless.
break
else:
# The divisor is a nontrivial factor of ``num``!
return divisor
# If we made it here, then this attempt failed.
# We need to pick a new starting seed for the tortoise and hare
# in addition to a new step value for the random function.
# To keep this example implementation deterministic, the
# new values will be generated based on currently available
# values instead of using something like ``random.randint``.
# We can use the hare's position as the new seed.
        # This is actually what Richard Brent's "optimized" variant does.
seed = hare
# The new step value for the random function can just be incremented.
# At first the results will be similar to what the old function would
# have produced, but the value will quickly diverge after a bit.
step += 1
# We haven't found a divisor within the requested number of attempts.
# We were unlucky or ``num`` itself is actually prime.
return None
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument(
"num",
type=int,
help="The value to find a divisor of",
)
parser.add_argument(
"--attempts",
type=int,
default=3,
help="The number of attempts before giving up",
)
args = parser.parse_args()
divisor = pollard_rho(args.num, attempts=args.attempts)
if divisor is None:
print(f"{args.num} is probably prime")
else:
quotient = args.num // divisor
print(f"{args.num} = {divisor} * {quotient}")
================================================
FILE: maths/polynomial_evaluation.py
================================================
from collections.abc import Sequence
def evaluate_poly(poly: Sequence[float], x: float) -> float:
"""Evaluate a polynomial f(x) at specified point x and return the value.
Arguments:
poly -- the coefficients of a polynomial as an iterable in order of
ascending degree
x -- the point at which to evaluate the polynomial
>>> evaluate_poly((0.0, 0.0, 5.0, 9.3, 7.0), 10.0)
79800.0
"""
return sum(c * (x**i) for i, c in enumerate(poly))
def horner(poly: Sequence[float], x: float) -> float:
"""Evaluate a polynomial at specified point using Horner's method.
In terms of computational complexity, Horner's method is an efficient method
of evaluating a polynomial. It avoids the use of expensive exponentiation,
and instead uses only multiplication and addition to evaluate the polynomial
in O(n), where n is the degree of the polynomial.
https://en.wikipedia.org/wiki/Horner's_method
Arguments:
poly -- the coefficients of a polynomial as an iterable in order of
ascending degree
x -- the point at which to evaluate the polynomial
>>> horner((0.0, 0.0, 5.0, 9.3, 7.0), 10.0)
79800.0
"""
result = 0.0
for coeff in reversed(poly):
result = result * x + coeff
return result
if __name__ == "__main__":
"""
Example:
>>> poly = (0.0, 0.0, 5.0, 9.3, 7.0) # f(x) = 7.0x^4 + 9.3x^3 + 5.0x^2
>>> x = -13.0
>>> # f(-13) = 7.0(-13)^4 + 9.3(-13)^3 + 5.0(-13)^2 = 180339.9
>>> evaluate_poly(poly, x)
180339.9
"""
poly = (0.0, 0.0, 5.0, 9.3, 7.0)
x = 10.0
print(evaluate_poly(poly, x))
print(horner(poly, x))
================================================
FILE: maths/polynomials/__init__.py
================================================
================================================
FILE: maths/polynomials/single_indeterminate_operations.py
================================================
"""
This module implements a single-indeterminate polynomial class
with some basic operations
Reference: https://en.wikipedia.org/wiki/Polynomial
"""
from __future__ import annotations
from collections.abc import MutableSequence
class Polynomial:
def __init__(self, degree: int, coefficients: MutableSequence[float]) -> None:
"""
The coefficients should be in order of degree, from smallest to largest.
>>> p = Polynomial(2, [1, 2, 3])
>>> p = Polynomial(2, [1, 2, 3, 4])
Traceback (most recent call last):
...
ValueError: The number of coefficients should be equal to the degree + 1.
"""
if len(coefficients) != degree + 1:
raise ValueError(
"The number of coefficients should be equal to the degree + 1."
)
self.coefficients: list[float] = list(coefficients)
self.degree = degree
def __add__(self, polynomial_2: Polynomial) -> Polynomial:
"""
Polynomial addition
>>> p = Polynomial(2, [1, 2, 3])
>>> q = Polynomial(2, [1, 2, 3])
>>> p + q
6x^2 + 4x + 2
"""
if self.degree > polynomial_2.degree:
coefficients = self.coefficients[:]
for i in range(polynomial_2.degree + 1):
coefficients[i] += polynomial_2.coefficients[i]
return Polynomial(self.degree, coefficients)
else:
coefficients = polynomial_2.coefficients[:]
for i in range(self.degree + 1):
coefficients[i] += self.coefficients[i]
return Polynomial(polynomial_2.degree, coefficients)
def __sub__(self, polynomial_2: Polynomial) -> Polynomial:
"""
Polynomial subtraction
>>> p = Polynomial(2, [1, 2, 4])
>>> q = Polynomial(2, [1, 2, 3])
>>> p - q
1x^2
"""
return self + polynomial_2 * Polynomial(0, [-1])
def __neg__(self) -> Polynomial:
"""
Polynomial negation
>>> p = Polynomial(2, [1, 2, 3])
>>> -p
- 3x^2 - 2x - 1
"""
return Polynomial(self.degree, [-c for c in self.coefficients])
def __mul__(self, polynomial_2: Polynomial) -> Polynomial:
"""
Polynomial multiplication
>>> p = Polynomial(2, [1, 2, 3])
>>> q = Polynomial(2, [1, 2, 3])
>>> p * q
9x^4 + 12x^3 + 10x^2 + 4x + 1
"""
coefficients: list[float] = [0] * (self.degree + polynomial_2.degree + 1)
for i in range(self.degree + 1):
for j in range(polynomial_2.degree + 1):
coefficients[i + j] += (
self.coefficients[i] * polynomial_2.coefficients[j]
)
return Polynomial(self.degree + polynomial_2.degree, coefficients)
def evaluate(self, substitution: float) -> float:
"""
Evaluates the polynomial at x.
>>> p = Polynomial(2, [1, 2, 3])
>>> p.evaluate(2)
17
"""
result: int | float = 0
for i in range(self.degree + 1):
result += self.coefficients[i] * (substitution**i)
return result
def __str__(self) -> str:
"""
>>> p = Polynomial(2, [1, 2, 3])
>>> print(p)
3x^2 + 2x + 1
"""
polynomial = ""
for i in range(self.degree, -1, -1):
if self.coefficients[i] == 0:
continue
elif self.coefficients[i] > 0:
if polynomial:
polynomial += " + "
else:
polynomial += " - "
if i == 0:
polynomial += str(abs(self.coefficients[i]))
elif i == 1:
polynomial += str(abs(self.coefficients[i])) + "x"
else:
polynomial += str(abs(self.coefficients[i])) + "x^" + str(i)
return polynomial
def __repr__(self) -> str:
"""
>>> p = Polynomial(2, [1, 2, 3])
>>> p
3x^2 + 2x + 1
"""
return self.__str__()
def derivative(self) -> Polynomial:
"""
Returns the derivative of the polynomial.
>>> p = Polynomial(2, [1, 2, 3])
>>> p.derivative()
6x + 2
"""
coefficients: list[float] = [0] * self.degree
for i in range(self.degree):
coefficients[i] = self.coefficients[i + 1] * (i + 1)
return Polynomial(self.degree - 1, coefficients)
def integral(self, constant: float = 0) -> Polynomial:
"""
Returns the integral of the polynomial.
>>> p = Polynomial(2, [1, 2, 3])
>>> p.integral()
1.0x^3 + 1.0x^2 + 1.0x
"""
coefficients: list[float] = [0] * (self.degree + 2)
coefficients[0] = constant
for i in range(self.degree + 1):
coefficients[i + 1] = self.coefficients[i] / (i + 1)
return Polynomial(self.degree + 1, coefficients)
def __eq__(self, polynomial_2: object) -> bool:
"""
Checks if two polynomials are equal.
>>> p = Polynomial(2, [1, 2, 3])
>>> q = Polynomial(2, [1, 2, 3])
>>> p == q
True
"""
if not isinstance(polynomial_2, Polynomial):
return False
if self.degree != polynomial_2.degree:
return False
for i in range(self.degree + 1):
if self.coefficients[i] != polynomial_2.coefficients[i]:
return False
return True
def __ne__(self, polynomial_2: object) -> bool:
"""
Checks if two polynomials are not equal.
>>> p = Polynomial(2, [1, 2, 3])
>>> q = Polynomial(2, [1, 2, 3])
>>> p != q
False
"""
return not self.__eq__(polynomial_2)
================================================
FILE: maths/power_using_recursion.py
================================================
"""
== Raise base to the power of exponent using recursion ==
Input -->
Enter the base: 3
Enter the exponent: 4
Output -->
3 to the power of 4 is 81
Input -->
Enter the base: 2
Enter the exponent: 0
Output -->
2 to the power of 0 is 1
"""
def power(base: int, exponent: int) -> float:
"""
Calculate the power of a base raised to an exponent.
>>> power(3, 4)
81
>>> power(2, 0)
1
>>> all(power(base, exponent) == pow(base, exponent)
... for base in range(-10, 10) for exponent in range(10))
True
>>> power('a', 1)
'a'
>>> power('a', 2)
Traceback (most recent call last):
...
TypeError: can't multiply sequence by non-int of type 'str'
>>> power('a', 'b')
Traceback (most recent call last):
...
TypeError: unsupported operand type(s) for -: 'str' and 'int'
>>> power(2, -1)
Traceback (most recent call last):
...
RecursionError: maximum recursion depth exceeded
>>> power(0, 0)
1
>>> power(0, 1)
0
>>> power(5,6)
15625
>>> power(23, 12)
21914624432020321
"""
return base * power(base, (exponent - 1)) if exponent else 1
if __name__ == "__main__":
from doctest import testmod
testmod()
print("Raise base to the power of exponent using recursion...")
base = int(input("Enter the base: ").strip())
exponent = int(input("Enter the exponent: ").strip())
result = power(base, abs(exponent))
if exponent < 0: # power() does not properly deal w/ negative exponents
result = 1 / result
print(f"{base} to the power of {exponent} is {result}")
================================================
FILE: maths/prime_check.py
================================================
"""Prime Check."""
import math
import unittest
import pytest
def is_prime(number: int) -> bool:
"""Checks to see if a number is a prime in O(sqrt(n)).
A number is prime if it has exactly two factors: 1 and itself.
>>> is_prime(0)
False
>>> is_prime(1)
False
>>> is_prime(2)
True
>>> is_prime(3)
True
>>> is_prime(27)
False
>>> is_prime(87)
False
>>> is_prime(563)
True
>>> is_prime(2999)
True
>>> is_prime(67483)
False
>>> is_prime(16.1)
Traceback (most recent call last):
...
ValueError: is_prime() only accepts positive integers
>>> is_prime(-4)
Traceback (most recent call last):
...
ValueError: is_prime() only accepts positive integers
"""
# precondition
if not isinstance(number, int) or not number >= 0:
raise ValueError("is_prime() only accepts positive integers")
if 1 < number < 4:
# 2 and 3 are primes
return True
elif number < 2 or number % 2 == 0 or number % 3 == 0:
# Negatives, 0, 1, all even numbers, all multiples of 3 are not primes
return False
    # All prime numbers greater than 3 have the form 6k +/- 1, since numbers of the
    # form 6k, 6k +/- 2 and 6k + 3 are divisible by 2 or 3
for i in range(5, int(math.sqrt(number) + 1), 6):
if number % i == 0 or number % (i + 2) == 0:
return False
return True
class Test(unittest.TestCase):
def test_primes(self):
assert is_prime(2)
assert is_prime(3)
assert is_prime(5)
assert is_prime(7)
assert is_prime(11)
assert is_prime(13)
assert is_prime(17)
assert is_prime(19)
assert is_prime(23)
assert is_prime(29)
def test_not_primes(self):
with pytest.raises(ValueError):
is_prime(-19)
assert not is_prime(0), (
"Zero doesn't have any positive factors, primes must have exactly two."
)
assert not is_prime(1), (
"One only has 1 positive factor, primes must have exactly two."
)
assert not is_prime(2 * 2)
assert not is_prime(2 * 3)
assert not is_prime(3 * 3)
assert not is_prime(3 * 5)
assert not is_prime(3 * 5 * 7)
if __name__ == "__main__":
unittest.main()
================================================
FILE: maths/prime_factors.py
================================================
"""
python/black : True
"""
from __future__ import annotations
def prime_factors(n: int) -> list[int]:
"""
Returns prime factors of n as a list.
>>> prime_factors(0)
[]
>>> prime_factors(100)
[2, 2, 5, 5]
>>> prime_factors(2560)
[2, 2, 2, 2, 2, 2, 2, 2, 2, 5]
>>> prime_factors(10**-2)
[]
>>> prime_factors(0.02)
[]
>>> x = prime_factors(10**241) # doctest: +NORMALIZE_WHITESPACE
>>> x == [2]*241 + [5]*241
True
>>> prime_factors(10**-354)
[]
>>> prime_factors('hello')
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'int' and 'str'
>>> prime_factors([1,2,'hello'])
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'int' and 'list'
"""
i = 2
factors = []
while i * i <= n:
if n % i:
i += 1
else:
n //= i
factors.append(i)
if n > 1:
factors.append(n)
return factors
def unique_prime_factors(n: int) -> list[int]:
"""
Returns unique prime factors of n as a list.
>>> unique_prime_factors(0)
[]
>>> unique_prime_factors(100)
[2, 5]
>>> unique_prime_factors(2560)
[2, 5]
>>> unique_prime_factors(10**-2)
[]
>>> unique_prime_factors(0.02)
[]
>>> unique_prime_factors(10**241)
[2, 5]
>>> unique_prime_factors(10**-354)
[]
>>> unique_prime_factors('hello')
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'int' and 'str'
>>> unique_prime_factors([1,2,'hello'])
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'int' and 'list'
"""
i = 2
factors = []
while i * i <= n:
if not n % i:
while not n % i:
n //= i
factors.append(i)
i += 1
if n > 1:
factors.append(n)
return factors
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/prime_numbers.py
================================================
import math
from collections.abc import Generator
def slow_primes(max_n: int) -> Generator[int]:
"""
    Yield all prime numbers up to max_n.
>>> list(slow_primes(0))
[]
>>> list(slow_primes(-1))
[]
>>> list(slow_primes(-10))
[]
>>> list(slow_primes(25))
[2, 3, 5, 7, 11, 13, 17, 19, 23]
>>> list(slow_primes(11))
[2, 3, 5, 7, 11]
>>> list(slow_primes(33))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
>>> list(slow_primes(1000))[-1]
997
"""
numbers: Generator = (i for i in range(1, (max_n + 1)))
for i in (n for n in numbers if n > 1):
for j in range(2, i):
if (i % j) == 0:
break
else:
yield i
def primes(max_n: int) -> Generator[int]:
"""
    Yield all prime numbers up to max_n.
>>> list(primes(0))
[]
>>> list(primes(-1))
[]
>>> list(primes(-10))
[]
>>> list(primes(25))
[2, 3, 5, 7, 11, 13, 17, 19, 23]
>>> list(primes(11))
[2, 3, 5, 7, 11]
>>> list(primes(33))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
>>> list(primes(1000))[-1]
997
"""
numbers: Generator = (i for i in range(1, (max_n + 1)))
for i in (n for n in numbers if n > 1):
# only need to check for factors up to sqrt(i)
bound = int(math.sqrt(i)) + 1
for j in range(2, bound):
if (i % j) == 0:
break
else:
yield i
def fast_primes(max_n: int) -> Generator[int]:
"""
    Yield all prime numbers up to max_n.
>>> list(fast_primes(0))
[]
>>> list(fast_primes(-1))
[]
>>> list(fast_primes(-10))
[]
>>> list(fast_primes(25))
[2, 3, 5, 7, 11, 13, 17, 19, 23]
>>> list(fast_primes(11))
[2, 3, 5, 7, 11]
>>> list(fast_primes(33))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
>>> list(fast_primes(1000))[-1]
997
"""
numbers: Generator = (i for i in range(1, (max_n + 1), 2))
# It's useless to test even numbers as they will not be prime
if max_n > 2:
yield 2 # Because 2 will not be tested, it's necessary to yield it now
for i in (n for n in numbers if n > 1):
bound = int(math.sqrt(i)) + 1
for j in range(3, bound, 2):
# As we removed the even numbers, we don't need them now
if (i % j) == 0:
break
else:
yield i
def benchmark():
"""
Let's benchmark our functions side-by-side...
"""
from timeit import timeit
setup = "from __main__ import slow_primes, primes, fast_primes"
print(timeit("slow_primes(1_000_000_000_000)", setup=setup, number=1_000_000))
print(timeit("primes(1_000_000_000_000)", setup=setup, number=1_000_000))
print(timeit("fast_primes(1_000_000_000_000)", setup=setup, number=1_000_000))
if __name__ == "__main__":
number = int(input("Calculate primes up to:\n>> ").strip())
for ret in primes(number):
print(ret)
benchmark()
================================================
FILE: maths/prime_sieve_eratosthenes.py
================================================
"""
Sieve of Eratosthenes
Input: n = 10
Output: 2 3 5 7
Input: n = 20
Output: 2 3 5 7 11 13 17 19
you can read in detail about this at
https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes
"""
def prime_sieve_eratosthenes(num: int) -> list[int]:
"""
Print the prime numbers up to n
>>> prime_sieve_eratosthenes(10)
[2, 3, 5, 7]
>>> prime_sieve_eratosthenes(20)
[2, 3, 5, 7, 11, 13, 17, 19]
>>> prime_sieve_eratosthenes(2)
[2]
>>> prime_sieve_eratosthenes(1)
[]
>>> prime_sieve_eratosthenes(-1)
Traceback (most recent call last):
...
ValueError: Input must be a positive integer
"""
if num <= 0:
raise ValueError("Input must be a positive integer")
primes = [True] * (num + 1)
p = 2
while p * p <= num:
if primes[p]:
for i in range(p * p, num + 1, p):
primes[i] = False
p += 1
return [prime for prime in range(2, num + 1) if primes[prime]]
if __name__ == "__main__":
import doctest
doctest.testmod()
user_num = int(input("Enter a positive integer: ").strip())
print(prime_sieve_eratosthenes(user_num))
================================================
FILE: maths/primelib.py
================================================
"""
Created on Thu Oct 5 16:44:23 2017
@author: Christian Bender
This Python library contains some useful functions to deal with
prime numbers and whole numbers.
Overview:
is_prime(number)
sieve_er(N)
get_prime_numbers(N)
prime_factorization(number)
greatest_prime_factor(number)
smallest_prime_factor(number)
get_prime(n)
get_primes_between(pNumber1, pNumber2)
----
is_even(number)
is_odd(number)
kg_v(number1, number2) // least common multiple
get_divisors(number) // all divisors of 'number', including 1 and the number itself
is_perfect_number(number)
NEW-FUNCTIONS
simplify_fraction(numerator, denominator)
factorial (n) // n!
fib (n) // calculate the n-th fibonacci term.
-----
goldbach(number) // Goldbach's conjecture
"""
from math import sqrt
from maths.greatest_common_divisor import gcd_by_iterative
def is_prime(number: int) -> bool:
"""
input: positive integer 'number'
returns true if 'number' is prime otherwise false.
>>> is_prime(3)
True
>>> is_prime(10)
False
>>> is_prime(97)
True
>>> is_prime(9991)
False
>>> is_prime(-1)
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and positive
>>> is_prime("test")
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and positive
"""
# precondition
assert isinstance(number, int) and (number >= 0), (
"'number' must been an int and positive"
)
status = True
    # 0 and 1 are not primes.
if number <= 1:
status = False
for divisor in range(2, round(sqrt(number)) + 1):
        # if 'number' is divisible by 'divisor' then set 'status'
        # to False and break out of the loop.
if number % divisor == 0:
status = False
break
# precondition
assert isinstance(status, bool), "'status' must been from type bool"
return status
# ------------------------------------------
def sieve_er(n):
"""
input: positive integer 'N' > 2
returns a list of prime numbers from 2 up to N.
This function implements the algorithm known as the
Sieve of Eratosthenes.
>>> sieve_er(8)
[2, 3, 5, 7]
>>> sieve_er(-1)
Traceback (most recent call last):
...
AssertionError: 'N' must been an int and > 2
>>> sieve_er("test")
Traceback (most recent call last):
...
AssertionError: 'N' must been an int and > 2
"""
# precondition
assert isinstance(n, int) and (n > 2), "'N' must been an int and > 2"
# beginList: contains all natural numbers from 2 up to N
begin_list = list(range(2, n + 1))
ans = [] # this list will be returns.
# actual Sieve of Eratosthenes
for i in range(len(begin_list)):
for j in range(i + 1, len(begin_list)):
if (begin_list[i] != 0) and (begin_list[j] % begin_list[i] == 0):
begin_list[j] = 0
# filters actual prime numbers.
ans = [x for x in begin_list if x != 0]
# postcondition
assert isinstance(ans, list), "'ans' must been from type list"
return ans
# --------------------------------
def get_prime_numbers(n):
"""
input: positive integer 'N' > 2
returns a list of prime numbers from 2 up to N (inclusive)
This function is more efficient than the function 'sieve_er(...)'
>>> get_prime_numbers(8)
[2, 3, 5, 7]
>>> get_prime_numbers(-1)
Traceback (most recent call last):
...
AssertionError: 'N' must been an int and > 2
>>> get_prime_numbers("test")
Traceback (most recent call last):
...
AssertionError: 'N' must been an int and > 2
"""
# precondition
assert isinstance(n, int) and (n > 2), "'N' must been an int and > 2"
ans = []
# iterate over all numbers from 2 up to N (inclusive)
# and append each prime number to the list 'ans'
for number in range(2, n + 1):
if is_prime(number):
ans.append(number)
# postcondition
assert isinstance(ans, list), "'ans' must been from type list"
return ans
# -----------------------------------------
def prime_factorization(number):
"""
input: positive integer 'number'
returns a list of the prime number factors of 'number'
>>> prime_factorization(0)
[0]
>>> prime_factorization(8)
[2, 2, 2]
>>> prime_factorization(287)
[7, 41]
>>> prime_factorization(-1)
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and >= 0
>>> prime_factorization("test")
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and >= 0
"""
# precondition
assert isinstance(number, int) and number >= 0, "'number' must been an int and >= 0"
ans = [] # this list will be returns of the function.
# potential prime number factors.
factor = 2
quotient = number
if number in {0, 1}:
ans.append(number)
# if 'number' is not prime, build its prime factorization
elif not is_prime(number):
while quotient != 1:
if is_prime(factor) and (quotient % factor == 0):
ans.append(factor)
quotient /= factor
else:
factor += 1
else:
ans.append(number)
# postcondition
assert isinstance(ans, list), "'ans' must been from type list"
return ans
# -----------------------------------------
def greatest_prime_factor(number):
"""
input: positive integer 'number' >= 0
returns the greatest prime number factor of 'number'
>>> greatest_prime_factor(0)
0
>>> greatest_prime_factor(8)
2
>>> greatest_prime_factor(287)
41
>>> greatest_prime_factor(-1)
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and >= 0
>>> greatest_prime_factor("test")
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and >= 0
"""
# precondition
assert isinstance(number, int) and (number >= 0), (
"'number' must been an int and >= 0"
)
ans = 0
# prime factorization of 'number'
prime_factors = prime_factorization(number)
ans = max(prime_factors)
# postcondition
assert isinstance(ans, int), "'ans' must been from type int"
return ans
# ----------------------------------------------
def smallest_prime_factor(number):
"""
input: integer 'number' >= 0
returns the smallest prime number factor of 'number'
>>> smallest_prime_factor(0)
0
>>> smallest_prime_factor(8)
2
>>> smallest_prime_factor(287)
7
>>> smallest_prime_factor(-1)
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and >= 0
>>> smallest_prime_factor("test")
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and >= 0
"""
# precondition
assert isinstance(number, int) and (number >= 0), (
"'number' must been an int and >= 0"
)
ans = 0
# prime factorization of 'number'
prime_factors = prime_factorization(number)
ans = min(prime_factors)
# postcondition
assert isinstance(ans, int), "'ans' must been from type int"
return ans
# ----------------------
def is_even(number):
"""
input: integer 'number'
returns true if 'number' is even, otherwise false.
>>> is_even(0)
True
>>> is_even(8)
True
>>> is_even(287)
False
>>> is_even(-1)
False
>>> is_even("test")
Traceback (most recent call last):
...
AssertionError: 'number' must been an int
"""
# precondition
assert isinstance(number, int), "'number' must been an int"
assert isinstance(number % 2 == 0, bool), "compare must been from type bool"
return number % 2 == 0
# ------------------------
def is_odd(number):
"""
input: integer 'number'
returns true if 'number' is odd, otherwise false.
>>> is_odd(0)
False
>>> is_odd(8)
False
>>> is_odd(287)
True
>>> is_odd(-1)
True
>>> is_odd("test")
Traceback (most recent call last):
...
AssertionError: 'number' must been an int
"""
# precondition
assert isinstance(number, int), "'number' must been an int"
assert isinstance(number % 2 != 0, bool), "compare must been from type bool"
return number % 2 != 0
# ------------------------
def goldbach(number):
"""
Goldbach's conjecture
input: an even positive integer 'number' > 2
returns a list of two prime numbers whose sum is equal to 'number'
>>> goldbach(8)
[3, 5]
>>> goldbach(824)
[3, 821]
>>> goldbach(0)
Traceback (most recent call last):
...
AssertionError: 'number' must been an int, even and > 2
>>> goldbach(-1)
Traceback (most recent call last):
...
AssertionError: 'number' must been an int, even and > 2
>>> goldbach("test")
Traceback (most recent call last):
...
AssertionError: 'number' must been an int, even and > 2
"""
# precondition
assert isinstance(number, int) and (number > 2) and is_even(number), (
"'number' must been an int, even and > 2"
)
ans = [] # this list will returned
# creates a list of prime numbers between 2 up to 'number'
prime_numbers = get_prime_numbers(number)
len_pn = len(prime_numbers)
# loop variables for the while-loops.
i = 0
j = None
# exit flag used to break out of the loops
loop = True
while i < len_pn and loop:
j = i + 1
while j < len_pn and loop:
if prime_numbers[i] + prime_numbers[j] == number:
loop = False
ans.append(prime_numbers[i])
ans.append(prime_numbers[j])
j += 1
i += 1
# postcondition
assert (
isinstance(ans, list)
and (len(ans) == 2)
and (ans[0] + ans[1] == number)
and is_prime(ans[0])
and is_prime(ans[1])
), "'ans' must contains two primes. And sum of elements must been eq 'number'"
return ans
# ----------------------------------------------
def kg_v(number1, number2):
"""
Least common multiple
input: two positive integers 'number1' and 'number2'
returns the least common multiple of 'number1' and 'number2'
>>> kg_v(8,10)
40
>>> kg_v(824,67)
55208
>>> kg_v(1, 10)
10
>>> kg_v(0)
Traceback (most recent call last):
...
TypeError: kg_v() missing 1 required positional argument: 'number2'
>>> kg_v(10,-1)
Traceback (most recent call last):
...
AssertionError: 'number1' and 'number2' must been positive integer.
>>> kg_v("test","test2")
Traceback (most recent call last):
...
AssertionError: 'number1' and 'number2' must been positive integer.
"""
# precondition
assert (
isinstance(number1, int)
and isinstance(number2, int)
and (number1 >= 1)
and (number2 >= 1)
), "'number1' and 'number2' must been positive integer."
ans = 1 # actual answer that will be return.
# for kgV (x,1)
if number1 > 1 and number2 > 1:
# builds the prime factorization of 'number1' and 'number2'
prime_fac_1 = prime_factorization(number1)
prime_fac_2 = prime_factorization(number2)
elif number1 == 1 or number2 == 1:
prime_fac_1 = []
prime_fac_2 = []
ans = max(number1, number2)
count1 = 0
count2 = 0
done = [] # captured numbers int both 'primeFac1' and 'primeFac2'
# iterates through primeFac1
for n in prime_fac_1:
if n not in done:
if n in prime_fac_2:
count1 = prime_fac_1.count(n)
count2 = prime_fac_2.count(n)
for _ in range(max(count1, count2)):
ans *= n
else:
count1 = prime_fac_1.count(n)
for _ in range(count1):
ans *= n
done.append(n)
# iterates through primeFac2
for n in prime_fac_2:
if n not in done:
count2 = prime_fac_2.count(n)
for _ in range(count2):
ans *= n
done.append(n)
# postcondition
assert isinstance(ans, int) and (ans >= 0), (
"'ans' must been from type int and positive"
)
return ans
# ----------------------------------
def get_prime(n):
"""
Gets the n-th prime number.
input: positive integer 'n' >= 0
returns the n-th prime number, beginning at index 0
>>> get_prime(0)
2
>>> get_prime(8)
23
>>> get_prime(824)
6337
>>> get_prime(-1)
Traceback (most recent call last):
...
AssertionError: 'number' must been a positive int
>>> get_prime("test")
Traceback (most recent call last):
...
AssertionError: 'number' must been a positive int
"""
# precondition
assert isinstance(n, int) and (n >= 0), "'number' must been a positive int"
index = 0
ans = 2 # this variable holds the answer
while index < n:
index += 1
ans += 1 # counts to the next number
# if ans is not prime then
# advance to the next prime number.
while not is_prime(ans):
ans += 1
# postcondition
assert isinstance(ans, int) and is_prime(ans), (
"'ans' must been a prime number and from type int"
)
return ans
# ---------------------------------------------------
def get_primes_between(p_number_1, p_number_2):
"""
input: prime numbers 'pNumber1' and 'pNumber2'
pNumber1 < pNumber2
returns a list of all prime numbers between 'pNumber1' (exclusive)
and 'pNumber2' (exclusive)
>>> get_primes_between(3, 67)
[5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61]
>>> get_primes_between(0)
Traceback (most recent call last):
...
TypeError: get_primes_between() missing 1 required positional argument: 'p_number_2'
>>> get_primes_between(0, 1)
Traceback (most recent call last):
...
AssertionError: The arguments must been prime numbers and 'pNumber1' < 'pNumber2'
>>> get_primes_between(-1, 3)
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and positive
>>> get_primes_between("test","test")
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and positive
"""
# precondition
assert (
is_prime(p_number_1) and is_prime(p_number_2) and (p_number_1 < p_number_2)
), "The arguments must been prime numbers and 'pNumber1' < 'pNumber2'"
number = p_number_1 + 1 # jump to the next number
ans = [] # this list will be returns.
# if number is not prime then
# fetch the next prime number.
while not is_prime(number):
number += 1
while number < p_number_2:
ans.append(number)
number += 1
# fetch the next prime number.
while not is_prime(number):
number += 1
# postcondition
assert (
isinstance(ans, list)
and ans[0] != p_number_1
and ans[len(ans) - 1] != p_number_2
), "'ans' must been a list without the arguments"
# 'ans' contains neither 'pNumber1' nor 'pNumber2'!
return ans
# ----------------------------------------------------
def get_divisors(n):
"""
input: positive integer 'n' >= 1
returns all divisors of n (including 1 and 'n')
>>> get_divisors(8)
[1, 2, 4, 8]
>>> get_divisors(824)
[1, 2, 4, 8, 103, 206, 412, 824]
>>> get_divisors(-1)
Traceback (most recent call last):
...
AssertionError: 'n' must been int and >= 1
>>> get_divisors("test")
Traceback (most recent call last):
...
AssertionError: 'n' must been int and >= 1
"""
# precondition
assert isinstance(n, int) and (n >= 1), "'n' must been int and >= 1"
ans = [] # will be returned.
for divisor in range(1, n + 1):
if n % divisor == 0:
ans.append(divisor)
# postcondition
assert ans[0] == 1 and ans[len(ans) - 1] == n, "Error in function getDivisiors(...)"
return ans
# ----------------------------------------------------
def is_perfect_number(number):
"""
input: positive integer 'number' > 1
returns true if 'number' is a perfect number otherwise false.
>>> is_perfect_number(28)
True
>>> is_perfect_number(824)
False
>>> is_perfect_number(-1)
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and >= 1
>>> is_perfect_number("test")
Traceback (most recent call last):
...
AssertionError: 'number' must been an int and >= 1
"""
# precondition
assert isinstance(number, int) and (number > 1), (
"'number' must been an int and >= 1"
)
divisors = get_divisors(number)
# sanity check on the result of get_divisors(...)
assert (
isinstance(divisors, list)
and (divisors[0] == 1)
and (divisors[len(divisors) - 1] == number)
), "Error in help-function getDivisiors(...)"
# sum of all divisors excluding 'number' itself, hence [:-1]
return sum(divisors[:-1]) == number
# ------------------------------------------------------------
def simplify_fraction(numerator, denominator):
"""
input: two integers 'numerator' and 'denominator'
assumes: 'denominator' != 0
returns: a tuple with the simplified numerator and denominator.
>>> simplify_fraction(10, 20)
(1, 2)
>>> simplify_fraction(10, -1)
(10, -1)
>>> simplify_fraction("test","test")
Traceback (most recent call last):
...
AssertionError: The arguments must been from type int and 'denominator' != 0
"""
# precondition
assert (
isinstance(numerator, int)
and isinstance(denominator, int)
and (denominator != 0)
), "The arguments must been from type int and 'denominator' != 0"
# build the greatest common divisor of numerator and denominator.
gcd_of_fraction = gcd_by_iterative(abs(numerator), abs(denominator))
# sanity check on the result of gcd_by_iterative(...)
assert (
isinstance(gcd_of_fraction, int)
and (numerator % gcd_of_fraction == 0)
and (denominator % gcd_of_fraction == 0)
), "Error in function gcd_by_iterative(...,...)"
return (numerator // gcd_of_fraction, denominator // gcd_of_fraction)
# -----------------------------------------------------------------
def factorial(n):
"""
input: positive integer 'n'
returns the factorial of 'n' (n!)
>>> factorial(0)
1
>>> factorial(20)
2432902008176640000
>>> factorial(-1)
Traceback (most recent call last):
...
AssertionError: 'n' must been a int and >= 0
>>> factorial("test")
Traceback (most recent call last):
...
AssertionError: 'n' must been a int and >= 0
"""
# precondition
assert isinstance(n, int) and (n >= 0), "'n' must been a int and >= 0"
ans = 1 # this will be return.
for factor in range(1, n + 1):
ans *= factor
return ans
# -------------------------------------------------------------------
def fib(n: int) -> int:
"""
input: positive integer 'n'
returns the n-th Fibonacci term, indexed from 0
>>> fib(0)
1
>>> fib(5)
8
>>> fib(20)
10946
>>> fib(99)
354224848179261915075
>>> fib(-1)
Traceback (most recent call last):
...
AssertionError: 'n' must been an int and >= 0
>>> fib("test")
Traceback (most recent call last):
...
AssertionError: 'n' must been an int and >= 0
"""
# precondition
assert isinstance(n, int) and (n >= 0), "'n' must been an int and >= 0"
tmp = 0
fib1 = 1
ans = 1 # this will be return
for _ in range(n - 1):
tmp = ans
ans += fib1
fib1 = tmp
return ans
if __name__ == "__main__":
import doctest
doctest.testmod()
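    # --- illustrative usage sketch (added; not part of the original library) ---
    # A few representative calls; the expected values follow from the definitions
    # documented above. Like the rest of this file, it assumes the module's
    # 'maths' package import resolves (e.g. run from the repository root via
    # `python -m maths.primelib`).
    print(prime_factorization(120))   # [2, 2, 2, 3, 5]
    print(goldbach(28))               # [5, 23]
    print(kg_v(12, 18))               # 36
    print(simplify_fraction(12, 18))  # (2, 3)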
================================================
FILE: maths/print_multiplication_table.py
================================================
def multiplication_table(number: int, number_of_terms: int) -> str:
"""
Returns the multiplication table of a given number up to the given number of terms
>>> print(multiplication_table(3, 5))
3 * 1 = 3
3 * 2 = 6
3 * 3 = 9
3 * 4 = 12
3 * 5 = 15
>>> print(multiplication_table(-4, 6))
-4 * 1 = -4
-4 * 2 = -8
-4 * 3 = -12
-4 * 4 = -16
-4 * 5 = -20
-4 * 6 = -24
"""
return "\n".join(
f"{number} * {i} = {number * i}" for i in range(1, number_of_terms + 1)
)
if __name__ == "__main__":
print(multiplication_table(number=5, number_of_terms=10))
================================================
FILE: maths/pythagoras.py
================================================
"""Uses Pythagoras theorem to calculate the distance between two points in space."""
import math
class Point:
def __init__(self, x, y, z):
self.x = x
self.y = y
self.z = z
def __repr__(self) -> str:
return f"Point({self.x}, {self.y}, {self.z})"
def distance(a: Point, b: Point) -> float:
"""
>>> point1 = Point(2, -1, 7)
>>> point2 = Point(1, -3, 5)
>>> print(f"Distance from {point1} to {point2} is {distance(point1, point2)}")
Distance from Point(2, -1, 7) to Point(1, -3, 5) is 3.0
"""
return math.sqrt(abs((b.x - a.x) ** 2 + (b.y - a.y) ** 2 + (b.z - a.z) ** 2))
if __name__ == "__main__":
import doctest
doctest.testmod()
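    # Illustrative extra check (added; not part of the original file):
    # the classic 3-4-5 right triangle, embedded in 3-D space with z = 0.
    print(distance(Point(0, 0, 0), Point(3, 4, 0)))  # 5.0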
================================================
FILE: maths/qr_decomposition.py
================================================
import numpy as np
def qr_householder(a: np.ndarray):
"""Return a QR-decomposition of the matrix A using Householder reflection.
The QR-decomposition decomposes the matrix A of shape (m, n) into an
orthogonal matrix Q of shape (m, m) and an upper triangular matrix R of
shape (m, n). Note that the matrix A does not have to be square. This
method of decomposing A uses the Householder reflection, which is
numerically stable and of complexity O(n^3).
https://en.wikipedia.org/wiki/QR_decomposition#Using_Householder_reflections
Arguments:
A -- a numpy.ndarray of shape (m, n)
Note: several optimizations can be made for numeric efficiency, but this is
intended to demonstrate how it would be represented in a mathematics
textbook. In cases where efficiency is particularly important, an optimized
version from BLAS should be used.
>>> A = np.array([[12, -51, 4], [6, 167, -68], [-4, 24, -41]], dtype=float)
>>> Q, R = qr_householder(A)
>>> # check that the decomposition is correct
>>> np.allclose(Q@R, A)
True
>>> # check that Q is orthogonal
>>> np.allclose(Q@Q.T, np.eye(A.shape[0]))
True
>>> np.allclose(Q.T@Q, np.eye(A.shape[0]))
True
>>> # check that R is upper triangular
>>> np.allclose(np.triu(R), R)
True
"""
m, n = a.shape
t = min(m, n)
q = np.eye(m)
r = a.copy()
for k in range(t - 1):
# select a column of modified matrix A':
x = r[k:, [k]]
# construct first basis vector
e1 = np.zeros_like(x)
e1[0] = 1.0
# determine scaling factor
alpha = np.linalg.norm(x)
# construct vector v for Householder reflection
v = x + np.sign(x[0]) * alpha * e1
v /= np.linalg.norm(v)
# construct the Householder matrix
q_k = np.eye(m - k) - 2.0 * v @ v.T
# pad with ones and zeros as necessary
q_k = np.block([[np.eye(k), np.zeros((k, m - k))], [np.zeros((m - k, k)), q_k]])
q = q @ q_k.T
r = q_k @ r
return q, r
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/quadratic_equations_complex_numbers.py
================================================
from __future__ import annotations
from cmath import sqrt
def quadratic_roots(a: int, b: int, c: int) -> tuple[complex, complex]:
"""
Given the numerical coefficients a, b and c,
calculates the roots of the quadratic equation ax^2 + bx + c = 0
>>> quadratic_roots(a=1, b=3, c=-4)
(1.0, -4.0)
>>> quadratic_roots(5, 6, 1)
(-0.2, -1.0)
>>> quadratic_roots(1, -6, 25)
((3+4j), (3-4j))
"""
if a == 0:
raise ValueError("Coefficient 'a' must not be zero.")
delta = b * b - 4 * a * c
root_1 = (-b + sqrt(delta)) / (2 * a)
root_2 = (-b - sqrt(delta)) / (2 * a)
return (
root_1.real if not root_1.imag else root_1,
root_2.real if not root_2.imag else root_2,
)
def main():
solution1, solution2 = quadratic_roots(a=5, b=6, c=1)
print(f"The solutions are: {solution1} and {solution2}")
if __name__ == "__main__":
main()
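    # --- illustrative sketch of the three discriminant cases (added for clarity) ---
    # b^2 - 4ac > 0 gives two distinct real roots, = 0 a repeated real root,
    # and < 0 a complex-conjugate pair.
    print(quadratic_roots(1, -5, 6))  # (3.0, 2.0)
    print(quadratic_roots(1, -2, 1))  # (1.0, 1.0)
    print(quadratic_roots(1, 0, 4))   # (2j, -2j)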
================================================
FILE: maths/radians.py
================================================
from math import pi
def radians(degree: float) -> float:
"""
Converts the given angle from degrees to radians
https://en.wikipedia.org/wiki/Radian
>>> radians(180)
3.141592653589793
>>> radians(92)
1.6057029118347832
>>> radians(274)
4.782202150464463
>>> radians(109.82)
1.9167205845401725
>>> from math import radians as math_radians
>>> all(abs(radians(i) - math_radians(i)) <= 1e-8 for i in range(-2, 361))
True
"""
return degree / (180 / pi)
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: maths/radix2_fft.py
================================================
"""
Fast Polynomial Multiplication using radix-2 fast Fourier Transform.
"""
import mpmath # for roots of unity
import numpy as np
class FFT:
"""
Fast Polynomial Multiplication using radix-2 fast Fourier Transform.
Reference:
https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#The_radix-2_DIT_case
For polynomials of degree m and n, the algorithm has complexity
O(n*log(n) + m*log(m))
The main part of the algorithm is split in two parts:
1) __dft: We compute the discrete Fourier transform (DFT) of A and B using a
bottom-up dynamic approach.
2) __multiply: We multiply the two DFTs pointwise to obtain the DFT of A*B,
then invert it in the same fashion to recover the coefficients of A*B.
The class FFT takes two polynomials A and B with complex coefficients as arguments;
The two polynomials should be represented as a sequence of coefficients starting
from the free term. Thus, for instance x + 2*x^3 could be represented as
[0,1,0,2] or (0,1,0,2). The constructor adds some zeros at the end so that the
polynomials have the same length which is a power of 2 at least the length of
their product.
Example:
Create two polynomials as sequences
>>> A = [0, 1, 0, 2] # x+2x^3
>>> B = (2, 3, 4, 0) # 2+3x+4x^2
Create an FFT object with them
>>> x = FFT(A, B)
Print product
>>> x.product # 2x + 3x^2 + 8x^3 + 6x^4 + 8x^5
[(-0-0j), (2+0j), (3-0j), (8-0j), (6+0j), (8+0j)]
__str__ test
>>> print(x)
A = 0*x^0 + 1*x^1 + 0*x^2 + 2*x^3
B = 2*x^0 + 3*x^1 + 4*x^2
A*B = (-0-0j)*x^0 + (2+0j)*x^1 + (3-0j)*x^2 + (8-0j)*x^3 + (6+0j)*x^4 + (8+0j)*x^5
"""
def __init__(self, poly_a=None, poly_b=None):
# Input as list
self.polyA = list(poly_a or [0])[:]
self.polyB = list(poly_b or [0])[:]
# Remove leading zero coefficients
while self.polyA[-1] == 0:
self.polyA.pop()
self.len_A = len(self.polyA)
while self.polyB[-1] == 0:
self.polyB.pop()
self.len_B = len(self.polyB)
# Add 0 to make lengths equal a power of 2
self.c_max_length = int(
2 ** np.ceil(np.log2(len(self.polyA) + len(self.polyB) - 1))
)
while len(self.polyA) < self.c_max_length:
self.polyA.append(0)
while len(self.polyB) < self.c_max_length:
self.polyB.append(0)
# A complex root used for the fourier transform
self.root = complex(mpmath.root(x=1, n=self.c_max_length, k=1))
# The product
self.product = self.__multiply()
# Discrete fourier transform of A and B
def __dft(self, which):
dft = [[x] for x in self.polyA] if which == "A" else [[x] for x in self.polyB]
# Corner case
if len(dft) <= 1:
return dft[0]
next_ncol = self.c_max_length // 2
while next_ncol > 0:
new_dft = [[] for i in range(next_ncol)]
root = self.root**next_ncol
# First half of next step
current_root = 1
for j in range(self.c_max_length // (next_ncol * 2)):
for i in range(next_ncol):
new_dft[i].append(dft[i][j] + current_root * dft[i + next_ncol][j])
current_root *= root
# Second half of next step
current_root = 1
for j in range(self.c_max_length // (next_ncol * 2)):
for i in range(next_ncol):
new_dft[i].append(dft[i][j] - current_root * dft[i + next_ncol][j])
current_root *= root
# Update
dft = new_dft
next_ncol = next_ncol // 2
return dft[0]
# multiply the DFTs of A and B and find A*B
def __multiply(self):
dft_a = self.__dft("A")
dft_b = self.__dft("B")
inverce_c = [[dft_a[i] * dft_b[i] for i in range(self.c_max_length)]]
del dft_a
del dft_b
# Corner Case
if len(inverce_c[0]) <= 1:
return inverce_c[0]
# Inverse DFT
next_ncol = 2
while next_ncol <= self.c_max_length:
new_inverse_c = [[] for i in range(next_ncol)]
root = self.root ** (next_ncol // 2)
current_root = 1
# First half of next step
for j in range(self.c_max_length // next_ncol):
for i in range(next_ncol // 2):
# Even positions
new_inverse_c[i].append(
(
inverce_c[i][j]
+ inverce_c[i][j + self.c_max_length // next_ncol]
)
/ 2
)
# Odd positions
new_inverse_c[i + next_ncol // 2].append(
(
inverce_c[i][j]
- inverce_c[i][j + self.c_max_length // next_ncol]
)
/ (2 * current_root)
)
current_root *= root
# Update
inverce_c = new_inverse_c
next_ncol *= 2
# Unpack
inverce_c = [
complex(round(x[0].real, 8), round(x[0].imag, 8)) for x in inverce_c
]
# Remove leading 0's
while inverce_c[-1] == 0:
inverce_c.pop()
return inverce_c
# Override __str__ for print(); shows A, B and A*B
def __str__(self):
a = "A = " + " + ".join(
f"{coef}*x^{i}" for i, coef in enumerate(self.polyA[: self.len_A])
)
b = "B = " + " + ".join(
f"{coef}*x^{i}" for i, coef in enumerate(self.polyB[: self.len_B])
)
c = "A*B = " + " + ".join(
f"{coef}*x^{i}" for i, coef in enumerate(self.product)
)
return f"{a}\n{b}\n{c}"
# Unit tests
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/remove_digit.py
================================================
def remove_digit(num: int) -> int:
"""
returns the biggest possible result
that can be achieved by removing
one digit from the given number
>>> remove_digit(152)
52
>>> remove_digit(6385)
685
>>> remove_digit(-11)
1
>>> remove_digit(2222222)
222222
>>> remove_digit("2222222")
Traceback (most recent call last):
TypeError: only integers accepted as input
>>> remove_digit("string input")
Traceback (most recent call last):
TypeError: only integers accepted as input
"""
if not isinstance(num, int):
raise TypeError("only integers accepted as input")
else:
num_str = str(abs(num))
num_transpositions = [list(num_str) for char in range(len(num_str))]
for index in range(len(num_str)):
num_transpositions[index].pop(index)
return max(
int("".join(list(transposition))) for transposition in num_transpositions
)
if __name__ == "__main__":
__import__("doctest").testmod()
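    # Worked example (added for clarity): removing one digit from 6385 yields
    # 385, 685, 635 or 638, and the largest of these candidates is 685.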
================================================
FILE: maths/segmented_sieve.py
================================================
"""Segmented Sieve."""
import math
def sieve(n: int) -> list[int]:
"""
Segmented Sieve.
Examples:
>>> sieve(8)
[2, 3, 5, 7]
>>> sieve(27)
[2, 3, 5, 7, 11, 13, 17, 19, 23]
>>> sieve(0)
Traceback (most recent call last):
...
ValueError: Number 0 must instead be a positive integer
>>> sieve(-1)
Traceback (most recent call last):
...
ValueError: Number -1 must instead be a positive integer
>>> sieve(22.2)
Traceback (most recent call last):
...
ValueError: Number 22.2 must instead be a positive integer
"""
if n <= 0 or isinstance(n, float):
msg = f"Number {n} must instead be a positive integer"
raise ValueError(msg)
in_prime = []
start = 2
end = int(math.sqrt(n)) # Size of every segment
temp = [True] * (end + 1)
prime = []
while start <= end:
if temp[start] is True:
in_prime.append(start)
for i in range(start * start, end + 1, start):
temp[i] = False
start += 1
prime += in_prime
low = end + 1
high = min(2 * end, n)
while low <= n:
temp = [True] * (high - low + 1)
for each in in_prime:
t = math.floor(low / each) * each
if t < low:
t += each
for j in range(t, high + 1, each):
temp[j - low] = False
for j in range(len(temp)):
if temp[j] is True:
prime.append(j + low)
low = high + 1
high = min(high + end, n)
return prime
if __name__ == "__main__":
import doctest
doctest.testmod()
print(f"{sieve(10**6) = }")
================================================
FILE: maths/series/__init__.py
================================================
================================================
FILE: maths/series/arithmetic.py
================================================
"""
Arithmetic mean
Reference: https://en.wikipedia.org/wiki/Arithmetic_mean
Arithmetic series
Reference: https://en.wikipedia.org/wiki/Arithmetic_series
(The URL above will redirect you to arithmetic progression)
"""
def is_arithmetic_series(series: list) -> bool:
"""
Checks whether the input series is an arithmetic series or not
>>> is_arithmetic_series([2, 4, 6])
True
>>> is_arithmetic_series([3, 6, 12, 24])
False
>>> is_arithmetic_series([1, 2, 3])
True
>>> is_arithmetic_series(4)
Traceback (most recent call last):
...
ValueError: Input series is not valid, valid series - [2, 4, 6]
>>> is_arithmetic_series([])
Traceback (most recent call last):
...
ValueError: Input list must be a non empty list
"""
if not isinstance(series, list):
raise ValueError("Input series is not valid, valid series - [2, 4, 6]")
if len(series) == 0:
raise ValueError("Input list must be a non empty list")
if len(series) == 1:
return True
common_diff = series[1] - series[0]
for index in range(len(series) - 1):
if series[index + 1] - series[index] != common_diff:
return False
return True
def arithmetic_mean(series: list) -> float:
"""
return the arithmetic mean of series
>>> arithmetic_mean([2, 4, 6])
4.0
>>> arithmetic_mean([3, 6, 9, 12])
7.5
>>> arithmetic_mean(4)
Traceback (most recent call last):
...
ValueError: Input series is not valid, valid series - [2, 4, 6]
>>> arithmetic_mean([4, 8, 1])
4.333333333333333
>>> arithmetic_mean([1, 2, 3])
2.0
>>> arithmetic_mean([])
Traceback (most recent call last):
...
ValueError: Input list must be a non empty list
"""
if not isinstance(series, list):
raise ValueError("Input series is not valid, valid series - [2, 4, 6]")
if len(series) == 0:
raise ValueError("Input list must be a non empty list")
answer = 0
for val in series:
answer += val
return answer / len(series)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/series/geometric.py
================================================
"""
Geometric Mean
Reference : https://en.wikipedia.org/wiki/Geometric_mean
Geometric series
Reference: https://en.wikipedia.org/wiki/Geometric_series
"""
def is_geometric_series(series: list) -> bool:
"""
Checks whether the input series is a geometric series or not
>>> is_geometric_series([2, 4, 8])
True
>>> is_geometric_series([3, 6, 12, 24])
True
>>> is_geometric_series([1, 2, 3])
False
>>> is_geometric_series([0, 0, 3])
False
>>> is_geometric_series([])
Traceback (most recent call last):
...
ValueError: Input list must be a non empty list
>>> is_geometric_series(4)
Traceback (most recent call last):
...
ValueError: Input series is not valid, valid series - [2, 4, 8]
"""
if not isinstance(series, list):
raise ValueError("Input series is not valid, valid series - [2, 4, 8]")
if len(series) == 0:
raise ValueError("Input list must be a non empty list")
if len(series) == 1:
return True
try:
common_ratio = series[1] / series[0]
for index in range(len(series) - 1):
if series[index + 1] / series[index] != common_ratio:
return False
except ZeroDivisionError:
return False
return True
def geometric_mean(series: list) -> float:
"""
return the geometric mean of series
>>> geometric_mean([2, 4, 8])
3.9999999999999996
>>> geometric_mean([3, 6, 12, 24])
8.48528137423857
>>> geometric_mean([4, 8, 16])
7.999999999999999
>>> geometric_mean(4)
Traceback (most recent call last):
...
ValueError: Input series is not valid, valid series - [2, 4, 8]
>>> geometric_mean([1, 2, 3])
1.8171205928321397
>>> geometric_mean([0, 2, 3])
0.0
>>> geometric_mean([])
Traceback (most recent call last):
...
ValueError: Input list must be a non empty list
"""
if not isinstance(series, list):
raise ValueError("Input series is not valid, valid series - [2, 4, 8]")
if len(series) == 0:
raise ValueError("Input list must be a non empty list")
answer = 1
for value in series:
answer *= value
return pow(answer, 1 / len(series))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/series/geometric_series.py
================================================
"""
This is a pure Python implementation of the Geometric Series algorithm
https://en.wikipedia.org/wiki/Geometric_series
Run the doctests with the following command:
python3 -m doctest -v geometric_series.py
or
python -m doctest -v geometric_series.py
For manual testing run:
python3 geometric_series.py
"""
from __future__ import annotations
def geometric_series(
nth_term: float,
start_term_a: float,
common_ratio_r: float,
) -> list[float]:
"""
Pure Python implementation of Geometric Series algorithm
:param nth_term: The last term (nth term of Geometric Series)
:param start_term_a : The first term of Geometric Series
:param common_ratio_r : The common ratio between all the terms
:return: The geometric series starting from the first term a, where each
subsequent term is the previous term multiplied by the common ratio r, up to the nth term
Examples:
>>> geometric_series(4, 2, 2)
[2, 4.0, 8.0, 16.0]
>>> geometric_series(4.0, 2.0, 2.0)
[2.0, 4.0, 8.0, 16.0]
>>> geometric_series(4.1, 2.1, 2.1)
[2.1, 4.41, 9.261000000000001, 19.448100000000004]
>>> geometric_series(4, 2, -2)
[2, -4.0, 8.0, -16.0]
>>> geometric_series(4, -2, 2)
[-2, -4.0, -8.0, -16.0]
>>> geometric_series(-4, 2, 2)
[]
>>> geometric_series(0, 100, 500)
[]
>>> geometric_series(1, 1, 1)
[1]
>>> geometric_series(0, 0, 0)
[]
"""
if not all((nth_term, start_term_a, common_ratio_r)):
return []
series: list[float] = []
power = 1
multiple = common_ratio_r
for _ in range(int(nth_term)):
if not series:
series.append(start_term_a)
else:
power += 1
series.append(float(start_term_a * multiple))
multiple = pow(float(common_ratio_r), power)
return series
if __name__ == "__main__":
import doctest
doctest.testmod()
nth_term = float(input("Enter the last number (n term) of the Geometric Series"))
start_term_a = float(input("Enter the starting term (a) of the Geometric Series"))
common_ratio_r = float(
input("Enter the common ratio between two terms (r) of the Geometric Series")
)
print("Formula of Geometric Series => a + ar + ar^2 ... +ar^n")
print(geometric_series(nth_term, start_term_a, common_ratio_r))
================================================
FILE: maths/series/harmonic.py
================================================
"""
Harmonic mean
Reference: https://en.wikipedia.org/wiki/Harmonic_mean
Harmonic series
Reference: https://en.wikipedia.org/wiki/Harmonic_series(mathematics)
"""
def is_harmonic_series(series: list) -> bool:
"""
Checks whether the input series is a harmonic series or not
>>> is_harmonic_series([ 1, 2/3, 1/2, 2/5, 1/3])
True
>>> is_harmonic_series([ 1, 2/3, 2/5, 1/3])
False
>>> is_harmonic_series([1, 2, 3])
False
>>> is_harmonic_series([1/2, 1/3, 1/4])
True
>>> is_harmonic_series([2/5, 2/10, 2/15, 2/20, 2/25])
True
>>> is_harmonic_series(4)
Traceback (most recent call last):
...
ValueError: Input series is not valid, valid series - [1, 2/3, 2]
>>> is_harmonic_series([])
Traceback (most recent call last):
...
ValueError: Input list must be a non empty list
>>> is_harmonic_series([0])
Traceback (most recent call last):
...
ValueError: Input series cannot have 0 as an element
>>> is_harmonic_series([1,2,0,6])
Traceback (most recent call last):
...
ValueError: Input series cannot have 0 as an element
"""
if not isinstance(series, list):
raise ValueError("Input series is not valid, valid series - [1, 2/3, 2]")
if len(series) == 0:
raise ValueError("Input list must be a non empty list")
if len(series) == 1 and series[0] != 0:
return True
rec_series = []
series_len = len(series)
for i in range(series_len):
if series[i] == 0:
raise ValueError("Input series cannot have 0 as an element")
rec_series.append(1 / series[i])
common_diff = rec_series[1] - rec_series[0]
for index in range(2, series_len):
if rec_series[index] - rec_series[index - 1] != common_diff:
return False
return True
def harmonic_mean(series: list) -> float:
"""
return the harmonic mean of series
>>> harmonic_mean([1, 4, 4])
2.0
>>> harmonic_mean([3, 6, 9, 12])
5.759999999999999
>>> harmonic_mean(4)
Traceback (most recent call last):
...
ValueError: Input series is not valid, valid series - [2, 4, 6]
>>> harmonic_mean([1, 2, 3])
1.6363636363636365
>>> harmonic_mean([])
Traceback (most recent call last):
...
ValueError: Input list must be a non empty list
"""
if not isinstance(series, list):
raise ValueError("Input series is not valid, valid series - [2, 4, 6]")
if len(series) == 0:
raise ValueError("Input list must be a non empty list")
answer = 0
for val in series:
answer += 1 / val
return len(series) / answer
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/series/harmonic_series.py
================================================
"""
This is a pure Python implementation of the Harmonic Series algorithm
https://en.wikipedia.org/wiki/Harmonic_series_(mathematics)
For doctests run following command:
python -m doctest -v harmonic_series.py
or
python3 -m doctest -v harmonic_series.py
For manual testing run:
python3 harmonic_series.py
"""
def harmonic_series(n_term: str) -> list:
"""Pure Python implementation of Harmonic Series algorithm
:param n_term: The last (nth) term of Harmonic Series
:return: The Harmonic Series starting from 1 to last (nth) term
Examples:
>>> harmonic_series(5)
['1', '1/2', '1/3', '1/4', '1/5']
>>> harmonic_series(5.0)
['1', '1/2', '1/3', '1/4', '1/5']
>>> harmonic_series(5.1)
['1', '1/2', '1/3', '1/4', '1/5']
>>> harmonic_series(-5)
[]
>>> harmonic_series(0)
[]
>>> harmonic_series(1)
['1']
"""
if n_term == "":
return []
series: list = []
for temp in range(int(n_term)):
series.append(f"1/{temp + 1}" if series else "1")
return series
if __name__ == "__main__":
nth_term = input("Enter the last number (nth term) of the Harmonic Series")
print("Formula of Harmonic Series => 1+1/2+1/3 ..... 1/n")
print(harmonic_series(nth_term))
================================================
FILE: maths/series/hexagonal_numbers.py
================================================
"""
A hexagonal number sequence is a sequence of figurate numbers
where the nth hexagonal number hₙ is the number of distinct dots
in a pattern of dots consisting of the outlines of regular
hexagons with sides up to n dots, when the hexagons are overlaid
so that they share one vertex.
Calculates the hexagonal numbers sequence with a formula
hₙ = n(2n-1)
where:
hₙ --> is nth element of the sequence
n --> is the number of element in the sequence
reference-->"Hexagonal number" Wikipedia
"""
def hexagonal_numbers(length: int) -> list[int]:
"""
:param length: max number of elements
:type length: int
:return: Hexagonal numbers as a list
Tests:
>>> hexagonal_numbers(10)
[0, 1, 6, 15, 28, 45, 66, 91, 120, 153]
>>> hexagonal_numbers(5)
[0, 1, 6, 15, 28]
>>> hexagonal_numbers(0)
Traceback (most recent call last):
...
ValueError: Length must be a positive integer.
"""
if length <= 0 or not isinstance(length, int):
raise ValueError("Length must be a positive integer.")
return [n * (2 * n - 1) for n in range(length)]
if __name__ == "__main__":
print(hexagonal_numbers(length=5))
print(hexagonal_numbers(length=10))
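    # Worked check (added for clarity): the last element printed above for
    # length=10 corresponds to n = 9, and the formula gives 9 * (2 * 9 - 1) = 153.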
================================================
FILE: maths/series/p_series.py
================================================
"""
This is a pure Python implementation of the P-Series algorithm
https://en.wikipedia.org/wiki/Harmonic_series_(mathematics)#P-series
For doctests run following command:
python -m doctest -v p_series.py
or
python3 -m doctest -v p_series.py
For manual testing run:
python3 p_series.py
"""
from __future__ import annotations
def p_series(nth_term: float | str, power: float | str) -> list[str]:
"""
Pure Python implementation of P-Series algorithm
:return: The P-Series starting from 1 to last (nth) term
Examples:
>>> p_series(5, 2)
['1', '1 / 4', '1 / 9', '1 / 16', '1 / 25']
>>> p_series(-5, 2)
[]
>>> p_series(5, -2)
['1', '1 / 0.25', '1 / 0.1111111111111111', '1 / 0.0625', '1 / 0.04']
>>> p_series("", 1000)
['']
>>> p_series(0, 0)
[]
>>> p_series(1, 1)
['1']
"""
if nth_term == "":
return [""]
nth_term = int(nth_term)
power = int(power)
series: list[str] = []
for temp in range(int(nth_term)):
series.append(f"1 / {pow(temp + 1, int(power))}" if series else "1")
return series
if __name__ == "__main__":
import doctest
doctest.testmod()
nth_term = int(input("Enter the last number (nth term) of the P-Series"))
power = int(input("Enter the power for P-Series"))
print("Formula of P-Series => 1+1/2^p+1/3^p ..... 1/n^p")
print(p_series(nth_term, power))
================================================
FILE: maths/sieve_of_eratosthenes.py
================================================
"""
Sieve of Eratosthenes
The sieve of Eratosthenes is an algorithm used to find prime numbers, less than or
equal to a given value.
Illustration:
https://upload.wikimedia.org/wikipedia/commons/b/b9/Sieve_of_Eratosthenes_animation.gif
Reference: https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes
doctest provider: Bruno Simas Hadlich (https://github.com/brunohadlich)
Also thanks to Dmitry (https://github.com/LizardWizzard) for finding the problem
"""
from __future__ import annotations
import math
def prime_sieve(num: int) -> list[int]:
"""
Returns a list with all prime numbers up to n.
>>> prime_sieve(50)
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
>>> prime_sieve(25)
[2, 3, 5, 7, 11, 13, 17, 19, 23]
>>> prime_sieve(10)
[2, 3, 5, 7]
>>> prime_sieve(9)
[2, 3, 5, 7]
>>> prime_sieve(2)
[2]
>>> prime_sieve(1)
[]
"""
if num <= 0:
msg = f"{num}: Invalid input, please enter a positive integer."
raise ValueError(msg)
sieve = [True] * (num + 1)
prime = []
start = 2
end = int(math.sqrt(num))
while start <= end:
# If start is a prime
if sieve[start] is True:
prime.append(start)
# Set multiples of start to False
for i in range(start * start, num + 1, start):
if sieve[i] is True:
sieve[i] = False
start += 1
for j in range(end + 1, num + 1):
if sieve[j] is True:
prime.append(j)
return prime
if __name__ == "__main__":
print(prime_sieve(int(input("Enter a positive integer: ").strip())))
================================================
FILE: maths/sigmoid.py
================================================
"""
This script demonstrates the implementation of the Sigmoid function.
The function takes a vector of K real numbers as input and applies 1 / (1 + exp(-x))
to each element. After passing through the sigmoid, every element of the vector lies between 0 and 1.
Script inspired by its corresponding Wikipedia article
https://en.wikipedia.org/wiki/Sigmoid_function
"""
import numpy as np
def sigmoid(vector: np.ndarray) -> np.ndarray:
"""
Implements the sigmoid function
Parameters:
vector (np.array): A numpy array of shape (1,n)
consisting of real values
Returns:
sigmoid_vec (np.array): The input numpy array, after applying
sigmoid.
Examples:
>>> sigmoid(np.array([-1.0, 1.0, 2.0]))
array([0.26894142, 0.73105858, 0.88079708])
>>> sigmoid(np.array([0.0]))
array([0.5])
"""
return 1 / (1 + np.exp(-vector))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/signum.py
================================================
"""
Signum function -- https://en.wikipedia.org/wiki/Sign_function
"""
def signum(num: float) -> int:
"""
Applies signum function on the number
Custom test cases:
>>> signum(-10)
-1
>>> signum(10)
1
>>> signum(0)
0
>>> signum(-20.5)
-1
>>> signum(20.5)
1
>>> signum(-1e-6)
-1
>>> signum(1e-6)
1
>>> signum("Hello")
Traceback (most recent call last):
...
TypeError: '<' not supported between instances of 'str' and 'int'
>>> signum([])
Traceback (most recent call last):
...
TypeError: '<' not supported between instances of 'list' and 'int'
"""
if num < 0:
return -1
return 1 if num else 0
def test_signum() -> None:
"""
Tests the signum function
>>> test_signum()
"""
assert signum(5) == 1
assert signum(-5) == -1
assert signum(0) == 0
assert signum(10.5) == 1
assert signum(-10.5) == -1
assert signum(1e-6) == 1
assert signum(-1e-6) == -1
assert signum(123456789) == 1
assert signum(-123456789) == -1
if __name__ == "__main__":
print(signum(12))
print(signum(-12))
print(signum(0))
================================================
FILE: maths/simultaneous_linear_equation_solver.py
================================================
"""
https://en.wikipedia.org/wiki/Augmented_matrix
This algorithm solves simultaneous linear equations of the form
λa + λb + λc + λd + ... = y as [λ, λ, λ, λ, ..., y]
Where λ & y are individual coefficients, the no. of equations = no. of coefficients - 1
Note in order to work there must exist 1 equation where all instances of λ and y != 0
"""
def simplify(current_set: list[list]) -> list[list]:
"""
>>> simplify([[1, 2, 3], [4, 5, 6]])
[[1.0, 2.0, 3.0], [0.0, 0.75, 1.5]]
>>> simplify([[5, 2, 5], [5, 1, 10]])
[[1.0, 0.4, 1.0], [0.0, 0.2, -1.0]]
"""
# Divide each row by magnitude of first term --> creates 'unit' matrix
duplicate_set = current_set.copy()
for row_index, row in enumerate(duplicate_set):
magnitude = row[0]
for column_index, column in enumerate(row):
if magnitude == 0:
current_set[row_index][column_index] = column
continue
current_set[row_index][column_index] = column / magnitude
# Subtract to cancel term
first_row = current_set[0]
final_set = [first_row]
current_set = current_set[1::]
for row in current_set:
temp_row = []
# If first term is 0, it is already in form we want, so we preserve it
if row[0] == 0:
final_set.append(row)
continue
for column_index in range(len(row)):
temp_row.append(first_row[column_index] - row[column_index])
final_set.append(temp_row)
# Create next recursion iteration set
if len(final_set[0]) != 3:
current_first_row = final_set[0]
current_first_column = []
next_iteration = []
for row in final_set[1::]:
current_first_column.append(row[0])
next_iteration.append(row[1::])
resultant = simplify(next_iteration)
for i in range(len(resultant)):
resultant[i].insert(0, current_first_column[i])
resultant.insert(0, current_first_row)
final_set = resultant
return final_set
def solve_simultaneous(equations: list[list]) -> list:
"""
>>> solve_simultaneous([[1, 2, 3],[4, 5, 6]])
[-1.0, 2.0]
>>> solve_simultaneous([[0, -3, 1, 7],[3, 2, -1, 11],[5, 1, -2, 12]])
[6.4, 1.2, 10.6]
>>> solve_simultaneous([])
Traceback (most recent call last):
...
IndexError: solve_simultaneous() requires n lists of length n+1
>>> solve_simultaneous([[1, 2, 3],[1, 2]])
Traceback (most recent call last):
...
IndexError: solve_simultaneous() requires n lists of length n+1
>>> solve_simultaneous([[1, 2, 3],["a", 7, 8]])
Traceback (most recent call last):
...
ValueError: solve_simultaneous() requires lists of integers
>>> solve_simultaneous([[0, 2, 3],[4, 0, 6]])
Traceback (most recent call last):
...
ValueError: solve_simultaneous() requires at least 1 full equation
"""
if len(equations) == 0:
raise IndexError("solve_simultaneous() requires n lists of length n+1")
_length = len(equations) + 1
if any(len(item) != _length for item in equations):
raise IndexError("solve_simultaneous() requires n lists of length n+1")
for row in equations:
if any(not isinstance(column, (int, float)) for column in row):
raise ValueError("solve_simultaneous() requires lists of integers")
if len(equations) == 1:
return [equations[0][-1] / equations[0][0]]
data_set = equations.copy()
if any(0 in row for row in data_set):
temp_data = data_set.copy()
full_row = []
for row_index, row in enumerate(temp_data):
if 0 not in row:
full_row = data_set.pop(row_index)
break
if not full_row:
raise ValueError("solve_simultaneous() requires at least 1 full equation")
data_set.insert(0, full_row)
useable_form = data_set.copy()
simplified = simplify(useable_form)
simplified = simplified[::-1]
solutions: list = []
for row in simplified:
current_solution = row[-1]
if not solutions:
if row[-2] == 0:
solutions.append(0)
continue
solutions.append(current_solution / row[-2])
continue
temp_row = row.copy()[: len(row) - 1 :]
while temp_row[0] == 0:
temp_row.pop(0)
if len(temp_row) == 0:
solutions.append(0)
continue
temp_row = temp_row[1::]
temp_row = temp_row[::-1]
for column_index, column in enumerate(temp_row):
current_solution -= column * solutions[column_index]
solutions.append(current_solution)
final = []
for item in solutions:
final.append(float(round(item, 5)))
return final[::-1]
if __name__ == "__main__":
import doctest
doctest.testmod()
eq = [
[2, 1, 1, 1, 1, 4],
[1, 2, 1, 1, 1, 5],
[1, 1, 2, 1, 1, 6],
[1, 1, 1, 2, 1, 7],
[1, 1, 1, 1, 2, 8],
]
print(solve_simultaneous(eq))
print(solve_simultaneous([[4, 2]]))
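    # --- illustrative mapping from equations to the expected input format ---
    # (added for clarity; not part of the original module)
    # The system 2x + y = 4 and x + 3y = 7 is encoded as [[2, 1, 4], [1, 3, 7]].
    print(solve_simultaneous([[2, 1, 4], [1, 3, 7]]))  # [1.0, 2.0]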
================================================
FILE: maths/sin.py
================================================
"""
Calculate sin function.
It's not a perfect function so I am rounding the result to 10 decimal places by default.
Formula: sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
Where: x = angle in radians.
Source:
https://www.homeschoolmath.net/teaching/sine_calculator.php
"""
from math import factorial, radians
def sin(
angle_in_degrees: float, accuracy: int = 18, rounded_values_count: int = 10
) -> float:
"""
Implement sin function.
>>> sin(0.0)
0.0
>>> sin(90.0)
1.0
>>> sin(180.0)
0.0
>>> sin(270.0)
-1.0
>>> sin(0.68)
0.0118679603
>>> sin(1.97)
0.0343762121
>>> sin(64.0)
0.8987940463
>>> sin(9999.0)
-0.9876883406
>>> sin(-689.0)
0.5150380749
>>> sin(89.7)
0.9999862922
"""
# Simplify the angle to be between 360 and -360 degrees.
angle_in_degrees = angle_in_degrees - ((angle_in_degrees // 360.0) * 360.0)
# Converting from degrees to radians
angle_in_radians = radians(angle_in_degrees)
result = angle_in_radians
a = 3
b = -1
for _ in range(accuracy):
result += (b * (angle_in_radians**a)) / factorial(a)
b = -b # One positive term and the next will be negative and so on...
a += 2 # Increased by 2 for every term.
return round(result, rounded_values_count)
if __name__ == "__main__":
__import__("doctest").testmod()
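    # --- quick sanity check against the standard library (added; illustrative only) ---
    # The truncated Taylor series above should agree with math.sin to well
    # within the 10-decimal rounding used by this module.
    from math import sin as math_sin
    print(
        all(abs(sin(deg) - math_sin(radians(deg))) < 1e-7 for deg in range(0, 361, 15))
    )  # True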
================================================
FILE: maths/sock_merchant.py
================================================
from collections import Counter
def sock_merchant(colors: list[int]) -> int:
"""
>>> sock_merchant([10, 20, 20, 10, 10, 30, 50, 10, 20])
3
>>> sock_merchant([1, 1, 3, 3])
2
"""
return sum(socks_by_color // 2 for socks_by_color in Counter(colors).values())
if __name__ == "__main__":
import doctest
doctest.testmod()
colors = [int(x) for x in input("Enter socks by color :").rstrip().split()]
print(f"sock_merchant({colors}) = {sock_merchant(colors)}")
================================================
FILE: maths/softmax.py
================================================
"""
This script demonstrates the implementation of the Softmax function.
It's a function that takes as input a vector of K real numbers and normalizes
it into a probability distribution consisting of K probabilities proportional
to the exponentials of the input numbers. After softmax, the elements of the
vector always sum up to 1.
Script inspired from its corresponding Wikipedia article
https://en.wikipedia.org/wiki/Softmax_function
"""
import numpy as np
def softmax(vector):
"""
Implements the softmax function
Parameters:
vector (np.array,list,tuple): A numpy array of shape (1,n)
consisting of real values or a similar list,tuple
Returns:
softmax_vec (np.array): The input numpy array after applying
softmax.
The softmax vector adds up to one. We need to ceil to mitigate for
precision
>>> float(np.ceil(np.sum(softmax([1,2,3,4]))))
1.0
>>> vec = np.array([5,5])
>>> softmax(vec)
array([0.5, 0.5])
>>> softmax([0])
array([1.])
"""
# Calculate e^x for each x in your vector where e is Euler's
# number (approximately 2.718)
exponent_vector = np.exp(vector)
# Add up all the exponentials
sum_of_exponents = np.sum(exponent_vector)
# Divide every exponent by the sum of all exponents
softmax_vector = exponent_vector / sum_of_exponents
return softmax_vector
if __name__ == "__main__":
print(softmax((0,)))
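# --- illustrative sketch (added; not part of the original module) ---
# np.exp overflows for large inputs; a common, mathematically equivalent
# variant subtracts the maximum entry before exponentiating. Shown here only
# as a supplementary note to the implementation above.
def stable_softmax(vector):
    shifted = np.asarray(vector, dtype=float) - np.max(vector)
    exponent_vector = np.exp(shifted)
    return exponent_vector / np.sum(exponent_vector)
if __name__ == "__main__":
    print(stable_softmax([1000.0, 1000.0]))  # [0.5 0.5]; np.exp(1000.0) alone overflows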
================================================
FILE: maths/solovay_strassen_primality_test.py
================================================
"""
This script implements the Solovay-Strassen Primality test.
This probabilistic primality test is based on Euler's criterion. It is similar
to the Fermat test but uses quadratic residues. It can quickly identify
composite numbers but may occasionally classify composite numbers as prime.
More details and concepts about this can be found on:
https://en.wikipedia.org/wiki/Solovay%E2%80%93Strassen_primality_test
"""
import random
def jacobi_symbol(random_a: int, number: int) -> int:
"""
Calculate the Jacobi symbol. The Jacobi symbol is a generalization
of the Legendre symbol, which can be used to simplify computations involving
quadratic residues. The Jacobi symbol is used in primality tests, like the
Solovay-Strassen test, because it helps determine if an integer is a
quadratic residue modulo a given modulus, providing valuable information
about the number's potential primality or compositeness.
Parameters:
random_a: A randomly chosen integer from 2 to n-2 (inclusive)
number: The number that is tested for primality
Returns:
jacobi_symbol: The Jacobi symbol is a mathematical function
used to determine whether an integer is a quadratic residue modulo
another integer (usually prime) or not.
>>> jacobi_symbol(2, 13)
-1
>>> jacobi_symbol(5, 19)
1
>>> jacobi_symbol(7, 14)
0
"""
if random_a in (0, 1):
return random_a
random_a %= number
t = 1
while random_a != 0:
while random_a % 2 == 0:
random_a //= 2
r = number % 8
if r in (3, 5):
t = -t
random_a, number = number, random_a
if random_a % 4 == number % 4 == 3:
t = -t
random_a %= number
return t if number == 1 else 0
def solovay_strassen(number: int, iterations: int) -> bool:
"""
Check whether the input number is prime or not using
the Solovay-Strassen Primality test
Parameters:
number: The number that is tested for primality
iterations: The number of times that the test is run
which affects the accuracy
Returns:
result: True if number is probably prime and false
if not
>>> random.seed(10)
>>> solovay_strassen(13, 5)
True
>>> solovay_strassen(9, 10)
False
>>> solovay_strassen(17, 15)
True
"""
if number <= 1:
return False
if number <= 3:
return True
for _ in range(iterations):
a = random.randint(2, number - 2)
x = jacobi_symbol(a, number)
y = pow(a, (number - 1) // 2, number)
if x == 0 or y != x % number:
return False
return True
if __name__ == "__main__":
import doctest
doctest.testmod()
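# --- illustrative usage sketch (added; not part of the original module) ---
# 97 is prime, so the test always returns True for it; 221 = 13 * 17 is
# composite, so with several rounds the test returns False with very high
# probability (each round misses a composite with probability at most 1/2).
if __name__ == "__main__":
    print(solovay_strassen(97, 10))   # True
    print(solovay_strassen(221, 10))  # False (with overwhelming probability)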
================================================
FILE: maths/spearman_rank_correlation_coefficient.py
================================================
from collections.abc import Sequence
def assign_ranks(data: Sequence[float]) -> list[int]:
"""
Assigns ranks to elements in the array.
:param data: List of floats.
:return: List of ints representing the ranks.
Example:
>>> assign_ranks([3.2, 1.5, 4.0, 2.7, 5.1])
[3, 1, 4, 2, 5]
>>> assign_ranks([10.5, 8.1, 12.4, 9.3, 11.0])
[3, 1, 5, 2, 4]
"""
ranked_data = sorted((value, index) for index, value in enumerate(data))
ranks = [0] * len(data)
for position, (_, index) in enumerate(ranked_data):
ranks[index] = position + 1
return ranks
def calculate_spearman_rank_correlation(
variable_1: Sequence[float], variable_2: Sequence[float]
) -> float:
"""
Calculates Spearman's rank correlation coefficient.
:param variable_1: List of floats representing the first variable.
:param variable_2: List of floats representing the second variable.
:return: Spearman's rank correlation coefficient.
Example Usage:
>>> x = [1, 2, 3, 4, 5]
>>> y = [5, 4, 3, 2, 1]
>>> calculate_spearman_rank_correlation(x, y)
-1.0
>>> x = [1, 2, 3, 4, 5]
>>> y = [2, 4, 6, 8, 10]
>>> calculate_spearman_rank_correlation(x, y)
1.0
>>> x = [1, 2, 3, 4, 5]
>>> y = [5, 1, 2, 9, 5]
>>> calculate_spearman_rank_correlation(x, y)
0.6
"""
n = len(variable_1)
rank_var1 = assign_ranks(variable_1)
rank_var2 = assign_ranks(variable_2)
# Calculate differences of ranks
d = [rx - ry for rx, ry in zip(rank_var1, rank_var2)]
# Calculate the sum of squared differences
d_squared = sum(di**2 for di in d)
# Calculate the Spearman's rank correlation coefficient
rho = 1 - (6 * d_squared) / (n * (n**2 - 1))
return rho
if __name__ == "__main__":
import doctest
doctest.testmod()
# Example usage:
print(
f"{calculate_spearman_rank_correlation([1, 2, 3, 4, 5], [2, 4, 6, 8, 10]) = }"
)
print(f"{calculate_spearman_rank_correlation([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]) = }")
print(f"{calculate_spearman_rank_correlation([1, 2, 3, 4, 5], [5, 1, 2, 9, 5]) = }")
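    # Worked example (added for clarity): for x = [1, 2, 3, 4, 5] and
    # y = [5, 1, 2, 9, 5] the ranks are [1, 2, 3, 4, 5] and [3, 1, 2, 5, 4]
    # (ties broken by index), so d = [-2, 1, 1, -1, 1], sum(d^2) = 8 and
    # rho = 1 - 6 * 8 / (5 * (25 - 1)) = 1 - 48 / 120 = 0.6, matching the
    # value printed above.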
================================================
FILE: maths/special_numbers/__init__.py
================================================
================================================
FILE: maths/special_numbers/armstrong_numbers.py
================================================
"""
An Armstrong number is equal to the sum of its own digits each raised to the
power of the number of digits.
For example, 370 is an Armstrong number because 3*3*3 + 7*7*7 + 0*0*0 = 370.
Armstrong numbers are also called Narcissistic numbers and Pluperfect numbers.
On-Line Encyclopedia of Integer Sequences entry: https://oeis.org/A005188
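Worked example (added for clarity): 1634 has 4 digits and
1**4 + 6**4 + 3**4 + 4**4 = 1 + 1296 + 81 + 256 = 1634, so it is an Armstrong number.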
"""
PASSING = (1, 153, 370, 371, 1634, 24678051, 115132219018763992565095597973971522401)
FAILING: tuple = (-153, -1, 0, 1.2, 200, "A", [], {}, None)
def armstrong_number(n: int) -> bool:
"""
Return True if n is an Armstrong number or False if it is not.
>>> all(armstrong_number(n) for n in PASSING)
True
>>> any(armstrong_number(n) for n in FAILING)
False
"""
if not isinstance(n, int) or n < 1:
return False
    # The power to raise each digit to is the number of digits in n
    number_of_digits = len(str(n))
    # Split n into its digits and sum each digit raised to that power
    total = 0
    temp = n
while temp > 0:
rem = temp % 10
total += rem**number_of_digits
temp //= 10
return n == total
def pluperfect_number(n: int) -> bool:
"""Return True if n is a pluperfect number or False if it is not
>>> all(pluperfect_number(n) for n in PASSING)
True
>>> any(pluperfect_number(n) for n in FAILING)
False
"""
if not isinstance(n, int) or n < 1:
return False
# Init a "histogram" of the digits
digit_histogram = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
digit_total = 0
total = 0
temp = n
while temp > 0:
temp, rem = divmod(temp, 10)
digit_histogram[rem] += 1
digit_total += 1
    for digit_value, count in enumerate(digit_histogram):
        total += count * digit_value**digit_total
return n == total
def narcissistic_number(n: int) -> bool:
"""Return True if n is a narcissistic number or False if it is not.
>>> all(narcissistic_number(n) for n in PASSING)
True
>>> any(narcissistic_number(n) for n in FAILING)
False
"""
if not isinstance(n, int) or n < 1:
return False
expo = len(str(n)) # the power that all digits will be raised to
# check if sum of each digit multiplied expo times is equal to number
return n == sum(int(i) ** expo for i in str(n))
def main():
"""
    Request that the user input an integer and report whether it is an Armstrong number.
"""
num = int(input("Enter an integer to see if it is an Armstrong number: ").strip())
print(f"{num} is {'' if armstrong_number(num) else 'not '}an Armstrong number.")
print(f"{num} is {'' if narcissistic_number(num) else 'not '}an Armstrong number.")
print(f"{num} is {'' if pluperfect_number(num) else 'not '}an Armstrong number.")
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
================================================
FILE: maths/special_numbers/automorphic_number.py
================================================
"""
== Automorphic Numbers ==
A number n is said to be an Automorphic number if
the square of n "ends" in the same digits as n itself.
Examples of Automorphic Numbers: 0, 1, 5, 6, 25, 76, 376, 625, 9376, 90625, ...
https://en.wikipedia.org/wiki/Automorphic_number
"""
# Author : Akshay Dubey (https://github.com/itsAkshayDubey)
# Time Complexity : O(log10n)
def is_automorphic_number(number: int) -> bool:
"""
    This function takes an integer as input and
    returns True if the number is automorphic.
>>> is_automorphic_number(-1)
False
>>> is_automorphic_number(0)
True
>>> is_automorphic_number(5)
True
>>> is_automorphic_number(6)
True
>>> is_automorphic_number(7)
False
>>> is_automorphic_number(25)
True
>>> is_automorphic_number(259918212890625)
True
>>> is_automorphic_number(259918212890636)
False
>>> is_automorphic_number(740081787109376)
True
>>> is_automorphic_number(5.0)
Traceback (most recent call last):
...
TypeError: Input value of [number=5.0] must be an integer
"""
if not isinstance(number, int):
msg = f"Input value of [number={number}] must be an integer"
raise TypeError(msg)
if number < 0:
return False
number_square = number * number
while number > 0:
if number % 10 != number_square % 10:
return False
number //= 10
number_square //= 10
return True
if __name__ == "__main__":
import doctest
doctest.testmod()
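As a quick illustration of the digit-by-digit loop above, 76 is automorphic because 76^2 = 5776 ends in 76. The standalone trace below (not from the repository) mirrors the comparison performed by is_automorphic_number.

number, square = 76, 76 * 76  # square == 5776
while number > 0:
    assert number % 10 == square % 10  # compares 6 with 6, then 7 with 7
    number //= 10
    square //= 10
print("76 is automorphic")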
================================================
FILE: maths/special_numbers/bell_numbers.py
================================================
"""
Bell numbers count the number of ways to partition a set into non-empty
subsets. This module provides a function that calculates the Bell numbers
for set sizes 0 through n, i.e. the first (n + 1) Bell numbers.
For more information about Bell numbers, refer to:
https://en.wikipedia.org/wiki/Bell_number
"""
def bell_numbers(max_set_length: int) -> list[int]:
"""
Calculate Bell numbers for the sets of lengths from 0 to max_set_length.
In other words, calculate first (max_set_length + 1) Bell numbers.
Args:
max_set_length (int): The maximum length of the sets for which
Bell numbers are calculated.
Returns:
list: A list of Bell numbers for sets of lengths from 0 to max_set_length.
Examples:
>>> bell_numbers(-2)
Traceback (most recent call last):
...
ValueError: max_set_length must be non-negative
>>> bell_numbers(0)
[1]
>>> bell_numbers(1)
[1, 1]
>>> bell_numbers(5)
[1, 1, 2, 5, 15, 52]
"""
if max_set_length < 0:
raise ValueError("max_set_length must be non-negative")
bell = [0] * (max_set_length + 1)
bell[0] = 1
for i in range(1, max_set_length + 1):
for j in range(i):
bell[i] += _binomial_coefficient(i - 1, j) * bell[j]
return bell
def _binomial_coefficient(total_elements: int, elements_to_choose: int) -> int:
"""
Calculate the binomial coefficient C(total_elements, elements_to_choose)
Args:
total_elements (int): The total number of elements.
elements_to_choose (int): The number of elements to choose.
Returns:
int: The binomial coefficient C(total_elements, elements_to_choose).
Examples:
>>> _binomial_coefficient(5, 2)
10
>>> _binomial_coefficient(6, 3)
20
"""
if elements_to_choose in {0, total_elements}:
return 1
elements_to_choose = min(elements_to_choose, total_elements - elements_to_choose)
coefficient = 1
for i in range(elements_to_choose):
coefficient *= total_elements - i
coefficient //= i + 1
return coefficient
if __name__ == "__main__":
import doctest
doctest.testmod()
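The nested loops above implement the recurrence B(n) = sum over k from 0 to n-1 of C(n-1, k) * B(k). A standalone worked check of B(3) = 5 (separate from the file above), matching bell_numbers(3)[-1]:

from math import comb

bell = [1, 1, 2]  # B(0), B(1), B(2)
b3 = sum(comb(2, k) * bell[k] for k in range(3))  # 1*1 + 2*1 + 1*2
print(b3)  # 5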
================================================
FILE: maths/special_numbers/carmichael_number.py
================================================
"""
== Carmichael Numbers ==
A composite number n is said to be a Carmichael number if it
satisfies the following modular arithmetic condition:
power(b, n-1) MOD n = 1,
for all b ranging from 1 to n such that b and
n are relatively prime, i.e., gcd(b, n) = 1
Examples of Carmichael Numbers: 561, 1105, ...
https://en.wikipedia.org/wiki/Carmichael_number
"""
from maths.greatest_common_divisor import greatest_common_divisor
def power(x: int, y: int, mod: int) -> int:
"""
Examples:
>>> power(2, 15, 3)
2
>>> power(5, 1, 30)
5
"""
if y == 0:
return 1
temp = power(x, y // 2, mod) % mod
temp = (temp * temp) % mod
if y % 2 == 1:
temp = (temp * x) % mod
return temp
def is_carmichael_number(n: int) -> bool:
"""
Examples:
>>> is_carmichael_number(4)
False
>>> is_carmichael_number(561)
True
>>> is_carmichael_number(562)
False
>>> is_carmichael_number(900)
False
>>> is_carmichael_number(1105)
True
>>> is_carmichael_number(8911)
True
>>> is_carmichael_number(5.1)
Traceback (most recent call last):
...
ValueError: Number 5.1 must instead be a positive integer
>>> is_carmichael_number(-7)
Traceback (most recent call last):
...
ValueError: Number -7 must instead be a positive integer
>>> is_carmichael_number(0)
Traceback (most recent call last):
...
ValueError: Number 0 must instead be a positive integer
"""
if n <= 0 or not isinstance(n, int):
msg = f"Number {n} must instead be a positive integer"
raise ValueError(msg)
return all(
power(b, n - 1, n) == 1
for b in range(2, n)
if greatest_common_divisor(b, n) == 1
)
if __name__ == "__main__":
import doctest
doctest.testmod()
number = int(input("Enter number: ").strip())
if is_carmichael_number(number):
print(f"{number} is a Carmichael Number.")
else:
print(f"{number} is not a Carmichael Number.")
================================================
FILE: maths/special_numbers/catalan_number.py
================================================
"""
Calculate the nth Catalan number
Source:
https://en.wikipedia.org/wiki/Catalan_number
"""
def catalan(number: int) -> int:
"""
:param number: nth catalan number to calculate
:return: the nth catalan number
Note: A catalan number is only defined for positive integers
>>> catalan(5)
14
>>> catalan(0)
Traceback (most recent call last):
...
ValueError: Input value of [number=0] must be > 0
>>> catalan(-1)
Traceback (most recent call last):
...
ValueError: Input value of [number=-1] must be > 0
>>> catalan(5.0)
Traceback (most recent call last):
...
TypeError: Input value of [number=5.0] must be an integer
"""
if not isinstance(number, int):
msg = f"Input value of [number={number}] must be an integer"
raise TypeError(msg)
if number < 1:
msg = f"Input value of [number={number}] must be > 0"
raise ValueError(msg)
current_number = 1
for i in range(1, number):
current_number *= 4 * i - 2
current_number //= i + 1
return current_number
if __name__ == "__main__":
import doctest
doctest.testmod()
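The loop above applies the recurrence C(i) = C(i-1) * (4i - 2) / (i + 1); the division is exact at every step because each Catalan number is an integer. A standalone trace of catalan(5), separate from the file:

current = 1
for i in range(1, 5):  # catalan(5) runs i = 1 .. 4
    current = current * (4 * i - 2) // (i + 1)  # 1 -> 1 -> 2 -> 5 -> 14
print(current)  # 14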
================================================
FILE: maths/special_numbers/hamming_numbers.py
================================================
"""
A Hamming number is a positive integer of the form 2^i*3^j*5^k, for some
non-negative integers i, j, and k. They are often referred to as regular numbers.
More info at: https://en.wikipedia.org/wiki/Regular_number.
"""
def hamming(n_element: int) -> list:
"""
    This function creates an ordered list of the first n Hamming numbers
    and returns the whole list. It must be given a positive integer.
    :param n_element: The number of elements in the list
    :return: The list of the first n Hamming numbers
>>> hamming(-5)
Traceback (most recent call last):
...
ValueError: n_element should be a positive number
>>> hamming(5)
[1, 2, 3, 4, 5]
>>> hamming(10)
[1, 2, 3, 4, 5, 6, 8, 9, 10, 12]
>>> hamming(15)
[1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24]
"""
n_element = int(n_element)
if n_element < 1:
        raise ValueError("n_element should be a positive number")
hamming_list = [1]
i, j, k = (0, 0, 0)
index = 1
while index < n_element:
while hamming_list[i] * 2 <= hamming_list[-1]:
i += 1
while hamming_list[j] * 3 <= hamming_list[-1]:
j += 1
while hamming_list[k] * 5 <= hamming_list[-1]:
k += 1
hamming_list.append(
min(hamming_list[i] * 2, hamming_list[j] * 3, hamming_list[k] * 5)
)
index += 1
return hamming_list
if __name__ == "__main__":
n = input("Enter the last number (nth term) of the Hamming Number Series: ")
print("Formula of Hamming Number Series => 2^i * 3^j * 5^k")
hamming_numbers = hamming(int(n))
print("-----------------------------------------------------")
print(f"The list with nth numbers is: {hamming_numbers}")
print("-----------------------------------------------------")
================================================
FILE: maths/special_numbers/happy_number.py
================================================
def is_happy_number(number: int) -> bool:
"""
A happy number is a number which eventually reaches 1 when replaced by the sum of
the square of each digit.
:param number: The number to check for happiness.
:return: True if the number is a happy number, False otherwise.
>>> is_happy_number(19)
True
>>> is_happy_number(2)
False
>>> is_happy_number(23)
True
>>> is_happy_number(1)
True
>>> is_happy_number(0)
Traceback (most recent call last):
...
ValueError: number=0 must be a positive integer
>>> is_happy_number(-19)
Traceback (most recent call last):
...
ValueError: number=-19 must be a positive integer
>>> is_happy_number(19.1)
Traceback (most recent call last):
...
ValueError: number=19.1 must be a positive integer
>>> is_happy_number("happy")
Traceback (most recent call last):
...
ValueError: number='happy' must be a positive integer
"""
if not isinstance(number, int) or number <= 0:
msg = f"{number=} must be a positive integer"
raise ValueError(msg)
seen = set()
while number != 1 and number not in seen:
seen.add(number)
number = sum(int(digit) ** 2 for digit in str(number))
return number == 1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/special_numbers/harshad_numbers.py
================================================
"""
A harshad number (or more specifically an n-harshad number) is a number that's
divisible by the sum of its digits in some given base n.
Reference: https://en.wikipedia.org/wiki/Harshad_number
"""
def int_to_base(number: int, base: int) -> str:
"""
Convert a given positive decimal integer to base 'base'.
Where 'base' ranges from 2 to 36.
Examples:
>>> int_to_base(0, 21)
'0'
>>> int_to_base(23, 2)
'10111'
>>> int_to_base(58, 5)
'213'
>>> int_to_base(167, 16)
'A7'
>>> # bases below 2 and beyond 36 will error
>>> int_to_base(98, 1)
Traceback (most recent call last):
...
ValueError: 'base' must be between 2 and 36 inclusive
>>> int_to_base(98, 37)
Traceback (most recent call last):
...
ValueError: 'base' must be between 2 and 36 inclusive
>>> int_to_base(-99, 16)
Traceback (most recent call last):
...
ValueError: number must be a positive integer
"""
if base < 2 or base > 36:
raise ValueError("'base' must be between 2 and 36 inclusive")
digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
result = ""
if number < 0:
raise ValueError("number must be a positive integer")
while number > 0:
number, remainder = divmod(number, base)
result = digits[remainder] + result
if result == "":
result = "0"
return result
def sum_of_digits(num: int, base: int) -> str:
"""
Calculate the sum of digit values in a positive integer
converted to the given 'base'.
Where 'base' ranges from 2 to 36.
Examples:
>>> sum_of_digits(103, 12)
'13'
>>> sum_of_digits(1275, 4)
'30'
>>> sum_of_digits(6645, 2)
'1001'
>>> # bases below 2 and beyond 36 will error
>>> sum_of_digits(543, 1)
Traceback (most recent call last):
...
ValueError: 'base' must be between 2 and 36 inclusive
>>> sum_of_digits(543, 37)
Traceback (most recent call last):
...
ValueError: 'base' must be between 2 and 36 inclusive
"""
if base < 2 or base > 36:
raise ValueError("'base' must be between 2 and 36 inclusive")
num_str = int_to_base(num, base)
res = sum(int(char, base) for char in num_str)
res_str = int_to_base(res, base)
return res_str
def harshad_numbers_in_base(limit: int, base: int) -> list[str]:
"""
    Finds all Harshad numbers smaller than limit in base 'base'.
Where 'base' ranges from 2 to 36.
Examples:
>>> harshad_numbers_in_base(15, 2)
['1', '10', '100', '110', '1000', '1010', '1100']
>>> harshad_numbers_in_base(12, 34)
['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B']
>>> harshad_numbers_in_base(12, 4)
['1', '2', '3', '10', '12', '20', '21']
>>> # bases below 2 and beyond 36 will error
>>> harshad_numbers_in_base(234, 37)
Traceback (most recent call last):
...
ValueError: 'base' must be between 2 and 36 inclusive
>>> harshad_numbers_in_base(234, 1)
Traceback (most recent call last):
...
ValueError: 'base' must be between 2 and 36 inclusive
>>> harshad_numbers_in_base(-12, 6)
[]
"""
if base < 2 or base > 36:
raise ValueError("'base' must be between 2 and 36 inclusive")
if limit < 0:
return []
numbers = [
int_to_base(i, base)
for i in range(1, limit)
if i % int(sum_of_digits(i, base), base) == 0
]
return numbers
def is_harshad_number_in_base(num: int, base: int) -> bool:
"""
    Determines whether num in base 'base' is a harshad number.
Where 'base' ranges from 2 to 36.
Examples:
>>> is_harshad_number_in_base(18, 10)
True
>>> is_harshad_number_in_base(21, 10)
True
>>> is_harshad_number_in_base(-21, 5)
False
>>> # bases below 2 and beyond 36 will error
>>> is_harshad_number_in_base(45, 37)
Traceback (most recent call last):
...
ValueError: 'base' must be between 2 and 36 inclusive
>>> is_harshad_number_in_base(45, 1)
Traceback (most recent call last):
...
ValueError: 'base' must be between 2 and 36 inclusive
"""
if base < 2 or base > 36:
raise ValueError("'base' must be between 2 and 36 inclusive")
if num < 0:
return False
n = int_to_base(num, base)
d = sum_of_digits(num, base)
return int(n, base) % int(d, base) == 0
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/special_numbers/hexagonal_number.py
================================================
"""
== Hexagonal Number ==
The nth hexagonal number hn is the number of distinct dots
in a pattern of dots consisting of the outlines of regular
hexagons with sides up to n dots, when the hexagons are
overlaid so that they share one vertex.
https://en.wikipedia.org/wiki/Hexagonal_number
"""
# Author : Akshay Dubey (https://github.com/itsAkshayDubey)
def hexagonal(number: int) -> int:
"""
:param number: nth hexagonal number to calculate
:return: the nth hexagonal number
Note: A hexagonal number is only defined for positive integers
>>> hexagonal(4)
28
>>> hexagonal(11)
231
>>> hexagonal(22)
946
>>> hexagonal(0)
Traceback (most recent call last):
...
ValueError: Input must be a positive integer
>>> hexagonal(-1)
Traceback (most recent call last):
...
ValueError: Input must be a positive integer
>>> hexagonal(11.0)
Traceback (most recent call last):
...
TypeError: Input value of [number=11.0] must be an integer
"""
if not isinstance(number, int):
msg = f"Input value of [number={number}] must be an integer"
raise TypeError(msg)
if number < 1:
raise ValueError("Input must be a positive integer")
return number * (2 * number - 1)
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/special_numbers/krishnamurthy_number.py
================================================
"""
== Krishnamurthy Number ==
It is also known as a Peterson Number.
A Krishnamurthy Number is a number for which the sum of the
factorials of its digits equals the number itself.
For example: 145 = 1! + 4! + 5! = 1 + 24 + 120,
so 145 is a Krishnamurthy Number.
"""
def factorial(digit: int) -> int:
"""
>>> factorial(3)
6
>>> factorial(0)
1
>>> factorial(5)
120
"""
return 1 if digit in (0, 1) else (digit * factorial(digit - 1))
def krishnamurthy(number: int) -> bool:
"""
>>> krishnamurthy(145)
True
>>> krishnamurthy(240)
False
>>> krishnamurthy(1)
True
"""
fact_sum = 0
duplicate = number
while duplicate > 0:
duplicate, digit = divmod(duplicate, 10)
fact_sum += factorial(digit)
return fact_sum == number
if __name__ == "__main__":
print("Program to check whether a number is a Krisnamurthy Number or not.")
number = int(input("Enter number: ").strip())
print(
f"{number} is {'' if krishnamurthy(number) else 'not '}a Krishnamurthy Number."
)
================================================
FILE: maths/special_numbers/perfect_number.py
================================================
"""
== Perfect Number ==
In number theory, a perfect number is a positive integer that is equal to the sum of
its positive divisors, excluding the number itself.
For example: 6 ==> divisors[1, 2, 3, 6]
Excluding 6, the sum(divisors) is 1 + 2 + 3 = 6
So, 6 is a Perfect Number
Other examples of Perfect Numbers: 28, 496, 8128, ...
https://en.wikipedia.org/wiki/Perfect_number
"""
def perfect(number: int) -> bool:
"""
Check if a number is a perfect number.
A perfect number is a positive integer that is equal to the sum of its proper
divisors (excluding itself).
Args:
number: The number to be checked.
Returns:
True if the number is a perfect number, False otherwise.
    The search starts from 1 because dividing by 0 would raise ZeroDivisionError.
    A proper divisor of a number can be at most half of the number;
    for example, the largest proper divisor of 6 is 3.
Examples:
>>> perfect(27)
False
>>> perfect(28)
True
>>> perfect(29)
False
>>> perfect(6)
True
>>> perfect(12)
False
>>> perfect(496)
True
>>> perfect(8128)
True
>>> perfect(0)
False
>>> perfect(-1)
False
>>> perfect(12.34)
Traceback (most recent call last):
...
ValueError: number must be an integer
>>> perfect("Hello")
Traceback (most recent call last):
...
ValueError: number must be an integer
"""
if not isinstance(number, int):
raise ValueError("number must be an integer")
if number <= 0:
return False
return sum(i for i in range(1, number // 2 + 1) if number % i == 0) == number
if __name__ == "__main__":
from doctest import testmod
testmod()
print("Program to check whether a number is a Perfect number or not...")
try:
number = int(input("Enter a positive integer: ").strip())
except ValueError:
msg = "number must be an integer"
print(msg)
raise ValueError(msg)
print(f"{number} is {'' if perfect(number) else 'not '}a Perfect Number.")
================================================
FILE: maths/special_numbers/polygonal_numbers.py
================================================
def polygonal_num(num: int, sides: int) -> int:
"""
Returns the `num`th `sides`-gonal number. It is assumed that `num` >= 0 and
`sides` >= 3 (see for reference https://en.wikipedia.org/wiki/Polygonal_number).
>>> polygonal_num(0, 3)
0
>>> polygonal_num(3, 3)
6
>>> polygonal_num(5, 4)
25
>>> polygonal_num(2, 5)
5
>>> polygonal_num(-1, 0)
Traceback (most recent call last):
...
ValueError: Invalid input: num must be >= 0 and sides must be >= 3.
>>> polygonal_num(0, 2)
Traceback (most recent call last):
...
ValueError: Invalid input: num must be >= 0 and sides must be >= 3.
"""
if num < 0 or sides < 3:
raise ValueError("Invalid input: num must be >= 0 and sides must be >= 3.")
return ((sides - 2) * num**2 - (sides - 4) * num) // 2
if __name__ == "__main__":
import doctest
doctest.testmod()
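Plugging the doctest polygonal_num(5, 4) into the closed form P(s, n) = ((s - 2) * n^2 - (s - 4) * n) / 2 confirms that the 5th square (4-gonal) number is 25. A standalone check, separate from the file above:

num, sides = 5, 4
value = ((sides - 2) * num**2 - (sides - 4) * num) // 2  # (2 * 25 - 0 * 5) // 2
print(value)  # 25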
================================================
FILE: maths/special_numbers/pronic_number.py
================================================
"""
== Pronic Number ==
A number n is said to be a Pronic number if
there exists an integer m such that n = m * (m + 1)
Examples of Pronic Numbers: 0, 2, 6, 12, 20, 30, 42, 56, 72, 90, 110 ...
https://en.wikipedia.org/wiki/Pronic_number
"""
# Author : Akshay Dubey (https://github.com/itsAkshayDubey)
def is_pronic(number: int) -> bool:
"""
    This function takes an integer as input and
    returns True if the number is pronic.
>>> is_pronic(-1)
False
>>> is_pronic(0)
True
>>> is_pronic(2)
True
>>> is_pronic(5)
False
>>> is_pronic(6)
True
>>> is_pronic(8)
False
>>> is_pronic(30)
True
>>> is_pronic(32)
False
>>> is_pronic(2147441940)
True
>>> is_pronic(9223372033963249500)
True
>>> is_pronic(6.0)
Traceback (most recent call last):
...
TypeError: Input value of [number=6.0] must be an integer
"""
if not isinstance(number, int):
msg = f"Input value of [number={number}] must be an integer"
raise TypeError(msg)
if number < 0 or number % 2 == 1:
return False
number_sqrt = int(number**0.5)
return number == number_sqrt * (number_sqrt + 1)
if __name__ == "__main__":
import doctest
doctest.testmod()
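is_pronic relies on the fact that if n = m * (m + 1) then m is the integer part of the square root of n. A standalone check (not from the repository) for 30 and the near miss 32:

for number in (30, 32):
    root = int(number**0.5)  # int(5.47...) == int(5.65...) == 5
    print(number, number == root * (root + 1))  # 30 True, 32 False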
================================================
FILE: maths/special_numbers/proth_number.py
================================================
"""
Calculate the nth Proth number
Source:
https://handwiki.org/wiki/Proth_number
"""
import math
def proth(number: int) -> int:
"""
:param number: nth number to calculate in the sequence
:return: the nth number in Proth number
    Note: indexing starts at 1, i.e. proth(1) gives the first Proth number, which is 3
>>> proth(6)
25
>>> proth(0)
Traceback (most recent call last):
...
ValueError: Input value of [number=0] must be > 0
>>> proth(-1)
Traceback (most recent call last):
...
ValueError: Input value of [number=-1] must be > 0
>>> proth(6.0)
Traceback (most recent call last):
...
TypeError: Input value of [number=6.0] must be an integer
"""
if not isinstance(number, int):
msg = f"Input value of [number={number}] must be an integer"
raise TypeError(msg)
if number < 1:
msg = f"Input value of [number={number}] must be > 0"
raise ValueError(msg)
elif number == 1:
return 3
elif number == 2:
return 5
else:
"""
+1 for binary starting at 0 i.e. 2^0, 2^1, etc.
+1 to start the sequence at the 3rd Proth number
Hence, we have a +2 in the below statement
"""
block_index = int(math.log(number // 3, 2)) + 2
proth_list = [3, 5]
proth_index = 2
increment = 3
for block in range(1, block_index):
for _ in range(increment):
proth_list.append(2 ** (block + 1) + proth_list[proth_index - 1])
proth_index += 1
increment *= 2
return proth_list[number - 1]
def is_proth_number(number: int) -> bool:
"""
:param number: positive integer number
:return: true if number is a Proth number, false otherwise
>>> is_proth_number(1)
False
>>> is_proth_number(2)
False
>>> is_proth_number(3)
True
>>> is_proth_number(4)
False
>>> is_proth_number(5)
True
>>> is_proth_number(34)
False
>>> is_proth_number(-1)
Traceback (most recent call last):
...
ValueError: Input value of [number=-1] must be > 0
>>> is_proth_number(6.0)
Traceback (most recent call last):
...
TypeError: Input value of [number=6.0] must be an integer
"""
if not isinstance(number, int):
message = f"Input value of [{number=}] must be an integer"
raise TypeError(message)
if number <= 0:
message = f"Input value of [{number=}] must be > 0"
raise ValueError(message)
if number == 1:
return False
number -= 1
n = 0
while number % 2 == 0:
n += 1
number //= 2
return number < 2**n
if __name__ == "__main__":
import doctest
doctest.testmod()
for number in range(11):
value = 0
try:
value = proth(number)
except ValueError:
print(f"ValueError: there is no {number}th Proth number")
continue
print(f"The {number}th Proth number: {value}")
for number in [1, 2, 3, 4, 5, 9, 13, 49, 57, 193, 241, 163, 201]:
if is_proth_number(number):
print(f"{number} is a Proth number")
else:
print(f"{number} is not a Proth number")
================================================
FILE: maths/special_numbers/triangular_numbers.py
================================================
"""
A triangular number or triangle number counts objects arranged in an
equilateral triangle. This module provides a function to generate the nth
triangular number.
For more information about triangular numbers, refer to:
https://en.wikipedia.org/wiki/Triangular_number
"""
def triangular_number(position: int) -> int:
"""
Generate the triangular number at the specified position.
Args:
position (int): The position of the triangular number to generate.
Returns:
int: The triangular number at the specified position.
Raises:
ValueError: If `position` is negative.
Examples:
>>> triangular_number(1)
1
>>> triangular_number(3)
6
>>> triangular_number(-1)
Traceback (most recent call last):
...
ValueError: param `position` must be non-negative
"""
if position < 0:
raise ValueError("param `position` must be non-negative")
return position * (position + 1) // 2
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/special_numbers/ugly_numbers.py
================================================
"""
Ugly numbers are numbers whose only prime factors are 2, 3 or 5. The sequence
1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, … shows the first 11 ugly numbers. By convention,
1 is included.
Given an integer n, we have to find the nth ugly number.
For more details, refer to this article:
https://www.geeksforgeeks.org/ugly-numbers/
"""
def ugly_numbers(n: int) -> int:
"""
Returns the nth ugly number.
>>> ugly_numbers(100)
1536
>>> ugly_numbers(0)
1
>>> ugly_numbers(20)
36
>>> ugly_numbers(-5)
1
>>> ugly_numbers(-5.5)
Traceback (most recent call last):
...
TypeError: 'float' object cannot be interpreted as an integer
"""
ugly_nums = [1]
i2, i3, i5 = 0, 0, 0
next_2 = ugly_nums[i2] * 2
next_3 = ugly_nums[i3] * 3
next_5 = ugly_nums[i5] * 5
for _ in range(1, n):
next_num = min(next_2, next_3, next_5)
ugly_nums.append(next_num)
if next_num == next_2:
i2 += 1
next_2 = ugly_nums[i2] * 2
if next_num == next_3:
i3 += 1
next_3 = ugly_nums[i3] * 3
if next_num == next_5:
i5 += 1
next_5 = ugly_nums[i5] * 5
return ugly_nums[-1]
if __name__ == "__main__":
from doctest import testmod
testmod(verbose=True)
print(f"{ugly_numbers(200) = }")
================================================
FILE: maths/special_numbers/weird_number.py
================================================
"""
https://en.wikipedia.org/wiki/Weird_number
Fun fact: The set of weird numbers has positive asymptotic density.
"""
from math import sqrt
def factors(number: int) -> list[int]:
"""
>>> factors(12)
[1, 2, 3, 4, 6]
>>> factors(1)
[1]
>>> factors(100)
[1, 2, 4, 5, 10, 20, 25, 50]
# >>> factors(-12)
# [1, 2, 3, 4, 6]
"""
values = [1]
for i in range(2, int(sqrt(number)) + 1, 1):
if number % i == 0:
values.append(i)
if int(number // i) != i:
values.append(int(number // i))
return sorted(values)
def abundant(n: int) -> bool:
"""
>>> abundant(0)
True
>>> abundant(1)
False
>>> abundant(12)
True
>>> abundant(13)
False
>>> abundant(20)
True
# >>> abundant(-12)
# True
"""
return sum(factors(n)) > n
def semi_perfect(number: int) -> bool:
"""
>>> semi_perfect(0)
True
>>> semi_perfect(1)
True
>>> semi_perfect(12)
True
>>> semi_perfect(13)
False
# >>> semi_perfect(-12)
# True
"""
values = factors(number)
r = len(values)
subset = [[0 for i in range(number + 1)] for j in range(r + 1)]
for i in range(r + 1):
subset[i][0] = True
for i in range(1, number + 1):
subset[0][i] = False
for i in range(1, r + 1):
for j in range(1, number + 1):
if j < values[i - 1]:
subset[i][j] = subset[i - 1][j]
else:
subset[i][j] = subset[i - 1][j] or subset[i - 1][j - values[i - 1]]
return subset[r][number] != 0
def weird(number: int) -> bool:
"""
>>> weird(0)
False
>>> weird(70)
True
>>> weird(77)
False
"""
return abundant(number) and not semi_perfect(number)
if __name__ == "__main__":
import doctest
doctest.testmod(verbose=True)
for number in (69, 70, 71):
print(f"{number} is {'' if weird(number) else 'not '}weird.")
================================================
FILE: maths/sum_of_arithmetic_series.py
================================================
# DarkCoder
def sum_of_series(first_term: int, common_diff: int, num_of_terms: int) -> float:
"""
Find the sum of n terms in an arithmetic progression.
>>> sum_of_series(1, 1, 10)
55.0
>>> sum_of_series(1, 10, 100)
49600.0
"""
total = (num_of_terms / 2) * (2 * first_term + (num_of_terms - 1) * common_diff)
# formula for sum of series
return total
def main():
print(sum_of_series(1, 1, 10))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/sum_of_digits.py
================================================
def sum_of_digits(n: int) -> int:
"""
Find the sum of digits of a number.
>>> sum_of_digits(12345)
15
>>> sum_of_digits(123)
6
>>> sum_of_digits(-123)
6
>>> sum_of_digits(0)
0
"""
n = abs(n)
res = 0
while n > 0:
res += n % 10
n //= 10
return res
def sum_of_digits_recursion(n: int) -> int:
"""
Find the sum of digits of a number using recursion
>>> sum_of_digits_recursion(12345)
15
>>> sum_of_digits_recursion(123)
6
>>> sum_of_digits_recursion(-123)
6
>>> sum_of_digits_recursion(0)
0
"""
n = abs(n)
    return n if n < 10 else n % 10 + sum_of_digits_recursion(n // 10)
def sum_of_digits_compact(n: int) -> int:
"""
Find the sum of digits of a number
>>> sum_of_digits_compact(12345)
15
>>> sum_of_digits_compact(123)
6
>>> sum_of_digits_compact(-123)
6
>>> sum_of_digits_compact(0)
0
"""
return sum(int(c) for c in str(abs(n)))
def benchmark() -> None:
"""
Benchmark multiple functions, with three different length int values.
"""
from collections.abc import Callable
from timeit import timeit
def benchmark_a_function(func: Callable, value: int) -> None:
call = f"{func.__name__}({value})"
timing = timeit(f"__main__.{call}", setup="import __main__")
print(f"{call:56} = {func(value)} -- {timing:.4f} seconds")
for value in (262144, 1125899906842624, 1267650600228229401496703205376):
for func in (sum_of_digits, sum_of_digits_recursion, sum_of_digits_compact):
benchmark_a_function(func, value)
print()
if __name__ == "__main__":
import doctest
doctest.testmod()
benchmark()
================================================
FILE: maths/sum_of_geometric_progression.py
================================================
def sum_of_geometric_progression(
first_term: int, common_ratio: int, num_of_terms: int
) -> float:
""" "
Return the sum of n terms in a geometric progression.
>>> sum_of_geometric_progression(1, 2, 10)
1023.0
>>> sum_of_geometric_progression(1, 10, 5)
11111.0
>>> sum_of_geometric_progression(0, 2, 10)
0.0
>>> sum_of_geometric_progression(1, 0, 10)
1.0
>>> sum_of_geometric_progression(1, 2, 0)
-0.0
>>> sum_of_geometric_progression(-1, 2, 10)
-1023.0
>>> sum_of_geometric_progression(1, -2, 10)
-341.0
>>> sum_of_geometric_progression(1, 2, -10)
-0.9990234375
"""
if common_ratio == 1:
# Formula for sum if common ratio is 1
return num_of_terms * first_term
# Formula for finding sum of n terms of a GeometricProgression
return (first_term / (1 - common_ratio)) * (1 - common_ratio**num_of_terms)
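For common_ratio != 1 the return statement is the closed form S = a * (1 - r**n) / (1 - r). A standalone check of the first doctest (a = 1, r = 2, n = 10), separate from the file above:

first_term, common_ratio, num_of_terms = 1, 2, 10
closed_form = (first_term / (1 - common_ratio)) * (1 - common_ratio**num_of_terms)
print(closed_form)  # 1023.0
print(sum(2**i for i in range(10)))  # 1023, the same series written out term by term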
================================================
FILE: maths/sum_of_harmonic_series.py
================================================
def sum_of_harmonic_progression(
first_term: float, common_difference: float, number_of_terms: int
) -> float:
"""
https://en.wikipedia.org/wiki/Harmonic_progression_(mathematics)
    Find the sum of n terms in a harmonic progression. The calculation starts with the
    reciprocal of first_term and repeatedly adds the common difference of the arithmetic
    progression to which the given harmonic progression is linked.
>>> sum_of_harmonic_progression(1 / 2, 2, 2)
0.75
>>> sum_of_harmonic_progression(1 / 5, 5, 5)
0.45666666666666667
"""
arithmetic_progression = [1 / first_term]
first_term = 1 / first_term
for _ in range(number_of_terms - 1):
first_term += common_difference
arithmetic_progression.append(first_term)
harmonic_series = [1 / step for step in arithmetic_progression]
return sum(harmonic_series)
if __name__ == "__main__":
import doctest
doctest.testmod()
print(sum_of_harmonic_progression(1 / 2, 2, 2))
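Because the reciprocals of a harmonic progression form an arithmetic progression, the first doctest can be reproduced by hand: the AP starts at 1 / (1/2) = 2 with common difference 2, giving HP terms 1/2 and 1/4. A standalone sketch, separate from the file:

arithmetic_progression = [2, 2 + 2]  # reciprocals of the HP terms
harmonic_terms = [1 / term for term in arithmetic_progression]  # [0.5, 0.25]
print(sum(harmonic_terms))  # 0.75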
================================================
FILE: maths/sumset.py
================================================
"""
Calculates the SumSet of two sets of numbers (A and B)
Source:
https://en.wikipedia.org/wiki/Sumset
"""
def sumset(set_a: set, set_b: set) -> set:
"""
    :param set_a: a set of numbers
    :param set_b: a set of numbers
    :return: the sumset {a + b for a in set_a for b in set_b}
>>> sumset({1, 2, 3}, {4, 5, 6})
{5, 6, 7, 8, 9}
>>> sumset({1, 2, 3}, {4, 5, 6, 7})
{5, 6, 7, 8, 9, 10}
>>> sumset({1, 2, 3, 4}, 3)
Traceback (most recent call last):
...
AssertionError: The input value of [set_b=3] is not a set
"""
assert isinstance(set_a, set), f"The input value of [set_a={set_a}] is not a set"
assert isinstance(set_b, set), f"The input value of [set_b={set_b}] is not a set"
return {a + b for a in set_a for b in set_b}
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: maths/sylvester_sequence.py
================================================
"""
Calculates the nth number in Sylvester's sequence
Source:
https://en.wikipedia.org/wiki/Sylvester%27s_sequence
"""
def sylvester(number: int) -> int:
"""
:param number: nth number to calculate in the sequence
:return: the nth number in Sylvester's sequence
>>> sylvester(8)
113423713055421844361000443
>>> sylvester(-1)
Traceback (most recent call last):
...
ValueError: The input value of [n=-1] has to be > 0
>>> sylvester(8.0)
Traceback (most recent call last):
...
AssertionError: The input value of [n=8.0] is not an integer
"""
assert isinstance(number, int), f"The input value of [n={number}] is not an integer"
if number == 1:
return 2
elif number < 1:
msg = f"The input value of [n={number}] has to be > 0"
raise ValueError(msg)
else:
num = sylvester(number - 1)
lower = num - 1
upper = num
return lower * upper + 1
if __name__ == "__main__":
print(f"The 8th number in Sylvester's sequence: {sylvester(8)}")
================================================
FILE: maths/tanh.py
================================================
"""
This script demonstrates the implementation of the tangent hyperbolic
or tanh function.
The function takes a vector of K real numbers as input and
applies (e^x - e^(-x))/(e^x + e^(-x)) elementwise. After passing
through tanh, every element of the vector lies between -1 and 1.
Script inspired from its corresponding Wikipedia article
https://en.wikipedia.org/wiki/Activation_function
"""
import numpy as np
def tangent_hyperbolic(vector: np.ndarray) -> np.ndarray:
"""
Implements the tanh function
Parameters:
vector: np.ndarray
Returns:
tanh (np.array): The input numpy array after applying tanh.
    Mathematically, (e^x - e^(-x))/(e^x + e^(-x)) can be rewritten as 2/(1 + e^(-2x)) - 1.
Examples:
>>> tangent_hyperbolic(np.array([1,5,6,-0.67]))
array([ 0.76159416, 0.9999092 , 0.99998771, -0.58497988])
>>> tangent_hyperbolic(np.array([8,10,2,-0.98,13]))
array([ 0.99999977, 1. , 0.96402758, -0.7530659 , 1. ])
"""
return (2 / (1 + np.exp(-2 * vector))) - 1
if __name__ == "__main__":
import doctest
doctest.testmod()
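The return statement uses the identity obtained by multiplying numerator and denominator of (e^x - e^(-x))/(e^x + e^(-x)) by e^(-x), giving (1 - e^(-2x))/(1 + e^(-2x)) = 2/(1 + e^(-2x)) - 1. A standalone numerical check that the two forms agree (separate from the file above):

import numpy as np

vector = np.array([1.0, 5.0, 6.0, -0.67])
direct = (np.exp(vector) - np.exp(-vector)) / (np.exp(vector) + np.exp(-vector))
rewritten = (2 / (1 + np.exp(-2 * vector))) - 1
print(np.allclose(direct, rewritten))  # True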
================================================
FILE: maths/test_factorial.py
================================================
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "pytest",
# ]
# ///
import pytest
from maths.factorial import factorial, factorial_recursive
@pytest.mark.parametrize("function", [factorial, factorial_recursive])
def test_zero(function):
assert function(0) == 1
@pytest.mark.parametrize("function", [factorial, factorial_recursive])
def test_positive_integers(function):
assert function(1) == 1
assert function(5) == 120
assert function(7) == 5040
@pytest.mark.parametrize("function", [factorial, factorial_recursive])
def test_large_number(function):
assert function(10) == 3628800
@pytest.mark.parametrize("function", [factorial, factorial_recursive])
def test_negative_number(function):
with pytest.raises(ValueError):
function(-3)
@pytest.mark.parametrize("function", [factorial, factorial_recursive])
def test_float_number(function):
with pytest.raises(ValueError):
function(1.5)
if __name__ == "__main__":
pytest.main(["-v", __file__])
================================================
FILE: maths/test_prime_check.py
================================================
"""
Minimalist file that allows pytest to find and run the Test unittest. For details, see:
https://doc.pytest.org/en/latest/goodpractices.html#conventions-for-python-test-discovery
"""
from .prime_check import Test
Test()
================================================
FILE: maths/three_sum.py
================================================
"""
https://en.wikipedia.org/wiki/3SUM
"""
def three_sum(nums: list[int]) -> list[list[int]]:
"""
    Find all unique triplets in an array of integers that sum up to zero.
    The input list is sorted in place before searching.
    Args:
        nums: A list of integers.
Returns:
A list of lists containing unique triplets that sum up to zero.
>>> three_sum([-1, 0, 1, 2, -1, -4])
[[-1, -1, 2], [-1, 0, 1]]
>>> three_sum([1, 2, 3, 4])
[]
"""
nums.sort()
ans = []
for i in range(len(nums) - 2):
if i == 0 or (nums[i] != nums[i - 1]):
low, high, c = i + 1, len(nums) - 1, 0 - nums[i]
while low < high:
if nums[low] + nums[high] == c:
ans.append([nums[i], nums[low], nums[high]])
while low < high and nums[low] == nums[low + 1]:
low += 1
while low < high and nums[high] == nums[high - 1]:
high -= 1
low += 1
high -= 1
elif nums[low] + nums[high] < c:
low += 1
else:
high -= 1
return ans
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/trapezoidal_rule.py
================================================
"""
Numerical integration or quadrature for a smooth function f with known values at x_i
"""
def trapezoidal_rule(boundary, steps):
"""
Implements the extended trapezoidal rule for numerical integration.
The function f(x) is provided below.
:param boundary: List containing the lower and upper bounds of integration [a, b]
:param steps: The number of steps (intervals) used in the approximation
:return: The numerical approximation of the integral
>>> abs(trapezoidal_rule([0, 1], 10) - 0.33333) < 0.01
True
>>> abs(trapezoidal_rule([0, 1], 100) - 0.33333) < 0.01
True
>>> abs(trapezoidal_rule([0, 2], 1000) - 2.66667) < 0.01
True
>>> abs(trapezoidal_rule([1, 2], 1000) - 2.33333) < 0.01
True
"""
h = (boundary[1] - boundary[0]) / steps
a = boundary[0]
b = boundary[1]
x_i = make_points(a, b, h)
y = 0.0
y += (h / 2.0) * f(a)
for i in x_i:
y += h * f(i)
y += (h / 2.0) * f(b)
return y
def make_points(a, b, h):
"""
Generates points between a and b with step size h for trapezoidal integration.
:param a: The lower bound of integration
:param b: The upper bound of integration
:param h: The step size
:yield: The next x-value in the range (a, b)
>>> list(make_points(0, 1, 0.1)) # doctest: +NORMALIZE_WHITESPACE
[0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6, 0.7, 0.7999999999999999, \
0.8999999999999999]
>>> list(make_points(0, 10, 2.5))
[2.5, 5.0, 7.5]
>>> list(make_points(0, 10, 2))
[2, 4, 6, 8]
>>> list(make_points(1, 21, 5))
[6, 11, 16]
>>> list(make_points(1, 5, 2))
[3]
>>> list(make_points(1, 4, 3))
[]
"""
x = a + h
while x <= (b - h):
yield x
x += h
def f(x):
"""
This is the function to integrate, f(x) = (x - 0)^2 = x^2.
:param x: The input value
:return: The value of f(x)
>>> f(0)
0
>>> f(1)
1
>>> f(0.5)
0.25
"""
return x**2
def main():
"""
Main function to test the trapezoidal rule.
:a: Lower bound of integration
:b: Upper bound of integration
:steps: define number of steps or resolution
:boundary: define boundary of integration
>>> main()
y = 0.3349999999999999
"""
a = 0.0
b = 1.0
steps = 10.0
boundary = [a, b]
y = trapezoidal_rule(boundary, steps)
print(f"y = {y}")
if __name__ == "__main__":
import doctest
doctest.testmod()
main()
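main() integrates f(x) = x**2 over [0, 1] with 10 steps, so the composite rule reduces to h * (f(a)/2 + sum of f(x_i) + f(b)/2) with h = 0.1. The standalone arithmetic below (not from the repository) explains the expected output of roughly 0.335, compared with the exact integral 1/3:

h = 0.1
interior = sum((i * h) ** 2 for i in range(1, 10))  # 0.01 + 0.04 + ... + 0.81 = 2.85
approx = h * (0.0 / 2 + interior + 1.0 / 2)  # 0.1 * (0 + 2.85 + 0.5) = 0.335
print(round(approx, 6))  # 0.335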
================================================
FILE: maths/triplet_sum.py
================================================
"""
Given an array of integers and another integer target,
we are required to find a triplet from the array such that its sum is equal to
the target.
"""
from __future__ import annotations
from itertools import permutations
from random import randint
from timeit import repeat
def make_dataset() -> tuple[list[int], int]:
arr = [randint(-1000, 1000) for i in range(10)]
r = randint(-5000, 5000)
return (arr, r)
dataset = make_dataset()
def triplet_sum1(arr: list[int], target: int) -> tuple[int, ...]:
"""
Returns a triplet in the array with sum equal to target,
else (0, 0, 0).
>>> triplet_sum1([13, 29, 7, 23, 5], 35)
(5, 7, 23)
>>> triplet_sum1([37, 9, 19, 50, 44], 65)
(9, 19, 37)
>>> arr = [6, 47, 27, 1, 15]
>>> target = 11
>>> triplet_sum1(arr, target)
(0, 0, 0)
"""
for triplet in permutations(arr, 3):
if sum(triplet) == target:
return tuple(sorted(triplet))
return (0, 0, 0)
def triplet_sum2(arr: list[int], target: int) -> tuple[int, int, int]:
"""
Returns a triplet in the array with sum equal to target,
else (0, 0, 0).
>>> triplet_sum2([13, 29, 7, 23, 5], 35)
(5, 7, 23)
>>> triplet_sum2([37, 9, 19, 50, 44], 65)
(9, 19, 37)
>>> arr = [6, 47, 27, 1, 15]
>>> target = 11
>>> triplet_sum2(arr, target)
(0, 0, 0)
"""
arr.sort()
n = len(arr)
for i in range(n - 1):
left, right = i + 1, n - 1
while left < right:
if arr[i] + arr[left] + arr[right] == target:
return (arr[i], arr[left], arr[right])
elif arr[i] + arr[left] + arr[right] < target:
left += 1
elif arr[i] + arr[left] + arr[right] > target:
right -= 1
return (0, 0, 0)
def solution_times() -> tuple[float, float]:
setup_code = """
from __main__ import dataset, triplet_sum1, triplet_sum2
"""
test_code1 = """
triplet_sum1(*dataset)
"""
test_code2 = """
triplet_sum2(*dataset)
"""
times1 = repeat(setup=setup_code, stmt=test_code1, repeat=5, number=10000)
times2 = repeat(setup=setup_code, stmt=test_code2, repeat=5, number=10000)
return (min(times1), min(times2))
if __name__ == "__main__":
from doctest import testmod
testmod()
times = solution_times()
print(f"The time for naive implementation is {times[0]}.")
print(f"The time for optimized implementation is {times[1]}.")
================================================
FILE: maths/twin_prime.py
================================================
"""
== Twin Prime ==
A number n+2 is said to be a Twin prime of number n if
both n and n+2 are prime.
Examples of Twin pairs: (3, 5), (5, 7), (11, 13), (17, 19), (29, 31), (41, 43), ...
https://en.wikipedia.org/wiki/Twin_prime
"""
# Author : Akshay Dubey (https://github.com/itsAkshayDubey)
from maths.prime_check import is_prime
def twin_prime(number: int) -> int:
"""
    This function takes an integer as input and
    returns n+2 if n and n+2 are prime numbers and -1 otherwise.
>>> twin_prime(3)
5
>>> twin_prime(4)
-1
>>> twin_prime(5)
7
>>> twin_prime(17)
19
>>> twin_prime(0)
-1
>>> twin_prime(6.0)
Traceback (most recent call last):
...
TypeError: Input value of [number=6.0] must be an integer
"""
if not isinstance(number, int):
msg = f"Input value of [number={number}] must be an integer"
raise TypeError(msg)
if is_prime(number) and is_prime(number + 2):
return number + 2
else:
return -1
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: maths/two_pointer.py
================================================
"""
Given a sorted array of integers, return indices of the two numbers such
that they add up to a specific target using the two pointers technique.
You may assume that each input would have exactly one solution, and you
may not use the same element twice.
This is an alternative to the map-based solution of the two-sum
problem [1]; the two-pointer technique here instead relies on the
input array being sorted.
Example:
Given nums = [2, 7, 11, 15], target = 9,
Because nums[0] + nums[1] = 2 + 7 = 9,
return [0, 1].
[1]: https://github.com/TheAlgorithms/Python/blob/master/other/two_sum.py
"""
from __future__ import annotations
def two_pointer(nums: list[int], target: int) -> list[int]:
"""
>>> two_pointer([2, 7, 11, 15], 9)
[0, 1]
>>> two_pointer([2, 7, 11, 15], 17)
[0, 3]
>>> two_pointer([2, 7, 11, 15], 18)
[1, 2]
>>> two_pointer([2, 7, 11, 15], 26)
[2, 3]
>>> two_pointer([1, 3, 3], 6)
[1, 2]
>>> two_pointer([2, 7, 11, 15], 8)
[]
>>> two_pointer([3 * i for i in range(10)], 19)
[]
>>> two_pointer([1, 2, 3], 6)
[]
"""
i = 0
j = len(nums) - 1
while i < j:
if nums[i] + nums[j] == target:
return [i, j]
elif nums[i] + nums[j] < target:
i = i + 1
else:
j = j - 1
return []
if __name__ == "__main__":
import doctest
doctest.testmod()
print(f"{two_pointer([2, 7, 11, 15], 9) = }")
================================================
FILE: maths/two_sum.py
================================================
"""
Given an array of integers, return indices of the two numbers such that they add up to
a specific target.
You may assume that each input would have exactly one solution, and you may not use the
same element twice.
Example:
Given nums = [2, 7, 11, 15], target = 9,
Because nums[0] + nums[1] = 2 + 7 = 9,
return [0, 1].
"""
from __future__ import annotations
def two_sum(nums: list[int], target: int) -> list[int]:
"""
>>> two_sum([2, 7, 11, 15], 9)
[0, 1]
>>> two_sum([15, 2, 11, 7], 13)
[1, 2]
>>> two_sum([2, 7, 11, 15], 17)
[0, 3]
>>> two_sum([7, 15, 11, 2], 18)
[0, 2]
>>> two_sum([2, 7, 11, 15], 26)
[2, 3]
>>> two_sum([2, 7, 11, 15], 8)
[]
>>> two_sum([3 * i for i in range(10)], 19)
[]
"""
chk_map: dict[int, int] = {}
for index, val in enumerate(nums):
compl = target - val
if compl in chk_map:
return [chk_map[compl], index]
chk_map[val] = index
return []
if __name__ == "__main__":
import doctest
doctest.testmod()
print(f"{two_sum([2, 7, 11, 15], 9) = }")
================================================
FILE: maths/volume.py
================================================
"""
Find the volume of various shapes.
* https://en.wikipedia.org/wiki/Volume
* https://en.wikipedia.org/wiki/Spherical_cap
"""
from __future__ import annotations
from math import pi, pow # noqa: A004
def vol_cube(side_length: float) -> float:
"""
Calculate the Volume of a Cube.
>>> vol_cube(1)
1.0
>>> vol_cube(3)
27.0
>>> vol_cube(0)
0.0
>>> vol_cube(1.6)
4.096000000000001
>>> vol_cube(-1)
Traceback (most recent call last):
...
ValueError: vol_cube() only accepts non-negative values
"""
if side_length < 0:
raise ValueError("vol_cube() only accepts non-negative values")
return pow(side_length, 3)
def vol_spherical_cap(height: float, radius: float) -> float:
"""
Calculate the volume of the spherical cap.
>>> vol_spherical_cap(1, 2)
5.235987755982988
>>> vol_spherical_cap(1.6, 2.6)
16.621119532592402
>>> vol_spherical_cap(0, 0)
0.0
>>> vol_spherical_cap(-1, 2)
Traceback (most recent call last):
...
ValueError: vol_spherical_cap() only accepts non-negative values
>>> vol_spherical_cap(1, -2)
Traceback (most recent call last):
...
ValueError: vol_spherical_cap() only accepts non-negative values
"""
if height < 0 or radius < 0:
raise ValueError("vol_spherical_cap() only accepts non-negative values")
# Volume is 1/3 pi * height squared * (3 * radius - height)
return 1 / 3 * pi * pow(height, 2) * (3 * radius - height)
def vol_spheres_intersect(
radius_1: float, radius_2: float, centers_distance: float
) -> float:
r"""
Calculate the volume of the intersection of two spheres.
    The intersection is composed of two spherical caps and therefore its volume is the
sum of the volumes of the spherical caps.
First, it calculates the heights :math:`(h_1, h_2)` of the spherical caps,
then the two volumes and it returns the sum.
The height formulas are
.. math::
h_1 = \frac{(radius_1 - radius_2 + centers\_distance)
\cdot (radius_1 + radius_2 - centers\_distance)}
{2 \cdot centers\_distance}
h_2 = \frac{(radius_2 - radius_1 + centers\_distance)
\cdot (radius_2 + radius_1 - centers\_distance)}
{2 \cdot centers\_distance}
    If `centers_distance` is 0 then it returns the volume of the smaller sphere.
:return: ``vol_spherical_cap`` (:math:`h_1`, :math:`radius_2`)
+ ``vol_spherical_cap`` (:math:`h_2`, :math:`radius_1`)
>>> vol_spheres_intersect(2, 2, 1)
21.205750411731103
>>> vol_spheres_intersect(2.6, 2.6, 1.6)
40.71504079052372
>>> vol_spheres_intersect(0, 0, 0)
0.0
>>> vol_spheres_intersect(-2, 2, 1)
Traceback (most recent call last):
...
ValueError: vol_spheres_intersect() only accepts non-negative values
>>> vol_spheres_intersect(2, -2, 1)
Traceback (most recent call last):
...
ValueError: vol_spheres_intersect() only accepts non-negative values
>>> vol_spheres_intersect(2, 2, -1)
Traceback (most recent call last):
...
ValueError: vol_spheres_intersect() only accepts non-negative values
"""
if radius_1 < 0 or radius_2 < 0 or centers_distance < 0:
raise ValueError("vol_spheres_intersect() only accepts non-negative values")
if centers_distance == 0:
return vol_sphere(min(radius_1, radius_2))
h1 = (
(radius_1 - radius_2 + centers_distance)
* (radius_1 + radius_2 - centers_distance)
/ (2 * centers_distance)
)
h2 = (
(radius_2 - radius_1 + centers_distance)
* (radius_2 + radius_1 - centers_distance)
/ (2 * centers_distance)
)
return vol_spherical_cap(h1, radius_2) + vol_spherical_cap(h2, radius_1)
def vol_spheres_union(
radius_1: float, radius_2: float, centers_distance: float
) -> float:
r"""
Calculate the volume of the union of two spheres that possibly intersect.
It is the sum of sphere :math:`A` and sphere :math:`B` minus their intersection.
First, it calculates the volumes :math:`(v_1, v_2)` of the spheres,
then the volume of the intersection :math:`i` and
it returns the sum :math:`v_1 + v_2 - i`.
If `centers_distance` is 0 then it returns the volume of the larger sphere
:return: ``vol_sphere`` (:math:`radius_1`) + ``vol_sphere`` (:math:`radius_2`)
- ``vol_spheres_intersect``
(:math:`radius_1`, :math:`radius_2`, :math:`centers\_distance`)
>>> vol_spheres_union(2, 2, 1)
45.814892864851146
>>> vol_spheres_union(1.56, 2.2, 1.4)
48.77802773671288
>>> vol_spheres_union(0, 2, 1)
Traceback (most recent call last):
...
ValueError: vol_spheres_union() only accepts non-negative values, non-zero radius
>>> vol_spheres_union('1.56', '2.2', '1.4')
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'str' and 'int'
>>> vol_spheres_union(1, None, 1)
Traceback (most recent call last):
...
TypeError: '<=' not supported between instances of 'NoneType' and 'int'
"""
if radius_1 <= 0 or radius_2 <= 0 or centers_distance < 0:
raise ValueError(
"vol_spheres_union() only accepts non-negative values, non-zero radius"
)
if centers_distance == 0:
return vol_sphere(max(radius_1, radius_2))
return (
vol_sphere(radius_1)
+ vol_sphere(radius_2)
- vol_spheres_intersect(radius_1, radius_2, centers_distance)
)
def vol_cuboid(width: float, height: float, length: float) -> float:
"""
Calculate the Volume of a Cuboid.
    :return: the product of `width`, `length` and `height`
>>> vol_cuboid(1, 1, 1)
1.0
>>> vol_cuboid(1, 2, 3)
6.0
>>> vol_cuboid(1.6, 2.6, 3.6)
14.976
>>> vol_cuboid(0, 0, 0)
0.0
>>> vol_cuboid(-1, 2, 3)
Traceback (most recent call last):
...
ValueError: vol_cuboid() only accepts non-negative values
>>> vol_cuboid(1, -2, 3)
Traceback (most recent call last):
...
ValueError: vol_cuboid() only accepts non-negative values
>>> vol_cuboid(1, 2, -3)
Traceback (most recent call last):
...
ValueError: vol_cuboid() only accepts non-negative values
"""
if width < 0 or height < 0 or length < 0:
raise ValueError("vol_cuboid() only accepts non-negative values")
return float(width * height * length)
def vol_cone(area_of_base: float, height: float) -> float:
r"""
| Calculate the Volume of a Cone.
| Wikipedia reference: https://en.wikipedia.org/wiki/Cone
:return: :math:`\frac{1}{3} \cdot area\_of\_base \cdot height`
>>> vol_cone(10, 3)
10.0
>>> vol_cone(1, 1)
0.3333333333333333
>>> vol_cone(1.6, 1.6)
0.8533333333333335
>>> vol_cone(0, 0)
0.0
>>> vol_cone(-1, 1)
Traceback (most recent call last):
...
ValueError: vol_cone() only accepts non-negative values
>>> vol_cone(1, -1)
Traceback (most recent call last):
...
ValueError: vol_cone() only accepts non-negative values
"""
if height < 0 or area_of_base < 0:
raise ValueError("vol_cone() only accepts non-negative values")
return area_of_base * height / 3.0
def vol_right_circ_cone(radius: float, height: float) -> float:
r"""
| Calculate the Volume of a Right Circular Cone.
| Wikipedia reference: https://en.wikipedia.org/wiki/Cone
:return: :math:`\frac{1}{3} \cdot \pi \cdot radius^2 \cdot height`
>>> vol_right_circ_cone(2, 3)
12.566370614359172
>>> vol_right_circ_cone(0, 0)
0.0
>>> vol_right_circ_cone(1.6, 1.6)
4.289321169701265
>>> vol_right_circ_cone(-1, 1)
Traceback (most recent call last):
...
ValueError: vol_right_circ_cone() only accepts non-negative values
>>> vol_right_circ_cone(1, -1)
Traceback (most recent call last):
...
ValueError: vol_right_circ_cone() only accepts non-negative values
"""
if height < 0 or radius < 0:
raise ValueError("vol_right_circ_cone() only accepts non-negative values")
return pi * pow(radius, 2) * height / 3.0
def vol_prism(area_of_base: float, height: float) -> float:
r"""
| Calculate the Volume of a Prism.
| Wikipedia reference: https://en.wikipedia.org/wiki/Prism_(geometry)
:return: :math:`V = B \cdot h`
>>> vol_prism(10, 2)
20.0
>>> vol_prism(11, 1)
11.0
>>> vol_prism(1.6, 1.6)
2.5600000000000005
>>> vol_prism(0, 0)
0.0
>>> vol_prism(-1, 1)
Traceback (most recent call last):
...
ValueError: vol_prism() only accepts non-negative values
>>> vol_prism(1, -1)
Traceback (most recent call last):
...
ValueError: vol_prism() only accepts non-negative values
"""
if height < 0 or area_of_base < 0:
raise ValueError("vol_prism() only accepts non-negative values")
return float(area_of_base * height)
def vol_pyramid(area_of_base: float, height: float) -> float:
r"""
| Calculate the Volume of a Pyramid.
| Wikipedia reference: https://en.wikipedia.org/wiki/Pyramid_(geometry)
:return: :math:`\frac{1}{3} \cdot B \cdot h`
>>> vol_pyramid(10, 3)
10.0
>>> vol_pyramid(1.5, 3)
1.5
>>> vol_pyramid(1.6, 1.6)
0.8533333333333335
>>> vol_pyramid(0, 0)
0.0
>>> vol_pyramid(-1, 1)
Traceback (most recent call last):
...
ValueError: vol_pyramid() only accepts non-negative values
>>> vol_pyramid(1, -1)
Traceback (most recent call last):
...
ValueError: vol_pyramid() only accepts non-negative values
"""
if height < 0 or area_of_base < 0:
raise ValueError("vol_pyramid() only accepts non-negative values")
return area_of_base * height / 3.0
def vol_sphere(radius: float) -> float:
r"""
| Calculate the Volume of a Sphere.
| Wikipedia reference: https://en.wikipedia.org/wiki/Sphere
:return: :math:`\frac{4}{3} \cdot \pi \cdot r^3`
>>> vol_sphere(5)
523.5987755982989
>>> vol_sphere(1)
4.1887902047863905
>>> vol_sphere(1.6)
17.15728467880506
>>> vol_sphere(0)
0.0
>>> vol_sphere(-1)
Traceback (most recent call last):
...
ValueError: vol_sphere() only accepts non-negative values
"""
if radius < 0:
raise ValueError("vol_sphere() only accepts non-negative values")
# Volume is 4/3 * pi * radius cubed
return 4 / 3 * pi * pow(radius, 3)
def vol_hemisphere(radius: float) -> float:
r"""
| Calculate the volume of a hemisphere
| Wikipedia reference: https://en.wikipedia.org/wiki/Hemisphere
| Other references: https://www.cuemath.com/geometry/hemisphere
:return: :math:`\frac{2}{3} \cdot \pi \cdot radius^3`
>>> vol_hemisphere(1)
2.0943951023931953
>>> vol_hemisphere(7)
718.377520120866
>>> vol_hemisphere(1.6)
8.57864233940253
>>> vol_hemisphere(0)
0.0
>>> vol_hemisphere(-1)
Traceback (most recent call last):
...
ValueError: vol_hemisphere() only accepts non-negative values
"""
if radius < 0:
raise ValueError("vol_hemisphere() only accepts non-negative values")
# Volume is radius cubed * pi * 2/3
return pow(radius, 3) * pi * 2 / 3
def vol_circular_cylinder(radius: float, height: float) -> float:
r"""
| Calculate the Volume of a Circular Cylinder.
| Wikipedia reference: https://en.wikipedia.org/wiki/Cylinder
:return: :math:`\pi \cdot radius^2 \cdot height`
>>> vol_circular_cylinder(1, 1)
3.141592653589793
>>> vol_circular_cylinder(4, 3)
150.79644737231007
>>> vol_circular_cylinder(1.6, 1.6)
12.867963509103795
>>> vol_circular_cylinder(0, 0)
0.0
>>> vol_circular_cylinder(-1, 1)
Traceback (most recent call last):
...
ValueError: vol_circular_cylinder() only accepts non-negative values
>>> vol_circular_cylinder(1, -1)
Traceback (most recent call last):
...
ValueError: vol_circular_cylinder() only accepts non-negative values
"""
if height < 0 or radius < 0:
raise ValueError("vol_circular_cylinder() only accepts non-negative values")
# Volume is radius squared * height * pi
return pow(radius, 2) * height * pi
def vol_hollow_circular_cylinder(
inner_radius: float, outer_radius: float, height: float
) -> float:
"""
Calculate the Volume of a Hollow Circular Cylinder.
>>> vol_hollow_circular_cylinder(1, 2, 3)
28.274333882308138
>>> vol_hollow_circular_cylinder(1.6, 2.6, 3.6)
47.50088092227767
>>> vol_hollow_circular_cylinder(-1, 2, 3)
Traceback (most recent call last):
...
ValueError: vol_hollow_circular_cylinder() only accepts non-negative values
>>> vol_hollow_circular_cylinder(1, -2, 3)
Traceback (most recent call last):
...
ValueError: vol_hollow_circular_cylinder() only accepts non-negative values
>>> vol_hollow_circular_cylinder(1, 2, -3)
Traceback (most recent call last):
...
ValueError: vol_hollow_circular_cylinder() only accepts non-negative values
>>> vol_hollow_circular_cylinder(2, 1, 3)
Traceback (most recent call last):
...
ValueError: outer_radius must be greater than inner_radius
>>> vol_hollow_circular_cylinder(0, 0, 0)
Traceback (most recent call last):
...
ValueError: outer_radius must be greater than inner_radius
"""
    # Volume is (outer_radius squared - inner_radius squared) * pi * height
if inner_radius < 0 or outer_radius < 0 or height < 0:
raise ValueError(
"vol_hollow_circular_cylinder() only accepts non-negative values"
)
if outer_radius <= inner_radius:
raise ValueError("outer_radius must be greater than inner_radius")
return pi * (pow(outer_radius, 2) - pow(inner_radius, 2)) * height
def vol_conical_frustum(height: float, radius_1: float, radius_2: float) -> float:
"""
| Calculate the Volume of a Conical Frustum.
| Wikipedia reference: https://en.wikipedia.org/wiki/Frustum
>>> vol_conical_frustum(45, 7, 28)
48490.482608158454
>>> vol_conical_frustum(1, 1, 2)
7.330382858376184
>>> vol_conical_frustum(1.6, 2.6, 3.6)
48.7240076620753
>>> vol_conical_frustum(0, 0, 0)
0.0
>>> vol_conical_frustum(-2, 2, 1)
Traceback (most recent call last):
...
ValueError: vol_conical_frustum() only accepts non-negative values
>>> vol_conical_frustum(2, -2, 1)
Traceback (most recent call last):
...
ValueError: vol_conical_frustum() only accepts non-negative values
>>> vol_conical_frustum(2, 2, -1)
Traceback (most recent call last):
...
ValueError: vol_conical_frustum() only accepts non-negative values
"""
# Volume is 1/3 * pi * height *
# (radius_1 squared + radius_2 squared + radius_1 * radius_2)
if radius_1 < 0 or radius_2 < 0 or height < 0:
raise ValueError("vol_conical_frustum() only accepts non-negative values")
return (
1
/ 3
* pi
* height
* (pow(radius_1, 2) + pow(radius_2, 2) + radius_1 * radius_2)
)
def vol_torus(torus_radius: float, tube_radius: float) -> float:
r"""
| Calculate the Volume of a Torus.
| Wikipedia reference: https://en.wikipedia.org/wiki/Torus
:return: :math:`2 \pi^2 \cdot torus\_radius \cdot tube\_radius^2`
>>> vol_torus(1, 1)
19.739208802178716
>>> vol_torus(4, 3)
710.6115168784338
>>> vol_torus(3, 4)
947.4820225045784
>>> vol_torus(1.6, 1.6)
80.85179925372404
>>> vol_torus(0, 0)
0.0
>>> vol_torus(-1, 1)
Traceback (most recent call last):
...
ValueError: vol_torus() only accepts non-negative values
>>> vol_torus(1, -1)
Traceback (most recent call last):
...
ValueError: vol_torus() only accepts non-negative values
"""
if torus_radius < 0 or tube_radius < 0:
raise ValueError("vol_torus() only accepts non-negative values")
return 2 * pow(pi, 2) * torus_radius * pow(tube_radius, 2)
def vol_icosahedron(tri_side: float) -> float:
"""
| Calculate the Volume of an Icosahedron.
| Wikipedia reference: https://en.wikipedia.org/wiki/Regular_icosahedron
>>> from math import isclose
>>> isclose(vol_icosahedron(2.5), 34.088984228514256)
True
>>> isclose(vol_icosahedron(10), 2181.694990624912374)
True
>>> isclose(vol_icosahedron(5), 272.711873828114047)
True
>>> isclose(vol_icosahedron(3.49), 92.740688412033628)
True
>>> vol_icosahedron(0)
0.0
>>> vol_icosahedron(-1)
Traceback (most recent call last):
...
ValueError: vol_icosahedron() only accepts non-negative values
>>> vol_icosahedron(-0.2)
Traceback (most recent call last):
...
ValueError: vol_icosahedron() only accepts non-negative values
"""
if tri_side < 0:
raise ValueError("vol_icosahedron() only accepts non-negative values")
return tri_side**3 * (3 + 5**0.5) * 5 / 12
def main():
"""Print the Results of Various Volume Calculations."""
print("Volumes:")
print(f"Cube: {vol_cube(2) = }") # = 8
print(f"Cuboid: {vol_cuboid(2, 2, 2) = }") # = 8
print(f"Cone: {vol_cone(2, 2) = }") # ~= 1.33
print(f"Right Circular Cone: {vol_right_circ_cone(2, 2) = }") # ~= 8.38
print(f"Prism: {vol_prism(2, 2) = }") # = 4
print(f"Pyramid: {vol_pyramid(2, 2) = }") # ~= 1.33
print(f"Sphere: {vol_sphere(2) = }") # ~= 33.5
print(f"Hemisphere: {vol_hemisphere(2) = }") # ~= 16.75
print(f"Circular Cylinder: {vol_circular_cylinder(2, 2) = }") # ~= 25.1
print(f"Torus: {vol_torus(2, 2) = }") # ~= 157.9
print(f"Conical Frustum: {vol_conical_frustum(2, 2, 4) = }") # ~= 58.6
print(f"Spherical cap: {vol_spherical_cap(1, 2) = }") # ~= 5.24
print(f"Spheres intersection: {vol_spheres_intersect(2, 2, 1) = }") # ~= 21.21
print(f"Spheres union: {vol_spheres_union(2, 2, 1) = }") # ~= 45.81
print(
f"Hollow Circular Cylinder: {vol_hollow_circular_cylinder(1, 2, 3) = }"
) # ~= 28.3
print(f"Icosahedron: {vol_icosahedron(2.5) = }") # ~=34.09
if __name__ == "__main__":
main()
================================================
FILE: maths/zellers_congruence.py
================================================
import argparse
import datetime
def zeller(date_input: str) -> str:
"""
    | Zeller's Congruence Algorithm
| Find the day of the week for nearly any Gregorian or Julian calendar date
>>> zeller('01-31-2010')
'Your date 01-31-2010, is a Sunday!'
Validate out of range month:
>>> zeller('13-31-2010')
Traceback (most recent call last):
...
ValueError: Month must be between 1 - 12
>>> zeller('.2-31-2010')
Traceback (most recent call last):
...
ValueError: invalid literal for int() with base 10: '.2'
Validate out of range date:
>>> zeller('01-33-2010')
Traceback (most recent call last):
...
ValueError: Date must be between 1 - 31
>>> zeller('01-.4-2010')
Traceback (most recent call last):
...
ValueError: invalid literal for int() with base 10: '.4'
Validate second separator:
>>> zeller('01-31*2010')
Traceback (most recent call last):
...
ValueError: Date separator must be '-' or '/'
Validate first separator:
>>> zeller('01^31-2010')
Traceback (most recent call last):
...
ValueError: Date separator must be '-' or '/'
Validate out of range year:
>>> zeller('01-31-8999')
Traceback (most recent call last):
...
ValueError: Year out of range. There has to be some sort of limit...right?
Test null input:
>>> zeller()
Traceback (most recent call last):
...
TypeError: zeller() missing 1 required positional argument: 'date_input'
Test length of `date_input`:
>>> zeller('')
Traceback (most recent call last):
...
ValueError: Must be 10 characters long
>>> zeller('01-31-19082939')
Traceback (most recent call last):
...
ValueError: Must be 10 characters long"""
# Days of the week for response
days = {
"0": "Sunday",
"1": "Monday",
"2": "Tuesday",
"3": "Wednesday",
"4": "Thursday",
"5": "Friday",
"6": "Saturday",
}
convert_datetime_days = {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 0}
# Validate
    if len(date_input) != 10:
raise ValueError("Must be 10 characters long")
# Get month
m: int = int(date_input[0] + date_input[1])
# Validate
if not 0 < m < 13:
raise ValueError("Month must be between 1 - 12")
sep_1: str = date_input[2]
# Validate
if sep_1 not in ["-", "/"]:
raise ValueError("Date separator must be '-' or '/'")
# Get day
d: int = int(date_input[3] + date_input[4])
# Validate
if not 0 < d < 32:
raise ValueError("Date must be between 1 - 31")
# Get second separator
sep_2: str = date_input[5]
# Validate
if sep_2 not in ["-", "/"]:
raise ValueError("Date separator must be '-' or '/'")
# Get year
y: int = int(date_input[6] + date_input[7] + date_input[8] + date_input[9])
# Arbitrary year range
if not 45 < y < 8500:
raise ValueError(
"Year out of range. There has to be some sort of limit...right?"
)
# Get datetime obj for validation
dt_ck = datetime.date(int(y), int(m), int(d))
# Start math
if m <= 2:
y = y - 1
m = m + 12
# maths var
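    # Zeller's congruence:
    #   f = (d + k + floor(2.6 * m - 5.39) + floor(k / 4) + floor(c / 4) - 2 * c) mod 7
    # where c is the century part of the (possibly adjusted) year, k is the year
    # within the century, and f = 0 corresponds to Sunday in the `days` table above.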
c: int = int(str(y)[:2])
k: int = int(str(y)[2:])
t: int = int(2.6 * m - 5.39)
u: int = int(c / 4)
v: int = int(k / 4)
x: int = int(d + k)
z: int = int(t + u + v + x)
w: int = int(z - (2 * c))
f: int = round(w % 7)
# End math
# Validate math
if f != convert_datetime_days[dt_ck.weekday()]:
raise AssertionError("The date was evaluated incorrectly. Contact developer.")
# Response
response: str = f"Your date {date_input}, is a {days[str(f)]}!"
return response
if __name__ == "__main__":
import doctest
doctest.testmod()
parser = argparse.ArgumentParser(
description=(
"Find out what day of the week nearly any date is or was. Enter "
"date as a string in the mm-dd-yyyy or mm/dd/yyyy format"
)
)
parser.add_argument(
"date_input", type=str, help="Date as a string (mm-dd-yyyy or mm/dd/yyyy)"
)
args = parser.parse_args()
zeller(args.date_input)
================================================
FILE: matrix/__init__.py
================================================
================================================
FILE: matrix/binary_search_matrix.py
================================================
def binary_search(array: list, lower_bound: int, upper_bound: int, value: int) -> int:
"""
    This function carries out binary search on a 1d array and
    returns -1 if the value does not exist.
    array: a sorted 1d array
    value: the value to be searched
>>> matrix = [1, 4, 7, 11, 15]
>>> binary_search(matrix, 0, len(matrix) - 1, 1)
0
>>> binary_search(matrix, 0, len(matrix) - 1, 23)
-1
"""
r = int((lower_bound + upper_bound) // 2)
if array[r] == value:
return r
if lower_bound >= upper_bound:
return -1
if array[r] < value:
return binary_search(array, r + 1, upper_bound, value)
else:
return binary_search(array, lower_bound, r - 1, value)
def mat_bin_search(value: int, matrix: list) -> list:
"""
    This function loops over the rows of a 2d matrix and calls binary_search on
    each candidate row, returning [-1, -1] if the value does not exist.
    value: the value to be searched
    matrix: a sorted 2d matrix
>>> matrix = [[1, 4, 7, 11, 15],
... [2, 5, 8, 12, 19],
... [3, 6, 9, 16, 22],
... [10, 13, 14, 17, 24],
... [18, 21, 23, 26, 30]]
>>> target = 1
>>> mat_bin_search(target, matrix)
[0, 0]
>>> target = 34
>>> mat_bin_search(target, matrix)
[-1, -1]
"""
index = 0
if matrix[index][0] == value:
return [index, 0]
    while index < len(matrix) and matrix[index][0] <= value:
r = binary_search(matrix[index], 0, len(matrix[index]) - 1, value)
if r != -1:
return [index, r]
index += 1
return [-1, -1]
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: matrix/count_islands_in_matrix.py
================================================
# An island in a matrix is a group of connected cells that all hold the same value.
# This code counts the number of islands in a given matrix using depth-first search,
# including diagonal (8-way) connections.
class Matrix: # Public class to implement a graph
def __init__(self, row: int, col: int, graph: list[list[bool]]) -> None:
self.ROW = row
self.COL = col
self.graph = graph
def is_safe(self, i: int, j: int, visited: list[list[bool]]) -> bool:
return (
0 <= i < self.ROW
and 0 <= j < self.COL
and not visited[i][j]
and self.graph[i][j]
)
def diffs(self, i: int, j: int, visited: list[list[bool]]) -> None:
# Checking all 8 elements surrounding nth element
row_nbr = [-1, -1, -1, 0, 0, 1, 1, 1] # Coordinate order
col_nbr = [-1, 0, 1, -1, 1, -1, 0, 1]
visited[i][j] = True # Make those cells visited
for k in range(8):
if self.is_safe(i + row_nbr[k], j + col_nbr[k], visited):
self.diffs(i + row_nbr[k], j + col_nbr[k], visited)
def count_islands(self) -> int: # And finally, count all islands.
visited = [[False for j in range(self.COL)] for i in range(self.ROW)]
count = 0
for i in range(self.ROW):
for j in range(self.COL):
if visited[i][j] is False and self.graph[i][j] == 1:
self.diffs(i, j, visited)
count += 1
return count
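if __name__ == "__main__":
    # Illustrative usage only (this block is not part of the original module):
    # the grid below contains five 8-connected groups of 1s.
    example_graph = [
        [1, 1, 0, 0, 0],
        [0, 1, 0, 0, 1],
        [1, 0, 0, 1, 1],
        [0, 0, 0, 0, 0],
        [1, 0, 1, 0, 1],
    ]
    print(Matrix(5, 5, example_graph).count_islands())  # should print 5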
================================================
FILE: matrix/count_negative_numbers_in_sorted_matrix.py
================================================
"""
Given a matrix of numbers in which all rows and all columns are sorted in decreasing
order, return the number of negative numbers in the grid.
Reference: https://leetcode.com/problems/count-negative-numbers-in-a-sorted-matrix
"""
def generate_large_matrix() -> list[list[int]]:
"""
>>> generate_large_matrix() # doctest: +ELLIPSIS
[[1000, ..., -999], [999, ..., -1001], ..., [2, ..., -1998]]
"""
return [list(range(1000 - i, -1000 - i, -1)) for i in range(1000)]
grid = generate_large_matrix()
test_grids = (
[[4, 3, 2, -1], [3, 2, 1, -1], [1, 1, -1, -2], [-1, -1, -2, -3]],
[[3, 2], [1, 0]],
[[7, 7, 6]],
[[7, 7, 6], [-1, -2, -3]],
grid,
)
def validate_grid(grid: list[list[int]]) -> None:
"""
    Validate that the rows and columns of the grid are sorted in decreasing order.
>>> for grid in test_grids:
... validate_grid(grid)
"""
assert all(row == sorted(row, reverse=True) for row in grid)
assert all(list(col) == sorted(col, reverse=True) for col in zip(*grid))
def find_negative_index(array: list[int]) -> int:
"""
    Find the index of the first negative number (returns len(array) if there is none)
>>> find_negative_index([0,0,0,0])
4
>>> find_negative_index([4,3,2,-1])
3
>>> find_negative_index([1,0,-1,-10])
2
>>> find_negative_index([0,0,0,-1])
3
>>> find_negative_index([11,8,7,-3,-5,-9])
3
>>> find_negative_index([-1,-1,-2,-3])
0
>>> find_negative_index([5,1,0])
3
>>> find_negative_index([-5,-5,-5])
0
>>> find_negative_index([0])
1
>>> find_negative_index([])
0
"""
left = 0
right = len(array) - 1
# Edge cases such as no values or all numbers are negative.
if not array or array[0] < 0:
return 0
while right + 1 > left:
mid = (left + right) // 2
num = array[mid]
        # The first negative number: it is negative and the element before it is non-negative.
if num < 0 and array[mid - 1] >= 0:
return mid
if num >= 0:
left = mid + 1
else:
right = mid - 1
# No negative numbers so return the last index of the array + 1 which is the length.
return len(array)
def count_negatives_binary_search(grid: list[list[int]]) -> int:
"""
    An O(m log(n)) solution that uses binary search to find the boundary between
    non-negative and negative numbers in each row
>>> [count_negatives_binary_search(grid) for grid in test_grids]
[8, 0, 0, 3, 1498500]
"""
total = 0
bound = len(grid[0])
for i in range(len(grid)):
bound = find_negative_index(grid[i][:bound])
total += bound
return (len(grid) * len(grid[0])) - total
def count_negatives_brute_force(grid: list[list[int]]) -> int:
"""
This solution is O(n^2) because it iterates through every column and row.
>>> [count_negatives_brute_force(grid) for grid in test_grids]
[8, 0, 0, 3, 1498500]
"""
return len([number for row in grid for number in row if number < 0])
def count_negatives_brute_force_with_break(grid: list[list[int]]) -> int:
"""
Similar to the brute force solution above but uses break in order to reduce the
number of iterations.
>>> [count_negatives_brute_force_with_break(grid) for grid in test_grids]
[8, 0, 0, 3, 1498500]
"""
total = 0
for row in grid:
for i, number in enumerate(row):
if number < 0:
total += len(row) - i
break
return total
def benchmark() -> None:
"""Benchmark our functions next to each other"""
from timeit import timeit
print("Running benchmarks")
setup = (
"from __main__ import count_negatives_binary_search, "
"count_negatives_brute_force, count_negatives_brute_force_with_break, grid"
)
for func in (
"count_negatives_binary_search", # took 0.7727 seconds
"count_negatives_brute_force_with_break", # took 4.6505 seconds
"count_negatives_brute_force", # took 12.8160 seconds
):
time = timeit(f"{func}(grid=grid)", setup=setup, number=500)
print(f"{func}() took {time:0.4f} seconds")
if __name__ == "__main__":
import doctest
doctest.testmod()
benchmark()
================================================
FILE: matrix/count_paths.py
================================================
"""
Given a grid, where you start from the top left position [0, 0],
you want to find how many paths you can take to get to the bottom right position.
start here -> 0 0 0 0
1 1 0 0
0 0 0 1
0 1 0 0 <- finish here
how many 'distinct' paths can you take to get to the finish?
Using the recursive depth-first search algorithm below, you can
find the number of distinct paths (count).
'*' will demonstrate a path
In the example above, there are two distinct paths:
1. 2.
* * * 0 * * * *
1 1 * 0 1 1 * *
0 0 * 1 0 0 * 1
0 1 * * 0 1 * *
"""
def depth_first_search(grid: list[list[int]], row: int, col: int, visit: set) -> int:
"""
Recursive Backtracking Depth First Search Algorithm
Starting from top left of a matrix, count the number of
paths that can reach the bottom right of a matrix.
1 represents a block (inaccessible)
0 represents a valid space (accessible)
0 0 0 0
1 1 0 0
0 0 0 1
0 1 0 0
>>> grid = [[0, 0, 0, 0], [1, 1, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0]]
>>> depth_first_search(grid, 0, 0, set())
2
0 0 0 0 0
0 1 1 1 0
0 1 1 1 0
0 0 0 0 0
>>> grid = [[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]
>>> depth_first_search(grid, 0, 0, set())
2
"""
row_length, col_length = len(grid), len(grid[0])
if (
min(row, col) < 0
or row == row_length
or col == col_length
or (row, col) in visit
or grid[row][col] == 1
):
return 0
if row == row_length - 1 and col == col_length - 1:
return 1
visit.add((row, col))
count = 0
count += depth_first_search(grid, row + 1, col, visit)
count += depth_first_search(grid, row - 1, col, visit)
count += depth_first_search(grid, row, col + 1, visit)
count += depth_first_search(grid, row, col - 1, visit)
visit.remove((row, col))
return count
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: matrix/cramers_rule_2x2.py
================================================
# https://www.chilimath.com/lessons/advanced-algebra/cramers-rule-with-two-variables
# https://en.wikipedia.org/wiki/Cramer%27s_rule
def cramers_rule_2x2(equation1: list[int], equation2: list[int]) -> tuple[float, float]:
"""
    Solves a system of two linear equations in two variables.
    :param equation1: list of 3 numbers
    :param equation2: list of 3 numbers
    :return: tuple (x, y) with the solution
input format : [a1, b1, d1], [a2, b2, d2]
determinant = [[a1, b1], [a2, b2]]
determinant_x = [[d1, b1], [d2, b2]]
determinant_y = [[a1, d1], [a2, d2]]
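    By Cramer's rule, whenever determinant != 0 the unique solution is
    x = determinant_x / determinant and y = determinant_y / determinant.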
>>> cramers_rule_2x2([2, 3, 0], [5, 1, 0])
(0.0, 0.0)
>>> cramers_rule_2x2([0, 4, 50], [2, 0, 26])
(13.0, 12.5)
>>> cramers_rule_2x2([11, 2, 30], [1, 0, 4])
(4.0, -7.0)
>>> cramers_rule_2x2([4, 7, 1], [1, 2, 0])
(2.0, -1.0)
>>> cramers_rule_2x2([1, 2, 3], [2, 4, 6])
Traceback (most recent call last):
...
ValueError: Infinite solutions. (Consistent system)
>>> cramers_rule_2x2([1, 2, 3], [2, 4, 7])
Traceback (most recent call last):
...
ValueError: No solution. (Inconsistent system)
>>> cramers_rule_2x2([1, 2, 3], [11, 22])
Traceback (most recent call last):
...
ValueError: Please enter a valid equation.
>>> cramers_rule_2x2([0, 1, 6], [0, 0, 3])
Traceback (most recent call last):
...
ValueError: No solution. (Inconsistent system)
>>> cramers_rule_2x2([0, 0, 6], [0, 0, 3])
Traceback (most recent call last):
...
ValueError: Both a & b of two equations can't be zero.
>>> cramers_rule_2x2([1, 2, 3], [1, 2, 3])
Traceback (most recent call last):
...
ValueError: Infinite solutions. (Consistent system)
>>> cramers_rule_2x2([0, 4, 50], [0, 3, 99])
Traceback (most recent call last):
...
ValueError: No solution. (Inconsistent system)
"""
# Check if the input is valid
if not len(equation1) == len(equation2) == 3:
raise ValueError("Please enter a valid equation.")
if equation1[0] == equation1[1] == equation2[0] == equation2[1] == 0:
raise ValueError("Both a & b of two equations can't be zero.")
# Extract the coefficients
a1, b1, c1 = equation1
a2, b2, c2 = equation2
# Calculate the determinants of the matrices
determinant = a1 * b2 - a2 * b1
determinant_x = c1 * b2 - c2 * b1
determinant_y = a1 * c2 - a2 * c1
# Check if the system of linear equations has a solution (using Cramer's rule)
if determinant == 0:
if determinant_x == determinant_y == 0:
raise ValueError("Infinite solutions. (Consistent system)")
else:
raise ValueError("No solution. (Inconsistent system)")
elif determinant_x == determinant_y == 0:
        # Unique trivial solution x = y = 0 (consistent system)
return (0.0, 0.0)
else:
x = determinant_x / determinant
y = determinant_y / determinant
# Non-Trivial Solution (Consistent system)
return (x, y)
================================================
FILE: matrix/inverse_of_matrix.py
================================================
from __future__ import annotations
from decimal import Decimal
from numpy import array
def inverse_of_matrix(matrix: list[list[float]]) -> list[list[float]]:
"""
A matrix multiplied with its inverse gives the identity matrix.
This function finds the inverse of a 2x2 and 3x3 matrix.
If the determinant of a matrix is 0, its inverse does not exist.
Sources for fixing inaccurate float arithmetic:
https://stackoverflow.com/questions/6563058/how-do-i-use-accurate-float-arithmetic-in-python
https://docs.python.org/3/library/decimal.html
Doctests for 2x2
>>> inverse_of_matrix([[2, 5], [2, 0]])
[[0.0, 0.5], [0.2, -0.2]]
>>> inverse_of_matrix([[2.5, 5], [1, 2]])
Traceback (most recent call last):
...
ValueError: This matrix has no inverse.
>>> inverse_of_matrix([[12, -16], [-9, 0]])
[[0.0, -0.1111111111111111], [-0.0625, -0.08333333333333333]]
>>> inverse_of_matrix([[12, 3], [16, 8]])
[[0.16666666666666666, -0.0625], [-0.3333333333333333, 0.25]]
>>> inverse_of_matrix([[10, 5], [3, 2.5]])
[[0.25, -0.5], [-0.3, 1.0]]
Doctests for 3x3
>>> inverse_of_matrix([[2, 5, 7], [2, 0, 1], [1, 2, 3]])
    [[2.0, 1.0, -5.0], [5.0, 1.0, -12.0], [-4.0, -1.0, 10.0]]
>>> inverse_of_matrix([[1, 2, 2], [1, 2, 2], [3, 2, -1]])
Traceback (most recent call last):
...
ValueError: This matrix has no inverse.
>>> inverse_of_matrix([[],[]])
Traceback (most recent call last):
...
ValueError: Please provide a matrix of size 2x2 or 3x3.
>>> inverse_of_matrix([[1, 2], [3, 4], [5, 6]])
Traceback (most recent call last):
...
ValueError: Please provide a matrix of size 2x2 or 3x3.
>>> inverse_of_matrix([[1, 2, 1], [0,3, 4]])
Traceback (most recent call last):
...
ValueError: Please provide a matrix of size 2x2 or 3x3.
>>> inverse_of_matrix([[1, 2, 3], [7, 8, 9], [7, 8, 9]])
Traceback (most recent call last):
...
ValueError: This matrix has no inverse.
>>> inverse_of_matrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
"""
d = Decimal
    # Check if the provided matrix has 2 rows and 2 columns,
    # since this branch only handles 2x2 matrices
if len(matrix) == 2 and len(matrix[0]) == 2 and len(matrix[1]) == 2:
# Calculate the determinant of the matrix
determinant = float(
d(matrix[0][0]) * d(matrix[1][1]) - d(matrix[1][0]) * d(matrix[0][1])
)
if determinant == 0:
raise ValueError("This matrix has no inverse.")
# Creates a copy of the matrix with swapped positions of the elements
swapped_matrix = [[0.0, 0.0], [0.0, 0.0]]
swapped_matrix[0][0], swapped_matrix[1][1] = matrix[1][1], matrix[0][0]
swapped_matrix[1][0], swapped_matrix[0][1] = -matrix[1][0], -matrix[0][1]
# Calculate the inverse of the matrix
return [
[(float(d(n)) / determinant) or 0.0 for n in row] for row in swapped_matrix
]
elif (
len(matrix) == 3
and len(matrix[0]) == 3
and len(matrix[1]) == 3
and len(matrix[2]) == 3
):
# Calculate the determinant of the matrix using Sarrus rule
determinant = float(
(
(d(matrix[0][0]) * d(matrix[1][1]) * d(matrix[2][2]))
+ (d(matrix[0][1]) * d(matrix[1][2]) * d(matrix[2][0]))
+ (d(matrix[0][2]) * d(matrix[1][0]) * d(matrix[2][1]))
)
- (
(d(matrix[0][2]) * d(matrix[1][1]) * d(matrix[2][0]))
+ (d(matrix[0][1]) * d(matrix[1][0]) * d(matrix[2][2]))
+ (d(matrix[0][0]) * d(matrix[1][2]) * d(matrix[2][1]))
)
)
if determinant == 0:
raise ValueError("This matrix has no inverse.")
# Creating cofactor matrix
cofactor_matrix = [
[d(0.0), d(0.0), d(0.0)],
[d(0.0), d(0.0), d(0.0)],
[d(0.0), d(0.0), d(0.0)],
]
cofactor_matrix[0][0] = (d(matrix[1][1]) * d(matrix[2][2])) - (
d(matrix[1][2]) * d(matrix[2][1])
)
cofactor_matrix[0][1] = -(
(d(matrix[1][0]) * d(matrix[2][2])) - (d(matrix[1][2]) * d(matrix[2][0]))
)
cofactor_matrix[0][2] = (d(matrix[1][0]) * d(matrix[2][1])) - (
d(matrix[1][1]) * d(matrix[2][0])
)
cofactor_matrix[1][0] = -(
(d(matrix[0][1]) * d(matrix[2][2])) - (d(matrix[0][2]) * d(matrix[2][1]))
)
cofactor_matrix[1][1] = (d(matrix[0][0]) * d(matrix[2][2])) - (
d(matrix[0][2]) * d(matrix[2][0])
)
cofactor_matrix[1][2] = -(
(d(matrix[0][0]) * d(matrix[2][1])) - (d(matrix[0][1]) * d(matrix[2][0]))
)
cofactor_matrix[2][0] = (d(matrix[0][1]) * d(matrix[1][2])) - (
d(matrix[0][2]) * d(matrix[1][1])
)
cofactor_matrix[2][1] = -(
(d(matrix[0][0]) * d(matrix[1][2])) - (d(matrix[0][2]) * d(matrix[1][0]))
)
cofactor_matrix[2][2] = (d(matrix[0][0]) * d(matrix[1][1])) - (
d(matrix[0][1]) * d(matrix[1][0])
)
# Transpose the cofactor matrix (Adjoint matrix)
adjoint_matrix = array(cofactor_matrix)
for i in range(3):
for j in range(3):
adjoint_matrix[i][j] = cofactor_matrix[j][i]
# Inverse of the matrix using the formula (1/determinant) * adjoint matrix
        inverse_matrix = array(adjoint_matrix)
for i in range(3):
for j in range(3):
inverse_matrix[i][j] /= d(determinant)
# Calculate the inverse of the matrix
return [[float(d(n)) or 0.0 for n in row] for row in inverse_matrix]
raise ValueError("Please provide a matrix of size 2x2 or 3x3.")
================================================
FILE: matrix/largest_square_area_in_matrix.py
================================================
"""
Question:
Given a binary matrix mat of size n * m, find out the maximum size square
sub-matrix with all 1s.
---
Example 1:
Input:
n = 2, m = 2
mat = [[1, 1],
[1, 1]]
Output:
2
Explanation: The maximum size of the square
sub-matrix is 2. The matrix itself is the
maximum sized sub-matrix in this case.
---
Example 2
Input:
n = 2, m = 2
mat = [[0, 0],
[0, 0]]
Output: 0
Explanation: There is no 1 in the matrix.
Approach:
We initialize another matrix (dp) with the same dimensions
as the original one initialized with all 0's.
dp_array(i,j) represents the side length of the maximum square whose
bottom right corner is the cell with index (i,j) in the original matrix.
Starting from index (0,0), for every 1 found in the original matrix,
we update the value of the current element as
dp_array(i,j) = min(dp_array(i-1,j), dp_array(i-1,j-1), dp_array(i,j-1)) + 1.
"""
def largest_square_area_in_matrix_top_down_approch(
rows: int, cols: int, mat: list[list[int]]
) -> int:
"""
    Updates largest_square_area[0] whenever a recursive call finds a larger square.
    No dp_array (memoization) is used here, so the time complexity is exponential.
>>> largest_square_area_in_matrix_top_down_approch(2, 2, [[1,1], [1,1]])
2
>>> largest_square_area_in_matrix_top_down_approch(2, 2, [[0,0], [0,0]])
0
"""
def update_area_of_max_square(row: int, col: int) -> int:
# BASE CASE
if row >= rows or col >= cols:
return 0
right = update_area_of_max_square(row, col + 1)
diagonal = update_area_of_max_square(row + 1, col + 1)
down = update_area_of_max_square(row + 1, col)
if mat[row][col]:
sub_problem_sol = 1 + min([right, diagonal, down])
largest_square_area[0] = max(largest_square_area[0], sub_problem_sol)
return sub_problem_sol
else:
return 0
largest_square_area = [0]
update_area_of_max_square(0, 0)
return largest_square_area[0]
def largest_square_area_in_matrix_top_down_approch_with_dp(
rows: int, cols: int, mat: list[list[int]]
) -> int:
"""
    Updates largest_square_area[0] whenever a recursive call finds a larger square.
    A dp_array is used for memoization here, so the time complexity is O(N^2).
>>> largest_square_area_in_matrix_top_down_approch_with_dp(2, 2, [[1,1], [1,1]])
2
>>> largest_square_area_in_matrix_top_down_approch_with_dp(2, 2, [[0,0], [0,0]])
0
"""
def update_area_of_max_square_using_dp_array(
row: int, col: int, dp_array: list[list[int]]
) -> int:
if row >= rows or col >= cols:
return 0
if dp_array[row][col] != -1:
return dp_array[row][col]
right = update_area_of_max_square_using_dp_array(row, col + 1, dp_array)
diagonal = update_area_of_max_square_using_dp_array(row + 1, col + 1, dp_array)
down = update_area_of_max_square_using_dp_array(row + 1, col, dp_array)
if mat[row][col]:
sub_problem_sol = 1 + min([right, diagonal, down])
largest_square_area[0] = max(largest_square_area[0], sub_problem_sol)
dp_array[row][col] = sub_problem_sol
return sub_problem_sol
else:
return 0
largest_square_area = [0]
dp_array = [[-1] * cols for _ in range(rows)]
update_area_of_max_square_using_dp_array(0, 0, dp_array)
return largest_square_area[0]
def largest_square_area_in_matrix_bottom_up(
rows: int, cols: int, mat: list[list[int]]
) -> int:
"""
    Updates largest_square_area using a bottom-up approach.
>>> largest_square_area_in_matrix_bottom_up(2, 2, [[1,1], [1,1]])
2
>>> largest_square_area_in_matrix_bottom_up(2, 2, [[0,0], [0,0]])
0
"""
dp_array = [[0] * (cols + 1) for _ in range(rows + 1)]
largest_square_area = 0
for row in range(rows - 1, -1, -1):
for col in range(cols - 1, -1, -1):
right = dp_array[row][col + 1]
diagonal = dp_array[row + 1][col + 1]
bottom = dp_array[row + 1][col]
if mat[row][col] == 1:
dp_array[row][col] = 1 + min(right, diagonal, bottom)
largest_square_area = max(dp_array[row][col], largest_square_area)
else:
dp_array[row][col] = 0
return largest_square_area
def largest_square_area_in_matrix_bottom_up_space_optimization(
rows: int, cols: int, mat: list[list[int]]
) -> int:
"""
    Updates largest_square_area using a bottom-up approach
    with space optimization.
>>> largest_square_area_in_matrix_bottom_up_space_optimization(2, 2, [[1,1], [1,1]])
2
>>> largest_square_area_in_matrix_bottom_up_space_optimization(2, 2, [[0,0], [0,0]])
0
"""
current_row = [0] * (cols + 1)
next_row = [0] * (cols + 1)
largest_square_area = 0
for row in range(rows - 1, -1, -1):
for col in range(cols - 1, -1, -1):
right = current_row[col + 1]
diagonal = next_row[col + 1]
bottom = next_row[col]
if mat[row][col] == 1:
current_row[col] = 1 + min(right, diagonal, bottom)
largest_square_area = max(current_row[col], largest_square_area)
else:
current_row[col] = 0
next_row = current_row
return largest_square_area
if __name__ == "__main__":
import doctest
doctest.testmod()
print(largest_square_area_in_matrix_bottom_up(2, 2, [[1, 1], [1, 1]]))
================================================
FILE: matrix/matrix_based_game.py
================================================
"""
Matrix-Based Game Script
=========================
This script implements a matrix-based game where players interact with a grid of
elements. The primary goals are to:
- Identify connected elements of the same type from a selected position.
- Remove those elements, adjust the matrix by simulating gravity, and reorganize empty
columns.
- Calculate and display the score based on the number of elements removed in each move.
Functions:
-----------
1. `find_repeat`: Finds all connected elements of the same type.
2. `increment_score`: Calculates the score for a given move.
3. `move_x`: Simulates gravity in a column.
4. `move_y`: Reorganizes the matrix by shifting columns leftward when a column becomes
empty.
5. `play`: Executes a single move, updating the matrix and returning the score.
Input Format:
--------------
1. Matrix size (`lines`): Integer specifying the size of the matrix (N x N).
2. Matrix content (`matrix`): Rows of the matrix, each consisting of characters.
3. Number of moves (`movs`): Integer indicating the number of moves.
4. List of moves (`movements`): A comma-separated string of coordinates for each move.
The (0,0) position is the bottom-left cell: the first coordinate counts columns from
left to right and the second counts rows from bottom to top.
Example Input:
---------------
4
RRBG
RBBG
YYGG
XYGG
2
0 1,1 1
Example (0,0) = X
Output:
--------
The script outputs the total score after processing all moves.
Usage:
-------
Run the script and provide the required inputs as prompted.
"""
def validate_matrix_size(size: int) -> None:
"""
>>> validate_matrix_size(-1)
Traceback (most recent call last):
...
ValueError: Matrix size must be a positive integer.
"""
if not isinstance(size, int) or size <= 0:
raise ValueError("Matrix size must be a positive integer.")
def validate_matrix_content(matrix: list[str], size: int) -> None:
"""
Validates that the number of elements in the matrix matches the given size.
>>> validate_matrix_content(['aaaa', 'aaaa', 'aaaa', 'aaaa'], 3)
Traceback (most recent call last):
...
    ValueError: The matrix does not match the given size.
>>> validate_matrix_content(['aa%', 'aaa', 'aaa'], 3)
Traceback (most recent call last):
...
ValueError: Matrix rows can only contain letters and numbers.
>>> validate_matrix_content(['aaa', 'aaa', 'aaaa'], 3)
Traceback (most recent call last):
...
ValueError: Each row in the matrix must have exactly 3 characters.
"""
if len(matrix) != size:
raise ValueError("The matrix dont match with size.")
for row in matrix:
if len(row) != size:
msg = f"Each row in the matrix must have exactly {size} characters."
raise ValueError(msg)
if not all(char.isalnum() for char in row):
raise ValueError("Matrix rows can only contain letters and numbers.")
def validate_moves(moves: list[tuple[int, int]], size: int) -> None:
"""
>>> validate_moves([(1, 2), (-1, 0)], 3)
Traceback (most recent call last):
...
ValueError: Move is out of bounds for a matrix.
"""
for move in moves:
x, y = move
if not (0 <= x < size and 0 <= y < size):
raise ValueError("Move is out of bounds for a matrix.")
def parse_moves(input_str: str) -> list[tuple[int, int]]:
"""
>>> parse_moves("0 1, 1 1")
[(0, 1), (1, 1)]
>>> parse_moves("0 1, 1 1, 2")
Traceback (most recent call last):
...
ValueError: Each move must have exactly two numbers.
>>> parse_moves("0 1, 1 1, 2 4 5 6")
Traceback (most recent call last):
...
ValueError: Each move must have exactly two numbers.
"""
moves = []
for pair in input_str.split(","):
parts = pair.strip().split()
if len(parts) != 2:
raise ValueError("Each move must have exactly two numbers.")
x, y = map(int, parts)
moves.append((x, y))
return moves
def find_repeat(
matrix_g: list[list[str]], row: int, column: int, size: int
) -> set[tuple[int, int]]:
"""
Finds all connected elements of the same type from a given position.
>>> find_repeat([['A', 'B', 'A'], ['A', 'B', 'A'], ['A', 'A', 'A']], 0, 0, 3)
{(1, 2), (2, 1), (0, 0), (2, 0), (0, 2), (2, 2), (1, 0)}
>>> find_repeat([['-', '-', '-'], ['-', '-', '-'], ['-', '-', '-']], 1, 1, 3)
set()
"""
column = size - 1 - column
visited = set()
repeated = set()
if (color := matrix_g[column][row]) != "-":
def dfs(row_n: int, column_n: int) -> None:
if row_n < 0 or row_n >= size or column_n < 0 or column_n >= size:
return
if (row_n, column_n) in visited:
return
visited.add((row_n, column_n))
if matrix_g[row_n][column_n] == color:
repeated.add((row_n, column_n))
dfs(row_n - 1, column_n)
dfs(row_n + 1, column_n)
dfs(row_n, column_n - 1)
dfs(row_n, column_n + 1)
dfs(column, row)
return repeated
def increment_score(count: int) -> int:
"""
Calculates the score for a move based on the number of elements removed.
>>> increment_score(3)
6
>>> increment_score(0)
0
"""
return int(count * (count + 1) / 2)
def move_x(matrix_g: list[list[str]], column: int, size: int) -> list[list[str]]:
"""
Simulates gravity in a specific column.
>>> move_x([['-', 'A'], ['-', '-'], ['-', 'C']], 1, 2)
[['-', '-'], ['-', 'A'], ['-', 'C']]
"""
new_list = []
for row in range(size):
if matrix_g[row][column] != "-":
new_list.append(matrix_g[row][column])
else:
new_list.insert(0, matrix_g[row][column])
for row in range(size):
matrix_g[row][column] = new_list[row]
return matrix_g
def move_y(matrix_g: list[list[str]], size: int) -> list[list[str]]:
"""
Shifts all columns leftward when an entire column becomes empty.
>>> move_y([['-', 'A'], ['-', '-'], ['-', 'C']], 2)
[['A', '-'], ['-', '-'], ['-', 'C']]
"""
empty_columns = []
for column in range(size - 1, -1, -1):
if all(matrix_g[row][column] == "-" for row in range(size)):
empty_columns.append(column)
for column in empty_columns:
for col in range(column + 1, size):
for row in range(size):
matrix_g[row][col - 1] = matrix_g[row][col]
for row in range(size):
matrix_g[row][-1] = "-"
return matrix_g
def play(
matrix_g: list[list[str]], pos_x: int, pos_y: int, size: int
) -> tuple[list[list[str]], int]:
"""
Processes a single move, updating the matrix and calculating the score.
>>> play([['R', 'G'], ['R', 'G']], 0, 0, 2)
([['G', '-'], ['G', '-']], 3)
"""
same_colors = find_repeat(matrix_g, pos_x, pos_y, size)
if len(same_colors) != 0:
for pos in same_colors:
matrix_g[pos[0]][pos[1]] = "-"
for column in range(size):
matrix_g = move_x(matrix_g, column, size)
matrix_g = move_y(matrix_g, size)
return (matrix_g, increment_score(len(same_colors)))
def process_game(size: int, matrix: list[str], moves: list[tuple[int, int]]) -> int:
"""Processes the game logic for the given matrix and moves.
Args:
size (int): Size of the game board.
matrix (List[str]): Initial game matrix.
moves (List[Tuple[int, int]]): List of moves as (x, y) coordinates.
Returns:
int: The total score obtained.
>>> process_game(3, ['aaa', 'bbb', 'ccc'], [(0, 0)])
6
"""
game_matrix = [list(row) for row in matrix]
total_score = 0
for move in moves:
pos_x, pos_y = move
game_matrix, score = play(game_matrix, pos_x, pos_y, size)
total_score += score
return total_score
if __name__ == "__main__":
import doctest
doctest.testmod(verbose=True)
try:
size = int(input("Enter the size of the matrix: "))
validate_matrix_size(size)
print(f"Enter the {size} rows of the matrix:")
matrix = [input(f"Row {i + 1}: ") for i in range(size)]
validate_matrix_content(matrix, size)
moves_input = input("Enter the moves (e.g., '0 0, 1 1'): ")
moves = parse_moves(moves_input)
validate_moves(moves, size)
score = process_game(size, matrix, moves)
print(f"Total score: {score}")
except ValueError as e:
print(f"{e}")
================================================
FILE: matrix/matrix_class.py
================================================
# An OOP approach to representing and manipulating matrices
from __future__ import annotations
class Matrix:
"""
Matrix object generated from a 2D array where each element is an array representing
a row.
Rows can contain type int or float.
Common operations and information available.
>>> rows = [
... [1, 2, 3],
... [4, 5, 6],
... [7, 8, 9]
... ]
>>> matrix = Matrix(rows)
>>> print(matrix)
[[1. 2. 3.]
[4. 5. 6.]
[7. 8. 9.]]
Matrix rows and columns are available as 2D arrays
>>> matrix.rows
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
>>> matrix.columns()
[[1, 4, 7], [2, 5, 8], [3, 6, 9]]
Order is returned as a tuple
>>> matrix.order
(3, 3)
Squareness and invertability are represented as bool
>>> matrix.is_square
True
>>> matrix.is_invertable()
False
Identity, Minors, Cofactors and Adjugate are returned as Matrices. Inverse can be
a Matrix or Nonetype
>>> print(matrix.identity())
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]
>>> print(matrix.minors())
[[-3. -6. -3.]
[-6. -12. -6.]
[-3. -6. -3.]]
>>> print(matrix.cofactors())
[[-3. 6. -3.]
[6. -12. 6.]
[-3. 6. -3.]]
>>> # won't be apparent due to the nature of the cofactor matrix
>>> print(matrix.adjugate())
[[-3. 6. -3.]
[6. -12. 6.]
[-3. 6. -3.]]
>>> matrix.inverse()
Traceback (most recent call last):
...
TypeError: Only matrices with a non-zero determinant have an inverse
Determinant is an int, float, or Nonetype
>>> matrix.determinant()
0
Negation, scalar multiplication, addition, subtraction, multiplication and
exponentiation are available and all return a Matrix
>>> print(-matrix)
[[-1. -2. -3.]
[-4. -5. -6.]
[-7. -8. -9.]]
>>> matrix2 = matrix * 3
>>> print(matrix2)
[[3. 6. 9.]
[12. 15. 18.]
[21. 24. 27.]]
>>> print(matrix + matrix2)
[[4. 8. 12.]
[16. 20. 24.]
[28. 32. 36.]]
>>> print(matrix - matrix2)
[[-2. -4. -6.]
[-8. -10. -12.]
[-14. -16. -18.]]
>>> print(matrix ** 3)
[[468. 576. 684.]
[1062. 1305. 1548.]
[1656. 2034. 2412.]]
Matrices can also be modified
>>> matrix.add_row([10, 11, 12])
>>> print(matrix)
[[1. 2. 3.]
[4. 5. 6.]
[7. 8. 9.]
[10. 11. 12.]]
>>> matrix2.add_column([8, 16, 32])
>>> print(matrix2)
[[3. 6. 9. 8.]
[12. 15. 18. 16.]
[21. 24. 27. 32.]]
>>> print(matrix * matrix2)
[[90. 108. 126. 136.]
[198. 243. 288. 304.]
[306. 378. 450. 472.]
[414. 513. 612. 640.]]
"""
def __init__(self, rows: list[list[int]]):
error = TypeError(
"Matrices must be formed from a list of zero or more lists containing at "
"least one and the same number of values, each of which must be of type "
"int or float."
)
if len(rows) != 0:
cols = len(rows[0])
if cols == 0:
raise error
for row in rows:
if len(row) != cols:
raise error
for value in row:
if not isinstance(value, (int, float)):
raise error
self.rows = rows
else:
self.rows = []
# MATRIX INFORMATION
def columns(self) -> list[list[int]]:
return [[row[i] for row in self.rows] for i in range(len(self.rows[0]))]
@property
def num_rows(self) -> int:
return len(self.rows)
@property
def num_columns(self) -> int:
return len(self.rows[0])
@property
def order(self) -> tuple[int, int]:
return self.num_rows, self.num_columns
@property
def is_square(self) -> bool:
return self.order[0] == self.order[1]
def identity(self) -> Matrix:
values = [
[0 if column_num != row_num else 1 for column_num in range(self.num_rows)]
for row_num in range(self.num_rows)
]
return Matrix(values)
def determinant(self) -> int:
if not self.is_square:
return 0
if self.order == (0, 0):
return 1
if self.order == (1, 1):
return int(self.rows[0][0])
if self.order == (2, 2):
return int(
(self.rows[0][0] * self.rows[1][1])
- (self.rows[0][1] * self.rows[1][0])
)
else:
return sum(
self.rows[0][column] * self.cofactors().rows[0][column]
for column in range(self.num_columns)
)
def is_invertable(self) -> bool:
return bool(self.determinant())
def get_minor(self, row: int, column: int) -> int:
values = [
[
self.rows[other_row][other_column]
for other_column in range(self.num_columns)
if other_column != column
]
for other_row in range(self.num_rows)
if other_row != row
]
return Matrix(values).determinant()
def get_cofactor(self, row: int, column: int) -> int:
if (row + column) % 2 == 0:
return self.get_minor(row, column)
return -1 * self.get_minor(row, column)
def minors(self) -> Matrix:
return Matrix(
[
[self.get_minor(row, column) for column in range(self.num_columns)]
for row in range(self.num_rows)
]
)
def cofactors(self) -> Matrix:
return Matrix(
[
[
self.minors().rows[row][column]
if (row + column) % 2 == 0
else self.minors().rows[row][column] * -1
for column in range(self.minors().num_columns)
]
for row in range(self.minors().num_rows)
]
)
def adjugate(self) -> Matrix:
values = [
[self.cofactors().rows[column][row] for column in range(self.num_columns)]
for row in range(self.num_rows)
]
return Matrix(values)
def inverse(self) -> Matrix:
determinant = self.determinant()
if not determinant:
raise TypeError("Only matrices with a non-zero determinant have an inverse")
return self.adjugate() * (1 / determinant)
def __repr__(self) -> str:
return str(self.rows)
def __str__(self) -> str:
if self.num_rows == 0:
return "[]"
if self.num_rows == 1:
return "[[" + ". ".join(str(self.rows[0])) + "]]"
return (
"["
+ "\n ".join(
[
"[" + ". ".join([str(value) for value in row]) + ".]"
for row in self.rows
]
)
+ "]"
)
# MATRIX MANIPULATION
def add_row(self, row: list[int], position: int | None = None) -> None:
type_error = TypeError("Row must be a list containing all ints and/or floats")
if not isinstance(row, list):
raise type_error
for value in row:
if not isinstance(value, (int, float)):
raise type_error
if len(row) != self.num_columns:
raise ValueError(
"Row must be equal in length to the other rows in the matrix"
)
if position is None:
self.rows.append(row)
else:
self.rows = [*self.rows[0:position], row, *self.rows[position:]]
def add_column(self, column: list[int], position: int | None = None) -> None:
type_error = TypeError(
"Column must be a list containing all ints and/or floats"
)
if not isinstance(column, list):
raise type_error
for value in column:
if not isinstance(value, (int, float)):
raise type_error
if len(column) != self.num_rows:
raise ValueError(
"Column must be equal in length to the other columns in the matrix"
)
if position is None:
self.rows = [self.rows[i] + [column[i]] for i in range(self.num_rows)]
else:
self.rows = [
[*self.rows[i][0:position], column[i], *self.rows[i][position:]]
for i in range(self.num_rows)
]
# MATRIX OPERATIONS
def __eq__(self, other: object) -> bool:
if not isinstance(other, Matrix):
return NotImplemented
return self.rows == other.rows
def __ne__(self, other: object) -> bool:
return not self == other
def __neg__(self) -> Matrix:
return self * -1
def __add__(self, other: Matrix) -> Matrix:
if self.order != other.order:
raise ValueError("Addition requires matrices of the same order")
return Matrix(
[
[self.rows[i][j] + other.rows[i][j] for j in range(self.num_columns)]
for i in range(self.num_rows)
]
)
def __sub__(self, other: Matrix) -> Matrix:
if self.order != other.order:
raise ValueError("Subtraction requires matrices of the same order")
return Matrix(
[
[self.rows[i][j] - other.rows[i][j] for j in range(self.num_columns)]
for i in range(self.num_rows)
]
)
def __mul__(self, other: Matrix | float) -> Matrix:
if isinstance(other, (int, float)):
return Matrix(
[[int(element * other) for element in row] for row in self.rows]
)
elif isinstance(other, Matrix):
if self.num_columns != other.num_rows:
raise ValueError(
"The number of columns in the first matrix must "
"be equal to the number of rows in the second"
)
return Matrix(
[
[Matrix.dot_product(row, column) for column in other.columns()]
for row in self.rows
]
)
else:
raise TypeError(
"A Matrix can only be multiplied by an int, float, or another matrix"
)
def __pow__(self, other: int) -> Matrix:
if not isinstance(other, int):
raise TypeError("A Matrix can only be raised to the power of an int")
if not self.is_square:
raise ValueError("Only square matrices can be raised to a power")
if other == 0:
return self.identity()
if other < 0:
if self.is_invertable():
return self.inverse() ** (-other)
raise ValueError(
"Only invertable matrices can be raised to a negative power"
)
result = self
for _ in range(other - 1):
result *= self
return result
@classmethod
def dot_product(cls, row: list[int], column: list[int]) -> int:
return sum(row[i] * column[i] for i in range(len(row)))
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: matrix/matrix_equalization.py
================================================
from sys import maxsize
def array_equalization(vector: list[int], step_size: int) -> int:
"""
This algorithm equalizes all elements of the input vector
to a common value, by making the minimal number of
"updates" under the constraint of a step size (step_size).
>>> array_equalization([1, 1, 6, 2, 4, 6, 5, 1, 7, 2, 2, 1, 7, 2, 2], 4)
4
>>> array_equalization([22, 81, 88, 71, 22, 81, 632, 81, 81, 22, 92], 2)
5
>>> array_equalization([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 5)
0
>>> array_equalization([22, 22, 22, 33, 33, 33], 2)
2
>>> array_equalization([1, 2, 3], 0)
Traceback (most recent call last):
ValueError: Step size must be positive and non-zero.
>>> array_equalization([1, 2, 3], -1)
Traceback (most recent call last):
ValueError: Step size must be positive and non-zero.
>>> array_equalization([1, 2, 3], 0.5)
Traceback (most recent call last):
ValueError: Step size must be an integer.
>>> array_equalization([1, 2, 3], maxsize)
1
"""
if step_size <= 0:
raise ValueError("Step size must be positive and non-zero.")
if not isinstance(step_size, int):
raise ValueError("Step size must be an integer.")
unique_elements = set(vector)
min_updates = maxsize
for element in unique_elements:
elem_index = 0
updates = 0
while elem_index < len(vector):
if vector[elem_index] != element:
updates += 1
elem_index += step_size
else:
elem_index += 1
min_updates = min(min_updates, updates)
return min_updates
if __name__ == "__main__":
from doctest import testmod
testmod()
================================================
FILE: matrix/matrix_multiplication_recursion.py
================================================
# @Author : ojas-wani
# @File : matrix_multiplication_recursion.py
# @Date : 10/06/2023
"""
Perform matrix multiplication using a recursive algorithm.
https://en.wikipedia.org/wiki/Matrix_multiplication
"""
# type Matrix = list[list[int]]  # psf/black currently fails on this line
Matrix = list[list[int]]
matrix_1_to_4 = [
[1, 2],
[3, 4],
]
matrix_5_to_8 = [
[5, 6],
[7, 8],
]
matrix_5_to_9_high = [
[5, 6],
[7, 8],
[9],
]
matrix_5_to_9_wide = [
[5, 6],
[7, 8, 9],
]
matrix_count_up = [
[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12],
[13, 14, 15, 16],
]
matrix_unordered = [
[5, 8, 1, 2],
[6, 7, 3, 0],
[4, 5, 9, 1],
[2, 6, 10, 14],
]
matrices = (
matrix_1_to_4,
matrix_5_to_8,
matrix_5_to_9_high,
matrix_5_to_9_wide,
matrix_count_up,
matrix_unordered,
)
def is_square(matrix: Matrix) -> bool:
"""
>>> is_square([])
True
>>> is_square(matrix_1_to_4)
True
>>> is_square(matrix_5_to_9_high)
False
"""
len_matrix = len(matrix)
return all(len(row) == len_matrix for row in matrix)
def matrix_multiply(matrix_a: Matrix, matrix_b: Matrix) -> Matrix:
"""
>>> matrix_multiply(matrix_1_to_4, matrix_5_to_8)
[[19, 22], [43, 50]]
"""
return [
[sum(a * b for a, b in zip(row, col)) for col in zip(*matrix_b)]
for row in matrix_a
]
def matrix_multiply_recursive(matrix_a: Matrix, matrix_b: Matrix) -> Matrix:
"""
:param matrix_a: A square Matrix.
:param matrix_b: Another square Matrix with the same dimensions as matrix_a.
:return: Result of matrix_a * matrix_b.
:raises ValueError: If the matrices cannot be multiplied.
>>> matrix_multiply_recursive([], [])
[]
>>> matrix_multiply_recursive(matrix_1_to_4, matrix_5_to_8)
[[19, 22], [43, 50]]
>>> matrix_multiply_recursive(matrix_count_up, matrix_unordered)
[[37, 61, 74, 61], [105, 165, 166, 129], [173, 269, 258, 197], [241, 373, 350, 265]]
>>> matrix_multiply_recursive(matrix_1_to_4, matrix_5_to_9_wide)
Traceback (most recent call last):
...
ValueError: Invalid matrix dimensions
>>> matrix_multiply_recursive(matrix_1_to_4, matrix_5_to_9_high)
Traceback (most recent call last):
...
ValueError: Invalid matrix dimensions
>>> matrix_multiply_recursive(matrix_1_to_4, matrix_count_up)
Traceback (most recent call last):
...
ValueError: Invalid matrix dimensions
"""
if not matrix_a or not matrix_b:
return []
if not all(
(len(matrix_a) == len(matrix_b), is_square(matrix_a), is_square(matrix_b))
):
raise ValueError("Invalid matrix dimensions")
# Initialize the result matrix with zeros
result = [[0] * len(matrix_b[0]) for _ in range(len(matrix_a))]
# Recursive multiplication of matrices
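    # The helper below replaces the classic triple for-loop with recursion:
    # k_loop accumulates one term of a dot product per call, j_loop advances to the
    # next output column once a dot product is finished, and i_loop advances to the
    # next output row.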
def multiply(
i_loop: int,
j_loop: int,
k_loop: int,
matrix_a: Matrix,
matrix_b: Matrix,
result: Matrix,
) -> None:
"""
:param matrix_a: A square Matrix.
:param matrix_b: Another square Matrix with the same dimensions as matrix_a.
:param result: Result matrix
        :param i_loop: Row index used for iteration during multiplication.
        :param j_loop: Column index used for iteration during multiplication.
        :param k_loop: Dot-product index used for iteration during multiplication.
>>> 0 > 1 # Doctests in inner functions are never run
True
"""
if i_loop >= len(matrix_a):
return
if j_loop >= len(matrix_b[0]):
return multiply(i_loop + 1, 0, 0, matrix_a, matrix_b, result)
if k_loop >= len(matrix_b):
return multiply(i_loop, j_loop + 1, 0, matrix_a, matrix_b, result)
result[i_loop][j_loop] += matrix_a[i_loop][k_loop] * matrix_b[k_loop][j_loop]
return multiply(i_loop, j_loop, k_loop + 1, matrix_a, matrix_b, result)
# Perform the recursive matrix multiplication
multiply(0, 0, 0, matrix_a, matrix_b, result)
return result
if __name__ == "__main__":
from doctest import testmod
failure_count, test_count = testmod()
if not failure_count:
matrix_a = matrices[0]
for matrix_b in matrices[1:]:
print("Multiplying:")
for row in matrix_a:
print(row)
print("By:")
for row in matrix_b:
print(row)
print("Result:")
try:
result = matrix_multiply_recursive(matrix_a, matrix_b)
for row in result:
print(row)
assert result == matrix_multiply(matrix_a, matrix_b)
except ValueError as e:
print(f"{e!r}")
print()
matrix_a = matrix_b
print("Benchmark:")
from functools import partial
from timeit import timeit
mytimeit = partial(timeit, globals=globals(), number=100_000)
for func in ("matrix_multiply", "matrix_multiply_recursive"):
print(f"{func:>25}(): {mytimeit(f'{func}(matrix_count_up, matrix_unordered)')}")
================================================
FILE: matrix/matrix_operation.py
================================================
"""
Functions for 2D matrix operations
"""
from __future__ import annotations
from typing import Any
def add(*matrix_s: list[list[int]]) -> list[list[int]]:
"""
>>> add([[1,2],[3,4]],[[2,3],[4,5]])
[[3, 5], [7, 9]]
>>> add([[1.2,2.4],[3,4]],[[2,3],[4,5]])
[[3.2, 5.4], [7, 9]]
>>> add([[1, 2], [4, 5]], [[3, 7], [3, 4]], [[3, 5], [5, 7]])
[[7, 14], [12, 16]]
>>> add([3], [4, 5])
Traceback (most recent call last):
...
TypeError: Expected a matrix, got int/list instead
"""
if all(_check_not_integer(m) for m in matrix_s):
for i in matrix_s[1:]:
_verify_matrix_sizes(matrix_s[0], i)
return [[sum(t) for t in zip(*m)] for m in zip(*matrix_s)]
raise TypeError("Expected a matrix, got int/list instead")
def subtract(matrix_a: list[list[int]], matrix_b: list[list[int]]) -> list[list[int]]:
"""
>>> subtract([[1,2],[3,4]],[[2,3],[4,5]])
[[-1, -1], [-1, -1]]
>>> subtract([[1,2.5],[3,4]],[[2,3],[4,5.5]])
[[-1, -0.5], [-1, -1.5]]
>>> subtract([3], [4, 5])
Traceback (most recent call last):
...
TypeError: Expected a matrix, got int/list instead
"""
if (
_check_not_integer(matrix_a)
and _check_not_integer(matrix_b)
and _verify_matrix_sizes(matrix_a, matrix_b)
):
return [[i - j for i, j in zip(*m)] for m in zip(matrix_a, matrix_b)]
raise TypeError("Expected a matrix, got int/list instead")
def scalar_multiply(matrix: list[list[int]], n: float) -> list[list[float]]:
"""
>>> scalar_multiply([[1,2],[3,4]],5)
[[5, 10], [15, 20]]
>>> scalar_multiply([[1.4,2.3],[3,4]],5)
[[7.0, 11.5], [15, 20]]
"""
return [[x * n for x in row] for row in matrix]
def multiply(matrix_a: list[list[int]], matrix_b: list[list[int]]) -> list[list[int]]:
"""
>>> multiply([[1,2],[3,4]],[[5,5],[7,5]])
[[19, 15], [43, 35]]
>>> multiply([[1,2.5],[3,4.5]],[[5,5],[7,5]])
[[22.5, 17.5], [46.5, 37.5]]
>>> multiply([[1, 2, 3]], [[2], [3], [4]])
[[20]]
"""
if _check_not_integer(matrix_a) and _check_not_integer(matrix_b):
rows, cols = _verify_matrix_sizes(matrix_a, matrix_b)
if cols[0] != rows[1]:
msg = (
"Cannot multiply matrix of dimensions "
f"({rows[0]},{cols[0]}) and ({rows[1]},{cols[1]})"
)
raise ValueError(msg)
return [
[sum(m * n for m, n in zip(i, j)) for j in zip(*matrix_b)] for i in matrix_a
]
def identity(n: int) -> list[list[int]]:
"""
:param n: dimension for nxn matrix
:type n: int
:return: Identity matrix of shape [n, n]
>>> identity(3)
[[1, 0, 0], [0, 1, 0], [0, 0, 1]]
"""
n = int(n)
return [[int(row == column) for column in range(n)] for row in range(n)]
def transpose(
matrix: list[list[int]], return_map: bool = True
) -> list[list[int]] | map[list[int]]:
"""
>>> transpose([[1,2],[3,4]]) # doctest: +ELLIPSIS