Repository: supabase/realtime Branch: main Commit: 0916b0dceaf9 Files: 475 Total size: 2.0 MB Directory structure: gitextract_musv60dn/ ├── .credo.exs ├── .dockerignore ├── .formatter.exs ├── .github/ │ ├── actionlint.yaml │ └── workflows/ │ ├── beacon_tests.yml │ ├── docker-build.yml │ ├── integration_tests.yml │ ├── lint.yml │ ├── manual_prod_build.yml │ ├── mirror.yml │ ├── prod_build.yml │ ├── prod_linter.yml │ ├── tests.yml │ └── update-supabase-js.yml ├── .gitignore ├── .releaserc ├── .sobelow-conf ├── .tool-versions ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── assets/ │ ├── css/ │ │ ├── app.css │ │ └── phoenix.css │ ├── js/ │ │ └── app.js │ ├── package.json │ ├── tailwind.config.js │ └── vendor/ │ └── topbar.js ├── beacon/ │ ├── .formatter.exs │ ├── .gitignore │ ├── README.md │ ├── config/ │ │ └── config.exs │ ├── lib/ │ │ ├── beacon/ │ │ │ ├── adapter/ │ │ │ │ └── erl_dist.ex │ │ │ ├── adapter.ex │ │ │ ├── partition.ex │ │ │ ├── scope.ex │ │ │ └── supervisor.ex │ │ └── beacon.ex │ ├── mix.exs │ └── test/ │ ├── beacon/ │ │ └── partition_test.exs │ ├── beacon_test.exs │ ├── support/ │ │ └── peer.ex │ └── test_helper.exs ├── bench/ │ ├── gen_counter.exs │ └── secrets.exs ├── config/ │ ├── config.exs │ ├── dev.exs │ ├── prod.exs │ ├── runtime.exs │ └── test.exs ├── coveralls.json ├── deploy/ │ └── fly/ │ ├── prod.toml │ ├── qa.toml │ └── staging.toml ├── dev/ │ └── postgres/ │ └── 00-supabase-schema.sql ├── docker-compose.dbs.yml ├── docker-compose.tests.yml ├── docker-compose.yml ├── lib/ │ ├── extensions/ │ │ ├── extensions.ex │ │ └── postgres_cdc_rls/ │ │ ├── cdc_rls.ex │ │ ├── db_settings.ex │ │ ├── message_dispatcher.ex │ │ ├── replication_poller.ex │ │ ├── replications.ex │ │ ├── subscription_manager.ex │ │ ├── subscriptions.ex │ │ ├── subscriptions_checker.ex │ │ ├── supervisor.ex │ │ └── worker_supervisor.ex │ ├── realtime/ │ │ ├── adapters/ │ │ │ ├── changes.ex │ │ │ └── postgres/ │ │ │ ├── decoder.ex │ │ │ ├── oid_database.ex │ │ │ ├── 
protocol/ │ │ │ │ ├── keep_alive.ex │ │ │ │ └── write.ex │ │ │ └── protocol.ex │ │ ├── api/ │ │ │ ├── extensions.ex │ │ │ ├── message.ex │ │ │ └── tenant.ex │ │ ├── api.ex │ │ ├── application.ex │ │ ├── beacon_pub_sub_adapter.ex │ │ ├── crypto.ex │ │ ├── database.ex │ │ ├── gen_counter/ │ │ │ └── gen_counter.ex │ │ ├── gen_rpc/ │ │ │ └── pub_sub.ex │ │ ├── gen_rpc.ex │ │ ├── helpers.ex │ │ ├── log_filter.ex │ │ ├── logs.ex │ │ ├── messages.ex │ │ ├── metrics_cleaner.ex │ │ ├── metrics_pusher.ex │ │ ├── monitoring/ │ │ │ ├── distributed_metrics.ex │ │ │ ├── erl_sys_mon.ex │ │ │ ├── gen_rpc_metrics.ex │ │ │ ├── latency.ex │ │ │ ├── os_metrics.ex │ │ │ ├── peep/ │ │ │ │ └── partitioned.ex │ │ │ ├── prom_ex/ │ │ │ │ └── plugins/ │ │ │ │ ├── channels.ex │ │ │ │ ├── distributed.ex │ │ │ │ ├── gen_rpc.ex │ │ │ │ ├── osmon.ex │ │ │ │ ├── phoenix.ex │ │ │ │ ├── tenant.ex │ │ │ │ ├── tenant_global.ex │ │ │ │ └── tenants.ex │ │ │ ├── prom_ex.ex │ │ │ ├── prometheus.ex │ │ │ └── tenant_prom_ex.ex │ │ ├── nodes.ex │ │ ├── operations.ex │ │ ├── postgres_cdc.ex │ │ ├── rate_counter/ │ │ │ ├── dynamic_supervisor.ex │ │ │ └── rate_counter.ex │ │ ├── release.ex │ │ ├── repo.ex │ │ ├── repo_replica.ex │ │ ├── rpc.ex │ │ ├── signal_handler.ex │ │ ├── syn/ │ │ │ └── postgres_cdc.ex │ │ ├── syn_handler.ex │ │ ├── telemetry/ │ │ │ ├── logger.ex │ │ │ └── telemetry.ex │ │ ├── tenants/ │ │ │ ├── authorization/ │ │ │ │ ├── policies/ │ │ │ │ │ ├── broadcast_policies.ex │ │ │ │ │ └── presence_policies.ex │ │ │ │ └── policies.ex │ │ │ ├── authorization.ex │ │ │ ├── batch_broadcast.ex │ │ │ ├── cache.ex │ │ │ ├── connect/ │ │ │ │ ├── check_connection.ex │ │ │ │ ├── get_tenant.ex │ │ │ │ ├── piper.ex │ │ │ │ ├── reconcile_migrations.ex │ │ │ │ └── register_process.ex │ │ │ ├── connect.ex │ │ │ ├── janitor/ │ │ │ │ └── maintenance_task.ex │ │ │ ├── janitor.ex │ │ │ ├── migrations.ex │ │ │ ├── rebalancer.ex │ │ │ ├── replication_connection/ │ │ │ │ └── watchdog.ex │ │ │ ├── 
replication_connection.ex │ │ │ ├── repo/ │ │ │ │ └── migrations/ │ │ │ │ ├── 20211116024918_create_realtime_subscription_table.ex │ │ │ │ ├── 20211116045059_create_realtime_check_filters_trigger.ex │ │ │ │ ├── 20211116050929_create_realtime_quote_wal2json_function.ex │ │ │ │ ├── 20211116051442_create_realtime_check_equality_op_function.ex │ │ │ │ ├── 20211116212300_create_realtime_build_prepared_statement_sql_function.ex │ │ │ │ ├── 20211116213355_create_realtime_cast_function.ex │ │ │ │ ├── 20211116213934_create_realtime_is_visible_through_filters_function.ex │ │ │ │ ├── 20211116214523_create_realtime_apply_rls_function.ex │ │ │ │ ├── 20211122062447_grant_realtime_usage_to_authenticated_role.ex │ │ │ │ ├── 20211124070109_enable_realtime_apply_rls_function_postgrest_9_compatibility.ex │ │ │ │ ├── 20211202204204_update_realtime_subscription_check_filters_function_security.ex │ │ │ │ ├── 20211202204605_update_realtime_build_prepared_statement_sql_function_for_compatibility_with_all_types.ex │ │ │ │ ├── 20211210212804_enable_generic_subscription_claims.ex │ │ │ │ ├── 20211228014915_add_wal_payload_on_errors_in_apply_rls_function.ex │ │ │ │ ├── 20220107221237_update_change_timestamp_to_iso_8601_zulu_format.ex │ │ │ │ ├── 20220228202821_update_subscription_check_filters_function_dynamic_table_name.ex │ │ │ │ ├── 20220312004840_update_apply_rls_function_to_apply_iso_8601.ex │ │ │ │ ├── 20220603231003_add_quoted_regtypes_support.ex │ │ │ │ ├── 20220603232444_add_output_for_data_less_than_equal_64_bytes_when_payload_too_large.ex │ │ │ │ ├── 20220615214548_add_quoted_regtypes_backward_compatibility_support.ex │ │ │ │ ├── 20220712093339_recreate_realtime_build_prepared_statement_sql_function.ex │ │ │ │ ├── 20220908172859_null_passes_filters_recreate_is_visible_through_filters.ex │ │ │ │ ├── 20220916233421_update_apply_rls_function_to_pass_through_delete_events_on_filter.ex │ │ │ │ ├── 20230119133233_millisecond_precision_for_walrus.ex │ │ │ │ ├── 
20230128025114_add_in_op_to_filters.ex │ │ │ │ ├── 20230128025212_enable_filtering_on_delete_record.ex │ │ │ │ ├── 20230227211149_update_subscription_check_filters_for_in_filter_non_text_types.ex │ │ │ │ ├── 20230228184745_convert_commit_timestamp_to_utc.ex │ │ │ │ ├── 20230308225145_output_full_record_when_unchanged_toast.ex │ │ │ │ ├── 20230328144023_create_list_changes_function.ex │ │ │ │ ├── 20231018144023_create_channels.ex │ │ │ │ ├── 20231204144023_set_required_grants.ex │ │ │ │ ├── 20231204144024_create_rls_helper_functions.ex │ │ │ │ ├── 20231204144025_enable_channels_rls.ex │ │ │ │ ├── 20240108234812_add_channels_column_for_write_check.ex │ │ │ │ ├── 20240109165339_add_update_grant_to_channels.ex │ │ │ │ ├── 20240227174441_add_broadcast_permissions_table.ex │ │ │ │ ├── 20240311171622_add_insert_and_delete_grant_to_channels.ex │ │ │ │ ├── 20240321100241_add_presences_permissions_table.ex │ │ │ │ ├── 20240401105812_create_realtime_admin_and_move_ownership.ex │ │ │ │ ├── 20240418121054_remove_check_columns.ex │ │ │ │ ├── 20240523004032_redefine_authorization_tables.ex │ │ │ │ ├── 20240618124746_fix_walrus_role_handling.ex │ │ │ │ ├── 20240801235015_unlogged_messages_table.ex │ │ │ │ ├── 20240805133720_logged_messages_table.ex │ │ │ │ ├── 20240827160934_filter_delete_postgres_changes.ex │ │ │ │ ├── 20240919163303_add_payload_to_messages.ex │ │ │ │ ├── 20240919163305_change_messages_id_type.ex │ │ │ │ ├── 20241019105805_uuid_auto_generation.ex │ │ │ │ ├── 20241030150047_messages_partitioning.ex │ │ │ │ ├── 20241108114728_messages_using_uuid.ex │ │ │ │ ├── 20241121104152_fix_send_function_.ex │ │ │ │ ├── 20241130184212_recreate_entity_index_using_btree.ex │ │ │ │ ├── 20241220035512_fix_send_function_partition_creation.ex │ │ │ │ ├── 20241220123912_realtime_send_handle_exceptions_remove_partition_creation.ex │ │ │ │ ├── 20241224161212_realtime_send_sets_config.ex │ │ │ │ ├── 20250107150512_realtime_subscription_unlogged.ex │ │ │ │ ├── 
20250110162412_realtime_subscription_logged.ex │ │ │ │ ├── 20250123174212_remove_unused_publications.ex │ │ │ │ ├── 20250128220012_realtime_send_sets_topic_config.ex │ │ │ │ ├── 20250506224012_subscription_index_bridging_disabled.ex │ │ │ │ ├── 20250523164012_run_subscription_index_bridging_disabled.ex │ │ │ │ ├── 20250714121412_broadcast_send_error_logging.ex │ │ │ │ ├── 20250905041441_create_messages_replay_index.ex │ │ │ │ ├── 20251103001201_broadcast_send_include_payload_id.ex │ │ │ │ ├── 20251120212548_add_action_to_subscriptions.ex │ │ │ │ ├── 20251120215549_filter_action_postgres_changes.ex │ │ │ │ └── 20260218120000_fix_bytea_double_encoding_in_cast.ex │ │ │ └── repo.ex │ │ ├── tenants.ex │ │ └── users_counter.ex │ ├── realtime.ex │ ├── realtime_web/ │ │ ├── api_spec.ex │ │ ├── channels/ │ │ │ ├── auth/ │ │ │ │ ├── channels_authorization.ex │ │ │ │ └── jwt_verification.ex │ │ │ ├── payloads/ │ │ │ │ ├── broadcast/ │ │ │ │ │ └── replay.ex │ │ │ │ ├── broadcast.ex │ │ │ │ ├── config.ex │ │ │ │ ├── flexible_boolean.ex │ │ │ │ ├── join.ex │ │ │ │ ├── postgres_change.ex │ │ │ │ └── presence.ex │ │ │ ├── presence.ex │ │ │ ├── realtime_channel/ │ │ │ │ ├── assign.ex │ │ │ │ ├── broadcast_handler.ex │ │ │ │ ├── logging.ex │ │ │ │ ├── message_dispatcher.ex │ │ │ │ ├── presence_handler.ex │ │ │ │ └── tracker.ex │ │ │ ├── realtime_channel.ex │ │ │ ├── socket_disconnect.ex │ │ │ ├── tenant_rate_limiters.ex │ │ │ └── user_socket.ex │ │ ├── controllers/ │ │ │ ├── broadcast_controller.ex │ │ │ ├── fallback_controller.ex │ │ │ ├── legacy_metrics_controller.ex │ │ │ ├── metrics_controller.ex │ │ │ ├── page_controller.ex │ │ │ ├── ping_controller.ex │ │ │ └── tenant_controller.ex │ │ ├── dashboard/ │ │ │ ├── process_dump.ex │ │ │ └── tenant_info.ex │ │ ├── endpoint.ex │ │ ├── gettext.ex │ │ ├── live/ │ │ │ ├── components.ex │ │ │ ├── inspector_live/ │ │ │ │ ├── conn_component.ex │ │ │ │ ├── conn_component.html.heex │ │ │ │ ├── index.ex │ │ │ │ └── index.html.heex │ │ │ ├── 
page_live/ │ │ │ │ ├── index.ex │ │ │ │ └── index.html.heex │ │ │ ├── ping_live.ex │ │ │ ├── status_live/ │ │ │ │ ├── index.ex │ │ │ │ └── index.html.heex │ │ │ ├── tenants_live/ │ │ │ │ ├── index.ex │ │ │ │ └── index.html.heex │ │ │ └── time_live.ex │ │ ├── open_api_schemas.ex │ │ ├── plugs/ │ │ │ ├── assign_tenant.ex │ │ │ ├── auth_tenant.ex │ │ │ ├── baggage_request_id.ex │ │ │ ├── metrics_mode.ex │ │ │ └── rate_limiter.ex │ │ ├── router.ex │ │ ├── socket/ │ │ │ ├── user_broadcast.ex │ │ │ └── v2_serializer.ex │ │ ├── telemetry.ex │ │ ├── templates/ │ │ │ └── layout/ │ │ │ ├── app.html.heex │ │ │ ├── live.html.heex │ │ │ └── root.html.heex │ │ ├── tenant_broadcaster.ex │ │ └── views/ │ │ ├── changeset_view.ex │ │ ├── error_helpers.ex │ │ ├── error_view.ex │ │ ├── layout_view.ex │ │ └── tenant_view.ex │ └── realtime_web.ex ├── mix.exs ├── phx_join.schema.json ├── priv/ │ ├── gettext/ │ │ ├── en/ │ │ │ └── LC_MESSAGES/ │ │ │ └── errors.po │ │ └── errors.pot │ ├── repo/ │ │ ├── dev_seeds.exs │ │ ├── migrations/ │ │ │ ├── .formatter.exs │ │ │ ├── 20210706140551_create_tenant.exs │ │ │ ├── 20220329161857_add_extensions_table.exs │ │ │ ├── 20220410212326_add_tenant_max_eps.exs │ │ │ ├── 20220506102948_rename_poll_interval_to_poll_interval_ms.exs │ │ │ ├── 20220527210857_add_external_id_uniq_index.exs │ │ │ ├── 20220815211129_new_max_events_per_second_default.exs │ │ │ ├── 20220815215024_set_current_max_events_per_second.exs │ │ │ ├── 20220818141501_change_limits_defaults.exs │ │ │ ├── 20221018173709_add_cdc_default.exs │ │ │ ├── 20221102172703_rename_pg_type.exs │ │ │ ├── 20221223010058_drop_tenants_uniq_external_id_index.exs │ │ │ ├── 20230110180046_add_limits_fields_to_tenants.exs │ │ │ ├── 20230810220907_alter_tenants_table_columns_to_text.exs │ │ │ ├── 20230810220924_alter_extensions_table_columns_to_text.exs │ │ │ ├── 20231024094642_add_tenant_suspend_flag.exs │ │ │ ├── 20240306114423_add_tenant_jwt_jwks.exs │ │ │ ├── 20240418082835_add_authorization_flag.exs │ │ 
│ ├── 20240625211759_remove_enable_authorization_flag.exs │ │ │ ├── 20240704172020_add_notify_private_alpha.exs │ │ │ ├── 20240902173232_add_extension_external_id_index.exs │ │ │ ├── 20241106103258_add_private_only_flag_column_to_tenant.exs │ │ │ ├── 20250424203323_add_migrations_ran_to_tenant.exs │ │ │ ├── 20250613072131_add_tenant_broadcast_adapter.exs │ │ │ ├── 20250711044927_change_default_broadcast_adapter_to_gen_rpc.exs │ │ │ ├── 20250811121559_add_max_presence_events_per_second.exs │ │ │ ├── 20250926223044_set_default_presence_value.exs │ │ │ ├── 20251204170944_nullable_jwt_secrets.exs │ │ │ ├── 20251218000543_ensure_jwt_secret_is_text.exs │ │ │ ├── 20260209232800_add_max_client_presence_events_per_second.exs │ │ │ └── 20260304000000_add_presence_enabled_to_tenants.exs │ │ ├── seeds.exs │ │ └── seeds_before_migration.exs │ └── static/ │ ├── robots.txt │ └── worker.js ├── rel/ │ ├── env.bat.eex │ ├── env.sh.eex │ ├── overlays/ │ │ ├── bin/ │ │ │ ├── migrate │ │ │ ├── migrate.bat │ │ │ ├── server │ │ │ └── server.bat │ │ └── config.example.yml │ └── vm.args.eex ├── run.sh └── test/ ├── api_jwt_secret_test.exs ├── e2e/ │ ├── .gitignore │ ├── .template.env │ ├── .tool-versions │ ├── README.md │ ├── flake.nix │ ├── legacy/ │ │ ├── .tool-versions │ │ ├── README.md │ │ └── tests.ts │ ├── package.json │ ├── realtime-check.ts │ └── supabase/ │ ├── .branches/ │ │ └── _current_branch │ └── .temp/ │ └── cli-latest ├── extensions/ │ ├── extensions_test.exs │ └── postgres_cdc_rls/ │ ├── db_settings_test.exs │ ├── message_dispatcher_test.exs │ ├── replications_test.exs │ └── worker_supervisor_test.exs ├── integration/ │ ├── distributed_realtime_channel_test.exs │ ├── measure_traffic_test.exs │ ├── region_aware_migrations_test.exs │ ├── region_aware_routing_test.exs │ ├── rt_channel/ │ │ ├── authorization_test.exs │ │ ├── billable_events_test.exs │ │ ├── broadcast_test.exs │ │ ├── connection_lifecycle_test.exs │ │ ├── postgres_changes_test.exs │ │ ├── presence_test.exs │ │ 
├── token_handling_test.exs │ │ └── wal_bloat_test.exs │ ├── tests.ts │ └── tracker_test.exs ├── realtime/ │ ├── adapters/ │ │ └── postgres/ │ │ └── protocol_test.exs │ ├── api/ │ │ └── extensions_test.exs │ ├── api_test.exs │ ├── database_distributed_test.exs │ ├── database_test.exs │ ├── extensions/ │ │ └── cdc_rls/ │ │ ├── cdc_rls_test.exs │ │ ├── replication_poller_test.exs │ │ ├── replications_test.exs │ │ ├── subscription_manager_test.exs │ │ ├── subscriptions_checker_distributed_test.exs │ │ ├── subscriptions_checker_test.exs │ │ └── subscriptions_test.exs │ ├── gen_counter/ │ │ └── gen_counter_test.exs │ ├── gen_rpc_pub_sub/ │ │ └── worker_test.exs │ ├── gen_rpc_pub_sub_test.exs │ ├── gen_rpc_test.exs │ ├── helpers_test.exs │ ├── log_filter_test.exs │ ├── logs_test.exs │ ├── messages_test.exs │ ├── metrics_cleaner_test.exs │ ├── metrics_pusher_test.exs │ ├── monitoring/ │ │ ├── distributed_metrics_test.exs │ │ ├── erl_sys_mon_test.exs │ │ ├── gen_rpc_metrics_test.exs │ │ ├── latency_test.exs │ │ ├── peep/ │ │ │ └── partitioned_test.exs │ │ ├── prom_ex/ │ │ │ └── plugins/ │ │ │ ├── distributed_test.exs │ │ │ ├── gen_rpc_test.exs │ │ │ ├── phoenix_test.exs │ │ │ ├── tenant_test.exs │ │ │ └── tenants_test.exs │ │ ├── prom_ex_test.exs │ │ └── prometheus_test.exs │ ├── nodes_test.exs │ ├── oid_test.exs │ ├── postgres_decoder_test.exs │ ├── rate_counter/ │ │ └── rate_counter_test.exs │ ├── repo_replica_test.exs │ ├── rpc_test.exs │ ├── signal_handler_test.exs │ ├── syn_handler_test.exs │ ├── telemetry/ │ │ └── logger_test.exs │ ├── tenants/ │ │ ├── authorization_remote_test.exs │ │ ├── authorization_test.exs │ │ ├── batch_broadcast_test.exs │ │ ├── cache_test.exs │ │ ├── connect/ │ │ │ ├── get_tenant_test.exs │ │ │ ├── piper_test.exs │ │ │ ├── reconcile_migrations_test.exs │ │ │ └── register_process_test.exs │ │ ├── connect_test.exs │ │ ├── janitor/ │ │ │ └── maintenance_task_test.exs │ │ ├── janitor_test.exs │ │ ├── migrations_test.exs │ │ ├── 
rebalancer_test.exs │ │ ├── replication_connection/ │ │ │ └── watchdog_test.exs │ │ ├── replication_connection_test.exs │ │ └── repo_test.exs │ ├── tenants_test.exs │ └── users_counter_test.exs ├── realtime_web/ │ ├── channels/ │ │ ├── auth/ │ │ │ ├── channels_authorization_test.exs │ │ │ └── jwt_verification_test.exs │ │ ├── payloads/ │ │ │ ├── flexible_boolean_test.exs │ │ │ └── join_test.exs │ │ ├── realtime_channel/ │ │ │ ├── broadcast_handler_test.exs │ │ │ ├── logging_test.exs │ │ │ ├── message_dispatcher_test.exs │ │ │ ├── presence_handler_test.exs │ │ │ └── tracker_test.exs │ │ ├── realtime_channel_test.exs │ │ ├── socket_disconnect_test.exs │ │ ├── tenant_rate_limiters_test.exs │ │ └── user_socket_test.exs │ ├── controllers/ │ │ ├── broadcast_controller_test.exs │ │ ├── fallback_controller_test.exs │ │ ├── legacy_metrics_controller_test.exs │ │ ├── live_dasboard_test.exs │ │ ├── metrics_controller_test.exs │ │ ├── openapi_controller_test.exs │ │ ├── page_controller_test.exs │ │ └── tenant_controller_test.exs │ ├── dashboard/ │ │ └── tenant_info_test.exs │ ├── integration/ │ │ └── tracing_test.exs │ ├── live/ │ │ ├── inspector_live/ │ │ │ └── index_test.exs │ │ ├── page_live/ │ │ │ └── index_test.exs │ │ ├── status_live/ │ │ │ └── index_test.exs │ │ └── tenants_live/ │ │ └── index_test.exs │ ├── plugs/ │ │ ├── assign_tenant_test.exs │ │ ├── auth_tenant_test.exs │ │ ├── baggage_request_id_test.exs │ │ ├── metrics_mode_test.exs │ │ └── rate_limiter_test.exs │ ├── socket/ │ │ └── v2_serializer_test.exs │ ├── tenant_broadcaster_test.exs │ └── views/ │ ├── error_view_test.exs │ ├── layout_view_test.exs │ └── page_view_test.exs ├── support/ │ ├── channel_case.ex │ ├── cleanup.ex │ ├── clustered.ex │ ├── conn_case.ex │ ├── containers/ │ │ └── container.ex │ ├── containers.ex │ ├── data_case.ex │ ├── generators.ex │ ├── integrations.ex │ ├── joken_current_time_mock.ex │ ├── metrics_helper.ex │ ├── prometheus_fixtures.ex │ ├── rate_counter_helper.ex │ ├── 
replication_test_handler.ex │ ├── tenant_connection.ex │ ├── tracing.ex │ └── websocket_client.ex └── test_helper.exs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .credo.exs ================================================ %{ configs: [ %{ name: "default", files: %{ included: ["lib/", "src/", "web/", "apps/"], excluded: [] }, plugins: [], requires: [], strict: false, parse_timeout: 5000, color: true, checks: %{ disabled: [ {Credo.Check.Design.TagTODO, []}, {Credo.Check.Consistency.ExceptionNames, []}, {Credo.Check.Refactor.Nesting, []}, {Credo.Check.Refactor.CyclomaticComplexity, []}, {Credo.Check.Readability.WithSingleClause, []}, {Credo.Check.Readability.AliasOrder, []}, {Credo.Check.Readability.StringSigils, []}, {Credo.Check.Refactor.Apply, []} ] } } ] } ================================================ FILE: .dockerignore ================================================ # This file excludes paths from the Docker build context. # # By default, Docker's build context includes all files (and folders) in the # current directory. Even if a file isn't copied into the container it is still sent to # the Docker daemon. # # There are multiple reasons to exclude files from the build context: # # 1. Prevent nested folders from being copied into the container (ex: exclude # /assets/node_modules when copying /assets) # 2. Reduce the size of the build context and improve build time (ex. /build, /deps, /doc) # 3. 
Avoid sending files containing sensitive information # # More information on using .dockerignore is available here: # https://docs.docker.com/engine/reference/builder/#dockerignore-file .dockerignore # Ignore git, but keep git HEAD and refs to access current commit hash if needed: # # $ cat .git/HEAD | awk '{print ".git/"$2}' | xargs cat # d0b8727759e1e0e7aa3d41707d12376e373d5ecc .git !.git/HEAD !.git/refs # Common development/test artifacts /cover/ /doc/ /test/ /tmp/ .elixir_ls # Mix artifacts /_build/ /deps/ *.ez # Generated on crash by the VM erl_crash.dump # Static artifacts - These should be fetched and built inside the Docker image /assets/node_modules/ /priv/static/assets/ /priv/static/cache_manifest.json ================================================ FILE: .formatter.exs ================================================ [ import_deps: [:ecto, :ecto_sql, :phoenix, :open_api_spex], subdirectories: ["priv/*/migrations"], plugins: [], inputs: ["*.{heex,ex,exs}", "{config,lib,test}/**/*.{heex,ex,exs}", "priv/*/*seeds*.exs"], line_length: 120 ] ================================================ FILE: .github/actionlint.yaml ================================================ self-hosted-runner: labels: - blacksmith-4vcpu-ubuntu-2404 - blacksmith-8vcpu-ubuntu-2404 ================================================ FILE: .github/workflows/beacon_tests.yml ================================================ name: Beacon Tests defaults: run: shell: bash working-directory: ./beacon on: pull_request: paths: - "beacon/**" - ".github/workflows/beacon_tests.yml" push: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true env: MIX_ENV: test jobs: tests: name: Tests & Lint runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 - name: Setup elixir id: beam uses: erlef/setup-beam@v1 with: otp-version: 27.x # Define the OTP version [required] elixir-version: 1.18.x # Define the 
elixir version [required] - name: Cache Mix uses: actions/cache@v5 with: path: | beacon/deps beacon/_build key: ${{ github.workflow }}-${{ runner.os }}-mix-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}-${{ hashFiles('beacon/mix.lock') }} restore-keys: | ${{ github.workflow }}-${{ runner.os }}-mix-${{ steps.beam.outputs.elixir-version }}-${{ steps.beam.outputs.otp-version }}- - name: Install dependencies run: mix deps.get - name: Start epmd run: epmd -daemon - name: Run tests run: MIX_ENV=test mix test env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Check for warnings run: mix compile --force --warnings-as-errors - name: Run format check run: mix format --check-formatted ================================================ FILE: .github/workflows/docker-build.yml ================================================ name: Docker Build on: pull_request: branches: - main jobs: build: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v6 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build Docker image run: docker build . 
================================================ FILE: .github/workflows/integration_tests.yml ================================================ name: Integration Tests on: pull_request: paths: - "lib/**" - "test/**" - "config/**" - "priv/**" - "assets/**" - "rel/**" - "mix.exs" - "Dockerfile" - "run.sh" - "docker-compose.tests.yml" - ".github/workflows/integration_tests.yml" push: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true env: POSTGRES_IMAGE: supabase/postgres:17.6.1.074 DENO_IMAGE: denoland/deno:alpine-2.5.6 jobs: tests: name: Tests runs-on: blacksmith-8vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 - name: Cache Docker images uses: actions/cache@v5 id: docker-cache with: path: /tmp/docker-images key: docker-images-integration-zstd-${{ env.POSTGRES_IMAGE }}-${{ env.DENO_IMAGE }} - name: Load Docker images from cache if: steps.docker-cache.outputs.cache-hit == 'true' run: | zstd -d --stdout /tmp/docker-images/postgres.tar.zst | docker image load & PID1=$! zstd -d --stdout /tmp/docker-images/deno.tar.zst | docker image load & PID2=$! wait $PID1 || exit $? wait $PID2 || exit $? - name: Pull and save Docker images if: steps.docker-cache.outputs.cache-hit != 'true' run: | docker pull ${{ env.POSTGRES_IMAGE }} & PID1=$! docker pull ${{ env.DENO_IMAGE }} & PID2=$! wait $PID1 || exit $? wait $PID2 || exit $? 
mkdir -p /tmp/docker-images docker image save ${{ env.POSTGRES_IMAGE }} | zstd -T0 -o /tmp/docker-images/postgres.tar.zst docker image save ${{ env.DENO_IMAGE }} | zstd -T0 -o /tmp/docker-images/deno.tar.zst - name: Run integration test run: docker compose -f docker-compose.tests.yml up --abort-on-container-exit --exit-code-from test-runner ================================================ FILE: .github/workflows/lint.yml ================================================ name: Lint on: pull_request: paths: - "lib/**" - "test/**" - "config/**" - "priv/**" - "assets/**" - "rel/**" - "mix.exs" - "Dockerfile" - "run.sh" - ".github/workflows/lint.yml" push: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: tests: name: Lint runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 - name: Setup elixir id: beam uses: erlef/setup-beam@v1 with: otp-version: 27.x # Define the OTP version [required] elixir-version: 1.18.x # Define the elixir version [required] - name: Cache Mix uses: actions/cache@v5 with: path: | deps _build key: ${{ github.workflow }}-${{ runner.os }}-mix-${{ env.elixir }}-${{ env.otp }}-${{ hashFiles('**/mix.lock') }} restore-keys: | ${{ github.workflow }}-${{ runner.os }}-mix-${{ env.elixir }}-${{ env.otp }}- - name: Install dependencies run: mix deps.get - name: Check for warnings run: mix compile --force --warnings-as-errors - name: Run format check run: mix format --check-formatted - name: Credo checks run: mix credo - name: Run hex audit run: mix hex.audit - name: Run mix_audit run: mix deps.audit - name: Run sobelow run: mix sobelow --config .sobelow-conf - name: Retrieve PLT Cache uses: actions/cache@v5 id: plt-cache with: path: priv/plts key: ${{ runner.os }}-${{ steps.beam.outputs.otp-version }}-${{ steps.beam.outputs.elixir-version }}-plts-${{ hashFiles(format('{0}{1}', github.workspace, '/mix.lock')) }} - name: Create PLTs if: 
steps.plt-cache.outputs.cache-hit != 'true' run: | mkdir -p priv/plts mix dialyzer.build - name: Run dialyzer run: mix dialyzer ================================================ FILE: .github/workflows/manual_prod_build.yml ================================================ name: Manual Build Production on: workflow_dispatch: inputs: branch: description: "Branch to run the workflow" required: true docker_tag: description: "Tag to be used by the docker image on push" required: true jobs: docker_x86_release: runs-on: blacksmith-4vcpu-ubuntu-2404 timeout-minutes: 120 env: arch: amd64 outputs: image_digest: ${{ steps.build.outputs.digest }} steps: - id: meta uses: docker/metadata-action@v5 with: images: | supabase/realtime tags: | type=raw,value=v${{ github.event.inputs.docker_tag }}_${{ env.arch }} - name: Setup Blacksmith Builder uses: useblacksmith/setup-docker-builder@v1 - uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - id: build uses: useblacksmith/build-push-action@v2 with: push: true tags: ${{ steps.meta.outputs.tags }} platforms: linux/${{ env.arch }} docker_arm_release: runs-on: arm-runner timeout-minutes: 120 env: arch: arm64 outputs: image_digest: ${{ steps.build.outputs.digest }} steps: - uses: actions/checkout@v6 - id: meta uses: docker/metadata-action@v5 with: images: | supabase/realtime tags: | type=raw,value=v${{ github.event.inputs.docker_tag }}_${{ env.arch }} - uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Setup Blacksmith Builder uses: useblacksmith/setup-docker-builder@v1 - id: build uses: useblacksmith/build-push-action@v2 with: context: . 
push: true tags: ${{ steps.meta.outputs.tags }} platforms: linux/${{ env.arch }} no-cache: true merge_manifest: needs: [docker_x86_release, docker_arm_release] runs-on: blacksmith-4vcpu-ubuntu-2404 permissions: contents: read packages: write id-token: write steps: - name: Setup Blacksmith Builder uses: useblacksmith/setup-docker-builder@v1 - uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Merge multi-arch manifests for custom output run: | docker buildx imagetools create -t supabase/realtime:v${{ github.event.inputs.docker_tag }} \ supabase/realtime@${{ needs.docker_x86_release.outputs.image_digest }} \ supabase/realtime@${{ needs.docker_arm_release.outputs.image_digest }} - name: configure aws credentials uses: aws-actions/configure-aws-credentials@v5 with: role-to-assume: ${{ secrets.PROD_AWS_ROLE }} aws-region: us-east-1 - name: Login to ECR uses: docker/login-action@v3 with: registry: public.ecr.aws - name: Login to GHCR uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Mirror to ECR uses: akhilerm/tag-push-action@v2.2.0 with: src: docker.io/supabase/realtime:v${{ github.event.inputs.docker_tag }} dst: | public.ecr.aws/supabase/realtime:v${{ github.event.inputs.docker_tag }} ghcr.io/supabase/realtime:v${{ github.event.inputs.docker_tag }} ================================================ FILE: .github/workflows/mirror.yml ================================================ name: Mirror Image on: workflow_dispatch: inputs: version: description: "Image tag" required: true type: string jobs: mirror: runs-on: blacksmith-4vcpu-ubuntu-2404 permissions: contents: read packages: write id-token: write steps: - name: configure aws credentials uses: aws-actions/configure-aws-credentials@v5 with: role-to-assume: ${{ secrets.PROD_AWS_ROLE }} aws-region: us-east-1 - uses: docker/login-action@v3 with: registry: public.ecr.aws 
- uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - uses: akhilerm/tag-push-action@v2.2.0 with: src: docker.io/supabase/realtime:${{ inputs.version }} dst: | public.ecr.aws/supabase/realtime:${{ inputs.version }} ghcr.io/supabase/realtime:${{ inputs.version }} ================================================ FILE: .github/workflows/prod_build.yml ================================================ name: Build Production on: push: branches: - "main" paths: - "lib/**" - "config/**" - "priv/**" - "assets/**" - "rel/**" - "mix.exs" - "Dockerfile" - "run.sh" - ".github/workflows/prod_build.yml" jobs: release: runs-on: blacksmith-4vcpu-ubuntu-2404 outputs: published: ${{ steps.semantic.outputs.new_release_published }} version: ${{ steps.semantic.outputs.new_release_version }} steps: - uses: actions/checkout@v6 with: persist-credentials: false - id: semantic uses: cycjimmy/semantic-release-action@v6 with: semantic_version: 24 extra_plugins: | @semantic-release/exec @semantic-release/git env: GITHUB_TOKEN: ${{ secrets.GH_TOKEN_PROJECT_ACTION }} docker_x86_release: needs: release runs-on: blacksmith-4vcpu-ubuntu-2404 if: needs.release.outputs.published == 'true' timeout-minutes: 120 env: arch: amd64 outputs: image_digest: ${{ steps.build.outputs.digest }} steps: - uses: actions/checkout@v6 with: ref: v${{ needs.release.outputs.version }} - id: meta uses: docker/metadata-action@v5 with: images: | supabase/realtime tags: | type=raw,value=v${{ needs.release.outputs.version }}_${{ env.arch }} type=raw,value=latest_${{ env.arch }} - name: Setup Blacksmith Builder uses: useblacksmith/setup-docker-builder@v1 - uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - id: build uses: useblacksmith/build-push-action@v2 with: context: . 
push: true tags: ${{ steps.meta.outputs.tags }} platforms: linux/${{ env.arch }} docker_arm_release: needs: release runs-on: arm-runner if: needs.release.outputs.published == 'true' timeout-minutes: 120 env: arch: arm64 outputs: image_digest: ${{ steps.build.outputs.digest }} steps: - uses: actions/checkout@v6 with: ref: v${{ needs.release.outputs.version }} - id: meta uses: docker/metadata-action@v5 with: images: | supabase/realtime tags: | type=raw,value=v${{ needs.release.outputs.version }}_${{ env.arch }} type=raw,value=latest_${{ env.arch }} - uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Setup Blacksmith Builder uses: useblacksmith/setup-docker-builder@v1 - id: build uses: useblacksmith/build-push-action@v2 with: context: . push: true tags: ${{ steps.meta.outputs.tags }} platforms: linux/${{ env.arch }} no-cache: true merge_manifest: needs: [release, docker_x86_release, docker_arm_release] runs-on: blacksmith-4vcpu-ubuntu-2404 permissions: contents: read packages: write id-token: write steps: - name: Setup Blacksmith Builder uses: useblacksmith/setup-docker-builder@v1 - uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Merge multi-arch manifests for versioned output run: | docker buildx imagetools create -t supabase/realtime:v${{ needs.release.outputs.version }} \ supabase/realtime@${{ needs.docker_x86_release.outputs.image_digest }} \ supabase/realtime@${{ needs.docker_arm_release.outputs.image_digest }} - name: Merge multi-arch manifests for latest output run: | docker buildx imagetools create -t supabase/realtime:latest \ supabase/realtime@${{ needs.docker_x86_release.outputs.image_digest }} \ supabase/realtime@${{ needs.docker_arm_release.outputs.image_digest }} - name: configure aws credentials uses: aws-actions/configure-aws-credentials@v5 with: role-to-assume: ${{ secrets.PROD_AWS_ROLE }} aws-region: 
us-east-1 - name: Login to ECR uses: docker/login-action@v3 with: registry: public.ecr.aws - name: Login to GHCR uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Mirror to ECR uses: akhilerm/tag-push-action@v2.2.0 with: src: docker.io/supabase/realtime:v${{ needs.release.outputs.version }} dst: | public.ecr.aws/supabase/realtime:v${{ needs.release.outputs.version }} ghcr.io/supabase/realtime:v${{ needs.release.outputs.version }} update-branch-name: needs: [release, docker_x86_release, docker_arm_release, merge_manifest] runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - name: Checkout branch uses: actions/checkout@v6 with: ref: refs/heads/main - name: Update branch name run: | git branch -m main releases/v${{ needs.release.outputs.version }} git push origin HEAD:releases/v${{ needs.release.outputs.version }} ================================================ FILE: .github/workflows/prod_linter.yml ================================================ name: Production Formatting Checks on: pull_request: branches: - release jobs: format: name: Formatting Checks runs-on: blacksmith-4vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 - name: Setup elixir id: beam uses: erlef/setup-beam@v1 with: otp-version: 27.x # Define the OTP version [required] elixir-version: 1.18.x # Define the elixir version [required] - name: Cache Mix uses: actions/cache@v5 with: path: deps key: ${{ runner.os }}-mix-${{ hashFiles(format('{0}{1}', github.workspace, '/mix.lock')) }} restore-keys: | ${{ runner.os }}-mix- - name: Install dependencies run: mix deps.get - name: Set up Postgres run: docker compose -f docker-compose.dbs.yml up -d - name: Run database migrations run: mix ecto.migrate - name: Run format check run: mix format --check-formatted - name: Credo checks run: mix credo --strict --mute-exit-status - name: Retrieve PLT Cache uses: actions/cache@v5 id: plt-cache with: path: priv/plts key: ${{ runner.os }}-${{ 
steps.beam.outputs.otp-version }}-${{ steps.beam.outputs.elixir-version }}-plts-${{ hashFiles(format('{0}{1}', github.workspace, '/mix.lock')) }} - name: Create PLTs if: steps.plt-cache.outputs.cache-hit != 'true' run: | mkdir -p priv/plts mix dialyzer.build - name: Run dialyzer run: mix dialyzer - name: Run tests run: mix test ================================================ FILE: .github/workflows/tests.yml ================================================ name: Tests on: pull_request: paths: - "lib/**" - "test/**" - "config/**" - "priv/**" - "assets/**" - "rel/**" - "native/**" - "mix.exs" - "Dockerfile" - "run.sh" - ".github/workflows/tests.yml" push: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true env: MIX_ENV: test POSTGRES_IMAGE: supabase/postgres:17.6.1.074 jobs: tests: name: Tests (Partition ${{ matrix.partition }}) runs-on: blacksmith-8vcpu-ubuntu-2404 strategy: fail-fast: false matrix: partition: [1, 2, 3, 4] steps: - uses: actions/checkout@v6 - name: Setup elixir id: beam uses: erlef/setup-beam@v1 with: otp-version: 27.x # Define the OTP version [required] elixir-version: 1.18.x # Define the elixir version [required] - name: Cache Mix uses: actions/cache@v5 with: path: | deps _build priv/native key: ${{ github.workflow }}-${{ runner.os }}-mix-${{ env.elixir }}-${{ env.otp }}-${{ hashFiles('**/mix.lock') }} restore-keys: | ${{ github.workflow }}-${{ runner.os }}-mix-${{ env.elixir }}-${{ env.otp }}- - name: Cache Docker images uses: actions/cache@v5 id: docker-cache with: path: /tmp/docker-images key: docker-images-zstd-${{ env.POSTGRES_IMAGE }} - name: Load Docker images from cache if: steps.docker-cache.outputs.cache-hit == 'true' run: zstd -d --stdout /tmp/docker-images/postgres.tar.zst | docker image load - name: Pull and save Docker images if: steps.docker-cache.outputs.cache-hit != 'true' run: | docker pull ${{ env.POSTGRES_IMAGE }} mkdir -p /tmp/docker-images 
docker image save ${{ env.POSTGRES_IMAGE }} | zstd -T0 -o /tmp/docker-images/postgres.tar.zst - name: Install dependencies run: mix deps.get - name: Set up Postgres run: docker compose -f docker-compose.dbs.yml up -d - name: Start epmd run: epmd -daemon - name: Run tests run: MIX_TEST_PARTITION=${{ matrix.partition }} mix coveralls.lcov --partitions 4 - name: Upload coverage artifact uses: actions/upload-artifact@v4 with: name: coverage-partition-${{ matrix.partition }} path: cover/lcov.info coverage: name: Merge Coverage needs: tests if: ${{ needs.tests.result == 'success' }} runs-on: blacksmith-8vcpu-ubuntu-2404 steps: - uses: actions/checkout@v6 - name: Download all coverage artifacts uses: actions/download-artifact@v4 with: pattern: coverage-partition-* path: coverage - name: Upload merged coverage to Coveralls uses: coverallsapp/github-action@v2 with: github-token: ${{ secrets.GITHUB_TOKEN }} files: coverage/coverage-partition-1/lcov.info coverage/coverage-partition-2/lcov.info coverage/coverage-partition-3/lcov.info coverage/coverage-partition-4/lcov.info ================================================ FILE: .github/workflows/update-supabase-js.yml ================================================ name: Update @supabase/supabase-js on: workflow_dispatch: inputs: version: description: "Version to update to" required: true type: string source: description: "Source of the update" required: false type: string default: "manual" permissions: pull-requests: read contents: read jobs: update-supabase-js: runs-on: ubuntu-latest concurrency: group: ${{ github.workflow }}-supabase-update-${{ inputs.version }} cancel-in-progress: false steps: - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 with: ref: ${{ github.event.repository.default_branch }} - name: Setup Node.js uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0 with: node-version: "20" cache: "npm" cache-dependency-path: assets/package-lock.json - name: Update 
@supabase/supabase-js working-directory: assets run: | npm pkg set "dependencies.@supabase/supabase-js=${{ inputs.version }}" npm install --package-lock-only --ignore-scripts - name: Generate token id: app-token uses: actions/create-github-app-token@29824e69f54612133e76f7eaac726eef6c875baf # v2.2.1 with: app-id: ${{ secrets.GH_AUTOFIX_APP_ID }} private-key: ${{ secrets.GH_AUTOFIX_PRIVATE_KEY }} - name: Create pull request uses: peter-evans/create-pull-request@c5a7806660adbe173f04e3e038b0ccdcd758773c # v6.1.0 with: token: ${{ steps.app-token.outputs.token }} commit-message: "chore: update @supabase/supabase-js to v${{ inputs.version }}" title: "chore: update @supabase/supabase-js to v${{ inputs.version }}" body: | This PR updates `@supabase/supabase-js` to v${{ inputs.version }}. **Source**: ${{ inputs.source }} This PR was created automatically. branch: "gha/auto-update-supabase-js-v${{ inputs.version }}" base: ${{ github.event.repository.default_branch }} ================================================ FILE: .gitignore ================================================ # The directory Mix will write compiled artifacts to. /_build/ # If you run "mix test --cover", coverage assets end up here. /cover/ # The directory Mix downloads your dependencies sources to. /deps/ # Where 3rd-party dependencies like ExDoc output generated docs. /doc/ # Ignore .fetch files in case you like to edit your project deps locally. /.fetch # If the VM crashes, it generates a dump, let's ignore it too. erl_crash.dump # Also ignore archive artifacts (built via "mix archive.build"). *.ez # Ignore package tarball (built via "mix hex.build"). realtime-*.tar # Ignore assets that are produced by build tools. 
/priv/static/assets/ # Ignore Dialyzer .plt /priv/plts/* node_modules .supabase config/prod.secret.exs demo/.env .lexical .vscode ================================================ FILE: .releaserc ================================================ { "branches": ["main"], "plugins": [ "@semantic-release/commit-analyzer", "@semantic-release/release-notes-generator", [ "@semantic-release/exec", { "prepareCmd": "sed -i 's/version: \"[^\"]*\"/version: \"${nextRelease.version}\"/' mix.exs" } ], [ "@semantic-release/git", { "assets": ["mix.exs"], "message": "chore(release): ${nextRelease.version} [skip ci]" } ], "@semantic-release/github" ] } ================================================ FILE: .sobelow-conf ================================================ [ verbose: true, private: false, skip: false, router: nil, exit: :low, format: "txt", out: nil, threshold: :medium, ignore: ["Config.CSP", "Config.HTTPS"], ignore_files: [], version: false ] ================================================ FILE: .tool-versions ================================================ elixir 1.18.4-otp-27 nodejs 24 erlang 27 ================================================ FILE: Dockerfile ================================================ ARG ELIXIR_VERSION=1.18 ARG OTP_VERSION=27.3 ARG DEBIAN_VERSION=bookworm-20250929-slim ARG BUILDER_IMAGE="hexpm/elixir:${ELIXIR_VERSION}-erlang-${OTP_VERSION}-debian-${DEBIAN_VERSION}" ARG RUNNER_IMAGE="debian:${DEBIAN_VERSION}" FROM ${BUILDER_IMAGE} AS builder ENV MIX_ENV="prod" RUN apt-get update -y \ && apt-get install curl -y \ && apt-get install -y build-essential git \ && apt-get clean RUN set -uex; \ apt-get update; \ apt-get install -y ca-certificates curl gnupg; \ mkdir -p /etc/apt/keyrings; \ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \ | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg; \ NODE_MAJOR=24; \ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" \ > 
/etc/apt/sources.list.d/nodesource.list; \ apt-get -qy update; \ apt-get -qy install nodejs; # prepare build dir WORKDIR /app # install hex + rebar RUN mix local.hex --force && \ mix local.rebar --force # install mix dependencies COPY mix.exs mix.lock ./ COPY beacon beacon RUN mix deps.get --only $MIX_ENV RUN mkdir config # copy compile-time config files before we compile dependencies # to ensure any relevant config change will trigger the dependencies # to be re-compiled. COPY config/config.exs config/${MIX_ENV}.exs config/ RUN mix deps.compile COPY priv priv COPY lib lib COPY assets assets # compile assets with esbuild and npm RUN cd assets \ && npm install \ && cd .. \ && mix assets.deploy # Compile the release RUN mix compile # Changes to config/runtime.exs don't require recompiling the code COPY config/runtime.exs config/ COPY rel rel RUN mix release # start a new build stage so that the final image will only contain # the compiled release and other runtime necessities FROM ${RUNNER_IMAGE} ARG SLOT_NAME_SUFFIX ENV SLOT_NAME_SUFFIX="${SLOT_NAME_SUFFIX}" \ LANG="en_US.UTF-8" \ LANGUAGE="en_US:en" \ LC_ALL="en_US.UTF-8" \ MIX_ENV="prod" \ ECTO_IPV6="true" \ ERL_AFLAGS="-proto_dist inet6_tcp" RUN apt-get update -y && \ apt-get install -y libstdc++6 openssl libncurses5 locales iptables sudo tini curl awscli jq && \ apt-get clean && rm -f /var/lib/apt/lists/*_* # Set the locale RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen WORKDIR "/app" RUN chown nobody /app COPY --from=builder --chown=nobody:root /app/_build/${MIX_ENV}/rel/realtime ./ COPY run.sh run.sh RUN ls -la /app ENTRYPOINT ["/usr/bin/tini", "-s", "-g", "--", "/app/run.sh"] CMD ["/app/bin/server"] ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2019 Supabase Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
================================================ FILE: Makefile ================================================ CLUSTER_STRATEGIES ?= EPMD NODE_NAME ?= pink PORT ?= 4000 .PHONY: dev dev.orange seed prod bench.% dev_db start start.% stop stop.% rebuild rebuild.% .DEFAULT_GOAL := help # Common commands dev: ## Start a dev server ELIXIR_ERL_OPTIONS="+hmax 1000000000" SLOT_NAME_SUFFIX=some_sha PORT=$(PORT) MIX_ENV=dev SECURE_CHANNELS=true API_JWT_SECRET=dev METRICS_JWT_SECRET=dev REGION=us-east-1 DB_ENC_KEY="1234567890123456" CLUSTER_STRATEGIES=$(CLUSTER_STRATEGIES) ERL_AFLAGS="-kernel shell_history enabled" GEN_RPC_TCP_SERVER_PORT=5369 GEN_RPC_TCP_CLIENT_PORT=5469 iex --name $(NODE_NAME)@127.0.0.1 --cookie cookie -S mix phx.server dev.orange: ## Start another dev server (orange) on port 4001 ELIXIR_ERL_OPTIONS="+hmax 1000000000" SLOT_NAME_SUFFIX=some_sha PORT=4001 MIX_ENV=dev SECURE_CHANNELS=true API_JWT_SECRET=dev METRICS_JWT_SECRET=dev REGION=eu-west-1 DB_ENC_KEY="1234567890123456" CLUSTER_STRATEGIES=$(CLUSTER_STRATEGIES) ERL_AFLAGS="-kernel shell_history enabled" GEN_RPC_TCP_SERVER_PORT=5469 GEN_RPC_TCP_CLIENT_PORT=5369 iex --name orange@127.0.0.1 --cookie cookie -S mix phx.server seed: ## Seed the database DB_ENC_KEY="1234567890123456" FLY_ALLOC_ID=123e4567-e89b-12d3-a456-426614174000 mix run priv/repo/dev_seeds.exs prod: ## Start a server with a MIX_ENV=prod ELIXIR_ERL_OPTIONS="+hmax 1000000000" SLOT_NAME_SUFFIX=some_sha MIX_ENV=prod FLY_APP_NAME=realtime-local API_KEY=dev SECURE_CHANNELS=true API_JWT_SECRET=dev METRICS_JWT_SECRET=dev FLY_REGION=fra FLY_ALLOC_ID=123e4567-e89b-12d3-a456-426614174000 DB_ENC_KEY="1234567890123456" SECRET_KEY_BASE=M+55t7f6L9VWyhH03R5N7cIhrdRlZaMDfTE6Udz0eZS7gCbnoLQ8PImxwhEyao6D DASHBOARD_USER=realtime_local DASHBOARD_PASSWORD=password ERL_AFLAGS="-kernel shell_history enabled" iex -S mix phx.server bench.%: ## Run benchmark with a specific file. e.g. 
bench.secrets ELIXIR_ERL_OPTIONS="+hmax 1000000000" SLOT_NAME_SUFFIX=some_sha MIX_ENV=dev SECURE_CHANNELS=true API_JWT_SECRET=dev METRICS_JWT_SECRET=dev FLY_REGION=fra FLY_ALLOC_ID=123e4567-e89b-12d3-a456-426614174000 DB_ENC_KEY="1234567890123456" ERL_AFLAGS="-kernel shell_history enabled" mix run bench/$* dev_db: ## Start dev databases using docker docker-compose -f docker-compose.dbs.yml up -d && mix ecto.migrate --log-migrator-sql # Docker specific commands start: ## Start main docker compose docker-compose up start.%: ## Start docker compose with a specific file. e.g. start.dbs docker-compose -f docker-compose.$*.yml up stop: ## Stop main docker compose docker-compose down --remove-orphans stop.%: ## Stop docker compose with a specific file. e.g. stop.dbs docker-compose -f docker-compose.yml -f docker-compose.$*.yml down --remove-orphans rebuild: ## Rebuild main docker compose images make stop docker-compose build docker-compose up --force-recreate --build rebuild.%: ## Rebuild docker compose images with a specific file. e.g. rebuild.dbs make stop.$* docker-compose -f docker-compose.yml -f docker-compose.$*.yml build docker-compose -f docker-compose.yml -f docker-compose.$*.yml up --force-recreate --build # Based on https://gist.github.com/prwhite/8168133 .DEFAULT_GOAL:=help .PHONY: help help: ## Display this help $(info Realtime commands) @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[%.a-zA-Z0-9_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) ================================================ FILE: README.md ================================================

Supabase Logo

Supabase Realtime

Send ephemeral messages, track and synchronize shared state, and listen to Postgres changes all over WebSockets.
Multiplayer Demo · Request Feature · Report Bug

## Status [![GitHub License](https://img.shields.io/github/license/supabase/realtime)](https://github.com/supabase/realtime/blob/main/LICENSE) [![Coverage Status](https://coveralls.io/repos/github/supabase/realtime/badge.svg?branch=main)](https://coveralls.io/github/supabase/realtime?branch=main) | Features | v1 | v2 | Status | | ---------------- | --- | --- | ------ | | Postgres Changes | ✔ | ✔ | GA | | Broadcast | | ✔ | GA | | Presence | | ✔ | GA | This repository focuses on version 2 but you can still access the previous version's [code](https://github.com/supabase/realtime/tree/v1) and [Docker image](https://hub.docker.com/layers/supabase/realtime/v1.0.0/images/sha256-e2766e0e3b0d03f7e9aa1b238286245697d0892c2f6f192fd2995dca32a4446a). For the latest Docker images go to https://hub.docker.com/r/supabase/realtime. The codebase is under heavy development and the documentation is constantly evolving. Give it a try and let us know what you think by creating an issue. Watch [releases](https://github.com/supabase/realtime/releases) of this repo to get notified of updates. And give us a star if you like it! ## Overview ### What is this? This is a server built with Elixir using the [Phoenix Framework](https://www.phoenixframework.org) that enables the following functionality: - Broadcast: Send ephemeral messages from client to clients with low latency. - Presence: Track and synchronize shared state between clients. - Postgres Changes: Listen to Postgres database changes and send them to authorized clients. For a more detailed overview head over to [Realtime guides](https://supabase.com/docs/guides/realtime). ### Does this server guarantee message delivery? The server does not guarantee that every message will be delivered to your clients so keep that in mind as you're using Realtime. 
## Quick start You can check out the [Supabase UI Library](https://supabase.com/ui) Realtime components and the [multiplayer.dev](https://multiplayer.dev) demo app source code [here](https://github.com/supabase/multiplayer.dev) ## Client libraries - [JavaScript](https://github.com/supabase/supabase-js/tree/master/packages/core/realtime-js) - [Flutter/Dart](https://github.com/supabase/supabase-flutter/tree/main/packages/realtime_client) - [Python](https://github.com/supabase/supabase-py/tree/main/src/realtime) - [Swift](https://github.com/supabase/supabase-swift/tree/main/Sources/Realtime) ## Server Setup To get started, spin up your Postgres database and Realtime server containers defined in `docker-compose.yml`. As an example, you may run `docker-compose -f docker-compose.yml up`. > **Note** > Supabase runs Realtime in production with a separate database that keeps track of all tenants. However, a schema, `_realtime`, is created when spinning up containers via `docker-compose.yml` to simplify local development. A tenant has already been added on your behalf. You can confirm this by checking the `_realtime.tenants` and `_realtime.extensions` tables inside the database. You can add your own by making a `POST` request to the server. 
You must change both `name` and `external_id` while you may update other values as you see fit: ```bash curl -X POST \ -H 'Content-Type: application/json' \ -H 'Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiIiLCJpYXQiOjE2NzEyMzc4NzMsImV4cCI6MTcwMjc3Mzk5MywiYXVkIjoiIiwic3ViIjoiIn0._ARixa2KFUVsKBf3UGR90qKLCpGjxhKcXY4akVbmeNQ' \ -d $'{ "tenant" : { "name": "realtime-dev", "external_id": "realtime-dev", "jwt_secret": "a1d99c8b-91b6-47b2-8f3c-aa7d9a9ad20f", "extensions": [ { "type": "postgres_cdc_rls", "settings": { "db_name": "postgres", "db_host": "host.docker.internal", "db_user": "postgres", "db_password": "postgres", "db_port": "5432", "region": "us-west-1", "poll_interval_ms": 100, "poll_max_record_bytes": 1048576, "ssl_enforced": false } } ] } }' \ http://localhost:4000/api/tenants ``` > **Note** > The `Authorization` token is signed with the secret set by `API_JWT_SECRET` in `docker-compose.yml`. If you want to listen to Postgres changes, you can create a table and then add the table to the `supabase_realtime` publication: ```sql create table test ( id serial primary key ); alter publication supabase_realtime add table test; ``` You can start playing around with Broadcast, Presence, and Postgres Changes features either with the client libs (e.g. `@supabase/realtime-js`), or use the built in Realtime Inspector on localhost, `http://localhost:4000/inspector/new` (make sure the port is correct for your development environment). The WebSocket URL must contain the subdomain, `external_id` of the tenant on the `_realtime.tenants` table, and the token must be signed with the `jwt_secret` that was inserted along with the tenant. If you're using the default tenant, the URL is `ws://realtime-dev.localhost:4000/socket` (make sure the port is correct for your development environment), and you can use `eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MDMwMjgwODcsInJvbGUiOiJwb3N0Z3JlcyJ9.tz_XJ89gd6bN8MBpCl7afvPrZiBH6RB65iA1FadPT3Y` for the token. 
The token must have `exp` and `role` (database role) keys. **Environment Variables** | Variable | Type | Description | | ----------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | PORT | number | Port which you can connect your client/listeners | | DB_HOST | string | Database host URL | | DB_PORT | number | Database port | | DB_USER | string | Database user | | DB_PASSWORD | string | Database password | | DB_NAME | string | Postgres database name | | DB_ENC_KEY | string | Key used to encrypt sensitive fields in \_realtime.tenants and \_realtime.extensions tables. Recommended: 16 characters. | | DB_AFTER_CONNECT_QUERY | string | Query that is run after server connects to database. | | DB_IP_VERSION | string | Sets the IP Version to be used. Allowed values are "ipv6" and "ipv4". If none are set we will try to infer the correct version | | DB_SSL | boolean | Whether or not the connection will be set-up using SSL | | DB_SSL_CA_CERT | string | Filepath to a CA trust store (e.g.: /etc/cacert.pem). If defined it enables server certificate verification | | API_JWT_SECRET | string | Secret that is used to sign tokens used to manage tenants and their extensions via HTTP requests. | | SECRET_KEY_BASE | string | Secret used by the server to sign cookies. Recommended: 64 characters. | | ERL_AFLAGS | string | Set to either "-proto_dist inet_tcp" or "-proto_dist inet6_tcp" depending on whether or not your network uses IPv4 or IPv6, respectively. | | APP_NAME | string | A name of the server. | | DNS_NODES | string | Node name used when running server in a cluster. | | MAX_CONNECTIONS | string | Set the soft maximum for WebSocket connections. Defaults to '16384'. 
| | MAX_HEADER_LENGTH | string | Set the maximum header length for connections (in bytes). Defaults to '4096'. | | NUM_ACCEPTORS | string | Set the number of server processes that will relay incoming WebSocket connection requests. Defaults to '100'. | | DB_QUEUE_TARGET | string | Maximum time to wait for a connection from the pool. Defaults to '5000' or 5 seconds. For more info, see [DBConnection](https://hexdocs.pm/db_connection/DBConnection.html#start_link/2-queue-config). | | DB_QUEUE_INTERVAL | string | Interval to wait to check if all connections were checked out under DB_QUEUE_TARGET. If all connections surpassed the target during this interval then the target is doubled. Defaults to '5000' or 5 seconds. For more info, see [DBConnection](https://hexdocs.pm/db_connection/DBConnection.html#start_link/2-queue-config). | | DB_POOL_SIZE | string | Sets the number of connections in the database pool. Defaults to '5'. | | DB_REPLICA_HOST | string | Hostname for the replica database. If set, enables the main replica connection pool. | | DB_REPLICA_POOL_SIZE | string | Sets the number of connections in the replica database pool. Defaults to '5'. | | SLOT_NAME_SUFFIX | string | This is appended to the replication slot which allows making a custom slot name. May contain lowercase letters, numbers, and the underscore character. Together with the default `supabase_realtime_replication_slot`, slot name should be up to 64 characters long. | | TENANT_CACHE_EXPIRATION_IN_MS | string | Set tenant cache TTL in milliseconds | | TENANT_MAX_BYTES_PER_SECOND | string | The default value of maximum bytes per second that each tenant can support, used when creating a tenant for the first time. Defaults to '100_000'. | | TENANT_MAX_CHANNELS_PER_CLIENT | string | The default value of maximum number of channels each tenant can support, used when creating a tenant for the first time. Defaults to '100'.
| | TENANT_MAX_CONCURRENT_USERS | string | The default value of maximum concurrent users per channel that each tenant can support, used when creating a tenant for the first time. Defaults to '200'. | | TENANT_MAX_EVENTS_PER_SECOND | string | The default value of maximum events per second that each tenant can support, used when creating a tenant for the first time. Defaults to '100'. | | TENANT_MAX_JOINS_PER_SECOND | string | The default value of maximum channel joins per second that each tenant can support, used when creating a tenant for the first time. Defaults to '100'. | | CLIENT_PRESENCE_MAX_CALLS | number | Maximum number of presence calls allowed per client (per WebSocket connection) within the time window. Defaults to '5'. | | CLIENT_PRESENCE_WINDOW_MS | number | Time window in milliseconds for per-client presence rate limiting. Defaults to '30000' (30 seconds). | | SEED_SELF_HOST | boolean | Seeds the system with default tenant | | SELF_HOST_TENANT_NAME | string | Tenant reference to be used for self host. Do keep in mind to use a URL compatible name | | LOG_LEVEL | string | Sets log level for Realtime logs. Defaults to info, supported levels are: info, emergency, alert, critical, error, warning, notice, debug | | DISABLE_HEALTHCHECK_LOGGING | boolean | Disables request logging for healthcheck endpoints (/healthcheck and /api/tenants/:tenant_id/health). Defaults to false. 
| | RUN_JANITOR | boolean | Whether janitor tasks should run | | JANITOR_SCHEDULE_TIMER_IN_MS | number | Time in ms to run the janitor task | | JANITOR_SCHEDULE_RANDOMIZE | boolean | Adds a randomized value of minutes to the timer | | JANITOR_RUN_AFTER_IN_MS | number | Tells system when to start janitor tasks after boot | | JANITOR_CLEANUP_MAX_CHILDREN | number | Maximum number of concurrent tasks working on janitor cleanup | | JANITOR_CLEANUP_CHILDREN_TIMEOUT | number | Timeout for each async task for janitor cleanup | | JANITOR_CHUNK_SIZE | number | Number of tenants to process per chunk. Each chunk will be processed by a Task | | MIGRATION_PARTITION_SLOTS | number | Number of dynamic supervisor partitions used by the migrations process | | CONNECT_PARTITION_SLOTS | number | Number of dynamic supervisor partitions used by the Connect, ReplicationConnect processes | | METRICS_CLEANER_SCHEDULE_TIMER_IN_MS | number | Time in ms to run the Metric Cleaner task | | METRICS_RPC_TIMEOUT_IN_MS | number | Time in ms to wait for RPC call to fetch Metric per node | | WEBSOCKET_MAX_HEAP_SIZE | number | Max number of bytes to be allocated as heap for the WebSocket transport process. If the limit is reached the process is brutally killed. Defaults to 50MB. | | REQUEST_ID_BAGGAGE_KEY | string | OTEL Baggage key to be used as request id | | OTEL_SDK_DISABLED | boolean | Disable OpenTelemetry tracing completely when 'true' | | OTEL_TRACES_EXPORTER | string | Possible values: `otlp` or `none`. See the [opentelemetry-erlang documentation](https://github.com/open-telemetry/opentelemetry-erlang/tree/v1.4.0/apps#os-environment) for more details on how to configure the traces exporter. | | OTEL_TRACES_SAMPLER | string | Defaults to `parentbased_always_on`. More info [here](https://opentelemetry.io/docs/languages/erlang/sampling/#environment-variables) | | GEN_RPC_TCP_SERVER_PORT | number | Port served by `gen_rpc`. Must be secured just like the Erlang distribution port.
Defaults to 5369 | | GEN_RPC_TCP_CLIENT_PORT | number | `gen_rpc` connects to another node using this port. Most of the time it should be the same as GEN_RPC_TCP_SERVER_PORT. Defaults to 5369 | | GEN_RPC_SSL_SERVER_PORT | number | Port served by `gen_rpc` secured with TLS. Must also define GEN_RPC_CERTFILE, GEN_RPC_KEYFILE and GEN_RPC_CACERTFILE. If this is defined then only TLS connections will be set-up. | | GEN_RPC_SSL_CLIENT_PORT | number | `gen_rpc` connects to another node using this port. Most of the time it should be the same as GEN_RPC_SSL_SERVER_PORT. Defaults to 6369 | | GEN_RPC_CERTFILE | string | Path to the public key in PEM format. Only needs to be provided if GEN_RPC_SSL_SERVER_PORT is defined | | GEN_RPC_KEYFILE | string | Path to the private key in PEM format. Only needs to be provided if GEN_RPC_SSL_SERVER_PORT is defined | | GEN_RPC_CACERTFILE | string | Path to the certificate authority public key in PEM format. Only needs to be provided if GEN_RPC_SSL_SERVER_PORT is defined | | GEN_RPC_CONNECT_TIMEOUT_IN_MS | number | `gen_rpc` client connect timeout in milliseconds. Defaults to 10000. | | GEN_RPC_SEND_TIMEOUT_IN_MS | number | `gen_rpc` client and server send timeout in milliseconds. Defaults to 10000. | | GEN_RPC_SOCKET_IP | string | Interface which `gen_rpc` will bind to. Defaults to "0.0.0.0" (ipv4) which means that all interfaces are going to expose the `gen_rpc` port. | | GEN_RPC_IPV6_ONLY | boolean | Configure `gen_rpc` to use IPv6 only. | | GEN_RPC_MAX_BATCH_SIZE | integer | Configure `gen_rpc` to batch when possible RPC casts. Defaults to 0 | | GEN_RPC_COMPRESS | integer | Configure `gen_rpc` to compress or not payloads. 0 means no compression and 9 max compression level. Defaults to 0. | | GEN_RPC_COMPRESSION_THRESHOLD_IN_BYTES | integer | Configure `gen_rpc` to compress only above a certain threshold in bytes. Defaults to 1000. 
| | MAX_GEN_RPC_CLIENTS | number | Max amount of `gen_rpc` TCP connections per node-to-node channel | | REBALANCE_CHECK_INTERVAL_IN_MS | number | Time in ms to check if process is in the right region | | NODE_BALANCE_UPTIME_THRESHOLD_IN_MS | number | Minimum node uptime in ms before using load-aware node picker. Nodes below this threshold use random selection as their metrics are not yet reliable. Defaults to 5 minutes. | | DISCONNECT_SOCKET_ON_NO_CHANNELS_INTERVAL_IN_MS | number | Time in ms to check if a socket has no channels open and if so, disconnect it | | BROADCAST_POOL_SIZE | number | Number of processes to relay Phoenix.PubSub messages across the cluster | | PRESENCE_POOL_SIZE | number | Number of tracker processes for Presence feature. Defaults to 10. Higher values improve concurrency for presence tracking across many channels. | | PRESENCE_BROADCAST_PERIOD_IN_MS | number | Interval in milliseconds to send presence delta broadcasts across the cluster. Defaults to 1500 (1.5 seconds). Lower values increase network traffic but reduce presence sync latency. | | PRESENCE_PERMDOWN_PERIOD_IN_MS | number | Interval in milliseconds to flag a replica as permanently down and discard its state. Defaults to 1200000 (20 minutes). Must be greater than down_period. Higher values are more forgiving of temporary network issues but slower to clean up truly dead replicas. | | POSTGRES_CDC_SCOPE_SHARDS | number | Number of dynamic supervisor partitions used by the Postgres CDC extension. Defaults to 5. | | USERS_SCOPE_SHARDS | number | Number of dynamic supervisor partitions used by the Users extension. Defaults to 5. | | REGION_MAPPING | string | Custom mapping of platform regions to tenant regions. Must be a valid JSON object with string keys and values (e.g., `{"custom-region-1": "us-east-1", "eu-north-1": "eu-west-2"}`). If not provided, uses the default hardcoded region mapping. When set, only the specified mappings are used (no fallback to defaults). 
| | METRICS_PUSHER_ENABLED | boolean | Enable periodic push of Prometheus metrics. Defaults to 'false'. Requires METRICS_PUSHER_URL to be set. | | METRICS_PUSHER_URL | string | Full URL endpoint to push metrics using Prometheus exposition format (e.g., 'https://example.com/api/v1/import/prometheus'). Required when METRICS_PUSHER_ENABLED is 'true'. | | METRICS_PUSHER_USER | string | Username for Basic auth (RFC 7617) on metrics pushes. Defaults to 'realtime'. Used together with METRICS_PUSHER_AUTH to form the Authorization header as `Basic Base64("user:password")`. | | METRICS_PUSHER_AUTH | string | Password for Basic auth (RFC 7617) on metrics pushes. Used together with METRICS_PUSHER_USER to form the Authorization header as `Basic Base64("user:password")`. If not set, requests will be sent without authorization. Keep this secret if used. | | METRICS_PUSHER_INTERVAL_MS | number | Interval in milliseconds between metrics pushes. Defaults to '30000' (30 seconds). | | METRICS_PUSHER_TIMEOUT_MS | number | HTTP request timeout in milliseconds for metrics push operations. Defaults to '15000' (15 seconds). | | METRICS_PUSHER_COMPRESS | boolean | Enable gzip compression for metrics payloads. Defaults to 'true'. | | DASHBOARD_AUTH | string | Authentication method for the admin dashboard (`/admin`). Accepted values: `basic_auth` (default) or `zta`. When `basic_auth`, `DASHBOARD_USER` and `DASHBOARD_PASSWORD` are required. When `zta`, `CF_TEAM_DOMAIN` is required. | | DASHBOARD_USER | string | Username for admin dashboard basic auth. Required when `DASHBOARD_AUTH` is `basic_auth`. | | DASHBOARD_PASSWORD | string | Password for admin dashboard basic auth. Required when `DASHBOARD_AUTH` is `basic_auth`. | | CF_TEAM_DOMAIN | string | Cloudflare Zero Trust team domain used for ZTA authentication. Required when `DASHBOARD_AUTH` is `zta`. 
| The OpenTelemetry variables mentioned above are not an exhaustive list of all [supported environment variables](https://opentelemetry.io/docs/languages/sdk-configuration/). ## WebSocket URL The WebSocket URL is in the following format for local development: `ws://[external_id].localhost:4000/socket/websocket` If you're using Supabase's hosted Realtime in production the URL is `wss://[project-ref].supabase.co/realtime/v1/websocket?apikey=[anon-token]&log_level=info&vsn=1.0.0` ## WebSocket Connection Authorization WebSocket connections are authorized via symmetric JWT verification. Only JWTs signed with the following algorithms are supported: - HS256 - HS384 - HS512 Verify JWT claims by setting JWT_CLAIM_VALIDATORS: > e.g. {'iss': 'Issuer', 'nbf': 1610078130} > > Then JWT's "iss" value must equal "Issuer" and "nbf" value must equal 1610078130. **Note:** > JWT expiration is checked automatically. `exp` and `role` (database role) keys are mandatory. **Authorizing Client Connection**: You can pass in the JWT by following the instructions under the Realtime client lib. For example, refer to the **Usage** section in the [@supabase/realtime-js](https://github.com/supabase/realtime-js) client library. ## Error Operational Codes This is the list of operational codes that can help you understand your deployment and your usage.
| Code | Description | | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | TopicNameRequired | You are trying to use Realtime without a topic name set | | InvalidJoinPayload | The payload provided to Realtime on connect is invalid | | RealtimeDisabledForConfiguration | The configuration provided to Realtime on connect will not be able to provide you any Postgres Changes | | TenantNotFound | The tenant you are trying to connect to does not exist | | ErrorConnectingToWebsocket | Error when trying to connect to the WebSocket server | | ErrorAuthorizingWebsocket | Error when trying to authorize the WebSocket connection | | TableHasSpacesInName | The table you are trying to listen to has spaces in its name which we are unable to support | | UnableToDeleteTenant | Error when trying to delete a tenant | | UnableToSetPolicies | Error when setting up Authorization Policies | | UnableCheckoutConnection | Error when trying to checkout a connection from the tenant pool | | UnableToSubscribeToPostgres | Error when trying to subscribe to Postgres changes | | ReconnectSubscribeToPostgres | Postgres changes still waiting to be subscribed | | ChannelRateLimitReached | The number of channels you can create has reached its limit | | ConnectionRateLimitReached | The number of connected clients has reached its limit | | ClientJoinRateLimitReached | The rate of joins per second from your clients has reached the channel limits | | DatabaseConnectionRateLimitReached | The rate of attempts to connect to tenants database has reached the limit | | MessagePerSecondRateLimitReached | The rate of messages per second from your clients has reached the channel limits | | RealtimeDisabledForTenant | Realtime has been disabled for the tenant | | UnableToConnectToTenantDatabase | Realtime was not able to connect to
the tenant's database | | DatabaseLackOfConnections | Realtime was not able to connect to the tenant's database due to not having enough available connections | | RealtimeNodeDisconnected | Realtime is a distributed application and this means that the system is unable to communicate with one of the distributed nodes | | MigrationsFailedToRun | Error when running the migrations against the Tenant database that are required by Realtime | | StartReplicationFailed | Error when starting the replication and listening of errors for database broadcasting | | ReplicationConnectionTimeout | Replication connection timed out during initialization | | ReplicationMaxWalSendersReached | Maximum number of WAL senders reached in tenant database, check how to increase this value in this [link](https://supabase.com/docs/guides/database/custom-postgres-config#cli-configurable-settings) | | MigrationCheckFailed | The check to determine whether migrations need to be run has failed | | PartitionCreationFailed | Error when creating partitions for realtime.messages | | ErrorStartingPostgresCDCStream | Error when starting the Postgres CDC stream which is used for Postgres Changes | | UnknownDataProcessed | An unknown data type was processed by the Realtime system | | ErrorStartingPostgresCDC | Error when starting the Postgres CDC extension which is used for Postgres Changes | | ReplicationSlotBeingUsed | The replication slot is being used by another transaction | | PoolingReplicationPreparationError | Error when preparing the replication slot | | PoolingReplicationError | Error when polling the replication slot | | SubscriptionCleanupFailed | Error when trying to clean up all subscriptions on subscription manager initialization or OID change | | SubscriptionDeletionFailed | Error when trying to delete a subscription for postgres changes | | SubscriptionsCheckerConnectionFailed | Error when the subscriptions checker process fails to connect to the database on startup | | ReplicationPollerConnectionFailed
| Error when the replication poller process fails to connect to the database on startup | | SubscriptionManagerConnectionFailed | Error when the subscription manager process fails to connect to the database on startup | | PgStatActivityQueryFailed | Error when querying pg_stat_activity to diagnose a replication slot conflict | | RateCounterError | Error when retrieving the subscription rate counter, falling back to blocking new subscriptions | | UnableToDeletePhantomSubscriptions | Error when trying to delete subscriptions that are no longer being used | | UnableToCheckProcessesOnRemoteNode | Error when trying to check the processes on a remote node | | UnhandledProcessMessage | Unhandled message received by a Realtime process | | UnableToTrackPresence | Error when handling track presence for this socket | | UnknownPresenceEvent | Presence event type not recognized by service | | IncreaseConnectionPool | The number of connections you have set for Realtime is not enough to handle your current use case | | RlsPolicyError | Error on RLS policy used for authorization | | ConnectionInitializing | Database is initializing connection | | DatabaseConnectionIssue | Database had connection issues and connection was not able to be established | | UnableToConnectToProject | Unable to connect to Project database | | InvalidJWTExpiration | JWT exp claim value is incorrect | | JwtSignatureError | JWT signature was not able to be validated | | MalformedJWT | Token received does not comply with the JWT format | | Unauthorized | Unauthorized access to Realtime channel | | RealtimeRestarting | Realtime is currently restarting | | UnableToProcessListenPayload | Payload sent in NOTIFY operation was not JSON parsable | | UnprocessableEntity | Received an HTTP request with a body that was not able to be processed by the endpoint | | InitializingProjectConnection | Connection against Tenant database is still starting | | TimeoutOnRpcCall | RPC request within the Realtime server has timed
out. | | ErrorOnRpcCall | Error when calling another realtime node | | ErrorExecutingTransaction | Error executing a database transaction in tenant database | | SynInitializationError | Our framework to synchronize processes has failed to properly start up a connection to the database | | JanitorFailedToDeleteOldMessages | Scheduled task for realtime.messages cleanup was unable to run | | UnableToEncodeJson | An error where we are not correctly handling the response to be sent to the end user | | UnableToBroadcastChanges | Error when trying to broadcast database changes to subscribers | | UnexpectedMessageReceived | An unexpected message was received by the replication connection process | | ErrorRunningQuery | Error when running a query against the tenant database | | UnknownError | An unhandled error occurred | | UnknownErrorOnController | An error we are not handling correctly was triggered on a controller | | UnknownErrorOnChannel | An error we are not handling correctly was triggered on a channel | | PresenceRateLimitReached | Limit of presence events reached | | ClientPresenceRateLimitReached | Limit of presence events reached on socket | | UnableToReplayMessages | An error while replaying messages | | JwtSignerError | Failed to generate a JWT signer — check your JWT secret or JWKS configuration | | MalformedWebSocketMessage | Received a WebSocket message that is empty, invalid JSON, or missing required fields (`ref`, `topic`, or `event`). The connection is kept alive but the message is dropped | | UnknownErrorOnWebSocketMessage | An unexpected error occurred while processing an incoming WebSocket message. The connection is kept alive but the message is dropped | ## Observability and Metrics Supabase Realtime exposes comprehensive metrics for monitoring performance, resource usage, and application behavior. These metrics are exposed in Prometheus format and can be scraped by any compatible monitoring system (Victoria Metrics, Prometheus, Grafana Agent, etc.).
### Metrics Endpoints Metrics are split across two endpoints with different priorities, allowing you to configure different scrape intervals in your monitoring system: | Endpoint | Priority | Recommended Scrape Interval | Contents | | ----------------------------- | -------- | --------------------------- | ------------------------------------------------------------------------------------------------ | | `GET /metrics` | **High** | 30s | BEAM/VM, OS, Phoenix, distributed infra, and global aggregated tenant totals (no `tenant` label) | | `GET /tenant-metrics` | **Low** | 60s | Per-tenant labeled metrics (connection counts, channel events, replication, authorization) | | `GET /metrics/:region` | **High** | 30s | Same as `/metrics` scoped to a specific region | | `GET /tenant-metrics/:region` | **Low** | 60s | Same as `/tenant-metrics` scoped to a specific region | All endpoints require a `Bearer` JWT token in the `Authorization` header signed with `METRICS_JWT_SECRET`. **Victoria Metrics scrape configuration example:** ```yaml scrape_configs: - job_name: realtime_global scrape_interval: 30s bearer_token: static_configs: - targets: [":4000"] metrics_path: /metrics - job_name: realtime_tenant scrape_interval: 60s bearer_token: static_configs: - targets: [":4000"] metrics_path: /tenant-metrics ``` ### Metric Scopes Metrics are classified by their scope to help you understand what they measure: - **Per-Tenant**: Metrics tagged with a `tenant` label measure activity scoped to individual tenants. Exposed on `/tenant-metrics`. - **Global Aggregate**: Metrics prefixed with `realtime_channel_global_*` or `realtime_connections_global_*` aggregate tenant data without the `tenant` label, suitable for cluster-wide dashboards. Exposed on `/metrics`. - **Per-Node**: Metrics measure activity on the current Realtime node. Without explicit per-node indication, assume metrics apply to the local node. 
- **BEAM/Erlang VM**: Metrics prefixed with `beam_*` and `phoenix_*` expose Erlang runtime internals. Exposed on `/metrics`. - **Infrastructure**: Metrics prefixed with `osmon_*`, `gen_rpc_*`, and `dist_*` measure system-level resources and cluster communication. Exposed on `/metrics`. ### Connection & Tenant Metrics These metrics track WebSocket connections and tenant activity across the Realtime cluster. | Metric | Type | Description | Scope | Endpoint | | ----------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------- | ----------------- | | `realtime_tenants_connected` | Gauge | Number of connected tenants per Realtime node. Use this to understand tenant distribution across your cluster and identify load imbalances. | Per-Node | `/metrics` | | `realtime_connections_global_connected` | Gauge | Node total of active WebSocket connections across all tenants. Aggregated without a `tenant` label for cluster-wide dashboards. | Global Aggregate | `/metrics` | | `realtime_connections_global_connected_cluster` | Gauge | Cluster-wide total of active WebSocket connections across all tenants. | Global Aggregate | `/metrics` | | `realtime_connections_connected` | Gauge | Active WebSocket connections that have at least one subscribed channel. Indicates active client engagement with Realtime features. | **Per-Tenant** | `/tenant-metrics` | | `realtime_connections_connected_cluster` | Gauge | Cluster-wide active WebSocket connections for each individual tenant. | **Per-Tenant** | `/tenant-metrics` | | `phoenix_connections_total` | Gauge | Total open connections to the Ranch listener (includes idle connections waiting for data). | Per-Node | `/metrics` | | `phoenix_connections_active` | Gauge | Connections actively processing a WebSocket frame or HTTP request. 
Divide by `phoenix_connections_max` to get a saturation ratio. | Per-Node | `/metrics` | | `phoenix_connections_max` | Gauge | The configured Ranch connection limit. When `phoenix_connections_total` approaches this the node is saturated and new connections will be queued. | Per-Node | `/metrics` | | `realtime_channel_joins` | Counter | Rate of channel join attempts per second per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_channel_global_joins` | Counter | Global rate of channel join attempts per second across all tenants. | Global Aggregate | `/metrics` | ### Event Metrics These metrics measure the volume and types of events flowing through your Realtime system, segmented by feature type. | Metric | Type | Description | Scope | Endpoint | | ----------------------------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------- | ---------------- | ----------------- | | `realtime_channel_events` | Counter | Broadcast events per second per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_channel_presence_events` | Counter | Presence events per second per tenant. Includes online/offline status updates and custom presence metadata synchronization. | **Per-Tenant** | `/tenant-metrics` | | `realtime_channel_db_events` | Counter | Postgres Changes events per second per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_channel_global_events` | Counter | Global broadcast events per second across all tenants. Compare against per-tenant values for outlier detection. | Global Aggregate | `/metrics` | | `realtime_channel_global_presence_events` | Counter | Global presence events per second across all tenants. | Global Aggregate | `/metrics` | | `realtime_channel_global_db_events` | Counter | Global Postgres Changes events per second across all tenants. 
| Global Aggregate | `/metrics` | ### Payload & Traffic Metrics These metrics provide insight into data volume, message sizes, and network I/O characteristics. | Metric | Type | Description | Scope | Endpoint | | -------------------------------------- | --------- | ------------------------------------------------------------------------------------------------------------------------------- | ---------------- | ----------------- | | `realtime_payload_size_bucket` | Histogram | Global payload size distribution across all tenants, tagged by message type. Use for cluster-wide sizing and capacity planning. | Global Aggregate | `/metrics` | | `realtime_tenants_payload_size_bucket` | Histogram | Per-tenant payload size distribution. Use this to identify tenants generating unusually large messages. | **Per-Tenant** | `/tenant-metrics` | | `realtime_channel_input_bytes` | Counter | Total ingress bytes per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_channel_output_bytes` | Counter | Total egress bytes per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_channel_global_input_bytes` | Counter | Global total ingress bytes across all tenants. | Global Aggregate | `/metrics` | | `realtime_channel_global_output_bytes` | Counter | Global total egress bytes across all tenants. | Global Aggregate | `/metrics` | ### Latency & Performance Metrics These metrics measure end-to-end latency and processing performance across different Realtime operations. | Metric | Type | Description | Scope | Endpoint | | ---------------------------------------------------------------------- | --------- | ---------------------------------------------------------------------------------------------------------------- | ---------------- | ----------------- | | `realtime_replication_poller_query_duration_bucket` | Histogram | Postgres Changes query latency in milliseconds per tenant. High values may indicate database performance issues. 
| **Per-Tenant** | `/tenant-metrics` | | `realtime_replication_poller_query_duration_count` | Counter | Number of database polling queries executed per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_tenants_broadcast_from_database_latency_committed_at_bucket` | Histogram | Time from database commit to client broadcast per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_tenants_broadcast_from_database_latency_inserted_at_bucket` | Histogram | Alternative latency using insert timestamp per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_tenants_replay_bucket` | Histogram | Broadcast replay latency per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_global_rpc_bucket` | Histogram | Inter-node RPC call latency distribution, tagged by `success` and `mechanism`. | Global Aggregate | `/metrics` | | `realtime_global_rpc_count` | Counter | Total inter-node RPC calls. Divide failed by total to get error rate. | Global Aggregate | `/metrics` | | `realtime_tenants_read_authorization_check_bucket` | Histogram | RLS policy evaluation time for read operations per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_tenants_read_authorization_check_count` | Counter | Number of read authorization checks per tenant. | **Per-Tenant** | `/tenant-metrics` | | `realtime_tenants_write_authorization_check_bucket` | Histogram | RLS policy evaluation time for write operations per tenant. | **Per-Tenant** | `/tenant-metrics` | | `phoenix_channel_handled_in_duration_milliseconds_bucket` | Histogram | Time for the application to respond to a channel message. High p99 values indicate slow message handlers. | Per-Node | `/metrics` | | `phoenix_socket_connected_duration_milliseconds_bucket` | Histogram | Time to establish a WebSocket socket connection, tagged by `result`/`transport`/`serializer`. | Per-Node | `/metrics` | ### Authorization & Error Metrics These metrics track security policy enforcement and error rates. 
| Metric | Type | Description | Scope | Endpoint | | ------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | ---------------- | ----------------- | | `realtime_channel_error` | Counter | Unhandled channel errors per tenant. Any non-zero value warrants investigation. | **Per-Tenant** | `/tenant-metrics` | | `realtime_channel_global_error` | Counter | Global unhandled channel error count across all tenants, tagged by error code. | Global Aggregate | `/metrics` | | `phoenix_channel_joined_total` | Counter | WebSocket channel join attempts tagged by `result` (`ok`/`error`) and `transport`. Use `result="error"` rate to detect client or policy issues. | Per-Node | `/metrics` | ### BEAM/Erlang VM Metrics These metrics provide insight into the underlying Erlang runtime that powers Realtime, critical for capacity planning and debugging performance issues. All BEAM/Erlang VM metrics are served from `GET /metrics`. #### Memory Metrics | Metric | Type | Description | | ----------------------------------------- | ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `beam_memory_allocated_bytes` | Gauge | Total memory allocated by the Erlang VM. Compare this to the container memory limit to ensure you have headroom. Steady increase may indicate a memory leak. | | `beam_memory_atom_total_bytes` | Gauge | Memory used by the atom table. Atoms in Erlang are never garbage collected, so this should remain relatively stable. Unbounded growth indicates a bug creating new atoms. | | `beam_memory_binary_total_bytes` | Gauge | Memory used by binary data (WebSocket payloads, database results). This metric closely correlates with active connection volume and message sizes. 
| | `beam_memory_code_total_bytes` | Gauge | Memory used by compiled Erlang bytecode. Changes only during code reloads and should remain stable in production. | | `beam_memory_ets_total_bytes` | Gauge | Memory used by ETS (in-memory tables) including channel subscriptions and presence state. Monitor this to understand session storage overhead. | | `beam_memory_processes_total_bytes` | Gauge | Memory used by Erlang processes themselves. Each channel connection and background task consumes memory; this scales with concurrency. | | `beam_memory_persistent_term_total_bytes` | Gauge | Memory used by persistent terms (immutable shared state). Should be minimal and stable in typical Realtime deployments. | #### Process & Resource Metrics | Metric | Type | Description | | -------------------------- | ----- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `beam_stats_process_count` | Gauge | Number of active Erlang processes. Each WebSocket connection spawns processes; high values correlate with connection count. Sudden spikes may indicate process leaks. | | `beam_stats_port_count` | Gauge | Number of open port connections (network sockets, pipes). Should correlate roughly with connection count plus internal cluster communications. | | `beam_stats_ets_count` | Gauge | Number of active ETS tables used for caching and state. Changes reflect dynamic supervisor activity and feature usage patterns. | | `beam_stats_atom_count` | Gauge | Total atoms in the atom table. Should remain relatively stable; unbounded growth indicates code bugs. 
| #### Performance Metrics | Metric | Type | Description | | -------------------------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `beam_stats_uptime_milliseconds_count` | Counter | Node uptime in milliseconds. Use this to track restarts and validate deployment stability. Unexpected resets indicate crashes. | | `beam_stats_port_io_byte_count` | Counter | Total bytes transferred through network ports. Compare ingress and egress to identify asymmetric traffic patterns. | | `beam_stats_gc_count` | Counter | Garbage collection events executed by the Erlang VM. Frequent GC indicates high memory churn; infrequent GC suggests stable state. | | `beam_stats_gc_reclaimed_bytes` | Counter | Bytes reclaimed by garbage collection. Divide by GC count to understand average cleanup size. Low reclaim per GC may indicate inefficient memory allocation patterns. | | `beam_stats_reduction_count` | Counter | Total reductions (work units) executed by the VM. Correlates with CPU usage; high reduction rates under stable load indicate inefficient algorithms. | | `beam_stats_context_switch_count` | Counter | Process context switches by the Erlang scheduler. High values indicate contention between many processes; compare with process count to gauge congestion. | | `beam_stats_active_task_count` | Gauge | Tasks currently executing on dirty schedulers (non-Erlang operations). High values indicate CPU-bound work or blocking I/O. | | `beam_stats_run_queue_count` | Gauge | Processes waiting to be scheduled. High values indicate CPU saturation; the node cannot keep up with work demand. | ### Infrastructure Metrics These metrics expose system-level resource usage and inter-node cluster communication. All infrastructure metrics are served from `GET /metrics`. 
#### Node Metrics | Metric | Type | Description | | ----------------- | ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | | `osmon_cpu_util` | Gauge | Current CPU utilization percentage (0-100). Monitor this to trigger horizontal scaling and identify CPU-bound bottlenecks. | | `osmon_cpu_avg1` | Gauge | 1-minute CPU load average. Sharp increases indicate sudden load spikes; values > CPU count indicate sustained overload. | | `osmon_cpu_avg5` | Gauge | 5-minute CPU load average. Smooths short-term spikes; use this to detect sustained load increases. | | `osmon_cpu_avg15` | Gauge | 15-minute CPU load average. Indicates long-term trends; use for capacity planning and detecting gradual load growth. | | `osmon_ram_usage` | Gauge | RAM utilization percentage (0-100). Combined with `beam_memory_allocated_bytes`, this indicates kernel memory overhead and other processes on the node. | #### Distributed System Metrics | Metric | Type | Description | | ---------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------- | | `gen_rpc_queue_size_bytes` | Gauge | Outbound queue size for gen_rpc inter-node communication in bytes. Large values indicate a receiving node cannot keep up with message rate. | | `gen_rpc_send_pending_bytes` | Gauge | Bytes pending transmission in gen_rpc queues. Combined with queue size, helps identify network saturation or slow receivers. | | `gen_rpc_send_bytes` | Counter | Total bytes sent via gen_rpc across the cluster. Monitor this to understand inter-node traffic and plan network capacity. | | `gen_rpc_recv_bytes` | Counter | Total bytes received via gen_rpc from other nodes. Compare with send bytes to identify asymmetric communication patterns. 
| | `dist_queue_size` | Gauge | Erlang distribution queue size for cluster communication. High values indicate network congestion or unbalanced load across nodes. | | `dist_send_pending_bytes` | Gauge | Bytes pending in Erlang distribution queues. Works with queue size to diagnose cluster communication issues. | | `dist_send_bytes` | Counter | Total bytes sent via Erlang distribution protocol. Includes all cluster metadata and RPC traffic. | | `dist_recv_bytes` | Counter | Total bytes received via Erlang distribution protocol. Compare with send to validate symmetric communication. | ## License This repo is licensed under Apache 2.0. ## Credits - [Phoenix](https://github.com/phoenixframework/phoenix) - `Realtime` server is built with the amazing Elixir framework. - [Phoenix Channels JavaScript Client](https://github.com/phoenixframework/phoenix/tree/master/assets/js/phoenix) - [@supabase/realtime-js](https://github.com/supabase/realtime-js) client library heavily draws from the Phoenix Channels client library. ================================================ FILE: assets/css/app.css ================================================ @tailwind base; @tailwind components; @tailwind utilities; ================================================ FILE: assets/css/phoenix.css ================================================ /* Includes some default style for the starter application. * This can be safely deleted to start fresh. 
*/ /* Milligram v1.3.0 https://milligram.github.io * Copyright (c) 2017 CJ Patoilo Licensed under the MIT license */ *,*:after,*:before{box-sizing:inherit}html{box-sizing:border-box;font-size:62.5%}body{color:#000000;font-family:'Helvetica', 'Arial', sans-serif;font-size:1.6em;font-weight:300;line-height:1.6}blockquote{border-left:0.3rem solid #d1d1d1;margin-left:0;margin-right:0;padding:1rem 1.5rem}blockquote *:last-child{margin-bottom:0}.button,button,input[type='button'],input[type='reset'],input[type='submit']{background-color:#0069d9;border:0.1rem solid #0069d9;border-radius:.4rem;color:#fff;cursor:pointer;display:inline-block;font-size:1.1rem;font-weight:700;height:3.8rem;letter-spacing:.1rem;line-height:3.8rem;padding:0 3.0rem;text-align:center;text-decoration:none;text-transform:uppercase;white-space:nowrap}.button:focus,.button:hover,button:focus,button:hover,input[type='button']:focus,input[type='button']:hover,input[type='reset']:focus,input[type='reset']:hover,input[type='submit']:focus,input[type='submit']:hover{background-color:#606c76;border-color:#606c76;color:#fff;outline:0}.button[disabled],button[disabled],input[type='button'][disabled],input[type='reset'][disabled],input[type='submit'][disabled]{cursor:default;opacity:.5}.button[disabled]:focus,.button[disabled]:hover,button[disabled]:focus,button[disabled]:hover,input[type='button'][disabled]:focus,input[type='button'][disabled]:hover,input[type='reset'][disabled]:focus,input[type='reset'][disabled]:hover,input[type='submit'][disabled]:focus,input[type='submit'][disabled]:hover{background-color:#0069d9;border-color:#0069d9}.button.button-outline,button.button-outline,input[type='button'].button-outline,input[type='reset'].button-outline,input[type='submit'].button-outline{background-color:transparent;color:#0069d9}.button.button-outline:focus,.button.button-outline:hover,button.button-outline:focus,button.button-outline:hover,input[type='button'].button-outline:focus,input[type='button'].button-
outline:hover,input[type='reset'].button-outline:focus,input[type='reset'].button-outline:hover,input[type='submit'].button-outline:focus,input[type='submit'].button-outline:hover{background-color:transparent;border-color:#606c76;color:#606c76}.button.button-outline[disabled]:focus,.button.button-outline[disabled]:hover,button.button-outline[disabled]:focus,button.button-outline[disabled]:hover,input[type='button'].button-outline[disabled]:focus,input[type='button'].button-outline[disabled]:hover,input[type='reset'].button-outline[disabled]:focus,input[type='reset'].button-outline[disabled]:hover,input[type='submit'].button-outline[disabled]:focus,input[type='submit'].button-outline[disabled]:hover{border-color:inherit;color:#0069d9}.button.button-clear,button.button-clear,input[type='button'].button-clear,input[type='reset'].button-clear,input[type='submit'].button-clear{background-color:transparent;border-color:transparent;color:#0069d9}.button.button-clear:focus,.button.button-clear:hover,button.button-clear:focus,button.button-clear:hover,input[type='button'].button-clear:focus,input[type='button'].button-clear:hover,input[type='reset'].button-clear:focus,input[type='reset'].button-clear:hover,input[type='submit'].button-clear:focus,input[type='submit'].button-clear:hover{background-color:transparent;border-color:transparent;color:#606c76}.button.button-clear[disabled]:focus,.button.button-clear[disabled]:hover,button.button-clear[disabled]:focus,button.button-clear[disabled]:hover,input[type='button'].button-clear[disabled]:focus,input[type='button'].button-clear[disabled]:hover,input[type='reset'].button-clear[disabled]:focus,input[type='reset'].button-clear[disabled]:hover,input[type='submit'].button-clear[disabled]:focus,input[type='submit'].button-clear[disabled]:hover{color:#0069d9}code{background:#f4f5f6;border-radius:.4rem;font-size:86%;margin:0 .2rem;padding:.2rem .5rem;white-space:nowrap}pre{background:#f4f5f6;border-left:0.3rem solid 
#0069d9;overflow-y:hidden}pre>code{border-radius:0;display:block;padding:1rem 1.5rem;white-space:pre}hr{border:0;border-top:0.1rem solid #f4f5f6;margin:3.0rem 0}input[type='email'],input[type='number'],input[type='password'],input[type='search'],input[type='tel'],input[type='text'],input[type='url'],textarea,select{-webkit-appearance:none;-moz-appearance:none;appearance:none;background-color:transparent;border:0.1rem solid #d1d1d1;border-radius:.4rem;box-shadow:none;box-sizing:inherit;height:3.8rem;padding:.6rem 1.0rem;width:100%}input[type='email']:focus,input[type='number']:focus,input[type='password']:focus,input[type='search']:focus,input[type='tel']:focus,input[type='text']:focus,input[type='url']:focus,textarea:focus,select:focus{border-color:#0069d9;outline:0}select{background:url('data:image/svg+xml;utf8,') center right no-repeat;padding-right:3.0rem}select:focus{background-image:url('data:image/svg+xml;utf8,')}textarea{min-height:6.5rem}label,legend{display:block;font-size:1.6rem;font-weight:700;margin-bottom:.5rem}fieldset{border-width:0;padding:0}input[type='checkbox'],input[type='radio']{display:inline}.label-inline{display:inline-block;font-weight:normal;margin-left:.5rem}.row{display:flex;flex-direction:column;padding:0;width:100%}.row.row-no-padding{padding:0}.row.row-no-padding>.column{padding:0}.row.row-wrap{flex-wrap:wrap}.row.row-top{align-items:flex-start}.row.row-bottom{align-items:flex-end}.row.row-center{align-items:center}.row.row-stretch{align-items:stretch}.row.row-baseline{align-items:baseline}.row .column{display:block;flex:1 1 auto;margin-left:0;max-width:100%;width:100%}.row .column.column-offset-10{margin-left:10%}.row .column.column-offset-20{margin-left:20%}.row .column.column-offset-25{margin-left:25%}.row .column.column-offset-33,.row .column.column-offset-34{margin-left:33.3333%}.row .column.column-offset-50{margin-left:50%}.row .column.column-offset-66,.row .column.column-offset-67{margin-left:66.6666%}.row 
.column.column-offset-75{margin-left:75%}.row .column.column-offset-80{margin-left:80%}.row .column.column-offset-90{margin-left:90%}.row .column.column-10{flex:0 0 10%;max-width:10%}.row .column.column-20{flex:0 0 20%;max-width:20%}.row .column.column-25{flex:0 0 25%;max-width:25%}.row .column.column-33,.row .column.column-34{flex:0 0 33.3333%;max-width:33.3333%}.row .column.column-40{flex:0 0 40%;max-width:40%}.row .column.column-50{flex:0 0 50%;max-width:50%}.row .column.column-60{flex:0 0 60%;max-width:60%}.row .column.column-66,.row .column.column-67{flex:0 0 66.6666%;max-width:66.6666%}.row .column.column-75{flex:0 0 75%;max-width:75%}.row .column.column-80{flex:0 0 80%;max-width:80%}.row .column.column-90{flex:0 0 90%;max-width:90%}.row .column .column-top{align-self:flex-start}.row .column .column-bottom{align-self:flex-end}.row .column .column-center{-ms-grid-row-align:center;align-self:center}@media (min-width: 40rem){.row{flex-direction:row;margin-left:-1.0rem;width:calc(100% + 2.0rem)}.row .column{margin-bottom:inherit;padding:0 1.0rem}}a{color:#0069d9;text-decoration:none}a:focus,a:hover{color:#606c76}dl,ol,ul{list-style:none;margin-top:0;padding-left:0}dl dl,dl ol,dl ul,ol dl,ol ol,ol ul,ul dl,ul ol,ul ul{font-size:90%;margin:1.5rem 0 1.5rem 3.0rem}ol{list-style:decimal inside}ul{list-style:circle inside}.button,button,dd,dt,li{margin-bottom:1.0rem}fieldset,input,select,textarea{margin-bottom:1.5rem}blockquote,dl,figure,form,ol,p,pre,table,ul{margin-bottom:2.5rem}table{border-spacing:0;width:100%}td,th{border-bottom:0.1rem solid #e1e1e1;padding:1.2rem 
1.5rem;text-align:left}td:first-child,th:first-child{padding-left:0}td:last-child,th:last-child{padding-right:0}b,strong{font-weight:bold}p{margin-top:0}h1,h2,h3,h4,h5,h6{font-weight:300;letter-spacing:-.1rem;margin-bottom:2.0rem;margin-top:0}h1{font-size:4.6rem;line-height:1.2}h2{font-size:3.6rem;line-height:1.25}h3{font-size:2.8rem;line-height:1.3}h4{font-size:2.2rem;letter-spacing:-.08rem;line-height:1.35}h5{font-size:1.8rem;letter-spacing:-.05rem;line-height:1.5}h6{font-size:1.6rem;letter-spacing:0;line-height:1.4}img{max-width:100%}.clearfix:after{clear:both;content:' ';display:table}.float-left{float:left}.float-right{float:right} /* General style */ h1{font-size: 3.6rem; line-height: 1.25} h2{font-size: 2.8rem; line-height: 1.3} h3{font-size: 2.2rem; letter-spacing: -.08rem; line-height: 1.35} h4{font-size: 1.8rem; letter-spacing: -.05rem; line-height: 1.5} h5{font-size: 1.6rem; letter-spacing: 0; line-height: 1.4} h6{font-size: 1.4rem; letter-spacing: 0; line-height: 1.2} pre{padding: 1em;} .container{ margin: 0 auto; max-width: 80.0rem; padding: 0 2.0rem; position: relative; width: 100% } select { width: auto; } /* Phoenix promo and logo */ .phx-hero { text-align: center; border-bottom: 1px solid #e3e3e3; background: #eee; border-radius: 6px; padding: 3em 3em 1em; margin-bottom: 3rem; font-weight: 200; font-size: 120%; } .phx-hero input { background: #ffffff; } .phx-logo { min-width: 300px; margin: 1rem; display: block; } .phx-logo img { width: auto; display: block; } /* Headers */ header { width: 100%; background: #fdfdfd; border-bottom: 1px solid #eaeaea; margin-bottom: 2rem; } header section { align-items: center; display: flex; flex-direction: column; justify-content: space-between; } header section :first-child { order: 2; } header section :last-child { order: 1; } header nav ul, header nav li { margin: 0; padding: 0; display: block; text-align: right; white-space: nowrap; } header nav ul { margin: 1rem; margin-top: 0; } header nav a { display: block; 
// LiveView hook that owns the Realtime client for the inspector page.
// LiveView drives it through the events registered in `mounted()`:
// "connect", "send_message", "disconnect" and "clear_local_storage".
Hooks.payload = {
  // Creates the Supabase client, configures one channel and subscribes to it.
  // Every argument arrives as a string; the feature flags
  // (`enable_presence`, `enable_db_changes`, `private_channel`) are compared
  // against the literal "true".
  initRealtime(
    channelName,
    host,
    log_level,
    token,
    schema,
    table,
    filter,
    bearer,
    enable_presence,
    enable_db_changes,
    private_channel
  ) {
    // Instantiate our client with the Realtime server and params to connect with
    const opts = {
      realtime: {
        params: {
          log_level: log_level,
        },
      },
    };

    this.realtimeSocket = createClient(host, token, opts);

    // An empty bearer means "do not override the auth token".
    if (bearer !== "") {
      this.realtimeSocket.realtime.setAuth(bearer);
    }

    // From here on `private_channel` is a boolean, not the original string.
    private_channel = private_channel === "true";

    // Join the Channel named by `channelName`
    // Channels can be named anything
    // All clients on the same Channel will get messages sent to that Channel
    this.channel = this.realtimeSocket.channel(channelName, {
      config: {
        broadcast: { self: true },
        private: private_channel,
      },
    });

    // Hack to confirm Postgres is subscribed
    // Need to add 'extension' key in the 'payload'
    this.channel.on("system", {}, (payload) => {
      if (payload.extension === "postgres_changes" && payload.status === "ok") {
        this.pushEventTo("#conn_info", "postgres_subscribed", {});
      }
      const ts = new Date();
      const line = ` SYSTEM ${ts.toISOString()} ${JSON.stringify(payload)} `;
      const list = document.querySelector("#plist");
      // Newest entry first: prepend to the existing log markup.
      // NOTE(review): the stringified payload is assigned to innerHTML without
      // HTML escaping here and in the handlers below; payload content comes
      // from the channel, so confirm this cannot inject markup.
      list.innerHTML = line + list.innerHTML;
    });

    // Listen for all (`*`) `broadcast` events
    // The event name can be anything
    // Match on specific event names to filter for only those types of events and do something with them
    this.channel.on("broadcast", { event: "*" }, (payload) => {
      const ts = new Date();
      const line = ` BROADCAST ${ts.toISOString()} ${JSON.stringify(payload)} `;
      const list = document.querySelector("#plist");
      list.innerHTML = line + list.innerHTML;
    });

    // Listen for all (`*`) `presence` events
    if (enable_presence === "true") {
      console.log("enable_presence", enable_presence);
      this.channel.on("presence", { event: "*" }, (payload) => {
        // Pushed on every presence event, not only the first one.
        this.pushEventTo("#conn_info", "presence_subscribed", {});
        const ts = new Date();
        const line = ` PRESENCE ${ts.toISOString()} ${JSON.stringify(payload)} `;
        const list = document.querySelector("#plist");
        list.innerHTML = line + list.innerHTML;
      });
    }

    // Listen for all (`*`) `postgres_changes` events on the given `schema` and `table`
    if (enable_db_changes === "true") {
      const postgres_changes_opts = {
        event: "*",
        schema: schema,
        table: table,
      };
      // The filter is optional; an empty string means "no filter".
      if (filter !== "") {
        postgres_changes_opts.filter = filter;
      }
      this.channel.on("postgres_changes", postgres_changes_opts, (payload) => {
        // Wall-clock "now" in milliseconds, compared with the commit timestamp
        // carried by the payload to estimate end-to-end latency.
        const ts = performance.now() + performance.timeOrigin;
        const iso_ts = new Date();
        const payload_ts = Date.parse(payload.commit_timestamp);
        const latency = ts - payload_ts;
        const line = ` POSTGRES ${iso_ts.toISOString()}
${JSON.stringify(payload)}
Latency: ${latency.toFixed( 1 )} ms
`;
        const list = document.querySelector("#plist");
        list.innerHTML = line + list.innerHTML;
      });
    }

    // Finally, subscribe to the Channel we just setup
    this.channel.subscribe(async (status, error) => {
      if (status === "SUBSCRIBED") {
        console.log(`Realtime Channel status: ${status}`);

        // Let LiveView know we connected so we can update the button text
        this.pushEventTo("#conn_info", "broadcast_subscribed", { host: host });

        // Save params to local storage if `SUBSCRIBED`
        // (`mounted()` reads them back to pre-fill the connection form).
        localStorage.setItem("host", host);
        localStorage.setItem("token", token);
        localStorage.setItem("log_level", log_level);
        localStorage.setItem("channel", channelName);
        localStorage.setItem("schema", schema);
        localStorage.setItem("table", table);
        localStorage.setItem("filter", filter);
        localStorage.setItem("bearer", bearer);
        localStorage.setItem("enable_presence", enable_presence);
        localStorage.setItem("enable_db_changes", enable_db_changes);
        // `private_channel` is the boolean computed above; localStorage
        // stores it as the string "true" or "false".
        localStorage.setItem("private_channel", private_channel);

        // Initiate Presence for a connected user
        // Now when a new user connects and sends a `TRACK` message all clients will receive a message like
        // (the values below are illustrative):
        // {
        //     "event":"join",
        //     "key":"2b88be54-3b41-11ed-9887-1a9e1a785cf8",
        //     "currentPresences":[
        //
        //     ],
        //     "newPresences":[
        //         {
        //         "name":"realtime_presence_55",
        //         "t":1968.1000000238419,
        //         "presence_ref":"Fxd_ZWlhIIfuIwlD"
        //         }
        //     ]
        // }
        //
        // And when `TRACK`ed users leave we'll receive an event like:
        //
        // {
        //     "event":"leave",
        //     "key":"2b88be54-3b41-11ed-9887-1a9e1a785cf8",
        //     "currentPresences":[
        //
        //     ],
        //     "leftPresences":[
        //         {
        //         "name":"realtime_presence_55",
        //         "t":1968.1000000238419,
        //         "presence_ref":"Fxd_ZWlhIIfuIwlD"
        //         }
        //     ]
        // }
        if (enable_presence === "true") {
          // Random display name in the range user_name_0 .. user_name_99.
          const name = "user_name_" + Math.floor(Math.random() * 100);
          await this.channel.track({
            name: name,
            t: performance.now(),
          });
        }
      } else {
        // Every status other than "SUBSCRIBED" is logged as an error.
        console.error(`Realtime Channel error status: ${status}`);
        console.error(`Realtime Channel error: ${error}`);
      }
    });
  },

  sendRealtime(event, payload) {
    // Send a `broadcast` message over the Channel
    // All connected clients will receive this message if they're subscribed
    // to `broadcast` events and matching on the `event` name or using `*` to match all event names
    this.channel.send({
      type: "broadcast",
      event: event,
      payload: payload,
    });
  },

  disconnectRealtime() {
    // Unsubscribe from the Channel created by `initRealtime`.
    this.channel.unsubscribe();
  },

  clearLocalStorage() {
    // Removes every localStorage key for this origin, not only the ones
    // written by `initRealtime`.
    localStorage.clear();
  },

  mounted() {
    // Send any previously saved connection params to the form;
    // keys that were never saved are sent as null.
    const params = {
      log_level: localStorage.getItem("log_level"),
      token: localStorage.getItem("token"),
      host: localStorage.getItem("host"),
      channel: localStorage.getItem("channel"),
      schema: localStorage.getItem("schema"),
      table: localStorage.getItem("table"),
      filter: localStorage.getItem("filter"),
      bearer: localStorage.getItem("bearer"),
      enable_presence: localStorage.getItem("enable_presence"),
      enable_db_changes: localStorage.getItem("enable_db_changes"),
      private_channel: localStorage.getItem("private_channel"),
    };

    this.pushEventTo("#conn_form", "local_storage", params);

    // Wire the server-pushed events to the methods above.
    this.handleEvent("connect", ({ connection }) =>
      this.initRealtime(
        connection.channel,
        connection.host,
        connection.log_level,
        connection.token,
        connection.schema,
        connection.table,
        connection.filter,
        connection.bearer,
        connection.enable_presence,
        connection.enable_db_changes,
        connection.private_channel
      )
    );

    this.handleEvent("send_message", ({ message }) =>
      this.sendRealtime(message.event, message.payload)
    );

    this.handleEvent("disconnect", () => this.disconnectRealtime());

    this.handleEvent("clear_local_storage", () => this.clearLocalStorage());
  },
};
const liveSocket = new LiveSocket("/live", Socket, { hooks: Hooks, params: { _csrf_token: csrfToken }, }); topbar.config({ barColors: { 0: "#29d" }, shadowColor: "rgba(0, 0, 0, .3)" }); window.addEventListener("phx:page-loading-start", () => topbar.show()); window.addEventListener("phx:page-loading-stop", () => topbar.hide()); liveSocket.connect(); window.liveSocket = liveSocket; ================================================ FILE: assets/package.json ================================================ { "dependencies": { "@supabase/supabase-js": "2.100.0-canary.0" } } ================================================ FILE: assets/tailwind.config.js ================================================ const plugin = require("tailwindcss/plugin") const colors = require('tailwindcss/colors') module.exports = { content: [ './js/**/*.js', '../lib/*_web.ex', '../lib/*_web/**/*.*ex', ], theme: { colors: { transparent: 'transparent', current: 'currentColor', black: colors.black, white: colors.white, gray: colors.gray, emerald: colors.emerald, indigo: colors.indigo, yellow: colors.yellow, green: colors.green }, fontFamily: { sans: ['custom-font', 'Helvetica Neue', 'Helvetica', 'Arial', 'sans-serif'], mono: ['Source Code Pro', 'Menlo', 'monospace'], }, }, plugins: [ require("@tailwindcss/forms"), require('@tailwindcss/typography'), plugin(({addVariant}) => addVariant("phx-no-feedback", [".phx-no-feedback&", ".phx-no-feedback &"])), plugin(({addVariant}) => addVariant("phx-click-loading", [".phx-click-loading&", ".phx-click-loading &"])), plugin(({addVariant}) => addVariant("phx-submit-loading", [".phx-submit-loading&", ".phx-submit-loading &"])), plugin(({addVariant}) => addVariant("phx-change-loading", [".phx-change-loading&", ".phx-change-loading &"])) ] }; ================================================ FILE: assets/vendor/topbar.js ================================================ /** * @license MIT * topbar 1.0.0, 2021-01-06 * https://buunguyen.github.io/topbar * Copyright 
// topbar: draws a thin progress bar on a fixed <canvas> at the top of the
// page. The public object exposes `config`, `show`, `progress` and `hide`.
(function (window, document) {
  "use strict";

  // requestAnimationFrame / cancelAnimationFrame polyfill: tries the vendor
  // prefixed versions first, then falls back to a setTimeout-based timer.
  // https://gist.github.com/paulirish/1579671
  (function () {
    var lastTime = 0;
    var vendors = ["ms", "moz", "webkit", "o"];
    for (var x = 0; x < vendors.length && !window.requestAnimationFrame; ++x) {
      window.requestAnimationFrame =
        window[vendors[x] + "RequestAnimationFrame"];
      window.cancelAnimationFrame =
        window[vendors[x] + "CancelAnimationFrame"] ||
        window[vendors[x] + "CancelRequestAnimationFrame"];
    }
    if (!window.requestAnimationFrame)
      window.requestAnimationFrame = function (callback, element) {
        var currTime = new Date().getTime();
        // Aim for one callback roughly every 16 ms.
        var timeToCall = Math.max(0, 16 - (currTime - lastTime));
        var id = window.setTimeout(function () {
          callback(currTime + timeToCall);
        }, timeToCall);
        lastTime = currTime + timeToCall;
        return id;
      };
    if (!window.cancelAnimationFrame)
      window.cancelAnimationFrame = function (id) {
        clearTimeout(id);
      };
  })();

  // Module state. `canvas` is created lazily on the first `show()`;
  // `progressTimerId`, `fadeTimerId`, `currentProgress` and `showing`
  // all start out undefined.
  var canvas,
    progressTimerId,
    fadeTimerId,
    currentProgress,
    showing,
    // Cross-browser event registration (addEventListener, attachEvent,
    // or a plain `on<type>` property as the last resort).
    addEvent = function (elem, type, handler) {
      if (elem.addEventListener) elem.addEventListener(type, handler, false);
      else if (elem.attachEvent) elem.attachEvent("on" + type, handler);
      else elem["on" + type] = handler;
    },
    // Defaults; `topbar.config` can override any of these keys.
    options = {
      autoRun: true,
      barThickness: 3,
      // Gradient stops: key is the offset (0..1), value is the color.
      barColors: {
        0: "rgba(26, 188, 156, .9)",
        ".25": "rgba(52, 152, 219, .9)",
        ".50": "rgba(241, 196, 15, .9)",
        ".75": "rgba(230, 126, 34, .9)",
        "1.0": "rgba(211, 84, 0, .9)",
      },
      shadowBlur: 10,
      shadowColor: "rgba(0, 0, 0, .6)",
      className: null,
    },
    // Redraws the bar from scratch: resizes the canvas to the window width
    // and strokes a gradient line whose length is currentProgress * width.
    repaint = function () {
      canvas.width = window.innerWidth;
      canvas.height = options.barThickness * 5; // need space for shadow

      var ctx = canvas.getContext("2d");
      ctx.shadowBlur = options.shadowBlur;
      ctx.shadowColor = options.shadowColor;

      var lineGradient = ctx.createLinearGradient(0, 0, canvas.width, 0);
      for (var stop in options.barColors)
        lineGradient.addColorStop(stop, options.barColors[stop]);
      ctx.lineWidth = options.barThickness;
      ctx.beginPath();
      ctx.moveTo(0, options.barThickness / 2);
      ctx.lineTo(
        Math.ceil(currentProgress * canvas.width),
        options.barThickness / 2
      );
      ctx.strokeStyle = lineGradient;
      ctx.stroke();
    },
    // Creates the hidden, fixed-position canvas, appends it to <body> and
    // repaints it whenever the window is resized.
    createCanvas = function () {
      canvas = document.createElement("canvas");
      var style = canvas.style;
      style.position = "fixed";
      style.top = style.left = style.right = style.margin = style.padding = 0;
      style.zIndex = 100001;
      style.display = "none";
      if (options.className) canvas.classList.add(options.className);
      document.body.appendChild(canvas);
      addEvent(window, "resize", repaint);
    },
    topbar = {
      // Overrides options. Only keys that already exist in `options` are
      // copied; unknown keys in `opts` are ignored.
      config: function (opts) {
        for (var key in opts)
          if (options.hasOwnProperty(key)) options[key] = opts[key];
      },
      // Shows the bar at progress 0. No-op when it is already showing.
      show: function () {
        if (showing) return;
        showing = true;
        // NOTE(review): `fadeTimerId` starts out undefined, so this check
        // also passes before any fade has been scheduled.
        if (fadeTimerId !== null) window.cancelAnimationFrame(fadeTimerId);
        if (!canvas) createCanvas();
        canvas.style.opacity = 1;
        canvas.style.display = "block";
        topbar.progress(0);
        if (options.autoRun) {
          // Advance once per animation frame; the increment shrinks as
          // currentProgress approaches 1.
          (function loop() {
            progressTimerId = window.requestAnimationFrame(loop);
            topbar.progress(
              "+" + 0.05 * Math.pow(1 - Math.sqrt(currentProgress), 2)
            );
          })();
        }
      },
      // Getter/setter for the progress value.
      //   progress()        -> returns the current progress
      //   progress(number)  -> sets it (values above 1 are capped at 1)
      //   progress(string)  -> a string containing "+" or "-" is applied
      //                        relative to the current progress; any other
      //                        string is parsed as an absolute value
      // Setting a value repaints the bar and returns the new progress.
      progress: function (to) {
        if (typeof to === "undefined") return currentProgress;
        if (typeof to === "string") {
          to =
            (to.indexOf("+") >= 0 || to.indexOf("-") >= 0
              ? currentProgress
              : 0) + parseFloat(to);
        }
        currentProgress = to > 1 ? 1 : to;
        repaint();
        return currentProgress;
      },
      // Completes and fades out the bar. No-op when it is not showing.
      hide: function () {
        if (!showing) return;
        showing = false;
        // Stop the auto-run loop started by `show()`.
        if (progressTimerId != null) {
          window.cancelAnimationFrame(progressTimerId);
          progressTimerId = null;
        }
        // Each frame: advance progress by 0.1; once it reaches 1, lower the
        // opacity by 0.05 per frame and hide the canvas when it is <= 0.05.
        (function loop() {
          if (topbar.progress("+.1") >= 1) {
            canvas.style.opacity -= 0.05;
            if (canvas.style.opacity <= 0.05) {
              canvas.style.display = "none";
              fadeTimerId = null;
              return;
            }
          }
          fadeTimerId = window.requestAnimationFrame(loop);
        })();
      },
    };

  // Export as a CommonJS module, an AMD module, or a property on `this`.
  if (typeof module === "object" && typeof module.exports === "object") {
    module.exports = topbar;
  } else if (typeof define === "function" && define.amd) {
    define(function () {
      return topbar;
    });
  } else {
    this.topbar = topbar;
  }
}.call(this, window, document));
A node can have thousands of processes joining and leaving hundreds of groups while sending just the membership count to other nodes. The main features are: * Process pids are available only to the node where the processes reside; * Groups are partitioned locally to allow greater concurrency while joining different groups; * Group counts are periodically broadcast (defaults to every 5 seconds) to update group membership numbers to all participating nodes; * Sub-cluster nodes join by using the same scope; ## Installation The package can be installed by adding `beacon` to your list of dependencies in `mix.exs`: ```elixir def deps do [ {:beacon, "~> 1.0"} ] end ``` ## Using Add Beacon to your application's supervision tree specifying a scope name (here it's `:users`) ```elixir def start(_type, _args) do children = [ {Beacon, :users}, # Or passing options: # {Beacon, [:users, opts]} # See Beacon.start_link/2 for the options ``` Now processes can join groups ```elixir iex> pid = self() #PID<0.852.0> iex> Beacon.join(:users, {:tenant, 123}, pid) :ok iex> Beacon.local_member_count(:users, {:tenant, 123}) 1 iex> Beacon.local_members(:users, {:tenant, 123}) [#PID<0.852.0>] iex> Beacon.local_member?(:users, {:tenant, 123}, pid) true ``` From another node that is part of the same scope: ```elixir iex> Beacon.member_counts(:users) %{{:tenant, 123} => 1} iex> Beacon.member_count(:users, {:tenant, 123}) 1 ``` ================================================ FILE: beacon/config/config.exs ================================================ import Config # Print nothing during tests unless captured or a test failure happens config :logger, backends: [], level: :debug ================================================ FILE: beacon/lib/beacon/adapter/erl_dist.ex ================================================ defmodule Beacon.Adapter.ErlDist do @moduledoc false import Kernel, except: [send: 2] @behaviour Beacon.Adapter @impl true def register(scope) do Process.register(self(),
Beacon.Supervisor.name(scope))
    :ok
  end

  # Sends `message` to the process registered under the scope name on every
  # node returned by Node.list().
  @impl true
  def broadcast(scope, message) do
    name = Beacon.Supervisor.name(scope)
    Enum.each(Node.list(), fn node -> :erlang.send({name, node}, message, [:noconnect]) end)
  end

  # Same as broadcast/2 but restricted to the given list of nodes.
  @impl true
  def broadcast(scope, nodes, message) do
    name = Beacon.Supervisor.name(scope)
    Enum.each(nodes, fn node -> :erlang.send({name, node}, message, [:noconnect]) end)
  end

  # Sends `message` to the process registered under the scope name on `node`.
  @impl true
  def send(scope, node, message) do
    :erlang.send({Beacon.Supervisor.name(scope), node}, message, [:noconnect])
  end
end

================================================
FILE: beacon/lib/beacon/adapter.ex
================================================
defmodule Beacon.Adapter do
  @moduledoc """
  Behaviour module for Beacon messaging adapters.
  """

  @doc "Register the current process to receive messages for the given scope"
  @callback register(scope :: atom) :: :ok

  @doc "Broadcast a message to all nodes in the given scope"
  @callback broadcast(scope :: atom, message :: term) :: any

  @doc "Broadcast a message to specific nodes in the given scope"
  @callback broadcast(scope :: atom, [node], message :: term) :: any

  @doc "Send a message to a specific node in the given scope"
  @callback send(scope :: atom, node, message :: term) :: any
end

================================================
FILE: beacon/lib/beacon/partition.ex
================================================
defmodule Beacon.Partition do
  @moduledoc false
  # GenServer that owns the membership bookkeeping of one partition.
  #
  # Two named ETS tables are involved:
  #   * the counters table, named after the partition, holding `{group, count}`;
  #   * the entries table, holding one `{{group, pid}}` row per membership.
  # Reads go straight to ETS; joins and leaves are serialized through the server.

  use GenServer
  require Logger

  defmodule State do
    @moduledoc false
    @type t :: %__MODULE__{
            name: atom,
            scope: atom,
            entries_table: atom,
            monitors: %{{Beacon.group(), pid} => reference}
          }
    defstruct [:name, :scope, :entries_table, monitors: %{}]
  end

  @spec join(atom, Beacon.group(), pid) :: :ok
  def join(partition_name, group, pid) do
    GenServer.call(partition_name, {:join, group, pid})
  end

  @spec leave(atom, Beacon.group(), pid) :: :ok
  def leave(partition_name, group, pid) do
    GenServer.call(partition_name, {:leave, group, pid})
  end

  # Pids stored for `group` in the entries table of this partition.
  @spec members(atom, Beacon.group()) :: [pid]
  def members(partition_name, group) do
    entries = Beacon.Supervisor.partition_entries_table(partition_name)
    :ets.select(entries, [{{{group, :"$1"}}, [], [:"$1"]}])
  end

  # Counter stored for `group`, or 0 when the group has no row.
  @spec member_count(atom, Beacon.group()) :: non_neg_integer
  def member_count(partition_name, group) do
    :ets.lookup_element(partition_name, group, 2, 0)
  end

  # Whole counters table as a `%{group => count}` map.
  @spec member_counts(atom) :: %{Beacon.group() => non_neg_integer}
  def member_counts(partition_name) do
    Map.new(:ets.tab2list(partition_name))
  end

  @spec member?(atom, Beacon.group(), pid) :: boolean
  def member?(partition_name, group, pid) do
    entries = Beacon.Supervisor.partition_entries_table(partition_name)

    case :ets.lookup(entries, {group, pid}) do
      [{{^group, ^pid}}] -> true
      [] -> false
    end
  end

  # Keys of the counters table.
  @spec groups(atom) :: [Beacon.group()]
  def groups(partition_name) do
    :ets.select(partition_name, [{{:"$1", :_}, [], [:"$1"]}])
  end

  # Number of rows in the counters table.
  @spec group_count(atom) :: non_neg_integer
  def group_count(partition_name) do
    :ets.info(partition_name, :size)
  end

  @spec start_link(atom, atom, atom) :: GenServer.on_start()
  def start_link(scope, partition_name, partition_entries_table) do
    init_arg = [scope, partition_name, partition_entries_table]
    GenServer.start_link(__MODULE__, init_arg, name: partition_name)
  end

  @impl true
  @spec init(any) :: {:ok, State.t()}
  def init([scope, name, entries_table]) do
    initial = %State{scope: scope, name: name, entries_table: entries_table}
    {:ok, initial, {:continue, :rebuild_monitors_and_counters}}
  end

  @impl true
  @spec handle_continue(:rebuild_monitors_and_counters, State.t()) :: {:noreply, State.t()}
  def handle_continue(:rebuild_monitors_and_counters, state) do
    # Counters are wiped and recomputed from the entries table, and every
    # stored pid is monitored again by this (new) server process.
    :ets.delete_all_objects(state.name)

    rebuilt =
      state.entries_table
      |> :ets.tab2list()
      |> Map.new(fn {{group, pid}} ->
        ref = Process.monitor(pid, tag: {:DOWN, group})
        :ets.update_counter(state.name, group, {2, 1}, {group, 0})
        {{group, pid}, ref}
      end)

    {:noreply, %{state | monitors: rebuilt}}
  end

  @impl true
  @spec handle_call({:join, Beacon.group(), pid}, GenServer.from(), State.t()) ::
          {:reply, :ok, State.t()}
  def handle_call({:join, group, pid}, _from, state) do
    case :ets.insert_new(state.entries_table, {{group, pid}}) do
      # Already a member: joining again changes nothing.
      false ->
        {:reply, :ok, state}

      true ->
        case :ets.lookup_element(state.name, group, 2, 0) do
          0 ->
            # First member of the group: emit the :occupied event.
            :ets.insert(state.name, {group, 1})
            :telemetry.execute([:beacon, state.scope, :group, :occupied], %{}, %{group: group})

          current when current > 0 ->
            :ets.insert(state.name, {group, current + 1})
        end

        # The group travels in the monitor tag so the DOWN message carries it.
        ref = Process.monitor(pid, tag: {:DOWN, group})
        {:reply, :ok, %{state | monitors: Map.put(state.monitors, {group, pid}, ref)}}
    end
  end

  def handle_call({:leave, group, pid}, _from, state) do
    {:reply, :ok, remove(group, pid, state)}
  end

  @impl true
  @spec handle_info({{:DOWN, Beacon.group()}, reference, :process, pid, term}, State.t()) ::
          {:noreply, State.t()}
  def handle_info({{:DOWN, group}, _ref, :process, pid, _reason}, state) do
    {:noreply, remove(group, pid, state)}
  end

  def handle_info(_, state), do: {:noreply, state}

  # Drops the `{group, pid}` membership: removes the entry, decrements or
  # deletes the group counter (emitting :vacant when the last member goes),
  # and releases the monitor. Unknown memberships only log a warning.
  defp remove(group, pid, state) do
    key = {group, pid}

    case :ets.lookup(state.entries_table, key) do
      [{^key}] ->
        :ets.delete(state.entries_table, key)

        # Delete or decrement counter
        case :ets.lookup_element(state.name, group, 2, 0) do
          1 ->
            :ets.delete(state.name, group)
            :telemetry.execute([:beacon, state.scope, :group, :vacant], %{}, %{group: group})

          current when current > 1 ->
            :ets.update_counter(state.name, group, {2, -1})
        end

      [] ->
        Logger.warning(
          "Beacon[#{node()}|#{state.scope}] Trying to remove an unknown process #{inspect(pid)}"
        )

        :ok
    end

    case Map.pop(state.monitors, key) do
      {nil, _} ->
        state

      {ref, remaining} ->
        Process.demonitor(ref, [:flush])
        %{state | monitors: remaining}
    end
  end
end

================================================
FILE: beacon/lib/beacon/scope.ex
================================================
defmodule Beacon.Scope do
  @moduledoc false
  # Responsible to discover and keep track of all Beacon peers in
# the cluster

  use GenServer
  require Logger

  @default_broadcast_interval 5_000

  # Sums the member counts reported by every peer node, per group.
  @spec member_counts(atom) :: %{Beacon.group() => non_neg_integer}
  def member_counts(scope) do
    scope
    |> table_name()
    |> :ets.select([{{:_, :"$1"}, [], [:"$1"]}])
    |> Enum.reduce(%{}, fn member_counts, acc ->
      Map.merge(acc, member_counts, fn _k, v1, v2 -> v1 + v2 end)
    end)
  end

  # Sums the counts reported for `group` across all peer nodes.
  @spec member_count(atom, Beacon.group()) :: non_neg_integer
  def member_count(scope, group) do
    scope
    |> table_name()
    |> :ets.select([{{:_, %{group => :"$1"}}, [], [:"$1"]}])
    |> Enum.sum()
  end

  # Count reported for `group` by one specific peer node (0 when unknown).
  @spec member_count(atom, Beacon.group(), node) :: non_neg_integer
  def member_count(scope, group, node) do
    case :ets.lookup(table_name(scope), node) do
      [{^node, member_counts}] -> Map.get(member_counts, group, 0)
      [] -> 0
    end
  end

  # Union of the groups reported by every peer node.
  @spec groups(atom) :: MapSet.t(Beacon.group())
  def groups(scope) do
    scope
    |> table_name()
    |> :ets.select([{{:_, :"$1"}, [], [:"$1"]}])
    |> Enum.reduce(MapSet.new(), fn member_counts, acc ->
      member_counts
      |> Map.keys()
      |> MapSet.new()
      |> MapSet.union(acc)
    end)
  end

  @typep member_counts :: %{Beacon.group() => non_neg_integer}

  # Name of the ETS table holding one `{node, member_counts}` row per peer.
  defp table_name(scope), do: :"#{scope}_beacon_peer_counts"

  defmodule State do
    @moduledoc false
    # `peer_counts_table` is created with `:named_table`, so `:ets.new/2`
    # returns the table name (an atom), hence `:ets.table()` and not `:ets.tid()`.
    @type t :: %__MODULE__{
            scope: atom,
            message_module: module,
            broadcast_interval: non_neg_integer,
            peer_counts_table: :ets.table(),
            peers: %{pid => reference}
          }
    defstruct [
      :scope,
      :message_module,
      :broadcast_interval,
      :peer_counts_table,
      peers: %{}
    ]
  end

  @spec start_link(atom, Keyword.t()) :: GenServer.on_start()
  def start_link(scope, opts \\ []), do: GenServer.start_link(__MODULE__, [scope, opts])

  @impl true
  def init([scope, opts]) do
    # Subscribe to :nodeup / :nodedown messages.
    :ok = :net_kernel.monitor_nodes(true)

    peer_counts_table =
      :ets.new(table_name(scope), [:set, :protected, :named_table, read_concurrency: true])

    broadcast_interval =
      Keyword.get(opts, :broadcast_interval_in_ms, @default_broadcast_interval)

    message_module = Keyword.get(opts, :message_module, Beacon.Adapter.ErlDist)

    Logger.info("Beacon[#{node()}|#{scope}] Starting")

    :ok = message_module.register(scope)

    {:ok,
     %State{
       scope: scope,
       message_module: message_module,
       broadcast_interval: broadcast_interval,
       peer_counts_table: peer_counts_table
     }, {:continue, :discover}}
  end

  @impl true
  @spec handle_continue(:discover, State.t()) :: {:noreply, State.t()}
  def handle_continue(:discover, state) do
    # Announce ourselves to the other nodes and schedule the first periodic
    # broadcast of local counts.
    state.message_module.broadcast(state.scope, {:discover, self()})
    Process.send_after(self(), :broadcast_counts, state.broadcast_interval)
    {:noreply, state}
  end

  @impl true
  @spec handle_info(
          {:discover, pid}
          | {:sync, pid, member_counts}
          | :broadcast_counts
          | {:nodeup, node}
          | {:nodedown, node}
          | {:DOWN, reference, :process, pid, term},
          State.t()
        ) :: {:noreply, State.t()}
  # A remote peer is discovering us
  def handle_info({:discover, peer}, state) do
    Logger.info(
      "Beacon[#{node()}|#{state.scope}] Received DISCOVER request from node #{node(peer)}"
    )

    # Always answer with our current local counts, known peer or not.
    state.message_module.send(
      state.scope,
      node(peer),
      {:sync, self(), Beacon.local_member_counts(state.scope)}
    )

    # We don't do anything if we already know about this peer
    if Map.has_key?(state.peers, peer) do
      Logger.debug(
        "Beacon[#{node()}|#{state.scope}] already know peer #{inspect(peer)} from node #{node(peer)}"
      )

      {:noreply, state}
    else
      Logger.debug(
        "Beacon[#{node()}|#{state.scope}] discovered peer #{inspect(peer)} from node #{node(peer)}"
      )

      ref = Process.monitor(peer)
      new_peers = Map.put(state.peers, peer, ref)
      # Ask the new peer to discover us back so both sides end up monitoring
      # each other.
      state.message_module.send(state.scope, node(peer), {:discover, self()})
      {:noreply, %State{state | peers: new_peers}}
    end
  end

  # A remote peer has sent us its local member counts
  def handle_info({:sync, peer, member_counts}, state) do
    :ets.insert(state.peer_counts_table, {node(peer), member_counts})
    {:noreply, state}
  end

  # Periodic broadcast of our local member counts to all known peers
  def handle_info(:broadcast_counts, state) do
    nodes =
      state.peers
      |> Map.keys()
      |> Enum.map(&node/1)

    state.message_module.broadcast(
      state.scope,
      nodes,
      {:sync, self(), Beacon.local_member_counts(state.scope)}
    )

    # Re-arm the timer for the next round.
    Process.send_after(self(), :broadcast_counts, state.broadcast_interval)
    {:noreply, state}
  end

  # Do nothing if the node that came up is our own node
  def handle_info({:nodeup, node}, state) when node == node(), do: {:noreply, state}

  # Send a discover message to the node that just connected
  def handle_info({:nodeup, node}, state) do
    :telemetry.execute([:beacon, state.scope, :node, :up], %{}, %{node: node})

    Logger.info(
      "Beacon[#{node()}|#{state.scope}] Node #{node} has joined the cluster, sending discover message"
    )

    state.message_module.send(state.scope, node, {:discover, self()})
    {:noreply, state}
  end

  # Do nothing and wait for the DOWN message from monitor
  def handle_info({:nodedown, _node}, state), do: {:noreply, state}

  # A remote peer has disconnected/crashed
  # We forget about it and remove its member counts
  def handle_info({:DOWN, ref, :process, peer, reason}, state) do
    Logger.info(
      "Beacon[#{node()}|#{state.scope}] Scope process is DOWN on node #{node(peer)}: #{inspect(reason)}"
    )

    case Map.pop(state.peers, peer) do
      {nil, _} ->
        {:noreply, state}

      {^ref, new_peers} ->
        :ets.delete(state.peer_counts_table, node(peer))
        :telemetry.execute([:beacon, state.scope, :node, :down], %{}, %{node: node(peer)})
        {:noreply, %State{state | peers: new_peers}}
    end
  end

  def handle_info(_msg, state), do: {:noreply, state}
end

================================================
FILE: beacon/lib/beacon/supervisor.ex
================================================
defmodule Beacon.Supervisor do
  @moduledoc false
  use Supervisor

  # Naming helpers: every registered name and table name is derived from the
  # scope atom.
  def name(scope), do: :"#{scope}_beacon"
  def supervisor_name(scope), do: :"#{scope}_beacon_supervisor"
  def partition_name(scope, partition), do: :"#{scope}_beacon_partition_#{partition}"
  def partition_entries_table(partition_name), do: :"#{partition_name}_entries"

  # Picks the partition for `group` by hashing it over the tuple of partition
  # names stored in :persistent_term under the scope. Raises when the scope
  # has not been started.
  @spec partition(atom, Beacon.group()) :: atom
  def partition(scope, group) do
    case :persistent_term.get(scope, :unknown) do
      :unknown -> raise "Beacon for scope #{inspect(scope)} is not started"
      partition_names ->
elem(partition_names, :erlang.phash2(group, tuple_size(partition_names)))
    end
  end

  # All partition names of `scope`, in index order. Raises when the scope has
  # not been started.
  @spec partitions(atom) :: [atom]
  def partitions(scope) do
    case :persistent_term.get(scope, :unknown) do
      :unknown -> raise "Beacon for scope #{inspect(scope)} is not started"
      partition_names -> Tuple.to_list(partition_names)
    end
  end

  @spec start_link(atom, pos_integer(), Keyword.t()) :: Supervisor.on_start()
  def start_link(scope, partitions, opts \\ []) do
    args = [scope, partitions, opts]
    Supervisor.start_link(__MODULE__, args, name: supervisor_name(scope))
  end

  @impl true
  def init([scope, partitions, opts]) do
    children =
      for i <- 0..(partitions - 1) do
        partition_name = partition_name(scope, i)
        partition_entries_table = partition_entries_table(partition_name)

        # Both tables are created here, in the supervisor process, and are
        # only handed to the partition by name.
        ^partition_entries_table =
          :ets.new(partition_entries_table, [:set, :public, :named_table, read_concurrency: true])

        ^partition_name =
          :ets.new(partition_name, [:set, :public, :named_table, read_concurrency: true])

        %{
          id: i,
          start: {Beacon.Partition, :start_link, [scope, partition_name, partition_entries_table]}
        }
      end

    # Publish the partition names as a tuple so partition/2 can index by hash.
    partition_names = for i <- 0..(partitions - 1), do: partition_name(scope, i)
    :persistent_term.put(scope, List.to_tuple(partition_names))

    children = [
      %{id: :scope, start: {Beacon.Scope, :start_link, [scope, opts]}} | children
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end
end

================================================
FILE: beacon/lib/beacon.ex
================================================
defmodule Beacon do
  @moduledoc """
  Distributed process group membership tracking.
  """

  alias Beacon.Partition
  alias Beacon.Scope

  @type group :: any

  # Mirrors the options validated and documented in start_link/2:
  # the interval must be strictly positive, and `:message_module` is accepted.
  @type start_option ::
          {:partitions, pos_integer()}
          | {:broadcast_interval_in_ms, pos_integer()}
          | {:message_module, module()}

  @doc "Returns a supervisor child specification for a Beacon scope"
  def child_spec([scope]) when is_atom(scope), do: child_spec([scope, []])
  def child_spec(scope) when is_atom(scope), do: child_spec([scope, []])

  def child_spec([scope, opts]) when is_atom(scope) and is_list(opts) do
    %{
      id: Beacon,
      start: {__MODULE__, :start_link, [scope, opts]},
      type: :supervisor
    }
  end

  @doc """
  Starts the Beacon supervision tree for `scope`.

  Options:

  * `:partitions` - number of partitions to use (default: number of schedulers online)
  * `:broadcast_interval_in_ms` - interval in milliseconds to broadcast membership counts to other nodes (default: 5000 ms)
  * `:message_module` - module implementing `Beacon.Adapter` behaviour (default: `Beacon.Adapter.ErlDist`)

  Raises `ArgumentError` when `:partitions` or `:broadcast_interval_in_ms` is
  given and is not a positive integer.
  """
  @spec start_link(atom, [start_option]) :: Supervisor.on_start()
  def start_link(scope, opts \\ []) when is_atom(scope) do
    # `:partitions` is consumed here; the remaining options are passed down.
    {partitions, opts} = Keyword.pop(opts, :partitions, System.schedulers_online())
    broadcast_interval_in_ms = Keyword.get(opts, :broadcast_interval_in_ms)

    if not (is_integer(partitions) and partitions >= 1) do
      raise ArgumentError,
            "expected :partitions to be a positive integer, got: #{inspect(partitions)}"
    end

    if broadcast_interval_in_ms != nil and
         not (is_integer(broadcast_interval_in_ms) and broadcast_interval_in_ms > 0) do
      raise ArgumentError,
            "expected :broadcast_interval_in_ms to be a positive integer, got: #{inspect(broadcast_interval_in_ms)}"
    end

    Beacon.Supervisor.start_link(scope, partitions, opts)
  end

  @doc "Join pid to group in scope"
  @spec join(atom, group, pid) :: :ok | {:error, :not_local}
  def join(_scope, _group, pid) when is_pid(pid) and node(pid) != node(),
    do: {:error, :not_local}

  def join(scope, group, pid) when is_atom(scope) and is_pid(pid) do
    Partition.join(Beacon.Supervisor.partition(scope, group), group, pid)
  end

  @doc "Leave pid from group in scope"
  @spec leave(atom, group, pid) :: :ok
  def leave(scope, group, pid) when is_atom(scope) and is_pid(pid) do
    Partition.leave(Beacon.Supervisor.partition(scope, group), group, pid)
  end

  @doc "Get total members count per group in scope"
  @spec member_counts(atom) :: %{group => non_neg_integer}
  def member_counts(scope) when is_atom(scope) do
    remote_counts = Scope.member_counts(scope)

    scope
    |> local_member_counts()
    |> Map.merge(remote_counts, fn _k, v1, v2 -> v1 + v2 end)
  end

  @doc "Get total member count of group in scope"
  @spec member_count(atom, group) :: non_neg_integer
  def member_count(scope, group) do
    local_member_count(scope, group) + Scope.member_count(scope, group)
  end

  @doc "Get total member count of group in scope on specific node"
  @spec member_count(atom, group, node) :: non_neg_integer
  def member_count(scope, group, node) when node == node(),
    do: local_member_count(scope, group)

  def member_count(scope, group, node), do: Scope.member_count(scope, group, node)

  @doc "Get local members of group in scope"
  @spec local_members(atom, group) :: [pid]
  def local_members(scope, group) when is_atom(scope) do
    Partition.members(Beacon.Supervisor.partition(scope, group), group)
  end

  @doc "Get local member count of group in scope"
  @spec local_member_count(atom, group) :: non_neg_integer
  def local_member_count(scope, group) when is_atom(scope) do
    Partition.member_count(Beacon.Supervisor.partition(scope, group), group)
  end

  @doc "Get local members count per group in scope"
  @spec local_member_counts(atom) :: %{group => non_neg_integer}
  def local_member_counts(scope) when is_atom(scope) do
    Enum.reduce(Beacon.Supervisor.partitions(scope), %{}, fn partition_name, acc ->
      Map.merge(acc, Partition.member_counts(partition_name))
    end)
  end

  @doc "Check if pid is a local member of group in scope"
  @spec local_member?(atom, group, pid) :: boolean
  def local_member?(scope, group, pid) when is_atom(scope) and is_pid(pid) do
    Partition.member?(Beacon.Supervisor.partition(scope, group), group, pid)
  end

  @doc "Get all local groups in scope"
  @spec local_groups(atom) :: [group]
  def local_groups(scope) when is_atom(scope) do
    Enum.flat_map(Beacon.Supervisor.partitions(scope), fn partition_name ->
      Partition.groups(partition_name)
    end)
  end

  @doc "Get local group count in scope"
  @spec local_group_count(atom) :: non_neg_integer
  def local_group_count(scope) when is_atom(scope) do
    Enum.sum_by(Beacon.Supervisor.partitions(scope), fn partition_name ->
      Partition.group_count(partition_name)
    end)
  end

  @doc "Get groups in scope"
  @spec groups(atom) :: [group]
  def groups(scope) when is_atom(scope) do
    remote_groups = Scope.groups(scope)

    scope
    |> local_groups()
    |> MapSet.new()
    |> MapSet.union(remote_groups)
    |> MapSet.to_list()
  end

  @doc "Get group count in scope"
  @spec group_count(atom) :: non_neg_integer
  def group_count(scope) when is_atom(scope) do
    # groups/1 already returns the de-duplicated union of local and remote
    # groups, so its length is the group count.
    scope
    |> groups()
    |> length()
  end
end

================================================
FILE: beacon/mix.exs
================================================
defmodule Beacon.MixProject do
  use Mix.Project

  def project do
    [
      app: :beacon,
      version: "1.0.0",
      elixir: "~> 1.18",
      start_permanent: Mix.env() == :prod,
      elixirc_paths: elixirc_paths(Mix.env()),
      deps: deps()
    ]
  end

  # Run "mix help compile.app" to learn about applications.
  def application do
    [
      extra_applications: [:logger]
    ]
  end

  # Specifies which paths to compile per environment.
  defp elixirc_paths(:test), do: ["lib", "test/support"]
  defp elixirc_paths(_), do: ["lib"]

  # Run "mix help deps" to learn about dependencies.
defp deps do
    [
      {:telemetry, "~> 1.3"},
      {:mix_test_watch, "~> 1.0", only: [:dev, :test], runtime: false}
      # {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"}
    ]
  end
end

================================================
FILE: beacon/test/beacon/partition_test.exs
================================================
defmodule Beacon.PartitionTest do
  use ExUnit.Case, async: true
  alias Beacon.Partition

  @scope __MODULE__

  # Each test gets its own partition: the two named ETS tables are created by
  # the test process, the partition server is started on top of them, and the
  # test process is attached to the :occupied / :vacant telemetry events.
  setup do
    partition_name = Beacon.Supervisor.partition_name(@scope, System.unique_integer([:positive]))
    entries_table = Beacon.Supervisor.partition_entries_table(partition_name)

    ^partition_name =
      :ets.new(partition_name, [:set, :public, :named_table, read_concurrency: true])

    ^entries_table =
      :ets.new(entries_table, [:set, :public, :named_table, read_concurrency: true])

    spec = %{
      id: partition_name,
      start: {Partition, :start_link, [@scope, partition_name, entries_table]},
      type: :supervisor,
      restart: :temporary
    }

    pid = start_supervised!(spec)

    ref =
      :telemetry_test.attach_event_handlers(self(), [
        [:beacon, @scope, :group, :occupied],
        [:beacon, @scope, :group, :vacant]
      ])

    {:ok, partition_name: partition_name, partition_pid: pid, ref: ref}
  end

  test "members/2 returns empty list for non-existent group", %{partition_name: partition} do
    assert Partition.members(partition, :nonexistent) == []
  end

  test "member_count/2 returns 0 for non-existent group", %{partition_name: partition} do
    assert Partition.member_count(partition, :nonexistent) == 0
  end

  test "member?/3 returns false for non-member", %{partition_name: partition} do
    pid = spawn_link(fn -> Process.sleep(:infinity) end)
    refute Partition.member?(partition, :group1, pid)
  end

  test "join and query member", %{partition_name: partition, ref: ref} do
    pid = spawn_link(fn -> Process.sleep(:infinity) end)

    assert :ok = Partition.join(partition, :group9, pid)

    assert Partition.member?(partition, :group9, pid)
    assert Partition.member_count(partition, :group9) == 1
    assert pid in Partition.members(partition, :group9)

    assert_receive {[:beacon, @scope, :group, :occupied], ^ref, %{}, %{group: :group9}}
    refute_receive {_, ^ref, _, _}
  end

  test "join multiple times and query member", %{partition_name: partition, ref: ref} do
    pid = spawn_link(fn -> Process.sleep(:infinity) end)

    assert :ok = Partition.join(partition, :group1, pid)
    assert :ok = Partition.join(partition, :group1, pid)
    assert :ok = Partition.join(partition, :group1, pid)

    assert Partition.member?(partition, :group1, pid)
    assert Partition.member_count(partition, :group1) == 1
    assert pid in Partition.members(partition, :group1)

    assert_receive {[:beacon, @scope, :group, :occupied], ^ref, %{}, %{group: :group1}}
    refute_receive {_, ^ref, _, _}
  end

  test "occupied event only when first member joins", %{partition_name: partition, ref: ref} do
    pid1 = spawn_link(fn -> Process.sleep(:infinity) end)
    pid2 = spawn_link(fn -> Process.sleep(:infinity) end)

    Partition.join(partition, :group1, pid1)
    Partition.join(partition, :group1, pid2)

    assert_receive {[:beacon, @scope, :group, :occupied], ^ref, %{}, %{group: :group1}}
    refute_receive {_, ^ref, _, _}
  end

  test "leave removes member", %{partition_name: partition, ref: ref} do
    pid = spawn_link(fn -> Process.sleep(:infinity) end)

    Partition.join(partition, :group1, pid)
    assert Partition.member?(partition, :group1, pid)

    Partition.leave(partition, :group1, pid)
    refute Partition.member?(partition, :group1, pid)

    assert_receive {[:beacon, @scope, :group, :occupied], ^ref, %{}, %{group: :group1}}
    assert_receive {[:beacon, @scope, :group, :vacant], ^ref, %{}, %{group: :group1}}
    refute_receive {_, ^ref, _, _}
  end

  test "vacant event only when no members left", %{partition_name: partition, ref: ref} do
    pid1 = spawn_link(fn -> Process.sleep(:infinity) end)
    pid2 = spawn_link(fn -> Process.sleep(:infinity) end)

    Partition.join(partition, :group1, pid1)
    Partition.join(partition, :group1, pid2)

    assert_receive {[:beacon, @scope, :group, :occupied], ^ref, %{}, %{group: :group1}}
    refute_receive {_, ^ref, _, _}

    Partition.leave(partition, :group1, pid1)
    refute_receive {_, ^ref, _, _}

    Partition.leave(partition, :group1, pid2)
    assert_receive {[:beacon, @scope, :group, :vacant], ^ref, %{}, %{group: :group1}}
    refute_receive {_, ^ref, _, _}
  end

  test "leave multiple times removes member", %{partition_name: partition, ref: ref} do
    pid = spawn_link(fn -> Process.sleep(:infinity) end)

    Partition.join(partition, :group1, pid)
    assert Partition.member?(partition, :group1, pid)

    Partition.leave(partition, :group1, pid)
    Partition.leave(partition, :group1, pid)
    Partition.leave(partition, :group1, pid)
    refute Partition.member?(partition, :group1, pid)

    assert_receive {[:beacon, @scope, :group, :occupied], ^ref, %{}, %{group: :group1}}
    assert_receive {[:beacon, @scope, :group, :vacant], ^ref, %{}, %{group: :group1}}
    refute_receive {_, ^ref, _, _}
  end

  test "member_counts returns counts for all groups", %{partition_name: partition} do
    pid1 = spawn_link(fn -> Process.sleep(:infinity) end)
    pid2 = spawn_link(fn -> Process.sleep(:infinity) end)
    pid3 = spawn_link(fn -> Process.sleep(:infinity) end)

    Partition.join(partition, :group1, pid1)
    Partition.join(partition, :group1, pid2)
    Partition.join(partition, :group2, pid3)

    counts = Partition.member_counts(partition)
    assert map_size(counts) == 2
    assert counts[:group1] == 2
    assert counts[:group2] == 1
  end

  test "groups returns all groups", %{partition_name: partition} do
    pid1 = spawn_link(fn -> Process.sleep(:infinity) end)
    pid2 = spawn_link(fn -> Process.sleep(:infinity) end)

    Partition.join(partition, :group1, pid1)
    Partition.join(partition, :group2, pid2)

    groups = Partition.groups(partition)
    assert :group1 in groups
    assert :group2 in groups
  end

  test "group_counts returns number of groups", %{partition_name: partition} do
    pid1 = spawn_link(fn -> Process.sleep(:infinity) end)
    pid2 = spawn_link(fn -> Process.sleep(:infinity) end)
    pid3 = spawn_link(fn -> Process.sleep(:infinity) end)
    pid4 = spawn_link(fn -> Process.sleep(:infinity) end)

    Partition.join(partition, :group1, pid1)
    Partition.join(partition, :group1, pid2)
    Partition.join(partition, :group2, pid3)
    Partition.join(partition, :group3, pid4)

    assert Partition.group_count(partition) == 3
  end

  test "process death removes member from group", %{partition_name: partition} do
    pid = spawn(fn -> Process.sleep(:infinity) end)

    Partition.join(partition, :group1, pid)
    assert Partition.member?(partition, :group1, pid)

    Process.exit(pid, :kill)
    # Give the partition time to process the DOWN message.
    Process.sleep(50)

    refute Partition.member?(partition, :group1, pid)
    assert Partition.member_count(partition, :group1) == 0
  end

  test "partition recovery monitors processes again", %{
    partition_name: partition,
    partition_pid: partition_pid
  } do
    pid1 = spawn(fn -> Process.sleep(:infinity) end)
    pid2 = spawn(fn -> Process.sleep(:infinity) end)

    Partition.join(partition, :group1, pid1)
    Partition.join(partition, :group2, pid2)

    monitors = Process.info(partition_pid, [:monitors])[:monitors] |> Enum.map(&elem(&1, 1))
    # A bare `assert length(monitors)` passes even for an empty list; the
    # partition must be monitoring exactly the two joined pids.
    assert length(monitors) == 2
    assert monitors |> Enum.member?(pid1)
    assert monitors |> Enum.member?(pid2)

    assert %{{:group1, ^pid1} => _ref1, {:group2, ^pid2} => _ref2} =
             :sys.get_state(partition_pid).monitors

    Process.monitor(partition_pid)
    Process.exit(partition_pid, :kill)
    assert_receive {:DOWN, _ref, :process, ^partition_pid, :killed}

    # Start a fresh partition on top of the same (still existing) tables.
    spec = %{
      id: :recover,
      start:
        {Partition, :start_link,
         [@scope, partition, Beacon.Supervisor.partition_entries_table(partition)]},
      type: :supervisor
    }

    partition_pid = start_supervised!(spec)

    assert %{{:group1, ^pid1} => _ref1, {:group2, ^pid2} => _ref2} =
             :sys.get_state(partition_pid).monitors

    monitors = Process.info(partition_pid, [:monitors])[:monitors] |> Enum.map(&elem(&1, 1))
    # The restarted partition must have re-monitored both pids.
    assert length(monitors) == 2
    assert monitors |> Enum.member?(pid1)
    assert monitors |> Enum.member?(pid2)

    assert Partition.member_count(partition, :group1) == 1
    assert Partition.member_count(partition, :group2) == 1

    assert Partition.member?(partition, :group1, pid1)
    assert Partition.member?(partition, :group2, pid2)
  end
end

================================================
FILE: beacon/test/beacon_test.exs
================================================
defmodule BeaconTest do
  use ExUnit.Case, async: true

  setup do
    scope = :"test_scope#{System.unique_integer([:positive])}"
    %{scope: scope}
  end

  defp spec(scope, opts) do
    %{
      id: scope,
      start: {Beacon, :start_link, [scope, opts]},
      type: :supervisor
    }
  end

  describe "start_link/2" do
    test "starts beacon with default partitions", %{scope: scope} do
      pid = start_supervised!({Beacon, [scope, []]})
      assert Process.alive?(pid)
      assert is_list(Beacon.Supervisor.partitions(scope))
      assert length(Beacon.Supervisor.partitions(scope)) == System.schedulers_online()
    end

    test "starts beacon with custom partition count", %{scope: scope} do
      pid = start_supervised!(spec(scope, partitions: 3))
      assert Process.alive?(pid)
      assert length(Beacon.Supervisor.partitions(scope)) == 3
    end

    test "raises on invalid partition count", %{scope: scope} do
      assert_raise ArgumentError, ~r/expected :partitions to be a positive integer/, fn ->
        Beacon.start_link(scope, partitions: 0)
      end

      assert_raise ArgumentError, ~r/expected :partitions to be a positive integer/, fn ->
        Beacon.start_link(scope, partitions: -1)
      end

      assert_raise ArgumentError, ~r/expected :partitions to be a positive integer/, fn ->
        Beacon.start_link(scope, partitions: :invalid)
      end
    end

    test "raises on invalid broadcast_interval_in_ms", %{scope: scope} do
      assert_raise ArgumentError,
                   ~r/expected :broadcast_interval_in_ms to be a positive integer/,
                   fn -> Beacon.start_link(scope, broadcast_interval_in_ms: 0) end

      assert_raise ArgumentError,
                   ~r/expected :broadcast_interval_in_ms to be a positive integer/,
                   fn -> Beacon.start_link(scope, broadcast_interval_in_ms: -1) end

      assert_raise ArgumentError,
                   ~r/expected :broadcast_interval_in_ms to be a positive integer/,
                   fn -> Beacon.start_link(scope, broadcast_interval_in_ms: :invalid) end
    end
  end

  describe "join/3 and leave/3" do
setup %{scope: scope} do start_supervised!(spec(scope, partitions: 2)) :ok end test "can join a group", %{scope: scope} do pid = spawn_link(fn -> Process.sleep(:infinity) end) assert :ok = Beacon.join(scope, :group1, pid) assert Beacon.local_member?(scope, :group1, pid) end test "can leave a group", %{scope: scope} do pid = spawn_link(fn -> Process.sleep(:infinity) end) assert :ok = Beacon.join(scope, :group1, pid) assert Beacon.local_member?(scope, :group1, pid) assert :ok = Beacon.leave(scope, :group1, pid) refute Beacon.local_member?(scope, :group1, pid) end test "joining same group twice is idempotent", %{scope: scope} do pid = spawn_link(fn -> Process.sleep(:infinity) end) assert :ok = Beacon.join(scope, :group1, pid) assert :ok = Beacon.join(scope, :group1, pid) assert Beacon.local_member_count(scope, :group1) == 1 end test "multiple processes can join same group", %{scope: scope} do pid1 = spawn_link(fn -> Process.sleep(:infinity) end) pid2 = spawn_link(fn -> Process.sleep(:infinity) end) assert :ok = Beacon.join(scope, :group1, pid1) assert :ok = Beacon.join(scope, :group1, pid2) members = Beacon.local_members(scope, :group1) assert length(members) == 2 assert pid1 in members assert pid2 in members end test "process can join multiple groups", %{scope: scope} do pid = spawn_link(fn -> Process.sleep(:infinity) end) assert :ok = Beacon.join(scope, :group1, pid) assert :ok = Beacon.join(scope, :group2, pid) assert Beacon.local_member?(scope, :group1, pid) assert Beacon.local_member?(scope, :group2, pid) end test "automatically removes member when process dies", %{scope: scope} do pid = spawn(fn -> Process.sleep(:infinity) end) assert :ok = Beacon.join(scope, :group1, pid) assert Beacon.local_member?(scope, :group1, pid) Process.exit(pid, :kill) Process.sleep(50) refute Beacon.local_member?(scope, :group1, pid) assert Beacon.local_member_count(scope, :group1) == 0 end end describe "local_members/2" do setup %{scope: scope} do start_supervised!(spec(scope, 
partitions: 2)) :ok end test "returns empty list for non-existent group", %{scope: scope} do assert Beacon.local_members(scope, :nonexistent) == [] end test "returns all members of a group", %{scope: scope} do pid1 = spawn_link(fn -> Process.sleep(:infinity) end) pid2 = spawn_link(fn -> Process.sleep(:infinity) end) pid3 = spawn_link(fn -> Process.sleep(:infinity) end) Beacon.join(scope, :group1, pid1) Beacon.join(scope, :group1, pid2) Beacon.join(scope, :group2, pid3) members = Beacon.local_members(scope, :group1) assert length(members) == 2 assert pid1 in members assert pid2 in members refute pid3 in members end end describe "local_member_count/2" do setup %{scope: scope} do start_supervised!(spec(scope, partitions: 2)) :ok end test "returns 0 for non-existent group", %{scope: scope} do assert Beacon.local_member_count(scope, :nonexistent) == 0 end test "returns correct count", %{scope: scope} do pid1 = spawn_link(fn -> Process.sleep(:infinity) end) pid2 = spawn_link(fn -> Process.sleep(:infinity) end) assert Beacon.local_member_count(scope, :group1) == 0 Beacon.join(scope, :group1, pid1) assert Beacon.local_member_count(scope, :group1) == 1 Beacon.join(scope, :group1, pid2) assert Beacon.local_member_count(scope, :group1) == 2 Beacon.leave(scope, :group1, pid1) assert Beacon.local_member_count(scope, :group1) == 1 end end describe "local_member_counts/1" do setup %{scope: scope} do start_supervised!(spec(scope, partitions: 2)) :ok end test "returns empty map when no groups exist", %{scope: scope} do assert Beacon.local_member_counts(scope) == %{} end test "returns counts for all groups", %{scope: scope} do pid1 = spawn_link(fn -> Process.sleep(:infinity) end) pid2 = spawn_link(fn -> Process.sleep(:infinity) end) pid3 = spawn_link(fn -> Process.sleep(:infinity) end) Beacon.join(scope, :group1, pid1) Beacon.join(scope, :group1, pid2) Beacon.join(scope, :group2, pid3) assert Beacon.local_member_counts(scope) == %{ group1: 2, group2: 1 } end end describe 
"local_member?/3" do setup %{scope: scope} do start_supervised!(spec(scope, partitions: 2)) :ok end test "returns false for non-member", %{scope: scope} do pid = spawn_link(fn -> Process.sleep(:infinity) end) refute Beacon.local_member?(scope, :group1, pid) end test "returns true for member", %{scope: scope} do pid = spawn_link(fn -> Process.sleep(:infinity) end) Beacon.join(scope, :group1, pid) assert Beacon.local_member?(scope, :group1, pid) end test "returns false after leaving", %{scope: scope} do pid = spawn_link(fn -> Process.sleep(:infinity) end) Beacon.join(scope, :group1, pid) Beacon.leave(scope, :group1, pid) refute Beacon.local_member?(scope, :group1, pid) end end describe "local_groups/1" do setup %{scope: scope} do start_supervised!(spec(scope, partitions: 2)) :ok end test "returns empty list when no groups exist", %{scope: scope} do assert Beacon.local_groups(scope) == [] end test "returns all groups with members", %{scope: scope} do pid1 = spawn_link(fn -> Process.sleep(:infinity) end) pid2 = spawn_link(fn -> Process.sleep(:infinity) end) Beacon.join(scope, :group1, pid1) Beacon.join(scope, :group2, pid2) Beacon.join(scope, :group3, pid1) groups = Beacon.local_groups(scope) assert :group1 in groups assert :group2 in groups assert :group3 in groups assert length(groups) == 3 end test "removes group from list when last member leaves", %{scope: scope} do pid = spawn_link(fn -> Process.sleep(:infinity) end) Beacon.join(scope, :group1, pid) assert :group1 in Beacon.local_groups(scope) Beacon.leave(scope, :group1, pid) refute :group1 in Beacon.local_groups(scope) end end describe "local_group_count/1" do setup %{scope: scope} do start_supervised!(spec(scope, partitions: 2)) :ok end test "returns 0 when no groups exist", %{scope: scope} do assert Beacon.local_group_count(scope) == 0 end test "returns correct count of groups", %{scope: scope} do pid1 = spawn_link(fn -> Process.sleep(:infinity) end) pid2 = spawn_link(fn -> Process.sleep(:infinity) end) 
Beacon.join(scope, :group1, pid1)
      Beacon.join(scope, :group2, pid2)
      Beacon.join(scope, :group3, pid2)
      Beacon.join(scope, :group3, pid1)

      assert Beacon.local_group_count(scope) == 3

      # pid2 is the only member of :group2, so leaving is expected to drop the group
      Beacon.leave(scope, :group2, pid2)
      assert Beacon.local_group_count(scope) == 2
    end
  end

  describe "member_counts/1" do
    setup %{scope: scope} do
      start_supervised!(spec(scope, partitions: 2))
      :ok
    end

    test "returns local counts when no peers", %{scope: scope} do
      pid1 = spawn_link(fn -> Process.sleep(:infinity) end)
      pid2 = spawn_link(fn -> Process.sleep(:infinity) end)

      Beacon.join(scope, :group1, pid1)
      Beacon.join(scope, :group1, pid2)

      counts = Beacon.member_counts(scope)
      assert counts[:group1] == 2
    end
  end

  describe "partition distribution" do
    setup %{scope: scope} do
      start_supervised!(spec(scope, partitions: 4))
      :ok
    end

    test "distributes groups across partitions", %{scope: scope} do
      # Create multiple processes and verify they're split against different partitions
      pids = for _ <- 1..20, do: spawn_link(fn -> Process.sleep(:infinity) end)

      # Each pid is used as its own group name, giving 20 distinct groups
      Enum.each(pids, fn pid ->
        Beacon.join(scope, pid, pid)
      end)

      # Check that multiple partitions are being used
      partition_names = Beacon.Supervisor.partitions(scope)

      # NOTE(review): `Beacon.Partition.member_counts/1` presumably returns a collection
      # of per-group counts (the result of `Beacon.member_counts/1` above is read with
      # `counts[:group1]`) — confirm. If so, `> 1` compares a collection with an integer,
      # which is always true under Erlang term ordering, so this assertion checks nothing.
      # Something like `map_size(...) > 1` may have been intended; note that a strict
      # threshold over only 20 groups could make the test flaky.
      Enum.map(partition_names, fn partition_name ->
        assert Beacon.Partition.member_counts(partition_name) > 1
      end)
    end

    test "same group always maps to same partition", %{scope: scope} do
      # Repeated lookups for the same group must be stable
      partition1 = Beacon.Supervisor.partition(scope, :my_group)
      partition2 = Beacon.Supervisor.partition(scope, :my_group)
      partition3 = Beacon.Supervisor.partition(scope, :my_group)

      assert partition1 == partition2
      assert partition2 == partition3
    end
  end

  # Quoted helper module handed to the peer node via `aux_mod:` in the distributed tests.
  # `PeerAux.start/1` spawns a long-lived process that starts a Beacon scope and joins
  # one member to :group1 and another to both :group2 and :group3.
  @aux_mod (quote do
              defmodule PeerAux do
                def start(scope) do
                  spawn(fn ->
                    {:ok, _} = Beacon.start_link(scope, broadcast_interval_in_ms: 50)

                    pid1 = spawn_link(fn -> Process.sleep(:infinity) end)
                    pid2 = spawn_link(fn -> Process.sleep(:infinity) end)

                    Beacon.join(scope, :group1, pid1)
                    Beacon.join(scope, :group2, pid2)
                    Beacon.join(scope, :group3, pid2)

                    # Keep the spawned process (and the linked scope/members) alive
                    Process.sleep(:infinity)
                  end)
                end
end end) describe "distributed tests" do setup do scope = :"broadcast_scope#{System.unique_integer([:positive])}" supervisor_pid = start_supervised!(spec(scope, partitions: 2, broadcast_interval_in_ms: 50)) {:ok, peer, node} = Peer.start_disconnected(aux_mod: @aux_mod) ref = :telemetry_test.attach_event_handlers(self(), [ [:beacon, scope, :node, :up], [:beacon, scope, :node, :down] ]) %{scope: scope, supervisor_pid: supervisor_pid, peer: peer, node: node, telemetry_ref: ref} end test "node up", %{scope: scope, peer: peer, node: node, telemetry_ref: telemetry_ref} do pid1 = spawn_link(fn -> Process.sleep(:infinity) end) pid2 = spawn_link(fn -> Process.sleep(:infinity) end) Beacon.join(scope, :group1, pid1) Beacon.join(scope, :group1, pid2) Beacon.join(scope, :group2, pid2) true = Node.connect(node) :peer.call(peer, PeerAux, :start, [scope]) assert_receive {[:beacon, ^scope, :node, :up], ^telemetry_ref, %{}, %{node: ^node}} # Wait for at least one broadcast interval Process.sleep(150) assert Beacon.group_count(scope) == 3 groups = Beacon.groups(scope) assert length(groups) == 3 assert :group1 in groups assert :group2 in groups assert :group3 in groups assert Beacon.member_counts(scope) == %{group1: 3, group2: 2, group3: 1} assert Beacon.member_count(scope, :group1) == 3 assert Beacon.member_count(scope, :group3, node) == 1 assert Beacon.member_count(scope, :group1, node()) == 2 end test "node down", %{scope: scope, peer: peer, node: node, telemetry_ref: telemetry_ref} do pid1 = spawn_link(fn -> Process.sleep(:infinity) end) pid2 = spawn_link(fn -> Process.sleep(:infinity) end) Beacon.join(scope, :group1, pid1) Beacon.join(scope, :group1, pid2) Beacon.join(scope, :group2, pid2) true = Node.connect(node) :peer.call(peer, PeerAux, :start, [scope]) assert_receive {[:beacon, ^scope, :node, :up], ^telemetry_ref, %{}, %{node: ^node}} # Wait for remote scope to communicate with local Process.sleep(150) true = Node.disconnect(node) assert_receive {[:beacon, ^scope, :node, 
:down], ^telemetry_ref, %{}, %{node: ^node}}

      assert Beacon.member_counts(scope) == %{group1: 2, group2: 1}
      assert Beacon.member_count(scope, :group1) == 2
    end

    test "scope restart can recover", %{
      scope: scope,
      supervisor_pid: supervisor_pid,
      peer: peer,
      node: node,
      telemetry_ref: telemetry_ref
    } do
      pid1 = spawn_link(fn -> Process.sleep(:infinity) end)
      pid2 = spawn_link(fn -> Process.sleep(:infinity) end)

      Beacon.join(scope, :group1, pid1)
      Beacon.join(scope, :group1, pid2)
      Beacon.join(scope, :group2, pid2)

      true = Node.connect(node)
      :peer.call(peer, PeerAux, :start, [scope])
      assert_receive {[:beacon, ^scope, :node, :up], ^telemetry_ref, %{}, %{node: ^node}}

      # Wait for remote scope to communicate with local
      Process.sleep(150)

      [
        {1, _, :worker, [Beacon.Partition]},
        {0, _, :worker, [Beacon.Partition]},
        {:scope, scope_pid, :worker, [Beacon.Scope]}
      ] = Supervisor.which_children(supervisor_pid)

      # Restart the scope process
      Process.monitor(scope_pid)
      Process.exit(scope_pid, :kill)
      assert_receive {:DOWN, _ref, :process, ^scope_pid, :killed}

      # Wait for recovery and communication
      Process.sleep(200)

      assert Beacon.group_count(scope) == 3
      groups = Beacon.groups(scope)
      assert length(groups) == 3
      assert :group1 in groups
      assert :group2 in groups
      assert :group3 in groups
      assert Beacon.member_counts(scope) == %{group1: 3, group2: 2, group3: 1}
    end
  end
end

================================================
FILE: beacon/test/support/peer.ex
================================================
defmodule Peer do
  @moduledoc """
  Starts `:peer` nodes for distributed tests.

  Uses the gist https://gist.github.com/ityonemo/177cbc96f8c8722bfc4d127ff9baec62 to start a node for testing
  """

  @doc """
  Starts a node for testing and connects the current node to it.

  Takes the same options as `start_disconnected/1`. Can receive an auxiliary module
  (`:aux_mod`) to be evaluated in the node so you are able to setup functions within
  the test context and outside of the normal code context

  e.g.
  ```
  @aux_mod (quote do
              defmodule Aux do
                def checker(res), do: res
              end
            end)

  Code.eval_quoted(@aux_mod)

  test "clustered call" do
    {:ok, _peer, node} = Peer.start(aux_mod: @aux_mod)
    assert :ok = :rpc.call(node, Aux, :checker, [:ok])
  end
  ```
  """
  @spec start(Keyword.t()) :: {:ok, :peer.server_ref(), node}
  def start(opts \\ []) do
    {:ok, peer, node} = start_disconnected(opts)
    true = Node.connect(node)
    {:ok, peer, node}
  end

  @doc """
  Similar to `start/1` but the node is not connected automatically.

  ## Options

    * `:name` - name of the peer node, defaults to `:peer.random_name/0`
    * `:extra_config` - list of `{app, key, value}` tuples written to the peer's
      application environment after the current node's environment has been copied over
    * `:aux_mod` - quoted module definition evaluated on the peer once its
      applications have been started

  The peer is started through `ExUnit.Callbacks.start_supervised/1` and controlled over
  standard IO, so the returned peer reference can be used with `:peer.call/4` even while
  the node is not connected via distribution.
  """
  @spec start_disconnected(Keyword.t()) :: {:ok, :peer.server_ref(), node}
  def start_disconnected(opts \\ []) do
    extra_config = Keyword.get(opts, :extra_config, [])
    # Only generate a random name when the caller did not provide one
    name = Keyword.get_lazy(opts, :name, &:peer.random_name/0)
    aux_mod = Keyword.get(opts, :aux_mod)

    true = :erlang.set_cookie(:cookie)

    {:ok, pid, node} =
      ExUnit.Callbacks.start_supervised(%{
        id: {:peer, name},
        start:
          {:peer, :start_link,
           [
             %{
               name: name,
               host: ~c"127.0.0.1",
               longnames: true,
               connection: :standard_io
             }
           ]}
      })

    # Both nodes must share the cookie for a later `Node.connect/1` to succeed
    true = :peer.call(pid, :erlang, :set_cookie, [:cookie])
    # Make every module compiled for the current node loadable on the peer
    :ok = :peer.call(pid, :code, :add_paths, [:code.get_path()])

    # Mirror the application environment of the current node
    for {app_name, _, _} <- Application.loaded_applications(),
        {key, value} <- Application.get_all_env(app_name) do
      :ok = :peer.call(pid, Application, :put_env, [app_name, key, value])
    end

    # Override with extra config
    for {app_name, key, value} <- extra_config do
      :ok = :peer.call(pid, Application, :put_env, [app_name, key, value])
    end

    # Mix has to be running on the peer before its environment can be set
    {:ok, _} = :peer.call(pid, Application, :ensure_all_started, [:mix])
    :ok = :peer.call(pid, Mix, :env, [Mix.env()])

    Enum.each([:logger, :runtime_tools, :mix, :os_mon, :beacon], fn app ->
      {:ok, _} = :peer.call(pid, Application, :ensure_all_started, [app])
    end)

    if aux_mod do
      {{:module, _, _, _}, []} = :peer.call(pid, Code, :eval_quoted, [aux_mod])
    end

    {:ok, pid, node}
  end
end

================================================
FILE: beacon/test/test_helper.exs
================================================
ExUnit.start(capture_log: true)

:net_kernel.start([:"beacon@127.0.0.1"])
================================================ FILE: bench/gen_counter.exs ================================================ alias Realtime.GenCounter counter = :counters.new(1, [:write_concurrency]) _gen_counter = GenCounter.new(:any_term) Benchee.run( %{ ":counters.add" => fn -> :counters.add(counter, 1, 1) end, "GenCounter.add" => fn -> GenCounter.add(:any_term) end } ) ================================================ FILE: bench/secrets.exs ================================================ alias RealtimeWeb.ChannelsAuthorization alias Realtime.Helpers, as: H jwt = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImxvY2FsaG9zdCIsInJvbGUiOiJhbm9uIiwiaWF0IjoxNjU4NjAwNzkxLCJleHAiOjE5NzQxNzY3OTF9.Iki--9QilZ7vySEUJHj0a1T8BDHkR7rmdWStXImCZfk" jwt_secret = "d3v_HtNXEpT+zfsyy1LE1WPGmNKLWRfw/rpjnVtCEEM2cSFV2s+kUh5OKX7TPYmG" secret_key = "1234567890123456" string_to_encrypt = "supabase_realtime" string_to_decrypt = "A5mS7ggkPXm0FaKKoZtrsYNlZA3qZxFe9XA9w2YYqgU=" Benchee.run(%{ "authorize_jwt" => fn -> {:ok, _} = ChannelsAuthorization.authorize_conn(jwt, jwt_secret, nil) end, "encrypt_string" => fn -> H.encrypt!(string_to_encrypt, secret_key) end, "decrypt_string" => fn -> H.decrypt!(string_to_decrypt, secret_key) end }) ================================================ FILE: config/config.exs ================================================ # This file is responsible for configuring your application # and its dependencies with the aid of the Mix.Config module. # # This configuration file is loaded before any dependency and # is restricted to this project. 
# General application configuration import Config config :realtime, websocket_fullsweep_after: 20, ecto_repos: [Realtime.Repo], version: Mix.Project.config()[:version], replication_watchdog_interval: :timer.minutes(5), replication_watchdog_timeout: :timer.minutes(1) # Configures the endpoint config :realtime, RealtimeWeb.Endpoint, url: [host: "127.0.0.1"], secret_key_base: "ktyW57usZxrivYdvLo9os7UGcUUZYKchOMHT3tzndmnHuxD09k+fQnPUmxlPMUI3", render_errors: [view: RealtimeWeb.ErrorView, accepts: ~w(html json), layout: false], pubsub_server: Realtime.PubSub, live_view: [signing_salt: "wUMBeR8j"] config :realtime, :extensions, postgres_cdc_rls: %{ type: :postgres_cdc, key: "postgres_cdc_rls", driver: Extensions.PostgresCdcRls, supervisor: Extensions.PostgresCdcRls.Supervisor, db_settings: Extensions.PostgresCdcRls.DbSettings } config :esbuild, version: "0.14.29", default: [ args: ~w(js/app.js --bundle --target=es2017 --outdir=../priv/static/assets --external:/fonts/* --external:/images/*), cd: Path.expand("../assets", __DIR__), env: %{"NODE_PATH" => Path.expand("../deps", __DIR__)} ] config :tailwind, version: "3.3.2", default: [ args: ~w( --config=tailwind.config.js --input=css/app.css --output=../priv/static/assets/app.css ), cd: Path.expand("../assets", __DIR__) ] # Configures Elixir's Logger config :logger, :console, format: "$time $metadata[$level] $message\n", metadata: [:request_id, :project, :external_id, :application_name, :error_code, :sub, :iss, :exp] # Use Jason for JSON parsing in Phoenix config :phoenix, :json_library, Jason config :open_api_spex, :cache_adapter, OpenApiSpex.Plug.PersistentTermCache config :logflare_logger_backend, flush_interval: 1_000, max_batch_size: 50, metadata: :all config :phoenix, :filter_parameters, {:keep, []} config :opentelemetry, resource_detectors: [:otel_resource_app_env, :otel_resource_env_var], resource: %{ :"service.name" => "realtime" }, text_map_propagators: [:baggage, :trace_context], # Exporter must be configured 
through environment variables traces_exporter: :none, span_processor: :batch config :gen_rpc, # Inactivity period in milliseconds after which a pending process holding an async_call return value will exit. # This is used for process sanitation purposes so please make sure to set it in a sufficiently high number async_call_inactivity_timeout: 300_000 config :prom_ex, :storage_adapter, Realtime.PromEx.Store config :realtime, Realtime.PromEx, ets_flush_interval: 90_000 config :realtime, Realtime.TenantPromEx, ets_flush_interval: 90_000 # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. import_config "#{Mix.env()}.exs" ================================================ FILE: config/dev.exs ================================================ import Config # For development, we disable any cache and enable # debugging and code reloading. # # The watchers configuration can be used to run external # watchers to your application. For example, we use it # with webpack to recompile .js and .css sources. # Channels are not secured by default in development and # are secured by default in production. 
# The `:presence` flag is true by default and becomes false only when the
# PRESENCE environment variable is set to anything other than "true".
presence = System.get_env("PRESENCE", "true") == "true"

config :realtime, presence: presence, node_balance_uptime_threshold_in_ms: 100

config :realtime, RealtimeWeb.Endpoint,
  http: [port: System.get_env("PORT", "4000"), compress: true],
  debug_errors: true,
  code_reloader: true,
  check_origin: false,
  watchers: [
    # Start the esbuild watcher by calling Esbuild.install_and_run(:default, args)
    esbuild: {Esbuild, :install_and_run, [:default, ~w(--sourcemap=inline --watch)]},
    tailwind: {Tailwind, :install_and_run, [:default, ~w(--watch)]}
  ]

# watchers: [
#   node: [
#     "node_modules/webpack/bin/webpack.js",
#     "--mode",
#     "development",
#     "--watch-stdin",
#     cd: Path.expand("../assets", __DIR__)
#   ]
# ]

# ## SSL Support
#
# In order to use HTTPS in development, a self-signed
# certificate can be generated by running the following
# Mix task:
#
#     mix phx.gen.cert
#
# Note that this task requires Erlang/OTP 20 or later.
# Run `mix help phx.gen.cert` for more information.
#
# The `http:` config above can be replaced with:
#
#     https: [
#       port: 4001,
#       cipher_suite: :strong,
#       keyfile: "priv/cert/selfsigned_key.pem",
#       certfile: "priv/cert/selfsigned.pem"
#     ],
#
# If desired, both `http:` and `https:` keys can be
# configured to run both http and https servers on
# different ports.

# Watch static and templates for browser reloading.
config :realtime, RealtimeWeb.Endpoint,
  live_reload: [
    patterns: [
      ~r"priv/static/.*(js|css|png|jpeg|jpg|gif|svg)$",
      ~r"priv/gettext/.*(po)$",
      ~r"lib/realtime_web/(live|views)/.*(ex)$",
      ~r"lib/realtime_web/templates/.*(eex)$"
    ]
  ]

# Development log format: time, level and message followed by the metadata keys listed below
config :logger, :console,
  format: "$time [$level] $message $metadata\n",
  metadata: [
    :error_code,
    :file,
    :pid,
    :project,
    :external_id,
    :application_name,
    :region,
    :request_id,
    :sub,
    :iss,
    :exp
  ]

# Set a higher stacktrace during development. Avoid configuring such
# in production as building large stacktraces may be expensive.
config :phoenix, :stacktrace_depth, 20 # Initialize plugs at runtime for faster development compilation config :phoenix, :plug_init_mode, :runtime # Disable caching to ensure the rendered spec is refreshed config :open_api_spex, :cache_adapter, OpenApiSpex.Plug.NoneCache # Disabled but can print to stdout with: # config :opentelemetry, traces_exporter: {:otel_exporter_stdout, []} config :opentelemetry, traces_exporter: :none config :mix_test_watch, clear: true ================================================ FILE: config/prod.exs ================================================ import Config # For production, don't forget to configure the url host # to something meaningful, Phoenix uses this information # when generating URLs. # # Note we also include the path to a cache manifest # containing the digested version of static files. This # manifest is generated by the `mix phx.digest` task, # which you should run after static files are built and # before starting your production server. # config :realtime, RealtimeWeb.Endpoint, # url: [host: "realtime.dev", port: 80] # Do not print debug messages in production config :logger, :warning, format: "$time [$level] $message $metadata\n", metadata: [ :error_code, :file, :pid, :project, :external_id, :application_name, :cluster, :region, :request_id, :sub, :iss, :exp ] # ## SSL Support # # To get SSL working, you will need to add the `https` key # to the previous section and set your `:url` port to 443: # # config :realtime, RealtimeWeb.Endpoint, # ... # url: [host: "example.com", port: 443], # https: [ # port: 443, # cipher_suite: :strong, # keyfile: System.get_env("SOME_APP_SSL_KEY_PATH"), # certfile: System.get_env("SOME_APP_SSL_CERT_PATH"), # transport_options: [socket_opts: [:inet6]] # ] # # The `cipher_suite` is set to `:strong` to support only the # latest and more secure SSL ciphers. This means old browsers # and clients may not be supported. You can set it to # `:compatible` for wider support. 
#
# `:keyfile` and `:certfile` expect an absolute path to the key
# and cert in disk or a relative path inside priv, for example
# "priv/ssl/server.key". For all supported SSL configuration
# options, see https://hexdocs.pm/plug/Plug.SSL.html#configure/1
#
# We also recommend setting `force_ssl` in your endpoint, ensuring
# no data is ever sent via http, always redirecting to https:
#
#     config :realtime, RealtimeWeb.Endpoint,
#       force_ssl: [hsts: true]
#
# Check `Plug.SSL` for all available options in `force_ssl`.

# Finally import the config/prod.secret.exs which loads secrets
# and configuration from environment variables.

================================================
FILE: config/runtime.exs
================================================
import Config

defmodule Env do
  @moduledoc """
  Helpers to read typed values from environment variables.

  Every function returns `default` untouched when the variable is not set.
  """

  @doc """
  Reads `env` as an integer.

  Raises `ArgumentError` (from `String.to_integer/1`) when the variable is set
  to something that is not an integer.
  """
  def get_integer(env, default) do
    case System.get_env(env) do
      nil -> default
      value -> String.to_integer(value)
    end
  end

  @doc """
  Reads `env` as a charlist.
  """
  def get_charlist(env, default) do
    case System.get_env(env) do
      nil -> default
      value -> String.to_charlist(value)
    end
  end

  @doc """
  Reads `env` as a boolean.

  Accepts `"true"` and `"false"` in any letter case. Any other value raises
  `ArgumentError` instead of being turned into an arbitrary (truthy) atom such
  as `:no` or `:off`.
  """
  def get_boolean(env, default) do
    case System.get_env(env) do
      nil -> default
      value -> parse_boolean!(env, value)
    end
  end

  # Strict parser: only "true"/"false" are valid so a typo can never silently enable a flag
  defp parse_boolean!(env, value) do
    case String.downcase(value) do
      "true" ->
        true

      "false" ->
        false

      _ ->
        raise ArgumentError,
              "Environment variable #{env} must be \"true\" or \"false\", got: #{inspect(value)}"
    end
  end
end

app_name = System.get_env("APP_NAME", "")

# Setup Database
default_db_host = System.get_env("DB_HOST", "127.0.0.1")
username = System.get_env("DB_USER", "supabase_admin")
password = System.get_env("DB_PASSWORD", "postgres")
database = System.get_env("DB_NAME", "postgres")
port = System.get_env("DB_PORT", "5432")
db_version = System.get_env("DB_IP_VERSION")
slot_name_suffix = System.get_env("SLOT_NAME_SUFFIX")
db_ssl_enabled?
= Env.get_boolean("DB_SSL", false) db_ssl_ca_cert = System.get_env("DB_SSL_CA_CERT") queue_target = Env.get_integer("DB_QUEUE_TARGET", 5000) queue_interval = Env.get_integer("DB_QUEUE_INTERVAL", 5000) pool_size = Env.get_integer("DB_POOL_SIZE", 5) master_region = System.get_env("DB_MASTER_REGION") region = System.get_env("REGION") region_mapping = System.get_env("REGION_MAPPING") after_connect_query_args = case System.get_env("DB_AFTER_CONNECT_QUERY") do nil -> nil query -> {Postgrex, :query!, [query, []]} end ssl_opts = cond do db_ssl_enabled? and is_binary(db_ssl_ca_cert) -> [cacertfile: db_ssl_ca_cert] db_ssl_enabled? -> [verify: :verify_none] true -> false end tenant_cache_expiration = Env.get_integer("TENANT_CACHE_EXPIRATION_IN_MS", :timer.seconds(30)) migration_partition_slots = Env.get_integer("MIGRATION_PARTITION_SLOTS", System.schedulers_online() * 2) connect_partition_slots = Env.get_integer("CONNECT_PARTITION_SLOTS", System.schedulers_online() * 2) metrics_cleaner_schedule_timer_in_ms = Env.get_integer("METRICS_CLEANER_SCHEDULE_TIMER_IN_MS", :timer.minutes(30)) metrics_rpc_timeout_in_ms = Env.get_integer("METRICS_RPC_TIMEOUT_IN_MS", :timer.seconds(15)) rebalance_check_interval_in_ms = Env.get_integer("REBALANCE_CHECK_INTERVAL_IN_MS", :timer.minutes(10)) node_balance_uptime_threshold_in_ms = Env.get_integer("NODE_BALANCE_UPTIME_THRESHOLD_IN_MS", :timer.minutes(5)) tenant_max_bytes_per_second = Env.get_integer("TENANT_MAX_BYTES_PER_SECOND", 100_000) tenant_max_channels_per_client = Env.get_integer("TENANT_MAX_CHANNELS_PER_CLIENT", 100) tenant_max_concurrent_users = Env.get_integer("TENANT_MAX_CONCURRENT_USERS", 200) tenant_max_events_per_second = Env.get_integer("TENANT_MAX_EVENTS_PER_SECOND", 100) tenant_max_joins_per_second = Env.get_integer("TENANT_MAX_JOINS_PER_SECOND", 100) client_presence_max_calls = Env.get_integer("CLIENT_PRESENCE_MAX_CALLS", 5) client_presence_window_ms = Env.get_integer("CLIENT_PRESENCE_WINDOW_MS", 30_000) rpc_timeout = 
Env.get_integer("RPC_TIMEOUT", :timer.seconds(30)) max_gen_rpc_clients = Env.get_integer("MAX_GEN_RPC_CLIENTS", 5) run_janitor? = Env.get_boolean("RUN_JANITOR", false) disable_healthcheck_logging = Env.get_boolean("DISABLE_HEALTHCHECK_LOGGING", false) janitor_schedule_randomize = Env.get_boolean("JANITOR_SCHEDULE_RANDOMIZE", true) janitor_max_children = Env.get_integer("JANITOR_MAX_CHILDREN", 5) janitor_chunk_size = Env.get_integer("JANITOR_CHUNK_SIZE", 10) janitor_run_after_in_ms = Env.get_integer("JANITOR_RUN_AFTER_IN_MS", :timer.minutes(10)) janitor_children_timeout = Env.get_integer("JANITOR_CHILDREN_TIMEOUT", :timer.seconds(5)) janitor_schedule_timer = Env.get_integer("JANITOR_SCHEDULE_TIMER_IN_MS", :timer.hours(4)) platform = if System.get_env("AWS_EXECUTION_ENV") == "AWS_ECS_FARGATE", do: :aws, else: :fly broadcast_pool_size = Env.get_integer("BROADCAST_POOL_SIZE", 10) presence_pool_size = Env.get_integer("PRESENCE_POOL_SIZE", 10) presence_broadcast_period = Env.get_integer("PRESENCE_BROADCAST_PERIOD_IN_MS", 1_500) presence_permdown_period = Env.get_integer("PRESENCE_PERMDOWN_PERIOD_IN_MS", 1_200_000) pubsub_adapter = System.get_env("PUBSUB_ADAPTER", "gen_rpc") |> String.to_atom() websocket_max_heap_size = div(Env.get_integer("WEBSOCKET_MAX_HEAP_SIZE", 50_000_000), :erlang.system_info(:wordsize)) users_scope_shards = Env.get_integer("USERS_SCOPE_SHARDS", 5) postgres_cdc_scope_shards = Env.get_integer("POSTGRES_CDC_SCOPE_SHARDS", 5) regional_broadcasting = Env.get_boolean("REGIONAL_BROADCASTING", false) no_channel_timeout_in_ms = Env.get_integer("NO_CHANNEL_TIMEOUT_IN_MS", :timer.minutes(10)) measure_traffic_interval_in_ms = Env.get_integer("MEASURE_TRAFFIC_INTERVAL_IN_MS", :timer.seconds(10)) metrics_pusher_enabled = Env.get_boolean("METRICS_PUSHER_ENABLED", false) metrics_separation_enabled = Env.get_boolean("METRICS_SEPARATION_ENABLED", false) metrics_pusher_url = System.get_env("METRICS_PUSHER_URL") metrics_pusher_user = 
System.get_env("METRICS_PUSHER_USER", "realtime") metrics_pusher_auth = System.get_env("METRICS_PUSHER_AUTH") metrics_pusher_interval_ms = Env.get_integer("METRICS_PUSHER_INTERVAL_MS", :timer.seconds(30)) metrics_pusher_timeout_ms = Env.get_integer("METRICS_PUSHER_TIMEOUT_MS", :timer.seconds(15)) metrics_pusher_compress = Env.get_boolean("METRICS_PUSHER_COMPRESS", true) if !(db_version in [nil, "ipv6", "ipv4"]), do: raise("Invalid IP version, please set either ipv6 or ipv4") socket_options = cond do db_version == "ipv6" -> [:inet6] db_version == "ipv4" -> [:inet] true -> case Realtime.Database.detect_ip_version(default_db_host) do {:ok, ip_version} -> [ip_version] {:error, reason} -> raise "Failed to detect IP version for DB_HOST: #{reason}" end end [_, node_host] = node() |> Atom.to_string() |> String.split("@") metrics_tags = %{ region: region, host: node_host, id: Realtime.Nodes.short_node_id_from_name(node()) } config :realtime, Realtime.Repo, hostname: default_db_host, username: username, password: password, database: database, port: port, pool_size: pool_size, queue_target: queue_target, queue_interval: queue_interval, parameters: [application_name: "supabase_mt_realtime"], after_connect: after_connect_query_args, socket_options: socket_options, ssl: ssl_opts config :realtime, websocket_max_heap_size: websocket_max_heap_size, migration_partition_slots: migration_partition_slots, connect_partition_slots: connect_partition_slots, rebalance_check_interval_in_ms: rebalance_check_interval_in_ms, tenant_max_bytes_per_second: tenant_max_bytes_per_second, tenant_max_channels_per_client: tenant_max_channels_per_client, tenant_max_concurrent_users: tenant_max_concurrent_users, tenant_max_events_per_second: tenant_max_events_per_second, tenant_max_joins_per_second: tenant_max_joins_per_second, metrics_cleaner_schedule_timer_in_ms: metrics_cleaner_schedule_timer_in_ms, metrics_rpc_timeout: metrics_rpc_timeout_in_ms, tenant_cache_expiration: tenant_cache_expiration, 
rpc_timeout: rpc_timeout, no_channel_timeout_in_ms: no_channel_timeout_in_ms, platform: platform, pubsub_adapter: pubsub_adapter, broadcast_pool_size: broadcast_pool_size, presence_pool_size: presence_pool_size, presence_broadcast_period: presence_broadcast_period, presence_permdown_period: presence_permdown_period, users_scope_shards: users_scope_shards, postgres_cdc_scope_shards: postgres_cdc_scope_shards, regional_broadcasting: regional_broadcasting, master_region: master_region, region_mapping: region_mapping, metrics_tags: metrics_tags, measure_traffic_interval_in_ms: measure_traffic_interval_in_ms, client_presence_rate_limit: [ max_calls: client_presence_max_calls, window_ms: client_presence_window_ms ], disable_healthcheck_logging: disable_healthcheck_logging, metrics_pusher_enabled: metrics_pusher_enabled, metrics_pusher_url: metrics_pusher_url, metrics_pusher_user: metrics_pusher_user, metrics_pusher_auth: metrics_pusher_auth, metrics_pusher_interval_ms: metrics_pusher_interval_ms, metrics_pusher_timeout_ms: metrics_pusher_timeout_ms, metrics_pusher_compress: metrics_pusher_compress, metrics_separation_enabled: metrics_separation_enabled if config_env() != :test && run_janitor? 
do config :realtime, run_janitor: true, janitor_schedule_randomize: janitor_schedule_randomize, janitor_max_children: janitor_max_children, janitor_chunk_size: janitor_chunk_size, janitor_run_after_in_ms: janitor_run_after_in_ms, janitor_children_timeout: janitor_children_timeout, janitor_schedule_timer: janitor_schedule_timer end default_cluster_strategy = case config_env() do :prod -> "POSTGRES" _ -> "EPMD" end cluster_topologies = System.get_env("CLUSTER_STRATEGIES", default_cluster_strategy) |> String.upcase() |> String.split(",") |> Enum.reduce([], fn strategy, acc -> strategy |> String.trim() |> then(fn "DNS" -> [ dns: [ strategy: Cluster.Strategy.DNSPoll, config: [polling_interval: 5_000, query: System.get_env("DNS_NODES"), node_basename: app_name] ] ] ++ acc "POSTGRES" -> [ postgres: [ strategy: LibclusterPostgres.Strategy, config: [ hostname: default_db_host, username: username, password: password, database: database, port: port, parameters: [application_name: "cluster_node_#{node()}"], socket_options: socket_options, ssl: ssl_opts, heartbeat_interval: 5_000 ] ] ] ++ acc "EPMD" -> [ dev: [ strategy: Cluster.Strategy.Epmd, config: [hosts: [:"orange@127.0.0.1", :"pink@127.0.0.1"]], connect: {:net_kernel, :connect_node, []}, disconnect: {:net_kernel, :disconnect_node, []} ] ] ++ acc _ -> acc end) end) # Setup Logging if System.get_env("LOGS_ENGINE") == "logflare" do config :logflare_logger_backend, url: System.get_env("LOGFLARE_LOGGER_BACKEND_URL", "https://api.logflare.app") if !System.get_env("LOGFLARE_API_KEY") or !System.get_env("LOGFLARE_SOURCE_ID") do raise """ Environment variable LOGFLARE_API_KEY or LOGFLARE_SOURCE_ID is missing. Check those variables or choose another LOGS_ENGINE. 
""" end config :logger, sync_threshold: 6_000, discard_threshold: 6_000, backends: [LogflareLogger.HttpBackend] end # Setup production and development environments if config_env() != :test do gen_rpc_socket_ip = System.get_env("GEN_RPC_SOCKET_IP", "0.0.0.0") |> to_charlist() gen_rpc_ssl_server_port = System.get_env("GEN_RPC_SSL_SERVER_PORT") gen_rpc_ssl_server_port = if gen_rpc_ssl_server_port do String.to_integer(gen_rpc_ssl_server_port) end gen_rpc_default_driver = if gen_rpc_ssl_server_port, do: :ssl, else: :tcp if gen_rpc_default_driver == :ssl do gen_rpc_ssl_opts = [ certfile: System.fetch_env!("GEN_RPC_CERTFILE"), keyfile: System.fetch_env!("GEN_RPC_KEYFILE"), cacertfile: System.fetch_env!("GEN_RPC_CACERTFILE") ] config :gen_rpc, ssl_server_port: gen_rpc_ssl_server_port, ssl_client_port: System.get_env("GEN_RPC_SSL_CLIENT_PORT", "6369") |> String.to_integer(), ssl_client_options: gen_rpc_ssl_opts, ssl_server_options: gen_rpc_ssl_opts, tcp_server_port: false, tcp_client_port: false else config :gen_rpc, ssl_server_port: false, ssl_client_port: false, tcp_server_port: System.get_env("GEN_RPC_TCP_SERVER_PORT", "5369") |> String.to_integer(), tcp_client_port: System.get_env("GEN_RPC_TCP_CLIENT_PORT", "5369") |> String.to_integer() end case :inet.parse_address(gen_rpc_socket_ip) do {:ok, address} -> config :gen_rpc, default_client_driver: gen_rpc_default_driver, connect_timeout: System.get_env("GEN_RPC_CONNECT_TIMEOUT_IN_MS", "10000") |> String.to_integer(), send_timeout: System.get_env("GEN_RPC_SEND_TIMEOUT_IN_MS", "10000") |> String.to_integer(), ipv6_only: System.get_env("GEN_RPC_IPV6_ONLY", "false") == "true", socket_ip: address, max_batch_size: System.get_env("GEN_RPC_MAX_BATCH_SIZE", "0") |> String.to_integer(), compress: System.get_env("GEN_RPC_COMPRESS", "0") |> String.to_integer(), compression_threshold: System.get_env("GEN_RPC_COMPRESSION_THRESHOLD_IN_BYTES", "1000") |> String.to_integer() _ -> raise """ Environment variable GEN_RPC_SOCKET_IP is not a 
valid IP Address Most likely it should be "0.0.0.0" (ipv4) or "::" (ipv6) to bind to all interfaces """ end config :logger, level: System.get_env("LOG_LEVEL", "info") |> String.to_existing_atom() config :realtime, request_id_baggage_key: System.get_env("REQUEST_ID_BAGGAGE_KEY", "request-id"), jwt_claim_validators: System.get_env("JWT_CLAIM_VALIDATORS", "{}"), api_jwt_secret: System.get_env("API_JWT_SECRET"), api_blocklist: System.get_env("API_TOKEN_BLOCKLIST", "") |> String.split(","), metrics_blocklist: System.get_env("METRICS_TOKEN_BLOCKLIST", "") |> String.split(","), metrics_jwt_secret: System.fetch_env!("METRICS_JWT_SECRET"), db_enc_key: System.get_env("DB_ENC_KEY"), region: region, prom_poll_rate: Env.get_integer("PROM_POLL_RATE", 5000), slot_name_suffix: slot_name_suffix, max_gen_rpc_clients: max_gen_rpc_clients end # Setup Production if config_env() == :prod do config :libcluster, debug: false, topologies: cluster_topologies config :realtime, node_balance_uptime_threshold_in_ms: node_balance_uptime_threshold_in_ms secret_key_base = System.fetch_env!("SECRET_KEY_BASE") if app_name == "", do: raise("APP_NAME not available") config :realtime, RealtimeWeb.Endpoint, server: true, url: [host: "#{app_name}.supabase.co", port: 443], http: [ compress: true, port: Env.get_integer("PORT", 4000), protocol_options: [ max_header_value_length: Env.get_integer("MAX_HEADER_LENGTH", 4096) ], transport_options: [ max_connections: Env.get_integer("MAX_CONNECTIONS", 1000), num_acceptors: Env.get_integer("NUM_ACCEPTORS", 100), socket_opts: [:inet6] ] ], check_origin: false, secret_key_base: secret_key_base alias Realtime.Repo.Replica replica_repos = %{ Realtime.Repo.Replica.FRA => System.get_env("DB_HOST_REPLICA_FRA", default_db_host), Realtime.Repo.Replica.IAD => System.get_env("DB_HOST_REPLICA_IAD", default_db_host), Realtime.Repo.Replica.SIN => System.get_env("DB_HOST_REPLICA_SIN", default_db_host), Realtime.Repo.Replica.SJC => System.get_env("DB_HOST_REPLICA_SJC", 
default_db_host), Realtime.Repo.Replica.Singapore => System.get_env("DB_HOST_REPLICA_SIN", default_db_host), Realtime.Repo.Replica.London => System.get_env("DB_HOST_REPLICA_FRA", default_db_host), Realtime.Repo.Replica.NorthVirginia => System.get_env("DB_HOST_REPLICA_IAD", default_db_host), Realtime.Repo.Replica.Oregon => System.get_env("DB_HOST_REPLICA_SJC", default_db_host), Realtime.Repo.Replica.SanJose => System.get_env("DB_HOST_REPLICA_SJC", default_db_host), Realtime.Repo.Replica.Local => default_db_host } # Legacy repos # username, password, database, and port must match primary credentials for {replica_repo, hostname} <- replica_repos do config :realtime, replica_repo, hostname: hostname, username: username, password: password, database: database, port: port, pool_size: System.get_env("DB_REPLICA_POOL_SIZE", "5") |> String.to_integer(), queue_target: queue_target, queue_interval: queue_interval, parameters: [ application_name: "supabase_mt_realtime_ro" ], socket_options: socket_options, ssl: ssl_opts end # New main replica repo replica_host = System.get_env("DB_REPLICA_HOST") if replica_host do config :realtime, Realtime.Repo.Replica, hostname: replica_host, username: username, password: password, database: database, port: port, pool_size: System.get_env("DB_REPLICA_POOL_SIZE", "5") |> String.to_integer(), queue_target: queue_target, queue_interval: queue_interval, parameters: [ application_name: "supabase_mt_realtime_ro" ], socket_options: socket_options, ssl: ssl_opts end end if config_env() != :test do case System.get_env("DASHBOARD_AUTH", "basic_auth") do "zta" -> config :realtime, dashboard_auth: :zta _ -> config :realtime, dashboard_auth: :basic_auth, dashboard_credentials: {System.get_env("DASHBOARD_USER") || raise("DASHBOARD_USER is not set"), System.get_env("DASHBOARD_PASSWORD") || raise("DASHBOARD_PASSWORD is not set")} end end ================================================ FILE: config/test.exs ================================================ 
import Config

# Test-environment configuration. When tests are partitioned
# (MIX_TEST_PARTITION), the database name and every listening port are
# derived from the partition so parallel runs do not collide.
partition = System.get_env("MIX_TEST_PARTITION")

sandboxed_repos = [
  Realtime.Repo,
  Realtime.Repo.Replica.FRA,
  Realtime.Repo.Replica.IAD,
  Realtime.Repo.Replica.SIN,
  Realtime.Repo.Replica.SJC,
  Realtime.Repo.Replica.Singapore,
  Realtime.Repo.Replica.London,
  Realtime.Repo.Replica.NorthVirginia,
  Realtime.Repo.Replica.Oregon,
  Realtime.Repo.Replica.SanJose
]

# Every repo (primary and replicas) points at the same sandboxed test database.
Enum.each(sandboxed_repos, fn repo ->
  config :realtime, repo,
    username: "supabase_admin",
    password: "postgres",
    database: "realtime_test#{partition}",
    hostname: "127.0.0.1",
    pool: Ecto.Adapters.SQL.Sandbox
end)

# Numeric partition index; 0 when the suite is not partitioned.
partition_index = if partition, do: String.to_integer(partition), else: 0

config :realtime, RealtimeWeb.Endpoint,
  http: [port: 4002 + partition_index],
  server: true

# that's what config/runtime.exs expects to see as region
System.put_env("REGION", "us-east-1")

config :realtime,
  regional_broadcasting: true,
  region: "us-east-1",
  db_enc_key: "1234567890123456",
  jwt_claim_validators: System.get_env("JWT_CLAIM_VALIDATORS", "{}"),
  api_jwt_secret: System.get_env("API_JWT_SECRET", "secret"),
  metrics_jwt_secret: "test",
  prom_poll_rate: 5_000,
  request_id_baggage_key: "sb-request-id",
  node_balance_uptime_threshold_in_ms: 999_999_999_999,
  max_gen_rpc_clients: 5,
  metrics_pusher_req_options: [
    plug: {Req.Test, Realtime.MetricsPusher}
  ]

# Print nothing during tests unless captured or a test failure happens
config :logger, backends: [], level: :info

# Configures Elixir's Logger
config :logger, :console,
  format: "$time $metadata[$level] $message\n",
  metadata: [:error_code, :request_id, :project, :external_id, :application_name, :sub, :iss, :exp]

config :opentelemetry,
  span_processor: :simple,
  traces_exporter: :none,
  processors: [{:otel_simple_processor, %{}}]

# Using different ports so that a remote node during test can connect using the same local network
# See Clustered module
gen_rpc_offset = partition_index * 10

config :gen_rpc,
  tcp_server_port: 5969 + gen_rpc_offset,
  tcp_client_port: 5970 + gen_rpc_offset,
  connect_timeout: 500

config :realtime,
  dashboard_auth: :basic_auth,
  dashboard_credentials: {"test_user", "test_password"}
# app = "realtime-prod" primary_region = "sea" kill_signal = "SIGTERM" kill_timeout = "5s" [experimental] auto_rollback = true [deploy] release_command = "/app/bin/migrate" strategy = "rolling" [env] DNS_NODES = "realtime-prod.internal" ERL_CRASH_DUMP = "/data/erl_crash.dump" ERL_CRASH_DUMP_SECONDS = "30" [[services]] protocol = "tcp" internal_port = 4000 processes = ["app"] [[services.ports]] port = 80 handlers = ["http"] force_https = true [[services.ports]] port = 443 handlers = ["tls", "http"] [services.concurrency] type = "connections" hard_limit = 100000 soft_limit = 100000 [[services.tcp_checks]] interval = "15s" timeout = "2s" grace_period = "30s" [[services.http_checks]] interval = "10s" timeout = "2s" grace_period = "5s" method = "get" path = "/" protocol = "http" tls_skip_verify = false ================================================ FILE: deploy/fly/qa.toml ================================================ app = "realtime-qa" kill_signal = "SIGTERM" kill_timeout = 5 processes = [] [deploy] release_command = "/app/bin/migrate" strategy = "rolling" [env] DNS_NODES = "realtime-qa.internal" ERL_CRASH_DUMP = "/data/erl_crash.dump" ERL_CRASH_DUMP_SECONDS = 30 [experimental] allowed_public_ports = [] auto_rollback = true [[services]] internal_port = 4000 processes = ["app"] protocol = "tcp" script_checks = [] [services.concurrency] hard_limit = 100000 soft_limit = 100000 type = "connections" [[services.ports]] force_https = true handlers = ["http"] port = 80 [[services.ports]] handlers = ["tls", "http"] port = 443 [[services.tcp_checks]] grace_period = "30s" interval = "15s" restart_limit = 6 timeout = "2s" [[services.http_checks]] interval = 10000 grace_period = "5s" method = "get" path = "/" protocol = "http" restart_limit = 0 timeout = 2000 tls_skip_verify = false [services.http_checks.headers] ================================================ FILE: deploy/fly/staging.toml ================================================ # fly.toml app configuration file 
generated for realtime-staging on 2023-06-27T07:39:20-07:00 # # See https://fly.io/docs/reference/configuration/ for information about how to use this file. # app = "realtime-staging" primary_region = "lhr" kill_signal = "SIGTERM" kill_timeout = "5s" [experimental] auto_rollback = true [deploy] release_command = "/app/bin/migrate" strategy = "rolling" [env] DNS_NODES = "realtime-staging.internal" ERL_CRASH_DUMP = "/data/erl_crash.dump" ERL_CRASH_DUMP_SECONDS = "30" [[mounts]] source = "data_vol_machines" destination = "/data" processes = ["app"] [[services]] protocol = "tcp" internal_port = 4000 processes = ["app"] [[services.ports]] port = 80 handlers = ["http"] force_https = true [[services.ports]] port = 443 handlers = ["tls", "http"] [services.concurrency] type = "connections" hard_limit = 16384 soft_limit = 16384 [[services.tcp_checks]] interval = "15s" timeout = "2s" grace_period = "30s" restart_limit = 6 [[services.http_checks]] interval = "10s" timeout = "2s" grace_period = "5s" restart_limit = 0 method = "get" path = "/" protocol = "http" ================================================ FILE: dev/postgres/00-supabase-schema.sql ================================================ create schema if not exists _realtime; create schema if not exists realtime; ================================================ FILE: docker-compose.dbs.yml ================================================ version: '3' services: db: image: supabase/postgres:17.6.1.074 container_name: realtime-db ports: - "5432:5432" volumes: - ./dev/postgres/00-supabase-schema.sql:/docker-entrypoint-initdb.d/00-supabase-schema.sql command: postgres -c config_file=/etc/postgresql/postgresql.conf environment: POSTGRES_HOST: /var/run/postgresql POSTGRES_PASSWORD: postgres tenant_db: image: supabase/postgres:17.6.1.074 container_name: tenant-db ports: - "5433:5432" command: postgres -c config_file=/etc/postgresql/postgresql.conf environment: POSTGRES_HOST: /var/run/postgresql POSTGRES_PASSWORD: postgres 
================================================ FILE: docker-compose.tests.yml ================================================ services: # Supabase Realtime service test_db: image: supabase/postgres:17.6.1.074 container_name: test-realtime-db ports: - "5532:5432" volumes: - ./dev/postgres:/docker-entrypoint-initdb.d/ command: postgres -c config_file=/etc/postgresql/postgresql.conf environment: POSTGRES_HOST: /var/run/postgresql POSTGRES_PASSWORD: postgres healthcheck: test: ["CMD-SHELL", "pg_isready -U postgres"] interval: 10s timeout: 5s retries: 5 test_realtime: depends_on: - test_db build: . container_name: test-realtime-server ports: - "4100:4100" extra_hosts: - "host.docker.internal:host-gateway" environment: PORT: 4100 DB_HOST: host.docker.internal DB_PORT: 5532 DB_USER: supabase_admin DB_PASSWORD: postgres DB_NAME: postgres DB_ENC_KEY: 1234567890123456 DB_AFTER_CONNECT_QUERY: 'SET search_path TO _realtime' API_JWT_SECRET: super-secret-jwt-token-with-at-least-32-characters-long METRICS_JWT_SECRET: super-secret-jwt-token-with-at-least-32-characters-long SECRET_KEY_BASE: UpNVntn3cDxHJpq99YMc1T1AQgQpc8kfYTuRgBiYa15BLrx8etQoXz3gZv1/u2oq ERL_AFLAGS: -proto_dist inet_tcp DNS_NODES: "''" APP_NAME: realtime RUN_JANITOR: true JANITOR_INTERVAL: 60000 LOG_LEVEL: "info" SEED_SELF_HOST: true DASHBOARD_USER: admin DASHBOARD_PASSWORD: admin networks: test-network: aliases: - realtime-dev.local - realtime-dev.localhost healthcheck: test: ["CMD", "curl", "-f", "http://localhost:4100/"] interval: 10s timeout: 5s retries: 5 start_period: 5s # Deno test runner test-runner: image: denoland/deno:alpine-2.5.6 container_name: deno-test-runner depends_on: test_realtime: condition: service_healthy test_db: condition: service_healthy volumes: - ./test/integration/tests.ts:/app/tests.ts:ro working_dir: /app command: > sh -c " echo 'Running tests...' 
defmodule Realtime.Extensions do
  @moduledoc """
  This module provides functions to get extension settings.
  """

  @doc """
  Returns the database settings declared by the extension registered under `type`.

  The `:extensions` entry of the `:realtime` application environment is scanned
  for entries shaped like `{name, %{key: type, db_settings: module}}`. When
  several entries share the same key, the last one wins.

  The result is `%{default: module.default(), required: module.required()}`, or
  `%{default: %{}, required: []}` when no extension matches or no extensions are
  configured at all.
  """
  @spec db_settings(term()) :: %{default: term(), required: term()}
  def db_settings(type) do
    # `get_env/2` returns nil when the key is unset; fall back to an empty
    # collection so the lookup degrades to "no extension" instead of crashing.
    extensions = Application.get_env(:realtime, :extensions) || []

    settings_module =
      Enum.reduce(extensions, nil, fn
        {_, %{key: ^type, db_settings: module}}, _ -> module
        _, found -> found
      end)

    if settings_module do
      %{default: settings_module.default(), required: settings_module.required()}
    else
      %{default: %{}, required: []}
    end
  end
end
# Parses the subscription params of every entry in `params_list`.
#
# Returns `{:ok, subscriptions}` where each subscription is a map with `:id`,
# `:claims` and the parsed `:subscription_params` (entries are prepended, so the
# list is in reverse order of `params_list`). Stops at the first entry that
# fails to parse and returns `{:error, {:malformed_subscription_params, reason}}`.
defp subscription_list(params_list) do
  Enum.reduce_while(params_list, {:ok, []}, &collect_subscription/2)
end

# Reducer for `subscription_list/1`: parses one entry and either continues with
# it prepended to the accumulator or halts with the parse error.
defp collect_subscription(entry, {:ok, parsed}) do
  case Subscriptions.parse_subscription_params(entry[:params]) do
    {:ok, subscription_params} ->
      subscription = %{id: entry.id, claims: entry.claims, subscription_params: subscription_params}
      {:cont, {:ok, [subscription | parsed]}}

    {:error, reason} ->
      {:halt, {:error, {:malformed_subscription_params, reason}}}
  end
end
""" @spec handle_stop(String.t(), non_neg_integer()) :: :ok def handle_stop(tenant, timeout) when is_binary(tenant) do scope = Realtime.Syn.PostgresCdc.scope(tenant) case :syn.whereis_name({scope, tenant}) do :undefined -> Logger.warning("Database supervisor not found for tenant #{tenant}") :ok pid -> DynamicSupervisor.stop(pid, :shutdown, timeout) end end ## Internal functions def start_distributed(%{"region" => region, "id" => tenant} = args) do platform_region = Realtime.Nodes.platform_region_translator(region) launch_node = Realtime.Nodes.launch_node(platform_region, node(), tenant) Logger.warning( "Starting distributed postgres extension #{inspect(lauch_node: launch_node, region: region, platform_region: platform_region)}" ) case GenRpc.call(launch_node, __MODULE__, :start, [args], timeout: 30_000, tenant_id: tenant) do {:ok, _pid} = ok -> ok {:error, {:already_started, _pid}} = error -> Logger.info("Postgres Extension already started on node #{inspect(launch_node)}") error error -> log_error("ErrorStartingPostgresCDC", error) error end end @doc """ Start db poller. Expects an `external_id` as a `tenant`. 
""" @spec start(map()) :: {:ok, pid} | {:error, :already_started | :reserved} def start(%{"id" => tenant} = args) when is_binary(tenant) do Logger.debug("Starting #{__MODULE__} extension with args: #{inspect(args, pretty: true)}") DynamicSupervisor.start_child( {:via, PartitionSupervisor, {Rls.DynamicSupervisor, tenant}}, %{ id: tenant, start: {Rls.WorkerSupervisor, :start_link, [args]}, restart: :temporary } ) end @spec get_manager_conn(String.t()) :: {:error, nil | :wait} | {:ok, pid(), pid()} def get_manager_conn(id) do scope = Realtime.Syn.PostgresCdc.scope(id) case :syn.lookup(scope, id) do {_, %{manager: nil, subs_pool: nil}} -> {:error, :wait} {_, %{manager: manager, subs_pool: conn}} -> {:ok, manager, conn} _ -> {:error, nil} end end @spec supervisor_id(String.t(), String.t()) :: {atom(), String.t(), map()} def supervisor_id(tenant, region) do scope = Realtime.Syn.PostgresCdc.scope(tenant) {scope, tenant, %{region: region, manager: nil, subs_pool: nil}} end @spec update_meta(String.t(), pid(), pid()) :: {:ok, {pid(), term()}} | {:error, term()} def update_meta(tenant, manager_pid, subs_pool) do scope = Realtime.Syn.PostgresCdc.scope(tenant) :syn.update_registry(scope, tenant, fn pid, meta -> if node(pid) == node(manager_pid) do %{meta | manager: manager_pid, subs_pool: subs_pool} else Logger.warning("Node mismatch for tenant #{tenant} #{inspect(node(pid))} #{inspect(node(manager_pid))}") meta end end) end end ================================================ FILE: lib/extensions/postgres_cdc_rls/db_settings.ex ================================================ defmodule Extensions.PostgresCdcRls.DbSettings do @moduledoc """ Schema callbacks for CDC RLS implementation. 
""" def default do %{ "poll_interval_ms" => 100, "poll_max_changes" => 100, "poll_max_record_bytes" => 1_048_576, "publication" => "supabase_realtime", "slot_name" => "supabase_realtime_replication_slot" } end def required do [ {"region", &is_binary/1, false}, {"db_host", &is_binary/1, true}, {"db_name", &is_binary/1, true}, {"db_user", &is_binary/1, true}, {"db_port", &is_binary/1, true}, {"db_password", &is_binary/1, true} ] end end ================================================ FILE: lib/extensions/postgres_cdc_rls/message_dispatcher.ex ================================================ # This file draws from https://github.com/phoenixframework/phoenix/blob/9941711736c8464b27b40914a4d954ed2b4f5958/lib/phoenix/channel/server.ex # License: https://github.com/phoenixframework/phoenix/blob/518a4640a70aa4d1370a64c2280d598e5b928168/LICENSE.md defmodule Extensions.PostgresCdcRls.MessageDispatcher do @moduledoc """ Hook invoked by Phoenix.PubSub dispatch. """ alias Phoenix.Socket.Broadcast def dispatch([_ | _] = topic_subscriptions, _from, {type, payload, sub_ids}) do _ = Enum.reduce(topic_subscriptions, %{}, fn {_pid, {:subscriber_fastlane, fastlane_pid, serializer, ids, join_topic, is_new_api}}, cache -> for {bin_id, id} <- ids, reduce: [] do acc -> if MapSet.member?(sub_ids, bin_id) do [id | acc] else acc end end |> case do [_ | _] = valid_ids -> new_payload = if is_new_api do %Broadcast{ topic: join_topic, event: "postgres_changes", payload: %{ids: valid_ids, data: Jason.Fragment.new(payload)} } else %Broadcast{topic: join_topic, event: type, payload: Jason.Fragment.new(payload)} end broadcast_message(cache, fastlane_pid, new_payload, serializer) _ -> cache end end) :ok end defp broadcast_message(cache, fastlane_pid, msg, serializer) do case cache do %{^msg => encoded_msg} -> send(fastlane_pid, encoded_msg) cache %{} -> encoded_msg = serializer.fastlane!(msg) send(fastlane_pid, encoded_msg) Map.put(cache, msg, encoded_msg) end end end 
# Prepares the poller for one tenant: loads the tenant and its
# "postgres_cdc_rls" extension settings, creates the rate counter for database
# events, registers the process under the tenant id and defers the database
# connection to `handle_continue/2`.
@impl true
def init(args) do
  Process.flag(:fullsweep_after, 20)

  tenant_id = args["id"]
  Logger.metadata(external_id: tenant_id, project: tenant_id)

  tenant = %Realtime.Api.Tenant{} = Tenants.Cache.get_tenant_by_external_id(tenant_id)
  rate_counter_args = Tenants.db_events_per_second_rate(tenant)
  settings = Realtime.PostgresCdc.filter_settings("postgres_cdc_rls", tenant.extensions)

  RateCounter.new(rate_counter_args)

  state = initial_state(tenant_id, settings, rate_counter_args, args["subscribers_nodes_table"])

  {:ok, _} = Registry.register(__MODULE__.Registry, tenant_id, %{})

  {:ok, state, {:continue, {:connect, tenant}}}
end

# Builds the initial poller state from the extension settings.
defp initial_state(tenant_id, settings, rate_counter_args, subscribers_nodes_table) do
  %{
    backoff: Backoff.new(backoff_min: 100, backoff_max: 5_000, backoff_type: :rand_exp),
    max_changes: settings["poll_max_changes"],
    max_record_bytes: settings["poll_max_record_bytes"],
    poll_interval_ms: settings["poll_interval_ms"],
    poll_ref: nil,
    publication: settings["publication"],
    retry_ref: nil,
    retry_count: 0,
    slot_name: settings["slot_name"] <> slot_name_suffix(),
    tenant_id: tenant_id,
    rate_counter_args: rate_counter_args,
    subscribers_nodes_table: subscribers_nodes_table
  }
end
# `:poll` — fetches the next batch of changes from the replication slot and
# broadcasts them.
#
#   * On success the backoff is reset and the next poll is scheduled:
#     immediately when rows were returned, after `poll_interval_ms` otherwise.
#   * When the slot is held by another backend (`:object_in_use`), the holder is
#     looked up in pg_stat_activity for diagnostics, terminated once more than
#     three retries have accumulated, and a retry is scheduled.
#   * Any other error is logged and retried with backoff.
#
# `:retry` — re-runs replication preparation after a backoff delay.
@impl true
def handle_info(
      :poll,
      %{
        backoff: backoff,
        poll_interval_ms: poll_interval_ms,
        poll_ref: poll_ref,
        publication: publication,
        retry_ref: retry_ref,
        retry_count: retry_count,
        slot_name: slot_name,
        max_record_bytes: max_record_bytes,
        max_changes: max_changes,
        conn: conn,
        tenant_id: tenant_id,
        subscribers_nodes_table: subscribers_nodes_table,
        rate_counter_args: rate_counter_args
      } = state
    ) do
  cancel_timer(poll_ref)
  cancel_timer(retry_ref)

  args = [conn, slot_name, publication, max_changes, max_record_bytes]
  {time, list_changes} = :timer.tc(Replications, :list_changes, args)
  record_list_changes_telemetry(time, tenant_id)

  case handle_list_changes_result(list_changes, subscribers_nodes_table, tenant_id, rate_counter_args) do
    {:ok, row_count} ->
      # `Backoff.reset/1` returns a new struct; the result must be stored or the
      # delay keeps growing across unrelated failures.
      backoff = Backoff.reset(backoff)

      poll_ref =
        if row_count > 0 do
          # More changes may be pending: poll again right away, no timer to track.
          send(self(), :poll)
          nil
        else
          Process.send_after(self(), :poll, poll_interval_ms)
        end

      {:noreply, %{state | backoff: backoff, poll_ref: poll_ref}}

    {:error, %Postgrex.Error{postgres: %{code: :object_in_use, message: msg}}} ->
      log_error("ReplicationSlotBeingUsed", msg)
      log_slot_holder_activity(conn, msg)

      if retry_count > 3, do: terminate_slot_holder(conn, slot_name)

      {:noreply, schedule_retry(state)}

    {:error, reason} ->
      log_error("PoolingReplicationError", reason)
      {:noreply, schedule_retry(state)}
  end
end

def handle_info(:retry, %{retry_ref: retry_ref} = state) do
  cancel_timer(retry_ref)
  {:noreply, prepare_replication(state)}
end

# Logs how long the backend holding the slot has been in its current state.
# The PID is parsed from the tail of the Postgres error message; a message
# without a trailing PID is logged and skipped instead of crashing the poller.
defp log_slot_holder_activity(conn, msg) do
  case Regex.run(~r/PID\s(\d+)$/, msg) do
    [_, pid_digits] ->
      db_pid = String.to_integer(pid_digits)

      case Replications.get_pg_stat_activity_diff(conn, db_pid) do
        {:ok, diff} ->
          Logger.warning("Database PID #{db_pid} found in pg_stat_activity with state_change diff of #{diff}")

        {:error, reason} ->
          log_error("PgStatActivityQueryFailed", reason)
      end

    _ ->
      Logger.warning("Unable to extract the database PID holding the replication slot from: #{inspect(msg)}")
  end
end

# Asks Postgres to terminate the backend currently attached to the slot and
# logs the outcome.
defp terminate_slot_holder(conn, slot_name) do
  case Replications.terminate_backend(conn, slot_name) do
    {:ok, :terminated} -> Logger.warning("Replication slot in use - terminating")
    {:error, :slot_not_found} -> Logger.warning("Replication slot not found")
    {:error, error} -> Logger.warning("Error terminating backend: #{inspect(error)}")
  end
end

# Advances the backoff, schedules a `:retry` message after the computed delay
# and bumps the retry counter. Returns the updated state.
defp schedule_retry(%{backoff: backoff, retry_count: retry_count} = state) do
  {timeout, backoff} = Backoff.backoff(backoff)
  retry_ref = Process.send_after(self(), :retry, timeout)
  %{state | backoff: backoff, retry_ref: retry_ref, retry_count: retry_count + 1}
end
@doc """
Converts one row returned by `Replications.list_changes/5` into a change struct.

The row must be the list of `{column, value}` pairs in the exact column order
of the query, with a list of subscription ids:

  * `"INSERT"` builds a `NewRecord` (carries `record`)
  * `"UPDATE"` builds an `UpdatedRecord` (carries `old_record` and `record`)
  * `"DELETE"` builds a `DeletedRecord` (carries `old_record`)

Anything else returns `nil`.
"""
def generate_record([
      {"type", type},
      {"schema", schema},
      {"table", table},
      {"columns", columns},
      {"record", record},
      {"old_record", old_record},
      {"commit_timestamp", commit_timestamp},
      {"subscription_ids", subscription_ids},
      {"errors", errors}
    ])
    when type in ["INSERT", "UPDATE", "DELETE"] and is_list(subscription_ids) do
  # Fields shared by the three change structs.
  shared = [
    columns: Jason.Fragment.new(columns),
    commit_timestamp: commit_timestamp,
    errors: convert_errors(errors),
    schema: schema,
    table: table,
    type: type,
    subscription_ids: MapSet.new(subscription_ids)
  ]

  case type do
    "INSERT" ->
      struct!(NewRecord, shared ++ [record: Jason.Fragment.new(record)])

    "UPDATE" ->
      struct!(
        UpdatedRecord,
        shared ++ [old_record: Jason.Fragment.new(old_record), record: Jason.Fragment.new(record)]
      )

    "DELETE" ->
      struct!(DeletedRecord, shared ++ [old_record: Jason.Fragment.new(old_record)])
  end
end

def generate_record(_), do: nil
""" import Postgrex, only: [query: 3] @spec prepare_replication(pid(), String.t()) :: {:ok, Postgrex.Result.t()} | {:error, Postgrex.Error.t()} def prepare_replication(conn, slot_name) do query( conn, "select case when not exists ( select 1 from pg_replication_slots where slot_name = $1 ) then ( select 1 from pg_create_logical_replication_slot($1, 'wal2json', 'true') ) else 1 end;", [slot_name] ) end @spec terminate_backend(pid(), String.t()) :: {:ok, :terminated} | {:error, :slot_not_found | Postgrex.Error.t()} def terminate_backend(conn, slot_name) do slots = query(conn, "select active_pid from pg_replication_slots where slot_name = $1", [slot_name]) case slots do {:ok, %Postgrex.Result{rows: [[nil]]}} -> {:error, :slot_not_found} {:ok, %Postgrex.Result{rows: [[backend]]}} -> case query(conn, "select pg_terminate_backend($1)", [backend]) do {:ok, _resp} -> {:ok, :terminated} {:error, erroer} -> {:error, erroer} end {:ok, %Postgrex.Result{num_rows: 0}} -> {:error, :slot_not_found} {:error, error} -> {:error, error} end end @spec get_pg_stat_activity_diff(pid(), integer()) :: {:ok, integer()} | {:error, Postgrex.Error.t()} def get_pg_stat_activity_diff(conn, db_pid) do query = query( conn, "select extract( epoch from (now() - state_change) )::int as diff from pg_stat_activity where application_name = 'realtime_rls' and pid = $1", [db_pid] ) case query do {:ok, %{rows: [[diff]]}} -> {:ok, diff} {:ok, _} -> {:error, :pid_not_found} {:error, error} -> {:error, error} end end def list_changes(conn, slot_name, publication, max_changes, max_record_bytes) do query( conn, """ SELECT wal->>'type' as type, wal->>'schema' as schema, wal->>'table' as table, COALESCE(wal->>'columns', '[]') as columns, COALESCE(wal->>'record', '{}') as record, COALESCE(wal->>'old_record', '{}') as old_record, wal->>'commit_timestamp' as commit_timestamp, subscription_ids, errors FROM realtime.list_changes($1, $2, $3, $4) """, [ publication, slot_name, max_changes, max_record_bytes ] ) end end 
# Connects to the tenant database and builds the manager state.
#
# Two connections are opened: one for the manager itself and one that is
# published through `Rls.update_meta/3`. If either connection fails the
# process stops with the reported reason.
@impl true
def handle_continue({:connect, args}, _) do
  %{
    "id" => id,
    "subscribers_pids_table" => subscribers_pids_table,
    "subscribers_nodes_table" => subscribers_nodes_table
  } = args

  tenant = %Realtime.Api.Tenant{} = Realtime.Tenants.Cache.get_tenant_by_external_id(id)

  settings = Realtime.PostgresCdc.filter_settings("postgres_cdc_rls", tenant.extensions)
  pool_size = Map.get(settings, "subcriber_pool_size", 4)
  settings = Map.put(settings, "subs_pool_size", pool_size)

  manager_db = Database.from_settings(settings, "realtime_subscription_manager")
  manager_pub_db = Database.from_settings(settings, "realtime_subscription_manager_pub")

  with {:ok, conn} <- Database.connect_db(manager_db),
       {:ok, conn_pub} <- Database.connect_db(manager_pub_db) do
    # Start from a clean slate, then advertise this manager and its pool.
    Subscriptions.delete_all_if_table_exists(conn)
    Rls.update_meta(id, self(), conn_pub)

    publication = settings["publication"]
    oids = Subscriptions.fetch_publication_tables(conn, publication)

    check_region_interval = Map.get(args, :check_region_interval, rebalance_check_interval_in_ms())
    send_region_check_message(check_region_interval)

    # Timers are started in this order: delete queue first, then no-users check.
    delete_queue = %{ref: check_delete_queue(), queue: :queue.new()}
    no_users_ref = check_no_users()

    send(self(), :check_oids)

    {:noreply,
     %State{
       id: id,
       conn: conn,
       publication: publication,
       subscribers_pids_table: subscribers_pids_table,
       subscribers_nodes_table: subscribers_nodes_table,
       oids: oids,
       delete_queue: delete_queue,
       no_users_ref: no_users_ref,
       check_region_interval: check_region_interval
     }}
  else
    {:error, reason} ->
      log_error("SubscriptionManagerConnectionFailed", reason)
      {:stop, reason, nil}
  end
end
state.subscribers_pids_table) new_oids end {:noreply, %{state | oids: oids, check_oid_ref: check_oids()}} end def handle_info( {:DOWN, _ref, :process, pid, _reason}, %State{ subscribers_pids_table: subscribers_pids_table, subscribers_nodes_table: subscribers_nodes_table, delete_queue: %{queue: q} } = state ) do q1 = case :ets.take(subscribers_pids_table, pid) do [] -> q values -> for {_pid, id, _ref, _node} <- values, reduce: q do acc -> bin_id = UUID.string_to_binary!(id) :ets.delete(subscribers_nodes_table, bin_id) :queue.in(bin_id, acc) end end {:noreply, put_in(state.delete_queue.queue, q1)} end def handle_info(:check_delete_queue, %State{delete_queue: %{ref: ref, queue: q}} = state) do Helpers.cancel_timer(ref) q1 = if :queue.is_empty(q) do q else {ids, q1} = Helpers.queue_take(q, @max_delete_records) Logger.debug("delete sub id #{inspect(ids)}") case Subscriptions.delete_multi(state.conn, ids) do {:ok, _} -> q1 {:error, reason} -> log_error("SubscriptionDeletionFailed", reason) q end end ref = if :queue.is_empty(q1), do: check_delete_queue(), else: check_delete_queue(1_000) {:noreply, %{state | delete_queue: %{ref: ref, queue: q1}}} end def handle_info(:check_no_users, %{subscribers_pids_table: tid, no_users_ts: ts} = state) do Helpers.cancel_timer(state.no_users_ref) ts_new = case {:ets.info(tid, :size), ts != nil && ts + @stop_after < now()} do {0, true} -> Logger.info("Stop tenant #{state.id} because of no connected users") Rls.handle_stop(state.id, 15_000) ts {0, false} -> if ts != nil, do: ts, else: now() _ -> nil end {:noreply, %{state | no_users_ts: ts_new, no_users_ref: check_no_users()}} end def handle_info({:check_region, previous_nodes_set}, state) do current_nodes_set = MapSet.new(Node.list()) case Rebalancer.check(previous_nodes_set, current_nodes_set, state.id) do :ok -> # Let's check again in the future send_region_check_message(state.check_region_interval) {:noreply, state} {:error, :wrong_region} -> Logger.warning("Rebalancing Postgres 
Changes replication for a closer region") Rls.handle_stop(state.id, 15_000) {:noreply, state} end end def handle_info(msg, state) do log_error("UnhandledProcessMessage", msg) {:noreply, state} end ## Internal functions defp check_oids, do: Process.send_after(self(), :check_oids, @check_oids_interval) defp now, do: System.system_time(:millisecond) defp check_no_users, do: Process.send_after(self(), :check_no_users, @check_no_users_interval) defp check_delete_queue(timeout \\ @timeout), do: Process.send_after(self(), :check_delete_queue, timeout) defp send_region_check_message(check_region_interval) do Process.send_after(self(), {:check_region, MapSet.new(Node.list())}, check_region_interval) end defp rebalance_check_interval_in_ms(), do: Application.fetch_env!(:realtime, :rebalance_check_interval_in_ms) end ================================================ FILE: lib/extensions/postgres_cdc_rls/subscriptions.ex ================================================ defmodule Extensions.PostgresCdcRls.Subscriptions do @moduledoc """ This module consolidates subscriptions handling """ use Realtime.Logs import Postgrex, only: [transaction: 2, query: 3, rollback: 2] @type conn() :: Postgrex.conn() @type filter :: {binary, binary, binary} @type subscription_params :: {action_filter :: binary, schema :: binary, table :: binary, [filter]} @type subscription_list :: [%{id: binary, claims: map, subscription_params: subscription_params}] @filter_types ["eq", "neq", "lt", "lte", "gt", "gte", "in"] @spec create(conn(), String.t(), subscription_list, pid(), pid()) :: {:ok, Postgrex.Result.t()} | {:error, Exception.t() | {:exit, term} | {:subscription_insert_failed, String.t()}} def create(conn, publication, subscription_list, manager, caller) do transaction(conn, fn conn -> Enum.map(subscription_list, fn %{id: id, claims: claims, subscription_params: params} -> case query(conn, publication, id, claims, params) do {:ok, %{num_rows: num} = result} when num > 0 -> send(manager, 
{:subscribed, {caller, id}})
            result

          {:ok, _} ->
            msg =
              "Unable to subscribe to changes with given parameters. Please check Realtime is enabled for the given connect parameters: [#{params_to_log(params)}]"

            rollback(conn, {:subscription_insert_failed, msg})

          {:error, exception} ->
            msg =
              "Unable to subscribe to changes with given parameters. An exception happened so please check your connect parameters: [#{params_to_log(params)}]. Exception: #{Exception.message(exception)}"

            rollback(conn, {:subscription_insert_failed, msg})
        end
      end)
    end)
  rescue
    e in DBConnection.ConnectionError -> {:error, e}
  catch
    :exit, reason -> {:error, {:exit, reason}}
  end

  # Upserts one `realtime.subscription` row per table of the publication that
  # matches the requested schema/table ("*" is turned into a LIKE wildcard).
  defp query(conn, publication, id, claims, subscription_params) do
    {action_filter, schema, table, filters} = subscription_params

    statement =
      "with sub_tables as ( select rr.entity from pg_publication_tables pub, lateral ( select format('%I.%I', pub.schemaname, pub.tablename)::regclass entity ) rr where pub.pubname = $1 and pub.schemaname like (case $2 when '*' then '%' else $2 end) and pub.tablename like (case $3 when '*' then '%' else $3 end) ) insert into realtime.subscription as x( subscription_id, entity, filters, claims, action_filter ) select $4::text::uuid, sub_tables.entity, $6, $5, $7 from sub_tables on conflict (subscription_id, entity, filters, action_filter) do update set claims = excluded.claims, created_at = now() returning id"

    bindings = [publication, schema, table, id, claims, filters, action_filter]
    query(conn, statement, bindings)
  end

  # Renders subscription params as "event: .., schema: .., table: .., filters: .."
  # for the error messages built in create/5.
  defp params_to_log({action_filter, schema, table, filters}) do
    pairs = [event: action_filter, schema: schema, table: table, filters: filters]

    pairs
    |> Enum.map(fn {key, value} -> "#{key}: #{to_log(value)}" end)
    |> Enum.join(", ")
  end

  @spec delete(conn(), String.t()) :: {:ok, Postgrex.Result.t()} | {:error, any()}
  def delete(conn, id) do
    Logger.debug("Delete subscription")

    # Anything other than {:error, _} falls through `with` untouched.
    with {:error, reason} <-
           query(conn, "delete from realtime.subscription where subscription_id = $1", [id]) do
      log_error("SubscriptionDeletionFailed", reason)
      {:error, reason}
    end
  catch
    :exit, reason ->
      log_error("SubscriptionDeletionFailed", {:exit, reason})
      {:error, {:exit, reason}}
  end

  @spec delete_all(conn()) :: :ok
  def delete_all(conn) do
    Logger.debug("Delete all subscriptions")
    outcome = query(conn, "delete from realtime.subscription;", [])

    case outcome do
      {:error, reason} -> log_error("SubscriptionDeletionFailed", reason)
      {:ok, _} -> :ok
    end
  catch
    :exit, reason ->
      log_error("SubscriptionDeletionFailed", {:exit, reason})
  end

  @spec delete_multi(conn(), [Ecto.UUID.t()]) ::
          {:ok, Postgrex.Result.t()} | {:error, Exception.t()}
  def delete_multi(conn, ids) do
    Logger.debug("Delete multi ids subscriptions")

    query(conn, "delete from realtime.subscription where subscription_id = ANY($1::uuid[])", [ids])
  end

  @spec delete_all_if_table_exists(conn()) :: :ok
  def delete_all_if_table_exists(conn) do
    # The DO block makes the delete a no-op when realtime.subscription is absent.
    guarded_delete =
      "do $$ begin if exists ( select 1 from pg_tables where schemaname = 'realtime' and tablename = 'subscription' ) then delete from realtime.subscription; end if; end $$"

    case query(conn, guarded_delete, []) do
      {:error, reason} -> log_error("SubscriptionCleanupFailed", reason)
      {:ok, _} -> :ok
    end
  catch
    :exit, reason ->
      log_error("SubscriptionCleanupFailed", {:exit, reason})
  end

  @spec fetch_publication_tables(conn(), String.t()) ::
          %{
            {<<_::1>>} => [integer()],
            {String.t()} => [integer()],
            {String.t(), String.t()} => [integer()]
          }
          | %{}
  def fetch_publication_tables(conn, publication) do
    sql =
      "select schemaname, tablename, format('%I.%I', schemaname, tablename)::regclass as oid from pg_publication_tables where pubname = $1"

    case query(conn, sql, [publication]) do
      {:ok, %{columns: ["schemaname", "tablename", "oid"], rows: rows}} ->
        rows
        |> Enum.reduce(%{}, &index_publication_row/2)
        |> Map.new(fn {key, oids} -> {key, Enum.sort(oids)} end)

      _ ->
        %{}
    end
  end

  # Registers one publication row under three keys: {schema, table}, {schema}
  # and {"*"}. Table names containing a space are logged but still indexed.
  defp index_publication_row([schema, table, oid], acc) do
    if String.contains?(table, " ") do
      log_error(
        "TableHasSpacesInName",
        "Table name cannot have spaces: \"#{schema}\".\"#{table}\""
      )
    end

    acc
    |> Map.put({schema, table}, [oid])
    |> Map.update({schema}, [oid], &[oid | &1])
    |> Map.update({"*"}, [oid], &[oid | &1])
  end

  @doc """
  Parses subscription filter parameters into something we can pass into our `create_subscription` query.

  We currently support the following filters: 'eq', 'neq', 'lt', 'lte', 'gt', 'gte', 'in'

  ## Examples

      iex> parse_subscription_params(%{"schema" => "public", "table" => "messages", "filter" => "subject=eq.hey"})
      {:ok, {"*", "public", "messages", [{"subject", "eq", "hey"}]}}

  `in` filter:

      iex> parse_subscription_params(%{"schema" => "public", "table" => "messages", "filter" => "subject=in.(hidee,ho)"})
      {:ok, {"*", "public", "messages", [{"subject", "in", "{hidee,ho}"}]}}

  no filter:

      iex> parse_subscription_params(%{"schema" => "public", "table" => "messages"})
      {:ok, {"*", "public", "messages", []}}

  only schema:

      iex> parse_subscription_params(%{"schema" => "public"})
      {:ok, {"*", "public", "*", []}}

  only table:

      iex> parse_subscription_params(%{"table" => "messages"})
      {:ok, {"*", "public", "messages", []}}

  An unsupported filter will respond with an error tuple:

      iex> parse_subscription_params(%{"schema" => "public", "table" => "messages", "filter" => "subject=like.hey"})
      {:error, ~s(Error parsing `filter` params: ["like", "hey"])}

  Catch `undefined` filters:

      iex> parse_subscription_params(%{"schema" => "public", "table" => "messages", "filter" => "undefined"})
      {:error, ~s(Error parsing `filter` params: ["undefined"])}

  Catch `missing params`:

      iex> parse_subscription_params(%{})
      {:error, ~s(No subscription params provided. Please provide at least a `schema` or `table` to subscribe to: %{})}

  """
  @spec parse_subscription_params(map()) :: {:ok, subscription_params} | {:error, binary()}
  def parse_subscription_params(params) do
    action_filter = action_filter(params)

    case params do
      %{"schema" => schema, "table" => table, "filter" => filter}
      when is_binary(schema) and is_binary(table) and is_binary(filter) ->
        case parse_filter(filter) do
          {:ok, parsed} -> {:ok, {action_filter, schema, table, [parsed]}}
          {:error, _message} = failure -> failure
        end

      %{"schema" => schema, "table" => table}
      when is_binary(schema) and is_binary(table) and not is_map_key(params, "filter") ->
        {:ok, {action_filter, schema, table, []}}

      %{"schema" => schema}
      when is_binary(schema) and not is_map_key(params, "table") and not is_map_key(params, "filter") ->
        {:ok, {action_filter, schema, "*", []}}

      %{"table" => table}
      when is_binary(table) and not is_map_key(params, "schema") and not is_map_key(params, "filter") ->
        {:ok, {action_filter, "public", table, []}}

      # Params carrying a token are not echoed back in the error message.
      map when is_map_key(map, "user_token") or is_map_key(map, "auth_token") ->
        {:error,
         "No subscription params provided. Please provide at least a `schema` or `table` to subscribe to: "}

      error ->
        {:error,
         "No subscription params provided. Please provide at least a `schema` or `table` to subscribe to: #{inspect(error)}"}
    end
  end

  # Splits "column=type.value" into a {column, type, value} filter tuple, or
  # returns the error tuple reported to the caller.
  defp parse_filter(filter) do
    with [col, rest] <- String.split(filter, "=", parts: 2),
         [filter_type, value] when filter_type in @filter_types <-
           String.split(rest, ".", parts: 2),
         {:ok, formatted_value} <- format_filter_value(filter_type, value) do
      {:ok, {col, filter_type, formatted_value}}
    else
      {:error, msg} -> {:error, "Error parsing `filter` params: #{msg}"}
      e -> {:error, "Error parsing `filter` params: #{inspect(e)}"}
    end
  end

  # Normalises the "event" param to INSERT / UPDATE / DELETE (case-insensitive);
  # every other value, including a missing or non-binary one, becomes "*".
  defp action_filter(%{"event" => event}) when is_binary(event) do
    upcased = String.upcase(event)

    if upcased in ["INSERT", "UPDATE", "DELETE"], do: upcased, else: "*"
  end

  defp action_filter(_), do: "*"

  # `in` values arrive as "(a,b)" and are rewritten to "{a,b}"; every other
  # filter type passes its value through unchanged.
  defp format_filter_value("in", value) do
    case Regex.run(~r/^\((.*)\)$/, value) do
      [_, new_value] -> {:ok, "{#{new_value}}"}
      nil -> {:error, "`in` filter value must be wrapped by parentheses"}
    end
  end

  defp format_filter_value(_filter, value), do: {:ok, value}
end

================================================
FILE: lib/extensions/postgres_cdc_rls/subscriptions_checker.ex
================================================
defmodule Extensions.PostgresCdcRls.SubscriptionsChecker do
  @moduledoc false
  use GenServer
  use Realtime.Logs

  alias Extensions.PostgresCdcRls, as: Rls

  alias Realtime.Database
  alias Realtime.Helpers
  alias Realtime.GenRpc
  alias Realtime.Telemetry

  alias Rls.Subscriptions

  @timeout 120_000
  @max_delete_records 1000

  defmodule State do
    @moduledoc false
    defstruct [:id, :conn, :check_active_pids, :subscribers_pids_table, :subscribers_nodes_table, :delete_queue]

    @type t :: %__MODULE__{
            id: String.t(),
            conn: Postgrex.conn(),
            check_active_pids: reference(),
            subscribers_pids_table: :ets.tid(),
            subscribers_nodes_table: :ets.tid(),
            delete_queue: %{
              ref: reference(),
              queue: :queue.queue()
            }
          }
  end

  @spec start_link(GenServer.options()) :: GenServer.on_start()
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts)
  end

  ## Callbacks

  @impl true
  def init(args) do
    %{"id" => id} = args
    Logger.metadata(external_id: id, project: id)
    {:ok, nil, {:continue, {:connect, args}}}
  end

  @impl true
  def handle_continue({:connect, args}, _) do
    %{
      "id" => id,
      "subscribers_pids_table" => subscribers_pids_table,
      "subscribers_nodes_table" => subscribers_nodes_table
    } = args

    %Realtime.Api.Tenant{} = tenant = Realtime.Tenants.Cache.get_tenant_by_external_id(id)
realtime_subscription_checker_settings = Database.from_tenant(tenant, "realtime_subscription_checker")

    with {:ok, conn} <- Database.connect_db(realtime_subscription_checker_settings) do
      state = %State{
        id: id,
        conn: conn,
        check_active_pids: check_active_pids(),
        subscribers_pids_table: subscribers_pids_table,
        subscribers_nodes_table: subscribers_nodes_table,
        delete_queue: %{ref: nil, queue: :queue.new()}
      }

      {:noreply, state}
    else
      {:error, reason} ->
        log_error("SubscriptionsCheckerConnectionFailed", reason)
        {:stop, reason, nil}
    end
  end

  # Periodic sweep: finds registered subscriber pids that are no longer alive
  # (checked on the node that owns them), removes their ETS rows and queues their
  # subscription ids for deletion from the database.
  @impl true
  def handle_info(:check_active_pids, %State{check_active_pids: ref, delete_queue: delete_queue, id: id} = state) do
    Helpers.cancel_timer(ref)

    ids =
      state.subscribers_pids_table
      |> subscribers_by_node()
      |> not_alive_pids_dist()
      |> pop_not_alive_pids(state.subscribers_pids_table, state.subscribers_nodes_table, id)

    new_delete_queue =
      case ids do
        # Nothing to delete: leave the queue and its timer untouched.
        [] ->
          delete_queue

        _ ->
          # Skip ids that are already queued so they are not deleted twice.
          q =
            Enum.reduce(ids, delete_queue.queue, fn sub_id, acc ->
              if :queue.member(sub_id, acc), do: acc, else: :queue.in(sub_id, acc)
            end)

          %{
            ref: check_delete_queue(),
            queue: q
          }
      end

    {:noreply, %{state | check_active_pids: check_active_pids(), delete_queue: new_delete_queue}}
  end

  # Deletes up to @max_delete_records queued ids. On failure the original queue is
  # kept so the same batch is retried on the next tick.
  def handle_info(:check_delete_queue, %State{delete_queue: %{ref: ref, queue: q}} = state) do
    Helpers.cancel_timer(ref)

    new_queue =
      if :queue.is_empty(q) do
        q
      else
        {ids, q1} = Helpers.queue_take(q, @max_delete_records)
        Logger.warning("Delete #{length(ids)} phantom subscribers from db")

        case Subscriptions.delete_multi(state.conn, ids) do
          {:ok, _} ->
            q1

          {:error, reason} ->
            log_error("UnableToDeletePhantomSubscriptions", reason)
            q
        end
      end

    # Keep polling only while ids remain in the queue.
    new_ref = if :queue.is_empty(new_queue), do: ref, else: check_delete_queue()

    {:noreply, %{state | delete_queue: %{ref: new_ref, queue: new_queue}}}
  end

  # Catch-all: without it any message other than the two above raises a
  # FunctionClauseError and crashes the checker.
  def handle_info(msg, state) do
    log_error("UnhandledProcessMessage", msg)
    {:noreply, state}
  end

  ## Internal functions

  # Removes the ETS rows of every given pid from both tables and returns the
  # binary subscription ids that belonged to them. Emits a telemetry event per
  # phantom row, or a `pid_not_found` event when the pid has no rows left.
  @spec pop_not_alive_pids([pid()], :ets.tid(), :ets.tid(), binary()) :: [Ecto.UUID.t()]
  def pop_not_alive_pids(pids, subscribers_pids_table, subscribers_nodes_table, tenant_id) do
    Enum.reduce(pids, [], fn pid, acc ->
      case :ets.lookup(subscribers_pids_table, pid) do
        [] ->
          Telemetry.execute(
            [:realtime, :subscriptions_checker, :pid_not_found],
            %{quantity: 1},
            %{tenant_id: tenant_id}
          )

          acc

        results ->
          for {^pid, postgres_id, _ref, _node} <- results do
            Telemetry.execute(
              [:realtime, :subscriptions_checker, :phantom_pid_detected],
              %{quantity: 1},
              %{tenant_id: tenant_id}
            )

            :ets.delete(subscribers_pids_table, pid)
            bin_id = UUID.string_to_binary!(postgres_id)

            :ets.delete(subscribers_nodes_table, bin_id)
            bin_id
          end ++ acc
      end
    end)
  end

  # Groups every pid stored in the table by the node recorded in its row.
  @spec subscribers_by_node(:ets.tid()) :: %{node() => MapSet.t(pid())}
  def subscribers_by_node(tid) do
    fn {pid, _postgres_id, _ref, node}, acc ->
      Map.update(acc, node, MapSet.new([pid]), &MapSet.put(&1, pid))
    end
    |> :ets.foldl(%{}, tid)
  end

  # Checks liveness locally for this node's pids and through GenRpc for remote
  # ones. Pids of a node whose RPC fails are skipped, i.e. treated as alive.
  @spec not_alive_pids_dist(%{node() => MapSet.t(pid())}) :: [pid()] | []
  def not_alive_pids_dist(pids) do
    Enum.reduce(pids, [], fn {node, pids}, acc ->
      if node == node() do
        acc ++ not_alive_pids(pids)
      else
        case GenRpc.call(node, __MODULE__, :not_alive_pids, [pids], timeout: 15_000) do
          {:error, :rpc_error, _} = error ->
            log_error("UnableToCheckProcessesOnRemoteNode", error)
            acc

          pids ->
            acc ++ pids
        end
      end
    end)
  end

  # Returns the pids for which Process.alive?/1 is false; must run on the node
  # that owns the pids.
  @spec not_alive_pids(MapSet.t(pid())) :: [pid()] | []
  def not_alive_pids(pids) do
    Enum.reduce(pids, [], fn pid, acc ->
      if Process.alive?(pid), do: acc, else: [pid | acc]
    end)
  end

  defp check_delete_queue, do: Process.send_after(self(), :check_delete_queue, 1000)

  defp check_active_pids, do: Process.send_after(self(), :check_active_pids, @timeout)
end

================================================
FILE: lib/extensions/postgres_cdc_rls/supervisor.ex
================================================
defmodule Extensions.PostgresCdcRls.Supervisor do
  @moduledoc """
  Supervisor to spin up the Postgres CDC RLS tree.
"""
  use Supervisor

  alias Extensions.PostgresCdcRls

  @spec start_link :: :ignore | {:error, any} | {:ok, pid}
  def start_link, do: Supervisor.start_link(__MODULE__, [], name: __MODULE__)

  # Loads the tenant migration modules, joins the syn scopes and starts a
  # partitioned DynamicSupervisor for the per-tenant worker trees.
  @impl true
  def init(_args) do
    load_migrations_modules()

    :syn.add_node_to_scopes(Realtime.Syn.PostgresCdc.scopes())

    partition_opts = [
      partitions: 20,
      child_spec: DynamicSupervisor,
      strategy: :one_for_one,
      name: PostgresCdcRls.DynamicSupervisor
    ]

    Supervisor.init([{PartitionSupervisor, partition_opts}], strategy: :one_for_one)
  end

  # Ensures every :realtime module under Realtime.Tenants.Migrations is loaded.
  defp load_migrations_modules do
    {:ok, modules} = :application.get_key(:realtime, :modules)

    for module <- modules,
        String.starts_with?(to_string(module), "Elixir.Realtime.Tenants.Migrations") do
      Code.ensure_loaded!(module)
    end

    :ok
  end
end

================================================
FILE: lib/extensions/postgres_cdc_rls/worker_supervisor.ex
================================================
defmodule Extensions.PostgresCdcRls.WorkerSupervisor do
  @moduledoc false
  use Supervisor

  alias Extensions.PostgresCdcRls
  alias PostgresCdcRls.ReplicationPoller
  alias PostgresCdcRls.SubscriptionManager
  alias PostgresCdcRls.SubscriptionsChecker
  alias Realtime.Tenants.Cache
  alias Realtime.PostgresCdc.Exception

  def start_link(args) do
    registered_name = PostgresCdcRls.supervisor_id(args["id"], args["region"])
    Supervisor.start_link(__MODULE__, args, name: {:via, :syn, registered_name})
  end

  # Creates the two ETS tables shared by the workers and starts the poller, the
  # subscription manager and the subscriptions checker under :rest_for_one.
  @impl true
  def init(%{"id" => tenant} = args) when is_binary(tenant) do
    Logger.metadata(external_id: tenant, project: tenant)

    # Refuse to start for a tenant the cache does not know.
    if !Cache.get_tenant_by_external_id(tenant), do: raise(Exception)

    pids_tid = :ets.new(__MODULE__, [:public, :bag])
    nodes_tid = :ets.new(__MODULE__, [:public, :set])

    tid_args =
      args
      |> Map.put("subscribers_pids_table", pids_tid)
      |> Map.put("subscribers_nodes_table", nodes_tid)

    children =
      for worker <- [ReplicationPoller, SubscriptionManager, SubscriptionsChecker] do
        %{id: worker, start: {worker, :start_link, [tid_args]}, restart: :transient}
      end

    Supervisor.init(children, strategy: :rest_for_one, max_restarts: 10, max_seconds: 60)
  end
end

================================================
FILE: lib/realtime/adapters/changes.ex
================================================
# This file draws heavily from https://github.com/cainophile/cainophile
# License: https://github.com/cainophile/cainophile/blob/master/LICENSE

require Protocol

defmodule Realtime.Adapters.Changes do
  @moduledoc """
  This module provides structures of CDC changes.
  """

  defmodule Transaction do
    @moduledoc false
    defstruct ~w(changes commit_timestamp)a
  end

  # The three record structs below keep `subscription_ids` out of their JSON
  # encoding.
  defmodule NewRecord do
    @moduledoc false
    @derive {Jason.Encoder, except: [:subscription_ids]}
    defstruct ~w(columns commit_timestamp errors schema table record subscription_ids type)a
  end

  defmodule UpdatedRecord do
    @moduledoc false
    @derive {Jason.Encoder, except: [:subscription_ids]}
    defstruct ~w(columns commit_timestamp errors schema table old_record record subscription_ids type)a
  end

  defmodule DeletedRecord do
    @moduledoc false
    @derive {Jason.Encoder, except: [:subscription_ids]}
    defstruct ~w(columns commit_timestamp errors schema table old_record subscription_ids type)a
  end

  defmodule TruncatedRelation do
    @moduledoc false
    defstruct ~w(type schema table commit_timestamp)a
  end
end

Protocol.derive(Jason.Encoder, Realtime.Adapters.Changes.Transaction)
Protocol.derive(Jason.Encoder, Realtime.Adapters.Changes.TruncatedRelation)
Protocol.derive(Jason.Encoder, Realtime.Adapters.Postgres.Decoder.Messages.Relation.Column)

================================================
FILE: lib/realtime/adapters/postgres/decoder.ex
================================================
# This file draws heavily from https://github.com/cainophile/pgoutput_decoder
# License:
https://github.com/cainophile/pgoutput_decoder/blob/master/LICENSE

defmodule Realtime.Adapters.Postgres.Decoder do
  @moduledoc """
  Functions for decoding different types of logical replication messages.
  """
  defmodule Messages do
    @moduledoc """
    Different types of logical replication messages from Postgres
    """
    defmodule Begin do
      @moduledoc """
      Struct representing the BEGIN message in PostgreSQL's logical decoding output.

      * `final_lsn` - The LSN of the commit that this transaction ended at.
      * `commit_timestamp` - The timestamp of the commit that this transaction ended at.
      * `xid` - The transaction ID of this transaction.
      """
      defstruct [:final_lsn, :commit_timestamp, :xid]
    end

    defmodule Commit do
      @moduledoc """
      Struct representing the COMMIT message in PostgreSQL's logical decoding output.

      * `flags` - Bitmask of flags associated with this commit.
      * `lsn` - The LSN of the commit.
      * `end_lsn` - The LSN of the next record in the WAL stream.
      * `commit_timestamp` - The timestamp of the commit.
      """
      defstruct [:flags, :lsn, :end_lsn, :commit_timestamp]
    end

    defmodule Origin do
      @moduledoc """
      Struct representing the ORIGIN message in PostgreSQL's logical decoding output.

      * `origin_commit_lsn` - The LSN of the commit in the database that the change originated from.
      * `name` - The name of the origin.
      """
      defstruct [:origin_commit_lsn, :name]
    end

    defmodule Relation do
      @moduledoc """
      Struct representing the RELATION message in PostgreSQL's logical decoding output.

      * `id` - The OID of the relation.
      * `namespace` - The OID of the namespace that the relation belongs to.
      * `name` - The name of the relation.
      * `replica_identity` - The replica identity setting of the relation.
      * `columns` - A list of columns in the relation.
      """
      defstruct [:id, :namespace, :name, :replica_identity, :columns]

      defmodule Column do
        @moduledoc """
        Struct representing a column in a relation.

        * `flags` - Bitmask of flags associated with this column.
        * `name` - The name of the column.
        * `type` - The OID of the data type of the column.
        * `type_modifier` - The type modifier of the column.
        """
        defstruct [:flags, :name, :type, :type_modifier]
      end
    end

    defmodule Insert do
      @moduledoc """
      Struct representing the INSERT message in PostgreSQL's logical decoding output.

      * `relation_id` - The OID of the relation that the tuple was inserted into.
      * `tuple_data` - The data of the inserted tuple.
      """
      defstruct [:relation_id, :tuple_data]
    end

    defmodule Update do
      @moduledoc """
      Struct representing the UPDATE message in PostgreSQL's logical decoding output.

      * `relation_id` - The OID of the relation that the tuple was updated in.
      * `changed_key_tuple_data` - The data of the tuple with the old key values.
      * `old_tuple_data` - The data of the tuple before the update.
      * `tuple_data` - The data of the tuple after the update.
      """
      defstruct [:relation_id, :changed_key_tuple_data, :old_tuple_data, :tuple_data]
    end

    defmodule Delete do
      @moduledoc """
      Struct representing the DELETE message in PostgreSQL's logical decoding output.

      * `relation_id` - The OID of the relation that the tuple was deleted from.
      * `changed_key_tuple_data` - The data of the tuple with the old key values.
      * `old_tuple_data` - The data of the tuple before the delete.
      """
      defstruct [:relation_id, :changed_key_tuple_data, :old_tuple_data]
    end

    defmodule Truncate do
      @moduledoc """
      Struct representing the TRUNCATE message in PostgreSQL's logical decoding output.

      * `number_of_relations` - The number of truncated relations.
      * `options` - Additional options provided when truncating the relations.
      * `truncated_relations` - List of relations that have been truncated.
      """
      defstruct [:number_of_relations, :options, :truncated_relations]
    end

    defmodule Type do
      @moduledoc """
      Struct representing the TYPE message in PostgreSQL's logical decoding output.

      * `id` - The OID of the type.
      * `namespace` - The namespace of the type.
      * `name` - The name of the type.
      """
      defstruct [:id, :namespace, :name]
    end

    defmodule Unsupported do
      @moduledoc """
      Struct representing an unsupported message in PostgreSQL's logical decoding output.

      * `data` - The raw data of the unsupported message.
      """
      defstruct [:data]
    end
  end

  require Logger

  # Stored as the {:ok, datetime, offset} tuple returned by from_iso8601/1 and
  # unpacked in pgtimestamp_to_timestamp/1.
  @pg_epoch DateTime.from_iso8601("2000-01-01T00:00:00Z")

  alias Messages.Begin
  alias Messages.Commit
  alias Messages.Origin
  alias Messages.Relation
  alias Messages.Relation.Column
  alias Messages.Insert
  alias Messages.Type
  alias Messages.Unsupported

  alias Realtime.Adapters.Postgres.OidDatabase

  @doc """
  Parses logical replication messages from Postgres

  `relations` is looked up by relation id when decoding INSERT messages; a
  message whose tag is not handled (or an INSERT for an unknown relation) is
  returned as `%Unsupported{}`.
  """
  def decode_message(message, relations) when is_binary(message) do
    decode_message_impl(message, relations)
  end

  defp decode_message_impl(<<"B", lsn::binary-8, timestamp::integer-64, xid::integer-32>>, _relations) do
    %Begin{
      final_lsn: decode_lsn(lsn),
      commit_timestamp: pgtimestamp_to_timestamp(timestamp),
      xid: xid
    }
  end

  defp decode_message_impl(
         <<"C", _flags::binary-1, lsn::binary-8, end_lsn::binary-8, timestamp::integer-64>>,
         _relations
       ) do
    %Commit{
      flags: [],
      lsn: decode_lsn(lsn),
      end_lsn: decode_lsn(end_lsn),
      commit_timestamp: pgtimestamp_to_timestamp(timestamp)
    }
  end

  # TODO: Verify this is correct with real data from Postgres
  defp decode_message_impl(<<"O", lsn::binary-8, name::binary>>, _relations) do
    %Origin{
      origin_commit_lsn: decode_lsn(lsn),
      name: name
    }
  end

  defp decode_message_impl(<<"R", id::integer-32, rest::binary>>, _relations) do
    # After the relation OID come two NUL-terminated strings (namespace, name),
    # then Int8 replica identity, Int16 column count and the column definitions.
    [
      namespace
      | [name | [<<replica_identity::binary-1, _number_of_columns::integer-16, columns::binary>>]]
    ] = String.split(rest, <<0>>, parts: 3)

    # TODO: Handle case where pg_catalog is blank, we should still return the schema as pg_catalog
    friendly_replica_identity =
      case replica_identity do
        "d" -> :default
        "n" -> :nothing
        "f" -> :all_columns
        "i" -> :index
      end

    %Relation{
      id: id,
      namespace: namespace,
      name: name,
      replica_identity: friendly_replica_identity,
      columns: decode_columns(columns)
    }
  end

  defp decode_message_impl(
         <<"I", relation_id::integer-32, "N", number_of_columns::integer-16, tuple_data::binary>>,
         relations
       ) do
    relation = relations |> get_in([relation_id, :columns])

    if relation do
      {<<>>, decoded_tuple_data} = decode_tuple_data(tuple_data, number_of_columns, relation)

      %Insert{relation_id: relation_id, tuple_data: decoded_tuple_data}
    else
      %Unsupported{}
    end
  end

  defp decode_message_impl(<<"Y", data_type_id::integer-32, namespace_and_name::binary>>, _relations) do
    [namespace, name_with_null] = :binary.split(namespace_and_name, <<0>>)
    # Drop the trailing NUL terminator of the type name.
    name = String.slice(name_with_null, 0..-2//1)

    %Type{
      id: data_type_id,
      namespace: namespace,
      name: name
    }
  end

  defp decode_message_impl(binary, _relations), do: %Unsupported{data: binary}

  # Walks the tuple one column at a time, consuming one column definition per
  # value, and returns {unparsed_rest, tuple_of_values}.
  defp decode_tuple_data(binary, columns_remaining, relations, accumulator \\ [])

  defp decode_tuple_data(remaining_binary, 0, _relations, accumulator) when is_binary(remaining_binary),
    do: {remaining_binary, accumulator |> Enum.reverse() |> List.to_tuple()}

  defp decode_tuple_data(<<"n", rest::binary>>, columns_remaining, [_ | relations], accumulator),
    do: decode_tuple_data(rest, columns_remaining - 1, relations, [nil | accumulator])

  defp decode_tuple_data(<<"u", rest::binary>>, columns_remaining, [_ | relations], accumulator),
    do: decode_tuple_data(rest, columns_remaining - 1, relations, [:unchanged_toast | accumulator])

  @start_date "2000-01-01T00:00:00Z"
  defp decode_tuple_data(
         <<"b", column_length::integer-32, rest::binary>>,
         columns_remaining,
         [%Column{type: type} | relations],
         accumulator
       ) do
    data = :erlang.binary_part(rest, {0, column_length})
    # Negative length: take everything after the first `column_length` bytes.
    remainder = :erlang.binary_part(rest, {byte_size(rest), -(byte_size(rest) - column_length)})

    # Only these binary-format types are handled; any other type raises CaseClauseError.
    data =
      case type do
        "bool" ->
          data == <<1>>

        "jsonb" ->
          # Binary jsonb is prefixed with a one-byte version number (1).
          <<1, rest::binary>> = data
          rest

        "timestamp" ->
          # Microseconds since 2000-01-01 as a signed 64-bit integer.
          <<microseconds::integer-64>> = data

          @start_date
          |> NaiveDateTime.from_iso8601!()
          |> NaiveDateTime.add(microseconds, :microsecond)

        "text" ->
          data

        "uuid" ->
          UUID.binary_to_string!(data)
      end

    decode_tuple_data(remainder, columns_remaining - 1, relations, [data | accumulator])
  end

  # Each column definition is: Int8 flags, NUL-terminated name, Int32 type OID,
  # Int32 type modifier.
  defp decode_columns(binary, accumulator \\ [])
  defp decode_columns(<<>>, accumulator), do: Enum.reverse(accumulator)

  defp decode_columns(<<flags::integer-8, rest::binary>>, accumulator) do
    [name | [<<data_type_id::integer-32, type_modifier::integer-32, columns::binary>>]] =
      String.split(rest, <<0>>, parts: 2)

    decoded_flags =
      case flags do
        1 -> [:key]
        _ -> []
      end

    decode_columns(columns, [
      %Column{
        name: name,
        flags: decoded_flags,
        type: OidDatabase.name_for_type_id(data_type_id),
        type_modifier: type_modifier
      }
      | accumulator
    ])
  end

  defp pgtimestamp_to_timestamp(microsecond_offset) when is_integer(microsecond_offset) do
    {:ok, epoch, 0} = @pg_epoch

    DateTime.add(epoch, microsecond_offset, :microsecond)
  end

  # An 8-byte LSN is returned as its two 32-bit halves.
  defp decode_lsn(<<xlog_file::integer-32, xlog_offset::integer-32>>),
    do: {xlog_file, xlog_offset}
end

================================================
FILE: lib/realtime/adapters/postgres/oid_database.ex
================================================
# CREDITS
# This file draws heavily from https://github.com/cainophile/pgoutput_decoder
# License: https://github.com/cainophile/pgoutput_decoder/blob/master/LICENSE

# Lifted from epgsql (src/epgsql_binary.erl), this module licensed under
# 3-clause BSD found here: https://raw.githubusercontent.com/epgsql/epgsql/devel/LICENSE

# https://github.com/brianc/node-pg-types/blob/master/lib/builtins.js
# MIT License (MIT)

#  Following query was used to generate this file:
#  SELECT json_object_agg(UPPER(PT.typname), PT.oid::int4 ORDER BY pt.oid)
#  FROM pg_type PT
#  WHERE typnamespace = (SELECT pgn.oid FROM pg_namespace pgn WHERE nspname = 'pg_catalog') -- Take only built-in Postgres types with stable OID (extension types are not guaranteed to be stable)
#  AND typtype = 'b' -- Only basic types
#  AND typisdefined -- Ignore undefined types

defmodule Realtime.Adapters.Postgres.OidDatabase do
  @moduledoc "This module maps a numeric PostgreSQL type ID to a descriptive string."

  @doc """
  Maps a numeric PostgreSQL type ID to a descriptive string.
## Examples iex> name_for_type_id(1700) "numeric" iex> name_for_type_id(25) "text" iex> name_for_type_id(3802) "jsonb" """ def name_for_type_id(type_id) do case type_id do 16 -> "bool" 17 -> "bytea" 18 -> "char" 19 -> "name" 20 -> "int8" 21 -> "int2" 22 -> "int2vector" 23 -> "int4" 24 -> "regproc" 25 -> "text" 26 -> "oid" 27 -> "tid" 28 -> "xid" 29 -> "cid" 30 -> "oidvector" 114 -> "json" 142 -> "xml" 143 -> "_xml" 194 -> "pg_node_tree" 199 -> "_json" 210 -> "smgr" 600 -> "point" 601 -> "lseg" 602 -> "path" 603 -> "box" 604 -> "polygon" 628 -> "line" 629 -> "_line" 650 -> "cidr" 651 -> "_cidr" 700 -> "float4" 701 -> "float8" 702 -> "abstime" 703 -> "reltime" 704 -> "tinterval" 718 -> "circle" 719 -> "_circle" 774 -> "macaddr8" 775 -> "_macaddr8" 790 -> "money" 791 -> "_money" 829 -> "macaddr" 869 -> "inet" 1000 -> "_bool" 1001 -> "_bytea" 1002 -> "_char" 1003 -> "_name" 1005 -> "_int2" 1006 -> "_int2vector" 1007 -> "_int4" 1008 -> "_regproc" 1009 -> "_text" 1010 -> "_tid" 1011 -> "_xid" 1012 -> "_cid" 1013 -> "_oidvector" 1014 -> "_bpchar" 1015 -> "_varchar" 1016 -> "_int8" 1017 -> "_point" 1018 -> "_lseg" 1019 -> "_path" 1020 -> "_box" 1021 -> "_float4" 1022 -> "_float8" 1023 -> "_abstime" 1024 -> "_reltime" 1025 -> "_tinterval" 1027 -> "_polygon" 1028 -> "_oid" 1033 -> "aclitem" 1034 -> "_aclitem" 1040 -> "_macaddr" 1041 -> "_inet" 1042 -> "bpchar" 1043 -> "varchar" 1082 -> "date" 1083 -> "time" 1114 -> "timestamp" 1115 -> "_timestamp" 1182 -> "_date" 1183 -> "_time" 1184 -> "timestamptz" 1185 -> "_timestamptz" 1186 -> "interval" 1187 -> "_interval" 1231 -> "_numeric" 1263 -> "_cstring" 1266 -> "timetz" 1270 -> "_timetz" 1560 -> "bit" 1561 -> "_bit" 1562 -> "varbit" 1563 -> "_varbit" 1700 -> "numeric" 1790 -> "refcursor" 2201 -> "_refcursor" 2202 -> "regprocedure" 2203 -> "regoper" 2204 -> "regoperator" 2205 -> "regclass" 2206 -> "regtype" 2207 -> "_regprocedure" 2208 -> "_regoper" 2209 -> "_regoperator" 2210 -> "_regclass" 2211 -> "_regtype" 2949 -> 
defmodule Realtime.Adapters.Postgres.Protocol.KeepAlive do
  @moduledoc """
  Primary keepalive message (B)

  Byte1('k')
  Identifies the message as a sender keepalive.

  Int64
  The current end of WAL on the server.

  Int64
  The server's system clock at the time of transmission, as microseconds since midnight on 2000-01-01.

  Byte1
  1 means that the client should reply to this message as soon as possible, to avoid a timeout disconnect. 0 otherwise.

  The receiving process can send replies back to the sender at any time (also in the
  payload of a CopyData message); see `Realtime.Adapters.Postgres.Protocol.standby_status/5`.

  ## Fields

    * `:wal_end` - the current end of WAL on the server
    * `:clock` - the server clock, in microseconds since 2000-01-01
    * `:reply` - `:now` when the reply byte is 1, `:later` when it is 0
  """

  # `:later` (not `:await`) is the atom produced by `Protocol.parse/1` and accepted by
  # `Protocol.standby_status/5`, so the type must name the same pair.
  @type t :: %__MODULE__{
          wal_end: integer(),
          clock: integer(),
          reply: :now | :later
        }

  defstruct [:wal_end, :clock, :reply]
end
defmodule Realtime.Adapters.Postgres.Protocol do
  @moduledoc """
  This module is responsible for parsing the Postgres WAL messages.

  NOTE(review): the binary patterns in this module were restored from the PostgreSQL
  streaming replication message layouts (XLogData `'w'`, primary keepalive `'k'`,
  standby status update `'r'`); confirm field widths against version control.
  """
  alias Realtime.Adapters.Postgres.Protocol.Write
  alias Realtime.Adapters.Postgres.Protocol.KeepAlive

  # Both guards look only at the first byte, which identifies the message kind.
  defguard is_write(value) when binary_part(value, 0, 1) == <<?w>>
  defguard is_keep_alive(value) when binary_part(value, 0, 1) == <<?k>>

  @doc """
  Parses a replication message into a `%Write{}` (XLogData) or `%KeepAlive{}` struct.

  Raises `FunctionClauseError` for any other binary.
  """
  def parse(<<?w, server_wal_start::64, server_wal_end::64, server_system_clock::64, message::binary>>) do
    %Write{
      server_wal_start: server_wal_start,
      server_wal_end: server_wal_end,
      server_system_clock: server_system_clock,
      message: message
    }
  end

  def parse(<<?k, wal_end::64, clock::64, reply::8>>) do
    # The reply byte is 1 when the server wants an immediate status update.
    reply =
      case reply do
        0 -> :later
        1 -> :now
      end

    %KeepAlive{wal_end: wal_end, clock: clock, reply: reply}
  end

  @doc """
  Message to send to the server to request a standby status update.

  `reply` is `:now` or `:later`; when `clock` is `nil` (the default) `current_time/0` is used.

  Check https://www.postgresql.org/docs/current/protocol-replication.html#PROTOCOL-REPLICATION-STANDBY-STATUS-UPDATE for more information
  """
  @spec standby_status(integer(), integer(), integer(), :now | :later, integer() | nil) :: [
          binary()
        ]
  def standby_status(last_wal_received, last_wal_flushed, last_wal_applied, reply, clock \\ nil)

  def standby_status(last_wal_received, last_wal_flushed, last_wal_applied, reply, nil) do
    standby_status(last_wal_received, last_wal_flushed, last_wal_applied, reply, current_time())
  end

  def standby_status(last_wal_received, last_wal_flushed, last_wal_applied, reply, clock) do
    reply =
      case reply do
        :now -> 1
        :later -> 0
      end

    [
      <<?r, last_wal_received::64, last_wal_flushed::64, last_wal_applied::64, clock::64, reply::8>>
    ]
  end

  @doc """
  Message to send the server to not do any operation since the server can wait
  """
  def hold, do: []

  # Postgres replication clocks count microseconds from 2000-01-01, not the Unix epoch.
  @epoch DateTime.to_unix(~U[2000-01-01 00:00:00Z], :microsecond)

  @doc """
  Returns the current OS time as microseconds since 2000-01-01 00:00:00 UTC.
  """
  def current_time, do: System.os_time(:microsecond) - @epoch
end
@doc """
Encrypts the sensitive entries of the `:settings` change.

`required` is a list of `{field, checker, encrypt?}` tuples; every field whose third
element is `true` has its value replaced by `Crypto.encrypt!/1` of that value.

Only valid changesets are touched. An invalid changeset (for example one where a
required setting is missing) is returned unchanged, so the validation errors reach
the caller instead of a crash inside `Crypto.encrypt!/1` on a `nil` value.
"""
def encrypt_settings(%Ecto.Changeset{valid?: true} = changeset, required) do
  update_change(changeset, :settings, fn settings ->
    Enum.reduce(required, settings, fn
      {field, _checker, true}, acc ->
        %{acc | field => Crypto.encrypt!(settings[field])}

      _, acc ->
        acc
    end)
  end)
end

def encrypt_settings(changeset, _required), do: changeset
# Builds a changeset for a realtime message.
#
# `:topic` and `:extension` are required. `:updated_at` is always set to the current
# UTC time; `:inserted_at` is only set when the changeset does not already carry one.
def changeset(message, attrs) do
  permitted = [:topic, :extension, :payload, :event, :private, :inserted_at, :updated_at]

  casted = cast(message, attrs, permitted)
  validated = validate_required(casted, [:topic, :extension])

  validated
  |> put_timestamp(:updated_at)
  |> maybe_put_timestamp(:inserted_at)
end
@doc false
def changeset(tenant, attrs) do
  # TODO: remove after infra update
  # Extensions may arrive under an atom or a string key; legacy "postgres" entries
  # are rewritten to "postgres_cdc_rls" before casting.
  extension_key = if attrs[:extensions], do: :extensions, else: "extensions"

  attrs =
    case attrs[extension_key] do
      absent when absent in [nil, false] ->
        attrs

      extensions ->
        renamed =
          for extension <- extensions do
            case extension do
              %{"type" => "postgres"} -> %{extension | "type" => "postgres_cdc_rls"}
              _ -> extension
            end
          end

        %{attrs | extension_key => renamed}
    end

  permitted = [
    :name,
    :external_id,
    :jwt_secret,
    :jwt_jwks,
    :max_concurrent_users,
    :max_events_per_second,
    :postgres_cdc_default,
    :max_bytes_per_second,
    :max_channels_per_client,
    :max_joins_per_second,
    :max_presence_events_per_second,
    :max_payload_size_in_kb,
    :suspend,
    :private_only,
    :migrations_ran,
    :broadcast_adapter,
    :max_client_presence_events_per_window,
    :client_presence_window_ms,
    :presence_enabled
  ]

  # Limits that fall back to application config when neither the stored tenant nor
  # the incoming changes provide a value. Applied in this order.
  config_defaults = [
    max_bytes_per_second: :tenant_max_bytes_per_second,
    max_channels_per_client: :tenant_max_channels_per_client,
    max_concurrent_users: :tenant_max_concurrent_users,
    max_events_per_second: :tenant_max_events_per_second,
    max_joins_per_second: :tenant_max_joins_per_second
  ]

  base =
    tenant
    |> cast(attrs, permitted)
    |> validate_required([:external_id])
    |> check_constraint(:jwt_secret,
      name: :jwt_secret_or_jwt_jwks_required,
      message: "either jwt_secret or jwt_jwks must be provided"
    )
    |> unique_constraint([:external_id])
    |> encrypt_jwt_secret()

  config_defaults
  |> Enum.reduce(base, fn {property, config_key}, acc ->
    maybe_set_default(acc, property, config_key)
  end)
  |> cast_assoc(:extensions, with: &Extensions.changeset/2)
end
@doc """
Encrypts the `:jwt_secret` change of a valid changeset with `Crypto.encrypt!/1`.

Invalid changesets, and changesets without a `:jwt_secret` change, are returned as is.
A `:jwt_secret` change of `nil` (clearing the secret) is kept as `nil`:
`update_change/3` still invokes the callback for a `nil` change, and
`Crypto.encrypt!/1` only accepts binaries.
"""
def encrypt_jwt_secret(%Ecto.Changeset{valid?: true} = changeset) do
  update_change(changeset, :jwt_secret, fn
    nil -> nil
    secret -> Crypto.encrypt!(secret)
  end)
end

def encrypt_jwt_secret(changeset), do: changeset
@doc """
Returns a list of tenants, with their extensions preloaded, read from the replica.

Options:

  * `:order_by` - name of the tenant field to sort on, as a string (default `"inserted_at"`)
  * `:order` - sort direction as a string, e.g. `"asc"` or `"desc"` (default `"desc"`)
  * `:search` - when given, only tenants whose `external_id` matches `<search>%`
    with `ilike` are returned
  * `:limit` - maximum number of tenants to return (default `50`)

Raises `ArgumentError` when `:order_by` or `:order` does not name an already existing atom.
"""
def list_tenants(opts) when is_list(opts) do
  repo_replica = Replica.replica()

  # These values come from the caller. `String.to_existing_atom/1` avoids creating a
  # new atom per distinct input; atoms are never garbage collected.
  field = opts |> Keyword.get(:order_by, "inserted_at") |> String.to_existing_atom()
  order = opts |> Keyword.get(:order, "desc") |> String.to_existing_atom()
  external_id = Keyword.get(opts, :search)
  limit = Keyword.get(opts, :limit, 50)

  query = Tenant |> order_by({^order, ^field}) |> limit(^limit)

  query =
    if external_id do
      prefix = "#{external_id}%"
      where(query, [t], ilike(t.external_id, ^prefix))
    else
      query
    end

  query |> repo_replica.all() |> repo_replica.preload(:extensions)
end
# Persists `attrs` on `tenant` and returns the raw `Repo.update/1` result.
#
# On success the cache refresh, socket disconnect, DB connection restart and rate
# counter restart hooks each get a chance to run (every hook decides for itself,
# based on the changeset, whether it has anything to do). On failure the error is
# logged.
defp update_tenant(%Tenant{} = tenant, attrs) do
  changeset = Tenant.changeset(tenant, attrs)
  outcome = Repo.update(changeset)

  with {:ok, saved} <- outcome do
    maybe_update_cache(saved, changeset)
    maybe_trigger_disconnect(changeset)
    maybe_restart_db_connection(changeset)
    maybe_restart_rate_counters(changeset)
    Logger.debug("Tenant updated: #{inspect(saved, pretty: true)}")
  else
    {:error, error} ->
      Logger.error("Failed to update tenant: #{inspect(error, pretty: true)}")
  end

  outcome
end
@doc """
Renames the settings key `from` to `to` on every `postgres_cdc_rls` extension.

In the master region, returns one result per extension: the `Repo.update/1` result
for extensions that had the `from` key, and `{:ok, extension}` for extensions left
untouched. Extensions without the `from` key are skipped so that running the rename
a second time does not overwrite an already-renamed `to` value with `nil`.

In any other region the call is forwarded to the master region.
"""
def rename_settings_field(from, to) do
  if master_region?() do
    for extension <- list_extensions("postgres_cdc_rls") do
      if Map.has_key?(extension.settings, from) do
        {value, remaining} = Map.pop(extension.settings, from)

        extension
        |> Changeset.cast(%{settings: Map.put(remaining, to, value)}, [:settings])
        |> Repo.update()
      else
        # Nothing to rename for this extension.
        {:ok, extension}
      end
    end
  else
    call(:rename_settings_field, [from, to], from)
  end
end
# For every tenant field listed in @field_to_rate_counter_key that changed in this
# changeset, publishes an update for each rate counter key derived from the
# tenant's external id. Always returns :ok.
defp maybe_restart_rate_counters(changeset) do
  tenant_id = Changeset.fetch_field!(changeset, :external_id)

  for {field, key_fns} <- @field_to_rate_counter_key,
      Changeset.changed?(changeset, field),
      key_fn <- key_fns do
    RateCounter.publish_update(key_fn.(tenant_id))
  end

  :ok
end
# Runs `operation` of this module on `master_node` through GenRpc and normalises the
# outcome: both error shapes become `{:error, reason}`, anything else is wrapped as
# `{:ok, result}`.
defp wrapped_call(master_node, operation, args, tenant_id) do
  master_node
  |> GenRpc.call(__MODULE__, operation, args, tenant_id: tenant_id)
  |> case do
    {:error, :rpc_error, reason} -> {:error, reason}
    {:error, _reason} = error -> error
    other -> {:ok, other}
  end
end
:gen_event.swap_sup_handler( :erl_signal_server, {:erl_signal_handler, []}, {Realtime.SignalHandler, %{handler_mod: :erl_signal_handler}} ) :ets.new(Realtime.Tenants.Connect, [:named_table, :set, :public]) :syn.set_event_handler(Realtime.SynHandler) :ok = :syn.add_node_to_scopes([RegionNodes, Realtime.Tenants.Connect]) region = Application.get_env(:realtime, :region) broadcast_pool_size = Application.get_env(:realtime, :broadcast_pool_size, 10) presence_pool_size = Application.get_env(:realtime, :presence_pool_size, 10) presence_broadcast_period = Application.get_env(:realtime, :presence_broadcast_period, 1_500) presence_permdown_period = Application.get_env(:realtime, :presence_permdown_period, 1_200_000) migration_partition_slots = Application.get_env(:realtime, :migration_partition_slots) connect_partition_slots = Application.get_env(:realtime, :connect_partition_slots) no_channel_timeout_in_ms = Application.get_env(:realtime, :no_channel_timeout_in_ms) master_region = Application.get_env(:realtime, :master_region) || region user_scope_shards = Application.fetch_env!(:realtime, :users_scope_shards) user_scope_broadast_interval_in_ms = Application.get_env(:realtime, :users_scope_broadcast_interval_in_ms, 10_000) :syn.join(RegionNodes, region, self(), node: node()) zta_children = case Application.get_env(:realtime, :dashboard_auth) do :zta -> [{NimbleZTA.Cloudflare, name: Realtime.ZTA, identity_key: System.fetch_env!("CF_TEAM_DOMAIN")}] _ -> [] end children = [ Realtime.ErlSysMon, Realtime.GenCounter, Realtime.PromEx, Realtime.TenantPromEx, {Realtime.Telemetry.Logger, handler_id: "telemetry-logger"}, RealtimeWeb.Telemetry, {Cluster.Supervisor, [topologies, [name: Realtime.ClusterSupervisor]]}, {Phoenix.PubSub, name: Realtime.PubSub, pool_size: 10, adapter: pubsub_adapter(), broadcast_pool_size: broadcast_pool_size}, {Beacon, [ :users, [ partitions: user_scope_shards, broadcast_interval_in_ms: user_scope_broadast_interval_in_ms, message_module: 
Realtime.BeaconPubSubAdapter ] ]}, {Cachex, name: Realtime.RateCounter}, Realtime.Tenants.Cache, Realtime.RateCounter.DynamicSupervisor, Realtime.Latency, {Registry, keys: :duplicate, name: Realtime.Registry}, {Registry, keys: :unique, name: Realtime.Registry.Unique}, {Registry, keys: :unique, name: Realtime.Tenants.Connect.Registry}, {Registry, keys: :unique, name: Extensions.PostgresCdcRls.ReplicationPoller.Registry}, {Registry, keys: :duplicate, partitions: System.schedulers_online() * 2, name: RealtimeWeb.SocketDisconnect.Registry}, {Task.Supervisor, name: Realtime.TaskSupervisor}, {Task.Supervisor, name: Realtime.Tenants.Migrations.TaskSupervisor}, {PartitionSupervisor, child_spec: {DynamicSupervisor, max_restarts: 0}, strategy: :one_for_one, name: Migrations.DynamicSupervisor, partitions: migration_partition_slots}, {PartitionSupervisor, child_spec: DynamicSupervisor, strategy: :one_for_one, name: ReplicationConnection.DynamicSupervisor, partitions: connect_partition_slots}, {PartitionSupervisor, child_spec: DynamicSupervisor, strategy: :one_for_one, name: Connect.DynamicSupervisor, partitions: connect_partition_slots}, {RealtimeWeb.RealtimeChannel.Tracker, check_interval_in_ms: no_channel_timeout_in_ms}, RealtimeWeb.Endpoint, {RealtimeWeb.Presence, pool_size: presence_pool_size, broadcast_period: presence_broadcast_period, permdown_period: presence_permdown_period} ] ++ extensions_supervisors() ++ janitor_tasks() ++ metrics_pusher_children() ++ zta_children database_connections = if master_region == region, do: [Realtime.Repo], else: [Replica.replica()] children = database_connections ++ children # See https://hexdocs.pm/elixir/Supervisor.html # for other strategies and supported options opts = [strategy: :one_for_one, name: Realtime.Supervisor] Supervisor.start_link(children, opts) end defp extensions_supervisors do Enum.reduce(Application.get_env(:realtime, :extensions), [], fn {_, %{supervisor: name}}, acc -> opts = %{ id: name, start: {name, :start_link, 
# Normalises the `:region_mapping` application env into a map of string keys to
# string values.
#
# Accepts `nil` (nothing to do), a JSON object string, or an already decoded map.
# The map case matters because this function stores the decoded map back under the
# same key, so a second call sees a map rather than the original JSON string.
# Raises `RegionMappingError` when the value cannot be used.
defp setup_region_mapping do
  case Application.get_env(:realtime, :region_mapping) do
    nil ->
      :ok

    mapping when is_map(mapping) ->
      put_region_mapping!(mapping)

    mapping_json when is_binary(mapping_json) ->
      case Jason.decode(mapping_json) do
        {:ok, mapping} when is_map(mapping) ->
          put_region_mapping!(mapping)

        {:ok, _} ->
          raise RegionMappingError, message: "REGION_MAPPING must be a JSON object"

        {:error, %Jason.DecodeError{} = error} ->
          raise RegionMappingError, message: "Failed to parse REGION_MAPPING: #{Exception.message(error)}"
      end
  end
end

# Stores `mapping` in the application env after checking every key and value is a string.
defp put_region_mapping!(mapping) do
  if Enum.all?(mapping, fn {k, v} -> is_binary(k) and is_binary(v) end) do
    Application.put_env(:realtime, :region_mapping, mapping)
  else
    raise RegionMappingError, message: "REGION_MAPPING must contain only string keys and values"
  end
end
defmodule Realtime.Crypto do
  @moduledoc """
  Encrypt and decrypt operations required by Realtime.

  It uses the secret set on Application.get_env(:realtime, :db_enc_key)

  NOTE(review): the cipher is AES-128 in ECB mode, which is deterministic (equal
  plaintexts produce equal ciphertexts). Changing it would make values that are
  already stored undecryptable, so it is left as is here.
  """

  @doc """
  Encrypts the given text

  The text is padded to a multiple of 16 bytes, encrypted, and returned Base64 encoded.
  """
  @spec encrypt!(binary()) :: binary()
  def encrypt!(text) do
    secret_key = Application.get_env(:realtime, :db_enc_key)

    :aes_128_ecb
    |> :crypto.crypto_one_time(secret_key, pad(text), true)
    |> Base.encode64()
  end

  @doc """
  Decrypts the given base64 encoded text

  Raises `ArgumentError` when `base64_text` is not valid Base64.
  """
  @spec decrypt!(binary()) :: binary()
  def decrypt!(base64_text) do
    secret_key = Application.get_env(:realtime, :db_enc_key)
    crypto_text = Base.decode64!(base64_text)

    :aes_128_ecb
    |> :crypto.crypto_one_time(secret_key, crypto_text, false)
    |> unpad()
  end

  # Appends 1..16 bytes, each holding the number of bytes added, so the length is a
  # multiple of the 16-byte block size. `unpad/1` reads that count from the last byte.
  defp pad(data) do
    to_add = 16 - rem(byte_size(data), 16)
    data <> :binary.copy(<<to_add>>, to_add)
  end

  # Drops as many trailing bytes as the value of the last byte says were added.
  defp unpad(data) do
    to_remove = :binary.last(data)
    :binary.part(data, 0, byte_size(data) - to_remove)
  end
end
@doc """
Builds the database connection struct for `tenant`.

The tenant's extensions are filtered down to the `postgres_cdc_rls` settings, which
are then handed to `from_settings/3` together with `application_name` and `backoff`.
"""
@spec from_tenant(Tenant.t(), binary(), :stop | :exp | :rand | :rand_exp) :: t()
def from_tenant(%Tenant{} = tenant, application_name, backoff \\ :rand_exp) do
  cdc_settings = Realtime.PostgresCdc.filter_settings(@cdc, tenant.extensions)
  from_settings(cdc_settings, application_name, backoff)
end
@doc """
Builds a database connection struct from an extension settings map.

The `"db_host"`, `"db_port"`, `"db_name"`, `"db_user"` and `"db_password"` entries
are decrypted before use. SSL is configured with `verify: :verify_none` unless
`default_ssl_param/1` returns `false`, and the queue target falls back to `5_000`
when `"db_queue_target"` is not set.
"""
@spec from_settings(map(), binary(), :stop | :exp | :rand | :rand_exp) :: t()
def from_settings(settings, application_name, backoff \\ :rand_exp) do
  # Computed from the settings as received, before the decrypted values are merged in.
  pool = pool_size_by_application_name(application_name, settings)

  decrypted =
    settings
    |> Map.take(["db_host", "db_port", "db_name", "db_user", "db_password"])
    |> Map.new(fn {key, encrypted} -> {key, Crypto.decrypt!(encrypted)} end)

  settings = Map.merge(settings, decrypted)

  {:ok, addrtype} = detect_ip_version(settings["db_host"])

  ssl =
    case default_ssl_param(settings) do
      true -> [verify: :verify_none]
      false -> false
    end

  %__MODULE__{
    hostname: settings["db_host"],
    port: String.to_integer(settings["db_port"]),
    database: settings["db_name"],
    username: settings["db_user"],
    password: settings["db_password"],
    pool_size: pool,
    queue_target: settings["db_queue_target"] || 5_000,
    application_name: application_name,
    backoff_type: backoff,
    socket_options: [addrtype],
    ssl: ssl
  }
end
log_error("DatabaseLackOfConnections", msg) GenServer.stop(conn) {:error, :tenant_db_too_many_connections} end else {:error, e} -> log_error("UnableToConnectToTenantDatabase", e) {:error, e} end end) end @migrations_table_exists_query """ SELECT to_regclass('realtime.schema_migrations') IS NOT NULL """ @migrations_count_query """ SELECT count(*)::int FROM realtime.schema_migrations """ @connections_query """ SELECT (current_setting('max_connections')::int - count(*))::int FROM pg_stat_activity WHERE application_name != 'realtime_connect' """ defp query_connection_info(conn) do Postgrex.transaction(conn, fn conn -> %{rows: [[available_connections]]} = Postgrex.query!(conn, @connections_query, []) %{rows: [[table_exists]]} = Postgrex.query!(conn, @migrations_table_exists_query, []) %{rows: [[migrations_ran]]} = if table_exists, do: Postgrex.query!(conn, @migrations_count_query, []), else: %{rows: [[0]]} [available_connections, migrations_ran] end) rescue e -> GenServer.stop(conn) {:error, e} end @doc """ Connects to the database using the given settings. """ @spec connect(Tenant.t(), binary(), :stop | :exp | :rand | :rand_exp) :: {:ok, pid()} | {:error, any()} def connect(tenant, application_name, backoff \\ :stop) do tenant |> from_tenant(application_name, backoff) |> connect_db() end @doc """ If the param `ssl_enforced` is not set, it defaults to true. 
""" @spec default_ssl_param(map) :: boolean def default_ssl_param(%{"ssl_enforced" => ssl_enforced}) when is_boolean(ssl_enforced), do: ssl_enforced def default_ssl_param(_), do: true @doc """ Runs database transaction in local node or against a target node withing a Postgrex transaction """ @spec transaction(pid | DBConnection.t(), fun(), keyword(), keyword()) :: {:ok, any()} | {:error, any()} def transaction(db_conn, func, opts \\ [], metadata \\ []) def transaction(%DBConnection{} = db_conn, func, opts, metadata), do: transaction_catched(db_conn, func, opts, metadata) def transaction(db_conn, func, opts, metadata) when node() == node(db_conn), do: transaction_catched(db_conn, func, opts, metadata) def transaction(db_conn, func, opts, metadata) do metadata = Keyword.put(metadata, :target, node(db_conn)) args = [db_conn, func, opts, metadata] case Rpc.enhanced_call(node(db_conn), __MODULE__, :transaction, args, metadata) do {:ok, value} -> {:ok, value} {:error, :rpc_error, error} -> {:error, error} {:error, error} -> {:error, error} end end defp transaction_catched(db_conn, func, opts, metadata) do telemetry = Keyword.get(opts, :telemetry, nil) if telemetry do tenant_id = Keyword.get(opts, :tenant_id, nil) {latency, value} = :timer.tc(Postgrex, :transaction, [db_conn, func, opts], :millisecond) Telemetry.execute(telemetry, %{latency: latency}, %{tenant: tenant_id}) value else Postgrex.transaction(db_conn, func, opts) end rescue e -> log_error("ErrorExecutingTransaction", e, metadata) {:error, e} catch :exit, reason -> log_error("ErrorExecutingTransaction", reason, metadata) {:error, {:exit, reason}} end @spec connect_db(__MODULE__.t()) :: {:ok, pid()} | {:error, any()} def connect_db(%__MODULE__{} = settings) do %__MODULE__{ hostname: hostname, port: port, database: database, username: username, password: password, pool_size: pool_size, queue_target: queue_target, application_name: application_name, backoff_type: backoff_type, max_restarts: max_restarts, 
socket_options: socket_options, ssl: ssl } = settings metadata = Logger.metadata() [ hostname: hostname, port: port, database: database, username: username, password: password, pool_size: pool_size, queue_target: queue_target, parameters: [application_name: application_name], socket_options: socket_options, backoff_type: backoff_type, ssl: ssl, configure: fn args -> metadata |> Keyword.put(:application_name, application_name) |> Logger.metadata() args end ] |> then(fn opts -> if max_restarts, do: Keyword.put(opts, :max_restarts, max_restarts), else: opts end) |> Postgrex.start_link() end @doc """ Returns the pool size for a given application name. Override pool size if provided. `realtime_rls` and `realtime_broadcast_changes` will be handled as a special scenario as it will need to be hardcoded as 1 otherwise replication slots will be tried to be reused leading to errors `realtime_migrations` will be handled as a special scenario as it requires 2 connections. """ @spec pool_size_by_application_name(binary(), map() | nil) :: non_neg_integer() def pool_size_by_application_name(application_name, settings) do case application_name do "realtime_subscription_manager" -> 1 "realtime_subscription_manager_pub" -> settings["subs_pool_size"] || 1 "realtime_subscription_checker" -> 1 "realtime_connect" -> settings["db_pool"] || 1 "realtime_health_check" -> 1 "realtime_janitor" -> 1 "realtime_migrations" -> 2 "realtime_broadcast_changes" -> 1 "realtime_rls" -> 1 "realtime_replication_slot_teardown" -> 1 _ -> 1 end end @doc """ Gets the external id from a host connection string found in the conn. """ @spec get_external_id(String.t()) :: {:ok, String.t()} | {:error, atom()} def get_external_id(host) when is_binary(host) do case String.split(host, ".", parts: 2) do [id] -> {:ok, id} [id, _] -> {:ok, id} end end @doc """ Detects the IP version for a given host. 
""" @spec detect_ip_version(String.t()) :: {:ok, :inet | :inet6} | {:error, :nxdomain} def detect_ip_version(host) when is_binary(host) do host = String.to_charlist(host) if match?({:ok, _}, :inet6_tcp.getaddr(host)) do {:ok, :inet6} else case :inet.gethostbyname(host) do {:ok, hostent} -> [addr | _] = elem(hostent, 5) resolved = addr |> :inet.ntoa() |> to_string() log_warning("IpV4Detected", "IPv4 detected for host #{inspect(host)} resolved to #{resolved}") {:ok, :inet} _ -> {:error, :nxdomain} end end end @doc """ Terminates all replication slots with the name containing 'realtime' in the tenant database. """ @spec replication_slot_teardown(Tenant.t()) :: :ok def replication_slot_teardown(tenant) do {:ok, conn} = connect(tenant, "realtime_replication_slot_teardown") query = "select slot_name from pg_replication_slots where slot_name like '%realtime%'" with {:ok, %{rows: [rows]}} <- Postgrex.query(conn, query, []) do rows |> Enum.reject(&is_nil/1) |> Enum.each(&replication_slot_teardown(conn, &1)) end GenServer.stop(conn) :ok end @doc """ Terminates replication slot with a given name in the tenant database. """ @spec replication_slot_teardown(pid() | Tenant.t(), String.t()) :: :ok def replication_slot_teardown(%Tenant{} = tenant, slot_name) do {:ok, conn} = connect(tenant, "realtime_replication_slot_teardown") replication_slot_teardown(conn, slot_name) :ok end def replication_slot_teardown(conn, slot_name) do Postgrex.query( conn, "select active_pid, pg_terminate_backend(active_pid), pg_drop_replication_slot(slot_name) from pg_replication_slots where slot_name = $1", [slot_name] ) Postgrex.query(conn, "select pg_drop_replication_slot($1)", [slot_name]) :ok end @doc """ Transforms database settings into keyword list to be used by Postgrex. 
## Examples

  iex> Database.opts(%Database{hostname: "localhost", port: 5432, database: "realtime", username: "postgres", password: "postgres", application_name: "test", backoff_type: :stop, pool_size: 10, queue_target: 10_000, socket_options: [:inet], ssl: true}) |> Enum.sort()
  [
    application_name: "test",
    backoff_type: :stop,
    database: "realtime",
    hostname: "localhost",
    max_restarts: nil,
    password: "postgres",
    pool_size: 10,
    port: 5432,
    queue_target: 10000,
    socket_options: [:inet],
    ssl: true,
    username: "postgres"
  ]
  """
  @spec opts(__MODULE__.t()) :: keyword()
  def opts(%__MODULE__{} = settings) do
    # Struct keys are unique atoms, so the pair list is already a keyword list
    Map.to_list(Map.from_struct(settings))
  end

  # Sum of the pool sizes of every application name that connects to the tenant database
  defp tenant_pool_requirements(settings) do
    [
      "realtime_subscription_manager",
      "realtime_subscription_manager_pub",
      "realtime_subscription_checker",
      "realtime_health_check",
      "realtime_janitor",
      "realtime_migrations",
      "realtime_broadcast_changes",
      "realtime_rls",
      "realtime_replication_slot_teardown",
      "realtime_connect"
    ]
    |> Enum.map(&pool_size_by_application_name(&1, settings))
    |> Enum.sum()
  end
end

================================================
FILE: lib/realtime/gen_counter/gen_counter.ex
================================================
defmodule Realtime.GenCounter do
  @moduledoc """
  Process holds an ETS table where each row is a key and a counter
  """
  use GenServer

  @name __MODULE__
  @table :gen_counter

  @spec start_link(any) :: GenServer.on_start()
  def start_link(_), do: GenServer.start_link(__MODULE__, :ok, name: @name)

  # Increments the counter stored under `term` by `count` (1 by default) and returns the new
  # value; a missing row is created starting from 0.
  @spec add(term, integer) :: integer
  def add(term, count \\ 1), do: :ets.update_counter(@table, term, count, {term, 0})

  # Current value of the counter, 0 when there is no row for `term`
  @spec get(term) :: integer
  def get(term) do
    case :ets.lookup(@table, term) do
      [] -> 0
      [{^term, current}] -> current
    end
  end

  @doc "Reset counter to 0 and return previous value"
  @spec reset(term) :: integer
  def reset(term) do
    # We might lose some updates between lookup and the update
    case get(term) do
      0 ->
        # Missing row or already zero: nothing to write
        0

      previous ->
        :ets.update_element(@table, term, {2, 0}, {term, 0})
        previous
    end
  end

  @spec delete(term) :: :ok
  def delete(term) do
    true = :ets.delete(@table, term)
    :ok
  end

  @impl true
  def init(_) do
    ets_opts = [
      :set,
      :public,
      :named_table,
      {:decentralized_counters, true},
      {:write_concurrency, :auto}
    ]

    {:ok, :ets.new(@table, ets_opts)}
  end
end

================================================
FILE: lib/realtime/gen_rpc/pub_sub.ex
================================================
defmodule Realtime.GenRpcPubSub do
  @moduledoc """
  gen_rpc Phoenix.PubSub adapter
  """

  @behaviour Phoenix.PubSub.Adapter
  alias Realtime.GenRpc
  alias Realtime.GenRpcPubSub.Worker
  alias Realtime.Nodes
  use Supervisor

  @impl true
  def node_name(_), do: node()

  # Supervisor callbacks

  def start_link(opts) do
    adapter_name = Keyword.fetch!(opts, :adapter_name)
    name = Keyword.fetch!(opts, :name)
    # `:broadcast_pool_size` falls back to `:pool_size`, which itself defaults to 1
    default_pool_size = Keyword.get(opts, :pool_size, 1)
    broadcast_pool_size = Keyword.get(opts, :broadcast_pool_size, default_pool_size)
    supervisor_name = :"#{name}#{adapter_name}_supervisor"

    Supervisor.start_link(__MODULE__, {adapter_name, name, broadcast_pool_size}, name: supervisor_name)
  end

  @impl true
  def init({adapter_name, pubsub, pool_size}) do
    workers = Enum.map(1..pool_size, fn number -> :"#{pubsub}#{adapter_name}_#{number}" end)

    # Worker names are kept as a tuple in persistent_term so `worker_name/2` can index into them
    :persistent_term.put(adapter_name, List.to_tuple(workers))

    children =
      Enum.map(workers, fn worker ->
        Supervisor.child_spec({Worker, {pubsub, worker}}, id: worker)
      end)

    Supervisor.init(children, strategy: :one_for_one)
  end

  # Picks the worker for `key` by hashing it over the registered worker names
  defp worker_name(adapter_name, key) do
    pool = :persistent_term.get(adapter_name)
    index = :erlang.phash2(key, tuple_size(pool))
    elem(pool, index)
  end

  @impl true
  def broadcast(adapter_name, topic, message, dispatcher) do
    caller = self()
    worker = worker_name(adapter_name, caller)
    local_forward = Worker.forward_to_local(topic, message, dispatcher)

    if Application.get_env(:realtime, :regional_broadcasting, false) do
      my_region = Application.get_env(:realtime, :region)

      # broadcast to all other nodes in the region
      region_peers = Enum.reject(Nodes.region_nodes(my_region), &(&1 == node()))
      GenRpc.abcast(region_peers, worker, local_forward, key: caller)

      # send a message to a node in each region to forward to the rest of the region
      relay_nodes = nodes_from_other_regions(my_region, caller)
      GenRpc.abcast(relay_nodes, worker, Worker.forward_to_region(topic, message, dispatcher), key: caller)
    else
      GenRpc.abcast(Node.list(), worker, local_forward, key: caller)
    end

    :ok
  end

  # One node per region other than `my_region`; regions without a selectable node are skipped
  defp nodes_from_other_regions(my_region, key) do
    for region <- Nodes.all_node_regions(),
        region !== my_region,
        {:ok, node} <- [Nodes.node_from_region(region, key)],
        do: node
  end

  @impl true
  def direct_broadcast(adapter_name, node_name, topic, message, dispatcher) do
    caller = self()
    payload = Worker.forward_to_local(topic, message, dispatcher)

    adapter_name
    |> worker_name(caller)
    |> then(&GenRpc.abcast([node_name], &1, payload, key: caller))
  end
end

defmodule Realtime.GenRpcPubSub.Worker do
  @moduledoc false
  use GenServer

  # Message asking the receiving worker to broadcast on its local node only
  def forward_to_local(topic, message, dispatcher), do: {:ftl, topic, message, dispatcher}
  # Message asking the receiving worker to broadcast locally and relay to its region
  def forward_to_region(topic, message, dispatcher), do: {:ftr, topic, message, dispatcher}

  @doc false
  def start_link({pubsub, worker}), do: GenServer.start_link(__MODULE__, {pubsub, worker}, name: worker)

  @impl true
  def init({_pubsub, _worker} = state) do
    Process.flag(:message_queue_data, :off_heap)
    Process.flag(:fullsweep_after, 20)
    {:ok, state}
  end

  @impl true
  # Forward to local
  def handle_info({:ftl, topic, message, dispatcher}, {pubsub, _worker} = state) do
    Phoenix.PubSub.local_broadcast(pubsub, topic, message, dispatcher)
    {:noreply, state}
  end

  # Forward to the rest of the region
  def handle_info({:ftr, topic, message, dispatcher}, {pubsub, worker} = state) do
    # Forward to local first
    Phoenix.PubSub.local_broadcast(pubsub, topic, message, dispatcher)

    # Then broadcast to the rest of my region
    my_region = Application.get_env(:realtime, :region)

    case Enum.reject(Realtime.Nodes.region_nodes(my_region), &(&1 == node())) do
      [] -> :ok
      peers -> Realtime.GenRpc.abcast(peers, worker, forward_to_local(topic, message, dispatcher), [])
    end

    {:noreply, state}
  end

  @impl true
  def handle_info(_, pubsub), do: {:noreply, pubsub}
end

================================================
FILE: lib/realtime/gen_rpc.ex
================================================
defmodule Realtime.GenRpc do
  @moduledoc """
  RPC module for Realtime using :gen_rpc

  :max_gen_rpc_clients is the maximum number of clients (TCP connections) used by gen_rpc between two nodes
  """
  use Realtime.Logs

  alias Realtime.Telemetry

  @type result :: any | {:error, :rpc_error, reason :: any}

  @doc """
  Broadcasts the message `msg` asynchronously to the registered process `name` on the specified `nodes`.

  Options:

  - `:key` - Optional key to consistently select the same gen_rpc clients to guarantee message order between nodes
  """
  @spec abcast([node], atom, any, keyword()) :: :ok
  def abcast(nodes, name, msg, opts) when is_list(nodes) and is_atom(name) and is_list(opts) do
    targets = rpc_nodes(nodes, Keyword.get(opts, :key, nil))
    :gen_rpc.abcast(targets, name, msg)
    :ok
  end

  @doc """
  Fire and forget apply(mod, func, args) on one node

  Options:

  - `:key` - Optional key to consistently select the same gen_rpc client to guarantee some message order between nodes
  """
  @spec cast(node, module, atom, list(any), keyword()) :: :ok
  def cast(node, mod, func, args, opts \\ [])

  # Local
  def cast(node, mod, func, args, _opts) when node == node() do
    :erpc.cast(node, mod, func, args)
    :ok
  end

  def cast(node, mod, func, args, opts)
      when is_atom(node) and is_atom(mod) and is_atom(func) and is_list(args) and is_list(opts) do
    key = Keyword.get(opts, :key, nil)

    # Ensure this node is part of the connected nodes
    if node in Node.list(), do: :gen_rpc.cast(rpc_node(node, key), mod, func, args)

    :ok
  end

  @doc """
  Fire and forget apply(mod, func,
args) on all nodes

  Options:

  - `:key` - Optional key to consistently select the same gen_rpc clients to guarantee message order between nodes
  """
  @spec multicast(module, atom, list(any), keyword()) :: :ok
  def multicast(mod, func, args, opts \\ [])
      when is_atom(mod) and is_atom(func) and is_list(args) and is_list(opts) do
    remote_targets = rpc_nodes(Node.list(), Keyword.get(opts, :key, nil))

    # Use erpc for the local node because :gen_rpc tries to connect with the local node
    :ok = :erpc.cast(Node.self(), mod, func, args)
    :gen_rpc.eval_everywhere(remote_targets, mod, func, args)
    :ok
  end

  @doc """
  Calls node to apply(mod, func, args)

  Options:

  - `:key` - Optional key to consistently select the same gen_rpc clients to guarantee message order between nodes
  - `:tenant_id` - Tenant ID for logging, defaults to nil
  - `:timeout` - timeout in milliseconds for the RPC call, defaults to 5000ms
  """
  @spec call(node, module, atom, list(any), keyword()) :: result
  def call(node, mod, func, args, opts)
      when is_atom(node) and is_atom(mod) and is_atom(func) and is_list(args) and is_list(opts) do
    reachable? = node == node() or node in Node.list()

    case reachable? do
      true ->
        do_call(node, mod, func, args, opts)

      false ->
        # Unknown node: fail fast instead of letting gen_rpc try to reach it
        tenant_id = Keyword.get(opts, :tenant_id)

        log_error(
          "ErrorOnRpcCall",
          %{target: node, mod: mod, func: func, error: :badnode},
          project: tenant_id,
          external_id: tenant_id
        )

        {:error, :rpc_error, :badnode}
    end
  end

  # Performs the gen_rpc call, measuring its latency and reporting telemetry for the outcome
  defp do_call(node, mod, func, args, opts) do
    timeout = Keyword.get(opts, :timeout, default_rpc_timeout())
    tenant_id = Keyword.get(opts, :tenant_id)
    target = rpc_node(node, Keyword.get(opts, :key, nil))

    {latency, response} = :timer.tc(fn -> :gen_rpc.call(target, mod, func, args, timeout) end)

    case response do
      {:badrpc, raw_reason} ->
        reason = unwrap_reason(raw_reason)

        log_error(
          "ErrorOnRpcCall",
          %{target: node, mod: mod, func: func, error: reason},
          project: tenant_id,
          external_id: tenant_id
        )

        telemetry_failure(node, latency)
        {:error, :rpc_error, reason}

      {:error, _} = error ->
        telemetry_failure(node, latency)
        error

      other ->
        telemetry_success(node, latency)
        other
    end
  end

  # Not using :gen_rpc.multicall here because we can't see the actual results on errors

  @doc """
  Evaluates apply(mod, func, args) on all nodes

  Options:

  - `:timeout` - timeout for the RPC call, defaults to 5000ms
  - `:tenant_id` - tenant ID for telemetry and logging, defaults to nil
  - `:key` - Optional key to consistently select the same gen_rpc clients to guarantee message order between nodes
  """
  @spec multicall(module, atom, list(any), keyword()) :: [{node, result}]
  def multicall(mod, func, args, opts \\ [])
      when is_atom(mod) and is_atom(func) and is_list(args) and is_list(opts) do
    timeout = Keyword.get(opts, :timeout, default_rpc_timeout())
    tenant_id = Keyword.get(opts, :tenant_id)
    targets = rpc_nodes([node() | Node.list()], Keyword.get(opts, :key, nil))

    # Fire every call first, remembering when each one was started
    pending =
      for {node, _client} = target <- targets do
        {node, :erlang.monotonic_time(), async_call(target, mod, func, args)}
      end

    # Latency here is the amount of time that it takes for this node to gather the result.
    # If one node takes a while to reply the remaining calls will have at least the latency reported by this node
    # Example:
    # Node A, B and C receive the calls in this order
    # Node A takes 500ms to return on nb_yield
    # Node B and C will report at least 500ms to return regardless how long it took for them to actually reply back
    gathered =
      Enum.map(pending, fn {node, started_at, ref} ->
        outcome =
          case nb_yield(node, ref, timeout) do
            :timeout -> {:error, :rpc_error, :timeout}
            {:value, {:badrpc, reason}} -> {:error, :rpc_error, unwrap_reason(reason)}
            {:value, value} -> value
          end

        elapsed = :erlang.monotonic_time() - started_at
        {node, :erlang.convert_time_unit(elapsed, :native, :microsecond), outcome}
      end)

    # Only `{:ok, _}` results count as success; everything else is reported as a failure
    Enum.map(gathered, fn
      {node, latency, {:error, :rpc_error, reason} = outcome} ->
        log_error(
          "ErrorOnRpcCall",
          %{target: node, mod: mod, func: func, error: reason},
          project: tenant_id,
          external_id: tenant_id
        )

        telemetry_failure(node, latency)
        {node, outcome}

      {node, latency, {:ok, _} = outcome} ->
        telemetry_success(node, latency)
        {node, outcome}

      {node, latency, outcome} ->
        telemetry_failure(node, latency)
        {node, outcome}
    end)
  end

  defp telemetry_success(node, latency), do: rpc_telemetry(node, latency, true)

  defp telemetry_failure(node, latency), do: rpc_telemetry(node, latency, false)

  # Emits the `[:realtime, :rpc]` event for a call from this node to `node`
  defp rpc_telemetry(node, latency, success?) do
    Telemetry.execute(
      [:realtime, :rpc],
      %{latency: latency},
      %{origin_node: node(), target_node: node, success: success?, mechanism: :gen_rpc}
    )
  end

  # Max amount of clients (TCP connections) used by gen_rpc
  defp max_clients(), do: Application.fetch_env!(:realtime, :max_gen_rpc_clients)

  defp rpc_nodes(nodes, key), do: Enum.map(nodes, fn node -> rpc_node(node, key) end)

  # Tag the node with a random number from 1 to max_clients
  # This ensures that we don't use the same client/tcp connection for this node
  defp rpc_node(node, nil), do: {node, :rand.uniform(max_clients())}

  # Tag the node with a number from 1 to max_clients derived from the key
  # Using phash2 to ensure the same key and the same client per node
  defp rpc_node(node, key), do: {node, :erlang.phash2(key, max_clients()) + 1}

  defp unwrap_reason({:unknown_error, {{:badrpc, reason}, _}}), do: reason
  defp unwrap_reason(reason), do: reason

  defp default_rpc_timeout, do: Application.get_env(:realtime, :rpc_timeout, 5_000)

  # Here we run the async_call on all nodes using gen_rpc except the local node
  # This is because gen_rpc does not have a bypass for local node on multicall
  # For the local node we use rpc instead
  defp async_call({node, _}, mod, func, args) when node == node(), do: :rpc.async_call(node, mod, func, args)
  defp async_call(tagged_node, mod, func, args), do: :gen_rpc.async_call(tagged_node, mod, func, args)

  defp nb_yield(node, ref, timeout) when node == node(), do: :rpc.nb_yield(ref, timeout)
  defp nb_yield(_node, ref, timeout), do: :gen_rpc.nb_yield(ref, timeout)
end

================================================
FILE: lib/realtime/helpers.ex
================================================ defmodule Realtime.Helpers do @moduledoc """ This module includes helper functions for different contexts that can't be union in one module. """ require Logger @spec cancel_timer(reference() | nil) :: non_neg_integer() | false | :ok | nil def cancel_timer(nil), do: nil def cancel_timer(ref), do: Process.cancel_timer(ref) @doc """ Takes the first N items from the queue and returns the list of items and the new queue. ## Examples iex> q = :queue.new() iex> q = :queue.in(1, q) iex> q = :queue.in(2, q) iex> q = :queue.in(3, q) iex> Realtime.Helpers.queue_take(q, 2) {[2, 1], {[], [3]}} """ @spec queue_take(:queue.queue(), non_neg_integer()) :: {list(), :queue.queue()} def queue_take(q, count) do Enum.reduce_while(1..count, {[], q}, fn _, {items, queue} -> case :queue.out(queue) do {{:value, item}, new_q} -> {:cont, {[item | items], new_q}} {:empty, new_q} -> {:halt, {items, new_q}} end end) end end ================================================ FILE: lib/realtime/log_filter.ex ================================================ defmodule Realtime.LogFilter do @moduledoc """ Primary logger filter that suppresses noisy errors from dependencies. """ @filter_id :connection_noise @doc """ Installs the primary filter into the Erlang logger. Safe to call multiple times. """ def setup do case :logger.add_primary_filter(@filter_id, {&filter/2, []}) do :ok -> :ok {:error, {:already_exist, @filter_id}} -> :ok end end @doc """ Filter function passed to `:logger.add_primary_filter/2`. Returns `:stop` to suppress the event or the original event map to allow it through. 
""" def filter( %{msg: {:report, %{label: {:gen_statem, :terminate}, reason: {_, %DBConnection.ConnectionError{}, _}}}}, _ ), do: :stop def filter(%{meta: %{mfa: {DBConnection.Connection, _, _}}}, _), do: :stop @ranch_format "Ranch listener ~p had connection process started with ~p:start_link/3 at ~p exit with reason: ~0p~n" def filter(%{msg: {:format, @ranch_format, [_, _, _, :killed]}}, _), do: :stop def filter(event, _), do: event end ================================================ FILE: lib/realtime/logs.ex ================================================ defmodule Realtime.Logs do @moduledoc """ Logging operations for Realtime """ require Logger defmacro __using__(_opts) do quote do require Logger import Realtime.Logs end end @doc """ Prepares a value to be logged """ def to_log(value) when is_binary(value), do: value def to_log(value), do: inspect(value, pretty: true) defmacro log_error(code, error, metadata \\ []) do quote bind_quoted: [code: code, error: error, metadata: metadata], location: :keep do Logger.error("#{code}: #{Realtime.Logs.to_log(error)}", [error_code: code] ++ metadata) end end defmacro log_warning(code, warning, metadata \\ []) do quote bind_quoted: [code: code, warning: warning, metadata: metadata], location: :keep do Logger.warning("#{code}: #{Realtime.Logs.to_log(warning)}", [{:error_code, code} | metadata]) end end end defimpl Jason.Encoder, for: DBConnection.ConnectionError do def encode( %DBConnection.ConnectionError{message: message, reason: reason, severity: severity}, _opts ) do inspect(%{message: message, reason: reason, severity: severity}, pretty: true) end end defimpl Jason.Encoder, for: Postgrex.Error do def encode( %Postgrex.Error{ message: message, postgres: %{code: code, schema: schema, table: table} }, _opts ) do inspect(%{message: message, schema: schema, table: table, code: code}, pretty: true) end end defimpl Jason.Encoder, for: Tuple do require Logger def encode(tuple, _opts) do Logger.error("UnableToEncodeJson: 
Tuple encoding not supported: #{inspect(tuple)}") inspect(%{error: "unable to parse response"}, pretty: true) end end ================================================ FILE: lib/realtime/messages.ex ================================================ defmodule Realtime.Messages do @moduledoc """ Handles `realtime.messages` table operations """ alias Realtime.Api.Message import Ecto.Query, only: [from: 2] @hard_limit 25 @default_timeout 5_000 @doc """ Fetch last `limit ` messages for a given `topic` inserted after `since` Automatically uses RPC if the database connection is not in the same node Only allowed for private channels """ @spec replay(pid, String.t(), String.t(), non_neg_integer, non_neg_integer) :: {:ok, Message.t(), [String.t()]} | {:error, term} | {:error, :rpc_error, term} def replay(conn, tenant_id, topic, since, limit) when node(conn) == node() and is_integer(since) and is_integer(limit) do limit = max(min(limit, @hard_limit), 1) with {:ok, since} <- DateTime.from_unix(since, :millisecond), {:ok, messages} <- messages(conn, tenant_id, topic, since, limit) do {:ok, Enum.reverse(messages), MapSet.new(messages, & &1.id)} else {:error, :postgrex_exception} -> {:error, :failed_to_replay_messages} {:error, :invalid_unix_time} -> {:error, :invalid_replay_params} error -> error end end def replay(conn, tenant_id, topic, since, limit) when is_integer(since) and is_integer(limit) do Realtime.GenRpc.call(node(conn), __MODULE__, :replay, [conn, tenant_id, topic, since, limit], key: topic, tenant_id: tenant_id ) end def replay(_, _, _, _, _), do: {:error, :invalid_replay_params} defp messages(conn, tenant_id, topic, since, limit) do since = DateTime.to_naive(since) # We want to avoid searching partitions in the future as they should be empty # so we limit to 1 minute in the future to account for any potential drift now = NaiveDateTime.utc_now() |> NaiveDateTime.add(1, :minute) query = from m in Message, where: m.topic == ^topic and m.private == true and m.extension 
== :broadcast and m.inserted_at >= ^since and m.inserted_at < ^now, limit: ^limit, order_by: [desc: m.inserted_at] {latency, value} = :timer.tc(Realtime.Tenants.Repo, :all, [conn, query, Message, [timeout: @default_timeout]], :millisecond) :telemetry.execute([:realtime, :tenants, :replay], %{latency: latency}, %{tenant: tenant_id}) value end @doc """ Deletes messages older than 72 hours for a given tenant connection """ @spec delete_old_messages(pid()) :: :ok def delete_old_messages(conn) do limit = NaiveDateTime.utc_now() |> NaiveDateTime.add(-72, :hour) |> NaiveDateTime.to_date() %{rows: rows} = Postgrex.query!( conn, """ SELECT child.relname FROM pg_inherits JOIN pg_class parent ON pg_inherits.inhparent = parent.oid JOIN pg_class child ON pg_inherits.inhrelid = child.oid JOIN pg_namespace nmsp_parent ON nmsp_parent.oid = parent.relnamespace JOIN pg_namespace nmsp_child ON nmsp_child.oid = child.relnamespace WHERE parent.relname = 'messages' AND nmsp_child.nspname = 'realtime' """, [] ) rows |> Enum.filter(fn ["messages_" <> date] -> date |> String.replace("_", "-") |> Date.from_iso8601!() |> Date.compare(limit) == :lt end) |> Enum.each(&Postgrex.query!(conn, "DROP TABLE IF EXISTS realtime.#{&1}", [])) :ok end end ================================================ FILE: lib/realtime/metrics_cleaner.ex ================================================ defmodule Realtime.MetricsCleaner do @moduledoc false use GenServer require Logger defstruct [:check_ref, :interval] def handle_beacon_event([:beacon, :users, :group, :vacant], _, %{group: tenant_id}, vacant_websockets) do :ets.insert(vacant_websockets, {tenant_id, DateTime.to_unix(DateTime.utc_now(), :second)}) end def handle_beacon_event([:beacon, :users, :group, :occupied], _, %{group: tenant_id}, vacant_websockets) do :ets.delete(vacant_websockets, tenant_id) end def handle_syn_event([:syn, Realtime.Tenants.Connect, :unregistered], _, %{name: tenant_id}, disconnected_tenants) do :ets.insert(disconnected_tenants, 
{tenant_id, DateTime.to_unix(DateTime.utc_now(), :second)}) end def handle_syn_event([:syn, Realtime.Tenants.Connect, :registered], _, %{name: tenant_id}, disconnected_tenants) do :ets.delete(disconnected_tenants, tenant_id) end def start_link(opts), do: GenServer.start_link(__MODULE__, opts) # 10 minutes @default_vacant_metric_threshold_in_seconds 600 @impl true def init(opts) do interval = opts[:metrics_cleaner_schedule_timer_in_ms] || Application.fetch_env!(:realtime, :metrics_cleaner_schedule_timer_in_ms) vacant_metric_threshold_in_seconds = opts[:vacant_metric_threshold_in_seconds] || @default_vacant_metric_threshold_in_seconds Logger.info("Starting MetricsCleaner") vacant_websockets = :ets.new(:vacant_websockets, [:set, :public, read_concurrency: false, write_concurrency: :auto]) disconnected_tenants = :ets.new(:disconnected_tenants, [:set, :public, read_concurrency: false, write_concurrency: :auto]) :ok = :telemetry.attach_many( [self(), :vacant_websockets], [[:beacon, :users, :group, :occupied], [:beacon, :users, :group, :vacant]], &__MODULE__.handle_beacon_event/4, vacant_websockets ) :ok = :telemetry.attach_many( [self(), :disconnected_tenants], [[:syn, Realtime.Tenants.Connect, :registered], [:syn, Realtime.Tenants.Connect, :unregistered]], &__MODULE__.handle_syn_event/4, disconnected_tenants ) {:ok, %{ check_ref: check(interval), interval: interval, vacant_metric_threshold_in_seconds: vacant_metric_threshold_in_seconds, vacant_websockets: vacant_websockets, disconnected_tenants: disconnected_tenants }} end @impl true def terminate(_reason, _state) do :telemetry.detach([self(), :vacant_websockets]) :telemetry.detach([self(), :disconnected_tenants]) :ok end @impl true def handle_info(:check, %{interval: interval} = state) do Process.cancel_timer(state.check_ref) {exec_time, _} = :timer.tc( fn -> loop_and_cleanup_metrics_table(state.vacant_websockets, state.vacant_metric_threshold_in_seconds) loop_and_cleanup_metrics_table(state.disconnected_tenants, 
state.vacant_metric_threshold_in_seconds)
        end,
        :millisecond
      )

    if exec_time > :timer.seconds(5), do: Logger.warning("Metrics check took: #{exec_time} ms")

    {:noreply, %{state | check_ref: check(interval)}}
  end

  def handle_info(msg, state) do
    Logger.error("Unexpected message: #{inspect(msg)}")
    {:noreply, state}
  end

  defp check(interval), do: Process.send_after(self(), :check, interval)

  defp loop_and_cleanup_metrics_table(cleaner_table, vacant_metric_cleanup_threshold_in_seconds) do
    threshold =
      DateTime.utc_now()
      |> DateTime.add(-vacant_metric_cleanup_threshold_in_seconds, :second)
      |> DateTime.to_unix(:second)

    # We do this to have a consistent view of the table while we read and delete
    :ets.safe_fixtable(cleaner_table, true)

    try do
      # Look for tenant_ids that have been vacant for more than threshold
      vacant_tenant_ids =
        :ets.select(cleaner_table, [
          {{:"$1", :"$2"}, [{:<, :"$2", threshold}], [:"$1"]}
        ])

      vacant_tenant_ids
      |> Enum.map(fn tenant_id -> %{tenant: tenant_id} end)
      |> then(&Peep.prune_tags(Realtime.TenantPromEx.Metrics, &1))

      # Delete them from the table
      :ets.select_delete(cleaner_table, [
        {{:"$1", :"$2"}, [{:<, :"$2", threshold}], [true]}
      ])
    after
      :ets.safe_fixtable(cleaner_table, false)
    end
  end
end

================================================
FILE: lib/realtime/metrics_pusher.ex
================================================
defmodule Realtime.MetricsPusher do
  @moduledoc """
  GenServer that periodically pushes Prometheus metrics to an endpoint.

  Only starts if `url` is configured.
  Pushes metrics every 30 seconds (configurable) to the configured URL endpoint.
  """

  use GenServer
  require Logger

  defstruct [:push_ref, :interval, :req_options, :auth]

  @doc """
  Starts the pusher, registered under the module name.

  The target URL is taken from `opts[:url]`, falling back to the
  `:metrics_pusher_url` application env. When no binary URL is configured the
  process is not started and `:ignore` is returned; otherwise the result of
  `GenServer.start_link/3` is returned as-is (including `{:error, reason}`).
  """
  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(opts) do
    url = opts[:url] || Application.get_env(:realtime, :metrics_pusher_url)

    if is_binary(url) do
      GenServer.start_link(__MODULE__, opts, name: __MODULE__)
    else
      Logger.warning("MetricsPusher not started: url must be configured")
      :ignore
    end
  end

  # Builds the Req options once and schedules the first push.
  # Every setting can be given in `opts` or through the application env.
  @impl true
  def init(opts) do
    url = opts[:url] || Application.get_env(:realtime, :metrics_pusher_url)
    user = opts[:user] || Application.get_env(:realtime, :metrics_pusher_user, "realtime")
    auth = opts[:auth] || Application.get_env(:realtime, :metrics_pusher_auth)

    # Keyword.get/3 (rather than `||`) so that an explicit falsy option such as
    # `compress: false` is honoured instead of falling through to the default.
    interval =
      Keyword.get(
        opts,
        :interval,
        Application.get_env(:realtime, :metrics_pusher_interval_ms, :timer.seconds(30))
      )

    timeout =
      Keyword.get(
        opts,
        :timeout,
        Application.get_env(:realtime, :metrics_pusher_timeout_ms, :timer.seconds(15))
      )

    compress =
      Keyword.get(
        opts,
        :compress,
        Application.get_env(:realtime, :metrics_pusher_compress, true)
      )

    Logger.info("Starting MetricsPusher (url: #{url}, interval: #{interval}ms, compress: #{compress})")

    headers = [{"content-type", "text/plain"}]
    # Basic auth is only attached when a secret is configured
    basic_auth = if auth, do: [auth: {:basic, "#{user}:#{auth}"}], else: []

    # Later merges win: :metrics_pusher_req_options can override anything above
    req_options =
      [
        method: :post,
        url: url,
        headers: headers,
        compress_body: compress,
        receive_timeout: timeout
      ]
      |> Keyword.merge(basic_auth)
      |> Keyword.merge(Application.get_env(:realtime, :metrics_pusher_req_options, []))

    state = %__MODULE__{
      push_ref: schedule_push(interval),
      interval: interval,
      req_options: req_options
    }

    {:ok, state}
  end

  # Pushes the metrics, warns when the push was slow, then re-arms the timer.
  @impl true
  def handle_info(:push, state) do
    {exec_time, _} = :timer.tc(fn -> push(state.req_options) end, :millisecond)

    if exec_time > :timer.seconds(5) do
      Logger.warning("Metrics push took: #{exec_time} ms")
    end

    {:noreply, %{state | push_ref: schedule_push(state.interval)}}
  end

  @impl true
  def handle_info(msg, state) do
    Logger.error("MetricsPusher received unexpected message: #{inspect(msg)}")
    {:noreply, state}
  end

  defp schedule_push(delay), do: Process.send_after(self(), :push, delay)

  # Best effort: failures and raised exceptions are logged and turned into :ok
  # so the periodic loop keeps running.
  defp push(req_options) do
    try do
      metrics = Realtime.PromEx.get_metrics()

      case send_metrics(req_options, metrics) do
        :ok ->
          :ok

        {:error, reason} ->
          Logger.error("MetricsPusher: Failed to push metrics to #{req_options[:url]}: #{inspect(reason)}")
          :ok
      end
    rescue
      error ->
        Logger.error("MetricsPusher: Exception during push: #{inspect(error)}")
        :ok
    end
  end

  defp send_metrics(req_options, metrics) do
    [{:body, metrics} | req_options]
    |> Req.request()
    |> handle_response()
  end

  # Any 2xx status is a success; other statuses and transport errors are errors.
  defp handle_response({:ok, %{status: status}}) when status in 200..299, do: :ok
  defp handle_response({:ok, %{status: status} = response}), do: {:error, {:http_error, status, response.body}}
  defp handle_response({:error, reason}), do: {:error, reason}
end

================================================
FILE: lib/realtime/monitoring/distributed_metrics.ex
================================================
defmodule Realtime.DistributedMetrics do
  @moduledoc """
  Gather stats for each connected node
  """

  require Record
  Record.defrecordp(:net_address, Record.extract(:net_address, from_lib: "kernel/include/net_address.hrl"))

  @spec info() :: %{node => map}
  def info do
    # First check if Erlang distribution is started
    if :net_kernel.get_state()[:started] != :no do
      {:ok, nodes_info} = :net_kernel.nodes_info()
      # Ignore "hidden" nodes (remote shell)
      nodes_info = Enum.filter(nodes_info, fn {_k, v} -> v[:type] == :normal end)

      port_addresses =
        :erlang.ports()
        |> Stream.filter(fn port ->
          :erlang.port_info(port, :name) == {:name, ~c"tcp_inet"}
        end)
        |> Stream.map(&{:inet.peername(&1), &1})
        |> Stream.filter(fn
          {{:ok, _peername}, _port} -> true
          _ -> false
        end)
        |> Enum.map(fn {{:ok, peername}, port} -> {peername, port} end)
        |> Enum.into(%{})

      Map.new(nodes_info, &info(&1, port_addresses))
    else
      %{}
    end
  end

  defp info({node, info}, port_addresses) do
    dist_pid = info[:owner]
    state = info[:state]

    case info[:address] do
      net_address(address: address) when address !=
:undefined ->
        {node, info(node, port_addresses, dist_pid, state, address)}

      _ ->
        {node, %{pid: dist_pid, state: state}}
    end
  end

  defp info(node, port_addresses, dist_pid, state, address) do
    if dist_port = port_addresses[address] do
      %{
        inet_stats: inet_stats(dist_port),
        port: dist_port,
        pid: dist_pid,
        state: state
      }
    else
      %{pid: dist_pid, state: state}
    end
    |> Map.merge(%{
      queue_size: node_queue_size(node)
    })
  end

  defp inet_stats(port) do
    case :inet.getstat(port) do
      {:ok, stats} ->
        stats

      _ ->
        nil
    end
  end

  defp node_queue_size(node) do
    case :ets.lookup(:sys_dist, node) do
      [dist] ->
        conn_id = elem(dist, 2)

        with {:ok, _, _, queue_size} <- :erlang.dist_get_stat(conn_id) do
          {:ok, queue_size}
        else
          _ -> {:error, :not_found}
        end

      _ ->
        {:error, :not_found}
    end
  end
end

================================================
FILE: lib/realtime/monitoring/erl_sys_mon.ex
================================================
defmodule Realtime.ErlSysMon do
  @moduledoc """
  Logs Erlang System Monitor events.
  """

  use GenServer

  require Logger

  @defaults [
    :busy_dist_port,
    :busy_port,
    {:long_gc, 500},
    {:long_schedule, 500},
    {:long_message_queue, {0, 1_000}}
  ]

  def start_link(args), do: GenServer.start_link(__MODULE__, args)

  def init(args) do
    config = Keyword.get(args, :config, @defaults)
    :erlang.system_monitor(self(), config)

    {:ok, []}
  end

  def handle_info({:monitor, pid, _type, _meta} = msg, state) when is_pid(pid) do
    log_process_info(msg, pid)
    {:noreply, state}
  end

  def handle_info(msg, state) do
    Logger.warning("#{__MODULE__} message: " <> inspect(msg))
    {:noreply, state}
  end

  defp log_process_info(msg, pid) do
    pid_info =
      pid
      |> Process.info(:dictionary)
      |> case do
        {:dictionary, dict} when is_list(dict) ->
          {List.keyfind(dict, :"$initial_call", 0), List.keyfind(dict, :"$ancestors", 0)}

        other ->
          other
      end

    extra_info = Process.info(pid, [:registered_name, :message_queue_len, :total_heap_size])

    Logger.warning(
      "#{__MODULE__} message: " <>
        inspect(msg) <> "|\n process info: #{inspect(pid_info)} #{inspect(extra_info)}"
    )
  rescue
    _ ->
      Logger.warning("#{__MODULE__} message: " <> inspect(msg))
  end
end

================================================
FILE: lib/realtime/monitoring/gen_rpc_metrics.ex
================================================
defmodule Realtime.GenRpcMetrics do
  @moduledoc """
  Gather stats for gen_rpc TCP sockets
  """

  require Record
  Record.defrecordp(:net_address, Record.extract(:net_address, from_lib: "kernel/include/net_address.hrl"))

  # Returns, per visible node, the aggregated socket stats of the Erlang ports
  # that carry gen_rpc traffic to or from that node. Returns an empty map when
  # Erlang distribution is not started.
  @spec info() :: %{node() => %{inet_stats: %{:inet.stat_option() => integer}, queue_size: non_neg_integer()}}
  def info do
    if :net_kernel.get_state()[:started] != :no do
      {:ok, nodes_info} = :net_kernel.nodes_info()
      # Ignore "hidden" nodes (remote shell)
      nodes_info = Enum.filter(nodes_info, fn {_k, v} -> v[:type] == :normal end)

      gen_rpc_server_port = server_port()
      ip_address_node = ip_address_node(nodes_info)

      {client_ports, server_ports} =
        :erlang.ports()
        |> Stream.filter(fn port ->
          :erlang.port_info(port, :name) == {:name, ~c"tcp_inet"}
        end)
        |> Stream.map(&{:inet.peername(&1), :inet.sockname(&1), &1})
        |> Stream.filter(fn
          {{:ok, _peername}, {:ok, _sockname}, _port} -> true
          _ -> false
        end)
        |> Stream.map(fn {{:ok, {peername_ipaddress, peername_port}}, {:ok, {_, server_port}}, port} ->
          {ip_address_node[peername_ipaddress], peername_port, server_port, port}
        end)
        |> Stream.filter(fn
          # The peer address does not belong to any known node: not a gen_rpc socket.
          # The elements are 4-tuples, so the pattern needs four positions to ever match.
          {nil, _, _, _} ->
            false

          {node, peername_port, server_port, _port} ->
            {_, client_tcp_or_ssl_port} = :gen_rpc_helper.get_client_config_per_node(node)

            # Only keep Erlang ports that are either serving on the gen_rpc server tcp/ssl port or
            # connecting to other nodes using the expected client tcp/ssl port for that node
            peername_port == client_tcp_or_ssl_port or server_port == gen_rpc_server_port
        end)
        |> Enum.reduce({%{}, %{}}, fn {node, _peername_port, server_port, port}, {clients, servers} ->
          if server_port == gen_rpc_server_port do
            # This Erlang port is serving gen_rpc
            {clients, update_in(servers, [node], fn value -> [port | value || []] end)}
else
            # This Erlang port is requesting gen_rpc
            {update_in(clients, [node], fn value -> [port | value || []] end), servers}
          end
        end)

      Map.new(nodes_info, &info(&1, client_ports, server_ports))
    else
      %{}
    end
  end

  defp info({node, _}, client_ports, server_ports) do
    gen_rpc_ports = Map.get(client_ports, node, []) ++ Map.get(server_ports, node, [])

    if gen_rpc_ports != [] do
      {node,
       %{
         inet_stats: inet_stats(gen_rpc_ports),
         queue_size: queue_size(gen_rpc_ports),
         connections: length(gen_rpc_ports)
       }}
    else
      {node, %{}}
    end
  end

  # Sums each socket statistic over all the given ports; ports whose stats
  # cannot be read are ignored.
  defp inet_stats(ports) do
    Enum.reduce(ports, %{}, fn port, acc ->
      case :inet.getstat(port) do
        {:ok, stats} ->
          Map.merge(acc, Map.new(stats), fn _k, v1, v2 -> v1 + v2 end)

        _ ->
          acc
      end
    end)
  end

  # Sums the driver queue size over all the given ports.
  defp queue_size(ports) do
    Enum.reduce(ports, 0, fn port, acc ->
      case :erlang.port_info(port, :queue_size) do
        {:queue_size, queue_size} ->
          acc + queue_size

        # :erlang.port_info/2 returns :undefined when the port is no longer open,
        # which can happen between enumerating the ports and reading them.
        _ ->
          acc
      end
    end)
  end

  defp server_port() do
    if Application.fetch_env!(:gen_rpc, :default_client_driver) == :tcp do
      Application.fetch_env!(:gen_rpc, :tcp_server_port)
    else
      Application.fetch_env!(:gen_rpc, :ssl_server_port)
    end
  end

  defp ip_address_node(nodes_info) do
    nodes_info
    |> Stream.map(fn {node, info} ->
      case info[:address] do
        net_address(address: {ip_address, _}) ->
          {ip_address, node}

        _ ->
          {nil, node}
      end
    end)
    |> Stream.filter(fn {ip_address, _node} -> ip_address != nil end)
    |> Map.new()
  end
end

================================================
FILE: lib/realtime/monitoring/latency.ex
================================================
defmodule Realtime.Latency do
  @moduledoc """
  Measures the latency of the cluster from each node and broadcasts it over PubSub.
  """

  use GenServer
  use Realtime.Logs

  alias Realtime.Nodes
  alias Realtime.GenRpc

  defmodule Payload do
    @moduledoc false

    defstruct [
      :from_node,
      :from_region,
      :node,
      :region,
      :latency,
      :response,
      :timestamp
    ]

    # `latency` is in milliseconds and always a float (microseconds / 1_000).
    # `response` is whatever the remote call returned, in one of the two shapes
    # handled by `Realtime.Latency.ping/3`.
    @type t :: %__MODULE__{
            node: atom(),
            region: String.t() | nil,
            from_node: atom(),
            from_region: String.t(),
            latency: float(),
            response: {:ok, {:pong, String.t()}} | {:error, :rpc_error, term()},
            timestamp: DateTime.t()
          }
  end

  # Interval between two ping rounds, in milliseconds
  @every 15_000

  def start_link(args) do
    GenServer.start_link(__MODULE__, args, name: __MODULE__)
  end

  def init(_args) do
    ping_after()

    {:ok, []}
  end

  def handle_info(:ping, state) do
    ping()
    ping_after()

    {:noreply, state}
  end

  def handle_info(msg, state) do
    Logger.warning("Unexpected message: #{inspect(msg)}")
    {:noreply, state}
  end

  def handle_cast({:ping, pong_timeout, timer_timeout, yield_timeout}, state) do
    # For testing
    ping(pong_timeout, timer_timeout, yield_timeout)
    {:noreply, state}
  end

  @doc """
  Pings every node in the cluster (the local node included) and returns the
  `Task.yield_many/2` results.

  One task per node is started under `Realtime.TaskSupervisor`, so the pings run
  concurrently. Each task measures the round trip of a `pong/1` call, broadcasts a
  `Payload` on the `"admin:cluster"` topic and returns it.

  `timer_timeout` bounds a single node rpc. `yield_timeout` bounds the wait for all
  tasks and should really never get hit; tasks that have not replied by then are
  killed.
  """
  @spec ping :: [{Task.t(), tuple() | nil}]
  def ping(pong_timeout \\ 0, timer_timeout \\ 5_000, yield_timeout \\ 5_000) do
    tasks =
      for n <- [Node.self() | Node.list()] do
        Task.Supervisor.async(Realtime.TaskSupervisor, fn ->
          {latency, response} =
            :timer.tc(fn -> GenRpc.call(n, __MODULE__, :pong, [pong_timeout], timeout: timer_timeout) end)

          # :timer.tc/1 reports microseconds
          latency_ms = latency / 1_000
          region = Application.get_env(:realtime, :region, "not_set")
          short_name = Nodes.short_node_id_from_name(n)
          from_node = Nodes.short_node_id_from_name(Node.self())

          case response do
            {:error, :rpc_error, reason} ->
              log_error(
                "RealtimeNodeDisconnected",
                "Unable to connect to #{short_name} from #{region}: #{format_reason(reason)}"
              )

              payload = %Payload{
                from_node: from_node,
                from_region: region,
                node: short_name,
                region: nil,
                latency: latency_ms,
                response: response,
                timestamp: DateTime.utc_now()
              }

              RealtimeWeb.Endpoint.broadcast("admin:cluster", "pong", payload)
              payload

            {:ok, {:pong, remote_region}} ->
              if latency_ms > 1_000,
                do:
                  Logger.warning(
                    "Network warning: latency to #{remote_region} (#{short_name}) from #{region} (#{from_node}) is #{latency_ms} ms"
                  )

              payload = %Payload{
                from_node: from_node,
                from_region: region,
                node: short_name,
                region: remote_region,
                latency: latency_ms,
                response: response,
                timestamp: DateTime.utc_now()
              }

              RealtimeWeb.Endpoint.broadcast("admin:cluster", "pong", payload)
              payload
          end
        end)
      end
      |> Task.yield_many(yield_timeout)

    # A nil result means the task did not reply within yield_timeout
    for {task, result} <- tasks do
      unless result, do: Task.shutdown(task, :brutal_kill)
    end

    tasks
  end

  @doc """
  A noop function to call from a remote server.
  """
  @spec pong :: {:ok, {:pong, String.t()}}
  def pong do
    region = Application.get_env(:realtime, :region, "not_set")
    {:ok, {:pong, region}}
  end

  # Same as pong/0 after sleeping `latency` milliseconds (used to simulate a slow node).
  @spec pong(non_neg_integer) :: {:ok, {:pong, String.t()}}
  def pong(latency) when is_integer(latency) do
    Process.sleep(latency)
    pong()
  end

  defp ping_after do
    Process.send_after(self(), :ping, @every)
  end

  # Binaries and atoms keep their plain rendering in the log line; any other term
  # (tuples, maps, ...) does not implement String.Chars and would raise when
  # interpolated, so it is inspected instead.
  defp format_reason(reason) when is_binary(reason) or is_atom(reason), do: to_string(reason)
  defp format_reason(reason), do: inspect(reason)
end

================================================
FILE: lib/realtime/monitoring/os_metrics.ex
================================================
defmodule Realtime.OsMetrics do
  @moduledoc """
  This module provides functions to get CPU and RAM usage.
  """

  # Resolved at compile time: Mix is a build tool and may not be available at
  # runtime (for example inside a release), so Mix.env/0 must not be called from
  # ram_usage/0 itself.
  @free_memory_key if Mix.env() in [:dev, :test], do: :free_memory, else: :available_memory

  # Percentage of memory in use, computed from :memsup system memory data.
  @spec ram_usage() :: float()
  def ram_usage do
    mem = :memsup.get_system_memory_data()
    free_mem = mem[@free_memory_key]
    100 - free_mem / mem[:total_memory] * 100
  end

  # Load averages as reported by :cpu_sup, divided by 256.
  @spec cpu_la() :: %{avg1: float(), avg5: float(), avg15: float()}
  def cpu_la do
    %{
      avg1: :cpu_sup.avg1() / 256,
      avg5: :cpu_sup.avg5() / 256,
      avg15: :cpu_sup.avg15() / 256
    }
  end

  @spec cpu_util() :: float() | {:error, term()}
  def cpu_util do
    :cpu_sup.util()
  end
end

================================================
FILE: lib/realtime/monitoring/peep/partitioned.ex
================================================
defmodule Realtime.Monitoring.Peep.Partitioned do
  @moduledoc """
  Peep.Storage implementation using a single ETS table with a configurable number of partitions
  """

  alias Peep.Storage
  alias Telemetry.Metrics

  @behaviour Peep.Storage

  @spec new(pos_integer) :: {:ets.tid(), pos_integer}
  @impl true
  def new(partitions) when is_integer(partitions) and partitions > 0 do
    opts = [
      :public,
      # Enabling read_concurrency makes switching between reads and writes
      # more expensive. The goal is to ruthlessly optimize writes, even at
      # the cost of read performance.
read_concurrency: false,
      write_concurrency: true,
      decentralized_counters: true
    ]

    # The storage handle is the table id paired with the partition count
    {:ets.new(__MODULE__, opts), partitions}
  end

  # Reports the number of rows and the table's :memory figure multiplied by the
  # VM word size.
  @impl true
  def storage_size({tid, _}) do
    %{
      size: :ets.info(tid, :size),
      memory: :ets.info(tid, :memory) * :erlang.system_info(:wordsize)
    }
  end

  # Counter: the key carries a random partition index in 1..partitions, so
  # increments for the same {id, tags} are spread over several rows. The row is
  # created from the default object {key, 0} when it does not exist yet.
  @impl true
  def insert_metric({tid, partitions}, id, %Metrics.Counter{}, _value, %{} = tags) do
    key = {id, tags, :rand.uniform(partitions)}
    :ets.update_counter(tid, key, {2, 1}, {key, 0})
  end

  # Sum: same partitioned layout as counters, incremented by `value`.
  def insert_metric({tid, partitions}, id, %Metrics.Sum{}, value, %{} = tags) do
    key = {id, tags, :rand.uniform(partitions)}
    :ets.update_counter(tid, key, {2, value}, {key, 0})
  end

  # Last value: a single, non-partitioned row per {id, tags} that is overwritten.
  def insert_metric({tid, _partitions}, id, %Metrics.LastValue{}, value, %{} = tags) do
    key = {id, tags}
    :ets.insert(tid, {key, value})
  end

  # Distribution: one non-partitioned row per {id, tags} holding the
  # Storage.Atomics value that receives the observation.
  def insert_metric({tid, _partitions}, id, %Metrics.Distribution{} = metric, value, %{} = tags) do
    key = {id, tags}

    atomics =
      case :ets.lookup(tid, key) do
        [{_key, ref}] ->
          ref

        [] ->
          # Race condition: Multiple processes could be attempting
          # to write to this key. Thankfully, :ets.insert_new/2 will break ties,
          # and concurrent writers should agree on which :atomics object to
          # increment.
          new_atomics = Storage.Atomics.new(metric)

          case :ets.insert_new(tid, {key, new_atomics}) do
            true ->
              new_atomics

            false ->
              # Another writer inserted first: use the object it stored
              [{_key, atomics}] = :ets.lookup(tid, key)
              atomics
          end
      end

    Storage.Atomics.insert(atomics, value)
  end

  # Dumps the whole table and folds the rows into a nested map keyed by metric
  # and then by tags (see group_metric/3 below).
  @impl true
  def get_all_metrics({tid, _partitions}, %Peep.Persistent{ids_to_metrics: itm}) do
    :ets.tab2list(tid)
    |> group_metrics(itm, %{})
  end

  # Counter: adds up the values of every partition row for `id` whose tags equal
  # `tags`; 0 when there is none.
  @impl true
  def get_metric({tid, _partitions}, id, %Metrics.Counter{}, tags) do
    :ets.select(tid, [{{{id, :"$2", :_}, :"$1"}, [{:==, :"$2", tags}], [:"$1"]}])
    |> Enum.reduce(0, fn count, acc -> count + acc end)
  end

  # Sum: same lookup as for counters.
  def get_metric({tid, _partitions}, id, %Metrics.Sum{}, tags) do
    :ets.select(tid, [{{{id, :"$2", :_}, :"$1"}, [{:==, :"$2", tags}], [:"$1"]}])
    |> Enum.reduce(0, fn count, acc -> count + acc end)
  end

  # Last value: the stored value, or nil when nothing was recorded.
  def get_metric({tid, _partitions}, id, %Metrics.LastValue{}, tags) do
    case :ets.lookup(tid, {id, tags}) do
      [{_key, value}] -> value
      _ -> nil
    end
  end

  # Distribution: the values read from the stored atomics, or nil when nothing
  # was recorded.
  def get_metric({tid, _partitions}, id, %Metrics.Distribution{}, tags) do
    key = {id, tags}

    case :ets.lookup(tid, key) do
      [{_key, atomics}] -> Storage.Atomics.values(atomics)
      _ -> nil
    end
  end

  # Deletes every row whose tags match one of `patterns`. Each pattern yields two
  # match-spec clauses because keys come in two shapes: 3-element keys
  # (counters and sums) and 2-element keys (distributions and last values).
  @impl true
  def prune_tags({tid, _partitions}, patterns) do
    match_spec =
      patterns
      |> Enum.flat_map(fn pattern ->
        counter_or_sum_key = {:_, pattern, :_}
        dist_or_last_value_key = {:_, pattern}

        [
          {
            {counter_or_sum_key, :_},
            [],
            [true]
          },
          {
            {dist_or_last_value_key, :_},
            [],
            [true]
          }
        ]
      end)

    :ets.select_delete(tid, match_spec)
    :ok
  end

  # Folds the table rows into the accumulator one at a time.
  defp group_metrics([], _itm, acc) do
    acc
  end

  defp group_metrics([metric | rest], itm, acc) do
    acc2 = group_metric(metric, itm, acc)
    group_metrics(rest, itm, acc2)
  end

  # 3-element key (counter or sum): add this partition's value to the running
  # total for {metric, tags}, starting from 0.
  defp group_metric({{id, tags, _}, value}, itm, acc) do
    %{^id => metric} = itm
    update_in(acc, [Access.key(metric, %{}), Access.key(tags, 0)], &(&1 + value))
  end

  # 2-element key holding atomics (distribution): store the values read from it.
  defp group_metric({{id, tags}, %Storage.Atomics{} = atomics}, itm, acc) do
    %{^id => metric} = itm
    put_in(acc, [Access.key(metric, %{}), Access.key(tags)], Storage.Atomics.values(atomics))
  end

  # Any other 2-element key (last value): store the value as-is.
  defp group_metric({{id, tags}, value}, itm, acc) do
    %{^id => metric} = itm
    put_in(acc, [Access.key(metric, %{}), Access.key(tags)], value)
  end
end

================================================
FILE: lib/realtime/monitoring/prom_ex/plugins/channels.ex
================================================
defmodule Realtime.PromEx.Plugins.Channels do
  @moduledoc """
  Realtime channels monitoring plugin for PromEx
  """

  use PromEx.Plugin
  require Logger

  # Declares a single counter on the [:realtime, :channel, :error] event,
  # tagged by :code.
  @impl true
  def event_metrics(_opts) do
    Event.build(:realtime, [
      counter(
        [:realtime, :channel, :error],
        event_name: [:realtime, :channel, :error],
        measurement: :code,
        tags: [:code],
        description: "Count of errors in the Realtime channels initialization"
      )
    ])
  end
end

================================================
FILE: lib/realtime/monitoring/prom_ex/plugins/distributed.ex
================================================
defmodule Realtime.PromEx.Plugins.Distributed do
  @moduledoc """
  Distributed erlang metrics
  """

  use PromEx.Plugin
  alias Realtime.DistributedMetrics

  # Telemetry event names emitted by execute_metrics/0, one per metric
  @event_node_queue_size [:prom_ex, :plugin, :dist, :queue_size]
  @event_recv_bytes [:prom_ex, :plugin, :dist, :recv, :bytes]
  @event_recv_count [:prom_ex, :plugin, :dist, :recv, :count]
  @event_send_bytes [:prom_ex, :plugin, :dist, :send, :bytes]
  @event_send_count [:prom_ex, :plugin, :dist, :send, :count]
  @event_send_pending_bytes [:prom_ex, :plugin, :dist, :send, :pending, :bytes]

  @impl true
  def polling_metrics(opts) do
    poll_rate = Keyword.get(opts, :poll_rate)

    [
      metrics(poll_rate)
    ]
  end

  # Every metric below is a last_value reading the :size measurement and tagged
  # with the origin and target node.
  defp metrics(poll_rate) do
    Polling.build(
      :realtime_vm_dist,
      poll_rate,
      {__MODULE__, :execute_metrics, []},
      [
        last_value(
          [:dist, :queue_size],
          event_name: @event_node_queue_size,
          description: "Number of bytes in the output distribution queue",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:dist, :recv_bytes],
          event_name: @event_recv_bytes,
          description: "Number of bytes received by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:dist, :recv_count],
event_name: @event_recv_count,
          description: "Number of packets received by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:dist, :send_bytes],
          event_name: @event_send_bytes,
          description: "Number of bytes sent by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:dist, :send_count],
          event_name: @event_send_count,
          description: "Number of packets sent by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:dist, :send_pending_bytes],
          event_name: @event_send_pending_bytes,
          description: "Number of bytes waiting to be sent by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        )
      ],
      detach_on_error: false
    )
  end

  def execute_metrics do
    dist_info = DistributedMetrics.info()

    Enum.each(dist_info, fn {node, info} ->
      execute_queue_size(node, info)
      execute_inet_stats(node, info)
    end)
  end

  defp execute_inet_stats(node, info) do
    if stats = info[:inet_stats] do
      :telemetry.execute(@event_recv_bytes, %{size: stats[:recv_oct]}, %{origin_node: node(), target_node: node})
      :telemetry.execute(@event_recv_count, %{size: stats[:recv_cnt]}, %{origin_node: node(), target_node: node})

      :telemetry.execute(@event_send_bytes, %{size: stats[:send_oct]}, %{origin_node: node(), target_node: node})

      :telemetry.execute(@event_send_count, %{size: stats[:send_cnt]}, %{origin_node: node(), target_node: node})

      :telemetry.execute(@event_send_pending_bytes, %{size: stats[:send_pend]}, %{
        origin_node: node(),
        target_node: node
      })
    end
  end

  defp execute_queue_size(node, info) do
    with {:ok, size} <- info[:queue_size] do
      :telemetry.execute(@event_node_queue_size, %{size: size}, %{origin_node: node(), target_node: node})
    end
  end
end

================================================
FILE: lib/realtime/monitoring/prom_ex/plugins/gen_rpc.ex
================================================
defmodule Realtime.PromEx.Plugins.GenRpc do
  @moduledoc """
  GenRpc metrics
  """

  use PromEx.Plugin

  alias Realtime.GenRpcMetrics

  # Telemetry event names emitted by execute_metrics/0, one per metric
  @event_queue_size_bytes [:prom_ex, :plugin, :gen_rpc, :queue_size, :bytes]
  @event_recv_bytes [:prom_ex, :plugin, :gen_rpc, :recv, :bytes]
  @event_recv_count [:prom_ex, :plugin, :gen_rpc, :recv, :count]
  @event_send_bytes [:prom_ex, :plugin, :gen_rpc, :send, :bytes]
  @event_send_count [:prom_ex, :plugin, :gen_rpc, :send, :count]
  @event_send_pending_bytes [:prom_ex, :plugin, :gen_rpc, :send, :pending, :bytes]

  @impl true
  def polling_metrics(opts) do
    poll_rate = Keyword.get(opts, :poll_rate)

    [
      metrics(poll_rate)
    ]
  end

  # Every metric below is a last_value reading the :size measurement and tagged
  # with the origin and target node.
  defp metrics(poll_rate) do
    Polling.build(
      :realtime_gen_rpc,
      poll_rate,
      {__MODULE__, :execute_metrics, []},
      [
        last_value(
          [:gen_rpc, :queue_size_bytes],
          event_name: @event_queue_size_bytes,
          description: "The total number of bytes queued by the port using the ERTS driver queue implementation",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:gen_rpc, :recv_bytes],
          event_name: @event_recv_bytes,
          description: "Number of bytes received by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:gen_rpc, :recv_count],
          event_name: @event_recv_count,
          description: "Number of packets received by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:gen_rpc, :send_bytes],
          event_name: @event_send_bytes,
          description: "Number of bytes sent by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:gen_rpc, :send_count],
          event_name: @event_send_count,
          description: "Number of packets sent by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        ),
        last_value(
          [:gen_rpc, :send_pending_bytes],
          event_name: @event_send_pending_bytes,
          description: "Number of bytes waiting to be sent by the socket.",
          measurement: :size,
          tags: [:origin_node, :target_node]
        )
      ],
      detach_on_error: false
    )
  end

  # Polled callback: emits one set of events per node returned by GenRpcMetrics.info/0.
  def execute_metrics do
    dist_info = GenRpcMetrics.info()

    Enum.each(dist_info, fn {node, info} ->
      execute_queue_size(node, info)
      execute_inet_stats(node, info)
    end)
  end

  defp execute_inet_stats(node, info) do
    if stats = info[:inet_stats] do
      :telemetry.execute(@event_recv_bytes, %{size: stats[:recv_oct]}, %{origin_node: node(), target_node: node})
      :telemetry.execute(@event_recv_count, %{size: stats[:recv_cnt]}, %{origin_node: node(), target_node: node})

      :telemetry.execute(@event_send_bytes, %{size: stats[:send_oct]}, %{origin_node: node(), target_node: node})

      :telemetry.execute(@event_send_count, %{size: stats[:send_cnt]}, %{origin_node: node(), target_node: node})

      :telemetry.execute(@event_send_pending_bytes, %{size: stats[:send_pend]}, %{
        origin_node: node(),
        target_node: node
      })
    end
  end

  defp execute_queue_size(node, info) do
    # `info` is an empty map for nodes that have no gen_rpc sockets. Skip the event
    # in that case instead of reporting a nil size, mirroring the guard used for
    # the inet stats above. A size of 0 is still reported.
    if size = info[:queue_size] do
      :telemetry.execute(@event_queue_size_bytes, %{size: size}, %{origin_node: node(), target_node: node})
    end
  end
end

================================================
FILE: lib/realtime/monitoring/prom_ex/plugins/osmon.ex
================================================
defmodule Realtime.PromEx.Plugins.OsMon do
  @moduledoc """
  Polls os_mon metrics.
  """

  use PromEx.Plugin
  require Logger
  alias Realtime.OsMetrics

  @event_ram_usage [:prom_ex, :plugin, :osmon, :ram_usage]
  @event_cpu_util [:prom_ex, :plugin, :osmon, :cpu_util]
  # Single event carrying the three load averages (:avg1, :avg5, :avg15)
  @event_cpu_la [:prom_ex, :plugin, :osmon, :cpu_avg1]

  @impl true
  def polling_metrics(opts) do
    poll_rate = Keyword.get(opts, :poll_rate)

    [
      metrics(poll_rate)
    ]
  end

  defp metrics(poll_rate) do
    Polling.build(
      :realtime_osmon_events,
      poll_rate,
      {__MODULE__, :execute_metrics, []},
      [
        last_value(
          [:osmon, :ram_usage],
          event_name: @event_ram_usage,
          description: "The total percentage usage of operative memory.",
          measurement: :ram
        ),
        last_value(
          [:osmon, :cpu_util],
          event_name: @event_cpu_util,
          description:
            "The sum of the percentage shares of the CPU cycles spent in all busy processor states in average on all CPUs.",
          measurement: :cpu
        ),
        last_value(
          [:osmon, :cpu_avg1],
          event_name: @event_cpu_la,
          description: "The average system load in the last minute.",
          measurement: :avg1
        ),
        last_value(
          [:osmon, :cpu_avg5],
          event_name: @event_cpu_la,
          description: "The average system load in the last five minutes.",
          measurement: :avg5
        ),
        last_value(
          [:osmon, :cpu_avg15],
          event_name: @event_cpu_la,
          description: "The average system load in the last 15 minutes.",
          measurement: :avg15
        )
      ],
      detach_on_error: false
    )
  end

  def execute_metrics do
    execute_metrics(@event_ram_usage, %{ram: OsMetrics.ram_usage()})
    execute_metrics(@event_cpu_util, %{cpu: OsMetrics.cpu_util()})
    execute_metrics(@event_cpu_la, OsMetrics.cpu_la())
  end

  defp execute_metrics(event, metrics) do
    :telemetry.execute(event, metrics, %{})
  end
end

================================================
FILE: lib/realtime/monitoring/prom_ex/plugins/phoenix.ex
================================================
# copied from https://github.com/akoutmos/prom_ex/blob/master/lib/prom_ex/plugins/phoenix.ex
if Code.ensure_loaded?(Phoenix) do
  defmodule Realtime.PromEx.Plugins.Phoenix do
    @moduledoc false
    use PromEx.Plugin

    require Logger

    alias Phoenix.Socket
    alias
RealtimeWeb.Endpoint.HTTP, as: HTTP

    @stop_event [:prom_ex, :plugin, :phoenix, :stop]
    @event_all_connections [:prom_ex, :plugin, :phoenix, :all_connections]

    # Attaches the telemetry proxy for the configured endpoint prefixes and returns
    # the channel and socket event metric groups.
    @impl true
    def event_metrics(opts) do
      otp_app = Keyword.fetch!(opts, :otp_app)
      metric_prefix = Keyword.get(opts, :metric_prefix, PromEx.metric_prefix(otp_app, :phoenix))
      phoenix_event_prefixes = fetch_event_prefixes!(opts)

      set_up_telemetry_proxy(phoenix_event_prefixes)

      # Event metrics definitions
      [
        channel_events(metric_prefix),
        socket_events(metric_prefix)
      ]
    end

    @impl true
    def polling_metrics(opts) do
      otp_app = Keyword.fetch!(opts, :otp_app)
      metric_prefix = Keyword.get(opts, :metric_prefix, PromEx.metric_prefix(otp_app, :phoenix))
      poll_rate = Keyword.get(opts, :poll_rate)

      [
        metrics(metric_prefix, poll_rate)
      ]
    end

    def metrics(metric_prefix, poll_rate) do
      Polling.build(
        :phoenix_all_connections,
        poll_rate,
        {__MODULE__, :execute_metrics, []},
        [
          last_value(
            metric_prefix ++ [:connections, :total],
            event_name: @event_all_connections,
            description: "The total open connections to ranch.",
            measurement: :total
          ),
          last_value(
            metric_prefix ++ [:connections, :active],
            event_name: @event_all_connections,
            description: "Connections actively processing a request or WebSocket frame.",
            measurement: :active
          ),
          last_value(
            metric_prefix ++ [:connections, :max],
            event_name: @event_all_connections,
            description: "The configured maximum connections limit for the ranch listener.",
            measurement: :max
          )
        ],
        detach_on_error: false
      )
    end

    # Polled callback: reports the connection figures of the HTTP ranch listener,
    # or -1 for each figure when the listener (or the figure) is not available.
    def execute_metrics do
      # Take the listener's entry straight from the full listing. Checking for it
      # and then asking ranch again queried ranch twice and could fail if the
      # listener stopped between the two calls.
      info = :ranch.info()[HTTP] || %{}

      :telemetry.execute(
        @event_all_connections,
        %{
          total: Map.get(info, :all_connections, -1),
          active: Map.get(info, :active_connections, -1),
          max: Map.get(info, :max_connections, -1)
        },
        %{}
      )
    end

    defmodule Buckets do
      @moduledoc false
      use Peep.Buckets.Custom, buckets: [10, 100, 500, 1_000, 5_000, 10_000]
    end

    defp channel_events(metric_prefix) do
      Event.build(
        :phoenix_channel_event_metrics,
        [
          # Capture the number of channel joins that have occurred
          counter(
            metric_prefix ++ [:channel, :joined, :total],
            event_name: [:phoenix, :channel_joined],
            description: "The number of channel joins that have occurred.",
            tag_values: fn %{result: result, socket: %Socket{transport: transport, endpoint: endpoint}} ->
              %{
                transport: transport,
                result: result,
                endpoint: normalize_module_name(endpoint)
              }
            end,
            tags: [:result, :transport, :endpoint]
          ),

          # Capture channel handle_in duration
          distribution(
            metric_prefix ++ [:channel, :handled_in, :duration, :milliseconds],
            event_name: [:phoenix, :channel_handled_in],
            measurement: :duration,
            description: "The time it takes for the application to respond to channel messages.",
            reporter_options: [peep_bucket_calculator: Buckets],
            tag_values: fn %{socket: %Socket{endpoint: endpoint}} ->
              %{
                endpoint: normalize_module_name(endpoint)
              }
            end,
            tags: [:endpoint],
            unit: {:native, :millisecond}
          )
        ]
      )
    end

    defp socket_events(metric_prefix) do
      Event.build(
        :phoenix_socket_event_metrics,
        [
          # Capture socket connection duration
          distribution(
            metric_prefix ++ [:socket, :connected, :duration, :milliseconds],
            event_name: [:phoenix, :socket_connected],
            measurement: :duration,
            description: "The time it takes for the application to establish a socket connection.",
            reporter_options: [peep_bucket_calculator: Buckets],
            tag_values: fn %{result: result, endpoint: endpoint, transport: transport, serializer: serializer} ->
              %{
                transport: transport,
                result: result,
                endpoint: normalize_module_name(endpoint),
                serializer: serializer
              }
            end,
            tags: [:result, :transport, :endpoint, :serializer],
            unit: {:native, :millisecond}
          )
        ]
      )
    end

    # Attaches one handler per endpoint prefix that re-emits the prefix's :stop
    # event under this plugin's own @stop_event name.
    defp set_up_telemetry_proxy(phoenix_event_prefixes) do
      phoenix_event_prefixes
      |> Enum.each(fn telemetry_prefix ->
        stop_event = telemetry_prefix ++ [:stop]

        :telemetry.attach(
          [:prom_ex, :phoenix, :proxy] ++ telemetry_prefix,
          stop_event,
          &__MODULE__.handle_proxy_phoenix_event/4,
          %{}
        )
      end)
    end

    @doc false
    def handle_proxy_phoenix_event(_event_name, event_measurement, event_metadata, _config) do
      :telemetry.execute(@stop_event, event_measurement, event_metadata)
    end

    # Module atoms are rendered without the "Elixir." prefix; anything else is
    # passed through untouched.
    defp normalize_module_name(name) when is_atom(name) do
      name
      |> Atom.to_string()
      |> String.trim_leading("Elixir.")
    end

    defp normalize_module_name(name), do: name

    # Returns the de-duplicated list of telemetry prefixes: one per configured
    # endpoint when :endpoints is given, otherwise the single :event_prefix option
    # (both default to [:phoenix, :endpoint]).
    defp fetch_event_prefixes!(opts) do
      opts
      |> fetch_either!(:router, :endpoints)
      |> case do
        endpoints when is_list(endpoints) ->
          endpoints
          |> Enum.map(fn
            {_endpoint, endpoint_opts} ->
              Keyword.get(endpoint_opts, :event_prefix, [:phoenix, :endpoint])
          end)

        _router ->
          [Keyword.get(opts, :event_prefix, [:phoenix, :endpoint])]
      end
      |> MapSet.new()
      |> MapSet.to_list()
    end

    # Returns the value under key1 when present, else under key2; raises KeyError
    # when neither key is present.
    defp fetch_either!(keywordlist, key1, key2) do
      case {Keyword.has_key?(keywordlist, key1), Keyword.has_key?(keywordlist, key2)} do
        {true, _} ->
          keywordlist[key1]

        {false, true} ->
          keywordlist[key2]

        {false, false} ->
          raise KeyError, "Neither #{inspect(key1)} nor #{inspect(key2)} found in #{inspect(keywordlist)}"
      end
    end
  end
else
  # Fallback used when Phoenix is not loaded. It must carry the same name as the
  # module defined above; the name kept from the copied upstream file
  # (PromEx.Plugins.Phoenix) belongs to the prom_ex library itself.
  defmodule Realtime.PromEx.Plugins.Phoenix do
    @moduledoc false
    use PromEx.Plugin

    @impl true
    def event_metrics(_opts) do
      PromEx.Plugin.no_dep_raise(__MODULE__, "Phoenix")
    end
  end
end

================================================
FILE: lib/realtime/monitoring/prom_ex/plugins/tenant.ex
================================================
defmodule Realtime.PromEx.Plugins.Tenant do
  @moduledoc false

  use PromEx.Plugin

  require Logger

  alias Realtime.Telemetry
  alias Realtime.Tenants
  alias Realtime.UsersCounter

  @impl true
  def polling_metrics(opts) do
    poll_rate = Keyword.get(opts, :poll_rate, 5_000)

    [
      concurrent_connections(poll_rate)
    ]
  end

  @impl true
  def event_metrics(_opts) do
    # Event metrics definitions
    [
      channel_events(),
      replication_metrics(),
      payload_size_metrics()
    ]
  end

  defmodule PayloadSize.Buckets do
    @moduledoc false
    use Peep.Buckets.Custom,
      buckets: [250, 500, 1000, 3000, 5000, 10_000, 25_000, 100_000, 500_000, 1_000_000, 3_000_000]
  end

  defp payload_size_metrics do
    Event.build(
      :realtime_tenant_payload_size_metrics,
      [
        distribution(
[:realtime, :tenants, :payload, :size],
          event_name: [:realtime, :tenants, :payload, :size],
          measurement: :size,
          description: "Tenant payload size",
          tags: [:tenant, :message_type],
          unit: :byte,
          reporter_options: [peep_bucket_calculator: PayloadSize.Buckets]
        )
      ]
    )
  end

  # Polling group that invokes execute_tenant_metrics/0 every `poll_rate` and
  # exposes the two measurements it emits (:connected, :connected_cluster) as
  # last_value metrics tagged by tenant.
  defp concurrent_connections(poll_rate) do
    Polling.build(
      :realtime_concurrent_connections,
      poll_rate,
      {__MODULE__, :execute_tenant_metrics, []},
      [
        last_value(
          [:realtime, :connections, :connected],
          event_name: [:realtime, :connections],
          description: "The node total count of connected clients for a tenant.",
          measurement: :connected,
          tags: [:tenant]
        ),
        last_value(
          [:realtime, :connections, :connected_cluster],
          event_name: [:realtime, :connections],
          description: "The cluster total count of connected clients for a tenant.",
          measurement: :connected_cluster,
          tags: [:tenant]
        )
      ],
      detach_on_error: false
    )
  end

  # Emits one [:realtime, :connections] event per tenant present in the local
  # counts. Tenants that the cache lookup cannot resolve (nil) are skipped.
  def execute_tenant_metrics do
    cluster_counts = UsersCounter.tenant_counts()
    local_tenant_counts = UsersCounter.local_tenant_counts()

    for {t, count} <- local_tenant_counts do
      tenant = Tenants.Cache.get_tenant_by_external_id(t)

      if tenant != nil do
        Telemetry.execute(
          [:realtime, :connections],
          %{
            connected: count,
            # A tenant missing from the cluster-wide counts is reported as 0.
            connected_cluster: Map.get(cluster_counts, t, 0),
            # NOTE(review): :limit is emitted here but no metric in
            # concurrent_connections/1 reads it — confirm whether another
            # consumer relies on it.
            limit: tenant.max_concurrent_users
          },
          %{tenant: t}
        )
      end
    end
  end

  defmodule Replication.Buckets do
    @moduledoc false
    use Peep.Buckets.Custom,
      buckets: [250, 500, 1000, 3000, 5000, 10_000, 25_000, 100_000, 500_000, 1_000_000, 3_000_000]
  end

  # Histogram of the replication poller query duration, per tenant.
  defp replication_metrics do
    Event.build(
      :realtime_tenant_replication_event_metrics,
      [
        distribution(
          [:realtime, :replication, :poller, :query, :duration],
          event_name: [:realtime, :replication, :poller, :query, :stop],
          measurement: :duration,
          description: "Duration of the logical replication slot polling query for Realtime RLS.",
          tags: [:tenant],
          unit: {:microsecond, :millisecond},
          reporter_options: [peep_bucket_calculator: Replication.Buckets]
        )
      ]
    )
  end

  defmodule PolicyAuthorization.Buckets do
    @moduledoc false
    use Peep.Buckets.Custom, buckets: [10, 250, 5000, 15_000]
  end

  defmodule BroadcastFromDatabase.Buckets do
    @moduledoc false
    use Peep.Buckets.Custom, buckets: [10, 250, 5000]
  end

  defmodule Replay.Buckets do
    @moduledoc false
    use Peep.Buckets.Custom, buckets: [10, 250, 5000, 15_000]
  end

  # Per-tenant channel metrics. The first four sums listen to the
  # [:realtime, :rate_counter, :channel, *] events and read the :sum measurement;
  # the byte counters listen to [:realtime, :channel, *_bytes] and read :size.
  defp channel_events do
    Event.build(
      :realtime_tenant_channel_event_metrics,
      [
        sum(
          [:realtime, :channel, :events],
          event_name: [:realtime, :rate_counter, :channel, :events],
          measurement: :sum,
          description: "Sum of messages sent on a Realtime Channel.",
          tags: [:tenant]
        ),
        sum(
          [:realtime, :channel, :presence_events],
          event_name: [:realtime, :rate_counter, :channel, :presence_events],
          measurement: :sum,
          description: "Sum of presence messages sent on a Realtime Channel.",
          tags: [:tenant]
        ),
        sum(
          [:realtime, :channel, :db_events],
          event_name: [:realtime, :rate_counter, :channel, :db_events],
          measurement: :sum,
          description: "Sum of db messages sent on a Realtime Channel.",
          tags: [:tenant]
        ),
        sum(
          [:realtime, :channel, :joins],
          event_name: [:realtime, :rate_counter, :channel, :joins],
          measurement: :sum,
          description: "Sum of Realtime Channel joins.",
          tags: [:tenant]
        ),
        sum(
          [:realtime, :channel, :input_bytes],
          event_name: [:realtime, :channel, :input_bytes],
          description: "Sum of input bytes sent on sockets.",
          measurement: :size,
          tags: [:tenant]
        ),
        sum(
          [:realtime, :channel, :output_bytes],
          event_name: [:realtime, :channel, :output_bytes],
          description: "Sum of output bytes sent on sockets.",
          measurement: :size,
          tags: [:tenant]
        ),
        distribution(
          [:realtime, :tenants, :read_authorization_check],
          event_name: [:realtime, :tenants, :read_authorization_check],
          measurement: :latency,
          unit: :millisecond,
          description: "Latency of read authorization checks.",
          tags: [:tenant],
          reporter_options: [peep_bucket_calculator: PolicyAuthorization.Buckets]
        ),
        distribution(
          [:realtime, :tenants, :write_authorization_check],
          event_name: [:realtime, :tenants, :write_authorization_check],
          measurement: :latency,
          unit:
:millisecond,
          description: "Latency of write authorization checks.",
          tags: [:tenant],
          reporter_options: [peep_bucket_calculator: PolicyAuthorization.Buckets]
        ),
        # NOTE(review): the two broadcast_from_database distributions read the
        # same event but declare different units (:millisecond here,
        # {:microsecond, :millisecond} below) — confirm against the emitter.
        distribution(
          [:realtime, :tenants, :broadcast_from_database, :latency_committed_at],
          event_name: [:realtime, :tenants, :broadcast_from_database],
          measurement: :latency_committed_at,
          unit: :millisecond,
          description: "Latency of database transaction start until reaches server to be broadcasted",
          tags: [:tenant],
          reporter_options: [peep_bucket_calculator: BroadcastFromDatabase.Buckets]
        ),
        distribution(
          [:realtime, :tenants, :broadcast_from_database, :latency_inserted_at],
          event_name: [:realtime, :tenants, :broadcast_from_database],
          measurement: :latency_inserted_at,
          unit: {:microsecond, :millisecond},
          description: "Latency of database inserted_at until reaches server to be broadcasted",
          tags: [:tenant],
          reporter_options: [peep_bucket_calculator: BroadcastFromDatabase.Buckets]
        ),
        distribution(
          [:realtime, :tenants, :replay],
          event_name: [:realtime, :tenants, :replay],
          measurement: :latency,
          unit: :millisecond,
          description: "Latency of broadcast replay",
          tags: [:tenant],
          reporter_options: [peep_bucket_calculator: Replay.Buckets]
        )
      ]
    )
  end
end

================================================
FILE: lib/realtime/monitoring/prom_ex/plugins/tenant_global.ex
================================================
defmodule Realtime.PromEx.Plugins.TenantGlobal do
  @moduledoc """
  Global aggregated variants of per-tenant metrics.

  Subscribes to the same telemetry events as the Tenant plugin but records metrics
  without the tenant tag, enabling cluster-wide aggregation.
  These live on the global endpoint (/metrics) for high-priority scraping.
"""

  use PromEx.Plugin

  alias Realtime.PromEx.Plugins.Tenant
  alias Realtime.Telemetry
  alias Realtime.UsersCounter

  # Event emitted by execute_global_connection_metrics/0 and read by the two
  # last_value metrics below.
  @global_connections_event [:prom_ex, :plugin, :realtime, :connections, :global]

  # One polling group, refreshed every `:poll_rate` ms (5_000 by default).
  @impl true
  def polling_metrics(opts) do
    poll_rate = Keyword.get(opts, :poll_rate, 5_000)

    [
      Polling.build(
        :realtime_global_connections,
        poll_rate,
        {__MODULE__, :execute_global_connection_metrics, []},
        [
          last_value(
            [:realtime, :connections, :global, :connected],
            event_name: @global_connections_event,
            description: "The node total count of connected clients across all tenants.",
            measurement: :connected
          ),
          last_value(
            [:realtime, :connections, :global, :connected_cluster],
            event_name: @global_connections_event,
            description: "The cluster total count of connected clients across all tenants.",
            measurement: :connected_cluster
          )
        ],
        detach_on_error: false
      )
    ]
  end

  @impl true
  def event_metrics(_opts) do
    [
      channel_global_events(),
      payload_global_size_metrics()
    ]
  end

  # Sums the per-tenant local and cluster counts and emits them in a single
  # event with empty metadata (no tenant tag).
  def execute_global_connection_metrics do
    cluster_counts = UsersCounter.tenant_counts()
    local_tenant_counts = UsersCounter.local_tenant_counts()

    connected = local_tenant_counts |> Map.values() |> Enum.sum()
    connected_cluster = cluster_counts |> Map.values() |> Enum.sum()

    Telemetry.execute(
      @global_connections_event,
      %{connected: connected, connected_cluster: connected_cluster},
      %{}
    )
  end

  # Same source event and bucket module as the Tenant plugin's payload size
  # metric, but tagged by :message_type only.
  defp payload_global_size_metrics do
    Event.build(
      :realtime_global_payload_size_metrics,
      [
        distribution(
          [:realtime, :payload, :size],
          event_name: [:realtime, :tenants, :payload, :size],
          measurement: :size,
          description: "Global payload size across all tenants",
          tags: [:message_type],
          unit: :byte,
          reporter_options: [peep_bucket_calculator: Tenant.PayloadSize.Buckets]
        )
      ]
    )
  end

  # Untagged channel metrics; only the error counter carries a tag (:code).
  defp channel_global_events do
    Event.build(
      :realtime_global_channel_event_metrics,
      [
        sum(
          [:realtime, :channel, :global, :events],
          event_name: [:realtime, :rate_counter, :channel, :events],
          measurement: :sum,
          description: "Global sum of messages sent on a Realtime Channel."
        ),
        sum(
          [:realtime, :channel, :global, :presence_events],
          event_name: [:realtime, :rate_counter, :channel, :presence_events],
          measurement: :sum,
          description: "Global sum of presence messages sent on a Realtime Channel."
        ),
        sum(
          [:realtime, :channel, :global, :db_events],
          event_name: [:realtime, :rate_counter, :channel, :db_events],
          measurement: :sum,
          description: "Global sum of db messages sent on a Realtime Channel."
        ),
        sum(
          [:realtime, :channel, :global, :joins],
          event_name: [:realtime, :rate_counter, :channel, :joins],
          measurement: :sum,
          description: "Global sum of Realtime Channel joins."
        ),
        sum(
          [:realtime, :channel, :global, :input_bytes],
          event_name: [:realtime, :channel, :input_bytes],
          description: "Global sum of input bytes sent on sockets.",
          measurement: :size
        ),
        sum(
          [:realtime, :channel, :global, :output_bytes],
          event_name: [:realtime, :channel, :output_bytes],
          description: "Global sum of output bytes sent on sockets.",
          measurement: :size
        ),
        counter(
          [:realtime, :channel, :global, :error],
          event_name: [:realtime, :channel, :error],
          measurement: :code,
          tags: [:code],
          description: "Global count of errors in Realtime channel initialization."
)
      ]
    )
  end
end

================================================
FILE: lib/realtime/monitoring/prom_ex/plugins/tenants.ex
================================================
defmodule Realtime.PromEx.Plugins.Tenants do
  @moduledoc false

  use PromEx.Plugin

  alias PromEx.MetricTypes.Event
  alias Realtime.Tenants.Connect

  require Logger

  defmodule Buckets do
    @moduledoc false
    use Peep.Buckets.Custom, buckets: [10, 250, 5000, 15_000]
  end

  # Event emitted by execute_metrics/0 and read by the polling metric below.
  @event_connected [:prom_ex, :plugin, :realtime, :tenants, :connected]

  # Histogram of rpc call latency, tagged by :success and :mechanism.
  @impl true
  def event_metrics(_) do
    Event.build(:realtime, [
      distribution(
        [:realtime, :global, :rpc],
        event_name: [:realtime, :rpc],
        description: "Global Latency of rpc calls",
        measurement: :latency,
        unit: {:microsecond, :millisecond},
        tags: [:success, :mechanism],
        reporter_options: [peep_bucket_calculator: Buckets]
      )
    ])
  end

  # Polling group that calls execute_metrics/0 every `:poll_rate` ms.
  # Defaults to 5_000 when the option is absent, matching the Tenant and
  # TenantGlobal plugins, so Polling.build/5 is never handed `nil` just because
  # the option was left out.
  @impl true
  def polling_metrics(opts) do
    poll_rate = Keyword.get(opts, :poll_rate, 5_000)

    [
      Polling.build(
        :realtime_tenants_events,
        poll_rate,
        {__MODULE__, :execute_metrics, []},
        [
          last_value(
            [:realtime, :tenants, :connected],
            event_name: @event_connected,
            description: "The total count of connected tenants.",
            measurement: :connected
          )
        ],
        detach_on_error: false
      )
    ]
  end

  # Reports the local registry count of the Connect syn scope, or -1 when that
  # scope is not among this node's scopes.
  def execute_metrics do
    connected =
      if Enum.member?(:syn.node_scopes(), Connect),
        do: :syn.local_registry_count(Connect),
        else: -1

    execute_metrics(@event_connected, %{connected: connected})
  end

  defp execute_metrics(event, metrics) do
    :telemetry.execute(event, metrics, %{})
  end
end

================================================
FILE: lib/realtime/monitoring/prom_ex.ex
================================================
defmodule Realtime.PromEx do
  alias Realtime.PromEx.Plugins.Distributed
  alias Realtime.PromEx.Plugins.GenRpc
  alias Realtime.PromEx.Plugins.OsMon
  alias Realtime.PromEx.Plugins.Phoenix
  alias Realtime.PromEx.Plugins.TenantGlobal
  alias Realtime.PromEx.Plugins.Tenants

  @moduledoc """
  PromEx configuration for global metrics (BEAM, OS, Phoenix, distributed infrastructure).
These are higher-priority metrics. Configure your Victoria Metrics scrape interval lower
  compared to the tenant metrics endpoint.

  Exposes metrics via `/metrics` and `/metrics/:region`.

  Be sure to add the following to finish setting up PromEx:

  1. Update your configuration (config.exs, dev.exs, prod.exs, releases.exs, etc) to
     configure the necessary bits of PromEx. Be sure to check out `PromEx.Config` for
     more details regarding configuring PromEx:
     ```
     config :realtime, Realtime.PromEx,
       disabled: false,
       manual_metrics_start_delay: :no_delay,
       drop_metrics_groups: [],
       grafana: :disabled,
       metrics_server: :disabled
     ```

  2. Add this module to your application supervision tree. It should be one of the first
     things that is started so that no Telemetry events are missed. For example, if PromEx
     is started after your Repo module, you will miss Ecto's init events and the dashboards
     will be missing some data points:
     ```
     def start(_type, _args) do
       children = [
         Realtime.PromEx,

         ...
       ]

       ...
     end
     ```

  3. Update your `endpoint.ex` file to expose your metrics (or configure a standalone
     server using the `:metrics_server` config options). Be sure to put this plug before
     your `Plug.Telemetry` entry so that you can avoid having calls to your `/metrics`
     endpoint create their own metrics and logs which can pollute your logs/metrics given
     that Prometheus will scrape at a regular interval and that can get noisy:
     ```
     defmodule RealtimeWeb.Endpoint do
       use Phoenix.Endpoint, otp_app: :realtime

       ...

       plug PromEx.Plug, prom_ex_module: Realtime.PromEx

       ...
     end
     ```

  4. Update the list of plugins in the `plugins/0` function return list to reflect your
     application's dependencies. Also update the list of dashboards that are to be uploaded
     to Grafana in the `dashboards/0` function.
"""

  use PromEx, otp_app: :realtime

  alias PromEx.Plugins

  defmodule Store do
    @moduledoc false
    # Custom store to set global tags

    @behaviour PromEx.Storage

    # Reads every metric from Peep and renders it with the project's exporter.
    @impl true
    def scrape(name) do
      name
      |> Peep.get_all_metrics()
      |> Realtime.Monitoring.Prometheus.export()
    end

    # Peep child spec using partitioned storage and the `:metrics_tags`
    # application env (empty map when unset) as global tags.
    @impl true
    def child_spec(name, metrics) do
      global_tags = Application.get_env(:realtime, :metrics_tags, %{})

      Peep.child_spec(
        name: name,
        metrics: metrics,
        global_tags: global_tags,
        storage: {Realtime.Monitoring.Peep.Partitioned, 4}
      )
    end
  end

  # Every plugin receives the same `:prom_poll_rate` application env value.
  @impl true
  def plugins do
    poll_rate = Application.get_env(:realtime, :prom_poll_rate)

    [
      {Plugins.Beam, poll_rate: poll_rate, metric_prefix: [:beam]},
      {Phoenix, router: RealtimeWeb.Router, poll_rate: poll_rate, metric_prefix: [:phoenix]},
      {OsMon, poll_rate: poll_rate},
      {Tenants, poll_rate: poll_rate},
      {TenantGlobal, poll_rate: poll_rate},
      {Distributed, poll_rate: poll_rate},
      {GenRpc, poll_rate: poll_rate}
    ]
  end

  @impl true
  def dashboard_assigns do
    [datasource_id: "YOUR_PROMETHEUS_DATASOURCE_ID"]
  end

  @impl true
  def dashboards do
    [
      # PromEx built in Grafana dashboards
      # {:prom_ex, "application.json"},
      # {:prom_ex, "beam.json"},
      # {:prom_ex, "phoenix.json"}
      # {:prom_ex, "ecto.json"},
      # {:prom_ex, "oban.json"},
      # {:prom_ex, "phoenix_live_view.json"}

      # Add your dashboard definitions here with the format: {:otp_app, "path_in_priv"}
      # {:realtime, "/grafana_dashboards/user_metrics.json"}
    ]
  end

  # Fetches the metrics first, then defers the ETS cron flush, and returns the
  # metrics that were fetched.
  def get_global_metrics do
    metrics = PromEx.get_metrics(Realtime.PromEx)

    flusher = Realtime.PromEx.__ets_cron_flusher_name__()
    PromEx.ETSCronFlusher.defer_ets_flush(flusher)

    metrics
  end

  @doc deprecated: "Use get_global_metrics/0 instead"
  def get_metrics, do: get_global_metrics()
end

================================================
FILE: lib/realtime/monitoring/prometheus.ex
================================================
# Based on https://github.com/rkallos/peep/blob/708546ed069aebdf78ac1f581130332bd2e8b5b1/lib/peep/prometheus.ex
defmodule Realtime.Monitoring.Prometheus do
  @moduledoc """
  Prometheus exporter module

  Use a temporary
ets table to cache formatted names and label values
"""

  alias Telemetry.Metrics.{Counter, Distribution, LastValue, Sum}

  # Renders `metrics` (an enumerable of `{metric_spec, series}` pairs) as iodata
  # in Prometheus text format, terminated by "# EOF\n".
  #
  # A private ETS table memoises formatted names and escaped label values for
  # the duration of the call. It is deleted in `after` so that a formatting
  # error cannot leave the table alive for the rest of the calling process's
  # lifetime.
  def export(metrics) do
    cache = :ets.new(:cache, [:set, :private, read_concurrency: false, write_concurrency: :auto])

    try do
      [Enum.map(metrics, &format(&1, cache)), "# EOF\n"]
    after
      :ets.delete(cache)
    end
  end

  # Counters are always exported as "counter"; sums and last values default to
  # "counter" / "gauge" unless `reporter_options[:prometheus_type]` overrides it.
  defp format({%Counter{}, _series} = metric, cache) do
    format_standard(metric, "counter", cache)
  end

  defp format({%Sum{} = spec, _series} = metric, cache) do
    format_standard(metric, spec.reporter_options[:prometheus_type] || "counter", cache)
  end

  defp format({%LastValue{} = spec, _series} = metric, cache) do
    format_standard(metric, spec.reporter_options[:prometheus_type] || "gauge", cache)
  end

  defp format({%Distribution{} = metric, tagged_series}, cache) do
    name = format_name(metric.name, cache)
    help = ["# HELP ", name, " ", escape_help(metric.description)]
    type = ["# TYPE ", name, " histogram"]

    distributions =
      Enum.map(tagged_series, fn {tags, buckets} ->
        format_distribution(name, tags, buckets, cache)
      end)

    [help, ?\n, type, ?\n, distributions]
  end

  # Renders one histogram series: cumulative `_bucket` samples, the `+Inf`
  # bucket, `_sum` and `_count`. `buckets` maps float-formatted upper-bound
  # strings to counts and may also hold the :sum and :infinity keys.
  defp format_distribution(name, tags, buckets, cache) do
    has_labels? = not Enum.empty?(tags)

    buckets_as_floats =
      Map.drop(buckets, [:sum, :infinity])
      |> Enum.map(fn {bucket_string, count} -> {String.to_float(bucket_string), count} end)
      |> Enum.sort()

    {prefix_sums, count} = prefix_sums(buckets_as_floats)

    {labels_done, bucket_partial} =
      if has_labels? do
        labels = format_labels(tags, cache)
        {[?{, labels, "} "], [name, "_bucket{", labels, ",le=\""]}
      else
        {?\s, [name, "_bucket{le=\""]}
      end

    samples =
      prefix_sums
      |> Enum.map(fn {upper_bound, count} ->
        [bucket_partial, format_value(upper_bound), "\"} ", Integer.to_string(count), ?\n]
      end)

    sum = Map.get(buckets, :sum, 0)
    inf = Map.get(buckets, :infinity, 0)

    [
      samples,
      [bucket_partial, "+Inf\"} ", Integer.to_string(count + inf), ?\n],
      [name, "_sum", labels_done, Integer.to_string(sum), ?\n],
      [name, "_count", labels_done, Integer.to_string(count + inf), ?\n]
    ]
  end

  # Renders HELP/TYPE headers plus one sample line per `{labels, value}` pair.
  defp format_standard({metric, series}, type, cache) do
    name = format_name(metric.name, cache)
    help = ["# HELP ", name, " ", escape_help(metric.description)]
    type = ["# TYPE ", name, " ", to_string(type)]

    samples =
      Enum.map(series, fn {labels, value} ->
        has_labels? = not Enum.empty?(labels)

        if has_labels? do
          [name, ?{, format_labels(labels, cache), ?}, " ", format_value(value), ?\n]
        else
          [name, " ", format_value(value), ?\n]
        end
      end)

    [help, ?\n, type, ?\n, samples]
  end

  # Labels are emitted in sorted order as key="escaped value", comma separated.
  defp format_labels(labels, cache) do
    labels
    |> Enum.sort()
    |> Enum.map_intersperse(?,, fn {k, v} -> [to_string(k), "=\"", escape(v, cache), ?"] end)
  end

  # Joins the name segments with "_" and sanitises the result, memoised in `cache`.
  defp format_name(name, cache) do
    case :ets.lookup_element(cache, name, 2, nil) do
      nil ->
        result =
          name
          |> Enum.join("_")
          |> format_name_start()
          |> IO.iodata_to_binary()

        :ets.insert(cache, {name, result})
        result

      result ->
        result
    end
  end

  # Name must start with an ascii letter
  defp format_name_start(<<h, rest::binary>>) when h not in ?A..?Z and h not in ?a..?z,
    do: format_name_start(rest)

  defp format_name_start(<<rest::binary>>), do: format_name_rest(rest, <<>>)

  # Otherwise only letters, numbers, or _
  defp format_name_rest(<<h, rest::binary>>, acc)
       when h in ?A..?Z or h in ?a..?z or h in ?0..?9 or h == ?_,
       do: format_name_rest(rest, [acc, h])

  defp format_name_rest(<<_, rest::binary>>, acc), do: format_name_rest(rest, acc)
  defp format_name_rest(<<>>, acc), do: acc

  defp format_value(true), do: "1"
  defp format_value(false), do: "0"
  defp
format_value(nil), do: "0"
  defp format_value(n) when is_integer(n), do: Integer.to_string(n)
  defp format_value(f) when is_float(f), do: Float.to_string(f)

  # Escapes a label value for the exposition format, memoised in `cache`.
  # `nil` is rendered as the literal string "nil".
  defp escape(nil, _cache), do: "nil"

  defp escape(value, cache) do
    case :ets.lookup_element(cache, value, 2, nil) do
      nil ->
        result =
          value
          |> safe_to_string()
          |> do_escape(<<>>)
          |> IO.iodata_to_binary()

        :ets.insert(cache, {value, result})
        result

      result ->
        result
    end
  end

  # Falls back to inspect/1 for terms that do not implement String.Chars.
  defp safe_to_string(value) do
    case String.Chars.impl_for(value) do
      nil -> inspect(value)
      _ -> to_string(value)
    end
  end

  # Label values: escape double quote, backslash and newline.
  defp do_escape(<<?\", rest::binary>>, acc), do: do_escape(rest, [acc, ?\\, ?\"])
  defp do_escape(<<?\\, rest::binary>>, acc), do: do_escape(rest, [acc, ?\\, ?\\])
  defp do_escape(<<?\n, rest::binary>>, acc), do: do_escape(rest, [acc, ?\\, ?n])
  defp do_escape(<<h, rest::binary>>, acc), do: do_escape(rest, [acc, h])
  defp do_escape(<<>>, acc), do: acc

  # HELP text: only backslash and newline are escaped.
  defp escape_help(value) do
    value
    |> to_string()
    |> escape_help(<<>>)
  end

  defp escape_help(<<?\\, rest::binary>>, acc), do: escape_help(rest, <<acc::binary, ?\\, ?\\>>)
  defp escape_help(<<?\n, rest::binary>>, acc), do: escape_help(rest, <<acc::binary, ?\\, ?n>>)
  defp escape_help(<<h, rest::binary>>, acc), do: escape_help(rest, <<acc::binary, h>>)
  defp escape_help(<<>>, acc), do: acc

  # Turns per-bucket counts into cumulative counts; returns the cumulative list
  # and the grand total.
  defp prefix_sums(buckets), do: prefix_sums(buckets, [], 0)
  defp prefix_sums([], acc, sum), do: {Enum.reverse(acc), sum}

  defp prefix_sums([{bucket, count} | rest], acc, sum) do
    new_sum = sum + count
    new_bucket = {bucket, new_sum}
    prefix_sums(rest, [new_bucket | acc], new_sum)
  end
end

================================================
FILE: lib/realtime/monitoring/tenant_prom_ex.ex
================================================
defmodule Realtime.TenantPromEx do
  alias Realtime.PromEx.Plugins.Channels
  alias Realtime.PromEx.Plugins.Tenant

  @moduledoc """
  PromEx configuration for tenant-level metrics.

  These metrics are per-tenant and considered secondary priority for scraping.
  Configure your Victoria Metrics scrape interval higher (e.g. 60s) compared to
  the global metrics endpoint.

  Exposes metrics via `/metrics/tenant` and `/metrics/:region/tenant`.
"""

  use PromEx, otp_app: :realtime

  @impl true
  def plugins do
    poll_rate = Application.get_env(:realtime, :prom_poll_rate)

    [
      {Tenant, poll_rate: poll_rate},
      {Channels, poll_rate: poll_rate}
    ]
  end

  # Fetches the metrics first, then defers the ETS cron flush, and returns the
  # metrics that were fetched.
  def get_metrics do
    metrics = PromEx.get_metrics(Realtime.TenantPromEx)

    Realtime.TenantPromEx.__ets_cron_flusher_name__()
    |> PromEx.ETSCronFlusher.defer_ets_flush()

    metrics
  end
end

================================================
FILE: lib/realtime/nodes.ex
================================================
defmodule Realtime.Nodes do
  @moduledoc """
  Handles common needs for :syn module operations
  """
  require Logger
  alias Realtime.Api.Tenant
  alias Realtime.Tenants

  @doc """
  Gets the node to launch the Postgres connection on for a tenant.
  """
  @spec get_node_for_tenant(Tenant.t()) :: {:ok, node(), binary()} | {:error, term()}
  def get_node_for_tenant(nil), do: {:error, :tenant_not_found}

  def get_node_for_tenant(%Tenant{} = tenant) do
    # Plain bindings: none of these steps can fail a match, so `with` added nothing.
    region = Tenants.region(tenant)
    tenant_region = platform_region_translator(region)
    node = launch_node(tenant_region, node(), tenant.external_id)

    {:ok, node, tenant_region}
  end

  @doc """
  Translates a region from a platform to the closest Supabase tenant region.

  Region mapping can be customized via the REGION_MAPPING environment variable.
  If not provided, uses the default hardcoded mapping.
"""
  @spec platform_region_translator(String.t() | nil) :: nil | binary()
  def platform_region_translator(nil), do: nil

  def platform_region_translator(tenant_region) when is_binary(tenant_region) do
    case Application.get_env(:realtime, :region_mapping) do
      nil -> default_region_mapping(tenant_region)
      mapping when is_map(mapping) -> Map.get(mapping, tenant_region)
    end
  end

  # Private function with hardcoded defaults
  defp default_region_mapping(tenant_region) do
    case tenant_region do
      "ap-east-1" -> "ap-southeast-1"
      "ap-northeast-1" -> "ap-southeast-1"
      "ap-northeast-2" -> "ap-southeast-1"
      "ap-south-1" -> "ap-southeast-1"
      "ap-southeast-1" -> "ap-southeast-1"
      "ap-southeast-2" -> "ap-southeast-2"
      "ca-central-1" -> "us-east-1"
      "eu-central-1" -> "eu-west-2"
      "eu-central-2" -> "eu-west-2"
      "eu-north-1" -> "eu-west-2"
      "eu-west-1" -> "eu-west-2"
      "eu-west-2" -> "eu-west-2"
      "eu-west-3" -> "eu-west-2"
      "sa-east-1" -> "us-east-1"
      "us-east-1" -> "us-east-1"
      "us-east-2" -> "us-east-1"
      "us-west-1" -> "us-west-1"
      "us-west-2" -> "us-west-1"
      _ -> nil
    end
  end

  @doc """
  Lists the nodes in a region. Sorts by node name in case the list order
  is unstable.
  """
  @spec region_nodes(String.t() | nil) :: [atom()]
  def region_nodes(region) when is_binary(region) do
    :syn.members(RegionNodes, region)
    |> Enum.map(fn {_pid, [node: node]} -> node end)
    |> Enum.sort()
  end

  def region_nodes(nil), do: []

  @doc """
  Picks a node from a region based on the provided key
  """
  @spec node_from_region(String.t(), term()) :: {:ok, node} | {:error, :not_available}
  def node_from_region(region, key) when is_binary(region) do
    nodes = region_nodes(region)

    case nodes do
      [] ->
        {:error, :not_available}

      _ ->
        member_count = Enum.count(nodes)
        # phash2/2 yields 0..member_count-1, a valid zero-based index.
        index = :erlang.phash2(key, member_count)

        {:ok, Enum.fetch!(nodes, index)}
    end
  end

  def node_from_region(_, _), do: {:error, :not_available}

  @doc """
  Picks the node to launch the Postgres connection on.
Selection is deterministic within time buckets to prevent syn conflicts from
  concurrent requests for the same tenant. Uses time-bucketed seeded random
  selection to pick 2 candidate nodes, compares their loads, and picks the least
  loaded one.

  The time bucket approach ensures:
  - Requests within same time window (default: 60s) pick same nodes → prevents conflicts
  - Requests in different time windows pick different random nodes → better long-term distribution

  If load data is not available for both candidates (for example because a node's
  uptime is below the configured threshold for load balancing), a consistent node
  is picked based on hashing the tenant ID.

  If the region has no nodes, the `default` node given is returned; if it has a
  single node, that node is returned.
  """
  @spec launch_node(String.t() | nil, atom(), String.t()) :: atom()
  def launch_node(region, default, tenant_id) when is_binary(tenant_id) do
    candidates = region_nodes(region)

    case candidates do
      [] ->
        Logger.warning("Zero region nodes for #{region} using #{inspect(default)}")
        default

      [only_node] ->
        only_node

      _several ->
        load_aware_node_picker(candidates, tenant_id)
    end
  end

  @node_selection_time_bucket_seconds Application.compile_env(
                                        :realtime,
                                        :node_selection_time_bucket_seconds,
                                        60
                                      )

  # Picks two candidates and returns the less loaded one (the first on a tie).
  # When either load is not a number, falls back to hashing the tenant id.
  defp load_aware_node_picker(regions_nodes, tenant_id) when is_binary(tenant_id) do
    total = length(regions_nodes)
    {first, second} = two_random_nodes(tenant_id, regions_nodes, total)

    # Compare loads and pick least loaded
    first_load = node_load(first)
    second_load = node_load(second)

    cond do
      not (is_number(first_load) and is_number(second_load)) ->
        # Fallback to consistently picking a node if load data is not available
        Enum.fetch!(regions_nodes, :erlang.phash2(tenant_id, total))

      first_load <= second_load ->
        first

      true ->
        second
    end
  end

  # Returns two nodes chosen with an RNG seeded from {tenant_id, time bucket},
  # so every caller within the same bucket gets the same pair.
  defp two_random_nodes(tenant_id, nodes, node_count) do
    # Current time bucket (unix timestamp / bucket_size)
    bucket = div(System.system_time(:second), @node_selection_time_bucket_seconds)

    # Explicit-state RNG API: nothing is stored in the process dictionary
    seeded = :rand.seed_s(:exsss, :erlang.phash2({tenant_id, bucket}))

    {pos1, after_first} = :rand.uniform_s(node_count, seeded)
    {drawn, _after_second} = :rand.uniform_s(node_count, after_first)

    # Ensure the second position differs from the first when multiple nodes
    # are available: step to the next node, wrapping around via rem/2.
    pos2 =
      if pos1 == drawn and node_count > 1 do
        rem(pos1, node_count) + 1
      else
        drawn
      end

    # Positions are 1-based, Enum.at/2 is 0-based.
    {Enum.at(nodes, pos1 - 1), Enum.at(nodes, pos2 - 1)}
  end

  @doc """
  Gets the node load for a node either locally or remotely. Returns {:error, :not_enough_data} if the node has not been running for long enough to get reliable metrics.
  """
  @spec node_load(atom()) :: integer() | {:error, :not_enough_data}
  def node_load(node) do
    if node == node() do
      threshold = Application.fetch_env!(:realtime, :node_balance_uptime_threshold_in_ms)

      if uptime_ms() < threshold,
        do: {:error, :not_enough_data},
        else: :cpu_sup.avg5()
    else
      Realtime.GenRpc.call(node, __MODULE__, :node_load, [node], [])
    end
  end

  @doc """
  Gets a short node name from a node name when a node name looks like `realtime-prod@fdaa:0:cc:a7b:b385:83c3:cfe3:2`

  ## Examples

      iex> node = Node.self()
      iex> Realtime.Nodes.short_node_id_from_name(node)
      "nohost"

      iex> node = :"realtime-prod@fdaa:0:cc:a7b:b385:83c3:cfe3:2"
      iex> Realtime.Nodes.short_node_id_from_name(node)
      "83c3cfe3"

      iex> node = :"pink@127.0.0.1"
      iex> Realtime.Nodes.short_node_id_from_name(node)
      "pink@127.0.0.1"

      iex> node = :"pink@10.0.1.1"
      iex> Realtime.Nodes.short_node_id_from_name(node)
      "10.0.1.1"

      iex> node = :"realtime@host.name.internal"
      iex> Realtime.Nodes.short_node_id_from_name(node)
      "host.name.internal"
  """
  @spec short_node_id_from_name(atom()) :: String.t()
  def short_node_id_from_name(name) when is_atom(name) do
    full_name = Atom.to_string(name)
    [_, host] = String.split(full_name, "@", parts: 2)

    case String.split(host, ":", parts: 8) do
      # Eight colon-separated groups: join the sixth and seventh.
      [_, _, _, _, _, one, two, _] -> one <> two
      # Loopback host: keep the whole node name.
      ["127.0.0.1"] -> full_name
      _other -> host
    end
  end

  @spec
all_node_regions() :: [String.t()]
  @doc "List all the regions where nodes can be launched"
  def all_node_regions(), do: :syn.group_names(RegionNodes)

  # Milliseconds elapsed since the runtime's start time, computed from
  # monotonic time in native units.
  defp uptime_ms do
    start_time = :erlang.system_info(:start_time)
    now = :erlang.monotonic_time()
    :erlang.convert_time_unit(now - start_time, :native, :millisecond)
  end
end

================================================
FILE: lib/realtime/operations.ex
================================================
defmodule Realtime.Operations do
  @moduledoc """
  Support operations for Realtime.
  """
  alias Realtime.Rpc

  @doc """
  Ensures connected users are connected to the closest region by killing and
  restarting the connection process.

  Returns the number of tenant processes that were stopped.
  """
  def rebalance do
    Enum.reduce(:syn.group_names(:users), 0, fn tenant, acc ->
      scope = Realtime.Syn.PostgresCdc.scope(tenant)

      case :syn.lookup(scope, tenant) do
        {pid, %{region: region}} ->
          platform_region = Realtime.Nodes.platform_region_translator(region)
          current_node = node(pid)

          case Realtime.Nodes.launch_node(platform_region, false, tenant) do
            ^current_node -> acc
            _ -> stop_user_tenant_process(tenant, platform_region, acc)
          end

        _ ->
          acc
      end
    end)
  end

  @doc """
  Kills all connections to a tenant database in all connected nodes
  """
  @spec kill_connections_to_tenant_id_in_all_nodes(String.t(), atom()) :: list()
  def kill_connections_to_tenant_id_in_all_nodes(tenant_id, reason \\ :normal) do
    [node() | Node.list()]
    |> Task.async_stream(
      fn node ->
        Rpc.enhanced_call(node, __MODULE__, :kill_connections_to_tenant_id, [tenant_id, reason])
      end,
      timeout: 5000
    )
    |> Enum.to_list()
  end

  @doc """
  Kills all connections to a tenant database in the current node
  """
  @spec kill_connections_to_tenant_id(String.t(), atom()) :: :ok
  def kill_connections_to_tenant_id(tenant_id, reason) do
    Logger.metadata(external_id: tenant_id, project: tenant_id)

    # Find every DBConnection.Connection process whose logger metadata carries
    # this tenant id, then collect the connection-pool supervisors linked to
    # them. The links are iterated by a generator so the result is a flat list
    # of pids: Process.exit/2 raises when handed a list.
    pids_to_kill =
      for pid <- Process.list(),
          info = Process.info(pid),
          dict = Keyword.get(info, :dictionary, []),
          match?({DBConnection.Connection, :init, 1}, dict[:"$initial_call"]),
          Keyword.get(dict, :"$logger_metadata$")[:external_id] == tenant_id,
          link <- Keyword.get(info, :links, []),
          connection_pool?(link) do
        link
      end

    # Several connections share one pool; signal each pool only once.
    pids_to_kill
    |> Enum.uniq()
    |> Enum.each(&Process.exit(&1, reason))
  end

  # True when `pid` is a live process whose initial call is the DBConnection
  # pool supervisor. Ports and processes that died since the links were read
  # yield false.
  defp connection_pool?(pid) when is_pid(pid) do
    case Process.info(pid, :dictionary) do
      {:dictionary, dict} ->
        Keyword.get(dict, :"$initial_call") == {:supervisor, DBConnection.ConnectionPool.Pool, 1}

      nil ->
        false
    end
  end

  defp connection_pool?(_not_a_pid), do: false

  @doc """
  Kills all Ecto.Migration.Runner processes that are linked only to Ecto.MigratorSupervisor
  """
  @spec dirty_terminate_runners :: list()
  def dirty_terminate_runners do
    Ecto.MigratorSupervisor
    |> DynamicSupervisor.which_children()
    |> Enum.reduce([], fn
      {_, pid, :worker, [Ecto.Migration.Runner]}, acc when is_pid(pid) ->
        # Process.info/2 returns nil for a runner that already exited; skip it
        # instead of crashing on length(nil).
        case Process.info(pid, :links) do
          {:links, links} when length(links) < 2 ->
            [{pid, Agent.stop(pid, :normal, 5_000)} | acc]

          _ ->
            acc
        end

      _, acc ->
        acc
    end)
  end

  # Stops the tenant's CDC process and returns `acc + 1`. On failure the error
  # is printed and `acc` is returned unchanged, so the reduce in rebalance/0
  # keeps an integer accumulator.
  defp stop_user_tenant_process(tenant, platform_region, acc) do
    Extensions.PostgresCdcRls.handle_stop(tenant, 5_000)
    # credo:disable-for-next-line
    IO.inspect({"Stopped", tenant, platform_region})
    Process.sleep(1_500)
    acc + 1
  catch
    kind, reason ->
      # credo:disable-for-next-line
      IO.inspect({"Failed to stop", tenant, kind, reason})
      acc
  end
end

================================================
FILE: lib/realtime/postgres_cdc.ex
================================================
defmodule Realtime.PostgresCdc do
  @moduledoc false

  require Logger

  alias Realtime.Api.Tenant

  @timeout 10_000
  @extensions Application.compile_env(:realtime, :extensions)

  defmodule Exception do
    defexception message: "PostgresCdc error!"
end

  # Thin dispatchers to the driver module's callbacks.
  def connect(module, opts) do
    apply(module, :handle_connect, [opts])
  end

  def after_connect(module, connect_response, extension, params, tenant) do
    apply(module, :handle_after_connect, [connect_response, extension, params, tenant])
  end

  # Subscribes the caller to the tenant's "postgres_cdc_rls:" topic before
  # delegating to the driver.
  def subscribe(module, pg_change_params, tenant, metadata) do
    RealtimeWeb.Endpoint.subscribe("postgres_cdc_rls:" <> tenant)
    apply(module, :handle_subscribe, [pg_change_params, tenant, metadata])
  end

  @spec stop(module, Tenant.t(), pos_integer) :: :ok
  def stop(module, tenant, timeout \\ @timeout) do
    apply(module, :handle_stop, [tenant.external_id, timeout])
  end

  @doc """
  Stops all available drivers within a specified timeout.

  Expects all handle_stop calls to return `:ok` within the `stop_timeout`.

  We want all available drivers to stop within the `timeout`, so each driver
  gets an equal share of it. With no drivers configured there is nothing to
  stop and `:ok` is returned.
  """
  @spec stop_all(Tenant.t(), pos_integer) :: :ok | :error
  def stop_all(tenant, timeout \\ @timeout) do
    case available_drivers() do
      [] ->
        # Guard against dividing the timeout by zero.
        :ok

      drivers ->
        stop_timeout = Kernel.ceil(timeout / length(drivers))

        # Every driver is asked to stop, even after an earlier one failed.
        stops = Enum.map(drivers, fn module -> stop(module, tenant, stop_timeout) end)

        if Enum.all?(stops, &(&1 == :ok)), do: :ok, else: :error
    end
  end

  @spec available_drivers :: list
  def available_drivers do
    @extensions
    |> Enum.filter(fn {_, e} -> e.type == :postgres_cdc end)
    |> Enum.map(fn {_, e} -> e.driver end)
  end

  # Returns the settings of the single extension whose type equals `key`;
  # raises MatchError unless exactly one extension matches.
  @spec filter_settings(binary(), list()) :: map()
  def filter_settings(key, extensions) do
    [cdc] = Enum.filter(extensions, fn e -> e.type == key end)

    cdc.settings
  end

  @doc """
  Gets the extension module for a tenant.
"""
  @spec driver(String.t()) :: {:ok, module()} | {:error, String.t()}
  def driver(tenant_key) do
    @extensions
    |> Enum.filter(fn {_, %{key: key}} -> tenant_key == key end)
    |> case do
      [{_, %{driver: driver}}] -> {:ok, driver}
      _ -> {:error, "No driver found for key #{tenant_key}"}
    end
  end

  @callback handle_connect(any()) :: {:ok, any()} | nil
  @callback handle_after_connect(any(), any(), any(), tenant_id :: String.t()) ::
              {:ok, any()} | {:error, any()} | {:error, any(), any()}
  @callback handle_subscribe(any(), any(), any()) :: :ok
  @callback handle_stop(any(), any()) :: any()
end

================================================
FILE: lib/realtime/rate_counter/dynamic_supervisor.ex
================================================
defmodule Realtime.RateCounter.DynamicSupervisor do
  @moduledoc """
  Dynamic Supervisor to spin up `RateCounter`s as needed.
  """

  use DynamicSupervisor

  @spec start_link(list()) :: {:error, any} | {:ok, pid}
  def start_link(args) do
    DynamicSupervisor.start_link(__MODULE__, args, name: __MODULE__)
  end

  @impl true
  def init(_args) do
    DynamicSupervisor.init(strategy: :one_for_one)
  end
end

================================================
FILE: lib/realtime/rate_counter/rate_counter.ex
================================================
defmodule Realtime.RateCounter do
  @moduledoc """
  Start a RateCounter for any Erlang term.

  These rate counters use the GenCounter module. Start your RateCounter here and
  increment it with a `GenCounter.add/1` call, for example.
"""

  use GenServer

  require Logger

  alias Realtime.GenCounter
  alias Realtime.RateCounter
  alias Realtime.Telemetry

  defmodule Args do
    @moduledoc false

    @type t :: %__MODULE__{id: term(), opts: keyword}
    defstruct id: nil, opts: []
  end

  @idle_shutdown :timer.minutes(10)
  @tick :timer.seconds(1)
  @max_bucket_len 60
  @cache __MODULE__
  @app_name Mix.Project.config()[:app]

  defstruct id: nil,
            avg: 0.0,
            sum: 0,
            bucket: [],
            max_bucket_len: @max_bucket_len,
            tick: @tick,
            tick_ref: nil,
            idle_shutdown: @idle_shutdown,
            idle_shutdown_ref: nil,
            limit: %{log: false},
            telemetry: %{emit: false}

  @type t :: %__MODULE__{
          id: term(),
          avg: float(),
          sum: non_neg_integer(),
          bucket: list(),
          max_bucket_len: integer(),
          tick: integer(),
          tick_ref: reference() | nil,
          idle_shutdown: integer() | :infinity,
          idle_shutdown_ref: reference() | nil,
          limit:
            %{log: false}
            | %{
                log: true,
                value: integer(),
                measurement: :sum | :avg,
                triggered: boolean(),
                log_fn: (-> term())
              },
          telemetry:
            %{emit: false}
            | %{
                emit: true,
                event_name: :telemetry.event_name(),
                measurements: :telemetry.event_measurements(),
                metadata: :telemetry.event_metadata()
              }
        }

  # `args` is a single keyword list (it is read with Keyword.get/2), so the
  # spec takes `keyword()` rather than a list of keyword lists.
  # The process is registered in Realtime.Registry.Unique under
  # {__MODULE__, :rate_counter, id}. Raises when `:id` is missing or falsy.
  @spec start_link(keyword()) :: {:ok, pid()} | {:error, {:already_started, pid()}}
  def start_link(args) do
    id = Keyword.get(args, :id)
    if !id, do: raise("Supply an identifier to start a counter!")

    GenServer.start_link(__MODULE__, args,
      name: {:via, Registry, {Realtime.Registry.Unique, {__MODULE__, :rate_counter, id}}}
    )
  end

  @doc """
  Starts a new RateCounter under a DynamicSupervisor
  """
  @spec new(Args.t(), keyword) :: DynamicSupervisor.on_start_child()
  def new(%Args{id: id} = args, opts \\ []) do
    # Call-site opts override the ones stored in Args; :id is always prepended.
    opts = [id: id] ++ Keyword.merge(args.opts, opts)

    DynamicSupervisor.start_child(RateCounter.DynamicSupervisor, %{
      id: id,
      start: {__MODULE__, :start_link, [opts]},
      restart: :transient
    })
  end

  @doc "Publish an update to the RateCounter with the given id"
  @spec publish_update(term()) :: :ok
  def publish_update(id), do: Phoenix.PubSub.broadcast(Realtime.PubSub, update_topic(id), :update)
@doc """ Gets the state of the RateCounter. Automatically starts the RateCounter if it does not exist or if it has stopped due to idleness. """ @spec get(term() | Args.t()) :: {:ok, t} | {:error, term()} def get(%Args{id: id} = args) do case do_get(id) do {:ok, state} -> {:ok, state} {:error, :not_found} -> case new(args) do {:ok, _} -> do_get(id) {:error, {:already_started, _}} -> do_get(id) {:error, reason} -> {:error, reason} end end end defp do_get(id) do case Cachex.get(@cache, id) do {:ok, nil} -> {:error, :not_found} {:ok, state} -> {:ok, state} end end defp update_topic(id), do: "rate_counter:#{inspect(id)}" @impl true def init(args) do id = Keyword.fetch!(args, :id) telem_opts = Keyword.get(args, :telemetry) every = Keyword.get(args, :tick, @tick) max_bucket_len = Keyword.get(args, :max_bucket_len, @max_bucket_len) idle_shutdown_ms = Keyword.get(args, :idle_shutdown, @idle_shutdown) limit_opts = Keyword.get(args, :limit) Logger.info("Starting #{__MODULE__} for: #{inspect(id)}") # Always reset the counter in case the counter had already accumulated without # a RateCounter running to calculate avg and buckets GenCounter.reset(id) :ok = Phoenix.PubSub.subscribe(Realtime.PubSub, update_topic(id)) telemetry = if telem_opts do Logger.metadata(telem_opts.metadata) %{ emit: true, event_name: [@app_name] ++ [:rate_counter] ++ telem_opts.event_name, measurements: Map.merge(%{sum: 0}, telem_opts.measurements), metadata: Map.merge(%{id: id}, telem_opts.metadata) } else %{emit: false} end limit = if limit_opts do %{ log: true, value: Keyword.fetch!(limit_opts, :value), measurement: Keyword.fetch!(limit_opts, :measurement), log_fn: Keyword.fetch!(limit_opts, :log_fn), triggered: false } else %{log: false} end ticker = tick(0) idle_shutdown_ref = if idle_shutdown_ms != :infinity, do: shutdown_after(idle_shutdown_ms), else: nil state = %__MODULE__{ id: id, tick: every, tick_ref: ticker, max_bucket_len: max_bucket_len, idle_shutdown: idle_shutdown_ms, idle_shutdown_ref: 
idle_shutdown_ref, telemetry: telemetry, limit: limit } Cachex.put!(@cache, id, state) {:ok, state} end @impl true def handle_info(:tick, state) do Process.cancel_timer(state.tick_ref) count = GenCounter.reset(state.id) if state.telemetry.emit and count > 0, do: Telemetry.execute( state.telemetry.event_name, %{state.telemetry.measurements | sum: count}, state.telemetry.metadata ) bucket = [count | state.bucket] |> Enum.take(state.max_bucket_len) bucket_len = Enum.count(bucket) sum = Enum.sum(bucket) avg = sum / bucket_len state = %{state | bucket: bucket, sum: sum, avg: avg} state = maybe_trigger_limit(state) tick(state.tick) Cachex.put!(@cache, state.id, state) {:noreply, state} end def handle_info(:idle_shutdown, state) do if Enum.all?(state.bucket, &(&1 == 0)) do # All the buckets are empty, so we can assume this RateCounter has not been useful recently Logger.warning("#{__MODULE__} idle_shutdown reached for: #{inspect(state.id)}") shutdown(state) else Process.cancel_timer(state.idle_shutdown_ref) idle_shutdown_ref = shutdown_after(state.idle_shutdown) {:noreply, %{state | idle_shutdown_ref: idle_shutdown_ref}} end end def handle_info(:update, state) do # When we get an update message we shutdown so that this RateCounter # can be restarted with new parameters shutdown(state) end def handle_info(_, state), do: {:noreply, state} defp shutdown(state) do GenCounter.delete(state.id) # We are expiring in the near future instead of deleting so that # The process dies before the cache information disappears # If we were using Cachex.delete instead then the following rare scenario would be possible: # * RateCounter.get/2 is called; # * Cache was deleted but the process has not stopped yet; # * RateCounter.get/2 will then try to start a new RateCounter but the supervisor will return :already_started; # * Process finally stops; # * The cache is still empty because no new process was started causing an error Cachex.expire(@cache, state.id, :timer.seconds(1)) {:stop, 
:normal, state} end defp maybe_trigger_limit(%{limit: %{log: false}} = state), do: state defp maybe_trigger_limit(%{limit: %{triggered: true, measurement: measurement}} = state) do # Limit has been triggered, but we need to check if it is still above the limit if Map.fetch!(state, measurement) < state.limit.value do %{state | limit: %{state.limit | triggered: false}} else # Limit is still above the threshold, so we keep the state as is state end end defp maybe_trigger_limit(%{limit: %{measurement: measurement}} = state) do if Map.fetch!(state, measurement) >= state.limit.value do state.limit.log_fn.() %{state | limit: %{state.limit | triggered: true}} else state end end defp tick(every) do Process.send_after(self(), :tick, every) end defp shutdown_after(ms) do Process.send_after(self(), :idle_shutdown, ms) end end ================================================ FILE: lib/realtime/release.ex ================================================ defmodule Realtime.Release do @moduledoc """ Used for executing DB release tasks when run in production without Mix installed. 
"""
  @app :realtime

  # Runs every pending migration for each repo listed under `:ecto_repos`.
  def migrate do
    load_app()

    Enum.map(repos(), fn repo ->
      {:ok, _, _} =
        Ecto.Migrator.with_repo(repo, fn started_repo ->
          Ecto.Migrator.run(started_repo, :up, all: true)
        end)
    end)
  end

  # Rolls `repo` back down to `version`.
  def rollback(repo, version) do
    load_app()

    {:ok, _, _} =
      Ecto.Migrator.with_repo(repo, fn started_repo ->
        Ecto.Migrator.run(started_repo, :down, to: version)
      end)
  end

  # Evaluates `priv/repo/seeds.exs` with the application started.
  # A missing seeds file makes the match below fail.
  def seeds(repo) do
    load_app()
    {:ok, _} = Application.ensure_all_started(:realtime)

    run_seeds = fn _repo ->
      path = "#{:code.priv_dir(@app)}/repo/seeds.exs"

      case File.regular?(path) do
        true -> {:ok, Code.eval_file(path)}
        false -> {:error, "Seeds file not found."}
      end
    end

    {:ok, {:ok, _}, _} = Ecto.Migrator.with_repo(repo, run_seeds)
  end

  defp repos, do: Application.fetch_env!(@app, :ecto_repos)

  defp load_app, do: Application.load(@app)
end

================================================
FILE: lib/realtime/repo.ex
================================================
defmodule Realtime.Repo do
  use Ecto.Repo,
    otp_app: :realtime,
    adapter: Ecto.Adapters.Postgres

  # Starts a temporary, unnamed repo with `config` merged over the defaults,
  # runs `callback` against it and always restores the previous dynamic repo
  # and stops the temporary one afterwards.
  def with_dynamic_repo(config, callback) do
    previous_repo = get_dynamic_repo()

    start_opts = Keyword.merge([name: nil, pool_size: 2], config)
    {:ok, temp_repo} = Realtime.Repo.start_link(start_opts)

    try do
      put_dynamic_repo(temp_repo)
      callback.(temp_repo)
    after
      put_dynamic_repo(previous_repo)
      Supervisor.stop(temp_repo)
    end
  end
end

================================================
FILE: lib/realtime/repo_replica.ex
================================================
defmodule Realtime.Repo.Replica do
  @moduledoc """
  Generates a read-only replica repo for the region specified in config/runtime.exs.
"""
  require Logger

  use Ecto.Repo,
    otp_app: :realtime,
    adapter: Ecto.Adapters.Postgres,
    read_only: true

  @replicas_fly %{
    "sea" => Realtime.Repo.Replica.SJC,
    "sjc" => Realtime.Repo.Replica.SJC,
    "gru" => Realtime.Repo.Replica.IAD,
    "iad" => Realtime.Repo.Replica.IAD,
    "sin" => Realtime.Repo.Replica.SIN,
    "maa" => Realtime.Repo.Replica.SIN,
    "syd" => Realtime.Repo.Replica.SIN,
    "lhr" => Realtime.Repo.Replica.FRA,
    "fra" => Realtime.Repo.Replica.FRA
  }

  @replicas_aws %{
    "ap-southeast-1" => Realtime.Repo.Replica.Singapore,
    "ap-southeast-2" => Realtime.Repo.Replica.Singapore,
    "eu-west-2" => Realtime.Repo.Replica.London,
    "us-east-1" => Realtime.Repo.Replica.NorthVirginia,
    "us-west-2" => Realtime.Repo.Replica.Oregon,
    "us-west-1" => Realtime.Repo.Replica.SanJose
  }

  # Body injected into replica modules that are created at runtime
  @ast (quote do
          use Ecto.Repo,
            otp_app: :realtime,
            adapter: Ecto.Adapters.Postgres,
            read_only: true
        end)

  @doc """
  Returns the replica repo module for the region specified in config/runtime.exs.
  """
  @spec replica() :: module()
  def replica do
    region = Application.get_env(:realtime, :region)
    master_region = Application.get_env(:realtime, :master_region) || region
    replica = configured_replica_module(region)
    replica_conf = Application.get_env(:realtime, replica)

    # Do not create module if replica isn't set or configuration is not present
    cond do
      is_nil(replica) ->
        Realtime.Repo

      is_nil(replica_conf) ->
        Realtime.Repo

      region == master_region ->
        Realtime.Repo

      true ->
        # Check if module is present
        case Code.ensure_compiled(replica) do
          {:module, _} -> nil
          _ -> {:module, _, _, _} = Module.create(replica, @ast, Macro.Env.location(__ENV__))
        end

        replica
    end
  end

  defp configured_replica_module(region) do
    main_replica_config = Application.get_env(:realtime, __MODULE__)

    # If the main replica module is configured we don't bother with specific replica modules
    if main_replica_config do
      __MODULE__
    else
      replicas =
        case Application.get_env(:realtime, :platform) do
          :aws -> @replicas_aws
          :fly -> @replicas_fly
          _ -> %{}
        end

      Map.get(replicas, region)
    end
  end

  if Mix.env() == :test do
    def replicas_aws, do: @replicas_aws

    def replicas_fly, do: @replicas_fly
  end
end

================================================
FILE: lib/realtime/rpc.ex
================================================
defmodule Realtime.Rpc do
  @moduledoc """
  RPC module for Realtime with the intent of standardizing the RPC interface and collect telemetry
  """
  use Realtime.Logs
  alias Realtime.Telemetry

  @doc """
  Calls external node using :rpc.call/5 and collects telemetry
  """
  @spec call(atom(), atom(), atom(), any(), keyword()) :: any()
  def call(node, mod, func, args, opts \\ []) do
    timeout = Keyword.get(opts, :timeout, Application.get_env(:realtime, :rpc_timeout))
    {latency, response} = :timer.tc(fn -> :rpc.call(node, mod, func, args, timeout) end)

    Telemetry.execute(
      [:realtime, :rpc],
      %{latency: latency},
      %{mod: mod, func: func, target_node: node, origin_node: node(), mechanism: :rpc, success: nil}
    )

    response
  end

  @doc """
  Calls external node using :erpc.call/5 and collects telemetry

  Options:
    * `:timeout` - call timeout, defaults to the `:rpc_timeout` application env
    * `:tenant_id` - only used as logging metadata when the call fails

  The remote function's return value is passed through unchanged; only `{:ok, _}` is reported
  as a success in telemetry. Anything raised, thrown or exited by `:erpc.call/5` is logged and
  returned as `{:error, :rpc_error, reason}`.
  """
  @spec enhanced_call(atom(), atom(), atom(), any(), keyword()) ::
          {:ok, any()} | {:error, :rpc_error, term()} | {:error, term()}
  def enhanced_call(node, mod, func, args \\ [], opts \\ []) do
    timeout = Keyword.get(opts, :timeout, Application.get_env(:realtime, :rpc_timeout))
    tenant_id = Keyword.get(opts, :tenant_id)

    try do
      {latency, response} = :timer.tc(fn -> :erpc.call(node, mod, func, args, timeout) end)

      emit_erpc_telemetry(latency, node, mod, func, match?({:ok, _}, response))

      response
    catch
      _, reason ->
        reason =
          case reason do
            {_, reason} -> reason
            {_, reason, _} -> reason
            # Any other caught term (e.g. a thrown atom) is reported as is instead of
            # raising a CaseClauseError from inside the error handler
            other -> other
          end

        # Latency is unknown when the call blows up, so 0 is reported
        emit_erpc_telemetry(0, node, mod, func, false)

        log_error(
          "ErrorOnRpcCall",
          %{target: node, mod: mod, func: func, error: reason},
          project: tenant_id,
          external_id: tenant_id
        )

        {:error, :rpc_error, reason}
    end
  end

  # Emits the [:realtime, :rpc] telemetry event for an :erpc based call
  defp emit_erpc_telemetry(latency, target_node, mod, func, success) do
    Telemetry.execute(
      [:realtime, :rpc],
      %{latency: latency},
      %{
        mod: mod,
        func: func,
        target_node: target_node,
        origin_node: node(),
        success: success,
        mechanism: :erpc
      }
    )
  end
end

================================================
FILE: lib/realtime/signal_handler.ex
================================================
defmodule Realtime.SignalHandler do
  @moduledoc false
  @behaviour :gen_event
  require Logger

  @spec shutdown_in_progress? :: :ok | {:error, :shutdown_in_progress}
  def shutdown_in_progress? do
    case !!Application.get_env(:realtime, :shutdown_in_progress) do
      true -> {:error, :shutdown_in_progress}
      false -> :ok
    end
  end

  @impl true
  def init({%{handler_mod: _} = args, :ok}) do
    {:ok, args}
  end

  @impl true
  def handle_event(signal, %{handler_mod: handler_mod} = state) do
    Logger.error("#{__MODULE__}: #{inspect(signal)} received")

    # Flag the shutdown before delegating so `shutdown_in_progress?/0` starts returning an error
    if signal == :sigterm do
      Application.put_env(:realtime, :shutdown_in_progress, true)
    end

    handler_mod.handle_event(signal, state)
  end

  @impl true
  defdelegate handle_info(info, state), to: :erl_signal_handler

  @impl true
  defdelegate handle_call(request, state), to: :erl_signal_handler
end

================================================
FILE: lib/realtime/syn/postgres_cdc.ex
================================================
defmodule Realtime.Syn.PostgresCdc do
  @moduledoc """
  Scope for the PostgresCdc module.
  """

  @doc """
  Returns the scope for a given tenant id.
"""
  @spec scope(String.t()) :: atom()
  def scope(tenant_id) do
    shard_count = Application.fetch_env!(:realtime, :postgres_cdc_scope_shards)

    tenant_id
    |> :erlang.phash2(shard_count)
    |> shard_scope()
  end

  # Every shard scope, from shard 0 up to the configured shard count minus one.
  def scopes() do
    shard_count = Application.fetch_env!(:realtime, :postgres_cdc_scope_shards)

    for shard <- 0..(shard_count - 1), do: shard_scope(shard)
  end

  def syn_topic_prefix(), do: "realtime_postgres_cdc_"

  def syn_topic(tenant_id), do: "#{syn_topic_prefix()}#{tenant_id}"

  # Builds the scope atom for a shard number.
  defp shard_scope(shard), do: :"realtime_postgres_cdc_#{shard}"
end

================================================
FILE: lib/realtime/syn_handler.ex
================================================
defmodule Realtime.SynHandler do
  @moduledoc """
  Custom defined Syn's callbacks
  """
  require Logger
  alias Realtime.Syn.PostgresCdc
  alias Realtime.Tenants.Connect
  alias RealtimeWeb.Endpoint

  @behaviour :syn_event_handler
  @postgres_cdc_scope_prefix PostgresCdc.syn_topic_prefix()

  @impl true
  def on_registry_process_updated(Connect, tenant_id, pid, %{conn: conn}, :normal) when is_pid(conn) do
    # Let local subscribers know the database connection is available
    payload = %{pid: pid, conn: conn}
    Endpoint.local_broadcast(Connect.syn_topic(tenant_id), "ready", payload)
  end

  def on_registry_process_updated(scope, tenant_id, _pid, meta, _reason) do
    # Only the sharded PostgresCdc scopes broadcast here; every other scope is a no-op
    if String.starts_with?(Atom.to_string(scope), @postgres_cdc_scope_prefix) do
      Endpoint.local_broadcast(PostgresCdc.syn_topic(tenant_id), "ready", meta)
    else
      :ok
    end
  end

  @impl true
  def on_process_registered(scope, name, _pid, _meta, _reason) do
    event = [:syn, scope, :registered]
    :telemetry.execute(event, %{}, %{name: name})
  end

  @doc """
  When processes registered with :syn are unregistered, either manually or by stopping, this
  callback is invoked.

  Other processes can subscribe to these events via PubSub to respond to them.

  We want to log conflict resolutions to know when more than one process on the cluster
  was started, and subsequently stopped because :syn handled the conflict.
"""
  @impl true
  def on_process_unregistered(scope, name, pid, _meta, reason) do
    :telemetry.execute([:syn, scope, :unregistered], %{}, %{name: name})

    case Atom.to_string(scope) do
      # PostgresCdc scopes: the event name is "<scope>_down" on the tenant's PostgresCdc topic.
      # `scope` is only rebound to the string form inside this clause.
      @postgres_cdc_scope_prefix <> _ = scope ->
        Endpoint.local_broadcast(PostgresCdc.syn_topic(name), scope <> "_down", %{pid: pid, reason: reason})

      # Any other scope: `scope` is still the atom here; its last underscored segment is the topic prefix
      _ ->
        topic = topic(scope)
        Endpoint.local_broadcast(topic <> ":" <> name, topic <> "_down", %{pid: pid, reason: reason})
    end

    if reason == :syn_conflict_resolution,
      do: log("#{scope} terminated due to syn conflict resolution: #{inspect(name)} #{inspect(pid)}")

    :ok
  end

  @doc """
  Decides which of two conflicting processes registered under the same name survives.

  Registration times are ignored. The decision is made by `decide/3`, which only looks at
  the name and at the node of each pid, so that both nodes reach the same decision.

  Only the node that hosts the process to stop actually stops it:
  we first send an exit with reason {:shutdown, :syn_conflict_resolution}.
  If the process has not stopped after 5 seconds, an exit with reason :kill (that can't be trapped) is sent.

  Returns the pid to keep.
  """
  @impl true
  def resolve_registry_conflict(mod, name, {pid1, _meta1, _time1}, {pid2, _meta2, _time2}) do
    {pid_to_keep, pid_to_stop} = decide(pid1, pid2, name)

    # Is this function running on the node that should stop?
    if node(pid_to_stop) == node() do
      log(
        "Resolving conflict on scope #{inspect(mod)} for name #{inspect(name)} {#{node(pid1)}, #{inspect(pid1)}} vs {#{node(pid2)}, #{inspect(pid2)}}, stop local process: #{inspect(pid_to_stop)}"
      )

      stop(pid_to_stop)
    else
      log(
        "Resolving conflict on scope #{inspect(mod)} for name #{inspect(name)} {#{node(pid1)}, #{inspect(pid1)}} vs {#{node(pid2)}, #{inspect(pid2)}}, remote process will be stopped: #{inspect(pid_to_stop)}"
      )
    end

    pid_to_keep
  end

  # Stops the process from a separate spawned process so the caller is not blocked
  # while waiting (up to 5 seconds) for the :DOWN message.
  defp stop(pid_to_stop) do
    spawn(fn ->
      Process.monitor(pid_to_stop)
      Process.exit(pid_to_stop, {:shutdown, :syn_conflict_resolution})

      receive do
        {:DOWN, _ref, :process, ^pid_to_stop, reason} ->
          log("Successfully stopped #{inspect(pid_to_stop)}. Reason: #{inspect(reason)}")
      after
        5000 ->
          log("Timed out while waiting for process #{inspect(pid_to_stop)} to stop. Sending kill exit signal")
          Process.exit(pid_to_stop, :kill)
      end
    end)
  end

  defp log(message), do: Logger.warning("SynHandler(#{node()}): #{message}")

  # We use node and the name to decide who lives and who dies
  # This way both nodes will always agree on the same outcome
  # regardless of timing issues
  # Returns {pid_to_keep, pid_to_stop}
  defp decide(pid1, pid2, name) do
    # We hash the name to not always pick one specific node when a conflict happens
    # between these 2 nodes
    hash = :erlang.phash2(name, 2)

    if hash == 1 do
      # Keep the process on the node that sorts first
      if node(pid1) < node(pid2) do
        {pid1, pid2}
      else
        {pid2, pid1}
      end
    else
      # Keep the process on the node that sorts last
      if node(pid1) < node(pid2) do
        {pid2, pid1}
      else
        {pid1, pid2}
      end
    end
  end

  # Last path segment of the underscored module name
  defp topic(mod) do
    mod
    |> Macro.underscore()
    |> String.split("/")
    |> Enum.take(-1)
    |> hd()
  end
end

================================================
FILE: lib/realtime/telemetry/logger.ex
================================================
defmodule Realtime.Telemetry.Logger do
  @moduledoc """
  We can log less frequent Telemetry events to get data into BigQuery.
  """

  require Logger

  use GenServer

  # Telemetry events this process attaches `handle_event/4` to
  @events [
    [:realtime, :connections],
    [:realtime, :rate_counter, :channel, :events],
    [:realtime, :rate_counter, :channel, :joins],
    [:realtime, :rate_counter, :channel, :db_events],
    [:realtime, :rate_counter, :channel, :presence_events]
  ]

  def start_link(args) do
    GenServer.start_link(__MODULE__, args)
  end

  # Expects exactly `[handler_id: id]` as the start argument
  def init(handler_id: handler_id) do
    :telemetry.attach_many(handler_id, @events, &__MODULE__.handle_event/4, [])

    {:ok, []}
  end

  @doc """
  Logs billing metrics for a tenant aggregated and emitted by a PromEx metric poller.
"""
  def handle_event(event, measurements, %{tenant: tenant}, _config) do
    Logger.info(["Billing metrics: ", inspect(event)], %{project: tenant, measurements: measurements})

    :ok
  end

  # Events without a `:tenant` in their metadata are ignored
  def handle_event(_event, _measurements, _metadata, _config), do: :ok

  def handle_info(_msg, state), do: {:noreply, state}
end

================================================
FILE: lib/realtime/telemetry/telemetry.ex
================================================
defmodule Realtime.Telemetry do
  @moduledoc """
  Telemetry wrapper
  """

  @doc """
  Forwards the event to `:telemetry.execute/3`.
  """
  @spec execute([atom, ...], map, map) :: :ok
  def execute(event, measurements, metadata \\ %{}),
    do: :telemetry.execute(event, measurements, metadata)
end

================================================
FILE: lib/realtime/tenants/authorization/policies/broadcast_policies.ex
================================================
defmodule Realtime.Tenants.Authorization.Policies.BroadcastPolicies do
  @moduledoc """
  BroadcastPolicies structure that holds the required authorization information
  for a given connection within the scope of a sending / receiving broadcasts messages
  """
  require Logger

  # Both fields default to nil
  defstruct [:read, :write]

  @type t :: %__MODULE__{
          read: boolean() | nil,
          write: boolean() | nil
        }
end

================================================
FILE: lib/realtime/tenants/authorization/policies/presence_policies.ex
================================================
defmodule Realtime.Tenants.Authorization.Policies.PresencePolicies do
  @moduledoc """
  PresencePolicies structure that holds the required authorization information
  for a given connection within the scope of a tracking / receiving presence messages
  """
  require Logger

  # Both fields default to nil
  defstruct [:read, :write]

  @type t :: %__MODULE__{
          read: boolean() | nil,
          write: boolean() | nil
        }
end

================================================
FILE: lib/realtime/tenants/authorization/policies.ex
================================================
defmodule Realtime.Tenants.Authorization.Policies do
  @moduledoc """
  Policies structure that holds the required authorization information
  for a given connection.

  Currently there are two types of policies:
  * Realtime.Tenants.Authorization.Policies.BroadcastPolicies - Used to store the access to Broadcast feature on a given Topic
  * Realtime.Tenants.Authorization.Policies.PresencePolicies - Used to store the access to Presence feature on a given Topic
  """

  alias Realtime.Tenants.Authorization.Policies.BroadcastPolicies
  alias Realtime.Tenants.Authorization.Policies.PresencePolicies

  defstruct broadcast: %BroadcastPolicies{},
            presence: %PresencePolicies{}

  @type t :: %__MODULE__{
          broadcast: BroadcastPolicies.t(),
          presence: PresencePolicies.t()
        }

  @doc """
  Sets `sub_key` to `value` inside the policy stored under `key`.

  Raises if `key` is not present in `policies`.
  """
  @spec update_policies(t(), atom, atom, boolean) :: t()
  def update_policies(policies, key, sub_key, value),
    do: Map.update!(policies, key, &Map.put(&1, sub_key, value))
end

================================================
FILE: lib/realtime/tenants/authorization.ex
================================================
defmodule Realtime.Tenants.Authorization do
  @moduledoc """
  Runs validations based on RLS policies to return policies and
  creates a Realtime.Tenants.Policies struct with the accumulated results of the policies
  for a given user and a given channel context

  Each extension will have its own set of ways to check Policies against the Authorization context but we will create some setup data to be used by the policies.
Check more information at Realtime.Tenants.Authorization.Policies
"""
  import Ecto.Query

  alias DBConnection.ConnectionError
  alias Realtime.Api.Message
  alias Realtime.Api.Tenant
  alias Realtime.Database
  alias Realtime.GenCounter
  alias Realtime.GenRpc
  alias Realtime.Tenants.Repo
  alias Realtime.Tenants.Authorization.Policies

  defstruct [:tenant_id, :topic, :headers, :jwt, :claims, :role, :sub]

  @type t :: %__MODULE__{
          :tenant_id => binary | nil,
          :topic => binary | nil,
          :claims => map,
          :headers => list({binary, binary}),
          :role => binary,
          :sub => binary | nil
        }

  @doc """
  Builds a new authorization struct which will be used to retain the information required to check Policies.

  Requires a map with the following keys:
  * tenant_id: The tenant id
  * topic: The name of the channel being accessed taken from the request
  * headers: Request headers when the connection was made or WS was upgraded
  * claims: JWT claims
  * role: JWT role claim
  * sub: JWT sub claim
  """
  @spec build_authorization_params(map()) :: t()
  def build_authorization_params(map) do
    %__MODULE__{
      tenant_id: Map.get(map, :tenant_id),
      topic: Map.get(map, :topic),
      headers: Map.get(map, :headers),
      claims: Map.get(map, :claims),
      role: Map.get(map, :role),
      sub: Map.get(map, :sub)
    }
  end

  @doc """
  Runs validations based on RLS policies to return policies for read policies

  Automatically uses RPC if the database connection is not in the same node

  Returns `{:error, :increase_connection_pool}` without touching the database while the tenant's
  authorization error rate limit is triggered.
  """
  @spec get_read_authorizations(Policies.t(), pid(), t(), keyword()) ::
          {:ok, Policies.t()} | {:error, any()} | {:error, :rls_policy_error, any()}
  def get_read_authorizations(policies, db_conn, authorization_context, opts \\ [])

  def get_read_authorizations(policies, db_conn, authorization_context, opts)
      when node() == node(db_conn) do
    rate_counter = rate_counter(authorization_context.tenant_id)

    if rate_counter.limit.triggered == false do
      db_conn
      |> get_read_policies_for_connection(authorization_context, policies, opts)
      |> handle_policies_result(rate_counter)
    else
      {:error, :increase_connection_pool}
    end
  end

  # Remote call
  def get_read_authorizations(policies, db_conn, authorization_context, opts) do
    rate_counter = rate_counter(authorization_context.tenant_id)

    if rate_counter.limit.triggered == false do
      case GenRpc.call(
             node(db_conn),
             __MODULE__,
             :get_read_authorizations,
             [policies, db_conn, authorization_context, opts],
             tenant_id: authorization_context.tenant_id,
             key: authorization_context.tenant_id
           ) do
        {:error, :increase_connection_pool} = error ->
          # Count the error on the calling node as well
          GenCounter.add(rate_counter.id)
          error

        {:error, :rpc_error, reason} ->
          {:error, reason}

        response ->
          response
      end
    else
      {:error, :increase_connection_pool}
    end
  end

  @doc """
  Runs validations based on RLS policies to return policies for write policies

  Automatically uses RPC if the database connection is not in the same node

  Returns `{:error, :increase_connection_pool}` without touching the database while the tenant's
  authorization error rate limit is triggered.
  """
  @spec get_write_authorizations(Policies.t(), pid(), __MODULE__.t(), keyword()) ::
          {:ok, Policies.t()} | {:error, any()} | {:error, :rls_policy_error, any()}
  def get_write_authorizations(policies, db_conn, authorization_context, opts \\ [])

  def get_write_authorizations(policies, db_conn, authorization_context, opts)
      when node() == node(db_conn) do
    rate_counter = rate_counter(authorization_context.tenant_id)

    if rate_counter.limit.triggered == false do
      db_conn
      |> get_write_policies_for_connection(authorization_context, policies, opts)
      |> handle_policies_result(rate_counter)
    else
      {:error, :increase_connection_pool}
    end
  end

  # Remote call
  def get_write_authorizations(policies, db_conn, authorization_context, opts) do
    rate_counter = rate_counter(authorization_context.tenant_id)

    if rate_counter.limit.triggered == false do
      case GenRpc.call(
             node(db_conn),
             __MODULE__,
             :get_write_authorizations,
             [policies, db_conn, authorization_context, opts],
             tenant_id: authorization_context.tenant_id,
             key: authorization_context.tenant_id
           ) do
        {:error, :increase_connection_pool} = error ->
          # Count the error on the calling node as well
          GenCounter.add(rate_counter.id)
          error

        {:error, :rpc_error, reason} ->
          {:error, reason}

        response ->
          response
      end
    else
      {:error, :increase_connection_pool}
    end
  end

  # Convenience arity that starts from an empty %Policies{}
  def get_write_authorizations(db_conn, authorization_context) do
    get_write_authorizations(%Policies{}, db_conn, authorization_context)
  end

  # Normalizes the result of the transaction that ran the policy checks.
  # Connection pool exhaustion is counted on the tenant's authorization error rate counter.
  defp handle_policies_result(result, rate_counter) do
    case result do
      {:ok, %Policies{} = policies} ->
        {:ok, policies}

      {:ok, {:error, %Postgrex.Error{} = error}} ->
        {:error, :rls_policy_error, error}

      # The transaction itself succeeded but a policy check returned a non Postgrex error
      {:ok, {:error, error}} ->
        {:error, error}

      {:error, %ConnectionError{reason: :queue_timeout}} ->
        GenCounter.add(rate_counter.id)
        {:error, :increase_connection_pool}

      {:error, {:exit, _}} ->
        GenCounter.add(rate_counter.id)
        {:error, :increase_connection_pool}

      {:error, error} ->
        {:error, error}
    end
  end

  @doc """
  Sets the current connection configuration with the following config values:
  * role: The role of the user
  * realtime.topic: The name of the channel being accessed
  * request.jwt.claim.role: The role of the user
  * request.jwt.claim.sub: The sub claim of the JWT token
  * request.jwt.claims: The claims of the JWT token
  * request.headers: The headers of the request
  """
  @spec set_conn_config(DBConnection.t(), t()) :: Postgrex.Result.t()
  def set_conn_config(conn, authorization_context) do
    %__MODULE__{
      topic: topic,
      headers: headers,
      claims: claims,
      role: role,
      sub: sub
    } = authorization_context

    claims = Jason.encode!(claims)
    headers = headers |> Map.new() |> Jason.encode!()

    # Third argument `true` makes every setting local to the current transaction
    Postgrex.query!(
      conn,
      """
      SELECT
       set_config('role', $1, true),
       set_config('realtime.topic', $2, true),
       set_config('request.jwt.claims', $3, true),
       set_config('request.jwt.claim.sub', $4, true),
       set_config('request.jwt.claim.role', $5, true),
       set_config('request.headers', $6, true)
      """,
      [role, topic, claims, sub, role, headers]
    )
  end

  # Inserts one probe message per extension, switches to the user's role and checks
  # which of the probe messages can be read back. Everything is rolled back afterwards.
  defp get_read_policies_for_connection(conn, authorization_context, policies, caller_opts) do
    tenant_id = authorization_context.tenant_id
    opts = [telemetry: [:realtime, :tenants, :read_authorization_check], tenant_id: tenant_id]
    metadata = [project: tenant_id, external_id: tenant_id, tenant_id: tenant_id]
    extensions = extensions_to_check(caller_opts)

    Database.transaction(
      conn,
      fn transaction_conn ->
        changesets =
          Enum.map(extensions, fn ext ->
            Message.changeset(%Message{}, %{topic: authorization_context.topic, extension: ext})
          end)

        {:ok, messages} = Repo.insert_all_entries(transaction_conn, changesets, Message)
        messages_by_extension = Map.new(messages, &{&1.extension, &1.id})

        set_conn_config(transaction_conn, authorization_context)
        policies = check_read_policies(transaction_conn, authorization_context, messages_by_extension, policies)

        Postgrex.query!(transaction_conn, "ROLLBACK AND CHAIN", [])
        policies
      end,
      opts,
      metadata
    )
  end

  # Switches to the user's role and tries to insert one probe message per extension.
  # Everything is rolled back afterwards.
  defp get_write_policies_for_connection(conn, authorization_context, policies, caller_opts) do
    tenant_id = authorization_context.tenant_id
    opts = [telemetry: [:realtime, :tenants, :write_authorization_check], tenant_id: tenant_id]
    metadata = [project: tenant_id, external_id: tenant_id]
    extensions = extensions_to_check(caller_opts)

    Database.transaction(
      conn,
      fn transaction_conn ->
        set_conn_config(transaction_conn, authorization_context)
        policies = check_write_policies(transaction_conn, authorization_context, extensions, policies)
        Postgrex.query!(transaction_conn, "ROLLBACK AND CHAIN", [])
        policies
      end,
      opts,
      metadata
    )
  end

  @all_extensions [:broadcast, :presence]

  # Presence is checked unless the caller passes `presence_enabled?: false`
  defp extensions_to_check(opts) do
    if Keyword.get(opts, :presence_enabled?, true), do: @all_extensions, else: [:broadcast]
  end

  # Returns the updated policies, or the non-matching result of `Repo.all/3`
  defp check_read_policies(conn, authorization_context, messages_by_extension, policies) do
    ids = Map.values(messages_by_extension)
    query = from(m in Message, where: m.topic == ^authorization_context.topic and m.id in ^ids)

    with {:ok, res} <- Repo.all(conn, query, Message) do
      returned_ids = MapSet.new(res, & &1.id)

      Enum.reduce(@all_extensions, policies, fn extension, acc ->
        # Readable only if a probe message was inserted for the extension and it came back
        can? =
          Map.has_key?(messages_by_extension, extension) and
            MapSet.member?(returned_ids, messages_by_extension[extension])

        Policies.update_policies(acc, extension, :read, can?)
      end)
    end
  end

  # Returns the updated policies, or the first unexpected insert result.
  # The reduction stops at the first unexpected result so that an error is never
  # used as the accumulator of a following extension.
  defp check_write_policies(conn, authorization_context, extensions, policies) do
    Enum.reduce_while(@all_extensions, policies, fn extension, acc ->
      if extension in extensions do
        changeset = Message.changeset(%Message{}, %{topic: authorization_context.topic, extension: extension})

        case Repo.insert(conn, changeset, Message, mode: :savepoint) do
          {:ok, _} ->
            {:cont, Policies.update_policies(acc, extension, :write, true)}

          {:error, %Postgrex.Error{postgres: %{code: :insufficient_privilege}}} ->
            {:cont, Policies.update_policies(acc, extension, :write, false)}

          e ->
            {:halt, e}
        end
      else
        # Extensions that are not checked are never writable
        {:cont, Policies.update_policies(acc, extension, :write, false)}
      end
    end)
  end

  # Fetches the state of the tenant's authorization error rate counter
  defp rate_counter(tenant_id) do
    %Tenant{} = tenant = Realtime.Tenants.Cache.get_tenant_by_external_id(tenant_id)
    rate_counter = Realtime.Tenants.authorization_errors_per_second_rate(tenant)
    {:ok, rate_counter} = Realtime.RateCounter.get(rate_counter)
    rate_counter
  end
end

================================================
FILE: lib/realtime/tenants/batch_broadcast.ex
================================================
defmodule Realtime.Tenants.BatchBroadcast do
  @moduledoc """
  Virtual schema with a representation of a batched broadcast.
"""

  use Ecto.Schema
  import Ecto.Changeset

  alias Realtime.Api.Tenant
  alias Realtime.GenCounter
  alias Realtime.RateCounter
  alias Realtime.Tenants
  alias Realtime.Tenants.Authorization
  alias Realtime.Tenants.Authorization.Policies
  alias Realtime.Tenants.Authorization.Policies.BroadcastPolicies
  alias Realtime.Tenants.Connect

  alias RealtimeWeb.RealtimeChannel
  alias RealtimeWeb.TenantBroadcaster

  embedded_schema do
    embeds_many :messages, Message do
      field :event, :string
      field :topic, :string
      field :payload, :map
      field :private, :boolean, default: false
    end
  end

  @doc """
  Validates a batch of messages and broadcasts each one to its tenant topic.

  Public messages are always sent. Private messages are grouped by topic and
  only sent when `super_user` is truthy or the write authorization check for
  that topic grants broadcast write access; otherwise they are silently dropped.

  Returns:

    * `:ok` when the batch was accepted
    * `{:error, changeset}` when the payload is invalid
    * `{:error, :too_many_requests, message}` when the rate limit check fails
    * `{:error, :tenant_not_found}` when `tenant` is `nil`
  """
  @spec broadcast(
          auth_params :: Plug.Conn.t() | map() | nil,
          tenant :: Tenant.t() | nil,
          messages :: %{
            messages: list(%{id: String.t(), topic: String.t(), payload: map(), event: String.t(), private: boolean()})
          },
          super_user :: boolean()
        ) :: :ok | {:error, atom() | Ecto.Changeset.t()} | {:error, :too_many_requests, String.t()}
  def broadcast(auth_params, tenant, messages, super_user \\ false)

  def broadcast(%Plug.Conn{} = conn, %Tenant{} = tenant, messages, super_user) do
    # Extract only what the authorization layer needs from the connection.
    auth_params = %{
      tenant_id: tenant.external_id,
      headers: conn.req_headers,
      claims: conn.assigns.claims,
      role: conn.assigns.role,
      sub: conn.assigns.sub
    }

    broadcast(auth_params, tenant, messages, super_user)
  end

  def broadcast(auth_params, %Tenant{} = tenant, messages, super_user) do
    with %Ecto.Changeset{valid?: true} = changeset <- changeset(%__MODULE__{}, messages, tenant),
         %Ecto.Changeset{changes: %{messages: messages}} = changeset,
         events_per_second_rate = Tenants.events_per_second_rate(tenant),
         :ok <- check_rate_limit(events_per_second_rate, tenant, length(messages)) do
      events =
        messages
        |> Enum.map(fn %{changes: event} -> event end)
        |> Enum.group_by(fn event -> Map.get(event, :private, false) end)

      # Handle events for public channel
      events
      |> Map.get(false, [])
      |> Enum.each(fn message -> send_message_and_count(tenant, events_per_second_rate, message, true) end)

      # Handle events for private channel: authorization is checked once per topic
      events
      |> Map.get(true, [])
      |> Enum.group_by(fn event -> Map.get(event, :topic) end)
      |> Enum.each(fn {topic, topic_events} ->
        # `||` short-circuits, so super users never trigger the authorization lookup.
        if super_user || broadcast_write_allowed?(tenant, auth_params, topic) do
          Enum.each(topic_events, fn message ->
            send_message_and_count(tenant, events_per_second_rate, message, false)
          end)
        end
      end)

      :ok
    else
      %Ecto.Changeset{valid?: false} = changeset -> {:error, changeset}
      error -> error
    end
  end

  def broadcast(_, nil, _, _), do: {:error, :tenant_not_found}

  # Casts the incoming payload, requiring at least the embedded `messages` list.
  defp changeset(payload, attrs, tenant) do
    payload
    |> cast(attrs, [])
    |> cast_embed(:messages, required: true, with: fn message, attrs -> message_changeset(message, tenant, attrs) end)
  end

  # Validates a single message: required fields and tenant payload size limit.
  defp message_changeset(message, tenant, attrs) do
    message
    |> cast(attrs, [:id, :topic, :payload, :event, :private])
    |> maybe_put_private_change()
    |> validate_required([:topic, :payload, :event])
    |> validate_payload_size(tenant)
  end

  # Makes `:private` explicit in the changes so grouping by it never sees a missing key.
  defp maybe_put_private_change(changeset) do
    case get_change(changeset, :private) do
      nil -> put_change(changeset, :private, false)
      _ -> changeset
    end
  end

  defp validate_payload_size(changeset, tenant) do
    payload = get_change(changeset, :payload)

    case Tenants.validate_payload_size(tenant, payload) do
      :ok -> changeset
      _ -> add_error(changeset, :payload, "Payload size exceeds tenant limit")
    end
  end

  @event_type "broadcast"
  # Publishes one message on the tenant topic and bumps the events-per-second counter.
  defp send_message_and_count(tenant, events_per_second_rate, message, public?) do
    tenant_topic = Tenants.tenant_topic(tenant, message.topic, public?)
    payload = %{"payload" => message.payload, "event" => message.event, "type" => @event_type}

    # The message id is optional; only expose it as metadata when present.
    payload =
      if message[:id],
        do: Map.put(payload, "meta", %{"id" => message.id}),
        else: payload

    broadcast = %Phoenix.Socket.Broadcast{topic: message.topic, event: @event_type, payload: payload}

    GenCounter.add(events_per_second_rate.id)

    TenantBroadcaster.pubsub_broadcast(
      tenant.external_id,
      tenant_topic,
      broadcast,
      RealtimeChannel.MessageDispatcher,
      :broadcast
    )
  end

  # True only when the authorization lookup yields policies granting broadcast write.
  # Any other outcome (nil, error tuples) is treated as "not allowed".
  defp broadcast_write_allowed?(tenant, auth_params, topic) do
    match?(
      %Policies{broadcast: %BroadcastPolicies{write: true}},
      permissions_for_message(tenant, auth_params, topic)
    )
  end

  # Returns the write policies for `topic`, `nil` when there is nothing to check
  # against, or the error returned by the connection/authorization lookup.
  defp permissions_for_message(_, nil, _), do: nil

  defp permissions_for_message(tenant, auth_params, topic) do
    with {:ok, db_conn} <- Connect.lookup_or_start_connection(tenant.external_id) do
      auth_params =
        auth_params
        |> Map.put(:topic, topic)
        |> Authorization.build_authorization_params()

      case Authorization.get_write_authorizations(db_conn, auth_params) do
        {:ok, policies} -> policies
        {:error, :not_found} -> nil
        error -> error
      end
    end
  end

  # Rejects the batch when the tenant is already over its limit, or when adding
  # the whole batch to the current average would exceed it.
  defp check_rate_limit(events_per_second_rate, %Tenant{} = tenant, total_messages_to_broadcast) do
    %{max_events_per_second: max_events_per_second} = tenant
    {:ok, %{avg: events_per_second}} = RateCounter.get(events_per_second_rate)

    cond do
      events_per_second > max_events_per_second ->
        {:error, :too_many_requests, "You have exceeded your rate limit"}

      total_messages_to_broadcast + events_per_second > max_events_per_second ->
        {:error, :too_many_requests, "Too many messages to broadcast, please reduce the batch size"}

      true ->
        :ok
    end
  end
end

================================================
FILE: lib/realtime/tenants/cache.ex
================================================
defmodule Realtime.Tenants.Cache do
  @moduledoc """
  Cache for Tenants.
"""

  require Cachex.Spec
  require Logger

  alias Realtime.GenRpc
  alias Realtime.Tenants

  # Child spec starting a Cachex cache named after this module, with the default
  # entry expiration read from the `:tenant_cache_expiration` application env.
  def child_spec(_) do
    default_ttl = Application.get_env(:realtime, :tenant_cache_expiration)
    cachex_opts = [expiration: Cachex.Spec.expiration(default: default_ttl)]

    %{id: __MODULE__, start: {Cachex, :start_link, [__MODULE__, cachex_opts]}}
  end

  # Returns the tenant for `tenant_id`, loading it through `Tenants` on a cache
  # miss. Missing tenants (nil) are returned but never stored in the cache.
  def get_tenant_by_external_id(tenant_id) do
    load_tenant = fn _key ->
      case Tenants.get_tenant_by_external_id(tenant_id) do
        nil -> {:ignore, nil}
        tenant -> {:commit, tenant}
      end
    end

    case Cachex.fetch(__MODULE__, cache_key(tenant_id), load_tenant) do
      {status, value} when status in [:commit, :ok, :ignore] -> value
    end
  end

  defp cache_key(tenant_id) do
    {:get_tenant_by_external_id, tenant_id}
  end

  @doc """
  Invalidates the cache for a tenant in the local node
  """
  def invalidate_tenant_cache(tenant_id) do
    Cachex.del(__MODULE__, cache_key(tenant_id))
  end

  # Invalidates the tenant's cache entry on every node via a GenRpc multicast.
  def distributed_invalidate_tenant_cache(tenant_id) when is_binary(tenant_id),
    do: GenRpc.multicast(__MODULE__, :invalidate_tenant_cache, [tenant_id])

  @doc """
  Update the cache for a tenant
  """
  def update_cache(tenant), do: Cachex.put(__MODULE__, cache_key(tenant.external_id), tenant)

  @doc """
  Update the cache for a tenant in all nodes
  """
  @spec global_cache_update(Realtime.Api.Tenant.t()) :: :ok
  def global_cache_update(tenant), do: GenRpc.multicast(__MODULE__, :update_cache, [tenant])
end

================================================
FILE: lib/realtime/tenants/connect/check_connection.ex
================================================
defmodule Realtime.Tenants.Connect.CheckConnection do
  @moduledoc """
  Check tenant database connection.
"""

  @behaviour Realtime.Tenants.Connect.Piper

  # Opens (or checks) the tenant database connection, monitors it, and records
  # the connection pid, the monitor reference and the database migration count
  # in the accumulator.
  @impl true
  def run(acc) do
    %{tenant: tenant} = acc
    connection_check = Realtime.Database.check_tenant_connection(tenant)

    case connection_check do
      {:error, error} ->
        {:error, error}

      {:ok, conn, migrations_ran} ->
        monitor_ref = Process.monitor(conn)

        updated =
          %{acc | db_conn_pid: conn, db_conn_reference: monitor_ref, migrations_ran_on_database: migrations_ran}

        {:ok, updated}
    end
  end
end

================================================
FILE: lib/realtime/tenants/connect/get_tenant.ex
================================================
defmodule Realtime.Tenants.Connect.GetTenant do
  @moduledoc """
  Fetches the tenant from the tenants cache and stores it in the accumulator.
  """

  alias Realtime.Api.Tenant
  alias Realtime.Tenants

  @behaviour Realtime.Tenants.Connect.Piper

  # Puts the cached tenant under `:tenant`, or fails with `:tenant_not_found`
  # when the cache lookup returns anything other than a `%Tenant{}`.
  @impl Realtime.Tenants.Connect.Piper
  def run(acc) do
    %{tenant_id: tenant_id} = acc

    tenant_id
    |> Tenants.Cache.get_tenant_by_external_id()
    |> case do
      %Tenant{} = tenant -> {:ok, Map.put(acc, :tenant, tenant)}
      _ -> {:error, :tenant_not_found}
    end
  end
end

================================================
FILE: lib/realtime/tenants/connect/piper.ex
================================================
defmodule Realtime.Tenants.Connect.Piper do
  @moduledoc """
  Pipes different commands to execute specific actions during the connection process.
"""

  require Logger

  @callback run(any()) :: {:ok, any()} | {:error, any()}

  # Runs each piper module in order, feeding the accumulator of one step into
  # the next. Stops at the first `{:error, _}` and returns it; raises
  # ArgumentError when a piper returns anything else.
  def run(pipers, init) do
    Enum.reduce_while(pipers, {:ok, init}, fn piper, {:ok, acc} -> run_step(piper, acc) end)
  end

  # Executes a single piper, logging how long it took in milliseconds.
  defp run_step(piper, acc) do
    {exec_time, outcome} = :timer.tc(fn -> piper.run(acc) end, :millisecond)

    case outcome do
      {:ok, result} ->
        Logger.info("#{inspect(piper)} executed in #{exec_time} ms")
        {:cont, {:ok, result}}

      {:error, error} ->
        Logger.error("#{inspect(piper)} failed in #{exec_time} ms")
        {:halt, {:error, error}}

      _ ->
        raise ArgumentError, "must return {:ok, _} or {:error, _}"
    end
  end
end

================================================
FILE: lib/realtime/tenants/connect/reconcile_migrations.ex
================================================
defmodule Realtime.Tenants.Connect.ReconcileMigrations do
  @moduledoc """
  Reconciles the tenant's cached migrations_ran counter with the actual migration
  count from the tenant database's schema_migrations table.

  This handles the case where a project restore causes the database schema to revert
  while the migrations_ran counter remains at the latest value.
"""

  use Realtime.Logs

  alias Realtime.Api

  @behaviour Realtime.Tenants.Connect.Piper

  # When the cached counter already matches the database, the accumulator passes
  # through untouched. Otherwise the mismatch is logged, the stored counter is
  # updated, and the refreshed tenant replaces the one in the accumulator.
  @impl true
  def run(%{tenant: tenant, migrations_ran_on_database: migrations_ran_on_database} = acc) do
    if tenant.migrations_ran == migrations_ran_on_database do
      {:ok, acc}
    else
      log_warning(
        "MigrationCountMismatch",
        "cached=#{tenant.migrations_ran} database=#{migrations_ran_on_database}"
      )

      update_result = Api.update_migrations_ran(tenant.external_id, migrations_ran_on_database)

      case update_result do
        {:ok, updated_tenant} -> {:ok, %{acc | tenant: updated_tenant}}
        {:error, error} -> {:error, error}
      end
    end
  end
end

================================================
FILE: lib/realtime/tenants/connect/register_process.ex
================================================
defmodule Realtime.Tenants.Connect.RegisterProcess do
  @moduledoc """
  Registers the database process in :syn
  """

  alias Realtime.Tenants.Connect

  @behaviour Realtime.Tenants.Connect.Piper

  # Stores the database connection pid in the Connect process' :syn metadata and
  # registers the calling process under the tenant id in `Connect.Registry`.
  @impl true
  def run(acc) do
    %{tenant_id: tenant_id, db_conn_pid: conn} = acc
    put_conn = fn _pid, meta -> %{meta | conn: conn} end

    with {:ok, _} <- :syn.update_registry(Connect, tenant_id, put_conn),
         {:ok, _} <- Registry.register(Connect.Registry, tenant_id, %{}) do
      {:ok, acc}
    else
      {:error, :undefined} -> {:error, :process_not_found}
      {:error, {:already_registered, _}} -> {:error, :already_registered}
      {:error, reason} -> {:error, reason}
    end
  end
end

================================================
FILE: lib/realtime/tenants/connect.ex
================================================
defmodule Realtime.Tenants.Connect do
  @moduledoc """
  This module is responsible for attempting to connect to a tenant's database and store the DBConnection in a Syn registry.

  ## Options
  * `:check_connected_user_interval` - The interval in milliseconds to check if there are any connected users to a tenant channel. If there are no connected users, the connection will be stopped.
  * `:check_connect_region_interval` - The interval in milliseconds to check if this process is in the correct region.
If the region is not correct it stops the connection.
  * `:erpc_timeout` - The timeout in milliseconds for the `:erpc` calls to the tenant's database.
  """
  use GenServer, restart: :temporary

  use Realtime.Logs

  alias Realtime.Api.Tenant
  alias Realtime.GenCounter
  alias Realtime.RateCounter
  alias Realtime.Rpc
  alias Realtime.Tenants
  alias Realtime.Tenants.Connect.CheckConnection
  alias Realtime.Tenants.Connect.GetTenant
  alias Realtime.Tenants.Connect.Piper
  alias Realtime.Tenants.Connect.ReconcileMigrations
  alias Realtime.Tenants.Connect.RegisterProcess
  alias Realtime.Tenants.Migrations
  alias Realtime.Tenants.Rebalancer
  alias Realtime.Tenants.ReplicationConnection
  alias Realtime.UsersCounter

  alias DBConnection.Backoff

  # Default timeout for the RPC call that starts the Connect process on another node.
  @rpc_timeout_default 30_000
  # Default interval between connected-user checks.
  @check_connected_user_interval_default 50_000
  # Bucket value that triggers shutdown: six consecutive checks with zero users.
  @connected_users_bucket_shutdown [0, 0, 0, 0, 0, 0]

  # NOTE(review): the pipeline (GetTenant) adds a `:tenant` key to this struct via
  # Map.put/3, and later callbacks read `state.tenant`, but `:tenant` is not
  # declared below — confirm whether it should be part of the struct.
  @type t :: %__MODULE__{
          tenant_id: binary(),
          db_conn_reference: reference(),
          db_conn_pid: pid(),
          replication_connection_pid: pid(),
          replication_connection_reference: reference(),
          backoff: Backoff.t(),
          replication_recovery_started_at: non_neg_integer() | nil,
          check_connected_user_interval: non_neg_integer(),
          connected_users_bucket: list(non_neg_integer()),
          check_connect_region_interval: non_neg_integer(),
          migrations_ran_on_database: non_neg_integer()
        }

  defstruct tenant_id: nil,
            db_conn_reference: nil,
            db_conn_pid: nil,
            replication_connection_pid: nil,
            replication_connection_reference: nil,
            backoff: nil,
            replication_recovery_started_at: nil,
            check_connected_user_interval: nil,
            connected_users_bucket: [1],
            check_connect_region_interval: nil,
            migrations_ran_on_database: 0

  # Match spec returning the first element (the registered name, i.e. the tenant id)
  # of each 6-tuple row in the :syn registry table.
  @tenant_id_spec [{{:"$1", :_, :_, :_, :_, :_}, [], [:"$1"]}]
  # Lists the tenant ids currently registered under this module's :syn scope.
  @spec list_tenants() :: [binary]
  def list_tenants() do
    :syn_registry_by_name
    |> :syn_backbone.get_table_name(__MODULE__)
    |> :ets.select(@tenant_id_spec)
  end

  @doc "Check if Connect has finished setting up connections"
  def ready?(tenant_id) do
    case whereis(tenant_id) do
      pid when is_pid(pid) ->
        GenServer.call(pid, :ready?)

      _ ->
        false
    end
  end

  @doc """
  Returns the database connection for a tenant. If the tenant is not connected, it will attempt to connect to the tenant's database.
  """
  @spec lookup_or_start_connection(binary(), keyword()) ::
          {:ok, pid()}
          | {:error, :tenant_database_unavailable}
          | {:error, :initializing}
          | {:error, :tenant_database_connection_initializing}
          | {:error, :tenant_db_too_many_connections}
          | {:error, :connect_rate_limit_reached}
          | {:error, :rpc_error, term()}
  def lookup_or_start_connection(tenant_id, opts \\ []) when is_binary(tenant_id) do
    rate_args = Tenants.connect_errors_per_second_rate(tenant_id)
    RateCounter.new(rate_args)

    with {:ok, %{limit: %{triggered: false}}} <- RateCounter.get(rate_args),
         {:ok, conn} <- get_status(tenant_id) do
      {:ok, conn}
    else
      {:ok, %{limit: %{triggered: true}}} ->
        {:error, :connect_rate_limit_reached}

      # No Connect process is registered yet: start one on the node chosen for the tenant.
      {:error, :tenant_database_connection_initializing} ->
        case call_external_node(tenant_id, opts) do
          {:ok, pid} ->
            {:ok, pid}

          error ->
            # Failed attempts count towards the connect-errors rate limit.
            GenCounter.add(rate_args.id)
            error
        end

      # Waiting for the connection timed out; not counted as a connect error.
      {:error, :initializing} ->
        {:error, :tenant_database_unavailable}

      {:error, reason} ->
        GenCounter.add(rate_args.id)
        {:error, reason}
    end
  end

  @doc """
  Returns the database connection pid from :syn if it exists.
  """
  @spec get_status(binary()) ::
          {:ok, pid()}
          | {:error, :tenant_database_unavailable}
          | {:error, :initializing}
          | {:error, :tenant_database_connection_initializing}
          | {:error, :tenant_db_too_many_connections}
  def get_status(tenant_id) do
    case :syn.lookup(__MODULE__, tenant_id) do
      # Process registered but the database connection is not stored yet: wait for it.
      {pid, %{conn: nil}} ->
        wait_for_connection(pid, tenant_id)

      {_, %{conn: conn, replication_conn: nil}} ->
        {:ok, conn}

      {_, %{conn: conn}} ->
        {:ok, conn}

      :undefined ->
        {:error, :tenant_database_connection_initializing}

      error ->
        log_error("SynInitializationError", error)
        {:error, :tenant_database_unavailable}
    end
  end

  # Topic subscribed to while waiting for the "ready" / "connect_down" events of a tenant.
  def syn_topic(tenant_id), do: "connect:#{tenant_id}"

  # Subscribes to the tenant's syn topic and blocks until the Connect process `pid`
  # reports its connection, goes down, or the timeout elapses. Always unsubscribes.
  defp wait_for_connection(pid, tenant_id) do
    RealtimeWeb.Endpoint.subscribe(syn_topic(tenant_id))

    # We do a lookup after subscribing because we could've missed a message while subscribing
    case :syn.lookup(__MODULE__, tenant_id) do
      {_pid, %{conn: conn}} when is_pid(conn) ->
        {:ok, conn}

      _ ->
        # Wait for up to 15 seconds for the ready event
        receive do
          %{event: "ready", payload: %{pid: ^pid, conn: conn}} ->
            {:ok, conn}

          %{event: "connect_down", payload: %{pid: ^pid, reason: {:shutdown, :tenant_db_too_many_connections}}} ->
            {:error, :tenant_db_too_many_connections}

          %{event: "connect_down", payload: %{pid: ^pid, reason: _reason}} ->
            metadata = [external_id: tenant_id, project: tenant_id]
            log_error("UnableToConnectToTenantDatabase", "Unable to connect to tenant database", metadata)
            {:error, :tenant_database_unavailable}
        after
          15_000 -> {:error, :initializing}
        end
    end
  after
    RealtimeWeb.Endpoint.unsubscribe(syn_topic(tenant_id))
  end

  @doc """
  Connects to a tenant's database and stores the DBConnection in the process :syn metadata
  """
  @spec connect(binary(), binary(), keyword()) :: {:ok, DBConnection.t()} | {:error, term()}
  def connect(tenant_id, region, opts \\ []) do
    supervisor =
      {:via, PartitionSupervisor, {Realtime.Tenants.Connect.DynamicSupervisor, tenant_id}}

    spec = {__MODULE__, [tenant_id: tenant_id, region: region] ++ opts}
    metadata = [external_id: tenant_id, project: tenant_id]

    case DynamicSupervisor.start_child(supervisor, spec) do
      {:ok, _} ->
        get_status(tenant_id)

      # Another caller won the race to start the process; just read its status.
      {:error, {:already_started, _}} ->
        get_status(tenant_id)

      {:error, error} ->
        log_error("UnableToConnectToTenantDatabase", error, metadata)
        {:error, :tenant_database_unavailable}
    end
  end

  @doc """
  Returns the pid of the tenant Connection process and db_conn pid
  """
  @spec whereis(binary()) :: pid() | nil
  def whereis(tenant_id) do
    case :syn.lookup(__MODULE__, tenant_id) do
      {pid, _} when is_pid(pid) -> pid
      _ -> nil
    end
  end

  @doc """
  Returns the replication connection status from :syn metadata without RPC calls.
  """
  @spec replication_status(binary()) :: {:ok, pid()} | {:error, :not_connected}
  def replication_status(tenant_id) do
    case :syn.lookup(__MODULE__, tenant_id) do
      {_, %{replication_conn: pid}} when is_pid(pid) -> {:ok, pid}
      _ -> {:error, :not_connected}
    end
  end

  @doc """
  Shutdown the tenant Connection and linked processes
  """
  # NOTE(review): both branches below return :ok, so the `nil` in the spec looks unreachable.
  @spec shutdown(binary()) :: :ok | nil
  def shutdown(tenant_id) do
    case whereis(tenant_id) do
      pid when is_pid(pid) ->
        send(pid, :shutdown_connect)
        :ok

      _ ->
        :ok
    end
  end

  # Starts the GenServer registered in :syn under the tenant id. The initial :syn
  # metadata carries the region and empty `conn` / `replication_conn` slots.
  def start_link(opts) do
    tenant_id = Keyword.get(opts, :tenant_id)
    region = Keyword.get(opts, :region)

    check_connected_user_interval =
      Keyword.get(opts, :check_connected_user_interval, @check_connected_user_interval_default)

    check_connect_region_interval =
      Keyword.get(opts, :check_connect_region_interval, rebalance_check_interval_in_ms())

    name = {__MODULE__, tenant_id, %{conn: nil, region: region, replication_conn: nil}}

    state = %__MODULE__{
      tenant_id: tenant_id,
      check_connected_user_interval: check_connected_user_interval,
      check_connect_region_interval: check_connect_region_interval,
      backoff: Backoff.new(backoff_min: :timer.seconds(1), backoff_max: :timer.seconds(15), backoff_type: :rand_exp)
    }

    opts = Keyword.put(opts, :name, {:via, :syn, name})

    GenServer.start_link(__MODULE__, state, opts)
  end

  ## GenServer callbacks
  # Needs to be done on init/1 to
guarantee the GenServer only starts if we are able to connect to the database
  @impl GenServer
  def init(%{tenant_id: tenant_id} = state) do
    Logger.metadata(external_id: tenant_id, project: tenant_id)

    {:ok, state, {:continue, :db_connect}}
  end

  # Setup runs as a chain of continues:
  # :db_connect -> :run_migrations -> :start_replication ->
  # :setup_connected_user_events -> :start_connect_region_check
  @impl true
  def handle_continue(:db_connect, state) do
    pipes = [
      GetTenant,
      CheckConnection,
      ReconcileMigrations,
      RegisterProcess
    ]

    case Piper.run(pipes, state) do
      {:ok, acc} ->
        {:noreply, acc, {:continue, :run_migrations}}

      {:error, :tenant_not_found} ->
        {:stop, {:shutdown, :tenant_not_found}, state}

      {:error, :tenant_db_too_many_connections} ->
        {:stop, {:shutdown, :tenant_db_too_many_connections}, state}

      {:error, error} ->
        log_error("UnableToConnectToTenantDatabase", error)
        {:stop, :shutdown, state}
    end
  end

  def handle_continue(:run_migrations, state) do
    %{tenant: tenant, db_conn_pid: db_conn_pid} = state
    Logger.warning("Tenant #{tenant.external_id} is initializing: #{inspect(node())}")

    with res when res in [:ok, :noop] <- Migrations.run_migrations(tenant),
         :ok <- Migrations.create_partitions(db_conn_pid) do
      {:noreply, state, {:continue, :start_replication}}
    else
      error ->
        log_error("MigrationsFailedToRun", error)
        {:stop, :shutdown, state}
    end
  rescue
    error ->
      log_error("MigrationsFailedToRun", error)
      {:stop, :shutdown, state}
  end

  def handle_continue(:start_replication, state) do
    case start_replication_connection(state) do
      {:ok, state} -> {:noreply, state, {:continue, :setup_connected_user_events}}
      {:error, _error} -> {:stop, :shutdown, state}
    end
  end

  def handle_continue(:setup_connected_user_events, state) do
    %{
      check_connected_user_interval: check_connected_user_interval,
      connected_users_bucket: connected_users_bucket,
      tenant_id: tenant_id
    } = state

    :ok = Phoenix.PubSub.subscribe(Realtime.PubSub, "realtime:operations:" <> tenant_id)
    send_connected_user_check_message(connected_users_bucket, check_connected_user_interval)
    # Records the tenant id in the ETS table named after this module.
    :ets.insert(__MODULE__, {tenant_id})
    {:noreply, state, {:continue, :start_connect_region_check}}
  end

  def handle_continue(:start_connect_region_check, state) do
    send_connect_region_check_message(state.check_connect_region_interval)
    {:noreply, state}
  end

  # Periodic check: appends the current user count to the bucket and schedules
  # either the next check or the shutdown message.
  @impl GenServer
  def handle_info(
        :check_connected_users,
        %{
          tenant_id: tenant_id,
          check_connected_user_interval: check_connected_user_interval,
          connected_users_bucket: connected_users_bucket
        } = state
      ) do
    connected_users_bucket =
      tenant_id
      |> update_connected_users_bucket(connected_users_bucket)
      |> send_connected_user_check_message(check_connected_user_interval)

    {:noreply, %{state | connected_users_bucket: connected_users_bucket}}
  end

  # The message carries the node set captured when the check was scheduled, so the
  # rebalancer can compare it with the current one.
  def handle_info({:check_connect_region, previous_nodes_set}, state) do
    current_nodes_set = MapSet.new(Node.list())

    case Rebalancer.check(previous_nodes_set, current_nodes_set, state.tenant_id) do
      :ok ->
        # Let's check again in the future
        send_connect_region_check_message(state.check_connect_region_interval)
        {:noreply, state}

      {:error, :wrong_region} ->
        Logger.warning("Rebalancing Tenant database connection for a closer region")
        {:stop, {:shutdown, :rebalancing}, state}
    end
  end

  def handle_info(:shutdown_no_connected_users, state) do
    Logger.info("Tenant has no connected users, database connection will be terminated")
    {:stop, :shutdown, state}
  end

  def handle_info(:shutdown_connect, state) do
    Logger.warning("Shutdowning tenant connection")
    {:stop, :shutdown, state}
  end

  # Handle database connection termination
  def handle_info(
        {:DOWN, db_conn_reference, _, _, _},
        %{db_conn_reference: db_conn_reference} = state
      ) do
    Logger.warning("Database connection has been terminated")
    {:stop, :shutdown, state}
  end

  # Handle replication connection termination
  def handle_info(
        {:DOWN, replication_connection_reference, _, _, _},
        %{replication_connection_reference: replication_connection_reference, tenant_id: tenant_id} = state
      ) do
    %{backoff: backoff} = state
    log_warning("ReplicationConnectionDown", "Replication connection has been terminated, recovery window opened")
    update_syn_replication_conn(tenant_id, nil)

    {timeout, backoff} = Backoff.backoff(backoff)
    Process.send_after(self(), :recover_replication_connection, timeout)

    # Keep the original start time if a recovery window is already open.
    recovery_started_at = state.replication_recovery_started_at || System.monotonic_time(:millisecond)

    state = %{
      state
      | replication_connection_pid: nil,
        replication_connection_reference: nil,
        backoff: backoff,
        replication_recovery_started_at: recovery_started_at
    }

    {:noreply, state}
  end

  @replication_connection_query "SELECT 1 from pg_stat_activity where application_name='realtime_replication_connection'"
  # Maximum time spent retrying before the whole Connect process is stopped.
  @max_replication_recovery_ms :timer.hours(2)

  # No recovery window is open: ignore the recovery message.
  def handle_info(:recover_replication_connection, %{replication_recovery_started_at: nil} = state) do
    {:noreply, state}
  end

  def handle_info(:recover_replication_connection, state) do
    %{backoff: backoff, db_conn_pid: db_conn_pid, replication_recovery_started_at: started_at} = state
    elapsed = System.monotonic_time(:millisecond) - started_at

    if elapsed > @max_replication_recovery_ms do
      log_warning(
        "ReplicationRecoveryWindowExceeded",
        "Replication recovery window exceeded after #{elapsed}ms, terminating connection"
      )

      {:stop, :shutdown, state}
    else
      # Only start a new replication connection once the query finds no session
      # with the replication application name; every failure schedules a retry.
      with {:query, {:ok, %{num_rows: 0}}} <-
             {:query, Postgrex.query(db_conn_pid, @replication_connection_query, [])},
           {:start, {:ok, state}} <- {:start, start_replication_connection(state)} do
        {:noreply, %{state | backoff: Backoff.reset(backoff), replication_recovery_started_at: nil}}
      else
        {:query, {:ok, %{num_rows: _}}} ->
          Logger.info("Waiting for old walsender to exit")
          {:noreply, schedule_replication_retry(state)}

        {:query, {:error, error}} ->
          log_error("ReplicationConnectionRecoveryFailed", "DB check failed during recovery: #{inspect(error)}")
          {:noreply, schedule_replication_retry(state)}

        {:start, {:error, error}} ->
          log_error("ReplicationConnectionRecoveryFailed", "Replication connection recovery failed: #{inspect(error)}")
          {:noreply, schedule_replication_retry(state)}
      end
    end
  end

  def handle_info(_, state), do: {:noreply, state}

  @impl true
  def handle_call(:ready?, _from, state) do
    # We just want to know if the process is ready to reply to the client
    # Essentially checking if all handle_continue's were completed
    {:reply, true, state}
  end

  @impl true
  def terminate(reason, %{tenant_id: tenant_id}) do
    Logger.info("Tenant #{tenant_id} has been terminated: #{inspect(reason)}")
    :ok
  end

  ## Private functions
  # Resolves the node for the tenant and starts the Connect process there through
  # an RPC to `connect/3`. Suspended tenants short-circuit with an error.
  defp call_external_node(tenant_id, opts) do
    Logger.warning("Connection process starting up")
    rpc_timeout = Keyword.get(opts, :rpc_timeout, @rpc_timeout_default)

    with tenant <- Tenants.Cache.get_tenant_by_external_id(tenant_id),
         :ok <- tenant_suspended?(tenant),
         {:ok, node, region} <- Realtime.Nodes.get_node_for_tenant(tenant) do
      Rpc.enhanced_call(node, __MODULE__, :connect, [tenant_id, region, opts],
        timeout: rpc_timeout,
        tenant_id: tenant_id
      )
    end
  end

  # Appends the current user count and keeps only the six most recent samples.
  defp update_connected_users_bucket(tenant_id, connected_users_bucket) do
    connected_users_bucket
    |> then(&(&1 ++ [UsersCounter.tenant_users(tenant_id)]))
    |> Enum.take(-6)
  end

  # NOTE(review): this clause returns the timer reference from Process.send_after/3
  # while the clause below returns the bucket; the :check_connected_users handler
  # stores whichever is returned in `connected_users_bucket` — confirm intended.
  defp send_connected_user_check_message(
         @connected_users_bucket_shutdown,
         check_connected_user_interval
       ) do
    Process.send_after(self(), :shutdown_no_connected_users, check_connected_user_interval)
  end

  defp send_connected_user_check_message(connected_users_bucket, check_connected_user_interval) do
    Process.send_after(self(), :check_connected_users, check_connected_user_interval)
    connected_users_bucket
  end

  defp send_connect_region_check_message(check_connect_region_interval) do
    Process.send_after(self(), {:check_connect_region, MapSet.new(Node.list())}, check_connect_region_interval)
  end

  # Despite the `?` name this returns :ok or an error tuple, for use in `with`.
  defp tenant_suspended?(%Tenant{suspend: true}), do: {:error, :tenant_suspended}
  defp tenant_suspended?(_), do: :ok

  defp rebalance_check_interval_in_ms(), do: Application.fetch_env!(:realtime, :rebalance_check_interval_in_ms)

  # Schedules the next recovery attempt using the next backoff delay.
  defp schedule_replication_retry(%{backoff: backoff} = state) do
    {timeout, backoff} = Backoff.backoff(backoff)
    Process.send_after(self(), :recover_replication_connection, timeout)
    %{state | backoff: backoff}
  end

  # Stores the replication connection pid (or nil) in this tenant's :syn metadata.
  defp
update_syn_replication_conn(tenant_id, pid) do
    :syn.update_registry(__MODULE__, tenant_id, fn _pid, meta -> %{meta | replication_conn: pid} end)
  end

  # Starts the replication connection, publishes its pid in :syn, monitors it and
  # stores pid + monitor reference in the state. Errors are logged and returned.
  defp start_replication_connection(state) do
    %{tenant: tenant, tenant_id: tenant_id} = state

    with {:ok, replication_connection_pid} <- ReplicationConnection.start(tenant, self()),
         {:ok, _} <- update_syn_replication_conn(tenant_id, replication_connection_pid) do
      replication_connection_reference = Process.monitor(replication_connection_pid)

      state = %{
        state
        | replication_connection_pid: replication_connection_pid,
          replication_connection_reference: replication_connection_reference
      }

      {:ok, state}
    else
      {:error, :max_wal_senders_reached} ->
        log_error("ReplicationMaxWalSendersReached", "Tenant database has reached the maximum number of WAL senders")
        {:error, :max_wal_senders_reached}

      {:error, :replication_connection_timeout} ->
        log_error("ReplicationConnectionTimeout", "Replication connection timed out during initialization")
        {:error, :replication_connection_timeout}

      {:error, error} ->
        log_error("StartReplicationFailed", error)
        {:error, error}
    end
  rescue
    error ->
      log_error("StartReplicationFailed", error)
      {:error, error}
  end
end

================================================
FILE: lib/realtime/tenants/janitor/maintenance_task.ex
================================================
defmodule Realtime.Tenants.Janitor.MaintenanceTask do
  @moduledoc """
  Perform maintenance on the messages table.
* Delete old messages
  * Create new partitions
  """

  @doc """
  Runs the maintenance steps for the tenant identified by `tenant_external_id`.

  Opens a dedicated database connection, deletes old messages and creates new
  partitions. The connection is always stopped once it has been opened, whether
  the steps succeed, return an error, or raise.

  Returns `:ok` on success; otherwise the first non-matching result is returned
  as-is (for example the value of the tenant lookup or an error from one of the steps).
  """
  @spec run(String.t()) :: :ok | {:error, any}
  def run(tenant_external_id) do
    with %Realtime.Api.Tenant{} = tenant <- Realtime.Tenants.Cache.get_tenant_by_external_id(tenant_external_id),
         {:ok, conn} <- Realtime.Database.connect(tenant, "realtime_janitor") do
      try do
        with :ok <- Realtime.Messages.delete_old_messages(conn),
             :ok <- Realtime.Tenants.Migrations.create_partitions(conn) do
          :ok
        end
      after
        # Stop the connection on every path so a failed step does not leak it.
        GenServer.stop(conn)
      end
    end
  end
end

================================================
FILE: lib/realtime/tenants/janitor.ex
================================================
defmodule Realtime.Tenants.Janitor do
  @moduledoc """
  Scheduled tasks for the Tenants.
  """

  use GenServer
  use Realtime.Logs

  alias Realtime.Tenants.Janitor.MaintenanceTask

  @type t :: %__MODULE__{
          timer: pos_integer() | nil,
          region: String.t() | nil,
          chunks: pos_integer() | nil,
          start_after: pos_integer() | nil,
          randomize: boolean() | nil,
          tasks: map()
        }

  defstruct timer: nil,
            region: nil,
            chunks: nil,
            start_after: nil,
            randomize: nil,
            tasks: %{}

  # Builds the initial state from the application environment and starts the
  # janitor registered under this module's name.
  def start_link(_args) do
    timer = Application.get_env(:realtime, :janitor_schedule_timer)
    start_after = Application.get_env(:realtime, :janitor_run_after_in_ms, 0)
    chunks = Application.get_env(:realtime, :janitor_chunk_size)
    randomize = Application.get_env(:realtime, :janitor_schedule_randomize)
    region = Application.get_env(:realtime, :region)

    state = %__MODULE__{
      timer: timer,
      region: region,
      chunks: chunks,
      start_after: start_after,
      randomize: randomize
    }

    GenServer.start_link(__MODULE__, state, name: __MODULE__)
  end

  @impl true
  def init(%__MODULE__{start_after: start_after} = state) do
    # The first run is delayed by the regular timer plus the configured start delay.
    timer = timer(state) + start_after
    Process.send_after(self(), :delete_old_messages, timer)
    Logger.info("Janitor started")
    {:ok, state}
  end

  @table_name Realtime.Tenants.Connect
  @syn_table :"syn_registry_by_name_Elixir.Realtime.Tenants.Connect"

  # Scheduled run: collects tenant ids from the Connect ETS table and from the
  # :syn registry rows whose last field is this node, splits them into chunks and
  # starts one unlinked task per chunk. `tasks` maps task ref => chunk of tenants.
  @impl true
  def handle_info(:delete_old_messages, state) do
    Logger.info("Janitor started")
    %{chunks: chunks, tasks: tasks} = state
    all_tenants = :ets.select(@table_name, [{{:"$1"}, [], [:"$1"]}])

    connected_tenants =
      :ets.select(@syn_table, [{{:"$1", :_, :_, :_, :_, :"$2"}, [{:==, :"$2", {:const, Node.self()}}], [:"$1"]}])

    new_tasks =
      MapSet.new(all_tenants ++ connected_tenants)
      |> Enum.to_list()
      |> Stream.chunk_every(chunks)
      |> Stream.map(fn chunks ->
        task =
          Task.Supervisor.async_nolink(
            __MODULE__.TaskSupervisor,
            fn -> perform_maintenance_tasks(chunks) end,
            ordered: false
          )

        {task.ref, chunks}
      end)
      |> Map.new()

    Process.send_after(self(), :delete_old_messages, timer(state))

    {:noreply, %{state | tasks: Map.merge(tasks, new_tasks)}}
  end

  # A maintenance task finished normally: forget it.
  def handle_info({:DOWN, ref, _, _, :normal}, state) do
    %{tasks: tasks} = state
    {tenants, tasks} = Map.pop(tasks, ref)
    Logger.info("Janitor finished for tenants: #{inspect(tenants)}")
    {:noreply, %{state | tasks: tasks}}
  end

  # A tracked maintenance task went down for any other reason (killed, crashed,
  # exited): log it and drop the entry so `tasks` does not grow without bound.
  def handle_info({:DOWN, ref, _, _, _reason}, %{tasks: tasks} = state) when is_map_key(tasks, ref) do
    {tenants, tasks} = Map.pop(tasks, ref)

    log_error(
      "JanitorFailedToDeleteOldMessages",
      "Scheduled cleanup failed for tenants: #{inspect(tenants)}"
    )

    {:noreply, %{state | tasks: tasks}}
  end

  # Everything else (including task reply messages) is ignored.
  def handle_info(_, state) do
    {:noreply, state}
  end

  # Ignore in coverage has the tests would require to await a random amount of minutes up to an hour
  # coveralls-ignore-start
  defp timer(%{timer: timer, randomize: true}), do: timer + :timer.minutes(Enum.random(1..59))
  # coveralls-ignore-stop

  defp timer(%{timer: timer}), do: timer

  defp perform_maintenance_tasks(tenants), do: Enum.map(tenants, &perform_maintenance_task/1)

  # Runs the maintenance task for one tenant. The tenant's entry is removed from
  # the Connect ETS table before the task runs.
  defp perform_maintenance_task(tenant_external_id) do
    Logger.metadata(project: tenant_external_id, external_id: tenant_external_id)
    Logger.info("Janitor starting realtime.messages cleanup")
    :ets.delete(@table_name, tenant_external_id)

    with :ok <- MaintenanceTask.run(tenant_external_id) do
      Logger.info("Janitor finished")

      :ok
    end
  end
end

================================================
FILE: lib/realtime/tenants/migrations.ex
================================================
defmodule Realtime.Tenants.Migrations do
  @moduledoc """
  Run Realtime database migrations for tenant's database.

  A short-lived GenServer is started per tenant (registered under
  `{__MODULE__, :host, tenant_external_id}`); all of the work happens in `init/1`,
  which returns `:ignore` on success so no process is left running.
  """
  use GenServer, restart: :transient
  use Realtime.Logs

  alias Realtime.Tenants
  alias Realtime.Database
  alias Realtime.Registry.Unique
  alias Realtime.Repo
  alias Realtime.Api.Tenant
  alias Realtime.Api
  alias Realtime.Nodes
  alias Realtime.GenRpc

  alias Realtime.Tenants.Migrations.{
    CreateRealtimeSubscriptionTable,
    CreateRealtimeCheckFiltersTrigger,
    CreateRealtimeQuoteWal2jsonFunction,
    CreateRealtimeCheckEqualityOpFunction,
    CreateRealtimeBuildPreparedStatementSqlFunction,
    CreateRealtimeCastFunction,
    CreateRealtimeIsVisibleThroughFiltersFunction,
    CreateRealtimeApplyRlsFunction,
    GrantRealtimeUsageToAuthenticatedRole,
    EnableRealtimeApplyRlsFunctionPostgrest9Compatibility,
    UpdateRealtimeSubscriptionCheckFiltersFunctionSecurity,
    UpdateRealtimeBuildPreparedStatementSqlFunctionForCompatibilityWithAllTypes,
    EnableGenericSubscriptionClaims,
    AddWalPayloadOnErrorsInApplyRlsFunction,
    UpdateChangeTimestampToIso8601ZuluFormat,
    UpdateSubscriptionCheckFiltersFunctionDynamicTableName,
    UpdateApplyRlsFunctionToApplyIso8601,
    AddQuotedRegtypesSupport,
    AddOutputForDataLessThanEqual64BytesWhenPayloadTooLarge,
    AddQuotedRegtypesBackwardCompatibilitySupport,
    RecreateRealtimeBuildPreparedStatementSqlFunction,
    NullPassesFiltersRecreateIsVisibleThroughFilters,
    UpdateApplyRlsFunctionToPassThroughDeleteEventsOnFilter,
    MillisecondPrecisionForWalrus,
    AddInOpToFilters,
    EnableFilteringOnDeleteRecord,
    UpdateSubscriptionCheckFiltersForInFilterNonTextTypes,
    ConvertCommitTimestampToUtc,
    OutputFullRecordWhenUnchangedToast,
    CreateListChangesFunction,
    CreateChannels,
    SetRequiredGrants,
    CreateRlsHelperFunctions,
    EnableChannelsRls,
    AddChannelsColumnForWriteCheck,
    AddUpdateGrantToChannels,
    AddBroadcastsPoliciesTable,
    AddInsertAndDeleteGrantToChannels,
    AddPresencesPoliciesTable,
    CreateRealtimeAdminAndMoveOwnership,
    RemoveCheckColumns,
    RedefineAuthorizationTables,
    FixWalrusRoleHandling,
    UnloggedMessagesTable,
    LoggedMessagesTable,
    FilterDeletePostgresChanges,
    AddPayloadToMessages,
    ChangeMessagesIdType,
    UuidAutoGeneration,
    MessagesPartitioning,
    MessagesUsingUuid,
    FixSendFunction,
    RecreateEntityIndexUsingBtree,
    FixSendFunctionPartitionCreation,
    RealtimeSendHandleExceptionsRemovePartitionCreation,
    RealtimeSendSetsConfig,
    RealtimeSubscriptionUnlogged,
    RealtimeSubscriptionLogged,
    RemoveUnusedPublications,
    RealtimeSendSetsTopicConfig,
    SubscriptionIndexBridgingDisabled,
    RunSubscriptionIndexBridgingDisabled,
    BroadcastSendErrorLogging,
    CreateMessagesReplayIndex,
    BroadcastSendIncludePayloadId,
    AddActionToSubscriptions,
    FilterActionPostgresChanges,
    FixByteaDoubleEncodingInCast
  }

  # Ordered list of `{version, module}` pairs handed to `Ecto.Migrator.run/4`.
  # New migrations are appended at the end; the list length is what gets stored
  # as the tenant's `migrations_ran` counter after a successful run.
  @migrations [
    {20_211_116_024_918, CreateRealtimeSubscriptionTable},
    {20_211_116_045_059, CreateRealtimeCheckFiltersTrigger},
    {20_211_116_050_929, CreateRealtimeQuoteWal2jsonFunction},
    {20_211_116_051_442, CreateRealtimeCheckEqualityOpFunction},
    {20_211_116_212_300, CreateRealtimeBuildPreparedStatementSqlFunction},
    {20_211_116_213_355, CreateRealtimeCastFunction},
    {20_211_116_213_934, CreateRealtimeIsVisibleThroughFiltersFunction},
    {20_211_116_214_523, CreateRealtimeApplyRlsFunction},
    {20_211_122_062_447, GrantRealtimeUsageToAuthenticatedRole},
    {20_211_124_070_109, EnableRealtimeApplyRlsFunctionPostgrest9Compatibility},
    {20_211_202_204_204, UpdateRealtimeSubscriptionCheckFiltersFunctionSecurity},
    {20_211_202_204_605, UpdateRealtimeBuildPreparedStatementSqlFunctionForCompatibilityWithAllTypes},
    {20_211_210_212_804, EnableGenericSubscriptionClaims},
    {20_211_228_014_915, AddWalPayloadOnErrorsInApplyRlsFunction},
    {20_220_107_221_237, UpdateChangeTimestampToIso8601ZuluFormat},
    {20_220_228_202_821, UpdateSubscriptionCheckFiltersFunctionDynamicTableName},
    {20_220_312_004_840, UpdateApplyRlsFunctionToApplyIso8601},
    {20_220_603_231_003, AddQuotedRegtypesSupport},
    {20_220_603_232_444, AddOutputForDataLessThanEqual64BytesWhenPayloadTooLarge},
    {20_220_615_214_548, AddQuotedRegtypesBackwardCompatibilitySupport},
    {20_220_712_093_339, RecreateRealtimeBuildPreparedStatementSqlFunction},
    {20_220_908_172_859, NullPassesFiltersRecreateIsVisibleThroughFilters},
    {20_220_916_233_421, UpdateApplyRlsFunctionToPassThroughDeleteEventsOnFilter},
    {20_230_119_133_233, MillisecondPrecisionForWalrus},
    {20_230_128_025_114, AddInOpToFilters},
    {20_230_128_025_212, EnableFilteringOnDeleteRecord},
    {20_230_227_211_149, UpdateSubscriptionCheckFiltersForInFilterNonTextTypes},
    {20_230_228_184_745, ConvertCommitTimestampToUtc},
    {20_230_308_225_145, OutputFullRecordWhenUnchangedToast},
    {20_230_328_144_023, CreateListChangesFunction},
    {20_231_018_144_023, CreateChannels},
    {20_231_204_144_023, SetRequiredGrants},
    {20_231_204_144_024, CreateRlsHelperFunctions},
    {20_231_204_144_025, EnableChannelsRls},
    {20_240_108_234_812, AddChannelsColumnForWriteCheck},
    {20_240_109_165_339, AddUpdateGrantToChannels},
    {20_240_227_174_441, AddBroadcastsPoliciesTable},
    {20_240_311_171_622, AddInsertAndDeleteGrantToChannels},
    {20_240_321_100_241, AddPresencesPoliciesTable},
    {20_240_401_105_812, CreateRealtimeAdminAndMoveOwnership},
    {20_240_418_121_054, RemoveCheckColumns},
    {20_240_523_004_032, RedefineAuthorizationTables},
    {20_240_618_124_746, FixWalrusRoleHandling},
    {20_240_801_235_015, UnloggedMessagesTable},
    {20_240_805_133_720, LoggedMessagesTable},
    {20_240_827_160_934, FilterDeletePostgresChanges},
    {20_240_919_163_303, AddPayloadToMessages},
    {20_240_919_163_305, ChangeMessagesIdType},
    {20_241_019_105_805, UuidAutoGeneration},
    {20_241_030_150_047, MessagesPartitioning},
    {20_241_108_114_728, MessagesUsingUuid},
    {20_241_121_104_152, FixSendFunction},
    {20_241_130_184_212, RecreateEntityIndexUsingBtree},
    {20_241_220_035_512, FixSendFunctionPartitionCreation},
    {20_241_220_123_912, RealtimeSendHandleExceptionsRemovePartitionCreation},
    {20_241_224_161_212, RealtimeSendSetsConfig},
    {20_250_107_150_512, RealtimeSubscriptionUnlogged},
    {20_250_110_162_412, RealtimeSubscriptionLogged},
    {20_250_123_174_212, RemoveUnusedPublications},
    {20_250_128_220_012, RealtimeSendSetsTopicConfig},
    {20_250_506_224_012, SubscriptionIndexBridgingDisabled},
    {20_250_523_164_012, RunSubscriptionIndexBridgingDisabled},
    {20_250_714_121_412, BroadcastSendErrorLogging},
    {20_250_905_041_441, CreateMessagesReplayIndex},
    {20_251_103_001_201, BroadcastSendIncludePayloadId},
    {20_251_120_212_548, AddActionToSubscriptions},
    {20_251_120_215_549, FilterActionPostgresChanges},
    {20_260_218_120_000, FixByteaDoubleEncodingInCast}
  ]

  defstruct [:tenant_external_id, :settings, migrations_ran: 0]

  # `migrations_ran` is part of the struct (default 0, filled from the tenant in
  # `run_migrations/1`), so it belongs in the type as well.
  @type t :: %__MODULE__{
          tenant_external_id: binary(),
          settings: map(),
          migrations_ran: non_neg_integer()
        }

  @doc """
  Run migrations for the given tenant.

  Returns `:noop` when `Tenants.run_migrations?/1` says nothing needs to run.
  Otherwise the settings of the tenant's first extension are used and
  `start_migration/1` is invoked through `GenRpc.call/5` (50 second timeout) on the
  node returned by `Nodes.get_node_for_tenant/1`, falling back to the local node
  when that lookup returns an error.
  """
  @spec run_migrations(Tenant.t()) :: :ok | :noop | {:error, any()}
  def run_migrations(%Tenant{} = tenant) do
    if Tenants.run_migrations?(tenant) do
      %{extensions: [%{settings: settings} | _]} = tenant

      attrs = %__MODULE__{
        tenant_external_id: tenant.external_id,
        settings: settings,
        migrations_ran: tenant.migrations_ran
      }

      node =
        case Nodes.get_node_for_tenant(tenant) do
          {:ok, node, _} -> node
          {:error, _} -> node()
        end

      GenRpc.call(node, __MODULE__, :start_migration, [attrs], tenant_id: tenant.external_id, timeout: 50_000)
    else
      :noop
    end
  end

  @doc """
  Starts the migration process for `attrs` under the partitioned dynamic supervisor.

  Because `init/1` returns `:ignore` once migrations succeed, `:ignore` from the
  supervisor is the success case and is translated to `:ok`. Any other result is
  returned untouched.
  """
  @spec start_migration(t()) :: :ok | term()
  def start_migration(attrs) do
    supervisor =
      {:via, PartitionSupervisor, {Realtime.Tenants.Migrations.DynamicSupervisor, attrs.tenant_external_id}}

    spec = {__MODULE__, attrs}

    case DynamicSupervisor.start_child(supervisor, spec) do
      :ignore -> :ok
      error -> error
    end
  end

  @doc """
  Starts the migration GenServer, registered per tenant in `Realtime.Registry.Unique`.
  """
  @spec start_link(t()) :: GenServer.on_start()
  def start_link(%__MODULE__{tenant_external_id: tenant_external_id} = attrs) do
    name = {:via, Registry, {Unique, {__MODULE__, :host, tenant_external_id}}}
    GenServer.start_link(__MODULE__, attrs, name: name)
  end

  # Runs the migrations synchronously. On success the tenant's `migrations_ran`
  # counter is updated in a supervised task and the server exits with `:ignore`;
  # on failure the server stops with the migration error.
  @impl true
  def init(%__MODULE__{tenant_external_id: tenant_external_id, settings: settings}) do
    Logger.metadata(external_id: tenant_external_id, project: tenant_external_id)

    case migrate(settings) do
      :ok ->
        # The task result is intentionally not awaited: this process returns
        # `:ignore` right after spawning it.
        Task.Supervisor.async_nolink(__MODULE__.TaskSupervisor, Api, :update_migrations_ran, [
          tenant_external_id,
          Enum.count(@migrations)
        ])

        :ignore

      {:error, error} ->
        {:stop, error}
    end
  end

  # Opens a dynamic repo against the tenant database and applies every pending
  # migration in the `realtime` prefix. Exceptions raised by the migrator are
  # logged and returned as `{:error, exception}`.
  defp migrate(settings) do
    settings = Database.from_settings(settings, "realtime_migrations", :stop)

    [
      hostname: settings.hostname,
      port: settings.port,
      database: settings.database,
      password: settings.password,
      username: settings.username,
      pool_size: settings.pool_size,
      backoff_type: settings.backoff_type,
      socket_options: settings.socket_options,
      parameters: [application_name: settings.application_name],
      ssl: settings.ssl
    ]
    |> Repo.with_dynamic_repo(fn repo ->
      Logger.info("Applying migrations to #{settings.hostname}")

      try do
        opts = [all: true, prefix: "realtime", dynamic_repo: repo]
        Ecto.Migrator.run(Repo, @migrations, :up, opts)

        :ok
      rescue
        error ->
          log_error("MigrationsFailedToRun", error)
          {:error, error}
      end
    end)
  end

  @doc """
  Create partitions against tenant db connection

  One `realtime.messages_YYYY_MM_DD` partition is created (if missing) for each
  day from yesterday through three days ahead (UTC). Failures are logged and do
  not abort the remaining days; the function always returns `:ok`.
  """
  @spec create_partitions(pid()) :: :ok
  def create_partitions(db_conn_pid) do
    Logger.info("Creating partitions for realtime.messages")
    today = Date.utc_today()
    yesterday = Date.add(today, -1)
    future = Date.add(today, 3)

    dates = Date.range(yesterday, future)

    Enum.each(dates, fn date ->
      partition_name = "messages_#{date |> Date.to_iso8601() |> String.replace("-", "_")}"
      start_timestamp = Date.to_string(date)
      end_timestamp = Date.to_string(Date.add(date, 1))

      Database.transaction(db_conn_pid, fn conn ->
        # Identifiers and bounds are derived from `Date` values only, so the
        # interpolation below never contains caller-supplied text.
        query = """
        CREATE TABLE IF NOT EXISTS realtime.#{partition_name}
        PARTITION OF realtime.messages
        FOR VALUES FROM ('#{start_timestamp}') TO ('#{end_timestamp}');
        """

        case Postgrex.query(conn, query, []) do
          {:ok, _} -> Logger.debug("Partition #{partition_name} created")
          {:error, %Postgrex.Error{postgres: %{code: :duplicate_table}}} -> :ok
          {:error, error} -> log_error("PartitionCreationFailed", error)
        end
      end)
    end)

    :ok
  end

  @doc """
  Returns the ordered list of `{version, module}` tenant migrations.
  """
  @spec migrations() :: [{pos_integer(), module()}]
  def migrations(), do: @migrations
end

================================================
FILE:
lib/realtime/tenants/rebalancer.ex
================================================
defmodule Realtime.Tenants.Rebalancer do
  @moduledoc """
  Tells whether the node executing this code lives in the region expected for a tenant.
  """
  alias Realtime.Api.Tenant

  @doc """
  Compares the previous and current node sets and, when they are identical,
  verifies that this node's configured region matches the tenant's expected region.

  Returns `{:error, :wrong_region}` only when the cluster membership is unchanged
  and both regions are known and different. Every other situation (membership
  changed, tenant not cached, node lookup failed, region not configured) yields `:ok`.
  """
  @spec check(MapSet.t(node), MapSet.t(node), binary) :: :ok | {:error, :wrong_region}
  def check(previous_nodes_set, current_nodes_set, tenant_id)
      when is_struct(previous_nodes_set, MapSet) and is_struct(current_nodes_set, MapSet) and
             is_binary(tenant_id) do
    # An unchanged node set is taken as a sign the cluster is stable enough to
    # judge placement; a changed one means we skip the region check entirely.
    stable? = MapSet.equal?(current_nodes_set, previous_nodes_set)

    if stable? and wrong_region?(tenant_id), do: {:error, :wrong_region}, else: :ok
  end

  # True only when the tenant, its expected region and the local region are all
  # available and the two regions differ.
  defp wrong_region?(tenant_id) do
    with %Tenant{} = tenant <- Realtime.Tenants.Cache.get_tenant_by_external_id(tenant_id),
         {:ok, _node, expected_region} <- Realtime.Nodes.get_node_for_tenant(tenant),
         region when is_binary(region) <- Application.get_env(:realtime, :region) do
      region != expected_region
    else
      _ -> false
    end
  end
end

================================================
FILE: lib/realtime/tenants/replication_connection/watchdog.ex
================================================
defmodule Realtime.Tenants.ReplicationConnection.Watchdog do
  @moduledoc """
  Periodically calls the ReplicationConnection to verify it still answers.
  When the call times out an error is logged and this process stops, which
  cascades to ReplicationConnection.
  """
  use GenServer
  use Realtime.Logs

  @default_check_interval :timer.minutes(5)
  @default_timeout :timer.minutes(1)

  defstruct [:parent_pid, :tenant_id, :check_interval, :timeout]

  def start_link(opts), do: GenServer.start_link(__MODULE__, opts)

  @impl true
  def init(opts) do
    parent_pid = Keyword.fetch!(opts, :parent_pid)
    tenant_id = Keyword.fetch!(opts, :tenant_id)

    # Explicit option wins, then application config, then the module default.
    check_interval = setting(opts, :watchdog_interval, :replication_watchdog_interval, @default_check_interval)
    timeout = setting(opts, :watchdog_timeout, :replication_watchdog_timeout, @default_timeout)

    Logger.metadata(external_id: tenant_id, project: tenant_id)

    # Arm the first health check
    schedule_check(check_interval)

    {:ok,
     %__MODULE__{
       parent_pid: parent_pid,
       tenant_id: tenant_id,
       check_interval: check_interval,
       timeout: timeout
     }}
  end

  @impl true
  def handle_info(:health_check, state) do
    case Realtime.Tenants.ReplicationConnection.health_check(state.parent_pid, state.timeout) do
      :ok ->
        schedule_check(state.check_interval)
        {:noreply, state}
    end
  catch
    # Only a call timeout is treated as "connection is stuck".
    :exit, {:timeout, _} ->
      log_error(
        "ReplicationConnectionWatchdogTimeout",
        "ReplicationConnection is not responding"
      )

      {:stop, :watchdog_timeout, state}
  end

  defp setting(opts, opt_key, env_key, fallback) do
    Keyword.get(opts, opt_key, Application.get_env(:realtime, env_key, fallback))
  end

  defp schedule_check(interval), do: Process.send_after(self(), :health_check, interval)
end

================================================
FILE: lib/realtime/tenants/replication_connection.ex
================================================
defmodule Realtime.Tenants.ReplicationConnection do
  @moduledoc """
  ReplicationConnection it's the module that provides a way to stream data from a PostgreSQL database using logical replication.

  ## Struct parameters
  * `connection_opts` - The connection options to connect to the database.
  * `table` - The table to replicate. If `:all` is passed, it will replicate all tables.
  * `schema` - The schema of the table to replicate. If not provided, it will use the `public` schema.
If `:all` is passed, this option is ignored.
  * `opts` - The options to pass to this module
  * `step` - The current step of the replication process
  * `publication_name` - The name of the publication to create. If not provided, it will use the schema and table name.
  * `replication_slot_name` - The name of the replication slot to create. If not provided, it will use the schema and table name.
  * `output_plugin` - The output plugin to use. Default is `pgoutput`.
  * `proto_version` - The protocol version to use. Default is `2`.
  * `handler_module` - The module that will handle the data received from the replication stream.
  * `metadata` - The metadata to pass to the handler module.
  """
  use Postgrex.ReplicationConnection
  use Realtime.Logs

  import Realtime.Adapters.Postgres.Protocol
  import Realtime.Adapters.Postgres.Decoder

  alias Realtime.Adapters.Postgres.Decoder
  alias Realtime.Adapters.Postgres.Protocol.KeepAlive
  alias Realtime.Adapters.Postgres.Protocol.Write
  alias Realtime.Api.Tenant
  alias Realtime.Database
  alias Realtime.Telemetry
  alias Realtime.Tenants.BatchBroadcast
  alias Realtime.Tenants.Cache

  @type t :: %__MODULE__{
          tenant_id: String.t(),
          opts: Keyword.t(),
          step:
            :disconnected
            | :check_replication_slot
            | :create_publication
            | :check_publication
            | :validate_publication
            | :create_slot
            | :start_replication_slot
            | :streaming,
          publication_name: String.t(),
          replication_slot_name: String.t(),
          output_plugin: String.t(),
          proto_version: integer(),
          relations: map(),
          buffer: list(),
          monitored_pid: pid(),
          latency_committed_at: integer()
        }

  defstruct tenant_id: nil,
            opts: [],
            step: :disconnected,
            publication_name: nil,
            replication_slot_name: nil,
            output_plugin: "pgoutput",
            proto_version: 2,
            relations: %{},
            buffer: [],
            monitored_pid: nil,
            latency_committed_at: nil

  defmodule Wrapper do
    @moduledoc """
    This GenServer exists at the moment so that we can have an init timeout for ReplicationConnection
    """
    use GenServer

    def start_link(args, init_timeout) do
      GenServer.start_link(__MODULE__, args, timeout: init_timeout)
    end

    @impl true
    def init(args) do
      # The wrapper's state is simply the pid of the linked replication connection.
      case Realtime.Tenants.ReplicationConnection.start_link(args) do
        {:ok, pid} -> {:ok, pid}
        {:error, reason} -> {:stop, reason}
      end
    end
  end

  @default_init_timeout 30_000
  @table "messages"
  @schema "realtime"

  @doc """
  Starts the replication connection for a tenant and monitors a given pid to stop the ReplicationConnection.

  The connection is started through `Wrapper` under the tenant's partitioned
  dynamic supervisor so that `init_timeout` (milliseconds, default
  `#{@default_init_timeout}`) bounds the time spent connecting.

  Known failures are normalised:

    * `{:error, :max_wal_senders_reached}` for Postgres codes `53300` / `53400`
    * `{:error, :replication_connection_timeout}` when the init timeout elapses

  Any other result from the supervisor is returned unchanged.
  """
  @spec start(Realtime.Api.Tenant.t(), pid()) :: {:ok, pid()} | {:error, any()}
  @spec start(Realtime.Api.Tenant.t(), pid(), timeout()) :: {:ok, pid()} | {:error, any()}
  def start(tenant, monitored_pid, init_timeout \\ @default_init_timeout) do
    Logger.info("Starting replication for Broadcast Changes")
    opts = %__MODULE__{tenant_id: tenant.external_id, monitored_pid: monitored_pid}
    supervisor_spec = supervisor_spec(tenant)

    child_spec = %{
      id: __MODULE__,
      start: {Wrapper, :start_link, [opts, init_timeout]},
      restart: :temporary,
      type: :worker
    }

    case DynamicSupervisor.start_child(supervisor_spec, child_spec) do
      {:ok, pid} ->
        {:ok, pid}

      {:error, {:already_started, pid}} ->
        {:ok, pid}

      {:error, {:bad_return_from_init, {:stop, error, _}}} ->
        {:error, error}

      {:error, %Postgrex.Error{postgres: %{pg_code: pg_code}}} when pg_code in ~w(53300 53400) ->
        {:error, :max_wal_senders_reached}

      {:error, :timeout} ->
        {:error, :replication_connection_timeout}

      error ->
        error
    end
  end

  @doc """
  Finds replication connection by tenant_id
  """
  @spec whereis(String.t()) :: pid() | nil
  def whereis(tenant_id) do
    case Registry.lookup(Realtime.Registry.Unique, {__MODULE__, tenant_id}) do
      [{pid, _}] -> pid
      [] -> nil
    end
  end

  @doc """
  Synchronously pings the replication connection `pid`.

  Returns `:ok` when the process answers within `timeout`; otherwise the calling
  process exits, as with any timed-out call.
  """
  @spec health_check(pid(), timeout()) :: :ok | no_return()
  def health_check(pid, timeout), do: Postgrex.ReplicationConnection.call(pid, :health_check, timeout)

  @doc """
  Starts the underlying `Postgrex.ReplicationConnection` for the tenant in `attrs`.

  Connection settings come from the cached tenant. The process is registered in
  `Realtime.Registry.Unique` under `{__MODULE__, tenant_id}`, connects
  synchronously and never auto-reconnects.
  """
  def start_link(%__MODULE__{tenant_id: tenant_id} = attrs) do
    tenant = Cache.get_tenant_by_external_id(tenant_id)
    connection_opts = Database.from_tenant(tenant, "realtime_broadcast_changes", :stop)

    connection_opts =
      [
        name: {:via, Registry, {Realtime.Registry.Unique, {__MODULE__, tenant_id}}},
        hostname: connection_opts.hostname,
        username: connection_opts.username,
        password: connection_opts.password,
        database: connection_opts.database,
        port: connection_opts.port,
        socket_options: connection_opts.socket_options,
        ssl: connection_opts.ssl,
        sync_connect: true,
        auto_reconnect: false,
        parameters: [application_name: "realtime_replication_connection"]
      ]

    case Postgrex.ReplicationConnection.start_link(__MODULE__, attrs, connection_opts) do
      {:ok, pid} -> {:ok, pid}
      {:error, {:already_started, pid}} -> {:ok, pid}
      {:error, {:bad_return_from_init, {:stop, error}}} -> {:error, error}
      {:error, error} -> {:error, error}
    end
  end

  @impl true
  def init(%__MODULE__{tenant_id: tenant_id, monitored_pid: monitored_pid} = state) do
    Process.flag(:fullsweep_after, 20)
    Logger.metadata(external_id: tenant_id, project: tenant_id)
    # When the monitored process goes down, `handle_info/2` disconnects.
    Process.monitor(monitored_pid)

    {:ok, _watchdog_pid} =
      Realtime.Tenants.ReplicationConnection.Watchdog.start_link(parent_pid: self(), tenant_id: tenant_id)

    state = %{
      state
      | publication_name: publication_name(@schema, @table),
        replication_slot_name: replication_slot_name(@schema, @table)
    }

    Logger.info("Initializing connection with the status: #{inspect(state, pretty: true)}")

    {:ok, state}
  end

  @impl true
  def handle_connect(state) do
    replication_slot_name = replication_slot_name(@schema, @table)
    Logger.info("Checking if replication slot #{replication_slot_name} exists")

    query = "SELECT * FROM pg_replication_slots WHERE slot_name = '#{replication_slot_name}'"

    {:query, query, %{state | step: :check_replication_slot}}
  end

  # `handle_result/2` is a small state machine. The `step` stored in the state
  # names the handler that will process the result of the query issued now:
  #
  #   check_replication_slot -> check_publication -> create_publication
  #     -> (validate_publication) -> start_replication_slot -> streaming
  @impl true
  def handle_result([%Postgrex.Result{num_rows: 1}], %__MODULE__{step: :check_replication_slot} = _state) do
    Logger.info("Replication slot already exists and in use, deferring connection")
    {:disconnect, {:shutdown, :replication_slot_in_use}}
  end

  def handle_result([%Postgrex.Result{num_rows: 0}], %__MODULE__{step: :check_replication_slot} = state) do
    %__MODULE__{
      output_plugin: output_plugin,
      replication_slot_name: replication_slot_name,
      step: :check_replication_slot
    } = state

    Logger.info("Create replication slot #{replication_slot_name} using plugin #{output_plugin}")

    query = "CREATE_REPLICATION_SLOT #{replication_slot_name} TEMPORARY LOGICAL #{output_plugin} NOEXPORT_SNAPSHOT"

    {:query, query, %{state | step: :check_publication}}
  end

  def handle_result([%Postgrex.Result{}], %__MODULE__{step: :check_publication} = state) do
    %__MODULE__{publication_name: publication_name} = state

    Logger.info("Check publication #{publication_name} for table #{@schema}.#{@table} exists")
    query = "SELECT * FROM pg_publication WHERE pubname = '#{publication_name}'"

    {:query, query, %{state | step: :create_publication}}
  end

  def handle_result([%Postgrex.Result{num_rows: 0}], %__MODULE__{step: :create_publication} = state) do
    %__MODULE__{publication_name: publication_name} = state

    Logger.info("Create publication #{publication_name} for table #{@schema}.#{@table}")
    query = "CREATE PUBLICATION #{publication_name} FOR TABLE #{@schema}.#{@table}"

    {:query, query, %{state | step: :start_replication_slot}}
  end

  def handle_result([%Postgrex.Result{num_rows: 1}], %__MODULE__{step: :create_publication} = state) do
    %__MODULE__{publication_name: publication_name} = state

    Logger.info("Publication #{publication_name} exists, validating contents")

    query = """
    SELECT schemaname, tablename
    FROM pg_publication_tables
    WHERE pubname = '#{publication_name}'
    """

    {:query, query, %{state | step: :validate_publication}}
  end

  def handle_result([%Postgrex.Result{rows: rows}], %__MODULE__{step: :validate_publication} = state) do
    %__MODULE__{publication_name: publication_name} = state

    # Only realtime.messages and tables named `messages_*` in the same schema are accepted.
    valid_tables =
      Enum.all?(rows, fn [schema, table] ->
        schema == @schema and (table == @table or String.starts_with?(table, "#{@table}_"))
      end)

    if valid_tables and rows != [] do
      # No-op query so the flow continues through the :start_replication_slot handler.
      {:query, "SELECT 1", %{state | step: :start_replication_slot}}
    else
      query =
        "DROP PUBLICATION IF EXISTS #{publication_name}; CREATE PUBLICATION #{publication_name} FOR TABLE #{@schema}.#{@table}"

      Logger.warning("Publication #{publication_name} contains unexpected tables. Recreating...")
      {:query, query, %{state | step: :start_replication_slot}}
    end
  end

  def handle_result(%Postgrex.Error{postgres: %{message: message}}, %__MODULE__{step: :start_replication_slot} = _state) do
    {:disconnect, "Error starting replication: #{message}"}
  end

  def handle_result(%Postgrex.Error{message: message}, %__MODULE__{step: :start_replication_slot} = _state) do
    {:disconnect, "Error starting replication: #{message}"}
  end

  def handle_result(results, %__MODULE__{step: :start_replication_slot} = state) do
    error = Enum.find(results, fn res -> match?(%Postgrex.Error{}, res) end)

    if error do
      # `Postgrex.Error.message` is nil for server-side errors (the text lives in
      # `error.postgres`), so go through `Exception.message/1` to always get a
      # meaningful description instead of an empty string.
      {:disconnect, "Error starting replication: #{Exception.message(error)}"}
    else
      %__MODULE__{
        proto_version: proto_version,
        replication_slot_name: replication_slot_name,
        publication_name: publication_name
      } = state

      Logger.info(
        "Starting stream replication for slot #{replication_slot_name} using publication #{publication_name} and protocol version #{proto_version}"
      )

      query =
        "START_REPLICATION SLOT #{replication_slot_name} LOGICAL 0/0 (proto_version '#{proto_version}', publication_names '#{publication_name}', binary 'true')"

      {:stream, query, [], %{state | step: :streaming}}
    end
  end

  def handle_result(%Postgrex.Error{postgres: %{pg_code: pg_code}}, _state) when pg_code in ~w(53300 53400) do
    {:disconnect, :max_wal_senders_reached}
  end

  def handle_result(%Postgrex.Error{postgres: %{message: message}}, _state) do
    {:disconnect, "Error starting replication: #{message}"}
  end

  @impl true
  def handle_data(data, state) when is_keep_alive(data) do
    %KeepAlive{reply: reply, wal_end: wal_end} = parse(data)
    wal_end = wal_end + 1

    # Acknowledge immediately only when the server asks for a reply.
    message =
      case reply do
        :now -> standby_status(wal_end, wal_end, wal_end, reply)
        :later -> hold()
      end

    {:noreply, message, state}
  end

  def handle_data(data, state) when is_write(data) do
    %Write{message: message} = parse(data)
    message |> decode_message(state.relations) |> then(&handle_message(&1, state))
  end

  def handle_data(e, state) do
    log_error("UnexpectedMessageReceived", e)
    {:noreply, [], state}
  end

  @impl true
  def handle_call(:health_check, from, state) do
    Postgrex.ReplicationConnection.reply(from, :ok)
    {:noreply, state}
  end

  @impl true
  # Any :DOWN message (the process monitored in `init/1`) shuts the connection down.
  def handle_info({:DOWN, _, :process, _, _}, _), do: {:disconnect, :shutdown}
  def handle_info(_, state), do: {:noreply, state}

  # Records how long ago (ms) the transaction being streamed was committed.
  defp handle_message(%Decoder.Messages.Begin{commit_timestamp: commit_timestamp}, state) do
    latency_committed_at = NaiveDateTime.utc_now() |> NaiveDateTime.diff(commit_timestamp, :millisecond)
    {:noreply, %{state | latency_committed_at: latency_committed_at}}
  end

  # Caches column metadata per relation id so later Insert messages can be decoded.
  defp handle_message(%Decoder.Messages.Relation{} = msg, state) do
    %Decoder.Messages.Relation{id: id, namespace: namespace, name: name, columns: columns} = msg

    # Only care about relations with namespace=realtime and name starting with messages
    if namespace == @schema and String.starts_with?(name, @table) do
      %{relations: relations} = state
      relation = %{name: name, columns: columns, namespace: namespace}
      relations = Map.put(relations, id, relation)
      {:noreply, %{state | relations: relations}}
    else
      Logger.warning("Unexpected relation on schema '#{namespace}' and table '#{name}'")
      {:noreply, state}
    end
  rescue
    e ->
      log_error("UnableToBroadcastChanges", e)
      {:noreply, state}
  catch
    e ->
      log_error("UnableToBroadcastChanges", e)
      {:noreply, state}
  end

  # Turns an inserted row into a broadcast and emits latency telemetry.
  # Rows for unknown relations, or for a tenant missing from the cache, are
  # dropped silently; rows missing a required column are logged and dropped.
  defp handle_message(%Decoder.Messages.Insert{} = msg, state) do
    %Decoder.Messages.Insert{relation_id: relation_id, tuple_data: tuple_data} = msg
    %{relations: relations, tenant_id: tenant_id, latency_committed_at: latency_committed_at} = state

    with %{columns: columns} <- Map.get(relations, relation_id),
         to_broadcast = tuple_to_map(tuple_data, columns),
         {:ok, payload} <- get_or_error(to_broadcast, "payload", :payload_missing),
         {:ok, inserted_at} <- get_or_error(to_broadcast, "inserted_at", :inserted_at_missing),
         {:ok, event} <- get_or_error(to_broadcast, "event", :event_missing),
         {:ok, id} <- get_or_error(to_broadcast, "id", :id_missing),
         {:ok, topic} <- get_or_error(to_broadcast, "topic", :topic_missing),
         {:ok, private} <- get_or_error(to_broadcast, "private", :private_missing),
         %Tenant{} = tenant <- Cache.get_tenant_by_external_id(tenant_id),
         broadcast_message = %{
           id: id,
           topic: topic,
           event: event,
           private: private,
           payload: Jason.Fragment.new(payload)
         },
         :ok <- BatchBroadcast.broadcast(nil, tenant, %{messages: [broadcast_message]}, true) do
      latency_inserted_at = NaiveDateTime.utc_now(:microsecond) |> NaiveDateTime.diff(inserted_at, :microsecond)

      Telemetry.execute(
        [:realtime, :tenants, :broadcast_from_database],
        %{latency_committed_at: latency_committed_at, latency_inserted_at: latency_inserted_at},
        %{tenant: tenant_id}
      )

      {:noreply, state}
    else
      {:error, %Ecto.Changeset{valid?: false} = changeset} ->
        error = Ecto.Changeset.traverse_errors(changeset, &elem(&1, 0))
        log_error("UnableToBroadcastChanges", error)
        {:noreply, state}

      {:error, error} ->
        log_error("UnableToBroadcastChanges", error)
        {:noreply, state}

      _ ->
        {:noreply, state}
    end
  rescue
    e ->
      log_error("UnableToBroadcastChanges", e)
      {:noreply, state}
  catch
    e ->
      log_error("UnableToBroadcastChanges", e)
      {:noreply, state}
  end

  defp handle_message(_, state), do: {:noreply, state}

  @impl true
  def handle_disconnect(state) do
    Logger.warning("Disconnecting broadcast changes handler in the step : #{inspect(state.step)}")
    {:noreply, %{state | step: :disconnected}}
  end

  @doc """
  Returns the `:via` tuple of the partitioned dynamic supervisor responsible for the tenant.
  """
  @spec supervisor_spec(Tenant.t()) :: term()
  def supervisor_spec(%Tenant{external_id: tenant_id}) do
    {:via, PartitionSupervisor, {__MODULE__.DynamicSupervisor, tenant_id}}
  end

  @doc """
  Builds the publication name used for `schema`.`table`.
  """
  def publication_name(schema, table) do
    "supabase_#{schema}_#{table}_publication"
  end

  @doc """
  Builds the replication slot name for `schema`.`table`, suffixed with the
  `:slot_name_suffix` application setting.
  """
  def replication_slot_name(schema, table) do
    "supabase_#{schema}_#{table}_replication_slot_#{slot_suffix()}"
  end

  defp slot_suffix, do: Application.get_env(:realtime, :slot_name_suffix)

  # Pairs each decoded value with its column and returns a `%{column_name => value}` map.
  # Values are passed through unchanged regardless of column type.
  defp tuple_to_map(tuple_data, columns) do
    tuple_data
    |> Tuple.to_list()
    |> Enum.zip(columns)
    |> Map.new(fn {value, %{name: name}} -> {name, value} end)
  end

  # Fetches `key`, treating a missing or nil value as `{:error, error_type}`.
  defp get_or_error(map, key, error_type) do
    case Map.get(map, key) do
      nil -> {:error, error_type}
      value -> {:ok, value}
    end
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20211116024918_create_realtime_subscription_table.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateRealtimeSubscriptionTable do
  @moduledoc false

  use Ecto.Migration

  def change do
    execute("""
    DO $$
    BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'equality_op') THEN
            CREATE TYPE realtime.equality_op AS ENUM(
              'eq', 'neq', 'lt', 'lte', 'gt', 'gte'
            );
        END IF;
    END$$;
    """)

    execute("""
    DO $$
    BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'user_defined_filter') THEN
            CREATE TYPE realtime.user_defined_filter as (
              column_name text,
              op realtime.equality_op,
              value text
            );
        END IF;
    END$$;
    """)

    execute("create table if not exists realtime.subscription (
      -- Tracks which users are subscribed to each table
      id bigint not null generated always as identity,
      user_id uuid not null,
      -- Populated automatically by trigger. Required to enable auth.email()
      email varchar(255),
      entity regclass not null,
      filters realtime.user_defined_filter[] not null default '{}',
      created_at timestamp not null default timezone('utc', now()),

      constraint pk_subscription primary key (id),
      unique (entity, user_id, filters)
    )")

    execute("create index if not exists ix_realtime_subscription_entity on realtime.subscription using hash (entity)")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20211116045059_create_realtime_check_filters_trigger.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateRealtimeCheckFiltersTrigger do
  @moduledoc false

  use Ecto.Migration

  def change do
    execute("create or replace function realtime.subscription_check_filters()
      returns trigger
      language plpgsql
    as $$
    /*
    Validates that the user defined filters for a subscription:
    - refer to valid columns that 'authenticated' may access
    - values are coercable to the correct column type
    */
    declare
      col_names text[] = coalesce(
        array_agg(c.column_name order by c.ordinal_position),
        '{}'::text[]
      )
      from
        information_schema.columns c
      where
        (quote_ident(c.table_schema) || '.'
|| quote_ident(c.table_name))::regclass = new.entity
        and pg_catalog.has_column_privilege('authenticated', new.entity, c.column_name, 'SELECT');
      filter realtime.user_defined_filter;
      col_type text;
    begin
      for filter in select * from unnest(new.filters) loop
        -- Filtered column is valid
        if not filter.column_name = any(col_names) then
          raise exception 'invalid column for filter %', filter.column_name;
        end if;

        -- Type is sanitized and safe for string interpolation
        col_type = (
          select atttypid::regtype
          from pg_catalog.pg_attribute
          where attrelid = new.entity
            and attname = filter.column_name
        )::text;
        if col_type is null then
          raise exception 'failed to lookup type for column %', filter.column_name;
        end if;
        -- raises an exception if value is not coercable to type
        perform format('select %s::%I', filter.value, col_type);
      end loop;

      -- Apply consistent order to filters so the unique constraint on
      -- (user_id, entity, filters) can't be tricked by a different filter order
      new.filters = coalesce(
        array_agg(f order by f.column_name, f.op, f.value),
        '{}'
      ) from unnest(new.filters) f;

      return new;
    end;
    $$;")

    # Attaches the function above as a row-level trigger that fires before every
    # INSERT or UPDATE on realtime.subscription.
    # NOTE(review): unlike the `create or replace function` statement, this is a
    # plain `create trigger` with no existence guard - confirm that re-running
    # against a database that already has tr_check_filters is not expected.
    execute("create trigger tr_check_filters
    before insert or update on realtime.subscription
    for each row
    execute function realtime.subscription_check_filters();")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20211116050929_create_realtime_quote_wal2json_function.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateRealtimeQuoteWal2jsonFunction do
  @moduledoc false

  use Ecto.Migration

  # Creates (or replaces) realtime.quote_wal2json(entity regclass), an immutable,
  # strict SQL function returning text. The result is assembled character by
  # character from the entity's schema name and relation name, joined with '.',
  # skipping a double quote found at the first or last position.
  # NOTE(review): the sub-select over pc.relname compares x.idx with the length of
  # nsp.nspname (not pc.relname) when detecting a trailing double quote - confirm
  # whether that is intentional.
  def change do
    execute("create or replace function realtime.quote_wal2json(entity regclass)
      returns text
      language sql
      immutable
      strict
    as $$
      select
        (
          select string_agg('\' || ch,'')
          from unnest(string_to_array(nsp.nspname::text, null)) with ordinality x(ch, idx)
          where
            not (x.idx = 1 and x.ch = '\"')
            and not (
              x.idx = array_length(string_to_array(nsp.nspname::text, null), 1)
              and x.ch = '\"'
            )
        )
        || '.'
|| (
          select string_agg('\' || ch,'')
          from unnest(string_to_array(pc.relname::text, null)) with ordinality x(ch, idx)
          where
            not (x.idx = 1 and x.ch = '\"')
            and not (
              x.idx = array_length(string_to_array(nsp.nspname::text, null), 1)
              and x.ch = '\"'
            )
        )
      from
        pg_class pc
        join pg_namespace nsp
          on pc.relnamespace = nsp.oid
      where
        pc.oid = entity
    $$;")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20211116051442_create_realtime_check_equality_op_function.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateRealtimeCheckEqualityOpFunction do
  @moduledoc false

  use Ecto.Migration

  # Creates (or replaces) realtime.check_equality_op(op, type_, val_1, val_2).
  # The function maps the realtime.equality_op value to a comparison symbol,
  # casts both text values to `type_` via a dynamically built statement and
  # returns the boolean result of the comparison.
  def change do
    execute("create or replace function realtime.check_equality_op(
      op realtime.equality_op,
      type_ regtype,
      val_1 text,
      val_2 text
    )
      returns bool
      immutable
      language plpgsql
    as $$
    /*
    Casts *val_1* and *val_2* as type *type_* and check the *op* condition for truthiness
    */
    declare
      op_symbol text = (
        case
          when op = 'eq' then '='
          when op = 'neq' then '!='
          when op = 'lt' then '<'
          when op = 'lte' then '<='
          when op = 'gt' then '>'
          when op = 'gte' then '>='
          else 'UNKNOWN OP'
        end
      );
      res boolean;
    begin
      execute format('select %L::'|| type_::text || ' ' || op_symbol || ' %L::'|| type_::text, val_1, val_2) into res;
      return res;
    end;
    $$;")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20211116212300_create_realtime_build_prepared_statement_sql_function.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateRealtimeBuildPreparedStatementSqlFunction do
  @moduledoc false

  use Ecto.Migration

  def change do
    # Ensures the composite type realtime.wal_column exists with a `type`
    # attribute: an existing realtime.wal_column lacking that attribute is dropped
    # (CASCADE) and the type is then created with the five attributes listed below.
    execute("""
    DO $$
    DECLARE
        type_oid oid;
    BEGIN
        SELECT oid INTO type_oid
        FROM pg_type
        WHERE typname = 'wal_column' AND typnamespace = 'realtime'::regnamespace;

        -- Drop if it exists without the legacy 'type' column (e.g. pre-initialized by supabase-postgres)
        IF type_oid IS NOT NULL AND NOT EXISTS (
            SELECT 1 FROM pg_attribute
            WHERE attrelid = (SELECT typrelid FROM pg_type WHERE oid = type_oid)
            AND attname = 'type'
        ) THEN
            DROP TYPE realtime.wal_column CASCADE;
            type_oid := NULL;
        END IF;

        IF type_oid IS NULL THEN
            CREATE TYPE realtime.wal_column AS (
              name text,
              type text,
              value jsonb,
              is_pkey boolean,
              is_selectable boolean
            );
        END IF;
    END$$;
    """)

    # Creates (or replaces) realtime.build_prepared_statement_sql/3, which returns
    # the text of a `prepare <name> as select exists(...)` statement whose WHERE
    # clause matches the primary-key columns (is_pkey) found in `columns`.
    execute("create or replace function realtime.build_prepared_statement_sql(
      prepared_statement_name text,
      entity regclass,
      columns realtime.wal_column[]
    )
      returns text
      language sql
    as $$
    /*
    Builds a sql string that, if executed, creates a prepared statement to
    tests retrive a row from *entity* by its primary key columns.

    Example
      select realtime.build_prepared_statment_sql('public.notes', '{\"id\"}'::text[], '{\"bigint\"}'::text[])
    */
      select
    'prepare ' || prepared_statement_name || ' as
      select
        exists(
          select
            1
          from
            ' || entity || '
          where
            ' || string_agg(quote_ident(pkc.name) || '=' || quote_nullable(pkc.value) , ' and ') || '
        )'
      from
        unnest(columns) pkc
      where
        pkc.is_pkey
      group by
        entity
    $$;")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20211116213355_create_realtime_cast_function.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateRealtimeCastFunction do
  @moduledoc false

  use Ecto.Migration

  # Creates (or replaces) realtime.cast(val text, type_ regtype), which casts the
  # text value to `type_` in a dynamically built statement and returns it as jsonb.
  def change do
    execute("create or replace function realtime.cast(val text, type_ regtype)
      returns jsonb
      immutable
      language plpgsql
    as $$
    declare
      res jsonb;
    begin
      execute format('select to_jsonb(%L::'|| type_::text || ')', val) into res;
      return res;
    end
    $$;")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20211116213934_create_realtime_is_visible_through_filters_function.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateRealtimeIsVisibleThroughFiltersFunction do
# Tenant migration that creates the `realtime.action` enum, the
# `realtime.wal_rls` composite type and the first version of
# `realtime.apply_rls(wal, max_record_bytes)`.
#
# `apply_rls` takes one WAL record as jsonb and returns a single
# `realtime.wal_rls` row: the jsonb payload to broadcast, whether RLS is enabled
# on the table, the ids of the subscribed users allowed to see the record, and
# a list of error strings.
defmodule Realtime.Tenants.Migrations.CreateRealtimeApplyRlsFunction do
  @moduledoc false

  use Ecto.Migration

  def change do
    # Create realtime.action only when it is missing.
    # The lookup is restricted to the realtime schema: matching on typname
    # alone would treat an unrelated type called `action` in any other schema
    # as "already created", skip the CREATE TYPE and make the function
    # definition below fail.
    execute("""
    DO $$
    BEGIN
      IF NOT EXISTS (
        SELECT 1
        FROM pg_type
        WHERE typname = 'action'
          AND typnamespace = 'realtime'::regnamespace
      ) THEN
        CREATE TYPE realtime.action AS ENUM (
          'INSERT', 'UPDATE', 'DELETE', 'TRUNCATE', 'ERROR'
        );
      END IF;
    END$$;
    """)

    # Same guard for realtime.wal_rls, again scoped to the realtime schema.
    execute("""
    DO $$
    BEGIN
      IF NOT EXISTS (
        SELECT 1
        FROM pg_type
        WHERE typname = 'wal_rls'
          AND typnamespace = 'realtime'::regnamespace
      ) THEN
        CREATE TYPE realtime.wal_rls AS (
          wal jsonb,
          is_rls_enabled boolean,
          users uuid[],
          errors text[]
        );
      END IF;
    END$$;
    """)

    # Function outline:
    #   1. resolve the table, the action and the table's subscriptions;
    #   2. return early with 'Error 401: Unauthorized' when the `authenticated`
    #      role cannot SELECT any column of the table;
    #   3. build the output payload, exposing only selectable columns (record
    #      and old_record are emptied when the WAL record exceeds
    #      max_record_bytes);
    #   4. TRUNCATE/DELETE are visible to every subscriber; otherwise each
    #      subscriber must pass its filters and, when RLS is enabled, a
    #      primary-key lookup executed as `authenticated` with the subscriber's
    #      JWT claim settings.
    execute("create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)
      returns realtime.wal_rls
      language plpgsql
      volatile
    as $$
    declare
      -- Regclass of the table e.g. public.notes
      entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass;

      -- I, U, D, T: insert, update ...
      action realtime.action = (
        case wal ->> 'action'
          when 'I' then 'INSERT'
          when 'U' then 'UPDATE'
          when 'D' then 'DELETE'
          when 'T' then 'TRUNCATE'
          else 'ERROR'
        end
      );

      -- Is row level security enabled for the table
      is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_;

      -- Subscription vars
      user_id uuid;
      email varchar(255);
      user_has_access bool;
      is_visible_to_user boolean;
      visible_to_user_ids uuid[] = '{}';

      -- user subscriptions to the wal record's table
      subscriptions realtime.subscription[] =
        array_agg(sub)
        from
          realtime.subscription sub
        where
          sub.entity = entity_;

      -- structured info for wal's columns
      columns realtime.wal_column[] =
        array_agg(
          (
            x->>'name',
            x->>'type',
            realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype),
            (pks ->> 'name') is not null,
            pg_catalog.has_column_privilege('authenticated', entity_, x->>'name', 'SELECT')
          )::realtime.wal_column
        )
        from
          jsonb_array_elements(wal -> 'columns') x
          left join jsonb_array_elements(wal -> 'pk') pks
            on (x ->> 'name') = (pks ->> 'name');

      -- previous identity values for update/delete
      old_columns realtime.wal_column[] =
        array_agg(
          (
            x->>'name',
            x->>'type',
            realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype),
            (pks ->> 'name') is not null,
            pg_catalog.has_column_privilege('authenticated', entity_, x->>'name', 'SELECT')
          )::realtime.wal_column
        )
        from
          jsonb_array_elements(wal -> 'identity') x
          left join jsonb_array_elements(wal -> 'pk') pks
            on (x ->> 'name') = (pks ->> 'name');

      output jsonb;

      -- Error states
      error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes;
      error_unauthorized boolean = not pg_catalog.has_any_column_privilege('authenticated', entity_, 'SELECT');

      errors text[] = case
        when error_record_exceeds_max_size then array['Error 413: Payload Too Large']
        else '{}'::text[]
      end;
    begin

      -- The 'authenticated' user does not have SELECT permission on any of the columns for the entity_
      if error_unauthorized is true then
        return (
          null,
          null,
          visible_to_user_ids,
          array['Error 401: Unauthorized']
        )::realtime.wal_rls;
      end if;

      -------------------------------
      -- Build Output JSONB Object --
      -------------------------------
      output = jsonb_build_object(
        'schema', wal ->> 'schema',
        'table', wal ->> 'table',
        'type', action,
        'commit_timestamp', (wal ->> 'timestamp')::text::timestamp with time zone,
        'columns', (
          select
            jsonb_agg(
              jsonb_build_object(
                'name', pa.attname,
                'type', pt.typname
              )
              order by pa.attnum asc
            )
          from
            pg_attribute pa
            join pg_type pt
              on pa.atttypid = pt.oid
          where
            attrelid = entity_
            and attnum > 0
            and pg_catalog.has_column_privilege('authenticated', entity_, pa.attname, 'SELECT')
        )
      )
      -- Add \"record\" key for insert and update
      || case
          when error_record_exceeds_max_size then jsonb_build_object('record', '{}'::jsonb)
          when action in ('INSERT', 'UPDATE') then
            jsonb_build_object(
              'record',
              (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable)
            )
          else '{}'::jsonb
      end
      -- Add \"old_record\" key for update and delete
      || case
          when error_record_exceeds_max_size then jsonb_build_object('old_record', '{}'::jsonb)
          when action in ('UPDATE', 'DELETE') then
            jsonb_build_object(
              'old_record',
              (select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable)
            )
          else '{}'::jsonb
      end;

      if action in ('TRUNCATE', 'DELETE') then
        visible_to_user_ids = array_agg(s.user_id) from unnest(subscriptions) s;
      else
        -- If RLS is on and someone is subscribed to the table prep
        if is_rls_enabled and array_length(subscriptions, 1) > 0 then
          perform
            set_config('role', 'authenticated', true),
            set_config('request.jwt.claim.role', 'authenticated', true);

          if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then
            deallocate walrus_rls_stmt;
          end if;
          execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns);

        end if;

        -- For each subscribed user
        for user_id, email, is_visible_to_user in (
          select
            subs.user_id,
            subs.email,
            realtime.is_visible_through_filters(columns, subs.filters)
          from
            unnest(subscriptions) subs
        )
        loop
          if is_visible_to_user then
            -- If RLS is off, add to visible users
            if not is_rls_enabled then
              visible_to_user_ids = visible_to_user_ids || user_id;
            else
              -- Check if RLS allows the user to see the record
              perform
                set_config('request.jwt.claim.sub', user_id::text, true),
                set_config('request.jwt.claim.email', email::text, true);
              execute 'execute walrus_rls_stmt' into user_has_access;

              if user_has_access then
                visible_to_user_ids = visible_to_user_ids || user_id;
              end if;

            end if;
          end if;

        end loop;

        perform (
          set_config('role', null, true)
        );

      end if;

      return (
        output,
        is_rls_enabled,
        visible_to_user_ids,
        errors
      )::realtime.wal_rls;
    end;
    $$;")
  end
end
action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' when 'T' then 'TRUNCATE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; -- Subscription vars user_id uuid; email varchar(255); user_has_access bool; is_visible_to_user boolean; visible_to_user_ids uuid[] = '{}'; -- user subscriptions to the wal record's table subscriptions realtime.subscription[] = array_agg(sub) from realtime.subscription sub where sub.entity = entity_; -- structured info for wal's columns columns realtime.wal_column[] = array_agg( ( x->>'name', x->>'type', realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype), (pks ->> 'name') is not null, pg_catalog.has_column_privilege('authenticated', entity_, x->>'name', 'SELECT') )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); -- previous identity values for update/delete old_columns realtime.wal_column[] = array_agg( ( x->>'name', x->>'type', realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype), (pks ->> 'name') is not null, pg_catalog.has_column_privilege('authenticated', entity_, x->>'name', 'SELECT') )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); output jsonb; -- Error states error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; error_unauthorized boolean = not pg_catalog.has_any_column_privilege('authenticated', entity_, 'SELECT'); errors text[] = case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}'::text[] end; begin -- The 'authenticated' user does not have SELECT permission on any of the columns for the entity_ if error_unauthorized is true then return ( null, null, visible_to_user_ids, 
array['Error 401: Unauthorized'] )::realtime.wal_rls; end if; ------------------------------- -- Build Output JSONB Object -- ------------------------------- output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', (wal ->> 'timestamp')::text::timestamp with time zone, 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege('authenticated', entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when error_record_exceeds_max_size then jsonb_build_object('record', '{}'::jsonb) when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable) ) else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when error_record_exceeds_max_size then jsonb_build_object('old_record', '{}'::jsonb) when action in ('UPDATE', 'DELETE') then jsonb_build_object( 'old_record', (select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable) ) else '{}'::jsonb end; if action in ('TRUNCATE', 'DELETE') then visible_to_user_ids = array_agg(s.user_id) from unnest(subscriptions) s; else -- If RLS is on and someone is subscribed to the table prep if is_rls_enabled and array_length(subscriptions, 1) > 0 then perform set_config('role', 'authenticated', true); if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; -- For each subscribed user for user_id, email, is_visible_to_user in ( select subs.user_id, subs.email, realtime.is_visible_through_filters(columns, subs.filters) from unnest(subscriptions) 
# Tenant migration that redefines the trigger function
# `realtime.subscription_check_filters()`.
#
# For the row being written (`new`) the function:
#   * collects the columns of `new.entity` that the `authenticated` role may
#     SELECT;
#   * raises when a filter names a column outside that list, when the column
#     type cannot be looked up, or when the filter value cannot be cast to the
#     column type (via realtime.cast);
#   * rewrites `new.filters` in a deterministic order (column_name, op, value)
#     before returning the row.
defmodule Realtime.Tenants.Migrations.UpdateRealtimeSubscriptionCheckFiltersFunctionSecurity do
  @moduledoc false

  use Ecto.Migration

  # Runs a single `create or replace function` statement.
  def change do
    execute("create or replace function realtime.subscription_check_filters()
      returns trigger
      language plpgsql
    as $$
    /*
    Validates that the user defined filters for a subscription:
    - refer to valid columns that 'authenticated' may access
    - values are coercable to the correct column type
    */
    declare
      col_names text[] = coalesce(
        array_agg(c.column_name order by c.ordinal_position),
        '{}'::text[]
      )
      from
        information_schema.columns c
      where
        (quote_ident(c.table_schema) || '.' || quote_ident(c.table_name))::regclass = new.entity
        and pg_catalog.has_column_privilege('authenticated', new.entity, c.column_name, 'SELECT');
      filter realtime.user_defined_filter;
      col_type regtype;
    begin
      for filter in select * from unnest(new.filters) loop
        -- Filtered column is valid
        if not filter.column_name = any(col_names) then
          raise exception 'invalid column for filter %', filter.column_name;
        end if;

        -- Type is sanitized and safe for string interpolation
        col_type = (
          select atttypid::regtype
          from pg_catalog.pg_attribute
          where attrelid = new.entity
            and attname = filter.column_name
        );
        if col_type is null then
          raise exception 'failed to lookup type for column %', filter.column_name;
        end if;
        -- raises an exception if value is not coercable to type
        perform realtime.cast(filter.value, col_type);
      end loop;

      -- Apply consistent order to filters so the unique constraint on
      -- (user_id, entity, filters) can't be tricked by a different filter order
      new.filters = coalesce(
        array_agg(f order by f.column_name, f.op, f.value),
        '{}'
      ) from unnest(new.filters) f;

      return new;
    end;
    $$;")
  end
end
# Tenant migration that moves subscriptions from a fixed (user_id, email)
# identity to arbitrary JWT claims.
#
# Steps, in execution order:
#   1. empty realtime.subscription and drop its old unique constraint plus the
#      email / created_at columns;
#   2. rename user_id to subscription_id;
#   3. add `claims` (jsonb), the generated column `claims_role` (derived from
#      claims ->> 'role' through realtime.to_regrole) and a new created_at;
#   4. add a unique index on (subscription_id, entity, filters);
#   5. revoke the `authenticated` role's access to the realtime schema and to
#      the subscription table;
#   6. redefine realtime.subscription_check_filters() to check column
#      privileges for the role named in the row's claims;
#   7. rename wal_rls.users to subscription_ids and recreate
#      realtime.apply_rls as a set-returning function that emits one row per
#      distinct claims role subscribed to the table.
defmodule Realtime.Tenants.Migrations.EnableGenericSubscriptionClaims do
  @moduledoc false

  use Ecto.Migration

  def change do
    # Existing subscription rows are discarded before the columns change.
    execute("truncate table realtime.subscription restart identity")

    execute("alter table realtime.subscription
      drop constraint subscription_entity_user_id_filters_key cascade,
      drop column email cascade,
      drop column created_at cascade")

    execute("alter table realtime.subscription rename user_id to subscription_id")

    # Wrapper around the text -> regrole cast, declared immutable so it can be
    # used in the generated column added below.
    execute("create or replace function realtime.to_regrole(role_name text)
      returns regrole
      immutable
      language sql
      -- required to allow use in generated clause
    as $$ select role_name::regrole $$;")

    execute("alter table realtime.subscription
      add column claims jsonb not null,
      add column claims_role regrole not null generated always as (realtime.to_regrole(claims ->> 'role')) stored,
      add column created_at timestamp not null default timezone('utc', now())")

    execute(
      "create unique index subscription_subscription_id_entity_filters_key on realtime.subscription (subscription_id, entity, filters)"
    )

    execute("revoke usage on schema realtime from authenticated;")
    execute("revoke all on realtime.subscription from authenticated;")

    # Same validation as the earlier definition, but column privileges are
    # checked for the role in new.claims instead of the literal 'authenticated'.
    execute("create or replace function realtime.subscription_check_filters()
      returns trigger
      language plpgsql
    as $$
    /*
    Validates that the user defined filters for a subscription:
    - refer to valid columns that the claimed role may access
    - values are coercable to the correct column type
    */
    declare
      col_names text[] = coalesce(
        array_agg(c.column_name order by c.ordinal_position),
        '{}'::text[]
      )
      from
        information_schema.columns c
      where
        format('%I.%I', c.table_schema, c.table_name)::regclass = new.entity
        and pg_catalog.has_column_privilege((new.claims ->> 'role'), new.entity, c.column_name, 'SELECT');
      filter realtime.user_defined_filter;
      col_type regtype;
    begin
      for filter in select * from unnest(new.filters) loop
        -- Filtered column is valid
        if not filter.column_name = any(col_names) then
          raise exception 'invalid column for filter %', filter.column_name;
        end if;

        -- Type is sanitized and safe for string interpolation
        col_type = (
          select atttypid::regtype
          from pg_catalog.pg_attribute
          where attrelid = new.entity
            and attname = filter.column_name
        );
        if col_type is null then
          raise exception 'failed to lookup type for column %', filter.column_name;
        end if;
        -- raises an exception if value is not coercable to type
        perform realtime.cast(filter.value, col_type);
      end loop;

      -- Apply consistent order to filters so the unique constraint on
      -- (subscription_id, entity, filters) can't be tricked by a different filter order
      new.filters = coalesce(
        array_agg(f order by f.column_name, f.op, f.value),
        '{}'
      ) from unnest(new.filters) f;

      return new;
    end;
    $$;")

    execute("alter type realtime.wal_rls rename attribute users to subscription_ids cascade;")

    # The return type changes to `setof`, which `create or replace` cannot do,
    # so the previous definition is dropped first.
    execute("drop function realtime.apply_rls(jsonb, integer);")

    # For every distinct claims role subscribed to the table the function
    # returns one wal_rls row:
    #   * an error row (null payload) when the record has no primary key or
    #     the role cannot SELECT every primary-key column (DELETE is exempt);
    #   * otherwise the payload restricted to the role's selectable columns,
    #     with the subscription ids that pass their filters and, when RLS is
    #     enabled and the action is not DELETE, a primary-key lookup executed
    #     under that role with the subscription's claims.
    # NOTE(review): the SQL comment 'I, U, D, T' below is stale; this version
    # no longer maps 'T' and falls through to 'ERROR' for it.
    execute("create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)
      returns setof realtime.wal_rls
      language plpgsql
      volatile
    as $$
    declare
      -- Regclass of the table e.g. public.notes
      entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass;

      -- I, U, D, T: insert, update ...
      action realtime.action = (
        case wal ->> 'action'
          when 'I' then 'INSERT'
          when 'U' then 'UPDATE'
          when 'D' then 'DELETE'
          else 'ERROR'
        end
      );

      -- Is row level security enabled for the table
      is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_;

      subscriptions realtime.subscription[] = array_agg(subs)
        from
          realtime.subscription subs
        where
          subs.entity = entity_;

      -- Subscription vars
      roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us;
      working_role regrole;
      claimed_role regrole;
      claims jsonb;

      subscription_id uuid;
      subscription_has_access bool;
      visible_to_subscription_ids uuid[] = '{}';

      -- structured info for wal's columns
      columns realtime.wal_column[];
      -- previous identity values for update/delete
      old_columns realtime.wal_column[];

      error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes;

      -- Primary jsonb output for record
      output jsonb;

    begin
      perform set_config('role', null, true);

      columns =
        array_agg(
          (
            x->>'name',
            x->>'type',
            realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype),
            (pks ->> 'name') is not null,
            true
          )::realtime.wal_column
        )
        from
          jsonb_array_elements(wal -> 'columns') x
          left join jsonb_array_elements(wal -> 'pk') pks
            on (x ->> 'name') = (pks ->> 'name');

      old_columns =
        array_agg(
          (
            x->>'name',
            x->>'type',
            realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype),
            (pks ->> 'name') is not null,
            true
          )::realtime.wal_column
        )
        from
          jsonb_array_elements(wal -> 'identity') x
          left join jsonb_array_elements(wal -> 'pk') pks
            on (x ->> 'name') = (pks ->> 'name');

      for working_role in select * from unnest(roles) loop

        -- Update `is_selectable` for columns and old_columns
        columns =
          array_agg(
            (
              c.name,
              c.type,
              c.value,
              c.is_pkey,
              pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT')
            )::realtime.wal_column
          )
          from
            unnest(columns) c;

        old_columns =
          array_agg(
            (
              c.name,
              c.type,
              c.value,
              c.is_pkey,
              pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT')
            )::realtime.wal_column
          )
          from
            unnest(old_columns) c;

        if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then
          return next (
            null,
            is_rls_enabled,
            -- subscriptions is already filtered by entity
            (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role),
            array['Error 400: Bad Request, no primary key']
          )::realtime.wal_rls;

        -- The claims role does not have SELECT permission to the primary key of entity
        elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then
          return next (
            null,
            is_rls_enabled,
            (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role),
            array['Error 401: Unauthorized']
          )::realtime.wal_rls;

        else
          output = jsonb_build_object(
            'schema', wal ->> 'schema',
            'table', wal ->> 'table',
            'type', action,
            'commit_timestamp', (wal ->> 'timestamp')::text::timestamp with time zone,
            'columns', (
              select
                jsonb_agg(
                  jsonb_build_object(
                    'name', pa.attname,
                    'type', pt.typname
                  )
                  order by pa.attnum asc
                )
              from
                pg_attribute pa
                join pg_type pt
                  on pa.atttypid = pt.oid
              where
                attrelid = entity_
                and attnum > 0
                and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT')
            )
          )
          -- Add \"record\" key for insert and update
          || case
              when error_record_exceeds_max_size then jsonb_build_object('record', '{}'::jsonb)
              when action in ('INSERT', 'UPDATE') then
                jsonb_build_object(
                  'record',
                  (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable)
                )
              else '{}'::jsonb
          end
          -- Add \"old_record\" key for update and delete
          || case
              when error_record_exceeds_max_size then jsonb_build_object('old_record', '{}'::jsonb)
              when action in ('UPDATE', 'DELETE') then
                jsonb_build_object(
                  'old_record',
                  (select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable)
                )
              else '{}'::jsonb
          end;

          -- Create the prepared statement
          if is_rls_enabled and action <> 'DELETE' then
            if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then
              deallocate walrus_rls_stmt;
            end if;
            execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns);
          end if;

          visible_to_subscription_ids = '{}';

          for subscription_id, claims in (
            select
              subs.subscription_id,
              subs.claims
            from
              unnest(subscriptions) subs
            where
              subs.entity = entity_
              and subs.claims_role = working_role
              and realtime.is_visible_through_filters(columns, subs.filters)
          ) loop

            if not is_rls_enabled or action = 'DELETE' then
              visible_to_subscription_ids = visible_to_subscription_ids || subscription_id;
            else
              -- Check if RLS allows the role to see the record
              perform
                set_config('role', working_role::text, true),
                set_config('request.jwt.claims', claims::text, true);

              execute 'execute walrus_rls_stmt' into subscription_has_access;

              if subscription_has_access then
                visible_to_subscription_ids = visible_to_subscription_ids || subscription_id;
              end if;
            end if;
          end loop;

          perform set_config('role', null, true);

          return next (
            output,
            is_rls_enabled,
            visible_to_subscription_ids,
            case
              when error_record_exceeds_max_size then array['Error 413: Payload Too Large']
              else '{}'
            end
          )::realtime.wal_rls;

        end if;
      end loop;

      perform set_config('role', null, true);
    end;
    $$;")
  end
end
action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') 
)::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', (wal ->> 'timestamp')::text::timestamp with time zone, 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when error_record_exceeds_max_size then jsonb_build_object('record', '{}'::jsonb) when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable) ) else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when error_record_exceeds_max_size then jsonb_build_object('old_record', '{}'::jsonb) when action in ('UPDATE', 'DELETE') then jsonb_build_object( 'old_record', (select 
jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and realtime.is_visible_through_filters(columns, subs.filters) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform set_config('role', working_role::text, true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$;") end end ================================================ FILE: lib/realtime/tenants/repo/migrations/20220107221237_update_change_timestamp_to_iso_8601_zulu_format.ex ================================================ defmodule Realtime.Tenants.Migrations.UpdateChangeTimestampToIso8601ZuluFormat do @moduledoc false use Ecto.Migration def change do execute("create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024) returns setof realtime.wal_rls language plpgsql volatile as $$ declare -- 
Regclass of the table e.g. public.notes entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass; -- I, U, D, T: insert, update ... action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') 
)::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( null, is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( null, is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( (wal ->> 'timestamp')::timestamptz, 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when error_record_exceeds_max_size then jsonb_build_object('record', '{}'::jsonb) when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable) ) else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when error_record_exceeds_max_size then jsonb_build_object('old_record', '{}'::jsonb) when action in ('UPDATE', 'DELETE') then jsonb_build_object( 
'old_record', (select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and realtime.is_visible_through_filters(columns, subs.filters) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform set_config('role', working_role::text, true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$;")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20220228202821_update_subscription_check_filters_function_dynamic_table_name.ex
================================================
# Tenant migration 20220228202821: redefines the trigger function
# realtime.subscription_check_filters().
#
# Per the SQL in this module, the function:
#   * collects the columns of new.entity that the role named in new.claims
#     may SELECT (via information_schema.columns and has_column_privilege),
#   * raises when a filter names a column outside that list, or when the
#     column type cannot be found in pg_catalog.pg_attribute,
#   * calls realtime.cast(filter.value, col_type) so an uncoercible filter
#     value raises,
#   * rewrites new.filters ordered by (column_name, op, value) and returns new.
defmodule Realtime.Tenants.Migrations.UpdateSubscriptionCheckFiltersFunctionDynamicTableName do
  @moduledoc false

  use Ecto.Migration

  # Issues one `create or replace function` statement. Only forward SQL is
  # passed (single-argument execute), so no rollback statement is defined here.
  def change do
    execute("create or replace function realtime.subscription_check_filters() returns trigger language plpgsql as $$ /* Validates that 
the user defined filters for a subscription: - refer to valid columns that the claimed role may access - values are coercable to the correct column type */ declare col_names text[] = coalesce( array_agg(c.column_name order by c.ordinal_position), '{}'::text[] ) from information_schema.columns c where format('%I.%I', c.table_schema, c.table_name)::regclass = new.entity and pg_catalog.has_column_privilege( (new.claims ->> 'role'), format('%I.%I', c.table_schema, c.table_name)::regclass, c.column_name, 'SELECT' ); filter realtime.user_defined_filter; col_type regtype; begin for filter in select * from unnest(new.filters) loop -- Filtered column is valid if not filter.column_name = any(col_names) then raise exception 'invalid column for filter %', filter.column_name; end if; -- Type is sanitized and safe for string interpolation col_type = ( select atttypid::regtype from pg_catalog.pg_attribute where attrelid = new.entity and attname = filter.column_name ); if col_type is null then raise exception 'failed to lookup type for column %', filter.column_name; end if; -- raises an exception if value is not coercable to type perform realtime.cast(filter.value, col_type); end loop; -- Apply consistent order to filters so the unique constraint on -- (subscription_id, entity, filters) can't be tricked by a different filter order new.filters = coalesce( array_agg(f order by f.column_name, f.op, f.value), '{}' ) from unnest(new.filters) f; return new; end; $$;")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20220312004840_update_apply_rls_function_to_apply_iso_8601.ex
================================================
# Tenant migration 20220312004840: redefines the PL/pgSQL function
# realtime.apply_rls(wal jsonb, max_record_bytes int), which returns
# setof realtime.wal_rls.
#
# The commit_timestamp key of the output object is built with to_char(...)
# and an ISO 8601 style pattern in which T and Z are quoted literal
# characters; the pattern stops at whole seconds (HH24:MI:SS).
#
# NOTE(review): the SQL visible for this module reads the same as the
# apply_rls definition in migration 20220107221237 -- confirm what this
# migration is meant to change.
# NOTE(review): Z is appended as literal text; the pattern itself does not
# convert the timestamptz value to UTC -- confirm the session time zone is UTC.
defmodule Realtime.Tenants.Migrations.UpdateApplyRlsFunctionToApplyIso8601 do
  @moduledoc false

  use Ecto.Migration

  # Issues one `create or replace function` statement. Only forward SQL is
  # passed (single-argument execute), so no rollback statement is defined here.
  def change do
    execute("create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024) returns setof realtime.wal_rls language plpgsql volatile as $$ declare -- Regclass of 
the table e.g. public.notes entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass; -- I, U, D, T: insert, update ... action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', realtime.cast((x->'value') #>> '{}', (x->>'type')::regtype), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from 
unnest(columns) c; old_columns = array_agg( ( c.name, c.type, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( null, is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( null, is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( (wal ->> 'timestamp')::timestamptz, 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when error_record_exceeds_max_size then jsonb_build_object('record', '{}'::jsonb) when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable) ) else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when error_record_exceeds_max_size then jsonb_build_object('old_record', '{}'::jsonb) when action in ('UPDATE', 'DELETE') then jsonb_build_object( 'old_record', (select 
jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and realtime.is_visible_through_filters(columns, subs.filters) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform set_config('role', working_role::text, true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$;")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20220603231003_add_quoted_regtypes_support.ex
================================================
# Tenant migration 20220603231003: rebuilds the realtime.wal_column composite
# type with separate type_name and type_oid fields, then redefines
# realtime.is_visible_through_filters and realtime.apply_rls so they resolve
# column types from the oid (type_oid::regtype / typeoid from the wal json).
defmodule Realtime.Tenants.Migrations.AddQuotedRegtypesSupport do
  @moduledoc false

  use Ecto.Migration

  # Runs four statements in order: drop type, create type, recreate
  # is_visible_through_filters, recreate apply_rls. Only forward SQL is passed
  # (single-argument execute), so no rollback statements are defined here.
  def change do
    # Drops the existing type; `cascade` also drops objects that depend on it.
    # NOTE(review): only is_visible_through_filters and apply_rls are recreated
    # later in this migration -- confirm no other dependent object is lost.
    execute("drop type if exists realtime.wal_column cascade;")

    # Recreates realtime.wal_column with the fields name, type_name, type_oid,
    # value, is_pkey and is_selectable.
    # NOTE(review): the IF NOT EXISTS guard matches pg_type.typname only, with
    # no schema condition -- a wal_column type in another schema would make
    # this block skip the CREATE TYPE.
    execute(""" DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'wal_column') THEN CREATE TYPE realtime.wal_column AS ( name text, type_name 
text, type_oid oid, value jsonb, is_pkey boolean, is_selectable boolean ); END IF; END$$; """) execute(" create or replace function realtime.is_visible_through_filters(columns realtime.wal_column[], filters realtime.user_defined_filter[]) returns bool language sql immutable as $$ /* Should the record be visible (true) or filtered out (false) after *filters* are applied */ select -- Default to allowed when no filters present coalesce( sum( realtime.check_equality_op( op:=f.op, type_:=col.type_oid::regtype, -- cast jsonb to text val_1:=col.value #>> '{}', val_2:=f.value )::int ) = count(1), true ) from unnest(filters) f join unnest(columns) col on f.column_name = col.name; $$;") execute(" create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024) returns setof realtime.wal_rls language plpgsql volatile as $$ declare -- Regclass of the table e.g. public.notes entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass; -- I, U, D, T: insert, update ... 
action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', (x->>'typeoid')::regtype ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', (x->>'typeoid')::regtype ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, 
pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( (wal ->> 'timestamp')::timestamptz, 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when error_record_exceeds_max_size then jsonb_build_object('record', '{}'::jsonb) when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable) ) else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when error_record_exceeds_max_size then jsonb_build_object('old_record', '{}'::jsonb) when 
action in ('UPDATE', 'DELETE') then jsonb_build_object( 'old_record', (select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and realtime.is_visible_through_filters(columns, subs.filters) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform set_config('role', working_role::text, true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$; ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20220603232444_add_output_for_data_less_than_equal_64_bytes_when_payload_too_large.ex
================================================
# Tenant migration 20220603232444: redefines realtime.apply_rls so that an
# oversized WAL record still yields partial data.
#
# When octet_length(wal::text) > max_record_bytes, the record and old_record
# objects keep only the selectable columns whose value::text is at most 64
# bytes (earlier definitions emitted an empty object in that case). The
# returned realtime.wal_rls row still carries 'Error 413: Payload Too Large'
# as its fourth field.
defmodule Realtime.Tenants.Migrations.AddOutputForDataLessThanEqual64BytesWhenPayloadTooLarge do
  @moduledoc false

  use Ecto.Migration

  # Issues one `create or replace function` statement. Only forward SQL is
  # passed (single-argument execute), so no rollback statement is defined here.
  def change do
    execute(" create or replace function realtime.apply_rls(wal jsonb, 
max_record_bytes int = 1024 * 1024) returns setof realtime.wal_rls language plpgsql volatile as $$ declare -- Regclass of the table e.g. public.notes entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass; -- I, U, D, T: insert, update ... action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', (x->>'typeoid')::regtype ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', (x->>'typeoid')::regtype ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns 
columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( (wal ->> 'timestamp')::timestamptz, 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when action in ('INSERT', 'UPDATE') then case when error_record_exceeds_max_size then jsonb_build_object( 'record', ( select 
jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable and (octet_length((c).value::text) <= 64) ) ) else jsonb_build_object( 'record', (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable) ) end else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when action in ('UPDATE', 'DELETE') then case when error_record_exceeds_max_size then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and (octet_length((c).value::text) <= 64) ) ) else jsonb_build_object( 'old_record', (select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable) ) end else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and realtime.is_visible_through_filters(columns, subs.filters) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform set_config('role', working_role::text, true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 
413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$; ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20220615214548_add_quoted_regtypes_backward_compatibility_support.ex
================================================
# Tenant migration 20220615214548: redefines
# realtime.is_visible_through_filters and realtime.apply_rls so the column
# type is resolved with coalesce(oid::regtype, name::regtype): the type oid
# is tried first and the type name is the fallback. The SQL comments state
# that the oid is null when wal2json version <= 2.4.
defmodule Realtime.Tenants.Migrations.AddQuotedRegtypesBackwardCompatibilitySupport do
  @moduledoc false

  use Ecto.Migration

  # Runs two `create or replace function` statements, first for
  # is_visible_through_filters and then for apply_rls. Only forward SQL is
  # passed (single-argument execute), so no rollback statements are defined.
  def change do
    execute(" create or replace function realtime.is_visible_through_filters(columns realtime.wal_column[], filters realtime.user_defined_filter[]) returns bool language sql immutable as $$ /* Should the record be visible (true) or filtered out (false) after *filters* are applied */ select -- Default to allowed when no filters present coalesce( sum( realtime.check_equality_op( op:=f.op, type_:=coalesce( col.type_oid::regtype, -- null when wal2json version <= 2.4 col.type_name::regtype ), -- cast jsonb to text val_1:=col.value #>> '{}', val_2:=f.value )::int ) = count(1), true ) from unnest(filters) f join unnest(columns) col on f.column_name = col.name; $$; ") execute(" create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024) returns setof realtime.wal_rls language plpgsql volatile as $$ declare -- Regclass of the table e.g. public.notes entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass; -- I, U, D, T: insert, update ... 
action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 
'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( (wal ->> 'timestamp')::timestamptz, 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when action in ('INSERT', 'UPDATE') then case when error_record_exceeds_max_size then jsonb_build_object( 'record', ( select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable and (octet_length((c).value::text) <= 64) ) ) else 
jsonb_build_object( 'record', (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable) ) end else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when action in ('UPDATE', 'DELETE') then case when error_record_exceeds_max_size then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and (octet_length((c).value::text) <= 64) ) ) else jsonb_build_object( 'old_record', (select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable) ) end else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and realtime.is_visible_through_filters(columns, subs.filters) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform set_config('role', working_role::text, true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$; ") end 
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20220712093339_recreate_realtime_build_prepared_statement_sql_function.ex
================================================
# Migration: (re)creates the SQL function
# `realtime.build_prepared_statement_sql(prepared_statement_name, entity, columns)`.
#
# The function returns the text of a `prepare <name> as select exists(...)`
# statement that checks whether a row exists in `entity` whose primary-key
# columns equal the values carried in `columns` (only entries with `is_pkey`
# set take part). Column names go through `quote_ident` and values are inlined
# through `quote_nullable`, so the generated statement takes no parameters.
# `prepared_statement_name` and `entity` are concatenated into the text as-is.
#
# NOTE(review): the "Example" inside the SQL comment passes
# ('public.notes', text[], text[]), which does not match the declared
# (text, regclass, realtime.wal_column[]) signature - looks stale; confirm.
defmodule Realtime.Tenants.Migrations.RecreateRealtimeBuildPreparedStatementSqlFunction do
  @moduledoc false

  use Ecto.Migration

  # Installs the function with `create or replace`.
  # NOTE(review): `execute/1` with a single SQL string gives Ecto no reverse
  # command for `change/0` - confirm this migration is meant to be forward-only.
  def change do
    execute("
      create or replace function realtime.build_prepared_statement_sql(
        prepared_statement_name text,
        entity regclass,
        columns realtime.wal_column[]
      )
        returns text
        language sql
      as $$
      /*
      Builds a sql string that, if executed, creates a prepared statement to
      tests retrive a row from *entity* by its primary key columns.

      Example
        select realtime.build_prepared_statement_sql('public.notes', '{\"id\"}'::text[], '{\"bigint\"}'::text[])
      */
        select
          'prepare ' || prepared_statement_name || ' as select exists( select 1 from ' || entity || ' where '
          || string_agg(quote_ident(pkc.name) || '=' || quote_nullable(pkc.value #>> '{}') , ' and ')
          || ' )'
        from
          unnest(columns) pkc
        where
          pkc.is_pkey
        group by
          entity
      $$;
    ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20220908172859_null_passes_filters_recreate_is_visible_through_filters.ex
================================================
# Migration: (re)creates the SQL function
# `realtime.is_visible_through_filters(columns, filters)`, which returns bool.
#
# As written, the function yields true when `filters` is null or an empty array.
# Otherwise it is the `bool_and` of `realtime.check_equality_op` evaluated for
# each filter joined to the column of the same name; a null comparison result
# is coalesced to false ("filter does not match").
#
# NOTE(review): filters and columns are combined with an inner join on the
# column name, so a filter naming a column that is absent from `columns`
# contributes no row to the aggregate - confirm that is intended.
defmodule Realtime.Tenants.Migrations.NullPassesFiltersRecreateIsVisibleThroughFilters do
  @moduledoc false

  use Ecto.Migration

  # Installs the function with `create or replace`.
  def change do
    execute("
      create or replace function realtime.is_visible_through_filters(columns realtime.wal_column[], filters realtime.user_defined_filter[])
        returns bool
        language sql
        immutable
      as $$
      /*
      Should the record be visible (true) or filtered out (false) after *filters* are applied
      */
        select
          -- Default to allowed when no filters present
          $2 is null -- no filters.
this should not happen because subscriptions has a default
          or array_length($2, 1) is null -- array length of an empty array is null
          or bool_and(
            coalesce(
              realtime.check_equality_op(
                op:=f.op,
                type_:=coalesce(
                  col.type_oid::regtype, -- null when wal2json version <= 2.4
                  col.type_name::regtype
                ),
                -- cast jsonb to text
                val_1:=col.value #>> '{}',
                val_2:=f.value
              ),
              false -- if null, filter does not match
            )
          )
        from
          unnest(filters) f
          join unnest(columns) col
            on f.column_name = col.name;
      $$;
    ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20220916233421_update_apply_rls_function_to_pass_through_delete_events_on_filter.ex
================================================
# Migration: replaces the plpgsql function
# `realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)`,
# which returns `setof realtime.wal_rls`.
#
# What the function body in this migration does, per distinct `claims_role`
# found among the subscriptions on the changed table:
#   * non-DELETE change without any primary-key column -> one row carrying
#     'Error 400: Bad Request, no primary key';
#   * non-DELETE change where the role cannot SELECT every primary-key
#     column -> one row carrying 'Error 401: Unauthorized';
#   * otherwise -> one row with the jsonb payload (schema, table, type,
#     commit_timestamp, columns, record / old_record limited to columns the
#     role may SELECT) and the subscription ids allowed to see it.
# When the WAL payload exceeds `max_record_bytes`, values longer than 64 bytes
# are left out of record / old_record and 'Error 413: Payload Too Large' is
# reported alongside the row.
#
# In this version a subscription is considered when its filters pass
# `realtime.is_visible_through_filters` OR the action is 'DELETE', i.e. DELETE
# events are not held back by user-defined filters. For DELETE (or when RLS is
# disabled) the per-subscription prepared-statement check is skipped as well.
defmodule Realtime.Tenants.Migrations.UpdateApplyRlsFunctionToPassThroughDeleteEventsOnFilter do
  @moduledoc false

  use Ecto.Migration

  # Installs the function with `create or replace`.
  def change do
    execute("
      create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)
        returns setof realtime.wal_rls
        language plpgsql
        volatile
      as $$
      declare
        -- Regclass of the table e.g. public.notes
        entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass;

        -- I, U, D, T: insert, update ...
action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 
'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( (wal ->> 'timestamp')::timestamptz, 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when action in ('INSERT', 'UPDATE') then case when error_record_exceeds_max_size then jsonb_build_object( 'record', ( select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable and (octet_length((c).value::text) <= 64) ) ) else 
jsonb_build_object( 'record', (select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable) ) end else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when action in ('UPDATE', 'DELETE') then case when error_record_exceeds_max_size then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and (octet_length((c).value::text) <= 64) ) ) else jsonb_build_object( 'old_record', (select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable) ) end else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and ( realtime.is_visible_through_filters(columns, subs.filters) or action = 'DELETE' ) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform set_config('role', working_role::text, true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, 
true);
      end;
      $$;
    ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20230119133233_millisecond_precision_for_walrus.ex
================================================
# Migration: replaces `realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)`
# (returns `setof realtime.wal_rls`).
#
# Points that distinguish the body installed here from the 20220916233421 one:
#   * `commit_timestamp` is formatted with 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"',
#     i.e. it now carries milliseconds;
#   * the oversized-payload rule (drop values longer than 64 bytes) is folded
#     into a single predicate per record / old_record instead of two branches;
#   * for DELETE on a table with RLS enabled, `old_record` is restricted to
#     primary-key columns (the SQL comment explains deletes cannot be secured).
# Subscription selection is unchanged: filters must pass or the action is 'DELETE'.
defmodule Realtime.Tenants.Migrations.MillisecondPrecisionForWalrus do
  @moduledoc false

  use Ecto.Migration

  # Installs the function with `create or replace`.
  def change do
    execute("
      create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)
        returns setof realtime.wal_rls
        language plpgsql
        volatile
      as $$
      declare
        -- Regclass of the table e.g. public.notes
        entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass;

        -- I, U, D, T: insert, update ...
        action realtime.action = (
          case wal ->> 'action'
            when 'I' then 'INSERT'
            when 'U' then 'UPDATE'
            when 'D' then 'DELETE'
            else 'ERROR'
          end
        );

        -- Is row level security enabled for the table
        is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_;

        subscriptions realtime.subscription[] = array_agg(subs)
          from
            realtime.subscription subs
          where
            subs.entity = entity_;

        -- Subscription vars
        roles regrole[] = array_agg(distinct us.claims_role)
          from
            unnest(subscriptions) us;

        working_role regrole;
        claimed_role regrole;
        claims jsonb;

        subscription_id uuid;
        subscription_has_access bool;
        visible_to_subscription_ids uuid[] = '{}';

        -- structured info for wal's columns
        columns realtime.wal_column[];
        -- previous identity values for update/delete
        old_columns realtime.wal_column[];

        error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes;

        -- Primary jsonb output for record
        output jsonb;

      begin
        perform set_config('role', null, true);

        columns =
          array_agg(
            (
              x->>'name',
              x->>'type',
              x->>'typeoid',
              realtime.cast(
                (x->'value') #>> '{}',
                coalesce(
                  (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4
                  (x->>'type')::regtype
                )
              ),
              (pks ->> 'name') is not null,
              true
            )::realtime.wal_column
          )
          from
            jsonb_array_elements(wal -> 'columns') x
            left join jsonb_array_elements(wal ->
'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( (wal ->> 
'timestamp')::timestamptz, 'YYYY-MM-DD\"T\"HH24:MI:SS.MS\"Z\"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', ( select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when action = 'UPDATE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) when action = 'DELETE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) and ( not is_rls_enabled or (c).is_pkey ) -- if RLS enabled, we can't secure deletes so filter to pkey ) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and ( realtime.is_visible_through_filters(columns, subs.filters) or action = 'DELETE' ) ) loop if not is_rls_enabled or action = 
'DELETE' then
              visible_to_subscription_ids = visible_to_subscription_ids || subscription_id;
            else
              -- Check if RLS allows the role to see the record
              perform
                set_config('role', working_role::text, true),
                set_config('request.jwt.claims', claims::text, true);

              execute 'execute walrus_rls_stmt' into subscription_has_access;

              if subscription_has_access then
                visible_to_subscription_ids = visible_to_subscription_ids || subscription_id;
              end if;
            end if;
          end loop;

          perform set_config('role', null, true);

          return next (
            output,
            is_rls_enabled,
            visible_to_subscription_ids,
            case
              when error_record_exceeds_max_size then array['Error 413: Payload Too Large']
              else '{}'
            end
          )::realtime.wal_rls;

        end if;
      end loop;

      perform set_config('role', null, true);
      end;
      $$;
    ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20230128025114_add_in_op_to_filters.ex
================================================
# Migration: introduces the `in` filter operator. It runs three statements:
#   1. adds the value 'in' to the `realtime.equality_op` type;
#   2. replaces `realtime.check_equality_op(op, type_, val_1, val_2)` so that
#      'in' maps to `= any` and `val_2` is cast to `type_[]` for that operator
#      (every other operator casts both values to `type_`);
#   3. replaces the trigger function `realtime.subscription_check_filters()`,
#      which validates each filter of a new subscription and, for 'in',
#      rejects more than 100 values.
#
# NOTE(review): in this version of `subscription_check_filters` the scalar
# `realtime.cast(filter.value, col_type)` check still runs for every filter,
# including 'in' filters whose value was just cast as an array; the
# 20230227211149 migration moves that check into an `else` branch.
defmodule Realtime.Tenants.Migrations.AddInOpToFilters do
  @moduledoc false

  use Ecto.Migration

  def change do
    # 1. Extend the operator type with 'in'.
    execute("alter type realtime.equality_op add value 'in';")

    # 2. Teach check_equality_op about 'in'. The comparison is built as dynamic
    #    SQL with format(); the values are passed through %L, while the type
    #    name and operator symbol are concatenated into the statement text.
    execute("
      create or replace function realtime.check_equality_op(
        op realtime.equality_op,
        type_ regtype,
        val_1 text,
        val_2 text
      )
        returns bool
        immutable
        language plpgsql
      as $$
      /*
      Casts *val_1* and *val_2* as type *type_* and check the *op* condition for truthiness
      */
      declare
        op_symbol text = (
          case
            when op = 'eq' then '='
            when op = 'neq' then '!='
            when op = 'lt' then '<'
            when op = 'lte' then '<='
            when op = 'gt' then '>'
            when op = 'gte' then '>='
            when op = 'in' then '= any'
            else 'UNKNOWN OP'
          end
        );
        res boolean;
      begin
        execute format(
          'select %L::'|| type_::text || ' ' || op_symbol
          || ' ( %L::'
          || (
            case
              when op = 'in' then type_::text || '[]'
              else type_::text
            end
          )
          || ')', val_1, val_2) into res;
        return res;
      end;
      $$;
    ")

    # 3. Replace the subscription filter validation trigger function.
    execute("
      create or replace function realtime.subscription_check_filters()
        returns trigger
        language plpgsql
      as $$
      /*
      Validates that the user defined filters for a
subscription: - refer to valid columns that the claimed role may access - values are coercable to the correct column type */ declare col_names text[] = coalesce( array_agg(c.column_name order by c.ordinal_position), '{}'::text[] ) from information_schema.columns c where format('%I.%I', c.table_schema, c.table_name)::regclass = new.entity and pg_catalog.has_column_privilege( (new.claims ->> 'role'), format('%I.%I', c.table_schema, c.table_name)::regclass, c.column_name, 'SELECT' ); filter realtime.user_defined_filter; col_type regtype; in_val jsonb; begin for filter in select * from unnest(new.filters) loop -- Filtered column is valid if not filter.column_name = any(col_names) then raise exception 'invalid column for filter %', filter.column_name; end if; -- Type is sanitized and safe for string interpolation col_type = ( select atttypid::regtype from pg_catalog.pg_attribute where attrelid = new.entity and attname = filter.column_name ); if col_type is null then raise exception 'failed to lookup type for column %', filter.column_name; end if; -- Set maximum number of entries for in filter if filter.op = 'in'::realtime.equality_op then in_val = realtime.cast(filter.value, (col_type::text || '[]')::regtype); if coalesce(jsonb_array_length(in_val), 0) > 100 then raise exception 'too many values for `in` filter. 
Maximum 100';
            end if;
          end if;

          -- raises an exception if value is not coercable to type
          perform realtime.cast(filter.value, col_type);
        end loop;

        -- Apply consistent order to filters so the unique constraint on
        -- (subscription_id, entity, filters) can't be tricked by a different filter order
        new.filters = coalesce(
          array_agg(f order by f.column_name, f.op, f.value),
          '{}'
        ) from unnest(new.filters) f;

        return new;
      end;
      $$;
    ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20230128025212_enable_filtering_on_delete_record.ex
================================================
# Migration: replaces `realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)`
# (returns `setof realtime.wal_rls`).
#
# The body installed here keeps the rules of the 20230119133233 version:
# record / old_record only contain columns the role may SELECT, oversized
# payloads drop values longer than 64 bytes, DELETE on an RLS-enabled table
# exposes only primary-key columns in `old_record`, and a subscription is
# considered when its filters pass or the action is 'DELETE'.
#
# NOTE(review): `commit_timestamp` is formatted with
# 'YYYY-MM-DD"T"HH24:MI:SS"Z"' here, i.e. without the '.MS' part that
# 20230119133233 introduced; 20230228184745 brings '.MS' back. Apart from that
# the function text appears identical to the 20230119133233 one - confirm
# whether the precision change between the two was intended.
defmodule Realtime.Tenants.Migrations.EnableFilteringOnDeleteRecord do
  @moduledoc false

  use Ecto.Migration

  # Installs the function with `create or replace`.
  def change do
    execute("
      create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)
        returns setof realtime.wal_rls
        language plpgsql
        volatile
      as $$
      declare
        -- Regclass of the table e.g. public.notes
        entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass;

        -- I, U, D, T: insert, update ...
action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 
'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( (wal ->> 'timestamp')::timestamptz, 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', ( select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) else '{}'::jsonb end -- Add 
\"old_record\" key for update and delete || case when action = 'UPDATE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) when action = 'DELETE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) and ( not is_rls_enabled or (c).is_pkey ) -- if RLS enabled, we can't secure deletes so filter to pkey ) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and ( realtime.is_visible_through_filters(columns, subs.filters) or action = 'DELETE' ) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform set_config('role', working_role::text, true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; 
perform set_config('role', null, true);
      end;
      $$;
    ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20230227211149_update_subscription_check_filters_for_in_filter_non_text_types.ex
================================================
# Migration: replaces the trigger function `realtime.subscription_check_filters()`.
#
# For the row being written (`new`), the function:
#   * collects the columns of `new.entity` that the role named in
#     `new.claims ->> 'role'` may SELECT;
#   * raises 'invalid column for filter %' when a filter names any other column;
#   * looks up the column type from pg_attribute and raises
#     'failed to lookup type for column %' when none is found;
#   * for an 'in' filter, casts the value to an array of the column type and
#     raises when it holds more than 100 entries;
#   * for every other operator, calls `realtime.cast(filter.value, col_type)`,
#     which (per the SQL comment) raises if the value cannot be coerced;
#   * finally stores the filters ordered by (column_name, op, value) so the
#     unique constraint cannot be bypassed by reordering them.
#
# Compared with the 20230128025114 version, the scalar cast check now sits in
# the `else` branch, so an 'in' filter is validated only as an array.
defmodule Realtime.Tenants.Migrations.UpdateSubscriptionCheckFiltersForInFilterNonTextTypes do
  @moduledoc false

  use Ecto.Migration

  # Installs the function with `create or replace`.
  def change do
    execute("
      create or replace function realtime.subscription_check_filters()
        returns trigger
        language plpgsql
      as $$
      /*
      Validates that the user defined filters for a subscription:
      - refer to valid columns that the claimed role may access
      - values are coercable to the correct column type
      */
      declare
        col_names text[] = coalesce(
            array_agg(c.column_name order by c.ordinal_position),
            '{}'::text[]
          )
          from
            information_schema.columns c
          where
            format('%I.%I', c.table_schema, c.table_name)::regclass = new.entity
            and pg_catalog.has_column_privilege(
              (new.claims ->> 'role'),
              format('%I.%I', c.table_schema, c.table_name)::regclass,
              c.column_name,
              'SELECT'
            );
        filter realtime.user_defined_filter;
        col_type regtype;

        in_val jsonb;
      begin
        for filter in select * from unnest(new.filters) loop
          -- Filtered column is valid
          if not filter.column_name = any(col_names) then
            raise exception 'invalid column for filter %', filter.column_name;
          end if;

          -- Type is sanitized and safe for string interpolation
          col_type = (
            select atttypid::regtype
            from pg_catalog.pg_attribute
            where attrelid = new.entity
              and attname = filter.column_name
          );
          if col_type is null then
            raise exception 'failed to lookup type for column %', filter.column_name;
          end if;

          -- Set maximum number of entries for in filter
          if filter.op = 'in'::realtime.equality_op then
            in_val = realtime.cast(filter.value, (col_type::text || '[]')::regtype);
            if coalesce(jsonb_array_length(in_val), 0) > 100 then
              raise exception 'too many values for `in` filter.
Maximum 100';
            end if;
          else
            -- raises an exception if value is not coercable to type
            perform realtime.cast(filter.value, col_type);
          end if;
        end loop;

        -- Apply consistent order to filters so the unique constraint on
        -- (subscription_id, entity, filters) can't be tricked by a different filter order
        new.filters = coalesce(
          array_agg(f order by f.column_name, f.op, f.value),
          '{}'
        ) from unnest(new.filters) f;

        return new;
      end;
      $$;
    ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20230228184745_convert_commit_timestamp_to_utc.ex
================================================
# Migration: replaces `realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)`
# (returns `setof realtime.wal_rls`).
#
# The change visible in this version is how `commit_timestamp` is produced:
# the WAL timestamp is cast to timestamptz, shifted with `at time zone 'utc'`,
# and then formatted as 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'. The trailing "Z" is a
# literal in the format string, so the explicit UTC conversion is what makes
# the emitted value match that suffix; milliseconds are included.
#
# The remaining rules are the same as in the preceding apply_rls migrations:
# per-role column privileges, 64-byte truncation for oversized payloads,
# primary-key-only `old_record` for DELETE on RLS-enabled tables, and
# subscriptions considered when filters pass or the action is 'DELETE'.
defmodule Realtime.Tenants.Migrations.ConvertCommitTimestampToUtc do
  @moduledoc false

  use Ecto.Migration

  # Installs the function with `create or replace`.
  def change do
    execute("
      create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)
        returns setof realtime.wal_rls
        language plpgsql
        volatile
      as $$
      declare
        -- Regclass of the table e.g. public.notes
        entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass;

        -- I, U, D, T: insert, update ...
action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 
'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( ((wal ->> 'timestamp')::timestamptz at time zone 'utc'), 'YYYY-MM-DD\"T\"HH24:MI:SS.MS\"Z\"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add \"record\" key for insert and update || case when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', ( select jsonb_object_agg((c).name, (c).value) from unnest(columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) 
else '{}'::jsonb end -- Add \"old_record\" key for update and delete || case when action = 'UPDATE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) when action = 'DELETE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) and ( not is_rls_enabled or (c).is_pkey ) -- if RLS enabled, we can't secure deletes so filter to pkey ) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and ( realtime.is_visible_through_filters(columns, subs.filters) or action = 'DELETE' ) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform set_config('role', working_role::text, true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end 
)::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$; ") end end

================================================
FILE: lib/realtime/tenants/repo/migrations/20230308225145_output_full_record_when_unchanged_toast.ex
================================================
defmodule Realtime.Tenants.Migrations.OutputFullRecordWhenUnchangedToast do
  @moduledoc false

  use Ecto.Migration

  # Replaces realtime.apply_rls(wal, max_record_bytes).
  #
  # The function loops over every distinct claims_role subscribed to the changed
  # table and emits one realtime.wal_rls row per role:
  #   * "Error 400" when a non-DELETE change has no primary-key column,
  #   * "Error 401" when the role cannot SELECT every primary-key column,
  #   * otherwise the change payload plus the subscription ids that pass the
  #     subscription filters and, when RLS is enabled, the `walrus_rls_stmt`
  #     prepared-statement check executed under that role and its JWT claims.
  #
  # In this version the "record" object for INSERT/UPDATE is aggregated from a
  # full outer join of `columns` and `old_columns`, so a column that appears
  # only in the WAL identity (the embedded SQL comment calls this "unchanged
  # toast") takes its name and value from the old record.
  #
  # The statement is passed as a plain double-quoted string, hence the \" escapes.
  def change do
    execute("
    create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024)
      returns setof realtime.wal_rls
      language plpgsql
      volatile
    as $$
    declare
      -- Regclass of the table e.g. public.notes
      entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass;

      -- I, U, D, T: insert, update ...
      action realtime.action = (
        case wal ->> 'action'
          when 'I' then 'INSERT'
          when 'U' then 'UPDATE'
          when 'D' then 'DELETE'
          else 'ERROR'
        end
      );

      -- Is row level security enabled for the table
      is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_;

      subscriptions realtime.subscription[] = array_agg(subs)
        from
          realtime.subscription subs
        where
          subs.entity = entity_;

      -- Subscription vars
      roles regrole[] = array_agg(distinct us.claims_role)
        from
          unnest(subscriptions) us;

      working_role regrole;
      claimed_role regrole;
      claims jsonb;

      subscription_id uuid;
      subscription_has_access bool;
      visible_to_subscription_ids uuid[] = '{}';

      -- structured info for wal's columns
      columns realtime.wal_column[];
      -- previous identity values for update/delete
      old_columns realtime.wal_column[];

      error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes;

      -- Primary jsonb output for record
      output jsonb;

    begin
      perform set_config('role', null, true);

      columns =
        array_agg(
          (
            x->>'name',
            x->>'type',
            x->>'typeoid',
            realtime.cast(
              (x->'value') #>> '{}',
              coalesce(
                (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4
                (x->>'type')::regtype
              )
            ),
            (pks ->> 'name') is not null,
            true
          )::realtime.wal_column
        )
        from
          jsonb_array_elements(wal -> 'columns') x
          left join jsonb_array_elements(wal -> 'pk') pks
            on (x ->> 'name') = (pks ->> 'name');

      old_columns =
        array_agg(
          (
            x->>'name',
            x->>'type',
            x->>'typeoid',
            realtime.cast(
              (x->'value') #>> '{}',
              coalesce(
                (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4
                (x->>'type')::regtype
              )
            ),
            (pks ->> 'name') is not null,
            true
          )::realtime.wal_column
        )
        from
          jsonb_array_elements(wal -> 'identity') x
          left join jsonb_array_elements(wal -> 'pk') pks
            on (x ->> 'name') = (pks ->> 'name');

      for working_role in select * from unnest(roles) loop

        -- Update `is_selectable` for columns and old_columns
        columns =
          array_agg(
            (
              c.name,
              c.type_name,
              c.type_oid,
              c.value,
              c.is_pkey,
              pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT')
            )::realtime.wal_column
          )
          from
            unnest(columns) c;

        old_columns =
          array_agg(
            (
              c.name,
              c.type_name,
              c.type_oid,
              c.value,
              c.is_pkey,
              pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT')
            )::realtime.wal_column
          )
          from
            unnest(old_columns) c;

        if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then
          return next (
            jsonb_build_object(
              'schema', wal ->> 'schema',
              'table', wal ->> 'table',
              'type', action
            ),
            is_rls_enabled,
            -- subscriptions is already filtered by entity
            (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role),
            array['Error 400: Bad Request, no primary key']
          )::realtime.wal_rls;

        -- The claims role does not have SELECT permission to the primary key of entity
        elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then
          return next (
            jsonb_build_object(
              'schema', wal ->> 'schema',
              'table', wal ->> 'table',
              'type', action
            ),
            is_rls_enabled,
            (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role),
            array['Error 401: Unauthorized']
          )::realtime.wal_rls;

        else
          output = jsonb_build_object(
            'schema', wal ->> 'schema',
            'table', wal ->> 'table',
            'type', action,
            'commit_timestamp', to_char(
              ((wal ->> 'timestamp')::timestamptz at time zone 'utc'),
              'YYYY-MM-DD\"T\"HH24:MI:SS.MS\"Z\"'
            ),
            'columns', (
              select
                jsonb_agg(
                  jsonb_build_object(
                    'name', pa.attname,
                    'type', pt.typname
                  )
                  order by pa.attnum asc
                )
              from
                pg_attribute pa
                join pg_type pt
                  on pa.atttypid = pt.oid
              where
                attrelid = entity_
                and attnum > 0
                and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT')
            )
          )
          -- Add \"record\" key for insert and update
          || case
            when action in ('INSERT', 'UPDATE') then
              jsonb_build_object(
                'record',
                (
                  select
                    jsonb_object_agg(
                      -- if unchanged toast, get column name and value from old record
                      coalesce((c).name, (oc).name),
                      case
                        when (c).name is null then (oc).value
                        else (c).value
                      end
                    )
                  from
                    unnest(columns) c
                    full outer join unnest(old_columns) oc
                      on (c).name = (oc).name
                  where
                    coalesce((c).is_selectable, (oc).is_selectable)
                    and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64))
                )
              )
            else '{}'::jsonb
          end
          -- Add \"old_record\" key for update and delete
          || case
            when action = 'UPDATE' then
              jsonb_build_object(
                'old_record',
                (
                  select jsonb_object_agg((c).name, (c).value)
                  from unnest(old_columns) c
                  where
                    (c).is_selectable
                    and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64))
                )
              )
            when action = 'DELETE' then
              jsonb_build_object(
                'old_record',
                (
                  select jsonb_object_agg((c).name, (c).value)
                  from unnest(old_columns) c
                  where
                    (c).is_selectable
                    and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64))
                    and ( not is_rls_enabled or (c).is_pkey ) -- if RLS enabled, we can't secure deletes so filter to pkey
                )
              )
            else '{}'::jsonb
          end;

          -- Create the prepared statement
          if is_rls_enabled and action <> 'DELETE' then
            if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then
              deallocate walrus_rls_stmt;
            end if;
            execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns);
          end if;

          visible_to_subscription_ids = '{}';

          for subscription_id, claims in (
            select
              subs.subscription_id,
              subs.claims
            from
              unnest(subscriptions) subs
            where
              subs.entity = entity_
              and subs.claims_role = working_role
              and (
                realtime.is_visible_through_filters(columns, subs.filters)
                or action = 'DELETE'
              )
          ) loop

            if not is_rls_enabled or action = 'DELETE' then
              visible_to_subscription_ids = visible_to_subscription_ids || subscription_id;
            else
              -- Check if RLS allows the role to see the record
              perform
                set_config('role', working_role::text, true),
                set_config('request.jwt.claims', claims::text, true);

              execute 'execute walrus_rls_stmt' into subscription_has_access;

              if subscription_has_access then
                visible_to_subscription_ids = visible_to_subscription_ids || subscription_id;
              end if;
            end if;
          end loop;

          perform set_config('role', null, true);

          return next (
            output,
            is_rls_enabled,
            visible_to_subscription_ids,
            case
              when error_record_exceeds_max_size then array['Error 413: Payload Too Large']
              else '{}'
            end
          )::realtime.wal_rls;

        end if;
      end loop;

      perform set_config('role', null, true);
    end;
    $$;
    ")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20230328144023_create_list_changes_function.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateListChangesFunction do
  @moduledoc false

  use Ecto.Migration

  # Creates realtime.list_changes(publication, slot_name, max_changes, max_record_bytes).
  #
  # The SQL function:
  #   1. `pub`  - derives, for the named publication, the comma-separated list of
  #               published actions (insert/update/delete) and the list of
  #               published tables (tables whose name contains a space are
  #               filtered out).
  #   2. `w2j`  - reads up to `max_changes` changes from the logical slot with
  #               pg_logical_slot_get_changes, passing those two lists as the
  #               'actions' and 'add-tables' options.
  #   3. feeds every change through realtime.apply_rls and keeps only rows that
  #      have at least one subscription id, and only when the publication has
  #      tables at all (`w2j_add_tables <> ''`).
  #
  # The function is declared with `set log_min_messages to 'fatal'`.
  def change do
    execute(
      "create or replace function realtime.list_changes(publication name, slot_name name, max_changes int, max_record_bytes int)
        returns setof realtime.wal_rls
        language sql
        set log_min_messages to 'fatal'
      as $$
        with pub as (
          select
            concat_ws(
              ',',
              case when bool_or(pubinsert) then 'insert' else null end,
              case when bool_or(pubupdate) then 'update' else null end,
              case when bool_or(pubdelete) then 'delete' else null end
            ) as w2j_actions,
            coalesce(
              string_agg(
                realtime.quote_wal2json(format('%I.%I', schemaname, tablename)::regclass),
                ','
              ) filter (where ppt.tablename is not null and ppt.tablename not like '% %'),
              ''
            ) w2j_add_tables
          from
            pg_publication pp
            left join pg_publication_tables ppt
              on pp.pubname = ppt.pubname
          where
            pp.pubname = publication
          group by
            pp.pubname
          limit 1
        ),
        w2j as (
          select
            x.*, pub.w2j_add_tables
          from
            pub,
            pg_logical_slot_get_changes(
              slot_name, null, max_changes,
              'include-pk', 'true',
              'include-transaction', 'false',
              'include-timestamp', 'true',
              'include-type-oids', 'true',
              'format-version', '2',
              'actions', pub.w2j_actions,
              'add-tables', pub.w2j_add_tables
            ) x
        )
        select
          xyz.wal,
          xyz.is_rls_enabled,
          xyz.subscription_ids,
          xyz.errors
        from
          w2j,
          realtime.apply_rls(
            wal := w2j.data::jsonb,
            max_record_bytes := max_record_bytes
          ) xyz(wal, is_rls_enabled, subscription_ids, errors)
        where
          w2j.w2j_add_tables <> ''
          and xyz.subscription_ids[1] is not null
      $$;"
    )
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20231018144023_create_channels.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateChannels do
  @moduledoc false

  use Ecto.Migration

  # Creates realtime.channels (non-null `name` plus timestamps) and a unique
  # index on `name`. Both statements pass prefix: "realtime" explicitly.
  def change do
    create table(:channels, prefix: "realtime") do
      add(:name, :string, null: false)
      timestamps()
    end

    create unique_index(:channels, [:name], prefix: "realtime")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20231204144023_set_required_grants.ex
================================================
defmodule Realtime.Tenants.Migrations.SetRequiredGrants do
  @moduledoc false

  use Ecto.Migration

  # Grants the roles postgres, anon, authenticated and service_role:
  # USAGE on the realtime schema, SELECT on all its tables, EXECUTE on all its
  # functions and USAGE on all its sequences. The "ALL ... IN SCHEMA" forms are
  # evaluated when the migration runs.
  def change do
    execute("""
    GRANT USAGE ON SCHEMA realtime TO postgres, anon, authenticated, service_role
    """)

    execute("""
    GRANT SELECT ON ALL TABLES IN SCHEMA realtime TO postgres, anon, authenticated, service_role
    """)

    execute("""
    GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA realtime TO postgres, anon, authenticated, service_role
    """)

    execute("""
    GRANT USAGE ON ALL SEQUENCES IN SCHEMA realtime TO postgres, anon, authenticated, service_role
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20231204144024_create_rls_helper_functions.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateRlsHelperFunctions do
  @moduledoc false

  use Ecto.Migration

  # Defines realtime.channel_name(), a stable SQL function that returns the
  # `realtime.channel_name` setting as text. current_setting is called with its
  # second argument set to true, and nullif turns an empty string into NULL.
  def change do
    execute("""
    create or replace function realtime.channel_name() returns text as $$
    select nullif(current_setting('realtime.channel_name', true), '')::text;
    $$ language sql stable;
    """)
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20231204144025_enable_channels_rls.ex
================================================
defmodule Realtime.Tenants.Migrations.EnableChannelsRls do
  @moduledoc false

  use Ecto.Migration

  # Enables row level security on realtime.channels.
  def change do
    execute("ALTER TABLE realtime.channels ENABLE row level security")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20240108234812_add_channels_column_for_write_check.ex
================================================
defmodule Realtime.Tenants.Migrations.AddChannelsColumnForWriteCheck do
  @moduledoc false

  use Ecto.Migration

  # Adds a boolean `check` column (default false) to realtime.channels.
  def change do
    alter table(:channels, prefix: "realtime") do
      add :check, :boolean, default: false
    end
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20240109165339_add_update_grant_to_channels.ex
================================================
defmodule Realtime.Tenants.Migrations.AddUpdateGrantToChannels do
  @moduledoc false

  use Ecto.Migration

  # Grants UPDATE on realtime.channels to postgres, anon, authenticated and
  # service_role.
  def change do
    execute("""
    GRANT UPDATE ON realtime.channels TO postgres, anon, authenticated, service_role
    """)
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20240227174441_add_broadcast_permissions_table.ex
================================================
defmodule Realtime.Tenants.Migrations.AddBroadcastsPoliciesTable do
  @moduledoc false

  use Ecto.Migration

  # Creates the `broadcasts` table: one row per channel (unique index on
  # channel_id, rows removed when the channel is deleted) with a non-null
  # boolean `check` defaulting to false. Then enables RLS on it and grants
  # SELECT and UPDATE to postgres, anon, authenticated and service_role.
  #
  # NOTE(review): table/1 and unique_index/2 get no `prefix:` here, while the
  # raw SQL targets realtime.broadcasts - presumably the migration runner
  # supplies the realtime prefix; confirm against the caller.
  def change do
    create table(:broadcasts) do
      add :channel_id, references(:channels, on_delete: :delete_all), null: false
      add :check, :boolean, default: false, null: false
      timestamps()
    end

    create unique_index(:broadcasts, :channel_id)

    execute("ALTER TABLE realtime.broadcasts ENABLE row level security")
    execute("GRANT SELECT ON realtime.broadcasts TO postgres, anon, authenticated, service_role")
    execute("GRANT UPDATE ON realtime.broadcasts TO postgres, anon, authenticated, service_role")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20240311171622_add_insert_and_delete_grant_to_channels.ex
================================================
defmodule Realtime.Tenants.Migrations.AddInsertAndDeleteGrantToChannels do
  @moduledoc false

  use Ecto.Migration

  # Grants, to postgres, anon, authenticated and service_role:
  #   * INSERT and DELETE on realtime.channels,
  #   * INSERT on realtime.broadcasts,
  #   * USAGE on the realtime.broadcasts_id_seq sequence.
  def change do
    execute("""
    GRANT INSERT, DELETE ON realtime.channels TO postgres, anon, authenticated, service_role
    """)

    execute("""
    GRANT INSERT ON realtime.broadcasts TO postgres, anon, authenticated, service_role
    """)

    execute("""
    GRANT USAGE ON SEQUENCE realtime.broadcasts_id_seq TO postgres, anon, authenticated, service_role
    """)
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20240321100241_add_presences_permissions_table.ex
================================================
defmodule Realtime.Tenants.Migrations.AddPresencesPoliciesTable do
  @moduledoc false

  use Ecto.Migration

  # Creates the `presences` table with the same shape as `broadcasts`
  # (channel_id reference with on_delete: :delete_all, non-null boolean `check`
  # defaulting to false, timestamps, unique index on channel_id), enables RLS
  # on it and grants SELECT, UPDATE and INSERT on the table plus USAGE on
  # realtime.presences_id_seq to postgres, anon, authenticated and service_role.
  #
  # NOTE(review): no `prefix:` is passed to table/1 although the raw SQL uses
  # realtime.presences - presumably the runner sets the prefix; confirm.
  def change do
    create table(:presences) do
      add :channel_id, references(:channels, on_delete: :delete_all), null: false
      add :check, :boolean, default: false, null: false
      timestamps()
    end

    create unique_index(:presences, :channel_id)

    execute("ALTER TABLE realtime.presences ENABLE row level security")
    execute("GRANT SELECT ON realtime.presences TO postgres, anon, authenticated, service_role")
    execute("GRANT UPDATE ON realtime.presences TO postgres, anon, authenticated, service_role")

    execute("""
    GRANT INSERT ON realtime.presences TO postgres, anon, authenticated, service_role
    """)

    execute("""
    GRANT USAGE ON SEQUENCE realtime.presences_id_seq TO postgres, anon, authenticated, service_role
    """)
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20240401105812_create_realtime_admin_and_move_ownership.ex
================================================
defmodule Realtime.Tenants.Migrations.CreateRealtimeAdminAndMoveOwnership do
  @moduledoc false

  use Ecto.Migration

  # Introduces the supabase_realtime_admin role and hands the realtime objects
  # over to it:
  #   1. creates the role (NOINHERIT NOLOGIN NOREPLICATION) unless pg_roles
  #      already lists it, in which case only a NOTICE is raised;
  #   2. grants it all privileges on the realtime schema and on all tables,
  #      sequences and functions in that schema;
  #   3. makes it the owner of channels, broadcasts, presences and
  #      realtime.channel_name();
  #   4. grants the role to postgres.
  def change do
    execute("""
    DO
    $do$
    BEGIN
      IF EXISTS (
        SELECT FROM pg_catalog.pg_roles
        WHERE rolname = 'supabase_realtime_admin') THEN
        RAISE NOTICE 'Role "supabase_realtime_admin" already exists. Skipping.';
      ELSE
        CREATE ROLE supabase_realtime_admin WITH NOINHERIT NOLOGIN NOREPLICATION;
      END IF;
    END
    $do$;
    """)

    execute("GRANT ALL PRIVILEGES ON SCHEMA realtime TO supabase_realtime_admin")
    execute("GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA realtime TO supabase_realtime_admin")
    execute("GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA realtime TO supabase_realtime_admin")
    execute("GRANT ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA realtime TO supabase_realtime_admin")

    execute("ALTER table realtime.channels OWNER to supabase_realtime_admin")
    execute("ALTER table realtime.broadcasts OWNER to supabase_realtime_admin")
    execute("ALTER table realtime.presences OWNER TO supabase_realtime_admin")
    execute("ALTER function realtime.channel_name() owner to supabase_realtime_admin")

    execute("GRANT supabase_realtime_admin TO postgres")
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20240418121054_remove_check_columns.ex
================================================
defmodule Realtime.Tenants.Migrations.RemoveCheckColumns do
  @moduledoc false

  use Ecto.Migration

  # Drops the `check` column from channels, broadcasts and presences.
  # remove/1 is given no column type.
  def change do
    alter table(:channels) do
      remove :check
    end

    alter table(:broadcasts) do
      remove :check
    end

    alter table(:presences) do
      remove :check
    end
  end
end

================================================
FILE: 
lib/realtime/tenants/repo/migrations/20240523004032_redefine_authorization_tables.ex ================================================ defmodule Realtime.Tenants.Migrations.RedefineAuthorizationTables do @moduledoc false use Ecto.Migration def change do drop table(:broadcasts), mode: :cascade drop table(:presences), mode: :cascade drop table(:channels), mode: :cascade create_if_not_exists table(:messages) do add :topic, :text, null: false add :extension, :text, null: false timestamps() end create_if_not_exists index(:messages, [:topic]) execute("ALTER TABLE realtime.messages ENABLE row level security") execute("GRANT SELECT ON realtime.messages TO postgres, anon, authenticated, service_role") execute("GRANT UPDATE ON realtime.messages TO postgres, anon, authenticated, service_role") execute(""" GRANT INSERT ON realtime.messages TO postgres, anon, authenticated, service_role """) execute(""" GRANT USAGE ON SEQUENCE realtime.messages_id_seq TO postgres, anon, authenticated, service_role """) execute("ALTER table realtime.messages OWNER to supabase_realtime_admin") execute(""" DROP function realtime.channel_name """) execute(""" create or replace function realtime.topic() returns text as $$ select nullif(current_setting('realtime.topic', true), '')::text; $$ language sql stable; """) execute("ALTER function realtime.topic() owner to supabase_realtime_admin") end end ================================================ FILE: lib/realtime/tenants/repo/migrations/20240618124746_fix_walrus_role_handling.ex ================================================ defmodule Realtime.Tenants.Migrations.FixWalrusRoleHandling do @moduledoc false use Ecto.Migration def change do execute """ create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024) returns setof realtime.wal_rls language plpgsql volatile as $$ declare -- Regclass of the table e.g. public.notes entity_ regclass = (quote_ident(wal ->> 'schema') || '.' 
|| quote_ident(wal ->> 'table'))::regclass; -- I, U, D, T: insert, update ... action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role::text) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, 
c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( ((wal ->> 'timestamp')::timestamptz at time zone 'utc'), 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add "record" key for insert and update || case when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', ( select jsonb_object_agg( -- if unchanged toast, get column name and value from old record 
coalesce((c).name, (oc).name), case when (c).name is null then (oc).value else (c).value end ) from unnest(columns) c full outer join unnest(old_columns) oc on (c).name = (oc).name where coalesce((c).is_selectable, (oc).is_selectable) and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) else '{}'::jsonb end -- Add "old_record" key for update and delete || case when action = 'UPDATE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) when action = 'DELETE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) and ( not is_rls_enabled or (c).is_pkey ) -- if RLS enabled, we can't secure deletes so filter to pkey ) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and ( realtime.is_visible_through_filters(columns, subs.filters) or action = 'DELETE' ) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform -- Trim leading and trailing quotes from working_role because set_config -- doesn't recognize the role as valid if they are included set_config('role', trim(both '"' from working_role::text), true), 
set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$; """ end end ================================================ FILE: lib/realtime/tenants/repo/migrations/20240801235015_unlogged_messages_table.ex ================================================ defmodule Realtime.Tenants.Migrations.UnloggedMessagesTable do @moduledoc false use Ecto.Migration def change do execute """ -- Commented to have oriole compatability -- ALTER TABLE realtime.messages SET UNLOGGED; """ end end ================================================ FILE: lib/realtime/tenants/repo/migrations/20240805133720_logged_messages_table.ex ================================================ defmodule Realtime.Tenants.Migrations.LoggedMessagesTable do @moduledoc false use Ecto.Migration def change do execute """ -- Commented to have oriole compatability -- ALTER TABLE realtime.messages SET LOGGED; """ end end ================================================ FILE: lib/realtime/tenants/repo/migrations/20240827160934_filter_delete_postgres_changes.ex ================================================ defmodule Realtime.Tenants.Migrations.FilterDeletePostgresChanges do @moduledoc false use Ecto.Migration def change do execute """ create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024) returns setof realtime.wal_rls language plpgsql volatile as $$ declare -- Regclass of the table e.g. public.notes entity_ regclass = (quote_ident(wal ->> 'schema') || '.' 
|| quote_ident(wal ->> 'table'))::regclass; -- I, U, D, T: insert, update ... action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role::text) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, 
c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( ((wal ->> 'timestamp')::timestamptz at time zone 'utc'), 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add "record" key for insert and update || case when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', ( select jsonb_object_agg( -- if unchanged toast, get column name and value from old record 
coalesce((c).name, (oc).name), case when (c).name is null then (oc).value else (c).value end ) from unnest(columns) c full outer join unnest(old_columns) oc on (c).name = (oc).name where coalesce((c).is_selectable, (oc).is_selectable) and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) else '{}'::jsonb end -- Add "old_record" key for update and delete || case when action = 'UPDATE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) when action = 'DELETE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) and ( not is_rls_enabled or (c).is_pkey ) -- if RLS enabled, we can't secure deletes so filter to pkey ) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and ( realtime.is_visible_through_filters(columns, subs.filters) or ( action = 'DELETE' and realtime.is_visible_through_filters(old_columns, subs.filters) ) ) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform -- Trim leading and trailing quotes from working_role because set_config -- doesn't recognize the role as valid if they are 
included set_config('role', trim(both '"' from working_role::text), true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$; """ end end
================================================
FILE: lib/realtime/tenants/repo/migrations/20240919163303_add_payload_to_messages.ex
================================================
defmodule Realtime.Tenants.Migrations.AddPayloadToMessages do
  @moduledoc false

  use Ecto.Migration

  # Adds the broadcast columns (payload, event, topic, private) to the messages
  # table, gives inserted_at/updated_at a now() default, and creates the first
  # versions of realtime.send and realtime.broadcast_changes.
  #
  # NOTE(review): `modify` without `:from` and single-argument `execute` are
  # presumably not reversible by Ecto, so rolling this `change/0` back is
  # expected to fail - confirm before relying on rollback.
  def change do
    alter table(:messages) do
      add_if_not_exists :payload, :map
      add_if_not_exists :event, :text
      add_if_not_exists :topic, :text
      add_if_not_exists :private, :boolean, default: true
      modify :inserted_at, :utc_datetime, default: fragment("now()")
      modify :updated_at, :utc_datetime, default: fragment("now()")
    end

    # realtime.send: inserts one row into realtime.messages tagged with
    # extension 'broadcast'. `private` defaults to true.
    execute """
    CREATE OR REPLACE FUNCTION realtime.send(payload jsonb, event text, topic text, private boolean DEFAULT true)
    RETURNS void
    AS $$
    BEGIN
      INSERT INTO realtime.messages (payload, event, topic, private, extension)
      VALUES (payload, event, topic, private, 'broadcast');
    END;
    $$
    LANGUAGE plpgsql;
    """

    # realtime.broadcast_changes: packs OLD/NEW plus operation, table and schema
    # into a jsonb object and forwards it through realtime.send. It calls the
    # three-argument form, so the message is sent with private = true.
    # Every exception raised inside the body (including the two explicit
    # RAISE EXCEPTION statements) is caught by the WHEN OTHERS handler and
    # re-raised as 'Failed to process the row: <original message>'.
    execute """
    CREATE OR REPLACE FUNCTION realtime.broadcast_changes (topic_name text, event_name text, operation text, table_name text, table_schema text, NEW record, OLD record, level text DEFAULT 'ROW')
    RETURNS void
    AS $$
    DECLARE
      -- Declare a variable to hold the JSONB representation of the row
      row_data jsonb := '{}'::jsonb;
    BEGIN
      IF level = 'STATEMENT' THEN
        RAISE EXCEPTION 'function can only be triggered for each row, not for each statement';
      END IF;
      -- Check the operation type and handle accordingly
      IF operation = 'INSERT' OR operation = 'UPDATE' OR operation = 'DELETE' THEN
        row_data := jsonb_build_object('old_record', OLD, 'record', NEW, 'operation', operation, 'table', table_name, 'schema', table_schema);
        PERFORM realtime.send (row_data, event_name, topic_name);
      ELSE
        RAISE EXCEPTION 'Unexpected operation type: %', operation;
      END IF;
    EXCEPTION
      WHEN OTHERS THEN
        RAISE EXCEPTION 'Failed to process the row: %', SQLERRM;
    END;
    $$
    LANGUAGE plpgsql;
    """
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20240919163305_change_messages_id_type.ex
================================================
defmodule Realtime.Tenants.Migrations.ChangeMessagesIdType do
  @moduledoc false

  use Ecto.Migration

  # Despite the module name, this migration only adds a `uuid` column (if it is
  # not already present); the existing `id` column is not altered here.
  def change do
    alter table(:messages) do
      add_if_not_exists :uuid, :uuid
    end
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20241019105805_uuid_auto_generation.ex
================================================
defmodule Realtime.Tenants.Migrations.UuidAutoGeneration do
  @moduledoc false

  use Ecto.Migration

  # Makes messages.uuid NOT NULL and database-generated via gen_random_uuid().
  def change do
    alter table(:messages) do
      modify :uuid, :uuid, null: false, default: fragment("gen_random_uuid()")
    end
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20241030150047_messages_partitioning.ex
================================================
defmodule Realtime.Tenants.Migrations.MessagesPartitioning do
  @moduledoc false

  use Ecto.Migration

  # Replaces realtime.messages with a table partitioned by range on inserted_at.
  # Steps, in order:
  #   1. create realtime.messages_new (partitioned) and enable RLS on it
  #   2. re-create every policy found on realtime.messages on the new table
  #   3. swap the tables by renaming, then drop the old one
  #   4. re-create the id sequence/default, ownership, grants and RLS
  #   5. redefine realtime.send so it creates a daily partition on demand
  #
  # No statement in this migration copies rows from the old table, so rows
  # present in realtime.messages at migration time are dropped with it.
  def change do
    # In the new table `uuid` is TEXT and `private` defaults to FALSE; the
    # primary key includes inserted_at, the partition key.
    execute("""
    CREATE TABLE IF NOT EXISTS realtime.messages_new (
      id BIGSERIAL,
      uuid TEXT DEFAULT gen_random_uuid(),
      topic TEXT NOT NULL,
      extension TEXT NOT NULL,
      payload JSONB,
      event TEXT,
      private BOOLEAN DEFAULT FALSE,
      updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
      inserted_at TIMESTAMP NOT NULL DEFAULT NOW(),
      PRIMARY KEY (id, inserted_at)
    ) PARTITION BY RANGE (inserted_at)
    """)

    execute("ALTER TABLE realtime.messages_new ENABLE ROW LEVEL SECURITY")

    # Rebuilds each policy from its pg_policies row: permissive/restrictive,
    # command, roles, USING and WITH CHECK. Policy and role names go through
    # quote_ident; qual/with_check are concatenated as read from the catalog.
    execute("""
    DO $$
    DECLARE
      rec record;
      sql text;
      role_list text;
    BEGIN
      FOR rec IN
        SELECT *
        FROM pg_policies
        WHERE schemaname = 'realtime'
        AND tablename = 'messages'
      LOOP
        -- Start constructing the create policy statement
        sql := 'CREATE POLICY ' || quote_ident(rec.policyname) || ' ON realtime.messages_new ';

        IF (rec.permissive = 'PERMISSIVE') THEN
          sql := sql || 'AS PERMISSIVE ';
        ELSE
          sql := sql || 'AS RESTRICTIVE ';
        END IF;

        sql := sql || ' FOR ' || rec.cmd;

        -- Include roles if specified
        IF rec.roles IS NOT NULL AND array_length(rec.roles, 1) > 0 THEN
          role_list := (
            SELECT string_agg(quote_ident(role), ', ')
            FROM unnest(rec.roles) AS role
          );
          sql := sql || ' TO ' || role_list;
        END IF;

        -- Include using clause if specified
        IF rec.qual IS NOT NULL THEN
          sql := sql || ' USING (' || rec.qual || ')';
        END IF;

        -- Include with check clause if specified
        IF rec.with_check IS NOT NULL THEN
          sql := sql || ' WITH CHECK (' || rec.with_check || ')';
        END IF;

        -- Output the constructed sql for debugging purposes
        RAISE NOTICE 'Executing: %', sql;

        -- Execute the constructed sql statement
        EXECUTE sql;
      END LOOP;
    END
    $$
    """)

    # Swap the new table into place and remove the old one.
    execute("ALTER TABLE realtime.messages RENAME TO messages_old")
    execute("ALTER TABLE realtime.messages_new RENAME TO messages")
    execute("DROP TABLE realtime.messages_old")

    # Give `id` a sequence under the canonical name, then restore ownership,
    # grants and RLS on the renamed table.
    execute("CREATE SEQUENCE IF NOT EXISTS realtime.messages_id_seq")
    execute("ALTER TABLE realtime.messages ALTER COLUMN id SET DEFAULT nextval('realtime.messages_id_seq')")
    execute("ALTER table realtime.messages OWNER to supabase_realtime_admin")
    execute("GRANT USAGE ON SEQUENCE realtime.messages_id_seq TO postgres, anon, authenticated, service_role")
    execute("GRANT SELECT ON realtime.messages TO postgres, anon, authenticated, service_role")
    execute("GRANT UPDATE ON realtime.messages TO postgres, anon, authenticated, service_role")
    execute("GRANT INSERT ON realtime.messages TO postgres, anon, authenticated, service_role")
    execute("ALTER TABLE realtime.messages ENABLE ROW LEVEL SECURITY")

    # realtime.send now creates the partition named messages_YYYY_MM_DD when no
    # relation of that name exists in the realtime schema.
    # Two details of this version that later migrations in this directory
    # revise: the CREATE TABLE target is not schema-qualified, and the partition
    # bounds run from NOW() to NOW() + 1 day, i.e. are not day-aligned.
    execute("""
    CREATE OR REPLACE FUNCTION realtime.send(payload jsonb, event text, topic text, private boolean DEFAULT true)
    RETURNS void
    AS $$
    DECLARE
      partition_name text;
    BEGIN
      partition_name := 'messages_' || to_char(NOW(), 'YYYY_MM_DD');

      IF NOT EXISTS (
        SELECT 1
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        WHERE n.nspname = 'realtime'
        AND c.relname = partition_name
      ) THEN
        EXECUTE format(
          'CREATE TABLE %I PARTITION OF realtime.messages FOR VALUES FROM (%L) TO (%L)',
          partition_name,
          NOW(),
          (NOW() + interval '1 day')::timestamp
        );
      END IF;

      INSERT INTO realtime.messages (payload, event, topic, private, extension)
      VALUES (payload, event, topic, private, 'broadcast');
    END;
    $$
    LANGUAGE plpgsql;
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20241108114728_messages_using_uuid.ex
================================================
defmodule Realtime.Tenants.Migrations.MessagesUsingUuid do
  @moduledoc false

  use Ecto.Migration

  # Collapses the two identifier columns into one: drops `id` and `uuid`, adds a
  # new uuid-typed `id` generated by gen_random_uuid(), re-adds the composite
  # primary key (id, inserted_at) and drops the now-unused id sequence.
  def change do
    alter table(:messages) do
      remove(:id)
      remove(:uuid)
      add(:id, :uuid, null: false, default: fragment("gen_random_uuid()"))
    end

    execute("ALTER TABLE realtime.messages ADD PRIMARY KEY (id, inserted_at)")
    execute("DROP SEQUENCE realtime.messages_id_seq")
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20241121104152_fix_send_function_.ex
================================================
defmodule Realtime.Tenants.Migrations.FixSendFunction do
  @moduledoc false

  use Ecto.Migration

  # We missed the schema prefix of `realtime.` in the create table partition statement.
  # This redefinition creates the partition as realtime.<partition_name>; the
  # existence check and the insert are otherwise the same as the prior version.
  def change do
    execute("""
    CREATE OR REPLACE FUNCTION realtime.send(payload jsonb, event text, topic text, private boolean DEFAULT true)
    RETURNS void
    AS $$
    DECLARE
      partition_name text;
    BEGIN
      partition_name := 'messages_' || to_char(NOW(), 'YYYY_MM_DD');

      IF NOT EXISTS (
        SELECT 1
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        WHERE n.nspname = 'realtime'
        AND c.relname = partition_name
      ) THEN
        EXECUTE format(
          'CREATE TABLE realtime.%I PARTITION OF realtime.messages FOR VALUES FROM (%L) TO (%L)',
          partition_name,
          NOW(),
          (NOW() + interval '1 day')::timestamp
        );
      END IF;

      INSERT INTO realtime.messages (payload, event, topic, private, extension)
      VALUES (payload, event, topic, private, 'broadcast');
    END;
    $$
    LANGUAGE plpgsql;
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20241130184212_recreate_entity_index_using_btree.ex
================================================
defmodule Realtime.Tenants.Migrations.RecreateEntityIndexUsingBtree do
  @moduledoc false

  use Ecto.Migration

  # Drops ix_realtime_subscription_entity and re-creates it as a btree index on
  # realtime.subscription(entity).
  def change do
    execute("drop index if exists \"realtime\".\"ix_realtime_subscription_entity\"")

    # Tries a concurrent build first; on ANY error it falls back to a regular
    # (blocking) build.
    # NOTE(review): CREATE INDEX CONCURRENTLY is presumably rejected inside a
    # DO block / transaction, in which case the fallback is the branch that
    # actually runs - confirm against the PostgreSQL docs.
    execute("""
    do $$
    begin
      create index concurrently if not exists ix_realtime_subscription_entity on realtime.subscription using btree (entity);
    exception
      when others then
        create index if not exists ix_realtime_subscription_entity on realtime.subscription using btree (entity);
    end$$;
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20241220035512_fix_send_function_partition_creation.ex
================================================
defmodule Realtime.Tenants.Migrations.FixSendFunctionPartitionCreation do
  @moduledoc false

  use Ecto.Migration

  # Reworks partition creation inside realtime.send:
  #   * bounds are day-aligned: [date_trunc('day', NOW()), +1 day), and the
  #     partition name is derived from that same start value
  #   * the catalog lookup is replaced by CREATE TABLE IF NOT EXISTS wrapped in
  #     a sub-block that ignores duplicate_table
  def change do
    execute("""
    CREATE OR REPLACE FUNCTION realtime.send(payload jsonb, event text, topic text, private boolean DEFAULT true)
    RETURNS void
    AS $$
    DECLARE
      partition_name text;
      partition_start timestamp;
      partition_end timestamp;
    BEGIN
      partition_start := date_trunc('day', NOW());
      partition_end := partition_start + interval '1 day';
      partition_name := 'messages_' || to_char(partition_start, 'YYYY_MM_DD');

      BEGIN
        EXECUTE format(
          'CREATE TABLE IF NOT EXISTS realtime.%I PARTITION OF realtime.messages FOR VALUES FROM (%L) TO (%L)',
          partition_name,
          partition_start,
          partition_end
        );
      EXCEPTION WHEN duplicate_table THEN
        -- Ignore; table already exists
      END;

      INSERT INTO realtime.messages (payload, event, topic, private, extension)
      VALUES (payload, event, topic, private, 'broadcast');
    END;
    $$
    LANGUAGE plpgsql;
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20241220123912_realtime_send_handle_exceptions_remove_partition_creation.ex
================================================
defmodule Realtime.Tenants.Migrations.RealtimeSendHandleExceptionsRemovePartitionCreation do
  @moduledoc false

  use Ecto.Migration

  # realtime.send no longer creates partitions. The insert runs in a sub-block
  # whose WHEN OTHERS handler reports the failure through pg_notify on the
  # 'realtime:system' channel (error text, function name, event, topic,
  # private) and does not re-raise, so callers of realtime.send do not see the
  # insert error.
  def change do
    execute("""
    CREATE OR REPLACE FUNCTION realtime.send(payload jsonb, event text, topic text, private boolean DEFAULT true ) RETURNS void
    AS $$
    BEGIN
      BEGIN
        -- Attempt to insert the message
        INSERT INTO realtime.messages (payload, event, topic, private, extension)
        VALUES (payload, event, topic, private, 'broadcast');
      EXCEPTION
        WHEN OTHERS THEN
          -- Capture and notify the error
          PERFORM pg_notify(
            'realtime:system',
            jsonb_build_object(
              'error', SQLERRM,
              'function', 'realtime.send',
              'event', event,
              'topic', topic,
              'private', private
            )::text
          );
      END;
    END;
    $$
    LANGUAGE plpgsql;
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20241224161212_realtime_send_sets_config.ex
================================================
defmodule Realtime.Tenants.Migrations.RealtimeSendSetsConfig do
  @moduledoc false

  use Ecto.Migration

  # Adds `SET LOCAL realtime.topic TO topic` ahead of the insert in
  # realtime.send; error reporting via pg_notify is unchanged.
  # NOTE(review): SET presumably takes its value literally, so this would store
  # the word 'topic' and not the argument's value - the later
  # RealtimeSendSetsTopicConfig migration switches to
  # EXECUTE format('SET LOCAL realtime.topic TO %L', topic). Confirm.
  def change do
    execute("""
    CREATE OR REPLACE FUNCTION realtime.send(payload jsonb, event text, topic text, private boolean DEFAULT true ) RETURNS void
    AS $$
    BEGIN
      BEGIN
        -- Set the topic configuration
        SET LOCAL realtime.topic TO topic;

        -- Attempt to insert the message
        INSERT INTO realtime.messages (payload, event, topic, private, extension)
        VALUES (payload, event, topic, private, 'broadcast');
      EXCEPTION
        WHEN OTHERS THEN
          -- Capture and notify the error
          PERFORM pg_notify(
            'realtime:system',
            jsonb_build_object(
              'error', SQLERRM,
              'function', 'realtime.send',
              'event', event,
              'topic', topic,
              'private', private
            )::text
          );
      END;
    END;
    $$
    LANGUAGE plpgsql;
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20250107150512_realtime_subscription_unlogged.ex
================================================
defmodule Realtime.Tenants.Migrations.RealtimeSubscriptionUnlogged do
  @moduledoc false

  use Ecto.Migration

  # The ALTER TABLE ... SET UNLOGGED statement is commented out inside the SQL
  # text, so the string sent to the database contains only SQL comments and
  # makes no schema change.
  def change do
    execute("""
    -- Commented to have oriole compatability
    -- ALTER TABLE realtime.subscription SET UNLOGGED;
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20250110162412_realtime_subscription_logged.ex
================================================
defmodule Realtime.Tenants.Migrations.RealtimeSubscriptionLogged do
  @moduledoc false

  use Ecto.Migration

  # PG Updates doesn't allow us to use UNLOGGED tables due to the fact that Sequences on PG14 still need to be logged
  #
  # As with the UNLOGGED migration before it, the statement is commented out in
  # the SQL text, so this sends only SQL comments and changes nothing.
  def change do
    execute("""
    -- Commented to have oriole compatability
    -- ALTER TABLE realtime.subscription SET LOGGED;
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20250123174212_remove_unused_publications.ex
================================================
defmodule Realtime.Tenants.Migrations.RemoveUnusedPublications do
  @moduledoc false

  use Ecto.Migration

  # Drops every publication whose name matches LIKE 'realtime_messages%' or
  # LIKE 'supabase_realtime_messages%'. Names are quoted with quote_ident
  # before being concatenated into the DROP statement.
  def change do
    execute("""
    DO $$
    DECLARE
      r RECORD;
    BEGIN
      FOR r IN
        SELECT pubname FROM pg_publication WHERE pubname LIKE 'realtime_messages%' or pubname LIKE 'supabase_realtime_messages%'
      LOOP
        EXECUTE 'DROP PUBLICATION IF EXISTS ' || quote_ident(r.pubname) || ';' ;
      END LOOP;
    END $$;
    """)
  end
end
================================================
FILE:
lib/realtime/tenants/repo/migrations/20250128220012_realtime_send_sets_topic_config.ex
================================================
defmodule Realtime.Tenants.Migrations.RealtimeSendSetsTopicConfig do
  @moduledoc false

  use Ecto.Migration

  # Redefines realtime.send so that realtime.topic is set through
  # EXECUTE format('SET LOCAL realtime.topic TO %L', topic), i.e. the value of
  # the `topic` argument is interpolated as a quoted literal. The insert and the
  # pg_notify-based error reporting are unchanged.
  def change do
    execute("""
    CREATE OR REPLACE FUNCTION realtime.send(payload jsonb, event text, topic text, private boolean DEFAULT true ) RETURNS void
    AS $$
    BEGIN
      BEGIN
        -- Set the topic configuration
        EXECUTE format('SET LOCAL realtime.topic TO %L', topic);

        -- Attempt to insert the message
        INSERT INTO realtime.messages (payload, event, topic, private, extension)
        VALUES (payload, event, topic, private, 'broadcast');
      EXCEPTION
        WHEN OTHERS THEN
          -- Capture and notify the error
          PERFORM pg_notify(
            'realtime:system',
            jsonb_build_object(
              'error', SQLERRM,
              'function', 'realtime.send',
              'event', event,
              'topic', topic,
              'private', private
            )::text
          );
      END;
    END;
    $$
    LANGUAGE plpgsql;
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20250506224012_subscription_index_bridging_disabled.ex
================================================
defmodule Realtime.Tenants.Migrations.SubscriptionIndexBridgingDisabled do
  @moduledoc false

  use Ecto.Migration

  # NOTE: the heredoc below is NOT passed to `execute`, so change/0 merely
  # evaluates to a string and this migration runs no SQL. The statement is
  # actually executed by RunSubscriptionIndexBridgingDisabled (the next
  # migration). Left as-is: this version may already be recorded as applied,
  # so changing it now would not re-run it on those databases.
  def change do
    """
    alter table realtime.subscription reset (index_bridging);
    """
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20250523164012_run_subscription_index_bridging_disabled.ex
================================================
defmodule Realtime.Tenants.Migrations.RunSubscriptionIndexBridgingDisabled do
  @moduledoc false

  use Ecto.Migration

  # Executes the `reset (index_bridging)` statement on realtime.subscription
  # that the SubscriptionIndexBridgingDisabled migration declared but never ran.
  def change do
    execute("""
    alter table realtime.subscription reset (index_bridging);
    """)
  end
end
================================================
FILE: lib/realtime/tenants/repo/migrations/20250714121412_broadcast_send_error_logging.ex
================================================ defmodule Realtime.Tenants.Migrations.BroadcastSendErrorLogging do @moduledoc false use Ecto.Migration # Removes pg_notification to use postgres logging instead def change do execute(""" CREATE OR REPLACE FUNCTION realtime.send(payload jsonb, event text, topic text, private boolean DEFAULT true ) RETURNS void AS $$ BEGIN BEGIN -- Set the topic configuration EXECUTE format('SET LOCAL realtime.topic TO %L', topic); -- Attempt to insert the message INSERT INTO realtime.messages (payload, event, topic, private, extension) VALUES (payload, event, topic, private, 'broadcast'); EXCEPTION WHEN OTHERS THEN -- Capture and notify the error RAISE WARNING 'ErrorSendingBroadcastMessage: %', SQLERRM; END; END; $$ LANGUAGE plpgsql; """) end end ================================================ FILE: lib/realtime/tenants/repo/migrations/20250905041441_create_messages_replay_index.ex ================================================ defmodule Realtime.Tenants.Migrations.CreateMessagesReplayIndex do @moduledoc false use Ecto.Migration def change do create_if_not_exists index(:messages, [{:desc, :inserted_at}, :topic], where: "extension = 'broadcast' and private IS TRUE" ) end end ================================================ FILE: lib/realtime/tenants/repo/migrations/20251103001201_broadcast_send_include_payload_id.ex ================================================ defmodule Realtime.Tenants.Migrations.BroadcastSendIncludePayloadId do @moduledoc false use Ecto.Migration # Include ID in the payload if not defined def change do execute(""" CREATE OR REPLACE FUNCTION realtime.send(payload jsonb, event text, topic text, private boolean DEFAULT true ) RETURNS void AS $$ DECLARE generated_id uuid; final_payload jsonb; BEGIN BEGIN -- Generate a new UUID for the id generated_id := gen_random_uuid(); -- Check if payload has an 'id' key, if not, add the generated UUID IF payload ? 
'id' THEN final_payload := payload; ELSE final_payload := jsonb_set(payload, '{id}', to_jsonb(generated_id)); END IF; -- Set the topic configuration EXECUTE format('SET LOCAL realtime.topic TO %L', topic); -- Attempt to insert the message INSERT INTO realtime.messages (id, payload, event, topic, private, extension) VALUES (generated_id, final_payload, event, topic, private, 'broadcast'); EXCEPTION WHEN OTHERS THEN -- Capture and notify the error RAISE WARNING 'ErrorSendingBroadcastMessage: %', SQLERRM; END; END; $$ LANGUAGE plpgsql; """) end end ================================================ FILE: lib/realtime/tenants/repo/migrations/20251120212548_add_action_to_subscriptions.ex ================================================ defmodule Realtime.Tenants.Migrations.AddActionToSubscriptions do @moduledoc false use Ecto.Migration def up do execute(""" ALTER TABLE realtime.subscription ADD COLUMN action_filter text DEFAULT '*' CHECK (action_filter IN ('*', 'INSERT', 'UPDATE', 'DELETE')); """) execute(""" CREATE UNIQUE INDEX subscription_subscription_id_entity_filters_action_filter_key on realtime.subscription (subscription_id, entity, filters, action_filter); """) execute(""" DROP INDEX IF EXISTS "realtime"."subscription_subscription_id_entity_filters_key"; """) end def down do execute(""" ALTER TABLE realtime.subscription DROP COLUMN action_filter; """) execute(""" CREATE UNIQUE INDEX subscription_subscription_id_entity_filters_key on realtime.subscription (subscription_id, entity, filters) """) execute(""" DROP INDEX IF EXISTS "realtime"."subscription_subscription_id_entity_filters_action_filter_key"; """) end end ================================================ FILE: lib/realtime/tenants/repo/migrations/20251120215549_filter_action_postgres_changes.ex ================================================ defmodule Realtime.Tenants.Migrations.FilterActionPostgresChanges do @moduledoc false use Ecto.Migration def up do execute """ create or replace function 
realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024) returns setof realtime.wal_rls language plpgsql volatile as $$ declare -- Regclass of the table e.g. public.notes entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass; -- I, U, D, T: insert, update ... action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_ -- Filter by action early - only get subscriptions interested in this action -- action_filter column can be: '*' (all), 'INSERT', 'UPDATE', or 'DELETE' and (subs.action_filter = '*' or subs.action_filter = action::text); -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role::text) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', 
coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( ((wal ->> 'timestamp')::timestamptz at time zone 'utc'), 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 
'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add "record" key for insert and update || case when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', ( select jsonb_object_agg( -- if unchanged toast, get column name and value from old record coalesce((c).name, (oc).name), case when (c).name is null then (oc).value else (c).value end ) from unnest(columns) c full outer join unnest(old_columns) oc on (c).name = (oc).name where coalesce((c).is_selectable, (oc).is_selectable) and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) else '{}'::jsonb end -- Add "old_record" key for update and delete || case when action = 'UPDATE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) when action = 'DELETE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) and ( not is_rls_enabled or (c).is_pkey ) -- if RLS enabled, we can't secure deletes so filter to pkey ) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and ( 
realtime.is_visible_through_filters(columns, subs.filters) or ( action = 'DELETE' and realtime.is_visible_through_filters(old_columns, subs.filters) ) ) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform -- Trim leading and trailing quotes from working_role because set_config -- doesn't recognize the role as valid if they are included set_config('role', trim(both '"' from working_role::text), true), set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$; """ end def down do execute """ create or replace function realtime.apply_rls(wal jsonb, max_record_bytes int = 1024 * 1024) returns setof realtime.wal_rls language plpgsql volatile as $$ declare -- Regclass of the table e.g. public.notes entity_ regclass = (quote_ident(wal ->> 'schema') || '.' || quote_ident(wal ->> 'table'))::regclass; -- I, U, D, T: insert, update ... 
action realtime.action = ( case wal ->> 'action' when 'I' then 'INSERT' when 'U' then 'UPDATE' when 'D' then 'DELETE' else 'ERROR' end ); -- Is row level security enabled for the table is_rls_enabled bool = relrowsecurity from pg_class where oid = entity_; subscriptions realtime.subscription[] = array_agg(subs) from realtime.subscription subs where subs.entity = entity_; -- Subscription vars roles regrole[] = array_agg(distinct us.claims_role::text) from unnest(subscriptions) us; working_role regrole; claimed_role regrole; claims jsonb; subscription_id uuid; subscription_has_access bool; visible_to_subscription_ids uuid[] = '{}'; -- structured info for wal's columns columns realtime.wal_column[]; -- previous identity values for update/delete old_columns realtime.wal_column[]; error_record_exceeds_max_size boolean = octet_length(wal::text) > max_record_bytes; -- Primary jsonb output for record output jsonb; begin perform set_config('role', null, true); columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'columns') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); old_columns = array_agg( ( x->>'name', x->>'type', x->>'typeoid', realtime.cast( (x->'value') #>> '{}', coalesce( (x->>'typeoid')::regtype, -- null when wal2json version <= 2.4 (x->>'type')::regtype ) ), (pks ->> 'name') is not null, true )::realtime.wal_column ) from jsonb_array_elements(wal -> 'identity') x left join jsonb_array_elements(wal -> 'pk') pks on (x ->> 'name') = (pks ->> 'name'); for working_role in select * from unnest(roles) loop -- Update `is_selectable` for columns and old_columns columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, 
c.name, 'SELECT') )::realtime.wal_column ) from unnest(columns) c; old_columns = array_agg( ( c.name, c.type_name, c.type_oid, c.value, c.is_pkey, pg_catalog.has_column_privilege(working_role, entity_, c.name, 'SELECT') )::realtime.wal_column ) from unnest(old_columns) c; if action <> 'DELETE' and count(1) = 0 from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, -- subscriptions is already filtered by entity (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 400: Bad Request, no primary key'] )::realtime.wal_rls; -- The claims role does not have SELECT permission to the primary key of entity elsif action <> 'DELETE' and sum(c.is_selectable::int) <> count(1) from unnest(columns) c where c.is_pkey then return next ( jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action ), is_rls_enabled, (select array_agg(s.subscription_id) from unnest(subscriptions) as s where claims_role = working_role), array['Error 401: Unauthorized'] )::realtime.wal_rls; else output = jsonb_build_object( 'schema', wal ->> 'schema', 'table', wal ->> 'table', 'type', action, 'commit_timestamp', to_char( ((wal ->> 'timestamp')::timestamptz at time zone 'utc'), 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"' ), 'columns', ( select jsonb_agg( jsonb_build_object( 'name', pa.attname, 'type', pt.typname ) order by pa.attnum asc ) from pg_attribute pa join pg_type pt on pa.atttypid = pt.oid where attrelid = entity_ and attnum > 0 and pg_catalog.has_column_privilege(working_role, entity_, pa.attname, 'SELECT') ) ) -- Add "record" key for insert and update || case when action in ('INSERT', 'UPDATE') then jsonb_build_object( 'record', ( select jsonb_object_agg( -- if unchanged toast, get column name and value from old record coalesce((c).name, (oc).name), case when (c).name is null then (oc).value else (c).value 
end ) from unnest(columns) c full outer join unnest(old_columns) oc on (c).name = (oc).name where coalesce((c).is_selectable, (oc).is_selectable) and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) else '{}'::jsonb end -- Add "old_record" key for update and delete || case when action = 'UPDATE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) ) ) when action = 'DELETE' then jsonb_build_object( 'old_record', ( select jsonb_object_agg((c).name, (c).value) from unnest(old_columns) c where (c).is_selectable and ( not error_record_exceeds_max_size or (octet_length((c).value::text) <= 64)) and ( not is_rls_enabled or (c).is_pkey ) -- if RLS enabled, we can't secure deletes so filter to pkey ) ) else '{}'::jsonb end; -- Create the prepared statement if is_rls_enabled and action <> 'DELETE' then if (select 1 from pg_prepared_statements where name = 'walrus_rls_stmt' limit 1) > 0 then deallocate walrus_rls_stmt; end if; execute realtime.build_prepared_statement_sql('walrus_rls_stmt', entity_, columns); end if; visible_to_subscription_ids = '{}'; for subscription_id, claims in ( select subs.subscription_id, subs.claims from unnest(subscriptions) subs where subs.entity = entity_ and subs.claims_role = working_role and ( realtime.is_visible_through_filters(columns, subs.filters) or ( action = 'DELETE' and realtime.is_visible_through_filters(old_columns, subs.filters) ) ) ) loop if not is_rls_enabled or action = 'DELETE' then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; else -- Check if RLS allows the role to see the record perform -- Trim leading and trailing quotes from working_role because set_config -- doesn't recognize the role as valid if they are included set_config('role', trim(both '"' from working_role::text), true), 
set_config('request.jwt.claims', claims::text, true); execute 'execute walrus_rls_stmt' into subscription_has_access; if subscription_has_access then visible_to_subscription_ids = visible_to_subscription_ids || subscription_id; end if; end if; end loop; perform set_config('role', null, true); return next ( output, is_rls_enabled, visible_to_subscription_ids, case when error_record_exceeds_max_size then array['Error 413: Payload Too Large'] else '{}' end )::realtime.wal_rls; end if; end loop; perform set_config('role', null, true); end; $$;
    """
  end
end

================================================
FILE: lib/realtime/tenants/repo/migrations/20260218120000_fix_bytea_double_encoding_in_cast.ex
================================================
# Tenant migration that replaces the SQL function
# `realtime.cast(val text, type_ regtype) returns jsonb`.
defmodule Realtime.Tenants.Migrations.FixByteaDoubleEncodingInCast do
  @moduledoc false
  use Ecto.Migration

  # Redefines `realtime.cast/2`. The new body returns `to_jsonb(val)` directly
  # when `type_` is `bytea`; every other type still goes through the dynamically
  # built `select to_jsonb(<val>::<type_>)` statement.
  def up do
    execute """
    create or replace function realtime.cast(val text, type_ regtype) returns jsonb immutable language plpgsql as $$ declare res jsonb; begin if type_::text = 'bytea' then return to_jsonb(val); end if; execute format('select to_jsonb(%L::'|| type_::text || ')', val) into res; return res; end $$;
    """
  end

  # Restores the definition without the `bytea` special case, so every value is
  # converted through the dynamic cast again.
  def down do
    execute """
    create or replace function realtime.cast(val text, type_ regtype) returns jsonb immutable language plpgsql as $$ declare res jsonb; begin execute format('select to_jsonb(%L::'|| type_::text || ')', val) into res; return res; end $$;
    """
  end
end

================================================
FILE: lib/realtime/tenants/repo.ex
================================================
defmodule Realtime.Tenants.Repo do
  @moduledoc """
  Database operations done against the tenant database.

  Queries are executed with `Postgrex.query/4` on the connection passed in by
  the caller; `Realtime.Repo.Replica.replica/0` supplies the repo module used to
  turn Ecto queries into SQL and to load rows into structs.
  """
  use Realtime.Logs
  import Ecto.Query
  alias Realtime.Repo.Replica

  @doc """
  Lists all records for a given query and converts them into a given struct.

  Returns `{:ok, structs}` on success or `{:error, reason}` when the query fails.
  """
  @spec all(DBConnection.conn(), Ecto.Queryable.t(), module(), [Postgrex.execute_option()]) ::
          {:ok, list(struct())} | {:error, any()}
  def all(conn, query,
result_struct, opts \\ []) do
    conn
    |> run_all_query(query, opts)
    |> result_to_structs(result_struct)
  end

  @doc """
  Runs `query` and loads its single row into `result_struct`.

  The query result is handed to `result_to_single_struct/3` with `nil` in place
  of a changeset.
  """
  @spec one(
          DBConnection.conn(),
          Ecto.Query.t(),
          module(),
          Postgrex.option() | Keyword.t()
        ) ::
          {:error, any()} | {:ok, struct()} | Ecto.Changeset.t()
  def one(conn, query, result_struct, opts \\ []) do
    query_result = run_all_query(conn, query, opts)
    result_to_single_struct(query_result, result_struct, nil)
  end

  @doc """
  Builds an `INSERT` from the changeset, runs it and loads the returned row into
  `result_struct`.

  Whatever `insert_query_from_changeset/1` returns instead of `{:ok, {query, args}}`
  is passed back to the caller unchanged.
  """
  @spec insert(
          DBConnection.conn(),
          Ecto.Changeset.t(),
          module(),
          Postgrex.option() | Keyword.t()
        ) ::
          {:ok, struct()} | {:error, any()} | Ecto.Changeset.t()
  def insert(conn, changeset, result_struct, opts \\ []) do
    case insert_query_from_changeset(changeset) do
      {:ok, {query, args}} ->
        query_result = run_query_with_trap(conn, query, args, opts)
        result_to_single_struct(query_result, result_struct, changeset)

      not_built ->
        not_built
    end
  end

  @doc """
  Builds one multi-row `INSERT` from the changesets, runs it and loads every
  returned row into `result_struct`.

  Whatever `insert_all_query_from_changeset/1` returns instead of
  `{:ok, {query, args}}` is passed back to the caller unchanged.
  """
  @spec insert_all_entries(
          DBConnection.conn(),
          [Ecto.Changeset.t()],
          module(),
          Postgrex.option() | Keyword.t()
        ) ::
          {:ok, [struct()]} | {:error, any()} | Ecto.Changeset.t()
  def insert_all_entries(conn, changesets, result_struct, opts \\ []) do
    case insert_all_query_from_changeset(changesets) do
      {:ok, {query, args}} ->
        query_result = run_query_with_trap(conn, query, args, opts)
        result_to_structs(query_result, result_struct)

      not_built ->
        not_built
    end
  end

  @doc """
  Runs `query` as a delete and returns `{:ok, num_rows}` with the row count
  reported by Postgrex. Any other result is returned unchanged.
  """
  @spec del(DBConnection.conn(), Ecto.Queryable.t()) ::
          {:ok, non_neg_integer()} | {:error, any()}
  def del(conn, query) do
    case run_delete_query(conn, query) do
      {:ok, %Postgrex.Result{num_rows: deleted}} -> {:ok, deleted}
      failure -> failure
    end
  end

  @doc """
  Updates an entry based on the changeset and returns the updated entry
  """
  @spec update(DBConnection.conn(), Ecto.Changeset.t(), module()) ::
          {:ok, struct()} | {:error, any()} |
Ecto.Changeset.t()
  def update(conn, changeset, result_struct, opts \\ []) do
    with {:ok, {query, args}} <- update_query_from_changeset(changeset) do
      conn
      |> run_query_with_trap(query, args, opts)
      |> result_to_single_struct(result_struct, changeset)
    end
  end

  # Turns a raw query result into the value returned by the single-row API.
  #
  # A unique violation on "channels_name_index" becomes a `:name` error on the
  # given changeset; other errors pass through; no rows is `{:error, :not_found}`;
  # exactly one row is loaded into `struct`; anything else raises.
  defp result_to_single_struct(
         {:error,
          %Postgrex.Error{
            postgres: %{code: :unique_violation, constraint: "channels_name_index"}
          }},
         _struct,
         changeset
       ) do
    Ecto.Changeset.add_error(changeset, :name, "has already been taken")
  end

  defp result_to_single_struct({:error, _} = error, _, _), do: error

  defp result_to_single_struct({:ok, %Postgrex.Result{rows: []}}, _, _) do
    {:error, :not_found}
  end

  defp result_to_single_struct({:ok, %Postgrex.Result{rows: [row], columns: columns}}, struct, _) do
    repo_module = Replica.replica()
    {:ok, repo_module.load(struct, Enum.zip(columns, row))}
  end

  defp result_to_single_struct({:ok, %Postgrex.Result{num_rows: num_rows}}, _, _) do
    raise("expected at most one result but got #{num_rows} in result")
  end

  # Loads every returned row into `struct`; errors pass through unchanged.
  defp result_to_structs({:error, _} = error, _), do: error

  defp result_to_structs({:ok, %Postgrex.Result{rows: rows, columns: columns}}, struct) do
    repo_module = Replica.replica()
    {:ok, Enum.map(rows, &repo_module.load(struct, Enum.zip(columns, &1)))}
  end

  # Converts a changeset value into the parameter handed to Postgrex.
  # `nil` is matched first: `is_atom(nil)` is true, so without this clause a nil
  # change would be sent as the string "nil" instead of SQL NULL.
  defp to_query_param(nil), do: nil
  defp to_query_param(value) when is_boolean(value), do: value
  defp to_query_param(value) when is_atom(value), do: Atom.to_string(value)
  defp to_query_param(value), do: value

  # Builds `{sql, params}` for a single-row INSERT ... RETURNING * from the
  # changeset's changes. Invalid changesets are returned as `{:error, changeset}`.
  defp insert_query_from_changeset(%{valid?: false} = changeset), do: {:error, changeset}

  defp insert_query_from_changeset(changeset) do
    schema = changeset.data.__struct__
    source = schema.__schema__(:source)
    prefix = schema.__schema__(:prefix)

    acc = %{header: [], rows: []}

    # Columns and values are prepended together, so they stay aligned.
    %{header: header, rows: rows} =
      Enum.reduce(changeset.changes, acc, fn {field, row}, %{header: header, rows: rows} ->
        %{
          header: [Atom.to_string(field) | header],
          rows: [to_query_param(row) | rows]
        }
      end)

    table = "\"#{prefix}\".\"#{source}\""
    header = "(#{Enum.map_join(header, ",", &"\"#{&1}\"")})"

    arg_index =
      rows
      |> Enum.with_index(1)
      |> Enum.map_join(",", fn {_, index} -> "$#{index}" end)

    {:ok, {"INSERT INTO #{table} #{header} VALUES (#{arg_index}) RETURNING *", rows}}
  end

  # Builds `{sql, params}` for one multi-row INSERT ... RETURNING *.
  #
  # Returns `{:error, changesets}` when any changeset is invalid. All changesets
  # must be for the same schema (the `[schema]` match below enforces it).
  # NOTE(review): an empty list fails that match, and the placeholder grouping
  # assumes every changeset carries the same change keys - confirm with callers.
  defp insert_all_query_from_changeset(changesets) do
    if Enum.all?(changesets, & &1.valid?) do
      [schema] = changesets |> Enum.map(& &1.data.__struct__) |> Enum.uniq()

      source = schema.__schema__(:source)
      prefix = schema.__schema__(:prefix)
      changes = Enum.map(changesets, & &1.changes)

      %{header: header, rows: rows} =
        Enum.reduce(changes, %{header: [], rows: []}, fn v, changes_acc ->
          Enum.reduce(v, changes_acc, fn {field, row}, %{header: header, rows: rows} ->
            %{
              header: Enum.uniq([Atom.to_string(field) | header]),
              rows: [to_query_param(row) | rows]
            }
          end)
        end)

      # One "($n,...)" group per row, numbered consecutively across all rows.
      args_index =
        rows
        |> Enum.with_index(1)
        |> Enum.chunk_every(length(header))
        |> Enum.map_join(",", fn chunk ->
          "(" <> Enum.map_join(chunk, ",", fn {_, index} -> "$#{index}" end) <> ")"
        end)

      table = "\"#{prefix}\".\"#{source}\""
      header = "(#{Enum.map_join(header, ",", &"\"#{&1}\"")})"
      {:ok, {"INSERT INTO #{table} #{header} VALUES #{args_index} RETURNING *", rows}}
    else
      {:error, changesets}
    end
  end

  # Builds `{sql, params}` for an UPDATE of the row whose id matches the
  # changeset's data, setting the changed fields and selecting the row back.
  defp update_query_from_changeset(%{valid?: false} = changeset), do: {:error, changeset}

  defp update_query_from_changeset(changeset) do
    repo_module = Replica.replica()
    %Ecto.Changeset{data: %{id: id, __struct__: struct}, changes: changes} = changeset
    changes = Keyword.new(changes)
    query = from(c in struct, where: c.id == ^id, select: c, update: [set: ^changes])
    {:ok, repo_module.to_sql(:update_all, query)}
  end

  # Converts an Ecto query to SQL through the replica repo module and runs it.
  defp run_all_query(conn, query, opts) do
    repo_module = Replica.replica()
    {query, args} = repo_module.to_sql(:all, query)
    run_query_with_trap(conn, query, args, opts)
  end

  defp run_delete_query(conn, query) do
    repo_module = Replica.replica()
    {query, args}
= repo_module.to_sql(:delete_all, query)
    run_query_with_trap(conn, query, args)
  end

  # Runs the query and converts raised exceptions and process exits into
  # `{:error, :postgrex_exception}` after logging them. The specific `:noproc`
  # checkout exit is matched before the generic exit clause.
  defp run_query_with_trap(conn, query, args, opts \\ []) do
    Postgrex.query(conn, query, args, opts)
  rescue
    e ->
      log_error("ErrorRunningQuery", e)
      {:error, :postgrex_exception}
  catch
    :exit, {:noproc, {DBConnection.Holder, :checkout, _}} ->
      log_error(
        "UnableCheckoutConnection",
        "Unable to checkout connection, please check your connection pool configuration"
      )

      {:error, :postgrex_exception}

    :exit, reason ->
      log_error("UnknownError", reason)
      {:error, :postgrex_exception}
  end
end

================================================
FILE: lib/realtime/tenants.ex
================================================
defmodule Realtime.Tenants do
  @moduledoc """
  Everything to do with Tenants.
  """

  require Logger

  alias Realtime.Api
  alias Realtime.Api.Tenant
  alias Realtime.Database
  alias Realtime.RateCounter
  alias Realtime.Repo.Replica
  alias Realtime.Tenants.Cache
  alias Realtime.Tenants.Connect
  alias Realtime.Tenants.Migrations
  alias Realtime.UsersCounter

  @doc """
  Gets the database connection pid managed by the Tenants.Connect process.

  ## Examples

      iex> Realtime.Tenants.get_health_conn(%Realtime.Api.Tenant{external_id: "not_found_tenant"})
      {:error, :tenant_database_connection_initializing}
  """
  @spec get_health_conn(Tenant.t()) :: {:error, term()} | {:ok, pid()}
  def get_health_conn(%Tenant{external_id: external_id}) do
    Connect.get_status(external_id)
  end

  @doc """
  Checks if a tenant is healthy.

  * Tenant has a db connection: healthy.
  * Tenant has no db connection but has client connections: not healthy
    (returned as `{:error, map}`).
  * Tenant has no db connection and zero client connections: tenant migrations
    are run and `healthy` is true when they return `:ok` or `:noop`.

  Returns `{:error, :tenant_not_found}` when the tenant is not in the cache.

  The response includes `replication_connected` to indicate if the replication
  connection for broadcast changes is active. This is informational and does not
  affect the healthy status.
  """
  @spec health_check(binary) ::
          {:error,
           :tenant_not_found
           | String.t()
           | %{
               connected_cluster: pos_integer,
               db_connected: boolean,
               replication_connected: boolean,
               healthy: false,
               region: String.t(),
               node: String.t()
             }}
          | {:ok,
             %{
               connected_cluster: non_neg_integer,
               db_connected: boolean,
               replication_connected: boolean,
               healthy: boolean,
               region: String.t(),
               node: String.t()
             }}
  def health_check(external_id) when is_binary(external_id) do
    # The tenant is looked up once and reused, so the migration branch can never
    # be handed a `nil` from a second cache lookup.
    case Cache.get_tenant_by_external_id(external_id) do
      %Tenant{} = tenant -> tenant_health(tenant)
      nil -> {:error, :tenant_not_found}
    end
  end

  # Builds the health response for a tenant that exists.
  defp tenant_health(%Tenant{external_id: external_id} = tenant) do
    base = %{
      region: Application.get_env(:realtime, :region),
      node: Node.self() |> to_string()
    }

    case get_health_conn(tenant) do
      {:ok, _health_conn} ->
        connected_cluster = UsersCounter.tenant_users(external_id)
        replication_connected = replication_connected?(external_id)

        {:ok,
         Map.merge(base, %{
           healthy: true,
           db_connected: true,
           replication_connected: replication_connected,
           connected_cluster: connected_cluster
         })}

      {:error, _} ->
        case UsersCounter.tenant_users(external_id) do
          connected_cluster when connected_cluster > 0 ->
            {:error,
             Map.merge(base, %{
               healthy: false,
               db_connected: false,
               replication_connected: false,
               connected_cluster: connected_cluster
             })}

          connected_cluster when is_integer(connected_cluster) ->
            migrations_result = Migrations.run_migrations(tenant)

            {:ok,
             Map.merge(base, %{
               healthy: migrations_result in [:ok, :noop],
               db_connected: false,
               replication_connected: false,
               connected_cluster: connected_cluster
             })}
        end
    end
  end

  # NOTE(review): only `{:ok, pid}` and `{:error, :not_connected}` are handled;
  # confirm `Connect.replication_status/1` cannot return other error reasons.
  defp replication_connected?(external_id) do
    case Connect.replication_status(external_id) do
      {:ok, _pid} -> true
      {:error, :not_connected} -> false
    end
  end

  @doc """
  All the keys that we use to create counters and RateLimiters for tenants.
  """
  @spec limiter_keys(Tenant.t()) :: [{atom(), atom(), String.t()}]
  def limiter_keys(%Tenant{} = tenant) do
    [
      requests_per_second_key(tenant),
      channels_per_client_key(tenant),
      joins_per_second_key(tenant),
      events_per_second_key(tenant),
      db_events_per_second_key(tenant),
      presence_events_per_second_key(tenant)
    ]
  end

  @doc "RateCounter arguments for counting requests through Plug (no extra options)."
  @spec requests_per_second_rate(Tenant.t()) :: RateCounter.Args.t()
  def requests_per_second_rate(%Tenant{} = tenant) do
    %RateCounter.Args{id: requests_per_second_key(tenant), opts: []}
  end

  @doc "The GenCounter key to use for counting requests through Plug."
  @spec requests_per_second_key(Tenant.t() | String.t()) :: {:plug, :requests, String.t()}
  def requests_per_second_key(tenant) when is_binary(tenant) do
    # Binary clause added so the function accepts what its spec (and the sibling
    # *_key functions) already promise.
    {:plug, :requests, tenant}
  end

  def requests_per_second_key(%Tenant{} = tenant) do
    {:plug, :requests, tenant.external_id}
  end

  @doc "RateCounter arguments for counting joins per second."
  @spec joins_per_second_rate(Tenant.t()) :: RateCounter.Args.t()
  def joins_per_second_rate(%Tenant{} = tenant),
    do: joins_per_second_rate(tenant.external_id, tenant.max_joins_per_second)

  @spec joins_per_second_rate(String.t(), non_neg_integer) :: RateCounter.Args.t()
  def joins_per_second_rate(tenant_id, max_joins_per_second) when is_binary(tenant_id) do
    opts = [
      telemetry: %{
        event_name: [:channel, :joins],
        measurements: %{limit: max_joins_per_second},
        metadata: %{tenant: tenant_id}
      },
      limit: [
        value: max_joins_per_second,
        measurement: :avg,
        log_fn: fn ->
          Logger.critical("ClientJoinRateLimitReached: Too many joins per second",
            external_id: tenant_id,
            project: tenant_id
          )
        end
      ]
    ]

    %RateCounter.Args{id: joins_per_second_key(tenant_id), opts: opts}
  end

  @doc "The GenCounter key to use for counting RealtimeChannel joins."
@spec joins_per_second_key(Tenant.t() | String.t()) :: {:channel, :joins, String.t()}
  def joins_per_second_key(%Tenant{external_id: external_id}), do: {:channel, :joins, external_id}
  def joins_per_second_key(tenant_id) when is_binary(tenant_id), do: {:channel, :joins, tenant_id}

  @doc "The Register key to use to limit the amount of channels connected to the websocket."
  @spec channels_per_client_key(Tenant.t() | String.t()) :: {:channel, :clients_per, String.t()}
  def channels_per_client_key(%Tenant{external_id: external_id}),
    do: {:channel, :clients_per, external_id}

  def channels_per_client_key(tenant_id) when is_binary(tenant_id),
    do: {:channel, :clients_per, tenant_id}

  @doc "RateCounter arguments for counting events per second."
  @spec events_per_second_rate(Tenant.t()) :: RateCounter.Args.t()
  def events_per_second_rate(tenant) do
    events_per_second_rate(tenant.external_id, tenant.max_events_per_second)
  end

  def events_per_second_rate(tenant_id, max_events_per_second) do
    telemetry = %{
      event_name: [:channel, :events],
      measurements: %{limit: max_events_per_second},
      metadata: %{tenant: tenant_id}
    }

    # Invoked by the rate counter's limit handling.
    log_limit_reached = fn ->
      Logger.error("MessagePerSecondRateLimitReached: Too many messages per second",
        external_id: tenant_id,
        project: tenant_id
      )
    end

    limit = [
      value: max_events_per_second,
      measurement: :avg,
      log: true,
      log_fn: log_limit_reached
    ]

    %RateCounter.Args{
      id: events_per_second_key(tenant_id),
      opts: [telemetry: telemetry, limit: limit]
    }
  end

  @doc """
  The GenCounter key to use when counting events for RealtimeChannel events.
## Examples

      iex> Realtime.Tenants.events_per_second_key("tenant_id")
      {:channel, :events, "tenant_id"}
      iex> Realtime.Tenants.events_per_second_key(%Realtime.Api.Tenant{external_id: "tenant_id"})
      {:channel, :events, "tenant_id"}
  """
  @spec events_per_second_key(Tenant.t() | String.t()) :: {:channel, :events, String.t()}
  def events_per_second_key(%Tenant{external_id: external_id}), do: {:channel, :events, external_id}
  def events_per_second_key(tenant_id) when is_binary(tenant_id), do: {:channel, :events, tenant_id}

  @doc "RateCounter arguments for counting database events per second."
  @spec db_events_per_second_rate(Tenant.t()) :: RateCounter.Args.t()
  def db_events_per_second_rate(%Tenant{external_id: external_id, max_events_per_second: max}) do
    db_events_per_second_rate(external_id, max)
  end

  @doc "RateCounter arguments for counting database events per second with a limit."
  @spec db_events_per_second_rate(String.t(), non_neg_integer) :: RateCounter.Args.t()
  def db_events_per_second_rate(tenant_id, max_events_per_second) when is_binary(tenant_id) do
    # NOTE(review): unlike the sibling rate builders, no `:limit` measurement is
    # attached here - confirm that is intentional.
    telemetry = %{
      event_name: [:channel, :db_events],
      measurements: %{},
      metadata: %{tenant: tenant_id}
    }

    log_limit_reached = fn ->
      Logger.error("MessagePerSecondRateLimitReached: Too many postgres changes messages per second",
        external_id: tenant_id,
        project: tenant_id
      )
    end

    limit = [
      value: max_events_per_second,
      measurement: :avg,
      log: true,
      log_fn: log_limit_reached
    ]

    %RateCounter.Args{
      id: db_events_per_second_key(tenant_id),
      opts: [telemetry: telemetry, limit: limit]
    }
  end

  @doc """
  The GenCounter key to use when counting database (postgres changes) events for RealtimeChannel.

  ## Examples

iex> Realtime.Tenants.db_events_per_second_key("tenant_id")
      {:channel, :db_events, "tenant_id"}
      iex> Realtime.Tenants.db_events_per_second_key(%Realtime.Api.Tenant{external_id: "tenant_id"})
      {:channel, :db_events, "tenant_id"}
  """
  @spec db_events_per_second_key(Tenant.t() | String.t()) :: {:channel, :db_events, String.t()}
  def db_events_per_second_key(%Tenant{external_id: external_id}),
    do: {:channel, :db_events, external_id}

  def db_events_per_second_key(tenant_id) when is_binary(tenant_id),
    do: {:channel, :db_events, tenant_id}

  @doc "RateCounter arguments for counting presence events per second."
  @spec presence_events_per_second_rate(Tenant.t()) :: RateCounter.Args.t()
  def presence_events_per_second_rate(tenant),
    do: presence_events_per_second_rate(tenant.external_id, tenant.max_presence_events_per_second)

  @spec presence_events_per_second_rate(String.t(), non_neg_integer) :: RateCounter.Args.t()
  def presence_events_per_second_rate(tenant_id, max_presence_events_per_second) do
    telemetry = %{
      event_name: [:channel, :presence_events],
      measurements: %{limit: max_presence_events_per_second},
      metadata: %{tenant: tenant_id}
    }

    log_limit_reached = fn ->
      Logger.error("PresenceRateLimitReached: Too many presence events per second",
        external_id: tenant_id,
        project: tenant_id
      )
    end

    limit = [
      value: max_presence_events_per_second,
      measurement: :avg,
      log_fn: log_limit_reached
    ]

    %RateCounter.Args{
      id: presence_events_per_second_key(tenant_id),
      opts: [telemetry: telemetry, limit: limit]
    }
  end

  @doc """
  The GenCounter key to use when counting presence events for RealtimeChannel events.
## Examples

      iex> Realtime.Tenants.presence_events_per_second_key("tenant_id")
      {:channel, :presence_events, "tenant_id"}
      iex> Realtime.Tenants.presence_events_per_second_key(%Realtime.Api.Tenant{external_id: "tenant_id"})
      {:channel, :presence_events, "tenant_id"}
  """
  @spec presence_events_per_second_key(Tenant.t() | String.t()) ::
          {:channel, :presence_events, String.t()}
  def presence_events_per_second_key(%Tenant{external_id: external_id}),
    do: {:channel, :presence_events, external_id}

  def presence_events_per_second_key(tenant_id) when is_binary(tenant_id),
    do: {:channel, :presence_events, tenant_id}

  # RateCounter arguments for authorization errors: a 30-bucket window whose sum
  # is compared against the pool size returned by `authorization_pool_size/1`.
  @spec authorization_errors_per_second_rate(Tenant.t()) :: RateCounter.Args.t()
  def authorization_errors_per_second_rate(%Tenant{external_id: external_id} = tenant) do
    log_limit_reached = fn ->
      Logger.critical("IncreaseConnectionPool: Too many database timeouts",
        external_id: external_id,
        project: external_id
      )
    end

    limit = [
      value: authorization_pool_size(tenant),
      measurement: :sum,
      log_fn: log_limit_reached
    ]

    %RateCounter.Args{
      id: authorization_errors_per_second_key(external_id),
      opts: [max_bucket_len: 30, limit: limit]
    }
  end

  def authorization_errors_per_second_key(tenant_id) do
    {:channel, :authorization_errors, tenant_id}
  end

  # RateCounter arguments for subscription errors: a 30-bucket window whose sum
  # is compared against the caller-supplied `pool_size`.
  @spec subscription_errors_per_second_rate(String.t(), non_neg_integer) :: RateCounter.Args.t()
  def subscription_errors_per_second_rate(tenant_id, pool_size) do
    log_limit_reached = fn ->
      Logger.error("IncreaseSubscriptionConnectionPool: Too many database timeouts",
        external_id: tenant_id,
        project: tenant_id
      )
    end

    limit = [value: pool_size, measurement: :sum, log_fn: log_limit_reached]

    %RateCounter.Args{
      id: subscription_errors_per_second_key(tenant_id),
      opts: [max_bucket_len: 30, limit: limit]
    }
  end

  def subscription_errors_per_second_key(tenant_id) do
    {:channel, :subscription_errors, tenant_id}
  end

  @connect_errors_limit 3
  @connect_errors_tick 200
  @connect_errors_bucket_len 25

  @doc "RateCounter arguments for counting connect errors. Uses a 200ms tick with a 25-bucket window (5s) and triggers after 3 errors."
@spec connect_errors_per_second_rate(Tenant.t() | String.t()) :: RateCounter.Args.t()
  def connect_errors_per_second_rate(%Tenant{external_id: external_id}),
    do: connect_errors_per_second_rate(external_id)

  def connect_errors_per_second_rate(tenant_id) do
    log_limit_reached = fn ->
      Logger.critical(
        "DatabaseConnectionRateLimitReached: Too many connection attempts against the tenant database",
        external_id: tenant_id,
        project: tenant_id
      )
    end

    limit = [
      value: @connect_errors_limit,
      measurement: :sum,
      log_fn: log_limit_reached
    ]

    opts = [
      tick: @connect_errors_tick,
      max_bucket_len: @connect_errors_bucket_len,
      limit: limit
    ]

    %RateCounter.Args{id: connect_errors_per_second_key(tenant_id), opts: opts}
  end

  def connect_errors_per_second_key(tenant_id) do
    {:database, :connect, tenant_id}
  end

  # Pool size configured for the "realtime_connect" application name, read from
  # the settings of the tenant's first extension; 1 when there are no extensions.
  defp authorization_pool_size(%{extensions: [%{settings: settings} | _]}),
    do: Database.pool_size_by_application_name("realtime_connect", settings)

  defp authorization_pool_size(_), do: 1

  # Reads every given GenCounter key on this node and on all connected nodes
  # (one task per node) and returns a flat list of per-node, per-key maps.
  @spec get_tenant_limits(Realtime.Api.Tenant.t(), maybe_improper_list) :: list
  def get_tenant_limits(%Tenant{} = tenant, keys) when is_list(keys) do
    tasks =
      for node <- [Node.self() | Node.list()] do
        Task.Supervisor.async({Realtime.TaskSupervisor, node}, fn ->
          Enum.map(keys, fn key ->
            response = Realtime.GenCounter.get(key)

            %{
              external_id: tenant.external_id,
              node: node,
              limiter: key,
              counter: response
            }
          end)
        end)
      end

    tasks
    |> Task.await_many()
    |> List.flatten()
  end

  # Fetches the tenant by external id from the replica repo and preloads its
  # extensions.
  @spec get_tenant_by_external_id(String.t()) :: Tenant.t() | nil
  def get_tenant_by_external_id(external_id) do
    repo_replica = Replica.replica()
    tenant = repo_replica.get_by(Tenant, external_id: external_id)
    repo_replica.preload(tenant, :extensions)
  end

  @doc """
  Builds a PubSub topic from a tenant and a sub-topic.
## Examples

      iex> Realtime.Tenants.tenant_topic(%Realtime.Api.Tenant{external_id: "tenant_id"}, "sub_topic")
      "tenant_id:sub_topic"
      iex> Realtime.Tenants.tenant_topic("tenant_id", "sub_topic")
      "tenant_id:sub_topic"
      iex> Realtime.Tenants.tenant_topic(%Realtime.Api.Tenant{external_id: "tenant_id"}, "sub_topic", false)
      "tenant_id-private:sub_topic"
      iex> Realtime.Tenants.tenant_topic("tenant_id", "sub_topic", false)
      "tenant_id-private:sub_topic"
      iex> Realtime.Tenants.tenant_topic("tenant_id", ":sub_topic", false)
      "tenant_id-private::sub_topic"
  """
  @spec tenant_topic(Tenant.t() | binary(), String.t(), boolean()) :: String.t()
  def tenant_topic(external_id, sub_topic, public? \\ true)

  def tenant_topic(%Tenant{external_id: external_id}, sub_topic, public?),
    do: tenant_topic(external_id, sub_topic, public?)

  def tenant_topic(external_id, sub_topic, false),
    do: "#{external_id}-private:#{sub_topic}"

  def tenant_topic(external_id, sub_topic, true),
    do: "#{external_id}:#{sub_topic}"

  @doc """
  Sets tenant as suspended. New connections won't be accepted.

  The `:suspend_tenant` operation event is broadcast whatever the update
  returned; the update result is what this function returns.
  """
  @spec suspend_tenant_by_external_id(String.t()) :: {:ok, Tenant.t()} | {:error, term()}
  def suspend_tenant_by_external_id(external_id) do
    external_id
    |> Api.update_tenant_by_external_id(%{suspend: true})
    |> tap(fn _ -> broadcast_operation_event(:suspend_tenant, external_id) end)
  end

  @doc """
  Sets tenant as unsuspended. New connections will be accepted.

  The `:unsuspend_tenant` operation event is broadcast whatever the update
  returned; the update result is what this function returns.
  """
  @spec unsuspend_tenant_by_external_id(String.t()) :: {:ok, Tenant.t()} | {:error, term()}
  def unsuspend_tenant_by_external_id(external_id) do
    external_id
    |> Api.update_tenant_by_external_id(%{suspend: false})
    |> tap(fn _ -> broadcast_operation_event(:unsuspend_tenant, external_id) end)
  end

  @doc """
  Checks if migrations for a given tenant need to run.

  True when the tenant's `migrations_ran` count is lower than the number of
  entries returned by `Migrations.migrations/0`.
  """
  @spec run_migrations?(Tenant.t() | integer()) :: boolean()
  def run_migrations?(%Tenant{} = tenant), do: run_migrations?(tenant.migrations_ran)

  def run_migrations?(migrations_ran) when is_integer(migrations_ran),
    do: migrations_ran < Enum.count(Migrations.migrations())

  @doc """
  Broadcasts an operation event to the tenant's operations channel.
  """
  @spec broadcast_operation_event(:suspend_tenant | :unsuspend_tenant | :disconnect, String.t()) ::
          :ok
  def broadcast_operation_event(action, external_id),
    do: Phoenix.PubSub.broadcast!(Realtime.PubSub, "realtime:operations:" <> external_id, action)

  @doc """
  Returns the region of the tenant based on its extensions.

  The region is read from the settings of the first extension. If the region is
  not set, or the tenant has no extensions, it returns nil.
  """
  @spec region(Tenant.t()) :: String.t() | nil
  # Matches the first extension, as `authorization_pool_size/1` does, instead of
  # requiring a list with exactly one element.
  def region(%Tenant{extensions: [%{settings: settings} | _]}), do: Map.get(settings, "region")
  def region(_), do: nil

  # Bytes allowed on top of the tenant's configured maximum payload size.
  @payload_size_padding 500

  @doc """
  Checks a payload against the tenant's maximum payload size.

  The size is measured with `:erlang.external_size/1` and compared against
  `max_payload_size_in_kb * 1000` plus a padding of 500 bytes. Returns `:ok`
  or `{:error, :payload_size_exceeded}`.

  When given an external id, the tenant is read through
  `Cache.get_tenant_by_external_id/1` first.
  """
  # NOTE(review): no clause handles a cache miss (`nil`) - confirm callers only
  # pass ids of existing tenants.
  @spec validate_payload_size(Tenant.t() | binary(), map()) ::
          :ok | {:error, :payload_size_exceeded}
  def validate_payload_size(tenant_id, payload) when is_binary(tenant_id) do
    tenant_id
    |> Cache.get_tenant_by_external_id()
    |> validate_payload_size(payload)
  end

  def validate_payload_size(%Tenant{max_payload_size_in_kb: max_payload_size_in_kb}, payload) do
    max_payload_size = max_payload_size_in_kb * 1000 + @payload_size_padding
    payload_size = :erlang.external_size(payload)
    if payload_size > max_payload_size, do: {:error, :payload_size_exceeded}, else: :ok
  end
end

================================================
FILE: lib/realtime/users_counter.ex
================================================
defmodule Realtime.UsersCounter do
  @moduledoc """
  Counts of connected clients for a tenant across the whole cluster or for a single node.
  """

  @doc """
  Adds a RealtimeChannel pid to the `:users` scope for a tenant so we can keep track of all connected clients for a tenant.
  """
  @spec add(pid(), String.t()) :: :ok
  def add(pid, tenant_id) when is_pid(pid) and is_binary(tenant_id) do
    :ok = Beacon.join(:users, tenant_id, pid)
  end

  @doc "Return true if pid is already counted for tenant_id"
  @spec already_counted?(pid(), String.t()) :: boolean()
  def already_counted?(pid, tenant_id), do: Beacon.local_member?(:users, tenant_id, pid)

  @doc "List all local tenants with connected clients on this node."
  @spec local_tenants() :: [String.t()]
  def local_tenants(), do: Beacon.local_groups(:users)

  @doc """
  Returns the count of all connected clients for a tenant for the cluster.
  """
  @spec tenant_users(String.t()) :: non_neg_integer()
  def tenant_users(tenant_id), do: Beacon.member_count(:users, tenant_id)

  @doc """
  Returns the counts of all connected clients for all tenants for the cluster.
  """
  @spec tenant_counts() :: %{String.t() => non_neg_integer()}
  def tenant_counts(), do: Beacon.member_counts(:users)

  @doc """
  Returns the counts of all connected clients for all tenants for the local node.
  """
  @spec local_tenant_counts() :: %{String.t() => non_neg_integer()}
  def local_tenant_counts(), do: Beacon.local_member_counts(:users)
end

================================================
FILE: lib/realtime.ex
================================================
defmodule Realtime do
  @moduledoc false
end

================================================
FILE: lib/realtime_web/api_spec.ex
================================================
defmodule RealtimeWeb.ApiSpec do
  @moduledoc false

  alias OpenApiSpex.Components
  alias OpenApiSpex.Info
  alias OpenApiSpex.OpenApi
  alias OpenApiSpex.Paths
  alias OpenApiSpex.SecurityScheme
  alias OpenApiSpex.Server
  alias OpenApiSpex.ServerVariable
  alias RealtimeWeb.Router

  @behaviour OpenApi

  # Resolved while compiling: Mix is a build tool and is not available inside
  # releases, so `Mix.env/0` must not be called from `spec/0` at runtime.
  @server_url (case Mix.env() do
                 :prod -> "https://{tenant}.supabase.co/realtime/v1"
                 _ -> "http://{tenant}.localhost:4000/"
               end)

  # Builds the OpenAPI document: one templated server URL, title and version
  # taken from the :realtime application spec, paths derived from the router,
  # and a bearer-token security scheme.
  @impl OpenApi
  def spec do
    %OpenApi{
      servers: [
        %Server{
          url: @server_url,
          variables: %{"tenant" => %ServerVariable{default: "tenant"}}
        }
      ],
      info: %Info{
        title: to_string(Application.spec(:realtime, :description)),
        version: to_string(Application.spec(:realtime, :vsn))
      },
      paths: Paths.from_router(Router),
      components: %Components{
        securitySchemes: %{"authorization" => %SecurityScheme{type: "http", scheme: "bearer"}}
      }
    }
    |> OpenApiSpex.resolve_schema_modules()
  end
end

================================================
FILE: lib/realtime_web/channels/auth/channels_authorization.ex
================================================
defmodule RealtimeWeb.ChannelsAuthorization do
  @moduledoc """
  Check connection is authorized to access channel
  """
  require Logger

  @doc """
  Authorize connection to access channel.

  The token is cleaned and then verified with
  `RealtimeWeb.JwtVerification.verify/3`. A non-binary token yields
  `{:error, :invalid_token}`.
  """
  # `jwt_jwks` is the decoded JWKS document (a map with a "keys" list) or nil.
  @spec authorize(binary(), binary(), map() | nil) ::
          {:ok, map()} | {:error, any()} | {:error, :expired_token, String.t()}
  def authorize(token, jwt_secret, jwt_jwks) when is_binary(token) do
    token
    |> clean_token()
    |> RealtimeWeb.JwtVerification.verify(jwt_secret, jwt_jwks)
  end

  def authorize(_token, _jwt_secret, _jwt_jwks), do: {:error,
:invalid_token}

  @doc """
  Authorizes a connection token and checks that it carries the required claims.

  Returns `{:ok, claims}` when the token verifies and contains both `"role"` and
  `"exp"`, `{:error, :missing_claims}` when one is absent,
  `{:error, :expired_token, message}` when the `exp` validation failed, and
  `{:error, reason}` for any other failure.
  """
  def authorize_conn(token, jwt_secret, jwt_jwks) do
    case authorize(token, jwt_secret, jwt_jwks) do
      {:ok, claims} ->
        required = ["role", "exp"]

        if Enum.all?(required, &Map.has_key?(claims, &1)),
          do: {:ok, claims},
          else: {:error, :missing_claims}

      # `message` carries the validation time set by the "exp" validator. The
      # claim value is checked to be numeric before doing arithmetic on it.
      {:error, [message: validation_timer, claim: "exp", claim_val: claim_val]}
      when is_integer(validation_timer) and is_number(claim_val) ->
        msg = "Token has expired #{validation_timer - claim_val} seconds ago"
        {:error, :expired_token, msg}

      {:error, reason} ->
        {:error, reason}
    end
  end

  # URL-decodes the token and strips all whitespace. `URI.decode/1` raises on
  # malformed percent-encoding; the token is client input, so fall back to the
  # raw value and let JWT verification reject it.
  defp clean_token(token) do
    decoded =
      try do
        URI.decode(token)
      rescue
        ArgumentError -> token
      end

    Regex.replace(~r/\s|\n/, decoded, "")
  end
end

================================================
FILE: lib/realtime_web/channels/auth/jwt_verification.ex
================================================
defmodule RealtimeWeb.JwtVerification do
  @moduledoc """
  Parse JWT and verify claims
  """

  # Matching error in Dialyzer when using Joken.peek_claims/1 but {:ok, []} is actually possible and covered by our testing
  @dialyzer {:nowarn_function, check_claims_format: 1}

  defmodule JwtAuthToken do
    @moduledoc false
    use Joken.Config

    # One equality validator per configured `:jwt_claim_validators` entry, plus
    # the expiry validator.
    @impl true
    def token_config do
      Application.fetch_env!(:realtime, :jwt_claim_validators)
      |> Enum.reduce(%{}, fn {claim_key, expected_val}, claims ->
        add_claim_validator(claims, claim_key, expected_val)
      end)
      |> add_claim_validator("exp")
    end

    defp add_claim_validator(claims, "exp") do
      current_time = current_time()

      # `is_number/1` is required: in Erlang term ordering every atom, binary,
      # list or map compares greater than any number, so a bare `>` would treat
      # a non-numeric "exp" (null, a string, ...) as never expiring.
      add_claim(claims, "exp", nil, &(is_number(&1) and &1 > current_time),
        message: current_time
      )
    end

    defp add_claim_validator(claims, claim_key, expected_val) do
      add_claim(claims, claim_key, nil, &(&1 == expected_val))
    end
  end

  @hs_algorithms ["HS256", "HS384", "HS512"]
  @rs_algorithms ["RS256", "RS384", "RS512"]
  @es_algorithms ["ES256", "ES384", "ES512"]
  @ed_algorithms ["Ed25519", "Ed448"]

  @doc """
  Verify JWT token and validate claims.

  `jwt_jwks` is the decoded JWKS document (a map with a `"keys"` list) or `nil`;
  `jwt_secret` is used for HS* tokens when no matching JWK exists.
  """
  @spec verify(binary(), binary(), map() | nil) :: {:ok, map()} | {:error, any()}
  def verify(token, jwt_secret, jwt_jwks) when is_binary(token) do
    with {:ok,
_claims} <- check_claims_format(token),
         {:ok, header} <- check_header_format(token),
         {:ok, signer} <- generate_signer(header, jwt_secret, jwt_jwks) do
      JwtAuthToken.verify_and_validate(token, signer)
    else
      {:error, _e} = error -> error
    end
  end

  def verify(_token, _jwt_secret, _jwt_jwks), do: {:error, :not_a_string}

  # Peeks at the unverified claims and requires them to decode to a map.
  defp check_claims_format(token) do
    case Joken.peek_claims(token) do
      {:ok, claims} = peeked when is_map(claims) -> peeked
      {:ok, _} -> {:error, :expected_claims_map}
      {:error, :token_malformed} = malformed -> malformed
    end
  end

  # Peeks at the unverified header and requires it to decode to a map.
  defp check_header_format(token) do
    case Joken.peek_header(token) do
      {:ok, header} = peeked when is_map(header) -> peeked
      _error -> {:error, :expected_header_map}
    end
  end

  # Finds the JWK with the given key type and key id and builds a signer from
  # it; no matching key is a signer-generation error.
  defp signer_from_jwks(keys, kty, kid, alg) do
    case Enum.find(keys, fn jwk -> jwk["kty"] == kty and jwk["kid"] == kid end) do
      nil -> {:error, :error_generating_signer}
      jwk -> {:ok, Joken.Signer.create(alg, jwk)}
    end
  end

  # Asymmetric algorithms: the signer always comes from the JWKS entry whose
  # "kid" matches the token header, with the key type the algorithm family needs.
  defp generate_signer(%{"alg" => alg, "kid" => kid}, _jwt_secret, %{"keys" => keys})
       when is_binary(kid) and alg in @rs_algorithms,
       do: signer_from_jwks(keys, "RSA", kid, alg)

  defp generate_signer(%{"alg" => alg, "kid" => kid}, _jwt_secret, %{"keys" => keys})
       when is_binary(kid) and alg in @es_algorithms,
       do: signer_from_jwks(keys, "EC", kid, alg)

  defp generate_signer(%{"alg" => alg, "kid" => kid}, _jwt_secret, %{"keys" => keys})
       when is_binary(kid) and alg in @ed_algorithms,
       do: signer_from_jwks(keys, "OKP", kid, alg)

  # Most Supabase Auth JWTs fall in this case, as they're usually signed with
  # HS256, have a kid header, but there's no JWK as this is sensitive. In this
  # case, the jwt_secret should be used.
defp generate_signer(%{"alg" => alg, "kid" => kid}, jwt_secret, %{"keys" => keys})
       when is_binary(kid) and alg in @hs_algorithms do
    matching_jwk =
      Enum.find(keys, fn jwk ->
        jwk["kty"] == "oct" and jwk["kid"] == kid and is_binary(jwk["k"])
      end)

    case matching_jwk do
      nil ->
        # If there's no JWK, and HS* is being used, instead of erroring, try
        # the jwt_secret instead.
        {:ok, Joken.Signer.create(alg, jwt_secret)}

      jwk ->
        # The symmetric key is stored base64url-encoded without padding.
        case Base.url_decode64(jwk["k"], padding: false) do
          {:ok, secret} -> {:ok, Joken.Signer.create(alg, secret)}
          _ -> {:error, :error_generating_signer}
        end
    end
  end

  # HS* token without a usable kid/JWKS pair: sign with the tenant secret.
  defp generate_signer(%{"alg" => alg}, jwt_secret, _jwt_jwks) when alg in @hs_algorithms,
    do: {:ok, Joken.Signer.create(alg, jwt_secret)}

  defp generate_signer(_header, _jwt_secret, _jwt_jwks), do: {:error, :error_generating_signer}
end

================================================
FILE: lib/realtime_web/channels/payloads/broadcast/replay.ex
================================================
defmodule RealtimeWeb.Channels.Payloads.Broadcast.Replay do
  @moduledoc """
  Validate broadcast replay field of the join payload.
  """
  use Ecto.Schema
  import Ecto.Changeset
  alias RealtimeWeb.Channels.Payloads.Join

  embedded_schema do
    field :limit, :integer, default: 10
    field :since, :integer, default: 0
  end

  # Casts `:limit` and `:since`, using the join payload's error messages.
  def changeset(broadcast, attrs),
    do: cast(broadcast, attrs, [:limit, :since], message: &Join.error_message/2)
end

================================================
FILE: lib/realtime_web/channels/payloads/broadcast.ex
================================================
defmodule RealtimeWeb.Channels.Payloads.Broadcast do
  @moduledoc """
  Validate broadcast field of the join payload.
  """
  use Ecto.Schema
  import Ecto.Changeset
  alias RealtimeWeb.Channels.Payloads.Join
  alias RealtimeWeb.Channels.Payloads.FlexibleBoolean

  embedded_schema do
    field :ack, FlexibleBoolean, default: false
    field :self, FlexibleBoolean, default: false
    embeds_one :replay, RealtimeWeb.Channels.Payloads.Broadcast.Replay
  end

  # Casts the two boolean flags, then the embedded `:replay` settings.
  def changeset(broadcast, attrs) do
    flags_changeset = cast(broadcast, attrs, [:ack, :self], message: &Join.error_message/2)
    cast_embed(flags_changeset, :replay, invalid_message: "unable to parse, expected a map")
  end
end

================================================
FILE: lib/realtime_web/channels/payloads/config.ex
================================================
defmodule RealtimeWeb.Channels.Payloads.Config do
  @moduledoc """
  Validate config field of the join payload.
  """
  use Ecto.Schema
  import Ecto.Changeset
  alias RealtimeWeb.Channels.Payloads.Join
  alias RealtimeWeb.Channels.Payloads.Broadcast
  alias RealtimeWeb.Channels.Payloads.Presence
  alias RealtimeWeb.Channels.Payloads.PostgresChange
  alias RealtimeWeb.Channels.Payloads.FlexibleBoolean

  embedded_schema do
    embeds_one :broadcast, Broadcast
    embeds_one :presence, Presence
    embeds_many :postgres_changes, PostgresChange
    field :private, FlexibleBoolean, default: false
  end

  # Normalises the incoming attributes before casting: nil entries are dropped
  # from list values and a nil "postgres_changes" becomes an empty list.
  def changeset(config, attrs) do
    normalized =
      Map.new(attrs, fn
        {key, values} when is_list(values) -> {key, Enum.reject(values, &is_nil/1)}
        {"postgres_changes", nil} -> {"postgres_changes", []}
        {key, value} -> {key, value}
      end)

    config
    |> cast(normalized, [:private], message: &Join.error_message/2)
    |> cast_embed(:broadcast, invalid_message: "unable to parse, expected a map")
    |> cast_embed(:presence, invalid_message: "unable to parse, expected a map")
    |> cast_embed(:postgres_changes,
      invalid_message: "unable to parse, expected an array of maps"
    )
  end
end

================================================
FILE: lib/realtime_web/channels/payloads/flexible_boolean.ex
================================================
defmodule RealtimeWeb.Channels.Payloads.FlexibleBoolean do
  @moduledoc """
# Casts booleans as-is and the strings "true"/"false" in any letter case;
# every other value is rejected with `:error`.
@impl true
def cast(true), do: {:ok, true}
def cast(false), do: {:ok, false}

def cast(value) when is_binary(value) do
  lowered = String.downcase(value)

  cond do
    lowered == "true" -> {:ok, true}
    lowered == "false" -> {:ok, false}
    true -> :error
  end
end

def cast(_unsupported), do: :error
defmodule RealtimeWeb.Channels.Payloads.Presence do
  @moduledoc """
  Validate presence field of the join payload.

  Fields:

    * `:enabled` - whether presence is enabled (defaults to `true`).
    * `:key` - presence key supplied by the client. When the client does not
      send one, `changeset/2` generates a fresh UUID for the payload.
  """
  use Ecto.Schema
  import Ecto.Changeset
  alias RealtimeWeb.Channels.Payloads.Join
  alias RealtimeWeb.Channels.Payloads.FlexibleBoolean

  # Schema defaults are evaluated once, at compile time, so this value is the
  # same for every struct built by this module. It is kept as the struct
  # default for backward compatibility, but `changeset/2` swaps it for a
  # freshly generated UUID so that different payloads never share a key.
  @placeholder_key UUID.uuid1()

  embedded_schema do
    field :enabled, FlexibleBoolean, default: true
    field :key, :any, default: @placeholder_key, virtual: true
  end

  def changeset(presence, attrs) do
    presence
    |> cast(attrs, [:enabled, :key], message: &Join.error_message/2)
    |> replace_placeholder_key()
  end

  # Only the compile-time placeholder is replaced; a key sent by the client
  # (including an explicit nil or "") or already set on the struct is kept.
  defp replace_placeholder_key(changeset) do
    if get_field(changeset, :key) == @placeholder_key do
      put_change(changeset, :key, UUID.uuid1())
    else
      changeset
    end
  end
end
# A broadcast payload is either a plain map or the tuple produced for user
# broadcasts: `{user_event, payload_encoding, payload, metadata}`. The tuple
# has four elements because `build_broadcast/2` destructures it that way.
@type payload :: map | {String.t(), :json | :binary, binary, term}

@event_type "broadcast"

@doc """
Handles a broadcast sent by a client on a public channel.

Delegates to `handle/3` without a database connection.
"""
@spec handle(payload, Socket.t()) ::
        {:reply, :ok | {:error, :payload_size_exceeded}, Socket.t()} | {:noreply, Socket.t()}
def handle(payload, %{assigns: %{private?: false}} = socket), do: handle(payload, nil, socket)

@doc """
Handles a broadcast sent by a client.

On private channels the write policy is resolved first (and cached in the
socket assigns); the message is only sent when the policy allows writing.
On public channels the message is always sent.

When `ack_broadcast` is set the client receives a reply: `:ok`, or
`{:error, :payload_size_exceeded}` when the payload is too large. Without
`ack_broadcast` nothing is replied.
"""
@spec handle(payload, pid() | nil, Socket.t()) ::
        {:reply, :ok | {:error, :payload_size_exceeded}, Socket.t()} | {:noreply, Socket.t()}
def handle(payload, db_conn, %{assigns: %{private?: true}} = socket) do
  %{
    assigns: %{
      self_broadcast: self_broadcast,
      tenant_topic: tenant_topic,
      authorization_context: authorization_context,
      policies: policies,
      tenant: tenant_id
    }
  } = socket

  case run_authorization_check(policies || %Policies{}, db_conn, authorization_context) do
    {:ok, %Policies{broadcast: %BroadcastPolicies{write: true}} = policies} ->
      socket =
        socket
        |> assign(:policies, policies)
        |> increment_rate_counter()

      %{ack_broadcast: ack_broadcast} = socket.assigns
      broadcast_and_reply(socket, tenant_id, self_broadcast, tenant_topic, ack_broadcast, payload)

    {:ok, policies} ->
      # Writing is not allowed: remember the resolved policies and drop the message.
      {:noreply, assign(socket, :policies, policies)}

    {:error, :rls_policy_error, error} ->
      log_error("RlsPolicyError", error)
      {:noreply, socket}

    {:error, error} ->
      log_error("UnableToSetPolicies", error)
      {:noreply, socket}
  end
end

def handle(payload, _db_conn, %{assigns: %{private?: false}} = socket) do
  %{
    assigns: %{
      tenant_topic: tenant_topic,
      self_broadcast: self_broadcast,
      ack_broadcast: ack_broadcast,
      tenant: tenant_id
    }
  } = socket

  socket = increment_rate_counter(socket)
  broadcast_and_reply(socket, tenant_id, self_broadcast, tenant_topic, ack_broadcast, payload)
end

# Validates the payload size, sends the message when it is valid and builds
# the channel reply. Only a payload-size error is reported back to the client,
# and only when the client asked for acknowledgements.
defp broadcast_and_reply(socket, tenant_id, self_broadcast, tenant_topic, ack_broadcast, payload) do
  res =
    case Tenants.validate_payload_size(tenant_id, payload) do
      :ok -> send_message(tenant_id, self_broadcast, tenant_topic, payload)
      {:error, error} -> {:error, error}
    end

  cond do
    ack_broadcast && match?({:error, :payload_size_exceeded}, res) ->
      {:reply, {:error, :payload_size_exceeded}, socket}

    ack_broadcast ->
      {:reply, :ok, socket}

    true ->
      {:noreply, socket}
  end
end
# Resolves the broadcast write policy only when it has not been computed yet
# (`write: nil`). Any other value is returned unchanged, wrapped in `{:ok, _}`.
defp run_authorization_check(policies, db_conn, authorization_context) do
  case policies do
    %Policies{broadcast: %BroadcastPolicies{write: nil}} ->
      Authorization.get_write_authorizations(policies, db_conn, authorization_context)

    _already_resolved ->
      {:ok, policies}
  end
end
# Formats the message, emits the system-error telemetry event when applicable
# and writes the log entry unless the socket's configured log level is higher
# than `level`. Returns `{:error, %{reason: msg}}` for errors and warnings and
# `:ok` for every other level.
defp maybe_log(%{assigns: %{log_level: configured_level}} = socket, level, code, msg) do
  formatted = build_msg(code, msg)
  emit_system_error(level, code)

  case Logger.compare_levels(configured_level, level) do
    :gt -> nil
    _lt_or_eq -> log(socket, level, code, formatted)
  end

  case level do
    :error -> {:error, %{reason: formatted}}
    :warning -> {:error, %{reason: formatted}}
    _other -> :ok
  end
end
@doc """
Dispatches a message to every subscriber except the sender (`from`).

Fastlane subscribers receive the message already encoded; the encoding is
computed once per serializer and reused for the remaining subscribers.
Subscribers without fastlane metadata receive the raw message.

For `presence_diff` events the number of fastlane deliveries is added to the
tenant presence counter. For every other event the subscriber is also sent
`:update_rate_counter`, and messages whose id was already replayed to that
subscriber are skipped.

`fastlane_pid` is the actual socket transport pid.
"""
@spec dispatch(list, pid, Broadcast.t() | UserBroadcast.t()) :: :ok
def dispatch(subscribers, from, %Broadcast{event: @presence_diff} = msg) do
  {_encoded, delivered} =
    for subscriber <- subscribers, reduce: {%{}, 0} do
      {encoded, delivered} ->
        case subscriber do
          {pid, _} when pid == from ->
            {encoded, delivered}

          {_pid, {:rc_fastlane, fastlane_pid, serializer, join_topic, log_level, tenant_id, _replayed_message_ids}} ->
            maybe_log(log_level, join_topic, msg, tenant_id)
            encoded = do_dispatch(msg, fastlane_pid, serializer, join_topic, encoded, tenant_id, log_level)
            {encoded, delivered + 1}

          {pid, _} ->
            send(pid, msg)
            {encoded, delivered}
        end
    end

  subscribers
  |> tenant_id()
  |> increment_presence_counter(msg.event, delivered)

  :ok
end

def dispatch(subscribers, from, msg) do
  id = message_id(msg)

  _ =
    for subscriber <- subscribers, reduce: %{} do
      encoded ->
        case subscriber do
          {pid, _} when pid == from ->
            encoded

          {pid, {:rc_fastlane, fastlane_pid, serializer, join_topic, log_level, tenant_id, replayed_message_ids}} ->
            if already_replayed?(id, replayed_message_ids) do
              # skip already replayed message
              encoded
            else
              send(pid, :update_rate_counter)
              maybe_log(log_level, join_topic, msg, tenant_id)
              do_dispatch(msg, fastlane_pid, serializer, join_topic, encoded, tenant_id, log_level)
            end

          {pid, _} ->
            send(pid, msg)
            encoded
        end
    end

  :ok
end
# A message without an id can never have been replayed; otherwise the id is
# looked up in the set of ids already replayed to the subscriber.
defp already_replayed?(message_id, replayed_message_ids) do
  not is_nil(message_id) and MapSet.member?(replayed_message_ids, message_id)
end
@doc """
Handles a presence event (`"track"` or `"untrack"`) sent by a client.

The event name is matched case-insensitively (ASCII). Every event counts
towards the per-client presence rate limit before it is processed. Payloads
without an `"event"` key, or whose `"event"` is not a string, are ignored and
the socket is returned unchanged.
"""
@spec handle(map(), pid() | nil, Socket.t()) ::
        {:ok, Socket.t()}
        | {:error,
           :rls_policy_error
           | :unable_to_set_policies
           | :rate_limit_exceeded
           | :client_rate_limit_exceeded
           | :unable_to_track_presence
           | :payload_size_exceeded
           | :unauthorized
           | :unknown_presence_event}
# The guard keeps a malformed client payload (e.g. a numeric "event") from
# crashing the channel inside String.downcase/2.
def handle(%{"event" => event} = payload, db_conn, socket) when is_binary(event) do
  event = String.downcase(event, :ascii)

  with {:ok, socket} <- limit_client_presence_event(socket) do
    handle_presence_event(event, payload, db_conn, socket)
  else
    {:error, :client_rate_limit_exceeded} = error -> error
  end
end

def handle(_, _, socket), do: {:ok, socket}

# Public channels can always track.
defp handle_presence_event("track", payload, _, socket) when not is_private?(socket) do
  track(socket, payload)
end

# Private channel whose presence write policy has not been resolved yet:
# resolve it, store it in the socket and retry the event.
defp handle_presence_event("track", payload, db_conn, socket)
     when is_private?(socket) and is_nil(socket.assigns.policies.presence.write) do
  %{assigns: %{authorization_context: authorization_context, policies: policies}} = socket

  case Authorization.get_write_authorizations(policies, db_conn, authorization_context) do
    {:ok, policies} ->
      socket = assign(socket, :policies, policies)
      handle_presence_event("track", payload, db_conn, socket)

    {:error, :rls_policy_error, error} ->
      log_error("RlsPolicyError", error)
      {:error, :rls_policy_error}

    {:error, error} ->
      log_error("UnableToSetPolicies", error)
      {:error, :unable_to_set_policies}
  end
end

defp handle_presence_event("track", payload, _, socket) when can_write_presence?(socket) do
  track(socket, payload)
end

defp handle_presence_event("track", _, _, socket) when not can_write_presence?(socket) do
  {:error, :unauthorized}
end

# Untracking also clears the last tracked payload from the socket assigns.
defp handle_presence_event("untrack", _, _, socket) do
  %{assigns: %{presence_key: presence_key, tenant_topic: tenant_topic}} = socket
  :ok = Presence.untrack(self(), tenant_topic, presence_key)
  {:ok, assign(socket, :presence_track_payload, nil)}
end

defp handle_presence_event(event, _, _, _) do
  log_error("UnknownPresenceEvent", event)
  {:error, :unknown_presence_event}
end
# Fixed-window rate limit for presence calls made by a single client.
#
# The limiter state lives in the `:presence_client_rate_limit` assign. A new
# window is opened when none is active or the current one has expired; within
# a window the call is rejected once `counter` has reached `max_calls`.
defp limit_client_presence_event(socket) do
  %{assigns: %{presence_client_rate_limit: limiter}} = socket
  now = System.monotonic_time(:millisecond)

  window_over? = is_nil(limiter.reset_at) or now > limiter.reset_at

  next_limiter =
    cond do
      # Start new window or reset expired window
      window_over? -> %{limiter | counter: 1, reset_at: now + limiter.window_ms}
      limiter.counter >= limiter.max_calls -> :rejected
      # Increment counter
      true -> %{limiter | counter: limiter.counter + 1}
    end

  case next_limiter do
    :rejected -> {:error, :client_rate_limit_exceeded}
    updated -> {:ok, assign(socket, :presence_client_rate_limit, updated)}
  end
end
# Walks the tracker table in chunks of up to 1000 entries whose channel count
# is zero or less and kills the matching transport pids that are still alive.
# `cont` is the ETS continuation of the previous chunk (nil on the first call).
defp chunked_killing(cont \\ nil) do
  chunk =
    case cont do
      empty when empty in [nil, false] -> :ets.select(@table, @zero_count_match, 1000)
      continuation -> :ets.select(continuation)
    end

  case chunk do
    {pids, next_cont} ->
      Logger.info("Killing #{length(pids)} transport pids with no channels open")

      for pid <- pids, Process.alive?(pid) do
        Process.exit(pid, :kill)
      end

      chunked_killing(next_cont)

    :"$end_of_table" ->
      :ok
  end
end
= case get_in(params, ["config", "presence", "enabled"]) do enabled when is_boolean(enabled) -> enabled _ -> false end socket = socket |> assign_access_token(params) |> assign(:private?, !!params["config"]["private"]) |> assign(:policies, nil) |> assign(:presence_enabled?, presence_enabled?) case Join.validate(params) do {:ok, _join} -> nil {:error, :invalid_join_payload, errors} -> log_params = params |> Map.put("access_token", "") |> Map.put("user_token", "") log_error(socket, "InvalidJoinPayload", %{changeset_errors: errors, params: log_params}) end with :ok <- SignalHandler.shutdown_in_progress?(), %Tenant{} = tenant <- Cache.get_tenant_by_external_id(tenant_id), socket = assign(socket, :presence_enabled?, presence_enabled?(socket.assigns.presence_enabled?, tenant)), :ok <- only_private?(tenant, socket), :ok <- limit_max_users(tenant, transport_pid), :ok <- limit_joins(tenant, socket), :ok <- limit_channels(tenant, socket), {:ok, claims, confirm_token_ref} <- confirm_token(socket), socket = assign_authorization_context(socket, sub_topic, claims), {:ok, db_conn} <- Connect.lookup_or_start_connection(tenant_id), {:ok, socket} <- maybe_assign_policies(sub_topic, db_conn, socket), {:ok, replayed_message_ids} <- maybe_replay_messages(params["config"], sub_topic, db_conn, tenant_id, socket.assigns.private?) do tenant_topic = Tenants.tenant_topic(tenant_id, sub_topic, !socket.assigns.private?) # fastlane subscription metadata = MessageDispatcher.fastlane_metadata( transport_pid, serializer, topic, log_level, tenant_id, replayed_message_ids ) RealtimeWeb.Endpoint.subscribe(tenant_topic, metadata: metadata) Phoenix.PubSub.subscribe(Realtime.PubSub, "realtime:operations:" <> tenant_id) is_new_api = new_api?(params) presence_enabled? = socket.assigns.presence_enabled? 
pg_change_params = pg_change_params(is_new_api, params, channel_pid, claims, sub_topic) opts = %{ is_new_api: is_new_api, pg_change_params: pg_change_params, transport_pid: transport_pid, serializer: serializer, topic: topic, tenant: tenant_id } postgres_cdc_subscribe(tenant, opts) state = %{postgres_changes: add_id_to_postgres_changes(pg_change_params)} assigns = %{ ack_broadcast: !!params["config"]["broadcast"]["ack"], confirm_token_ref: confirm_token_ref, is_new_api: is_new_api, pg_sub_ref: nil, pg_change_params: pg_change_params, presence_key: presence_key(params), self_broadcast: !!params["config"]["broadcast"]["self"], tenant_topic: tenant_topic, channel_name: sub_topic, presence_enabled?: presence_enabled? } socket = socket |> assign_counter(tenant) |> assign_presence_counter(tenant) |> assign_client_presence_rate_limit(tenant) # Start presence and add user if presence is enabled if presence_enabled?, do: send(self(), :sync_presence) UsersCounter.add(transport_pid, tenant_id) SocketDisconnect.add(tenant_id, socket) {:ok, state, assign(socket, assigns)} else {:error, :expired_token, msg} -> maybe_log_warning(socket, "InvalidJWTToken", msg) {:error, :missing_claims} -> msg = "Fields `role` and `exp` are required in JWT" maybe_log_warning(socket, "InvalidJWTToken", msg) {:error, :unauthorized, msg} -> log_error(socket, "Unauthorized", msg) {:error, :too_many_channels} -> msg = "Too many channels" log_error(socket, "ChannelRateLimitReached", msg) {:error, :too_many_connections} -> msg = "Too many connected users" log_error(socket, "ConnectionRateLimitReached", msg) {:error, :too_many_joins} -> msg = "ClientJoinRateLimitReached: Too many joins per second" send(transport_pid, %Phoenix.Socket.Broadcast{event: "disconnect"}) {:error, %{reason: msg}} {:error, :increase_connection_pool} -> msg = "Please increase your connection pool size" log_error(socket, "IncreaseConnectionPool", msg) {:error, :tenant_db_too_many_connections} -> msg = "Database can't accept more 
connections, Realtime won't connect" log_error(socket, "DatabaseLackOfConnections", msg) {:error, :connect_rate_limit_reached} -> msg = "Too many database connections attempts per second" log_error(socket, "DatabaseConnectionRateLimitReached", msg) {:error, :unable_to_set_policies, error} -> log_error(socket, "UnableToSetPolicies", error) {:error, %{reason: "Realtime was unable to connect to the project database"}} {:error, :tenant_database_unavailable} -> log_error(socket, "UnableToConnectToProject", "Realtime was unable to connect to the project database") {:error, :rpc_error, :timeout} -> log_error(socket, "TimeoutOnRpcCall", "Node request timeout") {:error, :rpc_error, reason} -> log_error(socket, "ErrorOnRpcCall", "RPC call error: " <> inspect(reason)) {:error, :initializing} -> log_error(socket, "InitializingProjectConnection", "Realtime is initializing the project connection") {:error, :tenant_database_connection_initializing} -> log_error(socket, "InitializingProjectConnection", "Connecting to the project database") {:error, :token_malformed, msg} -> log_error(socket, "MalformedJWT", msg) {:error, invalid_exp} when is_integer(invalid_exp) and invalid_exp <= 0 -> log_error(socket, "InvalidJWTToken", "Token expiration time is invalid") {:error, :private_only} -> log_error(socket, "PrivateOnly", "This project only allows private channels") {:error, :tenant_not_found} -> log_error(socket, "TenantNotFound", "Tenant with the given ID does not exist") {:error, :tenant_suspended} -> log_error(socket, "RealtimeDisabledForTenant", "Realtime disabled for this tenant") {:error, :signature_error} -> log_error(socket, "JwtSignatureError", "Failed to validate JWT signature") {:error, :shutdown_in_progress} -> log_error(socket, "RealtimeRestarting", "Realtime is restarting, please standby") {:error, :failed_to_replay_messages} -> log_error(socket, "UnableToReplayMessages", "Realtime was unable to replay messages") {:error, :invalid_replay_params} -> log_error(socket, 
"UnableToReplayMessages", "Replay params are not valid") {:error, :invalid_replay_channel} -> log_error(socket, "UnableToReplayMessages", "Replay is not allowed for public channels") {:error, :error_generating_signer} -> log_error( socket, "JwtSignerError", "Failed to generate JWT signer, check your JWT secret or JWKS configuration" ) {:error, error} -> log_error(socket, "UnknownErrorOnChannel", error) {:error, %{reason: "Unknown Error on Channel"}} end end @impl true def handle_info({:replay, messages}, socket) do for message <- messages do meta = %{"replayed" => true, "id" => message.id} payload = %{"payload" => message.payload, "event" => message.event, "type" => "broadcast", "meta" => meta} push(socket, "broadcast", payload) end {:noreply, socket} end def handle_info(:update_rate_counter, socket) do count(socket) {:ok, rate_counter} = RateCounter.get(socket.assigns.rate_counter) if rate_counter.limit.triggered do message = "Too many messages per second" shutdown_response(socket, message) else {:noreply, socket} end end def handle_info(%{event: "postgres_cdc_rls_down"}, socket) do %{assigns: %{pg_sub_ref: pg_sub_ref}} = socket Helpers.cancel_timer(pg_sub_ref) pg_sub_ref = postgres_subscribe() {:noreply, assign(socket, %{pg_sub_ref: pg_sub_ref})} end def handle_info(_msg, %{assigns: %{policies: %Policies{broadcast: %BroadcastPolicies{read: false}}}} = socket) do Logger.warning("Broadcast message ignored") {:noreply, socket} end def handle_info(%{event: type, payload: payload} = msg, socket) do count(socket) maybe_log_info(socket, msg) push(socket, type, payload) {:noreply, socket} end def handle_info(:postgres_subscribe, %{assigns: %{channel_name: channel_name}} = socket) do %{ assigns: %{ tenant: tenant_id, pg_sub_ref: pg_sub_ref, pg_change_params: pg_change_params } } = socket Helpers.cancel_timer(pg_sub_ref) %Tenant{} = tenant = Cache.get_tenant_by_external_id(tenant_id) {:ok, module} = PostgresCdc.driver(tenant.postgres_cdc_default) postgres_extension = 
PostgresCdc.filter_settings(tenant.postgres_cdc_default, tenant.extensions) args = %{"region" => postgres_extension["region"], "id" => tenant_id} case PostgresCdc.connect(module, args) do {:ok, response} -> case PostgresCdc.after_connect(module, response, postgres_extension, pg_change_params, tenant_id) do {:ok, _response} -> message = "Subscribed to PostgreSQL" maybe_log_info(socket, message) push_system_message("postgres_changes", socket, "ok", message, channel_name) {:noreply, assign(socket, :pg_sub_ref, nil)} {:error, {reason, error}} when reason in [:malformed_subscription_params, :subscription_insert_failed] -> maybe_log_warning(socket, "RealtimeDisabledForConfiguration", error) push_system_message("postgres_changes", socket, "error", error, channel_name) # No point in retrying if the params are invalid {:noreply, assign(socket, :pg_sub_ref, nil)} error -> maybe_log_warning(socket, "RealtimeDisabledForConfiguration", error) push_system_message("postgres_changes", socket, "error", error, channel_name) {:noreply, assign(socket, :pg_sub_ref, postgres_subscribe(5, 10))} end nil -> maybe_log_warning( socket, "ReconnectSubscribeToPostgres", "Re-connecting to PostgreSQL with params: " <> inspect(pg_change_params) ) {:noreply, assign(socket, :pg_sub_ref, postgres_subscribe())} error -> maybe_log_error(socket, "UnableToSubscribeToPostgres", error) push_system_message("postgres_changes", socket, "error", error, channel_name) {:noreply, assign(socket, :pg_sub_ref, postgres_subscribe(5, 10))} end rescue error -> log_warning(socket, "UnableToSubscribeToPostgres", error) push_system_message("postgres_changes", socket, "error", error, channel_name) {:noreply, assign(socket, :pg_sub_ref, postgres_subscribe(5, 10))} end def handle_info(:confirm_token, %{assigns: %{pg_change_params: pg_change_params}} = socket) do case confirm_token(socket) do {:ok, claims, confirm_token_ref} -> pg_change_params = Enum.map(pg_change_params, &Map.put(&1, :claims, claims)) {:noreply, 
assign(socket, %{confirm_token_ref: confirm_token_ref, pg_change_params: pg_change_params})} {:error, :missing_claims} -> shutdown_response(socket, "Fields `role` and `exp` are required in JWT") {:error, :expired_token, msg} -> shutdown_response(socket, msg) {:error, error} -> shutdown_response(socket, Realtime.Logs.to_log(error)) end end def handle_info(:disconnect, %{assigns: %{channel_name: channel_name}} = socket) do Logger.info("Received operational call to disconnect channel") push_system_message("system", socket, "ok", "Server requested disconnect", channel_name) {:stop, :shutdown, socket} end def handle_info(:sync_presence, %{assigns: %{presence_enabled?: true}} = socket) do case PresenceHandler.sync(socket) do :ok -> {:noreply, socket} {:error, :rate_limit_exceeded} -> shutdown_response(socket, "Too many presence messages per second") end end def handle_info(:sync_presence, socket), do: {:noreply, socket} def handle_info(_, socket), do: {:noreply, socket} @impl true def handle_in("broadcast", payload, %{assigns: %{private?: true}} = socket) do %{tenant: tenant_id} = socket.assigns with {:ok, db_conn} <- Connect.lookup_or_start_connection(tenant_id) do BroadcastHandler.handle(payload, db_conn, socket) else {:error, error} -> log_error(socket, "UnableToHandleBroadcast", error) {:noreply, socket} end end def handle_in("broadcast", payload, %{assigns: %{private?: false}} = socket) do BroadcastHandler.handle(payload, socket) end def handle_in("presence", payload, %{assigns: %{private?: true}} = socket) do %{tenant: tenant_id} = socket.assigns with {:ok, db_conn} <- Connect.lookup_or_start_connection(tenant_id), {:ok, socket} <- PresenceHandler.handle(payload, db_conn, socket) do {:reply, :ok, socket} else {:error, :client_rate_limit_exceeded} -> log_error(socket, "ClientPresenceRateLimitReached", :client_rate_limit_exceeded) shutdown_response(socket, "Client presence rate limit exceeded") {:error, :rate_limit_exceeded} -> shutdown_response(socket, "Too many 
  # Handles a client-initiated token refresh carrying a new binary token.
  #
  # The new token is stored in the assigns and the policies are reset, then the
  # token is validated, the authorization context is rebuilt from the new
  # claims and the policies are re-evaluated. When every step succeeds, any
  # pending postgres subscribe timer is cancelled, the new claims are copied
  # into each postgres_changes param and, if there is at least one param, a new
  # `:postgres_subscribe` message is scheduled. Every failure shuts the channel
  # down through `shutdown_response/2`.
  def handle_in("access_token", %{"access_token" => refresh_token}, socket) when is_binary(refresh_token) do
    %{
      assigns: %{
        tenant: tenant_id,
        pg_sub_ref: pg_sub_ref,
        channel_name: channel_name,
        pg_change_params: pg_change_params
      }
    } = socket

    # Update token and reset policies
    socket = assign(socket, %{access_token: refresh_token, policies: nil})

    # `socket` is rebound inside the `with` chain; those rebindings are only
    # visible in the `do` body. The `else` clauses below see the socket as it
    # was bound just above (new token, policies reset to nil).
    with {:ok, claims, confirm_token_ref} <- confirm_token(socket),
         socket = assign_authorization_context(socket, channel_name, claims),
         {:ok, db_conn} <- Connect.lookup_or_start_connection(tenant_id),
         {:ok, socket} <- maybe_assign_policies(channel_name, db_conn, socket) do
      # Drop any retry timer that was scheduled with the previous claims.
      Helpers.cancel_timer(pg_sub_ref)
      pg_change_params = Enum.map(pg_change_params, &Map.put(&1, :claims, claims))

      # Only re-subscribe when the channel actually has postgres_changes params.
      pg_sub_ref =
        case pg_change_params do
          [_ | _] -> postgres_subscribe()
          _ -> nil
        end

      assigns = %{
        pg_sub_ref: pg_sub_ref,
        confirm_token_ref: confirm_token_ref,
        pg_change_params: pg_change_params
      }

      {:noreply, assign(socket, assigns)}
    else
      {:error, reason, msg} when reason in ~w(unauthorized expired_token token_malformed)a ->
        shutdown_response(socket, msg)

      {:error, :missing_claims} ->
        shutdown_response(socket, "Fields `role` and `exp` are required in JWT")

      {:error, :unable_to_set_policies, _msg} ->
        shutdown_response(socket, "Realtime was unable to connect to the project database")

      # Generic 2-tuple errors; the 3-tuple rpc errors below are not shadowed
      # by this clause because the tuple sizes differ.
      {:error, error} ->
        shutdown_response(socket, inspect(error))

      {:error, :rpc_error, :timeout} ->
        shutdown_response(socket, "Node request timeout")

      {:error, :rpc_error, reason} ->
        shutdown_response(socket, "RPC call error: " <> inspect(reason))
    end
  end
{:error, :too_many_joins} error -> log_error(socket, "UnknownErrorOnCounter", error) {:error, error} end end def limit_channels(tenant, %{transport_pid: pid}) do key = Tenants.channels_per_client_key(tenant) if Registry.count_match(Realtime.Registry, key, pid) + 1 > tenant.max_channels_per_client do {:error, :too_many_channels} else Registry.register(Realtime.Registry, Tenants.channels_per_client_key(tenant), pid) :ok end end defp limit_max_users(tenant, transport_pid) do if !UsersCounter.already_counted?(transport_pid, tenant.external_id) and UsersCounter.tenant_users(tenant.external_id) >= tenant.max_concurrent_users do {:error, :too_many_connections} else :ok end end defp assign_counter(socket, tenant) do rate_args = Tenants.events_per_second_rate(tenant) RateCounter.new(rate_args) assign(socket, :rate_counter, rate_args) end defp assign_presence_counter(socket, tenant) do rate_args = Tenants.presence_events_per_second_rate(tenant) RateCounter.new(rate_args) assign(socket, :presence_rate_counter, rate_args) end defp assign_client_presence_rate_limit(socket, tenant) do config = Application.get_env(:realtime, :client_presence_rate_limit, max_calls: 5, window_ms: 30_000) max_calls = case tenant.max_client_presence_events_per_window do value when is_integer(value) and value > 0 -> value _ -> config[:max_calls] end window_ms = case tenant.client_presence_window_ms do value when is_integer(value) and value > 0 -> value _ -> config[:window_ms] end client_rate_limit = %{ max_calls: max_calls, window_ms: window_ms, counter: 0, reset_at: nil } assign(socket, :presence_client_rate_limit, client_rate_limit) end defp count(%{assigns: %{rate_counter: counter}}), do: GenCounter.add(counter.id) defp presence_key(params) do case params["config"]["presence"]["key"] do key when is_binary(key) and key != "" -> key _ -> UUID.uuid1() end end defp assign_access_token(%{assigns: %{tenant_token: tenant_token}} = socket, params) do access_token = Map.get(params, "access_token") || 
Map.get(params, "user_token") case access_token do "sb_" <> _ -> assign(socket, :access_token, tenant_token) _ -> handle_access_token(socket, params) end end defp handle_access_token(%{assigns: %{tenant_token: _tenant_token}} = socket, %{"user_token" => user_token}) when is_binary(user_token) do assign(socket, :access_token, user_token) end defp handle_access_token(%{assigns: %{tenant_token: _tenant_token}} = socket, %{"access_token" => access_token}) when is_binary(access_token) do assign(socket, :access_token, access_token) end defp handle_access_token(%{assigns: %{tenant_token: tenant_token}} = socket, _params) when is_binary(tenant_token) do assign(socket, :access_token, tenant_token) end defp confirm_token(%{assigns: assigns}) do %{jwt_secret: jwt_secret, access_token: access_token} = assigns jwt_jwks = Map.get(assigns, :jwt_jwks) with jwt_secret_dec <- Crypto.decrypt!(jwt_secret), {:ok, %{"exp" => exp} = claims} when is_integer(exp) <- ChannelsAuthorization.authorize_conn(access_token, jwt_secret_dec, jwt_jwks), exp_diff when exp_diff > 0 <- exp - Joken.current_time() do if ref = assigns[:confirm_token_ref], do: Helpers.cancel_timer(ref) interval = min(@confirm_token_ms_interval, exp_diff * 1000) ref = Process.send_after(self(), :confirm_token, interval) {:ok, claims, ref} else {:error, :token_malformed} -> {:error, :token_malformed, "The token provided is not a valid JWT"} {:error, error} -> {:error, error} {:error, error, message} -> {:error, error, message} e -> {:error, e} end end defp shutdown_response(socket, message) when is_binary(message) do %{assigns: %{channel_name: channel_name}} = socket push_system_message("system", socket, "error", message, channel_name) maybe_log_warning(socket, "ChannelShutdown", message) {:stop, :normal, socket} end defp push_system_message(extension, socket, status, error, channel_name) when is_map(error) and is_map_key(error, :error_code) and is_map_key(error, :error_message) do push(socket, "system", %{ extension: 
extension, status: status, message: "#{error.error_code}: #{error.error_message}", channel: channel_name }) end defp push_system_message(extension, socket, status, message, channel_name) when is_binary(message) do push(socket, "system", %{ extension: extension, status: status, message: message, channel: channel_name }) end defp push_system_message(extension, socket, status, message, channel_name) do push(socket, "system", %{ extension: extension, status: status, message: inspect(message), channel: channel_name }) end defp new_api?(%{"config" => _}), do: true defp new_api?(_), do: false defp pg_change_params(true, params, channel_pid, claims, _) do case get_in(params, ["config", "postgres_changes"]) do [_ | _] = params_list -> params_list |> Enum.reject(&is_nil/1) |> Enum.map(fn params -> %{ id: UUID.uuid1(), channel_pid: channel_pid, claims: claims, params: params } end) _ -> [] end end defp pg_change_params(false, _, channel_pid, claims, sub_topic) do params = case String.split(sub_topic, ":", parts: 3) do [schema, table, filter] -> %{"schema" => schema, "table" => table, "filter" => filter} [schema, table] -> %{"schema" => schema, "table" => table} [schema] -> %{"schema" => schema} end [ %{ id: UUID.uuid1(), channel_pid: channel_pid, claims: claims, params: params } ] end defp postgres_cdc_subscribe(_tenant, %{pg_change_params: []}), do: [] defp postgres_cdc_subscribe(tenant, opts) do %{ is_new_api: is_new_api, pg_change_params: pg_change_params, transport_pid: transport_pid, serializer: serializer, topic: topic } = opts ids = Enum.map(pg_change_params, fn %{id: id, params: params} -> {UUID.string_to_binary!(id), :erlang.phash2(params)} end) subscription_metadata = {:subscriber_fastlane, transport_pid, serializer, ids, topic, is_new_api} metadata = [metadata: subscription_metadata] {:ok, module} = PostgresCdc.driver(tenant.postgres_cdc_default) PostgresCdc.subscribe(module, pg_change_params, tenant.external_id, metadata) send(self(), :postgres_subscribe) 
pg_change_params end defp add_id_to_postgres_changes(pg_change_params) do Enum.map(pg_change_params, fn %{params: params} -> id = :erlang.phash2(params) Map.put(params, :id, id) end) end defp assign_authorization_context(socket, topic, claims) do authorization_context = Authorization.build_authorization_params(%{ tenant_id: socket.assigns.tenant, topic: topic, headers: Map.get(socket.assigns, :headers, []), claims: claims, role: claims["role"], sub: claims["sub"] }) assign(socket, :authorization_context, authorization_context) end defp maybe_assign_policies(topic, db_conn, %{assigns: %{private?: true}} = socket) when not is_nil(topic) do authorization_context = socket.assigns.authorization_context policies = socket.assigns.policies || %Policies{} presence_enabled? = socket.assigns.presence_enabled? with {:ok, policies} <- Authorization.get_read_authorizations(policies, db_conn, authorization_context, presence_enabled?: presence_enabled? ) do socket = assign(socket, :policies, policies) if match?(%Policies{broadcast: %BroadcastPolicies{read: false}}, socket.assigns.policies), do: {:error, :unauthorized, "You do not have permissions to read from this Channel topic: #{topic}"}, else: {:ok, socket} else {:error, :increase_connection_pool} -> {:error, :increase_connection_pool} {:error, :rls_policy_error, error} -> log_error(socket, "RlsPolicyError", error) {:error, :unauthorized, "You do not have permissions to read from this Channel topic: #{topic}"} {:error, error} -> {:error, :unable_to_set_policies, error} end end defp maybe_assign_policies(_, _, socket), do: {:ok, assign(socket, policies: nil)} defp only_private?(tenant, %{assigns: %{private?: private?}}) do if tenant.private_only and !private? do {:error, :private_only} else :ok end end defp maybe_replay_messages(%{"broadcast" => %{"replay" => _}}, _sub_topic, _db_conn, _tenant_id, false = _private?) 
defmodule RealtimeWeb.SocketDisconnect do
  @moduledoc """
  Handles the distributed disconnection of sockets for a given tenant.

  It also ensures that there are no repeated registrations of the same transport PID for a given tenant.
  """
  use Realtime.Logs

  alias Phoenix.Socket
  alias Realtime.Api.Tenant
  alias Realtime.Tenants

  @doc """
  Adds a socket to the registry associated to a tenant.

  The calling process is registered under the tenant's external id with the
  socket's transport PID as value. Nothing is registered when an entry with
  the same transport PID already exists for that tenant.
  """
  @spec add(binary(), Socket.t()) :: :ok | {:error, term()}
  def add(tenant_external_id, %Socket{transport_pid: transport_pid}) when is_binary(tenant_external_id) do
    # Selects entries for this tenant key whose stored value is the transport PID.
    transport_pid_exists_match_spec = [
      {
        {tenant_external_id, :"$1", :"$2"},
        [{:==, :"$2", transport_pid}],
        [:"$1"]
      }
    ]

    case Registry.select(__MODULE__.Registry, transport_pid_exists_match_spec) do
      [] -> {:ok, _} = Registry.register(__MODULE__.Registry, tenant_external_id, transport_pid)
      _ -> nil
    end

    :ok
  end

  @doc """
  Disconnects all sockets associated with a given tenant across all nodes in the cluster.

  Returns one result per node: `:ok` when the remote `disconnect/1` call
  returned normally, `:error` for every other outcome (error, throw, exit or
  an unreachable node).
  """
  @spec distributed_disconnect(Tenant.t() | binary()) :: list(:ok | :error)
  def distributed_disconnect(%Tenant{external_id: external_id}), do: distributed_disconnect(external_id)

  def distributed_disconnect(external_id) do
    [Node.self() | Node.list()]
    |> :erpc.multicall(__MODULE__, :disconnect, [external_id], 5000)
    |> Enum.map(fn
      # multicall tags every element with its class (:ok | :error | :throw | :exit);
      # collapse everything that is not a normal return into :error so the
      # result matches the spec.
      {:ok, _result} -> :ok
      _failure -> :error
    end)
  end

  @doc """
  Disconnects all sockets associated with a given tenant on the current node.

  Accepts either a tenant struct or its external id.
  """
  @spec disconnect(Tenant.t() | binary()) :: :ok | :error
  def disconnect(%Tenant{external_id: external_id}), do: disconnect(external_id)

  def disconnect(tenant_external_id) do
    Logger.metadata(external_id: tenant_external_id, project: tenant_external_id)
    Logger.warning("Disconnecting all sockets for tenant #{tenant_external_id}")
    Tenants.broadcast_operation_event(:disconnect, tenant_external_id)

    # Registry entries are `{registering_pid, value}`; the value is the transport PID.
    pids = Registry.lookup(__MODULE__.Registry, tenant_external_id)
    for {_, pid} <- pids, Process.alive?(pid), do: Process.exit(pid, :shutdown)

    # NOTE(review): Registry.unregister/2 only removes entries owned by the
    # calling process, so this presumably does nothing when called from a
    # process that did not call `add/2` - confirm whether it can be dropped.
    Registry.unregister(__MODULE__.Registry, tenant_external_id)

    :ok
  end
end
""" require Logger alias Realtime.UsersCounter alias Realtime.Tenants alias Realtime.RateCounter alias Realtime.Api.Tenant @spec check_tenant(Realtime.Api.Tenant.t()) :: :ok | {:error, :too_many_connections | :too_many_joins} def check_tenant(tenant) do with :ok <- max_concurrent_users_check(tenant) do max_joins_per_second_check(tenant) end end defp max_concurrent_users_check(%Tenant{max_concurrent_users: max_conn_users, external_id: external_id}) do total_conn_users = UsersCounter.tenant_users(external_id) if total_conn_users < max_conn_users, do: :ok, else: {:error, :too_many_connections} end defp max_joins_per_second_check(%Tenant{max_joins_per_second: max_joins_per_second} = tenant) do rate_args = Tenants.joins_per_second_rate(tenant.external_id, max_joins_per_second) RateCounter.new(rate_args) case RateCounter.get(rate_args) do {:ok, %{limit: %{triggered: false}}} -> :ok {:ok, %{limit: %{triggered: true}}} -> {:error, :too_many_joins} error -> Logger.error("UnknownErrorOnCounter: #{inspect(error)}") {:error, error} end end end ================================================ FILE: lib/realtime_web/channels/user_socket.ex ================================================ defmodule RealtimeWeb.UserSocket do # This is defined up here before `use Phoenix.Socket` is called so that we can define `Phoenix.Socket.init/1` # It has to be overridden because we need to set the `max_heap_size` flag from the transport process context @impl Phoenix.Socket.Transport def handle_in({payload, opts}, {_state, socket} = full_state) do Phoenix.Socket.__in__({payload, opts}, full_state) rescue e in Phoenix.Socket.InvalidMessageError -> RealtimeWeb.RealtimeChannel.Logging.log_error(socket, "MalformedWebSocketMessage", e.message) {:ok, full_state} e in Jason.DecodeError -> RealtimeWeb.RealtimeChannel.Logging.log_error(socket, "MalformedWebSocketMessage", Jason.DecodeError.message(e)) {:ok, full_state} e -> RealtimeWeb.RealtimeChannel.Logging.log_error(socket, 
"UnknownErrorOnWebSocketMessage", Exception.message(e)) {:ok, full_state} end @impl true def init(state) when is_tuple(state) do Process.flag(:max_heap_size, max_heap_size()) Process.send_after(self(), {:measure_traffic, 0, 0}, measure_traffic_interval_in_ms()) Phoenix.Socket.__init__(state) end @impl true def handle_info( {:measure_traffic, previous_recv, previous_send}, {_, %{assigns: assigns, transport_pid: transport_pid}} = state ) do tenant_external_id = Map.get(assigns, :tenant) %{latest_recv: latest_recv, latest_send: latest_send} = collect_traffic_telemetry(transport_pid, tenant_external_id, previous_recv, previous_send) Process.send_after(self(), {:measure_traffic, latest_recv, latest_send}, measure_traffic_interval_in_ms()) {:ok, state} end use Phoenix.Socket use Realtime.Logs alias Realtime.Api.Tenant alias Realtime.Crypto alias Realtime.Database alias Realtime.Tenants alias RealtimeWeb.TenantRateLimiters alias RealtimeWeb.ChannelsAuthorization alias RealtimeWeb.RealtimeChannel alias RealtimeWeb.RealtimeChannel.Logging ## Channels channel "realtime:*", RealtimeChannel @default_log_level :error @impl true def id(%{assigns: %{tenant: tenant}}), do: subscribers_id(tenant) @spec subscribers_id(String.t()) :: String.t() def subscribers_id(tenant), do: "user_socket:" <> tenant @impl true def connect(params, socket, opts) do %{uri: %{host: host}, x_headers: headers} = opts {:ok, external_id} = Database.get_external_id(host) token = access_token(params, headers) log_level = log_level(params) Logger.metadata(external_id: external_id, project: external_id) Logger.put_process_level(self(), log_level) socket = socket |> assign(:tenant, external_id) |> assign(:log_level, log_level) |> assign(:access_token, token) with %Tenant{ jwt_secret: jwt_secret, jwt_jwks: jwt_jwks, suspend: false } = tenant <- Tenants.Cache.get_tenant_by_external_id(external_id), token when is_binary(token) <- token, jwt_secret_dec <- Crypto.decrypt!(jwt_secret), {:ok, claims} <- 
ChannelsAuthorization.authorize_conn(token, jwt_secret_dec, jwt_jwks), :ok <- TenantRateLimiters.check_tenant(tenant) do assigns = %RealtimeChannel.Assigns{ claims: claims, jwt_secret: jwt_secret, jwt_jwks: jwt_jwks, tenant: external_id, log_level: log_level, tenant_token: token, headers: opts.x_headers } assigns = Map.from_struct(assigns) {:ok, assign(socket, assigns)} else nil -> log_error("TenantNotFound", "Tenant not found: #{external_id}") {:error, :tenant_not_found} %Tenant{suspend: true} -> Logging.log_error(socket, "RealtimeDisabledForTenant", "Realtime disabled for this tenant") {:error, :tenant_suspended} {:error, :expired_token, msg} -> Logging.maybe_log_warning(socket, "InvalidJWTToken", msg) {:error, :expired_token} {:error, :missing_claims} -> msg = "Fields `role` and `exp` are required in JWT" Logging.maybe_log_warning(socket, "InvalidJWTToken", msg) {:error, :missing_claims} {:error, :token_malformed} -> log_error("MalformedJWT", "The token provided is not a valid JWT") {:error, :token_malformed} {:error, :too_many_connections} -> msg = "Too many connected users" Logging.log_error(socket, "ConnectionRateLimitReached", msg) {:error, :too_many_connections} {:error, :too_many_joins} -> msg = "Too many joins per second" Logging.log_error(socket, "JoinsRateLimitReached", msg) {:error, :too_many_joins} error -> log_error("ErrorConnectingToWebsocket", error) {:error, error} end end defp access_token(params, headers) do case :proplists.lookup("x-api-key", headers) do :none -> Map.get(params, "apikey") {"x-api-key", token} -> token end end defp log_level(params) do case Map.get(params, "log_level") do level when level in ["info", "warning", "error"] -> String.to_existing_atom(level) _ -> @default_log_level end end defp max_heap_size(), do: Application.fetch_env!(:realtime, :websocket_max_heap_size) defp measure_traffic_interval_in_ms(), do: Application.fetch_env!(:realtime, :measure_traffic_interval_in_ms) defp collect_traffic_telemetry(nil, 
_tenant_external_id, previous_recv, previous_send), do: %{latest_recv: previous_recv, latest_send: previous_send} defp collect_traffic_telemetry(transport_pid, tenant_external_id, previous_recv, previous_send) do %{send_oct: latest_send, recv_oct: latest_recv} = transport_pid |> Process.info(:links) |> then(fn {:links, links} -> links end) |> Enum.filter(&is_port/1) |> Enum.reduce(%{send_oct: 0, recv_oct: 0}, fn link, acc -> case :inet.getstat(link, [:send_oct, :recv_oct]) do {:ok, stats} -> send_oct = Keyword.get(stats, :send_oct, 0) recv_oct = Keyword.get(stats, :recv_oct, 0) %{ send_oct: acc.send_oct + send_oct, recv_oct: acc.recv_oct + recv_oct } {:error, _} -> acc end end) send_delta = max(0, latest_send - previous_send) recv_delta = max(0, latest_recv - previous_recv) :telemetry.execute([:realtime, :channel, :output_bytes], %{size: send_delta}, %{tenant: tenant_external_id}) :telemetry.execute([:realtime, :channel, :input_bytes], %{size: recv_delta}, %{tenant: tenant_external_id}) %{latest_recv: latest_recv, latest_send: latest_send} end end ================================================ FILE: lib/realtime_web/controllers/broadcast_controller.ex ================================================ defmodule RealtimeWeb.BroadcastController do use RealtimeWeb, :controller use OpenApiSpex.ControllerSpecs require Logger alias Realtime.Tenants.BatchBroadcast alias RealtimeWeb.OpenApiSchemas.EmptyResponse alias RealtimeWeb.OpenApiSchemas.TenantBatchParams alias RealtimeWeb.OpenApiSchemas.TooManyRequestsResponse alias RealtimeWeb.OpenApiSchemas.UnprocessableEntityResponse action_fallback(RealtimeWeb.FallbackController) operation(:broadcast, summary: "Broadcasts a batch of messages", parameters: [ token: [ in: :header, name: "Authorization", schema: %OpenApiSpex.Schema{type: :string}, required: true, example: "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2ODAxNjIxNTR9.U9orU6YYqXAtpF8uAiw6MS553tm4XxRzxOhz2IwDhpY" ] ], request_body: TenantBatchParams.params(), 
defmodule RealtimeWeb.FallbackController do
  @moduledoc """
  Translates controller action results into valid `Plug.Conn` responses.

  See `Phoenix.Controller.action_fallback/1` for more details.
  """

  use RealtimeWeb, :controller
  use Realtime.Logs
  import RealtimeWeb.ErrorHelpers

  # Clause order matters: the first matching clause wins.

  # `{:error, :not_found}` -> 404.
  def call(conn, {:error, :not_found}) do
    log_error("TenantNotFound", "Tenant not found")

    conn
    |> put_status(:not_found)
    |> put_view(RealtimeWeb.ErrorView)
    |> render("error.json", message: "not found")
  end

  # Any changeset wrapped in an error tuple -> 422 with the changeset errors.
  # (A separate `valid?: false` clause used to follow this one but could never
  # match, because this clause already accepts every changeset.)
  def call(conn, {:error, %Ecto.Changeset{} = changeset}) do
    render_changeset_error(conn, changeset)
  end

  # `{:error, status, message}` -> the given status with the given message.
  def call(conn, {:error, status, message}) when is_atom(status) and is_binary(message) do
    log_error("UnprocessableEntity", message)

    conn
    |> put_status(status)
    |> put_view(RealtimeWeb.ErrorView)
    |> render("error.json", message: message)
  end

  # Any other `{:error, _}` -> 401. Not logged.
  def call(conn, {:error, _}) do
    conn
    |> put_status(:unauthorized)
    |> put_view(RealtimeWeb.ErrorView)
    |> render("error.json", message: "Unauthorized")
  end

  # A bare invalid changeset -> 422 with the changeset errors.
  def call(conn, %Ecto.Changeset{valid?: false} = changeset) do
    render_changeset_error(conn, changeset)
  end

  # Anything else -> 422 "Unknown error", with the raw response logged.
  def call(conn, response) do
    log_error("UnknownErrorOnController", response)

    conn
    |> put_status(:unprocessable_entity)
    |> put_view(RealtimeWeb.ErrorView)
    |> render("error.json", message: "Unknown error")
  end

  # Logs the translated changeset errors and renders them as a 422 response.
  defp render_changeset_error(conn, changeset) do
    log_error(
      "UnprocessableEntity",
      Ecto.Changeset.traverse_errors(changeset, &translate_error/1)
    )

    conn
    |> put_status(:unprocessable_entity)
    |> put_view(RealtimeWeb.ChangesetView)
    |> render("error.json", changeset: changeset)
  end
end
Logger.error("Cannot stream metrics chunk because #{inspect(reason)}")
            acc_conn
        end

      {:exit, reason}, acc_conn ->
        Logger.error("Metrics collection task exited: #{inspect(reason)}")
        acc_conn
    end)
  end

  defp bump_max_heap_size do
    system_max_heap_size = :erlang.system_info(:max_heap_size)[:size]

    if is_integer(system_max_heap_size) and system_max_heap_size > 0 do
      Process.flag(:max_heap_size, system_max_heap_size * 3)
    end
  end
end

================================================
FILE: lib/realtime_web/controllers/metrics_controller.ex
================================================
defmodule RealtimeWeb.MetricsController do
  use RealtimeWeb, :controller
  require Logger
  alias Realtime.PromEx
  alias Realtime.TenantPromEx
  alias Realtime.GenRpc

  # Global metrics from every node in the cluster.
  def index(conn, _) do
    serve_metrics(conn, [Node.self() | Node.list()], :get_global_metrics, "global cluster")
  end

  # Tenant metrics from every node in the cluster.
  def tenant(conn, _) do
    serve_metrics(conn, [Node.self() | Node.list()], :get_tenant_metrics, "tenant cluster")
  end

  # Global metrics restricted to the nodes of one region.
  def region(conn, %{"region" => region}) do
    serve_metrics(conn, Realtime.Nodes.region_nodes(region), :get_global_metrics, "global region=#{region}")
  end

  # Tenant metrics restricted to the nodes of one region.
  def region_tenant(conn, %{"region" => region}) do
    serve_metrics(conn, Realtime.Nodes.region_nodes(region), :get_tenant_metrics, "tenant region=#{region}")
  end

  # Starts a chunked text/plain response, streams the metrics of every node into it
  # and logs how long the whole collection took.
  defp serve_metrics(conn, nodes, metrics_fun, label) do
    conn =
      conn
      |> put_resp_content_type("text/plain")
      |> send_chunked(200)

    {time, conn} = :timer.tc(fn -> collect_metrics(nodes, metrics_fun, conn) end, :millisecond)
    Logger.info("Collected #{label} metrics in #{time} milliseconds")

    conn
  end

  # Calls `metrics_fun` on each node through GenRpc and writes every successful
  # result to the response as one chunk. Failures are logged and skipped so that
  # a single bad node (or a client that went away) does not crash the request.
  defp collect_metrics(nodes, metrics_fun, conn) do
    bump_max_heap_size()

    timeout = Application.fetch_env!(:realtime, :metrics_rpc_timeout)

    nodes
    |> Task.async_stream(
      fn node ->
        {node, GenRpc.call(node, __MODULE__, metrics_fun, [], timeout: timeout)}
      end,
      timeout: :infinity
    )
    |> Enum.reduce(conn, fn
      {:ok, {node, {:error, :rpc_error, reason}}}, acc_conn ->
        Logger.error("Cannot fetch metrics from the node #{inspect(node)} because #{inspect(reason)}")
        acc_conn

      {:ok, {_node, metrics}}, acc_conn ->
        # chunk/2 returns {:error, reason} (e.g. when the client closed the
        # connection); hard-matching on {:ok, _} would raise a MatchError here.
        case chunk(acc_conn, metrics) do
          {:ok, acc_conn} ->
            :erlang.garbage_collect()
            acc_conn

          {:error, reason} ->
            Logger.error("Cannot stream metrics chunk because #{inspect(reason)}")
            acc_conn
        end

      # Defensive: mirrors the handling in the legacy metrics controller.
      {:exit, reason}, acc_conn ->
        Logger.error("Metrics collection task exited: #{inspect(reason)}")
        acc_conn
    end)
  end

  # RPC entry points (invoked on each node via GenRpc from collect_metrics/3).
  def get_global_metrics do
    bump_max_heap_size()
    PromEx.get_global_metrics()
  end

  def get_tenant_metrics do
    bump_max_heap_size()
    TenantPromEx.get_metrics()
  end

  @doc deprecated: "Use get_global_metrics/0 instead"
  def get_metrics, do: get_global_metrics()

  # Raises the calling process' max heap size to three times the system-wide
  # setting, when such a (positive) system-wide limit is configured.
  defp bump_max_heap_size do
    system_max_heap_size = :erlang.system_info(:max_heap_size)[:size]

    if is_integer(system_max_heap_size) and system_max_heap_size > 0 do
      Process.flag(:max_heap_size, system_max_heap_size * 3)
    end
  end
end

================================================
FILE: lib/realtime_web/controllers/page_controller.ex
================================================
defmodule RealtimeWeb.PageController do
  use RealtimeWeb, :controller

  def index(conn, _params) do
    render(conn, "index.html")
  end

  def healthcheck(conn, _params) do
    conn
    |> put_status(:ok)
    |> text("ok")
  end
end

================================================
FILE: lib/realtime_web/controllers/ping_controller.ex
================================================
defmodule RealtimeWeb.PingController do
  use RealtimeWeb, :controller

  def ping(conn, _params) do
    json(conn, %{message: "Success"})
  end
end

================================================
FILE: lib/realtime_web/controllers/tenant_controller.ex
================================================
defmodule RealtimeWeb.TenantController do
  use RealtimeWeb, :controller
  use OpenApiSpex.ControllerSpecs
  use Realtime.Logs

  import Realtime.Logs

  alias Realtime.Api
  alias Realtime.Api.Tenant
  alias Realtime.Database
  alias Realtime.PostgresCdc
  alias Realtime.Tenants
  alias Realtime.Tenants.Cache
  alias Realtime.Tenants.Connect
  alias Realtime.Tenants.Migrations
  alias RealtimeWeb.OpenApiSchemas.EmptyResponse
  alias RealtimeWeb.OpenApiSchemas.ErrorResponse
  alias RealtimeWeb.OpenApiSchemas.NotFoundResponse
  alias
RealtimeWeb.OpenApiSchemas.TenantHealthResponse
  alias RealtimeWeb.OpenApiSchemas.TenantParams
  alias RealtimeWeb.OpenApiSchemas.TenantResponse
  alias RealtimeWeb.OpenApiSchemas.TenantResponseList
  alias RealtimeWeb.OpenApiSchemas.UnauthorizedResponse
  alias RealtimeWeb.SocketDisconnect

  @stop_timeout 10_000

  action_fallback(RealtimeWeb.FallbackController)

  plug :set_observability_attributes when action in [:show, :edit, :update, :delete, :reload, :shutdown, :health]

  operation(:index,
    summary: "List tenants",
    parameters: [
      authorization: [
        in: :header,
        name: "Authorization",
        schema: %OpenApiSpex.Schema{type: :string},
        required: true,
        example:
          "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2ODAxNjIxNTR9.U9orU6YYqXAtpF8uAiw6MS553tm4XxRzxOhz2IwDhpY"
      ]
    ],
    responses: %{
      200 => TenantResponseList.response(),
      403 => EmptyResponse.response()
    }
  )

  # Lists every tenant.
  def index(conn, _params) do
    tenants = Api.list_tenants()
    render(conn, "index.json", tenants: tenants)
  end

  operation(:show,
    summary: "Fetch tenant",
    parameters: [
      token: [
        in: :header,
        name: "Authorization",
        schema: %OpenApiSpex.Schema{type: :string},
        required: true,
        example:
          "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2ODAxNjIxNTR9.U9orU6YYqXAtpF8uAiw6MS553tm4XxRzxOhz2IwDhpY"
      ],
      tenant_id: [in: :path, description: "Tenant ID", type: :string]
    ],
    responses: %{
      200 => TenantResponse.response(),
      403 => EmptyResponse.response(),
      404 => NotFoundResponse.response()
    }
  )

  # Renders one tenant, or hands `{:error, :not_found}` to the fallback controller.
  def show(conn, %{"tenant_id" => id}) do
    tenant = Api.get_tenant_by_external_id(id)

    case tenant do
      %Tenant{} = tenant -> render(conn, "show.json", tenant: tenant)
      nil -> {:error, :not_found}
    end
  end

  operation(:create,
    summary: "Create or update tenant",
    parameters: [
      token: [
        in: :header,
        name: "Authorization",
        schema: %OpenApiSpex.Schema{type: :string},
        required: true,
        example:
          "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2ODAxNjIxNTR9.U9orU6YYqXAtpF8uAiw6MS553tm4XxRzxOhz2IwDhpY"
      ]
    ],
    request_body: TenantParams.params(),
    responses: %{
      200 => TenantResponse.response(),
      403 => EmptyResponse.response()
    }
  )

  # Validates the params against a fresh tenant changeset and, when valid,
  # delegates to update/2 (which creates or updates). An invalid changeset is
  # returned as-is for the fallback controller to render.
  @spec create(any(), map()) :: any()
  def create(conn, %{"tenant" => params}) do
    external_id = Map.get(params, "external_id")

    case Tenant.changeset(%Tenant{}, params) do
      %{valid?: true} -> update(conn, %{"tenant_id" => external_id, "tenant" => params})
      changeset -> changeset
    end
  end

  operation(:update,
    summary: "Create or update tenant",
    parameters: [
      token: [
        in: :header,
        name: "Authorization",
        schema: %OpenApiSpex.Schema{type: :string},
        required: true,
        example:
          "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2ODAxNjIxNTR9.U9orU6YYqXAtpF8uAiw6MS553tm4XxRzxOhz2IwDhpY"
      ],
      tenant_id: [in: :path, description: "Tenant ID", type: :string]
    ],
    request_body: TenantParams.params(),
    responses: %{
      200 => TenantResponse.response(),
      403 => EmptyResponse.response()
    }
  )

  # Upsert keyed by the external id:
  #   * no tenant yet -> create it, run its migrations, answer 201
  #   * tenant exists -> update it, answer 200
  # Any non-matching result of the `with` chains falls through to the fallback controller.
  def update(conn, %{"tenant_id" => external_id, "tenant" => tenant_params}) do
    # Read from the primary so a just-created tenant is not missed.
    tenant = Api.get_tenant_by_external_id(external_id, use_replica?: false)

    case tenant do
      nil ->
        tenant_params =
          tenant_params
          |> Map.put("external_id", external_id)
          |> Map.put("name", external_id)

        # Keep only well-formed extension entries (those with "type" and "settings"),
        # reduced to exactly those two keys. A missing or non-list "extensions" value
        # becomes an empty list instead of raising, so the request reaches
        # Api.create_tenant/1 and its validation rather than failing with a 500.
        extensions =
          case tenant_params["extensions"] do
            entries when is_list(entries) ->
              Enum.reduce(entries, [], fn
                %{"type" => type, "settings" => settings}, acc -> [%{"type" => type, "settings" => settings} | acc]
                _e, acc -> acc
              end)

            _missing_or_malformed ->
              []
          end

        # Map.put/3 (unlike the `%{map | key => value}` update syntax) also works
        # when the "extensions" key was absent from the request.
        with {:ok, %Tenant{} = tenant} <- Api.create_tenant(Map.put(tenant_params, "extensions", extensions)),
             res when res in [:ok, :noop] <- Migrations.run_migrations(tenant) do
          Logger.metadata(external_id: tenant.external_id, project: tenant.external_id)

          conn
          |> put_status(:created)
          |> put_resp_header("location", Routes.tenant_path(conn, :show, tenant))
          |> render("show.json", tenant: tenant)
        end

      tenant ->
        with {:ok, %Tenant{} = tenant} <- Api.update_tenant_by_external_id(tenant.external_id, tenant_params) do
          conn
          |> put_status(:ok)
          |> put_resp_header("location", Routes.tenant_path(conn, :show, tenant))
          |> render("show.json", tenant: tenant)
        end
    end
  end

  operation(:delete,
    summary: "Delete tenant",
    parameters:
[
      token: [
        in: :header,
        name: "Authorization",
        schema: %OpenApiSpex.Schema{type: :string},
        required: true,
        example:
          "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2ODAxNjIxNTR9.U9orU6YYqXAtpF8uAiw6MS553tm4XxRzxOhz2IwDhpY"
      ],
      tenant_id: [in: :path, description: "Tenant ID", type: :string]
    ],
    responses: %{
      204 => EmptyResponse.response(),
      403 => UnauthorizedResponse.response(),
      500 => ErrorResponse.response()
    }
  )

  # Deletes a tenant and tears down everything attached to it. The steps run in
  # order and the first one that does not match aborts the chain:
  # suspend -> delete -> invalidate cache -> stop CDC drivers -> drop replication slot.
  # A tenant that does not exist is answered with 204 as well.
  def delete(conn, %{"tenant_id" => tenant_id}) do
    # One second of stop budget per available CDC driver.
    stop_all_timeout = Enum.count(PostgresCdc.available_drivers()) * 1_000

    # `use_replica?: false` (with the trailing `?`) is the option name used by every
    # other lookup in this controller; the previous `use_replica:` spelling did not match it.
    with %Tenant{} = tenant <- Api.get_tenant_by_external_id(tenant_id, use_replica?: false),
         _ <- Tenants.suspend_tenant_by_external_id(tenant_id),
         true <- Api.delete_tenant_by_external_id(tenant_id),
         :ok <- Cache.distributed_invalidate_tenant_cache(tenant_id),
         :ok <- PostgresCdc.stop_all(tenant, stop_all_timeout),
         :ok <- Database.replication_slot_teardown(tenant) do
      send_resp(conn, 204, "")
    else
      nil ->
        send_resp(conn, 204, "")

      err ->
        log_error("UnableToDeleteTenant", err)
        conn |> put_status(500) |> json(err) |> halt()
    end
  end

  operation(:reload,
    summary: "Reload tenant",
    parameters: [
      token: [
        in: :header,
        name: "Authorization",
        schema: %OpenApiSpex.Schema{type: :string},
        required: true,
        example:
          "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2ODAxNjIxNTR9.U9orU6YYqXAtpF8uAiw6MS553tm4XxRzxOhz2IwDhpY"
      ],
      tenant_id: [in: :path, description: "Tenant ID", type: :string]
    ],
    responses: %{
      204 => EmptyResponse.response(),
      403 => EmptyResponse.response(),
      404 => NotFoundResponse.response()
    }
  )

  # Stops the tenant's CDC drivers, shuts down its Connect process and
  # disconnects its sockets; 404 (via the fallback controller) when unknown.
  def reload(conn, %{"tenant_id" => tenant_id}) do
    case Api.get_tenant_by_external_id(tenant_id, use_replica?: false) do
      nil ->
        {:error, :not_found}

      tenant ->
        PostgresCdc.stop_all(tenant, @stop_timeout)
        Connect.shutdown(tenant.external_id)
        SocketDisconnect.disconnect(tenant.external_id)
        send_resp(conn, 204, "")
    end
  end

  operation(:shutdown,
    summary: "Shutdowns the Connect module for a tenant",
    parameters: [
      token: [
        in: :header,
        name: "Authorization",
        schema: %OpenApiSpex.Schema{type: :string},
        required: true,
        example:
          "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2ODAxNjIxNTR9.U9orU6YYqXAtpF8uAiw6MS553tm4XxRzxOhz2IwDhpY"
      ],
      tenant_id: [in: :path, description: "Tenant ID", type: :string]
    ],
    responses: %{
      204 => EmptyResponse.response(),
      403 => EmptyResponse.response(),
      404 => NotFoundResponse.response()
    }
  )

  # Shuts down only the tenant's Connect process; 404 when the tenant is unknown.
  def shutdown(conn, %{"tenant_id" => tenant_id}) do
    case Api.get_tenant_by_external_id(tenant_id, use_replica?: false) do
      nil ->
        {:error, :not_found}

      tenant ->
        Connect.shutdown(tenant.external_id)
        send_resp(conn, 204, "")
    end
  end

  operation(:health,
    summary: "Tenant health",
    parameters: [
      token: [
        in: :header,
        name: "Authorization",
        schema: %OpenApiSpex.Schema{type: :string},
        required: true,
        example:
          "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2ODAxNjIxNTR9.U9orU6YYqXAtpF8uAiw6MS553tm4XxRzxOhz2IwDhpY"
      ],
      tenant_id: [in: :path, description: "Tenant ID", type: :string]
    ],
    responses: %{
      200 => TenantHealthResponse.response(),
      403 => EmptyResponse.response(),
      404 => NotFoundResponse.response()
    }
  )

  # Reports tenant health. An unhealthy result is still rendered as a regular
  # JSON body; only an unknown tenant is turned into `{:error, :not_found}`.
  def health(conn, %{"tenant_id" => tenant_id}) do
    case Tenants.health_check(tenant_id) do
      {:ok, response} ->
        json(conn, %{data: response})

      {:error, %{healthy: false} = response} ->
        json(conn, %{data: response})

      {:error, :tenant_not_found} ->
        {:error, :not_found}
    end
  end

  # Plug: tags the current trace and the Logger metadata with the tenant id
  # taken from the request path.
  defp set_observability_attributes(conn, _opts) do
    tenant_id = conn.path_params["tenant_id"]
    OpenTelemetry.Tracer.set_attributes(external_id: tenant_id)
    Logger.metadata(external_id: tenant_id, project: tenant_id)

    conn
  end
end

================================================
FILE: lib/realtime_web/dashboard/process_dump.ex
================================================
defmodule Realtime.Dashboard.ProcessDump do
  @moduledoc """
  Live Dashboard page to dump the current processes tree
  """
  use Phoenix.LiveDashboard.PageBuilder

  @impl true
  def menu_link(_, _) do
    {:ok, "Process Dump"}
  end

  # Builds a dump named after the current timestamp (milliseconds) and assigns
  # both its name and its content to the socket.
  @impl true
  def mount(_, _, socket) do
    ts = :os.system_time(:millisecond)
    name = "process_dump_#{ts}"
    content = dump_processes(name)
    {:ok, socket |> assign(content: content) |> assign(name: name)}
  end

  @impl true
  def render(assigns) do
    ~H"""

Process Dump

Download
After you untar the file, you can use `File.read!("filename") |> :erlang.binary_to_term` to check the contents
    """
  end

  # Serializes `Process.info/1` of every process, archives it with `tar -czf`
  # and returns the archive Base64-encoded.
  #
  # The intermediate files under /tmp are only needed until the archive has been
  # read back, so they are removed afterwards (also when a step raises);
  # otherwise every page mount would leave two files behind.
  defp dump_processes(name) do
    term =
      Process.list()
      |> Enum.map(&Process.info/1)
      |> :erlang.term_to_binary()

    path = "/tmp/#{name}"
    archive = "#{path}.tar.gz"

    try do
      File.write!(path, term)
      System.cmd("tar", ["-czf", archive, path])

      archive
      |> File.read!()
      |> Base.encode64()
    after
      # Best-effort cleanup: File.rm/1 returns an error tuple instead of raising
      # when a file was never created.
      File.rm(path)
      File.rm(archive)
    end
  end
end

================================================
FILE: lib/realtime_web/dashboard/tenant_info.ex
================================================
defmodule Realtime.Dashboard.TenantInfo do
  @moduledoc """
  Live Dashboard page to inspect tenant and extension information by project ref.

  The tenant's `jwt_secret` and the extension's `db_password` are never displayed.
  The other encrypted extension settings (`db_host`, `db_port`, `db_name`, `db_user`)
  are decrypted for display.
  """
  use Phoenix.LiveDashboard.PageBuilder

  alias Realtime.Api
  alias Realtime.Crypto

  @impl true
  def menu_link(_, _), do: {:ok, "Tenant Info"}

  @impl true
  def mount(_, _, socket) do
    {:ok, assign(socket, project_ref: "", tenant: nil, error: nil)}
  end

  # Looks the tenant up by the (trimmed) project ref and assigns either the
  # prepared tenant or a "Tenant not found" error.
  @impl true
  def handle_event("lookup", %{"project_ref" => ref}, socket) do
    ref = String.trim(ref)

    case Api.get_tenant_by_external_id(ref) do
      nil ->
        {:noreply, assign(socket, project_ref: ref, tenant: nil, error: "Tenant not found")}

      tenant ->
        {:noreply, assign(socket, project_ref: ref, tenant: prepare_tenant(tenant), error: nil)}
    end
  end

  @impl true
  def render(assigns) do
    ~H"""
Tenant Info
<%= if @error do %>

<%= @error %>

<% end %> <%= if @tenant do %>
Tenant
external_id<%= @tenant.external_id %>
name<%= @tenant.name %>
suspend<%= @tenant.suspend %>
private_only<%= @tenant.private_only %>
presence_enabled<%= @tenant.presence_enabled %>
postgres_cdc_default<%= @tenant.postgres_cdc_default %>
broadcast_adapter<%= @tenant.broadcast_adapter %>
max_concurrent_users<%= @tenant.max_concurrent_users %>
max_events_per_second<%= @tenant.max_events_per_second %>
max_bytes_per_second<%= @tenant.max_bytes_per_second %>
max_channels_per_client<%= @tenant.max_channels_per_client %>
max_joins_per_second<%= @tenant.max_joins_per_second %>
max_presence_events_per_second<%= @tenant.max_presence_events_per_second %>
max_payload_size_in_kb<%= @tenant.max_payload_size_in_kb %>
max_client_presence_events_per_window<%= @tenant.max_client_presence_events_per_window %>
client_presence_window_ms<%= @tenant.client_presence_window_ms %>
migrations_ran<%= @tenant.migrations_ran %>
inserted_at<%= @tenant.inserted_at %>
updated_at<%= @tenant.updated_at %>
<%= for ext <- @tenant.extensions do %>
Extension: <%= ext.type %>
<%= for {key, value} <- ext.settings do %> <% end %>
<%= key %><%= value %>
inserted_at<%= ext.inserted_at %>
updated_at<%= ext.updated_at %>
<% end %> <% end %>
    """
  end

  # Settings removed entirely before rendering.
  @secret_settings ["db_password"]
  # Settings stored encrypted that are decrypted for display.
  @encrypted_settings ["db_host", "db_port", "db_name", "db_user"]

  # Returns the tenant with every extension run through prepare_extension/1.
  defp prepare_tenant(tenant) do
    %{tenant | extensions: Enum.map(tenant.extensions, &prepare_extension/1)}
  end

  # Drops the secret settings, decrypts the encrypted ones and sorts the rest by key.
  # Note that `settings` becomes a sorted list of `{key, value}` tuples, not a map.
  defp prepare_extension(ext) do
    settings =
      ext.settings
      |> Map.drop(@secret_settings)
      |> Enum.map(fn
        {key, value} when key in @encrypted_settings -> {key, Crypto.decrypt!(value)}
        {key, value} -> {key, value}
      end)
      |> Enum.sort_by(&elem(&1, 0))

    %{ext | settings: settings}
  end
end

================================================
FILE: lib/realtime_web/endpoint.ex
================================================
defmodule RealtimeWeb.Endpoint do
  use Phoenix.Endpoint, otp_app: :realtime
  alias RealtimeWeb.Plugs.BaggageRequestId

  # The session will be stored in the cookie and signed,
  # this means its contents can be read but not tampered with.
  # Set :encryption_salt if you would also like to encrypt it.
  @session_options [
    store: :cookie,
    key: "_realtime_key",
    signing_salt: "5OUq5X4H"
  ]

  # Read at compile time; changing the config value requires recompiling this module.
  @fullsweep_after Application.compile_env!(:realtime, :websocket_fullsweep_after)

  # Client-facing socket. The websocket and longpoll transports register
  # different serializers for the "~> 2.0.0" protocol version.
  socket "/socket", RealtimeWeb.UserSocket,
    websocket: [
      connect_info: [:peer_data, :uri, :x_headers],
      fullsweep_after: @fullsweep_after,
      max_frame_size: 5_000_000,
      # https://github.com/ninenines/cowboy/blob/24d32de931a0c985ff7939077463fc8be939f0e9/doc/src/manual/cowboy_websocket.asciidoc#L228
      # active_n: The number of packets Cowboy will request from the socket at once.
      # This can be used to tweak the performance of the server. Higher values reduce
      # the number of times Cowboy need to request more packets from the port driver at
      # the expense of potentially higher memory being used.
      active_n: 100,
      # Skip validating UTF8 for faster frame processing.
      # Currently all text frames are handled only with JSON which already requires UTF-8
      validate_utf8: false,
      serializer: [
        {Phoenix.Socket.V1.JSONSerializer, "~> 1.0.0"},
        {RealtimeWeb.Socket.V2Serializer, "~> 2.0.0"}
      ]
    ],
    longpoll: [
      connect_info: [:peer_data, :uri, :x_headers],
      serializer: [
        {Phoenix.Socket.V1.JSONSerializer, "~> 1.0.0"},
        {Phoenix.Socket.V2.JSONSerializer, "~> 2.0.0"}
      ]
    ]

  # LiveView socket; receives the cookie session options defined above.
  socket "/live", Phoenix.LiveView.Socket, websocket: [connect_info: [session: @session_options]]

  # Serve at "/" the static files from "priv/static" directory.
  #
  # You should set gzip to true if you are running phx.digest
  # when deploying your static files in production.
  plug Plug.Static,
    at: "/",
    from: :realtime,
    gzip: false,
    only: RealtimeWeb.static_paths()

  # plug PromEx.Plug, path: "/metrics", prom_ex_module: Realtime.PromEx

  # Code reloading can be explicitly enabled under the
  # :code_reloader configuration of your endpoint.
  if code_reloading? do
    socket "/phoenix/live_reload/socket", Phoenix.LiveReloader.Socket
    plug Phoenix.LiveReloader
    plug Phoenix.CodeReloader
  end

  plug Phoenix.LiveDashboard.RequestLogger,
    param_key: "request_logger",
    cookie_key: "request_logger"

  plug BaggageRequestId, baggage_key: BaggageRequestId.baggage_key()
  # The log level of each request is decided by log_level/1 below.
  plug Plug.Telemetry, event_prefix: [:phoenix, :endpoint], log: {__MODULE__, :log_level, []}

  # Disables logging for routes /healthcheck and /api/tenants/:tenant_id/health when DISABLE_HEALTHCHECK_LOGGING=true
  # (read here from the :disable_healthcheck_logging application env, default false).
  # Returns `false` to skip logging, `:info` otherwise.
  def log_level(%{path_info: ["healthcheck"]}) do
    if Application.get_env(:realtime, :disable_healthcheck_logging, false), do: false, else: :info
  end

  def log_level(%{path_info: ["api", "tenants", _, "health"]}) do
    if Application.get_env(:realtime, :disable_healthcheck_logging, false), do: false, else: :info
  end

  # Every other route is logged at :info.
  def log_level(_), do: :info

  plug Plug.Parsers,
    parsers: [:urlencoded, :multipart, :json],
    pass: ["*/*"],
    json_decoder: Phoenix.json_library()

  plug Plug.MethodOverride
  plug Plug.Head
  plug Plug.Session, @session_options
  # The router must stay last: the plugs above prepare the conn it receives.
  plug RealtimeWeb.Router
end

================================================
FILE: lib/realtime_web/gettext.ex
================================================
defmodule RealtimeWeb.Gettext do
  @moduledoc """
  A module providing Internationalization with a gettext-based API.

  By using [Gettext](https://hexdocs.pm/gettext),
  your module gains a set of macros for translations, for example:

      import RealtimeWeb.Gettext

      # Simple translation
      gettext("Here is the string to translate")

      # Plural translation
      ngettext("Here is the string to translate",
               "Here are the strings to translate",
               3)

      # Domain-based translation
      dgettext("errors", "Here is the error message to translate")

  See the [Gettext Docs](https://hexdocs.pm/gettext) for detailed usage.
  """
  use Gettext.Backend, otp_app: :realtime
end

================================================
FILE: lib/realtime_web/live/components.ex
================================================
defmodule RealtimeWeb.Components do
  @moduledoc """
  Components for LiveView
  """

  use Phoenix.Component

  alias Phoenix.HTML.Form
  alias Phoenix.LiveView.JS

  @doc """
  Renders an h1 tag.

  ## Examples

      <.h1>My Header</.h1>
  """
  slot(:inner_block, required: true)

  def h1(assigns) do
    ~H"""

<%= render_slot(@inner_block) %>

""" end @doc """ Renders an h2 tag. ## Examples <.h2>My Header """ slot(:inner_block, required: true) def h2(assigns) do ~H"""

<%= render_slot(@inner_block) %>

""" end @doc """ Renders an h3 tag. ## Examples <.h3>My Header """ slot(:inner_block, required: true) def h3(assigns) do ~H"""

<%= render_slot(@inner_block) %>

""" end @doc """ Renders a button. ## Examples <.button>Send! <.button phx-click="go" class="ml-2">Send! """ attr :type, :string, default: nil attr :class, :string, default: nil attr :rest, :global slot(:inner_block, required: true) def button(assigns) do ~H""" """ end @doc """ Renders a link as a button. ## Examples <.link_button>Send! """ attr :href, :string, default: "#" attr :target, :string, default: "" attr :rest, :global slot(:inner_block, required: true) def link_button(assigns) do ~H""" <.link role="button" class="bg-green-600 hover:bg-green-500 text-white font-bold py-2 px-4 rounded focus:outline-none" href={@href} target={@target} {@rest} > <%= render_slot(@inner_block) %> """ end @doc """ Renders a link as a button. ## Examples <.link_button>Send! """ attr :href, :string, default: "#" attr :target, :string, default: "" attr :rest, :global slot(:inner_block, required: true) def gray_link_button(assigns) do ~H""" <.link role="button" class="bg-gray-600 hover:bg-gray-500 text-white font-bold py-2 px-4 rounded focus:outline-none" href={@href} target={@target} {@rest} > <%= render_slot(@inner_block) %> """ end @doc """ Renders a link as a button, but optionally patches the browser history. ## Examples <.patch_button>Send! """ attr :patch, :string, default: "#" attr :replace, :boolean, default: true attr :target, :string, default: "" attr :rest, :global slot(:inner_block, required: true) def patch_button(assigns) do ~H""" <.link role="button" class="bg-green-600 hover:bg-green-500 text-white font-bold py-2 px-4 rounded focus:outline-none" patch={@patch} replace={@replace} target={@target} {@rest} > <%= render_slot(@inner_block) %> """ end @doc """ Renders a modal. ## Examples <.modal id="confirm-modal"> Are you sure? 
<:confirm>OK <:cancel>Cancel JS commands may be passed to the `:on_cancel` and `on_confirm` attributes for the caller to reactor to each button press, for example: <.modal id="confirm" on_confirm={JS.push("delete")} on_cancel={JS.navigate(~p"/posts")}> Are you sure you? <:confirm>OK <:cancel>Cancel """ attr :id, :string, required: true attr :show, :boolean, default: false attr :on_cancel, JS, default: %JS{} attr :on_confirm, JS, default: %JS{} slot(:inner_block, required: true) slot(:title) slot(:subtitle) slot(:confirm) slot(:cancel) def modal(assigns) do ~H"""