Repository: robacarp/mosquito Branch: master Commit: 6ededc5e7a34 Files: 134 Total size: 306.2 KB Directory structure: gitextract_f3v7pvaw/ ├── .claude/ │ ├── hooks/ │ │ └── session-start.sh │ ├── settings.json │ └── todo.md ├── .github/ │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE/ │ │ └── bug.md │ └── workflows/ │ ├── ci.yml │ └── docs.yml ├── .gitignore ├── .tool-versions ├── :w ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── benchmark/ │ ├── benchmark.cr │ └── jobs/ │ └── emit_message_job.cr ├── demo/ │ ├── jobs/ │ │ ├── custom_serializers.cr │ │ ├── periodically_puts.cr │ │ ├── queued_job.cr │ │ ├── rate_limited_job.cr │ │ └── unique_job.cr │ └── run.cr ├── scripts/ │ ├── increment_version │ ├── lib/ │ │ └── increment_version.sh │ └── version_tag ├── shard.yml ├── spec/ │ ├── helpers/ │ │ ├── bare_base_class.cr │ │ ├── configuration_helper.cr │ │ ├── global_helpers.cr │ │ ├── logging_helper.cr │ │ ├── mock_coordinator.cr │ │ ├── mock_executor.cr │ │ ├── mock_overseer.cr │ │ ├── mock_queue_list.cr │ │ ├── mocks.cr │ │ ├── null_dequeue_adapter.cr │ │ ├── pub_sub.cr │ │ └── spy_dequeue_adapter.cr │ ├── mosquito/ │ │ ├── api/ │ │ │ ├── executor_config_spec.cr │ │ │ ├── executor_spec.cr │ │ │ ├── job_run_spec.cr │ │ │ ├── overseer_spec.cr │ │ │ ├── periodic_job_spec.cr │ │ │ ├── publisher_spec.cr │ │ │ └── queue_spec.cr │ │ ├── api_spec.cr │ │ ├── backend/ │ │ │ ├── deleting_spec.cr │ │ │ ├── executor_spec.cr │ │ │ ├── expiring_list_spec.cr │ │ │ ├── hash_storage_spec.cr │ │ │ ├── inspection_spec.cr │ │ │ ├── lock_spec.cr │ │ │ ├── overseer_spec.cr │ │ │ └── queueing_spec.cr │ │ ├── backend_spec.cr │ │ ├── base_spec.cr │ │ ├── configuration_spec.cr │ │ ├── dequeue_adapters/ │ │ │ ├── concurrency_limited_dequeue_adapter_spec.cr │ │ │ ├── remote_config_dequeue_adapter_spec.cr │ │ │ ├── shuffle_dequeue_adapter_spec.cr │ │ │ └── weighted_dequeue_adapter_spec.cr │ │ ├── exceptions_spec.cr │ │ ├── job/ │ │ │ └── job_state_spec.cr │ │ ├── job_run/ │ │ │ ├── 
rescheduling_spec.cr │ │ │ ├── running_spec.cr │ │ │ └── storage_spec.cr │ │ ├── job_run_spec.cr │ │ ├── job_spec.cr │ │ ├── key_builder_spec.cr │ │ ├── metadata_spec.cr │ │ ├── periodic_job_run_spec.cr │ │ ├── periodic_job_spec.cr │ │ ├── queue_spec.cr │ │ ├── queued_job_spec.cr │ │ ├── rate_limiter_spec.cr │ │ ├── resource_gate_spec.cr │ │ ├── runnable_spec.cr │ │ ├── runners/ │ │ │ ├── coordinator_spec.cr │ │ │ ├── executor_spec.cr │ │ │ ├── overseer_spec.cr │ │ │ ├── queue_list_spec.cr │ │ │ └── run_at_most_spec.cr │ │ ├── serializers/ │ │ │ └── primitive_serializers_spec.cr │ │ ├── testing_backend_spec.cr │ │ ├── unique_job_spec.cr │ │ └── version_spec.cr │ └── spec_helper.cr └── src/ ├── mosquito/ │ ├── api/ │ │ ├── concurrency_config.cr │ │ ├── executor.cr │ │ ├── executor_config.cr │ │ ├── job_run.cr │ │ ├── observability/ │ │ │ └── publisher.cr │ │ ├── overseer.cr │ │ ├── periodic_job.cr │ │ ├── queue.cr │ │ └── queue_list.cr │ ├── api.cr │ ├── backend.cr │ ├── base.cr │ ├── configuration.cr │ ├── dequeue_adapter.cr │ ├── dequeue_adapters/ │ │ ├── concurrency_limited_dequeue_adapter.cr │ │ ├── remote_config_dequeue_adapter.cr │ │ ├── shuffle_dequeue_adapter.cr │ │ └── weighted_dequeue_adapter.cr │ ├── exceptions.cr │ ├── gates/ │ │ ├── open_gate.cr │ │ └── threshold_gate.cr │ ├── job.cr │ ├── job_run.cr │ ├── key_builder.cr │ ├── metadata.cr │ ├── periodic_job.cr │ ├── periodic_job_run.cr │ ├── queue.cr │ ├── queued_job.cr │ ├── rate_limiter.cr │ ├── redis_backend.cr │ ├── resource_gate.cr │ ├── runnable.cr │ ├── runner.cr │ ├── runners/ │ │ ├── coordinator.cr │ │ ├── executor.cr │ │ ├── idle_wait.cr │ │ ├── overseer.cr │ │ ├── queue_list.cr │ │ └── run_at_most.cr │ ├── scheduled_job.cr │ ├── serializers/ │ │ └── primitives.cr │ ├── test_backend.cr │ ├── unique_job.cr │ └── version.cr ├── mosquito.cr └── ye_olde_redis.cr ================================================ FILE CONTENTS ================================================ 
================================================ FILE: .claude/hooks/session-start.sh ================================================ #!/bin/bash set -euo pipefail # Only run in remote (cloud) environments if [ "${CLAUDE_CODE_REMOTE:-}" != "true" ]; then exit 0 fi echo '{"async": true, "asyncTimeout": 300000}' # Read Crystal version from .tool-versions CRYSTAL_VERSION=$(grep '^crystal ' "$CLAUDE_PROJECT_DIR/.tool-versions" | awk '{print $2}') # Install Crystal compiler if not already present if ! command -v crystal &> /dev/null; then # Install system dependencies required by Crystal apt-get update apt-get install -y libgmp-dev libxml2-dev libevent-dev libgc-dev # Download and install Crystal from GitHub releases curl -fsSL "https://github.com/crystal-lang/crystal/releases/download/${CRYSTAL_VERSION}/crystal-${CRYSTAL_VERSION}-1-linux-x86_64-bundled.tar.gz" -o /tmp/crystal.tar.gz mkdir -p /usr/local/crystal tar -xzf /tmp/crystal.tar.gz -C /usr/local/crystal --strip-components=2 ln -sf /usr/local/crystal/bin/crystal /usr/local/bin/crystal ln -sf /usr/local/crystal/bin/shards /usr/local/bin/shards rm /tmp/crystal.tar.gz fi # Start Redis server if not already running if ! redis-cli ping &> /dev/null 2>&1; then redis-server --daemonize yes fi # Disable RDB persistence to avoid dump.rdb noise in the project directory redis-cli config set save "" > /dev/null 2>&1 # Install Crystal shard dependencies cd "$CLAUDE_PROJECT_DIR" shards install ================================================ FILE: .claude/settings.json ================================================ { "hooks": { "SessionStart": [ { "hooks": [ { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/session-start.sh" } ] } ] } } ================================================ FILE: .claude/todo.md ================================================ # Migration from publish_metrics branch ## Background The `publish_metrics` branch contains observability improvements that need to be migrated to master. 
This branch has ~24 commits of work dating back to October 2024. ## Functionality to Migrate Items ordered by size of change, smallest first. ### 1. Metadata Self-Cleanup ✅ Add TTL to metadata so stale entries auto-expire. - [x] Add `@metadata.delete in: 1.hour` to Executor heartbeat - [x] Add `@metadata.delete in: 1.hour` to Overseer heartbeat Already implemented - `Metadata#heartbeat!` includes `delete in: 1.hour` and both observers use it. ### 2. Overseer Event Naming Standardization ✅ Standardize to past tense for consistency with other events. - [x] "starting" → "started" - [x] "stopping" → "stopped" - [x] "stopped" → "exited" Done in `src/mosquito/api/overseer.cr`. ### 3. Executor Bug Fix ✅ - [x] Fix latent bug: executor calculating run time incorrectly (see commit `mvouzzrz`) Fixed `100_000` → `1_000_000` in microseconds calculation in `src/mosquito/api/executor.cr`. ### 4. Stable Instance IDs — Skipped `object_id` is sufficient; no need for `Random::Secure.hex` IDs. ### 5. Nested Publish Context ✅ Allow executor events to be namespaced under their parent overseer. - [x] Add parent context support to `PublishContext` initializer - [x] Pass overseer reference to Executor - [x] Update Executor observer to create PublishContext with overseer as parent - [x] Executor events publish under `[:overseer, overseer_id, :executor, executor_id]` - [x] Fix tests (executor/overseer specs, mock_overseer) Done. ### 6. Observability Gating ✅ Gate metadata writes behind existing `publish_metrics` config. - [x] Gate `heartbeat!` in Executor observer behind `metrics` macro - [x] Gate `heartbeat` in Overseer observer behind `metrics` macro (includes `register_overseer`) - [x] Gate `update_executor_list` in Overseer observer behind `metrics` macro - [x] Fix pre-existing race condition in executor spec (lazy getter initialization across fibers) Decided against a separate `Enabled` module / `enable_observability` config — no compelling reason to have two flags. 
Reused the existing `metrics` macro which checks `publish_metrics`. ### 7. Observability Tests ✅ #### Fix `assert_message_received` ✅ The helper in `spec/helpers/pub_sub.cr` doesn't actually assert — `find` returns nil and the result is discarded. All existing event publishing tests are vacuous (always pass). - [x] Fix `assert_message_received` to fail when no matching message is found - [x] Fix overseer event assertions to match actual event names #### Metrics gating ✅ - [x] Executor: heartbeat is skipped when `publish_metrics = false` - [x] Event publishing is skipped when `publish_metrics = false` (tested via publisher_spec, covers all observers) #### Queue observer events ✅ - [x] Publishes "rescheduled" event - [x] Publishes "forgotten" event - [x] Publishes "banished" event #### Publish context structure ✅ - [x] Executor publish context is nested under overseer's context - [x] Overseer publish context has correct originator key - [x] Queue publish context has correct originator key ## Files to Reference on publish_metrics Key source files: - `src/mosquito/observability/concerns/enabled.cr` - `src/mosquito/observability/concerns/publish_context.cr` - `src/mosquito/observability/concerns/publisher.cr` - `src/mosquito/observability/executor.cr` - `src/mosquito/observability/overseer.cr` - `src/mosquito/observability/queue.cr` Key test files: - `test/mosquito/observability/enabled_test.cr` - `test/mosquito/observability/executor_test.cr` - `test/mosquito/observability/overseer_test.cr` - `test/mosquito/observability/queue_test.cr` ## Notes - The publish_metrics branch has diverged (shown as `??` in jj) - resolve carefully - Current working copy already has queue observer events (rescheduled, forgotten, banished) - Duration averaging and expected_duration_ms already implemented on master - Test directory structure (`test/` instead of `spec/`) already migrated on master ================================================ FILE: .github/FUNDING.yml 
================================================ # These are supported funding model platforms github: robacarp patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel ================================================ FILE: .github/ISSUE_TEMPLATE/bug.md ================================================ --- name: Bug about: Mosquito has a bug! title: '' labels: '' assignees: robacarp --- Please include some details: Crystal version: 0.28.0 Mosquito Shard version: 0.4.0 ================================================ FILE: .github/workflows/ci.yml ================================================ name: Test and Demo on: pull_request: branches: - master push: branches: - master jobs: build: strategy: fail-fast: false matrix: crystal_version: [1.19, latest] experimental: - false include: - crystal_version: nightly experimental: true name: Build runs-on: ubuntu-latest container: image: crystallang/crystal:latest continue-on-error: ${{ matrix.experimental }} services: redis: image: redis env: REDIS_URL: redis://redis:6379/1 steps: - uses: actions/checkout@v4 - run: apt-get update - uses: crystal-lang/install-crystal@v1 with: crystal: ${{matrix.crystal_version}} - run: printenv - run: crystal --version - run: shards install - run: make test - run: make demo ================================================ FILE: .github/workflows/docs.yml ================================================ name: Build Docs on: push: branches: - master jobs: deploy: name: Running Docs runs-on: ubuntu-latest container: image: crystallang/crystal:latest steps: - uses: actions/checkout@v2 - run: apt-get update - uses: crystal-lang/install-crystal@v1 - run: crystal --version - run: shards install - run: crystal docs - name: Deploy uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN 
}} publish_dir: ./docs ================================================ FILE: .gitignore ================================================ /lib/ /bin/ /.shards/ # Libraries don't need dependency lock # Dependencies will be locked in application that uses them /shard.lock docs # Claude Code local user config (not hooks/settings which are shared) .claude/local/ CLAUD.local.md ================================================ FILE: .tool-versions ================================================ crystal 1.19.1 ================================================ FILE: :w ================================================ ================================================ FILE: CHANGELOG.md ================================================ # Changelog The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Added - Mosquito::Api now allows for inspecting the state of a mosquito cluster. Many of these features are disabled by default by the configuration property `config.publish_metrics`. - Executor api implemented in #147 - JobRun api implemented in #148 and #161 - Overseer api implemented in #150 - Queue api implemented in #153 - Mosquito now publishes a variety of events and metrics to a redis pubsub channel. This behavior is disabled by default with the configuration property `config.publish_metrics`. 
- Executor events in #154: job-started and job-finished - Overseer events in #160: starting, executor-created, executor-died, stopping, and stopped - Queue events: enqueue, dequeue, reschedule, forget, and banish - Expected job duration is now published with executor events The Mosquito API can be used to subscribe to these events with `Mosquito::API.event_receiver` - Pluggable dequeue adapters allow customizing how jobs are selected from queues (#183) - `DequeueAdapter` abstract base class defines the adapter interface - `ShuffleDequeueAdapter` is the default, preserving existing randomized behavior - `WeightedDequeueAdapter` allows queue-level prioritization via configurable weights - Configurable via `Mosquito.configure { |c| c.dequeue_adapter = ... }` - Executor count is now configurable (default increased from 3 to 6) (#184) - Set via `Mosquito.configure { |c| c.executor_count = 10 }` - Override with the `MOSQUITO_EXECUTOR_COUNT` environment variable - `JobRun#started_at` and `JobRun#finished_at` timestamps are now exposed as typed `Time?` getters (#179) - Graceful worker shutdown: on SIGTERM/SIGINT the overseer stops dequeuing, waits for in-flight executors to finish, and requeues any jobs left in pending back to waiting (#190) - Queues can now be paused and resumed. While paused, `#dequeue` returns nil and jobs accumulate until the queue is resumed. An optional duration enables automatic resumption, useful for backing off rate-limited resources. (#192) - Overseers now take ownership of job runs when dequeued, and clean up abandoned pending job runs on startup (#180) - Mosquito can now accept pre-existing backend connections via `Configuration#backend_connection`. This allows sharing a connection pool with the rest of an application. (#193) - JobRun now uses Metadata for all backend storage operations, replacing direct backend calls with the Metadata abstraction layer. - `Mosquito::UniqueJob` module provides opt-in job deduplication. 
Including the module in a job class prevents enqueueing duplicate jobs when an identical job is already waiting or scheduled. Uniqueness keys are derived from job parameters at compile time. ### Changed - (breaking) `Configuration#connection_string` has been renamed to `Configuration#backend_connection_string` (#193) - (minor breaking) Logs are now emitted from runners with a slightly different source tag. (#152) For example: The overseer boot message used to be: `INFO - mosquito.runners.overseer.4315742080: Overseer<4315742080> is starting` Now the message is simply: `INFO - mosquito.overseer: starting` - Mosquito now runs CI checks for compatibility with Crystal 1.6 - The coordinator now uses UTC time instead of monotonic time ### Fixed - Fixed a KeyError crash in the demo when job metadata was missing by using safe key access. - the queue_list runner was never being shut down, but it is now as of (#165) - Fixed a bug which would cause a mosquito server to hang at exit indefinitely if a job was mid-run during an interrupt. (#165) - Fixed a bug which would cause a correctly exiting server to prematurely exit without emitting shutdown sequence logs and events. 
(#165) - Crashed executors are now properly detected and replaced, preventing overseers from running with no executors - Overseer now correctly deregisters on clean exit - Pubsub logging now uses the `mosquito.events` namespace instead of the root `mosquito` namespace - Queue `@empty` latch no longer permanently prevents re-dequeue after a queue drains - Observer functionality is correctly gated behind the `publish_metrics` config flag - Executor events are correctly scoped to within the overseer - Fixed a latent bug which caused job duration to be reported incorrectly - Fixed `Mosquito::Api.list_queues` ### Performance - Optimized `metadata#set` to decrease the number of redis commands ## [2.0.0] ### Added - Adds a test backend, which can be used to inspect jobs that were enqueued and the parameters they were enqueued with. - Job#fail now takes an optional `retry` parameter which defaults to true, allowing a developer to explicitly mark a job as not retry-able during a job run. Additionally a `should_retry` property exists which can be set as well. - Mosquito::Configuration now provides `global_prefix` to change the global Redis namespace prefix, allowing for more than one mosquito app to share a redis instance (thanks @dammer, cf #134). ### Fixed - PeriodicJobs are now correctly run once per interval in an environment with many workers. - Running more than ~10 workers no longer causes workers to crash, fixing #137 (cf #138). - Mosquito is now more broadly compatible with jgaskins redis, swapping 0.7.0 for 0.7, and forward compatible through 0.8. (thanks @rmarronnier) - Mosquito now more gracefully responds to SIGTERM, fixes #122, cf #123. - High CPU usage on linux is no longer an issue, fixes #126, cf #128. ### Breaking Changes - The QueuedJob `params` macro has been replaced with `param` which declares only one parameter at a time. - JobRun#delete now explicitly takes an Int, rather than simply defaulting to 0 (thanks @jwoertink, cf #136). 
- removes deprecated Backend.delete(String, Int32), use Backend.delete(String, Int64) instead. - removes deprecated Queue#length, use Queue#size instead. - removes option to run the cron scheduler declaratively, it is now always on with a distributed lock. ### Performance - Dramatically decreases the time spent listing queues #120 - Replaces #keys with #scan_each to list runners #138 - Provides for multiple executors operating under a single runner #123 ## [1.0.2] ### Fixed - Mosquito::Runner.start now captures the thread with a spin lock again. The new behavior of returning immediately can be achieved by calling #start(spin: false) ## [1.0.1] [YANKED] ### Added - Implements a distributed lock for scheduler coordination. The behavior is opt-in for now, but will become the default in the next release. See #108. - Provides a helpful error message for most implementation errors dealing with declaring params. ### Changed - Mosquito::QueuedJob: the `params` macro has been deprecated in favor of `param`. See #110. - The deprecated Redis command [`rpoplpush`](https://redis.io/commands/rpoplpush/) is no longer used. This lifts the minimum redis server requirement up to 6.2.0 and jgaskins redis to > 0.7.0. - Mosquito::Runner.start no longer captures the thread with a spin lock. [DEFECT] ### Removed - Mosquito config option `run_cron_scheduler` is no longer present, multiple workers will compete for a distributed lock instead. See #108. ## [1.0.0] ### Added - Jobs can now specify their retry/reschedule logic with the #rescheduleable? and #reschedule_interval methods. - Job metadata storage engine. - Jobs can now specify `after` hooks. - Mosquito::Runner now has a `stop` method which halts the runner after completion of any running tasks. See issue #21 and pull #87. - Mosquito config option `run_cron_scheduler` is no longer present, multiple workers will compete for a distributed lock instead. 
### Changed - The storage backend is now implemented via interface, allowing alternate backends to be implemented. - The rate limiting functionality is now implemented in a module, `Mosquito::RateLimiter`. See pull #77 for migration details. - ** BREAKING ** `Job.job_type` has been replaced with `Job.queue_name`. The functionality is identical but easier to access. See #86. - `log` statements now properly identify where they're coming from rather than just 'mosquito'. See issue #78 and pull #88. - Mosquito now connects to Redis using a connection pool. See #89 - ** BREAKING ** `Mosquito.settings` is now `Mosquito.configuration`. While this is technically a public API, it's unlikely anyone is using it for anything. - Mosquito::Runner.start need not be called from a spawn, it will spawn on its own. ### Removed - Runner.idle_wait configuration is deprecated. Instead use Mosquito.configure#idle_wait. - Built in serializer for Granite models, and the Model type alias. See Serializers in the documentation if the functionality is necessary. - Mosquito no longer depends on luckyframework/habitat. ### Fixed - Boolean false can now be specified as the default value for a parameter: `params(name = false)` ## [0.11.2] - 2022-01-25 ### Fixed - #66 Jobs with no parameters can now be enqueued without specifying an empty `params()`. - #65 PeriodicJobs can now specify their run period in months. ### Notes The v0 major version is now bugfix-only. Please update to v1.0. v0 will be supported as long as it's feasible to do so. ## [0.11.1] - 2022-01-17 ### Added - Jobs can now specify `before` hooks, which can abort before the perform is triggered. - The Cron scheduler for periodic jobs can now be disabled via Mosquito.configure#run_cron_scheduler - The list of queues which are watched by the runner can now be configured via Mosquito.configure#run_from. ### Updated - Redis shard 2.8.0, removes hash shims which are no longer needed. Thanks @jwoertink. 
## [0.11.0] - 2021-04-10 Proforma release for Crystal 1.0. ## [0.10.0] - 2021-02-15 ### Added - UUID serializer helpers. ### Updated - Switches from Benchmark.measure to Time.measure, thanks @anapsix. - Runner.idle_wait is now configured via Mosquito.configure instead of directly on Mosquito::Runner. ## [0.9.0] - 2020-10-26 ### Added - Allows redis connection string to be specified via config option, thanks @watzon. ### Deprecated - Connecting to redis via implicit REDIS_URL parameter is deprecated, thanks @watzon. ## [0.8.0] - 2020-05-28 ### Fixed - (Breaking) Dead tasks which have failed and expired are now cleaned up with a Redis TTL. See Pull #48. ## [0.7.0] - 2020-05-05 ### Added - ability to configure Runner.idle_wait period, thanks @mamantoha. ### Updated - Point to Crystal 0.34.0, thanks @alex-lairan. ### Changed - Replaces `Logger` with the more flexible `Log`. ## [0.6.0] - 2019-12-19 ### Updated - Point to Crystal 0.31.1, 0.32.1. - Redis version, thanks @nsuchy. ## [0.5.0] - 2019-06-14 ### Fixed - Issue #26 Unresolved local var error, thanks @blacksmoke16. ## [0.4.0] - 2019-04-26 ### Added - Throttling logic, thanks @blacksmoke16. ## [0.3.0] - 2018-11-25 ### Updated - Point to crystal 0.27, thanks @blacksmoke16. ### Fixed - Brittle/intermittently failing tests. ## [0.2.1] - 2018-10-01 ### Added - Logo, contributed by @psikoz. - configuration for CI : `make test demo` will run all acceptance criteria. - demo section. - makefile. ### Updated - specify crystal 0.26. - simplify macro logic in QueuedJob. ## [0.2.0] - 2018-06-22 ### Updated - Specify crystal-redis 2.0 and crystal 0.25. ## [0.1.1] - 2018-06-08 ### Added - Job classes can now disable rescheduling on failure. ### Updated - Readme. - Misc typo fixes and flexibility upgrades. - Update Crystal specification 0.23.1 -> .24.2. - Correctly specify and sync version numbers from shard.yml / version.cr / git tag. - Use configurable Logger instead of writing directly to stdout. 
- Log output is now colorized and formatted to be read by human eyes. ### Changed - Breaking: Update Mosquito::Model type alias to match updates to Granite. ### Fixed - BUG: task id was mutating on each save, causing weird logging when tasks reschedule. - PERFORMANCE: adding IDLE_WAIT to prevent slamming redis when the queues are empty. Smarter querying of the queues for work. ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2019 Robert L Carpenter Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: Makefile ================================================ SHELL=/bin/bash .PHONY: all all: test shards build .PHONY: test test: crystal spec --error-trace -- --chaos .PHONY: demo demo: crystal run demo/run.cr --error-trace ================================================ FILE: README.md ================================================ mosquito [![GitHub](https://img.shields.io/github/license/mosquito-cr/mosquito.svg?style=for-the-badge)](https://tldrlegal.com/license/mit-license) Mosquito is a generic background job runner written primarily for Crystal. It draws significant inspiration from experience with the successes and failings of many Ruby gems in this vein. Once compiled, a mosquito binary can start work in about 10 milliseconds. Mosquito currently provides these features: - Delayed execution (`SendEmailJob.new(email: :welcome, address: user.email).enqueue(in: 3.minutes)`) - Scheduled / Periodic execution (`RunEveryHourJob.new`) - Job Storage in Redis - Automatic rescheduling of failed jobs - Progressively increasing delay of rescheduled failed jobs - Dead letter queue of jobs which have failed too many times - Rate limited jobs Current Limitations: - Visibility into a running job network and queue is limited. There is a working proof of concept [visualization API](https://github.com/mosquito-cr/mosquito/issues/90) and [bare-bones terminal application](https://github.com/mosquito-cr/tui-visualizer). ## Project State The Mosquito project is stable. A few folks are using Mosquito in production, and it's going well. There are some features which would be nice to have, but what is here is both tried and tested. If you're using Mosquito, please [get in touch](https://github.com/mosquito-cr/mosquito/discussions) on the Discussion board or [on Crystal chat](https://crystal-lang.org/community/) with any questions, feature suggestions, or feedback. 
## Installation Update your `shard.yml` to include mosquito: ```diff dependencies: + mosquito: + github: mosquito-cr/mosquito ``` ## Usage ### Step 1: Define a queued job ```crystal # src/jobs/puts_job.cr class PutsJob < Mosquito::QueuedJob param message : String def perform puts message end end ``` ### Step 2: Trigger that job ```crystal # src//.cr PutsJob.new(message: "ohai background job").enqueue ``` ### Step 3: Run your worker to process the job ```crystal # src/worker.cr Mosquito.configure do |settings| settings.backend_connection_string = ENV["REDIS_URL"] end Mosquito::Runner.start ``` ```text crystal run src/worker.cr ``` ### Success ``` > crystal run src/worker.cr 2017-11-06 17:07:29 - Mosquito is buzzing... 2017-11-06 17:07:51 - Running task puts_job<...> from puts_job 2017-11-06 17:07:51 - [PutsJob] ohai background job 2017-11-06 17:07:51 - task puts_job<...> succeeded, took 0.0 seconds ``` [More information about queued jobs](https://mosquito-cr.github.io/manual/index.html#queued-jobs) in the manual. ------ ## Periodic Jobs Periodic jobs run according to a predefined period -- once an hour, etc. This periodic job: ```crystal class PeriodicallyPutsJob < Mosquito::PeriodicJob run_every 1.minute def perform emotions = %w{happy sad angry optimistic political skeptical euphoric} puts "The time is now #{Time.local} and the wizard is feeling #{emotions.sample}" end end ``` Would produce this output: ```crystal 2017-11-06 17:20:13 - Mosquito is buzzing... 
2017-11-06 17:20:13 - Queues: periodically_puts_job 2017-11-06 17:20:13 - Running task periodically_puts_job<...> from periodically_puts_job 2017-11-06 17:20:13 - [PeriodicallyPutsJob] The time is now 2017-11-06 17:20:13 and the wizard is feeling skeptical 2017-11-06 17:20:13 - task periodically_puts_job<...> succeeded, took 0.0 seconds 2017-11-06 17:21:14 - Queues: periodically_puts_job 2017-11-06 17:21:14 - Running task periodically_puts_job<...> from periodically_puts_job 2017-11-06 17:21:14 - [PeriodicallyPutsJob] The time is now 2017-11-06 17:21:14 and the wizard is feeling optimistic 2017-11-06 17:21:14 - task periodically_puts_job<...> succeeded, took 0.0 seconds 2017-11-06 17:22:15 - Queues: periodically_puts_job 2017-11-06 17:22:15 - Running task periodically_puts_job<...> from periodically_puts_job 2017-11-06 17:22:15 - [PeriodicallyPutsJob] The time is now 2017-11-06 17:22:15 and the wizard is feeling political 2017-11-06 17:22:15 - task periodically_puts_job<...> succeeded, took 0.0 seconds ``` [More information on periodic jobs](https://mosquito-cr.github.io/manual/index.html#periodic-jobs) in the manual. ## Advanced usage For more advanced topics, including [use with Lucky Framework](https://mosquito-cr.github.io/manual/lucky_framework.html), [throttling or rate limiting](https://mosquito-cr.github.io/manual/rate_limiting.html), check out the [full manual](https://mosquito-cr.github.io/manual). ## Contributing Contributions are welcome. Please fork the repository, commit changes on a branch, and then open a pull request. ### Crystal Versions Mosquito aims to be compatible with the latest Crystal release, and the [latest patch for all post-1.0 minor crystal versions](https://github.com/mosquito-cr/mosquito/blob/master/.github/workflows/ci.yml#L17). For development purposes [you're encouraged to stay in sync with `.tool-versions`](https://github.com/mosquito-cr/mosquito/blob/master/.tool-versions). 
### Testing

`crystal spec` Will run the tests, or `make test` will too.

================================================
FILE: benchmark/benchmark.cr
================================================
# Throughput benchmark: enqueues 10000 EmitMessageJob runs, starts a runner in
# the background, and counts the pubsub messages those jobs publish. A live
# rate is printed while the runner is active and a summary once it stops.
require "../src/mosquito"
require "./jobs/*"

Mosquito.configure do |settings|
  settings.backend_connection_string = ENV["REDIS_URL"]? || "redis://localhost:6379/4"
  settings.publish_metrics = true
end

# Start from an empty backend so earlier runs do not affect the numbers.
Mosquito.configuration.backend.flush

Log.setup do |c|
  backend = Log::IOBackend.new
  c.bind "redis.*", :error, backend
  c.bind "mosquito.*", :error, backend
end

# First SIGINT asks the runner to stop; a second one exits immediately.
stopping = false
Signal::INT.trap do
  if stopping
    puts "SIGINT received again, crash-exiting."
    exit 1
  end

  Mosquito::Runner.stop
  stopping = true
end

Mosquito::Runner.start spin: false

EventCount = 500
events = Deque(Time).new(EventCount) # arrival time of every received message
event_count = 0
missed_messages = 0 # number of 100ms receive timeouts seen so far

channel = Mosquito.backend.subscribe(EmitMessageJob::PUBSUB_CHANNEL)

print "enqueuing benchmark jobs..."
10000.times { EmitMessageJob.new.enqueue }
puts "done"

# Collector fiber: records a timestamp per message and stops the runner after
# 100 receive timeouts in total.
spawn do
  loop do
    break unless Mosquito::Runner.keep_running

    if missed_messages >= 100
      Mosquito::Runner.stop
      break
    end

    select
    when channel.receive
      events << Time.utc
      event_count += 1
    when timeout(100.milliseconds)
      missed_messages += 1
    end
  end
end

# Rewrites the current terminal line with the running totals for `span`.
message = ->(span : Time::Span) do
  print "\r"
  print "Events: #{event_count} | "
  print "Span: #{span.total_seconds.round(2)} | "
  print "Rate: #{events.size.to_f./(span.to_f).round(2)} events/sec"
  print " "
end

loop do
  break unless Mosquito::Runner.keep_running

  # if events.size >= EventCount
  #   (events.size - EventCount).times { events.shift }
  # end

  unless events.size >= 10
    print "\r"
    print "Waiting for events..."
    sleep 0.1.seconds
    next
  end

  message.call events.last - events.first
end

Mosquito::Runner.stop wait: true

puts
print "Total events: #{event_count} | "
if events.size < 2
  # The runner can stop before any message arrives (early SIGINT, or nothing
  # but timeouts). Deque#last raises on an empty deque and a single sample has
  # no measurable span, so report that instead of crashing.
  print "Rate: n/a (not enough events to measure)"
else
  elapsed = events.last - events.first
  print "Rate: #{events.size.to_f./(elapsed.to_f).round(2)} events/sec"
end
puts

================================================
FILE: benchmark/jobs/emit_message_job.cr
================================================
# Benchmark job: publishes a random number in 0...100 (as a string) on
# PUBSUB_CHANNEL each time it runs.
class EmitMessageJob < Mosquito::QueuedJob
  PUBSUB_CHANNEL = "benchmark:messages"

  def perform
    number = Random::Secure.rand(100)
    Mosquito.backend.publish PUBSUB_CHANNEL, number.to_s
  end
end

================================================
FILE: demo/jobs/custom_serializers.cr
================================================
# Demo job with a hand-written Int32 deserializer.
class CustomSerializersJob < Mosquito::QueuedJob
  param count : Int32

  def perform
    log "deserialized: #{count}"
    metadata.increment "run_count"
  end

  # Logs the raw value and returns it multiplied by 10 -- presumably so the
  # effect of the override is visible in the demo output.
  def deserialize_int32(raw : String) : Int32
    log "using custom serialization: #{raw}"
    raw.to_i32 * 10
  end
end

CustomSerializersJob.new(3).enqueue
CustomSerializersJob.new(12).enqueue
CustomSerializersJob.new(525_600).enqueue

================================================
FILE: demo/jobs/periodically_puts.cr
================================================
# Demo periodic job declared with `run_every 3.seconds` on the :demo_queue queue.
class PeriodicallyPuts < Mosquito::PeriodicJob
  run_every 3.seconds
  queue_name :demo_queue

  def perform
    log "Hello from PeriodicallyPuts"

    # For integration testing
    metadata.increment "run_count"
  end
end

# Periodic jobs do not need to be enqueued, they are executed automatically on schedule.
================================================ FILE: demo/jobs/queued_job.cr ================================================ class QueuedJob < Mosquito::QueuedJob param count : Int32 queue_name :demo_queue def perform count.times do |i| log "ohai #{i}" end # For integration testing metadata.increment "run_count" end end QueuedJob.new(3).enqueue ================================================ FILE: demo/jobs/rate_limited_job.cr ================================================ class RateLimitedJob < Mosquito::QueuedJob before do log self.class.rate_limit_stats end include Mosquito::RateLimiter throttle limit: 3, per: 10.seconds param count : Int32 def perform log @@rate_limit_key end end 15.times do RateLimitedJob.new(3).enqueue end ================================================ FILE: demo/jobs/unique_job.cr ================================================ class UniqueJob < Mosquito::QueuedJob include Mosquito::UniqueJob unique_for 1.hour, key: [:user_id] param user_id : Int64 param message : String def perform log "Sending to user #{user_id}: #{message}" metadata.increment "run_count" end end # First enqueue — accepted UniqueJob.new(user_id: 1_i64, message: "hello").enqueue # Duplicate user_id — suppressed by uniqueness lock UniqueJob.new(user_id: 1_i64, message: "hello again").enqueue # Different user_id — accepted UniqueJob.new(user_id: 2_i64, message: "hello").enqueue ================================================ FILE: demo/run.cr ================================================ require "../src/mosquito" Mosquito.configure do |settings| settings.backend_connection_string = ENV["REDIS_URL"]? || "redis://localhost:6379/3" settings.idle_wait = 1.second end Mosquito.configuration.backend.flush Log.setup do |c| backend = Log::IOBackend.new c.bind "*", :info, backend c.bind "redis.*", :warn, backend c.bind "mosquito.*", :info, backend end require "./jobs/*" def expect_run_count(klass, expected) run_count = (klass.metadata["run_count"]? 
|| "0").to_i if run_count != expected raise "Expected #{klass.name} to have run_count == #{expected}. But got #{run_count}" else puts "#{klass.name} was executed correctly." end end stopping = false Signal::INT.trap do if stopping puts "SIGINT received again, crash-exiting." exit 1 end Mosquito::Runner.stop stopping = true end Mosquito::Runner.start(spin: false) count = 0 while count <= 19 && Mosquito::Runner.keep_running sleep 1.second count += 1 end Mosquito::Runner.stop(wait: true) puts "End of demo." puts "----------------------------------" puts "Checking integration test flags..." expect_run_count(PeriodicallyPuts, 7) expect_run_count(QueuedJob, 1) expect_run_count(CustomSerializersJob, 3) expect_run_count(RateLimitedJob, 3) expect_run_count(UniqueJob, 2)
================================================ FILE: scripts/increment_version ================================================
#!/usr/bin/env crystal

# Reads the version from shard.yml, bumps the component selected with
# -i/--increment (major, minor or patch) and prints the resulting YAML.

require "yaml"
require "option_parser"

shard_yml = "shard.yml"

# Version component to bump; stays "none" until -i/--increment is given.
to_increment = "none"

OptionParser.parse! do |p|
  p.banner = "Usage: $0 -i "

  p.on("-i field", "--increment=field", "Specifies the field to increment") do |name|
    # Store the requested field in the variable the `case` below switches on.
    # (Previously this assigned to an unused local, so the option had no effect.)
    to_increment = name
  end

  p.on("-h", "--help", "Show this help") { STDERR.puts p }

  p.invalid_option do |flag|
    STDERR.puts "ERROR: #{flag} is not a valid option."
    STDERR.puts p
    exit(1)
  end
end

document = File.read shard_yml
parsed = YAML.parse document
version = parsed["version"].as_s
major, minor, patch = version.split('.').map(&.to_i)

# Bumping a component resets every less significant component to zero.
case to_increment
when "major"
  major += 1
  minor = 0
  patch = 0
when "minor"
  minor += 1
  patch = 0
when "patch"
  patch += 1
else
  STDERR.puts "No field to increment specified" if to_increment == "none"
end

parsed["version"] = "#{major}.#{minor}.#{patch}"

pp parsed.to_yaml
================================================ FILE: scripts/lib/increment_version.sh ================================================ #!/bin/bash set -euo pipefail IFS=$'\n\t' print_help () { cat <= 1.19' license: MIT targets: demo: main: demo/run.cr mosquito: main: src/mosquito.cr dependencies: redis: github: jgaskins/redis version: ~> 0.7 development_dependencies: minitest: github: ysbaddaden/minitest.cr version: ~> 1.6.0 timecop: github: crystal-community/timecop.cr version: ~> 0.6.0 ================================================ FILE: spec/helpers/bare_base_class.cr ================================================ module Mosquito class Base # Testing wedge which wipes out the JobRun mapping for the # duration of the block. def self.bare_mapping(&block) scheduled_job_runs = @@scheduled_job_runs @@scheduled_job_runs = [] of PeriodicJobRun mapping = @@mapping @@mapping = {} of String => Job.class yield ensure @@mapping = mapping unless mapping.nil? @@scheduled_job_runs = scheduled_job_runs unless scheduled_job_runs.nil?
end end end
================================================ FILE: spec/helpers/configuration_helper.cr ================================================
module Mosquito
  class_setter configuration

  # Runs the given block with temporary configuration overrides.
  #
  # A copy (`dup`) of the current configuration and its `validated` flag are
  # captured first. Each `key: value` pair in *settings* is then assigned on
  # the live configuration and `validated` is reset to false before the block
  # runs.
  #
  # The saved configuration and flag are put back in an `ensure` clause, so
  # they are restored even when the block raises and the temporary settings
  # cannot leak into code that runs afterwards.
  macro temp_config(**settings)
    original_config = {{ @type }}.configuration.dup
    was_validated = {{ @type }}.configuration.validated

    begin
      {% for key, value in settings %}
        {{ @type }}.configuration.{{ key }} = {{ value }}
      {% end %}

      {{ @type }}.configuration.validated = false

      {{ yield }}
    ensure
      {{ @type }}.configuration = original_config
      {{ @type }}.configuration.validated = was_validated
    end
  end
end
================================================ FILE: spec/helpers/global_helpers.cr ================================================ module TestHelpers extend self # Testing wedge which provides a clean slate to ensure tests # aren't dependent on each other. def clean_slate(&block) Mosquito::Base.bare_mapping do backend = Mosquito.backend backend.flush Mosquito::TestBackend::Queue.flush_paused_queues! TestingLogBackend.instance.clear PubSub.instance.clear yield end end def backend : Mosquito::Backend Mosquito.configuration.backend end def testing_redis_url : String ENV["REDIS_URL"]? || "redis://localhost:6379/3" end end extend TestHelpers ================================================ FILE: spec/helpers/logging_helper.cr ================================================ require "log" class TestingLogBackend < Log::MemoryBackend def self.instance @@instance ||= new end def clear @entries.clear end end class Minitest::Test def log_entries TestingLogBackend.instance.entries end def logs log_entries.map(&.message) end COLOR_STRIP = /\e\[\d+(;\d+)?m/ private def logs_match(expected : Regex) : Bool log_entries .map(&.message) .map(&.gsub(COLOR_STRIP, "")) .any? { |entry| entry =~ expected } end private def logs_match(source : String, match_text : Regex) : Bool log_entries .select { |entry| entry.source == source } .map(&.message) .map(&.gsub(COLOR_STRIP, "")) .any?
{ |entry| entry =~ match_text } end

  # Asserts that some captured log message matches *expected*. The string is
  # interpolated into a regex as-is, so regex metacharacters keep their meaning.
  def assert_logs_match(expected : String)
    assert_logs_match %r|#{expected}|
  end

  # Asserts that some captured log message matches *expected*. On failure the
  # message lists every captured log message.
  def assert_logs_match(expected : Regex)
    assert logs_match(expected), "Expected logs to include #{expected}. Logs contained: \n#{log_entries.map(&.message).join("\n")}"
  end

  # Asserts that no captured log message matches *expected* (interpolated into
  # a regex as-is).
  def refute_logs_match(expected : String)
    refute_logs_match %r|#{expected}|
  end

  # Asserts that no captured log message matches *expected*. On failure the
  # message lists every captured log message.
  def refute_logs_match(expected : Regex)
    refute logs_match(expected), "Expected logs to not include #{expected}. Logs contained: \n#{log_entries.map(&.message).join("\n")}"
  end

  # Source-scoped variant: only entries whose source equals *source* are
  # considered. *expected* is interpolated into a regex as-is.
  def assert_logs_match(source : String, expected : String)
    assert_logs_match source, %r|#{expected}|
  end

  # Source-scoped variant: asserts that some entry logged by *source* matches
  # *expected*. On failure the message lists every entry with its source.
  def assert_logs_match(source : String, expected : Regex)
    assert logs_match(source, expected), "Expected logs to include #{expected}. Logs contained: \n#{log_entries.map{|e| e.source + " " + e.message}.join("\n")}"
  end

  # Source-scoped variant: asserts that no entry logged by *source* matches
  # *expected* (interpolated into a regex as-is).
  def refute_logs_match(source : String, expected : String)
    refute_logs_match source, %r|#{expected}|
  end

  # Source-scoped variant: asserts that no entry logged by *source* matches
  # *expected*. On failure the message lists every entry with its source.
  def refute_logs_match(source : String, expected : Regex)
    refute logs_match(source, expected), "Expected logs to not include #{expected}. Logs contained: \n#{log_entries.map{|e| e.source + " " + e.message}.join("\n")}"
  end

  # Empties the shared in-memory log backend.
  def clear_logs
    TestingLogBackend.instance.clear
  end
end
Log.setup do |config| config.bind "*", :debug, TestingLogBackend.instance config.bind "redis.*", :warn, TestingLogBackend.instance config.bind "mosquito.*", :trace, TestingLogBackend.instance end ================================================ FILE: spec/helpers/mock_coordinator.cr ================================================ class MockCoordinator < Mosquito::Runners::Coordinator getter schedule_count def initialize(queue_list : Mosquito::Runners::QueueList) super @schedule_count = 0 end def only_if_coordinator : Nil if @always_coordinator yield else # yikes!
# https://github.com/crystal-lang/crystal/issues/10399 super do yield end end end def always_coordinator!(always = true) @always_coordinator = always end def schedule @schedule_count += 1 super end end ================================================ FILE: spec/helpers/mock_executor.cr ================================================ class MockExecutor < Mosquito::Runners::Executor setter work_unit : Mosquito::WorkUnit? def state=(state : Mosquito::Runnable::State) super end def run self.state = Mosquito::Runnable::State::Working end def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup self.state = Mosquito::Runnable::State::Stopping spawn do self.state = Mosquito::Runnable::State::Finished wait_group.done end wait_group end def receive_job job_pipeline.receive.job_run end end ================================================ FILE: spec/helpers/mock_overseer.cr ================================================ class MockOverseer < Mosquito::Runners::Overseer property queue_list, coordinator, executors, work_handout, finished_notifier, dequeue_adapter def initialize @executor_count = Mosquito.configuration.executor_count @idle_wait = Mosquito.configuration.idle_wait @finished_notifier = Channel(Mosquito::WorkUnit?).new @queue_list = MockQueueList.new @coordinator = MockCoordinator.new queue_list @dequeue_adapter = Mosquito.configuration.dequeue_adapter @executors = [] of Mosquito::Runners::Executor @work_handout = Channel(Mosquito::WorkUnit).new @executors << build_executor observer.update_executor_list executors end def build_executor MockExecutor.new(self).as(Mosquito::Runners::Executor) end end ================================================ FILE: spec/helpers/mock_queue_list.cr ================================================ class MockQueueList < Mosquito::Runners::QueueList setter state def discovered_queues : Array(Mosquito::Queue) @discovered_queues end def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup self.state = 
Mosquito::Runnable::State::Stopping spawn do self.state = Mosquito::Runnable::State::Finished wait_group.done end wait_group end end ================================================ FILE: spec/helpers/mocks.cr ================================================ # A global place for global mocks module PerformanceCounter def perform self.class.performed! end macro included class_getter performances = 0 def self.performed! @@performances += 1 end def self.reset_performance_counter! @@performances = 0 end end end class JobWithPerformanceCounter < Mosquito::Job include PerformanceCounter end class PeriodicTestJob < Mosquito::PeriodicJob include PerformanceCounter end class QueuedTestJob < Mosquito::QueuedJob include PerformanceCounter end class QueueHookedTestJob < Mosquito::QueuedJob include PerformanceCounter property fail_before_hook = false property before_hook_ran = false property after_hook_ran = false property passed_job_config : Mosquito::JobRun? = nil before_enqueue do self.before_hook_ran = true self.passed_job_config = job if fail_before_hook false else true end end after_enqueue do self.after_hook_ran = true self.passed_job_config = job end end class PassingJob < QueuedTestJob def perform super true end end class FailingJob < QueuedTestJob property fail_with_exception = false property fail_with_retry = true property exception_message = "this is the reason #{name} failed" include PerformanceCounter def perform super case when fail_with_exception raise exception_message when ! fail_with_retry fail exception_message, retry: false else fail exception_message end end end class CustomRescheduleIntervalJob < PassingJob def reschedule_interval(retry_count) 4.seconds end end class NonReschedulableFailingJob < FailingJob def rescheduleable? 
false end end class NotImplementedJob < Mosquito::Job end class JobWithConfig < PassingJob getter config = {} of String => String def vars_from(config : Hash(String, String)) @config = config end end class JobWithNoParams < Mosquito::QueuedJob def perform log "no param job performed" end end class JobWithHooks < Mosquito::QueuedJob param should_fail : Bool before do log "Before Hook Executed" end after do log "After Hook Executed" end before do log "2nd Before Hook Executed" fail if should_fail end after do log "2nd After Hook Executed" end def perform log "Perform Executed" end end class EchoJob < Mosquito::QueuedJob queue_name "io_queue" param text : String def perform log text end end class MonthlyJob < Mosquito::PeriodicJob run_every 1.month def perform log "monthly job_run ran" end end class RateLimitedJob < Mosquito::QueuedJob include Mosquito::RateLimiter throttle key: "rate_limit", limit: Int32::MAX param should_fail : Bool = false param increment : Int32 = 1 before do log "Before Hook Executed" fail if should_fail end def perform log "Performed" end def increment_run_count_by increment end end class PreemptingJob < Mosquito::QueuedJob include PerformanceCounter property preempt_until : Time? = nil before do preempt "test preemption", until: preempt_until end end class NonReschedulablePreemptingJob < Mosquito::QueuedJob include PerformanceCounter before do preempt "not reschedulable" end def rescheduleable? 
: Bool false end end class SleepyJob < Mosquito::QueuedJob class_property should_sleep = true def perform while self.class.should_sleep sleep 0.01.seconds end end end class SecondRateLimitedJob < Mosquito::QueuedJob include Mosquito::RateLimiter throttle key: "rate_limit", limit: Int32::MAX def perform end end class UniqueTestJob < Mosquito::QueuedJob include Mosquito::UniqueJob unique_for 1.hour param user_id : Int64 param email_type : String def perform log "UniqueTestJob performed" end end class UniqueWithKeyJob < Mosquito::QueuedJob include Mosquito::UniqueJob unique_for 30.seconds, key: [:user_id] param user_id : Int64 param message : String def perform log "UniqueWithKeyJob performed" end end class UniqueNoParamsJob < Mosquito::QueuedJob include Mosquito::UniqueJob unique_for 1.minute def perform log "UniqueNoParamsJob performed" end end Mosquito::Base.register_job_mapping "job_with_config", JobWithConfig Mosquito::Base.register_job_mapping "job_with_performance_counter", JobWithPerformanceCounter Mosquito::Base.register_job_mapping "failing_job", FailingJob Mosquito::Base.register_job_mapping "non_reschedulable_failing_job", NonReschedulableFailingJob Mosquito::Base.register_job_mapping "preempting_job", PreemptingJob Mosquito::Base.register_job_mapping "non_reschedulable_preempting_job", NonReschedulablePreemptingJob def job_run_config { "year" => "1752", "name" => "the year september lost 12 days", } end def create_job_run(type = "job_with_config", config = job_run_config) Mosquito::JobRun.new(type).tap do |job_run| job_run.config = config job_run.store end end ================================================ FILE: spec/helpers/null_dequeue_adapter.cr ================================================ # A test adapter that always returns nil, simulating empty queues. class NullDequeueAdapter < Mosquito::DequeueAdapter getter dequeue_count = 0 def dequeue(queue_list : Mosquito::Runners::QueueList) : Mosquito::WorkUnit? 
@dequeue_count += 1 nil end end ================================================ FILE: spec/helpers/pub_sub.cr ================================================ module Mosquito::Observability::Publisher @[AlwaysInline] def publish(data : NamedTuple) metrics do Log.debug { "Publishing #{data} to #{@publish_context.originator}" } PubSub.instance.capture_message(@publish_context.originator, data.to_json) end end end class PubSub def self.instance @@instance ||= new end def self.eavesdrop : Array(Mosquito::Backend::BroadcastMessage) instance.listen yield instance.messages ensure instance.stop_listening end getter messages = [] of Mosquito::Backend::BroadcastMessage def initialize @listening = false end def listen @listening = true end def stop_listening @listening = false end def capture_message(originator : String, message : String) if @listening @messages << Mosquito::Backend::BroadcastMessage.new(originator, message) end end delegate clear, to: @messages module Helpers delegate eavesdrop, to: PubSub def assert_message_received(matcher : Regex) : Nil found = PubSub.instance.messages.find do |message| matcher === message.message end assert found, "Expected to find a message matching #{matcher.inspect}, but only found: #{PubSub.instance.messages.map(&.message).inspect}" end end end ================================================ FILE: spec/helpers/spy_dequeue_adapter.cr ================================================ # A test adapter that tracks which queues were checked, in order. class SpyDequeueAdapter < Mosquito::DequeueAdapter getter checked_queues = [] of String def dequeue(queue_list : Mosquito::Runners::QueueList) : Mosquito::WorkUnit? 
queue_list.queues.each do |q| @checked_queues << q.name if job_run = q.dequeue return Mosquito::WorkUnit.of(job_run, from: q) end end end end ================================================ FILE: spec/mosquito/api/executor_config_spec.cr ================================================ require "../../spec_helper" describe "Mosquito::Api::ExecutorConfig" do describe "global executor count" do it "returns nil when no override is stored" do clean_slate do result = Mosquito::Api::ExecutorConfig.stored_executor_count assert_nil result end end it "round-trips a global executor count" do clean_slate do Mosquito::Api::ExecutorConfig.store_executor_count(8) result = Mosquito::Api::ExecutorConfig.stored_executor_count assert_equal 8, result end end it "clears the global executor count" do clean_slate do Mosquito::Api::ExecutorConfig.store_executor_count(8) Mosquito::Api::ExecutorConfig.clear_executor_count result = Mosquito::Api::ExecutorConfig.stored_executor_count assert_nil result end end end describe "per-overseer executor count" do it "returns nil when no per-overseer override is stored" do clean_slate do result = Mosquito::Api::ExecutorConfig.stored_executor_count("gpu-worker-1") assert_nil result end end it "round-trips a per-overseer executor count" do clean_slate do Mosquito::Api::ExecutorConfig.store_executor_count(2, "gpu-worker-1") result = Mosquito::Api::ExecutorConfig.stored_executor_count("gpu-worker-1") assert_equal 2, result # Global is unaffected. 
global = Mosquito::Api::ExecutorConfig.stored_executor_count assert_nil global end end it "clears per-overseer without affecting global" do clean_slate do Mosquito::Api::ExecutorConfig.store_executor_count(8) Mosquito::Api::ExecutorConfig.store_executor_count(2, "gpu-worker-1") Mosquito::Api::ExecutorConfig.clear_executor_count("gpu-worker-1") per_overseer = Mosquito::Api::ExecutorConfig.stored_executor_count("gpu-worker-1") assert_nil per_overseer global = Mosquito::Api::ExecutorConfig.stored_executor_count assert_equal 8, global end end end describe ".resolve" do it "returns nil when nothing is stored" do clean_slate do result = Mosquito::Api::ExecutorConfig.resolve assert_nil result end end it "returns the global count when no overseer_id is given" do clean_slate do Mosquito::Api::ExecutorConfig.store_executor_count(8) result = Mosquito::Api::ExecutorConfig.resolve assert_equal 8, result end end it "prefers per-overseer over global" do clean_slate do Mosquito::Api::ExecutorConfig.store_executor_count(8) Mosquito::Api::ExecutorConfig.store_executor_count(2, "gpu-worker-1") result = Mosquito::Api::ExecutorConfig.resolve("gpu-worker-1") assert_equal 2, result end end it "falls back to global when per-overseer is not set" do clean_slate do Mosquito::Api::ExecutorConfig.store_executor_count(8) result = Mosquito::Api::ExecutorConfig.resolve("gpu-worker-1") assert_equal 8, result end end end describe "instance methods" do it "delegates to class-level helpers" do clean_slate do config = Mosquito::Api::ExecutorConfig.instance config.update(10) assert_equal 10, config.executor_count config.update(3, overseer_id: "worker-1") assert_equal 3, config.executor_count(overseer_id: "worker-1") config.clear(overseer_id: "worker-1") assert_nil config.executor_count(overseer_id: "worker-1") config.clear assert_nil config.executor_count end end end end describe "Mosquito::Api executor count convenience methods" do it "reads and writes global executor count" do clean_slate do 
Mosquito::Api.set_executor_count(12) assert_equal 12, Mosquito::Api.executor_count end end it "reads and writes per-overseer executor count" do clean_slate do Mosquito::Api.set_executor_count(4, overseer_id: "gpu-worker-1") assert_equal 4, Mosquito::Api.executor_count(overseer_id: "gpu-worker-1") # Global unaffected. assert_nil Mosquito::Api.executor_count end end end ================================================ FILE: spec/mosquito/api/executor_spec.cr ================================================ require "../../spec_helper" describe Mosquito::Api::Executor do let(executor_pipeline) { Channel(Mosquito::WorkUnit).new } let(finished_notifier) { Channel(Mosquito::WorkUnit?).new } let(job) { QueuedTestJob.new } let(job_run : Mosquito::JobRun) { job.enqueue } let(overseer) { MockOverseer.new } let(executor) { MockExecutor.new overseer.as(Mosquito::Runners::Overseer) } let(api) { Mosquito::Api::Executor.new executor.object_id.to_s } let(observer) { Mosquito::Observability::Executor.new executor } describe "publish context" do it "includes object_id" do assert_equal "executor:#{executor.object_id}", observer.publish_context.context end it "is nested under the overseer publish context" do assert_equal "mosquito:overseer:#{overseer.object_id}:executor:#{executor.object_id}", observer.publish_context.originator end end it "can read the current job and queue after being started, and clears it after" do Mosquito::Base.register_job_mapping job.class.name.underscore, job.class job_run.store job_run.build_job observer.execute job_run, job.class.queue do assert_equal job_run.id, api.current_job assert_equal job.class.queue.name, api.current_job_queue end assert api.current_job.nil? assert api.current_job_queue.nil? end it "returns a nil heartbeat before the executor has triggered it" do assert api.heartbeat.nil? end it "returns a valid heartbeat" do now = Time.utc Timecop.freeze now do observer.heartbeat! 
end # the heartbeat is stored as a unix epoch without millis assert_equal now.at_beginning_of_second, api.heartbeat end it "doesn't publish a heartbeat when metrics are disabled" do now = Time.utc Timecop.freeze now do executor.observer.heartbeat! end later = Time.utc + 1.minute Mosquito.temp_config(publish_metrics: false) do Timecop.freeze later do executor.observer.heartbeat! end end api = Mosquito::Api::Executor.new executor.object_id.to_s assert_equal now.at_beginning_of_second, api.heartbeat end it "publishes job started/finished events" do job_run.store job_run.build_job eavesdrop do observer.execute job_run, job.class.queue do end end assert_message_received /job-started/ assert_message_received /job-finished/ end it "measures and records average job duration" do job_run.store job_run.build_job # 100x the sleep duration below Timecop.scale(100) do observer.execute job_run, job.class.queue do sleep 0.01.seconds end end average_key = observer.average_key(job_run.type) average = Mosquito.backend.average(average_key) Mosquito.backend.delete average_key # assert that something > 0 comes back from the average. # backend tests cover calculating the average itself. assert average > 0 end end ================================================ FILE: spec/mosquito/api/job_run_spec.cr ================================================ require "../../spec_helper" describe Mosquito::Api::JobRun do # the job run timestamps are stored as a unix epoch with millis, so nanosecond precision is lost. def at_beginning_of_millisecond(time) time - (time.nanosecond.nanoseconds) + (time.millisecond.milliseconds) end getter job : QueuedTestJob { QueuedTestJob.new } getter job_run : Mosquito::JobRun { job.build_job_run } getter api : Mosquito::Api::JobRun { Mosquito::Api::JobRun.new job_run.id } it "can look up a job run" do job_run.store assert api.found? end it "can look up a job run that doesn't exist" do api = Mosquito::Api::JobRun.new "not_a_real_id" refute api.found? 
end it "can retrieve the job parameters" do job_run = JobWithHooks.new(should_fail: false).build_job_run job_run.store api = Mosquito::Api::JobRun.new job_run.id assert_equal "false", api.runtime_parameters["should_fail"] end it "can retrieve the job type" do job_run.store assert_equal job.class.name.underscore, api.type end it "can retrieve the enqueue time" do now = Time.utc Timecop.freeze now do job_run.store end expected_time = at_beginning_of_millisecond now assert_equal expected_time, api.enqueue_time end it "can retrieve the retry count" do job_run.store assert_equal 0, api.retry_count end it "can retrieve the started at timestamp" do now = at_beginning_of_millisecond Time.utc job_run = create_job_run Timecop.freeze now do job_run.run end api = Mosquito::Api::JobRun.new(job_run.id) assert_equal now, api.started_at end it "can retrieve the finished_at timestamp" do now = at_beginning_of_millisecond Time.utc job_run = create_job_run Timecop.freeze now do job_run.run end api = Mosquito::Api::JobRun.new(job_run.id) assert_equal now, api.finished_at end end ================================================ FILE: spec/mosquito/api/overseer_spec.cr ================================================ require "../../spec_helper" describe Mosquito::Api::Overseer do let(:overseer) { MockOverseer.new } let(:api) { Mosquito::Api::Overseer.new(overseer.object_id.to_s) } let(:observer) { Observability::Overseer.new(overseer) } let(:executor) { MockExecutor.new(overseer.as(Mosquito::Runners::Overseer))} describe "publish context" do it "includes object_id" do assert_equal "overseer:#{overseer.object_id}", observer.publish_context.context assert_equal "mosquito:overseer:#{overseer.object_id}", observer.publish_context.originator end end it "allows fetching a list of executors" do assert_equal 1, api.executors.size observer.update_executor_list([executor, executor]) assert_equal 2, api.executors.size end it "allows getting the latest heartbeat" do assert_nil api.last_heartbeat 
observer.heartbeat assert_instance_of Time, api.last_heartbeat end it "publishes the startup event" do eavesdrop do observer.starting end assert_message_received /started/ end it "publishes the stopping event" do eavesdrop do observer.stopping end assert_message_received /stopped/ end it "publishes the stopped event" do eavesdrop do observer.stopped end assert_message_received /exited/ end it "publishes an event when an executor dies" do eavesdrop do observer.executor_died executor end assert_message_received /died/ end it "publishes an event when an executor is created" do eavesdrop do observer.executor_created executor end assert_message_received /created/ end end ================================================ FILE: spec/mosquito/api/periodic_job_spec.cr ================================================ require "../../spec_helper" describe Mosquito::Api::PeriodicJob do getter interval : Time::Span = 2.minutes describe "publish context" do it "includes the periodic job name" do clean_slate do Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval job_run = Mosquito::Base.scheduled_job_runs.first observer = job_run.observer assert_equal "periodic_job:PeriodicTestJob", observer.publish_context.context assert_equal "mosquito:periodic_job:PeriodicTestJob", observer.publish_context.originator end end end it "can fetch a list of periodic jobs" do clean_slate do Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval periodic_jobs = Mosquito::Api::PeriodicJob.all assert_equal 1, periodic_jobs.size assert_equal "PeriodicTestJob", periodic_jobs.first.name assert_equal interval, periodic_jobs.first.interval end end it "returns nil for last_executed_at when never run" do clean_slate do Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval periodic_jobs = Mosquito::Api::PeriodicJob.all assert_nil periodic_jobs.first.last_executed_at end end it "returns the last executed time after a job runs" do now = 
Time.utc.at_beginning_of_second clean_slate do Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval job_run = Mosquito::Base.scheduled_job_runs.first Timecop.freeze(now) do job_run.try_to_execute end periodic_jobs = Mosquito::Api::PeriodicJob.all assert_equal now, periodic_jobs.first.last_executed_at end end it "publishes an event when a periodic job is enqueued" do now = Time.utc.at_beginning_of_second clean_slate do Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval eavesdrop do Timecop.freeze(now) do Mosquito::Base.scheduled_job_runs.first.try_to_execute end end assert_message_received /enqueued/ end end end ================================================ FILE: spec/mosquito/api/publisher_spec.cr ================================================ require "../../spec_helper" describe Mosquito::Api::Publisher do let(executor_pipeline) { Channel(Mosquito::WorkUnit).new } let(finished_notifier) { Channel(Mosquito::WorkUnit?).new } let(job) { QueuedTestJob.new } let(job_run : Mosquito::JobRun) { job.enqueue } let(overseer) { MockOverseer.new } let(executor) { MockExecutor.new overseer.as(Mosquito::Runners::Overseer) } let(api) { Mosquito::Api::Executor.new executor.object_id.to_s } let(observer) { Mosquito::Observability::Executor.new executor } it "doesn't publish events when metrics are disabled" do job_run.store job_run.build_job PubSub.instance.clear published_messages = eavesdrop do Mosquito.temp_config(publish_metrics: false) do observer.execute job_run, job.class.queue do end end end assert_equal 0, published_messages.size end end ================================================ FILE: spec/mosquito/api/queue_spec.cr ================================================ require "../../spec_helper" describe Mosquito::Api::Queue do let(job_classes) { [QueuedTestJob, PassingJob, FailingJob, QueueHookedTestJob] } let(queued_test_job) { QueuedTestJob.new } let(passing_job) { PassingJob.new } let(queue : Mosquito::Queue) { 
queued_test_job.class.queue }
  let(observer : Mosquito::Observability::Queue) { queue.observer }

  describe "publish context" do
    it "includes the queue name" do
      assert_equal "queue:queued_test_job", observer.publish_context.context
      assert_equal "mosquito:queue:queued_test_job", observer.publish_context.originator
    end
  end

  it "can fetch a list of current queues" do
    clean_slate do
      queued_test_job.enqueue
      passing_job.enqueue

      # Both sides are sorted so the comparison does not depend on the order
      # in which the API lists queues.
      expected_queues = ["queued_test_job", "passing_job"].sort
      queues = Mosquito::Api::Queue.all
      assert_equal 2, queues.size
      assert_equal expected_queues, queues.map(&.name).sort
    end
  end

  it "can fetch the size of a queue" do
    clean_slate do
      # One job per class is enqueued, so every queue should report size 1.
      job_classes.map(&.new).each(&.enqueue)
      queues = Mosquito::Api::Queue.all
      queues.each do |queue|
        assert_equal 1, queue.size
      end
    end
  end

  it "can fetch the size details of a queue" do
    clean_slate do
      job_classes.map(&.new).each(&.enqueue)
      queues = Mosquito::Api::Queue.all
      sizes = queues.map(&.size_details)
      sizes.each do |size|
        assert_equal 1, size["waiting"]
        assert_equal 0, size["scheduled"]
        assert_equal 0, size["pending"]
        assert_equal 0, size["dead"]
      end
    end
  end

  it "can fetch job runs from a queue" do
    clean_slate do
      job_classes.each do |job_class|
        job = job_class.new
        job.enqueue
        api = Mosquito::Api::Queue.new job_class.queue.name
        job_runs = api.waiting_job_runs
        assert_equal 1, job_runs.size
        assert_equal job.class.name.underscore, job_runs.first.type
      end
    end
  end

  # Event-publishing examples: perform a queue operation inside `eavesdrop`
  # and assert on the pattern of the received message.
  it "publishes an event when a job is enqueued" do
    eavesdrop do
      queued_test_job.enqueue
    end
    assert_message_received /enqueued/
  end

  it "publishes an event when a job is enqueued for later" do
    eavesdrop do
      queued_test_job.enqueue(60.seconds.from_now)
    end
    assert_message_received /enqueued/
  end

  it "publishes an event when a job is dequeued" do
    clean_slate do
      queued_test_job.enqueue
      eavesdrop do
        queue.dequeue
      end
    end
    assert_message_received /dequeued/
  end

  it "publishes an event when a job is rescheduled" do
    clean_slate do
      job_run = queued_test_job.build_job_run
      eavesdrop
do
        queue.enqueue job_run
        queue.reschedule job_run, 60.seconds.from_now
      end
    end
    assert_message_received /rescheduled/
  end

  it "publishes an event when a job is forgotten" do
    clean_slate do
      job_run = queued_test_job.build_job_run
      eavesdrop do
        queue.forget job_run
      end
    end
    assert_message_received /forgotten/
  end

  it "publishes an event when a job is banished" do
    clean_slate do
      job_run = queued_test_job.build_job_run
      eavesdrop do
        queue.banish job_run
      end
    end
    assert_message_received /banished/
  end
end

================================================
FILE: spec/mosquito/api_spec.cr
================================================
require "../spec_helper"

describe Mosquito::Api do
  let(queued_test_job) { QueuedTestJob.new }
  let(passing_job) { PassingJob.new }

  it "can fetch a list of queues" do
    clean_slate do
      queued_test_job.enqueue
      passing_job.enqueue

      # Membership is checked with assert_includes, so the order of the
      # returned queues does not matter here.
      queues = Mosquito::Api.list_queues
      assert_equal 2, queues.size
      queue_names = queues.map(&.name)
      assert_includes queue_names, queued_test_job.class.queue.name
      assert_includes queue_names, passing_job.class.queue.name
    end
  end
end

================================================
FILE: spec/mosquito/backend/deleting_spec.cr
================================================
require "../../spec_helper"

describe "Backend deleting" do
  # Fixtures: queue name, key, field and sample value are built with rand.
  getter queue_name : String { "test#{rand(1000)}" }
  getter queue : Mosquito::Backend::Queue { backend.queue queue_name }
  getter sample_data do
    { "test" => "#{rand(1000)}" }
  end
  getter key : String { "key-#{rand 1000}" }
  getter field : String { "field-#{rand 1000}" }
  getter job_run : Mosquito::JobRun { Mosquito::JobRun.new("mock_job_run") }

  describe "delete" do
    it "deletes immediately" do
      backend.store key, sample_data
      backend.delete key
      blank_data = {} of String => String
      assert_equal blank_data, backend.retrieve(key)
    end

    it "deletes at a ttl" do
      # Since redis is outside the control of timecop, this test is just showing
      # that #delete can be called with a ttl and we trust redis to do its job.
backend.store key, sample_data
      backend.delete key, in: 1.second
    end
  end

  describe "self.flush" do
    it "wipes the database" do
      clean_slate do
        backend.set key, field, "1"
        backend.flush
        assert_nil backend.get key, field
      end
    end
  end

  describe "#flush" do
    it "empties the queues" do
      clean_slate do
        # add a job_run to waiting
        queue.enqueue job_run

        # add a job_run to scheduled
        queue.schedule job_run, at: 1.second.from_now

        # move a job_run to pending
        # (the dequeued value is not used by the assertions below)
        pending_job_run = queue.dequeue

        # add a job_run to the dead queue
        queue.terminate job_run

        queue.flush

        # After the queue-level flush every list is expected to be empty.
        empty_set = [] of String
        assert_equal empty_set, queue.list_waiting
        assert_equal empty_set, queue.list_scheduled
        assert_equal empty_set, queue.list_pending
        assert_equal empty_set, queue.list_dead
      end
    end

    it "but doesn't truncate the database" do
      clean_slate do
        # A value stored outside the queue must still be readable after
        # queue.flush.
        backend.set key, field, "value"
        queue.flush
        assert_equal "value", backend.get key, field
      end
    end
  end
end

================================================
FILE: spec/mosquito/backend/executor_spec.cr
================================================
require "../../spec_helper"

describe Mosquito::Backend do
  getter key : String { "key-#{rand 1000}" }

  it "can calculate an average" do
    backend.average_push key, 10
    backend.average_push key, 20
    backend.average_push key, 30
    assert_equal 20, backend.average key
  end

  it "correctly rolls off old values for the window size" do
    # Five pushes with window_size 3; the expected average of 40 is the mean
    # of the last three values (30, 40, 50).
    backend.average_push key, 10, window_size: 3
    backend.average_push key, 20, window_size: 3
    backend.average_push key, 30, window_size: 3
    backend.average_push key, 40, window_size: 3
    backend.average_push key, 50, window_size: 3
    assert_equal 40, backend.average key
  end
end

================================================
FILE: spec/mosquito/backend/expiring_list_spec.cr
================================================
require "../../spec_helper"

describe Mosquito::RedisBackend do
  describe "expiring lists" do
    it "can add an item to a list" do
      now = Time.utc
      key = "exp-list-test"
      items = ["item1", "item2", "item3"]
redis_backend = backend.as(Mosquito::RedisBackend)

      # Push the three items with the clock frozen one second apart.
      Timecop.freeze now do
        redis_backend.expiring_list_push key, items[0]
      end

      Timecop.freeze now + 1.second do
        redis_backend.expiring_list_push key, items[1]
      end

      Timecop.freeze now + 2.seconds do
        redis_backend.expiring_list_push key, items[2]
      end

      # The example title says "add", but the assertion is about what a fetch
      # with a cutoff of now + 1 second returns: only the item pushed at
      # now + 2 seconds.
      found_items = redis_backend.expiring_list_fetch(key, now + 1.second)
      assert_equal [items[2]], found_items
    end
  end
end

================================================
FILE: spec/mosquito/backend/hash_storage_spec.cr
================================================
require "../../spec_helper"

describe "Backend hash storage" do
  # Fixtures: key, field and sample value are built with rand.
  getter sample_data : Hash(String,String) { { "test" => "#{rand(1000)}" } }
  getter key : String { "key-#{rand 1000}" }
  getter field : String { "field-#{rand 1000}" }

  it "can store and retrieve" do
    backend.store key, sample_data
    retrieved_data = backend.retrieve key
    assert_equal sample_data, retrieved_data
  end

  describe "self.get and set" do
    it "sets and retrieves a value from a hash" do
      backend.set(key, field, "truth")
      assert_equal "truth", backend.get(key, field)
    end
  end

  describe "self.increment" do
    it "adds one" do
      backend.set(key, field, "1")
      assert_equal 2, backend.increment(key, field)
    end

    it "can add arbitrary values" do
      backend.set(key, field, "1")
      assert_equal 4, backend.increment(key, field, by: 3)
    end
  end
end

================================================
FILE: spec/mosquito/backend/inspection_spec.cr
================================================
require "../../spec_helper"

describe "Backend inspection" do
  getter backend_name : String { "test#{rand(1000)}" }
  getter queue : Mosquito::Backend::Queue { backend.queue backend_name }

  getter job : QueuedTestJob { QueuedTestJob.new }
  getter job_run : Mosquito::JobRun { Mosquito::JobRun.new("mock_job_run") }

  describe "size" do
    # Helper for the size examples; performs a fixed sequence of queue
    # operations using the shared job_run fixture.
    def fill_queues
      # add to waiting queue
      queue.enqueue job_run
      queue.enqueue job_run

      # move 1 from waiting to pending queue
      pending_t = queue.dequeue

      # add to scheduled queue
queue.schedule job_run, at: 1.second.from_now

      # add to dead queue
      queue.terminate job_run
    end

    it "returns the size of the named q" do
      clean_slate do
        fill_queues
        assert_equal 4, queue.size
      end
    end

    it "returns the size of the named q (without the dead_q)" do
      clean_slate do
        fill_queues
        assert_equal 3, queue.size(include_dead: false)
      end
    end
  end

  # Each "list" example builds three job runs, puts them into one queue state,
  # and compares the sorted ids with the sorted listing for that state.
  describe "list" do
    it "can list the waiting jobs" do
      clean_slate do
        expected_job_runs = Array(Mosquito::JobRun).new(3) { Mosquito::JobRun.new("mock_job_run") }
        expected_job_runs.each { |job_run| queue.enqueue job_run }
        expected_job_run_ids = expected_job_runs.map { |job_run| job_run.id }.sort
        actual_job_runs = queue.list_waiting.sort
        assert_equal 3, actual_job_runs.size
        assert_equal expected_job_run_ids, actual_job_runs
      end
    end

    it "can list the scheduled jobs" do
      clean_slate do
        expected_job_runs = Array(Mosquito::JobRun).new(3) { Mosquito::JobRun.new("mock_job_run") }
        expected_job_runs.each { |job_run| queue.schedule job_run, at: 1.second.from_now }
        expected_job_run_ids = expected_job_runs.map { |job_run| job_run.id }.sort
        actual_job_runs = queue.list_scheduled.sort
        assert_equal 3, actual_job_runs.size
        assert_equal expected_job_run_ids, actual_job_runs
      end
    end

    it "can list the pending jobs" do
      clean_slate do
        # These job runs are stored (tap(&.store)) before being enqueued, and
        # the expected ids are taken from the dequeued runs themselves.
        expected_job_runs = Array(Mosquito::JobRun).new(3) { Mosquito::JobRun.new("mock_job_run").tap(&.store) }
        expected_job_runs.each { |job_run| queue.enqueue job_run }
        expected_job_run_ids = 3.times.map { queue.dequeue.not_nil!.id }.to_a.sort
        actual_job_runs = queue.list_pending.sort
        assert_equal 3, actual_job_runs.size
        assert_equal expected_job_run_ids, actual_job_runs
      end
    end

    it "can list the dead jobs" do
      clean_slate do
        expected_job_runs = Array(Mosquito::JobRun).new(3) { Mosquito::JobRun.new("mock_job_run") }
        expected_job_runs.each { |job_run| queue.terminate job_run }
        expected_job_run_ids = expected_job_runs.map { |job_run| job_run.id }.sort
        actual_job_runs = queue.list_dead.sort
        assert_equal 3, actual_job_runs.size
assert_equal expected_job_run_ids, actual_job_runs
      end
    end
  end
end

================================================
FILE: spec/mosquito/backend/lock_spec.cr
================================================
require "../../spec_helper"

describe "distributed locking" do
  # Every example locks the same fixed key, so a lock left behind by one
  # example would be visible to the next one.
  getter key : String { "testing:backend:lock" }
  getter instance_id : String { "abcd" }
  getter ttl : Time::Span { 1.second }

  # Yields to the example body and then deletes the lock key.
  #
  # The delete lives in an `ensure` clause so the key is removed even when an
  # assertion inside the block raises; otherwise a failing example would leave
  # the lock held and could make the following examples fail as well.
  def ensure_unlock(&block)
    yield
  ensure
    Mosquito.backend.delete key
  end

  it "locks" do
    ensure_unlock do
      got_it = Mosquito.backend.lock? key, instance_id, ttl
      assert got_it
    end
  end

  it "doesn't double lock" do
    ensure_unlock do
      hold = Mosquito.backend.lock? key, "abcd", ttl
      assert hold
      try = Mosquito.backend.lock? key, "wxyz", ttl
      refute try
    end
  end

  it "locks after unlock" do
    ensure_unlock do
      hold = Mosquito.backend.lock? key, "abcd", ttl
      assert hold
      Mosquito.backend.unlock key, instance_id
      try = Mosquito.backend.lock? key, "wxyz", ttl
      assert try
    end
  end

  it "renews a lock held by the same instance" do
    ensure_unlock do
      hold = Mosquito.backend.lock? key, instance_id, ttl
      assert hold
      renewed = Mosquito.backend.renew_lock? key, instance_id, ttl
      assert renewed
    end
  end

  it "doesn't renew a lock held by another instance" do
    ensure_unlock do
      hold = Mosquito.backend.lock? key, "abcd", ttl
      assert hold
      renewed = Mosquito.backend.renew_lock? key, "wxyz", ttl
      refute renewed
    end
  end

  it "doesn't renew a lock that doesn't exist" do
    ensure_unlock do
      renewed = Mosquito.backend.renew_lock?
key, instance_id, ttl
      refute renewed
    end
  end
end

================================================
FILE: spec/mosquito/backend/overseer_spec.cr
================================================
require "../../spec_helper"

describe Mosquito::Backend do
  it "can keep a list of overseers" do
    clean_slate do
      overseer_ids = ["overseer1", "overseer2", "overseer3"]
      overseer_ids.each do |overseer_id|
        Mosquito.backend.register_overseer overseer_id
      end

      # Compared without sorting, so this also pins the order returned by
      # list_overseers to the registration order used above.
      assert_equal overseer_ids, Mosquito.backend.list_overseers
    end
  end

  it "can deregister an overseer" do
    clean_slate do
      overseer_ids = ["overseer1", "overseer2", "overseer3"]
      overseer_ids.each do |overseer_id|
        Mosquito.backend.register_overseer overseer_id
      end

      Mosquito.backend.deregister_overseer "overseer2"
      assert_equal ["overseer1", "overseer3"], Mosquito.backend.list_overseers
    end
  end
end

================================================
FILE: spec/mosquito/backend/queueing_spec.cr
================================================
require "../../spec_helper"

describe "Backend Queues" do
  getter backend_name : String { "test#{rand(1000)}" }
  getter queue : Mosquito::Backend::Queue { backend.queue backend_name }

  getter job : QueuedTestJob { QueuedTestJob.new }
  getter job_run : Mosquito::JobRun { Mosquito::JobRun.new("mock_job_run") }

  describe "list_queues" do
    # Enqueues job_run on test1..test4 and additionally schedules it on test4,
    # so test4 is touched by both an enqueue and a schedule.
    def fill_queues
      names = %w|test1 test2 test3 test4|

      names[0..3].each do |queue_name|
        backend.queue(queue_name).enqueue job_run
      end

      backend.queue(names.last).schedule job_run, at: 1.second.from_now
    end

    # Enqueues and immediately dequeues on test5..test8, then terminates
    # job_run on test8.
    # NOTE(review): no example visible in this chunk calls this helper —
    # confirm whether an example was meant to use it.
    def fill_uncounted_queues
      names = %w|test5 test6 test7 test8|

      names[0..3].each do |queue_name|
        backend.queue(queue_name).tap do |q|
          q.enqueue job_run
          q.dequeue
        end
      end

      backend.queue(names.last).terminate job_run
    end

    it "can get a list of available queues" do
      clean_slate do
        fill_queues
        assert_equal %w|test1 test2 test3 test4|, backend.list_queues.sort
      end
    end

    it "de-dups the queue list" do
      clean_slate do
        fill_queues
        assert_equal %w|test1 test2 test3 test4|, backend.list_queues.sort
      end
end
  end

  describe "schedule" do
    it "adds a job_run to the schedule_q at the time" do
      clean_slate do
        timestamp = 2.seconds.from_now
        job_run = job.build_job_run
        queue.schedule job_run, at: timestamp
        # The expected time is rebuilt from to_unix_ms, i.e. the comparison is
        # made at millisecond precision.
        assert_equal Time.unix_ms(timestamp.to_unix_ms), queue.scheduled_job_run_time job_run
      end
    end
  end

  describe "deschedule" do
    it "returns a job_run if it's due" do
      clean_slate do
        run_time = Time.utc - 2.seconds
        job_run = job.build_job_run
        job_run.store
        queue.schedule job_run, at: run_time

        overdue_job_runs = queue.deschedule
        assert_equal [job_run], overdue_job_runs
      end
    end

    it "returns a blank array when no job_runs exist" do
      clean_slate do
        overdue_job_runs = queue.deschedule
        assert_empty overdue_job_runs
      end
    end

    it "doesn't return job_runs which aren't yet due" do
      clean_slate do
        run_time = Time.utc + 2.seconds
        job_run = job.build_job_run
        job_run.store
        queue.schedule job_run, at: run_time

        overdue_job_runs = queue.deschedule
        assert_empty overdue_job_runs
      end
    end
  end

  describe "enqueue" do
    it "puts a job_run on the waiting_q" do
      clean_slate do
        job_run = job.build_job_run
        queue.enqueue job_run
        waiting_job_runs = queue.list_waiting
        assert_equal [job_run.id], waiting_job_runs
      end
    end
  end

  describe "dequeue" do
    it "returns a job_run object when one is waiting" do
      clean_slate do
        job_run = job.build_job_run
        job_run.store
        queue.enqueue job_run
        waiting_job_run = queue.dequeue
        assert_equal job_run, waiting_job_run
      end
    end

    it "moves the job_run from waiting to pending" do
      clean_slate do
        job_run = job.build_job_run
        job_run.store
        queue.enqueue job_run
        waiting_job_run = queue.dequeue
        pending_job_runs = queue.list_pending
        assert_equal [job_run.id], pending_job_runs
      end
    end

    it "returns nil when nothing is waiting" do
      clean_slate do
        # assert_nil, matching the nil check in the next example.
        assert_nil queue.dequeue
      end
    end

    it "returns nil when a job_run is queued but not stored" do
      clean_slate do
        job_run = job.build_job_run
        # job_run.store # explicitly don't store this one
        queue.enqueue job_run
        waiting_job_run = queue.dequeue
        assert_nil
waiting_job_run
      end
    end
  end

  describe "finish" do
    it "removes the job_run from the pending queue" do
      clean_slate do
        job_run = job.build_job_run
        job_run.store

        # first move the job_run from waiting to pending
        queue.enqueue job_run
        waiting_job_run = queue.dequeue
        assert_equal job_run, waiting_job_run

        # now finish it
        queue.finish job_run

        pending_job_runs = queue.list_pending
        assert_empty pending_job_runs
      end
    end
  end

  describe "terminate" do
    it "adds a job_run to the dead queue" do
      clean_slate do
        job_run = job.build_job_run
        job_run.store

        # first move the job_run from waiting to pending
        queue.enqueue job_run
        waiting_job_run = queue.dequeue
        assert_equal job_run, waiting_job_run

        # now terminate it
        queue.terminate job_run

        dead_job_runs = queue.list_dead
        assert_equal [job_run.id], dead_job_runs
      end
    end
  end
end

================================================
FILE: spec/mosquito/backend_spec.cr
================================================
require "../spec_helper"

# These tests are explicitly for code which is inherited from the abstract Backend
describe Mosquito::Backend do
  # build_key is exercised with three argument shapes (two strings, an array,
  # a splatted tuple); all are expected to produce the same key.
  it "can build a key with two strings" do
    assert_equal "mosquito:one:two", Mosquito.backend.build_key("one", "two")
  end

  it "can build a key with an array" do
    assert_equal "mosquito:one:two", Mosquito.backend.build_key(["one", "two"])
  end

  it "can build a key with a tuple" do
    assert_equal "mosquito:one:two", Mosquito.backend.build_key(*{"one", "two"})
  end

  # The next two examples make no assertion; they only check that the call
  # itself completes for each name type.
  it "can be initialized with a string name" do
    Mosquito.backend.queue "string_backend"
  end

  it "can be initialized with a symbol name" do
    Mosquito.backend.queue :symbol_backend
  end

  it "can update a key with a hash" do
    Mosquito.backend.set "key", {"field" => "value", "field2" => "value2"}
    assert_equal "value", Mosquito.backend.get("key", "field")
    assert_equal "value2", Mosquito.backend.get("key", "field2")
  end
end

================================================
FILE: spec/mosquito/base_spec.cr
================================================
require "../spec_helper"

describe Mosquito::Base do
  # Both examples run inside Base.bare_mapping.
  # NOTE(review): presumably this isolates the registrations made here from
  # the global job mapping — confirm against the helper definition.
  it "keeps a list of scheduled job_runs" do
    Base.bare_mapping do
      Base.register_job_interval PeriodicTestJob, 1.minute
      assert_equal PeriodicTestJob, Base.scheduled_job_runs.first.class
    end
  end

  it "correctly maps job classes from type strings" do
    Base.bare_mapping do
      Base.register_job_mapping "fizzbuzz", QueuedTestJob
      assert_equal QueuedTestJob, Base.job_for_type "fizzbuzz"
    end
  end
end

================================================
FILE: spec/mosquito/configuration_spec.cr
================================================
require "../spec_helper"

describe "Mosquito Config" do
  # Apart from the validation example on a fresh Configuration, the examples
  # change settings inside Mosquito.temp_config.
  it "allows setting / retrieving the connection string" do
    Mosquito.temp_config do
      Mosquito.configuration.backend_connection_string = testing_redis_url
      assert_equal testing_redis_url, Mosquito.configuration.backend_connection_string
    end
  end

  it "enforces missing settings are set" do
    config = Mosquito::Configuration.new
    assert_raises do
      config.validate
    end
  end

  it "allows setting idle_wait as a float" do
    test_value = 2.4

    Mosquito.temp_config do
      Mosquito.configuration.idle_wait = test_value
      # A float assigned to idle_wait is read back as a Time::Span in seconds.
      assert_equal test_value.seconds, Mosquito.configuration.idle_wait
    end
  end

  it "allows setting idle_wait as a time span" do
    test_value = 2.seconds

    Mosquito.temp_config do
      Mosquito.configuration.idle_wait = test_value
      assert_equal test_value, Mosquito.configuration.idle_wait
    end
  end

  it "allows setting successful_job_ttl" do
    test_value = 2

    Mosquito.temp_config do
      Mosquito.configuration.successful_job_ttl = test_value
      assert_equal test_value, Mosquito.configuration.successful_job_ttl
    end
  end

  it "allows setting failed_job_ttl" do
    test_value = 2

    Mosquito.temp_config do
      Mosquito.configuration.failed_job_ttl = test_value
      assert_equal test_value, Mosquito.configuration.failed_job_ttl
    end
  end

  it "allows setting global_prefix string" do
    test_value = "yolo"

    Mosquito.temp_config do
      Mosquito.configuration.global_prefix = test_value
      assert_equal test_value,
Mosquito.configuration.global_prefix
      # With a prefix set, built keys are expected to start with it.
      assert_equal "yolo:mosquito:test", Mosquito.configuration.backend.build_key("test")
    end
  end

  it "allows setting global_prefix nillable" do
    test_value = nil

    Mosquito.temp_config do
      Mosquito.configuration.global_prefix = test_value
      assert_equal test_value, Mosquito.configuration.global_prefix
      # With a nil prefix, built keys carry only the "mosquito" namespace.
      assert_equal "mosquito:test", Mosquito.configuration.backend.build_key("test")
    end
  end

  it "validates when backend_connection_string is set" do
    Mosquito.temp_config do
      Mosquito.configuration.backend_connection_string = testing_redis_url
      Mosquito.configuration.validate
    end
  end
end

================================================
FILE: spec/mosquito/dequeue_adapters/concurrency_limited_dequeue_adapter_spec.cr
================================================
require "../../spec_helper"

describe "Mosquito::ConcurrencyLimitedDequeueAdapter" do
  getter(overseer : MockOverseer) { MockOverseer.new }
  getter(queue_list : MockQueueList) { overseer.queue_list.as(MockQueueList) }

  # Registers the job class under its underscored name and adds its queue to
  # the mock queue list the adapter dequeues from.
  def register(job_class : Mosquito::Job.class)
    Mosquito::Base.register_job_mapping job_class.name.underscore, job_class
    queue_list.queues << job_class.queue
  end

  it "dequeues a job when under the limit" do
    clean_slate do
      register QueuedTestJob
      expected_job_run = QueuedTestJob.new.enqueue

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 3,
      })

      result = adapter.dequeue(queue_list)
      refute_nil result
      if result
        assert_equal expected_job_run, result.job_run
        assert_equal QueuedTestJob.queue, result.queue
      end
    end
  end

  it "returns nil when no jobs are available" do
    clean_slate do
      register QueuedTestJob

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 3,
      })

      result = adapter.dequeue(queue_list)
      assert_nil result
    end
  end

  it "skips a queue that has reached its concurrency limit" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 2,
      })

      # Dequeue twice — should succeed and fill the limit.
      result1 = adapter.dequeue(queue_list)
      refute_nil result1
      assert_equal 1, adapter.active_count("queued_test_job")

      result2 = adapter.dequeue(queue_list)
      refute_nil result2
      assert_equal 2, adapter.active_count("queued_test_job")

      # Third dequeue should be blocked by the limit.
      result3 = adapter.dequeue(queue_list)
      assert_nil result3
    end
  end

  it "allows dequeue again after finished_with" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 1,
      })

      # Fill the single slot.
      result1 = adapter.dequeue(queue_list)
      refute_nil result1
      assert_equal 1, adapter.active_count("queued_test_job")

      # Blocked.
      result2 = adapter.dequeue(queue_list)
      assert_nil result2

      # Signal that the job finished.
      adapter.finished_with(result1.not_nil!.job_run, result1.not_nil!.queue)
      assert_equal 0, adapter.active_count("queued_test_job")

      # Now dequeue should work again.
      result3 = adapter.dequeue(queue_list)
      refute_nil result3
    end
  end

  it "does not limit queues not in the limits table" do
    clean_slate do
      register QueuedTestJob
      5.times { QueuedTestJob.new.enqueue }

      # No limit configured for queued_test_job.
      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "other_queue" => 1,
      })

      # Should dequeue all 5 without blocking.
      5.times do |i|
        result = adapter.dequeue(queue_list)
        refute_nil result, "Expected dequeue ##{i + 1} to succeed"
      end
    end
  end

  it "enforces independent limits across multiple queues" do
    clean_slate do
      register QueuedTestJob
      register EchoJob

      3.times { QueuedTestJob.new.enqueue }
      3.times { EchoJob.new(text: "hello").enqueue }

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 1,
        "io_queue" => 2,
      })

      # Saturate queued_test_job (limit 1).
      # Because of shuffle we may get either queue first, so keep
      # dequeuing until the counters match the limits.
results = [] of Mosquito::WorkUnit
      # Attempt six dequeues and keep only the ones that returned a work unit.
      6.times do
        if r = adapter.dequeue(queue_list)
          results << r
        end
      end

      # Expect 1 + 2 = 3 successful dequeues, one per allowed slot.
      assert_equal 1, adapter.active_count("queued_test_job")
      assert_equal 2, adapter.active_count("io_queue")
      assert_equal 3, results.size
    end
  end

  it "finished_with does not go below zero" do
    adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
      "queued_test_job" => 3,
    })

    job_run = Mosquito::JobRun.new("queued_test_job")
    queue = Mosquito::Queue.new("queued_test_job")

    # finished_with is called without any preceding dequeue on this adapter.
    adapter.finished_with(job_run, queue)
    assert_equal 0, adapter.active_count("queued_test_job")
  end

  it "can be used via the overseer" do
    clean_slate do
      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 5,
      })
      overseer.dequeue_adapter = adapter

      register QueuedTestJob
      expected_job_run = QueuedTestJob.new.enqueue

      result = overseer.dequeue_job?
      refute_nil result
      if result
        assert_equal expected_job_run, result.job_run
      end
    end
  end
end

================================================
FILE: spec/mosquito/dequeue_adapters/remote_config_dequeue_adapter_spec.cr
================================================
require "../../spec_helper"

describe "Mosquito::RemoteConfigDequeueAdapter" do
  getter(overseer : MockOverseer) { MockOverseer.new }
  getter(queue_list : MockQueueList) { overseer.queue_list.as(MockQueueList) }

  # Registers the job class under its underscored name and adds its queue to
  # the mock queue list the adapter dequeues from.
  def register(job_class : Mosquito::Job.class)
    Mosquito::Base.register_job_mapping job_class.name.underscore, job_class
    queue_list.queues << job_class.queue
  end

  it "uses defaults when no remote config is present" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 2},
        refresh_interval: 0.seconds,
      )

      # Two dequeues should succeed.
      result1 = adapter.dequeue(queue_list)
      refute_nil result1
      result2 = adapter.dequeue(queue_list)
      refute_nil result2

      # Third should be blocked by the default limit of 2.
result3 = adapter.dequeue(queue_list)
      assert_nil result3
    end
  end

  it "picks up remote limits from the backend" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      # Default allows 2, but remote overrides to 1.
      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 2},
        refresh_interval: 0.seconds,
      )

      Mosquito::RemoteConfigDequeueAdapter.store_limits({"queued_test_job" => 1})

      result1 = adapter.dequeue(queue_list)
      refute_nil result1

      # Should be blocked — remote limit is 1.
      result2 = adapter.dequeue(queue_list)
      assert_nil result2
    end
  end

  it "merges remote limits on top of defaults" do
    clean_slate do
      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queue_a" => 3, "queue_b" => 5},
        refresh_interval: 0.seconds,
      )

      # Remote only overrides queue_a and adds queue_c.
      Mosquito::RemoteConfigDequeueAdapter.store_limits({
        "queue_a" => 1,
        "queue_c" => 7,
      })

      # refresh_limits is called explicitly here; no dequeue is involved.
      adapter.refresh_limits

      assert_equal 1, adapter.limits["queue_a"]
      assert_equal 5, adapter.limits["queue_b"]
      assert_equal 7, adapter.limits["queue_c"]
    end
  end

  it "falls back to defaults when remote config is cleared" do
    clean_slate do
      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queue_a" => 3},
        refresh_interval: 0.seconds,
      )

      Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 1})
      adapter.refresh_limits
      assert_equal 1, adapter.limits["queue_a"]

      Mosquito::RemoteConfigDequeueAdapter.clear_limits
      adapter.refresh_limits
      assert_equal 3, adapter.limits["queue_a"]
    end
  end

  it "respects refresh_interval and does not poll on every dequeue" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 3},
        refresh_interval: 1.hour,
      )

      # First dequeue triggers the initial refresh.
      adapter.dequeue(queue_list)

      # Store a tighter limit — but it should NOT take effect
      # because the refresh interval hasn't elapsed.
Mosquito::RemoteConfigDequeueAdapter.store_limits({"queued_test_job" => 1})

      result2 = adapter.dequeue(queue_list)
      refute_nil result2, "Expected dequeue to succeed because refresh hasn't fired"
    end
  end

  it "preserves in-flight counts when limits are refreshed" do
    clean_slate do
      register QueuedTestJob
      2.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 1},
        refresh_interval: 0.seconds,
      )

      result1 = adapter.dequeue(queue_list)
      refute_nil result1
      assert_equal 1, adapter.active_count("queued_test_job")

      # Refresh with new limits — must not reset the in-flight counter.
      Mosquito::RemoteConfigDequeueAdapter.store_limits({"queued_test_job" => 2})
      adapter.refresh_limits
      assert_equal 1, adapter.active_count("queued_test_job")

      # Finishing the job run releases its slot.
      adapter.finished_with(result1.not_nil!.job_run, result1.not_nil!.queue)
      assert_equal 0, adapter.active_count("queued_test_job")
    end
  end

  it "delegates finished_with to the inner adapter" do
    clean_slate do
      register QueuedTestJob
      2.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 1},
        refresh_interval: 0.seconds,
      )

      result1 = adapter.dequeue(queue_list)
      refute_nil result1
      assert_equal 1, adapter.active_count("queued_test_job")

      # Blocked.
      result2 = adapter.dequeue(queue_list)
      assert_nil result2

      # Signal completion.
      adapter.finished_with(result1.not_nil!.job_run, result1.not_nil!.queue)
      assert_equal 0, adapter.active_count("queued_test_job")

      # Now a dequeue should succeed again.
      result3 = adapter.dequeue(queue_list)
      refute_nil result3
    end
  end

  it "can be used via the overseer" do
    clean_slate do
      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 5},
        refresh_interval: 0.seconds,
      )
      overseer.dequeue_adapter = adapter

      register QueuedTestJob
      expected_job_run = QueuedTestJob.new.enqueue

      result = overseer.dequeue_job?
refute_nil result
      if result
        assert_equal expected_job_run, result.job_run
      end
    end
  end

  # Examples for adapters constructed with an overseer_id, where limits can
  # also be stored under that overseer's id.
  describe "per-overseer configuration" do
    it "uses per-overseer limits when overseer_id is set" do
      clean_slate do
        register QueuedTestJob
        3.times { QueuedTestJob.new.enqueue }

        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queued_test_job" => 3},
          overseer_id: "gpu-worker-1",
          refresh_interval: 0.seconds,
        )

        # Set a per-overseer limit of 1.
        Mosquito::RemoteConfigDequeueAdapter.store_limits(
          {"queued_test_job" => 1},
          overseer_id: "gpu-worker-1"
        )

        result1 = adapter.dequeue(queue_list)
        refute_nil result1

        # Should be blocked by the per-overseer limit.
        result2 = adapter.dequeue(queue_list)
        assert_nil result2
      end
    end

    it "per-overseer limits override global limits" do
      clean_slate do
        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queue_a" => 10},
          overseer_id: "gpu-worker-1",
          refresh_interval: 0.seconds,
        )

        # Global says 5, per-overseer says 2 — per-overseer wins.
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 5})
        Mosquito::RemoteConfigDequeueAdapter.store_limits(
          {"queue_a" => 2},
          overseer_id: "gpu-worker-1"
        )

        adapter.refresh_limits
        assert_equal 2, adapter.limits["queue_a"]
      end
    end

    it "falls back to global when no per-overseer key exists" do
      clean_slate do
        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queue_a" => 10},
          overseer_id: "gpu-worker-1",
          refresh_interval: 0.seconds,
        )

        # Only a global limit is stored; nothing is stored for gpu-worker-1.
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 5})

        adapter.refresh_limits
        assert_equal 5, adapter.limits["queue_a"]
      end
    end

    it "merges defaults, global, and per-overseer layers" do
      clean_slate do
        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queue_a" => 10, "queue_b" => 20, "queue_c" => 30},
          overseer_id: "gpu-worker-1",
          refresh_interval: 0.seconds,
        )

        # Global overrides queue_a and adds queue_d.
Mosquito::RemoteConfigDequeueAdapter.store_limits({
          "queue_a" => 5,
          "queue_d" => 40,
        })

        # Per-overseer overrides queue_a again and queue_b.
        Mosquito::RemoteConfigDequeueAdapter.store_limits(
          {"queue_a" => 1, "queue_b" => 2},
          overseer_id: "gpu-worker-1"
        )

        adapter.refresh_limits

        assert_equal 1, adapter.limits["queue_a"]  # per-overseer wins
        assert_equal 2, adapter.limits["queue_b"]  # per-overseer wins
        assert_equal 30, adapter.limits["queue_c"] # default (untouched)
        assert_equal 40, adapter.limits["queue_d"] # global (no per-overseer)
      end
    end

    it "adapters without overseer_id ignore per-overseer keys" do
      clean_slate do
        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queue_a" => 10},
          refresh_interval: 0.seconds,
        )

        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 5})
        Mosquito::RemoteConfigDequeueAdapter.store_limits(
          {"queue_a" => 1},
          overseer_id: "gpu-worker-1"
        )

        adapter.refresh_limits

        # Without an overseer_id, only global is used.
        assert_equal 5, adapter.limits["queue_a"]
      end
    end
  end

  # Examples for the class-level store_limits / stored_limits / clear_limits
  # helpers, exercised without constructing an adapter instance.
  describe "class-level storage helpers" do
    it "round-trips global limits through the backend" do
      clean_slate do
        limits = {"queue_a" => 3, "queue_b" => 7}
        Mosquito::RemoteConfigDequeueAdapter.store_limits(limits)

        retrieved = Mosquito::RemoteConfigDequeueAdapter.stored_limits
        assert_equal 3, retrieved["queue_a"]
        assert_equal 7, retrieved["queue_b"]
      end
    end

    it "round-trips per-overseer limits through the backend" do
      clean_slate do
        limits = {"queue_a" => 1}
        Mosquito::RemoteConfigDequeueAdapter.store_limits(limits, overseer_id: "worker-2")

        retrieved = Mosquito::RemoteConfigDequeueAdapter.stored_limits("worker-2")
        assert_equal 1, retrieved["queue_a"]

        # Global should be unaffected.
global = Mosquito::RemoteConfigDequeueAdapter.stored_limits assert_equal({} of String => Int32, global) end end it "store_limits overwrites rather than merges (stale entries are removed)" do clean_slate do Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 3, "queue_b" => 7}) Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 1}) retrieved = Mosquito::RemoteConfigDequeueAdapter.stored_limits assert_equal 1, retrieved["queue_a"] refute retrieved.has_key?("queue_b"), "queue_b should have been removed by the overwrite" end end it "store_limits with overseer_id overwrites rather than merges" do clean_slate do Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 3, "queue_b" => 7}, overseer_id: "worker-1") Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 1}, overseer_id: "worker-1") retrieved = Mosquito::RemoteConfigDequeueAdapter.stored_limits("worker-1") assert_equal 1, retrieved["queue_a"] refute retrieved.has_key?("queue_b"), "queue_b should have been removed by the overwrite" end end it "store_limits with an empty hash removes all stored limits" do clean_slate do Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 3}) Mosquito::RemoteConfigDequeueAdapter.store_limits({} of String => Int32) retrieved = Mosquito::RemoteConfigDequeueAdapter.stored_limits assert_equal({} of String => Int32, retrieved) end end it "returns an empty hash when no limits are stored" do clean_slate do retrieved = Mosquito::RemoteConfigDequeueAdapter.stored_limits assert_equal({} of String => Int32, retrieved) end end it "clear_limits removes global stored data" do clean_slate do Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 1}) Mosquito::RemoteConfigDequeueAdapter.clear_limits retrieved = Mosquito::RemoteConfigDequeueAdapter.stored_limits assert_equal({} of String => Int32, retrieved) end end it "clear_limits with overseer_id removes only that overseer's data" do clean_slate do 
Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 5}) Mosquito::RemoteConfigDequeueAdapter.store_limits( {"queue_a" => 1}, overseer_id: "worker-1" ) Mosquito::RemoteConfigDequeueAdapter.clear_limits(overseer_id: "worker-1") # Per-overseer is gone. per_overseer = Mosquito::RemoteConfigDequeueAdapter.stored_limits("worker-1") assert_equal({} of String => Int32, per_overseer) # Global is still there. global = Mosquito::RemoteConfigDequeueAdapter.stored_limits assert_equal 5, global["queue_a"] end end end describe "Api integration" do it "reads and writes global limits through the Api module" do clean_slate do Mosquito::Api.set_concurrency_limits({"queue_x" => 10}) result = Mosquito::Api.concurrency_limits assert_equal 10, result["queue_x"] end end it "reads and writes per-overseer limits through the Api module" do clean_slate do Mosquito::Api.set_concurrency_limits( {"queue_x" => 2}, overseer_id: "gpu-worker-1" ) result = Mosquito::Api.concurrency_limits(overseer_id: "gpu-worker-1") assert_equal 2, result["queue_x"] # Global should be unaffected. 
global = Mosquito::Api.concurrency_limits assert_equal({} of String => Int32, global) end end end end ================================================ FILE: spec/mosquito/dequeue_adapters/shuffle_dequeue_adapter_spec.cr ================================================ require "../../spec_helper" describe "Mosquito::ShuffleDequeueAdapter" do getter(overseer : MockOverseer) { MockOverseer.new } getter(queue_list : MockQueueList) { overseer.queue_list.as(MockQueueList) } getter(executor : MockExecutor) { overseer.executors.first.as(MockExecutor) } def register(job_class : Mosquito::Job.class) Mosquito::Base.register_job_mapping job_class.name.underscore, job_class queue_list.discovered_queues << job_class.queue end it "is the default adapter" do assert_instance_of Mosquito::ShuffleDequeueAdapter, Mosquito.configuration.dequeue_adapter end it "dequeues a job from the queue list" do clean_slate do register QueuedTestJob expected_job_run = QueuedTestJob.new.enqueue adapter = Mosquito::ShuffleDequeueAdapter.new result = adapter.dequeue(queue_list) refute_nil result if result assert_equal expected_job_run, result.job_run assert_equal QueuedTestJob.queue, result.queue end end end it "returns nil when no jobs are available" do clean_slate do register QueuedTestJob adapter = Mosquito::ShuffleDequeueAdapter.new result = adapter.dequeue(queue_list) assert_nil result end end describe "custom adapter" do it "can be swapped on the overseer" do clean_slate do null_adapter = NullDequeueAdapter.new overseer.dequeue_adapter = null_adapter register QueuedTestJob QueuedTestJob.new.enqueue result = overseer.dequeue_job? assert_nil result assert_equal 1, null_adapter.dequeue_count end end it "receives the queue list when dequeuing" do clean_slate do spy_adapter = SpyDequeueAdapter.new overseer.dequeue_adapter = spy_adapter register QueuedTestJob queue_list.discovered_queues << Mosquito::Queue.new("extra_queue") overseer.dequeue_job? 
assert_includes spy_adapter.checked_queues, "queued_test_job" assert_includes spy_adapter.checked_queues, "extra_queue" end end end describe "overseer integration" do it "dequeue_job? delegates to the adapter" do clean_slate do register QueuedTestJob expected_job_run = QueuedTestJob.new.enqueue result = overseer.dequeue_job? refute_nil result if result assert_equal expected_job_run, result.job_run end end end end end ================================================ FILE: spec/mosquito/dequeue_adapters/weighted_dequeue_adapter_spec.cr ================================================ require "../../spec_helper" describe "Mosquito::WeightedDequeueAdapter" do getter(overseer : MockOverseer) { MockOverseer.new } getter(queue_list : MockQueueList) { overseer.queue_list.as(MockQueueList) } def register(job_class : Mosquito::Job.class) Mosquito::Base.register_job_mapping job_class.name.underscore, job_class queue_list.discovered_queues << job_class.queue end it "dequeues a job from a weighted queue" do clean_slate do register QueuedTestJob expected_job_run = QueuedTestJob.new.enqueue adapter = Mosquito::WeightedDequeueAdapter.new({ "queued_test_job" => 5, }) result = adapter.dequeue(queue_list) refute_nil result if result assert_equal expected_job_run, result.job_run assert_equal QueuedTestJob.queue, result.queue end end end it "returns nil when no jobs are available" do clean_slate do register QueuedTestJob adapter = Mosquito::WeightedDequeueAdapter.new({ "queued_test_job" => 3, }) result = adapter.dequeue(queue_list) assert_nil result end end it "assigns default weight of 1 to unconfigured queues" do clean_slate do register QueuedTestJob expected_job_run = QueuedTestJob.new.enqueue # No weight configured for queued_test_job — defaults to 1. 
adapter = Mosquito::WeightedDequeueAdapter.new({ "other_queue" => 10, }) result = adapter.dequeue(queue_list) refute_nil result if result assert_equal expected_job_run, result.job_run end end end it "higher-weight queues are dequeued more often" do clean_slate do register QueuedTestJob register EchoJob adapter = Mosquito::WeightedDequeueAdapter.new({ "queued_test_job" => 10, "io_queue" => 1, }) # Enqueue enough jobs that neither queue drains during the sample. 200.times { QueuedTestJob.new.enqueue } 200.times { EchoJob.new(text: "hello").enqueue } dequeue_counts = Hash(String, Int32).new(0) # Sample 50 dequeues — well within the 200 available per queue. 50.times do result = adapter.dequeue(queue_list) if result dequeue_counts[result.queue.name] = dequeue_counts[result.queue.name] + 1 end end # With weights 10:1, the high-weight queue should be dequeued # significantly more often over a 50-dequeue sample. heavy_count = dequeue_counts.fetch("queued_test_job", 0) light_count = dequeue_counts.fetch("io_queue", 0) assert heavy_count > light_count, "Expected queued_test_job (#{heavy_count}) to be dequeued more than io_queue (#{light_count})" end end it "can be used via the overseer" do clean_slate do adapter = Mosquito::WeightedDequeueAdapter.new({ "queued_test_job" => 5, }) overseer.dequeue_adapter = adapter register QueuedTestJob expected_job_run = QueuedTestJob.new.enqueue result = overseer.dequeue_job? 
refute_nil result
if result
  assert_equal expected_job_run, result.job_run
end
end
end
end
================================================
FILE: spec/mosquito/exceptions_spec.cr
================================================
require "../spec_helper"

# Smoke spec for the exception classes under the Mosquito namespace.
#
# None of these examples makes an assertion; each one only constructs the
# exception class with a message string.
describe "Mosquito exceptions" do
  it "declares JobFailed" do
    Mosquito::JobFailed.new "test"
  end

  it "declares DoubleRun" do
    Mosquito::DoubleRun.new "test"
  end

  it "declares IrretrievableParameter" do
    Mosquito::IrretrievableParameter.new "test"
  end
end
================================================
FILE: spec/mosquito/job/job_state_spec.cr
================================================
require "../../spec_helper"

# Spec for the `executed?` predicate on Mosquito::Job::State members.
describe Mosquito::Job::State do
  describe "executed?" do
    # Succeeded and Failed are the only two states expected to count as executed.
    it "Marks jobs as executed when they've either succeeded or failed" do
      assert Mosquito::Job::State::Succeeded.executed?
      assert Mosquito::Job::State::Failed.executed?
    end

    # The remaining states checked here (Initialization, Running, Aborted,
    # Preempted) must all report executed? as false.
    it "Doesn't mark jobs as executed in any other state" do
      refute Mosquito::Job::State::Initialization.executed?
      refute Mosquito::Job::State::Running.executed?
      refute Mosquito::Job::State::Aborted.executed?
      refute Mosquito::Job::State::Preempted.executed?
    end
  end
end
================================================
FILE: spec/mosquito/job_run/rescheduling_spec.cr
================================================
require "../../spec_helper"

describe "job_run rescheduling" do
@failing_job_run : Mosquito::JobRun?
getter failing_job_run : Mosquito::JobRun { create_job_run "failing_job" }

it "calculates reschedule interval correctly" do
intervals = { 1 => 2, 2 => 8, 3 => 18, 4 => 32 }
intervals.each do |count, delay|
job_run = Mosquito::JobRun.retrieve(failing_job_run.id.not_nil!).not_nil!
job_run.run job_run end max_reschedules = 4 max_reschedules.times do job_run = run_job_run.call assert job_run.rescheduleable? end job_run = run_job_run.call refute job_run.rescheduleable? end it "counts retries upon failure" do assert_equal 0, failing_job_run.retry_count failing_job_run.run assert_equal 1, failing_job_run.retry_count end it "updates the backend when a failure happens" do failing_job_run.run saved_job_run = Mosquito::JobRun.retrieve failing_job_run.id.not_nil! assert_equal 1, saved_job_run.not_nil!.retry_count end it "does not reschedule a job which fails with retry=false" do job = FailingJob.new job.fail_with_retry = false job.run refute job.should_retry end describe "preempted jobs" do it "sets state to preempted and does not execute" do job = PreemptingJob.new job.run assert job.preempted? refute job.executed? end it "uses normal backoff when preempted without an until time" do job = PreemptingJob.new job.run assert_equal 2.seconds, job.reschedule_interval(1) assert_equal 8.seconds, job.reschedule_interval(2) end it "uses the until time for reschedule interval when provided" do Timecop.freeze(Time.utc) do future = Time.utc + 30.seconds job = PreemptingJob.new job.preempt_until = future job.run interval = job.reschedule_interval(1) assert_equal 30.seconds, interval end end it "falls back to normal backoff when until time is in the past" do Timecop.freeze(Time.utc) do past = Time.utc - 5.seconds job = PreemptingJob.new job.preempt_until = past job.run assert_equal 2.seconds, job.reschedule_interval(1) end end it "respects rescheduleable? override when preempted" do job = NonReschedulablePreemptingJob.new job.run assert job.preempted? 
refute job.rescheduleable?(0)
end
end
end
================================================
FILE: spec/mosquito/job_run/running_spec.cr
================================================
require "../../spec_helper"

# Spec for building and running a job from a JobRun. Job runs are created with
# the `create_job_run` helper, which is not defined in this file.
describe "job_run running" do
  # The job run timestamps are stored as a unix epoch with millis, so nanosecond precision is lost.
  #
  # Truncates `time` to millisecond precision: the nanosecond component is
  # subtracted, then the millisecond component is added back.
  def at_beginning_of_millisecond(time)
    time - (time.nanosecond.nanoseconds) + (time.millisecond.milliseconds)
  end

  it "uses the lookup table to build a job" do
    job_instance = create_job_run.build_job
    assert_instance_of JobWithConfig, job_instance
  end

  it "populates the variables of a job" do
    job_instance = create_job_run.build_job
    assert_instance_of JobWithConfig, job_instance
    # NOTE(review): job_run_config is provided outside this file; presumably it
    # is the config the default create_job_run stores — confirm in the helpers.
    assert_equal job_run_config, job_instance.as(JobWithConfig).config
  end

  it "runs the job" do
    # Reset the class-level counter first so the count asserted below reflects
    # only the run performed in this example.
    JobWithPerformanceCounter.reset_performance_counter!
    create_job_run("job_with_performance_counter").run
    assert_equal 1, JobWithPerformanceCounter.performances
  end

  it "sets started_at when a job is run" do
    # Use a millisecond-aligned instant so the timestamp can be compared with
    # assert_equal after the run.
    now = at_beginning_of_millisecond Time.utc

    job_run = create_job_run
    Timecop.freeze now do
      job_run.run
    end

    assert_equal now, job_run.started_at
  end

  it "sets finished_at when a job is run" do
    # The clock is frozen for the whole run, so finished_at is expected to
    # equal the same frozen instant.
    now = at_beginning_of_millisecond Time.utc

    job_run = create_job_run
    Timecop.freeze now do
      job_run.run
    end

    assert_equal now, job_run.finished_at
  end

  it "has nil timestamps before a job is run" do
    job_run = create_job_run
    assert_nil job_run.started_at
    assert_nil job_run.finished_at
  end
end
================================================
FILE: spec/mosquito/job_run/storage_spec.cr
================================================
require "../../spec_helper"

describe "job_run storage" do
getter backend : Mosquito::Backend::Queue = Mosquito.backend.queue("testing")

getter config = { "year" => "1752", "name" => "the year september lost 12 days" }

getter job_run : Mosquito::JobRun do
Mosquito::JobRun.new("mock_job_run").tap do |job_run|
job_run.config = config
job_run.store
end
end

it
"builds the backend key correctly" do assert_equal "mosquito:job_run:1", Mosquito::JobRun.config_key "1" assert_equal "mosquito:job_run:#{job_run.id}", job_run.config_key end it "can store and retrieve a job_run with attributes" do stored_job_run = Mosquito::JobRun.retrieve job_run.id if stored_job_run assert_equal config, stored_job_run.config else flunk "Could not retrieve job_run" end end it "stores job_runs in the backend" do stored_job_run = backend.backend.retrieve Mosquito::JobRun.config_key(job_run.id) stored_config = stored_job_run.reject! %w|type enqueue_time retry_count| assert_equal config, stored_config end it "can delete a job_run" do job_run.delete saved_config = backend.backend.retrieve job_run.config_key assert_empty saved_config end it "can set a timed delete on a job_run" do ttl = 10 job_run.delete(in: ttl) set_ttl = backend.backend.expires_in job_run.config_key assert_equal ttl, set_ttl end it "can reload a job_run" do job_run.reload end describe "timestamp retrieval" do # the job run timestamps are stored as a unix epoch with millis, so nanosecond precision is lost. 
def at_beginning_of_millisecond(time) time - (time.nanosecond.nanoseconds) + (time.millisecond.milliseconds) end it "retrieves started_at and finished_at timestamps" do now = at_beginning_of_millisecond Time.utc job_run = create_job_run Timecop.freeze now do job_run.run end retrieved = Mosquito::JobRun.retrieve job_run.id if retrieved assert_equal now, retrieved.started_at assert_equal now, retrieved.finished_at else flunk "Could not retrieve job_run" end end it "does not include timestamps in config after retrieve" do job_run = create_job_run job_run.run retrieved = Mosquito::JobRun.retrieve job_run.id if retrieved refute retrieved.config.has_key?("started_at") refute retrieved.config.has_key?("finished_at") else flunk "Could not retrieve job_run" end end it "retrieves nil timestamps for unexecuted job runs" do retrieved = Mosquito::JobRun.retrieve job_run.id if retrieved assert_nil retrieved.started_at assert_nil retrieved.finished_at else flunk "Could not retrieve job_run" end end end it "persists overseer_id via claimed_by and retrieves it" do test_overseer = MockOverseer.new job_run.claimed_by test_overseer retrieved = Mosquito::JobRun.retrieve job_run.id assert retrieved assert_equal test_overseer.observer.instance_id, retrieved.not_nil!.overseer_id end it "round-trips overseer_id through store and retrieve" do test_overseer = MockOverseer.new job_run.claimed_by test_overseer job_run.store retrieved = Mosquito::JobRun.retrieve job_run.id assert retrieved assert_equal test_overseer.observer.instance_id, retrieved.not_nil!.overseer_id end end ================================================ FILE: spec/mosquito/job_run_spec.cr ================================================ require "../spec_helper" require "./job_run/*" ================================================ FILE: spec/mosquito/job_spec.cr ================================================ require "../spec_helper" describe Mosquito::Job do getter(passing_job) { PassingJob.new } getter(failing_job) { 
FailingJob.new } getter(not_implemented_job) { NotImplementedJob.new } getter(throttled_job) { ThrottledJob.new } getter(hooked_job) { JobWithHooks.new } describe "run" do it "captures JobFailed and marks sucess=false" do failing_job.run assert failing_job.failed? end it "sets #executed? and #succeeded?" do refute passing_job.executed? passing_job.run assert passing_job.executed? assert passing_job.succeeded? end it "emits a failure message when #fail contains a reason message" do clear_logs failing_job.run assert failing_job.failed? assert_logs_match failing_job.exception_message end it "exception messages are sent to the logs" do clear_logs failing_job.fail_with_exception = true failing_job.run assert failing_job.failed? assert_logs_match failing_job.exception_message end it "captures and marks failure for other exceptions" do clear_logs assert_nil failing_job.exception failing_job.fail_with_exception = true failing_job.run assert failing_job.failed? refute_nil failing_job.exception end it "sets success=false when #fail-ed" do failing_job.run refute failing_job.succeeded? end it "fails when no perform is implemented, and a messsage is sent to the logs" do clear_logs not_implemented_job.run assert not_implemented_job.failed? 
assert_logs_match "No job definition found" end end it "fetches the default queue" do assert_equal "passing_job", PassingJob.queue.name end it "fetches the named queue" do assert_equal "io_queue", EchoJob.queue.name end describe "reschedule interval" do it "calculates reschedule interval correctly" do intervals = { 1 => 2, 2 => 8, 3 => 18, 4 => 32 } intervals.each do |count, delay| assert_equal delay.seconds, passing_job.reschedule_interval(count) end end it "allows overriding the reschedule interval" do intervals = 1..4 intervals.each do |count| assert_equal 4.seconds, CustomRescheduleIntervalJob.new.reschedule_interval(count) end end end describe "metadata" do it "returns a metadata instance" do assert_instance_of Mosquito::Metadata, passing_job.metadata end it "is a memoized instance" do one = passing_job.metadata two = passing_job.metadata assert_same one, two end end describe "self.metadata" do it "returns a metadata instance" do assert PassingJob.metadata.is_a?(Mosquito::Metadata) end it "is readonly" do metadata = PassingJob.metadata assert metadata.readonly? 
end
end

# The class-level metadata key is expected to contain the underscored job name.
describe "self.metadata_key" do
  it "includes the class name" do
    assert_includes PassingJob.metadata_key, "passing_job"
  end
end

# Before hooks: both hook log lines are expected whether or not the job is
# made to fail; only the "Perform Executed" line differs between the two cases.
describe "before_hooks" do
  it "should execute hooks" do
    clear_logs
    hooked_job.should_fail = false
    hooked_job.run
    assert_logs_match "Before Hook Executed"
    assert_logs_match "2nd Before Hook Executed"
    assert_logs_match "Perform Executed"
  end

  it "should not exec when a before hook fails the job" do
    clear_logs
    hooked_job.should_fail = true
    hooked_job.run
    assert_logs_match "Before Hook Executed"
    assert_logs_match "2nd Before Hook Executed"
    # With should_fail set, perform must never log.
    refute_logs_match "Perform Executed"
  end
end

# After hooks: both after-hook log lines are expected in the passing case and
# in the failing case alike.
describe "after_hooks" do
  it "should execute `after` hooks" do
    clear_logs
    hooked_job.should_fail = false
    hooked_job.run
    assert_logs_match "After Hook Executed"
    assert_logs_match "2nd After Hook Executed"
    assert_logs_match "Perform Executed"
  end

  it "should run the `after` hooks even if a job fails" do
    clear_logs
    hooked_job.should_fail = true
    hooked_job.run
    assert_logs_match "After Hook Executed"
    assert_logs_match "2nd After Hook Executed"
    # The job was failed before perform, so perform must not have logged.
    refute_logs_match "Perform Executed"
  end
end
end
================================================
FILE: spec/mosquito/key_builder_spec.cr
================================================
require "../spec_helper"

# Spec for KeyBuilder.build. Every example expects the parts to be joined with
# a ":" separator, whatever form the parts are passed in.
describe Mosquito::KeyBuilder do
  # A single tuple argument is expanded into its elements.
  it "builds keys from tuples" do
    assert_equal "fizz:buzz", KeyBuilder.build({:fizz, :buzz})
  end

  # Several positional string arguments.
  it "builds keys from strings" do
    assert_equal "fizz:buzz", KeyBuilder.build("fizz", "buzz")
  end

  # A single array argument is expanded into its elements.
  it "builds keys from an array" do
    assert_equal "fizz:buzz", KeyBuilder.build(["fizz", "buzz"])
  end

  # Non-string parts appear in the key in their string form.
  it "builds keys from integers" do
    assert_equal "fizz:6", KeyBuilder.build("fizz", 6)
  end

  it "builds keys from floats" do
    assert_equal "2.4:buzz", KeyBuilder.build(2.4, "buzz")
  end
end
================================================
FILE: spec/mosquito/metadata_spec.cr
================================================
require "../spec_helper"

describe Mosquito::Metadata do
getter(store_name : String)
{ "test_store#{rand 1000}" } getter(store : Metadata) { Metadata.new store_name } getter(field : String) { "foo#{rand 1000}" } it "increments" do clean_slate do store.increment field value = store[field]? assert_equal "1", value store.increment field value = store[field]? assert_equal "2", value end end it "increments with a configurable amount" do clean_slate do store.increment field value = store[field]?.not_nil! assert_equal "1", value delta = 2 store.increment field, by: delta new_value = store[field]?.not_nil! assert_equal delta, (new_value.to_i - value.to_i) end end it "decrements" do clean_slate do store.decrement field value = store[field]? assert_equal "-1", value store.decrement field value = store[field]? assert_equal "-2", value end end it "dumps to a hash" do clean_slate do expected = { "one" => "1", "two" => "2", "three" => "3" } expected.each { |key, value| store[key] = value } assert_equal expected, store.to_h end end it "can be readonly" do clean_slate do store[field] = "truth" readonly_store = Metadata.new store_name, readonly: true assert_equal "truth", readonly_store[field]? assert_raises RuntimeError do readonly_store[field] = "lies" end end end it "can set and read a value" do clean_slate do store[field] = "truth" assert_equal "truth", store[field]? end end describe "with a hash" do it "can set and read a hash" do clean_slate do store.set({"one" => "1", "two" => "2", "three" => "3"}) assert_equal "1", store["one"]? assert_equal "2", store["two"]? assert_equal "3", store["three"]? end end it "can set a hash and delete a value from the hash" do clean_slate do store.set({"one" => "1", "two" => "2", "three" => "3"}) store.set({"two" => nil, "six" => "6"}) assert_equal "1", store["one"]? assert_equal nil, store["two"]? assert_equal "3", store["three"]? assert_equal "6", store["six"]? end end it "can store string-only values" do clean_slate do values = {"one" => "1", "two" => "2", "three" => "3"} store.set(values) assert_equal "1", store["one"]? 
assert_equal "2", store["two"]? assert_equal "3", store["three"]? assert_equal values, store.to_h end end end it "can be deleted" do clean_slate do store[field] = "truth" assert_equal "truth", store[field]? store.delete assert_equal nil, Metadata.new(store_name)[field]? end end it "can be deleted with a ttl" do clean_slate do store[field] = "truth" assert_equal "truth", store[field]? store.delete(in: 1.minute) assert_in_epsilon(60, Mosquito.backend.expires_in(store_name)) store.delete end end end ================================================ FILE: spec/mosquito/periodic_job_run_spec.cr ================================================ require "../spec_helper" describe Mosquito::PeriodicJobRun do getter interval : Time::Span = 2.minutes it "tries to execute but fails before the interval has passed" do now = Time.utc.at_beginning_of_second job_run = PeriodicJobRun.new PeriodicTestJob, interval job_run.last_executed_at = now Timecop.freeze(now + 1.minute) do job_run.try_to_execute assert_equal now, job_run.last_executed_at end end it "executes" do now = Time.utc.at_beginning_of_second job_run = PeriodicJobRun.new PeriodicTestJob, interval job_run.last_executed_at = now Timecop.freeze(now + interval) do job_run.try_to_execute assert_equal now + interval, job_run.last_executed_at end end it "checks the metadata store for the last executed timestamp" do now = Time.utc.at_beginning_of_second clean_slate do job_run = PeriodicJobRun.new PeriodicTestJob, interval job_run.last_executed_at = now - 1.minute Timecop.freeze(now) do another_job_run = PeriodicJobRun.new PeriodicTestJob, interval refute another_job_run.try_to_execute end end end it "does not enqueue a second job run when one is already pending" do clean_slate do now = Time.utc.at_beginning_of_second periodic = PeriodicJobRun.new PeriodicTestJob, interval # First execution should enqueue. 
Timecop.freeze(now) do periodic.last_executed_at = now - interval assert periodic.try_to_execute end queue = PeriodicTestJob.queue first_size = queue.size(include_dead: false) assert first_size > 0, "Expected at least one job in the queue" # Second execution after another interval should be skipped # because the first job run hasn't finished yet. Timecop.freeze(now + interval) do assert periodic.try_to_execute end second_size = queue.size(include_dead: false) assert_equal first_size, second_size end end it "enqueues again after the pending job run finishes" do clean_slate do now = Time.utc.at_beginning_of_second periodic = PeriodicJobRun.new PeriodicTestJob, interval # Enqueue the first job run. Timecop.freeze(now) do periodic.last_executed_at = now - interval periodic.try_to_execute end # Simulate the job finishing by writing finished_at to the backend. pending_id = periodic.metadata["pending_run_id"]? refute_nil pending_id Mosquito.backend.set( Mosquito::JobRun.config_key(pending_id.not_nil!), "finished_at", Time.utc.to_unix_ms.to_s ) queue = PeriodicTestJob.queue size_after_first = queue.size(include_dead: false) # Now a new interval passes — should enqueue since the previous one finished. Timecop.freeze(now + interval) do assert periodic.try_to_execute end size_after_second = queue.size(include_dead: false) assert size_after_second > size_after_first end end it "enqueues again when the pending job run config has been cleaned up" do clean_slate do now = Time.utc.at_beginning_of_second periodic = PeriodicJobRun.new PeriodicTestJob, interval # Enqueue the first job run. Timecop.freeze(now) do periodic.last_executed_at = now - interval periodic.try_to_execute end pending_id = periodic.metadata["pending_run_id"]? refute_nil pending_id # Simulate the job run config being deleted (e.g. TTL expiry). Mosquito.backend.delete Mosquito::JobRun.config_key(pending_id.not_nil!) 
queue = PeriodicTestJob.queue
size_before = queue.size(include_dead: false)

# Next interval should enqueue because the old run is gone.
Timecop.freeze(now + interval) do
  assert periodic.try_to_execute
end

size_after = queue.size(include_dead: false)
assert size_after > size_before
end
end
end
================================================
FILE: spec/mosquito/periodic_job_spec.cr
================================================
require "../spec_helper"

# Spec for PeriodicJob: job type naming, job_run construction, rescheduling
# policy, job mapping registration and interval scheduling.
describe Mosquito::PeriodicJob do
  # NOTE(review): `runner` is not referenced by any example in this describe block.
  getter(runner) { Mosquito::TestableRunner.new }

  it "correctly renders job_type" do
    assert_equal "periodic_test_job", PeriodicTestJob.job_type
  end

  # The built job_run must be a JobRun whose type matches the job class's job_type.
  it "builds a job_run" do
    job = PeriodicTestJob.new
    job_run = job.build_job_run

    assert_instance_of JobRun, job_run
    assert_equal PeriodicTestJob.job_type, job_run.type
  end

  it "is not reschedulable" do
    refute PeriodicTestJob.new.rescheduleable?
  end

  it "registers in job mapping" do
    assert_equal PeriodicTestJob, Base.job_for_type(PeriodicTestJob.job_type)
  end

  # This example contains no assertions; it only exercises registering a job
  # mapping and a `1.month` interval for MonthlyJob.
  it "can be scheduled at a MonthSpan interval" do
    clean_slate do
      Mosquito::Base.register_job_mapping MonthlyJob.queue.name, MonthlyJob
      Mosquito::Base.register_job_interval MonthlyJob, interval: 1.month
    end
  end

  # After run_every, the first entry in Base.scheduled_job_runs is expected to
  # carry the job class and the requested interval.
  it "schedules itself for an interval" do
    clean_slate do
      PeriodicTestJob.run_every 2.minutes
      scheduled_job_run = Base.scheduled_job_runs.first
      assert_equal PeriodicTestJob, scheduled_job_run.class
      assert_equal 2.minutes, scheduled_job_run.interval
    end
  end
end
================================================
FILE: spec/mosquito/queue_spec.cr
================================================
require "../spec_helper"

describe Queue do
getter(name) { "test#{rand(1000)}" }

getter(test_queue) do
Mosquito::Queue.new(name)
end

@job_run : Mosquito::JobRun?
getter(job_run) do Mosquito::JobRun.new("mock_job_run").tap(&.store) end getter backend : Mosquito::Backend::Queue do TestHelpers.backend.queue name end describe "config_key" do it "defaults to name" do name = "random_name" assert_equal name, Mosquito::Queue.new(name).config_key end end describe "flush" do it "purges all of the queue entries" do job_runs = (1..4).map do Mosquito::JobRun.new("mock_job_run").tap do |job_run| job_run.store test_queue.enqueue job_run end end assert_equal job_runs.size, test_queue.size test_queue.flush assert_equal 0, test_queue.size end end describe "enqueue" do it "adds the queue name to the list of queues" do clean_slate do test_queue.enqueue job_run assert_includes Mosquito.backend.list_queues, test_queue.name end end it "can enqueue a job_run for immediate processing" do clean_slate do test_queue.enqueue job_run job_run_ids = backend.list_waiting assert_includes job_run_ids, job_run.id end end it "can enqueue a job_run with a relative time" do Timecop.freeze(Time.utc) do clean_slate do offset = 3.seconds timestamp = offset.from_now test_queue.enqueue job_run, in: offset stored_time = backend.scheduled_job_run_time job_run assert_equal Time.unix_ms(timestamp.to_unix_ms), stored_time end end end it "can enqueue a job_run at a specific time" do Timecop.freeze(Time.utc) do clean_slate do timestamp = 3.seconds.from_now test_queue.enqueue job_run, at: timestamp stored_time = backend.scheduled_job_run_time job_run assert_equal Time.unix_ms(timestamp.to_unix_ms), stored_time end end end end describe "dequeue" do it "moves a job_run from waiting to pending on dequeue" do test_queue.enqueue job_run stored_job_run = test_queue.dequeue assert_equal job_run.id, stored_job_run.not_nil!.id pending_job_runs = backend.list_pending assert_includes pending_job_runs, job_run.id end it "dequeues job_runs which have been scheduled for a time that has passed" do job_run1 = job_run job_run2 = Mosquito::JobRun.new("mock_job_run").tap do |job_run| 
job_run.store end Timecop.freeze(Time.utc) do past = 1.minute.ago future = 1.minute.from_now test_queue.enqueue job_run1, at: past test_queue.enqueue job_run2, at: future end # check to make sure only job_run1 was dequeued overdue_job_runs = test_queue.dequeue_scheduled assert_equal 1, overdue_job_runs.size assert_equal job_run1.id, overdue_job_runs.first.id # check to make sure job_run2 is still scheduled scheduled_job_runs = backend.list_scheduled refute_includes scheduled_job_runs, job_run1.id assert_includes scheduled_job_runs, job_run2.id end end it "can forget about a pending job_run" do test_queue.enqueue job_run test_queue.dequeue pending_job_runs = backend.list_pending assert_includes pending_job_runs, job_run.id test_queue.forget job_run pending_job_runs = backend.list_pending refute_includes pending_job_runs, job_run.id end describe "banish" do it "can banish a pending job_run, adding it to the dead q" do test_queue.enqueue job_run test_queue.dequeue pending_job_runs = backend.list_pending assert_includes pending_job_runs, job_run.id test_queue.banish job_run pending_job_runs = backend.list_pending refute_includes pending_job_runs, job_run.id dead_job_runs = backend.list_dead assert_includes dead_job_runs, job_run.id end end describe "pause" do it "is not paused by default" do refute test_queue.paused? end it "can be paused" do test_queue.pause assert test_queue.paused? end it "can be resumed" do test_queue.pause assert test_queue.paused? test_queue.resume refute test_queue.paused? 
end

    it "prevents dequeue when paused" do
      test_queue.enqueue job_run
      test_queue.pause
      result = test_queue.dequeue
      assert_nil result

      # job_run should still be in waiting, not moved to pending
      waiting_job_runs = backend.list_waiting
      assert_includes waiting_job_runs, job_run.id
      pending_job_runs = backend.list_pending
      refute_includes pending_job_runs, job_run.id
    end

    it "allows dequeue after resume" do
      test_queue.enqueue job_run
      test_queue.pause
      assert_nil test_queue.dequeue
      test_queue.resume
      stored_job_run = test_queue.dequeue
      assert_equal job_run.id, stored_job_run.not_nil!.id
    end

    it "still allows enqueue while paused" do
      test_queue.pause
      test_queue.enqueue job_run
      waiting_job_runs = backend.list_waiting
      assert_includes waiting_job_runs, job_run.id
    end

    it "can be paused with a duration" do
      test_queue.pause for: 60.seconds
      assert test_queue.paused?
    end

    it "does not affect other queues" do
      other_queue = Mosquito::Queue.new("other_#{name}")
      other_job_run = Mosquito::JobRun.new("mock_job_run").tap(&.store)
      test_queue.pause
      other_queue.enqueue other_job_run
      assert_nil test_queue.dequeue
      stored = other_queue.dequeue
      assert_equal other_job_run.id, stored.not_nil!.id
    end
  end
end

================================================
FILE: spec/mosquito/queued_job_spec.cr
================================================
require "../spec_helper"

describe Mosquito::QueuedJob do
  getter(runner) { Mosquito::TestableRunner.new }
  getter(name) { "test#{rand(1000)}" }
  getter(job : QueuedTestJob) { QueuedTestJob.new }
  getter(queue : Queue) { QueuedTestJob.queue }
  getter(queue_hooked_job : QueueHookedTestJob) { QueueHookedTestJob.new }

  # Enqueueing with no timing lands on the waiting list; `in:` / `at:` land on
  # the scheduled list. The hook examples use QueueHookedTestJob's flags.
  describe "enqueue" do
    it "enqueues" do
      clean_slate do
        enqueued_run = job.enqueue
        assert_equal [enqueued_run.id], queue.backend.list_waiting
      end
    end

    it "enqueues with a delay" do
      clean_slate do
        enqueued_run = job.enqueue in: 1.minute
        assert_equal [enqueued_run.id], queue.backend.list_scheduled
      end
    end

    it "enqueues with a target time" do
      clean_slate do
        enqueued_run = job.enqueue at: 1.minute.from_now
        assert_equal [enqueued_run.id], queue.backend.list_scheduled
      end
    end

    it "fires before_enqueue_hook" do
      clean_slate do
        queue_hooked_job.enqueue
        assert queue_hooked_job.before_hook_ran
      end
    end

    it "doesnt enqueue if before_enqueue_hook fails" do
      clean_slate do
        queue_hooked_job.fail_before_hook = true
        queue_hooked_job.enqueue
        assert_empty queue.backend.list_waiting
      end
    end

    it "fires after_enqueue_hook" do
      clean_slate do
        queue_hooked_job.enqueue
        assert queue_hooked_job.after_hook_ran
      end
    end

    it "passes the job config to the before_enqueue_hook" do
      clean_slate do
        enqueued_run = queue_hooked_job.enqueue
        assert_equal enqueued_run, queue_hooked_job.passed_job_config
      end
    end

    it "passes the job config to the after_enqueue_hook" do
      clean_slate do
        enqueued_run = queue_hooked_job.enqueue
        assert_equal enqueued_run, queue_hooked_job.passed_job_config
      end
    end
  end

  # Parameter handling is checked through what each job logs when performed.
  describe "parameters" do
    it "can be passed in" do
      clear_logs
      EchoJob.new("quack").perform
      assert_logs_match "quack"
    end

    it "can have a boolean false passed as a parameter (and it's not assumed to be a nil)" do
      clear_logs
      JobWithHooks.new(false).perform
      assert_includes logs, "Perform Executed"
    end

    it "can be omitted" do
      clean_slate do
        clear_logs
        JobWithNoParams.new.perform
        assert_includes logs, "no param job performed"
      end
    end
  end
end

================================================
FILE: spec/mosquito/rate_limiter_spec.cr
================================================
require "../spec_helper"

describe Mosquito::RateLimiter do
  describe "RateLimiter.rate_limit_stats" do
    it "provides the state and configuration of the limiter" do
      clean_slate do
        stats = RateLimitedJob.rate_limit_stats
        assert stats.has_key? :interval
        assert stats.has_key? :key
        assert stats.has_key? :increment
        assert stats.has_key? :limit
        assert stats.has_key? :window_start
        assert stats.has_key?
:run_count
      end
    end

    it "defaults the window_start" do
      clean_slate do
        assert_equal Time::UNIX_EPOCH, RateLimitedJob.rate_limit_stats[:window_start]

        now = Time.utc.at_beginning_of_second
        RateLimitedJob.metadata["window_start"] = now.to_unix.to_s
        assert_equal now, RateLimitedJob.rate_limit_stats[:window_start]
      end
    end

    it "defaults the run_count" do
      clean_slate do
        assert_equal 0, RateLimitedJob.rate_limit_stats[:run_count]

        run_count = 27
        RateLimitedJob.metadata["run_count"] = run_count.to_s
        assert_equal run_count, RateLimitedJob.rate_limit_stats[:run_count]
      end
    end
  end

  describe "RateLimiter.metadata" do
    it "provides an instance of the metadata store" do
      assert_instance_of Metadata, RateLimitedJob.metadata
    end
  end

  describe "RateLimiter.rate_limit_key" do
    it "provides the metadata key for this class" do
      assert_equal "mosquito:rate_limit:rate_limit", RateLimitedJob.rate_limit_key
    end
  end

  # These examples read the "run_count" metadata entry before and after running
  # jobs and compare the difference.
  describe "job counting" do
    it "increments the count when a job is run" do
      clean_slate do
        RateLimitedJob.new.run
        count = RateLimitedJob.metadata["run_count"]?.not_nil!.to_i

        RateLimitedJob.new.run
        new_count = RateLimitedJob.metadata["run_count"]?.not_nil!.to_i

        assert_equal 1, new_count - count
      end
    end

    it "doesnt increment the count when a job is not run" do
      clean_slate do
        RateLimitedJob.new(should_fail: false).run
        count = RateLimitedJob.metadata["run_count"]?.not_nil!.to_i

        RateLimitedJob.new(should_fail: true).run
        new_count = RateLimitedJob.metadata["run_count"]?.not_nil!.to_i

        assert_equal count, new_count
      end
    end

    it "increments the count by a configurable number" do
      clean_slate do
        delta = 2
        RateLimitedJob.new.run
        count = RateLimitedJob.metadata["run_count"]?.not_nil!.to_i

        RateLimitedJob.new(increment: delta).run
        new_count = RateLimitedJob.metadata["run_count"]?.not_nil!.to_i

        assert_equal delta, new_count - count
      end
    end

    it "resets the count when the window is over" do
      clean_slate do
        metadata = RateLimitedJob.metadata
        metadata["run_count"] = "45"
        metadata["window_start"] = Time::UNIX_EPOCH.to_unix.to_s

        RateLimitedJob.new.run
        count = RateLimitedJob.metadata["run_count"]?
        assert_equal "1", count
      end
    end

    it "counts multiple jobs with the same key in the same bucket" do
      clean_slate do
        metadata = RateLimitedJob.metadata
        metadata["window_start"] = Time.utc.to_unix.to_s

        RateLimitedJob.new.run
        count = RateLimitedJob.metadata["run_count"]?.not_nil!.to_i

        SecondRateLimitedJob.new.run
        new_count = RateLimitedJob.metadata["run_count"]?.not_nil!.to_i

        assert_equal RateLimitedJob.rate_limit_key, SecondRateLimitedJob.rate_limit_key
        assert_equal 1, new_count - count
      end
    end
  end

  # Each example seeds "run_count" and "window_start" by hand before running a
  # job. They are wrapped in clean_slate like every other example in this file
  # that writes limiter metadata, so they do not start from values left behind
  # by another example.
  describe "job preempting" do
    it "doesnt prevent excution if the rate limit count is less than zero" do
      clean_slate do
        metadata = RateLimitedJob.metadata
        metadata["run_count"] = "-1"
        metadata["window_start"] = Time.utc.to_unix.to_s

        job = RateLimitedJob.new
        job.run
        assert job.executed?
      end
    end

    it "prevents a job from executing when the limit is reached" do
      clean_slate do
        metadata = RateLimitedJob.metadata
        metadata["run_count"] = Int32::MAX.to_s
        metadata["window_start"] = Time.utc.to_unix.to_s

        job = RateLimitedJob.new
        job.run
        refute job.executed?
        assert job.preempted?
      end
    end

    it "allows a job to execute when the limit hasn't been reached" do
      clean_slate do
        metadata = RateLimitedJob.metadata
        metadata["window_start"] = Time.utc.to_unix.to_s
        metadata["run_count"] = "3"

        job = RateLimitedJob.new
        job.run
        assert job.executed?
      end
    end

    it "allows a job to execute when the limit has been reached but the window is over" do
      clean_slate do
        metadata = RateLimitedJob.metadata
        metadata["run_count"] = Int32::MAX.to_s
        metadata["window_start"] = Time::UNIX_EPOCH.to_unix.to_s

        job = RateLimitedJob.new
        job.run
        assert job.executed?
      end
    end
  end
end

================================================
FILE: spec/mosquito/resource_gate_spec.cr
================================================
require "../spec_helper"

describe "Mosquito::OpenGate" do
  it "always allows" do
    gate = Mosquito::OpenGate.new
    assert gate.allow?
end
end

# The metric block supplies the sampled value. These examples expect allow? to
# be true below the threshold and false at or above it.
describe "Mosquito::ThresholdGate" do
  it "allows when metric is below threshold" do
    gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 50.0 }
    assert gate.allow?
  end

  it "blocks when metric is at or above threshold" do
    gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 85.0 }
    refute gate.allow?
  end

  it "blocks when metric equals threshold" do
    gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 80.0 }
    refute gate.allow?
  end
end

# call_count records how many times the metric block ran, so the examples can
# tell a cached answer from a fresh sample.
describe "Mosquito::ResourceGate caching" do
  it "caches the check result within TTL" do
    call_count = 0
    gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 5.seconds) do
      call_count += 1
      50.0
    end

    now = Time.utc
    Timecop.freeze(now) do
      gate.allow?
      gate.allow?
      gate.allow?
      assert_equal 1, call_count
    end
  end

  it "re-checks after TTL expires" do
    call_count = 0
    gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 5.seconds) do
      call_count += 1
      50.0
    end

    now = Time.utc
    Timecop.freeze(now) do
      gate.allow?
      assert_equal 1, call_count
    end

    Timecop.freeze(now + 3.seconds) do
      gate.allow?
      assert_equal 1, call_count, "Should still be cached at 3s"
    end

    Timecop.freeze(now + 6.seconds) do
      gate.allow?
      assert_equal 2, call_count, "Should re-check after 6s (past 5s TTL)"
    end
  end
end

================================================
FILE: spec/mosquito/runnable_spec.cr
================================================
require "../spec_helper"

# Minimal class including Mosquito::Runnable, used by the examples below to
# drive the module's run/stop lifecycle.
class Namespace::ConcreteRunnable
  include Mosquito::Runnable

  # Receives one message from each_run the first time it is invoked.
  getter first_run_notifier = Channel(Bool).new
  getter first_run = true

  # Exposed as a property so examples can set the state directly.
  property state : Mosquito::Runnable::State

  # Testing wedge which calls: run, waits for a run to happen, and then calls stop.
  def test_run : Nil
    run
    first_run_notifier.receive
    stop.wait
  end

  def runnable_name : String
    "concrete_runnable"
  end

  # Signals first_run_notifier on the first invocation only, then yields the fiber.
  def each_run : Nil
    if first_run
      @first_run = false
      first_run_notifier.send true
    end

    Fiber.yield
  end

  # Closes the notifier channel before delegating to the included stop.
  def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
    first_run_notifier.close
    super(wait_group)
  end
end

describe Mosquito::Runnable do
  let(:runnable) { Namespace::ConcreteRunnable.new }

  it "builds a my_name" do
    assert_equal "namespace.concrete_runnable.#{runnable.object_id}", runnable.my_name
  end

  describe "run" do
    it "should log a startup message" do
      clear_logs
      runnable.test_run
      assert_logs_match "mosquito.concrete_runnable", "starting"
    end

    it "should log a finished message" do
      clear_logs
      runnable.test_run
      assert_logs_match "mosquito.concrete_runnable", "stopped"
    end
  end

  describe "stop" do
    it "should set the stopping flag" do
      runnable.state = Mosquito::Runnable::State::Working
      runnable.stop
      assert_equal Mosquito::Runnable::State::Stopping, runnable.state
    end

    it "should set the finished flag" do
      runnable.test_run
      assert_equal Mosquito::Runnable::State::Finished, runnable.state
    end
  end
end

================================================
FILE: spec/mosquito/runners/coordinator_spec.cr
================================================
require "../../spec_helper"

describe "Mosquito::Runners::Coordinator" do
  getter(queue : Queue) { test_job.class.queue }
  getter(test_job) { QueuedTestJob.new }
  getter(queue_list) { MockQueueList.new }
  getter(coordinator) { MockCoordinator.new queue_list }
  getter(enqueue_time) { Time.utc }

  # Adds the queue to the mock queue list, enqueues test_job 3 seconds out with
  # the clock frozen at enqueue_time, checks it is on the scheduled list, and
  # returns the job run.
  def enqueue_job_run : JobRun
    queue_list.discovered_queues << queue

    # NOTE(review): presumably a placeholder so job_run exists before the
    # block below reassigns it.
    job_run = JobRun.new "blah"

    Timecop.freeze enqueue_time do |t|
      job_run = test_job.enqueue in: 3.seconds
    end

    assert_includes queue.backend.list_scheduled, job_run.id
    job_run
  end

  # Yields with use_distributed_lock enabled, deleting the leadership lock key first.
  def opt_in_to_locking
    Mosquito.temp_config(use_distributed_lock: true) do
      Mosquito.backend.delete
Mosquito.backend.build_key(:coordinator, :leadership_lock)
      yield
      Mosquito.backend.delete Mosquito.backend.build_key(:coordinator, :leadership_lock)
    end
  end

  # Two plain coordinators share one queue list, so the examples can observe
  # which of them is allowed to run its block.
  describe "only_if_coordinator" do
    getter(coordinator1) { Mosquito::Runners::Coordinator.new queue_list }
    getter(coordinator2) { Mosquito::Runners::Coordinator.new queue_list }

    it "gets a lock from the backend" do
      opt_in_to_locking do
        gotten = false
        coordinator1.only_if_coordinator { gotten = true }
        assert gotten
      end
    end

    it "fails to get a lock from the backend" do
      opt_in_to_locking do
        gotten = false

        coordinator1.only_if_coordinator do
          coordinator2.only_if_coordinator { gotten = true }
        end

        refute gotten
      end
    end

    it "releases the lock when release_leadership_lock is called" do
      opt_in_to_locking do
        gotten = false

        coordinator1.only_if_coordinator { }
        coordinator1.release_leadership_lock
        coordinator2.only_if_coordinator { gotten = true }

        assert gotten
      end
    end

    it "sets a ttl on the lock" do
      opt_in_to_locking do
        coordinator1.only_if_coordinator do
          assert Mosquito.backend.expires_in(coordinator.lock_key) > 0
        end
      end
    end

    it "retains leadership across calls" do
      opt_in_to_locking do
        count = 0

        3.times do
          coordinator1.only_if_coordinator { count += 1 }
        end

        assert_equal 3, count
        assert coordinator1.is_leader?
      end
    end

    it "yields without locking when distributed lock is disabled" do
      Mosquito.temp_config(use_distributed_lock: false) do
        gotten = false
        coordinator1.only_if_coordinator { gotten = true }
        assert gotten
      end
    end
  end

  describe "enqueue_periodic_jobs" do
    it "enqueues a scheduled job_run at the appropriate time" do
      clean_slate do
        periodic_queue = PeriodicTestJob.queue
        Mosquito::Base.register_job_mapping PeriodicTestJob.name, PeriodicTestJob
        Mosquito::Base.register_job_interval PeriodicTestJob, interval: 1.second

        Timecop.freeze(enqueue_time) { coordinator.enqueue_periodic_jobs }

        waiting = periodic_queue.backend.list_waiting
        assert waiting.size >= 1

        # The newest waiting run must carry the frozen enqueue time.
        stored = Mosquito.backend.retrieve JobRun.config_key(waiting.last)
        assert_equal enqueue_time.to_unix_ms.to_s, stored["enqueue_time"]
      end
    end
  end

  describe "enqueue_delayed_jobs" do
    it "enqueues a delayed job_run when it's ready" do
      clean_slate do
        job_run = enqueue_job_run
        run_time = enqueue_time + 3.seconds

        Timecop.freeze(run_time) { coordinator.enqueue_delayed_jobs }

        waiting = queue.backend.list_waiting
        assert_includes waiting, job_run.id

        stored = Mosquito.backend.retrieve JobRun.config_key(waiting.last)
        assert_equal queue.name, stored["type"]?
      end
    end

    it "doesn't enqueue job_runs that arent ready yet" do
      clean_slate do
        enqueue_job_run
        check_time = enqueue_time + 2.999.seconds

        Timecop.freeze(check_time) { coordinator.enqueue_delayed_jobs }

        # does not deschedule and enqueue anything
        assert_equal 0, queue.backend.list_waiting.size
      end
    end

    it "logs when it finds delayed job_runs" do
      clean_slate do
        clear_logs
        enqueue_job_run

        Timecop.freeze(enqueue_time + 3.seconds) { coordinator.enqueue_delayed_jobs }

        assert_logs_match "1 delayed jobs ready"
      end
    end
  end
end

================================================
FILE: spec/mosquito/runners/executor_spec.cr
================================================
require "../../spec_helper"

describe "Mosquito::Runners::Executor" do
  getter(queue_list) { MockQueueList.new }
  getter(overseer) { MockOverseer.new }
  getter(executor) { MockExecutor.new overseer.as(Mosquito::Runners::Overseer) }
  getter(api) { Mosquito::Api::Executor.new executor.object_id.to_s }
  getter(coordinator) { Mosquito::Runners::Coordinator.new queue_list }

  def register(job_class : Mosquito::Job.class)
    Mosquito::Base.register_job_mapping job_class.name.underscore, job_class
    queue_list.discovered_queues << job_class.queue
  end

  def run_job(job_class : Mosquito::Job.class)
    register job_class
    job_class.reset_performance_counter!
job_run = job_class.new.enqueue
    executor.work_unit = WorkUnit.of(job_run, from: job_class.queue)
    executor.execute
  end

  describe "status" do
    it "starts as starting" do
      assert_equal Runnable::State::Starting, executor.state
    end

    it "broadcasts a ping when transitioning to idle" do
      executor.state = Runnable::State::Idle

      select
      when overseer.finished_notifier.receive
        assert true
      when timeout(0.5.seconds)
        refute true, "Timed out waiting for idle notifier"
      end
    end

    it "goes idle in pre_run" do
      executor.pre_run
      assert_equal Runnable::State::Idle, executor.state
    end
  end

  # Examples either use run_job, or build and enqueue the job run by hand when
  # they need to inspect it after execution.
  describe "running jobs" do
    it "runs a job from a queue" do
      clean_slate do
        run_job QueuedTestJob
        assert_equal 1, QueuedTestJob.performances
      end
    end

    it "reschedules a job that failed" do
      clean_slate do
        register FailingJob
        now = Time.utc

        job = FailingJob.new
        job_run = job.build_job_run
        job_run.store
        FailingJob.queue.enqueue job_run

        Timecop.freeze now do
          executor.work_unit = WorkUnit.of(job_run, from: FailingJob.queue)
          executor.execute
        end

        job_run.reload
        assert_equal 1, job_run.retry_count

        # Jump ahead by the first reschedule interval so the retry is due.
        Timecop.freeze now + job.reschedule_interval(1) do
          coordinator.enqueue_delayed_jobs
          executor.work_unit = WorkUnit.of(job_run, from: FailingJob.queue)
          executor.execute
        end

        job_run.reload
        assert_equal 2, job_run.retry_count
      end
    end

    it "schedules deletion of a job_run that hard failed" do
      clean_slate do
        register NonReschedulableFailingJob

        job = NonReschedulableFailingJob.new
        job_run = job.build_job_run
        job_run.store
        NonReschedulableFailingJob.queue.enqueue job_run

        executor.work_unit = WorkUnit.of(job_run, from: NonReschedulableFailingJob.queue)
        executor.execute

        actual_ttl = backend.expires_in job_run.config_key
        assert_equal executor.failed_job_ttl, actual_ttl
      end
    end

    it "purges a successful job_run from the backend" do
      clean_slate do
        register QueuedTestJob

        job = QueuedTestJob.new
        job_run = job.build_job_run
        job_run.store
        QueuedTestJob.queue.enqueue job_run

        executor.work_unit = WorkUnit.of(job_run, from: QueuedTestJob.queue)
        executor.execute

        assert_logs_match "Success"

        QueuedTestJob.queue.enqueue job_run
        actual_ttl = Mosquito.backend.expires_in job_run.config_key
        assert_equal executor.successful_job_ttl, actual_ttl
      end
    end

    it "doesnt reschedule a job that cant be rescheduled" do
      clean_slate do
        run_job NonReschedulableFailingJob
        assert_logs_match "cannot be rescheduled"
      end
    end

    it "tells the observer what it's working on" do
      SleepyJob.should_sleep = true
      job = SleepyJob.new
      job_run = job.build_job_run
      job_run.store

      job_started = Channel(Bool).new
      job_finished = Channel(Bool).new

      # Eagerly evaluate to avoid race condition in lazy
      # getter initialization across fibers.
      executor
      api

      spawn {
        executor.work_unit = WorkUnit.of(job_run, from: SleepyJob.queue)
        executor.execute
        job_finished.send true
      }

      spawn {
        # Poll until the api reports a current job. Yield between polls so the
        # loop does not depend on api.current_job itself yielding the fiber.
        until api.current_job
          Fiber.yield
        end

        assert_equal job_run.id, api.current_job
        assert_equal SleepyJob.queue.name, api.current_job_queue
        job_started.send true
      }

      select
      when job_started.receive
      when timeout(0.5.seconds)
        refute true, "Timed out waiting for job to start"
      end

      SleepyJob.should_sleep = false

      select
      when job_finished.receive
      when timeout(0.5.seconds)
        refute true, "Timed out waiting for job to finish"
      end

      assert_nil api.current_job, "Job should be cleared after finishing"
      assert_nil api.current_job_queue, "Queue should be cleared after finishing"
    end
  end

  describe "logs success/failures messages" do
    it "logs a success message when the job succeeds" do
      clean_slate do
        run_job QueuedTestJob
        assert_logs_match "Success"
      end
    end

    it "logs a failure message when the job fails" do
      clean_slate do
        run_job FailingJob
        assert_logs_match "Failure"
      end
    end
  end

  describe "job timing messages" do
    it "logs the time a job took to run" do
      clean_slate do
        run_job QueuedTestJob
        assert_logs_match "and took"
      end
    end

    it "logs the time a job took to run when the job fails" do
      clean_slate do
        run_job FailingJob
        assert_logs_match "taking"
      end
    end
  end

  describe "start and finish messages" do
    it "logs the job run start message" do
      clean_slate do
        run_job QueuedTestJob
        assert_logs_match "Starting: queued_test_job"
      end
    end
  end
end

================================================
FILE: spec/mosquito/runners/overseer_spec.cr
================================================
require "../../spec_helper"

describe "Mosquito::Runners::Overseer" do
  getter(overseer : MockOverseer) { MockOverseer.new }
  getter(queue_list : MockQueueList ) { overseer.queue_list.as(MockQueueList) }
  getter(coordinator : MockCoordinator ) { overseer.coordinator.as(MockCoordinator) }
  getter(executor : MockExecutor) { overseer.executors.first.as(MockExecutor) }

  # Registers the job class mapping and adds its queue to the mock queue list.
  def register(job_class : Mosquito::Job.class)
    Mosquito::Base.register_job_mapping job_class.name.underscore, job_class
    queue_list.discovered_queues << job_class.queue
  end

  # Enqueues a fresh job of the given class and executes it on the first
  # executor. The job run is handed over as a WorkUnit, the same way the
  # executor spec drives MockExecutor.
  def run_job(job_class : Mosquito::Job.class)
    register job_class
    job_class.reset_performance_counter!
    job_run = job_class.new.enqueue
    executor.work_unit = WorkUnit.of(job_run, from: job_class.queue)
    executor.execute
  end

  describe "pre_run" do
    it "runs all executors" do
      overseer.executors.each do |executor|
        assert_equal Runnable::State::Starting, executor.state
      end

      overseer.pre_run

      overseer.executors.each do |executor|
        assert_equal Runnable::State::Working, executor.state
      end
    end
  end

  describe "post_run" do
    it "stops all executors" do
      overseer.executors.each(&.run)
      overseer.post_run

      overseer.executors.each do |executor|
        assert_equal Runnable::State::Finished, executor.state
      end
    end

    it "logs messages about stopping the executors" do
      clear_logs
      overseer.pre_run
      overseer.post_run
      assert_logs_match "Stopping executors."
      assert_logs_match "All executors stopped."
end
  end

  # The examples set queue_list and executor state by hand before calling
  # each_run directly.
  describe "each_run" do
    it "dequeues a job and dispatches it to the pipeline" do
      clean_slate do
        register QueuedTestJob
        expected_job_run = QueuedTestJob.new.enqueue

        overseer.work_handout = Channel(WorkUnit).new
        queue_list.state = Runnable::State::Working
        executor.state = Runnable::State::Idle

        # each_run will block until there's a receiver on the channel
        spawn { overseer.each_run }
        result = overseer.work_handout.receive

        assert_equal expected_job_run, result.job_run
        assert_equal QueuedTestJob.queue, result.queue
      end
    end

    it "waits #idle_wait before checking the queue again" do
      clean_slate do
        # an idle executor, but no jobs in the queue
        executor.state = Runnable::State::Idle
        queue_list.state = Runnable::State::Working

        tick_time = Time.measure do
          overseer.each_run
        end

        assert tick_time >= overseer.idle_wait, "Expected to wait at least #{overseer.idle_wait}, but only waited #{tick_time}"
      end
    end

    it "triggers the scheduler" do
      assert_equal 0, coordinator.schedule_count
      overseer.each_run
      assert_equal 1, coordinator.schedule_count
    end
  end

  describe "dequeue_job? stamps overseer_id" do
    it "claims the job run with the overseer's instance id on dequeue" do
      clean_slate do
        register QueuedTestJob
        job_run = QueuedTestJob.new.enqueue
        queue_list.state = Runnable::State::Working

        result = overseer.dequeue_job?
        assert result
        assert_equal overseer.observer.instance_id, result.not_nil!.job_run.overseer_id
      end
    end
  end

  # Examples that set Mosquito.configuration.overseer_id reset it to nil in an
  # ensure clause, so the global setting does not outlive the example.
  describe "remote executor count" do
    it "applies the remote executor count on each_run" do
      clean_slate do
        Mosquito.configuration.overseer_id = "test-worker"
        Mosquito::Api.set_executor_count(3, overseer_id: "test-worker")

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal 3, overseer.executor_count
      ensure
        Mosquito.configuration.overseer_id = nil
      end
    end

    it "prefers per-overseer count over global" do
      clean_slate do
        Mosquito.configuration.overseer_id = "test-worker"
        Mosquito::Api.set_executor_count(10)
        Mosquito::Api.set_executor_count(2, overseer_id: "test-worker")

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal 2, overseer.executor_count
      ensure
        Mosquito.configuration.overseer_id = nil
      end
    end

    it "falls back to global when no per-overseer count is set" do
      clean_slate do
        Mosquito.configuration.overseer_id = "test-worker"
        Mosquito::Api.set_executor_count(7)

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal 7, overseer.executor_count
      ensure
        Mosquito.configuration.overseer_id = nil
      end
    end

    it "does not change executor_count when no remote value is set" do
      clean_slate do
        original_count = overseer.executor_count

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal original_count, overseer.executor_count
      end
    end

    it "clamps an invalid remote executor count of 0 to 1" do
      clean_slate do
        Mosquito.configuration.overseer_id = "test-worker"
        Mosquito::Api.set_executor_count(0, overseer_id: "test-worker")

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal 1, overseer.executor_count
      ensure
        Mosquito.configuration.overseer_id = nil
      end
    end
  end

  # Each example moves a job run onto the pending list (enqueue, then dequeue),
  # optionally claims it for an overseer, and then runs the cleanup.
  describe "cleanup_orphaned_pending_jobs" do
    it "recovers a pending job whose overseer is dead" do
      clean_slate do
        register QueuedTestJob

        # Use a separate overseer that won't be registered as alive.
        dead_overseer = MockOverseer.new

        job = QueuedTestJob.new
        job_run = job.build_job_run
        job_run.store
        QueuedTestJob.queue.enqueue job_run
        QueuedTestJob.queue.dequeue
        job_run.claimed_by dead_overseer

        # Verify job is stuck in pending
        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id
        assert_equal 0, job_run.retry_count

        # Register only the *live* overseer
        Mosquito.backend.register_overseer overseer.observer.instance_id

        # Run cleanup — dead_overseer's id won't be in the active set
        overseer.cleanup_orphaned_pending_jobs

        # Job should be removed from pending and rescheduled
        assert_empty QueuedTestJob.queue.backend.list_pending
        assert_includes QueuedTestJob.queue.backend.list_scheduled, job_run.id

        # Retry count should be incremented
        job_run.reload
        assert_equal 1, job_run.retry_count
      end
    end

    it "does not touch pending jobs from a live overseer" do
      clean_slate do
        register QueuedTestJob

        job = QueuedTestJob.new
        job_run = job.build_job_run
        job_run.store
        QueuedTestJob.queue.enqueue job_run
        QueuedTestJob.queue.dequeue

        # Claim with the live overseer
        Mosquito.backend.register_overseer overseer.observer.instance_id
        job_run.claimed_by overseer

        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id

        overseer.cleanup_orphaned_pending_jobs

        # Job should still be in pending — its overseer is alive
        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id
      end
    end

    it "claims unclaimed pending jobs without recovering them" do
      clean_slate do
        register QueuedTestJob

        job = QueuedTestJob.new
        job_run = job.build_job_run
        job_run.store
        QueuedTestJob.queue.enqueue job_run
        QueuedTestJob.queue.dequeue

        # No claim — simulates a job from before this feature
        assert_nil job_run.overseer_id
        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id

        Mosquito.backend.register_overseer overseer.observer.instance_id
        overseer.cleanup_orphaned_pending_jobs

        # Job should still be in pending (not recovered)
        assert_includes QueuedTestJob.queue.backend.list_pending,
job_run.id

        # But it should now be claimed by this overseer
        job_run.reload
        assert_equal overseer.observer.instance_id, job_run.overseer_id
      end
    end

    it "banishes an orphaned job that has exhausted retries" do
      clean_slate do
        register QueuedTestJob

        dead_overseer = MockOverseer.new

        # Create a job_run with retry_count=4 so the next failure (count=5)
        # exceeds the default rescheduleable? limit of < 5.
        job_run = Mosquito::JobRun.new("queued_test_job", retry_count: 4)
        job_run.store
        QueuedTestJob.queue.enqueue job_run
        QueuedTestJob.queue.dequeue
        job_run.claimed_by dead_overseer

        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id

        Mosquito.backend.register_overseer overseer.observer.instance_id
        overseer.cleanup_orphaned_pending_jobs

        # Job should be removed from pending and moved to dead
        assert_empty QueuedTestJob.queue.backend.list_pending
        assert_empty QueuedTestJob.queue.backend.list_waiting
        assert_empty QueuedTestJob.queue.backend.list_scheduled
        assert_includes QueuedTestJob.queue.backend.list_dead, job_run.id
      end
    end
  end
end

================================================
FILE: spec/mosquito/runners/queue_list_spec.cr
================================================
require "../../spec_helper"

describe "Mosquito::Runners::QueueList" do
  getter(queue_list) { MockQueueList.new }

  # Enqueues one job of each of three classes. The examples below expect these
  # to surface as the queues "failing_job", "io_queue" and "passing_job".
  def enqueue_jobs
    PassingJob.new.enqueue
    FailingJob.new.enqueue
    EchoJob.new(text: "hello world").enqueue
  end

  describe "each_run" do
    it "returns a list of queues" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run
        assert_equal ["failing_job", "io_queue", "passing_job"], queue_list.queues.map(&.name).sort
      end
    end

    it "logs a message about the number of fetched queues" do
      clean_slate do
        clear_logs
        enqueue_jobs
        queue_list.each_run
        assert_logs_match "found 3 new queues"
      end
    end
  end

  describe "queue filtering" do
    it "filters the list of queues when a whitelist is present" do
      clean_slate do
        enqueue_jobs

        Mosquito.temp_config(run_from: ["io_queue", "passing_job"]) do
          queue_list.each_run
        end

        # Asserted inside clean_slate, like every other example in this file.
        assert_equal ["io_queue", "passing_job"], queue_list.queues.map(&.name).sort
      end
    end

    it "logs an error when all queues are filtered out" do
      clean_slate do
        enqueue_jobs

        Mosquito.temp_config(run_from: ["test4"]) do
          queue_list.each_run
        end

        assert_logs_match "No watchable queues found."
      end
    end

    it "doesnt log an error when no queues are present" do
      clean_slate do
        queue_list.each_run
        refute_logs_match "No watchable queues found."
      end
    end
  end

  describe "paused queue filtering" do
    it "excludes paused queues from the queue list" do
      clean_slate do
        enqueue_jobs
        Mosquito::Queue.new("passing_job").pause
        queue_list.each_run
        assert_equal ["failing_job", "io_queue"], queue_list.queues.map(&.name).sort
      end
    end

    it "logs a message about paused queues" do
      clean_slate do
        clear_logs
        enqueue_jobs
        Mosquito::Queue.new("passing_job").pause
        queue_list.each_run
        assert_logs_match "1 paused queues: passing_job"
      end
    end

    it "includes queues again after they are resumed" do
      clean_slate do
        enqueue_jobs
        q = Mosquito::Queue.new("passing_job")

        q.pause
        queue_list.each_run
        refute_includes queue_list.queues.map(&.name), "passing_job"

        q.resume
        queue_list.each_run
        assert_includes queue_list.queues.map(&.name), "passing_job"
      end
    end
  end

  # Gates are assigned after each_run has discovered the queues; the examples
  # then read queue_list.queues to see which queues remain.
  describe "resource gate filtering" do
    it "excludes queues whose gate blocks" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run

        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 90.0 }
        queue_list.resource_gates = {"passing_job" => gate.as(Mosquito::ResourceGate)}

        refute_includes queue_list.queues.map(&.name), "passing_job"
        assert_includes queue_list.queues.map(&.name), "failing_job"
        assert_includes queue_list.queues.map(&.name), "io_queue"
      end
    end

    it "includes queues whose gate allows" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run

        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 50.0 }
        queue_list.resource_gates = {"passing_job" => gate.as(Mosquito::ResourceGate)}

        assert_includes queue_list.queues.map(&.name), "passing_job"
      end
    end

    it "ungated queues are always included" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run

        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 90.0 }
        queue_list.resource_gates = {"passing_job" => gate.as(Mosquito::ResourceGate)}

        assert_equal 2, queue_list.queues.size
      end
    end

    it "multiple queues can share a gate" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run

        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 90.0 }
        queue_list.resource_gates = {
          "passing_job" => gate.as(Mosquito::ResourceGate),
          "failing_job" => gate.as(Mosquito::ResourceGate),
        }

        assert_equal ["io_queue"], queue_list.queues.map(&.name)
      end
    end

    it "gate state is evaluated on each access" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run

        # The gate's block closes over `value`, so reassigning it changes the
        # metric the gate samples on the next access.
        value = 90.0
        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { value }
        queue_list.resource_gates = {"passing_job" => gate.as(Mosquito::ResourceGate)}

        refute_includes queue_list.queues.map(&.name), "passing_job"

        value = 50.0
        assert_includes queue_list.queues.map(&.name), "passing_job"
      end
    end

    it "returns all queues when no gates are configured" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run
        assert_equal 3, queue_list.queues.size
      end
    end
  end
end

================================================
FILE: spec/mosquito/runners/run_at_most_spec.cr
================================================
require "../../spec_helper"

# Includes RunAtMost and wraps run_at_most with a fixed one-second interval
# under the label :testing.
class RunsAtMostMock
  include Mosquito::Runners::RunAtMost

  def yield_once_a_second(&block)
    run_at_most every: 1.second, label: :testing do |t|
      yield
    end
  end
end

describe "Mosquito::yielder#run_at_most" do
  getter(yielder) { RunsAtMostMock.new }

  it "prevents throttled blocks from running too often" do
    count = 0

    2.times do
      yielder.yield_once_a_second do
        count += 1
      end
    end

    assert_equal 1, count
  end

  it "allows throttled blocks to run only after enough time has passed" do
    count = 0
    moment = Time.utc
    yielder

    incrementy = ->() do
      yielder.yield_once_a_second do
count += 1
      end
    end

    # Should increment
    Timecop.freeze moment do |time|
      incrementy.call
    end

    # Should not increment
    # Move ahead 0.999 seconds
    Timecop.freeze(moment + 999.milliseconds) do |time|
      incrementy.call
    end

    assert_equal 1, count

    # Should increment
    # Move to 1.1 seconds after the first call, past the one second interval
    moment += 1.1.seconds
    Timecop.freeze(moment) do |time|
      incrementy.call
    end

    assert_equal 2, count

    # Should not increment
    # Try again at the same frozen moment and it shouldn't increment
    Timecop.freeze(moment) do |time|
      incrementy.call
    end

    assert_equal 2, count
  end
end

================================================
FILE: spec/mosquito/serializers/primitive_serializers_spec.cr
================================================
require "uuid"
require "../../spec_helper"

# Exposes the Primitives serializer methods as class methods for the specs.
class PrimitiveSerializerTester
  extend Mosquito::Serializers::Primitives
end

describe Mosquito::Serializers::Primitives do
  # Round trip: the serialized form must be accepted by UUID.new.
  it "serializes uuids" do
    uuid = UUID.random
    assert_equal uuid, UUID.new(PrimitiveSerializerTester.serialize_uuid(uuid))
  end

  it "deserializes uuids" do
    uuid = UUID.random.to_s
    assert_equal uuid, PrimitiveSerializerTester.deserialize_uuid(uuid).to_s
  end
end

================================================
FILE: spec/mosquito/testing_backend_spec.cr
================================================
require "../spec_helper"

describe Mosquito::TestBackend do
  # The most recent entry in TestBackend.enqueued_jobs.
  def latest_enqueued_job
    Mosquito::TestBackend.enqueued_jobs.last
  end

  it "holds a copy of jobs which have been enqueued" do
    Mosquito.temp_config(backend: Mosquito::TestBackend.new) do
      QueuedTestJob.new.enqueue
      assert_equal QueuedTestJob, latest_enqueued_job.klass
    end
  end

  it "embeds job parameters" do
    Mosquito.temp_config(backend: Mosquito::TestBackend.new) do
      EchoJob.new(text: "hello world").enqueue
      assert_equal "hello world", latest_enqueued_job.config["text"]
    end
  end

  it "hold the job id" do
    Mosquito.temp_config(backend: Mosquito::TestBackend.new) do
      job_run = QueuedTestJob.new.enqueue
      assert_equal job_run.id, latest_enqueued_job.id
    end
  end

  it "has a list of job runs which can
be emptied" do
    Mosquito.temp_config(backend: Mosquito::TestBackend.new) do
      # Start from an empty list so the assertions only see this spec's job.
      Mosquito::TestBackend.flush_enqueued_jobs!
      job_run = EchoJob.new(text: "hello world").enqueue
      assert_equal job_run.id, latest_enqueued_job.id
      Mosquito::TestBackend.flush_enqueued_jobs!
      assert Mosquito::TestBackend.enqueued_jobs.empty?
    end
  end
end

================================================
FILE: spec/mosquito/unique_job_spec.cr
================================================
require "../spec_helper"

describe Mosquito::UniqueJob do
  describe "first enqueue" do
    it "enqueues a job when no duplicate exists" do
      clean_slate do
        job = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job_run = job.enqueue

        enqueued = UniqueTestJob.queue.backend.list_waiting
        assert_equal [job_run.id], enqueued
      end
    end
  end

  describe "duplicate suppression" do
    # Identical parameters: the waiting list is expected to stay at one entry
    # after the second enqueue.
    it "prevents a second enqueue with the same parameters" do
      clean_slate do
        job1 = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job_run1 = job1.enqueue

        enqueued = UniqueTestJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size

        job2 = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job_run2 = job2.enqueue

        enqueued = UniqueTestJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size
      end
    end

    # Differs in user_id only.
    it "allows enqueue with different parameters" do
      clean_slate do
        job1 = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job1.enqueue

        enqueued = UniqueTestJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size

        job2 = UniqueTestJob.new(user_id: 2_i64, email_type: "welcome")
        job2.enqueue

        enqueued = UniqueTestJob.queue.backend.list_waiting
        assert_equal 2, enqueued.size
      end
    end

    # Differs in email_type only.
    it "allows enqueue with different parameter values" do
      clean_slate do
        job1 = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job1.enqueue

        enqueued = UniqueTestJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size

        job2 = UniqueTestJob.new(user_id: 1_i64, email_type: "reminder")
        job2.enqueue

        enqueued = UniqueTestJob.queue.backend.list_waiting
assert_equal 2, enqueued.size
      end
    end
  end

  describe "key filtering" do
    it "considers only specified key fields for uniqueness" do
      clean_slate do
        # Same user_id, different message — should be suppressed because
        # key is only [:user_id]
        job1 = UniqueWithKeyJob.new(user_id: 1_i64, message: "hello")
        job1.enqueue

        enqueued = UniqueWithKeyJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size

        job2 = UniqueWithKeyJob.new(user_id: 1_i64, message: "world")
        job2.enqueue

        enqueued = UniqueWithKeyJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size
      end
    end

    # Same message, different user_id: both jobs are expected to be waiting.
    it "allows enqueue when key fields differ" do
      clean_slate do
        job1 = UniqueWithKeyJob.new(user_id: 1_i64, message: "hello")
        job1.enqueue

        enqueued = UniqueWithKeyJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size

        job2 = UniqueWithKeyJob.new(user_id: 2_i64, message: "hello")
        job2.enqueue

        enqueued = UniqueWithKeyJob.queue.backend.list_waiting
        assert_equal 2, enqueued.size
      end
    end
  end

  describe "expiration" do
    it "allows re-enqueue after the uniqueness window expires" do
      clean_slate do
        job1 = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job_run1 = job1.enqueue

        enqueued = UniqueTestJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size

        # Manually remove the lock to simulate expiration
        lock_key = job1.uniqueness_key(job_run1)
        Mosquito.backend.unlock(lock_key, job_run1.id)

        job2 = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job2.enqueue

        enqueued = UniqueTestJob.queue.backend.list_waiting
        assert_equal 2, enqueued.size
      end
    end
  end

  describe "no parameters" do
    # Two parameterless jobs: the second enqueue is expected to be suppressed.
    it "works with jobs that have no parameters" do
      clean_slate do
        job1 = UniqueNoParamsJob.new
        job1.enqueue

        enqueued = UniqueNoParamsJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size

        job2 = UniqueNoParamsJob.new
        job2.enqueue

        enqueued = UniqueNoParamsJob.queue.backend.list_waiting
        assert_equal 1, enqueued.size
      end
    end
  end

  describe "delayed enqueue" do
    it "prevents duplicate delayed enqueue" do
      clean_slate do
        job1 =
UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job1.enqueue(in: 5.minutes)

        scheduled = UniqueTestJob.queue.backend.list_scheduled
        assert_equal 1, scheduled.size

        # Same parameters with a different delay: still expected to be suppressed.
        job2 = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job2.enqueue(in: 10.minutes)

        scheduled = UniqueTestJob.queue.backend.list_scheduled
        assert_equal 1, scheduled.size
      end
    end

    it "prevents duplicate when mixing immediate and delayed enqueue" do
      clean_slate do
        job1 = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job1.enqueue

        waiting = UniqueTestJob.queue.backend.list_waiting
        assert_equal 1, waiting.size

        # The delayed duplicate must not show up in the scheduled list.
        job2 = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        job2.enqueue(in: 5.minutes)

        scheduled = UniqueTestJob.queue.backend.list_scheduled
        assert_equal 0, scheduled.size
      end
    end
  end

  describe "unique_duration" do
    it "returns the configured duration" do
      job = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
      assert_equal 1.hour, job.unique_duration
    end
  end
end

================================================
FILE: spec/mosquito/version_spec.cr
================================================
require "../spec_helper"
require "yaml"

describe "mosquito version numbers" do
  it "is defined" do
    assert Mosquito::VERSION
  end

  # Compares the VERSION constant with the "version" entry parsed from shard.yml.
  it "matches the shard.yml file" do
    File.open("shard.yml") do |file|
      assert_equal Mosquito::VERSION, YAML.parse(file)["version"].as_s
    end
  end
end

================================================
FILE: spec/spec_helper.cr
================================================
require "minitest"
require "minitest/focus"

require "log"
Log.setup :fatal

require "timecop"
Timecop.safe_mode = true

require "../src/mosquito"

Mosquito.configure do |settings|
  settings.backend_connection_string = testing_redis_url
  settings.publish_metrics = true
end

require "./helpers/*"

class Minitest::Test
  include PubSub::Helpers
end

# Flush the configured backend once before the suite is run.
Mosquito.configuration.backend.flush

require "minitest/autorun"

================================================
FILE:
src/mosquito/api/concurrency_config.cr
================================================
module Mosquito
  # Provides read/write access to the remotely stored concurrency limits
  # used by `RemoteConfigDequeueAdapter`.
  #
  # Supports both global limits (shared by all overseers) and per-overseer
  # limits for asymmetric hardware configurations.
  #
  # ```crystal
  # config = Mosquito::Api::ConcurrencyConfig.instance
  # config.limits                                                # => global limits
  # config.limits(overseer_id: "gpu-worker-1")                   # => per-overseer limits
  # config.update({"queue_a" => 5})                              # write global
  # config.update({"queue_a" => 1}, overseer_id: "gpu-worker-1") # write per-overseer
  # config.clear                                                 # remove global limits
  # config.clear(overseer_id: "gpu-worker-1")                    # remove per-overseer limits
  # ```
  class Api::ConcurrencyConfig
    # Builds a new ConcurrencyConfig on every call; nothing is cached here.
    # Every method below delegates to `RemoteConfigDequeueAdapter`.
    def self.instance : self
      new
    end

    # Returns the global concurrency limits stored in the backend.
    def limits : Hash(String, Int32)
      RemoteConfigDequeueAdapter.stored_limits
    end

    # Returns the concurrency limits stored for a specific overseer.
    def limits(overseer_id : String) : Hash(String, Int32)
      RemoteConfigDequeueAdapter.stored_limits(overseer_id)
    end

    # Overwrites the global stored concurrency limits with *new_limits*.
    def update(new_limits : Hash(String, Int32)) : Nil
      RemoteConfigDequeueAdapter.store_limits(new_limits)
    end

    # Overwrites the stored concurrency limits for a specific overseer.
    def update(new_limits : Hash(String, Int32), overseer_id : String) : Nil
      RemoteConfigDequeueAdapter.store_limits(new_limits, overseer_id)
    end

    # Removes all globally stored concurrency limits.
    def clear : Nil
      RemoteConfigDequeueAdapter.clear_limits
    end

    # Removes stored concurrency limits for a specific overseer.
    def clear(overseer_id : String) : Nil
      RemoteConfigDequeueAdapter.clear_limits(overseer_id)
    end
  end
end

================================================
FILE: src/mosquito/api/executor.cr
================================================
module Mosquito
  module Api
    # An interface for an executor.
#
    # This is used to inspect the state of an executor. For more information
    # about executors, see `Mosquito::Runners::Executor`.
    class Executor
      getter :instance_id
      private getter :metadata

      # Creates an executor inspector.
      # The metadata is readonly and can be used to inspect the state of the executor.
      #
      # see #current_job, #current_job_queue
      def initialize(@instance_id : String)
        @metadata = Metadata.new Observability::Executor.metadata_key(@instance_id), readonly: true
      end

      # The current job being executed by the executor.
      #
      # When the executor is idle, this will be `nil`.
      def current_job : String?
        metadata["current_job"]?
      end

      # The queue which housed the current job being executed.
      #
      # When the executor is idle, this will be `nil`.
      def current_job_queue : String?
        metadata["current_job_queue"]?
      end

      # The last heartbeat time, or nil if none exists.
      def heartbeat : Time?
        metadata.heartbeat?
      end
    end
  end

  module Observability
    # Records executor state in metadata and publishes and logs job lifecycle
    # events on behalf of a `Mosquito::Runners::Executor`.
    class Executor
      include Publisher

      private getter log : ::Log

      # The backend key under which an executor's metadata is stored.
      def self.metadata_key(instance_id : String) : String
        Mosquito.backend.build_key "executor", instance_id
      end

      # The executor's object_id doubles as its instance id, and the publish
      # context is nested under the owning overseer's context.
      def initialize(executor : Mosquito::Runners::Executor)
        @metadata = Metadata.new self.class.metadata_key executor.object_id.to_s
        @log = Log.for(executor.runnable_name)

        overseer_publish_context = executor.overseer.observer.publish_context
        @publish_context = PublishContext.new(
          overseer_publish_context,
          [:executor, executor.object_id]
        )
      end

      # Wraps the execution of *job_run* (performed by the given block):
      # records the current job in metadata, publishes "job-started", times
      # the block, logs the outcome and publishes "job-finished".
      def execute(job_run : JobRun, from_queue : Mosquito::Queue)
        metrics do
          @metadata.set({
            "current_job"       => job_run.id,
            "current_job_queue" => from_queue.name,
          })
        end

        # Calculate what the duration _might_ be
        expected_duration = Mosquito.backend.average average_key(job_run.type)

        log.info { "#{"Starting:".colorize.magenta} #{job_run} from #{from_queue.name}" }
        publish({
          event:                "job-started",
          job_run:              job_run.id,
          from_queue:           from_queue.name,
          expected_duration_ms: expected_duration,
        })

        duration = Time.measure do
          yield
        end

        if job_run.succeeded?
log_success_message job_run, duration
        elsif job_run.preempted?
          log_preempted_message job_run, duration
        else
          log_failure_message job_run, duration
        end

        publish({event: "job-finished", job_run: job_run.id})

        metrics do
          # Feed the measured duration into the per-job-type average and clear
          # the "current job" fields now that the run is over.
          key = average_key(job_run.type)
          Mosquito.backend.average_push key, duration.total_milliseconds.to_i
          # NOTE(review): presumably schedules the average key to expire after
          # 30 days without new samples — confirm against the backend.
          Mosquito.backend.delete key, in: 30.days

          @metadata.set(
            current_job: nil,
            current_job_queue: nil
          )
        end
      end

      # The backend key holding the duration samples for a job type.
      def average_key(job_run_type : String) : String
        Mosquito.backend.build_key "job", job_run_type, "duration"
      end

      def log_success_message(job_run : JobRun, duration : Time::Span)
        log.info { "#{"Success:".colorize.green} #{job_run} finished and took #{time_with_units duration}" }
      end

      # Logs, at info level, that *job_run* was preempted. The reason is
      # included when non-empty, and the next execution time when the job run
      # is rescheduleable.
      def log_preempted_message(job_run : JobRun, duration : Time::Span)
        message = String::Builder.new
        message << "Preempted: ".colorize.cyan
        message << job_run
        message << " was preempted"

        reason = job_run.preempt_reason
        unless reason.empty?
          message << " ("
          message << reason
          message << ")"
        end

        message << " after "
        message << time_with_units duration

        if job_run.rescheduleable?
          next_execution = Time.utc + job_run.reschedule_interval
          message << " and will run again".colorize.cyan
          message << " in "
          message << job_run.reschedule_interval
          message << " (at "
          message << next_execution
          message << ")"
        end

        log.info { message.to_s }
      end

      # Logs that *job_run* failed: a warning when it will be retried, an
      # error when it cannot be rescheduled.
      def log_failure_message(job_run : JobRun, duration : Time::Span)
        message = String::Builder.new
        message << "Failure: ".colorize.red
        message << job_run
        message << " failed, taking "
        message << time_with_units duration
        message << " and "

        if job_run.rescheduleable?
next_execution = Time.utc + job_run.reschedule_interval
          message << "will run again".colorize.cyan
          message << " in "
          message << job_run.reschedule_interval
          message << " (at "
          message << next_execution
          message << ")"
          log.warn { message.to_s }
        else
          message << "cannot be rescheduled".colorize.yellow
          log.error { message.to_s }
        end
      end

      # :nodoc:
      # Formats *duration* in the largest unit (s, ms, µs, ns) whose threshold
      # it exceeds. Seconds are truncated to two decimals, the smaller units
      # to whole numbers; the colour goes from red (slow) to green (fast).
      private def time_with_units(duration : Time::Span)
        seconds = duration.total_seconds
        if seconds > 0.1
          "#{(seconds).*(100).trunc./(100)}s".colorize.red
        elsif seconds > 0.001
          "#{(seconds * 1_000).trunc}ms".colorize.yellow
        elsif seconds > 0.000_001
          "#{(seconds * 1_000_000).trunc}µs".colorize.green
        elsif seconds > 0.000_000_001
          "#{(seconds * 1_000_000_000).trunc}ns".colorize.green
        else
          "no discernible time at all".colorize.green
        end
      end

      # Records a heartbeat in the executor metadata when metrics are enabled.
      def heartbeat!
        metrics do
          @metadata.heartbeat!
        end
      end
    end
  end
end

================================================
FILE: src/mosquito/api/executor_config.cr
================================================
module Mosquito
  # Provides read/write access to the remotely stored executor count
  # used by overseers configured with a stable `overseer_id`.
  #
  # Supports both global counts (shared by all overseers) and per-overseer
  # counts for asymmetric hardware configurations.
  #
  # ```crystal
  # config = Mosquito::Api::ExecutorConfig.instance
  # config.executor_count                              # => global count or nil
  # config.executor_count(overseer_id: "gpu-worker-1") # => per-overseer count or nil
  # config.update(8)                                   # write global
  # config.update(2, overseer_id: "gpu-worker-1")      # write per-overseer
  # config.clear                                       # remove global override
  # config.clear(overseer_id: "gpu-worker-1")          # remove per-overseer override
  # ```
  class Api::ExecutorConfig
    # Key segment under which executor counts are stored in the backend.
    CONFIG_KEY = "executor_count"

    # Builds a new ExecutorConfig on every call; nothing is cached here.
    def self.instance : self
      new
    end

    # Returns the global executor count stored in the backend, or nil if
    # no override has been set.
    def executor_count : Int32?
self.class.stored_executor_count
    end

    # Returns the executor count for a specific overseer, or nil if no
    # override has been set for that overseer.
    def executor_count(overseer_id : String) : Int32?
      self.class.stored_executor_count(overseer_id)
    end

    # Writes a global executor count override.
    def update(count : Int32) : Nil
      self.class.store_executor_count(count)
    end

    # Writes an executor count override for a specific overseer.
    def update(count : Int32, overseer_id : String) : Nil
      self.class.store_executor_count(count, overseer_id)
    end

    # Removes the global executor count override.
    def clear : Nil
      self.class.clear_executor_count
    end

    # Removes the executor count override for a specific overseer.
    def clear(overseer_id : String) : Nil
      self.class.clear_executor_count(overseer_id)
    end

    # ----- Backend storage helpers -----
    #
    # The count is kept as a string in the "count" field of the config key
    # and converted back with `to_i32` when read.

    # Reads the global count; nil when the field is absent.
    def self.stored_executor_count : Int32?
      value = Mosquito.backend.get(global_config_key, "count")
      value.try(&.to_i32)
    end

    # Reads the count stored for *overseer_id*; nil when the field is absent.
    def self.stored_executor_count(overseer_id : String) : Int32?
      value = Mosquito.backend.get(overseer_config_key(overseer_id), "count")
      value.try(&.to_i32)
    end

    # Stores *count* as the global override.
    def self.store_executor_count(count : Int32) : Nil
      Mosquito.backend.set(global_config_key, "count", count.to_s)
    end

    # Stores *count* as the override for *overseer_id*.
    def self.store_executor_count(count : Int32, overseer_id : String) : Nil
      Mosquito.backend.set(overseer_config_key(overseer_id), "count", count.to_s)
    end

    # Deletes the whole global config key.
    def self.clear_executor_count : Nil
      Mosquito.backend.delete(global_config_key)
    end

    # Deletes the whole config key for *overseer_id*.
    def self.clear_executor_count(overseer_id : String) : Nil
      Mosquito.backend.delete(overseer_config_key(overseer_id))
    end

    # Resolves the effective executor count for an overseer by checking
    # per-overseer first, then global. Returns nil if neither is set.
    def self.resolve(overseer_id : String? = nil) : Int32?
if oid = overseer_id
        stored_executor_count(oid) || stored_executor_count
      else
        stored_executor_count
      end
    end

    # Backend key for the global executor count.
    protected def self.global_config_key : String
      Mosquito.backend.build_key(CONFIG_KEY)
    end

    # Backend key for the executor count of a single overseer.
    protected def self.overseer_config_key(overseer_id : String) : String
      Mosquito.backend.build_key(CONFIG_KEY, overseer_id)
    end
  end
end

================================================
FILE: src/mosquito/api/job_run.cr
================================================
module Mosquito::Api
  # Represents a job run in Mosquito.
  #
  # This class is used to inspect a job run stored in the backend.
  #
  # For more information about a JobRun, see `Mosquito::JobRun`.
  class JobRun
    # :nodoc:
    # Bookkeeping fields stored next to the job's own parameters; they are
    # filtered out by `#runtime_parameters`. Kept as a constant tuple so the
    # list is not re-allocated for every key that is inspected.
    INTERNAL_CONFIG_KEYS = {"id", "type", "enqueue_time", "retry_count", "started_at", "finished_at"}

    # The id of the job run.
    getter id : String

    def initialize(@id : String)
    end

    # Does a JobRun with this ID exist in the backend?
    #
    # A job run counts as found when its stored config has a "type" field.
    def found? : Bool
      config.has_key? "type"
    end

    # Get the parameters the job was enqueued with.
    #
    # Returns the stored config without the bookkeeping fields listed in
    # `INTERNAL_CONFIG_KEYS`.
    def runtime_parameters : Hash(String, String)
      config.reject do |key, _|
        INTERNAL_CONFIG_KEYS.includes? key
      end
    end

    # Read-only metadata for this job run, built on first use.
    private getter metadata : Metadata {
      Metadata.new(
        Mosquito.backend.build_key(Mosquito::JobRun::CONFIG_KEY_PREFIX, id),
        readonly: true
      )
    }

    # The stored fields of this job run, read from the metadata on each call.
    private def config : Hash(String, String)
      metadata.to_h
    end

    # The type of job this job run is for.
    def type : String
      config["type"]
    end

    # The moment this job was enqueued.
    #
    # The stored value is a Unix timestamp in milliseconds.
    def enqueue_time : Time
      Time.unix_ms config["enqueue_time"].to_i64
    end

    # The moment this job was started.
    #
    # Returns nil when no "started_at" field is stored.
    def started_at : Time?
      if time = config["started_at"]?
        Time.unix_ms time.to_i64
      end
    end

    # The moment this job was finished.
    #
    # Returns nil when no "finished_at" field is stored.
    def finished_at : Time?
      if time = config["finished_at"]?
        Time.unix_ms time.to_i64
      end
    end

    # The number of times this job has been retried.
def retry_count : Int
      config["retry_count"].to_i
    end
  end
end

================================================
FILE: src/mosquito/api/observability/publisher.cr
================================================
# Mixin for observers that publish events to the backend. Including types
# are expected to assign `@publish_context`.
module Mosquito::Observability::Publisher
  Log = ::Log.for("mosquito.events")

  getter publish_context : PublishContext

  # Expands to the given block guarded by `Mosquito.configuration.metrics?`,
  # so the body only runs when that check passes.
  macro metrics(&block)
    if Mosquito.configuration.metrics?
      {{ block.body }}
    end
  end

  # Publishes *data* as JSON on the channel named by the publish context's
  # originator. Does nothing unless metrics are enabled.
  @[AlwaysInline]
  def publish(data : NamedTuple)
    metrics do
      Log.debug { "Publishing #{data} to #{@publish_context.originator}" }
      Mosquito.backend.publish(
        publish_context.originator,
        data.to_json
      )
    end
  end

  # Names the channel an observer publishes on.
  class PublishContext
    alias Context = Array(String | Symbol | UInt64)

    # The full channel name, built with a leading "mosquito" segment.
    property originator : String
    # The key built from this publisher's own context parts.
    property context : String

    # A root context: the originator is "mosquito" followed by *context*.
    def initialize(context : Context)
      @context = KeyBuilder.build context
      @originator = KeyBuilder.build "mosquito", @context
    end

    # A nested context: the originator is "mosquito", then the parent's
    # context, then *context*.
    def initialize(parent : self, context : Context)
      @context = KeyBuilder.build context
      @originator = KeyBuilder.build "mosquito", parent.context, context
    end
  end
end

================================================
FILE: src/mosquito/api/overseer.cr
================================================
module Mosquito
  # An interface for inspecting the state of Mosquito Overseers.
  #
  # For more information about overseers, see `Mosquito::Runners::Overseer`.
  class Api::Overseer
    # The instance ID of the overseer being inspected.
    getter :instance_id
    private getter :metadata

    # Creates a new Api::Overseer by its instance ID.
    def initialize(@instance_id : String)
      @metadata = Metadata.new Observability::Overseer.metadata_key(@instance_id), readonly: true
    end

    # Retrieves a list of all overseers in the backend.
    def self.all : Array(self)
      Mosquito.backend.list_overseers.map do |id|
        new id
      end
    end

    # Retrieves a list of executors managed by this overseer.
    #
    # The ids are read from the comma separated "executors" metadata field;
    # an empty list is returned when the field is absent.
    def executors : Array(Executor)
      if executor_list = @metadata["executors"]?
executor_list.split(",").map do |name|
          Executor.new name
        end
      else
        [] of Executor
      end
    end

    # The time the overseer last sent a heartbeat.
    def last_heartbeat : Time?
      metadata.heartbeat?
    end
  end

  # Logs and publishes lifecycle events on behalf of a `Runners::Overseer`,
  # and keeps the overseer's metadata and backend registration up to date.
  class Observability::Overseer
    include Publisher

    getter metadata : Metadata
    getter instance_id : String
    private getter overseer : Runners::Overseer
    private getter log : ::Log

    # The backend key under which an overseer's metadata is stored.
    def self.metadata_key(instance_id : String) : String
      Mosquito.backend.build_key "overseer", instance_id
    end

    # The overseer's object_id doubles as its instance id.
    def initialize(@overseer : Runners::Overseer)
      @instance_id = overseer.object_id.to_s
      @log = Log.for(overseer.runnable_name)
      @metadata = Metadata.new self.class.metadata_key(instance_id)
      @publish_context = PublishContext.new [:overseer, overseer.object_id]
    end

    # Announces startup and sends the first heartbeat.
    def starting
      log.info { "Starting #{overseer.executor_count} executors." }
      publish({event: "started"})
      heartbeat
    end

    def shutting_down
      log.info { "Shutting down." }
    end

    def stopping
      log.info { "Stopping executors." }
      publish({event: "stopped"})
    end

    # Announces the exit, then deregisters the overseer from the backend and
    # deletes its metadata.
    def stopped
      log.info { "All executors stopped." }
      log.info { "Finished for now." }
      publish({event: "exited"})
      Mosquito.backend.deregister_overseer self.instance_id
      metadata.delete
    end

    def heartbeat
      # Registration must always happen so that the pending job cleanup
      # mechanism can determine which overseers are still alive.
      Mosquito.backend.register_overseer self.instance_id

      metrics do
        metadata.heartbeat!
      end
    end

    def executor_created(executor : Runners::Executor) : Nil
      publish({event: "executor-created", executor: executor.object_id})
    end

    def executor_died(executor : Runners::Executor) : Nil
      publish({event: "executor-died", executor: executor.object_id})
      log.fatal do
        <<-MSG
          Executor #{executor.runnable_name} died.
          A new executor will be started.
        MSG
      end
    end

    def channels_closed
      log.fatal { "Executor communication channels closed, overseer will stop." }
    end

    def waiting_for_queue_list
      log.debug { "Waited for the queue list to fetch possible queues."
}
    end

    def queue_list_died
      log.fatal { "QueueList has died, overseer will stop." }
    end

    def recovered_orphaned_job(job_run : JobRun, overseer_id : String)
      log.warn { "Recovered orphaned job #{job_run.id} from dead overseer #{overseer_id}." }
    end

    def orphaned_jobs_recovered(total : Int32)
      log.warn { "Recovered #{total} orphaned job(s) from pending queues." }
    end

    def recovered_job_from_executor(job_run : JobRun, executor : Runners::Executor)
      log.warn { "Recovered job #{job_run.id} from dead executor #{executor.runnable_name}." }
    end

    # Stores the executors' object ids as a comma separated list in the
    # "executors" metadata field (only when metrics are enabled).
    def update_executor_list(executors : Array(Runners::Executor)) : Nil
      metrics do
        metadata["executors"] = executors.map(&.object_id).join(",")
      end
    end
  end
end

================================================
FILE: src/mosquito/api/periodic_job.cr
================================================
module Mosquito
  # An interface for inspecting the state of periodic jobs.
  #
  # This class provides read-only access to periodic job metadata,
  # including the last time each periodic job was executed.
  #
  # ```
  # Mosquito::Api::PeriodicJob.all.each do |job|
  #   puts "#{job.name} last ran at #{job.last_executed_at}"
  # end
  # ```
  class Api::PeriodicJob
    # The name of the periodic job class.
    getter name : String

    # The configured run interval for this periodic job.
    getter interval : Time::Span | Time::MonthSpan

    private getter metadata : Metadata

    # Returns a list of all registered periodic jobs.
    #
    # One entry is built per element of `Base.scheduled_job_runs`, using the
    # job run's class name and interval.
    def self.all : Array(self)
      Base.scheduled_job_runs.map do |job_run|
        new job_run.class.name, job_run.interval
      end
    end

    # The metadata is opened read-only under the "periodic_jobs" key for *name*.
    def initialize(@name : String, @interval : Time::Span | Time::MonthSpan)
      @metadata = Metadata.new(
        Mosquito.backend.build_key("periodic_jobs", @name),
        readonly: true
      )
    end

    # The last time this periodic job was executed, or nil if it has never run.
    def last_executed_at : Time?
      if timestamp = metadata["last_executed_at"]?
# The stored value is a Unix timestamp in seconds. Parse it as Int64:
        # `to_i` yields an Int32 and raises once the timestamp no longer fits
        # in 32 bits.
        Time.unix(timestamp.to_i64)
      end
    end
  end

  # Logs and publishes scheduling events for a single periodic job.
  class Observability::PeriodicJob
    include Publisher

    getter log : ::Log
    getter publish_context : PublishContext

    # The class name of *periodic_job_run* names both the publish context
    # and the log source.
    def initialize(periodic_job_run : Mosquito::PeriodicJobRun)
      @name = periodic_job_run.class.name
      @publish_context = PublishContext.new [:periodic_job, @name]
      @log = Log.for(@name)
    end

    # Reports that the job was enqueued; *time* is published as Unix seconds.
    def enqueued(at time : Time)
      log.info { "Enqueued periodic job at #{time}" }
      publish({event: "enqueued", executed_at: time.to_unix})
    end

    # Reports, at trace level only, that the job was not yet due.
    def skipped
      log.trace { "Not yet due for execution" }
    end
  end
end

================================================
FILE: src/mosquito/api/queue.cr
================================================
module Mosquito
  # Represents a named queue in the system, and allows querying the state of
  # the queue. For more about the internals of a Queue in Mosquito, see
  # `Mosquito::Queue`.
  class Api::Queue
    # The name of the queue.
    getter name : String

    private property backend : Mosquito::Backend::Queue

    # Returns a list of all known named queues in the system.
    def self.all : Array(Queue)
      Mosquito.backend.list_queues.map { |name| new name }
    end

    # Creates an instance of a named queue.
    def initialize(@name : String)
      @backend = Mosquito.backend.queue name
    end

    {% for name in Mosquito::Backend::QUEUES %}
      # Gets a list of all the job runs in the internal {{name.id}} queue.
      def {{name.id}}_job_runs : Array(JobRun)
        backend.list_{{name.id}}
          .map { |task_id| JobRun.new task_id }
      end
    {% end %}

    # The operating size of the queue, not including dead jobs.
    def size : Int64
      backend.size(include_dead: false)
    end

    # The size of the queue, broken out by job state.
    #
    # Example:
    #
    # ```
    # Mosquito::Api::Queue.all.first.size_details
    # # => {"waiting" => 0, "scheduled" => 0, "pending" => 0, "dead" => 0}
    # ```
    #
    # The semantics of the keys are described in detail on the `Mosquito::Queue` class, but in brief:
    #
    # - `scheduled` is a list of jobs which are scheduled to be executed at a later time.
# - `waiting` is a list of jobs which should be executed ASAP
    # - `pending` is a list of jobs for which execution has started
    # - `dead` is a list of jobs which have failed to execute
    def size_details : Hash(String, Int64)
      sizes = {} of String => Int64

      # One entry per name in Mosquito::Backend::QUEUES, expanded at compile time.
      {% for name in Mosquito::Backend::QUEUES %}
        sizes["{{name.id}}"] = backend.{{name.id}}_size
      {% end %}

      sizes
    end

    def paused? : Bool
      backend.paused?
    end

    # Queues are ordered by name.
    def <=>(other)
      name <=> other.name
    end
  end

  # Logs and publishes the events which happen to job runs on a single queue.
  class Observability::Queue
    include Publisher

    getter log : ::Log
    getter publish_context : PublishContext

    delegate name, to: @queue

    # Convenience constructor which wraps the named queue in a `Mosquito::Queue`.
    def initialize(queue : String)
      initialize(Mosquito::Queue.new queue)
    end

    def initialize(@queue : Mosquito::Queue)
      @publish_context = PublishContext.new [:queue, queue.name]
      @log = Log.for(queue.name)
    end

    def enqueued(job_run : JobRun)
      log.trace { "Enqueuing #{job_run.id} for immediate execution" }
      publish({event: "enqueued", job_run: job_run.id})
    end

    # Same "enqueued" event as above, with the scheduled execution time attached.
    def enqueued(job_run : JobRun, at execute_time : Time)
      log.trace { "Enqueuing #{job_run.id} for execution at #{execute_time}" }
      publish({event: "enqueued", job_run: job_run.id, execute_time: execute_time})
    end

    def dequeued(job_run : JobRun)
      log.trace { "Dequeuing #{job_run.id}" }
      publish({event: "dequeued", job_run: job_run.id})
    end

    def rescheduled(job_run : JobRun, to execute_time : Time)
      log.trace { "Rescheduling #{job_run.id} to execute at #{execute_time}" }
      publish({event: "rescheduled", job_run: job_run.id, execute_time: execute_time})
    end

    def forgotten(job_run : JobRun)
      log.trace { "Forgetting #{job_run.id}" }
      publish({event: "forgotten", job_run: job_run.id})
    end

    def banished(job_run : JobRun)
      log.trace { "Banishing #{job_run.id} to dead queue" }
      publish({event: "banished", job_run: job_run.id})
    end

    def paused(duration : Time::Span?
= nil)
      # A "paused" event carries the duration in seconds only for a timed pause.
      if duration
        log.info { "Paused for #{duration}" }
        publish({event: "paused", duration: duration.total_seconds})
      else
        log.info { "Paused indefinitely" }
        publish({event: "paused"})
      end
    end

    def resumed
      log.info { "Resumed" }
      publish({event: "resumed"})
    end
  end
end

================================================
FILE: src/mosquito/api/queue_list.cr
================================================
module Mosquito
  # Logging helper for `Runners::QueueList`.
  class Observability::QueueList
    private getter log : ::Log

    # Names of the paused queues seen by the previous check.
    @last_paused_names = Set(String).new

    def initialize(queue_list : Runners::QueueList)
      @log = Log.for(queue_list.runnable_name)
    end

    # Logs a notice only when the set of paused queue names differs from the
    # set seen on the previous call.
    def checked_for_paused_queues(paused : Array(Mosquito::Queue))
      paused_names = paused.map(&.name).to_set

      if paused_names != @last_paused_names
        @last_paused_names = paused_names

        log.for("paused_queues").notice {
          if paused.size > 0
            "#{paused.size} paused queues: #{paused.map(&.name).join(", ")}"
          else
            "all queues resumed"
          end
        }
      end
    end
  end
end

================================================
FILE: src/mosquito/api.cr
================================================
require "./backend"
require "./api/observability/*"
require "./api/*"

module Mosquito::Api
  # Builds an inspector for the overseer with the given instance id.
  def self.overseer(id : String) : Overseer
    Overseer.new id
  end

  # Builds an inspector for the executor with the given instance id.
  def self.executor(id : String) : Executor
    Executor.new id
  end

  # Builds an inspector for the job run with the given id.
  def self.job_run(id : String) : JobRun
    JobRun.new id
  end

  def self.list_periodic_jobs : Array(PeriodicJob)
    PeriodicJob.all
  end

  # NOTE(review): this returns `Observability::Queue` objects rather than
  # `Api::Queue` inspectors — confirm that is intended.
  def self.list_queues : Array(Observability::Queue)
    Mosquito.backend.list_queues
      .map { |name| Observability::Queue.new name }
  end

  def self.list_overseers : Array(Overseer)
    Mosquito.backend.list_overseers
      .map { |name| Overseer.new name }
  end

  # Subscribes to the "mosquito:*" pattern on the backend.
  def self.event_receiver : Channel(Backend::BroadcastMessage)
    Mosquito.backend.subscribe "mosquito:*"
  end

  # Returns a `ConcurrencyConfig` instance for reading and writing the
  # remotely stored concurrency limits used by
  # `RemoteConfigDequeueAdapter`.
def self.concurrency_config : ConcurrencyConfig
    ConcurrencyConfig.instance
  end

  # Convenience reader for the current global remote concurrency limits.
  def self.concurrency_limits : Hash(String, Int32)
    concurrency_config.limits
  end

  # Convenience reader for a specific overseer's concurrency limits.
  def self.concurrency_limits(overseer_id : String) : Hash(String, Int32)
    concurrency_config.limits(overseer_id)
  end

  # Convenience writer — replaces the global stored concurrency limits so
  # that all `RemoteConfigDequeueAdapter` instances pick them up on their
  # next refresh cycle.
  def self.set_concurrency_limits(limits : Hash(String, Int32)) : Nil
    concurrency_config.update(limits)
  end

  # Convenience writer — replaces stored concurrency limits for a specific
  # overseer.
  def self.set_concurrency_limits(limits : Hash(String, Int32), overseer_id : String) : Nil
    concurrency_config.update(limits, overseer_id)
  end

  # Returns an `ExecutorConfig` instance for reading and writing the
  # remotely stored executor count.
  def self.executor_config : ExecutorConfig
    ExecutorConfig.instance
  end

  # Convenience reader for the global remote executor count.
  #
  # Delegates to `ExecutorConfig#executor_count`.
  def self.executor_count : Int32?
    executor_config.executor_count
  end

  # Convenience reader for a specific overseer's executor count.
  def self.executor_count(overseer_id : String) : Int32?
    executor_config.executor_count(overseer_id)
  end

  # Convenience writer — sets the global executor count override.
  def self.set_executor_count(count : Int32) : Nil
    executor_config.update(count)
  end

  # Convenience writer — sets the executor count for a specific overseer.
def self.set_executor_count(count : Int32, overseer_id : String) : Nil executor_config.update(count, overseer_id) end end ================================================ FILE: src/mosquito/backend.cr ================================================ module Mosquito abstract class Backend struct BroadcastMessage property channel : String property message : String def initialize(@channel, @message) end end # The lifecycle states a job run passes through in any backend. QUEUES = %w(waiting scheduled pending dead) KEY_PREFIX = {"mosquito"} def build_key(*parts) KeyBuilder.build Mosquito.configuration.global_prefix, KEY_PREFIX, *parts end # Factory method to create a named queue for this backend. def queue(name : String | Symbol) : Queue _build_queue(name.to_s) end protected abstract def _build_queue(name : String) : Queue abstract def connection abstract def connection_string=(value : String) abstract def connection_string : String? abstract def valid_configuration? : Bool # Storage abstract def store(key : String, value : Hash(String, String?) | Hash(String, String)) : Nil abstract def retrieve(key : String) : Hash(String, String) abstract def delete(key : String, in ttl : Int64 = 0) : Nil abstract def delete(key : String, in ttl : Time::Span) : Nil abstract def expires_in(key : String) : Int64 abstract def get(key : String, field : String) : String? abstract def set(key : String, field : String, value : String) : String abstract def set(key : String, values : Hash(String, String?) 
| Hash(String, Nil) | Hash(String, String)) : Nil abstract def delete_field(key : String, field : String) : Nil abstract def increment(key : String, field : String) : Int64 abstract def increment(key : String, field : String, by value : Int32) : Int64 # Global abstract def list_queues : Array(String) abstract def list_overseers : Array(String) abstract def list_active_overseers(since : Time) : Array(String) abstract def register_overseer(id : String) : Nil abstract def deregister_overseer(id : String) : Nil abstract def flush : Nil # Coordination abstract def unlock(key : String, value : String) : Nil abstract def lock?(key : String, value : String, ttl : Time::Span) : Bool abstract def renew_lock?(key : String, value : String, ttl : Time::Span) : Bool abstract def publish(key : String, value : String) : Nil abstract def subscribe(key : String) : Channel(BroadcastMessage) # Metrics abstract def average_push(key : String, value : Int32, window_size : Int32 = 100) : Nil abstract def average(key : String) : Int32 abstract class Queue getter backend : Backend private getter name : String def initialize(@backend, @name : String) end # Queue operations abstract def enqueue(job_run : JobRun) : JobRun abstract def dequeue : JobRun? abstract def schedule(job_run : JobRun, at scheduled_time : Time) : JobRun abstract def deschedule : Array(JobRun) abstract def finish(job_run : JobRun) abstract def terminate(job_run : JobRun) abstract def undequeue : JobRun? abstract def flush : Nil abstract def size(include_dead : Bool = true) : Int64 {% for name in ["waiting", "scheduled", "pending", "dead"] %} abstract def list_{{name.id}} : Array(String) abstract def {{name.id}}_size : Int64 {% end %} abstract def scheduled_job_run_time(job_run : JobRun) : Time? # Pause this queue so that `#dequeue` returns nil until it is resumed # or the optional duration expires. abstract def pause(duration : Time::Span? = nil) : Nil # Resume a paused queue, allowing dequeue to proceed. 
abstract def resume : Nil abstract def paused? : Bool end end end ================================================ FILE: src/mosquito/base.cr ================================================ require "json" module Mosquito alias Id = Int64 | Int32 record WorkUnit, job_run : JobRun, queue : Queue do def self.of(job_run : JobRun, *, from queue : Queue) : self new(job_run, queue) end end class Base class_getter mapping = {} of String => Mosquito::Job.class class_getter scheduled_job_runs = [] of PeriodicJobRun class_getter timetable = [] of PeriodicJobRun def self.register_job_mapping(string, klass) @@mapping[string] = klass end def self.job_for_type(type : String) : Mosquito::Job.class @@mapping[type] rescue e : KeyError error = String.build do |s| s << <<-TEXT Could not find a job class for type "#{type}", perhaps you forgot to register it? Current known types are: TEXT @@mapping.each { |k, v| s << "#{k}=>#{v}\n" } s << "\n\n" end raise KeyError.new(error) end def self.register_job_interval(klass, interval : Time::Span | Time::MonthSpan) @@scheduled_job_runs << PeriodicJobRun.new(klass, interval) end def self.register_job(klass, *, to_run_at scheduled_time : Time) position = @@timetable.index do end end end end ================================================ FILE: src/mosquito/configuration.cr ================================================ module Mosquito class_getter configuration = Configuration.new def self.configure(&block) : Nil yield configuration end class Configuration property idle_wait : Time::Span = 100.milliseconds property successful_job_ttl : Int32 = 1.minute.total_seconds.to_i property failed_job_ttl : Int32 = 86400 property use_distributed_lock : Bool = true property executor_count : Int32 = ENV.fetch("MOSQUITO_EXECUTOR_COUNT", "6").to_i property run_from : Array(String) = [] of String property global_prefix : String? 
= nil property backend : Mosquito::Backend = Mosquito::RedisBackend.new property dequeue_adapter : Mosquito::DequeueAdapter = Mosquito::ShuffleDequeueAdapter.new # Maps queue names to resource gates. Queues whose gate returns # `false` from `#allow?` are excluded from dequeuing. property resource_gates : Hash(String, Mosquito::ResourceGate) = {} of String => Mosquito::ResourceGate # A stable, user-chosen identifier for this overseer instance. # Used to look up per-overseer remote configuration (executor count, # concurrency limits, etc.). When nil, the overseer only reads global # remote config. property overseer_id : String? = nil property publish_metrics : Bool = false # How often a mosquito runner should emit a heartbeat metric. property heartbeat_interval : Time::Span = 20.seconds # How long an overseer can go without a heartbeat before it is # considered dead and its pending jobs are recovered. property dead_overseer_threshold : Time::Span = 100.seconds property validated = false def backend_connection backend.connection end def backend_connection_string backend.connection_string end def backend_connection_string=(value : String) backend.connection_string = value end def idle_wait=(time_span : Float) @idle_wait = time_span.seconds end def validate return if @validated @validated = true unless backend.valid_configuration? message = <<-error Mosquito cannot start because no backend connection has been provided. For example, in your application config: Mosquito.configure do |settings| settings.backend_connection_string = (ENV["REDIS_TLS_URL"]? || ENV["REDIS_URL"]? || "redis://localhost:6379") end See Also: https://github.com/mosquito-cr/mosquito#connecting-to-redis error raise message end end def metrics? 
: Bool publish_metrics end end end ================================================ FILE: src/mosquito/dequeue_adapter.cr ================================================ module Mosquito # A DequeueAdapter determines how the Overseer selects the next job to # execute from the available queues. # # Subclass `DequeueAdapter`, implement `#dequeue`, and assign an instance # via `Mosquito.configure`: # # ```crystal # class MyDequeueAdapter < Mosquito::DequeueAdapter # def dequeue(queue_list : Mosquito::Runners::QueueList) : Mosquito::WorkUnit? # queue_list.queues.each do |q| # if job_run = q.dequeue # return WorkUnit.of(job_run, from: q) # end # end # end # end # # Mosquito.configure do |settings| # settings.dequeue_adapter = MyDequeueAdapter.new # end # ``` abstract class DequeueAdapter # Attempt to dequeue a job from one of the queues managed by `queue_list`. # # Returns a `WorkUnit` when a job is available, or `nil` # when all queues are empty. abstract def dequeue(queue_list : Runners::QueueList) : WorkUnit? # Called by the Overseer when a job run has finished executing. # Override this to react to completed jobs (e.g. update internal # counters or rebalance queue weights). def finished_with(job_run : JobRun, queue : Queue) : Nil end end end ================================================ FILE: src/mosquito/dequeue_adapters/concurrency_limited_dequeue_adapter.cr ================================================ require "../dequeue_adapter" module Mosquito # A dequeue adapter that enforces per-queue concurrency limits. # # Each queue can be assigned a maximum number of jobs that may execute # concurrently. When a queue has reached its limit, it is skipped during # dequeue until an in-flight job finishes. # # Queues not present in the limits table have no concurrency ceiling and # are bounded only by the total executor pool size. # # Among eligible queues the adapter uses a shuffle to provide rough # fairness, similar to `ShuffleDequeueAdapter`. 
# # ## Example # # ```crystal # Mosquito.configure do |settings| # settings.executor_count = 8 # # settings.dequeue_adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({ # "queue_a" => 3, # "queue_b" => 5, # }) # end # ``` # # In this configuration at most 3 jobs from "queue_a" and 5 from "queue_b" # will execute at the same time. Other queues are unlimited. class ConcurrencyLimitedDequeueAdapter < DequeueAdapter property limits : Hash(String, Int32) # Tracks the number of currently in-flight jobs per queue name. # Access is fiber-safe because Crystal fibers are cooperatively # scheduled and we never yield between read and write. @active : Hash(String, Int32) def initialize(@limits : Hash(String, Int32)) @active = Hash(String, Int32).new(0) end def dequeue(queue_list : Runners::QueueList) : WorkUnit? queue_list.queues.shuffle.each do |q| if limit = limits[q.name]? next if @active[q.name] >= limit end if job_run = q.dequeue @active[q.name] = @active[q.name] + 1 return WorkUnit.of(job_run, from: q) end end end # Called by the Overseer when a job from this queue has finished # executing. Decrements the in-flight counter so the queue becomes # eligible for dequeue again. def finished_with(job_run : JobRun, queue : Queue) : Nil count = @active[queue.name] @active[queue.name] = {count - 1, 0}.max end # Returns the current number of in-flight jobs for the given queue. def active_count(queue_name : String) : Int32 @active[queue_name] end end end ================================================ FILE: src/mosquito/dequeue_adapters/remote_config_dequeue_adapter.cr ================================================ require "./concurrency_limited_dequeue_adapter" module Mosquito # A dequeue adapter that wraps `ConcurrencyLimitedDequeueAdapter` with # remotely configurable concurrency limits stored in the Mosquito backend # (e.g. Redis). # # Limits are refreshed by polling the backend at a configurable interval. 
# When the remote key is absent or empty the adapter falls back to the # `defaults` hash provided at construction time. # # Remote values are **merged on top of** defaults: a queue present only in # defaults keeps its value, a queue present only in the remote config is # added, and a queue present in both uses the remote value. # # ## Per-overseer configuration # # When `overseer_id` is set, the adapter reads from both the global key # and a per-overseer key. The merge order is: # # defaults → global remote → per-overseer remote # # This lets you run overseers on asymmetric hardware and tune each one # independently while still sharing a common baseline. # # ## Setting limits remotely # # Use `Mosquito::Api.set_concurrency_limits` to write global limits: # # ```crystal # Mosquito::Api.set_concurrency_limits({"queue_a" => 2, "queue_b" => 10}) # ``` # # Or target a specific overseer: # # ```crystal # Mosquito::Api.set_concurrency_limits({"queue_a" => 1}, overseer_id: "gpu-worker-1") # ``` # # ## Example # # ```crystal # Mosquito.configure do |settings| # settings.dequeue_adapter = Mosquito::RemoteConfigDequeueAdapter.new( # defaults: {"queue_a" => 3, "queue_b" => 5}, # overseer_id: "gpu-worker-1", # refresh_interval: 5.seconds, # ) # end # ``` # # In this configuration the adapter starts with the given defaults. Any # limits written to the backend via the API will take effect within # `refresh_interval` seconds. Per-overseer limits override global limits # which override defaults. class RemoteConfigDequeueAdapter < DequeueAdapter CONFIG_KEY = "concurrency_limits" getter defaults : Hash(String, Int32) getter refresh_interval : Time::Span getter inner : ConcurrencyLimitedDequeueAdapter getter overseer_id : String? @last_refresh_at : Time = Time::UNIX_EPOCH @last_remote_limits : Hash(String, Int32) = {} of String => Int32 def initialize( @defaults : Hash(String, Int32) = {} of String => Int32, @overseer_id : String? 
= nil, @refresh_interval : Time::Span = 5.seconds ) @inner = ConcurrencyLimitedDequeueAdapter.new(defaults.dup) end def dequeue(queue_list : Runners::QueueList) : WorkUnit? maybe_refresh_limits inner.dequeue(queue_list) end def finished_with(job_run : JobRun, queue : Queue) : Nil inner.finished_with(job_run, queue) end # Returns the current effective concurrency limits (defaults merged # with any remote overrides). def limits : Hash(String, Int32) inner.limits end # Returns the current in-flight count for *queue_name*, delegated to # the inner adapter. def active_count(queue_name : String) : Int32 inner.active_count(queue_name) end # Force an immediate refresh from the backend, ignoring the # `refresh_interval` timer. def refresh_limits : Nil remote = load_remote_limits merged = defaults.merge(remote) if merged != inner.limits inner.limits = merged end @last_refresh_at = Time.utc end # ----- Backend storage helpers (class-level) ----- # Reads the global concurrency limits hash stored in the backend. def self.stored_limits : Hash(String, Int32) raw = Mosquito.backend.retrieve(global_config_key) raw.transform_values(&.to_i32) end # Reads the concurrency limits for a specific overseer. def self.stored_limits(overseer_id : String) : Hash(String, Int32) raw = Mosquito.backend.retrieve(overseer_config_key(overseer_id)) raw.transform_values(&.to_i32) end # Overwrites the global concurrency limits with *limits*. Any previously # stored queue entries not present in *limits* are removed. def self.store_limits(limits : Hash(String, Int32)) : Nil key = global_config_key Mosquito.backend.delete(key) Mosquito.backend.store(key, limits.transform_values(&.to_s)) unless limits.empty? end # Overwrites the concurrency limits for a specific overseer with *limits*. 
def self.store_limits(limits : Hash(String, Int32), overseer_id : String) : Nil key = overseer_config_key(overseer_id) Mosquito.backend.delete(key) Mosquito.backend.store(key, limits.transform_values(&.to_s)) unless limits.empty? end # Removes all globally stored concurrency limits, causing adapters to # fall back to their defaults (or per-overseer limits if set). def self.clear_limits : Nil Mosquito.backend.delete(global_config_key) end # Removes stored concurrency limits for a specific overseer. def self.clear_limits(overseer_id : String) : Nil Mosquito.backend.delete(overseer_config_key(overseer_id)) end protected def self.global_config_key : String Mosquito.backend.build_key(CONFIG_KEY) end protected def self.overseer_config_key(overseer_id : String) : String Mosquito.backend.build_key(CONFIG_KEY, overseer_id) end private def maybe_refresh_limits now = Time.utc if now - @last_refresh_at >= @refresh_interval refresh_limits end end private def load_remote_limits : Hash(String, Int32) global = self.class.stored_limits result = if oid = overseer_id per_overseer = self.class.stored_limits(oid) global.merge(per_overseer) else global end @last_remote_limits = result rescue # If the backend is unreachable or the data is corrupt, fall back # to the last known-good remote limits so previously applied overrides # are preserved rather than silently reverting to defaults. @last_remote_limits end end end ================================================ FILE: src/mosquito/dequeue_adapters/shuffle_dequeue_adapter.cr ================================================ require "../dequeue_adapter" module Mosquito # The default dequeue adapter. Shuffles the queue list on each pass and # returns the first available job. # # The shuffle provides rough fairness across queues, preventing any single # queue from being consistently checked first. class ShuffleDequeueAdapter < DequeueAdapter def dequeue(queue_list : Runners::QueueList) : WorkUnit? 
queue_list.queues.shuffle.each do |q| if job_run = q.dequeue return WorkUnit.of(job_run, from: q) end end end end end ================================================ FILE: src/mosquito/dequeue_adapters/weighted_dequeue_adapter.cr ================================================ require "../dequeue_adapter" module Mosquito # A dequeue adapter that checks queues according to configured weights. # # Higher-weight queues are given proportionally more chances to be dequeued # from. On each call to `#dequeue`, the adapter picks a queue at random # (weighted by its configured value). If that queue is empty, it is removed # from consideration and another weighted pick is made, ensuring each queue # is checked at most once per dequeue call. # # The weight map is built fresh on each dequeue call from the current # queue list, ensuring newly discovered queues are picked up immediately. # # Queues not present in the weights table are assigned a default weight of 1. # # ## Example # # ```crystal # Mosquito.configure do |settings| # settings.dequeue_adapter = Mosquito::WeightedDequeueAdapter.new({ # "critical" => 5, # "default" => 2, # "bulk" => 1, # }) # end # ``` # # In this configuration the "critical" queue will be checked roughly 5x as # often as "bulk" and 2.5x as often as "default". class WeightedDequeueAdapter < DequeueAdapter getter weights : Hash(String, Int32) def initialize(@weights : Hash(String, Int32), @default_weight = 1) end def dequeue(queue_list : Runners::QueueList) : WorkUnit? remaining = queue_list.queues.map { |q| {q, weights.fetch(q.name, @default_weight)} } until remaining.empty? queue, index = weighted_random_select(remaining) if job_run = queue.dequeue return WorkUnit.of(job_run, from: queue) end remaining.delete_at(index) end end # Picks a queue at random, weighted by the associated values. # Returns the selected queue and its index in the candidates array. 
private def weighted_random_select(candidates : Array(Tuple(Queue, Int32))) : Tuple(Queue, Int32) total = candidates.sum(&.last) roll = rand(total) candidates.each_with_index do |(queue, weight), index| roll -= weight return {queue, index} if roll < 0 end # Unreachable, but satisfies the compiler. {candidates.last.first, candidates.size - 1} end end end ================================================ FILE: src/mosquito/exceptions.cr ================================================ module Mosquito # When a job fails class JobFailed < Exception end # When a job_run tries to run twice class DoubleRun < Exception end # When a job contains a model_id parameter pointing to a database record but the database doesn't return anything for that id. class IrretrievableParameter < Exception end end ================================================ FILE: src/mosquito/gates/open_gate.cr ================================================ require "../resource_gate" module Mosquito # A gate that always allows dequeuing. This is the default when no # resource constraint is configured. class OpenGate < ResourceGate def initialize super(sample_ttl: 0.seconds) end protected def check : Bool true end end end ================================================ FILE: src/mosquito/gates/threshold_gate.cr ================================================ require "../resource_gate" module Mosquito # A gate that samples a metric via a callback and compares it against # a threshold. 
# # ## Example # # ```crystal # gate = Mosquito::ThresholdGate.new( # threshold: 85.0, # sample_ttl: 2.seconds # ) { `nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits`.strip.to_f } # ``` class ThresholdGate < ResourceGate getter threshold : Float64 @sampler : -> Float64 def initialize(@threshold : Float64, sample_ttl : Time::Span = 2.seconds, &sampler : -> Float64) super(sample_ttl: sample_ttl) @sampler = sampler end protected def check : Bool @sampler.call < @threshold end end end ================================================ FILE: src/mosquito/job.cr ================================================ require "./serializers/*" module Mosquito # A Job is a definition for work to be performed. # Jobs are pieces of code which run a JobRun. # # - Jobs prevent double execution of a job for a job_run # - Jobs Rescue when a #perform method fails a job_run for any reason # - Jobs can be rescheduleable abstract class Job Log = ::Log.for(self) include Mosquito::Serializers::Primitives enum State Initialization Running Succeeded Failed Aborted Preempted def executed? : Bool succeeded? || failed? end end def log stream = Log.for(self.class) if job_run_id_ = job_run_id stream.for(job_run_id_) else stream end end def log(message) log.info { message } end getter state = State::Initialization delegate executed?, succeeded?, failed?, aborted?, preempted?, to: state # When a job is preempted with an `until` parameter, this is the time # at which the job should be retried. getter preempted_until : Time? # When a job is preempted, this is the reason provided by the caller. getter preempt_reason : String = "" # When a job fails and raises an exception, it will be saved into this attribute. getter exception : Exception? property job_run_id : String? # When a job run fails, should it be added to the retry queue? # See: #fail(retry: false) property should_retry : Bool = true # The queue this job is assigned to. 
# By default every job has its own named queue: # # - EmailTheUniverseJob.queue = "email_the_universe" def self.queue_name : String {{ @type.id }}.to_s.underscore end # Easily override the queue for any job. macro queue_name(name) def self.queue_name : String "{{ name.id }}" end end # The Queue this job uses to store job_runs. def self.queue if queue_name.blank? Queue.new "default" else Queue.new queue_name end end # Job name is used to differentiate jobs coming off the same queue. # By default it is the class name, and this should never need to be changed. private def self.job_name : String "{{ @type.id }}".underscore end def run begin before_hook rescue e : Exception log.error(exception: e) { "Before hook raised, job will not be executed" } @state = State::Aborted return end return if preempted? @state = State::Running perform @state = State::Succeeded rescue e log.warn(exception: e) do "Job failed! Raised #{e.class}: #{e.message}" end @exception = e @state = State::Failed ensure after_hook end def before_hook # intentionally left blank end def after_hook # intentionally left blank end def retry_later fail end # To be called from inside a before hook. # Preempts this job, preventing execution. The job will be rescheduled. # # The optional `until` parameter specifies when the job should be retried. def preempt(reason = "", *, until preempted_until : Time? = nil) @state = State::Preempted @preempt_reason = reason @preempted_until = preempted_until end macro before(&block) def before_hook {% if @type.methods.map(&.name).includes?(:before_hook.id) %} previous_def {% else %} super {% end %} return if preempted?
{{ yield }} end end macro after(&block) def after_hook {% if @type.methods.map(&.name).includes?(:after_hook.id) %} previous_def {% else %} super {% end %} {{ yield }} end end # abstract, override in a Job descendant to do something productive def perform Log.error { "No job definition found for #{self.class.name}" } fail end # To be called from inside a #perform # Marks this job as a failure. By default, if the job is a candidate for # re-scheduling, it will be run again at a later time. def fail(reason = "", *, retry : Bool = true) @should_retry = @should_retry && retry raise JobFailed.new(reason) end # abstract, override if desired. # # True if this job is rescheduleable, false if not. def rescheduleable? : Bool true end # abstract, override if desired. # # For a given retry count, is this job rescheduleable? def rescheduleable?(retry_count : Int32) : Bool rescheduleable? && retry_count < 5 end # abstract, override if desired. # # For a given retry count, how long should the delay between # job attempts be? def reschedule_interval(retry_count : Int32) : Time::Span if preempted? && (wait_until = @preempted_until) delay = wait_until - Time.utc return delay if delay > Time::Span.zero end 2.seconds * (retry_count ** 2) # retry 1 = 2 seconds # 2 = 8 # 3 = 18 # 4 = 32 end def metadata : Metadata @metadata ||= begin Metadata.new self.class.metadata_key end end def self.metadata : Metadata Metadata.new metadata_key, readonly: true end def self.metadata_key Mosquito.backend.build_key "job_metadata", self.name.underscore end end end ================================================ FILE: src/mosquito/job_run.cr ================================================ module Mosquito # A JobRun is a unit of work which will be performed by a Job.
# JobRuns know how to: # - store and retrieve their data to and from the datastore # - figure out what Job class they match to # - build an instance of that Job class and pass off the config data # - Ask the job to run # # JobRun data is called `config` and is persisted in the backend under the key # `mosquito:job_run:job_run_id`. class JobRun getter type getter enqueue_time : Time getter id : String getter retry_count = 0 getter job : Mosquito::Job? getter started_at : Time? getter finished_at : Time? getter overseer_id : String? getter metadata : Metadata { Metadata.new(config_key) } def job! : Mosquito::Job job || raise RuntimeError.new("No job yet retrieved for job_run.") end # :nodoc: property config CONFIG_KEY_PREFIX = "job_run" # The config key is the backend storage key for the metadata of this job_run. def config_key self.class.config_key id end # :ditto: def self.config_key(*parts) Mosquito.backend.build_key CONFIG_KEY_PREFIX, parts end def initialize(type : String) new type end def initialize( @type : String, @enqueue_time : Time = Time.utc, id : String? = nil, @retry_count : Int32 = 0, @started_at : Time? = nil, @finished_at : Time? = nil ) @id = id || KeyBuilder.build @enqueue_time.to_unix_ms.to_s, rand(1000) @config = {} of String => String @job = nil end # Stores this job run configuration and metadata in the backend. # Nil-valued fields are deleted from the backend hash. def store fields = {} of String => String? config.each { |k, v| fields[k] = v } fields["enqueue_time"] = enqueue_time.to_unix_ms.to_s fields["type"] = type fields["retry_count"] = retry_count.to_s fields["overseer_id"] = @overseer_id if started_at_ = @started_at fields["started_at"] = started_at_.to_unix_ms.to_s end if finished_at_ = @finished_at fields["finished_at"] = finished_at_.to_unix_ms.to_s end metadata.set fields end # Deletes this job_run from the backend. # Optionally, after a delay in seconds (handled by the backend). 
def delete(in ttl : Int = 0) metadata.delete(in: ttl.seconds) end # Builds a Job instance from this job_run. Populates the job with config from # the backend. def build_job : Mosquito::Job if job = @job return job end @job = instance = Base.job_for_type(type).new if instance.responds_to? :vars_from instance.vars_from config end instance.job_run_id = id instance end # Builds and runs the job with this job_run config. def run instance = build_job @started_at = Time.utc instance.run @finished_at = Time.utc if executed? && failed? @retry_count += 1 end store end # :nodoc: protected def overseer_id=(id : String?) @overseer_id = id end # Marks this job run as claimed by the given overseer and persists # the association to the backend. Used by the pending cleanup to # determine whether the owning overseer is still alive. def claimed_by(overseer : Runners::Overseer) @overseer_id = overseer.observer.instance_id Mosquito.backend.set config_key, "overseer_id", @overseer_id.not_nil! end # Fails this job run and make sure it's persisted as such. # Clears the overseer_id since the job is no longer in-flight. def fail @retry_count += 1 @overseer_id = nil store end # Treats this job run as a failure: increments the retry count and # either reschedules with backoff or banishes to the dead queue. def retry_or_banish(queue : Queue) : Nil fail build_job if rescheduleable? next_execution = Time.utc + reschedule_interval queue.reschedule self, next_execution else queue.banish self delete in: Mosquito.configuration.failed_job_ttl end end # For the current retry count, is the job rescheduleable? def rescheduleable? job!.rescheduleable? @retry_count end # For the current retry count, how long should a runner wait before retry? def reschedule_interval job!.reschedule_interval @retry_count end # :nodoc: delegate :executed?, :succeeded?, :failed?, :preempted?, :preempt_reason, :failed, :rescheduled, to: job! # Used to construct a job_run from the parameters stored in the backend. 
def self.retrieve(id : String)
      fields = Metadata.new(config_key(id)).to_h

      # A stored job_run without a type or enqueue time cannot be rebuilt;
      # nil is returned in that case.
      return unless name = fields.delete "type"
      return unless timestamp = fields.delete "enqueue_time"
      retry_count = (fields.delete("retry_count") || 0).to_i
      started_at_raw = fields.delete("started_at")
      finished_at_raw = fields.delete("finished_at")
      started_at = started_at_raw ? Time.unix_ms(started_at_raw.to_i64) : nil
      finished_at = finished_at_raw ? Time.unix_ms(finished_at_raw.to_i64) : nil
      overseer_id = fields.delete("overseer_id")

      instance = new(name, Time.unix_ms(timestamp.to_i64), id, retry_count, started_at, finished_at)
      # Whatever remains after removing the bookkeeping fields is the config.
      instance.config = fields
      instance.overseer_id = overseer_id

      instance
    end

    # Updates this job_run config from the backend.
    def reload : Nil
      config.merge! metadata.to_h
      @retry_count = config["retry_count"].to_i
      @overseer_id = config.delete("overseer_id")
    end

    # Writes this job_run as `type<id>`.
    def to_s(io : IO)
      "#{type}<#{id}>".to_s(io)
    end

    # Two job_runs are equal when they share the same id.
    def ==(other : self)
      id == other.id
    end
  end
end

================================================
FILE: src/mosquito/key_builder.cr
================================================
module Mosquito
  # Joins heterogeneous key parts into a single backend key string.
  class KeyBuilder
    KEY_SEPERATOR = ":"

    # Builds a key from the given parts.
    #
    # Symbols, strings and numbers become segments; arrays and tuples are
    # expanded recursively; nil parts are skipped. Any other type raises.
    def self.build(*parts)
      id = [] of String

      parts.each do |part|
        case part
        when Symbol
          id << build part.to_s
        when String
          id << part
        when Array
          part.each do |e|
            id << build e
          end
        when Tuple
          part.to_a.each do |e|
            id << build e
          end
        when Number
          id << part.to_s
        when Nil
          # do nothing
        else
          raise "#{part.class} is not a keyable type"
        end
      end

      id.flatten.join KEY_SEPERATOR
    end
  end
end

================================================
FILE: src/mosquito/metadata.cr
================================================
module Mosquito
  # Provides a real-time metadata store. Data is not cached, which allows
  # multiple workers to operate on the same structures in real time.
  #
  # Each read or write incurs a round trip to the backend.
  #
  # Keys and values are always strings.
  class Metadata
    property root_key : String
    getter? readonly : Bool

    def initialize(@root_key : String, @readonly = false)
    end

    # Deletes this metadata immediately.
    def delete : Nil
      Mosquito.backend.delete root_key
    end

    # Schedule this metadata to be deleted after a time span.
    def delete(in ttl : Time::Span) : Nil
      Mosquito.backend.delete root_key, in: ttl
    end

    # Reads the metadata and returns it as a hash.
    def to_h : Hash(String, String)
      Mosquito.backend.retrieve root_key
    end

    # Reads a single key from the metadata.
    def []?(key : String) : String?
      Mosquito.backend.get root_key, key
    end

    # Writes a value to a key in the metadata.
    #
    # Raises a RuntimeError when the metadata is readonly.
    def []=(key : String, value : String)
      raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
      Mosquito.backend.set root_key, key, value
    end

    # Deletes a value from the metadata
    #
    # NOTE(review): unlike the other writers this does not check readonly? --
    # confirm whether that is intentional.
    def []=(key : String, value : Nil)
      Mosquito.backend.delete_field root_key, key
    end

    # Writes the given named arguments to the metadata.
    def set(**values)
      set values.to_h
    end

    # Writes multiple values to the metadata at once. Keys are converted to
    # strings; values may be nil.
    #
    # Raises a RuntimeError when the metadata is readonly.
    def set(values : Hash(String | Symbol, String?))
      raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
      Mosquito.backend.set root_key, values.transform_keys(&.to_s)
    end

    # Writes multiple string values to the metadata at once.
    #
    # Raises a RuntimeError when the metadata is readonly.
    def set(values : Hash(String, String))
      raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
      Mosquito.backend.store root_key, values
    end

    # Increments a value in the metadata by 1.
    #
    # Raises a RuntimeError when the metadata is readonly.
    def increment(key)
      raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
      Mosquito.backend.increment root_key, key
    end

    # Parametrically increments a value in the metadata.
    #
    # Raises a RuntimeError when the metadata is readonly.
    def increment(key, by increment : Int32)
      raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
      Mosquito.backend.increment root_key, key, by: increment
    end

    # Decrements a value in the metadata by 1.
    #
    # Raises a RuntimeError when the metadata is readonly.
    def decrement(key)
      raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
      Mosquito.backend.increment root_key, key, by: -1
    end

    # Sets a heartbeat timestamp in the metadata.
# Also sets a timer to delete the metadata after 1 hour.
    def heartbeat!
      # Stored as a unix timestamp in seconds.
      self["heartbeat"] = Time.utc.to_unix.to_s
      delete in: 1.hour
    end

    # Returns the heartbeat timestamp from the metadata.
    # Returns nil when no heartbeat has been recorded.
    def heartbeat? : Time?
      if time = self["heartbeat"]?
        Time.unix(time.to_i)
      else
        nil
      end
    end

    # String representations reflect the current backend contents.
    delegate to_s, inspect, to: to_h
  end
end

================================================
FILE: src/mosquito/periodic_job.cr
================================================
module Mosquito
  # Base class for jobs which are registered with a run interval via
  # `run_every`.
  abstract class PeriodicJob < Job
    def initialize
    end

    abstract def build_job_run

    macro inherited
      # The job name is the underscored, downcased name of the inheriting type.
      macro job_name
        "\{{ @type.id }}".underscore.downcase
      end

      Mosquito::Base.register_job_mapping job_name, {{ @type.id }}

      def self.job_type : String
        job_name
      end

      # Builds a JobRun typed with this job's name.
      def build_job_run
        job_run = Mosquito::JobRun.new(job_name)
      end

      # Registers the interval at which this job should be run.
      macro run_every(interval)
        Mosquito::Base.register_job_interval \{{ @type.id }}, \{{ interval }}
      end
    end

    # Periodic jobs are never rescheduled.
    def rescheduleable?
      false
    end
  end
end

================================================
FILE: src/mosquito/periodic_job_run.cr
================================================
module Mosquito
  # Pairs a PeriodicJob class with its interval and tracks, in the backend,
  # when it was last executed.
  class PeriodicJobRun
    Log = ::Log.for self

    property class : Mosquito::PeriodicJob.class
    property interval : Time::Span | Time::MonthSpan
    # Metadata is keyed under "periodic_jobs" plus the job class name.
    getter metadata : Metadata { Metadata.new(Mosquito.backend.build_key("periodic_jobs", @class.name)) }
    getter observer : Observability::PeriodicJob { Observability::PeriodicJob.new(self) }

    # The last executed timestamp for this periodicjob tracked by the backend.
    def last_executed_at?
      if timestamp = metadata["last_executed_at"]?
        Time.unix(timestamp.to_i)
      else
        nil
      end
    end

    # The last executed timestamp, or "never" if it doesn't exist.
    # "Never" is represented by the unix epoch.
    def last_executed_at
      last_executed_at? || Time.unix(0)
    end

    # Updates the last executed timestamp in the backend,
    # and schedules the metadata for deletion after 3*interval
    # seconds.
    #
    # For Time::Span intervals, the TTL is set to 3 * interval.
    # For Time::MonthSpan intervals, the TTL is set to approximately 3 * interval.
#
    # A month is approximated to 2635200 seconds, or 30.5 days.
    def last_executed_at=(time : Time)
      metadata["last_executed_at"] = time.to_unix.to_s

      case interval_ = interval
      when Time::Span
        metadata.delete(in: interval_ * 3)
      when Time::MonthSpan
        seconds_in_an_average_month = 2_635_200.seconds
        metadata.delete(in: seconds_in_an_average_month * interval_.value * 3)
      end
    end

    def initialize(@class, @interval)
    end

    # Check the last executed timestamp against the current time,
    # and enqueue the job if it's time to execute.
    #
    # Returns true when the interval has elapsed, false otherwise.
    def try_to_execute : Bool
      now = Time.utc

      if last_executed_at + interval <= now
        if pending_job_run?
          Log.info { "Skipping enqueue for #{@class.name}: a job run is already pending" }
        else
          execute
        end

        # NOTE(review): the timestamp is advanced and observer.enqueued is
        # notified even when the enqueue above was skipped because a run is
        # still pending -- confirm this is intended.
        self.last_executed_at = now
        observer.enqueued(at: now)
        true
      else
        observer.skipped
        false
      end
    end

    # Returns true if a previously enqueued job run has not yet finished.
    # This prevents duplicate enqueues when executors are busy and the
    # periodic interval elapses multiple times before the job is run.
    def pending_job_run? : Bool
      if pending_id = metadata["pending_run_id"]?
        if job_run = JobRun.retrieve(pending_id)
          return true if job_run.finished_at.nil?
        end

        # Job run has finished or was cleaned up; clear the stale reference.
        metadata["pending_run_id"] = nil
      end

      false
    end

    # Enqueues the job for execution and records the job run id so that
    # subsequent intervals can detect that a run is already pending.
    def execute
      job = @class.new
      job_run = job.build_job_run
      # The job_run is stored before it is enqueued.
      job_run.store
      @class.queue.enqueue job_run
      metadata["pending_run_id"] = job_run.id
    end
  end
end

================================================
FILE: src/mosquito/queue.cr
================================================
module Mosquito
  # A named Queue.
  #
  # Named Queues exist and have 4 ordered lists: waiting, pending, scheduled, and dead.
  #
  # - The Waiting list is for jobs which need to be executed as soon as possible.
  # - The Pending list is for jobs which are currently being executed.
# - The Scheduled list is indexed by execution time and holds jobs which need to be executed at a later time.
  # - The Dead list is for jobs which have been retried too many times and are no longer viable.
  #
  # A job_run is represented in a queue by its id.
  #
  # A job_run flows through the queues in this manner:
  #
  #
  # ```text
  #  Time=0: JobRun does not exist yet, lists are empty
  #
  #    Waiting       Pending       Scheduled       Dead
  #
  #  ---------------------------------
  #  Time=1: JobRun is enqueued
  #
  #    Waiting       Pending       Scheduled       Dead
  #    JobRun#1
  #
  #  ---------------------------------
  #  Time=2: JobRun begins. JobRun is moved to pending and executed
  #
  #    Waiting       Pending       Scheduled       Dead
  #                  JobRun#1
  #
  #  ---------------------------------
  #  Time=3: JobRuns are Enqueued.
  #
  #    Waiting       Pending       Scheduled       Dead
  #    JobRun#2      JobRun#1
  #    JobRun#3
  #
  #  ---------------------------------
  #  Time=4: JobRun succeeds, next job_run begins.
  #
  #    Waiting       Pending       Scheduled       Dead
  #    JobRun#3      JobRun#2
  #
  #  ---------------------------------
  #  Time=5: JobRun fails and is scheduled for later, next job_run begins.
  #
  #    Waiting       Pending       Scheduled       Dead
  #                  JobRun#3      t=7:JobRun#2
  #
  #  ---------------------------------
  #  Time=6: JobRun succeeds. Nothing is executing.
  #
  #    Waiting       Pending       Scheduled       Dead
  #                                t=7:JobRun#2
  #
  #  ---------------------------------
  #  Time=7: Scheduled job_run is due and is moved to waiting. Nothing is executing.
  #
  #    Waiting       Pending       Scheduled       Dead
  #    JobRun#2
  #
  #  ---------------------------------
  #  Time=8: JobRun begins executing (for the second time).
  #
  #    Waiting       Pending       Scheduled       Dead
  #                  JobRun#2
  #
  #  ---------------------------------
  #  Time=9: JobRun finished successfully. No more job_runs present.
#
  #    Waiting       Pending       Scheduled       Dead
  #
  # ```
  #
  class Queue
    getter name, config_key
    property backend : Mosquito::Backend::Queue
    getter observer : Observability::Queue { Observability::Queue.new self }

    Log = ::Log.for self

    def initialize(@name : String)
      @backend = Mosquito.backend.queue name
      @config_key = @name
    end

    # Enqueues the job_run for execution as soon as possible.
    def enqueue(job_run : JobRun) : JobRun
      observer.enqueued(job_run)
      backend.enqueue job_run
    end

    # Enqueues the job_run to be executed after the given interval.
    def enqueue(job_run : JobRun, in interval : Time::Span) : JobRun
      enqueue job_run, at: interval.from_now
    end

    # Schedules the job_run to be executed at the given time.
    def enqueue(job_run : JobRun, at execute_time : Time) : JobRun
      observer.enqueued(job_run, at: execute_time)
      backend.schedule job_run, execute_time
    end

    # Fetches the next job_run from the backend, or nil when the queue is
    # paused or the backend has nothing to hand out.
    def dequeue : JobRun?
      return if paused?
      if job_run = backend.dequeue
        observer.dequeued job_run
        job_run
      end
    end

    # Finishes the job_run in the backend and schedules it again for
    # execution_time.
    def reschedule(job_run : JobRun, execution_time)
      backend.finish job_run
      enqueue(job_run, at: execution_time)
      observer.rescheduled(job_run, to: execution_time)
    end

    def undequeue : JobRun?
      backend.undequeue
    end

    def dequeue_scheduled : Array(JobRun)
      backend.deschedule
    end

    # Finishes the job_run in the backend without rescheduling it.
    def forget(job_run : JobRun)
      backend.finish job_run
      observer.forgotten job_run
    end

    # Finishes the job_run and then terminates it in the backend.
    def banish(job_run : JobRun)
      backend.finish job_run
      backend.terminate job_run
      observer.banished job_run
    end

    def size(*, include_dead : Bool = true) : Int64
      backend.size(include_dead)
    end

    # Queues are equal when they share the same name.
    def ==(other : self) : Bool
      name == other.name
    end

    # Pause this queue. While paused, `#dequeue` returns nil and no jobs
    # will be dispatched. Jobs can still be enqueued and will accumulate
    # until the queue is resumed.
    #
    # Pass a duration to automatically resume after the given interval,
    # which is useful for backing off from a rate-limited external resource.
    def pause(for duration : Time::Span? = nil) : Nil
      backend.pause(duration)
      observer.paused(duration)
    end

    # Resume a paused queue, allowing jobs to be dequeued again.
    def resume : Nil
      backend.resume
      observer.resumed
    end

    def paused? : Bool
      backend.paused?
    end

    def flush
      backend.flush
    end
  end
end

================================================
FILE: src/mosquito/queued_job.cr
================================================
module Mosquito
  # Base class for jobs which are enqueued on demand, with typed parameters
  # declared through the `param` macro.
  abstract class QueuedJob < Job
    macro inherited
      def self.job_name
        "{{ @type.id }}".underscore.downcase
      end

      Mosquito::Base.register_job_mapping job_name, {{ @type.id }}

      PARAMETERS = [] of Nil

      # Declares a typed job parameter. Generates an instance variable, a
      # setter, a nilable getter (`name?`) and a raising getter (`name`), and
      # records the parameter so serialization code can be generated later.
      macro param(parameter)
        {% verbatim do %}
          {%
            a = "multiline macro hack"

            if ! parameter.is_a?(TypeDeclaration) || parameter.type.nil? || parameter.type.is_a?(Generic) || parameter.type.is_a?(Union)
              message = <<-TEXT
              Mosquito::QueuedJob: Unable to build parameter serialization for `#{parameter.type}` in param declaration `#{parameter}`.

              Mosquito covers most of the crystal primitives for serialization out of the box[1]. More complex types
              either need to be serialized yourself (recommended) or implement custom serializer logic[2].

              Parameter types must be specified explicitly. Make sure your parameter declarations look something like this:

                class LongJob < Mosquito::QueuedJob
                  param user_email : String
                end

              Check the manual on declaring job parameters [3] if needed

              [1] - https://mosquito-cr.github.io/manual/index.html#primitive-serialization
              [2] - https://mosquito-cr.github.io/manual/serialization.html
              [3] - https://mosquito-cr.github.io/manual/index.html#parameters
              TEXT

              raise message
            end

            name = parameter.var
            value = parameter.value
            type = parameter.type
            simplified_type = type.resolve

            method_suffix = simplified_type.stringify.underscore.gsub(/::/,"__").id

            PARAMETERS << {
              name: name,
              value: value,
              type: type,
              method_suffix: method_suffix
            }
          %}

          @{{ name }} : {{ type }}?

          def {{ name }}=(value : {{simplified_type}}) : {{simplified_type}}
            @{{ name }} = value
          end

          def {{ name }}? : {{ simplified_type }} | Nil
            @{{ name }}
          end

          # Non-nil accessor: raises when the parameter was never assigned.
          # The error points callers at the nilable `name?` accessor.
          def {{ name }} : {{ simplified_type }}
            if ! (%object = {{ name }}?).nil?
              %object
            else
              msg = <<-MSG
                Expected a parameter named `{{ name }}` but found nil.
                The parameter may not have been provided when the job was enqueued.
                Should you be using `{{ name }}?` instead?
              MSG
              raise msg
            end
          end
        {% end %}
      end

      macro finished
        {% verbatim do %}
          def initialize; end

          # Constructor accepting every declared parameter, in declaration
          # order, with the declared default values.
          def initialize({{
              PARAMETERS.map do |parameter|
                assignment = "@#{parameter["name"]}"
                assignment = assignment + " : #{parameter["type"]}" if parameter["type"]
                assignment = assignment + " = #{parameter["value"]}" unless parameter["value"].is_a? Nop
                assignment
              end.join(", ").id
            }})
          end

          # Methods declared in here have the side effect over overwriting any overrides which may have been implemented
          # otherwise in the job class. In order to allow folks to override the behavior here, these methods are only
          # injected if none already exists.
          {% unless @type.methods.map(&.name).includes?(:vars_from.id) %}
            def vars_from(config : Hash(String, String))
              {% for parameter in PARAMETERS %}
                @{{ parameter["name"] }} = deserialize_{{ parameter["method_suffix"] }}(config["{{ parameter["name"] }}"])
              {% end %}
            end
          {% end %}

          {% unless @type.methods.map(&.name).includes?(:build_job_run.id) %}
            def build_job_run
              job_run = Mosquito::JobRun.new self.class.job_name

              {% for parameter in PARAMETERS %}
                job_run.config["{{ parameter["name"] }}"] = serialize_{{ parameter["method_suffix"] }}(@{{ parameter["name"] }}.not_nil!)
              {% end %}

              job_run
            end
          {% end %}
        {% end %}
      end
    end

    # Builds, stores and enqueues a job_run for immediate execution.
    # When before_enqueue_hook returns false the job_run is returned without
    # being stored or enqueued.
    def enqueue : JobRun
      job_run = build_job_run
      return job_run unless before_enqueue_hook job_run
      job_run.store
      self.class.queue.enqueue job_run
      after_enqueue_hook job_run
      job_run
    end

    # Same as `#enqueue`, delaying execution by the given interval.
    def enqueue(in delay_interval : Time::Span) : JobRun
      job_run = build_job_run
      return job_run unless before_enqueue_hook job_run
      job_run.store
      self.class.queue.enqueue job_run, in: delay_interval
      after_enqueue_hook job_run
      job_run
    end

    # Same as `#enqueue`, scheduling execution for the given time.
    def enqueue(at execute_time : Time) : JobRun
      job_run = build_job_run
      return job_run unless before_enqueue_hook job_run
      job_run.store
      self.class.queue.enqueue job_run, at: execute_time
      after_enqueue_hook job_run
      job_run
    end

    def before_enqueue_hook(job : JobRun) : Bool
      # intentionally left blank, return true by default
      true
    end

    def after_enqueue_hook(job : JobRun) : Nil
      # intentionally left blank
    end

    # Fired before a job is enqueued. Allows preventing enqueue at the job level.
    #
    #   class SomeJob < Mosquito::QueuedJob
    #     before_enqueue do
    #       # return false to prevent enqueue
    #     end
    #   end
    macro before_enqueue(&block)
      def before_enqueue_hook(job : Mosquito::JobRun) : Bool
        {% if @type.methods.map(&.name).includes?(:before_enqueue_hook.id) %}
          previous_def
        {% else %}
          super
        {% end %}

        {{ yield }}
      end
    end

    # Fired after a job is enqueued.
    macro after_enqueue(&block)
      def after_enqueue_hook(job : Mosquito::JobRun) : Nil
        {% if @type.methods.map(&.name).includes?(:after_enqueue_hook.id) %}
          previous_def
        {% else %}
          super
        {% end %}

        {{ yield }}
      end
    end
  end
end

================================================
FILE: src/mosquito/rate_limiter.cr
================================================
module Mosquito::RateLimiter
  module ClassMethods
    # Configures rate limiting for this job.
    #
    # `limit` and `per` are used to control the run count and the window
    # duration. Defaults to a limit of 1 run per second.
    #
    # `increment` is used to indicate how many "hits" against a single job is
    # worth. Defaults to 1.
#
    # `key` is used to combine rate limiting functions across multiple jobs.
    def throttle(*,
      limit : Int32 = 1,
      per : Time::Span = 1.second,
      increment = 1,
      key = self.name.underscore
    )
      @@rate_limit_ceiling = limit
      @@rate_limit_interval = per
      # The metadata key is namespaced under "rate_limit".
      @@rate_limit_key = Mosquito.backend.build_key "rate_limit", key
      @@rate_limit_increment = increment
    end

    # Statistics about the rate limiter, including both the configuration
    # parameters and the run counts.
    #
    # A missing window start is reported as the unix epoch and a missing run
    # count as 0.
    def rate_limit_stats : NamedTuple
      meta = metadata

      window_start = if window_start_ = meta["window_start"]?
        Time.unix window_start_.to_i
      else
        Time::UNIX_EPOCH
      end

      run_count = if run_count_ = meta["run_count"]?
        run_count_.to_i
      else
        0
      end

      {
        interval: @@rate_limit_interval,
        key: @@rate_limit_key,
        increment: @@rate_limit_increment,
        limit: @@rate_limit_ceiling,
        window_start: window_start,
        run_count: run_count
      }
    end

    # Provides an instance of the metadata store used to track rate limit
    # stats.
    def metadata : Metadata
      Metadata.new @@rate_limit_key
    end

    # Resolves the key used to index the metadata store for this test.
    def rate_limit_key
      @@rate_limit_key
    end
  end

  macro included
    extend ClassMethods

    # A negative ceiling means rate limiting is not configured
    # (see rate_limited?).
    @@rate_limit_ceiling = -1
    @@rate_limit_interval : Time::Span = 1.second
    @@rate_limit_key = ""
    @@rate_limit_increment = 1

    before do
      update_window_start

      if rate_limited?
        # Pause the queue for whatever remains of the current window.
        if expires = window_expires_at
          duration = expires - Time.utc
          self.class.queue.pause(for: duration) if duration > Time::Span.zero
        end

        preempt "rate limited"
      end
    end

    after do
      # Only runs which actually executed count against the limit.
      increment_run_count if executed?
    end
  end

  @rl_metadata : Metadata?

  # Storage hash for rate limit data.
  def metadata : Metadata
    @rl_metadata ||= self.class.metadata
  end

  # Should this job be cancelled?
  # If not, update the rate limit metadata.
  def rate_limited? : Bool
    return false if @@rate_limit_ceiling < 0
    return true if maxed_rate_for_window?
    false
  end

  # Has the run count exceeded the ceiling for the current window?
  def maxed_rate_for_window? : Bool
    run_count = metadata["run_count"]?.try &.to_i
    run_count ||= 0
    # A run count equal to the ceiling already counts as maxed.
    run_count >= @@rate_limit_ceiling
  end

  # Calculates the start of the rate limit window.
  # Returns nil when no window start has been recorded.
  def window_start : Time?
    if start_time = metadata["window_start"]?.try(&.to_i)
      Time.unix start_time
    end
  end

  # When does the current rate limit window expire?
  # Returns nil if the window is already expired.
  def window_expires_at : Time?
    return nil unless started_window = window_start
    expiration_time = started_window + @@rate_limit_interval

    if expiration_time < Time.utc
      nil
    else
      expiration_time
    end
  end

  # Resets the run count and logs the start of window.
  # Only happens once the previous window is older than the interval.
  def update_window_start : Nil
    started_window = window_start || Time::UNIX_EPOCH
    now = Time.utc

    if (now - started_window) > @@rate_limit_interval
      metadata["window_start"] = now.to_unix.to_s
      metadata["run_count"] = "0"
    end
  end

  # Increments the run counter.
  def increment_run_count : Nil
    metadata.increment "run_count", by: increment_run_count_by
  end

  # How much the run counter should be incremented by.
  # Implemented as a dynamic method so that it can easily be calculated by
  # some other metric, eg api calls to a third party library.
def increment_run_count_by : Int32
    @@rate_limit_increment
  end
end

================================================
FILE: src/mosquito/redis_backend.cr
================================================
require "redis"
require "digest/sha1"

module Mosquito
  # Lua scripts used by the redis backend, and the bookkeeping of their SHAs.
  module Scripts
    SCRIPTS = {
      :remove_matching_key => <<-LUA,
        if redis.call("get",KEYS[1]) == ARGV[1] then
          return redis.call("del",KEYS[1])
        else
          return 0
        end
      LUA
      :renew_matching_key => <<-LUA
        if redis.call("get",KEYS[1]) == ARGV[1] then
          return redis.call("expire",KEYS[1],ARGV[2])
        else
          return 0
        end
      LUA
    }

    @@script_sha = {} of Symbol => String

    # Loads every script into redis and records the SHA redis reports.
    def self.load(connection)
      SCRIPTS.each do |name, script|
        sha = @@script_sha[name] = connection.script_load script
        Log.info { "loading script : #{name} => #{sha}" }
      end
    end

    {% for name, script in SCRIPTS %}
      # Seeded with a locally computed SHA1 of the script text.
      @@script_sha[:{{ name.id }}] = Digest::SHA1.hexdigest({{ script }})

      @[AlwaysInline]
      def self.{{ name.id }}
        @@script_sha[:{{ name.id }}]
      end
    {% end %}
  end

  class RedisBackend < Mosquito::Backend
    LIST_OF_QUEUES_KEY = "queues"
    LIST_OF_OVERSEERS_KEY = "overseers"

    Log = ::Log.for(self)

    {% for name, script in Scripts::SCRIPTS %}
      # Runs the script by SHA. When redis reports NOSCRIPT the scripts are
      # reloaded and the call is retried once (loadscripts: false).
      def {{ name.id }}(*, keys = [] of String, args = [] of String, loadscripts = true)
        script = {{ script }}
        digest = Scripts.{{name.id}}
        redis.evalsha digest, keys: keys, args: args
      rescue exception : Redis::Error
        raise exception unless exception.message.try(&.starts_with? "NOSCRIPT")
        raise exception unless loadscripts

        Log.for("{{ name.id }}").warn { "Redis Scripts have gone missing, reloading" }
        Scripts.load redis
        {{ name.id }} keys: keys, args: args, loadscripts: false
      end
    {% end %}

    getter connection_string : String?
    getter connection : ::Redis::Client?

    # Connects to the given redis url and loads the scripts.
    def connection_string=(value : String)
      @connection_string = value
      @connection = ::Redis::Client.new(URI.parse(value))
      Scripts.load(@connection.not_nil!)
    end

    # Uses the given client and loads the scripts.
    def connection=(client : ::Redis::Client)
      @connection = client
      Scripts.load(client)
    end

    def valid_configuration? : Bool
      !@connection.nil?
    end

    @[AlwaysInline]
    def redis
      @connection.not_nil!
    end

    protected def _build_queue(name : String) : Queue
      Queue.new(self, name)
    end

    def store(key : String, value : Hash(String, String?) | Hash(String, String)) : Nil
      set key, value
    end

    # Reads the whole hash stored at key.
    def retrieve(key : String) : Hash(String, String)
      result = redis.hgetall(key).as(Array).map(&.to_s)
      # HGETALL yields a flat field/value list; pair the entries up.
      result.in_groups_of(2, "").to_h
    end

    # Deletes key immediately, or expires it after ttl seconds when ttl > 0.
    def delete(key : String, in ttl : Int64 = 0) : Nil
      if ttl > 0
        redis.expire key, ttl
      else
        redis.del key
      end
    end

    def delete(key : String, in ttl : Time::Span) : Nil
      delete key, ttl.to_i
    end

    def get(key : String, field : String) : String?
      redis.hget(key, field).as?(String)
    end

    def set(key : String, field : String, value : String) : String
      redis.hset key, field, value
      value
    end

    # Writes the non-nil values to the hash at key and removes the fields
    # whose value is nil, inside a single MULTI block.
    def set(key : String, values : Hash(String, String?) | Hash(String, Nil) | Hash(String, String)) : Nil
      redis.multi do |multi|
        non_nil_key_values = values.compact
        if non_nil_key_values.is_a?(Hash(String, String))
          multi.hset key, non_nil_key_values
        end

        keys_for_nil_values = values.select{|_,v| v.nil?}.keys
        keys_for_nil_values.each do |nil_key|
          multi.hdel key, nil_key
        end
      end
    end

    def delete_field(key : String, field : String) : Nil
      redis.hdel key, field
    end

    def increment(key : String, field : String) : Int64
      increment key, field, by: 1
    end

    def increment(key : String, field : String, by value : Int32) : Int64
      redis.hincrby(key, field, value).as(Int64)
    end

    def expires_in(key : String) : Int64
      redis.ttl key
    end

    def list_queues : Array(String)
      key = build_key(LIST_OF_QUEUES_KEY)
      list_queues = redis.zrange(key, "0", "-1").as(Array)

      return [] of String if list_queues.empty?

      list_queues.compact_map(&.as(String))
    end

    def register_overseer(id : String) : Nil
      key = build_key LIST_OF_OVERSEERS_KEY
      expiring_list_push key, id
    end

    def deregister_overseer(id : String) : Nil
      key = build_key LIST_OF_OVERSEERS_KEY
      redis.zrem key, id
    end

    # Lists overseers, first expiring entries older than one day.
    def list_overseers : Array(String)
      key = build_key LIST_OF_OVERSEERS_KEY
      expiring_list_fetch(key, Time.utc - 1.day)
    end

    def list_active_overseers(since : Time) : Array(String)
      key = build_key LIST_OF_OVERSEERS_KEY
      redis.zrangebyscore(key, since.to_unix.to_s, "+inf").as(Array).map(&.as(String))
    end

    # TODO: this should take the timestamp as an argument
    def expiring_list_push(key : String, value : String) : Nil
      redis.zadd key, Time.utc.to_unix.to_s, value
    end

    # Removes items scored at or before the given time, then returns what
    # remains.
    def expiring_list_fetch(key : String, expire_items_older_than : Time) : Array(String)
      redis.zremrangebyscore key, "0", expire_items_older_than.to_unix.to_s
      redis.zrange(key, "0", "-1").as(Array).map(&.as(String))
    end

    # is this even a good idea?
    def flush : Nil
      redis.flushdb
    end

    # Attempts to take the lock with SET NX and an expiry of ttl seconds.
    def lock?(key : String, value : String, ttl : Time::Span) : Bool
      response = redis.set key, value, ex: ttl.to_i, nx: true
      response == "OK"
    end

    # Extends the lock only when it still holds the given value.
    def renew_lock?(key : String, value : String, ttl : Time::Span) : Bool
      result = renew_matching_key keys: [key], args: [value, ttl.to_i.to_s]
      result == 1_i64
    end

    # Releases the lock only when it still holds the given value.
    def unlock(key : String, value : String) : Nil
      remove_matching_key keys: [key], args: [value]
    end

    def publish(key : String, value : String) : Nil
      redis.publish key, value
    end

    # Pattern-subscribes to key in a spawned fiber and forwards messages to
    # the returned channel. Once the channel is closed, the next message
    # triggers an unsubscribe.
    def subscribe(key : String) : Channel(Backend::BroadcastMessage)
      stream = Channel(Backend::BroadcastMessage).new

      spawn do
        redis.psubscribe(key) do |subscription, connection|
          subscription.on_message do |channel, message|
            if stream.closed?
              connection.unsubscribe channel
            else
              stream.send(
                Backend::BroadcastMessage.new(
                  channel: channel,
                  message: message
                )
              )
            end
          end
        end
      end

      stream
    end

    # Pushes value onto the list at key and keeps only the newest
    # window_size entries.
    def average_push(key : String, value : Int32, window_size : Int32 = 100) : Nil
      redis.lpush key, [value.to_s]
      redis.ltrim key, 0, window_size - 1
    end

    # Integer average of the list at key, or 0 when it is empty.
    def average(key : String) : Int32
      stats = redis.lrange key, 0, -1
      return 0_i32 if stats.empty?
      sum = stats.sum(0_i64) { |s| s.as(String).to_i64 }
      (sum // stats.size).to_i32
    end

    class Queue < Backend::Queue
      private getter redis_backend : RedisBackend

      def initialize(backend : RedisBackend, name : String)
        super(backend, name)
        @redis_backend = backend
      end

      private def redis
        redis_backend.redis
      end

      {% for q in QUEUES %}
        private def {{q.id}}_q
          backend.build_key {{q}}, name
        end
      {% end %}

      # Adds the job_run to the scheduled set, scored by the execution time in
      # milliseconds, and records this queue in the list of queues.
      def schedule(job_run : JobRun, at scheduled_time : Time) : JobRun
        redis.pipeline do |pipe|
          pipe.zadd scheduled_q, scheduled_time.to_unix_ms.to_s, job_run.id
          pipe.zadd backend.build_key(LIST_OF_QUEUES_KEY), Time.utc.to_unix.to_s, name
        end

        job_run
      end

      # Removes every job_run whose scheduled time has passed from the
      # scheduled set and returns those which could be retrieved.
      def deschedule : Array(JobRun)
        time = Time.utc
        overdue_job_runs = redis.zrangebyscore(scheduled_q, "0", time.to_unix_ms.to_s).as(Array)

        return [] of JobRun if overdue_job_runs.empty?

        overdue_job_runs.compact_map do |job_run_id|
          redis.zrem scheduled_q, job_run_id.to_s
          JobRun.retrieve job_run_id.as(String)
        end
      end

      # Pushes the job_run id on the left of the waiting list and records this
      # queue in the list of queues.
      def enqueue(job_run : JobRun) : JobRun
        redis.pipeline do |pipe|
          pipe.lpush waiting_q, job_run.id
          pipe.zadd backend.build_key(LIST_OF_QUEUES_KEY), Time.utc.to_unix.to_s, name
        end

        job_run
      end

      # Atomically moves the oldest waiting id (right end of waiting) to the
      # left end of pending.
      def dequeue : JobRun?
        if id = redis.lmove waiting_q, pending_q, :right, :left
          JobRun.retrieve id.to_s
        end
      end

      # Exact inverse of `#dequeue`: atomically moves the most recently
      # dequeued id (left end of pending) back to the right end of waiting, so
      # it is the next one handed out.
      #
      # Taking from the right end of pending instead would requeue the oldest
      # pending job_run rather than the one which was just dequeued.
      def undequeue : JobRun?
        if id = redis.lmove pending_q, waiting_q, :left, :right
          JobRun.retrieve id.to_s
        end
      end

      # Removes every occurrence of the job_run id from the pending list.
      def finish(job_run : JobRun)
        redis.lrem pending_q, 0, job_run.id
      end

      # Pushes the job_run id onto the dead list.
      def terminate(job_run : JobRun)
        redis.lpush dead_q, job_run.id
      end

      def flush : Nil
        redis.del(
          waiting_q,
          pending_q,
          scheduled_q,
          dead_q
        )
      end

      # Total number of job_runs across waiting, pending, scheduled and
      # (optionally) dead.
      def size(include_dead = true) : Int64
        queues = [waiting_q, pending_q]
        queues << dead_q if include_dead

        queue_size = queues
          .map { |key| redis.llen(key).as(Int64) }
          .reduce { |sum, i| sum + i }

        scheduled_size = redis.zcount scheduled_q, "0", "+inf"
        queue_size + scheduled_size.as(Int64)
      end

      {% for name in ["waiting", "scheduled", "pending", "dead"] %}
        # Lists the ids held at this key, whichever redis type backs it.
        def list_{{name.id}} : Array(String)
          key = {{name.id}}_q
          type = redis.type key

          if type == "list"
            redis.lrange(key, "0", "-1").as(Array(Redis::Value)).map(&.as(String))
          elsif type == "zset"
            redis.zrange(key, "0", "-1").as(Array(Redis::Value)).map(&.as(String))
          elsif type == "none"
            [] of String
          else
            raise "don't know how to dump a #{type} for {{name.id}}"
          end
        end

        def {{name.id}}_size : Int64
          key = {{name.id}}_q
          type = redis.type key

          case type
          when "list"
            redis.llen(key).as(Int64)
          when "zset"
            redis.zcount(key, "0", "+inf").as(Int64)
          when "none"
            0_i64
          else
            raise "don't know how to {{name.id}}_size (redis type is a #{type})."
          end
        end
      {% end %}

      # The time the job_run is scheduled for, or nil when it has no score in
      # the scheduled set.
      def scheduled_job_run_time(job_run : JobRun) : Time?
        if score = redis.zscore(scheduled_q, job_run.id).as?(String)
          Time.unix_ms(score.to_i64)
        end
      end

      private def pause_key
        backend.build_key "queue", name, "pause"
      end

      # Sets the pause key. With a duration the key expires after that many
      # milliseconds (at least 1); without, it stays until `#resume`.
      def pause(duration : Time::Span? = nil) : Nil
        if duration
          ms = {duration.total_milliseconds.to_i64, 1_i64}.max
          redis.set pause_key, "1", px: ms
        else
          redis.set pause_key, "1"
        end
      end

      def resume : Nil
        redis.del pause_key
      end

      def paused? : Bool
        redis.exists(pause_key) == 1
      end
    end
  end
end

================================================
FILE: src/mosquito/resource_gate.cr
================================================
module Mosquito
  # A ResourceGate controls whether work should be dequeued based on
  # external resource availability (GPU utilization, CPU load, network
  # bandwidth, etc.).
  #
  # Subclass `ResourceGate` and implement `#check` to test the resource.
  # The result is cached for `sample_ttl` so expensive checks (shelling
  # out to nvidia-smi, reading /sys, etc.) aren't repeated on every
  # dequeue spin.
  #
  # ## Example
  #
  # ```crystal
  # class GpuUtilizationGate < Mosquito::ResourceGate
  #   def initialize(@threshold : Float64 = 85.0)
  #     super(sample_ttl: 2.seconds)
  #   end
  #
  #   protected def check : Bool
  #     current_gpu_utilization < @threshold
  #   end
  # end
  # ```
  abstract class ResourceGate
    getter sample_ttl : Time::Span

    @last_result : Bool = true
    # Starting at the epoch makes the first `#allow?` call run `#check`.
    @last_check_at : Time = Time::UNIX_EPOCH

    def initialize(@sample_ttl : Time::Span = 2.seconds)
    end

    # Returns the cached result of `#check`, re-evaluating only after
    # `sample_ttl` has elapsed since the last check.
    def allow? : Bool
      now = Time.utc
      if now - @last_check_at >= @sample_ttl
        @last_result = check
        @last_check_at = now
      end
      @last_result
    end

    # Subclasses implement the actual resource check. Called at most
    # once per `sample_ttl` interval.
    protected abstract def check : Bool

    # Called after a job finishes, in case the gate needs to update
    # internal bookkeeping (e.g. decrement an in-flight counter).
    def released(job_run : JobRun, queue : Queue) : Nil
    end
  end
end

================================================
FILE: src/mosquito/runnable.cr
================================================
require "wait_group"

module Mosquito
  # Runnable implements a general purpose spawn/loop which carries a state
  # enum.
#
  # ## Managing a Runnable
  #
  # The primary purpose of Runnable is to cleanly abstract the details of
  # spawning a fiber, running a loop, and shutting down when asked.
  #
  # A service which manages a Runnable might look like this:
  #
  # ```crystal
  # runnable = MyRunnable.new
  #
  # # This will spawn and return immediately.
  # runnable.run
  #
  # puts runnable.state # => State::Working
  #
  # # Some time later...
  # wg = WaitGroup.new(1)
  # runnable.stop(wg)
  # wg.wait
  # ```
  #
  #
  # ## Implementing a Runnable
  #
  # A runnable implementation needs to implement only two methods: #each_run
  # and #runnable_name. In addition, pre_run and post_run are available for
  # setup and teardown.
  #
  # Runnable state is managed automatically through startup and shutdown, but
  # within each_run it can be manually altered with `#state=`.
  #
  # ### Example
  #
  # ```crystal
  # class MyRunnable
  #   include Mosquito::Runnable
  #
  #   # Optional
  #   def pre_run
  #     puts "my runnable is starting"
  #   end
  #
  #   def each_run
  #     puts "my runnable is running"
  #   end
  #
  #   # Optional
  #   def post_run
  #     puts "my runnable has stopped"
  #   end
  #
  #   def runnable_name
  #     "MyRunnable"
  #   end
  # end
  # ```
  #
  # Implementation details about what work should be done in the spawned fiber
  # are placed in #each_run.
  #
  module Runnable
    enum State
      Starting
      Working
      Idle
      Stopping
      Finished
      Crashed

      # True while the run loop should keep cycling: starting, working or idle.
      def running?
        starting? || working? || idle?
      end

      # ie, not starting
      def started?
        working? || idle?
      end
    end

    # Tracks the state of this runnable.
    #
    # Initially it will be `State::Starting`. After `#run` is called it will
    # be `State::Working`.
    #
    # When `#stop` is called it will be `State::Stopping`. After `#run` finishes,
    # it will be `State::Finished`.
    #
    # It is not necessary to set this manually, but it's available to an implementation
    # if needed. See `Mosquito::Runners::Executor#state=` (source code) for an example.
getter state : State = State::Starting

    # After #run has been called this holds a reference to the Fiber
    # which is used to check that the fiber is still running.
    # It stays nil when the loop is run with `spawn: false`.
    getter fiber : Fiber?

    # Signaled when the run loop exits (finished or crashed).
    private getter done = Channel(Nil).new

    # Lazily built identifier: the class name, underscored and with "::"
    # replaced by ".", followed by the object id.
    getter my_name : String {
      "#{self.class.name.underscore.gsub("::", ".")}.#{self.object_id}"
    }

    # Logger scoped to `#runnable_name`, built on first use.
    private getter log : ::Log { Log.for runnable_name }

    # Changes the state, except that a transition to idle is ignored
    # (and traced) while the runnable is stopping.
    private def state=(new_state : State)
      # If the state is currently stopping, don't go back to idle.
      if @state.stopping? && new_state.idle?
        log.trace { "Ignoring state change to #{new_state} because state=stopping." }
        return
      end

      @state = new_state
    end

    # Whether the captured fiber has terminated. False when no fiber has
    # been captured.
    def dead? : Bool
      if fiber_ = fiber
        fiber_.dead?
      else
        false
      end
    end

    # Start the Runnable, and capture the fiber to a property.
    #
    # The spawned fiber will not return as long as state.running?.
    #
    # State can be altered internally or externally to cause it to exit
    # but the cleanest way to do that is to call #stop.
    #
    # By default, the run loop is spawned in a new fiber and control
    # returns immediately. Pass `spawn: false` to run the loop directly
    # in the current fiber (blocking until finished).
    def run(*, spawn spawn_fiber = true)
      if spawn_fiber
        @fiber = spawn(name: runnable_name) do
          run_loop
        end
      else
        run_loop
      end
    end

    # Drives the lifecycle: marks the runnable as working, calls #pre_run,
    # calls #each_run for as long as the state is running, then calls
    # #post_run and marks the runnable as finished.
    #
    # Any exception raised along the way marks the runnable as crashed;
    # in that case #post_run is not called. The `done` channel is closed
    # in every case, which releases the fibers spawned by #stop.
    private def run_loop
      log.info { "starting" }

      # NOTE(review): this assignment is unconditional, so a #stop issued
      # before the loop begins appears to be overwritten here — confirm.
      self.state = State::Working
      pre_run

      while state.running?
        each_run
      end

      post_run
      self.state = State::Finished
      log.info { "stopped" }
    rescue any_exception
      self.state = State::Crashed
      log.error { "crashed with #{any_exception.inspect}" }
    ensure
      done.close
    end

    # Request that the next time the run loop cycles it should exit instead.
    # The runnable doesn't exit immediately so #stop spawns a fiber to
    # monitor the state transition.
    #
    # Returns the `WaitGroup`, which will be decremented when the
    # runnable has finished. This enables `runnable.stop.wait`.
    #
    # If a `WaitGroup` is provided, it will be decremented when the
    # runnable has finished.
# This is useful when stopping multiple
    # runnables and waiting for all of them to finish.
    #
    # Calling stop on a runnable that has already finished or crashed is a
    # no-op (the wait_group is signaled immediately).
    def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
      # Neither running nor stopping means finished or crashed: nothing to
      # wait for.
      unless state.running? || state.stopping?
        wait_group.done
        return wait_group
      end

      self.state = State::Stopping if state.running?

      # `done` is closed when the run loop exits, at which point `receive?`
      # returns and the wait group is released.
      spawn do
        done.receive?
        wait_group.done
      end

      wait_group
    end

    # Used to print a pretty name for logging.
    abstract def runnable_name : String

    # Implementation of what this Runnable should do on each cycle.
    #
    # Take care that @state is #running? at the end of the method
    # unless it is finished and should exit.
    abstract def each_run : Nil

    # Available to hook a one time setup before the run loop.
    def pre_run : Nil ; end

    # Available to hook any teardown logic after the run loop.
    def post_run : Nil ; end
  end
end
================================================
FILE: src/mosquito/runner.cr
================================================
require "colorize"

module Mosquito
  # This singleton class serves as a shorthand for starting and managing an Overseer.
  #
  # A minimal usage of Mosquito::Runner is:
  #
  # ```
  # require "mosquito"
  #
  # # When the process receives sigint, it'll notify the overseer to shut down gracefully.
  # trap("INT") do
  #   Mosquito::Runner.stop
  # end
  #
  # # Starts the overseer, and holds the thread captive.
  # Mosquito::Runner.start
  # ```
  #
  # If for some reason you want to manage an overseer or group of overseers yourself, Mosquito::Runner can be omitted entirely:
  #
  # ```crystal
  # require "mosquito"
  #
  # mosquito = Mosquito::Overseer.new
  #
  # # Spawns a mosquito managed fiber and returns immediately
  # mosquito.run
  #
  # trap "INT" do
  #   wg = WaitGroup.new(1)
  #   mosquito.stop(wg)
  #   wg.wait
  # end
  # ```
  class Runner
    Log = ::Log.for self

    # Start the mosquito runner.
    #
    # If spin = true (default) the function will not return until the runner is
    # shut down.
# Otherwise it will return immediately.
    #
    def self.start(spin = true)
      Log.notice { "Mosquito is buzzing..." }

      # Spinning means running the loop in the calling fiber rather than
      # spawning a new one.
      instance.run(spawn: !spin)
    end

    # :nodoc:
    def self.keep_running : Bool
      current = instance.state
      current.starting? || current.running? || current.stopping?
    end

    # Ask the mosquito runner to stop. A job which is currently executing
    # is not aborted, but no new jobs are started.
    #
    # With `wait` set, the call blocks until the overseer has finished.
    #
    # See `Mosquito::Runnable#stop`.
    def self.stop(wait = false)
      return unless keep_running
      Log.notice { "Mosquito is shutting down..." }

      shutdown = instance.stop
      wait ? shutdown.wait : shutdown
    end

    # The overseer managed by the singleton runner.
    def self.overseer
      instance.overseer
    end

    # Lazily builds the singleton.
    private def self.instance : self
      @@instance ||= new
    end

    # :nodoc:
    delegate run, stop, state, to: @overseer

    # :nodoc:
    delegate running?, to: @overseer.state

    # :nodoc:
    getter overseer : Runners::Overseer

    # :nodoc:
    def initialize
      Mosquito.configuration.validate

      @overseer = Runners::Overseer.new
    end
  end
end
================================================
FILE: src/mosquito/runners/coordinator.cr
================================================
module Mosquito::Runners
  # Schedules work: enqueues periodic jobs and moves delayed jobs which
  # have come due onto their queues. When the distributed lock is enabled
  # only the instance holding the leadership lease does the scheduling.
  class Coordinator
    Log = ::Log.for self

    # Lifetime requested for the leadership lease when it is acquired or renewed.
    LockTTL = 30.seconds

    getter lock_key : String
    getter instance_id : String
    getter queue_list : QueueList
    getter? is_leader : Bool = false

    def initialize(@queue_list)
      @lock_key = Mosquito.backend.build_key :coordinator, :leadership_lock
      @instance_id = Random::Secure.hex(8)
    end

    def runnable_name : String
      "coordinator.#{object_id}"
    end

    # Teardown hook: gives up the leadership lease, if held.
    def post_run : Nil
      release_leadership_lock
    end

    # Enqueues periodic jobs and then delayed jobs, subject to
    # `#only_if_coordinator`.
    def schedule : Nil
      only_if_coordinator do
        enqueue_periodic_jobs
        enqueue_delayed_jobs
      end
    end

    # Yields unconditionally when the distributed lock is disabled.
    # Otherwise the lease is acquired or renewed first, and the block is
    # only yielded to when this instance is the leader.
    def only_if_coordinator : Nil
      if Mosquito.configuration.use_distributed_lock
        maintain_leadership
        yield if is_leader?
      else
        yield
      end
    end

    # Releases the coordinator lease. Call during shutdown so another
    # instance can take over immediately instead of waiting for the
    # TTL to expire.
def release_leadership_lock : Nil
      return unless is_leader?
      Mosquito.backend.unlock lock_key, instance_id
      @is_leader = false
      Log.info { "Coordinator lease released" }
    end

    # Gives every registered scheduled job run the chance to execute.
    def enqueue_periodic_jobs
      Base.scheduled_job_runs.each do |scheduled_job_run|
        scheduled_job_run.try_to_execute
      end
    end

    # For each queue in the queue list, pulls the scheduled job runs which
    # have come due and enqueues them.
    def enqueue_delayed_jobs
      queue_list.each do |q|
        overdue_jobs = q.dequeue_scheduled
        next unless overdue_jobs.any?
        Log.for("enqueue_delayed_jobs").info { "#{overdue_jobs.size} delayed jobs ready in #{q.name}" }

        overdue_jobs.each do |job_run|
          q.enqueue job_run
        end
      end
    end

    # Renews the lease when this instance is the leader, and otherwise
    # tries to acquire it. A failed renewal drops leadership and is
    # followed immediately by a fresh acquisition attempt.
    private def maintain_leadership : Nil
      if is_leader?
        unless Mosquito.backend.renew_lock? lock_key, instance_id, LockTTL
          Log.info { "Lost coordinator lease" }
          @is_leader = false
          try_acquire
        end
      else
        try_acquire
      end
    end

    # Attempts to take the lease, becoming leader on success.
    private def try_acquire : Nil
      if Mosquito.backend.lock? lock_key, instance_id, LockTTL
        Log.info { "Coordinator lease acquired" }
        @is_leader = true
      end
    end
  end
end
================================================
FILE: src/mosquito/runners/executor.cr
================================================
require "./run_at_most"
require "../runnable"

module Mosquito::Runners
  # The executor is the center of work in Mosquito, and it is the demarcation
  # point between Mosquito framework and application code. Above the Executor
  # is entirely Mosquito, and below it is application code.
  #
  # An Executor is responsible for hydrating Job classes with deserialized
  # parameters and calling `Mosquito::Job#run` on them. It measures the time it
  # takes to run a job and provides detailed log messages about the current
  # status.
  #
  # An executor is a `Mosquito::Runnable` and should be interacted with according to
  # the Runnable API.
  #
  # To build an executor, provide a job input channel and an idle bell channel. These
  # channels can be shared between all available executors.
  #
  # The executor will ring the idle bell when it is ready to accept work and then wait
  # for work to show up on the job pipeline.
# After the job is finished it will ring the
  # bell again and wait for more work.
  class Executor
    include RunAtMost
    include Runnable

    # How long a job config is persisted after success
    property successful_job_ttl : Int32 { Mosquito.configuration.successful_job_ttl }

    # How long a job config is persisted after failure
    property failed_job_ttl : Int32 { Mosquito.configuration.failed_job_ttl }

    # Where work is received from the overseer.
    getter job_pipeline : Channel(WorkUnit)

    # The unit of work most recently received. It is not cleared when the
    # job finishes.
    getter! work_unit : WorkUnit

    # Used to notify the overseer when this executor is idle.
    # Sends the {JobRun, Queue} tuple that was just finished, or nil
    # when the executor first starts up.
    getter finished_bell : Channel(WorkUnit?)

    getter overseer : Overseer

    getter observer : Observability::Executor { Observability::Executor.new self }

    getter? decommissioned : Bool = false

    # Wakes an executor which is waiting for work so it can stop. The
    # buffer of one lets #decommission! send without waiting for a receiver.
    @stop_channel = Channel(Nil).new(1)

    # Marks this executor for graceful shutdown. It will stop after
    # completing its current job (if any).
    #
    # Calling it again has no effect.
    def decommission!
      return if @decommissioned
      @decommissioned = true
      @stop_channel.send(nil)
    end

    # The job run of the current work unit.
    private def job_run : JobRun
      work_unit.job_run
    end

    # The queue of the current work unit.
    private def queue : Queue
      work_unit.queue
    end

    private def state=(state : State)
      # Send a message to the overseer that this executor is idle,
      # including the job that was just finished (if any).
      # The send happens in its own fiber so this method does not wait
      # for the overseer to receive.
      if state == State::Idle
        spawn { finished_bell.send @work_unit }
      end

      super
    end

    # Takes the job pipeline and the idle bell from the given overseer.
    def initialize(@overseer : Overseer)
      @job_pipeline = overseer.work_handout
      @finished_bell = overseer.finished_notifier
    end

    # :nodoc:
    def runnable_name : String
      "executor.#{object_id}"
    end

    # :nodoc:
    def pre_run : Nil
      # Overseer won't try to dequeue and send any jobs unless it
      # knows that an executor is idle, so the first thing to do
      # is mark this executor as idle. See #state=.
      self.state = State::Idle
    end

    # Decommissions the executor and then follows the regular Runnable
    # stop procedure.
    def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
      decommission!
      super
    end

    # :nodoc:
    # One cycle: wait for either a work unit or the stop signal, execute
    # the work unit, then go back to idle (or to stopping, when the
    # executor was decommissioned in the meantime).
    def each_run : Nil
      if @decommissioned
        self.state = State::Stopping
        return
      end

      dequeue : WorkUnit? = nil

      begin
        # Whichever channel delivers first wins.
        select
        when dequeue = job_pipeline.receive
        when @stop_channel.receive
          self.state = State::Stopping
          return
        end
      rescue Channel::ClosedError
        # A closed channel ends this cycle without changing state.
        return
      end

      return unless dequeue

      self.state = State::Working
      @work_unit = dequeue
      log.trace { "Dequeued #{job_run} from #{queue.name}" }

      begin
        execute
      rescue e
        log.error { "Crashed executing #{job_run}: #{e.inspect}" }
        # If the retry bookkeeping itself raises, banish the job run.
        begin
          job_run.retry_or_banish queue
        rescue
          queue.banish job_run
        end
      end

      log.trace { "Finished #{job_run} from #{queue.name}" }

      if @decommissioned
        self.state = State::Stopping
        return
      end

      self.state = State::Idle
      observer.heartbeat!
    end

    # Runs a job from a Queue.
    #
    # Execution time is measured and logged, and the job is either forgotten
    # or, if it fails, rescheduled.
    #
    # Outcomes:
    # - succeeded: forgotten by the queue, config deleted after `successful_job_ttl`
    # - preempted: forgotten and enqueued again
    # - failed and rescheduleable: rescheduled after its reschedule interval
    # - failed otherwise: banished, config deleted after `failed_job_ttl`
    def execute
      observer.execute job_run, queue do
        job_run.run
      end

      if job_run.succeeded?
        queue.forget job_run
        job_run.delete in: successful_job_ttl

      elsif job_run.preempted?
        queue.forget job_run
        queue.enqueue job_run

      else
        if job_run.rescheduleable?
          next_execution = Time.utc + job_run.reschedule_interval
          queue.reschedule job_run, next_execution
        else
          queue.banish job_run
          job_run.delete in: failed_job_ttl
        end
      end
    end
  end
end
================================================
FILE: src/mosquito/runners/idle_wait.cr
================================================
module Mosquito::Runners
  module IdleWait
    # Runs the block and then sleeps for whatever remains of `idle_wait`,
    # so that the call takes at least `idle_wait` overall. No sleep happens
    # when the block itself took that long or longer.
    def with_idle_wait(idle_wait : Time::Span)
      delta = Time.measure do
        yield
      end

      if delta < idle_wait
        # Fiber.timeout(idle_wait - delta)
        sleep(idle_wait - delta)
      end
    end
  end
end
================================================
FILE: src/mosquito/runners/overseer.cr
================================================
require "./idle_wait"
require "./queue_list"
require "./run_at_most"
require "../runnable"

module Mosquito::Runners
  # The Overseer is responsible for managing:
  # - a `Coordinator`
  # - a pool of `Executor`s
  # - the `QueueList`
  # - any idle state as configured
  #
  # An overseer manages the loop that each thread or process runs.
  class Overseer
    include IdleWait
    include RunAtMost
    include Runnable

    getter observer : Observability::Overseer { Observability::Overseer.new(self) }
    getter queue_list : QueueList
    getter executors
    getter coordinator
    getter dequeue_adapter : Mosquito::DequeueAdapter

    # The channel where job runs which have been dequeued are sent to executors.
    getter work_handout

    # When an executor transitions to idle it will send the finished
    # {JobRun, Queue} tuple here (or nil on first idle). The Overseer
    # uses this as a signal to check the queues for more work.
    getter finished_notifier

    # The number of executors to start.
getter executor_count : Int32

    # Sets the target executor count; values below 1 are raised to 1.
    def executor_count=(count : Int32)
      @executor_count = Math.max(count, 1)
    end

    # Upper bound on how long a cycle waits for an idle executor, and how
    # long #sleep sleeps.
    getter idle_wait : Time::Span

    # Reads the configuration, builds the queue list, coordinator and
    # channels, and builds (without starting) the initial executors.
    def initialize
      @executor_count = Mosquito.configuration.executor_count
      @idle_wait = Mosquito.configuration.idle_wait

      @finished_notifier = Channel(WorkUnit?).new

      @queue_list = QueueList.new
      @queue_list.resource_gates = Mosquito.configuration.resource_gates
      @coordinator = Coordinator.new queue_list
      @dequeue_adapter = Mosquito.configuration.dequeue_adapter
      @executors = [] of Executor
      @work_handout = Channel(WorkUnit).new

      executor_count.times do
        @executors << build_executor
      end

      observer.update_executor_list executors
    end

    # Builds an executor bound to this overseer and reports it to the
    # observer. The executor is not started.
    def build_executor : Executor
      Executor.new(overseer: self).tap do |executor|
        observer.executor_created executor
      end
    end

    def runnable_name : String
      "overseer"
    end

    # Sleeps for `idle_wait`.
    def sleep
      log.trace { "Going to sleep now for #{idle_wait}" }
      sleep idle_wait
    end

    # Starts all the subprocesses.
    def pre_run : Nil
      observer.starting
      @queue_list.run
      @executors.each(&.run)
    end

    # Notifies the observer (only while still running) and then follows
    # the regular Runnable stop procedure.
    def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
      observer.shutting_down if state.running?
      super
    end

    # Notify all subprocesses to stop, and wait until they do.
    # After executors finish, any jobs left in the pending queue are
    # moved back to waiting so another worker can pick them up.
    #
    # NOTE(review): no step which moves pending jobs back to waiting is
    # visible in this method — confirm where that happens.
    def post_run : Nil
      observer.stopping
      coordinator.post_run
      # One slot per executor in the pool, plus one for the queue list.
      child_fiber_shutdown = WaitGroup.new(executors.size + 1)
      executors.each { |e| e.stop(child_fiber_shutdown) }
      @queue_list.stop(child_fiber_shutdown)
      work_handout.close
      child_fiber_shutdown.wait
      observer.stopped
    end

    # The goal for the overseer is to:
    # - Ensure that the coordinator gets run frequently to schedule delayed/periodic jobs.
    # - Wait for an executor to be idle, and dequeue work if possible.
    # - Monitor the executor pool for unexpected termination and respawn.
    def each_run : Nil
      # When shutting down, stop dequeuing new work immediately.
      return if state.stopping?

      coordinator.schedule

      # I cannot imagine a situation where this happens in the normal flow of
      # events, but if it did it would be a mess. If something crashes hard
      # enough that one of these channels closes the whole thing is going to
      # come crashing down and we should just quit now.
      if work_handout.closed? || finished_notifier.closed?
        observer.channels_closed
        stop
        return
      end

      # If the queue list hasn't run at least once, it won't have any queues to
      # search for so we'll just defer until it's available.
      unless queue_list.state.started?
        observer.waiting_for_queue_list
        return
      end

      log.trace { "Waiting for an idle executor" }
      all_executors_busy = true

      # This feature is under documented in the crystal manual.
      # This will attempt to receive from the idle notifier, but only
      # wait for up to idle_wait seconds.
      #
      # The interrupt is necessary to remind the coordinator to schedule
      # jobs.
      select
      when finished_job = @finished_notifier.receive
        log.trace { "Found an idle executor" }
        all_executors_busy = false

        # A nil notification carries no finished work to report.
        if finished_job
          dequeue_adapter.finished_with(finished_job.job_run, finished_job.queue)
          queue_list.notify_released(finished_job.job_run, finished_job.queue)
        end
      when timeout(idle_wait)
        log.trace { "Idled for #{idle_wait.total_seconds}s" }
      end

      case
      when state.stopping?
        # Stop was requested while waiting: dequeue nothing.

      # If none of the executors is idle, don't dequeue anything or it'll get lost.
      when all_executors_busy
        log.trace { "No idle executors" }

      # We know that an executor is idle and will take the work, it's safe to dequeue.
      when next_job_run = dequeue_job?
        log.trace { "Dequeued job: #{next_job_run.job_run.id} #{next_job_run.queue.name}" }
        work_handout.send next_job_run

      # An executor is idle, but dequeue returned nil.
      else
        log.trace { "No job to dequeue" }
        sleep

        # The idle notification has been consumed, and it needs to be
        # re-sent so that the next loop can still find the idle executor.
        spawn { @finished_notifier.send nil }
      end

      maybe_apply_remote_executor_count
      adjust_executor_pool

      run_at_most every: Mosquito.configuration.heartbeat_interval, label: :heartbeat do
        observer.heartbeat
      end

      run_at_most every: Mosquito.configuration.heartbeat_interval * 3, label: :pending_cleanup do
        cleanup_orphaned_pending_jobs
      end
    end

    # Delegates job dequeue to the configured `DequeueAdapter`.
    #
    # The adapter can be swapped via `Mosquito.configuration.dequeue_adapter`
    # to implement custom strategies (priority, round-robin, rate limiting, etc).
    #
    # A dequeued job run is marked as claimed by this overseer before it
    # is returned.
    def dequeue_job? : WorkUnit?
      if result = dequeue_adapter.dequeue(queue_list)
        result.job_run.claimed_by self
      end

      result
    end

    # When a job fails any exceptions are caught and logged. If a job causes something more
    # catastrophic we can try to recover by spawning a new executor.
    #
    # This happens, for example, when a new version of a worker is deployed and work is still
    # in the queue that references job classes that no longer exist.
    #
    # When a dead executor is found, any job it was working on has its
    # failure counter incremented and follows the standard retry logic.
    #
    # The overseer itself is stopped when the queue list has died.
    def adjust_executor_pool : Nil
      # Remove dead/crashed executors and recover their jobs.
      # `select` returns a new array, so deleting from the pool inside the
      # loop does not disturb the iteration.
      executors.select {|executor| executor.dead? || executor.state.crashed? }
        .each do |dead_executor|
          observer.executor_died dead_executor
          recover_job_from dead_executor
          executors.delete dead_executor
        end

      # Scale up: spawn new executors to reach the target count.
      (executor_count - executors.size).times do
        executors << build_executor.tap(&.run)
      end

      # Scale down: decommission excess executors and remove them from the pool.
      # They will finish their current job (if any) and then stop.
      while executors.size > executor_count
        executors.pop.decommission!
      end

      observer.update_executor_list executors

      if queue_list.dead?
        observer.queue_list_died
        stop
      end
    end

    # Scans pending queues for jobs owned by overseers that are no longer
    # alive.
# Each orphaned job has its failure counter incremented and
    # follows the standard retry logic.
    #
    # An overseer is considered alive if it has registered a heartbeat
    # within the configured dead_overseer_threshold. Jobs with no overseer_id
    # (predating this feature) are claimed by this overseer so they become
    # recoverable when this overseer later dies.
    #
    # Pending entries whose job run can no longer be retrieved are removed
    # from the pending queue and counted as well.
    # :nodoc:
    def cleanup_orphaned_pending_jobs : Nil
      live_overseers = Mosquito.backend.list_active_overseers(
        since: Time.utc - Mosquito.configuration.dead_overseer_threshold
      ).to_set

      queue_names = Mosquito.backend.list_queues
      return if queue_names.empty?

      # Number of pending entries cleaned up or recovered in this pass.
      total = 0

      queue_names.each do |name|
        q = Queue.new(name)

        q.backend.list_pending.each do |job_run_id|
          job_run = JobRun.retrieve(job_run_id)

          unless job_run
            # Job config is gone (expired/deleted), just clean up the
            # dangling reference in the pending queue.
            q.backend.finish JobRun.new("_cleanup", id: job_run_id)
            total += 1
            next
          end

          # Jobs without an overseer_id predate this feature. Claim them
          # so a future cleanup cycle can detect if this overseer dies.
          unless oid = job_run.overseer_id
            job_run.claimed_by self
            next
          end

          # The owning overseer is still alive: leave its job alone.
          next if live_overseers.includes?(oid)

          observer.recovered_orphaned_job job_run, oid

          begin
            job_run.retry_or_banish q
          rescue e : KeyError
            # Only a KeyError is handled here; the job run is banished
            # rather than retried.
            log.warn { "Skipping orphaned job #{job_run_id}: #{e.message}" }
            q.banish job_run
          end

          total += 1
        end
      end

      if total > 0
        observer.orphaned_jobs_recovered total
      end
    end

    # Polls the backend for a remote executor count override and applies
    # it when present. Checks at most once per heartbeat interval.
    # The resolved value follows the precedence: per-overseer → global → current.
private def maybe_apply_remote_executor_count : Nil
      run_at_most every: Mosquito.configuration.heartbeat_interval, label: :remote_executor_count do
        overseer_id = Mosquito.configuration.overseer_id
        if remote_count = Api::ExecutorConfig.resolve(overseer_id)
          # Never go below one executor.
          clamped = Math.max(remote_count, 1)
          if clamped != executor_count
            log.info { "Remote executor count changed: #{executor_count} → #{clamped}" }
            self.executor_count = clamped
          end
        end
      rescue ex
        # A failed lookup is logged and the current count is kept.
        log.warn { "Failed to fetch remote executor count: #{ex.message}" }
      end
    end

    # If a dead executor was working on a job, increment its failure
    # counter and follow the standard retry logic.
    private def recover_job_from(dead_executor : Executor) : Nil
      return unless work_unit = dead_executor.work_unit?
      observer.recovered_job_from_executor work_unit.job_run, dead_executor
      dequeue_adapter.finished_with(work_unit.job_run, work_unit.queue)
      work_unit.job_run.retry_or_banish work_unit.queue
    end
  end
end
================================================
FILE: src/mosquito/runners/queue_list.cr
================================================
require "./run_at_most"
require "../runnable"
require "./idle_wait"
require "../resource_gate"

module Mosquito::Runners
  # QueueList handles searching the redis keyspace for named queues.
  class QueueList
    include RunAtMost
    include Runnable
    include IdleWait

    getter observer : Observability::QueueList { Observability::QueueList.new(self) }

    # Maps queue names to resource gates. Queues not present in this
    # mapping are always eligible for dequeuing.
    property resource_gates : Hash(String, ResourceGate) = {} of String => ResourceGate

    def initialize
      @discovered_queues = [] of Queue
    end

    # Returns the queues eligible for dequeuing: discovered queues
    # filtered by any configured resource gates.
    def queues : Array(Queue)
      return @discovered_queues if resource_gates.empty?

      @discovered_queues.select do |q|
        gate = resource_gates[q.name]?
        gate.nil? || gate.allow?
      end
    end

    def runnable_name : String
      "queue-list"
    end

    # Notifies the resource gate for the given queue that a job has
    # finished, allowing it to update internal bookkeeping.
    def notify_released(job_run : JobRun, queue : Queue) : Nil
      if gate = resource_gates[queue.name]?
        gate.released(job_run, queue)
      end
    end

    delegate each, to: queues

    # One cycle: list the queues known to the backend, keep the ones this
    # worker is configured to run from, set the paused ones aside, and
    # store the remainder as the discovered queues.
    def each_run : Nil
      # This idle wait should be at most 1 second. Longer can cause periodic jobs
      # which are specified at the second-level to be executed aperiodically.
      # Shorter will generate excess noise in the redis connection.
      with_idle_wait(1.seconds) do
        # The instance variable is assigned directly, bypassing `state=`.
        @state = State::Working

        candidate_queues = Mosquito.backend.list_queues.map { |name| Queue.new name }
        new_queue_list = filter_queues candidate_queues
        paused, new_queue_list = new_queue_list.partition(&.paused?)

        observer.checked_for_paused_queues paused

        # The block only produces a message when new queues were found.
        log.notice {
          queues_which_have_never_been_seen = new_queue_list - @discovered_queues

          if queues_which_have_never_been_seen.size > 0
            "found #{queues_which_have_never_been_seen.size} new queues: #{queues_which_have_never_been_seen.map(&.name).join(", ")}"
          end
        }

        @discovered_queues = new_queue_list
        @state = State::Idle
      end
    end

    # Restricts the given queues to those named in
    # `Mosquito.configuration.run_from`. An empty `run_from` permits
    # every queue.
    private def filter_queues(present_queues : Array(Mosquito::Queue))
      permitted_queues = Mosquito.configuration.run_from
      return present_queues if permitted_queues.empty?
      filtered_queues = present_queues.select do |queue|
        permitted_queues.includes? queue.name
      end

      # The block only produces a message when everything was filtered out.
      log.for("filter_queues").notice {
        if filtered_queues.empty?
          filtered_out_queues = present_queues - filtered_queues

          if filtered_out_queues.size > 0
            "No watchable queues found. Ignored #{filtered_out_queues.size} queues not configured to be watched: #{filtered_out_queues.map(&.name).join(", ")}"
          end
        end
      }

      filtered_queues
    end
  end
end
================================================
FILE: src/mosquito/runners/run_at_most.cr
================================================
module Mosquito::Runners
  module RunAtMost
    # When each labelled block last ran.
    getter execution_timestamps = {} of Symbol => Time::Instant

    # Yields only when the block labelled `name` has never run, or last
    # ran at least `interval` ago. The timestamp is recorded before the
    # block is yielded to, so a block which raises still counts as run.
    private def run_at_most(*, every interval, label name, &block)
      now = Time.instant
      last_execution = @execution_timestamps[name]?

      if last_execution.nil? || (now - last_execution) >= interval
        @execution_timestamps[name] = now
        yield now
      end
    end
  end
end
================================================
FILE: src/mosquito/scheduled_job.cr
================================================
module Mosquito
  # Base class for jobs which run at a given time. Scheduled jobs are
  # never rescheduled after a failure.
  abstract class ScheduledJob < Job
    def initialize
    end

    abstract def build_job_run

    macro inherited
      Mosquito::Base.register_job_mapping job_name, {{ @type.id }}

      # Builds a fresh job run for this job class.
      def build_job_run
        Mosquito::JobRun.new(job_name)
      end

      # Registers this job class to run at `time`.
      #
      # The argument must be interpolated: a bare `time` in the macro body
      # would be emitted as a literal identifier instead of the expression
      # given by the caller.
      macro run_at(time)
        Mosquito::Base.register_job \{{ @type.id }}, to_run_at: \{{ time }}
      end
    end

    def rescheduleable?
      false
    end
  end
end
================================================
FILE: src/mosquito/serializers/primitives.cr
================================================
# Converts primitive job parameters to and from their String form.
module Mosquito::Serializers::Primitives
  def serialize_string(str : String) : String
    str
  end

  def deserialize_string(raw : String) : String
    raw
  end

  def serialize_bool(value : Bool) : String
    value.to_s
  end

  # Only the exact string "true" deserializes to true.
  def deserialize_bool(raw : String) : Bool
    raw == "true"
  end

  # Symbols are not supported as job parameters: this always raises.
  def serialize_symbol(sym : Symbol) : Nil
    raise "Symbols cannot be deserialized. Stringify your symbol first to pass it as a mosquito job parameter."
  end

  def serialize_char(char : Char) : String
    char.to_s
  end

  # Returns the first character of `raw`.
  def deserialize_char(raw : String) : Char
    raw[0]
  end

  def serialize_uuid(uuid : UUID) : String
    uuid.to_s
  end

  def deserialize_uuid(raw : String) : UUID
    UUID.new raw
  end

  # Generates a serialize_*/deserialize_* pair for every numeric type
  # below, pairing the type with the String conversion method to use.
  {% begin %}
    {%
      primitives = [
        { Int8, :to_i8 },
        { Int16, :to_i16 },
        { Int32, :to_i32 },
        { Int64, :to_i64 },
        { Int128, :to_i128 },

        { UInt8, :to_u8 },
        { UInt16, :to_u16 },
        { UInt32, :to_u32 },
        { UInt64, :to_u64 },
        { UInt128, :to_u128 },

        { Float32, :to_f32 },
        { Float64, :to_f64 }
      ]
    %}
    {% for mapping in primitives %}
      {%
        type = mapping.first
        method_suffix = type.stringify.underscore
        method = mapping.last
      %}

      def serialize_{{ method_suffix.id }}(value) : String
        value.to_s
      end

      def deserialize_{{ method_suffix.id }}(raw : String) : {{ type.id }}?
        if raw
          raw.{{ method.id }}
        end
      end
    {% end %}
  {% end %}
end
================================================
FILE: src/mosquito/test_backend.cr
================================================
module Mosquito
  # An in-memory noop backend designed to be used in application testing.
  #
  # The test mode backend simply makes a copy of job_runs at enqueue time and holds them in a class getter array.
  #
  # Job run id, config (aka parameters), and runtime class are kept in memory, and a truncate utility function is provided.
  #
  # To activate test mode, configure the test backend like this:
  #
  # ```
  # Mosquito.configure do |settings|
  #   settings.backend = Mosquito::TestBackend.new
  # end
  # ```
  #
  # Then in your tests:
  #
  # ```
  # describe "testing" do
  #   it "enqueues the job" do
  #     # build and enqueue a job
  #     job_run = EchoJob.new(text: "hello world").enqueue
  #
  #     # assert that the job was enqueued
  #     latest_enqueued_job = Mosquito::TestBackend.enqueued_jobs.last
  #
  #     # check the job config
  #     assert_equal "hello world", latest_enqueued_job.config["text"]
  #
  #     # check the job_id matches
  #     assert_equal job_run.id, latest_enqueued_job.id
  #
  #     # optionally, truncate the history
  #     Mosquito::TestBackend.flush_enqueued_jobs!
#   end
  # end
  # ```
  class TestBackend < Mosquito::Backend
    # The test backend holds no connection.
    def connection
      nil
    end

    getter connection_string : String?

    def connection_string=(value : String)
      @connection_string = value
    end

    # The test backend is always considered correctly configured.
    def valid_configuration? : Bool
      true
    end

    # The storage, listing, counter, lock and pub/sub methods below are
    # stubs: writes do nothing and reads return an empty or zero value.

    def store(key : String, value : Hash(String, String?) | Hash(String, String)) : Nil
    end

    def retrieve(key : String) : Hash(String, String)
      {} of String => String
    end

    def list_queues : Array(String)
      [] of String
    end

    def list_overseers : Array(String)
      [] of String
    end

    def list_active_overseers(since : Time) : Array(String)
      [] of String
    end

    def register_overseer(id : String) : Nil
    end

    def deregister_overseer(id : String) : Nil
    end

    def delete(key : String, in ttl : Int64 = 0) : Nil
    end

    def delete(key : String, in ttl : Time::Span) : Nil
    end

    def expires_in(key : String) : Int64
      0_i64
    end

    def get(key : String, field : String) : String?
    end

    def set(key : String, field : String, value : String) : String
      ""
    end

    def set(key : String, values : Hash(String, String?) | Hash(String, Nil) | Hash(String, String)) : Nil
    end

    def delete_field(key : String, field : String) : Nil
    end

    def increment(key : String, field : String) : Int64
      0_i64
    end

    def increment(key : String, field : String, by value : Int32) : Int64
      0_i64
    end

    def flush : Nil; end

    # Always reports that the lock was not acquired.
    def lock?(key : String, value : String, ttl : Time::Span) : Bool
      false
    end

    # Always reports that the lock was not renewed.
    def renew_lock?(key : String, value : String, ttl : Time::Span) : Bool
      false
    end

    def unlock(key : String, value : String) : Nil
    end

    def publish(key : String, value : String) : Nil
    end

    # Returns a new channel; nothing in this class sends to it.
    def subscribe(key : String) : Channel(BroadcastMessage)
      Channel(BroadcastMessage).new
    end

    def average_push(key : String, value : Int32, window_size : Int32 = 100) : Nil
    end

    def average(key : String) : Int32
      0_i32
    end

    protected def _build_queue(name : String) : Queue
      Queue.new(self, name)
    end

    # A snapshot of a job run taken at enqueue time: its id, the job
    # class it resolves to, and its config.
    struct EnqueuedJob
      getter id : String
      getter klass : Mosquito::Job.class
      getter config : Hash(String, String)

      # Builds the snapshot, resolving the job class from the job run's type.
      def self.from(job_run : JobRun)
        job_class = Mosquito::Base.job_for_type(job_run.type)
        new(
          job_run.id,
          job_class,
          job_run.config
        )
      end

      def initialize(@id, @klass, @config)
      end
    end

    # Every job enqueued through the test backend, in enqueue order.
    class_property enqueued_jobs = [] of EnqueuedJob

    # Forgets the recorded jobs.
    def self.flush_enqueued_jobs!
      @@enqueued_jobs = [] of EnqueuedJob
    end

    class Queue < Backend::Queue
      # Records the job run in `TestBackend.enqueued_jobs` and returns it.
      def enqueue(job_run : JobRun) : JobRun
        TestBackend.enqueued_jobs << EnqueuedJob.from(job_run)
        job_run
      end

      def dequeue : JobRun?
        raise "Mosquito: attempted to dequeue a job from the testing backend."
      end

      # Scheduling is a no-op: the job run is returned without being recorded.
      def schedule(job_run : JobRun, at scheduled_time : Time) : JobRun
        job_run
      end

      def deschedule : Array(JobRun)
        raise "Mosquito: attempted to deschedule a job from the testing backend."
      end

      def undequeue : JobRun?
        raise "Mosquito: attempted to undequeue a job from the testing backend."
      end

      def finish(job_run : JobRun)
      end

      def terminate(job_run : JobRun)
      end

      def flush : Nil
      end

      def size(include_dead : Bool = true) : Int64
        0_i64
      end

      # Each list is always empty and each size is always zero.
      {% for name in ["waiting", "scheduled", "pending", "dead"] %}
        def list_{{name.id}} : Array(String)
          [] of String
        end

        def {{name.id}}_size : Int64
          0_i64
        end
      {% end %}

      def scheduled_job_run_time(job_run : JobRun) : Time?
      end

      # Names of the queues which are currently paused, shared by all
      # test queues.
      @@paused_queues = Set(String).new

      def self.flush_paused_queues!
        @@paused_queues.clear
      end

      # Marks this queue as paused. The duration is ignored.
      def pause(duration : Time::Span? = nil) : Nil
        @@paused_queues.add name
      end

      def resume : Nil
        @@paused_queues.delete name
      end

      def paused? : Bool
        @@paused_queues.includes? name
      end
    end
  end
end
================================================
FILE: src/mosquito/unique_job.cr
================================================
module Mosquito::UniqueJob
  module ClassMethods
    # Configures job uniqueness for this job.
    #
    # `duration` controls how long the uniqueness lock is held. After this
    # period expires, the same job can be enqueued again.
    #
    # `key` is an array of parameter names (as strings) used to compute the
    # uniqueness key. When omitted, all parameters are used by default.
#
    # ```
    # class SendEmailJob < Mosquito::QueuedJob
    #   include Mosquito::UniqueJob
    #
    #   unique_for 1.hour
    #
    #   param user_id : Int64
    #   param email_type : String
    #
    #   def perform
    #     # ...
    #   end
    # end
    # ```
    #
    # With a key filter:
    #
    # ```
    # class SendEmailJob < Mosquito::QueuedJob
    #   include Mosquito::UniqueJob
    #
    #   unique_for 1.hour, key: [:user_id, :email_type]
    #
    #   param user_id : Int64
    #   param email_type : String
    #   param metadata : String
    #
    #   def perform
    #     # ...
    #   end
    # end
    # ```
    def unique_for(duration : Time::Span)
      @@unique_duration = duration
    end
  end

  macro included
    extend ClassMethods

    @@unique_duration : Time::Span = 0.seconds
    @@unique_key_fields : Array(String)? = nil

    # Sets the uniqueness window and, optionally, which parameters count
    # towards the fingerprint.
    #
    # With `key`, only the named parameters are used to build the
    # uniqueness fingerprint. Without it, every parameter is used.
    macro unique_for(duration, key = nil)
      @@unique_duration = \{{ duration }}

      \{% if key %}
        @@unique_key_fields = [
          \{% for k in key %}
            \{{ k.id.stringify }},
          \{% end %}
        ]
      \{% else %}
        @@unique_key_fields = nil
      \{% end %}
    end

    # Takes the uniqueness lock before a job is enqueued. The hook answers
    # false, suppressing the job, when the lock is already held; it
    # answers true when the lock was acquired or when no uniqueness
    # window is configured.
    before_enqueue do
      if @@unique_duration.total_seconds > 0
        key = uniqueness_key(job)
        lock_value = job.id

        if Mosquito.backend.lock?(key, lock_value, @@unique_duration)
          true
        else
          Log.info { "Duplicate job suppressed: #{self.class.name} (key: #{key})" }
          false
        end
      else
        true
      end
    end
  end

  # Computes the backend key of the uniqueness lock for a job run.
  #
  # The fingerprint is the job name followed by `name=value` pairs taken
  # from the job run's config in sorted name order, joined with ":". When
  # `@@unique_key_fields` is set, only those parameter names contribute.
  def uniqueness_key(job_run : Mosquito::JobRun) : String
    allowed_fields = @@unique_key_fields

    parts = [] of String
    parts << self.class.job_name

    selected_names = job_run.config.keys.sort.select do |param_name|
      allowed_fields.nil? || allowed_fields.includes?(param_name)
    end

    selected_names.each do |param_name|
      parts << "#{param_name}=#{job_run.config[param_name]}"
    end

    Mosquito.backend.build_key "unique_job", parts.join(":")
  end

  # The uniqueness window configured for this job class.
  def unique_duration : Time::Span
    @@unique_duration
  end
end
================================================
FILE: src/mosquito/version.cr
================================================
module Mosquito
  VERSION = "2.0.0"
end
================================================
FILE: src/mosquito.cr
================================================
require "./mosquito/runners/run_at_most"
require "./mosquito/api"
require "./mosquito/**"

module Mosquito
  Log = ::Log.for self

  # The backend selected in the configuration.
  def self.backend
    configuration.backend
  end
end
================================================
FILE: src/ye_olde_redis.cr
================================================
# Monkeypatch to revert to the old Redis behavior, for Redis servers pre 6.2 which don't support
# https://redis.io/docs/latest/commands/lmove/
module Mosquito
  class RedisBackend < Mosquito::Backend
    class Queue < Backend::Queue
      # Moves one id from the waiting list to the pending list with
      # RPOPLPUSH and retrieves the matching job run, if there was one.
      def dequeue : JobRun?
        if id = redis.rpoplpush waiting_q, pending_q
          JobRun.retrieve id.to_s
        end
      end
    end
  end
end