Repository: robacarp/mosquito
Branch: master
Commit: 6ededc5e7a34
Files: 134
Total size: 306.2 KB
Directory structure:
gitextract_f3v7pvaw/
├── .claude/
│ ├── hooks/
│ │ └── session-start.sh
│ ├── settings.json
│ └── todo.md
├── .github/
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ └── bug.md
│ └── workflows/
│ ├── ci.yml
│ └── docs.yml
├── .gitignore
├── .tool-versions
├── :w
├── CHANGELOG.md
├── LICENSE
├── Makefile
├── README.md
├── benchmark/
│ ├── benchmark.cr
│ └── jobs/
│ └── emit_message_job.cr
├── demo/
│ ├── jobs/
│ │ ├── custom_serializers.cr
│ │ ├── periodically_puts.cr
│ │ ├── queued_job.cr
│ │ ├── rate_limited_job.cr
│ │ └── unique_job.cr
│ └── run.cr
├── scripts/
│ ├── increment_version
│ ├── lib/
│ │ └── increment_version.sh
│ └── version_tag
├── shard.yml
├── spec/
│ ├── helpers/
│ │ ├── bare_base_class.cr
│ │ ├── configuration_helper.cr
│ │ ├── global_helpers.cr
│ │ ├── logging_helper.cr
│ │ ├── mock_coordinator.cr
│ │ ├── mock_executor.cr
│ │ ├── mock_overseer.cr
│ │ ├── mock_queue_list.cr
│ │ ├── mocks.cr
│ │ ├── null_dequeue_adapter.cr
│ │ ├── pub_sub.cr
│ │ └── spy_dequeue_adapter.cr
│ ├── mosquito/
│ │ ├── api/
│ │ │ ├── executor_config_spec.cr
│ │ │ ├── executor_spec.cr
│ │ │ ├── job_run_spec.cr
│ │ │ ├── overseer_spec.cr
│ │ │ ├── periodic_job_spec.cr
│ │ │ ├── publisher_spec.cr
│ │ │ └── queue_spec.cr
│ │ ├── api_spec.cr
│ │ ├── backend/
│ │ │ ├── deleting_spec.cr
│ │ │ ├── executor_spec.cr
│ │ │ ├── expiring_list_spec.cr
│ │ │ ├── hash_storage_spec.cr
│ │ │ ├── inspection_spec.cr
│ │ │ ├── lock_spec.cr
│ │ │ ├── overseer_spec.cr
│ │ │ └── queueing_spec.cr
│ │ ├── backend_spec.cr
│ │ ├── base_spec.cr
│ │ ├── configuration_spec.cr
│ │ ├── dequeue_adapters/
│ │ │ ├── concurrency_limited_dequeue_adapter_spec.cr
│ │ │ ├── remote_config_dequeue_adapter_spec.cr
│ │ │ ├── shuffle_dequeue_adapter_spec.cr
│ │ │ └── weighted_dequeue_adapter_spec.cr
│ │ ├── exceptions_spec.cr
│ │ ├── job/
│ │ │ └── job_state_spec.cr
│ │ ├── job_run/
│ │ │ ├── rescheduling_spec.cr
│ │ │ ├── running_spec.cr
│ │ │ └── storage_spec.cr
│ │ ├── job_run_spec.cr
│ │ ├── job_spec.cr
│ │ ├── key_builder_spec.cr
│ │ ├── metadata_spec.cr
│ │ ├── periodic_job_run_spec.cr
│ │ ├── periodic_job_spec.cr
│ │ ├── queue_spec.cr
│ │ ├── queued_job_spec.cr
│ │ ├── rate_limiter_spec.cr
│ │ ├── resource_gate_spec.cr
│ │ ├── runnable_spec.cr
│ │ ├── runners/
│ │ │ ├── coordinator_spec.cr
│ │ │ ├── executor_spec.cr
│ │ │ ├── overseer_spec.cr
│ │ │ ├── queue_list_spec.cr
│ │ │ └── run_at_most_spec.cr
│ │ ├── serializers/
│ │ │ └── primitive_serializers_spec.cr
│ │ ├── testing_backend_spec.cr
│ │ ├── unique_job_spec.cr
│ │ └── version_spec.cr
│ └── spec_helper.cr
└── src/
├── mosquito/
│ ├── api/
│ │ ├── concurrency_config.cr
│ │ ├── executor.cr
│ │ ├── executor_config.cr
│ │ ├── job_run.cr
│ │ ├── observability/
│ │ │ └── publisher.cr
│ │ ├── overseer.cr
│ │ ├── periodic_job.cr
│ │ ├── queue.cr
│ │ └── queue_list.cr
│ ├── api.cr
│ ├── backend.cr
│ ├── base.cr
│ ├── configuration.cr
│ ├── dequeue_adapter.cr
│ ├── dequeue_adapters/
│ │ ├── concurrency_limited_dequeue_adapter.cr
│ │ ├── remote_config_dequeue_adapter.cr
│ │ ├── shuffle_dequeue_adapter.cr
│ │ └── weighted_dequeue_adapter.cr
│ ├── exceptions.cr
│ ├── gates/
│ │ ├── open_gate.cr
│ │ └── threshold_gate.cr
│ ├── job.cr
│ ├── job_run.cr
│ ├── key_builder.cr
│ ├── metadata.cr
│ ├── periodic_job.cr
│ ├── periodic_job_run.cr
│ ├── queue.cr
│ ├── queued_job.cr
│ ├── rate_limiter.cr
│ ├── redis_backend.cr
│ ├── resource_gate.cr
│ ├── runnable.cr
│ ├── runner.cr
│ ├── runners/
│ │ ├── coordinator.cr
│ │ ├── executor.cr
│ │ ├── idle_wait.cr
│ │ ├── overseer.cr
│ │ ├── queue_list.cr
│ │ └── run_at_most.cr
│ ├── scheduled_job.cr
│ ├── serializers/
│ │ └── primitives.cr
│ ├── test_backend.cr
│ ├── unique_job.cr
│ └── version.cr
├── mosquito.cr
└── ye_olde_redis.cr
================================================
FILE CONTENTS
================================================
================================================
FILE: .claude/hooks/session-start.sh
================================================
#!/bin/bash
# SessionStart hook: prepares a remote (cloud) session for working on this
# repository by installing the Crystal compiler, making sure a Redis server
# is running, and installing the shard dependencies.
# Exits immediately (status 0) when CLAUDE_CODE_REMOTE is not "true".
set -euo pipefail

# Only run in remote (cloud) environments
if [ "${CLAUDE_CODE_REMOTE:-}" != "true" ]; then
  exit 0
fi

# Emit the hook's JSON response on stdout before the slow work starts.
# NOTE(review): presumably tells the hook runner to treat the rest of this
# script as asynchronous with a 300000 ms timeout -- confirm against the
# hooks documentation.
echo '{"async": true, "asyncTimeout": 300000}'

# Read Crystal version from .tool-versions
CRYSTAL_VERSION=$(grep '^crystal ' "$CLAUDE_PROJECT_DIR/.tool-versions" | awk '{print $2}')

# Install Crystal compiler if not already present
if ! command -v crystal &> /dev/null; then
  # Install system dependencies required by Crystal
  apt-get update
  apt-get install -y libgmp-dev libxml2-dev libevent-dev libgc-dev

  # Download and install Crystal from GitHub releases
  curl -fsSL "https://github.com/crystal-lang/crystal/releases/download/${CRYSTAL_VERSION}/crystal-${CRYSTAL_VERSION}-1-linux-x86_64-bundled.tar.gz" -o /tmp/crystal.tar.gz
  mkdir -p /usr/local/crystal
  tar -xzf /tmp/crystal.tar.gz -C /usr/local/crystal --strip-components=2
  ln -sf /usr/local/crystal/bin/crystal /usr/local/bin/crystal
  ln -sf /usr/local/crystal/bin/shards /usr/local/bin/shards
  rm /tmp/crystal.tar.gz
fi

# Start Redis server if not already running
if ! redis-cli ping &> /dev/null; then
  redis-server --daemonize yes

  # The daemonized server may not accept connections right away. Wait (up to
  # roughly 5 seconds) until it answers, so the `config set` below does not
  # fail and abort the whole script under `set -e`.
  for _ in $(seq 1 25); do
    if redis-cli ping &> /dev/null; then
      break
    fi
    sleep 0.2
  done
fi

# Disable RDB persistence to avoid dump.rdb noise in the project directory
redis-cli config set save "" > /dev/null 2>&1

# Install Crystal shard dependencies
cd "$CLAUDE_PROJECT_DIR"
shards install
================================================
FILE: .claude/settings.json
================================================
{
"hooks": {
"SessionStart": [
{
"hooks": [
{
"type": "command",
"command": "$CLAUDE_PROJECT_DIR/.claude/hooks/session-start.sh"
}
]
}
]
}
}
================================================
FILE: .claude/todo.md
================================================
# Migration from publish_metrics branch
## Background
The `publish_metrics` branch contains observability improvements that need to be migrated to master.
This branch has ~24 commits of work dating back to October 2024.
## Functionality to Migrate
Items ordered by size of change, smallest first.
### 1. Metadata Self-Cleanup ✅
Add TTL to metadata so stale entries auto-expire.
- [x] Add `@metadata.delete in: 1.hour` to Executor heartbeat
- [x] Add `@metadata.delete in: 1.hour` to Overseer heartbeat
Already implemented - `Metadata#heartbeat!` includes `delete in: 1.hour` and both observers use it.
### 2. Overseer Event Naming Standardization ✅
Standardize to past tense for consistency with other events.
- [x] "starting" → "started"
- [x] "stopping" → "stopped"
- [x] "stopped" → "exited"
Done in `src/mosquito/api/overseer.cr`.
### 3. Executor Bug Fix ✅
- [x] Fix latent bug: executor calculating run time incorrectly (see commit `mvouzzrz`)
Fixed `100_000` → `1_000_000` in microseconds calculation in `src/mosquito/api/executor.cr`.
### 4. Stable Instance IDs — Skipped
`object_id` is sufficient; no need for `Random::Secure.hex` IDs.
### 5. Nested Publish Context ✅
Allow executor events to be namespaced under their parent overseer.
- [x] Add parent context support to `PublishContext` initializer
- [x] Pass overseer reference to Executor
- [x] Update Executor observer to create PublishContext with overseer as parent
- [x] Executor events publish under `[:overseer, overseer_id, :executor, executor_id]`
- [x] Fix tests (executor/overseer specs, mock_overseer)
Done.
### 6. Observability Gating ✅
Gate metadata writes behind existing `publish_metrics` config.
- [x] Gate `heartbeat!` in Executor observer behind `metrics` macro
- [x] Gate `heartbeat` in Overseer observer behind `metrics` macro (includes `register_overseer`)
- [x] Gate `update_executor_list` in Overseer observer behind `metrics` macro
- [x] Fix pre-existing race condition in executor spec (lazy getter initialization across fibers)
Decided against a separate `Enabled` module / `enable_observability` config — no compelling reason
to have two flags. Reused the existing `metrics` macro which checks `publish_metrics`.
### 7. Observability Tests ✅
#### Fix `assert_message_received` ✅
The helper in `spec/helpers/pub_sub.cr` doesn't actually assert — `find` returns nil
and the result is discarded. All existing event publishing tests are vacuous (always pass).
- [x] Fix `assert_message_received` to fail when no matching message is found
- [x] Fix overseer event assertions to match actual event names
#### Metrics gating ✅
- [x] Executor: heartbeat is skipped when `publish_metrics = false`
- [x] Event publishing is skipped when `publish_metrics = false` (tested via publisher_spec, covers all observers)
#### Queue observer events ✅
- [x] Publishes "rescheduled" event
- [x] Publishes "forgotten" event
- [x] Publishes "banished" event
#### Publish context structure ✅
- [x] Executor publish context is nested under overseer's context
- [x] Overseer publish context has correct originator key
- [x] Queue publish context has correct originator key
## Files to Reference on publish_metrics
Key source files:
- `src/mosquito/observability/concerns/enabled.cr`
- `src/mosquito/observability/concerns/publish_context.cr`
- `src/mosquito/observability/concerns/publisher.cr`
- `src/mosquito/observability/executor.cr`
- `src/mosquito/observability/overseer.cr`
- `src/mosquito/observability/queue.cr`
Key test files:
- `test/mosquito/observability/enabled_test.cr`
- `test/mosquito/observability/executor_test.cr`
- `test/mosquito/observability/overseer_test.cr`
- `test/mosquito/observability/queue_test.cr`
## Notes
- The publish_metrics branch has diverged (shown as `??` in jj) - resolve carefully
- Current working copy already has queue observer events (rescheduled, forgotten, banished)
- Duration averaging and expected_duration_ms already implemented on master
- Test directory structure (`test/` instead of `spec/`) already migrated on master
================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
github: robacarp
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
================================================
FILE: .github/ISSUE_TEMPLATE/bug.md
================================================
---
name: Bug
about: Mosquito has a bug!
title: ''
labels: ''
assignees: robacarp
---
Please include some details:
Crystal version: 0.28.0
Mosquito Shard version: 0.4.0
================================================
FILE: .github/workflows/ci.yml
================================================
# CI workflow: runs the spec suite (`make test`) and the demo (`make demo`)
# against a Redis service container, on every push to master and on every
# pull request that targets master.
name: Test and Demo
on:
pull_request:
branches:
- master
push:
branches:
- master
jobs:
build:
strategy:
# Keep the remaining matrix entries running when one of them fails.
fail-fast: false
matrix:
crystal_version: [1.19, latest]
experimental:
- false
# Extra matrix entry for the nightly compiler, flagged experimental so that
# `continue-on-error` below lets it fail without failing the workflow.
include:
- crystal_version: nightly
experimental: true
name: Build
runs-on: ubuntu-latest
container:
image: crystallang/crystal:latest
continue-on-error: ${{ matrix.experimental }}
services:
redis:
image: redis
env:
# Points at the `redis` service declared above, database 1.
REDIS_URL: redis://redis:6379/1
steps:
- uses: actions/checkout@v4
- run: apt-get update
# Installs the Crystal version selected by the matrix entry.
- uses: crystal-lang/install-crystal@v1
with:
crystal: ${{matrix.crystal_version}}
- run: printenv
- run: crystal --version
- run: shards install
- run: make test
- run: make demo
================================================
FILE: .github/workflows/docs.yml
================================================
# Docs workflow: on every push to master, builds the API documentation with
# `crystal docs` and publishes the generated ./docs directory to GitHub Pages.
name: Build Docs
on:
  push:
    branches:
      - master
jobs:
  deploy:
    name: Running Docs
    runs-on: ubuntu-latest
    container:
      image: crystallang/crystal:latest
    steps:
      # Same checkout major version as the CI workflow (ci.yml uses @v4).
      - uses: actions/checkout@v4
      - run: apt-get update
      - uses: crystal-lang/install-crystal@v1
      - run: crystal --version
      - run: shards install
      # Writes the generated documentation into ./docs.
      - run: crystal docs
      - name: Deploy
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./docs
================================================
FILE: .gitignore
================================================
/lib/
/bin/
/.shards/
# Libraries don't need dependency lock
# Dependencies will be locked in application that uses them
/shard.lock
docs
# Claude Code local user config (not hooks/settings which are shared)
.claude/local/
CLAUDE.local.md
================================================
FILE: .tool-versions
================================================
crystal 1.19.1
================================================
FILE: :w
================================================
================================================
FILE: CHANGELOG.md
================================================
# Changelog
The format is based on [Keep a
Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Mosquito::Api now allows for inspecting the state of a mosquito cluster. Many of these features are disabled by default by the configuration property `config.publish_metrics`.
- Executor api implemented in #147
- JobRun api implemented in #148 and #161
- Overseer api implemented in #150
- Queue api implemented in #153
- Mosquito now publishes a variety of events and metrics to a redis pubsub channel. This behavior is disabled by default with the configuration property `config.publish_metrics`.
- Executor events in #154: job-started and job-finished
- Overseer events in #160: started, executor-created, executor-died, stopped, and exited
- Queue events: enqueue, dequeue, reschedule, forget, and banish
- Expected job duration is now published with executor events
The Mosquito API can be used to subscribe to these events with `Mosquito::API.event_receiver`
- Pluggable dequeue adapters allow customizing how jobs are selected from queues (#183)
- `DequeueAdapter` abstract base class defines the adapter interface
- `ShuffleDequeueAdapter` is the default, preserving existing randomized behavior
- `WeightedDequeueAdapter` allows queue-level prioritization via configurable weights
- Configurable via `Mosquito.configure { |c| c.dequeue_adapter = ... }`
- Executor count is now configurable (default increased from 3 to 6) (#184)
- Set via `Mosquito.configure { |c| c.executor_count = 10 }`
- Override with the `MOSQUITO_EXECUTOR_COUNT` environment variable
- `JobRun#started_at` and `JobRun#finished_at` timestamps are now exposed as typed `Time?` getters (#179)
- Graceful worker shutdown: on SIGTERM/SIGINT the overseer stops dequeuing, waits for in-flight executors to finish, and requeues any jobs left in pending back to waiting (#190)
- Queues can now be paused and resumed. While paused, `#dequeue` returns nil and jobs accumulate until the queue is resumed. An optional duration enables automatic resumption, useful for backing off rate-limited resources. (#192)
- Overseers now take ownership of job runs when dequeued, and clean up abandoned pending job runs on startup (#180)
- Mosquito can now accept pre-existing backend connections via `Configuration#backend_connection`. This allows sharing a connection pool with the rest of an application. (#193)
- JobRun now uses Metadata for all backend storage operations, replacing direct backend calls with the Metadata abstraction layer.
- `Mosquito::UniqueJob` module provides opt-in job deduplication. Including the module in a job class prevents enqueueing duplicate jobs when an identical job is already waiting or scheduled. Uniqueness keys are derived from job parameters at compile time.
### Changed
- (breaking) `Configuration#connection_string` has been renamed to `Configuration#backend_connection_string` (#193)
- (minor breaking) Logs are now emitted from runners with a slightly different source tag. (#152)
For example:
The overseer boot message used to be:
`INFO - mosquito.runners.overseer.4315742080: Overseer<4315742080> is starting`
Now the message is simply:
`INFO - mosquito.overseer: starting`
- Mosquito now runs CI checks for compatibility with Crystal 1.6
- The coordinator now uses UTC time instead of monotonic time
### Fixed
- Fixed a KeyError crash in the demo when job metadata was missing by using safe key access.
- the queue_list runner was never being shut down, but it is now as of (#165)
- Fixed a bug which would cause a mosquito server to hang at exit indefinitely if a job was mid-run during an interrupt. (#165)
- Fixed a bug which would cause a correctly exiting server to prematurely exit without emitting shutdown sequence logs and events. (#165)
- Crashed executors are now properly detected and replaced, preventing overseers from running with no executors
- Overseer now correctly deregisters on clean exit
- Pubsub logging now uses the `mosquito.events` namespace instead of the root `mosquito` namespace
- Queue `@empty` latch no longer permanently prevents re-dequeue after a queue drains
- Observer functionality is correctly gated behind the `publish_metrics` config flag
- Executor events are correctly scoped to within the overseer
- Fixed a latent bug which caused job duration to be reported incorrectly
- Fixed `Mosquito::Api.list_queues`
### Performance
- Optimized `metadata#set` to decrease the number of redis commands
## [2.0.0]
### Added
- Adds a test backend, which can be used to inspect jobs that were enqueued and
the parameters they were enqueued with.
- Job#fail now takes an optional `retry` parameter which defaults to true, allowing
a developer to explicitly mark a job as not retry-able during a job run. Additionally
a `should_retry` property exists which can be set as well.
- Mosquito::Configuration now provides `global_prefix` to change the global Redis namespace
prefix, allowing for more than one mosquito app to share a redis instance (thanks @dammer, cf #134).
### Fixed
- PeriodicJobs are now correctly run once per interval in an environment with many workers.
- Running more than ~10 workers no longer causes workers to crash, fixing #137 (cf #138).
- Mosquito is now more broadly compatible with jgaskins redis, swapping 0.7.0 for 0.7, and
forward compatible through 0.8. (thanks @rmarronnier)
- Mosquito now more gracefully responds to SIGTERM, fixes #122, cf #123.
- High CPU usage on linux is no longer an issue, fixes #126, cf #128.
### Breaking Changes
- The QueuedJob `params` macro has been replaced with `param`
which declares only one parameter at a time.
- JobRun#delete now explicitly takes an Int, rather than simply defaulting to 0 (thanks @jwoertink, cf #136).
- removes deprecated Backend.delete(String, Int32), use Backend.delete(String, Int64) instead.
- removes deprecated Queue#length, use Queue#size instead.
- removes option to run the cron scheduler declaratively, it is now always on with a distributed lock.
### Performance
- Dramatically decreases the time spent listing queues #120
- Replaces #keys with #scan_each to list runners #138
- Provides for multiple executors operating under a single runner #123
## [1.0.2]
### Fixed
- Mosquito::Runner.start now captures the thread with a spin lock again. The new
behavior of returning immediately can be achieved by calling #start(spin: false)
## [1.0.1] [YANKED]
### Added
- Implements a distributed lock for scheduler coordination. The behavior is opt-in
for now, but will become the default in the next release. See #108.
- Provides a helpful error message for most implementation errors dealing with
declaring params.
### Changed
- Mosquito::QueuedJob: the `params` macro has been deprecated in favor of `param`.
See #110.
- The deprecated Redis command [`rpoplpush`](https://redis.io/commands/rpoplpush/)
is no longer used. This lifts the minimum redis server requirement up to 6.2.0
and jgaskins redis to > 0.7.0.
- Mosquito::Runner.start no longer captures the thread with a spin lock. [DEFECT]
### Removed
- Mosquito config option `run_cron_scheduler` is no longer present, multiple
workers will compete for a distributed lock instead. See #108.
## [1.0.0]
### Added
- Jobs can now specify their retry/reschedule logic with the #rescheduleable?
and #reschedule_interval methods.
- Job metadata storage engine.
- Jobs can now specify `after` hooks.
- Mosquito::Runner now has a `stop` method which halts the runner after
completion of any running tasks. See issue #21 and pull #87.
- Mosquito config option `run_cron_scheduler` is no longer present, multiple
workers will compete for a distributed lock instead.
### Changed
- The storage backend is now implemented via interface, allowing alternate
backends to be implemented.
- The rate limiting functionality is now implemented in a module,
`Mosquito::RateLimiter`. See pull #77 for migration details.
- ** BREAKING ** `Job.job_type` has been replaced with `Job.queue_name`. The
functionality is identical but easier to access. See #86.
- `log` statements now properly identify where they're coming from rather than
just 'mosquito'. See issue #78 and pull #88.
- Mosquito now connects to Redis using a connection pool. See #89
- ** BREAKING ** `Mosquito.settings` is now `Mosquito.configuration`. While
this is technically a public API, it's unlikely anyone is using it for
anything.
- Mosquito::Runner.start need not be called from a spawn, it will spawn on its own.
### Removed
- Runner.idle_wait configuration is deprecated. Instead use
Mosquito.configure#idle_wait.
- Built in serializer for Granite models, and the Model type alias. See
Serializers in the documentation if the functionality is necessary.
- Mosquito no longer depends on luckyframework/habitat.
### Fixed
- Boolean false can now be specified as the default value for a parameter:
`params(name = false)`
## [0.11.2] - 2022-01-25
### Fixed
- #66 Jobs with no parameters can now be enqueued without specifying an empty
`params()`.
- #65 PeriodicJobs can now specify their run period in months.
### Notes
The v0 major version is now bugfix-only. Please update to v1.0. v0 will be
supported as long as it's feasible to do so.
## [0.11.1] - 2022-01-17
### Added
- Jobs can now specify `before` hooks, which can abort before the perform is
triggered.
- The Cron scheduler for periodic jobs can now be disabled via
Mosquito.configure#run_cron_scheduler
- The list of queues which are watched by the runner can now be configured via
Mosquito.configure#run_from.
### Updated
- Redis shard 2.8.0, removes hash shims which are no longer needed. Thanks
@jwoertink.
## [0.11.0] - 2021-04-10
Proforma release for Crystal 1.0.
## [0.10.0] - 2021-02-15
### Added
- UUID serializer helpers.
### Updated
- Switches from Benchmark.measure to Time.measure, thanks @anapsix.
- Runner.idle_wait is now configured via Mosquito.configure instead of directly
on Mosquito::Runner.
## [0.9.0] - 2020-10-26
### Added
- Allows redis connection string to be specified via config option, thanks
@watzon.
### Deprecated
- Connecting to redis via implicit REDIS_URL parameter is deprecated, thanks
@watzon.
## [0.8.0] - 2020-05-28
### Fixed
- (Breaking) Dead tasks which have failed and expired are now cleaned up with a
Redis TTL. See Pull #48.
## [0.7.0] - 2020-05-05
### Added
- ability to configure Runner.idle_wait period, thanks @mamantoha.
### Updated
- Point to Crystal 0.34.0, thanks @alex-lairan.
### Changed
- Replaces `Logger` with the more flexible `Log`.
## [0.6.0] - 2019-12-19
### Updated
- Point to Crystal 0.31.1, 0.32.1.
- Redis version, thanks @nsuchy.
## [0.5.0] - 2019-06-14
### Fixed
- Issue #26 Unresolved local var error, thanks @blacksmoke16.
## [0.4.0] - 2019-04-26
### Added
- Throttling logic, thanks @blacksmoke16.
## [0.3.0] - 2018-11-25
### Updated
- Point to crystal 0.27, thanks @blacksmoke16.
### Fixed
- Brittle/intermittently failing tests.
## [0.2.1] - 2018-10-01
### Added
- Logo, contributed by @psikoz.
- configuration for CI : `make test demo` will run all acceptance criteria.
- demo section.
- makefile.
### Updated
- specify crystal 0.26.
- simplify macro logic in QueuedJob.
## [0.2.0] - 2018-06-22
### Updated
- Specify crystal-redis 2.0 and crystal 0.25.
## [0.1.1] - 2018-06-08
### Added
- Job classes can now disable rescheduling on failure.
### Updated
- Readme.
- Misc typo fixes and flexibility upgrades.
- Update Crystal specification 0.23.1 -> .24.2.
- Correctly specify and sync version numbers from shard.yml / version.cr / git
tag.
- Use configurable Logger instead of writing directly to stdout.
- Log output is now colorized and formatted to be read by human eyes.
### Changed
- Breaking: Update Mosquito::Model type alias to match updates to Granite.
### Fixed
- BUG: task id was mutating on each save, causing weird logging when tasks
reschedule.
- PERFORMANCE: adding IDLE_WAIT to prevent slamming redis when the queues are
empty. Smarter querying of the queues for work.
================================================
FILE: LICENSE
================================================
The MIT License (MIT)
Copyright (c) 2019 Robert L Carpenter
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
================================================
FILE: Makefile
================================================
# Run recipes with bash.
SHELL=/bin/bash
# Default target: run the specs first, then build with shards.
.PHONY: all
all: test
shards build
# Run the spec suite with error traces enabled. `--chaos` follows the `--`
# separator -- presumably so it is handed to the spec runner rather than to
# the compiler; confirm against the spec helper.
.PHONY: test
test:
crystal spec --error-trace -- --chaos
# Compile and run the demo application.
.PHONY: demo
demo:
crystal run demo/run.cr --error-trace
================================================
FILE: README.md
================================================
[MIT License](https://tldrlegal.com/license/mit-license)
Mosquito is a generic background job runner written primarily for Crystal. It draws significant inspiration from experience with the successes and failings of many Ruby gems in this vein. Once compiled, a mosquito binary can start work in about 10 milliseconds.
Mosquito currently provides these features:
- Delayed execution (`SendEmailJob.new(email: :welcome, address: user.email).enqueue(in: 3.minutes)`)
- Scheduled / Periodic execution (`RunEveryHourJob.new`)
- Job Storage in Redis
- Automatic rescheduling of failed jobs
- Progressively increasing delay of rescheduled failed jobs
- Dead letter queue of jobs which have failed too many times
- Rate limited jobs
Current Limitations:
- Visibility into a running job network and queue is limited. There is a working proof of concept [visualization API](https://github.com/mosquito-cr/mosquito/issues/90) and [bare-bones terminal application](https://github.com/mosquito-cr/tui-visualizer).
## Project State
The Mosquito project is stable. A few folks are using Mosquito in production, and it's going well.
There are some features which would be nice to have, but what is here is both tried and tested.
If you're using Mosquito, please [get in touch](https://github.com/mosquito-cr/mosquito/discussions) on the Discussion board or [on Crystal chat](https://crystal-lang.org/community/) with any questions, feature suggestions, or feedback.
## Installation
Update your `shard.yml` to include mosquito:
```diff
dependencies:
+ mosquito:
+ github: mosquito-cr/mosquito
```
## Usage
### Step 1: Define a queued job
```crystal
# src/jobs/puts_job.cr
class PutsJob < Mosquito::QueuedJob
param message : String
def perform
puts message
end
end
```
### Step 2: Trigger that job
```crystal
# src/<somewhere>/<somefile>.cr
PutsJob.new(message: "ohai background job").enqueue
```
### Step 3: Run your worker to process the job
```crystal
# src/worker.cr
Mosquito.configure do |settings|
settings.backend_connection_string = ENV["REDIS_URL"]
end
Mosquito::Runner.start
```
```text
crystal run src/worker.cr
```
### Success
```
> crystal run src/worker.cr
2017-11-06 17:07:29 - Mosquito is buzzing...
2017-11-06 17:07:51 - Running task puts_job<...> from puts_job
2017-11-06 17:07:51 - [PutsJob] ohai background job
2017-11-06 17:07:51 - task puts_job<...> succeeded, took 0.0 seconds
```
[More information about queued jobs](https://mosquito-cr.github.io/manual/index.html#queued-jobs) in the manual.
------
## Periodic Jobs
Periodic jobs run according to a predefined period -- once an hour, etc.
This periodic job:
```crystal
class PeriodicallyPutsJob < Mosquito::PeriodicJob
run_every 1.minute
def perform
emotions = %w{happy sad angry optimistic political skeptical euphoric}
puts "The time is now #{Time.local} and the wizard is feeling #{emotions.sample}"
end
end
```
Would produce this output:
```crystal
2017-11-06 17:20:13 - Mosquito is buzzing...
2017-11-06 17:20:13 - Queues: periodically_puts_job
2017-11-06 17:20:13 - Running task periodically_puts_job<...> from periodically_puts_job
2017-11-06 17:20:13 - [PeriodicallyPutsJob] The time is now 2017-11-06 17:20:13 and the wizard is feeling skeptical
2017-11-06 17:20:13 - task periodically_puts_job<...> succeeded, took 0.0 seconds
2017-11-06 17:21:14 - Queues: periodically_puts_job
2017-11-06 17:21:14 - Running task periodically_puts_job<...> from periodically_puts_job
2017-11-06 17:21:14 - [PeriodicallyPutsJob] The time is now 2017-11-06 17:21:14 and the wizard is feeling optimistic
2017-11-06 17:21:14 - task periodically_puts_job<...> succeeded, took 0.0 seconds
2017-11-06 17:22:15 - Queues: periodically_puts_job
2017-11-06 17:22:15 - Running task periodically_puts_job<...> from periodically_puts_job
2017-11-06 17:22:15 - [PeriodicallyPutsJob] The time is now 2017-11-06 17:22:15 and the wizard is feeling political
2017-11-06 17:22:15 - task periodically_puts_job<...> succeeded, took 0.0 seconds
```
[More information on periodic jobs](https://mosquito-cr.github.io/manual/index.html#periodic-jobs) in the manual.
## Advanced usage
For more advanced topics, including [use with Lucky Framework](https://mosquito-cr.github.io/manual/lucky_framework.html), [throttling or rate limiting](https://mosquito-cr.github.io/manual/rate_limiting.html), check out the [full manual](https://mosquito-cr.github.io/manual).
## Contributing
Contributions are welcome. Please fork the repository, commit changes on a branch, and then open a pull request.
### Crystal Versions
Mosquito aims to be compatible with the latest Crystal release, and the [latest patch for all post-1.0 minor crystal versions](https://github.com/mosquito-cr/mosquito/blob/master/.github/workflows/ci.yml#L17).
For development purposes [you're encouraged to stay in sync with `.tool-versions`](https://github.com/mosquito-cr/mosquito/blob/master/.tool-versions).
### Testing
`crystal spec` will run the tests, as will `make test`.
================================================
FILE: benchmark/benchmark.cr
================================================
require "../src/mosquito"
require "./jobs/*"
# Benchmark driver: starts a mosquito runner in this process, enqueues 10,000
# EmitMessageJob runs, and reports how quickly the messages those jobs publish
# arrive on the pubsub channel.

Mosquito.configure do |settings|
  settings.backend_connection_string = ENV["REDIS_URL"]? || "redis://localhost:6379/4"
  settings.publish_metrics = true
end

# Flush the backend before starting (presumably clears data left by earlier
# runs -- confirm against the backend implementation).
Mosquito.configuration.backend.flush

# Only surface error-level logs from the redis and mosquito sources.
Log.setup do |c|
  backend = Log::IOBackend.new
  c.bind "redis.*", :error, backend
  c.bind "mosquito.*", :error, backend
end

# The first SIGINT asks the runner to stop; a second SIGINT exits right away.
stopping = false
Signal::INT.trap do
  if stopping
    puts "SIGINT received again, crash-exiting."
    exit 1
  end

  Mosquito::Runner.stop
  stopping = true
end

Mosquito::Runner.start spin: false

EventCount = 500
events = Deque(Time).new(EventCount)
event_count = 0
missed_messages = 0

channel = Mosquito.backend.subscribe(EmitMessageJob::PUBSUB_CHANNEL)

print "enqueuing benchmark jobs..."
10000.times {
  EmitMessageJob.new.enqueue
}
puts "done"

# Collector fiber: records the arrival time of every message. Each 100 ms
# without a message counts as a miss; the counter is never reset, so the
# runner is asked to stop after 100 misses in total.
spawn do
  loop do
    break unless Mosquito::Runner.keep_running

    if missed_messages >= 100
      Mosquito::Runner.stop
      break
    end

    select
    when channel.receive
      events << Time.utc
      event_count += 1
    when timeout(100.milliseconds)
      missed_messages += 1
    end
  end
end

# Rewrites the current terminal line with the running totals for `span`.
message = ->(span : Time::Span) do
  print "\r"
  print "Events: #{event_count} | "
  print "Span: #{span.total_seconds.round(2)} | "
  print "Rate: #{events.size.to_f./(span.to_f).round(2)} events/sec"
  print " "
end

# Progress display: runs until the runner is no longer marked as running.
loop do
  break unless Mosquito::Runner.keep_running

  # if events.size >= EventCount
  # (events.size - EventCount).times { events.shift }
  # end

  unless events.size >= 10
    print "\r"
    print "Waiting for events..."
    sleep 0.1.seconds
    next
  end

  message.call events.last - events.first
end

Mosquito::Runner.stop wait: true

puts
print "Total events: #{event_count} | "
if events.size >= 2
  print "Rate: #{events.size.to_f./(events.last.-(events.first).to_f).round(2)} events/sec"
else
  # `events.last` / `events.first` raise on an empty Deque, and a single event
  # spans zero seconds, so no meaningful rate exists in either case.
  print "Rate: n/a (fewer than two events received)"
end
puts
================================================
FILE: benchmark/jobs/emit_message_job.cr
================================================
# Benchmark job: each run publishes one message on the benchmark channel.
class EmitMessageJob < Mosquito::QueuedJob
  # Channel name shared between this job and the benchmark subscriber.
  PUBSUB_CHANNEL = "benchmark:messages"

  # Publishes a random integer in 0...100, serialized as a string.
  def perform
    payload = Random::Secure.rand(100).to_s
    Mosquito.backend.publish(PUBSUB_CHANNEL, payload)
  end
end
================================================
FILE: demo/jobs/custom_serializers.cr
================================================
# Demo job that overrides how Int32 values are deserialized.
class CustomSerializersJob < Mosquito::QueuedJob
param count : Int32
# Logs the deserialized count and bumps the "run_count" metadata counter
# that demo/run.cr checks.
def perform
log "deserialized: #{count}"
metadata.increment "run_count"
end
# Logs the raw string, then returns its integer value multiplied by 10.
# NOTE(review): presumably picked up by the `param` machinery for Int32
# parameters — confirm.
def deserialize_int32(raw : String) : Int32
log "using custom serialization: #{raw}"
raw.to_i32 * 10
end
end
# Three enqueues; demo/run.cr expects a run_count of 3 for this job.
CustomSerializersJob.new(3).enqueue
CustomSerializersJob.new(12).enqueue
CustomSerializersJob.new(525_600).enqueue
================================================
FILE: demo/jobs/periodically_puts.cr
================================================
# Demo periodic job: declared to run every 3 seconds on :demo_queue.
class PeriodicallyPuts < Mosquito::PeriodicJob
run_every 3.seconds
queue_name :demo_queue
# Logs a greeting and bumps the "run_count" metadata counter.
def perform
log "Hello from PeriodicallyPuts"
# For integration testing
metadata.increment "run_count"
end
end
# Periodic jobs do not need to be enqueued, they are executed automatically on schedule.
================================================
FILE: demo/jobs/queued_job.cr
================================================
# Demo queued job on :demo_queue taking a single Int32 parameter.
class QueuedJob < Mosquito::QueuedJob
param count : Int32
queue_name :demo_queue
# Logs one "ohai" line per iteration, `count` times in total, then bumps the
# "run_count" metadata counter.
def perform
count.times do |i|
log "ohai #{i}"
end
# For integration testing
metadata.increment "run_count"
end
end
# A single enqueue; demo/run.cr expects a run_count of 1 for this job.
QueuedJob.new(3).enqueue
================================================
FILE: demo/jobs/rate_limited_job.cr
================================================
# Demo job using Mosquito::RateLimiter, throttled to 3 per 10 seconds.
class RateLimitedJob < Mosquito::QueuedJob
# Logs the class-level rate limit stats before each run. This hook is
# declared ahead of the RateLimiter include.
# NOTE(review): hook declaration order presumably determines execution
# order — confirm before moving it.
before do
log self.class.rate_limit_stats
end
include Mosquito::RateLimiter
throttle limit: 3, per: 10.seconds
param count : Int32
# Logs the rate limit key. No run_count increment happens here.
# NOTE(review): the run_count that demo/run.cr checks is presumably
# maintained by the rate limiter — confirm.
def perform
log @@rate_limit_key
end
end
# Enqueue 15 runs; demo/run.cr expects a run_count of 3 for this job.
15.times do
RateLimitedJob.new(3).enqueue
end
================================================
FILE: demo/jobs/unique_job.cr
================================================
# Demo job using Mosquito::UniqueJob: declared unique for 1 hour, keyed on
# user_id only.
class UniqueJob < Mosquito::QueuedJob
include Mosquito::UniqueJob
unique_for 1.hour, key: [:user_id]
param user_id : Int64
param message : String
# Logs the recipient and message, then bumps the "run_count" metadata counter.
def perform
log "Sending to user #{user_id}: #{message}"
metadata.increment "run_count"
end
end
# First enqueue — accepted
UniqueJob.new(user_id: 1_i64, message: "hello").enqueue
# Duplicate user_id — suppressed by uniqueness lock
UniqueJob.new(user_id: 1_i64, message: "hello again").enqueue
# Different user_id — accepted
UniqueJob.new(user_id: 2_i64, message: "hello").enqueue
================================================
FILE: demo/run.cr
================================================
require "../src/mosquito"
# Point the demo at its Redis database (overridable through REDIS_URL) and
# use a one second idle wait.
Mosquito.configure do |config|
  config.backend_connection_string = ENV.fetch("REDIS_URL", "redis://localhost:6379/3")
  config.idle_wait = 1.second
end

# Flush the configured backend before the demo jobs are enqueued.
Mosquito.configuration.backend.flush

# Info-level logging everywhere, with redis limited to warnings.
Log.setup do |log_config|
  io_backend = Log::IOBackend.new
  log_config.bind "*", :info, io_backend
  log_config.bind "redis.*", :warn, io_backend
  log_config.bind "mosquito.*", :info, io_backend
end
require "./jobs/*"
# Integration check: compares the "run_count" stored in `klass`'s metadata
# (treated as 0 when absent) with `expected`.
#
# Prints a confirmation line on a match and raises with both values otherwise.
def expect_run_count(klass, expected)
  stored = klass.metadata["run_count"]? || "0"
  actual = stored.to_i

  if actual == expected
    puts "#{klass.name} was executed correctly."
  else
    raise "Expected #{klass.name} to have run_count == #{expected}. But got #{actual}"
  end
end
# Set once the first SIGINT has asked the runner to stop.
stopping = false
# First SIGINT: request a runner stop. A second SIGINT: print a notice and
# exit with status 1.
Signal::INT.trap do
if stopping
puts "SIGINT received again, crash-exiting."
exit 1
end
Mosquito::Runner.stop
stopping = true
end
# Start the runner. NOTE(review): `spin: false` presumably returns control to
# this script instead of blocking here — confirm against Runner.start.
Mosquito::Runner.start(spin: false)
# Give the runner at most 20 one-second slices, bailing out early as soon as
# it reports that it should no longer keep running.
20.times do
  break unless Mosquito::Runner.keep_running
  sleep 1.second
end

# Ask the runner to stop and wait for it to finish.
Mosquito::Runner.stop(wait: true)
puts "End of demo."
puts "----------------------------------"
puts "Checking integration test flags..."
# Each check raises when the job's recorded run_count differs from the
# expected number of executions for this demo run.
expect_run_count(PeriodicallyPuts, 7)
expect_run_count(QueuedJob, 1)
expect_run_count(CustomSerializersJob, 3)
expect_run_count(RateLimitedJob, 3)
expect_run_count(UniqueJob, 2)
================================================
FILE: scripts/increment_version
================================================
#!/usr/bin/env crystal
require "yaml"
require "option_parser"
# Path of the shard manifest whose version is read.
shard_yml = "shard.yml"

# Version component to bump: "major", "minor" or "patch". Stays "none" when
# no -i/--increment flag is given.
to_increment = "none"

OptionParser.parse do |p|
  p.banner = "Usage: $0 -i "

  p.on("-i field", "--increment=field", "Specifies the field to increment") do |name|
    # Write to the outer variable: assigning to a fresh name here would create
    # a block-local and leave `to_increment` at "none".
    to_increment = name
  end

  # Print the usage text and stop; there is nothing to bump when help is requested.
  p.on("-h", "--help", "Show this help") do
    STDERR.puts p
    exit
  end

  # Unknown flags are reported together with the usage text and exit status 1.
  p.invalid_option do |flag|
    STDERR.puts "ERROR: #{flag} is not a valid option."
    STDERR.puts p
    exit(1)
  end
end
# Read the manifest and split its version string into three integer parts.
document = File.read shard_yml
parsed = YAML.parse document
version = parsed["version"].as_s
major, minor, patch = version.split('.').map(&.to_i)
# Bump the requested component and zero every component to its right.
case to_increment
when "major"
major += 1
minor = 0
patch = 0
when "minor"
minor += 1
patch = 0
when "patch"
patch += 1
else
# Only the "none" default is reported; any other unrecognized value falls
# through silently and leaves the version unchanged.
STDERR.puts "No field to increment specified" if to_increment == "none"
end
# NOTE(review): confirm that the parsed YAML value supports `[]=`; if it does
# not, this assignment will not compile.
parsed["version"] = "#{major}.#{minor}.#{patch}"
# The result is only printed; nothing here writes back to shard.yml.
pp parsed.to_yaml
================================================
FILE: scripts/lib/increment_version.sh
================================================
#!/bin/bash
set -euo pipefail
IFS=$'\n\t'
print_help () {
cat <= 1.19'
license: MIT
targets:
demo:
main: demo/run.cr
mosquito:
main: src/mosquito.cr
dependencies:
redis:
github: jgaskins/redis
version: ~> 0.7
development_dependencies:
minitest:
github: ysbaddaden/minitest.cr
version: ~> 1.6.0
timecop:
github: crystal-community/timecop.cr
version: ~> 0.6.0
================================================
FILE: spec/helpers/bare_base_class.cr
================================================
module Mosquito
  class Base
    # Testing wedge: runs the block with an empty job mapping and an empty
    # list of scheduled periodic job runs, then puts the saved values back
    # whether or not the block raised.
    def self.bare_mapping(&block)
      saved_runs = @@scheduled_job_runs
      saved_mapping = @@mapping

      @@scheduled_job_runs = [] of PeriodicJobRun
      @@mapping = {} of String => Job.class

      begin
        yield
      ensure
        @@mapping = saved_mapping unless saved_mapping.nil?
        @@scheduled_job_runs = saved_runs unless saved_runs.nil?
      end
    end
  end
end
================================================
FILE: spec/helpers/configuration_helper.cr
================================================
module Mosquito
  class_setter configuration

  # Temporarily overrides configuration settings for the duration of a block.
  #
  # A copy of the current configuration and its `validated` flag are saved up
  # front. Each `key: value` pair in `settings` is then assigned on the live
  # configuration and `validated` is reset to false before the block runs.
  #
  # The saved copy and flag are restored in an `ensure`, so a block that
  # raises (for example on a failed assertion) cannot leak its overrides into
  # whatever runs next.
  macro temp_config(**settings)
    original_config = {{ @type }}.configuration.dup
    was_validated = {{ @type }}.configuration.validated

    begin
      {% for key, value in settings %}
        {{ @type }}.configuration.{{ key }} = {{ value }}
      {% end %}

      {{ @type }}.configuration.validated = false

      {{ yield }}
    ensure
      {{ @type }}.configuration = original_config
      {{ @type }}.configuration.validated = was_validated
    end
  end
end
================================================
FILE: spec/helpers/global_helpers.cr
================================================
module TestHelpers
  extend self

  # Testing wedge: runs the block inside a bare job mapping after flushing
  # the backend, the test backend's paused queues, the captured logs and the
  # captured pub/sub messages, so tests do not depend on each other.
  def clean_slate(&block)
    Mosquito::Base.bare_mapping do
      Mosquito.backend.flush
      Mosquito::TestBackend::Queue.flush_paused_queues!
      TestingLogBackend.instance.clear
      PubSub.instance.clear

      yield
    end
  end

  # The backend taken from the current Mosquito configuration.
  def backend : Mosquito::Backend
    Mosquito.configuration.backend
  end

  # Redis URL for tests: REDIS_URL when set, otherwise local database 3.
  def testing_redis_url : String
    ENV.fetch("REDIS_URL", "redis://localhost:6379/3")
  end
end

extend TestHelpers
================================================
FILE: spec/helpers/logging_helper.cr
================================================
require "log"
# In-memory log backend shared by the whole test suite.
class TestingLogBackend < Log::MemoryBackend
# Lazily created instance, memoized in a class variable.
def self.instance
@@instance ||= new
end
# Empties the list of captured entries.
def clear
@entries.clear
end
end
class Minitest::Test
  # Matches ANSI color escape sequences so they can be stripped before matching.
  COLOR_STRIP = /\e\[\d+(;\d+)?m/

  # Every entry captured by the shared testing log backend.
  def log_entries
    TestingLogBackend.instance.entries
  end

  # The message text of every captured entry.
  def logs
    log_entries.map(&.message)
  end

  # True when any captured message, color codes removed, matches `expected`.
  private def logs_match(expected : Regex) : Bool
    log_entries.any? do |entry|
      entry.message.gsub(COLOR_STRIP, "") =~ expected
    end
  end

  # Same as above, but only considers entries logged from `source`.
  private def logs_match(source : String, match_text : Regex) : Bool
    log_entries.any? do |entry|
      entry.source == source && entry.message.gsub(COLOR_STRIP, "") =~ match_text
    end
  end

  # String variants build a Regex from the given text and defer to the
  # Regex variants below.
  def assert_logs_match(expected : String)
    assert_logs_match Regex.new(expected)
  end

  def refute_logs_match(expected : String)
    refute_logs_match Regex.new(expected)
  end

  def assert_logs_match(source : String, expected : String)
    assert_logs_match source, Regex.new(expected)
  end

  def refute_logs_match(source : String, expected : String)
    refute_logs_match source, Regex.new(expected)
  end

  # Asserts that some captured message matches `expected`; the failure
  # message lists every captured message.
  def assert_logs_match(expected : Regex)
    assert logs_match(expected), "Expected to logs to include #{expected}. Logs contained: \n#{logs.join("\n")}"
  end

  # Asserts that no captured message matches `expected`.
  def refute_logs_match(expected : Regex)
    refute logs_match(expected), "Expected to logs to not include #{expected}. Logs contained: \n#{logs.join("\n")}"
  end

  # Source-scoped assertion; the failure message lists "source message" pairs.
  def assert_logs_match(source : String, expected : Regex)
    captured = log_entries.map { |entry| "#{entry.source} #{entry.message}" }.join("\n")
    assert logs_match(source, expected), "Expected to logs to include #{expected}. Logs contained: \n#{captured}"
  end

  # Source-scoped refutation; the failure message lists "source message" pairs.
  def refute_logs_match(source : String, expected : Regex)
    captured = log_entries.map { |entry| "#{entry.source} #{entry.message}" }.join("\n")
    refute logs_match(source, expected), "Expected to logs to not include #{expected}. Logs contained: \n#{captured}"
  end

  # Drops everything captured so far.
  def clear_logs
    TestingLogBackend.instance.clear
  end
end
# Route test logging into the in-memory backend: debug by default, warnings
# only for redis, and trace for mosquito.
Log.setup do |log_config|
  test_backend = TestingLogBackend.instance
  log_config.bind "*", :debug, test_backend
  log_config.bind "redis.*", :warn, test_backend
  log_config.bind "mosquito.*", :trace, test_backend
end
================================================
FILE: spec/helpers/mock_coordinator.cr
================================================
# Coordinator test double that counts `schedule` calls and can be forced to
# always act as the coordinator.
class MockCoordinator < Mosquito::Runners::Coordinator
# Number of times `schedule` has been called on this instance.
getter schedule_count
def initialize(queue_list : Mosquito::Runners::QueueList)
super
@schedule_count = 0
end
# Yields unconditionally when the always-coordinator flag is set; otherwise
# defers to the parent implementation.
def only_if_coordinator : Nil
if @always_coordinator
yield
else
# yikes!
# https://github.com/crystal-lang/crystal/issues/10399
super do
yield
end
end
end
# Sets the flag read by `only_if_coordinator`; the flag is unset until this
# is called.
def always_coordinator!(always = true)
@always_coordinator = always
end
# Counts the call, then runs the parent's `schedule`.
def schedule
@schedule_count += 1
super
end
end
================================================
FILE: spec/helpers/mock_executor.cr
================================================
# Executor test double whose `run` and `stop` only move it between states.
class MockExecutor < Mosquito::Runners::Executor
setter work_unit : Mosquito::WorkUnit?
# Re-declares the state setter, forwarding to the parent implementation.
# NOTE(review): presumably here to widen the setter's visibility for tests — confirm.
def state=(state : Mosquito::Runnable::State)
super
end
# Marks the executor as working without doing anything else.
def run
self.state = Mosquito::Runnable::State::Working
end
# Moves to Stopping immediately; a spawned fiber then marks the executor
# Finished and signals the wait group, which is returned to the caller.
def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
self.state = Mosquito::Runnable::State::Stopping
spawn do
self.state = Mosquito::Runnable::State::Finished
wait_group.done
end
wait_group
end
# Receives from the job pipeline and returns the job run of what it got.
def receive_job
job_pipeline.receive.job_run
end
end
================================================
FILE: spec/helpers/mock_overseer.cr
================================================
# Overseer test double built from mock collaborators, with its internals
# exposed as properties.
class MockOverseer < Mosquito::Runners::Overseer
property queue_list, coordinator, executors, work_handout, finished_notifier, dequeue_adapter
# Sets every collaborator directly without calling `super`. The queue list
# is assigned before the coordinator because the coordinator is built from it.
def initialize
@executor_count = Mosquito.configuration.executor_count
@idle_wait = Mosquito.configuration.idle_wait
@finished_notifier = Channel(Mosquito::WorkUnit?).new
@queue_list = MockQueueList.new
@coordinator = MockCoordinator.new queue_list
@dequeue_adapter = Mosquito.configuration.dequeue_adapter
@executors = [] of Mosquito::Runners::Executor
@work_handout = Channel(Mosquito::WorkUnit).new
# Start with exactly one mock executor and tell the observer about it.
@executors << build_executor
observer.update_executor_list executors
end
# Builds a MockExecutor bound to this overseer, typed as the base Executor.
def build_executor
MockExecutor.new(self).as(Mosquito::Runners::Executor)
end
end
================================================
FILE: spec/helpers/mock_queue_list.cr
================================================
# QueueList test double with a writable state and a direct view of its
# discovered queues.
class MockQueueList < Mosquito::Runners::QueueList
setter state
# Returns the @discovered_queues instance variable as-is.
def discovered_queues : Array(Mosquito::Queue)
@discovered_queues
end
# Moves to Stopping immediately; a spawned fiber then marks the list
# Finished and signals the wait group, which is returned to the caller.
def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
self.state = Mosquito::Runnable::State::Stopping
spawn do
self.state = Mosquito::Runnable::State::Finished
wait_group.done
end
wait_group
end
end
================================================
FILE: spec/helpers/mocks.cr
================================================
# A global place for global mocks
# Mixin that counts how many times `perform` was called on the including class.
module PerformanceCounter
# Records one performance on the including class.
def perform
self.class.performed!
end
# Gives each including class its own counter plus increment and reset helpers.
macro included
class_getter performances = 0
def self.performed!
@@performances += 1
end
def self.reset_performance_counter!
@@performances = 0
end
end
end
# Plain job whose `perform` only bumps the performance counter.
class JobWithPerformanceCounter < Mosquito::Job
include PerformanceCounter
end
# Periodic job fixture with a performance counter.
class PeriodicTestJob < Mosquito::PeriodicJob
include PerformanceCounter
end
# Queued job fixture with a performance counter; base class for several
# fixtures below.
class QueuedTestJob < Mosquito::QueuedJob
include PerformanceCounter
end
# Queued job fixture that records whether its enqueue hooks ran and which
# job run they were handed.
class QueueHookedTestJob < Mosquito::QueuedJob
  include PerformanceCounter

  # When true, the before_enqueue hook evaluates to false.
  property fail_before_hook = false
  property before_hook_ran = false
  property after_hook_ran = false
  # The `job` value seen by the most recent hook.
  property passed_job_config : Mosquito::JobRun? = nil

  # Records the hook invocation; the hook's value is false exactly when
  # `fail_before_hook` is set.
  before_enqueue do
    self.before_hook_ran = true
    self.passed_job_config = job
    !fail_before_hook
  end

  # Records the hook invocation and the job run it received.
  after_enqueue do
    self.after_hook_ran = true
    self.passed_job_config = job
  end
end
# Queued job fixture whose `perform` counts the run and ends with `true`.
class PassingJob < QueuedTestJob
def perform
super
true
end
end
# Queued job fixture that always fails, in a configurable way.
class FailingJob < QueuedTestJob
  # Raise an exception instead of calling `fail`.
  property fail_with_exception = false
  # When false, `fail` is called with `retry: false`.
  property fail_with_retry = true
  property exception_message = "this is the reason #{name} failed"

  include PerformanceCounter

  # Counts the run, then fails: raising takes precedence, otherwise `fail` is
  # called with or without `retry: false`.
  def perform
    super

    if fail_with_exception
      raise exception_message
    elsif fail_with_retry
      fail exception_message
    else
      fail exception_message, retry: false
    end
  end
end
# Passing job fixture whose reschedule interval is 4 seconds for every retry count.
class CustomRescheduleIntervalJob < PassingJob
def reschedule_interval(retry_count)
4.seconds
end
end
# Failing job fixture that reports itself as not rescheduleable.
class NonReschedulableFailingJob < FailingJob
def rescheduleable?
false
end
end
# Job fixture that deliberately defines no `perform`.
class NotImplementedJob < Mosquito::Job
end
# Passing job fixture that keeps the config hash passed to `vars_from`.
class JobWithConfig < PassingJob
getter config = {} of String => String
# Stores the given hash so specs can read it back through `config`.
def vars_from(config : Hash(String, String))
@config = config
end
end
# Queued job fixture without parameters; `perform` only logs a line.
class JobWithNoParams < Mosquito::QueuedJob
def perform
log "no param job performed"
end
end
# Queued job fixture with two before hooks and two after hooks, each logging
# a distinct line.
class JobWithHooks < Mosquito::QueuedJob
param should_fail : Bool
before do
log "Before Hook Executed"
end
after do
log "After Hook Executed"
end
# The second before hook is the one that fails the job when `should_fail` is set.
before do
log "2nd Before Hook Executed"
fail if should_fail
end
after do
log "2nd After Hook Executed"
end
def perform
log "Perform Executed"
end
end
# Queued job fixture on "io_queue" that logs its `text` parameter.
class EchoJob < Mosquito::QueuedJob
queue_name "io_queue"
param text : String
def perform
log text
end
end
# Periodic job fixture declared to run every month; `perform` only logs a line.
class MonthlyJob < Mosquito::PeriodicJob
run_every 1.month
def perform
log "monthly job_run ran"
end
end
# Rate limited job fixture using the "rate_limit" key with a limit of Int32::MAX.
class RateLimitedJob < Mosquito::QueuedJob
include Mosquito::RateLimiter
throttle key: "rate_limit", limit: Int32::MAX
param should_fail : Bool = false
param increment : Int32 = 1
# Logs, then fails the job when `should_fail` is set.
before do
log "Before Hook Executed"
fail if should_fail
end
def perform
log "Performed"
end
# Returns the `increment` parameter.
# NOTE(review): presumably read by the rate limiter to decide how much each
# run adds to the run count — confirm.
def increment_run_count_by
increment
end
end
# Queued job fixture whose before hook always preempts, passing along the
# optional `preempt_until` time.
class PreemptingJob < Mosquito::QueuedJob
include PerformanceCounter
property preempt_until : Time? = nil
before do
preempt "test preemption", until: preempt_until
end
end
# Preempting job fixture that also reports itself as not rescheduleable.
class NonReschedulablePreemptingJob < Mosquito::QueuedJob
include PerformanceCounter
before do
preempt "not reschedulable"
end
def rescheduleable? : Bool
false
end
end
# Queued job fixture that keeps running until the class-level `should_sleep`
# flag is cleared.
class SleepyJob < Mosquito::QueuedJob
class_property should_sleep = true
# Re-checks the flag every 10 milliseconds.
def perform
while self.class.should_sleep
sleep 0.01.seconds
end
end
end
# Second rate limited fixture declared with the same "rate_limit" key as
# RateLimitedJob; `perform` does nothing.
class SecondRateLimitedJob < Mosquito::QueuedJob
include Mosquito::RateLimiter
throttle key: "rate_limit", limit: Int32::MAX
def perform
end
end
# Unique job fixture: unique for 1 hour, declared without an explicit key list.
class UniqueTestJob < Mosquito::QueuedJob
include Mosquito::UniqueJob
unique_for 1.hour
param user_id : Int64
param email_type : String
def perform
log "UniqueTestJob performed"
end
end
# Unique job fixture: unique for 30 seconds, keyed on `user_id` only.
class UniqueWithKeyJob < Mosquito::QueuedJob
include Mosquito::UniqueJob
unique_for 30.seconds, key: [:user_id]
param user_id : Int64
param message : String
def perform
log "UniqueWithKeyJob performed"
end
end
# Unique job fixture without parameters: unique for 1 minute.
class UniqueNoParamsJob < Mosquito::QueuedJob
include Mosquito::UniqueJob
unique_for 1.minute
def perform
log "UniqueNoParamsJob performed"
end
end
# Explicitly register the fixtures that specs look up by type name.
Mosquito::Base.register_job_mapping "job_with_config", JobWithConfig
Mosquito::Base.register_job_mapping "job_with_performance_counter", JobWithPerformanceCounter
Mosquito::Base.register_job_mapping "failing_job", FailingJob
Mosquito::Base.register_job_mapping "non_reschedulable_failing_job", NonReschedulableFailingJob
Mosquito::Base.register_job_mapping "preempting_job", PreemptingJob
Mosquito::Base.register_job_mapping "non_reschedulable_preempting_job", NonReschedulablePreemptingJob
# Default config hash used when building job runs in specs.
def job_run_config
  {"year" => "1752", "name" => "the year september lost 12 days"}
end

# Builds a job run of the given type, assigns the config, stores it and
# returns it.
def create_job_run(type = "job_with_config", config = job_run_config)
  job_run = Mosquito::JobRun.new(type)
  job_run.config = config
  job_run.store
  job_run
end
================================================
FILE: spec/helpers/null_dequeue_adapter.cr
================================================
# A test adapter that always returns nil, simulating empty queues.
class NullDequeueAdapter < Mosquito::DequeueAdapter
# Number of times `dequeue` has been called.
getter dequeue_count = 0
# Counts the call and returns nil without looking at the queue list.
def dequeue(queue_list : Mosquito::Runners::QueueList) : Mosquito::WorkUnit?
@dequeue_count += 1
nil
end
end
================================================
FILE: spec/helpers/pub_sub.cr
================================================
# Test override of `publish`: inside the `metrics` block, the data is logged
# at debug level and handed to the in-process PubSub recorder as JSON,
# tagged with the publish context's originator.
module Mosquito::Observability::Publisher
@[AlwaysInline]
def publish(data : NamedTuple)
metrics do
Log.debug { "Publishing #{data} to #{@publish_context.originator}" }
PubSub.instance.capture_message(@publish_context.originator, data.to_json)
end
end
end
# In-process recorder for published messages, used by specs to inspect what
# would have been broadcast.
class PubSub
  # Messages are only recorded while this flag is set.
  @listening = false

  # Every message captured so far.
  getter messages = [] of Mosquito::Backend::BroadcastMessage

  delegate clear, to: @messages

  # Lazily created instance, memoized in a class variable.
  def self.instance
    @@instance ||= new
  end

  # Records messages for the duration of the block and returns the captured
  # list. Recording is switched off again even when the block raises.
  def self.eavesdrop : Array(Mosquito::Backend::BroadcastMessage)
    recorder = instance
    recorder.listen

    begin
      yield
      recorder.messages
    ensure
      recorder.stop_listening
    end
  end

  def listen
    @listening = true
  end

  def stop_listening
    @listening = false
  end

  # Stores the message when recording is on; otherwise does nothing.
  def capture_message(originator : String, message : String)
    return unless @listening

    @messages << Mosquito::Backend::BroadcastMessage.new(originator, message)
  end

  # Spec-side helpers for working with the recorder.
  module Helpers
    delegate eavesdrop, to: PubSub

    # Asserts that at least one captured message body matches `matcher`; the
    # failure message lists every captured body.
    def assert_message_received(matcher : Regex) : Nil
      received = PubSub.instance.messages
      matched = received.any? { |broadcast| matcher === broadcast.message }

      assert matched, "Expected to find a message matching #{matcher.inspect}, but only found: #{received.map(&.message).inspect}"
    end
  end
end
================================================
FILE: spec/helpers/spy_dequeue_adapter.cr
================================================
# A test adapter that tracks which queues were checked, in order.
class SpyDequeueAdapter < Mosquito::DequeueAdapter
  # Names of the queues visited so far, in visiting order.
  getter checked_queues = [] of String

  # Walks the queues in list order, recording each name, and returns a work
  # unit for the first job run found. Returns nil when every queue is empty.
  def dequeue(queue_list : Mosquito::Runners::QueueList) : Mosquito::WorkUnit?
    queue_list.queues.each do |queue|
      @checked_queues << queue.name

      job_run = queue.dequeue
      next unless job_run

      return Mosquito::WorkUnit.of(job_run, from: queue)
    end

    nil
  end
end
================================================
FILE: spec/mosquito/api/executor_config_spec.cr
================================================
require "../../spec_helper"
# Specs for storing, clearing and resolving executor count overrides, both
# globally and per overseer id. Every example runs inside `clean_slate`.
describe "Mosquito::Api::ExecutorConfig" do
# Global override: calls without an overseer id.
describe "global executor count" do
it "returns nil when no override is stored" do
clean_slate do
result = Mosquito::Api::ExecutorConfig.stored_executor_count
assert_nil result
end
end
it "round-trips a global executor count" do
clean_slate do
Mosquito::Api::ExecutorConfig.store_executor_count(8)
result = Mosquito::Api::ExecutorConfig.stored_executor_count
assert_equal 8, result
end
end
it "clears the global executor count" do
clean_slate do
Mosquito::Api::ExecutorConfig.store_executor_count(8)
Mosquito::Api::ExecutorConfig.clear_executor_count
result = Mosquito::Api::ExecutorConfig.stored_executor_count
assert_nil result
end
end
end
# Per-overseer override: calls that pass an overseer id.
describe "per-overseer executor count" do
it "returns nil when no per-overseer override is stored" do
clean_slate do
result = Mosquito::Api::ExecutorConfig.stored_executor_count("gpu-worker-1")
assert_nil result
end
end
it "round-trips a per-overseer executor count" do
clean_slate do
Mosquito::Api::ExecutorConfig.store_executor_count(2, "gpu-worker-1")
result = Mosquito::Api::ExecutorConfig.stored_executor_count("gpu-worker-1")
assert_equal 2, result
# Global is unaffected.
global = Mosquito::Api::ExecutorConfig.stored_executor_count
assert_nil global
end
end
it "clears per-overseer without affecting global" do
clean_slate do
Mosquito::Api::ExecutorConfig.store_executor_count(8)
Mosquito::Api::ExecutorConfig.store_executor_count(2, "gpu-worker-1")
Mosquito::Api::ExecutorConfig.clear_executor_count("gpu-worker-1")
per_overseer = Mosquito::Api::ExecutorConfig.stored_executor_count("gpu-worker-1")
assert_nil per_overseer
global = Mosquito::Api::ExecutorConfig.stored_executor_count
assert_equal 8, global
end
end
end
# Resolution order: the per-overseer value wins, the global value is the
# fallback, and nil is returned when neither is stored.
describe ".resolve" do
it "returns nil when nothing is stored" do
clean_slate do
result = Mosquito::Api::ExecutorConfig.resolve
assert_nil result
end
end
it "returns the global count when no overseer_id is given" do
clean_slate do
Mosquito::Api::ExecutorConfig.store_executor_count(8)
result = Mosquito::Api::ExecutorConfig.resolve
assert_equal 8, result
end
end
it "prefers per-overseer over global" do
clean_slate do
Mosquito::Api::ExecutorConfig.store_executor_count(8)
Mosquito::Api::ExecutorConfig.store_executor_count(2, "gpu-worker-1")
result = Mosquito::Api::ExecutorConfig.resolve("gpu-worker-1")
assert_equal 2, result
end
end
it "falls back to global when per-overseer is not set" do
clean_slate do
Mosquito::Api::ExecutorConfig.store_executor_count(8)
result = Mosquito::Api::ExecutorConfig.resolve("gpu-worker-1")
assert_equal 8, result
end
end
end
# The instance API (update / executor_count / clear) is exercised end to end
# for both the global and the per-overseer value.
describe "instance methods" do
it "delegates to class-level helpers" do
clean_slate do
config = Mosquito::Api::ExecutorConfig.instance
config.update(10)
assert_equal 10, config.executor_count
config.update(3, overseer_id: "worker-1")
assert_equal 3, config.executor_count(overseer_id: "worker-1")
config.clear(overseer_id: "worker-1")
assert_nil config.executor_count(overseer_id: "worker-1")
config.clear
assert_nil config.executor_count
end
end
end
end
# Specs for the executor count shortcuts exposed directly on Mosquito::Api.
describe "Mosquito::Api executor count convenience methods" do
it "reads and writes global executor count" do
clean_slate do
Mosquito::Api.set_executor_count(12)
assert_equal 12, Mosquito::Api.executor_count
end
end
it "reads and writes per-overseer executor count" do
clean_slate do
Mosquito::Api.set_executor_count(4, overseer_id: "gpu-worker-1")
assert_equal 4, Mosquito::Api.executor_count(overseer_id: "gpu-worker-1")
# Global unaffected.
assert_nil Mosquito::Api.executor_count
end
end
end
================================================
FILE: spec/mosquito/api/executor_spec.cr
================================================
require "../../spec_helper"
# Specs for the executor API: what an executor's observer records (current
# job, heartbeat, events, average duration) and what the API reads back.
describe Mosquito::Api::Executor do
let(executor_pipeline) { Channel(Mosquito::WorkUnit).new }
let(finished_notifier) { Channel(Mosquito::WorkUnit?).new }
let(job) { QueuedTestJob.new }
let(job_run : Mosquito::JobRun) { job.enqueue }
let(overseer) { MockOverseer.new }
let(executor) { MockExecutor.new overseer.as(Mosquito::Runners::Overseer) }
# The API object is looked up by the executor's object_id.
let(api) { Mosquito::Api::Executor.new executor.object_id.to_s }
let(observer) { Mosquito::Observability::Executor.new executor }
describe "publish context" do
it "includes object_id" do
assert_equal "executor:#{executor.object_id}", observer.publish_context.context
end
it "is nested under the overseer publish context" do
assert_equal "mosquito:overseer:#{overseer.object_id}:executor:#{executor.object_id}", observer.publish_context.originator
end
end
it "can read the current job and queue after being started, and clears it after" do
Mosquito::Base.register_job_mapping job.class.name.underscore, job.class
job_run.store
job_run.build_job
# Inside the execute block the API reports the running job and its queue.
observer.execute job_run, job.class.queue do
assert_equal job_run.id, api.current_job
assert_equal job.class.queue.name, api.current_job_queue
end
# Once the block has returned, both values read back as nil.
assert api.current_job.nil?
assert api.current_job_queue.nil?
end
it "returns a nil heartbeat before the executor has triggered it" do
assert api.heartbeat.nil?
end
it "returns a valid heartbeat" do
now = Time.utc
Timecop.freeze now do
observer.heartbeat!
end
# the heartbeat is stored as a unix epoch without millis
assert_equal now.at_beginning_of_second, api.heartbeat
end
it "doesn't publish a heartbeat when metrics are disabled" do
# First heartbeat is recorded with the default configuration.
now = Time.utc
Timecop.freeze now do
executor.observer.heartbeat!
end
# Second heartbeat happens a minute later with metrics switched off; the
# stored value is expected to still be the first one.
later = Time.utc + 1.minute
Mosquito.temp_config(publish_metrics: false) do
Timecop.freeze later do
executor.observer.heartbeat!
end
end
api = Mosquito::Api::Executor.new executor.object_id.to_s
assert_equal now.at_beginning_of_second, api.heartbeat
end
it "publishes job started/finished events" do
job_run.store
job_run.build_job
# Capture published messages while an empty execute block runs.
eavesdrop do
observer.execute job_run, job.class.queue do
end
end
assert_message_received /job-started/
assert_message_received /job-finished/
end
it "measures and records average job duration" do
job_run.store
job_run.build_job
# 100x the sleep duration below
Timecop.scale(100) do
observer.execute job_run, job.class.queue do
sleep 0.01.seconds
end
end
average_key = observer.average_key(job_run.type)
average = Mosquito.backend.average(average_key)
# Remove the recorded average so it does not linger in the backend.
Mosquito.backend.delete average_key
# assert that something > 0 comes back from the average.
# backend tests cover calculating the average itself.
assert average > 0
end
end
================================================
FILE: spec/mosquito/api/job_run_spec.cr
================================================
require "../../spec_helper"
# Specs for reading a stored job run back through the job run API.
describe Mosquito::Api::JobRun do
# the job run timestamps are stored as a unix epoch with millis, so nanosecond precision is lost.
# Truncates `time` to millisecond precision: the whole sub-second part is
# removed and the whole milliseconds are added back.
def at_beginning_of_millisecond(time)
time - (time.nanosecond.nanoseconds) + (time.millisecond.milliseconds)
end
# Lazily built fixtures: a job, a job run built from it, and the API object
# that looks the run up by id.
getter job : QueuedTestJob { QueuedTestJob.new }
getter job_run : Mosquito::JobRun { job.build_job_run }
getter api : Mosquito::Api::JobRun { Mosquito::Api::JobRun.new job_run.id }
it "can look up a job run" do
job_run.store
assert api.found?
end
it "can look up a job run that doesn't exist" do
api = Mosquito::Api::JobRun.new "not_a_real_id"
refute api.found?
end
it "can retrieve the job parameters" do
job_run = JobWithHooks.new(should_fail: false).build_job_run
job_run.store
api = Mosquito::Api::JobRun.new job_run.id
# Parameters read back in their serialized (string) form.
assert_equal "false", api.runtime_parameters["should_fail"]
end
it "can retrieve the job type" do
job_run.store
assert_equal job.class.name.underscore, api.type
end
it "can retrieve the enqueue time" do
now = Time.utc
Timecop.freeze now do
job_run.store
end
expected_time = at_beginning_of_millisecond now
assert_equal expected_time, api.enqueue_time
end
it "can retrieve the retry count" do
job_run.store
assert_equal 0, api.retry_count
end
it "can retrieve the started at timestamp" do
now = at_beginning_of_millisecond Time.utc
job_run = create_job_run
# Time is frozen while the run executes, so the recorded timestamp equals `now`.
Timecop.freeze now do
job_run.run
end
api = Mosquito::Api::JobRun.new(job_run.id)
assert_equal now, api.started_at
end
it "can retrieve the finished_at timestamp" do
now = at_beginning_of_millisecond Time.utc
job_run = create_job_run
# Time is frozen while the run executes, so the recorded timestamp equals `now`.
Timecop.freeze now do
job_run.run
end
api = Mosquito::Api::JobRun.new(job_run.id)
assert_equal now, api.finished_at
end
end
================================================
FILE: spec/mosquito/api/overseer_spec.cr
================================================
require "../../spec_helper"
describe Mosquito::Api::Overseer do
  let(:overseer) { MockOverseer.new }
  let(:api) { Mosquito::Api::Overseer.new(overseer.object_id.to_s) }
  let(:observer) { Observability::Overseer.new(overseer) }
  let(:executor) { MockExecutor.new(overseer.as(Mosquito::Runners::Overseer))}

  describe "publish context" do
    it "includes object_id" do
      context = observer.publish_context

      assert_equal "overseer:#{overseer.object_id}", context.context
      assert_equal "mosquito:overseer:#{overseer.object_id}", context.originator
    end
  end

  it "allows fetching a list of executors" do
    assert_equal 1, api.executors.size

    # the same executor is passed twice; only the list size is asserted
    observer.update_executor_list([executor, executor])

    assert_equal 2, api.executors.size
  end

  it "allows getting the latest heartbeat" do
    assert_nil api.last_heartbeat

    observer.heartbeat

    assert_instance_of Time, api.last_heartbeat
  end

  it "publishes the startup event" do
    eavesdrop { observer.starting }

    assert_message_received /started/
  end

  it "publishes the stopping event" do
    eavesdrop { observer.stopping }

    assert_message_received /stopped/
  end

  it "publishes the stopped event" do
    eavesdrop { observer.stopped }

    assert_message_received /exited/
  end

  it "publishes an event when an executor dies" do
    eavesdrop { observer.executor_died executor }

    assert_message_received /died/
  end

  it "publishes an event when an executor is created" do
    eavesdrop { observer.executor_created executor }

    assert_message_received /created/
  end
end
================================================
FILE: spec/mosquito/api/periodic_job_spec.cr
================================================
require "../../spec_helper"
describe Mosquito::Api::PeriodicJob do
  # Interval used whenever PeriodicTestJob is registered in these examples.
  getter interval : Time::Span = 2.minutes

  describe "publish context" do
    it "includes the periodic job name" do
      clean_slate do
        Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval

        scheduled = Mosquito::Base.scheduled_job_runs.first
        context = scheduled.observer.publish_context

        assert_equal "periodic_job:PeriodicTestJob", context.context
        assert_equal "mosquito:periodic_job:PeriodicTestJob", context.originator
      end
    end
  end

  it "can fetch a list of periodic jobs" do
    clean_slate do
      Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval

      listed = Mosquito::Api::PeriodicJob.all

      assert_equal 1, listed.size
      assert_equal "PeriodicTestJob", listed.first.name
      assert_equal interval, listed.first.interval
    end
  end

  it "returns nil for last_executed_at when never run" do
    clean_slate do
      Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval

      listed = Mosquito::Api::PeriodicJob.all

      assert_nil listed.first.last_executed_at
    end
  end

  it "returns the last executed time after a job runs" do
    # truncated to the second so it can be compared to the stored value
    frozen_at = Time.utc.at_beginning_of_second

    clean_slate do
      Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval
      scheduled = Mosquito::Base.scheduled_job_runs.first

      Timecop.freeze(frozen_at) do
        scheduled.try_to_execute
      end

      listed = Mosquito::Api::PeriodicJob.all
      assert_equal frozen_at, listed.first.last_executed_at
    end
  end

  it "publishes an event when a periodic job is enqueued" do
    frozen_at = Time.utc.at_beginning_of_second

    clean_slate do
      Mosquito::Base.register_job_interval PeriodicTestJob, interval: interval

      eavesdrop do
        Timecop.freeze(frozen_at) do
          Mosquito::Base.scheduled_job_runs.first.try_to_execute
        end
      end

      assert_message_received /enqueued/
    end
  end
end
================================================
FILE: spec/mosquito/api/publisher_spec.cr
================================================
require "../../spec_helper"
describe Mosquito::Api::Publisher do
  let(executor_pipeline) { Channel(Mosquito::WorkUnit).new }
  let(finished_notifier) { Channel(Mosquito::WorkUnit?).new }

  let(job) { QueuedTestJob.new }
  let(job_run : Mosquito::JobRun) { job.enqueue }

  let(overseer) { MockOverseer.new }
  let(executor) { MockExecutor.new overseer.as(Mosquito::Runners::Overseer) }
  let(api) { Mosquito::Api::Executor.new executor.object_id.to_s }
  let(observer) { Mosquito::Observability::Executor.new executor }

  it "doesn't publish events when metrics are disabled" do
    job_run.store
    job_run.build_job

    # clear the PubSub helper before listening so only messages from the
    # execution below are counted
    PubSub.instance.clear

    captured = eavesdrop do
      Mosquito.temp_config(publish_metrics: false) do
        observer.execute job_run, job.class.queue do
        end
      end
    end

    assert_equal 0, captured.size
  end
end
================================================
FILE: spec/mosquito/api/queue_spec.cr
================================================
require "../../spec_helper"
describe Mosquito::Api::Queue do
  let(job_classes) {
    [QueuedTestJob, PassingJob, FailingJob, QueueHookedTestJob]
  }

  let(queued_test_job) { QueuedTestJob.new }
  let(passing_job) { PassingJob.new }

  let(queue : Mosquito::Queue) { queued_test_job.class.queue }
  let(observer : Mosquito::Observability::Queue) { queue.observer }

  describe "publish context" do
    it "includes the queue name" do
      context = observer.publish_context

      assert_equal "queue:queued_test_job", context.context
      assert_equal "mosquito:queue:queued_test_job", context.originator
    end
  end

  it "can fetch a list of current queues" do
    clean_slate do
      queued_test_job.enqueue
      passing_job.enqueue

      expected_names = ["queued_test_job", "passing_job"].sort
      listed = Mosquito::Api::Queue.all

      assert_equal 2, listed.size
      # both sides are sorted, so the comparison ignores listing order
      assert_equal expected_names, listed.map(&.name).sort
    end
  end

  it "can fetch the size of a queue" do
    clean_slate do
      # one job run per job class
      job_classes.map(&.new).each(&.enqueue)

      Mosquito::Api::Queue.all.each do |listed|
        assert_equal 1, listed.size
      end
    end
  end

  it "can fetch the size details of a queue" do
    clean_slate do
      job_classes.map(&.new).each(&.enqueue)

      Mosquito::Api::Queue.all.map(&.size_details).each do |details|
        assert_equal 1, details["waiting"]
        assert_equal 0, details["scheduled"]
        assert_equal 0, details["pending"]
        assert_equal 0, details["dead"]
      end
    end
  end

  it "can fetch job runs from a queue" do
    clean_slate do
      job_classes.each do |job_class|
        enqueued = job_class.new
        enqueued.enqueue

        queue_api = Mosquito::Api::Queue.new job_class.queue.name
        waiting = queue_api.waiting_job_runs

        assert_equal 1, waiting.size
        assert_equal enqueued.class.name.underscore, waiting.first.type
      end
    end
  end

  it "publishes an event when a job is enqueued" do
    eavesdrop { queued_test_job.enqueue }

    assert_message_received /enqueued/
  end

  it "publishes an event when a job is enqueued for later" do
    eavesdrop { queued_test_job.enqueue(60.seconds.from_now) }

    assert_message_received /enqueued/
  end

  it "publishes an event when a job is dequeued" do
    clean_slate do
      # enqueue outside of eavesdrop so only the dequeue is observed
      queued_test_job.enqueue

      eavesdrop { queue.dequeue }
    end

    assert_message_received /dequeued/
  end

  it "publishes an event when a job is rescheduled" do
    clean_slate do
      pending_run = queued_test_job.build_job_run

      eavesdrop do
        queue.enqueue pending_run
        queue.reschedule pending_run, 60.seconds.from_now
      end
    end

    assert_message_received /rescheduled/
  end

  it "publishes an event when a job is forgotten" do
    clean_slate do
      pending_run = queued_test_job.build_job_run

      eavesdrop { queue.forget pending_run }
    end

    assert_message_received /forgotten/
  end

  it "publishes an event when a job is banished" do
    clean_slate do
      pending_run = queued_test_job.build_job_run

      eavesdrop { queue.banish pending_run }
    end

    assert_message_received /banished/
  end
end
================================================
FILE: spec/mosquito/api_spec.cr
================================================
require "../spec_helper"
describe Mosquito::Api do
  let(queued_test_job) { QueuedTestJob.new }
  let(passing_job) { PassingJob.new }

  it "can fetch a list of queues" do
    clean_slate do
      queued_test_job.enqueue
      passing_job.enqueue

      listed = Mosquito::Api.list_queues
      assert_equal 2, listed.size

      # membership is checked by name, so listing order does not matter
      names = listed.map(&.name)
      assert_includes names, queued_test_job.class.queue.name
      assert_includes names, passing_job.class.queue.name
    end
  end
end
================================================
FILE: spec/mosquito/backend/deleting_spec.cr
================================================
require "../../spec_helper"
# Exercises key deletion and the two flavours of flush: the backend-wide
# `backend.flush` and the per-queue `queue.flush`.
describe "Backend deleting" do
  # Randomised names for the queue, key and field used by the examples.
  getter queue_name : String { "test#{rand(1000)}" }
  getter queue : Mosquito::Backend::Queue { backend.queue queue_name }

  getter sample_data do
    { "test" => "#{rand(1000)}" }
  end

  getter key : String { "key-#{rand 1000}" }
  getter field : String { "field-#{rand 1000}" }

  getter job_run : Mosquito::JobRun { Mosquito::JobRun.new("mock_job_run") }

  describe "delete" do
    it "deletes immediately" do
      backend.store key, sample_data
      backend.delete key

      # a deleted key is expected to read back as an empty hash
      blank_data = {} of String => String
      assert_equal blank_data, backend.retrieve(key)
    end

    it "deletes at a ttl" do
      # Since redis is outside the control of timecop, this test is just showing
      # that #delete can be called with a ttl and we trust redis to do its job.
      backend.store key, sample_data
      backend.delete key, in: 1.second
    end
  end

  describe "self.flush" do
    it "wipes the database" do
      clean_slate do
        backend.set key, field, "1"
        backend.flush

        assert_nil backend.get key, field
      end
    end
  end

  describe "#flush" do
    it "empties the queues" do
      clean_slate do
        # add a job_run to waiting
        queue.enqueue job_run

        # add a job_run to scheduled
        queue.schedule job_run, at: 1.second.from_now

        # move a job_run to pending; the dequeued value itself is not needed
        queue.dequeue

        # add a job_run to the dead queue
        queue.terminate job_run

        queue.flush

        empty_set = [] of String
        assert_equal empty_set, queue.list_waiting
        assert_equal empty_set, queue.list_scheduled
        assert_equal empty_set, queue.list_pending
        assert_equal empty_set, queue.list_dead
      end
    end

    it "but doesn't truncate the database" do
      clean_slate do
        # a plain key written outside the queue must survive a queue flush
        backend.set key, field, "value"
        queue.flush

        assert_equal "value", backend.get key, field
      end
    end
  end
end
================================================
FILE: spec/mosquito/backend/executor_spec.cr
================================================
require "../../spec_helper"
describe Mosquito::Backend do
  # Randomised key the averages are pushed to.
  getter key : String { "key-#{rand 1000}" }

  it "can calculate an average" do
    [10, 20, 30].each do |sample|
      backend.average_push key, sample
    end

    assert_equal 20, backend.average key
  end

  it "correctly rolls off old values for the window size" do
    # with a window of 3 only the last three samples (30, 40, 50) remain
    [10, 20, 30, 40, 50].each do |sample|
      backend.average_push key, sample, window_size: 3
    end

    assert_equal 40, backend.average key
  end
end
================================================
FILE: spec/mosquito/backend/expiring_list_spec.cr
================================================
require "../../spec_helper"
describe Mosquito::RedisBackend do
  describe "expiring lists" do
    it "can add an item to a list" do
      start = Time.utc
      list_key = "exp-list-test"
      entries = ["item1", "item2", "item3"]

      # the expiring list methods are called on the concrete redis backend
      redis = backend.as(Mosquito::RedisBackend)

      # push one entry per second, starting at `start`
      Timecop.freeze start do
        redis.expiring_list_push list_key, entries[0]
      end

      Timecop.freeze start + 1.second do
        redis.expiring_list_push list_key, entries[1]
      end

      Timecop.freeze start + 2.seconds do
        redis.expiring_list_push list_key, entries[2]
      end

      # only the entry pushed after the cutoff is expected back
      remaining = redis.expiring_list_fetch(list_key, start + 1.second)
      assert_equal [entries[2]], remaining
    end
  end
end
================================================
FILE: spec/mosquito/backend/hash_storage_spec.cr
================================================
require "../../spec_helper"
describe "Backend hash storage" do
  # Randomised payload, key and field used by the examples.
  getter sample_data : Hash(String,String) { { "test" => "#{rand(1000)}" } }
  getter key : String { "key-#{rand 1000}" }
  getter field : String { "field-#{rand 1000}" }

  it "can store and retrieve" do
    backend.store key, sample_data

    assert_equal sample_data, backend.retrieve(key)
  end

  describe "self.get and set" do
    it "sets and retrieves a value from a hash" do
      backend.set(key, field, "truth")

      assert_equal "truth", backend.get(key, field)
    end
  end

  describe "self.increment" do
    it "adds one" do
      # the counter is seeded as a string and compared as an integer
      backend.set(key, field, "1")

      assert_equal 2, backend.increment(key, field)
    end

    it "can add arbitrary values" do
      backend.set(key, field, "1")

      assert_equal 4, backend.increment(key, field, by: 3)
    end
  end
end
================================================
FILE: spec/mosquito/backend/inspection_spec.cr
================================================
require "../../spec_helper"
describe "Backend inspection" do
  getter backend_name : String { "test#{rand(1000)}" }
  getter queue : Mosquito::Backend::Queue { backend.queue backend_name }

  getter job : QueuedTestJob { QueuedTestJob.new }
  getter job_run : Mosquito::JobRun { Mosquito::JobRun.new("mock_job_run") }

  describe "size" do
    # Puts entries into each of the waiting, pending, scheduled and dead
    # queues.
    def fill_queues
      # add to waiting queue
      2.times { queue.enqueue job_run }

      # move 1 from waiting to pending queue
      queue.dequeue

      # add to scheduled queue
      queue.schedule job_run, at: 1.second.from_now

      # add to dead queue
      queue.terminate job_run
    end

    it "returns the size of the named q" do
      clean_slate do
        fill_queues

        assert_equal 4, queue.size
      end
    end

    it "returns the size of the named q (without the dead_q)" do
      clean_slate do
        fill_queues

        assert_equal 3, queue.size(include_dead: false)
      end
    end
  end

  describe "list" do
    it "can list the waiting jobs" do
      clean_slate do
        runs = Array(Mosquito::JobRun).new(3) { Mosquito::JobRun.new("mock_job_run") }
        runs.each { |run| queue.enqueue run }

        expected_ids = runs.map(&.id).sort
        listed_ids = queue.list_waiting.sort

        assert_equal 3, listed_ids.size
        assert_equal expected_ids, listed_ids
      end
    end

    it "can list the scheduled jobs" do
      clean_slate do
        runs = Array(Mosquito::JobRun).new(3) { Mosquito::JobRun.new("mock_job_run") }
        runs.each { |run| queue.schedule run, at: 1.second.from_now }

        expected_ids = runs.map(&.id).sort
        listed_ids = queue.list_scheduled.sort

        assert_equal 3, listed_ids.size
        assert_equal expected_ids, listed_ids
      end
    end

    it "can list the pending jobs" do
      clean_slate do
        # these runs are stored because the dequeue result is unwrapped
        # with not_nil! below
        runs = Array(Mosquito::JobRun).new(3) { Mosquito::JobRun.new("mock_job_run").tap(&.store) }
        runs.each { |run| queue.enqueue run }

        # the expected ids come from what was actually dequeued
        expected_ids = 3.times.map { queue.dequeue.not_nil!.id }.to_a.sort
        listed_ids = queue.list_pending.sort

        assert_equal 3, listed_ids.size
        assert_equal expected_ids, listed_ids
      end
    end

    it "can list the dead jobs" do
      clean_slate do
        runs = Array(Mosquito::JobRun).new(3) { Mosquito::JobRun.new("mock_job_run") }
        runs.each { |run| queue.terminate run }

        expected_ids = runs.map(&.id).sort
        listed_ids = queue.list_dead.sort

        assert_equal 3, listed_ids.size
        assert_equal expected_ids, listed_ids
      end
    end
  end
end
================================================
FILE: spec/mosquito/backend/lock_spec.cr
================================================
require "../../spec_helper"
# Exercises the backend lock primitives: lock?, unlock and renew_lock?.
describe "distributed locking" do
  # Every example contends for this single key.
  getter key : String { "testing:backend:lock" }
  # Identity of the primary lock holder; "wxyz" plays the competing holder.
  getter instance_id : String { "abcd" }
  getter ttl : Time::Span { 1.second }

  # Runs the block and then removes the lock key from the backend.
  #
  # The delete sits in an `ensure` clause so it also runs when an assertion
  # inside the block raises; otherwise a failing example would leave the key
  # behind for the examples that follow.
  def ensure_unlock(&block)
    yield
  ensure
    Mosquito.backend.delete key
  end

  it "locks" do
    ensure_unlock do
      got_it = Mosquito.backend.lock? key, instance_id, ttl
      assert got_it
    end
  end

  it "doesn't double lock" do
    ensure_unlock do
      hold = Mosquito.backend.lock? key, instance_id, ttl
      assert hold

      # a different instance must not acquire a lock that is already held
      try = Mosquito.backend.lock? key, "wxyz", ttl
      refute try
    end
  end

  it "locks after unlock" do
    ensure_unlock do
      hold = Mosquito.backend.lock? key, instance_id, ttl
      assert hold

      # released by the same instance that acquired it
      Mosquito.backend.unlock key, instance_id

      try = Mosquito.backend.lock? key, "wxyz", ttl
      assert try
    end
  end

  it "renews a lock held by the same instance" do
    ensure_unlock do
      hold = Mosquito.backend.lock? key, instance_id, ttl
      assert hold

      renewed = Mosquito.backend.renew_lock? key, instance_id, ttl
      assert renewed
    end
  end

  it "doesn't renew a lock held by another instance" do
    ensure_unlock do
      hold = Mosquito.backend.lock? key, instance_id, ttl
      assert hold

      renewed = Mosquito.backend.renew_lock? key, "wxyz", ttl
      refute renewed
    end
  end

  it "doesn't renew a lock that doesn't exist" do
    ensure_unlock do
      renewed = Mosquito.backend.renew_lock? key, instance_id, ttl
      refute renewed
    end
  end
end
================================================
FILE: spec/mosquito/backend/overseer_spec.cr
================================================
require "../../spec_helper"
describe Mosquito::Backend do
  it "can keep a list of overseers" do
    clean_slate do
      registered = ["overseer1", "overseer2", "overseer3"]
      registered.each { |id| Mosquito.backend.register_overseer id }

      # compared without sorting, so the listing order is part of the check
      assert_equal registered, Mosquito.backend.list_overseers
    end
  end

  it "can deregister an overseer" do
    clean_slate do
      registered = ["overseer1", "overseer2", "overseer3"]
      registered.each { |id| Mosquito.backend.register_overseer id }

      Mosquito.backend.deregister_overseer "overseer2"

      assert_equal ["overseer1", "overseer3"], Mosquito.backend.list_overseers
    end
  end
end
================================================
FILE: spec/mosquito/backend/queueing_spec.cr
================================================
require "../../spec_helper"
# Exercises the queue operations of the backend: listing, scheduling,
# descheduling, enqueueing, dequeueing, finishing and terminating job runs.
describe "Backend Queues" do
  getter backend_name : String { "test#{rand(1000)}" }
  getter queue : Mosquito::Backend::Queue { backend.queue backend_name }

  getter job : QueuedTestJob { QueuedTestJob.new }
  getter job_run : Mosquito::JobRun { Mosquito::JobRun.new("mock_job_run") }

  describe "list_queues" do
    # Enqueues a job run on test1..test4 and additionally schedules one on
    # test4, so test4 has both a waiting and a scheduled entry.
    def fill_queues
      names = %w|test1 test2 test3 test4|

      names.each do |queue_name|
        backend.queue(queue_name).enqueue job_run
      end

      backend.queue(names.last).schedule job_run, at: 1.second.from_now
    end

    # Enqueues and immediately dequeues a job run on test5..test8, then
    # terminates one on test8.
    # NOTE(review): no example in this describe block calls this helper.
    def fill_uncounted_queues
      names = %w|test5 test6 test7 test8|

      names.each do |queue_name|
        backend.queue(queue_name).tap do |q|
          q.enqueue job_run
          q.dequeue
        end
      end

      backend.queue(names.last).terminate job_run
    end

    it "can get a list of available queues" do
      clean_slate do
        fill_queues
        assert_equal %w|test1 test2 test3 test4|, backend.list_queues.sort
      end
    end

    it "de-dups the queue list" do
      clean_slate do
        # test4 is filled twice by fill_queues but must be listed once
        fill_queues
        assert_equal %w|test1 test2 test3 test4|, backend.list_queues.sort
      end
    end
  end

  describe "schedule" do
    it "adds a job_run to the schedule_q at the time" do
      clean_slate do
        timestamp = 2.seconds.from_now
        job_run = job.build_job_run
        queue.schedule job_run, at: timestamp

        # the expected time is round-tripped through unix milliseconds, so
        # sub-millisecond precision is dropped before comparing
        assert_equal Time.unix_ms(timestamp.to_unix_ms), queue.scheduled_job_run_time job_run
      end
    end
  end

  describe "deschedule" do
    it "returns a job_run if it's due" do
      clean_slate do
        run_time = Time.utc - 2.seconds
        job_run = job.build_job_run
        job_run.store
        queue.schedule job_run, at: run_time

        overdue_job_runs = queue.deschedule
        assert_equal [job_run], overdue_job_runs
      end
    end

    it "returns a blank array when no job_runs exist" do
      clean_slate do
        overdue_job_runs = queue.deschedule
        assert_empty overdue_job_runs
      end
    end

    it "doesn't return job_runs which aren't yet due" do
      clean_slate do
        run_time = Time.utc + 2.seconds
        job_run = job.build_job_run
        job_run.store
        queue.schedule job_run, at: run_time

        overdue_job_runs = queue.deschedule
        assert_empty overdue_job_runs
      end
    end
  end

  describe "enqueue" do
    it "puts a job_run on the waiting_q" do
      clean_slate do
        job_run = job.build_job_run
        queue.enqueue job_run

        waiting_job_runs = queue.list_waiting
        assert_equal [job_run.id], waiting_job_runs
      end
    end
  end

  describe "dequeue" do
    it "returns a job_run object when one is waiting" do
      clean_slate do
        job_run = job.build_job_run
        job_run.store
        queue.enqueue job_run

        waiting_job_run = queue.dequeue
        assert_equal job_run, waiting_job_run
      end
    end

    it "moves the job_run from waiting to pending" do
      clean_slate do
        job_run = job.build_job_run
        job_run.store
        queue.enqueue job_run

        # only the side effect matters here; the return value is covered by
        # the example above
        queue.dequeue

        pending_job_runs = queue.list_pending
        assert_equal [job_run.id], pending_job_runs
      end
    end

    it "returns nil when nothing is waiting" do
      clean_slate do
        assert_nil queue.dequeue
      end
    end

    it "returns nil when a job_run is queued but not stored" do
      clean_slate do
        job_run = job.build_job_run
        # job_run.store # explicitly don't store this one
        queue.enqueue job_run

        waiting_job_run = queue.dequeue
        assert_nil waiting_job_run
      end
    end
  end

  describe "finish" do
    it "removes the job_run from the pending queue" do
      clean_slate do
        job_run = job.build_job_run
        job_run.store

        # first move the job_run from waiting to pending
        queue.enqueue job_run
        waiting_job_run = queue.dequeue
        assert_equal job_run, waiting_job_run

        # now finish it
        queue.finish job_run

        pending_job_runs = queue.list_pending
        assert_empty pending_job_runs
      end
    end
  end

  describe "terminate" do
    it "adds a job_run to the dead queue" do
      clean_slate do
        job_run = job.build_job_run
        job_run.store

        # first move the job_run from waiting to pending
        queue.enqueue job_run
        waiting_job_run = queue.dequeue
        assert_equal job_run, waiting_job_run

        # now terminate it
        queue.terminate job_run

        dead_job_runs = queue.list_dead
        assert_equal [job_run.id], dead_job_runs
      end
    end
  end
end
================================================
FILE: spec/mosquito/backend_spec.cr
================================================
require "../spec_helper"
# These tests are explicitly for code which is inherited from the abstract Backend
describe Mosquito::Backend do
  it "can build a key with two strings" do
    built = Mosquito.backend.build_key("one", "two")

    assert_equal "mosquito:one:two", built
  end

  it "can build a key with an array" do
    built = Mosquito.backend.build_key(["one", "two"])

    assert_equal "mosquito:one:two", built
  end

  it "can build a key with a tuple" do
    built = Mosquito.backend.build_key(*{"one", "two"})

    assert_equal "mosquito:one:two", built
  end

  # The next two examples make no assertions; they only call #queue with
  # each name type.
  it "can be initialized with a string name" do
    Mosquito.backend.queue "string_backend"
  end

  it "can be initialized with a symbol name" do
    Mosquito.backend.queue :symbol_backend
  end

  it "can update a key with a hash" do
    Mosquito.backend.set "key", {"field" => "value", "field2" => "value2"}

    assert_equal "value", Mosquito.backend.get("key", "field")
    assert_equal "value2", Mosquito.backend.get("key", "field2")
  end
end
================================================
FILE: spec/mosquito/base_spec.cr
================================================
require "../spec_helper"
describe Mosquito::Base do
  it "keeps a list of scheduled job_runs" do
    Base.bare_mapping do
      Base.register_job_interval PeriodicTestJob, 1.minute

      first_scheduled = Base.scheduled_job_runs.first
      assert_equal PeriodicTestJob, first_scheduled.class
    end
  end

  it "correctly maps job classes from type strings" do
    Base.bare_mapping do
      Base.register_job_mapping "fizzbuzz", QueuedTestJob

      resolved = Base.job_for_type "fizzbuzz"
      assert_equal QueuedTestJob, resolved
    end
  end
end
================================================
FILE: spec/mosquito/configuration_spec.cr
================================================
require "../spec_helper"
# Exercises the Mosquito configuration setters and validation. Examples that
# change settings do so inside Mosquito.temp_config.
describe "Mosquito Config" do
  it "allows setting / retrieving the connection string" do
    Mosquito.temp_config do
      Mosquito.configuration.backend_connection_string = testing_redis_url
      assert_equal testing_redis_url, Mosquito.configuration.backend_connection_string
    end
  end

  it "enforces missing settings are set" do
    # a freshly built configuration is expected to fail validation
    config = Mosquito::Configuration.new
    assert_raises do
      config.validate
    end
  end

  it "allows setting idle_wait as a float" do
    test_value = 2.4

    Mosquito.temp_config do
      Mosquito.configuration.idle_wait = test_value
      # the float is read back as a time span in seconds
      assert_equal test_value.seconds, Mosquito.configuration.idle_wait
    end
  end

  it "allows setting idle_wait as a time span" do
    test_value = 2.seconds

    Mosquito.temp_config do
      Mosquito.configuration.idle_wait = test_value
      assert_equal test_value, Mosquito.configuration.idle_wait
    end
  end

  it "allows setting successful_job_ttl" do
    test_value = 2

    Mosquito.temp_config do
      Mosquito.configuration.successful_job_ttl = test_value
      assert_equal test_value, Mosquito.configuration.successful_job_ttl
    end
  end

  it "allows setting failed_job_ttl" do
    test_value = 2

    Mosquito.temp_config do
      Mosquito.configuration.failed_job_ttl = test_value
      assert_equal test_value, Mosquito.configuration.failed_job_ttl
    end
  end

  it "allows setting global_prefix string" do
    test_value = "yolo"

    Mosquito.temp_config do
      Mosquito.configuration.global_prefix = test_value
      assert_equal test_value, Mosquito.configuration.global_prefix

      # the prefix is prepended to keys built by the backend
      assert_equal "yolo:mosquito:test", Mosquito.configuration.backend.build_key("test")
    end
  end

  it "allows setting global_prefix nillable" do
    test_value = nil

    Mosquito.temp_config do
      Mosquito.configuration.global_prefix = test_value
      assert_equal test_value, Mosquito.configuration.global_prefix

      # with a nil prefix the built key carries no extra leading segment
      assert_equal "mosquito:test", Mosquito.configuration.backend.build_key("test")
    end
  end

  it "validates when backend_connection_string is set" do
    # no assertion: the example passes as long as validate does not raise
    Mosquito.temp_config do
      Mosquito.configuration.backend_connection_string = testing_redis_url
      Mosquito.configuration.validate
    end
  end
end
================================================
FILE: spec/mosquito/dequeue_adapters/concurrency_limited_dequeue_adapter_spec.cr
================================================
require "../../spec_helper"
describe "Mosquito::ConcurrencyLimitedDequeueAdapter" do
  getter(overseer : MockOverseer) { MockOverseer.new }
  getter(queue_list : MockQueueList) { overseer.queue_list.as(MockQueueList) }

  # Registers the job class mapping and appends the job's queue to the mock
  # queue list.
  def register(job_class : Mosquito::Job.class)
    type_name = job_class.name.underscore
    Mosquito::Base.register_job_mapping type_name, job_class
    queue_list.queues << job_class.queue
  end

  it "dequeues a job when under the limit" do
    clean_slate do
      register QueuedTestJob
      enqueued_run = QueuedTestJob.new.enqueue

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 3,
      })

      work_unit = adapter.dequeue(queue_list)
      refute_nil work_unit

      if work_unit
        assert_equal enqueued_run, work_unit.job_run
        assert_equal QueuedTestJob.queue, work_unit.queue
      end
    end
  end

  it "returns nil when no jobs are available" do
    clean_slate do
      register QueuedTestJob

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 3,
      })

      assert_nil adapter.dequeue(queue_list)
    end
  end

  it "skips a queue that has reached its concurrency limit" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 2,
      })

      # Dequeue twice — should succeed and fill the limit.
      first = adapter.dequeue(queue_list)
      refute_nil first
      assert_equal 1, adapter.active_count("queued_test_job")

      second = adapter.dequeue(queue_list)
      refute_nil second
      assert_equal 2, adapter.active_count("queued_test_job")

      # Third dequeue should be blocked by the limit.
      third = adapter.dequeue(queue_list)
      assert_nil third
    end
  end

  it "allows dequeue again after finished_with" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 1,
      })

      # Fill the single slot.
      first = adapter.dequeue(queue_list)
      refute_nil first
      assert_equal 1, adapter.active_count("queued_test_job")

      # Blocked.
      assert_nil adapter.dequeue(queue_list)

      # Signal that the job finished.
      finished = first.not_nil!
      adapter.finished_with(finished.job_run, finished.queue)
      assert_equal 0, adapter.active_count("queued_test_job")

      # Now dequeue should work again.
      refute_nil adapter.dequeue(queue_list)
    end
  end

  it "does not limit queues not in the limits table" do
    clean_slate do
      register QueuedTestJob
      5.times { QueuedTestJob.new.enqueue }

      # No limit configured for queued_test_job.
      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "other_queue" => 1,
      })

      # Should dequeue all 5 without blocking.
      5.times do |attempt|
        work_unit = adapter.dequeue(queue_list)
        refute_nil work_unit, "Expected dequeue ##{attempt + 1} to succeed"
      end
    end
  end

  it "enforces independent limits across multiple queues" do
    clean_slate do
      register QueuedTestJob
      register EchoJob

      3.times { QueuedTestJob.new.enqueue }
      3.times { EchoJob.new(text: "hello").enqueue }

      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 1,
        "io_queue" => 2,
      })

      # Because of shuffle either queue may be served first, so attempt a
      # dequeue once per enqueued job and keep whatever comes back; the
      # counters are then expected to sit at the configured limits.
      dequeued = [] of Mosquito::WorkUnit
      6.times do
        if unit = adapter.dequeue(queue_list)
          dequeued << unit
        end
      end

      assert_equal 1, adapter.active_count("queued_test_job")
      assert_equal 2, adapter.active_count("io_queue")
      assert_equal 3, dequeued.size
    end
  end

  it "finished_with does not go below zero" do
    adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
      "queued_test_job" => 3,
    })

    # nothing was dequeued, so the active count starts at zero
    never_dequeued = Mosquito::JobRun.new("queued_test_job")
    target_queue = Mosquito::Queue.new("queued_test_job")

    adapter.finished_with(never_dequeued, target_queue)

    assert_equal 0, adapter.active_count("queued_test_job")
  end

  it "can be used via the overseer" do
    clean_slate do
      adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
        "queued_test_job" => 5,
      })
      overseer.dequeue_adapter = adapter

      register QueuedTestJob
      enqueued_run = QueuedTestJob.new.enqueue

      work_unit = overseer.dequeue_job?
      refute_nil work_unit

      if work_unit
        assert_equal enqueued_run, work_unit.job_run
      end
    end
  end
end
================================================
FILE: spec/mosquito/dequeue_adapters/remote_config_dequeue_adapter_spec.cr
================================================
require "../../spec_helper"
describe "Mosquito::RemoteConfigDequeueAdapter" do
  getter(overseer : MockOverseer) { MockOverseer.new }
  getter(queue_list : MockQueueList) { overseer.queue_list.as(MockQueueList) }

  # Registers the job class under its underscored name and appends its queue
  # to the mock queue list.
  # NOTE(review): the shuffle and weighted adapter specs append to
  # `queue_list.discovered_queues` instead — confirm `queues` is the intended
  # collection here.
  def register(job_class : Mosquito::Job.class)
    mapping_name = job_class.name.underscore
    Mosquito::Base.register_job_mapping mapping_name, job_class
    queue_list.queues << job_class.queue
  end

  it "uses defaults when no remote config is present" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 2},
        refresh_interval: 0.seconds,
      )

      # The default limit of 2 admits the first two dequeues...
      first = adapter.dequeue(queue_list)
      refute_nil first
      second = adapter.dequeue(queue_list)
      refute_nil second

      # ...and turns the third one away.
      third = adapter.dequeue(queue_list)
      assert_nil third
    end
  end

  it "picks up remote limits from the backend" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      # The local default is 2; the stored remote value tightens it to 1.
      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 2},
        refresh_interval: 0.seconds,
      )
      Mosquito::RemoteConfigDequeueAdapter.store_limits({"queued_test_job" => 1})

      first = adapter.dequeue(queue_list)
      refute_nil first

      # The remote limit of 1 is already used up.
      second = adapter.dequeue(queue_list)
      assert_nil second
    end
  end

  it "merges remote limits on top of defaults" do
    clean_slate do
      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queue_a" => 3, "queue_b" => 5},
        refresh_interval: 0.seconds,
      )

      # The remote config replaces queue_a, leaves queue_b alone and
      # introduces queue_c.
      remote = {
        "queue_a" => 1,
        "queue_c" => 7,
      }
      Mosquito::RemoteConfigDequeueAdapter.store_limits(remote)
      adapter.refresh_limits

      assert_equal 1, adapter.limits["queue_a"]
      assert_equal 5, adapter.limits["queue_b"]
      assert_equal 7, adapter.limits["queue_c"]
    end
  end

  it "falls back to defaults when remote config is cleared" do
    clean_slate do
      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queue_a" => 3},
        refresh_interval: 0.seconds,
      )

      # With a remote override in place the remote value is reported.
      Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 1})
      adapter.refresh_limits
      assert_equal 1, adapter.limits["queue_a"]

      # Once the override is cleared the default shows through again.
      Mosquito::RemoteConfigDequeueAdapter.clear_limits
      adapter.refresh_limits
      assert_equal 3, adapter.limits["queue_a"]
    end
  end

  it "respects refresh_interval and does not poll on every dequeue" do
    clean_slate do
      register QueuedTestJob
      3.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 3},
        refresh_interval: 1.hour,
      )

      # The very first dequeue performs the initial refresh.
      adapter.dequeue(queue_list)

      # A tighter limit written afterwards must be ignored until the
      # one-hour refresh interval has elapsed.
      Mosquito::RemoteConfigDequeueAdapter.store_limits({"queued_test_job" => 1})

      second = adapter.dequeue(queue_list)
      refute_nil second, "Expected dequeue to succeed because refresh hasn't fired"
    end
  end

  it "preserves in-flight counts when limits are refreshed" do
    clean_slate do
      register QueuedTestJob
      2.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 1},
        refresh_interval: 0.seconds,
      )

      first = adapter.dequeue(queue_list)
      refute_nil first
      assert_equal 1, adapter.active_count("queued_test_job")

      # Swapping in new limits must leave the in-flight counter untouched.
      Mosquito::RemoteConfigDequeueAdapter.store_limits({"queued_test_job" => 2})
      adapter.refresh_limits
      assert_equal 1, adapter.active_count("queued_test_job")

      in_flight = first.not_nil!
      adapter.finished_with(in_flight.job_run, in_flight.queue)
      assert_equal 0, adapter.active_count("queued_test_job")
    end
  end

  it "delegates finished_with to the inner adapter" do
    clean_slate do
      register QueuedTestJob
      2.times { QueuedTestJob.new.enqueue }

      adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 1},
        refresh_interval: 0.seconds,
      )

      first = adapter.dequeue(queue_list)
      refute_nil first
      assert_equal 1, adapter.active_count("queued_test_job")

      # The single slot is taken, so the next attempt is refused.
      blocked = adapter.dequeue(queue_list)
      assert_nil blocked

      # Reporting completion frees the slot...
      in_flight = first.not_nil!
      adapter.finished_with(in_flight.job_run, in_flight.queue)
      assert_equal 0, adapter.active_count("queued_test_job")

      # ...and dequeuing works again.
      resumed = adapter.dequeue(queue_list)
      refute_nil resumed
    end
  end

  it "can be used via the overseer" do
    clean_slate do
      remote_adapter = Mosquito::RemoteConfigDequeueAdapter.new(
        defaults: {"queued_test_job" => 5},
        refresh_interval: 0.seconds,
      )
      overseer.dequeue_adapter = remote_adapter

      register QueuedTestJob
      enqueued_run = QueuedTestJob.new.enqueue

      work_unit = overseer.dequeue_job?
      refute_nil work_unit
      assert_equal enqueued_run, work_unit.job_run if work_unit
    end
  end

  describe "per-overseer configuration" do
    it "uses per-overseer limits when overseer_id is set" do
      clean_slate do
        register QueuedTestJob
        3.times { QueuedTestJob.new.enqueue }

        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queued_test_job" => 3},
          overseer_id: "gpu-worker-1",
          refresh_interval: 0.seconds,
        )

        # Store a limit of 1 that applies to this overseer only.
        Mosquito::RemoteConfigDequeueAdapter.store_limits(
          {"queued_test_job" => 1}, overseer_id: "gpu-worker-1"
        )

        first = adapter.dequeue(queue_list)
        refute_nil first

        # The per-overseer limit is exhausted.
        second = adapter.dequeue(queue_list)
        assert_nil second
      end
    end

    it "per-overseer limits override global limits" do
      clean_slate do
        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queue_a" => 10},
          overseer_id: "gpu-worker-1",
          refresh_interval: 0.seconds,
        )

        # Global config says 5, the per-overseer config says 2; the more
        # specific one is expected to win.
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 5})
        Mosquito::RemoteConfigDequeueAdapter.store_limits(
          {"queue_a" => 2}, overseer_id: "gpu-worker-1"
        )
        adapter.refresh_limits

        assert_equal 2, adapter.limits["queue_a"]
      end
    end

    it "falls back to global when no per-overseer key exists" do
      clean_slate do
        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queue_a" => 10},
          overseer_id: "gpu-worker-1",
          refresh_interval: 0.seconds,
        )

        # Only a global value is stored; nothing is stored for this overseer.
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 5})
        adapter.refresh_limits

        assert_equal 5, adapter.limits["queue_a"]
      end
    end

    it "merges defaults, global, and per-overseer layers" do
      clean_slate do
        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queue_a" => 10, "queue_b" => 20, "queue_c" => 30},
          overseer_id: "gpu-worker-1",
          refresh_interval: 0.seconds,
        )

        # Global layer: replaces queue_a and introduces queue_d.
        Mosquito::RemoteConfigDequeueAdapter.store_limits({
          "queue_a" => 5,
          "queue_d" => 40,
        })

        # Per-overseer layer: replaces queue_a once more, plus queue_b.
        Mosquito::RemoteConfigDequeueAdapter.store_limits(
          {"queue_a" => 1, "queue_b" => 2},
          overseer_id: "gpu-worker-1"
        )
        adapter.refresh_limits

        merged = adapter.limits
        assert_equal 1, merged["queue_a"]  # per-overseer wins
        assert_equal 2, merged["queue_b"]  # per-overseer wins
        assert_equal 30, merged["queue_c"] # default (untouched)
        assert_equal 40, merged["queue_d"] # global (no per-overseer)
      end
    end

    it "adapters without overseer_id ignore per-overseer keys" do
      clean_slate do
        adapter = Mosquito::RemoteConfigDequeueAdapter.new(
          defaults: {"queue_a" => 10},
          refresh_interval: 0.seconds,
        )

        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 5})
        Mosquito::RemoteConfigDequeueAdapter.store_limits(
          {"queue_a" => 1}, overseer_id: "gpu-worker-1"
        )
        adapter.refresh_limits

        # No overseer_id was given, so only the global value applies.
        assert_equal 5, adapter.limits["queue_a"]
      end
    end
  end

  describe "class-level storage helpers" do
    it "round-trips global limits through the backend" do
      clean_slate do
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 3, "queue_b" => 7})

        read_back = Mosquito::RemoteConfigDequeueAdapter.stored_limits
        assert_equal 3, read_back["queue_a"]
        assert_equal 7, read_back["queue_b"]
      end
    end

    it "round-trips per-overseer limits through the backend" do
      clean_slate do
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 1}, overseer_id: "worker-2")

        read_back = Mosquito::RemoteConfigDequeueAdapter.stored_limits("worker-2")
        assert_equal 1, read_back["queue_a"]

        # Writing for one overseer must not leak into the global limits.
        global_limits = Mosquito::RemoteConfigDequeueAdapter.stored_limits
        assert_equal(Hash(String, Int32).new, global_limits)
      end
    end

    it "store_limits overwrites rather than merges (stale entries are removed)" do
      clean_slate do
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 3, "queue_b" => 7})
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 1})

        read_back = Mosquito::RemoteConfigDequeueAdapter.stored_limits
        assert_equal 1, read_back["queue_a"]
        refute read_back.has_key?("queue_b"), "queue_b should have been removed by the overwrite"
      end
    end

    it "store_limits with overseer_id overwrites rather than merges" do
      clean_slate do
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 3, "queue_b" => 7}, overseer_id: "worker-1")
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 1}, overseer_id: "worker-1")

        read_back = Mosquito::RemoteConfigDequeueAdapter.stored_limits("worker-1")
        assert_equal 1, read_back["queue_a"]
        refute read_back.has_key?("queue_b"), "queue_b should have been removed by the overwrite"
      end
    end

    it "store_limits with an empty hash removes all stored limits" do
      clean_slate do
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 3})
        Mosquito::RemoteConfigDequeueAdapter.store_limits({} of String => Int32)

        read_back = Mosquito::RemoteConfigDequeueAdapter.stored_limits
        assert_equal(Hash(String, Int32).new, read_back)
      end
    end

    it "returns an empty hash when no limits are stored" do
      clean_slate do
        read_back = Mosquito::RemoteConfigDequeueAdapter.stored_limits
        assert_equal(Hash(String, Int32).new, read_back)
      end
    end

    it "clear_limits removes global stored data" do
      clean_slate do
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 1})
        Mosquito::RemoteConfigDequeueAdapter.clear_limits

        read_back = Mosquito::RemoteConfigDequeueAdapter.stored_limits
        assert_equal(Hash(String, Int32).new, read_back)
      end
    end

    it "clear_limits with overseer_id removes only that overseer's data" do
      clean_slate do
        Mosquito::RemoteConfigDequeueAdapter.store_limits({"queue_a" => 5})
        Mosquito::RemoteConfigDequeueAdapter.store_limits(
          {"queue_a" => 1}, overseer_id: "worker-1"
        )

        Mosquito::RemoteConfigDequeueAdapter.clear_limits(overseer_id: "worker-1")

        # The per-overseer entry has been wiped...
        overseer_limits = Mosquito::RemoteConfigDequeueAdapter.stored_limits("worker-1")
        assert_equal(Hash(String, Int32).new, overseer_limits)

        # ...while the global entry survives.
        global_limits = Mosquito::RemoteConfigDequeueAdapter.stored_limits
        assert_equal 5, global_limits["queue_a"]
      end
    end
  end

  describe "Api integration" do
    it "reads and writes global limits through the Api module" do
      clean_slate do
        Mosquito::Api.set_concurrency_limits({"queue_x" => 10})

        read_back = Mosquito::Api.concurrency_limits
        assert_equal 10, read_back["queue_x"]
      end
    end

    it "reads and writes per-overseer limits through the Api module" do
      clean_slate do
        Mosquito::Api.set_concurrency_limits(
          {"queue_x" => 2}, overseer_id: "gpu-worker-1"
        )

        read_back = Mosquito::Api.concurrency_limits(overseer_id: "gpu-worker-1")
        assert_equal 2, read_back["queue_x"]

        # The global limits stay empty.
        global_limits = Mosquito::Api.concurrency_limits
        assert_equal(Hash(String, Int32).new, global_limits)
      end
    end
  end
end
================================================
FILE: spec/mosquito/dequeue_adapters/shuffle_dequeue_adapter_spec.cr
================================================
require "../../spec_helper"
describe "Mosquito::ShuffleDequeueAdapter" do
  getter(overseer : MockOverseer) { MockOverseer.new }
  getter(queue_list : MockQueueList) { overseer.queue_list.as(MockQueueList) }
  getter(executor : MockExecutor) { overseer.executors.first.as(MockExecutor) }

  # Registers the job class under its underscored name and adds its queue to
  # the mock queue list's discovered queues.
  def register(job_class : Mosquito::Job.class)
    mapping_name = job_class.name.underscore
    Mosquito::Base.register_job_mapping mapping_name, job_class
    queue_list.discovered_queues << job_class.queue
  end

  it "is the default adapter" do
    configured = Mosquito.configuration.dequeue_adapter
    assert_instance_of Mosquito::ShuffleDequeueAdapter, configured
  end

  it "dequeues a job from the queue list" do
    clean_slate do
      register QueuedTestJob
      enqueued_run = QueuedTestJob.new.enqueue

      work_unit = Mosquito::ShuffleDequeueAdapter.new.dequeue(queue_list)
      refute_nil work_unit

      if work_unit
        assert_equal enqueued_run, work_unit.job_run
        assert_equal QueuedTestJob.queue, work_unit.queue
      end
    end
  end

  it "returns nil when no jobs are available" do
    clean_slate do
      # The queue is known, but nothing has been enqueued on it.
      register QueuedTestJob

      work_unit = Mosquito::ShuffleDequeueAdapter.new.dequeue(queue_list)
      assert_nil work_unit
    end
  end

  describe "custom adapter" do
    it "can be swapped on the overseer" do
      clean_slate do
        null_adapter = NullDequeueAdapter.new
        overseer.dequeue_adapter = null_adapter

        register QueuedTestJob
        QueuedTestJob.new.enqueue

        # A job is waiting, yet the null adapter yields nothing — and it was
        # consulted exactly once.
        assert_nil overseer.dequeue_job?
        assert_equal 1, null_adapter.dequeue_count
      end
    end

    it "receives the queue list when dequeuing" do
      clean_slate do
        spy_adapter = SpyDequeueAdapter.new
        overseer.dequeue_adapter = spy_adapter

        register QueuedTestJob
        queue_list.discovered_queues << Mosquito::Queue.new("extra_queue")

        overseer.dequeue_job?

        # Both queues must have been offered to the adapter.
        seen = spy_adapter.checked_queues
        assert_includes seen, "queued_test_job"
        assert_includes seen, "extra_queue"
      end
    end
  end

  describe "overseer integration" do
    it "dequeue_job? delegates to the adapter" do
      clean_slate do
        register QueuedTestJob
        enqueued_run = QueuedTestJob.new.enqueue

        work_unit = overseer.dequeue_job?
        refute_nil work_unit
        assert_equal enqueued_run, work_unit.job_run if work_unit
      end
    end
  end
end
================================================
FILE: spec/mosquito/dequeue_adapters/weighted_dequeue_adapter_spec.cr
================================================
require "../../spec_helper"
describe "Mosquito::WeightedDequeueAdapter" do
  getter(overseer : MockOverseer) { MockOverseer.new }
  getter(queue_list : MockQueueList) { overseer.queue_list.as(MockQueueList) }

  # Registers the job class under its underscored name and adds its queue to
  # the mock queue list's discovered queues.
  def register(job_class : Mosquito::Job.class)
    mapping_name = job_class.name.underscore
    Mosquito::Base.register_job_mapping mapping_name, job_class
    queue_list.discovered_queues << job_class.queue
  end

  it "dequeues a job from a weighted queue" do
    clean_slate do
      register QueuedTestJob
      enqueued_run = QueuedTestJob.new.enqueue

      weights = {"queued_test_job" => 5}
      work_unit = Mosquito::WeightedDequeueAdapter.new(weights).dequeue(queue_list)
      refute_nil work_unit

      if work_unit
        assert_equal enqueued_run, work_unit.job_run
        assert_equal QueuedTestJob.queue, work_unit.queue
      end
    end
  end

  it "returns nil when no jobs are available" do
    clean_slate do
      # The queue is known, but nothing has been enqueued on it.
      register QueuedTestJob

      weights = {"queued_test_job" => 3}
      work_unit = Mosquito::WeightedDequeueAdapter.new(weights).dequeue(queue_list)
      assert_nil work_unit
    end
  end

  it "assigns default weight of 1 to unconfigured queues" do
    clean_slate do
      register QueuedTestJob
      enqueued_run = QueuedTestJob.new.enqueue

      # Only an unrelated queue carries a weight; queued_test_job has none
      # configured and must still be served.
      weights = {"other_queue" => 10}
      work_unit = Mosquito::WeightedDequeueAdapter.new(weights).dequeue(queue_list)
      refute_nil work_unit
      assert_equal enqueued_run, work_unit.job_run if work_unit
    end
  end

  it "higher-weight queues are dequeued more often" do
    clean_slate do
      register QueuedTestJob
      register EchoJob

      adapter = Mosquito::WeightedDequeueAdapter.new({
        "queued_test_job" => 10,
        "io_queue"        => 1,
      })

      # 200 jobs per queue: far more than the 50 sampled below, so neither
      # queue can run dry during the sample.
      200.times { QueuedTestJob.new.enqueue }
      200.times { EchoJob.new(text: "hello").enqueue }

      # Count how many of 50 dequeues each queue served.
      tally = Hash(String, Int32).new(0)
      50.times do
        if work_unit = adapter.dequeue(queue_list)
          tally[work_unit.queue.name] += 1
        end
      end

      # At a 10:1 weighting the heavy queue is expected to come out ahead.
      heavy_count = tally.fetch("queued_test_job", 0)
      light_count = tally.fetch("io_queue", 0)
      assert heavy_count > light_count, "Expected queued_test_job (#{heavy_count}) to be dequeued more than io_queue (#{light_count})"
    end
  end

  it "can be used via the overseer" do
    clean_slate do
      weighted_adapter = Mosquito::WeightedDequeueAdapter.new({
        "queued_test_job" => 5,
      })
      overseer.dequeue_adapter = weighted_adapter

      register QueuedTestJob
      enqueued_run = QueuedTestJob.new.enqueue

      work_unit = overseer.dequeue_job?
      refute_nil work_unit
      assert_equal enqueued_run, work_unit.job_run if work_unit
    end
  end
end
================================================
FILE: spec/mosquito/exceptions_spec.cr
================================================
require "../spec_helper"
describe "Mosquito exceptions" do
  # These examples contain no assertions: each passes as long as the
  # exception class exists and can be constructed from a message.

  it "declares JobFailed" do
    Mosquito::JobFailed.new("test")
  end

  it "declares DoubleRun" do
    Mosquito::DoubleRun.new("test")
  end

  it "declares IrretrievableParameter" do
    Mosquito::IrretrievableParameter.new("test")
  end
end
================================================
FILE: spec/mosquito/job/job_state_spec.cr
================================================
require "../../spec_helper"
describe Mosquito::Job::State do
  describe "executed?" do
    it "Marks jobs as executed when they've either succeeded or failed" do
      finished_states = [
        Mosquito::Job::State::Succeeded,
        Mosquito::Job::State::Failed,
      ]
      finished_states.each { |state| assert state.executed? }
    end

    it "Doesn't mark jobs as executed in any other state" do
      unfinished_states = [
        Mosquito::Job::State::Initialization,
        Mosquito::Job::State::Running,
        Mosquito::Job::State::Aborted,
        Mosquito::Job::State::Preempted,
      ]
      unfinished_states.each { |state| refute state.executed? }
    end
  end
end
================================================
FILE: spec/mosquito/job_run/rescheduling_spec.cr
================================================
require "../../spec_helper"
describe "job_run rescheduling" do
  @failing_job_run : Mosquito::JobRun?
  getter failing_job_run : Mosquito::JobRun { create_job_run "failing_job" }

  it "calculates reschedule interval correctly" do
    # Keyed by how many times the job run has been executed so far; the value
    # is the delay in seconds expected after that execution.
    expected_delays = {
      1 => 2,
      2 => 8,
      3 => 18,
      4 => 32,
    }

    # Each pass reloads the stored job run and runs it once more before
    # checking the delay.
    expected_delays.each_value do |delay|
      reloaded = Mosquito::JobRun.retrieve(failing_job_run.id.not_nil!).not_nil!
      reloaded.run
      assert_equal delay.seconds, reloaded.reschedule_interval
    end
  end

  it "prevents rescheduling a job too many times" do
    # Reloads the stored job run, executes it once and hands it back.
    run_once = -> do
      Mosquito::JobRun.retrieve(failing_job_run.id.not_nil!).not_nil!.tap(&.run)
    end

    max_reschedules = 4
    max_reschedules.times do
      attempt = run_once.call
      assert attempt.rescheduleable?
    end

    # One execution past the cap is no longer rescheduleable.
    final_attempt = run_once.call
    refute final_attempt.rescheduleable?
  end

  it "counts retries upon failure" do
    assert_equal 0, failing_job_run.retry_count
    failing_job_run.run
    assert_equal 1, failing_job_run.retry_count
  end

  it "updates the backend when a failure happens" do
    failing_job_run.run

    # Read the job run back from the backend rather than trusting the
    # in-memory copy.
    persisted = Mosquito::JobRun.retrieve failing_job_run.id.not_nil!
    assert_equal 1, persisted.not_nil!.retry_count
  end

  it "does not reschedule a job which fails with retry=false" do
    job = FailingJob.new
    job.fail_with_retry = false
    job.run

    refute job.should_retry
  end

  describe "preempted jobs" do
    it "sets state to preempted and does not execute" do
      job = PreemptingJob.new
      job.run

      assert job.preempted?
      refute job.executed?
    end

    it "uses normal backoff when preempted without an until time" do
      job = PreemptingJob.new
      job.run

      assert_equal 2.seconds, job.reschedule_interval(1)
      assert_equal 8.seconds, job.reschedule_interval(2)
    end

    it "uses the until time for reschedule interval when provided" do
      Timecop.freeze(Time.utc) do
        resume_at = Time.utc + 30.seconds

        job = PreemptingJob.new
        job.preempt_until = resume_at
        job.run

        assert_equal 30.seconds, job.reschedule_interval(1)
      end
    end

    it "falls back to normal backoff when until time is in the past" do
      Timecop.freeze(Time.utc) do
        already_elapsed = Time.utc - 5.seconds

        job = PreemptingJob.new
        job.preempt_until = already_elapsed
        job.run

        assert_equal 2.seconds, job.reschedule_interval(1)
      end
    end

    it "respects rescheduleable? override when preempted" do
      job = NonReschedulablePreemptingJob.new
      job.run

      assert job.preempted?
      refute job.rescheduleable?(0)
    end
  end
end
================================================
FILE: spec/mosquito/job_run/running_spec.cr
================================================
require "../../spec_helper"
describe "job_run running" do
  # Job run timestamps are persisted as a unix epoch in milliseconds, so
  # anything finer than a millisecond is lost. This truncates a time to the
  # start of its millisecond so it can be compared with a stored timestamp.
  def at_beginning_of_millisecond(time)
    whole_second = time - time.nanosecond.nanoseconds
    whole_second + time.millisecond.milliseconds
  end

  it "uses the lookup table to build a job" do
    built = create_job_run.build_job
    assert_instance_of JobWithConfig, built
  end

  it "populates the variables of a job" do
    built = create_job_run.build_job
    assert_instance_of JobWithConfig, built
    assert_equal job_run_config, built.as(JobWithConfig).config
  end

  it "runs the job" do
    JobWithPerformanceCounter.reset_performance_counter!
    create_job_run("job_with_performance_counter").run
    assert_equal 1, JobWithPerformanceCounter.performances
  end

  it "sets started_at when a job is run" do
    frozen_at = at_beginning_of_millisecond Time.utc
    job_run = create_job_run

    Timecop.freeze(frozen_at) { job_run.run }

    assert_equal frozen_at, job_run.started_at
  end

  it "sets finished_at when a job is run" do
    frozen_at = at_beginning_of_millisecond Time.utc
    job_run = create_job_run

    Timecop.freeze(frozen_at) { job_run.run }

    assert_equal frozen_at, job_run.finished_at
  end

  it "has nil timestamps before a job is run" do
    job_run = create_job_run
    assert_nil job_run.started_at
    assert_nil job_run.finished_at
  end
end
================================================
FILE: spec/mosquito/job_run/storage_spec.cr
================================================
require "../../spec_helper"
describe "job_run storage" do
  getter backend : Mosquito::Backend::Queue = Mosquito.backend.queue("testing")

  getter config = {
    "year" => "1752",
    "name" => "the year september lost 12 days",
  }

  # A job run carrying `config`, already written to the backend.
  getter job_run : Mosquito::JobRun do
    fresh = Mosquito::JobRun.new("mock_job_run")
    fresh.config = config
    fresh.store
    fresh
  end

  it "builds the backend key correctly" do
    assert_equal "mosquito:job_run:1", Mosquito::JobRun.config_key("1")
    assert_equal "mosquito:job_run:#{job_run.id}", job_run.config_key
  end

  it "can store and retrieve a job_run with attributes" do
    fetched = Mosquito::JobRun.retrieve job_run.id

    if fetched.nil?
      flunk "Could not retrieve job_run"
    else
      assert_equal config, fetched.config
    end
  end

  it "stores job_runs in the backend" do
    raw = backend.backend.retrieve Mosquito::JobRun.config_key(job_run.id)

    # Drop the bookkeeping fields so only the user-supplied config remains.
    user_config = raw.reject! %w|type enqueue_time retry_count|
    assert_equal config, user_config
  end

  it "can delete a job_run" do
    job_run.delete

    remaining = backend.backend.retrieve job_run.config_key
    assert_empty remaining
  end

  it "can set a timed delete on a job_run" do
    ttl = 10
    job_run.delete(in: ttl)

    reported_ttl = backend.backend.expires_in job_run.config_key
    assert_equal ttl, reported_ttl
  end

  it "can reload a job_run" do
    job_run.reload
  end

  describe "timestamp retrieval" do
    # Job run timestamps are persisted as a unix epoch in milliseconds, so
    # anything finer than a millisecond is lost. This truncates a time to the
    # start of its millisecond so it can be compared with a stored timestamp.
    def at_beginning_of_millisecond(time)
      whole_second = time - time.nanosecond.nanoseconds
      whole_second + time.millisecond.milliseconds
    end

    it "retrieves started_at and finished_at timestamps" do
      frozen_at = at_beginning_of_millisecond Time.utc
      executed_run = create_job_run

      Timecop.freeze(frozen_at) { executed_run.run }

      fetched = Mosquito::JobRun.retrieve executed_run.id
      if fetched.nil?
        flunk "Could not retrieve job_run"
      else
        assert_equal frozen_at, fetched.started_at
        assert_equal frozen_at, fetched.finished_at
      end
    end

    it "does not include timestamps in config after retrieve" do
      executed_run = create_job_run
      executed_run.run

      fetched = Mosquito::JobRun.retrieve executed_run.id
      if fetched.nil?
        flunk "Could not retrieve job_run"
      else
        refute fetched.config.has_key?("started_at")
        refute fetched.config.has_key?("finished_at")
      end
    end

    it "retrieves nil timestamps for unexecuted job runs" do
      fetched = Mosquito::JobRun.retrieve job_run.id
      if fetched.nil?
        flunk "Could not retrieve job_run"
      else
        assert_nil fetched.started_at
        assert_nil fetched.finished_at
      end
    end
  end

  it "persists overseer_id via claimed_by and retrieves it" do
    test_overseer = MockOverseer.new
    job_run.claimed_by test_overseer

    # No explicit store here: claimed_by alone must make the id retrievable.
    fetched = Mosquito::JobRun.retrieve job_run.id
    assert fetched
    assert_equal test_overseer.observer.instance_id, fetched.not_nil!.overseer_id
  end

  it "round-trips overseer_id through store and retrieve" do
    test_overseer = MockOverseer.new
    job_run.claimed_by test_overseer
    job_run.store

    fetched = Mosquito::JobRun.retrieve job_run.id
    assert fetched
    assert_equal test_overseer.observer.instance_id, fetched.not_nil!.overseer_id
  end
end
================================================
FILE: spec/mosquito/job_run_spec.cr
================================================
require "../spec_helper"
require "./job_run/*"
================================================
FILE: spec/mosquito/job_spec.cr
================================================
require "../spec_helper"
describe Mosquito::Job do
  # Lazily-built job instances shared by the examples below.
  getter(passing_job) { PassingJob.new }
  getter(failing_job) { FailingJob.new }
  getter(not_implemented_job) { NotImplementedJob.new }
  getter(throttled_job) { ThrottledJob.new }
  getter(hooked_job) { JobWithHooks.new }

  describe "run" do
    it "captures JobFailed and marks success=false" do
      failing_job.run
      assert failing_job.failed?
    end

    it "sets #executed? and #succeeded?" do
      refute passing_job.executed?
      passing_job.run
      assert passing_job.executed?
      assert passing_job.succeeded?
    end

    it "emits a failure message when #fail contains a reason message" do
      clear_logs
      failing_job.run
      assert failing_job.failed?
      assert_logs_match failing_job.exception_message
    end

    it "exception messages are sent to the logs" do
      clear_logs
      failing_job.fail_with_exception = true
      failing_job.run
      assert failing_job.failed?
      assert_logs_match failing_job.exception_message
    end

    it "captures and marks failure for other exceptions" do
      clear_logs
      # No exception is recorded until the job has actually run.
      assert_nil failing_job.exception
      failing_job.fail_with_exception = true
      failing_job.run
      assert failing_job.failed?
      refute_nil failing_job.exception
    end

    it "sets success=false when #fail-ed" do
      failing_job.run
      refute failing_job.succeeded?
    end

    it "fails when no perform is implemented, and a message is sent to the logs" do
      clear_logs
      not_implemented_job.run
      assert not_implemented_job.failed?
      assert_logs_match "No job definition found"
    end
  end

  it "fetches the default queue" do
    assert_equal "passing_job", PassingJob.queue.name
  end

  it "fetches the named queue" do
    assert_equal "io_queue", EchoJob.queue.name
  end

  describe "reschedule interval" do
    it "calculates reschedule interval correctly" do
      # Argument passed to reschedule_interval => expected delay in seconds.
      intervals = {
        1 => 2,
        2 => 8,
        3 => 18,
        4 => 32,
      }

      intervals.each do |count, delay|
        assert_equal delay.seconds, passing_job.reschedule_interval(count)
      end
    end

    it "allows overriding the reschedule interval" do
      # The custom job is expected to answer 4 seconds for every count.
      intervals = 1..4
      intervals.each do |count|
        assert_equal 4.seconds, CustomRescheduleIntervalJob.new.reschedule_interval(count)
      end
    end
  end

  describe "metadata" do
    it "returns a metadata instance" do
      assert_instance_of Mosquito::Metadata, passing_job.metadata
    end

    it "is a memoized instance" do
      one = passing_job.metadata
      two = passing_job.metadata
      assert_same one, two
    end
  end

  describe "self.metadata" do
    it "returns a metadata instance" do
      assert PassingJob.metadata.is_a?(Mosquito::Metadata)
    end

    it "is readonly" do
      metadata = PassingJob.metadata
      assert metadata.readonly?
    end
  end

  describe "self.metadata_key" do
    it "includes the class name" do
      assert_includes PassingJob.metadata_key, "passing_job"
    end
  end

  describe "before_hooks" do
    it "should execute hooks" do
      clear_logs
      hooked_job.should_fail = false
      hooked_job.run
      assert_logs_match "Before Hook Executed"
      assert_logs_match "2nd Before Hook Executed"
      assert_logs_match "Perform Executed"
    end

    it "should not exec when a before hook fails the job" do
      clear_logs
      hooked_job.should_fail = true
      hooked_job.run
      # Both before hooks are logged, but perform is not.
      assert_logs_match "Before Hook Executed"
      assert_logs_match "2nd Before Hook Executed"
      refute_logs_match "Perform Executed"
    end
  end

  describe "after_hooks" do
    it "should execute `after` hooks" do
      clear_logs
      hooked_job.should_fail = false
      hooked_job.run
      assert_logs_match "After Hook Executed"
      assert_logs_match "2nd After Hook Executed"
      assert_logs_match "Perform Executed"
    end

    it "should run the `after` hooks even if a job fails" do
      clear_logs
      hooked_job.should_fail = true
      hooked_job.run
      # Both after hooks are logged although perform is not.
      assert_logs_match "After Hook Executed"
      assert_logs_match "2nd After Hook Executed"
      refute_logs_match "Perform Executed"
    end
  end
end
================================================
FILE: spec/mosquito/key_builder_spec.cr
================================================
require "../spec_helper"
describe Mosquito::KeyBuilder do
  # Every example expects the parts to come back joined by a single colon.

  it "builds keys from tuples" do
    built = KeyBuilder.build({:fizz, :buzz})
    assert_equal "fizz:buzz", built
  end

  it "builds keys from strings" do
    built = KeyBuilder.build("fizz", "buzz")
    assert_equal "fizz:buzz", built
  end

  it "builds keys from an array" do
    built = KeyBuilder.build(["fizz", "buzz"])
    assert_equal "fizz:buzz", built
  end

  it "builds keys from integers" do
    built = KeyBuilder.build("fizz", 6)
    assert_equal "fizz:6", built
  end

  it "builds keys from floats" do
    built = KeyBuilder.build(2.4, "buzz")
    assert_equal "2.4:buzz", built
  end
end
================================================
FILE: spec/mosquito/metadata_spec.cr
================================================
require "../spec_helper"
describe Mosquito::Metadata do
  # The store and field names get a random numeric suffix each time they are
  # first built.
  getter(store_name : String) { "test_store#{rand 1000}" }
  getter(store : Metadata) { Metadata.new store_name }
  getter(field : String) { "foo#{rand 1000}" }

  it "increments" do
    clean_slate do
      store.increment field
      value = store[field]?
      assert_equal "1", value

      store.increment field
      value = store[field]?
      assert_equal "2", value
    end
  end

  it "increments with a configurable amount" do
    clean_slate do
      store.increment field
      value = store[field]?.not_nil!
      assert_equal "1", value

      delta = 2
      store.increment field, by: delta
      new_value = store[field]?.not_nil!
      assert_equal delta, (new_value.to_i - value.to_i)
    end
  end

  it "decrements" do
    clean_slate do
      # Decrementing a field that was never set starts from zero.
      store.decrement field
      value = store[field]?
      assert_equal "-1", value

      store.decrement field
      value = store[field]?
      assert_equal "-2", value
    end
  end

  it "dumps to a hash" do
    clean_slate do
      expected = {"one" => "1", "two" => "2", "three" => "3"}
      expected.each { |key, value| store[key] = value }
      assert_equal expected, store.to_h
    end
  end

  it "can be readonly" do
    clean_slate do
      store[field] = "truth"

      # A readonly handle on the same store can read but must refuse writes.
      readonly_store = Metadata.new store_name, readonly: true
      assert_equal "truth", readonly_store[field]?
      assert_raises RuntimeError do
        readonly_store[field] = "lies"
      end
    end
  end

  it "can set and read a value" do
    clean_slate do
      store[field] = "truth"
      assert_equal "truth", store[field]?
    end
  end

  describe "with a hash" do
    it "can set and read a hash" do
      clean_slate do
        store.set({"one" => "1", "two" => "2", "three" => "3"})
        assert_equal "1", store["one"]?
        assert_equal "2", store["two"]?
        assert_equal "3", store["three"]?
      end
    end

    it "can set a hash and delete a value from the hash" do
      clean_slate do
        store.set({"one" => "1", "two" => "2", "three" => "3"})

        # A nil value removes the key; the other keys are left in place.
        store.set({"two" => nil, "six" => "6"})

        assert_equal "1", store["one"]?
        assert_nil store["two"]?
        assert_equal "3", store["three"]?
        assert_equal "6", store["six"]?
      end
    end

    it "can store string-only values" do
      clean_slate do
        values = {"one" => "1", "two" => "2", "three" => "3"}
        store.set(values)
        assert_equal "1", store["one"]?
        assert_equal "2", store["two"]?
        assert_equal "3", store["three"]?
        assert_equal values, store.to_h
      end
    end
  end

  it "can be deleted" do
    clean_slate do
      store[field] = "truth"
      assert_equal "truth", store[field]?

      store.delete

      # Look the field up through a fresh handle on the same store name.
      assert_nil Metadata.new(store_name)[field]?
    end
  end

  it "can be deleted with a ttl" do
    clean_slate do
      store[field] = "truth"
      assert_equal "truth", store[field]?

      store.delete(in: 1.minute)
      assert_in_epsilon(60, Mosquito.backend.expires_in(store_name))

      # Remove the store right away instead of waiting for the expiry.
      store.delete
    end
  end
end
================================================
FILE: spec/mosquito/periodic_job_run_spec.cr
================================================
require "../spec_helper"
describe Mosquito::PeriodicJobRun do
# Period handed to every PeriodicJobRun built in these examples.
getter interval : Time::Span = 2.minutes
it "tries to execute but fails before the interval has passed" do
  last_run = Time.utc.at_beginning_of_second
  periodic = PeriodicJobRun.new PeriodicTestJob, interval
  periodic.last_executed_at = last_run

  # Only one of the two minutes has gone by, so the timestamp must not move.
  Timecop.freeze(last_run + 1.minute) do
    periodic.try_to_execute
    assert_equal last_run, periodic.last_executed_at
  end
end
it "executes" do
now = Time.utc.at_beginning_of_second
job_run = PeriodicJobRun.new PeriodicTestJob, interval
job_run.last_executed_at = now
Timecop.freeze(now + interval) do
job_run.try_to_execute
assert_equal now + interval, job_run.last_executed_at
end
end
it "checks the metadata store for the last executed timestamp" do
now = Time.utc.at_beginning_of_second
clean_slate do
job_run = PeriodicJobRun.new PeriodicTestJob, interval
job_run.last_executed_at = now - 1.minute
Timecop.freeze(now) do
another_job_run = PeriodicJobRun.new PeriodicTestJob, interval
refute another_job_run.try_to_execute
end
end
end
it "does not enqueue a second job run when one is already pending" do
clean_slate do
now = Time.utc.at_beginning_of_second
periodic = PeriodicJobRun.new PeriodicTestJob, interval
# First execution should enqueue.
Timecop.freeze(now) do
periodic.last_executed_at = now - interval
assert periodic.try_to_execute
end
queue = PeriodicTestJob.queue
first_size = queue.size(include_dead: false)
assert first_size > 0, "Expected at least one job in the queue"
# Second execution after another interval should be skipped
# because the first job run hasn't finished yet.
Timecop.freeze(now + interval) do
assert periodic.try_to_execute
end
second_size = queue.size(include_dead: false)
assert_equal first_size, second_size
end
end
it "enqueues again after the pending job run finishes" do
clean_slate do
now = Time.utc.at_beginning_of_second
periodic = PeriodicJobRun.new PeriodicTestJob, interval
# Enqueue the first job run.
Timecop.freeze(now) do
periodic.last_executed_at = now - interval
periodic.try_to_execute
end
# Simulate the job finishing by writing finished_at to the backend.
pending_id = periodic.metadata["pending_run_id"]?
refute_nil pending_id
Mosquito.backend.set(
Mosquito::JobRun.config_key(pending_id.not_nil!),
"finished_at",
Time.utc.to_unix_ms.to_s
)
queue = PeriodicTestJob.queue
size_after_first = queue.size(include_dead: false)
# Now a new interval passes — should enqueue since the previous one finished.
Timecop.freeze(now + interval) do
assert periodic.try_to_execute
end
size_after_second = queue.size(include_dead: false)
assert size_after_second > size_after_first
end
end
it "enqueues again when the pending job run config has been cleaned up" do
clean_slate do
now = Time.utc.at_beginning_of_second
periodic = PeriodicJobRun.new PeriodicTestJob, interval
# Enqueue the first job run.
Timecop.freeze(now) do
periodic.last_executed_at = now - interval
periodic.try_to_execute
end
pending_id = periodic.metadata["pending_run_id"]?
refute_nil pending_id
# Simulate the job run config being deleted (e.g. TTL expiry).
Mosquito.backend.delete Mosquito::JobRun.config_key(pending_id.not_nil!)
queue = PeriodicTestJob.queue
size_before = queue.size(include_dead: false)
# Next interval should enqueue because the old run is gone.
Timecop.freeze(now + interval) do
assert periodic.try_to_execute
end
size_after = queue.size(include_dead: false)
assert size_after > size_before
end
end
end
================================================
FILE: spec/mosquito/periodic_job_spec.cr
================================================
require "../spec_helper"
# Specs for Mosquito::PeriodicJob: job_type naming, job_run construction,
# job mapping registration and interval scheduling.
describe Mosquito::PeriodicJob do
  getter(runner) { Mosquito::TestableRunner.new }

  it "correctly renders job_type" do
    rendered = PeriodicTestJob.job_type
    assert_equal "periodic_test_job", rendered
  end

  it "builds a job_run" do
    built = PeriodicTestJob.new.build_job_run

    assert_instance_of JobRun, built
    assert_equal PeriodicTestJob.job_type, built.type
  end

  it "is not reschedulable" do
    periodic_job = PeriodicTestJob.new
    refute periodic_job.rescheduleable?
  end

  it "registers in job mapping" do
    mapped = Base.job_for_type(PeriodicTestJob.job_type)
    assert_equal PeriodicTestJob, mapped
  end

  # Has no assertions: it only checks that registration with a month-based
  # interval goes through without raising.
  it "can be scheduled at a MonthSpan interval" do
    clean_slate do
      Mosquito::Base.register_job_mapping MonthlyJob.queue.name, MonthlyJob
      Mosquito::Base.register_job_interval MonthlyJob, interval: 1.month
    end
  end

  it "schedules itself for an interval" do
    clean_slate do
      PeriodicTestJob.run_every 2.minutes

      first_scheduled = Base.scheduled_job_runs.first
      assert_equal PeriodicTestJob, first_scheduled.class
      assert_equal 2.minutes, first_scheduled.interval
    end
  end
end
================================================
FILE: spec/mosquito/queue_spec.cr
================================================
require "../spec_helper"
# Specs for Mosquito::Queue: enqueueing (immediate and scheduled), dequeueing,
# forgetting and banishing pending job runs, and pausing.
describe Queue do
  # Queue name with a random numeric suffix.
  getter(name) { "test#{rand(1000)}" }
  getter(test_queue) { Mosquito::Queue.new(name) }

  @job_run : Mosquito::JobRun?
  # A job run which has already been persisted via `store`.
  getter(job_run) { Mosquito::JobRun.new("mock_job_run").tap(&.store) }

  # Backend-level view of the queue under test.
  getter backend : Mosquito::Backend::Queue do
    TestHelpers.backend.queue name
  end

  describe "config_key" do
    it "defaults to name" do
      queue_name = "random_name"
      fresh_queue = Mosquito::Queue.new(queue_name)
      assert_equal queue_name, fresh_queue.config_key
    end
  end

  describe "flush" do
    it "purges all of the queue entries" do
      enqueued = (1..4).map do
        run = Mosquito::JobRun.new("mock_job_run")
        run.store
        test_queue.enqueue run
        run
      end

      assert_equal enqueued.size, test_queue.size
      test_queue.flush
      assert_equal 0, test_queue.size
    end
  end

  describe "enqueue" do
    it "adds the queue name to the list of queues" do
      clean_slate do
        test_queue.enqueue job_run
        known_queues = Mosquito.backend.list_queues
        assert_includes known_queues, test_queue.name
      end
    end

    it "can enqueue a job_run for immediate processing" do
      clean_slate do
        test_queue.enqueue job_run
        assert_includes backend.list_waiting, job_run.id
      end
    end

    it "can enqueue a job_run with a relative time" do
      Timecop.freeze(Time.utc) do
        clean_slate do
          delay = 3.seconds
          due_at = delay.from_now
          test_queue.enqueue job_run, in: delay

          # The comparison is made at millisecond precision.
          expected = Time.unix_ms(due_at.to_unix_ms)
          assert_equal expected, backend.scheduled_job_run_time(job_run)
        end
      end
    end

    it "can enqueue a job_run at a specific time" do
      Timecop.freeze(Time.utc) do
        clean_slate do
          due_at = 3.seconds.from_now
          test_queue.enqueue job_run, at: due_at

          # The comparison is made at millisecond precision.
          expected = Time.unix_ms(due_at.to_unix_ms)
          assert_equal expected, backend.scheduled_job_run_time(job_run)
        end
      end
    end
  end

  describe "dequeue" do
    it "moves a job_run from waiting to pending on dequeue" do
      test_queue.enqueue job_run

      dequeued = test_queue.dequeue
      assert_equal job_run.id, dequeued.not_nil!.id
      assert_includes backend.list_pending, job_run.id
    end

    it "dequeues job_runs which have been scheduled for a time that has passed" do
      overdue_run = job_run
      upcoming_run = Mosquito::JobRun.new("mock_job_run").tap(&.store)

      Timecop.freeze(Time.utc) do
        test_queue.enqueue overdue_run, at: 1.minute.ago
        test_queue.enqueue upcoming_run, at: 1.minute.from_now
      end

      # Only the run whose time has passed is handed back.
      ready = test_queue.dequeue_scheduled
      assert_equal 1, ready.size
      assert_equal overdue_run.id, ready.first.id

      # The other run stays in the scheduled set.
      still_scheduled = backend.list_scheduled
      refute_includes still_scheduled, overdue_run.id
      assert_includes still_scheduled, upcoming_run.id
    end
  end

  it "can forget about a pending job_run" do
    test_queue.enqueue job_run
    test_queue.dequeue
    assert_includes backend.list_pending, job_run.id

    test_queue.forget job_run
    refute_includes backend.list_pending, job_run.id
  end

  describe "banish" do
    it "can banish a pending job_run, adding it to the dead q" do
      test_queue.enqueue job_run
      test_queue.dequeue
      assert_includes backend.list_pending, job_run.id

      test_queue.banish job_run
      refute_includes backend.list_pending, job_run.id
      assert_includes backend.list_dead, job_run.id
    end
  end

  # NOTE(review): these specs do not use clean_slate and rely on the random
  # queue name for isolation — confirm that is sufficient.
  describe "pause" do
    it "is not paused by default" do
      refute test_queue.paused?
    end

    it "can be paused" do
      test_queue.pause
      assert test_queue.paused?
    end

    it "can be resumed" do
      test_queue.pause
      assert test_queue.paused?

      test_queue.resume
      refute test_queue.paused?
    end

    it "prevents dequeue when paused" do
      test_queue.enqueue job_run
      test_queue.pause

      assert_nil test_queue.dequeue

      # The job run must remain in waiting rather than move to pending.
      assert_includes backend.list_waiting, job_run.id
      refute_includes backend.list_pending, job_run.id
    end

    it "allows dequeue after resume" do
      test_queue.enqueue job_run
      test_queue.pause
      assert_nil test_queue.dequeue

      test_queue.resume
      dequeued = test_queue.dequeue
      assert_equal job_run.id, dequeued.not_nil!.id
    end

    it "still allows enqueue while paused" do
      test_queue.pause
      test_queue.enqueue job_run
      assert_includes backend.list_waiting, job_run.id
    end

    it "can be paused with a duration" do
      test_queue.pause for: 60.seconds
      assert test_queue.paused?
    end

    it "does not affect other queues" do
      sibling_queue = Mosquito::Queue.new("other_#{name}")
      sibling_run = Mosquito::JobRun.new("mock_job_run").tap(&.store)

      test_queue.pause
      sibling_queue.enqueue sibling_run

      assert_nil test_queue.dequeue
      dequeued = sibling_queue.dequeue
      assert_equal sibling_run.id, dequeued.not_nil!.id
    end
  end
end
================================================
FILE: spec/mosquito/queued_job_spec.cr
================================================
require "../spec_helper"
# Specs for Mosquito::QueuedJob: enqueueing (immediate, delayed, at a target
# time), enqueue hooks, and parameter handling.
describe Mosquito::QueuedJob do
  getter(runner) { Mosquito::TestableRunner.new }
  getter(name) { "test#{rand(1000)}" }
  getter(job : QueuedTestJob) { QueuedTestJob.new }
  # Queue belonging to QueuedTestJob (not to the hooked job below).
  getter(queue : Queue) { QueuedTestJob.queue }
  getter(queue_hooked_job : QueueHookedTestJob) { QueueHookedTestJob.new }

  describe "enqueue" do
    it "enqueues" do
      clean_slate do
        job_run = job.enqueue
        enqueued = queue.backend.list_waiting
        assert_equal [job_run.id], enqueued
      end
    end

    it "enqueues with a delay" do
      clean_slate do
        job_run = job.enqueue in: 1.minute
        enqueued = queue.backend.list_scheduled
        assert_equal [job_run.id], enqueued
      end
    end

    it "enqueues with a target time" do
      clean_slate do
        job_run = job.enqueue at: 1.minute.from_now
        enqueued = queue.backend.list_scheduled
        assert_equal [job_run.id], enqueued
      end
    end

    it "fires before_enqueue_hook" do
      clean_slate do
        queue_hooked_job.enqueue
        assert queue_hooked_job.before_hook_ran
      end
    end

    it "doesnt enqueue if before_enqueue_hook fails" do
      clean_slate do
        queue_hooked_job.fail_before_hook = true
        queue_hooked_job.enqueue

        # Inspect the queue the hooked job would have been enqueued on. The
        # `queue` getter above is QueuedTestJob's queue, which would be empty
        # under clean_slate whether or not the hook prevented the enqueue.
        waiting_q = QueueHookedTestJob.queue.backend.list_waiting
        assert_empty waiting_q
      end
    end

    it "fires after_enqueue_hook" do
      clean_slate do
        queue_hooked_job.enqueue
        assert queue_hooked_job.after_hook_ran
      end
    end

    # NOTE(review): this spec and the next one have identical bodies; which
    # hook wrote `passed_job_config` cannot be told apart from here.
    it "passes the job config to the before_enqueue_hook" do
      clean_slate do
        job_run = queue_hooked_job.enqueue
        assert_equal job_run, queue_hooked_job.passed_job_config
      end
    end

    it "passes the job config to the after_enqueue_hook" do
      clean_slate do
        job_run = queue_hooked_job.enqueue
        assert_equal job_run, queue_hooked_job.passed_job_config
      end
    end
  end

  describe "parameters" do
    it "can be passed in" do
      clear_logs
      EchoJob.new("quack").perform
      assert_logs_match "quack"
    end

    it "can have a boolean false passed as a parameter (and it's not assumed to be a nil)" do
      clear_logs
      JobWithHooks.new(false).perform
      assert_includes logs, "Perform Executed"
    end

    it "can be omitted" do
      clean_slate do
        clear_logs
        JobWithNoParams.new.perform
        assert_includes logs, "no param job performed"
      end
    end
  end
end
================================================
FILE: spec/mosquito/rate_limiter_spec.cr
================================================
require "../spec_helper"
# Specs for Mosquito::RateLimiter, driven through RateLimitedJob: the stats
# hash, the shared metadata key, run counting and preemption.
describe Mosquito::RateLimiter do
  # Reads the persisted run counter for RateLimitedJob as an integer.
  # Raises if the counter has not been written yet.
  def stored_run_count
    RateLimitedJob.metadata["run_count"]?.not_nil!.to_i
  end

  describe "RateLimiter.rate_limit_stats" do
    it "provides the state and configuration of the limiter" do
      clean_slate do
        stats = RateLimitedJob.rate_limit_stats

        assert stats.has_key? :interval
        assert stats.has_key? :key
        assert stats.has_key? :increment
        assert stats.has_key? :limit
        assert stats.has_key? :window_start
        assert stats.has_key? :run_count
      end
    end

    it "defaults the window_start" do
      clean_slate do
        assert_equal Time::UNIX_EPOCH, RateLimitedJob.rate_limit_stats[:window_start]

        window_opened = Time.utc.at_beginning_of_second
        RateLimitedJob.metadata["window_start"] = window_opened.to_unix.to_s
        assert_equal window_opened, RateLimitedJob.rate_limit_stats[:window_start]
      end
    end

    it "defaults the run_count" do
      clean_slate do
        assert_equal 0, RateLimitedJob.rate_limit_stats[:run_count]

        persisted = 27
        RateLimitedJob.metadata["run_count"] = persisted.to_s
        assert_equal persisted, RateLimitedJob.rate_limit_stats[:run_count]
      end
    end
  end

  describe "RateLimiter.metadata" do
    it "provides an instance of the metadata store" do
      assert_instance_of Metadata, RateLimitedJob.metadata
    end
  end

  describe "RateLimiter.rate_limit_key" do
    it "provides the metadata key for this class" do
      assert_equal "mosquito:rate_limit:rate_limit", RateLimitedJob.rate_limit_key
    end
  end

  describe "job counting" do
    it "increments the count when a job is run" do
      clean_slate do
        RateLimitedJob.new.run
        before = stored_run_count

        RateLimitedJob.new.run
        after = stored_run_count

        assert_equal 1, after - before
      end
    end

    it "doesnt increment the count when a job is not run" do
      clean_slate do
        RateLimitedJob.new(should_fail: false).run
        before = stored_run_count

        RateLimitedJob.new(should_fail: true).run
        after = stored_run_count

        assert_equal before, after
      end
    end

    it "increments the count by a configurable number" do
      clean_slate do
        step = 2

        RateLimitedJob.new.run
        before = stored_run_count

        RateLimitedJob.new(increment: step).run
        after = stored_run_count

        assert_equal step, after - before
      end
    end

    it "resets the count when the window is over" do
      clean_slate do
        limiter_state = RateLimitedJob.metadata
        limiter_state["run_count"] = "45"
        limiter_state["window_start"] = Time::UNIX_EPOCH.to_unix.to_s

        RateLimitedJob.new.run

        assert_equal "1", RateLimitedJob.metadata["run_count"]?
      end
    end

    it "counts multiple jobs with the same key in the same bucket" do
      clean_slate do
        limiter_state = RateLimitedJob.metadata
        limiter_state["window_start"] = Time.utc.to_unix.to_s

        RateLimitedJob.new.run
        before = stored_run_count

        # A different job class, expected to share the rate limit key.
        SecondRateLimitedJob.new.run
        after = stored_run_count

        assert_equal RateLimitedJob.rate_limit_key, SecondRateLimitedJob.rate_limit_key
        assert_equal 1, after - before
      end
    end
  end

  # These specs seed the limiter state directly and do not use clean_slate.
  describe "job preempting" do
    it "doesnt prevent excution if the rate limit count is less than zero" do
      limiter_state = RateLimitedJob.metadata
      limiter_state["run_count"] = "-1"
      limiter_state["window_start"] = Time.utc.to_unix.to_s

      limited_job = RateLimitedJob.new
      limited_job.run

      assert limited_job.executed?
    end

    it "prevents a job from executing when the limit is reached" do
      limiter_state = RateLimitedJob.metadata
      limiter_state["run_count"] = Int32::MAX.to_s
      limiter_state["window_start"] = Time.utc.to_unix.to_s

      limited_job = RateLimitedJob.new
      limited_job.run

      refute limited_job.executed?
      assert limited_job.preempted?
    end

    it "allows a job to execute when the limit hasn't been reached" do
      limiter_state = RateLimitedJob.metadata
      limiter_state["window_start"] = Time.utc.to_unix.to_s
      limiter_state["run_count"] = "3"

      limited_job = RateLimitedJob.new
      limited_job.run

      assert limited_job.executed?
    end

    it "allows a job to execute when the limit has been reached but the window is over" do
      limiter_state = RateLimitedJob.metadata
      limiter_state["run_count"] = Int32::MAX.to_s
      limiter_state["window_start"] = Time::UNIX_EPOCH.to_unix.to_s

      limited_job = RateLimitedJob.new
      limited_job.run

      assert limited_job.executed?
    end
  end
end
================================================
FILE: spec/mosquito/resource_gate_spec.cr
================================================
require "../spec_helper"
# The open gate lets everything through.
describe "Mosquito::OpenGate" do
  it "always allows" do
    open_gate = Mosquito::OpenGate.new
    assert open_gate.allow?
  end
end
# A threshold gate allows only while the sampled metric is strictly below the
# threshold. A zero sample_ttl is used in every case here.
describe "Mosquito::ThresholdGate" do
  it "allows when metric is below threshold" do
    below = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 50.0 }
    assert below.allow?
  end

  it "blocks when metric is at or above threshold" do
    above = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 85.0 }
    refute above.allow?
  end

  it "blocks when metric equals threshold" do
    at_limit = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 80.0 }
    refute at_limit.allow?
  end
end
# The sampling block is counted to show that results are reused until
# sample_ttl has elapsed.
describe "Mosquito::ResourceGate caching" do
  it "caches the check result within TTL" do
    samples_taken = 0
    gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 5.seconds) do
      samples_taken += 1
      50.0
    end

    Timecop.freeze(Time.utc) do
      3.times { gate.allow? }
      assert_equal 1, samples_taken
    end
  end

  it "re-checks after TTL expires" do
    samples_taken = 0
    gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 5.seconds) do
      samples_taken += 1
      50.0
    end

    first_check = Time.utc

    Timecop.freeze(first_check) do
      gate.allow?
      assert_equal 1, samples_taken
    end

    Timecop.freeze(first_check + 3.seconds) do
      gate.allow?
      assert_equal 1, samples_taken, "Should still be cached at 3s"
    end

    Timecop.freeze(first_check + 6.seconds) do
      gate.allow?
      assert_equal 2, samples_taken, "Should re-check after 6s (past 5s TTL)"
    end
  end
end
================================================
FILE: spec/mosquito/runnable_spec.cr
================================================
require "../spec_helper"
# Minimal concrete Runnable used by the specs below. It announces its first
# `each_run` over a channel so a spec can wait until the run loop has ticked
# at least once before stopping it.
class Namespace::ConcreteRunnable
include Mosquito::Runnable
# Receives `true` once, from the first `each_run`; closed by `stop`.
getter first_run_notifier = Channel(Bool).new
# True until the first `each_run` has happened.
getter first_run = true
# Declared as a property so specs can assign a state directly.
property state : Mosquito::Runnable::State
# Testing wedge which calls: run, waits for a run to happen, and then calls stop.
# Blocks until the WaitGroup returned by `stop` is released.
def test_run : Nil
run
first_run_notifier.receive
stop.wait
end
# Name reported for this runnable; the specs match it in my_name and in logs.
def runnable_name : String
"concrete_runnable"
end
# One tick of the run loop. Signals the notifier on the first tick only, then
# yields the fiber on every tick.
def each_run : Nil
if first_run
@first_run = false
first_run_notifier.send true
end
Fiber.yield
end
# Closes the notifier before delegating to the Runnable implementation of
# `stop`, passing the wait group through.
def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
first_run_notifier.close
super(wait_group)
end
end
# Specs for the Mosquito::Runnable mixin, exercised through
# Namespace::ConcreteRunnable.
describe Mosquito::Runnable do
  let(:runnable) { Namespace::ConcreteRunnable.new }

  it "builds a my_name" do
    expected_name = "namespace.concrete_runnable.#{runnable.object_id}"
    assert_equal expected_name, runnable.my_name
  end

  describe "run" do
    it "should log a startup message" do
      clear_logs
      runnable.test_run
      assert_logs_match "mosquito.concrete_runnable", "starting"
    end

    it "should log a finished message" do
      clear_logs
      runnable.test_run
      assert_logs_match "mosquito.concrete_runnable", "stopped"
    end
  end

  describe "stop" do
    it "should set the stopping flag" do
      # Force the Working state directly instead of starting the run loop.
      runnable.state = Mosquito::Runnable::State::Working
      runnable.stop

      stopping = Mosquito::Runnable::State::Stopping
      assert_equal stopping, runnable.state
    end

    it "should set the finished flag" do
      runnable.test_run

      finished = Mosquito::Runnable::State::Finished
      assert_equal finished, runnable.state
    end
  end
end
================================================
FILE: spec/mosquito/runners/coordinator_spec.cr
================================================
require "../../spec_helper"
# Specs for Mosquito::Runners::Coordinator: leadership locking, periodic job
# enqueueing and promotion of delayed job runs.
describe "Mosquito::Runners::Coordinator" do
  getter(queue : Queue) { test_job.class.queue }
  getter(test_job) { QueuedTestJob.new }
  getter(queue_list) { MockQueueList.new }
  getter(coordinator) { MockCoordinator.new queue_list }
  getter(enqueue_time) { Time.utc }

  # Enqueues `test_job` with a 3 second delay as of `enqueue_time`, makes its
  # queue visible to the queue list, and returns the scheduled job run.
  def enqueue_job_run : JobRun
    queue_list.discovered_queues << queue

    # Placeholder value; it is replaced inside the block below.
    job_run = JobRun.new "blah"

    Timecop.freeze enqueue_time do
      job_run = test_job.enqueue in: 3.seconds
    end

    assert_includes queue.backend.list_scheduled, job_run.id
    job_run
  end

  # Runs the block with distributed locking enabled, deleting the leadership
  # lock key both before and after.
  def opt_in_to_locking
    Mosquito.temp_config(use_distributed_lock: true) do
      Mosquito.backend.delete Mosquito.backend.build_key(:coordinator, :leadership_lock)
      yield
      Mosquito.backend.delete Mosquito.backend.build_key(:coordinator, :leadership_lock)
    end
  end

  describe "only_if_coordinator" do
    getter(coordinator1) { Mosquito::Runners::Coordinator.new queue_list }
    getter(coordinator2) { Mosquito::Runners::Coordinator.new queue_list }

    it "gets a lock from the backend" do
      opt_in_to_locking do
        gotten = false

        coordinator1.only_if_coordinator do
          gotten = true
        end

        assert gotten
      end
    end

    it "fails to get a lock from the backend" do
      opt_in_to_locking do
        gotten = false

        # coordinator2 attempts to lead while coordinator1 is inside its block.
        coordinator1.only_if_coordinator do
          coordinator2.only_if_coordinator do
            gotten = true
          end
        end

        refute gotten
      end
    end

    it "releases the lock when release_leadership_lock is called" do
      opt_in_to_locking do
        gotten = false

        coordinator1.only_if_coordinator do
        end

        coordinator1.release_leadership_lock

        coordinator2.only_if_coordinator do
          gotten = true
        end

        assert gotten
      end
    end

    it "sets a ttl on the lock" do
      opt_in_to_locking do
        coordinator1.only_if_coordinator do
          # Read the key from the coordinator which actually took the lock
          # rather than from an unrelated mock instance.
          assert Mosquito.backend.expires_in(coordinator1.lock_key) > 0
        end
      end
    end

    it "retains leadership across calls" do
      opt_in_to_locking do
        count = 0

        3.times do
          coordinator1.only_if_coordinator do
            count += 1
          end
        end

        assert_equal 3, count
        assert coordinator1.is_leader?
      end
    end

    it "yields without locking when distributed lock is disabled" do
      Mosquito.temp_config(use_distributed_lock: false) do
        gotten = false

        coordinator1.only_if_coordinator do
          gotten = true
        end

        assert gotten
      end
    end
  end

  describe "enqueue_periodic_jobs" do
    it "enqueues a scheduled job_run at the appropriate time" do
      clean_slate do
        queue = PeriodicTestJob.queue
        Mosquito::Base.register_job_mapping PeriodicTestJob.name, PeriodicTestJob
        Mosquito::Base.register_job_interval PeriodicTestJob, interval: 1.second

        Timecop.freeze(enqueue_time) do
          coordinator.enqueue_periodic_jobs
        end

        queued_job_runs = queue.backend.list_waiting
        assert queued_job_runs.size >= 1

        last_job_run = queued_job_runs.last
        job_run_metadata = Mosquito.backend.retrieve JobRun.config_key(last_job_run)

        assert_equal enqueue_time.to_unix_ms.to_s, job_run_metadata["enqueue_time"]
      end
    end
  end

  describe "enqueue_delayed_jobs" do
    it "enqueues a delayed job_run when it's ready" do
      clean_slate do
        job_run = enqueue_job_run

        # Exactly the 3 second delay used by enqueue_job_run.
        run_time = enqueue_time + 3.seconds

        Timecop.freeze run_time do
          coordinator.enqueue_delayed_jobs
        end

        queued_job_runs = queue.backend.list_waiting
        assert_includes queued_job_runs, job_run.id

        last_job_run = queued_job_runs.last
        job_run_metadata = Mosquito.backend.retrieve JobRun.config_key(last_job_run)

        assert_equal queue.name, job_run_metadata["type"]?
      end
    end

    it "doesn't enqueue job_runs that arent ready yet" do
      clean_slate do
        job_run = enqueue_job_run

        # One millisecond short of the 3 second delay.
        check_time = enqueue_time + 2.999.seconds

        Timecop.freeze check_time do
          coordinator.enqueue_delayed_jobs
        end

        queued_job_runs = queue.backend.list_waiting

        # does not deschedule and enqueue anything
        assert_equal 0, queued_job_runs.size
      end
    end

    it "logs when it finds delayed job_runs" do
      clean_slate do
        clear_logs
        enqueue_job_run

        Timecop.freeze enqueue_time + 3.seconds do
          coordinator.enqueue_delayed_jobs
        end

        assert_logs_match "1 delayed jobs ready"
      end
    end
  end
end
================================================
FILE: spec/mosquito/runners/executor_spec.cr
================================================
require "../../spec_helper"
# Specs for Mosquito::Runners::Executor: state transitions, running and
# rescheduling job runs, TTLs on finished runs, observer reporting and logs.
describe "Mosquito::Runners::Executor" do
  getter(queue_list) { MockQueueList.new }
  getter(overseer) { MockOverseer.new }
  getter(executor) { MockExecutor.new overseer.as(Mosquito::Runners::Overseer) }
  getter(api) { Mosquito::Api::Executor.new executor.object_id.to_s }
  getter(coordinator) { Mosquito::Runners::Coordinator.new queue_list }

  # Registers the job class in the job mapping and makes its queue visible
  # to the mock queue list.
  def register(job_class : Mosquito::Job.class)
    Mosquito::Base.register_job_mapping job_class.name.underscore, job_class
    queue_list.discovered_queues << job_class.queue
  end

  # Registers, enqueues and immediately executes one job of the given class.
  def run_job(job_class : Mosquito::Job.class)
    register job_class
    job_class.reset_performance_counter!
    job_run = job_class.new.enqueue
    executor.work_unit = WorkUnit.of(job_run, from: job_class.queue)
    executor.execute
  end

  describe "status" do
    it "starts as starting" do
      assert_equal Runnable::State::Starting, executor.state
    end

    it "broadcasts a ping when transitioning to idle" do
      executor.state = Runnable::State::Idle

      select
      when overseer.finished_notifier.receive
        assert true
      when timeout(0.5.seconds)
        refute true, "Timed out waiting for idle notifier"
      end
    end

    it "goes idle in pre_run" do
      executor.pre_run
      assert_equal Runnable::State::Idle, executor.state
    end
  end

  describe "running jobs" do
    it "runs a job from a queue" do
      clean_slate do
        run_job QueuedTestJob
        assert_equal 1, QueuedTestJob.performances
      end
    end

    it "reschedules a job that failed" do
      clean_slate do
        register FailingJob
        now = Time.utc

        job = FailingJob.new
        job_run = job.build_job_run
        job_run.store
        FailingJob.queue.enqueue job_run

        Timecop.freeze now do
          executor.work_unit = WorkUnit.of(job_run, from: FailingJob.queue)
          executor.execute
        end

        job_run.reload
        assert_equal 1, job_run.retry_count

        # Jump to the first reschedule time, promote the delayed run, and
        # execute it a second time.
        Timecop.freeze now + job.reschedule_interval(1) do
          coordinator.enqueue_delayed_jobs
          executor.work_unit = WorkUnit.of(job_run, from: FailingJob.queue)
          executor.execute
        end

        job_run.reload
        assert_equal 2, job_run.retry_count
      end
    end

    it "schedules deletion of a job_run that hard failed" do
      clean_slate do
        register NonReschedulableFailingJob

        job = NonReschedulableFailingJob.new
        job_run = job.build_job_run
        job_run.store
        NonReschedulableFailingJob.queue.enqueue job_run

        executor.work_unit = WorkUnit.of(job_run, from: NonReschedulableFailingJob.queue)
        executor.execute

        actual_ttl = backend.expires_in job_run.config_key
        assert_equal executor.failed_job_ttl, actual_ttl
      end
    end

    it "purges a successful job_run from the backend" do
      clean_slate do
        register QueuedTestJob

        job = QueuedTestJob.new
        job_run = job.build_job_run
        job_run.store
        QueuedTestJob.queue.enqueue job_run

        executor.work_unit = WorkUnit.of(job_run, from: QueuedTestJob.queue)
        executor.execute

        assert_logs_match "Success"

        QueuedTestJob.queue.enqueue job_run
        actual_ttl = Mosquito.backend.expires_in job_run.config_key
        assert_equal executor.successful_job_ttl, actual_ttl
      end
    end

    it "doesnt reschedule a job that cant be rescheduled" do
      clean_slate do
        run_job NonReschedulableFailingJob
        assert_logs_match "cannot be rescheduled"
      end
    end

    it "tells the observer what it's working on" do
      SleepyJob.should_sleep = true

      job = SleepyJob.new
      job_run = job.build_job_run
      job_run.store

      job_started = Channel(Bool).new
      job_finished = Channel(Bool).new

      # Eagerly evaluate to avoid race condition in lazy
      # getter initialization across fibers.
      executor
      api

      spawn {
        executor.work_unit = WorkUnit.of(job_run, from: SleepyJob.queue)
        executor.execute
        job_finished.send true
      }

      spawn {
        loop {
          break if api.current_job
          # Hand control back to the scheduler between polls. Without this
          # the loop only gives way if the backend call happens to block, and
          # could otherwise starve the executor fiber and the timeouts below.
          Fiber.yield
        }

        assert_equal job_run.id, api.current_job
        assert_equal SleepyJob.queue.name, api.current_job_queue
        job_started.send true
      }

      select
      when job_started.receive
      when timeout(0.5.seconds)
        refute true, "Timed out waiting for job to start"
      end

      # Let the job finish now that it has been observed in progress.
      SleepyJob.should_sleep = false

      select
      when job_finished.receive
      when timeout(0.5.seconds)
        refute true, "Timed out waiting for job to finish"
      end

      assert_nil api.current_job, "Job should be cleared after finishing"
      assert_nil api.current_job_queue, "Queue should be cleared after finishing"
    end
  end

  describe "logs success/failures messages" do
    it "logs a success message when the job succeeds" do
      clean_slate do
        run_job QueuedTestJob
        assert_logs_match "Success"
      end
    end

    it "logs a failure message when the job fails" do
      clean_slate do
        run_job FailingJob
        assert_logs_match "Failure"
      end
    end
  end

  describe "job timing messages" do
    it "logs the time a job took to run" do
      clean_slate do
        run_job QueuedTestJob
        assert_logs_match "and took"
      end
    end

    it "logs the time a job took to run when the job fails" do
      clean_slate do
        run_job FailingJob
        assert_logs_match "taking"
      end
    end
  end

  describe "start and finish messages" do
    it "logs the job run start message" do
      clean_slate do
        run_job QueuedTestJob
        assert_logs_match "Starting: queued_test_job"
      end
    end
  end
end
================================================
FILE: spec/mosquito/runners/overseer_spec.cr
================================================
require "../../spec_helper"
# Specs for Mosquito::Runners::Overseer: executor lifecycle, dispatching work,
# remote executor count, and recovery of orphaned pending job runs.
describe "Mosquito::Runners::Overseer" do
  getter(overseer : MockOverseer) { MockOverseer.new }
  getter(queue_list : MockQueueList) { overseer.queue_list.as(MockQueueList) }
  getter(coordinator : MockCoordinator) { overseer.coordinator.as(MockCoordinator) }
  getter(executor : MockExecutor) { overseer.executors.first.as(MockExecutor) }

  # Registers the job class in the job mapping and makes its queue visible
  # to the mock queue list.
  def register(job_class : Mosquito::Job.class)
    Mosquito::Base.register_job_mapping job_class.name.underscore, job_class
    queue_list.discovered_queues << job_class.queue
  end

  # Registers, enqueues and immediately executes one job of the given class.
  # Uses the same work_unit + execute sequence as the executor spec helper.
  def run_job(job_class : Mosquito::Job.class)
    register job_class
    job_class.reset_performance_counter!
    job_run = job_class.new.enqueue
    executor.work_unit = WorkUnit.of(job_run, from: job_class.queue)
    executor.execute
  end

  describe "pre_run" do
    it "runs all executors" do
      overseer.executors.each do |executor|
        assert_equal Runnable::State::Starting, executor.state
      end

      overseer.pre_run

      overseer.executors.each do |executor|
        assert_equal Runnable::State::Working, executor.state
      end
    end
  end

  describe "post_run" do
    it "stops all executors" do
      overseer.executors.each(&.run)
      overseer.post_run

      overseer.executors.each do |executor|
        assert_equal Runnable::State::Finished, executor.state
      end
    end

    it "logs messages about stopping the executors" do
      clear_logs
      overseer.pre_run
      overseer.post_run
      assert_logs_match "Stopping executors."
      assert_logs_match "All executors stopped."
    end
  end

  describe "each_run" do
    it "dequeues a job and dispatches it to the pipeline" do
      clean_slate do
        register QueuedTestJob
        expected_job_run = QueuedTestJob.new.enqueue

        overseer.work_handout = Channel(WorkUnit).new
        queue_list.state = Runnable::State::Working
        executor.state = Runnable::State::Idle

        # each_run will block until there's a receiver on the channel
        spawn { overseer.each_run }

        result = overseer.work_handout.receive
        assert_equal expected_job_run, result.job_run
        assert_equal QueuedTestJob.queue, result.queue
      end
    end

    it "waits #idle_wait before checking the queue again" do
      clean_slate do
        # an idle executor, but no jobs in the queue
        executor.state = Runnable::State::Idle
        queue_list.state = Runnable::State::Working

        tick_time = Time.measure do
          overseer.each_run
        end

        assert tick_time >= overseer.idle_wait, "Expected to wait at least #{overseer.idle_wait}, but only waited #{tick_time}"
      end
    end

    it "triggers the scheduler" do
      assert_equal 0, coordinator.schedule_count
      overseer.each_run
      assert_equal 1, coordinator.schedule_count
    end
  end

  describe "dequeue_job? stamps overseer_id" do
    it "claims the job run with the overseer's instance id on dequeue" do
      clean_slate do
        register QueuedTestJob
        job_run = QueuedTestJob.new.enqueue

        queue_list.state = Runnable::State::Working

        result = overseer.dequeue_job?
        assert result
        assert_equal overseer.observer.instance_id, result.not_nil!.job_run.overseer_id
      end
    end
  end

  # Each spec which sets a configured overseer_id resets it to nil in an
  # ensure clause so the setting does not outlive the spec.
  describe "remote executor count" do
    it "applies the remote executor count on each_run" do
      clean_slate do
        Mosquito.configuration.overseer_id = "test-worker"
        Mosquito::Api.set_executor_count(3, overseer_id: "test-worker")

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal 3, overseer.executor_count
      ensure
        Mosquito.configuration.overseer_id = nil
      end
    end

    it "prefers per-overseer count over global" do
      clean_slate do
        Mosquito.configuration.overseer_id = "test-worker"
        Mosquito::Api.set_executor_count(10)
        Mosquito::Api.set_executor_count(2, overseer_id: "test-worker")

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal 2, overseer.executor_count
      ensure
        Mosquito.configuration.overseer_id = nil
      end
    end

    it "falls back to global when no per-overseer count is set" do
      clean_slate do
        Mosquito.configuration.overseer_id = "test-worker"
        Mosquito::Api.set_executor_count(7)

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal 7, overseer.executor_count
      ensure
        Mosquito.configuration.overseer_id = nil
      end
    end

    it "does not change executor_count when no remote value is set" do
      clean_slate do
        original_count = overseer.executor_count

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal original_count, overseer.executor_count
      end
    end

    it "clamps an invalid remote executor count of 0 to 1" do
      clean_slate do
        Mosquito.configuration.overseer_id = "test-worker"
        Mosquito::Api.set_executor_count(0, overseer_id: "test-worker")

        queue_list.state = Runnable::State::Working
        overseer.each_run

        assert_equal 1, overseer.executor_count
      ensure
        Mosquito.configuration.overseer_id = nil
      end
    end
  end

  describe "cleanup_orphaned_pending_jobs" do
    it "recovers a pending job whose overseer is dead" do
      clean_slate do
        register QueuedTestJob

        # Use a separate overseer that won't be registered as alive.
        dead_overseer = MockOverseer.new

        job = QueuedTestJob.new
        job_run = job.build_job_run
        job_run.store
        QueuedTestJob.queue.enqueue job_run
        QueuedTestJob.queue.dequeue
        job_run.claimed_by dead_overseer

        # Verify job is stuck in pending
        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id
        assert_equal 0, job_run.retry_count

        # Register only the *live* overseer
        Mosquito.backend.register_overseer overseer.observer.instance_id

        # Run cleanup — dead_overseer's id won't be in the active set
        overseer.cleanup_orphaned_pending_jobs

        # Job should be removed from pending and rescheduled
        assert_empty QueuedTestJob.queue.backend.list_pending
        assert_includes QueuedTestJob.queue.backend.list_scheduled, job_run.id

        # Retry count should be incremented
        job_run.reload
        assert_equal 1, job_run.retry_count
      end
    end

    it "does not touch pending jobs from a live overseer" do
      clean_slate do
        register QueuedTestJob

        job = QueuedTestJob.new
        job_run = job.build_job_run
        job_run.store
        QueuedTestJob.queue.enqueue job_run
        QueuedTestJob.queue.dequeue

        # Claim with the live overseer
        Mosquito.backend.register_overseer overseer.observer.instance_id
        job_run.claimed_by overseer

        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id

        overseer.cleanup_orphaned_pending_jobs

        # Job should still be in pending — its overseer is alive
        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id
      end
    end

    it "claims unclaimed pending jobs without recovering them" do
      clean_slate do
        register QueuedTestJob

        job = QueuedTestJob.new
        job_run = job.build_job_run
        job_run.store
        QueuedTestJob.queue.enqueue job_run
        QueuedTestJob.queue.dequeue

        # No claim — simulates a job from before this feature
        assert_nil job_run.overseer_id
        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id

        Mosquito.backend.register_overseer overseer.observer.instance_id
        overseer.cleanup_orphaned_pending_jobs

        # Job should still be in pending (not recovered)
        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id

        # But it should now be claimed by this overseer
        job_run.reload
        assert_equal overseer.observer.instance_id, job_run.overseer_id
      end
    end

    it "banishes an orphaned job that has exhausted retries" do
      clean_slate do
        register QueuedTestJob

        dead_overseer = MockOverseer.new

        # Create a job_run with retry_count=4 so the next failure (count=5)
        # exceeds the default rescheduleable? limit of < 5.
        job_run = Mosquito::JobRun.new("queued_test_job", retry_count: 4)
        job_run.store
        QueuedTestJob.queue.enqueue job_run
        QueuedTestJob.queue.dequeue
        job_run.claimed_by dead_overseer

        assert_includes QueuedTestJob.queue.backend.list_pending, job_run.id

        Mosquito.backend.register_overseer overseer.observer.instance_id
        overseer.cleanup_orphaned_pending_jobs

        # Job should be removed from pending and moved to dead
        assert_empty QueuedTestJob.queue.backend.list_pending
        assert_empty QueuedTestJob.queue.backend.list_waiting
        assert_empty QueuedTestJob.queue.backend.list_scheduled
        assert_includes QueuedTestJob.queue.backend.list_dead, job_run.id
      end
    end
  end
end
================================================
FILE: spec/mosquito/runners/queue_list_spec.cr
================================================
require "../../spec_helper"
# Specs for the runner queue list: discovery on each run, whitelist
# filtering, exclusion of paused queues, and resource-gate filtering.
describe "Mosquito::Runners::QueueList" do
  getter(queue_list) { MockQueueList.new }

  # Enqueues one job of each kind. The specs below expect the resulting
  # queues to be named failing_job, io_queue and passing_job.
  def enqueue_jobs
    PassingJob.new.enqueue
    FailingJob.new.enqueue
    EchoJob.new(text: "hello world").enqueue
  end

  # Names of the queues currently exposed by the list, in list order.
  # Evaluated on every call so gate changes are observed.
  def queue_names
    queue_list.queues.map(&.name)
  end

  describe "each_run" do
    it "returns a list of queues" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run
        assert_equal ["failing_job", "io_queue", "passing_job"], queue_names.sort
      end
    end

    it "logs a message about the number of fetched queues" do
      clean_slate do
        clear_logs
        enqueue_jobs
        queue_list.each_run
        assert_logs_match "found 3 new queues"
      end
    end
  end

  describe "queue filtering" do
    it "filters the list of queues when a whitelist is present" do
      clean_slate do
        enqueue_jobs
        Mosquito.temp_config(run_from: ["io_queue", "passing_job"]) do
          queue_list.each_run
        end
        # Asserted inside clean_slate, like every other spec in this file.
        assert_equal ["io_queue", "passing_job"], queue_names.sort
      end
    end

    it "logs an error when all queues are filtered out" do
      clean_slate do
        # Start from an empty log so only this run can satisfy the match.
        clear_logs
        enqueue_jobs
        Mosquito.temp_config(run_from: ["test4"]) do
          queue_list.each_run
        end
        assert_logs_match "No watchable queues found."
      end
    end

    it "doesnt log an error when no queues are present" do
      clean_slate do
        # Without clearing, a message left behind by an earlier spec could
        # make this refutation fail depending on test order.
        clear_logs
        queue_list.each_run
        refute_logs_match "No watchable queues found."
      end
    end
  end

  describe "paused queue filtering" do
    it "excludes paused queues from the queue list" do
      clean_slate do
        enqueue_jobs
        Mosquito::Queue.new("passing_job").pause
        queue_list.each_run
        assert_equal ["failing_job", "io_queue"], queue_names.sort
      end
    end

    it "logs a message about paused queues" do
      clean_slate do
        clear_logs
        enqueue_jobs
        Mosquito::Queue.new("passing_job").pause
        queue_list.each_run
        assert_logs_match "1 paused queues: passing_job"
      end
    end

    it "includes queues again after they are resumed" do
      clean_slate do
        enqueue_jobs
        q = Mosquito::Queue.new("passing_job")

        q.pause
        queue_list.each_run
        refute_includes queue_names, "passing_job"

        q.resume
        queue_list.each_run
        assert_includes queue_names, "passing_job"
      end
    end
  end

  describe "resource gate filtering" do
    it "excludes queues whose gate blocks" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run
        # Sampled value (90) is above the threshold (80).
        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 90.0 }
        queue_list.resource_gates = {"passing_job" => gate.as(Mosquito::ResourceGate)}
        refute_includes queue_names, "passing_job"
        assert_includes queue_names, "failing_job"
        assert_includes queue_names, "io_queue"
      end
    end

    it "includes queues whose gate allows" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run
        # Sampled value (50) is below the threshold (80).
        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 50.0 }
        queue_list.resource_gates = {"passing_job" => gate.as(Mosquito::ResourceGate)}
        assert_includes queue_names, "passing_job"
      end
    end

    it "ungated queues are always included" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run
        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 90.0 }
        queue_list.resource_gates = {"passing_job" => gate.as(Mosquito::ResourceGate)}
        assert_equal 2, queue_list.queues.size
      end
    end

    it "multiple queues can share a gate" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run
        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { 90.0 }
        queue_list.resource_gates = {
          "passing_job" => gate.as(Mosquito::ResourceGate),
          "failing_job" => gate.as(Mosquito::ResourceGate),
        }
        assert_equal ["io_queue"], queue_names
      end
    end

    it "gate state is evaluated on each access" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run
        # The gate block closes over `value`, so reassigning it changes what
        # the gate samples on the next access.
        value = 90.0
        gate = Mosquito::ThresholdGate.new(threshold: 80.0, sample_ttl: 0.seconds) { value }
        queue_list.resource_gates = {"passing_job" => gate.as(Mosquito::ResourceGate)}
        refute_includes queue_names, "passing_job"
        value = 50.0
        assert_includes queue_names, "passing_job"
      end
    end

    it "returns all queues when no gates are configured" do
      clean_slate do
        enqueue_jobs
        queue_list.each_run
        assert_equal 3, queue_list.queues.size
      end
    end
  end
end
================================================
FILE: spec/mosquito/runners/run_at_most_spec.cr
================================================
require "../../spec_helper"
# Minimal host class for the `Mosquito::Runners::RunAtMost` mixin, used by
# the specs below to exercise `run_at_most`.
class RunsAtMostMock
  include Mosquito::Runners::RunAtMost

  # Forwards to the caller's block through `run_at_most`, requesting a
  # one-second interval under the `:testing` label.
  def yield_once_a_second(&block)
    run_at_most(every: 1.second, label: :testing) do |_t|
      yield
    end
  end
end
# Exercises `run_at_most` throttling through `RunsAtMostMock`.
describe "Mosquito::yielder#run_at_most" do
  getter(yielder) { RunsAtMostMock.new }

  it "prevents throttled blocks from running too often" do
    runs = 0
    2.times do
      yielder.yield_once_a_second { runs += 1 }
    end
    assert_equal 1, runs
  end

  it "allows throttled blocks to run only after enough time has passed" do
    runs = 0
    start = Time.utc
    # Touch the memoized getter before building the proc, as the original
    # spec does.
    yielder
    bump = -> { yielder.yield_once_a_second { runs += 1 } }

    # First call at the starting instant: runs.
    Timecop.freeze(start) { |_| bump.call }

    # 0.999 seconds later: still throttled.
    Timecop.freeze(start + 999.milliseconds) { |_| bump.call }
    assert_equal 1, runs

    # 1.1 seconds after the start: past the interval, runs again.
    later = start + 1.1.seconds
    Timecop.freeze(later) { |_| bump.call }
    assert_equal 2, runs

    # Same frozen instant again: throttled.
    Timecop.freeze(later) { |_| bump.call }
    assert_equal 2, runs
  end
end
================================================
FILE: spec/mosquito/serializers/primitive_serializers_spec.cr
================================================
require "uuid"
require "../../spec_helper"
# Exposes the `Mosquito::Serializers::Primitives` helpers as class methods so
# the specs can call them directly.
class PrimitiveSerializerTester
  extend Mosquito::Serializers::Primitives
end
# Round-trip checks for the UUID primitive serializer.
describe Mosquito::Serializers::Primitives do
  it "serializes uuids" do
    original = UUID.random
    rebuilt = UUID.new(PrimitiveSerializerTester.serialize_uuid(original))
    assert_equal original, rebuilt
  end

  it "deserializes uuids" do
    text = UUID.random.to_s
    assert_equal text, PrimitiveSerializerTester.deserialize_uuid(text).to_s
  end
end
================================================
FILE: spec/mosquito/testing_backend_spec.cr
================================================
require "../spec_helper"
# Specs for the in-memory testing backend's record of enqueued jobs.
describe Mosquito::TestBackend do
  # Most recently recorded entry in the test backend's enqueued-jobs list.
  def latest_enqueued_job
    Mosquito::TestBackend.enqueued_jobs.last
  end

  it "holds a copy of jobs which have been enqueued" do
    Mosquito.temp_config(backend: Mosquito::TestBackend.new) do
      QueuedTestJob.new.enqueue
      assert_equal QueuedTestJob, latest_enqueued_job.klass
    end
  end

  it "embeds job parameters" do
    Mosquito.temp_config(backend: Mosquito::TestBackend.new) do
      EchoJob.new(text: "hello world").enqueue
      assert_equal "hello world", latest_enqueued_job.config["text"]
    end
  end

  it "hold the job id" do
    Mosquito.temp_config(backend: Mosquito::TestBackend.new) do
      enqueued_run = QueuedTestJob.new.enqueue
      assert_equal enqueued_run.id, latest_enqueued_job.id
    end
  end

  it "has a list of job runs which can be emptied" do
    Mosquito.temp_config(backend: Mosquito::TestBackend.new) do
      # Start empty, record one run, then flush again and expect nothing left.
      Mosquito::TestBackend.flush_enqueued_jobs!
      enqueued_run = EchoJob.new(text: "hello world").enqueue
      assert_equal enqueued_run.id, latest_enqueued_job.id

      Mosquito::TestBackend.flush_enqueued_jobs!
      assert Mosquito::TestBackend.enqueued_jobs.empty?
    end
  end
end
================================================
FILE: spec/mosquito/unique_job_spec.cr
================================================
require "../spec_helper"
# Specs for unique jobs: each case enqueues a first job and then checks
# whether a second enqueue is accepted or suppressed.
describe Mosquito::UniqueJob do
  describe "first enqueue" do
    it "enqueues a job when no duplicate exists" do
      clean_slate do
        first_run = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue
        assert_equal [first_run.id], UniqueTestJob.queue.backend.list_waiting
      end
    end
  end

  describe "duplicate suppression" do
    it "prevents a second enqueue with the same parameters" do
      clean_slate do
        UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue
        assert_equal 1, UniqueTestJob.queue.backend.list_waiting.size

        # Identical parameters: the waiting list must not grow.
        UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue
        assert_equal 1, UniqueTestJob.queue.backend.list_waiting.size
      end
    end

    it "allows enqueue with different parameters" do
      clean_slate do
        UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue
        assert_equal 1, UniqueTestJob.queue.backend.list_waiting.size

        # Different user_id.
        UniqueTestJob.new(user_id: 2_i64, email_type: "welcome").enqueue
        assert_equal 2, UniqueTestJob.queue.backend.list_waiting.size
      end
    end

    it "allows enqueue with different parameter values" do
      clean_slate do
        UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue
        assert_equal 1, UniqueTestJob.queue.backend.list_waiting.size

        # Same user_id, different email_type.
        UniqueTestJob.new(user_id: 1_i64, email_type: "reminder").enqueue
        assert_equal 2, UniqueTestJob.queue.backend.list_waiting.size
      end
    end
  end

  describe "key filtering" do
    it "considers only specified key fields for uniqueness" do
      clean_slate do
        # Same user_id, different message — should be suppressed because
        # key is only [:user_id]
        UniqueWithKeyJob.new(user_id: 1_i64, message: "hello").enqueue
        assert_equal 1, UniqueWithKeyJob.queue.backend.list_waiting.size

        UniqueWithKeyJob.new(user_id: 1_i64, message: "world").enqueue
        assert_equal 1, UniqueWithKeyJob.queue.backend.list_waiting.size
      end
    end

    it "allows enqueue when key fields differ" do
      clean_slate do
        UniqueWithKeyJob.new(user_id: 1_i64, message: "hello").enqueue
        assert_equal 1, UniqueWithKeyJob.queue.backend.list_waiting.size

        UniqueWithKeyJob.new(user_id: 2_i64, message: "hello").enqueue
        assert_equal 2, UniqueWithKeyJob.queue.backend.list_waiting.size
      end
    end
  end

  describe "expiration" do
    it "allows re-enqueue after the uniqueness window expires" do
      clean_slate do
        first_job = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome")
        first_run = first_job.enqueue
        assert_equal 1, UniqueTestJob.queue.backend.list_waiting.size

        # Manually remove the lock to simulate expiration
        Mosquito.backend.unlock(first_job.uniqueness_key(first_run), first_run.id)

        UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue
        assert_equal 2, UniqueTestJob.queue.backend.list_waiting.size
      end
    end
  end

  describe "no parameters" do
    it "works with jobs that have no parameters" do
      clean_slate do
        UniqueNoParamsJob.new.enqueue
        assert_equal 1, UniqueNoParamsJob.queue.backend.list_waiting.size

        UniqueNoParamsJob.new.enqueue
        assert_equal 1, UniqueNoParamsJob.queue.backend.list_waiting.size
      end
    end
  end

  describe "delayed enqueue" do
    it "prevents duplicate delayed enqueue" do
      clean_slate do
        UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue(in: 5.minutes)
        assert_equal 1, UniqueTestJob.queue.backend.list_scheduled.size

        # Same parameters with a different delay.
        UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue(in: 10.minutes)
        assert_equal 1, UniqueTestJob.queue.backend.list_scheduled.size
      end
    end

    it "prevents duplicate when mixing immediate and delayed enqueue" do
      clean_slate do
        UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue
        assert_equal 1, UniqueTestJob.queue.backend.list_waiting.size

        # The delayed duplicate must not reach the scheduled list.
        UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").enqueue(in: 5.minutes)
        assert_equal 0, UniqueTestJob.queue.backend.list_scheduled.size
      end
    end
  end

  describe "unique_duration" do
    it "returns the configured duration" do
      configured = UniqueTestJob.new(user_id: 1_i64, email_type: "welcome").unique_duration
      assert_equal 1.hour, configured
    end
  end
end
================================================
FILE: spec/mosquito/version_spec.cr
================================================
require "../spec_helper"
require "yaml"
# Checks that the library version constant exists and agrees with shard.yml.
describe "mosquito version numbers" do
  it "is defined" do
    assert Mosquito::VERSION
  end

  it "matches the shard.yml file" do
    File.open("shard.yml") do |shard_file|
      declared_version = YAML.parse(shard_file)["version"].as_s
      assert_equal Mosquito::VERSION, declared_version
    end
  end
end
================================================
FILE: spec/spec_helper.cr
================================================
# Shared bootstrap for the spec suite: loads the test framework, configures
# logging and Timecop, points Mosquito at the testing backend, loads the
# helpers, and finally starts the test run.
require "minitest"
require "minitest/focus"
require "log"
# Set up logging at the :fatal level before the library is required.
Log.setup :fatal
require "timecop"
# Turn on Timecop's safe mode for the whole suite.
Timecop.safe_mode = true
require "../src/mosquito"
# `testing_redis_url` is not defined in this file.
# NOTE(review): presumably provided by one of the helpers required below — confirm.
Mosquito.configure do |settings|
settings.backend_connection_string = testing_redis_url
settings.publish_metrics = true
end
require "./helpers/*"
# Make the PubSub helpers available in every test.
class Minitest::Test
include PubSub::Helpers
end
# Flush the configured backend once, before autorun is required.
Mosquito.configuration.backend.flush
require "minitest/autorun"
================================================
FILE: src/mosquito/api/concurrency_config.cr
================================================
module Mosquito
  # Read/write facade over the concurrency limits that
  # `RemoteConfigDequeueAdapter` keeps in the backend.
  #
  # Every operation comes in two flavours: without arguments it targets the
  # global limits shared by all overseers, and with an `overseer_id` it
  # targets the limits of that one overseer (useful for asymmetric hardware).
  #
  # ```crystal
  # config = Mosquito::Api::ConcurrencyConfig.instance
  # config.limits                                                 # => global limits
  # config.limits(overseer_id: "gpu-worker-1")                    # => per-overseer limits
  # config.update({"queue_a" => 5})                               # write global
  # config.update({"queue_a" => 1}, overseer_id: "gpu-worker-1")  # write per-overseer
  # config.clear                                                  # remove global limits
  # config.clear(overseer_id: "gpu-worker-1")                     # remove per-overseer limits
  # ```
  class Api::ConcurrencyConfig
    # Builds a fresh instance on every call.
    def self.instance : self
      new
    end

    # Global concurrency limits as stored in the backend.
    def limits : Hash(String, Int32)
      RemoteConfigDequeueAdapter.stored_limits
    end

    # Concurrency limits stored for the overseer identified by *overseer_id*.
    def limits(overseer_id : String) : Hash(String, Int32)
      RemoteConfigDequeueAdapter.stored_limits(overseer_id)
    end

    # Replaces the global stored limits with *new_limits*.
    def update(new_limits : Hash(String, Int32)) : Nil
      RemoteConfigDequeueAdapter.store_limits(new_limits)
    end

    # Replaces the stored limits of one overseer with *new_limits*.
    def update(new_limits : Hash(String, Int32), overseer_id : String) : Nil
      RemoteConfigDequeueAdapter.store_limits(new_limits, overseer_id)
    end

    # Drops every globally stored limit.
    def clear : Nil
      RemoteConfigDequeueAdapter.clear_limits
    end

    # Drops the limits stored for one overseer.
    def clear(overseer_id : String) : Nil
      RemoteConfigDequeueAdapter.clear_limits(overseer_id)
    end
  end
end
================================================
FILE: src/mosquito/api/executor.cr
================================================
module Mosquito
module Api
  # Read-only inspector for a single executor.
  #
  # For more information about executors, see `Mosquito::Runners::Executor`.
  class Executor
    getter instance_id
    private getter metadata

    # Builds an inspector for the executor identified by *instance_id*.
    # The backing metadata is opened read-only.
    #
    # see #current_job, #current_job_queue
    def initialize(@instance_id : String)
      key = Observability::Executor.metadata_key(@instance_id)
      @metadata = Metadata.new(key, readonly: true)
    end

    # Id of the job run the executor is working on, or `nil` when idle.
    def current_job : String?
      metadata["current_job"]?
    end

    # Name of the queue the current job came from, or `nil` when idle.
    def current_job_queue : String?
      metadata["current_job_queue"]?
    end

    # Time of the last recorded heartbeat, or `nil` if there is none.
    def heartbeat : Time?
      metadata.heartbeat?
    end
  end
end
module Observability
  # Logging, metrics and event publishing for one running executor.
  class Executor
    include Publisher

    private getter log : ::Log

    # Backend key under which an executor's metadata is stored.
    def self.metadata_key(instance_id : String) : String
      Mosquito.backend.build_key "executor", instance_id
    end

    # Keys the metadata by the executor's object id and nests the publish
    # context under the owning overseer's context.
    def initialize(executor : Mosquito::Runners::Executor)
      @metadata = Metadata.new(self.class.metadata_key(executor.object_id.to_s))
      @log = Log.for(executor.runnable_name)
      parent_context = executor.overseer.observer.publish_context
      @publish_context = PublishContext.new(parent_context, [:executor, executor.object_id])
    end

    # Wraps the execution of *job_run* (the yielded block): records the
    # current job in metadata, publishes start/finish events, logs the
    # outcome, and pushes the measured duration into the running average.
    def execute(job_run : JobRun, from_queue : Mosquito::Queue)
      queue_name = from_queue.name

      metrics do
        @metadata.set({
          "current_job"       => job_run.id,
          "current_job_queue" => queue_name,
        })
      end

      # Estimate based on the stored average for this job type.
      expected_duration = Mosquito.backend.average average_key(job_run.type)

      log.info { "#{"Starting:".colorize.magenta} #{job_run} from #{queue_name}" }
      publish({
        event:                "job-started",
        job_run:              job_run.id,
        from_queue:           queue_name,
        expected_duration_ms: expected_duration,
      })

      duration = Time.measure { yield }

      if job_run.succeeded?
        log_success_message job_run, duration
      elsif job_run.preempted?
        log_preempted_message job_run, duration
      else
        log_failure_message job_run, duration
      end

      publish({event: "job-finished", job_run: job_run.id})

      metrics do
        key = average_key(job_run.type)
        Mosquito.backend.average_push key, duration.total_milliseconds.to_i
        # Schedule deletion of the average key 30 days out.
        Mosquito.backend.delete key, in: 30.days
        @metadata.set(
          current_job: nil,
          current_job_queue: nil
        )
      end
    end

    # Backend key holding the duration average for a job type.
    def average_key(job_run_type : String) : String
      Mosquito.backend.build_key "job", job_run_type, "duration"
    end

    # Logs a successful run at info level.
    def log_success_message(job_run : JobRun, duration : Time::Span)
      log.info { "#{"Success:".colorize.green} #{job_run} finished and took #{time_with_units duration}" }
    end

    # Logs a preempted run at info level, including the preempt reason when
    # present and the next execution time when the run can be rescheduled.
    def log_preempted_message(job_run : JobRun, duration : Time::Span)
      text = String.build do |io|
        io << "Preempted: ".colorize.cyan << job_run << " was preempted"

        reason = job_run.preempt_reason
        io << " (" << reason << ")" unless reason.empty?

        io << " after " << time_with_units(duration)

        if job_run.rescheduleable?
          next_execution = Time.utc + job_run.reschedule_interval
          io << " and will run again".colorize.cyan
          io << " in " << job_run.reschedule_interval
          io << " (at " << next_execution << ")"
        end
      end

      log.info { text }
    end

    # Logs a failed run: a warning when it will be retried, an error when it
    # cannot be rescheduled.
    def log_failure_message(job_run : JobRun, duration : Time::Span)
      will_retry = job_run.rescheduleable?

      text = String.build do |io|
        io << "Failure: ".colorize.red << job_run
        io << " failed, taking " << time_with_units(duration) << " and "

        if will_retry
          next_execution = Time.utc + job_run.reschedule_interval
          io << "will run again".colorize.cyan
          io << " in " << job_run.reschedule_interval
          io << " (at " << next_execution << ")"
        else
          io << "cannot be rescheduled".colorize.yellow
        end
      end

      if will_retry
        log.warn { text }
      else
        log.error { text }
      end
    end

    # :nodoc:
    # Renders *duration* in the largest unit (s, ms, µs, ns) whose lower
    # bound it exceeds, colorized by magnitude.
    private def time_with_units(duration : Time::Span)
      seconds = duration.total_seconds

      case
      when seconds > 0.1
        "#{(seconds * 100).trunc / 100}s".colorize.red
      when seconds > 0.001
        "#{(seconds * 1_000).trunc}ms".colorize.yellow
      when seconds > 0.000_001
        "#{(seconds * 1_000_000).trunc}µs".colorize.green
      when seconds > 0.000_000_001
        "#{(seconds * 1_000_000_000).trunc}ns".colorize.green
      else
        "no discernible time at all".colorize.green
      end
    end

    # Records a heartbeat in metadata when metrics are enabled.
    def heartbeat!
      metrics do
        @metadata.heartbeat!
      end
    end
  end
end
end
================================================
FILE: src/mosquito/api/executor_config.cr
================================================
module Mosquito
  # Read/write facade over the remotely stored executor count used by
  # overseers configured with a stable `overseer_id`.
  #
  # Each operation exists in a global form (shared by all overseers) and a
  # per-overseer form for asymmetric hardware configurations.
  #
  # ```crystal
  # config = Mosquito::Api::ExecutorConfig.instance
  # config.executor_count                               # => global count or nil
  # config.executor_count(overseer_id: "gpu-worker-1")  # => per-overseer count or nil
  # config.update(8)                                    # write global
  # config.update(2, overseer_id: "gpu-worker-1")       # write per-overseer
  # config.clear                                        # remove global override
  # config.clear(overseer_id: "gpu-worker-1")           # remove per-overseer override
  # ```
  class Api::ExecutorConfig
    CONFIG_KEY = "executor_count"

    # Builds a fresh instance on every call.
    def self.instance : self
      new
    end

    # Global executor count from the backend, or nil when no override is set.
    def executor_count : Int32?
      self.class.stored_executor_count
    end

    # Executor count for one overseer, or nil when that overseer has no
    # override.
    def executor_count(overseer_id : String) : Int32?
      self.class.stored_executor_count(overseer_id)
    end

    # Stores a global executor count override.
    def update(count : Int32) : Nil
      self.class.store_executor_count(count)
    end

    # Stores an executor count override for one overseer.
    def update(count : Int32, overseer_id : String) : Nil
      self.class.store_executor_count(count, overseer_id)
    end

    # Deletes the global executor count override.
    def clear : Nil
      self.class.clear_executor_count
    end

    # Deletes the executor count override of one overseer.
    def clear(overseer_id : String) : Nil
      self.class.clear_executor_count(overseer_id)
    end

    # ----- Backend storage helpers -----

    # Reads the "count" field under the global key and parses it as Int32.
    def self.stored_executor_count : Int32?
      Mosquito.backend.get(global_config_key, "count").try(&.to_i32)
    end

    # Reads the "count" field under the overseer's key and parses it as Int32.
    def self.stored_executor_count(overseer_id : String) : Int32?
      Mosquito.backend.get(overseer_config_key(overseer_id), "count").try(&.to_i32)
    end

    # Writes *count* as a string into the "count" field of the global key.
    def self.store_executor_count(count : Int32) : Nil
      Mosquito.backend.set(global_config_key, "count", count.to_s)
    end

    # Writes *count* as a string into the "count" field of the overseer's key.
    def self.store_executor_count(count : Int32, overseer_id : String) : Nil
      Mosquito.backend.set(overseer_config_key(overseer_id), "count", count.to_s)
    end

    # Deletes the global key.
    def self.clear_executor_count : Nil
      Mosquito.backend.delete(global_config_key)
    end

    # Deletes the overseer's key.
    def self.clear_executor_count(overseer_id : String) : Nil
      Mosquito.backend.delete(overseer_config_key(overseer_id))
    end

    # Effective executor count for an overseer: the per-overseer value when
    # an id is given and a value is stored for it, otherwise the global
    # value. Returns nil when neither is set.
    def self.resolve(overseer_id : String? = nil) : Int32?
      return stored_executor_count if overseer_id.nil?

      stored_executor_count(overseer_id) || stored_executor_count
    end

    protected def self.global_config_key : String
      Mosquito.backend.build_key(CONFIG_KEY)
    end

    protected def self.overseer_config_key(overseer_id : String) : String
      Mosquito.backend.build_key(CONFIG_KEY, overseer_id)
    end
  end
end
================================================
FILE: src/mosquito/api/job_run.cr
================================================
module Mosquito::Api
  # Represents a job run in Mosquito.
  #
  # This class is used to inspect a job run stored in the backend. Every
  # accessor reads the stored fields through a read-only `Metadata` handle.
  #
  # For more information about a JobRun, see `Mosquito::JobRun`.
  class JobRun
    # Stored fields which are bookkeeping rather than parameters the job was
    # enqueued with. Kept as a constant so the list is built once instead of
    # once per stored field on every `#runtime_parameters` call.
    #
    # NOTE(review): other bookkeeping fields written to the same key (for
    # example an overseer claim) may also belong here — confirm against
    # `Mosquito::JobRun`.
    private INTERNAL_KEYS = ["id", "type", "enqueue_time", "retry_count", "started_at", "finished_at"]

    # The id of the job run.
    getter id : String

    def initialize(@id : String)
    end

    # Does a JobRun with this ID exist in the backend?
    #
    # Determined by the presence of a "type" field in the stored config.
    def found? : Bool
      config.has_key? "type"
    end

    # Get the parameters the job was enqueued with: the stored config minus
    # the bookkeeping fields listed in `INTERNAL_KEYS`.
    def runtime_parameters : Hash(String, String)
      config.reject do |key, _|
        INTERNAL_KEYS.includes? key
      end
    end

    # Lazily built read-only handle on this run's stored fields.
    private getter metadata : Metadata {
      Metadata.new(
        Mosquito.backend.build_key(Mosquito::JobRun::CONFIG_KEY_PREFIX, id),
        readonly: true
      )
    }

    # The stored fields as a hash.
    private def config : Hash(String, String)
      metadata.to_h
    end

    # The type of job this job run is for.
    #
    # Raises `KeyError` when no "type" field is stored; check `#found?` first.
    def type : String
      config["type"]
    end

    # The moment this job was enqueued, parsed from unix milliseconds.
    def enqueue_time : Time
      Time.unix_ms config["enqueue_time"].to_i64
    end

    # The moment this job was started, or nil when no start time is stored.
    def started_at : Time?
      if time = config["started_at"]?
        Time.unix_ms time.to_i64
      end
    end

    # The moment this job was finished, or nil when no finish time is stored.
    def finished_at : Time?
      if time = config["finished_at"]?
        Time.unix_ms time.to_i64
      end
    end

    # The number of times this job has been retried.
    def retry_count : Int
      config["retry_count"].to_i
    end
  end
end
================================================
FILE: src/mosquito/api/observability/publisher.cr
================================================
# Mixin giving observers a metrics guard and the ability to publish events
# to the backend under their publish context.
module Mosquito::Observability::Publisher
  Log = ::Log.for("mosquito.events")

  getter publish_context : PublishContext

  # Expands to the block body wrapped in a check of
  # `Mosquito.configuration.metrics?`.
  macro metrics(&block)
    if Mosquito.configuration.metrics?
      {{ block.body }}
    end
  end

  # Publishes *data* as JSON on the context's originator channel. Does
  # nothing unless metrics are enabled.
  @[AlwaysInline]
  def publish(data : NamedTuple)
    metrics do
      channel = publish_context.originator
      Log.debug { "Publishing #{data} to #{channel}" }
      Mosquito.backend.publish(channel, data.to_json)
    end
  end

  # Names the source of published events: `context` is the key built from
  # the given parts, `originator` the same key prefixed with "mosquito"
  # (and with the parent's context, when one is given).
  class PublishContext
    alias Context = Array(String | Symbol | UInt64)

    property originator : String
    property context : String

    # Root context.
    def initialize(context : Context)
      @context = KeyBuilder.build(context)
      @originator = KeyBuilder.build("mosquito", @context)
    end

    # Child context: the originator nests under *parent*'s context.
    def initialize(parent : self, context : Context)
      @context = KeyBuilder.build(context)
      @originator = KeyBuilder.build("mosquito", parent.context, context)
    end
  end
end
================================================
FILE: src/mosquito/api/overseer.cr
================================================
module Mosquito
# An interface for inspecting the state of Mosquito Overseers.
#
# For more information about overseers, see `Mosquito::Runners::Overseer`.
class Api::Overseer
  # The instance ID of the overseer being inspected.
  getter :instance_id

  private getter :metadata

  # Creates a new Api::Overseer by its instance ID. The overseer's metadata
  # is opened read-only.
  def initialize(@instance_id : String)
    @metadata = Metadata.new Observability::Overseer.metadata_key(@instance_id), readonly: true
  end

  # Retrieves a list of all overseers in the backend.
  def self.all : Array(self)
    Mosquito.backend.list_overseers.map do |id|
      new id
    end
  end

  # Retrieves a list of executors managed by this overseer.
  #
  # The ids are read from the comma-separated "executors" metadata field.
  # Returns an empty array when the field is missing or empty.
  def executors : Array(Executor)
    if executor_list = @metadata["executors"]?
      # remove_empty: an empty stored list ("") would otherwise split into
      # [""] and produce an Executor with a blank id.
      executor_list.split(",", remove_empty: true).map do |name|
        Executor.new name
      end
    else
      [] of Executor
    end
  end

  # The time the overseer last sent a heartbeat, or nil if none is recorded.
  def last_heartbeat : Time?
    metadata.heartbeat?
  end
end
# Logging, metrics and event publishing for one running overseer.
class Observability::Overseer
  include Publisher

  getter metadata : Metadata
  getter instance_id : String

  private getter overseer : Runners::Overseer
  private getter log : ::Log

  # Backend key under which an overseer's metadata is stored.
  def self.metadata_key(instance_id : String) : String
    Mosquito.backend.build_key "overseer", instance_id
  end

  # The instance id is the overseer's object id rendered as a string.
  def initialize(@overseer : Runners::Overseer)
    @instance_id = overseer.object_id.to_s
    @log = Log.for(overseer.runnable_name)
    @metadata = Metadata.new(self.class.metadata_key(instance_id))
    @publish_context = PublishContext.new([:overseer, overseer.object_id])
  end

  # Announces startup and sends the first heartbeat.
  def starting
    log.info { "Starting #{overseer.executor_count} executors." }
    publish({event: "started"})
    heartbeat
  end

  def shutting_down
    log.info { "Shutting down." }
  end

  def stopping
    log.info { "Stopping executors." }
    publish({event: "stopped"})
  end

  # Final teardown: announces the exit, deregisters this overseer from the
  # backend and deletes its metadata.
  def stopped
    log.info { "All executors stopped." }
    log.info { "Finished for now." }
    publish({event: "exited"})
    Mosquito.backend.deregister_overseer instance_id
    metadata.delete
  end

  def heartbeat
    # Registration is unconditional: the pending job cleanup mechanism
    # relies on it to tell which overseers are still alive. Only the
    # metadata heartbeat is gated on metrics.
    Mosquito.backend.register_overseer instance_id
    metrics do
      metadata.heartbeat!
    end
  end

  def executor_created(executor : Runners::Executor) : Nil
    publish({event: "executor-created", executor: executor.object_id})
  end

  def executor_died(executor : Runners::Executor) : Nil
    publish({event: "executor-died", executor: executor.object_id})
    log.fatal do
      <<-MSG
      Executor #{executor.runnable_name} died.
      A new executor will be started.
      MSG
    end
  end

  def channels_closed
    log.fatal { "Executor communication channels closed, overseer will stop." }
  end

  def waiting_for_queue_list
    log.debug { "Waited for the queue list to fetch possible queues." }
  end

  def queue_list_died
    log.fatal { "QueueList has died, overseer will stop." }
  end

  def recovered_orphaned_job(job_run : JobRun, overseer_id : String)
    log.warn { "Recovered orphaned job #{job_run.id} from dead overseer #{overseer_id}." }
  end

  def orphaned_jobs_recovered(total : Int32)
    log.warn { "Recovered #{total} orphaned job(s) from pending queues." }
  end

  def recovered_job_from_executor(job_run : JobRun, executor : Runners::Executor)
    log.warn { "Recovered job #{job_run.id} from dead executor #{executor.runnable_name}." }
  end

  # Stores the executors' object ids as a comma-separated list in the
  # "executors" metadata field, when metrics are enabled.
  def update_executor_list(executors : Array(Runners::Executor)) : Nil
    metrics do
      metadata["executors"] = executors.map(&.object_id).join(",")
    end
  end
end
end
================================================
FILE: src/mosquito/api/periodic_job.cr
================================================
module Mosquito
# An interface for inspecting the state of periodic jobs.
#
# This class provides read-only access to periodic job metadata,
# including the last time each periodic job was executed.
#
# ```
# Mosquito::Api::PeriodicJob.all.each do |job|
#   puts "#{job.name} last ran at #{job.last_executed_at}"
# end
# ```
class Api::PeriodicJob
  # The name of the periodic job class.
  getter name : String

  # The configured run interval for this periodic job.
  getter interval : Time::Span | Time::MonthSpan

  private getter metadata : Metadata

  # Returns a list of all registered periodic jobs, built from
  # `Base.scheduled_job_runs`.
  def self.all : Array(self)
    Base.scheduled_job_runs.map do |job_run|
      new job_run.class.name, job_run.interval
    end
  end

  # Opens the job's metadata read-only under the "periodic_jobs" key.
  def initialize(@name : String, @interval : Time::Span | Time::MonthSpan)
    @metadata = Metadata.new(
      Mosquito.backend.build_key("periodic_jobs", @name),
      readonly: true
    )
  end

  # The last time this periodic job was executed, or nil if it has never run.
  #
  # The stored value is a unix timestamp in seconds. It is parsed as a
  # 64-bit integer so values beyond `Int32::MAX` (January 2038) do not raise.
  def last_executed_at : Time?
    if timestamp = metadata["last_executed_at"]?
      Time.unix(timestamp.to_i64)
    end
  end
end
# Logging and event publishing for one periodic job, identified by the class
# name of its periodic job run.
class Observability::PeriodicJob
  include Publisher

  getter log : ::Log
  getter publish_context : PublishContext

  def initialize(periodic_job_run : Mosquito::PeriodicJobRun)
    @name = periodic_job_run.class.name
    @publish_context = PublishContext.new([:periodic_job, @name])
    @log = Log.for(@name)
  end

  # Reports that the job was enqueued at *time*; the published event carries
  # the time as a unix timestamp.
  def enqueued(at time : Time)
    log.info { "Enqueued periodic job at #{time}" }
    publish({event: "enqueued", executed_at: time.to_unix})
  end

  # Trace-level note that the job was not due yet. Nothing is published.
  def skipped
    log.trace { "Not yet due for execution" }
  end
end
end
================================================
FILE: src/mosquito/api/queue.cr
================================================
module Mosquito
# Inspector for a named queue: lists its job runs and reports its sizes.
# For more about the internals of a Queue in Mosquito, see `Mosquito::Queue`.
class Api::Queue
  # The name of the queue.
  getter name : String

  private property backend : Mosquito::Backend::Queue

  # Every named queue known to the backend.
  def self.all : Array(Queue)
    Mosquito.backend.list_queues.map { |queue_name| new queue_name }
  end

  # Wraps the backend queue called *name*.
  def initialize(@name : String)
    @backend = Mosquito.backend.queue name
  end

  {% for name in Mosquito::Backend::QUEUES %}
    # Gets a list of all the job runs in the internal {{name.id}} queue.
    def {{name.id}}_job_runs : Array(JobRun)
      backend.list_{{name.id}}.map { |run_id| JobRun.new run_id }
    end
  {% end %}

  # The operating size of the queue, not including dead jobs.
  def size : Int64
    backend.size(include_dead: false)
  end

  # The size of the queue, broken out by job state.
  #
  # Example:
  #
  # ```
  # Mosquito::Api::Queue.all.first.size_details
  # # => {"waiting" => 0, "scheduled" => 0, "pending" => 0, "dead" => 0}
  # ```
  #
  # The semantics of the keys are described in detail on the `Mosquito::Queue` class, but in brief:
  #
  # - `scheduled` is a list of jobs which are scheduled to be executed at a later time.
  # - `waiting` is a list of jobs which should be executed ASAP
  # - `pending` is a list of jobs for which execution has started
  # - `dead` is a list of jobs which have failed to execute
  def size_details : Hash(String, Int64)
    breakdown = {} of String => Int64
    {% for name in Mosquito::Backend::QUEUES %}
      breakdown["{{name.id}}"] = backend.{{name.id}}_size
    {% end %}
    breakdown
  end

  # Whether the backend reports this queue as paused.
  def paused? : Bool
    backend.paused?
  end

  # Orders queues by name.
  def <=>(other)
    name <=> other.name
  end
end
# Logging and event publishing for the lifecycle of job runs on one queue.
class Observability::Queue
  include Publisher

  getter log : ::Log
  getter publish_context : PublishContext

  delegate name, to: @queue

  # Convenience constructor taking the queue's name.
  def initialize(queue : String)
    initialize(Mosquito::Queue.new(queue))
  end

  def initialize(@queue : Mosquito::Queue)
    @publish_context = PublishContext.new([:queue, queue.name])
    @log = Log.for(queue.name)
  end

  # A job run was enqueued for immediate execution.
  def enqueued(job_run : JobRun)
    log.trace { "Enqueuing #{job_run.id} for immediate execution" }
    publish({event: "enqueued", job_run: job_run.id})
  end

  # A job run was enqueued for execution at *execute_time*.
  def enqueued(job_run : JobRun, at execute_time : Time)
    log.trace { "Enqueuing #{job_run.id} for execution at #{execute_time}" }
    publish({event: "enqueued", job_run: job_run.id, execute_time: execute_time})
  end

  def dequeued(job_run : JobRun)
    log.trace { "Dequeuing #{job_run.id}" }
    publish({event: "dequeued", job_run: job_run.id})
  end

  def rescheduled(job_run : JobRun, to execute_time : Time)
    log.trace { "Rescheduling #{job_run.id} to execute at #{execute_time}" }
    publish({event: "rescheduled", job_run: job_run.id, execute_time: execute_time})
  end

  def forgotten(job_run : JobRun)
    log.trace { "Forgetting #{job_run.id}" }
    publish({event: "forgotten", job_run: job_run.id})
  end

  def banished(job_run : JobRun)
    log.trace { "Banishing #{job_run.id} to dead queue" }
    publish({event: "banished", job_run: job_run.id})
  end

  # The queue was paused. With a *duration* the event carries it in seconds;
  # without one the pause is reported as indefinite.
  def paused(duration : Time::Span? = nil)
    if duration
      log.info { "Paused for #{duration}" }
      publish({event: "paused", duration: duration.total_seconds})
    else
      log.info { "Paused indefinitely" }
      publish({event: "paused"})
    end
  end

  def resumed
    log.info { "Resumed" }
    publish({event: "resumed"})
  end
end
end
================================================
FILE: src/mosquito/api/queue_list.cr
================================================
module Mosquito
# Logs changes to the set of paused queues seen by a `Runners::QueueList`.
class Observability::QueueList
  private getter log : ::Log

  # Names that were paused at the previous check. Kept so that a notice is
  # only logged when the set of paused queues actually changes.
  @last_paused_names = Set(String).new

  def initialize(queue_list : Runners::QueueList)
    @log = Log.for(queue_list.runnable_name)
  end

  # Records the currently paused queues and logs a notice when the set of
  # names differs from the one seen on the previous call.
  def checked_for_paused_queues(paused : Array(Mosquito::Queue))
    names = paused.map(&.name)
    current = names.to_set
    return if current == @last_paused_names

    @last_paused_names = current
    log.for("paused_queues").notice do
      if names.empty?
        "all queues resumed"
      else
        "#{names.size} paused queues: #{names.join(", ")}"
      end
    end
  end
end
end
================================================
FILE: src/mosquito/api.cr
================================================
require "./backend"
require "./api/observability/*"
require "./api/*"
# Entry points for inspecting and remotely configuring a running Mosquito
# system. Every method here is a thin wrapper around an API object or the
# configured backend.
module Mosquito::Api
  # Builds the API object for the overseer identified by *id*.
  def self.overseer(id : String) : Overseer
    Overseer.new(id)
  end

  # Builds the API object for the executor identified by *id*.
  def self.executor(id : String) : Executor
    Executor.new(id)
  end

  # Builds the API object for the job run identified by *id*.
  def self.job_run(id : String) : JobRun
    JobRun.new(id)
  end

  # Lists every periodic job known to `PeriodicJob.all`.
  def self.list_periodic_jobs : Array(PeriodicJob)
    PeriodicJob.all
  end

  # Wraps every queue name reported by the backend.
  #
  # NOTE(review): this returns `Observability::Queue` objects rather than
  # `Api::Queue`; confirm that is the intended return type.
  def self.list_queues : Array(Observability::Queue)
    queue_names = Mosquito.backend.list_queues
    queue_names.map { |queue_name| Observability::Queue.new(queue_name) }
  end

  # Wraps every overseer id reported by the backend.
  def self.list_overseers : Array(Overseer)
    overseer_ids = Mosquito.backend.list_overseers
    overseer_ids.map { |overseer_id| Overseer.new(overseer_id) }
  end

  # Subscribes to the backend pattern `mosquito:*` and returns the channel
  # on which broadcast messages are delivered.
  def self.event_receiver : Channel(Backend::BroadcastMessage)
    Mosquito.backend.subscribe("mosquito:*")
  end

  # Returns a `ConcurrencyConfig` instance for reading and writing the
  # remotely stored concurrency limits used by
  # `RemoteConfigDequeueAdapter`.
  def self.concurrency_config : ConcurrencyConfig
    ConcurrencyConfig.instance
  end

  # Convenience reader for the current global remote concurrency limits.
  def self.concurrency_limits : Hash(String, Int32)
    concurrency_config.limits
  end

  # Convenience reader for a specific overseer's concurrency limits.
  def self.concurrency_limits(overseer_id : String) : Hash(String, Int32)
    concurrency_config.limits(overseer_id)
  end

  # Convenience writer — replaces the global stored concurrency limits so
  # that all `RemoteConfigDequeueAdapter` instances pick them up on their
  # next refresh cycle.
  def self.set_concurrency_limits(limits : Hash(String, Int32)) : Nil
    concurrency_config.update(limits)
  end

  # Convenience writer — replaces stored concurrency limits for a specific
  # overseer.
  def self.set_concurrency_limits(limits : Hash(String, Int32), overseer_id : String) : Nil
    concurrency_config.update(limits, overseer_id)
  end

  # Returns an `ExecutorConfig` instance for reading and writing the
  # remotely stored executor count.
  def self.executor_config : ExecutorConfig
    ExecutorConfig.instance
  end

  # Convenience reader for the global remote executor count.
  def self.executor_count : Int32?
    executor_config.executor_count
  end

  # Convenience reader for a specific overseer's executor count.
  def self.executor_count(overseer_id : String) : Int32?
    executor_config.executor_count(overseer_id)
  end

  # Convenience writer — sets the global executor count override.
  def self.set_executor_count(count : Int32) : Nil
    executor_config.update(count)
  end

  # Convenience writer — sets the executor count for a specific overseer.
  def self.set_executor_count(count : Int32, overseer_id : String) : Nil
    executor_config.update(count, overseer_id)
  end
end
================================================
FILE: src/mosquito/backend.cr
================================================
module Mosquito
# Abstract interface every storage backend implements. Concrete subclasses
# supply the storage, coordination, metrics and queueing primitives declared
# below.
abstract class Backend
# A message delivered through `#subscribe`: the channel it arrived on and
# its payload.
struct BroadcastMessage
property channel : String
property message : String
def initialize(@channel, @message)
end
end
# The lifecycle states a job run passes through in any backend.
QUEUES = %w(waiting scheduled pending dead)
# Leading key segment passed to `KeyBuilder` for every key built by
# `#build_key`.
KEY_PREFIX = {"mosquito"}
# Builds a backend key from the configured global prefix, `KEY_PREFIX` and
# the given *parts*.
def build_key(*parts)
KeyBuilder.build Mosquito.configuration.global_prefix, KEY_PREFIX, *parts
end
# Factory method to create a named queue for this backend.
def queue(name : String | Symbol) : Queue
_build_queue(name.to_s)
end
# Implemented by each backend to construct its own `Queue` subclass.
protected abstract def _build_queue(name : String) : Queue
abstract def connection
abstract def connection_string=(value : String)
abstract def connection_string : String?
abstract def valid_configuration? : Bool
# Storage
abstract def store(key : String, value : Hash(String, String?) | Hash(String, String)) : Nil
abstract def retrieve(key : String) : Hash(String, String)
abstract def delete(key : String, in ttl : Int64 = 0) : Nil
abstract def delete(key : String, in ttl : Time::Span) : Nil
abstract def expires_in(key : String) : Int64
abstract def get(key : String, field : String) : String?
abstract def set(key : String, field : String, value : String) : String
abstract def set(key : String, values : Hash(String, String?) | Hash(String, Nil) | Hash(String, String)) : Nil
abstract def delete_field(key : String, field : String) : Nil
abstract def increment(key : String, field : String) : Int64
abstract def increment(key : String, field : String, by value : Int32) : Int64
# Global
abstract def list_queues : Array(String)
abstract def list_overseers : Array(String)
abstract def list_active_overseers(since : Time) : Array(String)
abstract def register_overseer(id : String) : Nil
abstract def deregister_overseer(id : String) : Nil
abstract def flush : Nil
# Coordination
abstract def unlock(key : String, value : String) : Nil
abstract def lock?(key : String, value : String, ttl : Time::Span) : Bool
abstract def renew_lock?(key : String, value : String, ttl : Time::Span) : Bool
abstract def publish(key : String, value : String) : Nil
abstract def subscribe(key : String) : Channel(BroadcastMessage)
# Metrics
abstract def average_push(key : String, value : Int32, window_size : Int32 = 100) : Nil
abstract def average(key : String) : Int32
# Abstract interface for a single named queue stored in a backend.
abstract class Queue
getter backend : Backend
private getter name : String
def initialize(@backend, @name : String)
end
# Queue operations
abstract def enqueue(job_run : JobRun) : JobRun
abstract def dequeue : JobRun?
abstract def schedule(job_run : JobRun, at scheduled_time : Time) : JobRun
abstract def deschedule : Array(JobRun)
abstract def finish(job_run : JobRun)
abstract def terminate(job_run : JobRun)
abstract def undequeue : JobRun?
abstract def flush : Nil
abstract def size(include_dead : Bool = true) : Int64
# Declares a `list_<state>` and `<state>_size` pair for each lifecycle
# state. The literal list repeats the entries of `Backend::QUEUES`.
{% for name in ["waiting", "scheduled", "pending", "dead"] %}
abstract def list_{{name.id}} : Array(String)
abstract def {{name.id}}_size : Int64
{% end %}
abstract def scheduled_job_run_time(job_run : JobRun) : Time?
# Pause this queue so that `#dequeue` returns nil until it is resumed
# or the optional duration expires.
abstract def pause(duration : Time::Span? = nil) : Nil
# Resume a paused queue, allowing dequeue to proceed.
abstract def resume : Nil
# Whether this queue is currently paused.
abstract def paused? : Bool
end
end
end
================================================
FILE: src/mosquito/base.cr
================================================
require "json"
module Mosquito
alias Id = Int64 | Int32
# Pairs a job run with the queue it came from.
record WorkUnit, job_run : JobRun, queue : Queue do
  # Named constructor, read at the call site as
  # `WorkUnit.of(job_run, from: queue)`.
  def self.of(job_run : JobRun, *, from queue : Queue) : self
    new job_run, queue
  end
end
# Class-level registry of job classes and periodic job runs.
class Base
# Maps a job type string to the job class registered for it.
class_getter mapping = {} of String => Mosquito::Job.class
# Periodic job runs added through `register_job_interval`.
class_getter scheduled_job_runs = [] of PeriodicJobRun
class_getter timetable = [] of PeriodicJobRun
# Records *klass* as the job class for the type name *string*, replacing
# any class previously registered under the same name.
def self.register_job_mapping(string, klass)
@@mapping[string] = klass
end
# Looks up the job class registered for *type*.
#
# Raises `KeyError` when *type* is unknown; the message lists every
# currently registered type to help diagnose a missing registration.
def self.job_for_type(type : String) : Mosquito::Job.class
@@mapping[type]
rescue e : KeyError
error = String.build do |s|
s << <<-TEXT
Could not find a job class for type "#{type}", perhaps you forgot to register it?
Current known types are:
TEXT
@@mapping.each { |k, v| s << "#{k}=>#{v}\n" }
s << "\n\n"
end
raise KeyError.new(error)
end
# Registers *klass* to run periodically, every *interval*.
def self.register_job_interval(klass, interval : Time::Span | Time::MonthSpan)
@@scheduled_job_runs << PeriodicJobRun.new(klass, interval)
end
# NOTE(review): this method looks unfinished — the block given to `index`
# is empty and neither *klass* nor *scheduled_time* is used, so nothing is
# added to the timetable. Confirm the intended behaviour.
def self.register_job(klass, *, to_run_at scheduled_time : Time)
position = @@timetable.index do
end
end
end
end
================================================
FILE: src/mosquito/configuration.cr
================================================
module Mosquito
class_getter configuration = Configuration.new
# Yields the global `Configuration` instance so settings can be assigned:
#
# ```
# Mosquito.configure do |settings|
#   settings.backend_connection_string = "redis://localhost:6379"
# end
# ```
def self.configure(&block) : Nil
yield configuration
end
# Runtime settings for Mosquito. A single instance is exposed through
# `Mosquito.configuration` and is normally edited via `Mosquito.configure`.
class Configuration
  property idle_wait : Time::Span = 100.milliseconds

  # Time-to-live values, in seconds.
  property successful_job_ttl : Int32 = 1.minute.total_seconds.to_i
  property failed_job_ttl : Int32 = 86400

  property use_distributed_lock : Bool = true

  # Defaults to the `MOSQUITO_EXECUTOR_COUNT` environment variable, or 6
  # when it is not set.
  property executor_count : Int32 = ENV.fetch("MOSQUITO_EXECUTOR_COUNT", "6").to_i

  property run_from : Array(String) = [] of String
  property global_prefix : String? = nil
  property backend : Mosquito::Backend = Mosquito::RedisBackend.new
  property dequeue_adapter : Mosquito::DequeueAdapter = Mosquito::ShuffleDequeueAdapter.new

  # Maps queue names to resource gates. Queues whose gate returns
  # `false` from `#allow?` are excluded from dequeuing.
  property resource_gates : Hash(String, Mosquito::ResourceGate) = {} of String => Mosquito::ResourceGate

  # A stable, user-chosen identifier for this overseer instance.
  # Used to look up per-overseer remote configuration (executor count,
  # concurrency limits, etc.). When nil, the overseer only reads global
  # remote config.
  property overseer_id : String? = nil

  property publish_metrics : Bool = false

  # How often a mosquito runner should emit a heartbeat metric.
  property heartbeat_interval : Time::Span = 20.seconds

  # How long an overseer can go without a heartbeat before it is
  # considered dead and its pending jobs are recovered.
  property dead_overseer_threshold : Time::Span = 100.seconds

  # True once `#validate` has succeeded; later calls are then skipped.
  property validated = false

  # The backend's underlying connection.
  def backend_connection
    backend.connection
  end

  # The connection string currently configured on the backend.
  def backend_connection_string
    backend.connection_string
  end

  # Forwards the connection string to the configured backend.
  def backend_connection_string=(value : String)
    backend.connection_string = value
  end

  # Accepts a plain number of seconds as a convenience.
  def idle_wait=(time_span : Float)
    @idle_wait = time_span.seconds
  end

  # Checks that the backend has a usable configuration.
  #
  # Raises an `Exception` describing how to configure the connection when
  # the backend reports an invalid configuration. The result is cached only
  # on success, so a failed validation is reported again on every later call
  # instead of being silently skipped.
  def validate
    return if @validated

    unless backend.valid_configuration?
      message = <<-error
      Mosquito cannot start because no backend connection has been provided.
      For example, in your application config:
      Mosquito.configure do |settings|
      settings.backend_connection_string = (ENV["REDIS_TLS_URL"]? || ENV["REDIS_URL"]? || "redis://localhost:6379")
      end
      See Also: https://github.com/mosquito-cr/mosquito#connecting-to-redis
      error
      raise message
    end

    # Only remember the outcome once the backend has been accepted.
    @validated = true
    nil
  end

  # Whether metrics publishing is enabled.
  def metrics? : Bool
    publish_metrics
  end
end
end
================================================
FILE: src/mosquito/dequeue_adapter.cr
================================================
module Mosquito
# A DequeueAdapter determines how the Overseer selects the next job to
# execute from the available queues.
#
# Subclass `DequeueAdapter`, implement `#dequeue`, and assign an instance
# via `Mosquito.configure`:
#
# ```crystal
# class MyDequeueAdapter < Mosquito::DequeueAdapter
#   def dequeue(queue_list : Mosquito::Runners::QueueList) : Mosquito::WorkUnit?
#     queue_list.queues.each do |q|
#       if job_run = q.dequeue
#         return Mosquito::WorkUnit.of(job_run, from: q)
#       end
#     end
#   end
# end
#
# Mosquito.configure do |settings|
#   settings.dequeue_adapter = MyDequeueAdapter.new
# end
# ```
abstract class DequeueAdapter
# Attempt to dequeue a job from one of the queues managed by `queue_list`.
#
# Returns a `WorkUnit` when a job is available, or `nil`
# when all queues are empty.
abstract def dequeue(queue_list : Runners::QueueList) : WorkUnit?
# Called by the Overseer when a job run has finished executing.
# Override this to react to completed jobs (e.g. update internal
# counters or rebalance queue weights). The default implementation
# does nothing.
def finished_with(job_run : JobRun, queue : Queue) : Nil
end
end
end
================================================
FILE: src/mosquito/dequeue_adapters/concurrency_limited_dequeue_adapter.cr
================================================
require "../dequeue_adapter"
module Mosquito
# A dequeue adapter that enforces per-queue concurrency limits.
#
# Each queue can be assigned a maximum number of jobs that may execute
# concurrently. When a queue has reached its limit, it is skipped during
# dequeue until an in-flight job finishes.
#
# Queues not present in the limits table have no concurrency ceiling and
# are bounded only by the total executor pool size.
#
# Among eligible queues the adapter uses a shuffle to provide rough
# fairness, similar to `ShuffleDequeueAdapter`.
#
# ## Example
#
# ```crystal
# Mosquito.configure do |settings|
# settings.executor_count = 8
#
# settings.dequeue_adapter = Mosquito::ConcurrencyLimitedDequeueAdapter.new({
# "queue_a" => 3,
# "queue_b" => 5,
# })
# end
# ```
#
# In this configuration at most 3 jobs from "queue_a" and 5 from "queue_b"
# will execute at the same time. Other queues are unlimited.
class ConcurrencyLimitedDequeueAdapter < DequeueAdapter
  # Maximum number of concurrently executing jobs, keyed by queue name.
  property limits : Hash(String, Int32)

  # Tracks the number of currently in-flight jobs per queue name.
  # Access is fiber-safe because Crystal fibers are cooperatively
  # scheduled and we never yield between read and write.
  @active : Hash(String, Int32)

  def initialize(@limits : Hash(String, Int32))
    # Unknown queue names read as zero in-flight jobs.
    @active = Hash(String, Int32).new(0)
  end

  # Visits the queues in random order, skipping any queue that has reached
  # its limit, and returns the first job run found. Returns nil when no
  # eligible queue yields a job.
  def dequeue(queue_list : Runners::QueueList) : WorkUnit?
    queue_list.queues.shuffle.each do |candidate|
      queue_name = candidate.name
      ceiling = limits[queue_name]?
      next if ceiling && @active[queue_name] >= ceiling

      next unless job_run = candidate.dequeue

      @active[queue_name] += 1
      return WorkUnit.of(job_run, from: candidate)
    end

    nil
  end

  # Called by the Overseer when a job from this queue has finished
  # executing. Decrements the in-flight counter (never below zero) so the
  # queue becomes eligible for dequeue again.
  def finished_with(job_run : JobRun, queue : Queue) : Nil
    queue_name = queue.name
    @active[queue_name] = Math.max(@active[queue_name] - 1, 0)
  end

  # Returns the current number of in-flight jobs for the given queue.
  def active_count(queue_name : String) : Int32
    @active[queue_name]
  end
end
end
================================================
FILE: src/mosquito/dequeue_adapters/remote_config_dequeue_adapter.cr
================================================
require "./concurrency_limited_dequeue_adapter"
module Mosquito
# A dequeue adapter that wraps `ConcurrencyLimitedDequeueAdapter` with
# remotely configurable concurrency limits stored in the Mosquito backend
# (e.g. Redis).
#
# Limits are refreshed by polling the backend at a configurable interval.
# When the remote key is absent or empty the adapter falls back to the
# `defaults` hash provided at construction time.
#
# Remote values are **merged on top of** defaults: a queue present only in
# defaults keeps its value, a queue present only in the remote config is
# added, and a queue present in both uses the remote value.
#
# ## Per-overseer configuration
#
# When `overseer_id` is set, the adapter reads from both the global key
# and a per-overseer key. The merge order is:
#
# defaults → global remote → per-overseer remote
#
# This lets you run overseers on asymmetric hardware and tune each one
# independently while still sharing a common baseline.
#
# ## Setting limits remotely
#
# Use `Mosquito::Api.set_concurrency_limits` to write global limits:
#
# ```crystal
# Mosquito::Api.set_concurrency_limits({"queue_a" => 2, "queue_b" => 10})
# ```
#
# Or target a specific overseer:
#
# ```crystal
# Mosquito::Api.set_concurrency_limits({"queue_a" => 1}, overseer_id: "gpu-worker-1")
# ```
#
# ## Example
#
# ```crystal
# Mosquito.configure do |settings|
# settings.dequeue_adapter = Mosquito::RemoteConfigDequeueAdapter.new(
# defaults: {"queue_a" => 3, "queue_b" => 5},
# overseer_id: "gpu-worker-1",
# refresh_interval: 5.seconds,
# )
# end
# ```
#
# In this configuration the adapter starts with the given defaults. Any
# limits written to the backend via the API will take effect within
# `refresh_interval` seconds. Per-overseer limits override global limits
# which override defaults.
class RemoteConfigDequeueAdapter < DequeueAdapter
  # Final key segment under which concurrency limits are stored.
  CONFIG_KEY = "concurrency_limits"

  getter defaults : Hash(String, Int32)
  getter refresh_interval : Time::Span
  getter inner : ConcurrencyLimitedDequeueAdapter
  getter overseer_id : String?

  # Starts at the Unix epoch, so the first `#dequeue` triggers a refresh
  # for any realistic refresh interval.
  @last_refresh_at : Time = Time::UNIX_EPOCH

  # The most recent remote limits that were loaded successfully.
  @last_remote_limits : Hash(String, Int32) = {} of String => Int32

  def initialize(
    @defaults : Hash(String, Int32) = {} of String => Int32,
    @overseer_id : String? = nil,
    @refresh_interval : Time::Span = 5.seconds
  )
    # The inner adapter gets its own copy so the defaults stay untouched.
    @inner = ConcurrencyLimitedDequeueAdapter.new(@defaults.dup)
  end

  # Refreshes the limits when they are stale, then delegates to the inner
  # adapter.
  def dequeue(queue_list : Runners::QueueList) : WorkUnit?
    maybe_refresh_limits
    inner.dequeue(queue_list)
  end

  def finished_with(job_run : JobRun, queue : Queue) : Nil
    inner.finished_with(job_run, queue)
  end

  # Returns the current effective concurrency limits (defaults merged
  # with any remote overrides).
  def limits : Hash(String, Int32)
    inner.limits
  end

  # Returns the current in-flight count for *queue_name*, delegated to
  # the inner adapter.
  def active_count(queue_name : String) : Int32
    inner.active_count(queue_name)
  end

  # Force an immediate refresh from the backend, ignoring the
  # `refresh_interval` timer.
  def refresh_limits : Nil
    effective = defaults.merge(load_remote_limits)
    inner.limits = effective unless effective == inner.limits
    @last_refresh_at = Time.utc
  end

  # ----- Backend storage helpers (class-level) -----

  # Reads the global concurrency limits hash stored in the backend.
  def self.stored_limits : Hash(String, Int32)
    Mosquito.backend.retrieve(global_config_key).transform_values(&.to_i32)
  end

  # Reads the concurrency limits for a specific overseer.
  def self.stored_limits(overseer_id : String) : Hash(String, Int32)
    Mosquito.backend.retrieve(overseer_config_key(overseer_id)).transform_values(&.to_i32)
  end

  # Overwrites the global concurrency limits with *limits*. Any previously
  # stored queue entries not present in *limits* are removed.
  def self.store_limits(limits : Hash(String, Int32)) : Nil
    key = global_config_key
    Mosquito.backend.delete(key)
    return if limits.empty?

    Mosquito.backend.store(key, limits.transform_values(&.to_s))
  end

  # Overwrites the concurrency limits for a specific overseer with *limits*.
  def self.store_limits(limits : Hash(String, Int32), overseer_id : String) : Nil
    key = overseer_config_key(overseer_id)
    Mosquito.backend.delete(key)
    return if limits.empty?

    Mosquito.backend.store(key, limits.transform_values(&.to_s))
  end

  # Removes all globally stored concurrency limits, causing adapters to
  # fall back to their defaults (or per-overseer limits if set).
  def self.clear_limits : Nil
    Mosquito.backend.delete(global_config_key)
  end

  # Removes stored concurrency limits for a specific overseer.
  def self.clear_limits(overseer_id : String) : Nil
    Mosquito.backend.delete(overseer_config_key(overseer_id))
  end

  protected def self.global_config_key : String
    Mosquito.backend.build_key(CONFIG_KEY)
  end

  protected def self.overseer_config_key(overseer_id : String) : String
    Mosquito.backend.build_key(CONFIG_KEY, overseer_id)
  end

  # Refreshes only when at least `refresh_interval` has elapsed since the
  # previous refresh.
  private def maybe_refresh_limits
    return if Time.utc - @last_refresh_at < @refresh_interval

    refresh_limits
  end

  # Loads the global limits and, when an overseer id is configured, merges
  # that overseer's limits on top of them.
  private def load_remote_limits : Hash(String, Int32)
    combined = self.class.stored_limits
    if oid = overseer_id
      combined = combined.merge(self.class.stored_limits(oid))
    end
    @last_remote_limits = combined
  rescue
    # If the backend is unreachable or the data is corrupt, fall back
    # to the last known-good remote limits so previously applied overrides
    # are preserved rather than silently reverting to defaults.
    @last_remote_limits
  end
end
end
================================================
FILE: src/mosquito/dequeue_adapters/shuffle_dequeue_adapter.cr
================================================
require "../dequeue_adapter"
module Mosquito
# The default dequeue adapter. Shuffles the queue list on each pass and
# returns the first available job.
#
# The shuffle provides rough fairness across queues, preventing any single
# queue from being consistently checked first.
class ShuffleDequeueAdapter < DequeueAdapter
  # Visits the queues in random order and returns the first job run found,
  # or nil when every queue comes up empty.
  def dequeue(queue_list : Runners::QueueList) : WorkUnit?
    queue_list.queues.shuffle.each do |candidate|
      next unless job_run = candidate.dequeue

      return WorkUnit.of(job_run, from: candidate)
    end

    nil
  end
end
end
================================================
FILE: src/mosquito/dequeue_adapters/weighted_dequeue_adapter.cr
================================================
require "../dequeue_adapter"
module Mosquito
# A dequeue adapter that checks queues according to configured weights.
#
# Higher-weight queues are given proportionally more chances to be dequeued
# from. On each call to `#dequeue`, the adapter picks a queue at random
# (weighted by its configured value). If that queue is empty, it is removed
# from consideration and another weighted pick is made, ensuring each queue
# is checked at most once per dequeue call.
#
# The weight map is built fresh on each dequeue call from the current
# queue list, ensuring newly discovered queues are picked up immediately.
#
# Queues not present in the weights table are assigned a default weight of 1.
#
# ## Example
#
# ```crystal
# Mosquito.configure do |settings|
# settings.dequeue_adapter = Mosquito::WeightedDequeueAdapter.new({
# "critical" => 5,
# "default" => 2,
# "bulk" => 1,
# })
# end
# ```
#
# In this configuration the "critical" queue will be checked roughly 5x as
# often as "bulk" and 2.5x as often as "default".
class WeightedDequeueAdapter < DequeueAdapter
  # Configured weight per queue name.
  getter weights : Hash(String, Int32)

  # *weights* maps queue names to their relative weight; queues missing
  # from the table use *default_weight*.
  def initialize(@weights : Hash(String, Int32), @default_weight = 1)
  end

  # Makes weighted random picks among the queues until one yields a job
  # run. A queue that comes up empty is dropped from the candidates, so
  # each queue is checked at most once per call. Returns nil when every
  # queue is empty.
  #
  # Zero and negative weights are treated as zero: such queues are only
  # checked after every positively weighted queue has come up empty.
  def dequeue(queue_list : Runners::QueueList) : WorkUnit?
    remaining = queue_list.queues.map { |q|
      # Clamp so a negative weight cannot distort the roll in
      # `weighted_random_select`.
      {q, Math.max(weights.fetch(q.name, @default_weight), 0)}
    }

    until remaining.empty?
      queue, index = weighted_random_select(remaining)
      if job_run = queue.dequeue
        return WorkUnit.of(job_run, from: queue)
      end
      remaining.delete_at(index)
    end
  end

  # Picks a queue at random, weighted by the associated values.
  # Returns the selected queue and its index in the candidates array.
  #
  # *candidates* must not be empty. When no candidate has a positive
  # weight the pick is uniform, because `rand` raises for a bound below 1.
  private def weighted_random_select(candidates : Array(Tuple(Queue, Int32))) : Tuple(Queue, Int32)
    total = candidates.sum(&.last)

    if total <= 0
      index = rand(candidates.size)
      return {candidates[index].first, index}
    end

    roll = rand(total)
    candidates.each_with_index do |(queue, weight), index|
      roll -= weight
      return {queue, index} if roll < 0
    end

    # Unreachable, but satisfies the compiler.
    {candidates.last.first, candidates.size - 1}
  end
end
end
================================================
FILE: src/mosquito/exceptions.cr
================================================
module Mosquito
# Raised when a job fails; `Job#fail` raises this to mark the current run
# as a failure.
class JobFailed < Exception
end
# Raised when a job_run tries to run twice.
class DoubleRun < Exception
end
# Raised when a job contains a model_id parameter pointing to a database
# record, but the database doesn't return anything for that id.
class IrretrievableParameter < Exception
end
end
================================================
FILE: src/mosquito/gates/open_gate.cr
================================================
require "../resource_gate"
module Mosquito
# A gate that always allows dequeuing. This is the default when no
# resource constraint is configured.
class OpenGate < ResourceGate
  # Configures the parent gate with a zero-length sample TTL.
  def initialize
    super(sample_ttl: Time::Span.zero)
  end

  # An open gate never blocks.
  protected def check : Bool
    true
  end
end
end
================================================
FILE: src/mosquito/gates/threshold_gate.cr
================================================
require "../resource_gate"
module Mosquito
# A gate that samples a metric via a callback and compares it against
# a threshold.
#
# ## Example
#
# ```crystal
# gate = Mosquito::ThresholdGate.new(
# threshold: 85.0,
# sample_ttl: 2.seconds
# ) { `nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits`.strip.to_f }
# ```
class ThresholdGate < ResourceGate
  # The value a sample must stay strictly below for the check to pass.
  getter threshold : Float64

  # Callback that produces the current metric reading.
  @sampler : -> Float64

  # *sampler* is the block that reads the metric; *sample_ttl* is handed
  # to the parent gate.
  def initialize(@threshold : Float64, sample_ttl : Time::Span = 2.seconds, &sampler : -> Float64)
    super(sample_ttl: sample_ttl)
    @sampler = sampler
  end

  # Takes one sample and passes only when it is below the threshold.
  protected def check : Bool
    reading = @sampler.call
    reading < threshold
  end
end
end
================================================
FILE: src/mosquito/job.cr
================================================
require "./serializers/*"
module Mosquito
# A Job is a definition for work to be performed.
# Jobs are pieces of code which run a JobRun.
#
# - Jobs prevent double execution of a job for a job_run
# - Jobs Rescue when a #perform method fails a job_run for any reason
# - Jobs can be rescheduleable
abstract class Job
Log = ::Log.for(self)
include Mosquito::Serializers::Primitives
# The lifecycle of a single job execution, as tracked by `#run`.
enum State
Initialization
Running
Succeeded
Failed
Aborted
Preempted
# True once `perform` has finished, whether it succeeded or failed.
def executed? : Bool
succeeded? || failed?
end
end
# A logger scoped to this job class and, when a job run id has been
# assigned, to that job run.
def log
stream = Log.for(self.class)
if job_run_id_ = job_run_id
stream.for(job_run_id_)
else
stream
end
end
# Logs *message* at info level through the scoped logger.
def log(message)
log.info { message }
end
getter state = State::Initialization
delegate executed?, succeeded?, failed?, aborted?, preempted?, to: state
# When a job is preempted with an `until` parameter, this is the time
# at which the job should be retried.
getter preempted_until : Time?
# When a job is preempted, this is the reason provided by the caller.
getter preempt_reason : String = ""
# When a job fails and raises an exception, it will be saved into this attribute.
getter exception : Exception?
property job_run_id : String?
# When a job run fails, should it be added to the retry queue?
# See: #fail(retry: false)
property should_retry : Bool = true
# The queue this job is assigned to.
# By default every job has its own named queue:
#
# - EmailTheUniverseJob.queue = "email_the_universe"
def self.queue_name : String
{{ @type.id }}.to_s.underscore
end
# Easily override the queue for any job.
macro queue_name(name)
def self.queue_name : String
"{{ name.id }}"
end
end
# The Queue this job uses to store job_runs. Falls back to the queue
# named "default" when the queue name is blank.
def self.queue
if queue_name.blank?
Queue.new "default"
else
Queue.new queue_name
end
end
# Job name is used to differentiate jobs coming off the same queue.
# By default it is the class name, and this should never need to be changed.
private def self.job_name : String
"{{ @type.id }}".underscore
end
# Executes the job: runs the before hook, then `perform`, and records the
# outcome in `state`.
#
# - If the before hook raises, the job is marked `Aborted` and `perform`
#   is not called.
# - If the before hook preempted the job, `perform` is not called.
# - If `perform` raises, the exception is stored in `exception` and the
#   job is marked `Failed`.
#
# The after hook runs in every case because it sits in the `ensure`
# clause.
def run
begin
before_hook
rescue e : Exception
log.error(exception: e) { "Before hook raised, job will not be executed" }
@state = State::Aborted
return
end
return if preempted?
@state = State::Running
perform
@state = State::Succeeded
rescue e
log.warn(exception: e) do
"Job failed! Raised #{e.class}: #{e.message}"
end
@exception = e
@state = State::Failed
ensure
after_hook
end
# Extension point, extended by the `before` macro.
def before_hook
# intentionally left blank
end
# Extension point, extended by the `after` macro.
def after_hook
# intentionally left blank
end
# Fails the job using the defaults of `#fail`, which leaves
# `should_retry` unchanged.
def retry_later
fail
end
# To be called from inside a before hook.
# Preempts this job, preventing execution. The job will be rescheduled.
#
# The optional `until` parameter specifies when the job should be retried.
def preempt(reason = "", *, until preempted_until : Time? = nil)
@state = State::Preempted
@preempt_reason = reason
@preempted_until = preempted_until
end
# Declares a block to run before `perform`. Hooks chain: the previously
# defined hook (or the inherited one) runs first, and the new block is
# skipped if an earlier hook preempted the job.
macro before(&block)
def before_hook
{% if @type.methods.map(&.name).includes?(:before_hook.id) %}
previous_def
{% else %}
super
{% end %}
return if preempted?
{{ yield }}
end
end
# Declares a block to run after the job. Hooks chain: the previously
# defined hook (or the inherited one) runs first.
macro after(&block)
def after_hook
{% if @type.methods.map(&.name).includes?(:after_hook.id) %}
previous_def
{% else %}
super
{% end %}
{{ yield }}
end
end
# abstract, override in a Job descendant to do something productive
def perform
Log.error { "No job definition found for #{self.class.name}" }
fail
end
# To be called from inside a #perform
# Marks this job as a failure. By default, if the job is a candidate for
# re-scheduling, it will be run again at a later time.
#
# Always raises `JobFailed`. Passing `retry: false` clears `should_retry`;
# once cleared it stays false.
def fail(reason = "", *, retry : Bool = true)
@should_retry = @should_retry && retry
raise JobFailed.new(reason)
end
# abstract, override if desired.
#
# True if this job is rescheduleable, false if not.
def rescheduleable? : Bool
true
end
# abstract, override if desired.
#
# For a given retry count, is this job rescheduleable?
# By default a job is retried while the retry count is below 5.
def rescheduleable?(retry_count : Int32) : Bool
rescheduleable? && retry_count < 5
end
# abstract, override if desired.
#
# For a given retry count, how long should the delay between
# job attempts be?
#
# A preempted job with a future `preempted_until` waits until that time.
# Otherwise the delay grows quadratically with the retry count.
def reschedule_interval(retry_count : Int32) : Time::Span
if preempted? && (wait_until = @preempted_until)
delay = wait_until - Time.utc
return delay if delay > Time::Span.zero
end
2.seconds * (retry_count ** 2)
# retry 1 = 2 seconds
# 2 = 8 seconds
# 3 = 18 seconds
# 4 = 32 seconds
end
# Writable metadata for this job class, memoized per job instance.
def metadata : Metadata
@metadata ||= begin
Metadata.new self.class.metadata_key
end
end
# Read-only view of the metadata for this job class.
def self.metadata : Metadata
Metadata.new metadata_key, readonly: true
end
# Backend key under which this job class's metadata is stored.
def self.metadata_key
Mosquito.backend.build_key "job_metadata", self.name.underscore
end
end
end
================================================
FILE: src/mosquito/job_run.cr
================================================
module Mosquito
# A JobRun is a unit of work which will be performed by a Job.
# JobRuns know how to:
# - store and retrieve their data to and from the datastore
# - figure out what Job class they match to
# - build an instance of that Job class and pass off the config data
# - Ask the job to run
#
# JobRun data is called `config` and is persisted in the backend under the key
# `mosquito:job_run:job_run_id`.
class JobRun
getter type
getter enqueue_time : Time
getter id : String
getter retry_count = 0
getter job : Mosquito::Job?
getter started_at : Time?
getter finished_at : Time?
getter overseer_id : String?
getter metadata : Metadata { Metadata.new(config_key) }
def job! : Mosquito::Job
job || raise RuntimeError.new("No job yet retrieved for job_run.")
end
# :nodoc:
property config
CONFIG_KEY_PREFIX = "job_run"
# The config key is the backend storage key for the metadata of this job_run.
def config_key
self.class.config_key id
end
# :ditto:
def self.config_key(*parts)
Mosquito.backend.build_key CONFIG_KEY_PREFIX, parts
end
def initialize(type : String)
new type
end
def initialize(
@type : String,
@enqueue_time : Time = Time.utc,
id : String? = nil,
@retry_count : Int32 = 0,
@started_at : Time? = nil,
@finished_at : Time? = nil
)
@id = id || KeyBuilder.build @enqueue_time.to_unix_ms.to_s, rand(1000)
@config = {} of String => String
@job = nil
end
# Persists this job run's config and bookkeeping fields to the backend.
# Nil-valued fields are deleted from the backend hash.
def store
  fields = Hash(String, String?).new
  config.each { |key, value| fields[key] = value }

  fields["enqueue_time"] = enqueue_time.to_unix_ms.to_s
  fields["type"] = type
  fields["retry_count"] = retry_count.to_s
  fields["overseer_id"] = @overseer_id

  # Timestamps are only written once they have been set.
  {"started_at" => @started_at, "finished_at" => @finished_at}.each do |field, time|
    fields[field] = time.to_unix_ms.to_s if time
  end

  metadata.set fields
end
# Deletes this job_run from the backend.
# Optionally, after a delay in seconds (handled by the backend).
def delete(in ttl : Int = 0)
metadata.delete(in: ttl.seconds)
end
# Returns the Job instance for this job_run, building it on first use.
# A freshly built job is populated with this job_run's config (when the
# job class supports `vars_from`) and tagged with this job_run's id.
def build_job : Mosquito::Job
  cached = @job
  return cached if cached

  instance = Base.job_for_type(type).new
  @job = instance
  instance.vars_from(config) if instance.responds_to?(:vars_from)
  instance.job_run_id = id
  instance
end
# Builds and runs the job with this job_run config.
def run
instance = build_job
@started_at = Time.utc
instance.run
@finished_at = Time.utc
if executed? && failed?
@retry_count += 1
end
store
end
# :nodoc:
protected def overseer_id=(id : String?)
@overseer_id = id
end
# Marks this job run as claimed by the given overseer and persists
# the association to the backend. Used by the pending cleanup to
# determine whether the owning overseer is still alive.
def claimed_by(overseer : Runners::Overseer)
@overseer_id = overseer.observer.instance_id
Mosquito.backend.set config_key, "overseer_id", @overseer_id.not_nil!
end
# Fails this job run and make sure it's persisted as such.
# Clears the overseer_id since the job is no longer in-flight.
def fail
@retry_count += 1
@overseer_id = nil
store
end
# Treats this job run as a failure: increments the retry count and
# either reschedules with backoff or banishes to the dead queue.
def retry_or_banish(queue : Queue) : Nil
fail
build_job
if rescheduleable?
next_execution = Time.utc + reschedule_interval
queue.reschedule self, next_execution
else
queue.banish self
delete in: Mosquito.configuration.failed_job_ttl
end
end
# For the current retry count, is the job rescheduleable?
def rescheduleable?
job!.rescheduleable? @retry_count
end
# For the current retry count, how long should a runner wait before retry?
def reschedule_interval
job!.reschedule_interval @retry_count
end
# :nodoc:
delegate :executed?, :succeeded?, :failed?, :preempted?, :preempt_reason, :failed, :rescheduled, to: job!
# Used to construct a job_run from the parameters stored in the backend.
def self.retrieve(id : String)
fields = Metadata.new(config_key(id)).to_h
return unless name = fields.delete "type"
return unless timestamp = fields.delete "enqueue_time"
retry_count = (fields.delete("retry_count") || 0).to_i
started_at_raw = fields.delete("started_at")
finished_at_raw = fields.delete("finished_at")
started_at = started_at_raw ? Time.unix_ms(started_at_raw.to_i64) : nil
finished_at = finished_at_raw ? Time.unix_ms(finished_at_raw.to_i64) : nil
overseer_id = fields.delete("overseer_id")
instance = new(name, Time.unix_ms(timestamp.to_i64), id, retry_count, started_at, finished_at)
instance.config = fields
instance.overseer_id = overseer_id
instance
end
# Updates this job_run config from the backend.
def reload : Nil
config.merge! metadata.to_h
@retry_count = config["retry_count"].to_i
@overseer_id = config.delete("overseer_id")
end
def to_s(io : IO)
"#{type}<#{id}>".to_s(io)
end
def ==(other : self)
id == self.id
end
end
end
================================================
FILE: src/mosquito/key_builder.cr
================================================
module Mosquito
class KeyBuilder
  # NOTE: the misspelling is part of the public constant name and is kept
  # so that existing references continue to resolve.
  KEY_SEPERATOR = ":"

  # Joins *parts* into a single key, with segments separated by
  # `KEY_SEPERATOR`.
  #
  # - Strings are used verbatim; Symbols and Numbers are stringified.
  # - Arrays and Tuples are expanded in place, at any nesting depth.
  # - `nil` contributes no segment, at any nesting depth.
  #
  # Raises for any other type of part.
  def self.build(*parts)
    segments = [] of String
    parts.each { |part| append_segments part, segments }
    segments.join KEY_SEPERATOR
  end

  # Appends the key segments represented by *part* onto *segments*,
  # descending into nested collections.
  private def self.append_segments(part, segments : Array(String)) : Nil
    case part
    when Symbol
      segments << part.to_s
    when String
      segments << part
    when Array, Tuple
      part.each { |element| append_segments element, segments }
    when Number
      segments << part.to_s
    when Nil
      # do nothing
    else
      raise "#{part.class} is not a keyable type"
    end
  end
end
end
================================================
FILE: src/mosquito/metadata.cr
================================================
module Mosquito
# Provides a real-time metadata store. Data is not cached, which allows
# multiple workers to operate on the same structures in real time.
#
# Each read or write incurs a round trip to the backend.
#
# Keys and values are always strings.
class Metadata
  property root_key : String
  getter? readonly : Bool

  # Builds a metadata store rooted at the given backend key. When
  # *readonly* is true, the field-level write methods raise.
  def initialize(@root_key : String, @readonly = false)
  end

  # Deletes this metadata immediately.
  def delete : Nil
    Mosquito.backend.delete root_key
  end

  # Schedule this metadata to be deleted after a time span.
  def delete(in ttl : Time::Span) : Nil
    Mosquito.backend.delete root_key, in: ttl
  end

  # Reads the metadata and returns it as a hash.
  def to_h : Hash(String, String)
    Mosquito.backend.retrieve root_key
  end

  # Reads a single key from the metadata.
  def []?(key : String) : String?
    Mosquito.backend.get root_key, key
  end

  # Writes a value to a key in the metadata.
  def []=(key : String, value : String)
    raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
    Mosquito.backend.set root_key, key, value
  end

  # Deletes a value from the metadata.
  #
  # Removing a field is a write, so it is refused on readonly metadata
  # just like every other field-level mutation.
  def []=(key : String, value : Nil)
    raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
    Mosquito.backend.delete_field root_key, key
  end

  # Writes multiple values to the metadata at once, given as named arguments.
  def set(**values)
    set values.to_h
  end

  # Writes multiple values to the metadata at once. Keys are stringified
  # before being handed to the backend.
  def set(values : Hash(String | Symbol, String?))
    raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
    Mosquito.backend.set root_key, values.transform_keys(&.to_s)
  end

  # Writes multiple string values to the metadata at once.
  def set(values : Hash(String, String))
    raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
    Mosquito.backend.store root_key, values
  end

  # Increments a value in the metadata by 1.
  def increment(key)
    raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
    Mosquito.backend.increment root_key, key
  end

  # Increments a value in the metadata by the given amount.
  def increment(key, by increment : Int32)
    raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
    Mosquito.backend.increment root_key, key, by: increment
  end

  # Decrements a value in the metadata by 1.
  def decrement(key)
    raise RuntimeError.new("Cannot write to metadata, readonly=true") if readonly?
    Mosquito.backend.increment root_key, key, by: -1
  end

  # Sets a heartbeat timestamp in the metadata.
  # Also sets a timer to delete the metadata after 1 hour.
  def heartbeat!
    self["heartbeat"] = Time.utc.to_unix.to_s
    delete in: 1.hour
  end

  # Returns the heartbeat timestamp from the metadata.
  def heartbeat? : Time?
    if time = self["heartbeat"]?
      # Unix timestamps are written as Int64; parsing them as Int32 would
      # raise once the value exceeds 2**31 - 1 (January 2038).
      Time.unix(time.to_i64)
    else
      nil
    end
  end

  delegate to_s, inspect, to: to_h
end
end
================================================
FILE: src/mosquito/periodic_job.cr
================================================
module Mosquito
# Base class for jobs which are enqueued on a recurring interval rather
# than on demand. Subclasses declare their interval with `run_every`.
abstract class PeriodicJob < Job
def initialize
end
# Builds the JobRun which represents one execution of this job.
# An implementation is generated for every subclass by `macro inherited`.
abstract def build_job_run
macro inherited
# The job name is the underscored, downcased name of the inheriting class.
macro job_name
"\{{ @type.id }}".underscore.downcase
end
# Registers the inheriting class so a stored job type can be mapped back to it.
Mosquito::Base.register_job_mapping job_name, {{ @type.id }}
def self.job_type : String
job_name
end
# Periodic jobs take no parameters: the job run carries only the job name.
def build_job_run
job_run = Mosquito::JobRun.new(job_name)
end
# Registers the interval at which the inheriting class should be run.
macro run_every(interval)
Mosquito::Base.register_job_interval \{{ @type.id }}, \{{ interval }}
end
end
# Periodic jobs are never rescheduled.
def rescheduleable?
false
end
end
end
================================================
FILE: src/mosquito/periodic_job_run.cr
================================================
module Mosquito
class PeriodicJobRun
  Log = ::Log.for self

  property class : Mosquito::PeriodicJob.class
  property interval : Time::Span | Time::MonthSpan
  getter metadata : Metadata { Metadata.new(Mosquito.backend.build_key("periodic_jobs", @class.name)) }
  getter observer : Observability::PeriodicJob { Observability::PeriodicJob.new(self) }

  # The last executed timestamp for this periodicjob tracked by the backend.
  def last_executed_at?
    if timestamp = metadata["last_executed_at"]?
      # The timestamp is written from `Time#to_unix` (Int64). Parsing it as
      # Int32 would raise once it exceeds 2**31 - 1 (January 2038).
      Time.unix(timestamp.to_i64)
    else
      nil
    end
  end

  # The last executed timestamp, or the unix epoch if the job has never
  # been executed.
  def last_executed_at
    last_executed_at? || Time.unix(0)
  end

  # Updates the last executed timestamp in the backend,
  # and schedules the metadata for deletion after 3*interval
  # seconds.
  #
  # For Time::Span intervals, the TTL is set to 3 * interval.
  # For Time::MonthSpan intervals, the TTL is set to approximately 3 * interval.
  #
  # A month is approximated to 2635200 seconds, or 30.5 days.
  def last_executed_at=(time : Time)
    metadata["last_executed_at"] = time.to_unix.to_s

    case interval_ = interval
    when Time::Span
      metadata.delete(in: interval_ * 3)
    when Time::MonthSpan
      seconds_in_an_average_month = 2_635_200.seconds
      metadata.delete(in: seconds_in_an_average_month * interval_.value * 3)
    end
  end

  def initialize(@class, @interval)
  end

  # Check the last executed timestamp against the current time,
  # and enqueue the job if it's time to execute.
  #
  # Returns true when the interval has elapsed, false otherwise.
  #
  # NOTE(review): when a run is still pending the enqueue is skipped, but
  # the last executed timestamp is still advanced and the observer is still
  # told the job was enqueued. Confirm that this is intended.
  def try_to_execute : Bool
    now = Time.utc

    if last_executed_at + interval <= now
      if pending_job_run?
        Log.info { "Skipping enqueue for #{@class.name}: a job run is already pending" }
      else
        execute
      end

      self.last_executed_at = now
      observer.enqueued(at: now)
      true
    else
      observer.skipped
      false
    end
  end

  # Returns true if a previously enqueued job run has not yet finished.
  # This prevents duplicate enqueues when executors are busy and the
  # periodic interval elapses multiple times before the job is run.
  def pending_job_run? : Bool
    if pending_id = metadata["pending_run_id"]?
      if job_run = JobRun.retrieve(pending_id)
        return true if job_run.finished_at.nil?
      end

      # Job run has finished or was cleaned up; clear the stale reference.
      metadata["pending_run_id"] = nil
    end

    false
  end

  # Enqueues the job for execution and records the job run id so that
  # subsequent intervals can detect that a run is already pending.
  def execute
    job = @class.new
    job_run = job.build_job_run
    job_run.store
    @class.queue.enqueue job_run
    metadata["pending_run_id"] = job_run.id
  end
end
end
================================================
FILE: src/mosquito/queue.cr
================================================
module Mosquito
# A named Queue.
#
# Named Queues exist and have 4 ordered lists: waiting, pending, scheduled, and dead.
#
# - The Waiting list is for jobs which need to be executed as soon as possible.
# - The Pending list is for jobs which are currently being executed.
# - The Scheduled list is indexed by execution time and holds jobs which need to be executed at a later time.
# - The Dead list is for jobs which have been retried too many times and are no longer viable.
#
# A job_run is represented in a queue by its id.
#
# A job_run flows through the queues in this manner:
#
#
# ```text
# Time=0: JobRun does not exist yet, lists are empty
#
# Waiting Pending Scheduled Dead
#
# ---------------------------------
# Time=1: JobRun is enqueued
#
# Waiting Pending Scheduled Dead
# JobRun#1
#
# ---------------------------------
# Time=2: JobRun begins. JobRun is moved to pending and executed
#
# Waiting Pending Scheduled Dead
# JobRun#1
#
# ---------------------------------
# Time=3: JobRuns are Enqueued.
#
# Waiting Pending Scheduled Dead
# JobRun#2 JobRun#1
# JobRun#3
#
# ---------------------------------
# Time=4: JobRun succeeds, next job_run begins.
#
# Waiting Pending Scheduled Dead
# JobRun#3 JobRun#2
#
# ---------------------------------
# Time=5: JobRun fails and is scheduled for later, next job_run begins.
#
# Waiting Pending Scheduled Dead
# JobRun#3 t=7:JobRun#2
#
# ---------------------------------
# Time=6: JobRun succeeds. Nothing is executing.
#
# Waiting Pending Scheduled Dead
# t=7:JobRun#2
#
# ---------------------------------
# Time=7: Scheduled job_run is due and is moved to waiting. Nothing is executing.
#
# Waiting Pending Scheduled Dead
# JobRun#2
#
# ---------------------------------
# Time=8: JobRun begins executing (for the second time).
#
# Waiting Pending Scheduled Dead
# JobRun#2
#
# ---------------------------------
# Time=9: JobRun finished successfully. No more job_runs present.
#
# Waiting Pending Scheduled Dead
#
# ```
#
class Queue
  Log = ::Log.for self

  getter name
  getter config_key
  property backend : Mosquito::Backend::Queue
  getter observer : Observability::Queue { Observability::Queue.new self }

  # Builds the queue with the given name, backed by the backend queue of
  # the same name.
  def initialize(@name : String)
    @backend = Mosquito.backend.queue name
    @config_key = @name
  end

  # Adds the job_run to the waiting list, for execution as soon as possible.
  def enqueue(job_run : JobRun) : JobRun
    observer.enqueued(job_run)
    backend.enqueue(job_run)
  end

  # Schedules the job_run for execution once the interval has elapsed.
  def enqueue(job_run : JobRun, in interval : Time::Span) : JobRun
    enqueue(job_run, at: interval.from_now)
  end

  # Schedules the job_run for execution at the given time.
  def enqueue(job_run : JobRun, at execute_time : Time) : JobRun
    observer.enqueued(job_run, at: execute_time)
    backend.schedule(job_run, execute_time)
  end

  # Takes the next job_run from the backend, or returns nil when the queue
  # is paused or the backend has nothing to hand out.
  def dequeue : JobRun?
    return nil if paused?

    backend.dequeue.try do |next_run|
      observer.dequeued(next_run)
      next_run
    end
  end

  # Marks the job_run as finished in the backend and schedules it again
  # for the given time.
  def reschedule(job_run : JobRun, execution_time)
    backend.finish(job_run)
    enqueue(job_run, at: execution_time)
    observer.rescheduled(job_run, to: execution_time)
  end

  # Delegates to the backend queue's `undequeue`.
  def undequeue : JobRun?
    backend.undequeue
  end

  # Delegates to the backend queue's `deschedule`.
  def dequeue_scheduled : Array(JobRun)
    backend.deschedule
  end

  # Marks the job_run as finished in the backend.
  def forget(job_run : JobRun)
    backend.finish(job_run)
    observer.forgotten(job_run)
  end

  # Marks the job_run as finished and then terminates it in the backend.
  def banish(job_run : JobRun)
    backend.finish(job_run)
    backend.terminate(job_run)
    observer.banished(job_run)
  end

  # The size of the queue as reported by the backend.
  def size(*, include_dead : Bool = true) : Int64
    backend.size(include_dead)
  end

  # Queues are equal when they share a name.
  def ==(other : self) : Bool
    other.name == name
  end

  # Pause this queue. While paused, `#dequeue` returns nil and no jobs
  # will be dispatched. Jobs can still be enqueued and will accumulate
  # until the queue is resumed.
  #
  # Pass a duration to automatically resume after the given interval,
  # which is useful for backing off from a rate-limited external resource.
  def pause(for duration : Time::Span? = nil) : Nil
    backend.pause(duration)
    observer.paused(duration)
  end

  # Resume a paused queue, allowing jobs to be dequeued again.
  def resume : Nil
    backend.resume
    observer.resumed
  end

  # Is this queue currently paused, according to the backend?
  def paused? : Bool
    backend.paused?
  end

  # Delegates to the backend queue's `flush`.
  def flush
    backend.flush
  end
end
end
================================================
FILE: src/mosquito/queued_job.cr
================================================
module Mosquito
# Base class for jobs which are enqueued on demand and carry typed,
# serializable parameters declared with `param`.
abstract class QueuedJob < Job
  macro inherited
    def self.job_name
      "{{ @type.id }}".underscore.downcase
    end

    Mosquito::Base.register_job_mapping job_name, {{ @type.id }}

    PARAMETERS = [] of Nil

    macro param(parameter)
      {% verbatim do %}
        {%
          a = "multiline macro hack"

          if ! parameter.is_a?(TypeDeclaration) || parameter.type.nil? || parameter.type.is_a?(Generic) || parameter.type.is_a?(Union)
            message = <<-TEXT
            Mosquito::QueuedJob: Unable to build parameter serialization for `#{parameter.type}` in param declaration `#{parameter}`.
            Mosquito covers most of the crystal primitives for serialization out of the box[1]. More complex types
            either need to be serialized yourself (recommended) or implement custom serializer logic[2].
            Parameter types must be specified explicitly. Make sure your parameter declarations look something like this:
            class LongJob < Mosquito::QueuedJob
            param user_email : String
            end
            Check the manual on declaring job parameters [3] if needed
            [1] - https://mosquito-cr.github.io/manual/index.html#primitive-serialization
            [2] - https://mosquito-cr.github.io/manual/serialization.html
            [3] - https://mosquito-cr.github.io/manual/index.html#parameters
            TEXT

            raise message
          end

          name = parameter.var
          value = parameter.value
          type = parameter.type
          simplified_type = type.resolve

          method_suffix = simplified_type.stringify.underscore.gsub(/::/,"__").id

          PARAMETERS << {
            name: name,
            value: value,
            type: type,
            method_suffix: method_suffix
          }
        %}

        @{{ name }} : {{ type }}?

        def {{ name }}=(value : {{simplified_type}}) : {{simplified_type}}
          @{{ name }} = value
        end

        def {{ name }}? : {{ simplified_type }} | Nil
          @{{ name }}
        end

        def {{ name }} : {{ simplified_type }}
          if ! (%object = {{ name }}?).nil?
            %object
          else
            msg = <<-MSG
            Expected a parameter named `{{ name }}` but found nil.
            The parameter may not have been provided when the job was enqueued.
            Should you be using `{{ name }}?` instead?
            MSG
            raise msg
          end
        end
      {% end %}
    end

    macro finished
      {% verbatim do %}
        def initialize; end

        def initialize({{
            PARAMETERS.map do |parameter|
              assignment = "@#{parameter["name"]}"
              assignment = assignment + " : #{parameter["type"]}" if parameter["type"]
              assignment = assignment + " = #{parameter["value"]}" unless parameter["value"].is_a? Nop
              assignment
            end.join(", ").id
          }})
        end

        # Methods declared in here have the side effect over overwriting any overrides which may have been implemented
        # otherwise in the job class. In order to allow folks to override the behavior here, these methods are only
        # injected if none already exists.
        {% unless @type.methods.map(&.name).includes?(:vars_from.id) %}
          def vars_from(config : Hash(String, String))
            {% for parameter in PARAMETERS %}
              @{{ parameter["name"] }} = deserialize_{{ parameter["method_suffix"] }}(config["{{ parameter["name"] }}"])
            {% end %}
          end
        {% end %}

        {% unless @type.methods.map(&.name).includes?(:build_job_run.id) %}
          def build_job_run
            job_run = Mosquito::JobRun.new self.class.job_name

            {% for parameter in PARAMETERS %}
              job_run.config["{{ parameter["name"] }}"] = serialize_{{ parameter["method_suffix"] }}(@{{ parameter["name"] }}.not_nil!)
            {% end %}

            job_run
          end
        {% end %}
      {% end %}
    end
  end

  # Builds a job run for this job and enqueues it for execution as soon as
  # possible. When the before_enqueue hook returns false the job run is
  # returned without being stored or enqueued.
  def enqueue : JobRun
    job_run = build_job_run
    return job_run unless before_enqueue_hook job_run
    job_run.store
    self.class.queue.enqueue job_run
    after_enqueue_hook job_run
    job_run
  end

  # Same as `#enqueue`, but the job run is scheduled to execute after the
  # given delay.
  def enqueue(in delay_interval : Time::Span) : JobRun
    job_run = build_job_run
    return job_run unless before_enqueue_hook job_run
    job_run.store
    self.class.queue.enqueue job_run, in: delay_interval
    after_enqueue_hook job_run
    job_run
  end

  # Same as `#enqueue`, but the job run is scheduled to execute at the
  # given time.
  def enqueue(at execute_time : Time) : JobRun
    job_run = build_job_run
    return job_run unless before_enqueue_hook job_run
    job_run.store
    self.class.queue.enqueue job_run, at: execute_time
    after_enqueue_hook job_run
    job_run
  end

  def before_enqueue_hook(job : JobRun) : Bool
    # intentionally left blank, return true by default
    true
  end

  def after_enqueue_hook(job : JobRun) : Nil
    # intentionally left blank
  end

  # Fired before a job is enqueued. Allows preventing enqueue at the job level.
  #
  #   class SomeJob < Mosquito::QueuedJob
  #     before_enqueue do
  #       # return false to prevent enqueue
  #     end
  #   end
  macro before_enqueue(&block)
    def before_enqueue_hook(job : Mosquito::JobRun) : Bool
      {% if @type.methods.map(&.name).includes?(:before_enqueue_hook.id) %}
        previous_def
      {% else %}
        super
      {% end %}

      {{ yield }}
    end
  end

  # Fired after a job is enqueued.
  macro after_enqueue(&block)
    def after_enqueue_hook(job : Mosquito::JobRun) : Nil
      {% if @type.methods.map(&.name).includes?(:after_enqueue_hook.id) %}
        previous_def
      {% else %}
        super
      {% end %}

      {{ yield }}
    end
  end
end
end
================================================
FILE: src/mosquito/rate_limiter.cr
================================================
module Mosquito::RateLimiter
  module ClassMethods
    # Configures rate limiting for this job.
    #
    # `limit` and `per` are used to control the run count and the window
    # duration. Defaults to a limit of 1 run per second.
    #
    # `increment` is used to indicate how many "hits" against a single job is
    # worth. Defaults to 1.
    #
    # `key` is used to combine rate limiting functions across multiple jobs.
    def throttle(*,
      limit : Int32 = 1,
      per : Time::Span = 1.second,
      increment = 1,
      key = self.name.underscore
    )
      @@rate_limit_ceiling = limit
      @@rate_limit_interval = per
      @@rate_limit_key = Mosquito.backend.build_key "rate_limit", key
      @@rate_limit_increment = increment
    end

    # Statistics about the rate limiter, including both the configuration
    # parameters and the run counts.
    #
    # A missing window start is reported as the unix epoch, and a missing
    # run count as 0.
    def rate_limit_stats : NamedTuple
      meta = metadata

      window_start = if window_start_ = meta["window_start"]?
        # Stored from `Time#to_unix` (Int64); an Int32 parse would raise
        # once the timestamp exceeds 2**31 - 1 (January 2038).
        Time.unix window_start_.to_i64
      else
        Time::UNIX_EPOCH
      end

      run_count = if run_count_ = meta["run_count"]?
        run_count_.to_i
      else
        0
      end

      {
        interval: @@rate_limit_interval,
        key: @@rate_limit_key,
        increment: @@rate_limit_increment,
        limit: @@rate_limit_ceiling,
        window_start: window_start,
        run_count: run_count
      }
    end

    # Provides an instance of the metadata store used to track rate limit
    # stats.
    def metadata : Metadata
      Metadata.new @@rate_limit_key
    end

    # Returns the backend key under which the rate limit metadata is stored.
    def rate_limit_key
      @@rate_limit_key
    end
  end

  macro included
    extend ClassMethods

    # A negative ceiling means rate limiting has not been configured.
    @@rate_limit_ceiling = -1
    @@rate_limit_interval : Time::Span = 1.second
    @@rate_limit_key = ""
    @@rate_limit_increment = 1

    before do
      update_window_start

      if rate_limited?
        # Pause the queue for the remainder of the window so that jobs are
        # not repeatedly dequeued only to be preempted.
        if expires = window_expires_at
          duration = expires - Time.utc
          self.class.queue.pause(for: duration) if duration > Time::Span.zero
        end

        preempt "rate limited"
      end
    end

    after do
      increment_run_count if executed?
    end
  end

  @rl_metadata : Metadata?

  # Storage hash for rate limit data.
  def metadata : Metadata
    @rl_metadata ||= self.class.metadata
  end

  # Should this job be cancelled?
  #
  # Never true when no ceiling has been configured. This is a pure query:
  # the rate limit metadata is not modified.
  def rate_limited? : Bool
    return false if @@rate_limit_ceiling < 0
    return true if maxed_rate_for_window?
    false
  end

  # Has the run count reached the ceiling for the current window?
  def maxed_rate_for_window? : Bool
    run_count = metadata["run_count"]?.try &.to_i
    run_count ||= 0
    run_count >= @@rate_limit_ceiling
  end

  # Reads the start of the rate limit window from the metadata.
  # Returns nil when no window has been recorded.
  def window_start : Time?
    # Parsed as Int64 to match what `update_window_start` writes; an Int32
    # parse would raise once the timestamp exceeds 2**31 - 1 (January 2038).
    if start_time = metadata["window_start"]?.try(&.to_i64)
      Time.unix start_time
    end
  end

  # When does the current rate limit window expire?
  # Returns nil if the window is already expired.
  def window_expires_at : Time?
    return nil unless started_window = window_start
    expiration_time = started_window + @@rate_limit_interval

    if expiration_time < Time.utc
      nil
    else
      expiration_time
    end
  end

  # Starts a new window, resetting the run count, when the current window
  # is older than the rate limit interval.
  def update_window_start : Nil
    started_window = window_start || Time::UNIX_EPOCH
    now = Time.utc

    if (now - started_window) > @@rate_limit_interval
      metadata["window_start"] = now.to_unix.to_s
      metadata["run_count"] = "0"
    end
  end

  # Increments the run counter.
  def increment_run_count : Nil
    metadata.increment "run_count", by: increment_run_count_by
  end

  # How much the run counter should be incremented by.
  # Implemented as a dynamic method so that it can easily be calculated by
  # some other metric, eg api calls to a third party library.
  def increment_run_count_by : Int32
    @@rate_limit_increment
  end
end
================================================
FILE: src/mosquito/redis_backend.cr
================================================
require "redis"
require "digest/sha1"
module Mosquito
# Lua scripts which are run on the redis server, and the bookkeeping for
# the SHA1 digests by which they are invoked.
module Scripts
# Both scripts act on KEYS[1] only when its current value equals ARGV[1]:
# one deletes the key, the other sets its expiry to ARGV[2].
SCRIPTS = {
:remove_matching_key => <<-LUA,
if redis.call("get",KEYS[1]) == ARGV[1] then
return redis.call("del",KEYS[1])
else
return 0
end
LUA
:renew_matching_key => <<-LUA
if redis.call("get",KEYS[1]) == ARGV[1] then
return redis.call("expire",KEYS[1],ARGV[2])
else
return 0
end
LUA
}
# Maps each script name to the digest used to invoke it.
@@script_sha = {} of Symbol => String
# Loads every script over the given connection and records the digest
# returned for each one.
def self.load(connection)
SCRIPTS.each do |name, script|
sha = @@script_sha[name] = connection.script_load script
Log.info { "loading script : #{name} => #{sha}" }
end
end
# For each script: seed the digest table with a locally computed SHA1 and
# define a class method, named after the script, which returns its digest.
{% for name, script in SCRIPTS %}
@@script_sha[:{{ name.id }}] = Digest::SHA1.hexdigest({{ script }})
@[AlwaysInline]
def self.{{ name.id }}
@@script_sha[:{{ name.id }}]
end
{% end %}
end
# A Mosquito backend which stores its data in redis.
class RedisBackend < Mosquito::Backend
LIST_OF_QUEUES_KEY = "queues"
LIST_OF_OVERSEERS_KEY = "overseers"
Log = ::Log.for(self)
# Defines one instance method per script which invokes the script by its
# digest. When redis reports NOSCRIPT the scripts are loaded again and the
# call is retried exactly once; any other error is re-raised.
{% for name, script in Scripts::SCRIPTS %}
def {{ name.id }}(*, keys = [] of String, args = [] of String, loadscripts = true)
script = {{ script }}
digest = Scripts.{{name.id}}
redis.evalsha digest, keys: keys, args: args
rescue exception : Redis::Error
raise exception unless exception.message.try(&.starts_with? "NOSCRIPT")
raise exception unless loadscripts
Log.for("{{ name.id }}").warn { "Redis Scripts have gone missing, reloading" }
Scripts.load redis
{{ name.id }} keys: keys, args: args, loadscripts: false
end
{% end %}
getter connection_string : String?
getter connection : ::Redis::Client?
# Builds a client from the connection string and loads the scripts over it.
def connection_string=(value : String)
@connection_string = value
@connection = ::Redis::Client.new(URI.parse(value))
Scripts.load(@connection.not_nil!)
end
# Uses the provided client and loads the scripts over it.
def connection=(client : ::Redis::Client)
@connection = client
Scripts.load(client)
end
# The backend is configured once a connection has been assigned.
def valid_configuration? : Bool
!@connection.nil?
end
# The redis client. Raises if no connection has been configured.
@[AlwaysInline]
def redis
@connection.not_nil!
end
protected def _build_queue(name : String) : Queue
Queue.new(self, name)
end
# Writes the hash of values to the key. See `#set`.
def store(key : String, value : Hash(String, String?) | Hash(String, String)) : Nil
set key, value
end
# Reads every field of the hash stored at key.
def retrieve(key : String) : Hash(String, String)
result = redis.hgetall(key).as(Array).map(&.to_s)
result.in_groups_of(2, "").to_h
end
# Deletes the key immediately, or sets it to expire in ttl seconds when
# ttl is positive.
def delete(key : String, in ttl : Int64 = 0) : Nil
if ttl > 0
redis.expire key, ttl
else
redis.del key
end
end
# NOTE(review): the span is converted with `to_i`, so a span shorter than
# one second presumably becomes 0 and deletes immediately -- confirm.
def delete(key : String, in ttl : Time::Span) : Nil
delete key, ttl.to_i
end
# Reads one field of the hash stored at key; nil unless it is a String.
def get(key : String, field : String) : String?
redis.hget(key, field).as?(String)
end
# Writes one field of the hash stored at key and returns the value.
def set(key : String, field : String, value : String) : String
redis.hset key, field, value
value
end
# Writes many fields at once inside a single MULTI: non-nil values are
# set, and fields whose value is nil are removed from the hash.
def set(key : String, values : Hash(String, String?) | Hash(String, Nil) | Hash(String, String)) : Nil
redis.multi do |multi|
non_nil_key_values = values.compact
if non_nil_key_values.is_a?(Hash(String, String))
multi.hset key, non_nil_key_values
end
keys_for_nil_values = values.select{|_,v| v.nil?}.keys
keys_for_nil_values.each do |nil_key|
multi.hdel key, nil_key
end
end
end
# Removes one field from the hash stored at key.
def delete_field(key : String, field : String) : Nil
redis.hdel key, field
end
# Increments a hash field by 1 and returns the new value.
def increment(key : String, field : String) : Int64
increment key, field, by: 1
end
# Increments a hash field by the given amount and returns the new value.
def increment(key : String, field : String, by value : Int32) : Int64
redis.hincrby(key, field, value).as(Int64)
end
# The result of the redis TTL command for the key.
def expires_in(key : String) : Int64
redis.ttl key
end
# Lists the members of the sorted set of queue names.
def list_queues : Array(String)
key = build_key(LIST_OF_QUEUES_KEY)
list_queues = redis.zrange(key, "0", "-1").as(Array)
return [] of String if list_queues.empty?
list_queues.compact_map(&.as(String))
end
# Adds the overseer id to the overseer list, scored by the current time.
def register_overseer(id : String) : Nil
key = build_key LIST_OF_OVERSEERS_KEY
expiring_list_push key, id
end
# Removes the overseer id from the overseer list.
def deregister_overseer(id : String) : Nil
key = build_key LIST_OF_OVERSEERS_KEY
redis.zrem key, id
end
# Lists the registered overseers, first discarding entries which are more
# than a day old.
def list_overseers : Array(String)
key = build_key LIST_OF_OVERSEERS_KEY
expiring_list_fetch(key, Time.utc - 1.day)
end
# Lists the overseers whose score is at or after the given time. Unlike
# `#list_overseers`, nothing is removed.
def list_active_overseers(since : Time) : Array(String)
key = build_key LIST_OF_OVERSEERS_KEY
redis.zrangebyscore(key, since.to_unix.to_s, "+inf").as(Array).map(&.as(String))
end
# TODO: this should take the timestamp as an argument
def expiring_list_push(key : String, value : String) : Nil
redis.zadd key, Time.utc.to_unix.to_s, value
end
# Removes the items scored at or before the cutoff, then returns what remains.
def expiring_list_fetch(key : String, expire_items_older_than : Time) : Array(String)
redis.zremrangebyscore key, "0", expire_items_older_than.to_unix.to_s
redis.zrange(key, "0", "-1").as(Array).map(&.as(String))
end
# is this even a good idea?
def flush : Nil
redis.flushdb
end
# Attempts to take the lock by setting the key only if it does not exist
# (nx), with an expiry of ttl seconds. Returns true when the lock was taken.
def lock?(key : String, value : String, ttl : Time::Span) : Bool
response = redis.set key, value, ex: ttl.to_i, nx: true
response == "OK"
end
# Extends the lock expiry, but only while the key still holds value.
def renew_lock?(key : String, value : String, ttl : Time::Span) : Bool
result = renew_matching_key keys: [key], args: [value, ttl.to_i.to_s]
result == 1_i64
end
# Releases the lock, but only while the key still holds value.
def unlock(key : String, value : String) : Nil
remove_matching_key keys: [key], args: [value]
end
def publish(key : String, value : String) : Nil
redis.publish key, value
end
# Subscribes to the key as a pattern in a spawned fiber and forwards each
# message onto the returned channel.
#
# NOTE(review): the subscription is created with `psubscribe`, but once the
# stream is closed it calls `unsubscribe` with the concrete channel name.
# That presumably leaves the pattern subscription (and this fiber) alive;
# confirm whether `punsubscribe` with the pattern is what is needed.
def subscribe(key : String) : Channel(Backend::BroadcastMessage)
stream = Channel(Backend::BroadcastMessage).new
spawn do
redis.psubscribe(key) do |subscription, connection|
subscription.on_message do |channel, message|
if stream.closed?
connection.unsubscribe channel
else
stream.send(
Backend::BroadcastMessage.new(
channel: channel,
message: message
)
)
end
end
end
end
stream
end
# Pushes a sample onto the front of the list and trims the list to the
# most recent window_size samples.
def average_push(key : String, value : Int32, window_size : Int32 = 100) : Nil
redis.lpush key, [value.to_s]
redis.ltrim key, 0, window_size - 1
end
# The integer (floor) mean of the stored samples, or 0 when there are none.
def average(key : String) : Int32
stats = redis.lrange key, 0, -1
return 0_i32 if stats.empty?
sum = stats.sum(0_i64) { |s| s.as(String).to_i64 }
(sum // stats.size).to_i32
end
# The redis implementation of a single named queue. The waiting, pending
# and dead lists are redis lists; the scheduled list is a sorted set
# scored by execution time in milliseconds.
class Queue < Backend::Queue
private getter redis_backend : RedisBackend
def initialize(backend : RedisBackend, name : String)
super(backend, name)
@redis_backend = backend
end
private def redis
redis_backend.redis
end
# Defines a private key helper, named after the list with a _q suffix,
# for every entry of QUEUES.
{% for q in QUEUES %}
private def {{q.id}}_q
backend.build_key {{q}}, name
end
{% end %}
# Adds the job_run to the scheduled set and records this queue in the
# list of queues.
def schedule(job_run : JobRun, at scheduled_time : Time) : JobRun
redis.pipeline do |pipe|
pipe.zadd scheduled_q, scheduled_time.to_unix_ms.to_s, job_run.id
pipe.zadd backend.build_key(LIST_OF_QUEUES_KEY), Time.utc.to_unix.to_s, name
end
job_run
end
# Removes every job_run which is due from the scheduled set and returns
# the ones which could still be retrieved from the backend.
def deschedule : Array(JobRun)
time = Time.utc
overdue_job_runs = redis.zrangebyscore(scheduled_q, "0", time.to_unix_ms.to_s).as(Array)
return [] of JobRun if overdue_job_runs.empty?
overdue_job_runs.compact_map do |job_run_id|
redis.zrem scheduled_q, job_run_id.to_s
JobRun.retrieve job_run_id.as(String)
end
end
# Pushes the job_run onto the waiting list and records this queue in the
# list of queues.
def enqueue(job_run : JobRun) : JobRun
redis.pipeline do |pipe|
pipe.lpush waiting_q, job_run.id
pipe.zadd backend.build_key(LIST_OF_QUEUES_KEY), Time.utc.to_unix.to_s, name
end
job_run
end
# Moves one id from the right of waiting to the left of pending in a
# single command, and retrieves the matching job_run.
def dequeue : JobRun?
if id = redis.lmove waiting_q, pending_q, :right, :left
JobRun.retrieve id.to_s
end
end
# Moves one id from the right of pending back onto the right of waiting.
# The pop and the push are two separate commands.
def undequeue : JobRun?
if id = redis.rpop pending_q
redis.rpush waiting_q, id.to_s
JobRun.retrieve id.to_s
end
end
# Removes every occurrence of the job_run id from the pending list.
def finish(job_run : JobRun)
redis.lrem pending_q, 0, job_run.id
end
# Pushes the job_run id onto the dead list.
def terminate(job_run : JobRun)
redis.lpush dead_q, job_run.id
end
# Deletes all four lists of this queue.
def flush : Nil
redis.del(
waiting_q,
pending_q,
scheduled_q,
dead_q
)
end
# The number of job_runs in waiting, pending and scheduled, plus dead
# when include_dead is true.
def size(include_dead = true) : Int64
queues = [waiting_q, pending_q]
queues << dead_q if include_dead
queue_size = queues
.map { |key| redis.llen(key).as(Int64) }
.reduce { |sum, i| sum + i }
scheduled_size = redis.zcount scheduled_q, "0", "+inf"
queue_size + scheduled_size.as(Int64)
end
# Defines a list method and a size method for each of the four lists.
# Both inspect the redis type of the key, since a list may be stored as
# a list or a sorted set, or may not exist at all.
{% for name in ["waiting", "scheduled", "pending", "dead"] %}
def list_{{name.id}} : Array(String)
key = {{name.id}}_q
type = redis.type key
if type == "list"
redis.lrange(key, "0", "-1").as(Array(Redis::Value)).map(&.as(String))
elsif type == "zset"
redis.zrange(key, "0", "-1").as(Array(Redis::Value)).map(&.as(String))
elsif type == "none"
[] of String
else
raise "don't know how to dump a #{type} for {{name.id}}"
end
end
def {{name.id}}_size : Int64
key = {{name.id}}_q
type = redis.type key
case type
when "list"
redis.llen(key).as(Int64)
when "zset"
redis.zcount(key, "0", "+inf").as(Int64)
when "none"
0_i64
else
raise "don't know how to {{name.id}}_size (redis type is a #{type})."
end
end
{% end %}
# The time at which the job_run is scheduled to execute, or nil when no
# string score is found for it in the scheduled set.
def scheduled_job_run_time(job_run : JobRun) : Time?
if score = redis.zscore(scheduled_q, job_run.id).as?(String)
Time.unix_ms(score.to_i64)
end
end
private def pause_key
backend.build_key "queue", name, "pause"
end
# Sets the pause marker for this queue. With a duration the marker
# expires by itself; the expiry is clamped to at least one millisecond.
def pause(duration : Time::Span? = nil) : Nil
if duration
ms = {duration.total_milliseconds.to_i64, 1_i64}.max
redis.set pause_key, "1", px: ms
else
redis.set pause_key, "1"
end
end
# Removes the pause marker.
def resume : Nil
redis.del pause_key
end
# The queue is paused for as long as the pause marker exists.
def paused? : Bool
redis.exists(pause_key) == 1
end
end
end
end
================================================
FILE: src/mosquito/resource_gate.cr
================================================
module Mosquito
# A ResourceGate controls whether work should be dequeued based on
# external resource availability (GPU utilization, CPU load, network
# bandwidth, etc.).
#
# Subclass `ResourceGate` and implement `#check` to test the resource.
# The result is cached for `sample_ttl` so expensive checks (shelling
# out to nvidia-smi, reading /sys, etc.) aren't repeated on every
# dequeue spin.
#
# ## Example
#
# ```crystal
# class GpuUtilizationGate < Mosquito::ResourceGate
# def initialize(@threshold : Float64 = 85.0)
# super(sample_ttl: 2.seconds)
# end
#
# protected def check : Bool
# current_gpu_utilization < @threshold
# end
# end
# ```
abstract class ResourceGate
  getter sample_ttl : Time::Span

  # The outcome of the most recent `#check`, and when it was taken. The
  # epoch default guarantees that the very first `#allow?` runs a check.
  @last_result : Bool = true
  @last_check_at : Time = Time::UNIX_EPOCH

  def initialize(@sample_ttl : Time::Span = 2.seconds)
  end

  # Answers whether work may be dequeued right now.
  #
  # The answer comes from `#check`, which is consulted again only once
  # `sample_ttl` has passed since the previous sample; in between, the
  # previous answer is returned.
  def allow? : Bool
    sampled_at = Time.utc
    return @last_result if sampled_at - @last_check_at < @sample_ttl

    fresh_result = check
    @last_result = fresh_result
    @last_check_at = sampled_at
    fresh_result
  end

  # The actual resource test, provided by subclasses. Invoked no more
  # than once per `sample_ttl` interval.
  protected abstract def check : Bool

  # Hook invoked after a job finishes, for gates which keep internal
  # bookkeeping (e.g. an in-flight counter). Does nothing by default.
  def released(job_run : JobRun, queue : Queue) : Nil
  end
end
end
================================================
FILE: src/mosquito/runnable.cr
================================================
require "wait_group"
module Mosquito
# Runnable implements a general purpose spawn/loop which carries a state
# enum.
#
# ## Managing a Runnable
#
# The primary purpose of Runnable is to cleanly abstract the details of
# spawning a thread, running a loop, and shutting down when asked.
#
# A service which manages a Runnable might look like this:
#
# ```crystal
# runnable = MyRunnable.new
#
# # This will spawn and return immediately.
# runnable.run
#
# puts runnable.state # => State::Working
#
# # Some time later...
# wg = WaitGroup.new(1)
# runnable.stop(wg)
# wg.wait
# ```
#
#
# ## Implementing a Runnable
#
# A runnable implementation needs to implement only two methods: #each_run
# and #runnable_name. In addition, pre_run and post_run are available for
# setup and teardown.
#
# Runnable state is managed automatically through startup and shutdown, but
# within each_run it can be manually altered with `#state=`.
#
# ### Example
#
# ```crystal
# class MyRunnable
# include Mosquito::Runnable
#
# # Optional
# def pre_run
# puts "my runnable is starting"
# end
#
# def each_run
# puts "my runnable is running"
# end
#
# # Optional
# def post_run
# puts "my runnable has stopped"
# end
#
# def runnable_name
# "MyRunnable"
# end
# end
# ```
#
# Implementation details about what work should be done in the spawned fiber
# are placed in #each_run.
#
module Runnable
enum State
  Starting
  Working
  Idle
  Stopping
  Finished
  Crashed

  # True for every state in which the run loop keeps cycling:
  # starting, working, or idle.
  def running?
    in?(Starting, Working, Idle)
  end

  # True once the runnable is past `Starting` and still cycling
  # (i.e. working or idle).
  def started?
    running? && !starting?
  end
end
# Tracks the state of this runnable.
#
# Initially it will be `State::Starting`. After `#run` is called it will
# be `State::Working`.
#
# When `#stop` is called it will be `State::Stopping`. After `#run` finishes,
# it will be `State::Finished`. If the run loop raises, it will be
# `State::Crashed` instead.
#
# It is not necessary to set this manually, but it's available to an implementation
# if needed. See `Mosquito::Runners::Executor#state=` (source code) for an example.
getter state : State = State::Starting
# After #run has been called this holds a reference to the Fiber
# which is used to check that the fiber is still running. It is only
# assigned when the loop is spawned; running inline (`spawn: false`)
# leaves it nil.
getter fiber : Fiber?
# Signaled when the run loop exits (finished or crashed). The channel is
# closed rather than sent to, so any number of waiters are released.
private getter done = Channel(Nil).new
# A lazily built identifier for this instance: the underscored class name
# with "::" replaced by "." followed by the object id.
getter my_name : String {
"#{self.class.name.underscore.gsub("::", ".")}.#{self.object_id}"
}
# Lazily built logger whose source is `#runnable_name`.
private getter log : ::Log { Log.for runnable_name }
# Guarded state setter used by the run loop and by implementations.
#
# Every transition is accepted except Stopping -> Idle, which is ignored
# so that a pending stop request is not lost when a cycle finishes.
private def state=(new_state : State)
# If the state is currently stopping, don't go back to idle.
if @state.stopping? && new_state.idle?
log.trace { "Ignoring state change to #{new_state} because state=stopping." }
return
end
@state = new_state
end
# Reports whether the fiber captured by `#run` has terminated.
# A runnable with no captured fiber is never considered dead.
def dead? : Bool
  fiber.try(&.dead?) || false
end
# Starts the run loop.
#
# By default the loop is spawned into its own fiber (named after
# `#runnable_name`), the fiber is remembered in `#fiber`, and control
# returns to the caller immediately. With `spawn: false` the loop runs in
# the calling fiber instead and this method blocks until the loop exits.
#
# The loop keeps cycling for as long as `state.running?`. State can be
# changed from inside or outside to end it, but `#stop` is the clean way
# to ask for that.
def run(*, spawn spawn_fiber = true)
  return run_loop unless spawn_fiber

  @fiber = spawn(name: runnable_name) { run_loop }
end
# The body of the runnable: marks the state Working, calls `#pre_run`,
# repeats `#each_run` while `state.running?`, then calls `#post_run` and
# marks the state Finished.
#
# Any exception raised along the way marks the state Crashed and is
# logged; note that `#post_run` is skipped in that case. Whether the loop
# finished or crashed, the `done` channel is closed so fibers waiting in
# `#stop` are released.
private def run_loop
log.info { "starting" }
self.state = State::Working
pre_run
while state.running?
each_run
end
post_run
self.state = State::Finished
log.info { "stopped" }
rescue any_exception
self.state = State::Crashed
log.error { "crashed with #{any_exception.inspect}" }
ensure
done.close
end
# Request that the next time the run loop cycles it should exit instead.
# The runnable doesn't exit immediately so #stop spawns a fiber to
# monitor the state transition.
#
# Returns the `WaitGroup`, which will be decremented when the
# runnable has finished. This enables `runnable.stop.wait`.
#
# If a `WaitGroup` is provided, it will be decremented when the
# runnable has finished. This is useful when stopping multiple
# runnables and waiting for all of them to finish.
#
# Calling stop on a runnable that has already finished or crashed is a
# no-op (the wait_group is signaled immediately).
def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
# Already finished or crashed: there is nothing to wait for.
unless state.running? || state.stopping?
wait_group.done
return wait_group
end
# A repeated stop leaves the state alone but still registers a waiter below.
self.state = State::Stopping if state.running?
spawn do
# `done` is closed (never sent to) by the run loop, so `receive?`
# returns once the loop has exited.
done.receive?
wait_group.done
end
wait_group
end
# Used to print a pretty name for logging. It is also used as the name of
# the spawned fiber.
abstract def runnable_name : String
# Implementation of what this Runnable should do on each cycle.
#
# Take care that @state is #running? at the end of the method
# unless it is finished and should exit.
abstract def each_run : Nil
# Available to hook a one time setup before the run loop.
def pre_run : Nil ; end
# Available to hook any teardown logic after the run loop. It is not
# called when the run loop crashes.
def post_run : Nil ; end
end
end
================================================
FILE: src/mosquito/runner.cr
================================================
require "colorize"
module Mosquito
# This singleton class serves as a shorthand for starting and managing an Overseer.
#
# A minimal usage of Mosquito::Runner is:
#
# ```
# require "mosquito"
#
# # When the process receives sigint, it'll notify the overseer to shut down gracefully.
# Signal::INT.trap do
# Mosquito::Runner.stop
# end
#
# # Starts the overseer, and holds the thread captive.
# Mosquito::Runner.start
# ```
#
# If for some reason you want to manage an overseer or group of overseers yourself, Mosquito::Runner can be omitted entirely:
#
# ```crystal
# require "mosquito"
#
# mosquito = Mosquito::Overseer.new
#
# # Spawns a mosquito managed fiber and returns immediately
# mosquito.run
#
# Signal::INT.trap do
# wg = WaitGroup.new(1)
# mosquito.stop(wg)
# wg.wait
# end
# ```
class Runner
  Log = ::Log.for self

  # Boots the shared overseer.
  #
  # With `spin` truthy (the default) the overseer loop runs in the calling
  # fiber, so this call does not return until the runner shuts down. With
  # `spin` falsey the loop is spawned and this call returns immediately.
  def self.start(spin = true)
    Log.notice { "Mosquito is buzzing..." }
    spin ? instance.run(spawn: false) : instance.run
  end

  # :nodoc:
  def self.keep_running : Bool
    current = instance.state
    current.running? || current.stopping?
  end

  # Asks the runner to shut down. The job currently being worked on is not
  # aborted, but no new jobs are started.
  #
  # Pass `wait: true` to block until the overseer has finished stopping.
  # Does nothing when the runner is no longer running.
  #
  # See `Mosquito::Runnable#stop`.
  def self.stop(wait = false)
    return unless keep_running

    Log.notice { "Mosquito is shutting down..." }
    shutdown = instance.stop
    wait ? shutdown.wait : shutdown
  end

  # The overseer managed by the singleton runner.
  def self.overseer
    instance.overseer
  end

  # Lazily builds and memoizes the singleton.
  private def self.instance : self
    @@instance ||= new
  end

  # :nodoc:
  delegate run, stop, state, to: @overseer

  # :nodoc:
  delegate running?, to: @overseer.state

  # :nodoc:
  getter overseer : Runners::Overseer

  # :nodoc:
  def initialize
    Mosquito.configuration.validate
    @overseer = Runners::Overseer.new
  end
end
end
================================================
FILE: src/mosquito/runners/coordinator.cr
================================================
module Mosquito::Runners
# The Coordinator is the scheduler half of an overseer: on every call to
# `#schedule` it gives periodic jobs a chance to run and moves delayed
# jobs which have come due back onto their queues.
#
# When `Mosquito.configuration.use_distributed_lock` is enabled, only the
# instance currently holding the coordinator lease (a backend lock with a
# `LockTTL` expiry) performs the scheduling; otherwise every instance does.
class Coordinator
  Log = ::Log.for self

  # Lifetime of the coordinator lease. It is renewed on each `#schedule`
  # while this instance is the leader.
  LockTTL = 30.seconds

  # Backend key under which the lease is stored.
  getter lock_key : String

  # Random token identifying this instance as the lease holder.
  getter instance_id : String

  # The queues searched for delayed jobs.
  getter queue_list : QueueList

  # Whether this instance believes it currently holds the lease.
  getter? is_leader : Bool = false

  def initialize(@queue_list)
    @lock_key = Mosquito.backend.build_key :coordinator, :leadership_lock
    @instance_id = Random::Secure.hex(8)
  end

  def runnable_name : String
    "coordinator.#{object_id}"
  end

  # Teardown hook: gives the lease back so another instance can lead.
  def post_run : Nil
    release_leadership_lock
  end

  # Enqueues periodic and delayed jobs, subject to `#only_if_coordinator`.
  def schedule : Nil
    only_if_coordinator do
      enqueue_periodic_jobs
      enqueue_delayed_jobs
    end
  end

  # Yields when this instance is allowed to schedule: always when the
  # distributed lock is disabled, otherwise only while holding the lease
  # (which is acquired or renewed here first).
  def only_if_coordinator : Nil
    unless Mosquito.configuration.use_distributed_lock
      yield
      return
    end

    maintain_leadership

    if is_leader?
      yield
    end
  end

  # Releases the coordinator lease. Call during shutdown so another
  # instance can take over immediately instead of waiting for the
  # TTL to expire.
  def release_leadership_lock : Nil
    return unless is_leader?

    Mosquito.backend.unlock lock_key, instance_id
    @is_leader = false
    Log.info { "Coordinator lease released" }
  end

  # Gives every registered periodic job run a chance to execute.
  def enqueue_periodic_jobs
    Base.scheduled_job_runs.each do |scheduled_job_run|
      scheduled_job_run.try_to_execute
    end
  end

  # Moves delayed jobs which are now overdue onto their queue.
  def enqueue_delayed_jobs
    queue_list.each do |q|
      overdue_jobs = q.dequeue_scheduled
      next unless overdue_jobs.any?

      Log.for("enqueue_delayed_jobs").info { "#{overdue_jobs.size} delayed jobs ready in #{q.name}" }

      overdue_jobs.each do |job_run|
        q.enqueue job_run
      end
    end
  end

  # Renews the lease when held; when the renewal fails (or the lease was
  # never held) attempts to acquire it.
  private def maintain_leadership : Nil
    if is_leader?
      unless Mosquito.backend.renew_lock? lock_key, instance_id, LockTTL
        Log.info { "Lost coordinator lease" }
        @is_leader = false
        try_acquire
      end
    else
      try_acquire
    end
  end

  # Attempts to take the lease, recording success in `#is_leader?`.
  private def try_acquire : Nil
    if Mosquito.backend.lock? lock_key, instance_id, LockTTL
      Log.info { "Coordinator lease acquired" }
      @is_leader = true
    end
  end
end
end
================================================
FILE: src/mosquito/runners/executor.cr
================================================
require "./run_at_most"
require "../runnable"
module Mosquito::Runners
# The executor is the center of work in Mosquito, and it is the demarcation
# point between Mosquito framework and application code. Above the Executor
# is entirely Mosquito, and below it is application code.
#
# An Executor is responsible for hydrating Job classes with deserialized
# parameters and calling `Mosquito::Job#run` on them. It measures the time it
# takes to run a job and provides detailed log messages about the current
# status.
#
# An executor is a `Mosquito::Runnable` and should be interacted with according to
# the Runnable API.
#
# To build an executor, provide a job input channel and an idle bell channel. These
# channels can be shared between all available executors.
#
# The executor will ring the idle bell when it is ready to accept work and then wait
# for work to show up on the job pipeline. After the job is finished it will ring the
# bell again and wait for more work.
class Executor
include RunAtMost
include Runnable
# How long a job config is persisted after success
property successful_job_ttl : Int32 { Mosquito.configuration.successful_job_ttl }
# How long a job config is persisted after failure
property failed_job_ttl : Int32 { Mosquito.configuration.failed_job_ttl }
# Where work is received from the overseer.
getter job_pipeline : Channel(WorkUnit)
# The unit of work most recently received from the pipeline. It is not
# cleared when the job finishes, so it also identifies the last job run.
getter! work_unit : WorkUnit
# Used to notify the overseer when this executor is idle.
# Sends the work unit that was just finished, or nil
# when the executor first starts up.
getter finished_bell : Channel(WorkUnit?)
# The overseer which owns this executor and supplies both channels.
getter overseer : Overseer
# Lazily built observability helper for this executor.
getter observer : Observability::Executor {
Observability::Executor.new self
}
# True once `#decommission!` has been called.
getter? decommissioned : Bool = false
# Wakes an executor blocked waiting for work so it can stop. Buffered
# (capacity 1) so `#decommission!` does not block when nobody is receiving.
@stop_channel = Channel(Nil).new(1)
# Marks this executor for graceful shutdown. It will stop after
# completing its current job (if any).
#
# Idempotent: only the first call sets the flag and signals the stop
# channel.
def decommission!
return if @decommissioned
@decommissioned = true
@stop_channel.send(nil)
end
# The job run of the current work unit. Goes through the `work_unit`
# non-nil getter, so a work unit must already have been received.
private def job_run : JobRun
work_unit.job_run
end
# The queue the current work unit was dequeued from. Same precondition
# as `#job_run`.
private def queue : Queue
work_unit.queue
end
# Updates the runnable state and, on every transition to idle, rings the
# overseer's bell with the work unit that was just finished (nil when no
# work has been done yet).
private def state=(state : State)
  # Send a message to the overseer that this executor is idle,
  # including the job that was just finished (if any).
  if state == State::Idle
    # Capture the work unit now. Reading @work_unit inside the spawned
    # fiber would pick up whatever is assigned by the time that fiber
    # runs, which may already be the next job.
    finished_unit = @work_unit
    spawn { finished_bell.send finished_unit }
  end

  super
end
# Builds an executor wired to the overseer's shared channels: work
# arrives on `overseer.work_handout` and idle notifications are sent on
# `overseer.finished_notifier`.
def initialize(@overseer : Overseer)
@job_pipeline = overseer.work_handout
@finished_bell = overseer.finished_notifier
end
# :nodoc:
# Name used for this executor's log source and fiber: "executor." plus the object id.
def runnable_name : String
"executor.#{object_id}"
end
# :nodoc:
# Announces this executor to the overseer before the first cycle.
def pre_run : Nil
# Overseer won't try to dequeue and send any jobs unless it
# knows that an executor is idle, so the first thing to do
# is mark this executor as idle. See #state=.
self.state = State::Idle
end
# Stops the executor. It is decommissioned first, which wakes it if it is
# blocked waiting for work, and then the standard Runnable stop runs.
def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
decommission!
super
end
# :nodoc:
# One cycle of the executor: wait for a work unit (or a stop signal),
# execute it, then report idle again.
def each_run : Nil
# Decommissioned between cycles: stop without waiting for more work.
if @decommissioned
self.state = State::Stopping
return
end
dequeue : WorkUnit? = nil
begin
# Block until either work arrives or a stop is requested.
select
when dequeue = job_pipeline.receive
when @stop_channel.receive
self.state = State::Stopping
return
end
rescue Channel::ClosedError
# A closed channel means there is nothing to receive; end this cycle
# and let the run loop re-check the state.
return
end
return unless dequeue
self.state = State::Working
@work_unit = dequeue
log.trace { "Dequeued #{job_run} from #{queue.name}" }
begin
execute
rescue e
# #execute itself raised. Send the job through the retry logic, and
# if even that fails, banish it.
log.error { "Crashed executing #{job_run}: #{e.inspect}" }
begin
job_run.retry_or_banish queue
rescue
queue.banish job_run
end
end
log.trace { "Finished #{job_run} from #{queue.name}" }
# NOTE(review): on this path the state never becomes Idle, so the
# overseer's bell is not rung for the job that just finished — confirm
# that the dequeue adapter / resource gates do not depend on it.
if @decommissioned
self.state = State::Stopping
return
end
self.state = State::Idle
observer.heartbeat!
end
# Runs the current job and files it according to its outcome.
#
# The run itself is wrapped by the observer. Afterwards:
# - a successful job is forgotten and its config expires after
#   `successful_job_ttl`;
# - a preempted job is forgotten and put straight back on the queue;
# - a failed job which can be rescheduled is rescheduled after its
#   reschedule interval;
# - any other failed job is banished and its config expires after
#   `failed_job_ttl`.
def execute
  observer.execute job_run, queue do
    job_run.run
  end

  case
  when job_run.succeeded?
    queue.forget job_run
    job_run.delete in: successful_job_ttl
  when job_run.preempted?
    queue.forget job_run
    queue.enqueue job_run
  when job_run.rescheduleable?
    retry_at = Time.utc + job_run.reschedule_interval
    queue.reschedule job_run, retry_at
  else
    queue.banish job_run
    job_run.delete in: failed_job_ttl
  end
end
end
end
================================================
FILE: src/mosquito/runners/idle_wait.cr
================================================
module Mosquito::Runners
module IdleWait
  # Runs the block, then sleeps for whatever part of `idle_wait` the block
  # did not use up. A block that takes `idle_wait` or longer is not
  # followed by any sleep.
  def with_idle_wait(idle_wait : Time::Span)
    elapsed = Time.measure { yield }
    remaining = idle_wait - elapsed

    # Fiber.timeout(remaining)
    sleep(remaining) if remaining > Time::Span.zero
  end
end
end
================================================
FILE: src/mosquito/runners/overseer.cr
================================================
require "./idle_wait"
require "./queue_list"
require "./run_at_most"
require "../runnable"
module Mosquito::Runners
# The Overseer is responsible for managing:
# - a `Coordinator`
# - an `Executor`
# - the `QueueList`
# - any idle state as configured
#
# An overseer manages the loop that each thread or process runs.
class Overseer
include IdleWait
include RunAtMost
include Runnable
# Lazily built observability helper for this overseer.
getter observer : Observability::Overseer { Observability::Overseer.new(self) }
# Discovers the queues which work is dequeued from.
getter queue_list : QueueList
# The pool of executors currently managed by this overseer.
getter executors
# Schedules periodic and delayed jobs; run at the start of every cycle.
getter coordinator
# Strategy object which picks the next job to dequeue.
getter dequeue_adapter : Mosquito::DequeueAdapter
# The channel where job runs which have been dequeued are sent to executors.
getter work_handout
# When an executor transitions to idle it will send the finished
# work unit here (or nil on first idle). The Overseer
# uses this as a signal to check the queues for more work.
getter finished_notifier
# The number of executors to start.
getter executor_count : Int32
# Sets the target pool size, never letting it drop below one executor.
def executor_count=(count : Int32)
@executor_count = Math.max(count, 1)
end
# How long to wait for an idle executor, and how long to rest when there
# is nothing to dequeue.
getter idle_wait : Time::Span
# Builds the overseer from `Mosquito.configuration`: the channels, the
# queue list (with its resource gates), the coordinator, the dequeue
# adapter, and finally the initial pool of executors.
def initialize
@executor_count = Mosquito.configuration.executor_count
@idle_wait = Mosquito.configuration.idle_wait
@finished_notifier = Channel(WorkUnit?).new
@queue_list = QueueList.new
@queue_list.resource_gates = Mosquito.configuration.resource_gates
@coordinator = Coordinator.new queue_list
@dequeue_adapter = Mosquito.configuration.dequeue_adapter
@executors = [] of Executor
@work_handout = Channel(WorkUnit).new
# Executors read both channels from this overseer when constructed, so
# they are built only after the channels exist.
executor_count.times do
@executors << build_executor
end
observer.update_executor_list executors
end
# Creates (but does not start) an executor attached to this overseer and
# reports its creation to the observer.
def build_executor : Executor
Executor.new(overseer: self).tap do |executor|
observer.executor_created executor
end
end
# Name used for the overseer's log source and fiber.
def runnable_name : String
"overseer"
end
# Rests for `idle_wait`, logging first.
# NOTE(review): the inner `sleep idle_wait` takes an argument, so it
# presumably resolves to the top-level `sleep` rather than recursing into
# this zero-argument method — confirm.
def sleep
log.trace { "Going to sleep now for #{idle_wait}" }
sleep idle_wait
end
# Starts all the subprocesses: the queue list and every executor, each
# with its default `#run` behaviour.
def pre_run : Nil
observer.starting
@queue_list.run
@executors.each(&.run)
end
# Standard Runnable stop, with a shutdown notice sent to the observer the
# first time it is requested (i.e. while still running).
def stop(wait_group : WaitGroup = WaitGroup.new(1)) : WaitGroup
observer.shutting_down if state.running?
super
end
# Notify all subprocesses to stop, and wait until they do.
#
# The coordinator lease is released first, then every executor and the
# queue list are asked to stop, the work channel is closed, and finally
# this method blocks until all of them have finished.
#
# NOTE(review): an earlier version of this comment said leftover pending
# jobs are moved back to waiting here; nothing in this method does that —
# confirm where (or whether) that happens.
def post_run : Nil
observer.stopping
coordinator.post_run
# One slot per executor plus one for the queue list.
child_fiber_shutdown = WaitGroup.new(executors.size + 1)
executors.each { |e| e.stop(child_fiber_shutdown) }
@queue_list.stop(child_fiber_shutdown)
work_handout.close
child_fiber_shutdown.wait
observer.stopped
end
# The goal for the overseer is to:
# - Ensure that the coordinator gets run frequently to schedule delayed/periodic jobs.
# - Wait for an executor to be idle, and dequeue work if possible.
# - Monitor the executor pool for unexpected termination and respawn.
def each_run : Nil
# When shutting down, stop dequeuing new work immediately.
return if state.stopping?
coordinator.schedule
# I cannot imagine a situation where this happens in the normal flow of
# events, but if it did it would be a mess. If something crashes hard
# enough that one of these channels closes the whole thing is going to
# come crashing down and we should just quit now.
if work_handout.closed? || finished_notifier.closed?
observer.channels_closed
stop
return
end
# If the queue list hasn't run at least once, it won't have any queues to
# search for so we'll just defer until it's available.
unless queue_list.state.started?
observer.waiting_for_queue_list
return
end
log.trace { "Waiting for an idle executor" }
all_executors_busy = true
# This feature is under documented in the crystal manual.
# This will attempt to receive from the idle notifier, but only
# wait for up to idle_wait seconds.
#
# The interrupt is necessary to remind the coordinator to schedule
# jobs.
select
when finished_job = @finished_notifier.receive
log.trace { "Found an idle executor" }
all_executors_busy = false
# A nil notification is an executor's first idle (or a re-sent
# notification, see below); only a real work unit is reported onwards.
if finished_job
dequeue_adapter.finished_with(finished_job.job_run, finished_job.queue)
queue_list.notify_released(finished_job.job_run, finished_job.queue)
end
when timeout(idle_wait)
log.trace { "Idled for #{idle_wait.total_seconds}s" }
end
case
when state.stopping?
# A stop was requested while waiting: don't dequeue anything.
when all_executors_busy
# If none of the executors is idle, don't dequeue anything or it'll get lost.
log.trace { "No idle executors" }
when next_job_run = dequeue_job?
# We know that an executor is idle and will take the work, it's safe to dequeue.
log.trace { "Dequeued job: #{next_job_run.job_run.id} #{next_job_run.queue.name}" }
work_handout.send next_job_run
else
# An executor is idle, but dequeue returned nil.
log.trace { "No job to dequeue" }
sleep
# The idle notification has been consumed, and it needs to be
# re-sent so that the next loop can still find the idle executor.
spawn { @finished_notifier.send nil }
end
maybe_apply_remote_executor_count
adjust_executor_pool
run_at_most every: Mosquito.configuration.heartbeat_interval, label: :heartbeat do
observer.heartbeat
end
# Orphan cleanup runs at a third of the heartbeat frequency.
run_at_most every: Mosquito.configuration.heartbeat_interval * 3, label: :pending_cleanup do
cleanup_orphaned_pending_jobs
end
end
# Asks the configured `DequeueAdapter` for the next unit of work and, when
# there is one, marks its job run as claimed by this overseer.
#
# The adapter can be replaced through
# `Mosquito.configuration.dequeue_adapter` to implement other strategies
# (priority, round-robin, rate limiting, etc).
def dequeue_job? : WorkUnit?
  dequeue_adapter.dequeue(queue_list).tap do |work_unit|
    work_unit.try &.job_run.claimed_by(self)
  end
end
# When a job fails any exceptions are caught and logged. If a job causes something more
# catastrophic we can try to recover by spawning a new executor.
#
# This happens, for example, when a new version of a worker is deployed and work is still
# in the queue that references job classes that no longer exist.
#
# When a dead executor is found, any job it was working on has its
# failure counter incremented and follows the standard retry logic.
#
# The same pass also resizes the pool towards `executor_count` and stops
# the overseer if the queue list fiber has died.
def adjust_executor_pool : Nil
# Remove dead/crashed executors and recover their jobs.
# `select` returns a new array, so deleting from `executors` while
# walking the result is safe.
executors.select {|executor| executor.dead? || executor.state.crashed? }
.each do |dead_executor|
observer.executor_died dead_executor
recover_job_from dead_executor
executors.delete dead_executor
end
# Scale up: spawn new executors to reach the target count.
(executor_count - executors.size).times do
executors << build_executor.tap(&.run)
end
# Scale down: decommission excess executors and remove them from the pool.
# They will finish their current job (if any) and then stop.
while executors.size > executor_count
executors.pop.decommission!
end
observer.update_executor_list executors
# Without a queue list there are no queues to dequeue from.
if queue_list.dead?
observer.queue_list_died
stop
end
end
# :nodoc:
# Scans pending queues for jobs owned by overseers that are no longer
# alive. Each orphaned job has its failure counter incremented and
# follows the standard retry logic.
#
# An overseer is considered alive if it has registered a heartbeat
# within the configured dead_overseer_threshold. Jobs with no overseer_id (pre-
# dating this feature) are claimed by this overseer so they become
# recoverable when this overseer later dies.
def cleanup_orphaned_pending_jobs : Nil
live_overseers = Mosquito.backend.list_active_overseers(
since: Time.utc - Mosquito.configuration.dead_overseer_threshold
).to_set
queue_names = Mosquito.backend.list_queues
return if queue_names.empty?
# Counts both dangling references removed and orphaned jobs recovered.
total = 0
queue_names.each do |name|
q = Queue.new(name)
q.backend.list_pending.each do |job_run_id|
job_run = JobRun.retrieve(job_run_id)
unless job_run
# Job config is gone (expired/deleted), just clean up the
# dangling reference in the pending queue.
q.backend.finish JobRun.new("_cleanup", id: job_run_id)
total += 1
next
end
# Jobs without an overseer_id predate this feature. Claim them
# so a future cleanup cycle can detect if this overseer dies.
unless oid = job_run.overseer_id
job_run.claimed_by self
next
end
next if live_overseers.includes?(oid)
observer.recovered_orphaned_job job_run, oid
begin
job_run.retry_or_banish q
rescue e : KeyError
# The retry logic could not handle this job run; banish it rather
# than abandon the rest of the scan.
log.warn { "Skipping orphaned job #{job_run_id}: #{e.message}" }
q.banish job_run
end
total += 1
end
end
if total > 0
observer.orphaned_jobs_recovered total
end
end
# Polls the backend for a remote executor count override and applies
# it when present. Checks at most once per heartbeat interval.
# The resolved value follows the precedence: per-overseer → global → current.
#
# Only the target count is changed here; the pool itself is resized by
# `#adjust_executor_pool`. A failure to fetch the override is logged and
# otherwise ignored.
private def maybe_apply_remote_executor_count : Nil
run_at_most every: Mosquito.configuration.heartbeat_interval, label: :remote_executor_count do
overseer_id = Mosquito.configuration.overseer_id
if remote_count = Api::ExecutorConfig.resolve(overseer_id)
# Never go below one executor.
clamped = Math.max(remote_count, 1)
if clamped != executor_count
log.info { "Remote executor count changed: #{executor_count} → #{clamped}" }
self.executor_count = clamped
end
end
rescue ex
log.warn { "Failed to fetch remote executor count: #{ex.message}" }
end
end
# If a dead executor was working on a job, increment its failure
# counter and follow the standard retry logic.
#
# The job is first reported as finished to both the dequeue adapter and
# the queue's resource gate, exactly as `#each_run` does for a job which
# completes normally, so that neither keeps counting it as in flight.
private def recover_job_from(dead_executor : Executor) : Nil
  return unless work_unit = dead_executor.work_unit?

  job_run = work_unit.job_run
  queue = work_unit.queue

  observer.recovered_job_from_executor job_run, dead_executor
  dequeue_adapter.finished_with(job_run, queue)
  queue_list.notify_released(job_run, queue)

  begin
    job_run.retry_or_banish queue
  rescue e : KeyError
    # Same fallback as #cleanup_orphaned_pending_jobs: a job the retry
    # logic cannot handle is banished instead of crashing the overseer.
    log.warn { "Skipping recovered job #{job_run.id}: #{e.message}" }
    queue.banish job_run
  end
end
end
end
================================================
FILE: src/mosquito/runners/queue_list.cr
================================================
require "./run_at_most"
require "../runnable"
require "./idle_wait"
require "../resource_gate"
module Mosquito::Runners
# QueueList handles searching the redis keyspace for named queues.
class QueueList
include RunAtMost
include Runnable
include IdleWait
# Lazily built observability helper for this queue list.
getter observer : Observability::QueueList { Observability::QueueList.new(self) }
# Maps queue names to resource gates. Queues not present in this
# mapping are always eligible for dequeuing.
property resource_gates : Hash(String, ResourceGate) = {} of String => ResourceGate
# Starts with no discovered queues; the list is filled in by `#each_run`.
def initialize
@discovered_queues = [] of Queue
end
# The queues work may currently be dequeued from.
#
# That is every discovered queue except those whose resource gate is
# presently refusing work. With no gates configured the discovered list
# is returned as-is.
def queues : Array(Queue)
  gates = resource_gates
  return @discovered_queues if gates.empty?

  @discovered_queues.reject do |candidate|
    gate = gates[candidate.name]?
    gate ? !gate.allow? : false
  end
end
# Name used for the queue list's log source and fiber.
def runnable_name : String
"queue-list"
end
# Tells the resource gate guarding `queue` (if there is one) that
# `job_run` has finished, so the gate can update its bookkeeping.
def notify_released(job_run : JobRun, queue : Queue) : Nil
  resource_gates[queue.name]?.try &.released(job_run, queue)
end
delegate each, to: queues
# Refreshes the list of discovered queues from the backend.
#
# Queue names are listed, restricted to the configured `run_from` set,
# and paused queues are split off (and reported to the observer). Newly
# seen queues are logged. The whole cycle is padded to one second.
def each_run : Nil
  # This idle wait should be at most 1 second. Longer can cause periodic jobs
  # which are specified at the second-level to be executed aperiodically.
  # Shorter will generate excess noise in the redis connection.
  with_idle_wait(1.seconds) do
    self.state = State::Working

    candidate_queues = Mosquito.backend.list_queues.map { |name| Queue.new name }
    new_queue_list = filter_queues candidate_queues
    paused, new_queue_list = new_queue_list.partition(&.paused?)
    observer.checked_for_paused_queues paused

    # The block yields nil (no message) unless new queues were found.
    log.notice {
      queues_which_have_never_been_seen = new_queue_list - @discovered_queues

      if queues_which_have_never_been_seen.size > 0
        "found #{queues_which_have_never_been_seen.size} new queues: #{queues_which_have_never_been_seen.map(&.name).join(", ")}"
      end
    }

    @discovered_queues = new_queue_list

    # Go through the guarded setter rather than writing @state directly:
    # if #stop was called while the backend was being queried, the state
    # is Stopping and must not be reset to Idle, or the run loop would
    # never exit.
    self.state = State::Idle
  end
end
# Narrows `present_queues` to the queues named in
# `Mosquito.configuration.run_from`. An empty `run_from` means no
# restriction, and the input is returned untouched.
#
# When the restriction leaves nothing to watch, the ignored queues are
# logged.
private def filter_queues(present_queues : Array(Mosquito::Queue))
  allow_list = Mosquito.configuration.run_from
  return present_queues if allow_list.empty?

  watched = present_queues.select { |queue| allow_list.includes? queue.name }

  log.for("filter_queues").notice {
    # Yielding nil produces no log message.
    next unless watched.empty?

    ignored = present_queues - watched
    next unless ignored.size > 0

    "No watchable queues found. Ignored #{ignored.size} queues not configured to be watched: #{ignored.map(&.name).join(", ")}"
  }

  watched
end
end
end
================================================
FILE: src/mosquito/runners/run_at_most.cr
================================================
module Mosquito::Runners
module RunAtMost
  # When each labelled block last ran, keyed by label.
  getter execution_timestamps = {} of Symbol => Time::Instant

  # Throttles a block: it is yielded to (with the current instant) only
  # when the block registered under `label` has never run, or last ran at
  # least `every` ago. Otherwise nothing happens.
  private def run_at_most(*, every interval, label name, &block)
    now = Time.instant
    previous = @execution_timestamps[name]?
    return if previous && (now - previous) < interval

    @execution_timestamps[name] = now
    yield now
  end
end
end
================================================
FILE: src/mosquito/scheduled_job.cr
================================================
module Mosquito
# Base class for jobs which are registered to run at a specific time
# through the `run_at` macro.
abstract class ScheduledJob < Job
def initialize
end
# Builds a job run for this job. Generated for every subclass by the
# `inherited` hook below.
abstract def build_job_run
# For every subclass: registers the job name mapping, defines
# `build_job_run`, and makes the `run_at` macro available.
macro inherited
Mosquito::Base.register_job_mapping job_name, {{ @type.id }}
def build_job_run
job_run = Mosquito::JobRun.new(job_name)
end
macro run_at(time)
Mosquito::Base.register_job \{{ @type.id }}, to_run_at: time
end
end
# Scheduled jobs are never rescheduled.
def rescheduleable?
false
end
end
end
================================================
FILE: src/mosquito/serializers/primitives.cr
================================================
# Serializers for primitive job parameter types. Every value is stored as a
# String; each `serialize_x` has a matching `deserialize_x` which parses
# the stored string back into the original type.
module Mosquito::Serializers::Primitives
# Strings are stored unchanged.
def serialize_string(str : String) : String
str
end
def deserialize_string(raw : String) : String
raw
end
# Booleans are stored as "true" / "false".
def serialize_bool(value : Bool) : String
value.to_s
end
# Anything other than exactly "true" deserializes to false.
def deserialize_bool(raw : String) : Bool
raw == "true"
end
# Symbols are not supported as job parameters; this always raises.
def serialize_symbol(sym : Symbol) : Nil
raise "Symbols cannot be deserialized. Stringify your symbol first to pass it as a mosquito job parameter."
end
def serialize_char(char : Char) : String
char.to_s
end
# Takes the first character of the stored string.
def deserialize_char(raw : String) : Char
raw[0]
end
def serialize_uuid(uuid : UUID) : String
uuid.to_s
end
def deserialize_uuid(raw : String) : UUID
UUID.new raw
end
# Generates a serialize_<type> / deserialize_<type> pair for every numeric
# type in the table below: serialization is `to_s`, deserialization calls
# the listed String conversion method.
{% begin %}
{%
primitives = [
{ Int8, :to_i8 },
{ Int16, :to_i16 },
{ Int32, :to_i32 },
{ Int64, :to_i64 },
{ Int128, :to_i128 },
{ UInt8, :to_u8 },
{ UInt16, :to_u16 },
{ UInt32, :to_u32 },
{ UInt64, :to_u64 },
{ UInt128, :to_u128 },
{ Float32, :to_f32 },
{ Float64, :to_f64 }
]
%}
{% for mapping in primitives %}
{%
type = mapping.first
method_suffix = type.stringify.underscore
method = mapping.last
%}
def serialize_{{ method_suffix.id }}(value) : String
value.to_s
end
def deserialize_{{ method_suffix.id }}(raw : String) : {{ type.id }}?
if raw
raw.{{ method.id }}
end
end
{% end %}
{% end %}
end
================================================
FILE: src/mosquito/test_backend.cr
================================================
module Mosquito
# An in-memory noop backend designed to be used in application testing.
#
# The test mode backend simply makes a copy of job_runs at enqueue time and holds them in a class getter array.
#
# Job run id, config (aka parameters), and runtime class are kept in memory, and a truncate utility function is provided.
#
# To activate test mode, configure the test backend like this:
#
# ```
# Mosquito.configure do |settings|
# settings.backend = Mosquito::TestBackend.new
# end
# ```
#
# Then in your tests:
#
# ```
# describe "testing" do
# it "enqueues the job" do
# # build and enqueue a job
# job_run = EchoJob.new(text: "hello world").enqueue
#
# # assert that the job was enqueued
# latest_enqueued_job = Mosquito::TestBackend.enqueued_jobs.last
#
# # check the job config
# assert_equal "hello world", latest_enqueued_job.config["text"]
#
# # check the job_id matches
# assert_equal job_run.id, latest_enqueued_job.id
#
# # optionally, truncate the history
# Mosquito::TestBackend.flush_enqueued_jobs!
# end
# end
# ```
class TestBackend < Mosquito::Backend
# There is no real connection behind the test backend.
def connection
nil
end
getter connection_string : String?
def connection_string=(value : String)
@connection_string = value
end
# The test backend needs no configuration, so it is always valid.
def valid_configuration? : Bool
true
end
# Storage is a no-op: nothing is kept, and retrieval always yields an
# empty hash.
def store(key : String, value : Hash(String, String?) | Hash(String, String)) : Nil
end
def retrieve(key : String) : Hash(String, String)
{} of String => String
end
# No queues or overseers are ever reported.
def list_queues : Array(String)
[] of String
end
def list_overseers : Array(String)
[] of String
end
def list_active_overseers(since : Time) : Array(String)
[] of String
end
def register_overseer(id : String) : Nil
end
def deregister_overseer(id : String) : Nil
end
def delete(key : String, in ttl : Int64 = 0) : Nil
end
def delete(key : String, in ttl : Time::Span) : Nil
end
def expires_in(key : String) : Int64
0_i64
end
# Field access is a no-op: reads return nil, writes are discarded.
def get(key : String, field : String) : String?
end
def set(key : String, field : String, value : String) : String
""
end
def set(key : String, values : Hash(String, String?) | Hash(String, Nil) | Hash(String, String)) : Nil
end
def delete_field(key : String, field : String) : Nil
end
# Counters are not tracked; increments always report zero.
def increment(key : String, field : String) : Int64
0_i64
end
def increment(key : String, field : String, by value : Int32) : Int64
0_i64
end
def flush : Nil; end
# Locks are never granted or renewed by the test backend.
def lock?(key : String, value : String, ttl : Time::Span) : Bool
false
end
def renew_lock?(key : String, value : String, ttl : Time::Span) : Bool
false
end
def unlock(key : String, value : String) : Nil
end
# Publishing is a no-op, and subscribing returns a fresh channel which
# this backend never sends to.
def publish(key : String, value : String) : Nil
end
def subscribe(key : String) : Channel(BroadcastMessage)
Channel(BroadcastMessage).new
end
# Averages are not tracked; the reported average is always zero.
def average_push(key : String, value : Int32, window_size : Int32 = 100) : Nil
end
def average(key : String) : Int32
0_i32
end
protected def _build_queue(name : String) : Queue
Queue.new(self, name)
end
# A snapshot of a job run taken at enqueue time: its id, the job class
# resolved from its type, and its config (parameters).
struct EnqueuedJob
getter id : String
getter klass : Mosquito::Job.class
getter config : Hash(String, String)
# Builds a snapshot from a job run, looking the job class up by type.
def self.from(job_run : JobRun)
job_class = Mosquito::Base.job_for_type(job_run.type)
new(
job_run.id,
job_class,
job_run.config
)
end
def initialize(@id, @klass, @config)
end
end
# Every job enqueued through the test backend, oldest first.
class_property enqueued_jobs = [] of EnqueuedJob
# Forgets every recorded enqueue.
def self.flush_enqueued_jobs!
@@enqueued_jobs = [] of EnqueuedJob
end
# Queue implementation for the test backend. Enqueues are recorded;
# dequeue-style operations raise because the test backend is not meant to
# run jobs.
class Queue < Backend::Queue
# Records the job run in `TestBackend.enqueued_jobs` and returns it.
def enqueue(job_run : JobRun) : JobRun
TestBackend.enqueued_jobs << EnqueuedJob.from(job_run)
job_run
end
def dequeue : JobRun?
raise "Mosquito: attempted to dequeue a job from the testing backend."
end
# Scheduling is accepted but not recorded.
def schedule(job_run : JobRun, at scheduled_time : Time) : JobRun
job_run
end
def deschedule : Array(JobRun)
raise "Mosquito: attempted to deschedule a job from the testing backend."
end
def undequeue : JobRun?
raise "Mosquito: attempted to undequeue a job from the testing backend."
end
def finish(job_run : JobRun)
end
def terminate(job_run : JobRun)
end
def flush : Nil
end
def size(include_dead : Bool = true) : Int64
0_i64
end
{% for name in ["waiting", "scheduled", "pending", "dead"] %}
def list_{{name.id}} : Array(String)
[] of String
end
def {{name.id}}_size : Int64
0_i64
end
{% end %}
def scheduled_job_run_time(job_run : JobRun) : Time?
end
# Names of the queues currently paused, shared by every test queue.
@@paused_queues = Set(String).new
# Resumes every queue at once.
def self.flush_paused_queues!
@@paused_queues.clear
end
# Pauses this queue. The duration is ignored: a paused test queue stays
# paused until `#resume` or `.flush_paused_queues!`.
def pause(duration : Time::Span? = nil) : Nil
@@paused_queues.add name
end
def resume : Nil
@@paused_queues.delete name
end
def paused? : Bool
@@paused_queues.includes? name
end
end
end
end
================================================
FILE: src/mosquito/unique_job.cr
================================================
# Mixin which guards enqueueing of a job behind a time-limited backend lock.
#
# Including the module installs a `before_enqueue` hook. While the configured
# duration is greater than zero, the hook asks `Mosquito.backend.lock?` for a
# lock whose key is derived from the job name and the job run's config. When
# the lock is not granted the hook logs "Duplicate job suppressed" and
# evaluates to `false`; otherwise it evaluates to `true`.
#
# ```
# class SendEmailJob < Mosquito::QueuedJob
#   include Mosquito::UniqueJob
#
#   unique_for 1.hour
#
#   param user_id : Int64
#   param email_type : String
#
#   def perform
#     # ...
#   end
# end
# ```
#
# Restricting the key to a subset of the parameters:
#
# ```
# class SendEmailJob < Mosquito::QueuedJob
#   include Mosquito::UniqueJob
#
#   unique_for 1.hour, key: [:user_id, :email_type]
#
#   param user_id : Int64
#   param email_type : String
#   param metadata : String
#
#   def perform
#     # ...
#   end
# end
# ```
module Mosquito::UniqueJob
  module ClassMethods
    # Method form of `unique_for`: stores *duration* in `@@unique_duration`.
    #
    # `duration` controls how long the uniqueness lock is held. After this
    # period expires, the same job can be enqueued again.
    #
    # This method takes no key filter. The `unique_for` macro declared in the
    # `included` hook is the form which accepts the optional `key` argument.
    def unique_for(duration : Time::Span)
      @@unique_duration = duration
    end
  end

  macro included
    extend ClassMethods

    # Defaults: a zero duration (the hook then always evaluates to true) and
    # no key filter (every config entry takes part in the key).
    @@unique_duration : Time::Span = Time::Span.zero
    @@unique_key_fields : Array(String)? = nil

    # Macro form of `unique_for`, with an optional key filter.
    #
    # When `key` is given, each of its elements is turned into a string and
    # the resulting list becomes the set of parameter names used for the
    # uniqueness fingerprint. When `key` is omitted the filter is cleared and
    # all parameters are used.
    macro unique_for(duration, key = nil)
      @@unique_duration = \{{ duration }}
      \{% if key %}
        @@unique_key_fields = [
          \{% for field in key %}
            \{{ field.id.stringify }},
          \{% end %}
        ]
      \{% else %}
        @@unique_key_fields = nil
      \{% end %}
    end

    before_enqueue do
      if @@unique_duration > Time::Span.zero
        key = uniqueness_key(job)

        # The job run id is used as the lock value.
        if Mosquito.backend.lock?(key, job.id, @@unique_duration)
          true
        else
          Log.info { "Duplicate job suppressed: #{self.class.name} (key: #{key})" }
          false
        end
      else
        true
      end
    end
  end

  # Derives the lock key for *job_run*.
  #
  # The fingerprint starts with the job name, followed by one `name=value`
  # segment per config entry in sorted name order, all joined with ":". When
  # `@@unique_key_fields` is set, only the names it lists contribute a
  # segment. The fingerprint is then passed to `Mosquito.backend.build_key`
  # together with the "unique_job" prefix.
  def uniqueness_key(job_run : Mosquito::JobRun) : String
    allowed_fields = @@unique_key_fields
    segments = [] of String
    segments << self.class.job_name

    job_run.config.keys.sort.each do |param_name|
      next unless allowed_fields.nil? || allowed_fields.includes?(param_name)
      segments << "#{param_name}=#{job_run.config[param_name]}"
    end

    Mosquito.backend.build_key "unique_job", segments.join(":")
  end

  # The uniqueness lock duration currently configured for this job class.
  def unique_duration : Time::Span
    @@unique_duration
  end
end
================================================
FILE: src/mosquito/version.cr
================================================
module Mosquito
# Version string of the library.
VERSION = "2.0.0"
end
================================================
FILE: src/mosquito.cr
================================================
require "./mosquito/runners/run_at_most"
require "./mosquito/api"
require "./mosquito/**"
module Mosquito
# Logger obtained from the top-level `::Log` for this module.
Log = ::Log.for self
# Shortcut for the backend held by the current configuration.
def self.backend
configuration.backend
end
end
================================================
FILE: src/ye_olde_redis.cr
================================================
# Monkeypatch to revert to the old Redis behavior, for Redis servers pre 6.2 which don't support
# https://redis.io/docs/latest/commands/lmove/
module Mosquito
  class RedisBackend < Mosquito::Backend
    class Queue < Backend::Queue
      # Replacement `dequeue` built on RPOPLPUSH, for servers without LMOVE.
      #
      # Issues `rpoplpush` from `waiting_q` to `pending_q`. When the call
      # yields an id, the matching job run is looked up with
      # `JobRun.retrieve`; when it yields nothing, nil is returned.
      def dequeue : JobRun?
        job_run_id = redis.rpoplpush waiting_q, pending_q
        return nil unless job_run_id

        JobRun.retrieve job_run_id.to_s
      end
    end
  end
end