Repository: stitchfix/flotilla-os Branch: master Commit: d16becadf8cb Files: 224 Total size: 1.1 MB Directory structure: gitextract_unzartjt/ ├── .circleci/ │ └── config.yml ├── .github/ │ ├── CODEOWNERS │ └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .migrations/ │ ├── V20200123054713__initial_table_create.sql │ ├── V20200123054714__add_spark_extension.sql │ ├── V20200205133700__executable.sql │ ├── V20200206115000__template.sql │ ├── V20200210154600__template_refactor.sql │ ├── V20200211160100__task_col_fix.sql │ ├── V20200211161900__template_indicies.sql │ ├── V20200212101900__template.sql │ ├── V20200213101400__task_indexes.sql │ ├── V20200213125200__rename_default_payload.sql │ ├── V20200225125200__add_limits.sql │ ├── V20200325125200__add_attempts.sql │ ├── V20200325125201__add_spawned.sql │ ├── V20200625125201__add_run_exceptions.sql │ ├── V20210083054714__metrics_uri.sql │ ├── V20210427125201__add_active_deadline_seconds.sql │ ├── V20210807125201__drop_index_container_name.sql │ ├── V20211007125201__add_description.sql │ ├── V20220907125201__add_idempotence.sql │ ├── V20220907125202__add_arch.sql │ ├── V20221215125203__add_labels.sql │ ├── V20230718115000__add_ephemeral_storage.sql │ ├── V20231013191711__add_requires_docker.sql │ ├── V20231122141100__add_target_cluster.sql │ ├── V20240205132100__add_service_account.sql │ ├── V20250122141100__add_cluster_routing.sql │ └── dev.conf ├── ARA_METRICS_COMPARISON.md ├── Dockerfile ├── LICENSE ├── README.html ├── README.md ├── ara-impact-report-staging.md ├── ara-impact-report.md ├── clients/ │ ├── cluster/ │ │ ├── cluster.go │ │ └── eks_cluster_client.go │ ├── httpclient/ │ │ ├── client.go │ │ └── client_test.go │ ├── logs/ │ │ ├── eks_cloudwatch_logs_client.go │ │ ├── eks_s3_logs_client.go │ │ └── logs.go │ ├── metrics/ │ │ ├── datadog_metrics_client.go │ │ └── metrics.go │ └── middleware/ │ └── client.go ├── conf/ │ └── config.yml ├── config/ │ ├── config.go │ └── config_test.go ├── datadog-ara-dashboard-api.json ├── docker-compose.yml ├── docs/ │ ├── ara-command-hash-bug-report.md │ ├── ara-command-hash-fix-locations.md │ ├── ara-command-hash-history.md │ ├── ara-instrumentation.md │ └── ara.md ├── exceptions/ │ └── errors.go ├── execution/ │ ├── adapter/ │ │ ├── eks_adapter.go │ │ └── eks_adapter_test.go │ └── engine/ │ ├── dcm.go │ ├── eks_engine.go │ ├── emr_engine.go │ └── engine.go ├── flotilla/ │ ├── app.go │ ├── endpoints.go │ ├── endpoints_test.go │ └── router.go ├── go.mod ├── go.sum ├── log/ │ ├── event.go │ ├── event_test.go │ ├── logger.go │ └── logger_test.go ├── main.go ├── queue/ │ ├── manager.go │ ├── sqs_manager.go │ └── sqs_manager_test.go ├── services/ │ ├── definition.go │ ├── definition_test.go │ ├── execution.go │ ├── execution_test.go │ ├── logs.go │ ├── logs_test.go │ ├── template.go │ └── worker.go ├── state/ │ ├── manager.go │ ├── models.go │ ├── models_test.go │ ├── pg_queries.go │ ├── pg_state_manager.go │ └── pg_state_manager_test.go ├── testutils/ │ └── mocks.go ├── tracing/ │ └── tracing.go ├── ui/ │ ├── .gitignore │ ├── .prettierrc │ ├── Dockerfile │ ├── README.md │ ├── package.json │ ├── public/ │ │ └── index.html │ ├── src/ │ │ ├── api.ts │ │ ├── components/ │ │ │ ├── ARASwitch.tsx │ │ │ ├── App.tsx │ │ │ ├── Attribute.tsx │ │ │ ├── AutoscrollSwitch.tsx │ │ │ ├── BaseTaskForm.tsx │ │ │ ├── CloudtrailRecords.tsx │ │ │ ├── ClusterSelect.tsx │ │ │ ├── CreateTaskForm.tsx │ │ │ ├── DeleteTaskButton.tsx │ │ │ ├── Duration.tsx │ │ │ ├── EngineTag.tsx │ │ │ ├── EnvFieldArray.tsx │ │ │ ├── EnvList.tsx │ │ │ 
├── EnvQueryFilter.tsx │ │ │ ├── ErrorCallout.tsx │ │ │ ├── FieldError.tsx │ │ │ ├── GenericMultiSelect.tsx │ │ │ ├── GroupNameSelect.tsx │ │ │ ├── ISO8601AttributeValue.tsx │ │ │ ├── ListFiltersDropdown.tsx │ │ │ ├── ListRequest.tsx │ │ │ ├── Log.tsx │ │ │ ├── LogProcessor.tsx │ │ │ ├── LogRequesterCloudWatchLogs.tsx │ │ │ ├── LogRequesterS3.tsx │ │ │ ├── LogVirtualized.tsx │ │ │ ├── LogVirtualizedRow.tsx │ │ │ ├── LogVirtualizedSearch.tsx │ │ │ ├── Navigation.tsx │ │ │ ├── NodeLifecycleSelect.tsx │ │ │ ├── Pagination.tsx │ │ │ ├── QueryParams.tsx │ │ │ ├── Request.tsx │ │ │ ├── ResourceUsageValue.tsx │ │ │ ├── Run.tsx │ │ │ ├── RunAttributes.tsx │ │ │ ├── RunDebugAttributes.tsx │ │ │ ├── RunEvents.tsx │ │ │ ├── RunSidebar.tsx │ │ │ ├── RunStatusSelect.tsx │ │ │ ├── RunTag.tsx │ │ │ ├── Runs.tsx │ │ │ ├── SettingsButton.tsx │ │ │ ├── SortableTh.tsx │ │ │ ├── StopRunButton.tsx │ │ │ ├── Table.tsx │ │ │ ├── TagsSelect.tsx │ │ │ ├── Task.tsx │ │ │ ├── TaskDetails.tsx │ │ │ ├── TaskExecutionForm.tsx │ │ │ ├── TaskRuns.tsx │ │ │ ├── Tasks.tsx │ │ │ ├── Template.tsx │ │ │ ├── TemplateDetails.tsx │ │ │ ├── TemplateExecutionForm.tsx │ │ │ ├── TemplateHistoryTable.tsx │ │ │ ├── TemplateRunForm.tsx │ │ │ ├── Templates.tsx │ │ │ ├── Toaster.ts │ │ │ ├── Toggler.tsx │ │ │ ├── UpdateTaskForm.tsx │ │ │ ├── ViewHeader.tsx │ │ │ └── __tests__/ │ │ │ ├── BaseTaskForm.spec.tsx │ │ │ ├── ClusterSelect.spec.tsx │ │ │ ├── CreateTaskForm.spec.tsx │ │ │ ├── DeleteTaskButton.spec.tsx │ │ │ ├── EnvFieldArray.spec.tsx │ │ │ ├── GroupNameSelect.spec.tsx │ │ │ ├── ListRequest.spec.tsx │ │ │ ├── LogProcessor.spec.tsx │ │ │ ├── LogVirtualized.spec.tsx │ │ │ ├── LogVirtualizedSearch.spec.tsx │ │ │ ├── Pagination.spec.tsx │ │ │ ├── QueryParams.spec.tsx │ │ │ ├── Request.spec.tsx │ │ │ ├── Run.spec.tsx │ │ │ ├── Runs.spec.tsx │ │ │ ├── StopRunButton.spec.tsx │ │ │ ├── TaskRuns.spec.tsx │ │ │ ├── Tasks.spec.tsx │ │ │ └── UpdateTaskForm.spec.tsx │ │ ├── constants.ts │ │ ├── helpers/ │ │ │ ├── FlotillaClient.ts │ │ │ ├── __mocks__/ │ │ │ │ └── FlotillaClient.ts │ │ │ ├── __tests__/ │ │ │ │ ├── FlotillaClient.spec.ts │ │ │ │ ├── getInitialValuesForTaskRun.spec.ts │ │ │ │ └── pageToOffsetLimit.spec.ts │ │ │ ├── calculateDuration.ts │ │ │ ├── constructDefaultObjectFromJsonSchema.ts │ │ │ ├── getEnhancedRunStatus.ts │ │ │ ├── getInitialValuesForExecutionForm.ts │ │ │ ├── getOwnerIdRunTagFromCookies.ts │ │ │ ├── pageToOffsetLimit.ts │ │ │ ├── runFormHelpers.ts │ │ │ ├── selectHelpers.ts │ │ │ ├── taskFormHelpers.ts │ │ │ └── testHelpers.ts │ │ ├── index.css │ │ ├── index.tsx │ │ ├── localstorage.ts │ │ ├── react-app-env.d.ts │ │ ├── setupTests.js │ │ ├── state/ │ │ │ ├── runView.ts │ │ │ ├── settings.ts │ │ │ └── store.ts │ │ ├── types.ts │ │ └── workers/ │ │ ├── index.ts │ │ └── log.worker.ts │ └── tsconfig.json ├── utils/ │ ├── dd_tracing.go │ └── utils.go └── worker/ ├── events_worker.go ├── events_worker_test.go ├── retry_worker.go ├── retry_worker_test.go ├── status_worker.go ├── status_worker_test.go ├── submit_worker.go ├── submit_worker_test.go ├── worker.go ├── worker_manager.go └── worker_test.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .circleci/config.yml ================================================ --- version: 2 jobs: build: working_directory: ~/go/src/github.com/stitchfix/flotilla-os docker: - image: cimg/go:1.24 environment: FLOTILLA_MODE: test DATABASE_URL: 
postgresql://flotilla:flotilla@localhost/flotilla?sslmode=disable READONLY_DATABASE_URL: postgresql://flotilla:flotilla@localhost/flotilla?sslmode=disable PG_USER: flotilla PG_HOST: 127.0.0.1 GO111MODULE: "on" - image: cimg/postgres:17.4 environment: POSTGRES_USER: flotilla POSTGRES_DB: flotilla POSTGRES_PASSWORD: flotilla steps: - checkout - run: name: Installing Flyway command: curl -sL https://repo1.maven.org/maven2/org/flywaydb/flyway-commandline/6.5.7/flyway-commandline-6.5.7-linux-x64.tar.gz | tar xz && sudo ln -s "$(pwd)/flyway-6.5.7/flyway" /usr/local/bin/flyway - run: name: Waiting for Postgres to be ready command: dockerize -wait tcp://localhost:5432 -timeout 5m - run: name: Set Up DB command: | pwd ls -a flyway baseline -configFiles=./.migrations/dev.conf \ -user=flotilla \ -password=flotilla flyway migrate -configFiles=./.migrations/dev.conf \ -locations=filesystem:./.migrations/ \ -user=flotilla \ -password=flotilla - run: go get ./... - run: go test -v ./... ================================================ FILE: .github/CODEOWNERS ================================================ # This file uses the GitHub CODEOWNERS convention to assign PR reviewers: # https://help.github.com/articles/about-codeowners/ * @stitchfix/dev-platform ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ## PROBLEM ## SOLUTION ================================================ FILE: .gitignore ================================================ # Binaries for programs and plugins *.exe *.dll *.so *.dylib # Test binary, build with `go test -c` *.test # Output of the go coverage tool, specifically when used with LiteIDE *.out # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 .glide/ vendor/** !vendor/vendor.json .idea *.iml flotilla-os # gh-pages and ui_branch node_modules .cache/ .DS_Store yarn-error.log ui/build/ .env ================================================ FILE: .migrations/V20200123054713__initial_table_create.sql ================================================ -- -- Definitions -- CREATE TABLE IF NOT EXISTS task_def ( definition_id character varying PRIMARY KEY, alias character varying, image character varying NOT NULL, group_name character varying NOT NULL, memory integer, cpu integer, gpu integer, command text, env jsonb, -- Refactor these "user" character varying, arn character varying, container_name character varying NOT NULL, task_type character varying, privileged boolean, adaptive_resource_allocation boolean, -- Refactor these CONSTRAINT task_def_alias UNIQUE(alias) ); CREATE TABLE IF NOT EXISTS task_def_ports ( task_def_id character varying NOT NULL REFERENCES task_def(definition_id), port integer NOT NULL, CONSTRAINT task_def_ports_pkey PRIMARY KEY(task_def_id, port) ); CREATE INDEX IF NOT EXISTS ix_task_def_alias ON task_def(alias); CREATE INDEX IF NOT EXISTS ix_task_def_group_name ON task_def(group_name); CREATE INDEX IF NOT EXISTS ix_task_def_image ON task_def(image); CREATE INDEX IF NOT EXISTS ix_task_def_env ON task_def USING gin (env jsonb_path_ops); -- -- Runs -- CREATE TABLE IF NOT EXISTS task ( run_id character varying NOT NULL PRIMARY KEY, definition_id character varying REFERENCES task_def(definition_id), alias character varying, image character varying, cluster_name character varying, exit_code integer, exit_reason character varying, status character varying, queued_at timestamp with time zone, started_at timestamp with time zone, finished_at timestamp with 
time zone, instance_id character varying, instance_dns_name character varying, group_name character varying, env jsonb, -- Refactor these -- task_arn character varying, docker_id character varying, "user" character varying, task_type character varying, -- Refactor these -- command text, command_hash text, memory integer, cpu integer, gpu integer, ephemeral_storage integer, node_lifecycle text, engine character varying DEFAULT 'eks' NOT NULL, container_name text, pod_name text, namespace text, max_cpu_used integer, max_memory_used integer, pod_events jsonb, cloudtrail_notifications jsonb ); CREATE INDEX IF NOT EXISTS ix_task_definition_id ON task(definition_id); CREATE INDEX IF NOT EXISTS ix_task_cluster_name ON task(cluster_name); CREATE INDEX IF NOT EXISTS ix_task_status ON task(status); CREATE INDEX IF NOT EXISTS ix_task_group_name ON task(group_name); CREATE INDEX IF NOT EXISTS ix_task_env ON task USING gin (env jsonb_path_ops); CREATE INDEX IF NOT EXISTS ix_task_definition_id ON task(definition_id); CREATE INDEX IF NOT EXISTS ix_task_task_arn ON task(task_arn); CREATE INDEX IF NOT EXISTS ix_task_definition_id_started_at_desc ON task(definition_id, started_at DESC NULLS LAST); CREATE INDEX IF NOT EXISTS ix_task_definition_id_started_at_desc_engine ON task(definition_id, started_at DESC NULLS LAST, engine); CREATE INDEX IF NOT EXISTS ix_finished_at_status_cluster_name ON task USING btree (cluster_name, status, finished_at DESC); CREATE INDEX IF NOT EXISTS ix_task_definition_id_started_at_asc ON task USING btree (definition_id, started_at); CREATE INDEX IF NOT EXISTS ix_task_pod_events ON task USING gin (pod_events jsonb_path_ops); CREATE INDEX IF NOT EXISTS ix_task_queued_at_status_engine ON task USING btree (queued_at, status, engine); CREATE INDEX IF NOT EXISTS task_definition_id_engine_started_at_index ON task USING btree (definition_id, engine, started_at DESC); -- -- Status -- CREATE TABLE IF NOT EXISTS task_status ( status_id integer NOT NULL PRIMARY KEY, task_arn character varying, status_version integer NOT NULL, status character varying, "timestamp" timestamp with time zone DEFAULT now() ); CREATE INDEX IF NOT EXISTS ix_task_status_task_arn ON task_status(task_arn); CREATE SEQUENCE IF NOT EXISTS task_status_status_id_seq START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE CACHE 1; ALTER TABLE ONLY task_status ALTER COLUMN status_id SET DEFAULT nextval('task_status_status_id_seq'::regclass); -- -- Tags -- CREATE TABLE IF NOT EXISTS tags ( text character varying NOT NULL PRIMARY KEY ); CREATE TABLE IF NOT EXISTS task_def_tags ( tag_id character varying NOT NULL REFERENCES tags(text), task_def_id character varying NOT NULL REFERENCES task_def(definition_id) ); CREATE TABLE IF NOT EXISTS worker ( worker_type character varying, engine character varying, count_per_instance integer ); ================================================ FILE: .migrations/V20200123054714__add_spark_extension.sql ================================================ ALTER TABLE task ADD COLUMN IF NOT EXISTS spark_extension JSONB; ================================================ FILE: .migrations/V20200205133700__executable.sql ================================================ ALTER TABLE task ADD COLUMN executable_id VARCHAR, ADD COLUMN executable_type VARCHAR DEFAULT 'task_definition'; ================================================ FILE: .migrations/V20200206115000__template.sql ================================================ CREATE TABLE template ( template_id VARCHAR PRIMARY KEY, type VARCHAR NOT NULL, 
version INTEGER NOT NULL, schema JSONB NOT NULL, command_template TEXT NOT NULL, image VARCHAR NOT NULL, memory INTEGER NOT NULL, gpu INTEGER NOT NULL, cpu INTEGER NOT NULL, env JSONB, privileged BOOLEAN, adaptive_resource_allocation BOOLEAN, container_name VARCHAR NOT NULL, CONSTRAINT template_type_version UNIQUE(type, version) ); ALTER TABLE task ADD COLUMN IF NOT EXISTS executable_request_custom JSONB; ================================================ FILE: .migrations/V20200210154600__template_refactor.sql ================================================ ALTER TABLE template DROP CONSTRAINT template_type_version; ALTER TABLE template RENAME COLUMN type to template_name; ALTER TABLE template ADD CONSTRAINT template_name_version UNIQUE(template_name, version); ================================================ FILE: .migrations/V20200211160100__task_col_fix.sql ================================================ ALTER TABLE task RENAME COLUMN executable_request_custom to execution_request_custom; ================================================ FILE: .migrations/V20200211161900__template_indicies.sql ================================================ CREATE INDEX IF NOT EXISTS ix_template_id ON template(template_id); CREATE INDEX IF NOT EXISTS ix_template_name ON template(template_name); ================================================ FILE: .migrations/V20200212101900__template.sql ================================================ ALTER TABLE template ADD COLUMN default_payload JSONB; ALTER TABLE template ADD COLUMN avatar_uri VARCHAR; ================================================ FILE: .migrations/V20200213101400__task_indexes.sql ================================================ CREATE INDEX IF NOT EXISTS ix_task_executable_id ON task(executable_id); CREATE INDEX IF NOT EXISTS ix_task_executable_id_started_at_desc ON task(executable_id, started_at DESC NULLS LAST); CREATE INDEX IF NOT EXISTS ix_task_executable_id_started_at_desc_engine ON task(executable_id, started_at DESC NULLS LAST, engine); ================================================ FILE: .migrations/V20200213125200__rename_default_payload.sql ================================================ ALTER TABLE template RENAME COLUMN default_payload to defaults; ================================================ FILE: .migrations/V20200225125200__add_limits.sql ================================================ ALTER TABLE task ADD COLUMN memory_limit integer; ALTER TABLE task ADD COLUMN cpu_limit integer; ================================================ FILE: .migrations/V20200325125200__add_attempts.sql ================================================ ALTER TABLE task ADD COLUMN attempt_count integer; ================================================ FILE: .migrations/V20200325125201__add_spawned.sql ================================================ ALTER TABLE task ADD COLUMN spawned_runs jsonb; ================================================ FILE: .migrations/V20200625125201__add_run_exceptions.sql ================================================ ALTER TABLE task ADD COLUMN run_exceptions jsonb; ================================================ FILE: .migrations/V20210083054714__metrics_uri.sql ================================================ ALTER TABLE task ADD COLUMN IF NOT EXISTS metrics_uri varchar; ================================================ FILE: .migrations/V20210427125201__add_active_deadline_seconds.sql ================================================ ALTER TABLE task ADD COLUMN active_deadline_seconds integer; 
================================================ FILE: .migrations/V20210807125201__drop_index_container_name.sql ================================================ alter table task_def alter column container_name drop not null; ================================================ FILE: .migrations/V20211007125201__add_description.sql ================================================ ALTER TABLE task ADD COLUMN IF NOT EXISTS description varchar; ================================================ FILE: .migrations/V20220907125201__add_idempotence.sql ================================================ ALTER TABLE task ADD COLUMN IF NOT EXISTS idempotence_key varchar; ================================================ FILE: .migrations/V20220907125202__add_arch.sql ================================================ ALTER TABLE task ADD COLUMN IF NOT EXISTS arch varchar; ================================================ FILE: .migrations/V20221215125203__add_labels.sql ================================================ ALTER TABLE task ADD COLUMN IF NOT EXISTS labels jsonb; ================================================ FILE: .migrations/V20230718115000__add_ephemeral_storage.sql ================================================ ALTER TABLE task_def ADD COLUMN IF NOT EXISTS ephemeral_storage INTEGER; ALTER TABLE task ADD COLUMN IF NOT EXISTS ephemeral_storage INTEGER; ================================================ FILE: .migrations/V20231013191711__add_requires_docker.sql ================================================ ALTER TABLE task_def ADD COLUMN IF NOT EXISTS requires_docker BOOLEAN DEFAULT(false); ALTER TABLE task ADD COLUMN IF NOT EXISTS requires_docker BOOLEAN DEFAULT(false); ================================================ FILE: .migrations/V20231122141100__add_target_cluster.sql ================================================ ALTER TABLE task_def ADD COLUMN IF NOT EXISTS target_cluster VARCHAR; ================================================ FILE: .migrations/V20240205132100__add_service_account.sql ================================================ ALTER TABLE task ADD COLUMN IF NOT EXISTS service_account VARCHAR; ================================================ FILE: .migrations/V20250122141100__add_cluster_routing.sql ================================================ DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'cluster_status') THEN CREATE TYPE cluster_status AS ENUM ('active', 'maintenance', 'offline'); END IF; END$$; CREATE TABLE IF NOT EXISTS cluster_state ( id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, name VARCHAR NOT NULL, cluster_version VARCHAR NOT NULL DEFAULT '', status cluster_status NOT NULL DEFAULT 'active', status_reason VARCHAR, status_since TIMESTAMP WITH TIME ZONE DEFAULT NOW(), capabilities VARCHAR[] NOT NULL DEFAULT '{}', allowed_tiers VARCHAR[] NOT NULL DEFAULT '{}', region VARCHAR NOT NULL, updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), namespace VARCHAR NOT NULL DEFAULT '', emr_virtual_cluster VARCHAR NOT NULL DEFAULT '', spark_server_uri VARCHAR NOT NULL DEFAULT '' ); CREATE INDEX IF NOT EXISTS ix_cluster_state_name ON cluster_state(name); CREATE INDEX IF NOT EXISTS ix_cluster_state_status ON cluster_state(status); DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='task' AND column_name='tier') THEN ALTER TABLE task ADD COLUMN tier TEXT; END IF; END$$; ================================================ FILE: .migrations/dev.conf ================================================ 
flyway.url=jdbc:postgresql://127.0.0.1:5432/flotilla flyway.user=flotilla flyway.password=flotilla flyway.cleanDisabled=true flyway.group=true flyway.locations=filesystem:.migrations ================================================ FILE: ARA_METRICS_COMPARISON.md ================================================ # ARA Metrics Implementation Comparison Comparing `ez/ara-metrics` (HEAD) vs `ez/ara-metrics-alt` ## Overview Both implementations add instrumentation to track Auto Resource Adjustment (ARA) behavior to identify over-provisioning patterns, particularly the ~300GB memory limit issue. However, they differ significantly in approach, metrics design, logging strategy, and code structure. --- ## Similarities ### Shared Goals - Track ARA resource adjustments - Detect when jobs hit maximum resource limits (especially 350GB memory) - Enable monitoring to identify over-provisioning patterns - Instrument `adaptiveResources()` function - Add structured logging for debugging ### Common Changes - Both modify `execution/adapter/eks_adapter.go` - Both add new metric constants to `clients/metrics/metrics.go` - Both track default resources before ARA applies adjustments - Both detect and report when max bounds are hit - Both use structured key-value logging format --- ## Key Differences ### 1. **Metric Naming Convention** **HEAD (`ez/ara-metrics`):** - Uses hierarchical dot notation: `engine.eks.ara.*` - Examples: `engine.eks.ara.estimation_attempted`, `engine.eks.ara.memory_increase` - Consistent with existing codebase pattern (`engine.eks.execute`, etc.) **Alt (`ez/ara-metrics-alt`):** - Uses flat namespace: `ara.*` - Examples: `ara.resource_adjustment`, `ara.memory_increase_ratio` - Shorter, more concise names **Winner:** HEAD - Consistent with existing naming conventions --- ### 2. **Metrics Coverage** **HEAD (10 metrics):** ```go // Estimation tracking EngineEKSARAEstimationAttempted // Counter EngineEKSARAEstimationSucceeded // Counter EngineEKSARAEstimationFailed // Counter // Resource tracking EngineEKSARAMaxResourceHit // Counter (tagged with resource:memory or resource:cpu) EngineEKSARAMemoryIncrease // Distribution EngineEKSARACPUIncrease // Distribution EngineEKSARADefaultMemory // Distribution EngineEKSARAARAMemory // Distribution EngineEKSARADefaultCPU // Distribution EngineEKSARAARACPU // Distribution ``` **Alt (8 metrics):** ```go // Core tracking ARAResourceAdjustment // Counter (when ARA triggers) ARANoHistoricalData // Counter (when no data found) // Ratio tracking ARAMemoryIncreaseRatio // Histogram ARACPUIncreaseRatio // Histogram // Limit detection ARAHitMaxMemory // Counter ARAHitMaxCPU // Counter // Final distributions ARAFinalMemoryMB // Histogram ARAFinalCPUMillicores // Histogram ``` **Comparison:** - **HEAD:** More granular - separates estimation attempts from successes/failures - **ALT:** More focused - tracks key ratios and final states - **HEAD:** Tracks resource increases as absolute values - **ALT:** Tracks increases as ratios (better for understanding relative growth) **Winner:** Tie - Both approaches have merit. HEAD provides more granularity; ALT provides better insight into relative growth. --- ### 3. 
**Logging Strategy** **HEAD:** - Logging only occurs when max resource bounds are hit - Uses stored logger instance (field on `eksAdapter`) - Separate `emitARAMetrics()` method for structured logging - Logs once per max-bound-hit event - Fields: `run_id`, `definition_id`, `executable_id`, `command`, default/final resources, max hit flags **ALT:** - **Multiple logging points:** 1. When ARA triggers adjustments (INFO level) 2. When max limits hit (WARN level) 3. In `state/pg_state_manager.go` for historical data lookups (success/no data/error) - Uses inline `flotillaLog.NewLogger(nil, nil)` - creates new logger instances - More verbose logging at each step - Detailed structured fields including ratios, overage amounts, cluster name - Separate logs for historical data lookup success/failure **Winner:** ALT - More comprehensive logging provides better debugging capability --- ### 4. **Logger Management** **HEAD:** ```go type eksAdapter struct { logger flotillaLog.Logger // Stored as field } func NewEKSAdapter(logger flotillaLog.Logger) (EKSAdapter, error) { adapter := eksAdapter{logger: logger} return &adapter, nil } // Usage in HEAD if a.logger == nil { return } a.logger.Log(logFields...) ``` **ALT:** ```go // No logger field stored // Creates new logger instances inline _ = flotillaLog.NewLogger(nil, nil).Log(...) ``` **Comparison:** - **HEAD:** Dependency injection pattern - logger passed via constructor, stored as field - **ALT:** Creates new logger instances inline (less efficient, harder to test) - **HEAD:** Requires updating `eks_engine.go` to pass logger (which it does) - **ALT:** No changes needed to constructor/initialization **Winner:** HEAD - Better design pattern (dependency injection), more testable --- ### 5. **Tagging Strategy** **HEAD:** - No tags used on metrics (empty `[]string{}`) - Simpler, avoids cardinality concerns - May limit filtering/grouping capabilities in DataDog **ALT:** - Uses cluster tags: `[]string{fmt.Sprintf("cluster:%s", run.ClusterName)}` - Explicitly documented as "low-cardinality tags to avoid excessive volume" - Enables per-cluster analysis **Winner:** ALT - Tags enable better filtering and per-cluster analysis --- ### 6. **Metric Types** **HEAD:** - Uses `Distribution()` for all numeric metrics - Uses `Increment()` for counters **ALT:** - Uses `Histogram()` for ratios and final values - Uses `Increment()` for counters **Comparison:** - DataDog treats Histogram and Distribution similarly for most use cases - Both approaches are valid **Winner:** Tie - No significant difference --- ### 7. **Code Structure** **HEAD:** - Cleaner separation: detects max hits after bounds checking - Uses helper method `emitARAMetrics()` to centralize logging logic - More modular: logging logic separate from bounds checking **ALT:** - Metrics/logging embedded directly in `checkResourceBounds()` - Requires passing additional parameters (`run`, `executable`, `defaultCPU`, etc.) to `checkResourceBounds()` - More invasive changes to function signatures - Inline logging at multiple points **Winner:** HEAD - Better code organization, less invasive changes --- ### 8. **State Manager Instrumentation** **HEAD:** - No changes to `state/pg_state_manager.go` - Only instruments the adapter layer **ALT:** - **Adds instrumentation to `state/pg_state_manager.go`** - Logs when historical data is found/not found/errors occur - Provides visibility into the data lookup layer - Helps debug issues with historical data queries **Winner:** ALT - Provides better end-to-end visibility --- ### 9. 
**Test Coverage** **HEAD:** - **Comprehensive test suite** (524 lines in `eks_adapter_test.go`) - Tests multiple scenarios: - ARA enabled with successful estimation - GPU jobs (skip ARA) - Estimation failures - Max resource bounds hitting - ARA disabled - Logger nil handling - Mock implementations for logger and state manager **ALT:** - No test files included **Winner:** HEAD - Significantly better test coverage --- ### 10. **Documentation** **HEAD:** - Commit message describes changes - No separate documentation file **ALT:** - **Comprehensive 317-line documentation** (`docs/ara-instrumentation.md`) - Includes: - Overview of ARA algorithm - Historical context of ARA implementation - Detailed explanation of metrics - DataDog query examples - Alert recommendations - Investigation workflow - Future improvement suggestions - Extremely helpful for operators and future developers **Winner:** ALT - Outstanding documentation --- ### 11. **Detection Logic** **HEAD:** ```go // After bounds checking cpuRequestBeforeBounds := cpuRequest memRequestBeforeBounds := memRequest cpuRequest, memRequest = a.checkResourceBounds(...) // Then detect hits if memRequestBeforeBounds > maxMem { maxMemHit = true // emit metrics/logs } ``` **ALT:** ```go // Inside checkResourceBounds() if mem > maxMem { // Emit metrics and logs immediately _ = metrics.Increment(metrics.ARAHitMaxMemory, ...) // ... logging ... mem = maxMem } ``` **Comparison:** - **HEAD:** Two-step process - check bounds, then detect if hit - **ALT:** Single-step - detect and log during bounds checking - **ALT:** More straightforward, less code **Winner:** ALT - Simpler, more direct approach --- ### 12. **ARA Trigger Detection** **HEAD:** - No explicit "ARA triggered" detection - Only tracks estimation attempts/success/failure - Doesn't distinguish between "ARA found same values" vs "ARA actually changed resources" **ALT:** ```go araTriggered := (estimatedResources.Cpu != cpuRequest || estimatedResources.Memory != memRequest) ``` - Explicitly detects when ARA actually changes resources - Only logs/increments metrics when resources actually change - More precise tracking **Winner:** ALT - More accurate tracking of actual ARA adjustments --- ## Best-of-Breed Recommendation **The ideal solution would combine:** ### From HEAD: 1. ✅ **Metric naming convention** - Use `engine.eks.ara.*` pattern 2. ✅ **Logger as dependency** - Store logger as field, inject via constructor 3. ✅ **Code organization** - Separate `emitARAMetrics()` method 4. ✅ **Test coverage** - Include comprehensive test suite 5. ✅ **Granular metrics** - Track estimation attempts/success/failure separately ### From ALT: 1. ✅ **Logging strategy** - Log when ARA triggers AND when limits hit 2. ✅ **State manager instrumentation** - Add logging in `pg_state_manager.go` 3. ✅ **Documentation** - Include comprehensive docs file 4. ✅ **Tagging** - Use cluster tags for filtering 5. ✅ **Ratio metrics** - Track ratios instead of/in addition to absolute increases 6. ✅
**ARA trigger detection** - Explicitly detect when ARA actually changes resources ### Hybrid Approach: ```go // Metrics (combine both approaches) - engine.eks.ara.estimation_attempted // Counter - engine.eks.ara.estimation_succeeded // Counter - engine.eks.ara.estimation_failed // Counter - engine.eks.ara.resource_adjustment // Counter (only when changed) - engine.eks.ara.memory_increase_ratio // Histogram (ALT's approach) - engine.eks.ara.cpu_increase_ratio // Histogram - engine.eks.ara.hit_max_memory // Counter - engine.eks.ara.hit_max_cpu // Counter - engine.eks.ara.final_memory_mb // Histogram - engine.eks.ara.final_cpu_millicores // Histogram // Logging (ALT's comprehensive approach) - Log when ARA triggers (INFO) - Log when limits hit (WARN) - Log in state manager for historical lookups // Code structure (HEAD's approach) - Store logger as field - Separate emitARAMetrics() method - Use cluster tags on metrics // Documentation - Include ALT's comprehensive docs // Tests - Include HEAD's comprehensive test suite ``` --- ## Verdict **Best Overall:** Neither solution is perfect alone. **ALT is closer to production-ready** due to: - Comprehensive documentation - Better logging strategy - End-to-end instrumentation - Ratio-based metrics (easier to understand) **But HEAD has better engineering practices:** - Dependency injection - Test coverage - Code organization **Recommendation:** Start with ALT as the base, then incorporate HEAD's improvements: 1. Store logger as field (HEAD) 2. Add test suite (HEAD) 3. Optionally adjust metric names to match HEAD's convention 4. Keep ALT's logging and documentation This hybrid would be the best-of-breed solution. ================================================ FILE: Dockerfile ================================================ FROM golang:latest RUN mkdir -p /go/src/github.com/stitchfix/flotilla-os ADD . /go/src/github.com/stitchfix/flotilla-os WORKDIR /go/src/github.com/stitchfix/flotilla-os RUN go install github.com/stitchfix/flotilla-os ENTRYPOINT /go/bin/flotilla-os /go/src/github.com/stitchfix/flotilla-os/conf ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.html ================================================ README

flotilla-os

Circle CI Go Report Card

Introduction

Flotilla is a self-service framework that dramatically simplifies the process of defining and executing containerized jobs. This means you get to focus on the work you’re doing rather than how to do it.

Once deployed, Flotilla allows you to:

  • Define containerized jobs by specifying exactly what command to run, what image to run that command in, and what resources that command needs to run
  • Run any previously defined job and access its logs, status, and exit code
  • View and edit job definitions with a flexible UI
  • Run jobs and view execution history and logs within the UI
  • Use the complete REST API for definitions, jobs, and logs to build your own custom workflows

Philosophy

Flotilla is strongly opinionated about self-service for data science.

The core assumption is that you understand your work the best. Therefore, it is you who should own your work from end-to-end. In other words, you shouldn’t need to be a “production engineer” to run your jobs or to access logs in case of problems. Do this with Flotilla.

Quick Start

Minimal Assumptions

Before we can do anything, there are some prerequisites that must be met.

  1. Flotilla by default uses AWS. You must have an AWS account and AWS keys available. This quick-start guide uses AWS keys exported into the environment variables: AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY. If you’ve got credentials configured on your machine you can set these easily by running:
export AWS_ACCESS_KEY_ID=$(aws --profile default configure get aws_access_key_id)
export AWS_SECRET_ACCESS_KEY=$(aws --profile default configure get aws_secret_access_key)

Note: When running on AWS EC2 instances or ECS it’s better practice to use an IAM profile for AWS credentials. (A short Go sketch for verifying these environment variables follows this list.)

  2. The AWS credentials must be authorized. The permissions required are described in the following policy document for AWS (you can attach it to a user or a role depending on how you manage users in AWS).
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "flotilla-policy",
            "Effect": "Allow",
            "Action": [
                "sqs:DeleteMessage",
                "sqs:ListQueues",
                "sqs:GetQueueUrl",
                "logs:DescribeLogGroups",
                "sqs:ReceiveMessage",
                "events:PutRule",
                "sqs:SendMessage",
                "sqs:GetQueueAttributes",
                "ecs:DescribeClusters",
                "ecs:DeregisterTaskDefinition",
                "events:ListRuleNamesByTarget",
                "ecs:RunTask",
                "ecs:RegisterTaskDefinition",
                "sqs:CreateQueue",
                "ecs:ListContainerInstances",
                "ecs:DescribeContainerInstances",
                "ecs:ListClusters",
                "ecs:StopTask",
                "logs:CreateLogGroup",
                "logs:PutRetentionPolicy",
                "logs:GetLogEvents",
                "events:PutTargets",
                "sqs:SetQueueAttributes"
            ],
            "Resource": "*"
        }
    ]
}
  3. Flotilla uses AWS’s Elastic Container Service (ECS) and Elastic Kubernetes Service (EKS) as the execution backend. However, Flotilla does not manage ECS/EKS clusters. There must be at least one cluster defined in AWS’s ECS/EKS service available to you and it must have at least one task node. Most typically this is the default cluster and examples will assume this going forward. You can easily set up a cluster by following the instructions here: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/launch_container_instance.html

https://docs.aws.amazon.com/eks/latest/userguide/what-is-eks.html
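
If you want to sanity-check that the keys exported in step 1 are visible to code, here is a minimal Go sketch using the aws-sdk-go credentials package (the package choice is illustrative; any AWS client that reads environment credentials behaves the same way):

package main

import (
    "fmt"
    "log"

    "github.com/aws/aws-sdk-go/aws/credentials"
)

func main() {
    // NewEnvCredentials reads AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY,
    // the same variables exported in step 1 above.
    creds := credentials.NewEnvCredentials()
    val, err := creds.Get()
    if err != nil {
        log.Fatalf("credentials not found in environment: %v", err)
    }
    fmt.Printf("found access key %s via %s\n", val.AccessKeyID, val.ProviderName)
}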

Starting the service locally

You can run the service locally (which will still leverage AWS resources) using the docker-compose tool. From inside the repo run:

docker-compose up -d

You’ll notice it builds the code in the repo and starts the flotilla service as well as the default postgres backend.

Verify the service is running by making a GET request with cURL to the url http://localhost:5000/api/v6/task (or by navigating to it in a web browser). A 200 OK response means things are good!

Note: The default configuration under conf and in the docker-compose.yml assume port 3000. You’ll have to change it in both places if you don’t want to use port 3000 locally.
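
The same check can be scripted. A minimal Go sketch, assuming the service is listening at the address used in the example above:

package main

import (
    "fmt"
    "log"
    "net/http"
)

func main() {
    // Hit the task listing endpoint; a 200 means the service
    // (and its postgres backend) is up.
    resp, err := http.Get("http://localhost:5000/api/v6/task")
    if err != nil {
        log.Fatalf("flotilla not reachable: %v", err)
    }
    defer resp.Body.Close()
    if resp.StatusCode != http.StatusOK {
        log.Fatalf("unexpected status: %s", resp.Status)
    }
    fmt.Println("flotilla is up")
}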

Using the UI

Flotilla has a simple, easy-to-use UI. Here are some example images for basic usage.

Define a task with the UI

The UI allows you to quickly create new tasks.

Define Task

Launch a task with UI

You can run tasks you’ve created with the UI as well. Once you’ve run a task, the run will transition from Queued to Pending to Running before it finishes and shows Success or Failed (see Task Life Cycle). Once a task is in the Running state the logs should be visible.

  1. Launch

Run Task

  2. Queued –> Pending

Queued Task

Pending Task

  3. View logs

Running Task

Finished Task

Basic API Usage

Defining your first task

Before you can run a task you first need to define it. We’ll use the example hello world task definition. Here’s what that looks like:

hello-world.json

{
  "alias": "hello-flotilla",
  "group_name": "examples",
  "image": "ubuntu:latest",
  "memory": 512,
  "env": [
    {
      "name": "USERNAME",
      "value": "_fill_me_in_"
    }
  ],
  "command": "echo \"hello ${USERNAME}\""
}

It’s a simple task that runs in the default ubuntu image, prints your username to the logs, and exits.

Note: While you can use non-public images and images in your own registries with flotilla, credentials for accessing those images must exist on the ECS hosts. This is outside the scope of this doc. See the AWS documentation.

Let’s define it:

curl -XPOST localhost:5000/api/v6/task --data @examples/hello-world.json

You’ll notice that if you visit the initial url again http://localhost:5000/api/v6/task the newly defined definition will be in the list.
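
The same definition call can be made programmatically with nothing but Go's standard library. A minimal sketch, assuming the service address above and that examples/hello-world.json is the file shown earlier:

package main

import (
    "bytes"
    "fmt"
    "log"
    "net/http"
    "os"
)

func main() {
    // POST the hello-world.json definition, mirroring the curl command.
    body, err := os.ReadFile("examples/hello-world.json")
    if err != nil {
        log.Fatal(err)
    }
    resp, err := http.Post("http://localhost:5000/api/v6/task",
        "application/json", bytes.NewReader(body))
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()
    fmt.Println("define task:", resp.Status)
}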

Running your first task

This is the fun part. You’ll make a PUT request to the execution endpoint for the task you just defined and specify any environment variables.

curl -XPUT localhost:5000/api/v6/task/alias/hello-flotilla/execute -d '{
  "cluster":"default",
  "env":[
    {"name":"USERNAME","value":"yourusername"}
  ],
  "run_tags":{"owner_id":"youruser"}
}'

Note: run_tags is defined as a way for all runs to have ownership injected for visibility and is required.
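
For programmatic launches, here is the equivalent of the curl command above as a minimal Go sketch (the response field name run_id is taken from the description below):

package main

import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "strings"
)

func main() {
    // PUT to the execution endpoint; run_tags.owner_id is required.
    payload := `{
        "cluster": "default",
        "env": [{"name": "USERNAME", "value": "yourusername"}],
        "run_tags": {"owner_id": "youruser"}
    }`
    req, err := http.NewRequest(http.MethodPut,
        "http://localhost:5000/api/v6/task/alias/hello-flotilla/execute",
        strings.NewReader(payload))
    if err != nil {
        log.Fatal(err)
    }
    req.Header.Set("Content-Type", "application/json")
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()
    var out struct {
        RunID string `json:"run_id"`
    }
    if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
        log.Fatal(err)
    }
    fmt.Println("run_id:", out.RunID)
}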

You’ll get a response that contains a run_id field. You can check the status of your task at http://localhost:5000/api/v6/history/<run_id>

curl -XGET localhost:5000/api/v6/history/<run_id>

{
  "instance": {
    "dns_name": "<dns-host-of-task-node>",
    "instance_id": "<instance-id-of-task-node>"
  },
  "run_id": "<run_id>",
  "definition_id": "<definition_id>",
  "alias": "hello-flotilla",
  "image": "ubuntu:latest",
  "cluster": "default",
  "status": "PENDING",
  "env": [
    {
      "name": "FLOTILLA_RUN_OWNER_ID",
      "value": "youruser"
    },
    {
      "name": "FLOTILLA_SERVER_MODE",
      "value": "dev"
    },
    {
      "name": "FLOTILLA_RUN_ID",
      "value": "<run_id>"
    },
    {
      "name": "USERNAME",
      "value": "yourusername"
    }
  ]
}

and you can get the logs for your task at http://localhost:5000/api/v6/<run_id>/logs. You will not see any logs until your task is at least in the RUNNING state.

curl -XGET localhost:5000/api/v6/<run_id>/logs

{
  "last_seen":"<last_seen_token_used_for_paging>",
  "log":"+ set -e\n+ echo 'hello yourusername'\nhello yourusername"
}
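
Because last_seen is a paging token, a log client can poll the endpoint and resume from where it left off. A minimal Go sketch; note that passing the token back as a last_seen query parameter is an assumption made for illustration:

package main

import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "net/url"
    "time"
)

type logsResponse struct {
    LastSeen string `json:"last_seen"`
    Log      string `json:"log"`
}

func main() {
    runID := "your-run-id" // placeholder for the run_id returned above
    lastSeen := ""
    for i := 0; i < 10; i++ {
        endpoint := fmt.Sprintf("http://localhost:5000/api/v6/%s/logs", runID)
        if lastSeen != "" {
            endpoint += "?last_seen=" + url.QueryEscape(lastSeen)
        }
        resp, err := http.Get(endpoint)
        if err != nil {
            log.Fatal(err)
        }
        var lr logsResponse
        err = json.NewDecoder(resp.Body).Decode(&lr)
        resp.Body.Close()
        if err != nil {
            log.Fatal(err)
        }
        if lr.Log != "" {
            fmt.Println(lr.Log)
        }
        lastSeen = lr.LastSeen
        time.Sleep(5 * time.Second)
    }
}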

Definitions and Task Life Cycle

Definitions

Name Definition
task A definition of a task that can be executed to create a run
run An instance of a task

Task Life Cycle

When executed, a task’s run goes through several transitions

  1. QUEUED - this is the first phase of a run and means the run is currently queued and waiting to be allocated to a cluster
  2. PENDING - every worker.submit_interval (defined in the config) the submit worker pulls from the queues and submits them for execution. At this point, if the cluster associated with the run has resources, the run gets allocated to the cluster and transitions to the PENDING status. For the default execution engine this stage encapsulates the process of pulling the docker image and starting the container. It can take several minutes depending on whether the image is cached and how large the image is.
  3. RUNNING - Once the run starts on a particular execution host it transitions to this stage. At this point logs should become available.
  4. STOPPED - A run enters this stage when it finishes execution. This can mean it either succeeded or failed depending on the existence of an exit_code and the value of that exit code.
  5. NEEDS_RETRY - on occasion, due to host-level characteristics (full disk, too many open files, timeouts pulling an image, etc.) the run exits with a null exit code without ever being executed. In this case the reason is analyzed to determine whether the run is retriable. If it is, the task transitions to this status, is allocated to the appropriate execution queue again, and repeats the lifecycle.

Normal Lifecycle

QUEUED –> PENDING –> RUNNING –> STOPPED

Retry Lifecycle

… –> PENDING –> STOPPED –> NEEDS_RETRY –> QUEUED –> …
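
The transitions above amount to a small state machine. The following Go sketch encodes them for illustration only; it is not the repository's actual state manager code:

package main

import "fmt"

type Status string

const (
    Queued     Status = "QUEUED"
    Pending    Status = "PENDING"
    Running    Status = "RUNNING"
    Stopped    Status = "STOPPED"
    NeedsRetry Status = "NEEDS_RETRY"
)

// transitions captures the normal and retry lifecycles described above.
var transitions = map[Status][]Status{
    Queued:     {Pending},
    Pending:    {Running, Stopped},
    Running:    {Stopped},
    Stopped:    {NeedsRetry},
    NeedsRetry: {Queued},
}

func canTransition(from, to Status) bool {
    for _, next := range transitions[from] {
        if next == to {
            return true
        }
    }
    return false
}

func main() {
    fmt.Println(canTransition(Queued, Pending)) // true
    fmt.Println(canTransition(Running, Queued)) // false
}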

Deploying

In a production deployment you’ll want multiple instances of the flotilla service running and postgres running elsewhere (e.g. Amazon RDS). In this case the most salient configuration detail is the DATABASE_URL.

Docker based deploy

The simplest way to deploy for very light usage is to avoid a reverse proxy and deploy directly with docker.

  1. Build and tag an image for flotilla using the Dockerfile provided in this repo:

    docker build -t <your repo name>/flotilla:<version tag> .
    
    2. Run this image wherever you deploy your services:

    docker run -e DATABASE_URL=<your db url> -e FLOTILLA_MODE=prod -p 3000:3000 ...<other standard docker run args>
    

    Notes:

    • Flotilla uses viper for configuration so you can override any of the default configuration under conf/ using run time environment variables passed to docker run
    • In most realistic deploys you’ll likely want to configure a reverse proxy to sit in front of the flotilla container. See the docs here

    See docker run for more details

Configuration In Detail

The variables in conf/config.yml are sensible defaults. Most should be left alone unless you’re developing flotilla itself. However, there are a few you may want to change in a production environment.

Variable Name Description
worker.retry_interval Run frequency of the retry worker
worker.submit_interval Poll frequency of the submit worker
worker.status_interval Poll frequency of the status update worker
http.server.read_timeout_seconds Sets read timeout in seconds for the http server
http.server.write_timeout_seconds Sets the write timeout in seconds for the http server
http.server.listen_address The port for the http server to listen on
owner_id_var Which environment variable containing ownership information to inject into the runtime of jobs
enabled_workers This variable is a list of the workers that run. Use this to control what workers run when using a multi-container deployment strategy. Valid list items include (retry, submit, and status)
log.namespace For the default ECS execution engine setup this is the log-group to use
log.retention_days For the default ECS execution engine this is the number of days to retain logs
log.driver.options.* For the default ECS execution engine these map to the awslogs driver options here
queue.namespace For the default ECS execution engine this is the prefix used for SQS to determine which queues to pull job launch messages from
queue.retention_seconds For the default ECS execution engine this configures how long a message will stay in an SQS queue without being consumed
queue.process_time For the default ECS execution engine configures the length of time allowed to process a job launch message
queue.status For the default ECS execution engine this configures which SQS queue to route ECS cluster status updates to
queue.status_rule For the default ECS execution engine this configures the name of the rule for routing ECS cluster status updates
metrics.dogstatsd.address Statsd metrics host in Datadog format
metrics.dogstatsd.namespace Namespace for the metrics - for example flotilla.
redis_address Redis host for caching and locks
redis_db Redis db to be used - numeric
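
Because configuration is read through viper, any of the keys above can be overridden from the environment at run time. A minimal sketch of the mechanism, assuming the common viper pattern of mapping dots in keys to underscores in variable names (flotilla's actual loading lives in config/config.go):

package main

import (
    "fmt"
    "strings"

    "github.com/spf13/viper"
)

func main() {
    v := viper.New()
    // worker.retry_interval becomes the env var WORKER_RETRY_INTERVAL.
    v.SetEnvKeyReplacer(strings.NewReplacer(".", "_"))
    v.AutomaticEnv()
    v.SetDefault("worker.retry_interval", "30s")
    // An exported WORKER_RETRY_INTERVAL now overrides the default.
    fmt.Println("retry interval:", v.GetString("worker.retry_interval"))
}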

Development

API Documentation

See API

Building

Currently Flotilla is built using Go 1.9.3 and uses govendor to manage dependencies.

govendor sync && go build
================================================ FILE: README.md ================================================ # flotilla-os [![Circle CI](https://circleci.com/gh/stitchfix/flotilla-os.svg?style=shield)](https://circleci.com/gh/stitchfix/flotilla-os) [![Go Report Card](https://goreportcard.com/badge/github.com/stitchfix/flotilla-os)](https://goreportcard.com/report/github.com/stitchfix/flotilla-os) ## Introduction Flotilla is a self-service framework that dramatically simplifies the process of defining and executing containerized jobs. This means you get to focus on the work you're doing rather than _how_ to do it. Once deployed, Flotilla allows you to: * Define containerized jobs by specifying exactly what command to run, what image to run that command in, and what resources that command needs to run * Run any previously defined job and access its logs, status, and exit code * View and edit job definitions with a flexible UI * Run jobs and view execution history and logs within the UI * Use the complete REST API for definitions, jobs, and logs to build your own custom workflows ## Philosophy Flotilla is strongly opinionated about self-service for data science. The core assumption is that you understand your work the best. Therefore, it is _you_ who should own your work from end-to-end. In other words, you shouldn't need to be a "production engineer" to run your jobs or to access logs in case of problems. Do this with Flotilla. ## Quick Start ### Minimal Assumptions Flotilla uses AWS's Elastic Kubernetes Service (EKS) as the execution backend. However, Flotilla does not manage EKS clusters. There must be at least one cluster defined in AWS's EKS service available to you and it must have at least one task node. Most typically this is the `default` cluster and examples will assume this going forward. https://docs.aws.amazon.com/eks/latest/userguide/what-is-eks.html ### Starting the service locally You can run the service locally (which will still leverage AWS resources) using the [docker-compose](https://docs.docker.com/compose/) tool. From inside the repo run: ``` docker-compose up -d ``` You'll notice it builds the code in the repo and starts the flotilla service as well as the default postgres backend. Verify the service is running by making a `GET` request with cURL to the url `http://localhost:5000/api/v6/task` (or by navigating to it in a web browser). A 200 OK response means things are good! > Note: The default configuration under `conf` and in the `docker-compose.yml` assume port 3000. You'll have to change it in both places if you don't want to use port 3000 locally. ### Using the UI Flotilla has a simple, easy-to-use UI. Here are some example images for basic usage. #### Define a task with the UI The UI allows you to quickly create new tasks. ![Define Task](https://user-images.githubusercontent.com/10807627/36499487-47a0b82c-16f6-11e8-886b-ca6d38276889.png "Create New Task") #### Launch a task with UI You can run tasks you've created with the UI as well. Once you've run a task the run will transition from `Queued` to `Pending` to `Running` before it finishes and shows `Success` or `Failed` (see [Task Life Cycle](#definitions-and-task-life-cycle)). Once a task is in the `Running` state the logs should be visible. 1. Launch ![Run Task](https://user-images.githubusercontent.com/10807627/36499492-481da436-16f6-11e8-9f14-5bbe8c297434.png "Run Task") 2.
2. Queued --> Pending

![Queued Task](https://user-images.githubusercontent.com/10807627/36499491-4801515a-16f6-11e8-9525-db85bb999887.png "Queued Task")
![Pending Task](https://user-images.githubusercontent.com/10807627/36499490-47e27e88-16f6-11e8-8041-355de885be44.png "Pending Task")

3. View logs

![Running Task](https://user-images.githubusercontent.com/10807627/36499493-4842176c-16f6-11e8-9467-a345987bd407.png "Running Task")
![Finished Task](https://user-images.githubusercontent.com/10807627/36499494-48609cfa-16f6-11e8-8656-5504063cb6e7.png "Finished Task")

### Basic API Usage

#### Defining your first task

Before you can run a task you first need to define it. We'll use the example hello world task definition. Here's what that looks like:

> hello-world.json

```
{
  "alias": "hello-flotilla",
  "group_name": "examples",
  "image": "ubuntu:latest",
  "memory": 512,
  "env": [
    {
      "name": "USERNAME",
      "value": "_fill_me_in_"
    }
  ],
  "command": "echo \"hello ${USERNAME}\""
}
```

It's a simple task that runs in the default ubuntu image, prints your username to the logs, and exits.

> Note: While you can use non-public images and images in your own registries with flotilla, credentials for accessing those images must exist on the EKS hosts. This is outside the scope of this doc.

Let's define it:

```
curl -XPOST localhost:5000/api/v6/task --data @examples/hello-world.json
```

You'll notice that if you visit the initial URL again, `http://localhost:5000/api/v6/task`, the newly created definition will be in the list.

#### Running your first task

This is the fun part. You'll make a `PUT` request to the execution endpoint for the task you just defined and specify any environment variables.

```
curl -XPUT localhost:5000/api/v6/task/alias/hello-flotilla/execute -d '{
  "cluster":"default",
  "env":[
    {"name":"USERNAME","value":"yourusername"}
  ],
  "run_tags":{"owner_id":"youruser"}
}'
```

> Note: `run_tags` is a way to inject ownership information into every run for visibility and is *required*.

You'll get a response that contains a `run_id` field. You can check the status of your task at `http://localhost:5000/api/v6/history/`

```
curl -XGET localhost:5000/api/v6/history/
{
  "instance": {
    "dns_name": "",
    "instance_id": ""
  },
  "run_id": "",
  "definition_id": "",
  "alias": "hello-flotilla",
  "image": "ubuntu:latest",
  "cluster": "default",
  "status": "PENDING",
  "env": [
    {
      "name": "FLOTILLA_RUN_OWNER_ID",
      "value": "youruser"
    },
    {
      "name": "FLOTILLA_SERVER_MODE",
      "value": "dev"
    },
    {
      "name": "FLOTILLA_RUN_ID",
      "value": ""
    },
    {
      "name": "USERNAME",
      "value": "yourusername"
    }
  ]
}
```

and you can get the logs for your task at `http://localhost:5000/api/v6//logs`. You will not see any logs until your task is at least in the `RUNNING` state.

```
curl -XGET localhost:5000/api/v6//logs
{
  "last_seen":"",
  "log":"+ set -e\n+ echo 'hello yourusername'\nhello yourusername"
}
```

## Definitions and Task Life Cycle

### Definitions

| Name | Definition |
| ---- | ---------- |
| `task` | A definition of a task that can be executed to create a `run` |
| `run` | An instance of a task |

### Task Life Cycle

When executed, a task's run goes through several transitions:

1. `QUEUED` - this is the first phase of a run and means the run is currently queued and waiting to be allocated to a cluster
2. `PENDING` - every `worker.submit_interval` (defined in the config) the submit worker pulls runs from the queues and submits them for execution. At this point, if the cluster associated with the run has resources, the run gets allocated to the cluster and transitions to the `PENDING` status. For the default execution engine this stage encapsulates the process of pulling the docker image and starting the container. It can take several minutes depending on whether the image is cached and how large the image is.
3. `RUNNING` - once the run starts on a particular execution host it transitions to this stage. At this point logs should become available.
4. `STOPPED` - a run enters this stage when it finishes execution. This can mean it either succeeded or failed depending on the existence of an `exit_code` and the value of that exit code.
5. `NEEDS_RETRY` - on occasion, due to host-level characteristics (full disk, too many open files, timeouts pulling the image, etc.) the run exits with a null exit code without ever being executed. In this case the reason is analyzed to determine if the run is retriable. If it is, the task transitions to this status, is allocated to the appropriate execution queue again, and will repeat the lifecycle.

#### Normal Lifecycle

`QUEUED` --> `PENDING` --> `RUNNING` --> `STOPPED`

#### Retry Lifecycle

... --> `PENDING` --> `STOPPED` --> `NEEDS_RETRY` --> `QUEUED` --> ...
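In practice you poll the run's status until it reaches a terminal state. The sketch below assumes `jq` is installed and that the history endpoint takes the run ID as its final path segment (the run IDs are elided in the URLs above); `RUN_ID` is a placeholder for the value returned by the execute call.

```
RUN_ID="<your-run-id>"
while true; do
  STATUS=$(curl -s "localhost:5000/api/v6/history/${RUN_ID}" | jq -r .status)
  echo "status: ${STATUS}"
  # STOPPED is terminal; NEEDS_RETRY re-enters the queue on its own.
  [ "${STATUS}" = "STOPPED" ] && break
  sleep 5
done
```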
## Deploying

In a production deployment you'll want multiple instances of the flotilla service running and postgres running elsewhere (e.g. Amazon RDS). In this case the most salient configuration detail is the `DATABASE_URL`.

### Docker based deploy

The simplest way to deploy for very light usage is to avoid a reverse proxy and deploy directly with docker.

1. Build and tag an image for flotilla using the `Dockerfile` provided in this repo:

```
docker build -t /flotilla:
```

2. Run this image wherever you deploy your services:

```
docker run -e DATABASE_URL= -e FLOTILLA_MODE=prod -p 3000:3000 ...
```

> Notes:
> -----
> * Flotilla uses [viper](https://github.com/spf13/viper) for configuration so you can override any of the default configuration under `conf/` using run time environment variables passed to `docker run`
> * In most realistic deploys you'll likely want to configure a reverse proxy to sit in front of the flotilla container. See the docs [here](https://hub.docker.com/_/nginx/)

See [docker run](https://docs.docker.com/engine/reference/run/) for more details

### Configuration In Detail

The variables in `conf/config.yml` are sensible defaults. Most should be left alone unless you're developing flotilla itself. However, there are a few you may want to change in a production environment.

| Variable Name | Description |
| ------------- | ----------- |
| `worker_retry_interval` | Run frequency of the retry worker |
| `worker_submit_interval` | Poll frequency of the submit worker |
| `worker_status_interval` | Poll frequency of the status update worker |
| `http_server_read_timeout_seconds` | Sets the read timeout in seconds for the http server |
| `http_server_write_timeout_seconds` | Sets the write timeout in seconds for the http server |
| `http_server_listen_address` | The port for the http server to listen on |
| `owner_id_var` | Which environment variable contains ownership information to inject into the runtime of jobs |
| `enabled_workers` | The list of workers that run. Use this to control which workers run when using a multi-container deployment strategy. Valid list items include `retry`, `submit`, and `status` |
| `metrics_dogstatsd_address` | Statsd metrics host in Datadog format |
| `metrics_dogstatsd_namespace` | Namespace for the metrics - for example `flotilla.` |
| `redis_address` | Redis host for caching and locks |
| `redis_db` | Redis db to be used - numeric |
| `eks_clusters` | Hash-map of cluster name and its associated kubeconfig (encoded in base64) |
| `eks_kubeconfig_basepath` | Folder where the kubeconfigs are stored |
| `eks_cluster_ondemand_whitelist` | Override list of cluster names where on-demand node types are forced |
| `eks_cluster_override` | EKS clusters to override traffic |
| `eks_scheduler_name` | Custom scheduler name to use; default is `kube-scheduler` |
| `eks_manifest_storage.options.region` | AWS region of the Kubernetes manifest S3 upload bucket |
| `eks_manifest_storage_options_s3_bucket_name` | S3 bucket name for manifest storage |
| `eks_manifest_storage_options_s3_bucket_root_dir` | S3 root bucket path |
| `eks_log_namespace_retention_days` | Number of days to store logs |
| `eks_log_namespace_driver_name` | Logger name |
| `eks_log_namespace_driver_options_s3_bucket_name` | S3 bucket name to store logs |
| `eks_log_namespace_driver_options_s3_bucket_root_dir` | S3 root path within the bucket |
| `eks_job_namespace` | Kubernetes namespace to submit jobs to |
| `eks_job_ttl` | Default job ttl in seconds |
| `eks_job_queue` | SQS job queue - the api places jobs on this queue and the submit worker asynchronously submits them to Kubernetes/EKS |
| `eks.service_account` | Kubernetes service account to use for jobs |
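Since viper reads configuration from the environment, individual values from the table above can be overridden at container start. The example below is a sketch only: the exact environment-variable naming is an assumption (viper conventionally upper-cases keys), and the image name and database URL are placeholders.

```
# Hypothetical overrides - verify the env-var mapping against conf/config.yml.
docker run \
  -e DATABASE_URL="<your-database-url>" \
  -e FLOTILLA_MODE=prod \
  -e HTTP_SERVER_LISTEN_ADDRESS=":3000" \
  -p 3000:3000 <your-flotilla-image>
```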
## Development

### API Documentation

See [API](https://stitchfix.github.io/flotilla-os/api.html)

### Building

Currently Flotilla is built using a recent version of `go` and uses Go modules (`go mod`) to manage dependencies.

```
go get && go build
```

================================================
FILE: ara-impact-report-staging.md
================================================
# ARA Impact Analysis Report - STAGING Environment

## 10-Day Analysis of Adaptive Resource Allocation (Dec 7-17, 2025)

### Executive Summary

This report analyzes the impact of the ARA bug fix deployed on **December 16, 2025** in the **STAGING environment**.
**Key Findings:** - **forklift-deploy-model-v1**: Fix deployed mid-day Dec 16, full effect on Dec 17 - Before fix (Dec 7-15): NULL `command_hash`, memory 4-6.5GB (at/below baseline) - After fix (Dec 17): Proper `command_hash`, memory 4-6.5GB (unchanged) - **No memory over-allocation issue in staging** (unlike production) - **python-3.11 jobs**: Working correctly with ARA - Baseline: 50MB - Elevated: 1-16GB via ARA (reasonable levels) - **No extreme 350GB allocations** (staging max is 40GB) - **GPU jobs**: None in staging environment - **Environment difference**: Staging has much lower max memory ceiling (40GB vs 350GB in production) --- ## Environment Overview **Database Container**: `77b8e13079e5` (postgres:16) **Analysis Period**: 2025-12-07 to 2025-12-17 (10 days) **Total Jobs**: 125,154 jobs from 14 unique definitions --- ## Query 1: forklift-deploy-model-v1 Command Hash Population ### Query ```sql SELECT DATE(queued_at) as date, command_hash IS NULL as hash_null, COUNT(*) as count FROM task WHERE definition_id IN (SELECT definition_id FROM task_def WHERE alias = 'forklift-deploy-model-v1') AND queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(queued_at), command_hash IS NULL ORDER BY date, hash_null; ``` ### Results ``` date | hash_null | count ------------+-----------+------- 2025-12-07 | t | 30 2025-12-08 | t | 35 2025-12-09 | t | 57 2025-12-10 | t | 31 2025-12-11 | t | 33 2025-12-12 | t | 30 2025-12-13 | t | 30 2025-12-14 | t | 25 2025-12-15 | t | 30 2025-12-16 | f | 5 ← Fix deployed (partial) 2025-12-16 | t | 25 2025-12-17 | f | 30 ← Fix fully active ``` ### Analysis - **Dec 7-15**: 100% of forklift jobs had NULL `command_hash` (301 jobs total) - **Dec 16**: Transition day - 5 jobs with proper hash, 25 with NULL (fix deployed mid-day) - **Dec 17**: 100% of forklift jobs have proper `command_hash` (30 jobs) - **Fix deployment time**: Mid-day December 16, 2025 --- ## Query 2: forklift-deploy-model-v1 Memory Allocations ### Query ```sql SELECT DATE(queued_at) as date, MIN(memory) as min_mem, MAX(memory) as max_mem, AVG(memory)::int as avg_mem, COUNT(*) as count FROM task WHERE definition_id IN (SELECT definition_id FROM task_def WHERE alias = 'forklift-deploy-model-v1') AND queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(queued_at) ORDER BY date; ``` ### Results ``` date | min_mem | max_mem | avg_mem | count ------------+---------+---------+---------+------- 2025-12-07 | 4000 | 6500 | 5500 | 30 2025-12-08 | 4000 | 6500 | 5286 | 35 2025-12-09 | 4000 | 6500 | 4789 | 57 2025-12-10 | 4000 | 6500 | 5452 | 31 2025-12-11 | 4000 | 8500 | 5500 | 33 2025-12-12 | 4000 | 6500 | 5500 | 30 2025-12-13 | 4000 | 6500 | 5500 | 30 2025-12-14 | 4000 | 6500 | 5500 | 25 2025-12-15 | 4000 | 6500 | 5500 | 30 2025-12-16 | 4000 | 6500 | 5500 | 30 2025-12-17 | 4000 | 6500 | 5500 | 30 ``` ### Analysis - **Baseline**: 8GB (8000MB) from task definition - **Memory allocations**: 4-6.5GB (all at or below baseline) - **Before fix**: Despite NULL `command_hash`, no memory over-allocation - **After fix**: Memory unchanged (4-6.5GB range) - **Key difference from production**: Staging forklift jobs **never exhibited the 18-33GB over-allocation** seen in production --- ## Query 3: Elevated Memory Jobs (ARA Impact) ### Query ```sql SELECT DATE(t.queued_at) as date, COUNT(*) as elevated_jobs, COUNT(DISTINCT t.definition_id) as unique_defs FROM task t JOIN task_def td ON t.definition_id = td.definition_id WHERE t.memory > td.memory * 1.5 AND td.adaptive_resource_allocation = true AND t.queued_at >= 
CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(t.queued_at) ORDER BY date; ``` ### Results ``` date | elevated_jobs | unique_defs ------------+---------------+------------- 2025-12-07 | 134 | 1 2025-12-08 | 129 | 1 2025-12-09 | 150 | 1 2025-12-10 | 217 | 1 2025-12-11 | 416 | 1 2025-12-12 | 420 | 1 2025-12-13 | 417 | 1 2025-12-14 | 418 | 1 2025-12-15 | 413 | 1 2025-12-16 | 450 | 1 2025-12-17 | 395 | 1 ``` ### Analysis - **Total elevated jobs**: 3,559 jobs over 10 days - **All from one definition**: `python-3.11` (baseline: 50MB) - **Average**: ~324 elevated jobs per day - **Pattern**: Consistent elevation throughout the period (no change after fix) - **This is expected**: python-3.11 jobs have proper `command_hash` throughout --- ## Query 4: python-3.11 Memory Elevation Details ### Query ```sql SELECT DATE(t.queued_at) as date, td.alias, td.memory as baseline_mb, t.memory as allocated_mb, CAST((t.memory::float / td.memory) as numeric(10,2)) as multiplier, COUNT(*) as job_count FROM task t JOIN task_def td ON t.definition_id = td.definition_id WHERE t.memory > td.memory * 1.5 AND td.adaptive_resource_allocation = true AND t.queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(t.queued_at), td.alias, td.memory, t.memory ORDER BY date, job_count DESC LIMIT 50; ``` ### Results (sample) ``` date | alias | baseline_mb | allocated_mb | multiplier | job_count ------------+-------------+-------------+--------------+------------+----------- 2025-12-11 | python-3.11 | 50 | 1024 | 20.48 | 284 2025-12-11 | python-3.11 | 50 | 4096 | 81.92 | 88 2025-12-11 | python-3.11 | 50 | 1792 | 35.84 | 39 2025-12-11 | python-3.11 | 50 | 8000 | 160.00 | 5 2025-12-12 | python-3.11 | 50 | 1024 | 20.48 | 292 2025-12-12 | python-3.11 | 50 | 4096 | 81.92 | 88 2025-12-12 | python-3.11 | 50 | 1792 | 35.84 | 32 2025-12-12 | python-3.11 | 50 | 8000 | 160.00 | 5 2025-12-12 | python-3.11 | 50 | 16000 | 320.00 | 3 ``` ### Analysis - **Elevation levels**: - 1GB (1024MB): Most common (~300 jobs/day) - 4GB (4096MB): Consistent (~88 jobs/day) - 8GB (8000MB): Regular (~5 jobs/day) - 16GB (16000MB): Rare (3 jobs total) - **No extreme allocations**: Max is 16GB (vs 350GB in production) - **Reasonable multipliers**: 20-320x (vs 7000x in production) --- ## Query 5: python-3.11 Command Hash Status ### Query ```sql SELECT DATE(queued_at) as date, command_hash IS NULL as hash_null, COUNT(*) as count FROM task WHERE definition_id IN (SELECT definition_id FROM task_def WHERE alias = 'python-3.11') AND queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(queued_at), command_hash IS NULL ORDER BY date, hash_null; ``` ### Results ``` date | hash_null | count ------------+-----------+------- 2025-12-07 | f | 134 2025-12-08 | f | 129 2025-12-09 | f | 150 2025-12-10 | f | 217 2025-12-11 | f | 416 2025-12-12 | f | 420 2025-12-13 | f | 417 2025-12-14 | f | 418 2025-12-15 | f | 413 2025-12-16 | f | 450 2025-12-17 | f | 396 ``` ### Analysis - **100% of python-3.11 jobs** have proper `command_hash` throughout the entire period - **ARA working correctly**: Jobs are elevated based on proper command hash lookups - **No NULL command_hash issue**: Unlike forklift, python-3.11 had command_hash all along --- ## Query 6: GPU Jobs Analysis ### Query ```sql SELECT COUNT(*) as gpu_job_count, COUNT(DISTINCT definition_id) as unique_definitions FROM task WHERE gpu IS NOT NULL AND gpu > 0 AND queued_at >= CURRENT_DATE - INTERVAL '10 days'; ``` ### Results ``` gpu_job_count | unique_definitions ---------------+-------------------- 0 | 0 ``` ### 
Analysis - **No GPU jobs** in staging environment over the past 10 days - The GPU detection bug fix is not testable in staging - GPU jobs appear to be production-only workloads --- ## Query 7: Memory Distribution ### Query ```sql SELECT memory, COUNT(*) FROM task WHERE queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY memory ORDER BY memory DESC LIMIT 15; ``` ### Results ``` memory | count --------+-------- | 3536 ← NULL (jobs still queued/pending) 40960 | 22 ← 40GB (max in staging) 20000 | 3 16000 | 3 8500 | 1 8000 | 57 6500 | 195 4096 | 973 4000 | 213 2744 | 1 2048 | 1073 1792 | 123 1568 | 2 1024 | 101156 ← Most common (1GB) 1000 | 58 ``` ### Analysis - **Max memory allocated**: 40GB (40,960MB) - **Most common**: 1GB (1,024MB) - 101,156 jobs (80.7%) - **Distribution**: Heavily skewed toward small allocations - **No extreme allocations**: Nothing above 40GB --- ## Staging vs Production Comparison | Metric | Production | Staging | Notes | |--------|-----------|---------|-------| | **Max memory limit** | 350GB | 40GB | Staging has 8.75x lower ceiling | | **forklift over-allocation** | 18-33GB (before fix) | None | Staging had no issue | | **python-3.11 max allocation** | 350GB | 16GB | 21.8x difference | | **GPU jobs** | 460 jobs | 0 jobs | Production only | | **Total jobs (10 days)** | 280,215 | 125,154 | Production 2.2x larger | | **command_hash fix date** | Dec 16 | Dec 16 | Same deployment | --- ## Conclusions ### Fix Effectiveness in Staging: ✅ Verified 1. **forklift-deploy-model-v1**: - **Before fix (Dec 7-15)**: NULL `command_hash` but no memory issues - **After fix (Dec 17)**: Proper `command_hash`, memory unchanged - **No over-allocation problem** in staging (unlike production) - Root cause: Staging already had lower max memory limits 2. **python-3.11**: - **Throughout period**: Proper `command_hash`, ARA working correctly - **Elevated to**: 1-16GB (reasonable levels) - **No extreme allocations**: Staging max limit prevents 350GB scenario 3. **Environment differences**: - Staging has **40GB max memory** vs production's **350GB** - This prevented the extreme allocation issue we saw in production - Staging is a safer environment for testing ARA changes ### Key Insights 1. **Staging didn't exhibit the production issue** because: - Lower max memory ceiling (40GB vs 350GB) - forklift jobs stayed within reasonable bounds despite NULL `command_hash` 2. **The fix deployed successfully**: - Mid-day Dec 16: Partial deployment - Dec 17: Full effect with 100% proper `command_hash` 3. **No GPU jobs in staging**: - Cannot validate GPU bug fix in this environment - GPU workloads are production-specific ### Recommendations 1. **Production parity**: Consider raising staging max memory to match production (248GB new limit) for better testing 2. **GPU testing**: Add GPU job definitions to staging for comprehensive ARA testing 3. **Monitoring**: The fix is working correctly in staging, safe to deploy the 248GB limit reduction 4. 
**No action needed**: Staging forklift jobs are healthy and don't require intervention --- ## Appendix: Container Information - **Database Container**: `77b8e13079e5` (postgres:16) - **Database URL**: Available as `$FLOTILLA_DATABASE_URL` in container environment - **Environment**: STAGING - **Report Generated**: 2025-12-17 - **Analysis Period**: 2025-12-07 to 2025-12-17 (10 days) - **Fix Deployed**: 2025-12-16 (mid-day) --- ## Sample Query Template To reproduce this analysis or run ad-hoc queries: ```bash docker exec 77b8e13079e5 bash -c 'psql $FLOTILLA_DATABASE_URL -c "YOUR_QUERY_HERE"' ``` Example: ```bash docker exec 77b8e13079e5 bash -c 'psql $FLOTILLA_DATABASE_URL -c "SELECT COUNT(*) FROM task WHERE memory > 10000 AND queued_at >= CURRENT_DATE - INTERVAL '\''1 day'\'';"' ``` ================================================ FILE: ara-impact-report.md ================================================ # ARA Impact Analysis Report ## 10-Day Analysis of Adaptive Resource Allocation (Dec 7-17, 2025) ### Executive Summary This report analyzes the impact of the ARA bug fix deployed on **December 16, 2025**. The fix changed ARA lookups from using `description` to `command_hash`, preventing incorrect resource allocation matches. **Key Findings:** - **350GB allocations** (baseline: 50MB): Continue at expected levels (legitimate OOM responses) - **forklift-deploy-model-v1 elevations** (baseline: 8GB): **Completely eliminated** after fix deployment - **Fix effectiveness**: 100% resolution for the forklift issue (21 elevated jobs/day → 0 elevated jobs/day) - **Root cause identified**: `command_hash` was NULL before fix despite having command text - The fix both (a) started calculating `command_hash` properly and (b) changed ARA lookup logic - Before: NULL `command_hash` + NULL `description` → incorrect ARA matches → 18-33GB allocations - After: Proper `command_hash` (19432e77...) 
→ correct lookups → 4-7GB allocations (at baseline) --- ## Query 1: Daily Count of 350GB Memory Jobs ### Query ```sql SELECT DATE(queued_at) as date, COUNT(*) as count_350gb_jobs FROM task WHERE memory = 350000 AND queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(queued_at) ORDER BY date LIMIT 15; ``` ### Results ``` date | count_350gb_jobs ------------+------------------ 2025-12-07 | 14 2025-12-08 | 14 2025-12-09 | 29 2025-12-10 | 53 2025-12-11 | 16 2025-12-12 | 30 2025-12-13 | 16 2025-12-14 | 14 2025-12-15 | 15 2025-12-16 | 52 ← Fix deployed 2025-12-17 | 14 ``` ### Analysis - **Average before fix (Dec 7-15)**: 21.2 jobs/day - **Day of fix (Dec 16)**: 52 jobs (spike likely due to deployment activity) - **After fix (Dec 17)**: 14 jobs (within normal range) - These jobs have a **baseline of only 50MB** but allocate **350GB** (7000x increase) --- ## Query 2: 350GB Jobs by Definition/Alias ### Query ```sql SELECT DATE(t.queued_at) as date, td.alias, COUNT(*) as job_count FROM task t JOIN task_def td ON t.definition_id = td.definition_id WHERE t.memory = 350000 AND t.queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(t.queued_at), td.alias ORDER BY date, job_count DESC LIMIT 50; ``` ### Results (sample) ``` date | alias | job_count ------------+----------------------+----------- 2025-12-15 | python-3.11 | 10 2025-12-15 | pytorch2-24.05-py3_8 | 3 2025-12-15 | pytorch2-24.05-py3_1 | 2 2025-12-16 | python-3.11 | 30 2025-12-16 | pytorch2-24.05-py3_8 | 15 2025-12-16 | pytorch2-24.05-py3_1 | 7 2025-12-17 | python-3.11 | 5 2025-12-17 | pytorch2-24.05-py3_8 | 5 2025-12-17 | pytorch2-24.05-py3_1 | 4 ``` ### Analysis - Three definition aliases affected: `python-3.11`, `pytorch2-24.05-py3_8`, `pytorch2-24.05-py3_1` - All three definitions have baseline memory of **50MB** - Distribution across definitions remains consistent before and after fix - These appear to be **legitimate ARA responses** to actual OOM conditions --- ## Query 3: Other Elevated Memory Jobs (Non-350GB) ### Query ```sql SELECT DATE(t.queued_at) as date, COUNT(*) as elevated_jobs, COUNT(DISTINCT t.definition_id) as unique_defs FROM task t JOIN task_def td ON t.definition_id = td.definition_id WHERE t.memory > td.memory * 1.5 AND td.adaptive_resource_allocation = true AND t.queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(t.queued_at) ORDER BY date LIMIT 15; ``` ### Results ``` date | elevated_jobs | unique_defs ------------+---------------+------------- 2025-12-07 | 16 | 1 2025-12-08 | 11 | 1 2025-12-09 | 14 | 1 2025-12-10 | 24 | 1 2025-12-11 | 4 | 1 2025-12-12 | 5 | 1 2025-12-13 | 10 | 1 2025-12-14 | 6 | 1 2025-12-15 | 21 | 1 2025-12-16 | 5 | 1 ← Fix deployed 2025-12-17 | 0 | 0 ← No elevated jobs! 
``` ### Analysis - **Average before fix (Dec 7-15)**: 12.3 elevated jobs/day - **After fix (Dec 17)**: **0 jobs** ✅ - All elevated jobs came from a **single definition** (forklift-deploy-model-v1) - **100% fix effectiveness** for this issue --- ## Query 4: Detailed Elevation Analysis (forklift-deploy-model-v1) ### Query ```sql SELECT DATE(t.queued_at) as date, td.alias, td.memory as baseline_mb, t.memory as allocated_mb, CAST((t.memory::float / td.memory) as numeric(10,2)) as multiplier, COUNT(*) as job_count FROM task t JOIN task_def td ON t.definition_id = td.definition_id WHERE t.memory > td.memory * 1.5 AND td.adaptive_resource_allocation = true AND t.queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(t.queued_at), td.alias, td.memory, t.memory ORDER BY date, job_count DESC LIMIT 40; ``` ### Results (sample) ``` date | alias | baseline_mb | allocated_mb | multiplier | job_count ------------+--------------------------+-------------+--------------+------------+----------- 2025-12-14 | forklift-deploy-model-v1 | 8000 | 19000 | 2.38 | 4 2025-12-14 | forklift-deploy-model-v1 | 8000 | 33000 | 4.13 | 2 2025-12-15 | forklift-deploy-model-v1 | 8000 | 33000 | 4.13 | 17 2025-12-15 | forklift-deploy-model-v1 | 8000 | 19000 | 2.38 | 4 2025-12-16 | forklift-deploy-model-v1 | 8000 | 19000 | 2.38 | 4 2025-12-16 | forklift-deploy-model-v1 | 8000 | 33000 | 4.13 | 1 2025-12-17 | (no results) | N/A | N/A | N/A | 0 ``` ### Analysis - **Baseline**: 8GB (8000MB) - **Elevated allocations**: - 18GB (2.25x multiplier) - 19GB (2.38x multiplier) - 33GB (4.13x multiplier) - **Peak day**: Dec 15 with 21 total elevated jobs - **After fix**: Complete elimination on Dec 17 --- ## Query 5: Command Hash Diversity (350GB Jobs) ### Query ```sql SELECT DATE(t.queued_at) as date, td.alias, COUNT(*) as total_jobs, COUNT(DISTINCT t.command_hash) as unique_commands FROM task t JOIN task_def td ON t.definition_id = td.definition_id WHERE t.memory = 350000 AND t.queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(t.queued_at), td.alias ORDER BY date, total_jobs DESC LIMIT 50; ``` ### Results (sample) ``` date | alias | total_jobs | unique_commands ------------+----------------------+------------+----------------- 2025-12-15 | python-3.11 | 10 | 5 2025-12-15 | pytorch2-24.05-py3_8 | 3 | 3 2025-12-15 | pytorch2-24.05-py3_1 | 2 | 2 2025-12-16 | python-3.11 | 30 | 8 2025-12-16 | pytorch2-24.05-py3_8 | 15 | 7 2025-12-16 | pytorch2-24.05-py3_1 | 7 | 5 2025-12-17 | python-3.11 | 5 | 5 2025-12-17 | pytorch2-24.05-py3_8 | 5 | 5 2025-12-17 | pytorch2-24.05-py3_1 | 4 | 4 ``` ### Analysis - **High command diversity**: Multiple unique command hashes per day - **Dec 15**: 15 jobs with 10 unique commands (67% unique) - **Dec 17**: 14 jobs with 14 unique commands (100% unique) - This diversity indicates **legitimate ARA responses** to different workloads with actual OOM history - The fix correctly uses `command_hash` for matching, not generic descriptions --- ## Query 6: Command Hash Analysis (forklift-deploy-model-v1) ### Query ```sql SELECT DATE(t.queued_at) as date, t.memory as allocated_mb, COUNT(*) as total_jobs, COUNT(t.command_hash) as non_null_hashes, COUNT(DISTINCT t.command_hash) as unique_commands FROM task t JOIN task_def td ON t.definition_id = td.definition_id WHERE td.alias = 'forklift-deploy-model-v1' AND t.memory > td.memory * 1.5 AND t.queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(t.queued_at), t.memory ORDER BY date, allocated_mb LIMIT 50; ``` ### Results (sample) ``` date | allocated_mb | 
total_jobs | non_null_hashes | unique_commands ------------+--------------+------------+-----------------+----------------- 2025-12-14 | 19000 | 4 | 0 | 0 2025-12-14 | 33000 | 2 | 0 | 0 2025-12-15 | 19000 | 4 | 0 | 0 2025-12-15 | 33000 | 17 | 0 | 0 2025-12-16 | 19000 | 4 | 0 | 0 2025-12-16 | 33000 | 1 | 0 | 0 ``` ### Critical Finding: The command_hash Bug **Before Fix (Dec 7-16):** - **ALL forklift-deploy-model-v1 jobs had `command_hash = NULL`** (despite having a 206-char shell script) - The `description` field is also **always NULL** for forklift jobs - With both NULL, the old ARA code was incorrectly matching these jobs, causing false elevations **After Fix (Dec 17):** - `command_hash = 19432e77696deb6666bb12c67feb2b8d` (now properly calculated) - All forklift jobs get the same hash because they run the identical command - ARA now correctly looks up this hash and finds no OOM history - Result: No elevation (jobs run at or below the 8GB baseline) --- ## Query 7: Baseline vs Allocated Memory (350GB Jobs) ### Query ```sql SELECT t.definition_id, td.memory as baseline_memory, t.memory as allocated_memory, COUNT(*) as job_count FROM task t JOIN task_def td ON t.definition_id = td.definition_id WHERE t.memory = 350000 AND t.queued_at >= CURRENT_DATE - INTERVAL '3 days' GROUP BY t.definition_id, td.memory, t.memory ORDER BY job_count DESC LIMIT 20; ``` ### Results ``` definition_id | baseline_memory | allocated_memory | job_count ---------------------------------------------------------+-----------------+------------------+----------- sf-base_python-3_11-7449eda4-b8b3-4146-77c5-a47f8caac81b | 50 | 350000 | 52 sf-base_pytorch2-24__5-py3-505a283c-1e0a-43da-4c9b-071... | 50 | 350000 | 24 sf-base_pytorch2-24__5-py3-ceef4c9e-6ebc-41e5-6cef-a33... | 50 | 350000 | 16 ``` ### Analysis - **Massive increase**: 50MB → 350GB (7000x multiplier) - Indicates these are **ML training jobs** with significant memory requirements - The ARA system is correctly identifying commands that have historically run out of memory - These allocations continue appropriately after the fix --- ## Query 8: forklift-deploy-model-v1 Memory Allocation Timeline ### Query ```sql SELECT DATE(queued_at) as date, MIN(memory) as min_mem, MAX(memory) as max_mem, AVG(memory)::int as avg_mem, COUNT(*) as count FROM task WHERE definition_id IN (SELECT definition_id FROM task_def WHERE alias = 'forklift-deploy-model-v1') AND queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(queued_at) ORDER BY date; ``` ### Results ``` date | min_mem | max_mem | avg_mem | count ------------+---------+---------+---------+------- 2025-12-07 | 4000 | 33000 | 13431 | 35 2025-12-08 | 4000 | 33000 | 10792 | 38 2025-12-09 | 4000 | 33000 | 13062 | 34 2025-12-10 | 4000 | 33000 | 13117 | 52 2025-12-11 | 4000 | 19000 | 9392 | 13 2025-12-12 | 4000 | 33000 | 11842 | 12 2025-12-13 | 4000 | 33000 | 9524 | 46 2025-12-14 | 4000 | 33000 | 8930 | 27 2025-12-15 | 4000 | 33000 | 18078 | 40 2025-12-16 | 4000 | 33000 | 10807 | 15 2025-12-17 | 4000 | 7000 | 5007 | 15 ← Fix deployed ``` ### Analysis - **Baseline**: 8GB (8000 MB) - **Before fix**: Jobs randomly allocated 4-33GB (some below baseline, many elevated) - **After fix**: Jobs allocated 4-7GB (all at or below baseline) ✅ ### The command Field Content Query to inspect the command field: ```sql SELECT DISTINCT command, command_hash FROM task WHERE definition_id IN (SELECT definition_id FROM task_def WHERE alias = 'forklift-deploy-model-v1') AND queued_at >= CURRENT_DATE LIMIT 1; ``` Result shows forklift jobs run 
this **206-character shell script**: ```bash # # Use absolute latest forklift # mkdir -p /code/stitchfix cd /code/stitchfix git clone -b $GIT_BRANCH --single-branch git@github.com:stitchfix/forklift.git cd forklift/destinations/ml_model_deploy/ ./run ``` **Key Insight**: The command field is **NOT empty** - but `command_hash` was NULL before the fix, preventing proper ARA lookups. --- ## Query 9: command_hash Population Status by Date ### Query ```sql SELECT DATE(queued_at) as date, command_hash IS NULL as hash_null, COUNT(*) as count FROM task WHERE definition_id IN (SELECT definition_id FROM task_def WHERE alias = 'forklift-deploy-model-v1') AND queued_at >= CURRENT_DATE - INTERVAL '10 days' GROUP BY DATE(queued_at), command_hash IS NULL ORDER BY date, hash_null; ``` ### Results ``` date | hash_null | count ------------+-----------+------- 2025-12-07 | t | 35 2025-12-08 | t | 38 2025-12-09 | t | 34 2025-12-10 | t | 52 2025-12-11 | t | 13 2025-12-12 | t | 12 2025-12-13 | t | 46 2025-12-14 | t | 27 2025-12-15 | t | 40 2025-12-16 | t | 15 2025-12-17 | f | 15 ← command_hash now populated! ``` ### Analysis - **Dec 7-16**: 100% of forklift jobs had `command_hash = NULL` - **Dec 17**: 100% of forklift jobs have `command_hash = 19432e77696deb6666bb12c67feb2b8d` - The fix not only changed the lookup logic but also **started calculating command_hash** for new jobs --- ## Conclusions ### Fix Effectiveness: ✅ Confirmed 1. **forklift-deploy-model-v1 issue**: **100% resolved** - Before: 12.3 elevated jobs/day (average, elevated to 18-33GB) - After: 0 elevated jobs (all at or below 8GB baseline) - Root cause discovered: - The command field was populated (206-char shell script) but `command_hash` was **NULL** - The description field was also **NULL** - The fix both (a) started calculating `command_hash` and (b) changed lookup logic - Now all forklift jobs get the same `command_hash` and ARA finds no OOM history for it 2. **350GB allocations**: **Working as designed** - Jobs continue at expected levels - High command hash diversity (different workloads) - Baseline of 50MB suggests these are script runners with variable workloads - ARA correctly identifies specific commands with OOM history ### Before and After Comparison | Metric | Dec 15 (Before) | Dec 17 (After) | Change | |--------|----------------|----------------|---------| | 350GB jobs | 15 | 14 | -7% (normal variance) | | forklift elevated | 21 | 0 | -100% ✅ | | Total elevated | 36 | 14 | -61% | ### Recommendations 1. **Monitor next 7 days**: Verify forklift-deploy-model-v1 remains at baseline (8GB) ✅ 2. **350GB jobs**: These appear legitimate - monitor for OOM failures to validate 3. **Command hash calculation**: - Investigate why `command_hash` was NULL before Dec 17 - Verify all new jobs now properly calculate `command_hash` - Consider backfilling `command_hash` for historical records if needed for analytics 4. **ARA lookup logic**: Confirm the fix properly handles NULL `command_hash` cases (doesn't match) 5. 
**Documentation**: Update ARA docs to clarify:
   - `command_hash` is calculated from the `command` field (not `description`)
   - ARA requires a valid `command_hash` for proper operation
   - Behavior when `command_hash` is NULL

---

## Appendix: Container Information

- **Database Container**: `360a9dd48242` (postgres:16)
- **Database URL**: Available as `$FLOTILLA_DATABASE_URL` in container environment
- **Report Generated**: 2025-12-17 (updated with latest data)
- **Analysis Period**: 2025-12-07 to 2025-12-17 (10 days)
- **Fix Deployed**: 2025-12-16

### Update Log

- **Initial report**: Generated when the data showed 12 jobs on Dec 17
- **Updated**: Refreshed with latest data showing 14 jobs on Dec 17 (100% unique command hashes)

---

## Sample Query Template

To reproduce this analysis or run ad-hoc queries:

```bash
docker exec 360a9dd48242 bash -c 'psql $FLOTILLA_DATABASE_URL -c "YOUR_QUERY_HERE"'
```

Example:

```bash
docker exec 360a9dd48242 bash -c 'psql $FLOTILLA_DATABASE_URL -c "SELECT COUNT(*) FROM task WHERE memory = 350000 AND queued_at >= CURRENT_DATE - INTERVAL '\''1 day'\'';"'
```

================================================
FILE: clients/cluster/cluster.go
================================================
package cluster

import (
	"fmt"

	"github.com/pkg/errors"
	"github.com/stitchfix/flotilla-os/config"
	"github.com/stitchfix/flotilla-os/state"
)

//
// Client validates whether or not the given definition can be run
// on the specified cluster. This is to prevent infinite queue
// times - the case that the requested resources will -never- become
// available on the user's chosen cluster
//
type Client interface {
	Name() string
	Initialize(conf config.Config) error
	CanBeRun(clusterName string, executableResources state.ExecutableResources) (bool, error)
	ListClusters() ([]state.ClusterMetadata, error)
}

// NewClusterClient returns a cluster client
func NewClusterClient(conf config.Config, name string) (Client, error) {
	switch name {
	case "eks":
		eksc := &EKSClusterClient{}
		if err := eksc.Initialize(conf); err != nil {
			return nil, errors.Wrap(err, "problem initializing EKSClusterClient")
		}
		return eksc, nil
	default:
		return nil, fmt.Errorf("No Client named [%s] was found", name)
	}
}

================================================
FILE: clients/cluster/eks_cluster_client.go
================================================
package cluster

import (
	"github.com/stitchfix/flotilla-os/config"
	"github.com/stitchfix/flotilla-os/state"
)

// EKSClusterClient is the cluster client for EKS
// [NOTE] This client assumes the EKS cluster is capable of running a mixed variety of jobs.
type EKSClusterClient struct{}

// Name returns the name of this client; "eks" matches the key used by NewClusterClient
// (previously this returned an empty string).
func (EKSClusterClient) Name() string {
	return "eks"
}

func (EKSClusterClient) Initialize(conf config.Config) error {
	return nil
}

// CanBeRun for EKSCluster is always true
func (EKSClusterClient) CanBeRun(clusterName string, executableResources state.ExecutableResources) (bool, error) {
	return true, nil
}

// Since it is a single cluster environment for EKS, the slice of clusters is empty.
func (EKSClusterClient) ListClusters() ([]state.ClusterMetadata, error) {
	return []state.ClusterMetadata{}, nil
}

================================================
FILE: clients/httpclient/client.go
================================================
package httpclient

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"
)

type RetryableError interface {
	Err() string
}

type HttpRetryableError struct {
	e error
}

func (re HttpRetryableError) Error() string {
	return re.e.Error()
}

func (re HttpRetryableError) Err() string {
	return re.e.Error()
}

type RequestExecutor interface {
	Do(req *http.Request, timeout time.Duration, entity interface{}) error
}

type defaultExecutor struct{}

func (de *defaultExecutor) Do(req *http.Request, timeout time.Duration, entity interface{}) error {
	client := http.Client{Timeout: timeout}
	if client.Timeout == 0 {
		client.Timeout = time.Second * 10
	}

	r, err := client.Do(req)
	if r != nil {
		defer r.Body.Close()
	}
	if err != nil {
		return err
	}
	if r.StatusCode >= 200 && r.StatusCode < 400 {
		return json.NewDecoder(r.Body).Decode(entity)
	} else if r.StatusCode >= 500 {
		return HttpRetryableError{fmt.Errorf("Error response: %v", r.Status)}
	} else {
		return fmt.Errorf("Error response: %v", r.Status)
	}
}

// Generic http client to make http requests.
type Client struct {
	Host       string
	Timeout    time.Duration
	RetryCount int
	Executor   RequestExecutor
}

func (c *Client) Get(path string, headers map[string]string, entity interface{}) error {
	req, err := c.prepareRequestNoBody("GET", path, headers)
	if err != nil {
		return fmt.Errorf("httpclient GET: %v", err)
	}
	return c.doRequestWithRetry(req, entity)
}

func (c *Client) Delete(path string, headers map[string]string, entity interface{}) error {
	req, err := c.prepareRequestNoBody("DELETE", path, headers)
	if err != nil {
		return fmt.Errorf("httpclient DELETE: %v", err)
	}
	return c.doRequestWithRetry(req, entity)
}

func (c *Client) Put(path string, headers map[string]string, inEntity interface{}, outEntity interface{}) error {
	req, err := c.prepareRequestWithBody("PUT", path, headers, inEntity)
	if err != nil {
		return fmt.Errorf("httpclient PUT: %v", err)
	}
	return c.doRequestWithRetry(req, outEntity)
}

func (c *Client) Post(path string, headers map[string]string, inEntity interface{}, outEntity interface{}) error {
	req, err := c.prepareRequestWithBody("POST", path, headers, inEntity)
	if err != nil {
		return fmt.Errorf("httpclient POST: %v", err)
	}
	return c.doRequestWithRetry(req, outEntity)
}

func (c *Client) prepareRequestNoBody(method string, path string, headers map[string]string) (*http.Request, error) {
	return c.makeRequest(method, path, headers, nil)
}

func (c *Client) prepareRequestWithBody(method string, path string, headers map[string]string, entity interface{}) (*http.Request, error) {
	encoded, err := json.Marshal(entity)
	if err != nil {
		return nil, fmt.Errorf("httpclient marshal: %v", err)
	}
	return c.makeRequest(method, path, headers, bytes.NewBuffer(encoded))
}

func (c *Client) makeURL(path string) (string, error) {
	host := c.Host
	if !strings.HasPrefix(c.Host, "http") {
		host = strings.Join([]string{"http://", c.Host}, "")
	}
	u, err := url.Parse(host)
	if err != nil {
		return "", fmt.Errorf("Unable to parse hostname (%v): %v", c.Host, err)
	}
	parsedPath, err := url.Parse(path)
	if err != nil {
		return "", fmt.Errorf("Unable to parse path (%v): %v", path, err)
	}
	u.Path = parsedPath.Path
	u.RawQuery = parsedPath.RawQuery
	return u.String(), nil
}
func (c *Client) makeRequest(method, path string, headers map[string]string, body io.Reader) (*http.Request, error) {
	u, err := c.makeURL(path)
	if err != nil {
		// Previously this error was silently discarded; surface it instead.
		return nil, err
	}
	req, err := http.NewRequest(method, u, body)
	if err != nil {
		return nil, fmt.Errorf("could not create request: %v", err)
	}
	// Only set headers once we know the request was created successfully.
	for k, v := range headers {
		req.Header.Set(k, v)
	}
	return req, nil
}

func (c *Client) doRequestWithRetry(req *http.Request, entity interface{}) error {
	if c.Executor == nil {
		c.Executor = &defaultExecutor{}
	}
	err := c.retryRequest(3*time.Second, func() error {
		return c.Executor.Do(req, c.Timeout, entity)
	})
	return err
}

type httpreqfunc func() error

func (c *Client) retryRequest(sleepTime time.Duration, fn httpreqfunc) error {
	err := fn()
	if err != nil {
		if _, isRetryable := err.(RetryableError); !isRetryable {
			return err
		}
		toSleep := sleepTime
		for retries := 0; retries < c.RetryCount; retries++ {
			time.Sleep(toSleep)
			toSleep = toSleep * 2
			// Assign to the outer err so the most recent error is returned
			// when all retries are exhausted (it was previously shadowed).
			err = fn()
			if err == nil {
				return nil
			}
			if _, isRetryable := err.(RetryableError); !isRetryable {
				return err
			}
		}
	}
	return err
}

================================================
FILE: clients/httpclient/client_test.go
================================================
package httpclient

import (
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"
)

type Cupcake struct {
	Flavour   string
	Sprinkles bool
}

const cupcakeResponse = `{"flavour": "vomit", "sprinkles": true}`

type MockExecutor struct {
	TryCount int // keep track of how many times 'Do' got called
}

func (me *MockExecutor) Do(req *http.Request, timeout time.Duration, entity interface{}) error {
	me.TryCount += 1
	if req.URL.Path == "/" {
		return HttpRetryableError{errors.New("bork")}
	} else {
		return errors.New("not found yo")
	}
}

func TestClientRetry(t *testing.T) {
	me := &MockExecutor{}
	retryCount := 2
	client := &Client{
		Host:       "nope",
		Timeout:    1 * time.Second,
		RetryCount: retryCount,
		Executor:   me,
	}
	client.Get("/", nil, &Cupcake{})
	if me.TryCount != retryCount+1 {
		t.Errorf("Expected to try request [%v] times but got [%v]", retryCount+1, me.TryCount)
	}
	me.TryCount = 0
	client.Get("/404", nil, &Cupcake{})
	if me.TryCount != 1 {
		t.Errorf("Expected to try request [%v] times but got [%v]", 1, me.TryCount)
	}
}
func TestClientDo(t *testing.T) {
	testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.Method {
		case "GET", "DELETE":
			if len(r.URL.RawQuery) > 0 {
				fmt.Fprintf(w, `{"flavour":"vague","sprinkles":false}`)
			} else {
				fmt.Fprintf(w, cupcakeResponse)
			}
		case "PUT", "POST":
			content := r.Header.Get("Content-Type")
			if content != "application/json" {
				t.Errorf("Expected Content-Type to eq %s got %s", "application/json", content)
			}
			c := Cupcake{}
			err := json.NewDecoder(r.Body).Decode(&c)
			if err != nil {
				t.Errorf("Expected body to deserialize but got error %s", err.Error())
			}
			fmt.Fprintf(w, cupcakeResponse)
		}
	}))

	cupcake := Cupcake{}
	client := &Client{
		Host:       testServer.URL,
		Timeout:    1 * time.Second,
		RetryCount: 1,
	}
	var err error
	var headers = map[string]string{
		"Content-Type": "application/json",
	}

	err = client.Get("/", nil, &cupcake)
	if err != nil {
		t.Errorf("Expected err to be nil got %s", err.Error())
	}
	if cupcake.Flavour != "vomit" {
		t.Errorf("Expected flavour to be 'vomit', got: %v", cupcake.Flavour)
	}
	if !cupcake.Sprinkles {
		t.Errorf("Expected sprinkles to be true, got: %v", cupcake.Sprinkles)
	}

	cupcake = Cupcake{}
	err = client.Get("/?some_rando_param=thing", nil, &cupcake)
	if err != nil {
		t.Errorf("Expected err to be nil got %s", err.Error())
	}
	if cupcake.Flavour != "vague" {
		t.Errorf("Expected flavour to be 'vague', got: %v", cupcake.Flavour)
	}
	if cupcake.Sprinkles {
		t.Errorf("Expected sprinkles to be false, got: %v", cupcake.Sprinkles)
	}

	cupcake = Cupcake{}
	err = client.Put("/", headers, &Cupcake{"vomit", true}, &cupcake)
	if err != nil {
		t.Errorf("Expected err to be nil got %s", err.Error())
	}
	if cupcake.Flavour != "vomit" {
		t.Errorf("Expected flavour to be 'vomit', got: %v", cupcake.Flavour)
	}
	if !cupcake.Sprinkles {
		t.Errorf("Expected sprinkles to be true, got: %v", cupcake.Sprinkles)
	}

	cupcake = Cupcake{}
	err = client.Post("/", headers, &Cupcake{"vomit", true}, &cupcake)
	if err != nil {
		t.Errorf("Expected err to be nil got %s", err.Error())
	}
	if cupcake.Flavour != "vomit" {
		t.Errorf("Expected flavour to be 'vomit', got: %v", cupcake.Flavour)
	}
	if !cupcake.Sprinkles {
		t.Errorf("Expected sprinkles to be true, got: %v", cupcake.Sprinkles)
	}

	cupcake = Cupcake{}
	err = client.Delete("/", nil, &cupcake)
	if err != nil {
		t.Errorf("Expected err to be nil got %s", err.Error())
	}
}

================================================
FILE: clients/logs/eks_cloudwatch_logs_client.go
================================================
package logs

import (
	"encoding/json"
	"fmt"
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/awserr"
	"github.com/aws/aws-sdk-go/aws/request"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/cloudwatchlogs"
	"github.com/pkg/errors"
	"github.com/stitchfix/flotilla-os/config"
	"github.com/stitchfix/flotilla-os/exceptions"
	"github.com/stitchfix/flotilla-os/state"
	"log"
	"net/http"
	"os"
	"sort"
	"strings"

	awstrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/aws/aws-sdk-go/aws"
)
*string, facility *string) (string, *string, error) { startFromHead := true //Pod isn't there yet - dont return a 404 if run.PodName == nil { return "", nil, nil } handle := lc.toStreamName(run) args := &cloudwatchlogs.GetLogEventsInput{ LogGroupName: &lc.logNamespace, LogStreamName: &handle, StartFromHead: &startFromHead, } if lastSeen != nil && len(*lastSeen) > 0 { args.NextToken = lastSeen } result, err := lc.logsClient.GetLogEvents(args) if err != nil { if aerr, ok := err.(awserr.Error); ok { if aerr.Code() == cloudwatchlogs.ErrCodeResourceNotFoundException { return "", nil, exceptions.MissingResource{err.Error()} } else if request.IsErrorThrottle(err) { lc.logger.Printf( "thottled getting logs; executable_id: %v, run_id: %s, error: %+v\n", executable.GetExecutableID(), run.RunID, err) return "", lastSeen, nil } } return "", nil, errors.Wrap(err, "problem getting logs") } if len(result.Events) == 0 { return "", result.NextForwardToken, nil } message := lc.logsToMessage(result.Events) return message, result.NextForwardToken, nil } // This method doesn't return log string, it is a placeholder only. func (lc *EKSCloudWatchLogsClient) LogsText(executable state.Executable, run state.Run, w http.ResponseWriter) error { return errors.Errorf("EKSCloudWatchLogsClient does not support LogsText method.") } // Generate stream name func (lc *EKSCloudWatchLogsClient) toStreamName(run state.Run) string { return fmt.Sprintf("%s", *run.PodName) } // Convert Cloudwatch logs to strings func (lc *EKSCloudWatchLogsClient) logsToMessage(events []*cloudwatchlogs.OutputLogEvent) string { sort.Sort(byTimestamp(events)) messages := make([]string, len(events)) for i, event := range events { var l EKSCloudWatchLog err := json.Unmarshal([]byte(*event.Message), &l) if err != nil { messages[i] = *event.Message } messages[i] = l.Log } return strings.Join(messages, "") } func (lc *EKSCloudWatchLogsClient) createNamespaceIfNotExists() error { exists, err := lc.namespaceExists() if err != nil { return errors.Wrapf(err, "problem checking if log namespace [%s] exists", lc.logNamespace) } if !exists { return lc.createNamespace() } return nil } // Check for the existence of a namespace. func (lc *EKSCloudWatchLogsClient) namespaceExists() (bool, error) { result, err := lc.logsClient.DescribeLogGroups(&cloudwatchlogs.DescribeLogGroupsInput{ LogGroupNamePrefix: &lc.logNamespace, }) if err != nil { return false, errors.Wrapf(err, "problem describing log groups with prefix [%s]", lc.logNamespace) } if len(result.LogGroups) == 0 { return false, nil } for _, group := range result.LogGroups { if *group.LogGroupName == lc.logNamespace { return true, nil } } return false, nil } // Creates namespace is not present. 
// Creates the namespace if not present.
func (lc *EKSCloudWatchLogsClient) createNamespace() error {
	_, err := lc.logsClient.CreateLogGroup(&cloudwatchlogs.CreateLogGroupInput{
		LogGroupName: &lc.logNamespace,
	})
	if err != nil {
		return errors.Wrapf(err, "problem creating log group with log group name [%s]", lc.logNamespace)
	}

	_, err = lc.logsClient.PutRetentionPolicy(&cloudwatchlogs.PutRetentionPolicyInput{
		LogGroupName:    &lc.logNamespace,
		RetentionInDays: &lc.logRetentionInDays,
	})
	if err != nil {
		return errors.Wrapf(err, "problem setting log group retention policy for log group name [%s]", lc.logNamespace)
	}
	return nil
}

================================================
FILE: clients/logs/eks_s3_logs_client.go
================================================
package logs

import (
	"bufio"
	"bytes"
	"compress/gzip"
	"context"
	"encoding/json"
	"fmt"
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/request"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/pkg/errors"
	"github.com/stitchfix/flotilla-os/config"
	"github.com/stitchfix/flotilla-os/state"
	awstrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/aws/aws-sdk-go/aws"
	"io"
	"log"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"
)

// EKSS3LogsClient corresponds with the AWS logs driver
// for EKS and returns logs for runs
type EKSS3LogsClient struct {
	logRetentionInDays int64
	logNamespace       string
	s3Client           *s3.S3
	s3Bucket           string
	s3BucketRootDir    string
	logger             *log.Logger
	emrS3LogsBucket    string
	emrS3LogsBasePath  string
}

type s3Log struct {
	Log    string    `json:"log"`
	Stream string    `json:"stream"`
	Time   time.Time `json:"time"`
}

// Name returns the name of the logs client
func (lc *EKSS3LogsClient) Name() string {
	return "eks-s3"
}

// Initialize sets up the EKSS3LogsClient
func (lc *EKSS3LogsClient) Initialize(conf config.Config) error {
	//confLogOptions := conf.GetStringMapString("eks_log_driver_options")
	awsRegion := conf.GetString("eks_log_driver_options_awslogs_region")
	if len(awsRegion) == 0 {
		awsRegion = conf.GetString("aws_default_region")
	}
	if len(awsRegion) == 0 {
		return errors.Errorf(
			"EKSS3LogsClient needs one of [eks.log.driver.options.awslogs-region] or [aws_default_region] set in config")
	}
	flotillaMode := conf.GetString("flotilla_mode")
	if flotillaMode != "test" {
		// Wrap the session once for Datadog tracing (it was previously wrapped twice).
		sess := awstrace.WrapSession(session.Must(session.NewSession(&aws.Config{
			Region: aws.String(awsRegion)})))
		lc.s3Client = s3.New(sess, aws.NewConfig().WithRegion(awsRegion))
	}
	lc.emrS3LogsBucket = conf.GetString("emr_log_bucket")
	lc.emrS3LogsBasePath = conf.GetString("emr_log_base_path")

	s3BucketName := conf.GetString("eks_log_driver_options_s3_bucket_name")
	if len(s3BucketName) == 0 {
		return errors.Errorf(
			"EKSS3LogsClient needs [eks_log_driver_options_s3_bucket_name] set in config")
	}
	lc.s3Bucket = s3BucketName

	s3BucketRootDir := conf.GetString("eks_log_driver_options_s3_bucket_root_dir")
	if len(s3BucketRootDir) == 0 {
		return errors.Errorf(
			"EKSS3LogsClient needs [eks.log.driver.options.s3_bucket_root_dir] set in config")
	}
	lc.s3BucketRootDir = s3BucketRootDir

	lc.logger = log.New(os.Stderr, "[s3logs] ", log.Ldate|log.Ltime|log.Lshortfile)
	return nil
}
func (lc *EKSS3LogsClient) emrLogsToMessageString(run state.Run, lastSeen *string, role *string, facility *string) (string, *string, error) {
	s3DirName, err := lc.emrDriverLogsPath(run)
	if err != nil {
		return "", aws.String(""), errors.Errorf("No logs")
	}
	params := &s3.ListObjectsV2Input{
		Bucket:  aws.String(lc.emrS3LogsBucket),
		Prefix:  aws.String(s3DirName),
		MaxKeys: aws.Int64(1000),
	}
	pageNum := 0
	lastModified := &time.Time{}
	var key *string
	err = lc.s3Client.ListObjectsV2Pages(params, func(result *s3.ListObjectsV2Output, lastPage bool) bool {
		pageNum++
		if result != nil {
			for _, content := range result.Contents {
				if strings.Contains(*content.Key, *role) && strings.Contains(*content.Key, *facility) && lastModified.Before(*content.LastModified) {
					if content != nil && *content.Size < int64(10000000) {
						key = content.Key
						lastModified = content.LastModified
					}
				}
			}
		}
		if lastPage {
			return false
		}
		return pageNum <= 10
	})
	if key == nil {
		lc.logger.Println(fmt.Sprintf("run=%s emr logging key not found for role=%s facility=%s", run.RunID, *role, *facility))
		return "", aws.String(""), errors.Errorf("No driver logs found")
	}

	startPosition := int64(0)
	if lastSeen != nil {
		parsed, err := strconv.ParseInt(*lastSeen, 10, 64)
		if err == nil {
			startPosition = parsed
		}
	}

	s3Obj, err := lc.s3Client.GetObjectWithContext(
		context.Background(),
		&s3.GetObjectInput{
			Bucket: aws.String(lc.emrS3LogsBucket),
			Key:    aws.String(*key),
		},
		func(r *request.Request) {
			// Otherwise we get an unzipped response.
			r.HTTPRequest.Header.Add("Accept-Encoding", "gzip")
		})
	if s3Obj != nil && err == nil {
		if s3Obj.ContentLength != nil && *s3Obj.ContentLength > int64(10000000) {
			return "", aws.String(""), errors.Errorf("Logs > 10MB, will not display.")
		}
		defer s3Obj.Body.Close()
		gr, err := gzip.NewReader(s3Obj.Body)
		if err != nil {
			return "", aws.String(""), err
		}
		defer gr.Close()
		reader := bufio.NewReader(gr)
		var b0 bytes.Buffer
		counter := int64(0)
		for {
			line, err := reader.ReadBytes('\n')
			if err != nil {
				if err == io.EOF {
					return b0.String(), aws.String(fmt.Sprintf("%d", counter)), nil
				}
				// A non-EOF read error previously looped forever; return it instead.
				return b0.String(), aws.String(fmt.Sprintf("%d", counter)), err
			}
			if counter >= startPosition {
				b0.Write(line)
			}
			counter = counter + 1
		}
	}
	return "", aws.String(""), errors.Errorf("No driver logs found")
}

func (lc *EKSS3LogsClient) emrDriverLogsPath(run state.Run) (string, error) {
	if run.SparkExtension.EMRJobId != nil && run.SparkExtension.VirtualClusterId != nil {
		return fmt.Sprintf("%s/%s/jobs/%s/",
			lc.emrS3LogsBasePath,
			*run.SparkExtension.VirtualClusterId,
			*run.SparkExtension.EMRJobId,
		), nil
	}
	return "", errors.New("couldn't construct s3 path.")
}

func (lc *EKSS3LogsClient) Logs(executable state.Executable, run state.Run, lastSeen *string, role *string, facility *string) (string, *string, error) {
	// Guard against a nil engine before dereferencing.
	if run.Engine != nil && *run.Engine == state.EKSSparkEngine {
		return lc.emrLogsToMessageString(run, lastSeen, role, facility)
	}
	result, err := lc.getS3Object(run)
	startPosition := int64(0)
	if lastSeen != nil {
		parsed, err := strconv.ParseInt(*lastSeen, 10, 64)
		if err == nil {
			startPosition = parsed
		}
	}
	if result != nil && err == nil {
		acc, position, err := lc.logsToMessageString(result, startPosition)
		newLastSeen := fmt.Sprintf("%d", position)
		return acc, &newLastSeen, err
	}
	return "", aws.String(""), errors.Errorf("No logs.")
}

// LogsText writes all logs for the run to w.
func (lc *EKSS3LogsClient) LogsText(executable state.Executable, run state.Run, w http.ResponseWriter) error {
	if run.Engine == nil || *run.Engine == state.EKSEngine {
		result, err := lc.getS3Object(run)
		if err != nil {
			return err
		} else if result != nil {
			return lc.logsToMessage(result, w)
		}
	}
	if run.Engine != nil && *run.Engine == state.EKSSparkEngine {
		return lc.logsEMR(w)
	}
	return nil
}
func (lc *EKSS3LogsClient) getS3Object(run state.Run) (*s3.GetObjectOutput, error) {
	// The pod may not exist yet; a missing pod should not surface as a 404.
	s3DirName := lc.toS3DirName(run)

	// Get the list of S3 objects in the run_id folder.
	result, err := lc.s3Client.ListObjects(&s3.ListObjectsInput{
		Bucket: aws.String(lc.s3Bucket),
		Prefix: aws.String(s3DirName),
	})
	if err != nil {
		return nil, errors.Wrap(err, "problem getting logs")
	}
	if result == nil || result.Contents == nil || len(result.Contents) == 0 {
		return nil, errors.New("no s3 files associated with the run.")
	}

	var key *string
	lastModified := &time.Time{}
	// Find the latest log file (there can be multiple log files per run due to pod retries).
	for _, content := range result.Contents {
		if content == nil {
			continue
		}
		if strings.Contains(*content.Key, run.RunID) &&
			lastModified.Before(*content.LastModified) &&
			*content.Size < int64(10000000) {
			key = content.Key
			lastModified = content.LastModified
		}
	}
	if key != nil {
		return lc.getS3Key(key)
	}
	return nil, errors.New("no s3 files associated with the run.")
}

func (lc *EKSS3LogsClient) getS3Key(s3Key *string) (*s3.GetObjectOutput, error) {
	result, err := lc.s3Client.GetObject(&s3.GetObjectInput{
		Bucket: aws.String(lc.s3Bucket),
		Key:    aws.String(*s3Key),
	})
	if err != nil {
		return nil, err
	}
	return result, nil
}

// Formulate the run's log dir name on S3.
func (lc *EKSS3LogsClient) toS3DirName(run state.Run) string {
	return fmt.Sprintf("%s/%s", lc.s3BucketRootDir, run.RunID)
}

// Converts log messages from S3 to strings and writes the contents of the entire file to w.
func (lc *EKSS3LogsClient) logsToMessage(result *s3.GetObjectOutput, w http.ResponseWriter) error {
	reader := bufio.NewReader(result.Body)
	for {
		line, err := reader.ReadBytes('\n')
		if err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}
		parsedLine, err := parseLines(line)
		if err != nil {
			return err
		}
		if _, err = io.WriteString(w, parsedLine.Log); err != nil {
			return err
		}
	}
}

func (lc *EKSS3LogsClient) logsEMR(w http.ResponseWriter) error {
	_, _ = io.WriteString(w, "todo!!!")
	return nil
}

// Converts log messages from S3 to a string, starting from the given line offset.
func (lc *EKSS3LogsClient) logsToMessageString(result *s3.GetObjectOutput, startingPosition int64) (string, int64, error) {
	acc := ""
	currentPosition := int64(0)

	// If less than or equal to 0, read the entire log.
	if startingPosition <= 0 {
		startingPosition = currentPosition
	}

	// No S3 file or object.
	if result == nil {
		return acc, startingPosition, errors.New("s3 object not present.")
	}

	reader := bufio.NewReader(result.Body)
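	// Paging contract: lastSeen is a line offset into the file. Each call
	// discards lines up to startingPosition, returns at most
	// state.MaxLogLines lines, and reports the new offset so the caller can
	// resume where it left off.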
	// Read and discard lines until startingPosition is reached.
	for currentPosition < startingPosition {
		currentPosition = currentPosition + 1
		_, err := reader.ReadBytes('\n')
		if err != nil {
			if err == io.EOF {
				return acc, startingPosition, nil
			}
			return acc, startingPosition, err
		}
	}

	// Read up to MaxLogLines.
	for currentPosition <= startingPosition+state.MaxLogLines {
		currentPosition = currentPosition + 1
		line, err := reader.ReadBytes('\n')
		if err != nil {
			if err == io.EOF {
				return acc, currentPosition, nil
			}
			return acc, currentPosition, err
		}
		parsedLine, err := parseLines(line)
		if err == nil {
			acc = fmt.Sprintf("%s%s", acc, parsedLine.Log)
		}
	}
	_ = result.Body.Close()
	return acc, currentPosition, nil
}

func parseLines(input []byte) (s3Log, error) {
	// Handles both the docker engine (JSON) and containerd (plain-text) log formats.
	// TODO: clean this up once the containerd migration is complete.
	var parsedInput s3Log
	err := json.Unmarshal(input, &parsedInput)
	if err != nil {
		// containerd format: "<timestamp> <stream> <P|F> <log...>"; index 2 is
		// the partial/full flag and is intentionally skipped.
		splitLines := strings.Split(string(input), " ")
		if len(splitLines) >= 4 {
			layout := "2006-01-02T15:04:05.999999999Z"
			timestamp, err := time.Parse(layout, splitLines[0])
			if err != nil {
				return parsedInput, err
			}
			parsedInput.Time = timestamp
			parsedInput.Stream = splitLines[1]
			parsedInput.Log = strings.Join(splitLines[3:], " ")
		}
	}
	return parsedInput, nil
}

================================================
FILE: clients/logs/logs.go
================================================
package logs

import (
	"fmt"
	"github.com/aws/aws-sdk-go/service/cloudwatchlogs"
	"github.com/pkg/errors"
	"github.com/stitchfix/flotilla-os/config"
	flotillaLog "github.com/stitchfix/flotilla-os/log"
	"github.com/stitchfix/flotilla-os/state"
	"net/http"
)

//
// Client returns logs for a Run
//
type Client interface {
	Name() string
	Initialize(config config.Config) error
	Logs(executable state.Executable, run state.Run, lastSeen *string, role *string, facility *string) (string, *string, error)
	LogsText(executable state.Executable, run state.Run, w http.ResponseWriter) error
}

type logsClient interface {
	DescribeLogGroups(input *cloudwatchlogs.DescribeLogGroupsInput) (*cloudwatchlogs.DescribeLogGroupsOutput, error)
	CreateLogGroup(input *cloudwatchlogs.CreateLogGroupInput) (*cloudwatchlogs.CreateLogGroupOutput, error)
	PutRetentionPolicy(input *cloudwatchlogs.PutRetentionPolicyInput) (*cloudwatchlogs.PutRetentionPolicyOutput, error)
	GetLogEvents(input *cloudwatchlogs.GetLogEventsInput) (*cloudwatchlogs.GetLogEventsOutput, error)
}

type byTimestamp []*cloudwatchlogs.OutputLogEvent

func (events byTimestamp) Len() int      { return len(events) }
func (events byTimestamp) Swap(i, j int) { events[i], events[j] = events[j], events[i] }
func (events byTimestamp) Less(i, j int) bool {
	return *(events[i].Timestamp) < *(events[j].Timestamp)
}

//
// NewLogsClient creates and initializes a run logs client
//
func NewLogsClient(conf config.Config, logger flotillaLog.Logger, name string) (Client, error) {
	_ = logger.Log("level", "info", "message", "Initializing logs client", "client", name)
	switch name {
	case "eks":
		// The EKS log driver ships container logs to S3.
		eksS3 := &EKSS3LogsClient{}
		if err := eksS3.Initialize(conf); err != nil {
			return nil, errors.Wrap(err, "problem initializing EKSS3LogsClient")
		}
		return eksS3, nil
	default:
		return nil, fmt.Errorf("No Client named [%s] was found", name)
	}
}

================================================
FILE: clients/metrics/datadog_metrics_client.go
================================================
package metrics

import (
	"fmt"
	"github.com/DataDog/datadog-go/v5/statsd"
	"github.com/stitchfix/flotilla-os/config"
	"os"
	"strings"
	"time"
)
// DatadogStatsdMetricsClient accepts statsd metrics and forwards them to Datadog.
type DatadogStatsdMetricsClient struct {
	client *statsd.Client
}

// Init initializes the client. The statsd address is derived from the
// DD_AGENT_HOST environment variable (port 8125); the metric prefix comes from
// the *metrics_dogstatsd_namespace* config key.
func (dd *DatadogStatsdMetricsClient) Init(conf config.Config) error {
	host := os.Getenv("DD_AGENT_HOST")
	var addr string
	// A host containing a colon but no square bracket is a bare IPv6 address
	// and must be bracketed before the port is appended.
	if strings.Contains(host, ":") && !strings.Contains(host, "[") {
		addr = fmt.Sprintf("[%s]:8125", host)
	} else {
		addr = fmt.Sprintf("%s:8125", host)
	}
	client, err := statsd.New(addr, statsd.WithNamespace(conf.GetString("metrics_dogstatsd_namespace")))
	if err != nil {
		return err
	}
	dd.client = client
	return nil
}

// Decrement decreases the metric by 1; tags and rate are passed through to statsd.
func (dd *DatadogStatsdMetricsClient) Decrement(name Metric, tags []string, rate float64) error {
	return dd.client.Decr(string(name), tags, rate)
}

// Increment increases the metric by 1; tags and rate are passed through to statsd.
func (dd *DatadogStatsdMetricsClient) Increment(name Metric, tags []string, rate float64) error {
	return dd.client.Incr(string(name), tags, rate)
}

// Histogram tracks the statistical distribution of a set of values
func (dd *DatadogStatsdMetricsClient) Histogram(name Metric, value float64, tags []string, rate float64) error {
	return dd.client.Histogram(string(name), value, tags, rate)
}

// Distribution tracks the statistical distribution of a set of values
func (dd *DatadogStatsdMetricsClient) Distribution(name Metric, value float64, tags []string, rate float64) error {
	return dd.client.Distribution(string(name), value, tags, rate)
}

// Timing sends timing information, it is an alias for TimeInMilliseconds
func (dd *DatadogStatsdMetricsClient) Timing(name Metric, value time.Duration, tags []string, rate float64) error {
	return dd.client.Timing(string(name), value, tags, rate)
}

// Set counts the number of unique elements in a group
func (dd *DatadogStatsdMetricsClient) Set(name Metric, value string, tags []string, rate float64) error {
	return dd.client.Set(string(name), value, tags, rate)
}

// Event sends an event with the given title, text, and tags.
func (dd *DatadogStatsdMetricsClient) Event(e event) error {
	se := statsd.NewEvent(e.Title, e.Text)
	se.Tags = e.Tags
	return dd.client.Event(se)
}

================================================
FILE: clients/metrics/metrics.go
================================================
package metrics

import (
	"fmt"
	"sync"
	"time"

	"github.com/pkg/errors"
	"github.com/stitchfix/flotilla-os/config"
)

type Metric string

const (
	// Metric associated to submission of jobs to EKS
	EngineEKSExecute Metric = "engine.eks.execute"
	// Metric associated to submission of jobs to SQS queue, before EKS submission.
	EngineEKSEnqueue Metric = "engine.eks.enqueue"
	// Metric associated to submission of jobs to EMR
	EngineEMRExecute Metric = "engine.emr.execute"
	// Metric associated to submission of jobs to SQS queue, before EMR submission.
	EngineEMREnqueue Metric = "engine.emr.enqueue"
	// Metric associated to termination of EKS jobs via the API.
	EngineEKSTerminate Metric = "engine.eks.terminate"
	// Metric associated to termination of EMR jobs via the API.
	EngineEMRTerminate Metric = "engine.emr.terminate"
	// Metric associated to termination of pods hopping between hosts.
EngineEKSRunPodnameChange Metric = "engine.eks.run_podname_changed" // Metric associated to pod events where there was a Cluster Autoscale event. EngineEKSNodeTriggeredScaledUp Metric = "engine.eks.triggered_scale_up" // Timing for status worker processEKSRun StatusWorkerProcessEKSRun Metric = "status_worker.timing.process_eks_run" // Timing for acquire lock StatusWorkerAcquireLock Metric = "status_worker.timing.acquire_lock" // Timing for fetch_pod_metrics StatusWorkerFetchPodMetrics Metric = "status_worker.timing.fetch_pod_metrics" // Timing for fetch_update_status StatusWorkerFetchUpdateStatus Metric = "status_worker.timing.fetch_update_status" // Metric for locked runs StatusWorkerLockedRuns Metric = "status_worker.locked_runs" // Timing for fetch metrics StatusWorkerFetchMetrics Metric = "status_worker.fetch_metrics" // Timing for get pod list StatusWorkerGetPodList Metric = "status_worker.get_pod_list" // Timing for get events StatusWorkerGetEvents Metric = "status_worker.get_events" // Timing for get job StatusWorkerGetJob Metric = "status_worker.get_job" // Engine update run EngineUpdateRun Metric = "engine.update_run" // ARA metrics - tracking Auto Resource Adjustment behavior EngineEKSARAEstimationAttempted Metric = "engine.eks.ara.estimation_attempted" EngineEKSARAEstimationSucceeded Metric = "engine.eks.ara.estimation_succeeded" EngineEKSARAEstimationFailed Metric = "engine.eks.ara.estimation_failed" EngineEKSARAResourceAdjustment Metric = "engine.eks.ara.resource_adjustment" EngineEKSARANoHistoricalData Metric = "engine.eks.ara.no_historical_data" EngineEKSARAHitMaxMemory Metric = "engine.eks.ara.hit_max_memory" EngineEKSARAHitMaxCPU Metric = "engine.eks.ara.hit_max_cpu" EngineEKSARAMemoryIncreaseRatio Metric = "engine.eks.ara.memory_increase_ratio" EngineEKSARACPUIncreaseRatio Metric = "engine.eks.ara.cpu_increase_ratio" EngineEKSARAFinalMemoryMB Metric = "engine.eks.ara.final_memory_mb" EngineEKSARAFinalCPUMillicores Metric = "engine.eks.ara.final_cpu_millicores" EngineEKSARADefaultMemory Metric = "engine.eks.ara.default_memory" EngineEKSARAARAMemory Metric = "engine.eks.ara.ara_memory" EngineEKSARADefaultCPU Metric = "engine.eks.ara.default_cpu" EngineEKSARAARACPU Metric = "engine.eks.ara.ara_cpu" EngineEKSARAMemoryIncrease Metric = "engine.eks.ara.memory_increase" EngineEKSARACPUIncrease Metric = "engine.eks.ara.cpu_increase" EngineEKSARANullCommandHash Metric = "engine.eks.ara.null_command_hash" ) type MetricTag string const ( // Metric tag for job success. StatusSuccess MetricTag = "status:success" // Metric tag for job failure. StatusFailure MetricTag = "status:failure" ) type Client interface { Init(conf config.Config) error Decrement(name Metric, tags []string, rate float64) error Increment(name Metric, tags []string, rate float64) error Histogram(name Metric, value float64, tags []string, rate float64) error Distribution(name Metric, value float64, tags []string, rate float64) error Set(name Metric, value string, tags []string, rate float64) error Event(evt event) error Timing(name Metric, value time.Duration, tags []string, rate float64) error } type event struct { Title string Text string Tags []string } var once sync.Once var instance Client // Instantiating the Metrics Client. func InstantiateClient(conf config.Config) error { // Return an error if `metrics_client` isn't set in config. 
	if !conf.IsSet("metrics_client") {
		return fmt.Errorf("`metrics_client` not set in config, unable to instantiate metrics client")
	}
	var err error
	name := conf.GetString("metrics_client")
	once.Do(func() {
		switch name {
		case "dogstatsd":
			instance = &DatadogStatsdMetricsClient{}
			if err = instance.Init(conf); err != nil {
				err = errors.Errorf("Unable to initialize dogstatsd client.")
				instance = nil
				break
			}
		default:
			err = fmt.Errorf("no client named [%s] was found", name)
		}
	})
	return err
}

// Decrement is a package-level Decr: a count of -1.
func Decrement(name Metric, tags []string, rate float64) error {
	if instance != nil {
		return instance.Decrement(name, tags, rate)
	}
	return errors.Errorf("MetricsClient instance is nil, unable to send Decrement metric.")
}

// Increment is a package-level Incr: a count of +1.
func Increment(name Metric, tags []string, rate float64) error {
	if instance != nil {
		return instance.Increment(name, tags, rate)
	}
	return errors.Errorf("MetricsClient instance is nil, unable to send Increment metric.")
}

// Histogram tracks the statistical distribution of a set of values
func Histogram(name Metric, value float64, tags []string, rate float64) error {
	if instance != nil {
		return instance.Histogram(name, value, tags, rate)
	}
	return errors.Errorf("MetricsClient instance is nil, unable to send Histogram metric.")
}

// Distribution tracks the statistical distribution of a set of values
func Distribution(name Metric, value float64, tags []string, rate float64) error {
	if instance != nil {
		return instance.Distribution(name, value, tags, rate)
	}
	return errors.Errorf("MetricsClient instance is nil, unable to send Distribution metric.")
}

// Set counts the number of unique elements in a group
func Set(name Metric, value string, tags []string, rate float64) error {
	if instance != nil {
		return instance.Set(name, value, tags, rate)
	}
	return errors.Errorf("MetricsClient instance is nil, unable to send Set metric.")
}
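// Each package-level wrapper guards against a nil instance so that callers can
// emit metrics unconditionally: when no metrics client has been configured,
// the call returns an error instead of panicking.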
// Event sends an event with the given title, text, and tags.
func Event(title string, text string, tags []string) error {
	if instance != nil {
		return instance.Event(event{
			Title: title,
			Text:  text,
			Tags:  tags,
		})
	}
	return errors.Errorf("MetricsClient instance is nil, unable to send Event metric.")
}

// Timing sends timing information, it is an alias for TimeInMilliseconds
func Timing(name Metric, value time.Duration, tags []string, rate float64) error {
	if instance != nil {
		return instance.Timing(name, value, tags, rate)
	}
	return errors.Errorf("MetricsClient instance is nil, unable to send Timing metric.")
}

================================================
FILE: clients/middleware/client.go
================================================
package middleware

import (
	"github.com/stitchfix/flotilla-os/state"
	"net/http"
)

type Client interface {
	AnnotateLaunchRequest(headers *http.Header, lr *state.LaunchRequestV2) error
}

type middlewareClient struct{}

func NewClient() (Client, error) {
	return &middlewareClient{}, nil
}

func (mwC middlewareClient) AnnotateLaunchRequest(headers *http.Header, lr *state.LaunchRequestV2) error {
	return nil
}

================================================
FILE: conf/config.yml
================================================
aws_default_region: us-east-1
cluster_client: eks
create_database_schema: true
database_url: postgresql://flotilla:flotilla@localhost/flotilla?sslmode=disable
eks_clusters: 'clusta, cupcake'
eks_cluster_default: 'clusta'
eks_gpu_cluster_default: 'clusta'
eks_tier_default: '4'
eks_log_driver_name: awslogs
eks_log_driver_options_awslogs-group: flotilla-eks-namespace
eks_log_driver_options_awslogs-region: us-east-1
eks_log_namespace: flotilla-eks-namespace
eks_log_retention_days: 90
enabled_workers:
  - retry
  - submit
execution_engine: eks
flotilla_mode: test
http_server_cors_allowed_origins:
  - http://localhost:3001
http_server_listen_address: :3000
http_server_read_timeout_seconds: 5
http_server_write_timeout_seconds: 10
logs_client: cloudwatch
metrics_client: dogstatsd
metrics_dogstatsd_address: 127.0.0.1:8125
metrics_dogstatsd_namespace: my.flotilla.namespace
metrics_dogstatsd_tags:
  - test
owner_id_var: FLOTILLA_RUN_OWNER_ID
queue_manager: sqs
queue_namespace: dev-flotilla
queue_process_time: 45
queue_retention_seconds: 604800
queue_status: flotilla-status-updates-dev
queue_status_rule: flotilla-task-status
readonly_database_url: postgresql://flotilla:flotilla@localhost/flotilla?sslmode=disable

================================================
FILE: config/config.go
================================================
package config

import (
	"github.com/pkg/errors"
	"github.com/spf13/viper"
	"strings"
)

//
// Config interface to wrap external configuration object
//
type Config interface {
	GetString(key string) string
	GetStringSlice(key string) []string
	GetStringMapString(key string) map[string]string
	GetInt(key string) int
	GetBool(key string) bool
	GetFloat64(key string) float64
	IsSet(key string) bool
}

//
// NewConfig initializes a configuration object
// - if confDir is non-nil, searches there and loads a "config.yml"
// - sets configuration to read from environment variables automatically
//
func NewConfig(confDir *string) (Config, error) {
	v := viper.New()
	if v == nil {
		return &conf{}, errors.New("Error initializing internal config")
	}

	if confDir != nil {
		v.SetConfigName("config")
		v.SetConfigType("yaml")
		v.AddConfigPath(*confDir)
		if err := v.ReadInConfig(); err != nil {
			return &conf{}, errors.Wrapf(err, "problem reading config from [%s]", *confDir)
		}
	}
	v.AutomaticEnv()
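	// AutomaticEnv plus the "." -> "_" replacer below mean a key such as
	// metrics.dogstatsd.namespace can be overridden by setting the
	// METRICS_DOGSTATSD_NAMESPACE environment variable.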
	v.SetEnvKeyReplacer(strings.NewReplacer(".", "_"))
	return &conf{v}, nil
}

type conf struct {
	v *viper.Viper
}

// GetString returns the value associated with the key as a string.
func (c *conf) GetString(key string) string {
	return c.v.GetString(key)
}

// GetFloat64 returns the value associated with the key as a float64.
func (c *conf) GetFloat64(key string) float64 {
	return c.v.GetFloat64(key)
}

// GetInt returns the value associated with the key as an integer.
func (c *conf) GetInt(key string) int {
	return c.v.GetInt(key)
}

// GetBool returns the value associated with the key as a boolean.
func (c *conf) GetBool(key string) bool {
	return c.v.GetBool(key)
}

// GetStringMapString returns the value associated with the key as a map of strings.
func (c *conf) GetStringMapString(key string) map[string]string {
	return c.v.GetStringMapString(key)
}

// GetStringSlice returns the value associated with the key as a slice of strings.
func (c *conf) GetStringSlice(key string) []string {
	return c.v.GetStringSlice(key)
}

// IsSet checks to see if the key has been set in any of the data locations.
// IsSet is case-insensitive for a key.
func (c *conf) IsSet(key string) bool {
	return c.v.IsSet(key)
}

================================================
FILE: config/config_test.go
================================================
package config

import (
	"os"
	"testing"
)

func TestNewConfig(t *testing.T) {
	var c Config
	c, _ = NewConfig(nil)

	toSet := "sprinkles"
	os.Setenv("CUPCAKE", toSet)
	if c.GetString("cupcake") != toSet {
		t.Errorf("Environment variables not set - expected %s but was %s", toSet, c.GetString("cupcake"))
	}

	confDir := "../conf"
	c, _ = NewConfig(&confDir)
	if !c.IsSet("queue_namespace") || c.GetString("queue_namespace") != "dev-flotilla" {
		t.Errorf("Expected to read from conf dir [queue_namespace]:[dev-flotilla], was: %s", c.GetString("queue_namespace"))
	}
}

================================================
FILE: datadog-ara-dashboard-api.json
================================================
{ "title": "Flotilla ARA (Auto Resource Adjustment) Metrics", "description": "Dashboard tracking Auto Resource Adjustment behavior for EKS and Spark jobs. Monitors resource growth patterns, over-provisioning detection, and OOM-based memory adjustments.
Use the engine filter to view EKS (P99-based 1.75x/1.25x) vs Spark (OOM-based 1.25x/3.0x) jobs separately.", "widgets": [ { "id": 1, "layout": { "x": 0, "y": 0, "width": 47, "height": 15 }, "definition": { "title": "ARA Estimation Attempts vs Successes", "title_size": "16", "title_align": "left", "show_legend": true, "legend_layout": "auto", "legend_columns": [ "avg", "min", "max", "value", "sum" ], "type": "timeseries", "requests": [ { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.estimation_attempted{$cluster,$env,$engine}.as_count()" } ], "style": { "palette": "dog_classic", "line_type": "solid", "line_width": "normal" }, "display_type": "bars" }, { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.estimation_succeeded{$cluster,$env,$engine}.as_count()" } ], "style": { "palette": "green", "line_type": "solid", "line_width": "normal" }, "display_type": "bars" }, { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.estimation_failed{$cluster,$env,$engine}.as_count()" } ], "style": { "palette": "red", "line_type": "solid", "line_width": "normal" }, "display_type": "bars" }, { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.no_historical_data{$cluster,$env,$engine}.as_count()" } ], "style": { "palette": "orange", "line_type": "solid", "line_width": "normal" }, "display_type": "bars" } ], "yaxis": { "label": "", "scale": "linear", "include_zero": true, "min": "auto", "max": "auto" }, "markers": [] } }, { "id": 2, "layout": { "x": 48, "y": 0, "width": 47, "height": 15 }, "definition": { "title": "ARA Resource Adjustments", "title_size": "16", "title_align": "left", "show_legend": true, "legend_size": "0", "type": "timeseries", "requests": [ { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.resource_adjustment{$cluster,$env,$engine}.as_count()" } ], "style": { "palette": "blue", "line_type": "solid", "line_width": "normal" }, "display_type": "bars" } ], "yaxis": { "label": "", "scale": "linear", "include_zero": true, "min": "auto", "max": "auto" }, "markers": [] } }, { "id": 3, "layout": { "x": 0, "y": 16, "width": 47, "height": 15 }, "definition": { "title": "Max Resource Limits Hit (Critical)", "title_size": "16", "title_align": "left", "show_legend": true, "legend_size": "0", "type": "timeseries", "requests": [ { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.hit_max_memory{$cluster,$env,$engine}.as_count()" } ], "style": { "palette": "red", "line_type": "solid", "line_width": "thick" }, "display_type": "line" }, { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.hit_max_cpu{$cluster,$env,$engine}.as_count()" } ], "style": { "palette": "orange", "line_type": "solid", "line_width": "normal" }, "display_type": "line" } ], "yaxis": { "label": "", "scale": "linear", "include_zero": true, "min": "auto", "max": "auto" }, "markers": [ { "label": "Alert Threshold", "value": "y = 0", "display_type": "error dashed" } ] } }, { "id": 4, "layout": { "x": 48, "y": 16, "width": 23, "height": 15 }, "definition": 
{ "title": "Success Rate", "title_size": "16", "title_align": "left", "type": "query_value", "requests": [ { "conditional_formats": [ { "comparator": ">=", "value": 95, "palette": "green_on_white" }, { "comparator": ">=", "value": 80, "palette": "yellow_on_white" }, { "comparator": "<", "value": 80, "palette": "red_on_white" } ], "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.estimation_succeeded{$cluster,$env,$engine}.as_count()", "aggregator": "sum" }, { "data_source": "metrics", "name": "query2", "query": "sum:algo.flotilla.engine.eks.ara.estimation_attempted{$cluster,$env,$engine}.as_count()", "aggregator": "sum" } ], "formulas": [ { "number_format": { "unit": { "label": "%", "type": "custom_unit_label" } }, "formula": "(query1 / query2) * 100" } ] } ], "autoscale": true, "precision": 2 } }, { "id": 5, "layout": { "x": 72, "y": 16, "width": 23, "height": 15 }, "definition": { "title": "Max Memory Hits (Last Hour)", "title_size": "16", "title_align": "left", "type": "query_value", "requests": [ { "conditional_formats": [ { "comparator": ">", "value": 0, "palette": "red_on_white" }, { "comparator": "=", "value": 0, "palette": "green_on_white" } ], "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.hit_max_memory{$cluster,$env,$engine}.as_count()", "aggregator": "sum" } ] } ], "autoscale": true, "custom_unit": "", "precision": 0 } }, { "id": 6, "layout": { "x": 0, "y": 32, "width": 31, "height": 15 }, "definition": { "title": "Memory Increase Ratio Distribution", "title_size": "16", "title_align": "left", "show_legend": false, "type": "heatmap", "yaxis": { "label": "", "scale": "linear", "include_zero": true, "min": "auto", "max": "auto" }, "requests": [ { "style": { "palette": "YlOrRd" }, "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.memory_increase_ratio{$cluster,$env,$engine} by {cluster}" } ] } ] } }, { "id": 7, "layout": { "x": 32, "y": 32, "width": 31, "height": 15 }, "definition": { "title": "CPU Increase Ratio Distribution", "title_size": "16", "title_align": "left", "show_legend": false, "type": "heatmap", "yaxis": { "label": "", "scale": "linear", "include_zero": true, "min": "auto", "max": "auto" }, "requests": [ { "style": { "palette": "YlOrRd" }, "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.cpu_increase_ratio{$cluster,$env,$engine} by {cluster}" } ] } ] } }, { "id": 8, "layout": { "x": 64, "y": 32, "width": 31, "height": 15 }, "definition": { "title": "Top Clusters by Max Memory Hits", "title_size": "16", "title_align": "left", "type": "toplist", "requests": [ { "style": { "palette": "red" }, "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "sum:algo.flotilla.engine.eks.ara.hit_max_memory{$cluster,$env,$engine}.as_count()", "aggregator": "avg" } ], "formulas": [ { "formula": "query1" } ], "sort": { "order_by": [ { "type": "formula", "index": 0, "order": "desc" } ] } } ] } }, { "id": 9, "layout": { "x": 0, "y": 48, "width": 23, "height": 15 }, "definition": { "title": "Default Memory Distribution (Before ARA)", "title_size": "16", "title_align": "left", "show_legend": false, "type": "distribution", "requests": [ { "style": { "palette": "blue" }, "response_format": "scalar", "queries": [ { 
"data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.default_memory{$cluster,$env,$engine} by {cluster}", "aggregator": "avg" } ] } ] } }, { "id": 10, "layout": { "x": 24, "y": 48, "width": 23, "height": 15 }, "definition": { "title": "ARA Memory Distribution (After ARA)", "title_size": "16", "title_align": "left", "show_legend": false, "type": "distribution", "requests": [ { "style": { "palette": "orange" }, "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.ara_memory{$cluster,$env,$engine} by {cluster}", "aggregator": "avg" } ] } ] } }, { "id": 11, "layout": { "x": 48, "y": 48, "width": 23, "height": 15 }, "definition": { "title": "Final Memory Distribution (After Bounds)", "title_size": "16", "title_align": "left", "show_legend": false, "type": "distribution", "requests": [ { "style": { "palette": "red" }, "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.final_memory_mb{$cluster,$env,$engine} by {cluster}", "aggregator": "avg" } ] } ] } }, { "id": 12, "layout": { "x": 72, "y": 48, "width": 23, "height": 15 }, "definition": { "title": "Memory Increase (Absolute MB)", "title_size": "16", "title_align": "left", "show_legend": false, "type": "distribution", "requests": [ { "style": { "palette": "purple" }, "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.memory_increase{$cluster,$env,$engine} by {cluster}", "aggregator": "avg" } ] } ] } }, { "id": 13, "layout": { "x": 0, "y": 64, "width": 23, "height": 15 }, "definition": { "title": "Default CPU Distribution (Before ARA)", "title_size": "16", "title_align": "left", "show_legend": false, "type": "distribution", "requests": [ { "style": { "palette": "blue" }, "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.default_cpu{$cluster,$env,$engine} by {cluster}", "aggregator": "avg" } ] } ] } }, { "id": 14, "layout": { "x": 24, "y": 64, "width": 23, "height": 15 }, "definition": { "title": "ARA CPU Distribution (After ARA)", "title_size": "16", "title_align": "left", "show_legend": false, "type": "distribution", "requests": [ { "style": { "palette": "orange" }, "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.ara_cpu{$cluster,$env,$engine} by {cluster}", "aggregator": "avg" } ] } ] } }, { "id": 15, "layout": { "x": 48, "y": 64, "width": 23, "height": 15 }, "definition": { "title": "Final CPU Distribution (After Bounds)", "title_size": "16", "title_align": "left", "show_legend": false, "type": "distribution", "requests": [ { "style": { "palette": "red" }, "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.final_cpu_millicores{$cluster,$env,$engine} by {cluster}", "aggregator": "avg" } ] } ] } }, { "id": 16, "layout": { "x": 72, "y": 64, "width": 23, "height": 15 }, "definition": { "title": "CPU Increase (Absolute Millicores)", "title_size": "16", "title_align": "left", "show_legend": false, "type": "distribution", "requests": [ { "style": { "palette": "purple" }, "response_format": "scalar", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.cpu_increase{$cluster,$env,$engine} by 
{cluster}", "aggregator": "avg" } ] } ] } }, { "id": 17, "layout": { "x": 0, "y": 80, "width": 47, "height": 15 }, "definition": { "title": "Resource Growth Over Time", "title_size": "16", "title_align": "left", "show_legend": true, "legend_size": "0", "type": "timeseries", "requests": [ { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.default_memory{$cluster,$env,$engine}" } ], "style": { "palette": "blue", "line_type": "solid", "line_width": "normal" }, "display_type": "line" }, { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.ara_memory{$cluster,$env,$engine}" } ], "style": { "palette": "orange", "line_type": "solid", "line_width": "normal" }, "display_type": "line" }, { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.final_memory_mb{$cluster,$env,$engine}" } ], "style": { "palette": "red", "line_type": "solid", "line_width": "thick" }, "display_type": "line" } ], "yaxis": { "label": "Memory (MB)", "scale": "linear", "include_zero": true, "min": "auto", "max": "auto" }, "markers": [ { "label": "248GB Limit (Non-GPU EKS)", "value": "y = 248000", "display_type": "error dashed" } ] } }, { "id": 18, "layout": { "x": 48, "y": 80, "width": 47, "height": 15 }, "definition": { "title": "CPU Growth Over Time", "title_size": "16", "title_align": "left", "show_legend": true, "legend_size": "0", "type": "timeseries", "requests": [ { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.default_cpu{$cluster,$env,$engine}" } ], "style": { "palette": "blue", "line_type": "solid", "line_width": "normal" }, "display_type": "line" }, { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.ara_cpu{$cluster,$env,$engine}" } ], "style": { "palette": "orange", "line_type": "solid", "line_width": "normal" }, "display_type": "line" }, { "response_format": "timeseries", "queries": [ { "data_source": "metrics", "name": "query1", "query": "avg:algo.flotilla.engine.eks.ara.final_cpu_millicores{$cluster,$env,$engine}" } ], "style": { "palette": "red", "line_type": "solid", "line_width": "thick" }, "display_type": "line" } ], "yaxis": { "label": "CPU (millicores)", "scale": "linear", "include_zero": true, "min": "auto", "max": "auto" }, "markers": [ { "label": "60K Limit", "value": "y = 60000", "display_type": "error dashed" } ] } }, { "id": 19, "layout": { "x": 0, "y": 96, "width": 47, "height": 30 }, "definition": { "title": "ARA Logs - Resource Adjustments & Max Limits", "title_size": "16", "title_align": "left", "requests": [ { "response_format": "event_list", "query": { "data_source": "logs_stream", "query_string": "source:flotilla (\"ARA adjusted resources\" OR \"Spark ARA adjusted executor memory\" OR \"Spark ARA adjusted driver memory\" OR \"ARA resource allocation hit maximum limit\" OR \"ARA memory allocation hit maximum limit\" OR \"ARA CPU allocation hit maximum limit\")", "indexes": [], "storage": "hot", "sort": { "order": "desc", "column": "timestamp" } }, "columns": [ { "field": "status_line", "width": "auto" }, { "field": "timestamp", "width": "auto" }, { "field": "host", "width": "auto" }, { "field": "service", "width": "auto" }, { "field": "source", "width": "auto" }, { "field": 
"@status", "width": "auto" }, { "field": "content", "width": "compact" } ] } ], "type": "list_stream" } }, { "id": 20, "layout": { "x": 48, "y": 96, "width": 47, "height": 30 }, "definition": { "title": "ARA Logs - Historical Data Lookups", "title_size": "16", "title_align": "left", "requests": [ { "response_format": "event_list", "query": { "data_source": "logs_stream", "query_string": "source:flotilla (\"ARA: Historical resource data found\" OR \"ARA: No historical resource data found\" OR \"ARA: Error querying historical resource data\")", "indexes": [], "storage": "hot", "sort": { "order": "desc", "column": "timestamp" } }, "columns": [ { "field": "status_line", "width": "auto" }, { "field": "timestamp", "width": "auto" }, { "field": "host", "width": "auto" }, { "field": "service", "width": "auto" }, { "field": "source", "width": "auto" }, { "field": "@status", "width": "auto" }, { "field": "content", "width": "compact" } ] } ], "type": "list_stream" } } ], "template_variables": [ { "name": "cluster", "prefix": "cluster", "available_values": [], "default": "*" }, { "name": "env", "prefix": "env", "available_values": [], "default": "*" }, { "name": "engine", "prefix": "engine", "available_values": [ "eks", "eks-spark" ], "default": "*" } ], "layout_type": "free", "notify_list": [], "pause_auto_refresh": false } ================================================ FILE: docker-compose.yml ================================================ version: '3' services: ui: build: context: ./ui args: FLOTILLA_API: http://localhost:3000/api/v1 DEFAULT_CLUSTER: default environment: FLOTILLA_API: http://localhost:3000/api/v1 DEFAULT_CLUSTER: default ports: - 5000:5000 flotilla: build: . environment: DATABASE_URL: postgresql://flotilla:flotilla@db/flotilla?sslmode=disable FLOTILLA_MODE: dev HTTP_SERVER_CORS_ALLOWED_ORIGINS: http://localhost:5000 AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID} AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY} ports: - 3000:3000 db: image: postgres environment: POSTGRES_USER: flotilla POSTGRES_DB: flotilla POSTGRES_PASSWORD: flotilla ports: - 5432:5432 ================================================ FILE: docs/ara-command-hash-bug-report.md ================================================ # ARA command_hash Bug Report ## Executive Summary The Auto Resource Adjustment (ARA) feature has a **critical bug** where `command_hash` is calculated from the **description** field instead of the actual command, causing: 1. **21,357 runs** (23 definitions) with NULL command_hash receive **no ARA benefit** 2. **Hundreds of thousands of runs** share ARA data across **completely different commands** that happen to have the same description This means jobs can inherit resource allocations from unrelated workloads, leading to incorrect over- or under-provisioning. ## The Bug ### How command_hash Should Work `command_hash` is used by ARA to match similar jobs and apply historical OOM data. The intent is to group jobs running the **same command**. ### How It Actually Works **Location:** `flotilla/endpoints.go:451-453, 514-516, 592-593` ```go if lr.CommandHash == nil && lr.Description != nil { lr.CommandHash = aws.String(fmt.Sprintf("%x", md5.Sum([]byte(*lr.Description)))) } ``` **Problems:** 1. Hash is MD5 of **Description**, not Command 2. If Description is NULL, command_hash stays NULL 3. 
NULL command_hash never matches anything in SQL (`command_hash = NULL` evaluates to NULL, never TRUE)

## Impact by the Numbers

### Bug #1: NULL command_hash (No ARA)

```sql
SELECT COUNT(*) as total_runs, COUNT(DISTINCT definition_id) as definitions_affected
FROM task
WHERE command_hash IS NULL;
```

**Result:**
- **21,357 runs** have NULL command_hash
- **23 definitions** affected
- These jobs **never benefit from ARA** despite it being enabled

**Example:** Definition `sf-base_python-3_11-...` has **55 different commands**, all with NULL command_hash, none sharing ARA data.

### Bug #2: Description-based Hash (Incorrect ARA Sharing)

```sql
-- Find command_hash values with multiple different commands
SELECT definition_id, command_hash,
       COUNT(DISTINCT command) as distinct_commands,
       COUNT(*) FILTER (WHERE exit_code = 137) as oom_count,
       COUNT(*) as total_runs
FROM task
WHERE command_hash IS NOT NULL AND command IS NOT NULL
GROUP BY definition_id, command_hash
HAVING COUNT(DISTINCT command) > 1
ORDER BY oom_count DESC, total_runs DESC
LIMIT 1;
```

**Result:**
- **Worst case:** `command_hash = 407f6885beaec163a742e8c3c8a50d3e`
- **176 different commands** share the same hash
- **115 OOMs** across these different commands
- **287 total runs**
- All share description: "Calibrate Psale Prod / Calibrate Psale"

**Other severe cases:**
- `a0798e54ea76fb8dc1e743fe37f761e0`: 2 commands, **87,142 runs** affected
- `1eeb37af6d7e0e4bb2a73a0f61ac7a79`: 2 commands, **52,844 runs** affected
- `123fad187daf3847583761f5495e3ce8`: 2 commands, **39,181 runs** affected

## Concrete Example: The Smoking Gun

### Timeline

**November 22-24, 2025** - Daily data processing job with description "Calibrate Psale Prod / Calibrate Psale"

#### OOMs in 3-Day Window (Contributing to ARA):

| Date | Run ID | Memory | Command Differs By |
|------|--------|--------|--------------------|
| Nov 22 | `eks-c662-2a1e-44f7...` | 1024 MB | `--as_of 20251121` |
| Nov 22 | `eks-a9fd-92f6-4fe1...` | 1792 MB | `--as_of 20251121` |
| Nov 23 | `eks-055c-c578-4951...` | 1024 MB | `--as_of 20251122` |

**ARA Calculation:**
- P99([1024, 1792, 1024]) = 1792 MB
- 1792 MB × 1.75 = **3136 MB**

#### Next Day Run (Inherits OOM Data):

| Date | Run ID | Memory | Command Differs By | Exit Code |
|------|--------|--------|--------------------|-----------|
| Nov 24 | `eks-0d33-a443-43b9...` | **3136 MB** | `--as_of 20251123` | 0 (Success) |

### The Commands Are Different!

**Nov 23 OOM Command:**
```bash
python3 /dsn-algo-adhoc/damien/projects/fy25q4_psale_calibration/calibrate.py --as_of 20251122
```

**Nov 24 Command (Got ARA from above):**
```bash
python3 /dsn-algo-adhoc/damien/projects/fy25q4_psale_calibration/calibrate.py --as_of 20251123
```

**Only difference:** The date parameter (`20251122` vs `20251123`)

**Why this matters:** These are daily data processing jobs. Each date's data could have completely different characteristics and memory requirements, but they share ARA data because they have the same description.
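To see the collision mechanically, here is a small, self-contained sketch (not part of the codebase; the strings are taken from the example above) showing that the description-based hash is identical for both runs, while a command-based hash would keep them separate:

```go
package main

import (
	"crypto/md5"
	"fmt"
)

func main() {
	// Both runs share this description, so the buggy hash collides.
	description := "Calibrate Psale Prod / Calibrate Psale"

	// The actual commands differ only by the --as_of date.
	nov23 := "python3 /dsn-algo-adhoc/damien/projects/fy25q4_psale_calibration/calibrate.py --as_of 20251122"
	nov24 := "python3 /dsn-algo-adhoc/damien/projects/fy25q4_psale_calibration/calibrate.py --as_of 20251123"

	fmt.Printf("hash(description):   %x\n", md5.Sum([]byte(description))) // identical for both runs
	fmt.Printf("hash(nov23 command): %x\n", md5.Sum([]byte(nov23)))       // distinct
	fmt.Printf("hash(nov24 command): %x\n", md5.Sum([]byte(nov24)))       // distinct
}
```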
### Verification The exact ARA query for the Nov 24 run returns: ```sql SELECT cast((percentile_disc(0.99) within GROUP (ORDER BY A.max_memory_used)) * 1.75 as int) as memory FROM (SELECT memory as max_memory_used FROM TASK WHERE queued_at >= '2025-11-21 15:10:01' AND queued_at < '2025-11-24 15:10:01' AND (exit_code = 137 or exit_reason = 'OOMKilled') AND definition_id = 'sf-base_python-3_9-59ab1a32-cdda-4eb8-5824-49d17d96b1fd' AND command_hash = '407f6885beaec163a742e8c3c8a50d3e' LIMIT 30) A; ``` **Result:** 3136 MB ← **Exactly what the Nov 24 run received** ## Concrete Example #2: Catastrophic Case at 350GB Maximum ### The Worst-Case Scenario: ML Training at the Limit **Definition:** `sf-base_pytorch2-24__5-py3-698fef2e-4bad-4e45-624c-c57fec2f2aa7` **Command Hash:** `b4c7adde0a3dc7dd13a8da282f1693c1` **Shared Description:** "CTSM PF ATRF Metrics SubSeqRefactor 12-2 Train Staging / Model Training" This case demonstrates the bug at its most destructive: **12 completely different machine learning training configurations** all sharing one command_hash and **starting at the 350GB maximum memory limit from day one**. ### The Three Training Configurations All run PyTorch model training (`client_time_series_model/train.py`) but with **completely different parameters**: #### Configuration A: March 2 Data, Full Dataset ```bash python3 train.py --as_of 20250302 --max_epochs 4 --pct_client_subset_dev 100 ``` - **Runs:** 24 - **OOMs:** 22 (92% OOM rate!) - **Training:** Full dataset (100% of clients), 4 epochs - **Memory:** 350GB (maximum limit) #### Configuration B: June 28 Data, 10% Subset ```bash python3 train.py --as_of 20250628 --max_epochs 10 --pct_client_subset_dev 10 ``` - **Runs:** 24 - **OOMs:** 8 (33% OOM rate) - **Training:** 10% of data, 10 epochs - **Memory:** 350GB (maximum limit) #### Configuration C: May 17 Data, 1% Subset ```bash python3 train.py --as_of 20250517 --max_epochs 10 --pct_client_subset_dev 1 ``` - **Runs:** 18 - **OOMs:** 2 (11% OOM rate) - **Training:** Only 1% of data, 10 epochs - **Memory:** 350GB (maximum limit) ### The Cross-Contamination Timeline **August 14-September 4, 2025** - All runs execute at 350GB from the start: ``` Aug 14: Config C (1% data) → OOM at 350GB Aug 14: Config A (100% data) → 18 OOMs at 350GB over 6 days Aug 19: Config A continues → More OOMs at ceiling Aug 28: Config B (10% data) → 8 OOMs at 350GB Aug 28: Configs A, B, C mix → All hit 350GB ceiling Sep 1-4: Various configs → Continue OOM'ing at maximum ``` ### Why This is Catastrophic 1. **No room to grow:** ARA wants to increase memory after OOMs, but all runs are already at the 350GB maximum limit 2. **Massive over-provisioning for small jobs:** Configuration C trains on **1% of the data** but gets **350GB** because Configurations A and B OOM'd with full datasets 3. **Trapped at the ceiling:** Once at max memory, ARA becomes useless: - Jobs that need >350GB: Keep OOM'ing, can't grow further - Jobs that need <<350GB: Massively over-allocated, wasting resources 4. 
**Cross-training contamination:** Three completely different ML experiments share OOM history: - Different months of training data (March, May, June) - Different model hyperparameters (4 vs 10 epochs) - Different data sizes (100% vs 10% vs 1% of clients) ### The Numbers **Total Impact:** - **83 runs** across **12 different commands** - **32 OOMs** (39% OOM rate **at maximum memory**) - **All 83 runs allocated 350GB** regardless of actual needs **Configuration C alone** (1% subset): - Likely needs <50GB based on data size - Receives 350GB due to cross-contamination - **700% over-provisioned** (7x more memory than needed) ### Root Cause All 12 commands share the same description: ``` "CTSM PF ATRF Metrics SubSeqRefactor 12-2 Train Staging / Model Training" ``` Therefore: `command_hash = MD5(description) = b4c7adde0a3dc7dd13a8da282f1693c1` ARA cannot distinguish between: - Training on March data vs June data (4 months apart) - 4 epochs vs 10 epochs (2.5x difference) - 100% data vs 10% vs 1% (100x difference!) ### What Should Happen If `command_hash` were calculated from the actual command: - **Config A hash:** MD5("...as_of 20250302...max_epochs 4...pct_client_subset_dev 100...") - **Config B hash:** MD5("...as_of 20250628...max_epochs 10...pct_client_subset_dev 10...") - **Config C hash:** MD5("...as_of 20250517...max_epochs 10...pct_client_subset_dev 1...") Each would have **independent ARA history** based on its actual resource needs: - Config A might legitimately need 350GB (full dataset) - Config B might need ~50GB (10% subset) - Config C might need ~10GB (1% subset) Instead, all three get 350GB because they share a description. ## Why This Causes Over-Provisioning 1. **Cross-contamination:** Jobs inherit OOM data from unrelated workloads 2. **Compounding growth:** The 1.75x multiplier compounds across different jobs 3. **Never stabilizes:** Each day's job can trigger growth for the next day's job 4. 
**Reaches maximum:** Eventually hits the 350GB limit, explaining the "jobs growing to 300GB" issue ## Scale of the Problem ### Definitions with Most Cross-Command OOMs ```sql SELECT definition_id, command_hash, COUNT(DISTINCT command) as distinct_commands, COUNT(*) FILTER (WHERE exit_code = 137 OR exit_reason = 'OOMKilled') as oom_count, COUNT(*) as total_runs FROM task WHERE command_hash IS NOT NULL AND engine = 'eks' AND command IS NOT NULL GROUP BY definition_id, command_hash HAVING COUNT(DISTINCT command) > 1 AND COUNT(*) FILTER (WHERE exit_code = 137 OR exit_reason = 'OOMKilled') > 0 ORDER BY oom_count DESC LIMIT 10; ``` | Rank | command_hash | Distinct Commands | OOMs | Total Runs | |------|--------------|-------------------|------|------------| | 1 | `407f6885beaec163...` | 176 | 115 | 287 | | 2 | `a5bdb8f3302110219...` | 164 | 87 | 304 | | 3 | `2344c10bd7229...` | 184 | 83 | 564 | | 4 | `7803d8faa568610...` | 97 | 82 | 261 | | 5 | `90ceb0cabff4958...` | 135 | 82 | 230 | All from the same definition: `sf-base_python-3_9-59ab1a32-cdda-4eb8-5824-49d17d96b1fd` ### Definitions with NULL command_hash (No ARA) ```sql SELECT definition_id, COUNT(DISTINCT command) as distinct_commands, COUNT(*) as total_runs FROM task WHERE command_hash IS NULL AND command IS NOT NULL GROUP BY definition_id HAVING COUNT(DISTINCT command) > 1 ORDER BY total_runs DESC LIMIT 5; ``` | Definition ID | Distinct Commands | Total Runs | |---------------|-------------------|------------| | `sf-base_python-3_11-7449eda4-b8b3-4146-77c5-a47f8caac81b` | 55 | 91 | | `sf-base_python-3_9-59ab1a32-cdda-4eb8-5824-49d17d96b1fd` | 40 | 49 | | `data-platform-d834291f-d984-408e-5da4-8646f7e2f5b7` | 4 | 31 | | `platform-8a651dbe-1794-485b-6ba4-ba58b4a10212` | 5 | 21 | | `sf-base_pytorch2-24__5-py3-ceef4c9e-6ebc-41e5-6cef-a334aed6e829` | 6 | 17 | ## Root Cause Analysis ### Design Intent vs Implementation **Intended behavior:** - Jobs running the **same command** should share ARA data - Different commands should have separate ARA histories **Actual behavior:** - Jobs with the **same description** share ARA data - Command can be completely different ### Why Description Was Used Looking at the code flow: 1. API receives execution request with optional `description` field 2. If `command_hash` not provided by client, generate from description 3. **Problem:** Command isn't available yet at this point in the flow 4. Command is constructed later during job submission **The Disconnect:** - `command_hash` is set in `flotilla/endpoints.go` (API layer) - Actual `command` is finalized in `execution/adapter/eks_adapter.go` (execution layer) - By the time the command is known, the hash is already set ## The Fix ### Recommended Solution Calculate `command_hash` from the **actual command** that will run: **Location to fix:** Where the Run object gets its final command, likely in the execution service before calling `EstimateRunResources()`. **Pseudocode:** ```go // After command is finalized, before ARA lookup if run.Command != nil && len(*run.Command) > 0 { run.CommandHash = aws.String(fmt.Sprintf("%x", md5.Sum([]byte(*run.Command)))) } else { // Fallback: use description if no command (shouldn't happen for EKS jobs) if run.Description != nil { run.CommandHash = aws.String(fmt.Sprintf("%x", md5.Sum([]byte(*run.Description)))) } } ``` ### Migration Strategy **Challenge:** Changing command_hash breaks ARA history **Options:** 1. 
**Clean break (Recommended):** - Fix the hash calculation - Accept that ARA starts fresh for all jobs - Monitor via new instrumentation to ensure it works correctly 2. **Dual-hash lookup:** - Try command-based hash first - Fall back to description-based hash for historical data - Gradually phase out old hashes 3. **Per-definition rollout:** - Fix hash for definitions most affected by the bug - Leave others on old behavior temporarily - Migrate gradually ### Testing Plan 1. **Verify hash calculation:** - Unit tests ensuring hash comes from command, not description - Integration tests with various command/description combinations 2. **Verify ARA still works:** - Test that identical commands share ARA data - Test that different commands DON'T share data 3. **Monitor after deployment:** - Use new `ara.*` metrics to track behavior - Watch for unexpected resource changes - Check logs for `ara.no_historical_data` - should increase initially ## Impact on Current Investigation This bug significantly impacts the "jobs growing to 300GB" investigation: 1. **Over-provisioning is worse than thought:** - Jobs inherit OOMs from unrelated workloads - The 1.75x multiplier compounds across different jobs - Growth isn't just from retrying the same job, but cross-contamination 2. **Instrumentation still valuable:** - The new ARA metrics will help measure the bug's impact - After fixing, metrics will show if ARA works correctly 3. **Fix priority:** - This bug should be fixed **before** tuning ARA multipliers - Otherwise, you're tuning a broken system ## Queries for Further Investigation ### Find your most affected definitions ```sql -- Definitions with most OOM cross-contamination SELECT definition_id, command_hash, COUNT(DISTINCT MD5(command)) as distinct_commands, COUNT(*) FILTER (WHERE exit_code = 137 OR exit_reason = 'OOMKilled') as oom_count, COUNT(*) as total_runs, MAX(memory) as max_memory_allocated FROM task WHERE command_hash IS NOT NULL AND engine = 'eks' AND command IS NOT NULL AND queued_at >= CURRENT_TIMESTAMP - INTERVAL '30 days' GROUP BY definition_id, command_hash HAVING COUNT(DISTINCT MD5(command)) > 1 AND COUNT(*) FILTER (WHERE exit_code = 137 OR exit_reason = 'OOMKilled') > 0 ORDER BY oom_count * distinct_commands DESC LIMIT 20; ``` ### Find jobs hitting memory limits with cross-command contamination ```sql -- Jobs at max memory (350GB) that share command_hash with different commands SELECT DISTINCT t1.definition_id, t1.command_hash FROM task t1 JOIN task t2 ON t1.definition_id = t2.definition_id AND t1.command_hash = t2.command_hash AND MD5(t1.command) != MD5(t2.command) WHERE t1.memory >= 300000 -- Close to or at max AND t1.queued_at >= CURRENT_TIMESTAMP - INTERVAL '7 days' GROUP BY t1.definition_id, t1.command_hash HAVING COUNT(DISTINCT MD5(t1.command)) > 1; ``` ## Recommendations 1. **Immediate:** - Review the examples in this report with the team - Decide on fix approach (clean break vs dual-hash) - Prioritize this fix before tuning ARA parameters 2. **Short-term:** - Implement command-based hash calculation - Deploy with new instrumentation - Monitor via `ara.*` metrics 3. 
**Long-term:** - Consider whether description should exist separately from command - Review if ARA should use command hash at all, or something more semantic - Add validation to prevent command_hash from being NULL ## Related Files - **Bug location:** `flotilla/endpoints.go:451-453, 514-516, 592-593` - **ARA query:** `state/pg_queries.go:54-66` (TaskResourcesSelectCommandSQL) - **ARA lookup:** `state/pg_state_manager.go:118-162` (EstimateRunResources) - **Resource adjustment:** `execution/adapter/eks_adapter.go:352-421` (adaptiveResources) - **New instrumentation:** `docs/ara-instrumentation.md` ## Database Evidence All evidence in this report is from production database queries run on 2025-11-24. Key run IDs for reproduction: - OOM: `eks-055c-c578-4951-75d8-3f5a0bb15b37` (Nov 23, 1024 MB, OOM) - Inherited: `eks-0d33-a443-43b9-45f9-04b780868880` (Nov 24, 3136 MB, Success) - Command hash: `407f6885beaec163a742e8c3c8a50d3e` - Definition: `sf-base_python-3_9-59ab1a32-cdda-4eb8-5824-49d17d96b1fd` ================================================ FILE: docs/ara-command-hash-fix-locations.md ================================================ # ARA command_hash Fix: Implementation Locations ## ✅ STATUS: IMPLEMENTED **All code changes have been completed.** This document now serves as a record of what was changed. **Changes made:** 1. ✅ Added command_hash calculation from command in `services/execution.go` 2. ✅ Removed description-based hash calculation from `flotilla/endpoints.go` (3 locations) 3. ✅ Optimized SQL query in `state/pg_queries.go` to use direct parameter 4. ✅ Updated call site in `execution/adapter/eks_adapter.go` with NULL check **Remaining work:** - ⏳ Add unit tests (see Testing Plan section) - ⏳ Deploy and monitor (see Success Criteria section) --- ## Executive Summary The `command_hash` bug required moving hash calculation from the API layer (where only description is available) to the execution service layer (where the actual command is finalized). ## Current Broken Flow ``` 1. API Layer (flotilla/endpoints.go:451-453, 514-516, 592-593) ├─ Receives execution request ├─ Sets: lr.CommandHash = MD5(description) ❌ WRONG └─ Passes to execution service 2. Execution Service (services/execution.go:320-327) ├─ Constructs final command from template/request ├─ Command is now finalized ✓ └─ But hash was already set from description ❌ 3. Database (state/pg_state_manager.go:1168) └─ Stores the wrong hash from step 1 ❌ 4. EKS Adapter (execution/adapter/eks_adapter.go:109) ├─ Final command formatting └─ Hash still wrong ❌ 5. ARA Lookup (execution/adapter/eks_adapter.go:369) └─ Uses wrong hash to query historical data ❌ ``` ## Fixed Flow ``` 1. API Layer (flotilla/endpoints.go) ├─ Receives execution request └─ Does NOT set command_hash (remove this code) ✓ 2. Execution Service (services/execution.go:359) ├─ Constructs final command ├─ Calculates: fields.CommandHash = MD5(command) ✓ NEW └─ Passes to CreateRun 3. Database (state/pg_state_manager.go:1168) └─ Stores correct hash ✓ 4. EKS Adapter (execution/adapter/eks_adapter.go:109) └─ Command already hashed correctly ✓ 5. ARA Lookup (execution/adapter/eks_adapter.go:369) └─ Uses correct hash ✓ ``` ## Code Changes Required ### 1. 
PRIMARY FIX: Add hash calculation in services/execution.go **Location:** `services/execution.go:359` (right before constructing the Run object) **Current code (lines 319-381):** ```go if *fields.Engine == state.EKSEngine { executableCmd, err := executable.GetExecutableCommand(req) if err != nil { return run, err } if (fields.Command == nil || len(*fields.Command) == 0) && (len(executableCmd) > 0) { fields.Command = aws.String(executableCmd) } executableID := executable.GetExecutableID() // ... spot/ondemand logic ... } if *fields.Engine == state.EKSSparkEngine { // ... spark setup ... } if fields.NodeLifecycle == nil { fields.NodeLifecycle = &state.SpotLifecycle } run = state.Run{ RunID: runID, // ... Command: fields.Command, CommandHash: fields.CommandHash, // ❌ Uses wrong hash from API layer // ... } ``` **New code (insert at line ~359, before `run = state.Run{...}`):** ```go if fields.NodeLifecycle == nil { fields.NodeLifecycle = &state.SpotLifecycle } // Calculate command_hash from actual command (FIX for ARA bug) // This ensures jobs with different commands have different hashes, // even if they share the same description. if fields.Command != nil && len(*fields.Command) > 0 { fields.CommandHash = aws.String(fmt.Sprintf("%x", md5.Sum([]byte(*fields.Command)))) } // If command is NULL/empty, command_hash remains NULL (malformed job) // Do NOT fall back to description - that was the bug we're fixing run = state.Run{ RunID: runID, // ... Command: fields.Command, CommandHash: fields.CommandHash, // ✓ Now has correct hash // ... } ``` **Why this location:** - Command is finalized (line 326 for EKS, or from request) - Before `CreateRun` is called (line 653) - Works for both EKS standard and Spark engines - No database update needed (hash set correctly from start) **Imports needed:** ```go import ( "crypto/md5" // ... existing imports ... ) ``` ### 2. CLEANUP: Remove broken hash calculation from endpoints.go **Locations to modify:** - `flotilla/endpoints.go:451-453` (CreateRunV2) - `flotilla/endpoints.go:514-516` (CreateRunV4) - `flotilla/endpoints.go:592-594` (CreateRunByAlias) **Current code (appears in 3 places):** ```go if lr.CommandHash == nil && lr.Description != nil { lr.CommandHash = aws.String(fmt.Sprintf("%x", md5.Sum([]byte(*lr.Description)))) } ``` **Action:** **REMOVED these 3 blocks entirely** ✅ COMPLETED **Rationale:** - This was the source of the bug (hashing description instead of command) - Hash will now be calculated correctly in execution service - API clients already don't pass command_hash, so removal has no client impact - No fallback to description - that perpetuates the bug ### 3. OPTIMIZATION: Update SQL query to use direct parameter ✅ COMPLETED **File:** `state/pg_queries.go` **Location:** Line 64 **Changed from:** ```sql AND command_hash = (SELECT command_hash FROM task WHERE run_id = $2) ``` **Changed to:** ```sql AND command_hash = $2 ``` **Benefit:** Eliminates unnecessary subquery, improves performance ### 4. OPTIMIZATION: Update call site to pass command_hash ✅ COMPLETED **File:** `execution/adapter/eks_adapter.go` **Location:** Lines 368-422 (in `adaptiveResources` function) **Changed from:** ```go if !isGPUJob { estimatedResources, err := manager.EstimateRunResources(ctx, *executable.GetExecutableID(), run.RunID) if err == nil { // ARA found historical data... 
} else { // No historical data available _ = metrics.Increment(metrics.EngineEKSARANoHistoricalData, metricTags, 1) } } ``` **Changed to:** ```go if !isGPUJob { // Only attempt ARA if we have a command hash if run.CommandHash == nil { // Command hash is NULL - job has no command (malformed job definition) _ = metrics.Increment(metrics.EngineEKSARANullCommandHash, metricTags, 1) _ = a.logger.Log( "level", "warn", "message", "Skipping ARA - NULL command_hash", "reason", "Job has no command (malformed definition)", "run_id", run.RunID, "definition_id", *executable.GetExecutableID(), ) } else { estimatedResources, err := manager.EstimateRunResources(ctx, *executable.GetExecutableID(), *run.CommandHash) if err == nil { // ARA found historical data... } else { // No historical data available _ = metrics.Increment(metrics.EngineEKSARANoHistoricalData, metricTags, 1) } } } ``` **Changes:** - Added NULL check for `run.CommandHash` - Pass `*run.CommandHash` instead of `run.RunID` - Added specific metric and logging for NULL case **Note:** The metric `metrics.EngineEKSARANullCommandHash` may need to be added to the metrics package. ### 5. OPTIONAL: Add validation/logging **Location:** `state/pg_state_manager.go:1168` (CreateRun, where command_hash is stored) **Add validation before insert:** ```go // Validate that command_hash matches command (helps catch bugs) if r.Command != nil && r.CommandHash != nil { expectedHash := fmt.Sprintf("%x", md5.Sum([]byte(*r.Command))) if expectedHash != *r.CommandHash { // Log mismatch but don't fail (for observability) flotillaLog.Log( "message", "WARNING: command_hash mismatch", "run_id", r.RunID, "expected_hash", expectedHash, "actual_hash", *r.CommandHash, ) } } ``` ## Migration Considerations ### Clean Break (Recommended) Since current command_hash values are incorrect, the best approach is: 1. **Deploy the fix** - All new runs get correct hash 2. **Accept loss of history** - New hashes won't match old hashes 3. **Monitor ARA metrics** - Use instrumentation to verify behavior 4. **Expect initial spike** - `ara.no_historical_data` metric will increase temporarily **Why this is OK:** - Current ARA data is contaminated anyway - Better to start fresh with correct data - New instrumentation will help monitor the recovery ### Alternative: Dual-Hash Lookup (NOT IMPLEMENTED) **Decision:** We chose the clean break approach. No dual-hash lookup was implemented. **Reason:** The historical data is contaminated and would perpetuate the bug. Starting fresh with correct hashing is the right approach. 
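To see what the clean break buys, here is a minimal, standalone Go sketch (not part of the codebase) of the hashing behavior the fix introduces: the hash is derived from the final command alone, so runs that share a description but differ in command no longer collide.

```go
package main

import (
	"crypto/md5"
	"fmt"
)

// commandHash mirrors the fix in services/execution.go:
// MD5 of the final command, never of the description.
func commandHash(command string) string {
	return fmt.Sprintf("%x", md5.Sum([]byte(command)))
}

func main() {
	// Same description, different commands (the contaminating case).
	a := commandHash("python3 calibrate.py --as_of 20251122")
	b := commandHash("python3 calibrate.py --as_of 20251123")
	fmt.Println(a == b) // false: each date now gets its own ARA history
}
```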
## Testing Plan ### Unit Tests **Location:** `services/execution_test.go` ```go func TestCommandHashCalculatedFromCommand(t *testing.T) { // Test that command_hash is MD5 of command, not description req := &state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: aws.String("python script.py --arg value"), Description: aws.String("Different description"), }, } run, err := executionService.constructBaseRunFromExecutable(ctx, definition, req) expectedHash := fmt.Sprintf("%x", md5.Sum([]byte("python script.py --arg value"))) assert.Equal(t, expectedHash, *run.CommandHash) assert.NotEqual(t, fmt.Sprintf("%x", md5.Sum([]byte("Different description"))), *run.CommandHash) } func TestCommandHashWithSameDescriptionDifferentCommands(t *testing.T) { // Test that different commands get different hashes even with same description description := "Daily processing job" req1 := &state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: aws.String("python process.py --date 2025-01-01"), Description: aws.String(description), }, } req2 := &state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: aws.String("python process.py --date 2025-01-02"), Description: aws.String(description), }, } run1, _ := executionService.constructBaseRunFromExecutable(ctx, definition, req1) run2, _ := executionService.constructBaseRunFromExecutable(ctx, definition, req2) assert.NotEqual(t, run1.CommandHash, run2.CommandHash, "Different commands should have different hashes even with same description") } ``` ### Integration Tests **Verify end-to-end:** 1. Submit two runs with: - Same description - Different commands (e.g., different dates) 2. Check database: ```sql SELECT command, command_hash, description FROM task WHERE run_id IN ('run1', 'run2'); ``` 3. Verify: - Different commands → different hashes ✓ - Same description ✓ - Hashes are MD5 of commands ✓ ### Production Verification **After deployment, monitor:** 1. **New runs have non-NULL hash:** ```sql SELECT COUNT(*) FROM task WHERE queued_at > NOW() - INTERVAL '1 hour' AND command_hash IS NULL AND command IS NOT NULL; ``` Should be 0. 2. **Hash matches command:** ```sql SELECT run_id, command, command_hash, MD5(command) as expected_hash FROM task WHERE queued_at > NOW() - INTERVAL '1 hour' LIMIT 100; ``` Verify `command_hash = expected_hash`. 3. **ARA metrics (from instrumentation):** - `ara.no_historical_data` - will spike initially (expected) - `ara.resource_adjustment` - should stabilize over 3-7 days - `ara.hit_max_memory` - should decrease for over-provisioned jobs ## Rollback Plan If the fix causes issues: 1. **Quick rollback:** Revert the code changes and redeploy 2. **Data is safe:** Database schema unchanged, no migrations needed 3. 
**Monitoring:** New instrumentation continues to work regardless ## Summary of Changes Made | File | Lines | Action | Status | |------|-------|--------|--------| | `services/execution.go` | 5 | **ADD** crypto/md5 import | ✅ COMPLETED | | `services/execution.go` | 361-368 | **ADD** command_hash calculation | ✅ COMPLETED | | `flotilla/endpoints.go` | 451-453 | **REMOVE** description-based hash | ✅ COMPLETED | | `flotilla/endpoints.go` | 510-512 | **REMOVE** description-based hash | ✅ COMPLETED | | `flotilla/endpoints.go` | 584-586 | **REMOVE** description-based hash | ✅ COMPLETED | | `state/pg_queries.go` | 64 | **MODIFY** Remove subquery, use $2 directly | ✅ COMPLETED | | `execution/adapter/eks_adapter.go` | 369-422 | **ADD** NULL check and pass *run.CommandHash | ✅ COMPLETED | | `services/execution_test.go` | New | **ADD** unit tests (TODO) | ⏳ PENDING | ## Timeline Estimate - Code changes: 30 minutes - Unit tests: 1 hour - Integration testing: 2 hours - Deployment: Standard release process - Monitoring period: 3-7 days for ARA to stabilize ## Success Criteria 1. ✓ All new runs have `command_hash = MD5(command)` 2. ✓ Different commands have different hashes 3. ✓ Zero NULL command_hash for new runs (except truly NULL commands) 4. ✓ ARA metrics stabilize within 7 days 5. ✓ OOM rates decrease for previously over-provisioned jobs ================================================ FILE: docs/ara-command-hash-history.md ================================================ # History of command_hash Implementation ## Timeline of Changes ### January 17, 2020 - Original Design (Commit a5d7e0f) **Author:** Ujjwal Sarin **PR:** #269 **Title:** "Adding command hash to task" **What was added:** 1. `command_hash` column added to `task` table 2. Changed ARA query from matching exact `command` text to `command_hash` 3. **Database automatically calculated hash:** `MD5($17)` where `$17` is the command parameter **Original CreateRun SQL:** ```sql INSERT INTO task ( ..., command, ..., command_hash ) VALUES ( ..., $17, ..., MD5($17) ); ``` **Original UpdateRun SQL:** ```sql UPDATE task SET command = $17, ..., command_hash = MD5($17) WHERE run_id = $1; ``` **Intent:** Hash was calculated FROM THE COMMAND to group similar jobs for ARA resource estimation. **Original Query Change:** ```sql -- BEFORE: Match exact command text WHERE command = (SELECT command FROM TASK WHERE run_id = $2) -- AFTER: Match command hash WHERE command_hash = (SELECT command_hash FROM task WHERE run_id = $2) ``` ### January 22, 2020 - Removed Auto-Hashing from UpdateRun (Commit fbe8409) **Author:** Ujjwal Sarin **Title:** "removing adding command_hash on updates" **What changed:** - Removed `command_hash = MD5($17)` from UpdateRun SQL - Left CreateRun unchanged (still had MD5 calculation) **Why this matters:** This suggests the design started shifting toward setting command_hash earlier in the flow, not in the database. ### December 31, 2021 - API Layer Auto-Generation from Description (Commit 7802cfe) **Author:** Ujjwal Sarin **Commit message:** "encode lr" **What was added:** ```go // In flotilla/endpoints.go - CreateRunV2, CreateRunV4, CreateRunByAlias if lr.CommandHash == nil && lr.Description != nil { lr.CommandHash = aws.String(hex.EncodeToString([]byte(*lr.Description))) } ``` **THE BUG INTRODUCED:** Changed from hashing the command to hashing the description. **Why description was used:** At the API layer (endpoints.go), the final command isn't constructed yet. 
The command gets finalized later during job submission in the execution layer. **Context:** This commit was for Spark executor estimation feature (see below). ### December 31, 2021 - Same Day: Changed to MD5 (Commit 7e84338) **Author:** Ujjwal Sarin **Title:** "adding support for predicting executor" **What changed:** ```go // Changed from hex encoding to MD5 (same day, 2 hours later) if lr.CommandHash == nil && lr.Description != nil { lr.CommandHash = aws.String(fmt.Sprintf("%x", md5.Sum([]byte(*lr.Description)))) } ``` **What was added:** Spark executor count estimation using command_hash: ```go // execution/engine/emr_engine.go func (emr *EMRExecutionEngine) estimateExecutorCount(run state.Run, manager state.Manager) *int64 { if run.Engine != nil && *run.Engine == state.EKSSparkEngine { count, err := manager.EstimateExecutorCount(run.DefinitionID, *run.CommandHash) if err == nil { return aws.Int64(count) } } return aws.Int64(100) } ``` **New Query Added:** ```sql const TaskResourcesExecutorCountSQL = ` SELECT COALESCE(cast((percentile_disc(0.99) within GROUP (ORDER BY A.executor_count)) * 1.75 as int), 100) FROM (SELECT CASE WHEN (exit_reason like '%Exception%') THEN spark_extension->'num_executors' END FROM TASK WHERE queued_at >= CURRENT_TIMESTAMP - INTERVAL '7 days' AND engine = 'eks-spark' AND definition_id = $1 AND command_hash = $2 AND (exit_code != 0) LIMIT 30) A ` ``` **Significance:** This shows command_hash was being used for TWO features: 1. ARA memory/CPU estimation (original, Jan 2020) 2. Spark executor count estimation (new, Dec 2021) Both rely on grouping similar jobs, but the Dec 2021 implementation broke this by hashing description instead of command. ## Current State (2025) ### API Layer (flotilla/endpoints.go) ```go // Lines 451-453, 514-516, 592-593 if lr.CommandHash == nil && lr.Description != nil { lr.CommandHash = aws.String(fmt.Sprintf("%x", md5.Sum([]byte(*lr.Description)))) } ``` **Problem:** Hashes description, not command. ### Database Layer (state/pg_state_manager.go) ```go // CreateRun - Line 1168 r.CommandHash // Just uses whatever was passed in, no calculation ``` **Problem:** No fallback calculation. If API layer provides wrong hash, database accepts it. ### API Schema (state/models.go) ```go // LaunchRequestV2 - Line 1235 type LaunchRequestV2 struct { Command *string `json:"command,omitempty"` Description *string `json:"description,omitempty"` CommandHash *string `json:"command_hash,omitempty"` // ... } ``` **Observation:** `command_hash` IS exposed as an optional API field, but: 1. Clients rarely/never pass it explicitly 2. API layer auto-generates from description as fallback 3. This means nearly all command_hash values in production are MD5(description) ## Root Cause Analysis ### The Design Disconnect **Layer 1 - API (endpoints.go):** - Receives execution request - Command might not be finalized yet - Needs to set command_hash for downstream use - Only has description available - **Decision:** Hash description as proxy for command **Layer 2 - Execution (execution/adapter/eks_adapter.go):** - Constructs final command from template + parameters - Command is now known - But command_hash was already set in Layer 1 - **Missing:** No code to recalculate hash from actual command **Layer 3 - Database (state/pg_state_manager.go):** - Just stores whatever command_hash was provided - No validation that hash matches command - **Assumption:** Hash was calculated correctly upstream ### Why This Wasn't Caught 1. 
**Description often stable:** Many jobs use the same description repeatedly 2. **Worked for simple cases:** Jobs with truly identical descriptions often have identical commands 3. **Gradual degradation:** As users started parameterizing commands (dates, configs), descriptions stayed the same but commands diverged 4. **No monitoring:** Until the recent instrumentation patches, there was no visibility into ARA behavior ## Evidence from Production ### NULL command_hash - **21,357 runs** with NULL command_hash (description also NULL) - These runs get NO ARA benefit despite feature being enabled ### Cross-Command Contamination - **Worst case:** 176 different commands sharing one command_hash - **High-volume case:** 87,142 runs across 2 different commands - **ML Training catastrophe:** 12 different training configs all sharing 350GB allocation ### The Smoking Gun From docs/ara-command-hash-bug-report.md: **Daily jobs differing only by date:** ```bash # Nov 23 OOM python3 calibrate.py --as_of 20251122 # Nov 24 (inherited ARA from above) python3 calibrate.py --as_of 20251123 ``` Both have description "Calibrate Psale Prod / Calibrate Psale" → Same command_hash → Share ARA data → Nov 24 job gets 3136 MB from Nov 23 OOM **The data being processed is completely different** (different dates), but they share resource allocation history. ## The Original Intent vs Reality ### Original Intent (Jan 2020) - Jobs running the **same command** share ARA data - Different commands have separate ARA histories - Performance optimization: hash instead of full text comparison ### Current Reality (Dec 2021 - Present) - Jobs with the **same description** share ARA data - Commands can be completely different - Leads to incorrect resource allocation ## Why Description Was Chosen Looking at the code flow: 1. API receives execution request (`flotilla/endpoints.go`) - Has: description (optional), command template - Needs: command_hash for ARA lookup 2. Command construction happens later (`execution/adapter/eks_adapter.go`) - Combines template + env vars + parameters - Final command not available at API layer 3. Timing problem: - `command_hash` needed before `adaptiveResources()` call - `command` not finalized until job construction - Description available early, command available late **The Compromise:** Use description as a "proxy" for command. **Why it seemed reasonable:** - Description often correlates with command - Better than nothing for grouping similar jobs - Performance: avoid expensive string operations on long commands **Why it fails:** - Parameterized commands (dates, configs, data subsets) - Description captures "what" but not "how" - Catastrophic cross-contamination at scale ## Related Queries ### Original ARA Query (pre-Jan 2020) ```sql -- Before command_hash WHERE command = (SELECT command FROM TASK WHERE run_id = $2) ``` ### Current ARA Query (Jan 2020 - Present) ```sql -- Using command_hash WHERE command_hash = (SELECT command_hash FROM task WHERE run_id = $2) ``` **Irony:** The query change was meant to make ARA more efficient, but combined with description-based hashing, it made it incorrect. ## Conclusion The bug wasn't a single mistake but an **architectural mismatch**: 1. **2020:** Designed command_hash to group identical commands 2. **2021:** Needed to set hash early in request flow 3. **2021:** Command not available early, description chosen as proxy 4.
**2021-2025:** Production usage reveals proxy doesn't work at scale The fix requires moving command_hash calculation to **after** command is finalized, or making command available earlier in the flow. ## References - **Original feature:** Commit a5d7e0f (Jan 17, 2020) - **Auto-hash removal:** Commit fbe8409 (Jan 22, 2020) - **Bug introduction:** Commit 7802cfe (Dec 31, 2021) - **MD5 change:** Commit 7e84338 (Dec 31, 2021) - **ARA enablement:** Commit 4c0ffc8 (Feb 23, 2022) - **Bug documentation:** docs/ara-command-hash-bug-report.md (Nov 25, 2025) ================================================ FILE: docs/ara-instrumentation.md ================================================ # ARA Instrumentation Guide ## Overview This document describes the instrumentation added to measure Auto Resource Adjustment (ARA) behavior in Flotilla. The goal is to understand how often ARA causes resource growth and identify potential over-provisioning, particularly when jobs repeatedly hit maximum resource limits (~300GB memory). ## Background: How ARA Works ### What is ARA? Auto Resource Adjustment (ARA) is a feature that automatically adjusts CPU and memory resources for Kubernetes jobs based on historical usage data from previous runs that experienced Out-Of-Memory (OOM) failures. ### Historical Context 1. **Initial Implementation (~2020):** ARA was introduced as an optional feature controlled by the `adaptive_resource_allocation` field on task definitions 2. **Global Override (Jan 2020):** Added `eks.ara_enabled` config parameter for global control 3. **Always Enabled (Mar 2022, commit 6eb44086):** ARA was hardcoded to always be enabled in `execution/engine/eks_engine.go:70` - All jobs now run with ARA regardless of configuration - The toggle was removed ### ARA Algorithm **Location:** `execution/adapter/eks_adapter.go:adaptiveResources()` **Process:** 1. Job starts with default resources from task definition 2. ARA queries historical data via `EstimateRunResources()` in `state/pg_state_manager.go` 3. SQL query (`state/pg_queries.go:TaskResourcesSelectCommandSQL`) looks for: - Jobs from the same definition with matching command hash - That OOM'd (exit_code=137 or exit_reason='OOMKilled') - Within the last 3 days - Up to 30 most recent runs 4. Calculates P99 (99th percentile) of resource usage and applies multipliers: - **Memory:** P99 max memory × **1.75** - **CPU:** P99 max CPU × **1.25** 5. Ensures request ≤ limit, applies bounds checking **Resource Limits:** - Min CPU: 256 millicores - Max CPU: 60,000 millicores (94,000 for GPU jobs) - Min Memory: 512 MB - Max Memory: **350,000 MB** (~341 GB) for standard jobs (376,000 MB for GPU) ### Why Jobs Grow to ~300GB The 1.75x multiplier compounds with each OOM: 1. Job runs with 10GB → OOMs 2. Next run: 10GB × 1.75 = 17.5GB → OOMs 3. Next run: 17.5GB × 1.75 = 30.6GB → OOMs 4. Pattern continues: 30.6GB → 53.5GB → 93.6GB → 163GB → 285GB → **350GB limit hit** Each OOM triggers exponential growth until the maximum limit is reached. ## Instrumentation Added ### Metrics (DataDog) All metrics use low-cardinality tags (`cluster` only) to avoid excessive volume. 
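For reference, a sketch of how that tag set is built, adapted from the `adaptiveResources` code in `execution/adapter/eks_adapter.go` (presented here as a standalone helper for illustration):

```go
// metricTags returns the low-cardinality tag set attached to every ARA metric.
// Only engine and cluster are tagged; run_id or definition_id tags would
// explode metric cardinality in DataDog.
func metricTags(clusterName string) []string {
	tags := []string{"engine:eks"}
	if clusterName != "" {
		tags = append(tags, fmt.Sprintf("cluster:%s", clusterName))
	}
	return tags
}
```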
#### Counters | Metric | Description | When to Alert | |--------|-------------|---------------| | `engine.eks.ara.resource_adjustment` | Incremented when ARA triggers resource changes | Track frequency of ARA usage | | `engine.eks.ara.estimation_attempted` | Total ARA estimation attempts | Baseline metric | | `engine.eks.ara.estimation_succeeded` | Successful ARA estimations | Success rate tracking | | `engine.eks.ara.estimation_failed` | Failed ARA estimations (errors) | Error tracking | | `engine.eks.ara.no_historical_data` | Jobs with no ARA historical data (using defaults) | Monitor new job patterns | | `engine.eks.ara.hit_max_memory` | **Jobs hitting 350GB memory limit** | **Critical: indicates over-provisioning** | | `engine.eks.ara.hit_max_cpu` | Jobs hitting CPU limit | Monitor CPU exhaustion | #### Histograms/Distributions | Metric | Description | Use Case | |--------|-------------|----------| | `engine.eks.ara.memory_increase_ratio` | Ratio of adjusted/original memory | Understand typical growth (e.g., 1.75 = 75% increase) | | `engine.eks.ara.cpu_increase_ratio` | Ratio of adjusted/original CPU | Understand CPU scaling patterns | | `engine.eks.ara.final_memory_mb` | Final memory allocated (after ARA + bounds) | Distribution of actual allocations | | `engine.eks.ara.final_cpu_millicores` | Final CPU allocated (after ARA + bounds) | Distribution of CPU allocations | | `engine.eks.ara.default_memory` | Default memory before ARA | Baseline memory distribution | | `engine.eks.ara.ara_memory` | ARA-adjusted memory | ARA memory distribution | | `engine.eks.ara.default_cpu` | Default CPU before ARA | Baseline CPU distribution | | `engine.eks.ara.ara_cpu` | ARA-adjusted CPU | ARA CPU distribution | | `engine.eks.ara.memory_increase` | Absolute memory increase (MB) | Track growth amounts | | `engine.eks.ara.cpu_increase` | Absolute CPU increase (millicores) | Track CPU growth amounts | ### Structured Logging All logs use key-value pairs compatible with standard log aggregation tools. 
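For example, the adapter emits these through the repo's `flotillaLog.Logger` interface as alternating key/value arguments (sketch based on the calls in `eks_adapter.go`; assumes `logger`, `run`, and `overage` are in scope):

```go
// Alternating key/value pairs; log aggregators index each field separately.
_ = logger.Log(
	"level", "warn",
	"message", "ARA memory allocation hit maximum limit - potential over-provisioning",
	"run_id", run.RunID,
	"cluster", run.ClusterName,
	"memory_overage_mb", overage,
)
```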
#### ARA Adjustment Logs (Info Level) **Location:** `execution/adapter/eks_adapter.go:adaptiveResources()` **When:** ARA triggers resource changes based on historical data **Fields:** ``` message: "ARA adjusted resources" definition_id: run_id: cluster: default_cpu_millicores: adjusted_cpu_millicores: cpu_ratio: default_memory_mb: adjusted_memory_mb: memory_ratio: ``` #### Limit Hit Logs (Warning Level) - CRITICAL **Location:** `execution/adapter/eks_adapter.go:checkResourceBounds()` **When:** Jobs hit maximum memory or CPU limits **Memory Limit Example:** ``` level: "warn" message: "ARA memory allocation hit maximum limit - potential over-provisioning" definition_id: run_id: cluster: default_memory_mb: requested_memory_mb: final_memory_mb: 350000 memory_overage_mb: ara_triggered: true/false ``` **CPU Limit Example:** ``` level: "warn" message: "ARA CPU allocation hit maximum limit" definition_id: run_id: cluster: default_cpu_millicores: requested_cpu_millicores: final_cpu_millicores: 60000 cpu_overage_millicores: ara_triggered: true/false ``` #### Historical Data Lookup Logs **Location:** `state/pg_state_manager.go:EstimateRunResources()` **Success:** ``` message: "ARA: Historical resource data found" definition_id: command_hash: estimated_memory_mb: estimated_cpu_millicores: ``` **No Data (Expected):** ``` message: "ARA: No historical resource data found" definition_id: command_hash: ``` **Error:** ``` level: "error" message: "ARA: Error querying historical resource data" definition_id: command_hash: error: ``` ## Using the Instrumentation ### Key Questions You Can Answer #### 1. How often does ARA trigger resource increases? **DataDog Query:** ``` sum:engine.eks.ara.resource_adjustment{*}.as_count() ``` Compare to total job submissions to get percentage. #### 2. How many jobs are hitting the ~300GB limit? ⭐ MOST IMPORTANT **DataDog Query:** ``` sum:engine.eks.ara.hit_max_memory{*}.as_count() ``` **Log Query (to identify specific jobs):** ``` message:"ARA memory allocation hit maximum limit - potential over-provisioning" ``` Group by `definition_id` to find which task definitions are affected. #### 3. What's the typical resource growth ratio? **DataDog Query:** ``` avg:engine.eks.ara.memory_increase_ratio{*} p50:engine.eks.ara.memory_increase_ratio{*} p90:engine.eks.ara.memory_increase_ratio{*} p99:engine.eks.ara.memory_increase_ratio{*} ``` A ratio of 1.75 means 75% increase, 3.0 means 200% increase, etc. #### 4. Distribution of final memory allocations **DataDog Query:** ``` avg:engine.eks.ara.final_memory_mb{*} p50:engine.eks.ara.final_memory_mb{*} p90:engine.eks.ara.final_memory_mb{*} p95:engine.eks.ara.final_memory_mb{*} p99:engine.eks.ara.final_memory_mb{*} ``` Shows the actual memory being allocated across all jobs. #### 5. Which specific definitions are over-provisioning? **Log Filter:** ``` message:"potential over-provisioning" ``` Extract `definition_id` and `memory_overage_mb` to prioritize which jobs need attention. ### Recommended Alerts #### Critical: Excessive Memory Limit Hits **Metric:** `engine.eks.ara.hit_max_memory` **Threshold:** Alert if > 10 hits per hour **Why:** Indicates jobs are repeatedly hitting the 350GB limit, suggesting either: - Jobs genuinely need more than 350GB (need larger instances) - ARA is over-provisioning (need to adjust multipliers) #### High CPU Limit Hits **Metric:** `engine.eks.ara.hit_max_cpu` **Threshold:** Alert if > 5 hits per hour **Why:** CPU exhaustion can cause job failures or slowdowns. 
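When tuning these thresholds, keep in mind how quickly the 1.75x multiplier compounds (see "Why Jobs Grow to ~300GB" above). A tiny standalone sketch that counts the OOM cycles from a 10 GB default to the 350 GB cap:

```go
package main

import "fmt"

func main() {
	const maxMemMB = 350000.0 // state.MaxMem for standard jobs
	mem := 10000.0            // 10 GB starting allocation
	cycles := 0
	for mem*1.75 < maxMemMB {
		mem *= 1.75 // each OOM triggers a 1.75x memory bump
		cycles++
	}
	// Prints: 6 cycles to reach 287229 MB; the next bump hits the cap.
	fmt.Printf("%d cycles to reach %.0f MB; the next bump hits the cap.\n", cycles, mem)
}
```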
### Investigation Workflow When you see high `engine.eks.ara.hit_max_memory` counts: 1. **Identify affected definitions:** ``` Log filter: message:"potential over-provisioning" Group by: definition_id Sort by: count ``` 2. **Analyze a specific definition:** ``` Filter: definition_id:"" AND message:"ARA" Look for patterns: - How much overage? (memory_overage_mb) - What was the original default? (default_memory_mb) - Growth ratio? (memory_ratio) ``` 3. **Check job success rate:** - Are these jobs actually succeeding despite hitting the limit? - Or are they still OOM'ing even at max resources? 4. **Decide on action:** - If jobs succeed at max limit: Likely over-provisioning, consider: - Reducing ARA multiplier from 1.75x to 1.5x or 1.25x - Making ARA configurable per-definition again - Setting reasonable max limits per definition type - If jobs fail even at max limit: Jobs legitimately need more resources: - Increase max memory limit - Use larger instance types - Optimize job code to use less memory ## Code Locations ### Metrics Constants - File: `clients/metrics/metrics.go` - Lines: 51-59 ### Main Instrumentation - File: `execution/adapter/eks_adapter.go` - Functions: `adaptiveResources()`, `checkResourceBounds()` - Lines: 352-492 ### Historical Data Logging - File: `state/pg_state_manager.go` - Function: `EstimateRunResources()` - Lines: 118-162 ### ARA SQL Query - File: `state/pg_queries.go` - Constant: `TaskResourcesSelectCommandSQL` - Lines: 54-66 ## Future Improvements Based on instrumentation data, consider: 1. **Make ARA configurable again** - Restore per-definition or global toggles for A/B testing 2. **Adjust multipliers** - If 1.75x is too aggressive, reduce to 1.5x or 1.25x 3. **Per-definition limits** - Set different max memory based on job type 4. **Graduated multipliers** - Use smaller multipliers as resources grow (e.g., 1.75x up to 50GB, then 1.25x) 5. **Decay historical data** - Weight recent OOMs more than old ones 6. **Track actual usage vs allocation** - Compare requested resources to what jobs actually use ## Related Documentation - ARA Feature Documentation: `docs/ara.md` - State Models: `state/models.go` - Resource Queries: `state/pg_queries.go` - Main CLAUDE.md: Project overview and development guide ================================================ FILE: docs/ara.md ================================================ *Adaptive Resource Allocation for Kubernetes Pods* At StitchFix we empower our data scientists to deploy their models and applications end to end without needing engineering skills. To facilitate batch processing we use Flotilla, a task execution service. Flotilla can run jobs on top of Kubernetes or AWS ECS. One of the problems we faced was deciding how much CPU and memory to assign to the container pods. The workloads vary widely in their demands. If we give too few resources, jobs may run slower or, in the pathological case, run out of memory. If we give too much, we waste resources and starve other jobs that could otherwise be scheduled alongside. Solution The first step was to accurately record the utilization of the resources per pod. We looked at a few different monitoring solutions (kube-state-metrics, Prometheus, and metrics-server). We decided to use the metrics-server since it provided a simple API and tracked the state of the pods in memory.
``` helm install --name=metrics-server --namespace=kube-system --set args={'--metric-resolution=1s'} stable/metrics-server ``` To instrument fetching the pod metrics, we used the metrics ClientSet. While the job is running, Flotilla fetches the metrics every 2-5 seconds. If the previously recorded memory and CPU values are lower than what the Metrics Server reports, the higher of the two is recorded back with the job metadata. Also, an MD5 checksum of the command and its arguments is stored in the database. This becomes a signature of the job and its resources. The core [query for ARA](https://github.com/stitchfix/flotilla-os/blob/master/state/pg_queries.go#L53-L66) and the associated [adapter code](https://github.com/stitchfix/flotilla-os/blob/master/execution/adapter/eks_adapter.go#L269-L301) implement this lookup. ================================================ FILE: exceptions/errors.go ================================================ package exceptions // // MalformedInput describes malformed or otherwise incorrect input // type MalformedInput struct { ErrorString string } func (e MalformedInput) Error() string { return e.ErrorString } // // ConflictingResource describes a conflict case: // eg. definition already exists, reserved fields // type ConflictingResource struct { ErrorString string } func (e ConflictingResource) Error() string { return e.ErrorString } // // MissingResource describes the case where a resource does not exist // eg. missing definition or run or no image found // type MissingResource struct { ErrorString string } func (e MissingResource) Error() string { return e.ErrorString } ================================================ FILE: execution/adapter/eks_adapter.go ================================================ package adapter import ( "context" "errors" "fmt" "os" "regexp" "strings" "time" "github.com/aws/aws-sdk-go/aws" "github.com/stitchfix/flotilla-os/clients/metrics" "github.com/stitchfix/flotilla-os/exceptions" flotillaLog "github.com/stitchfix/flotilla-os/log" "github.com/stitchfix/flotilla-os/state" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) type EKSAdapter interface { AdaptJobToFlotillaRun(job *batchv1.Job, run state.Run, pod *corev1.Pod) (state.Run, error) AdaptFlotillaDefinitionAndRunToJob(ctx context.Context, executable state.Executable, run state.Run, schedulerName string, manager state.Manager, araEnabled bool) (batchv1.Job, error) } type eksAdapter struct { logger flotillaLog.Logger } // NewEKSAdapter configures and returns an eks adapter for translating // from EKS api specific objects to our representation func NewEKSAdapter(logger flotillaLog.Logger) (EKSAdapter, error) { adapter := eksAdapter{logger: logger} return &adapter, nil } // Adapting Kubernetes batch/v1 job to a Flotilla run object. // This method maps the exit code & timestamps from Kubernetes to Flotilla's Run object.
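// On failure the exit code and reason are taken from the last container status when available, defaulting to exit code 1; on success with no pod information, the exit code defaults to 0.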
func (a *eksAdapter) AdaptJobToFlotillaRun(job *batchv1.Job, run state.Run, pod *corev1.Pod) (state.Run, error) { updated := run if job.Status.Active == 1 && job.Status.CompletionTime == nil { updated.Status = state.StatusRunning } else if job.Status.Succeeded == 1 { if pod != nil { if pod.Status.Phase == corev1.PodSucceeded { var exitCode int64 = 0 var exitReason = fmt.Sprintf("Pod %s Exited Successfully", pod.Name) updated.ExitReason = &exitReason updated.Status = state.StatusStopped updated.ExitCode = &exitCode } } else { var exitCode int64 = 0 updated.Status = state.StatusStopped updated.ExitCode = &exitCode } } else if job.Status.Failed == 1 { var exitCode int64 = 1 updated.Status = state.StatusStopped if pod != nil { if pod.Status.ContainerStatuses != nil && len(pod.Status.ContainerStatuses) > 0 { containerStatus := pod.Status.ContainerStatuses[len(pod.Status.ContainerStatuses)-1] if containerStatus.State.Terminated != nil { updated.ExitReason = &containerStatus.State.Terminated.Reason exitCode = int64(containerStatus.State.Terminated.ExitCode) } } } updated.ExitCode = &exitCode } if pod != nil && len(pod.Spec.Containers) > 0 { container := pod.Spec.Containers[0] //First three lines are injected by Flotilla, strip those out. if len(container.Command) > 3 { cmd := strings.Join(container.Command[3:], "\n") updated.Command = &cmd } } if job != nil && job.Status.StartTime != nil { updated.StartedAt = &job.Status.StartTime.Time } if updated.Status == state.StatusStopped { if job != nil && job.Status.CompletionTime != nil { updated.FinishedAt = &job.Status.CompletionTime.Time } else { finishedAt := time.Now() updated.FinishedAt = &finishedAt } } return updated, nil } // Adapting Flotilla run object to Kubernetes batch/v1 job. // 1. Construction of the cmd that will be run. // 2. Resources associated to a pod (includes Adaptive Resource Allocation) // 3. Environment variables to be setup. // 4. Port mappings. // 5. Node lifecycle. // 6. 
Node affinity and anti-affinity func (a *eksAdapter) AdaptFlotillaDefinitionAndRunToJob(ctx context.Context, executable state.Executable, run state.Run, schedulerName string, manager state.Manager, araEnabled bool) (batchv1.Job, error) { cmd := "" if run.Command != nil && len(*run.Command) > 0 { cmd = *run.Command } cmdSlice := a.constructCmdSlice(cmd) cmd = strings.Join(cmdSlice[3:], "\n") run.Command = &cmd resourceRequirements, run := a.constructResourceRequirements(ctx, executable, run, manager, araEnabled) volumeMounts, volumes := a.constructVolumeMounts(ctx, executable, run, manager, araEnabled) container := corev1.Container{ Name: run.RunID, Image: run.Image, Command: cmdSlice, Resources: resourceRequirements, Env: a.envOverrides(executable, run), Ports: a.constructContainerPorts(executable), ImagePullPolicy: corev1.PullAlways, } if volumeMounts != nil { container.VolumeMounts = volumeMounts } affinity := a.constructAffinity(ctx, executable, run, manager) tolerations := a.constructTolerations(executable, run) annotations := map[string]string{} annotations["prometheus.io/port"] = "9090" annotations["prometheus.io/scrape"] = "true" labels := state.GetLabels(run) jobSpec := batchv1.JobSpec{ TTLSecondsAfterFinished: &state.TTLSecondsAfterFinished, ActiveDeadlineSeconds: run.ActiveDeadlineSeconds, BackoffLimit: &state.EKSBackoffLimit, Template: corev1.PodTemplateSpec{ ObjectMeta: v1.ObjectMeta{ Annotations: annotations, Labels: labels, }, Spec: corev1.PodSpec{ SchedulerName: schedulerName, Containers: []corev1.Container{container}, RestartPolicy: corev1.RestartPolicyNever, ServiceAccountName: *run.ServiceAccount, Affinity: affinity, Tolerations: tolerations, }, }, } if volumes != nil { jobSpec.Template.Spec.Volumes = volumes } eksJob := batchv1.Job{ Spec: jobSpec, ObjectMeta: v1.ObjectMeta{ Name: run.RunID, }, } return eksJob, nil } func (a *eksAdapter) constructEviction(ctx context.Context, run state.Run, manager state.Manager) string { if run.Gpu != nil && *run.Gpu > 0 { return "false" } if run.NodeLifecycle != nil && *run.NodeLifecycle == state.OndemandLifecycle { return "false" } if run.CommandHash != nil { nodeType, err := manager.GetNodeLifecycle(ctx, run.DefinitionID, *run.CommandHash) if err == nil && nodeType == state.OndemandLifecycle { return "false" } } return "true" } func (a *eksAdapter) constructContainerPorts(executable state.Executable) []corev1.ContainerPort { var containerPorts []corev1.ContainerPort executableResources := executable.GetExecutableResources() if executableResources.Ports != nil && len(*executableResources.Ports) > 0 { for _, port := range *executableResources.Ports { containerPorts = append(containerPorts, corev1.ContainerPort{ ContainerPort: int32(port), }) } } return containerPorts } func (a *eksAdapter) constructTolerations(executable state.Executable, run state.Run) []corev1.Toleration { executableResources := executable.GetExecutableResources() tolerations := []corev1.Toleration{} isGPU := (executableResources.Gpu != nil && *executableResources.Gpu > 0) || (run.Gpu != nil && *run.Gpu > 0) if isGPU { tolerations = append(tolerations, corev1.Toleration{ Key: "nvidia.com/gpu", Operator: "Equal", Value: "true", Effect: "NoSchedule", }) } isWaitForData := run.Labels["kube_task_type"] == "wait_for_data" if team, ok := run.Labels["team"]; ok && team != "" && !isGPU && !isWaitForData { tolerations = append(tolerations, corev1.Toleration{ Key: team, Operator: "Equal", Value: "true", Effect: "NoSchedule", }) } return tolerations } func (a *eksAdapter) 
constructAffinity(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager) *corev1.Affinity { affinity := &corev1.Affinity{} var requiredMatch []corev1.NodeSelectorRequirement var preferredMatches []corev1.PreferredSchedulingTerm //todo move to config nodeLifecycleKey := "karpenter.sh/capacity-type" nodeArchKey := "kubernetes.io/arch" var nodeLifecycle []string if run.NodeLifecycle != nil && *run.NodeLifecycle == state.OndemandLifecycle { nodeLifecycle = append(nodeLifecycle, "on-demand") } else { nodeLifecycle = append(nodeLifecycle, "spot", "on-demand") } //todo move to config arch := []string{"amd64"} if run.Arch != nil && *run.Arch == "arm64" { arch = []string{"arm64"} } requiredMatch = append(requiredMatch, corev1.NodeSelectorRequirement{ Key: nodeLifecycleKey, Operator: corev1.NodeSelectorOpIn, Values: nodeLifecycle, }) requiredMatch = append(requiredMatch, corev1.NodeSelectorRequirement{ Key: nodeArchKey, Operator: corev1.NodeSelectorOpIn, Values: arch, }) executableResources := executable.GetExecutableResources() isGPU := (run.Gpu != nil && *run.Gpu > 0) || (executableResources.Gpu != nil && *executableResources.Gpu > 0) isWaitForData := run.Labels["kube_task_type"] == "wait_for_data" if team, ok := run.Labels["team"]; ok && team != "" && !isGPU && !isWaitForData { requiredMatch = append(requiredMatch, corev1.NodeSelectorRequirement{ Key: "team", Operator: corev1.NodeSelectorOpIn, Values: []string{team}, }) if env := os.Getenv("FLOTILLA_MODE"); env != "" { requiredMatch = append(requiredMatch, corev1.NodeSelectorRequirement{ Key: "environment", Operator: corev1.NodeSelectorOpIn, Values: []string{env}, }) } } affinity = &corev1.Affinity{ NodeAffinity: &corev1.NodeAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ NodeSelectorTerms: []corev1.NodeSelectorTerm{ { MatchExpressions: requiredMatch, }, }, }, PreferredDuringSchedulingIgnoredDuringExecution: preferredMatches, }, } return affinity } func (a *eksAdapter) constructResourceRequirements(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager, araEnabled bool) (corev1.ResourceRequirements, state.Run) { var ephemeralStorageRequestQuantity resource.Quantity maxEphemeralStorage := state.MaxEphemeralStorage limits := make(corev1.ResourceList) requests := make(corev1.ResourceList) cpuLimit, memLimit, cpuRequest, memRequest := a.adaptiveResources(ctx, executable, run, manager, araEnabled) // Round CPU values to avoid systemd cgroup rounding issues. 
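// Both limit and request are snapped to the nearest quarter core (250m), so the resulting cgroup CPU percentage is always a whole multiple of 25%.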
cpuLimit = a.roundCPUMillicores(cpuLimit) cpuRequest = a.roundCPUMillicores(cpuRequest) cpuLimitQuantity := resource.MustParse(fmt.Sprintf("%dm", cpuLimit)) cpuRequestQuantity := resource.MustParse(fmt.Sprintf("%dm", cpuRequest)) memLimitQuantity := resource.MustParse(fmt.Sprintf("%dM", memLimit)) memRequestQuantity := resource.MustParse(fmt.Sprintf("%dM", memRequest)) limits[corev1.ResourceCPU] = cpuLimitQuantity limits[corev1.ResourceMemory] = memLimitQuantity requests[corev1.ResourceCPU] = cpuRequestQuantity requests[corev1.ResourceMemory] = memRequestQuantity executableResources := executable.GetExecutableResources() if run.Gpu != nil && *run.Gpu > 0 { limits["nvidia.com/gpu"] = resource.MustParse(fmt.Sprintf("%d", *run.Gpu)) requests["nvidia.com/gpu"] = resource.MustParse(fmt.Sprintf("%d", *run.Gpu)) run.NodeLifecycle = &state.OndemandLifecycle } else if executableResources.Gpu != nil && *executableResources.Gpu > 0 { limits["nvidia.com/gpu"] = resource.MustParse(fmt.Sprintf("%d", *executableResources.Gpu)) requests["nvidia.com/gpu"] = resource.MustParse(fmt.Sprintf("%d", *executableResources.Gpu)) run.NodeLifecycle = &state.OndemandLifecycle } run.Memory = aws.Int64(memRequestQuantity.ScaledValue(resource.Mega)) run.Cpu = aws.Int64(cpuRequestQuantity.ScaledValue(resource.Milli)) run.MemoryLimit = aws.Int64(memLimitQuantity.ScaledValue(resource.Mega)) run.CpuLimit = aws.Int64(cpuLimitQuantity.ScaledValue(resource.Milli)) if run.EphemeralStorage != nil { ephemeralStorageRequest := *run.EphemeralStorage if ephemeralStorageRequest > maxEphemeralStorage { ephemeralStorageRequest = maxEphemeralStorage } ephemeralStorageRequestQuantity = resource.MustParse(fmt.Sprintf("%dM", ephemeralStorageRequest)) requests[corev1.ResourceEphemeralStorage] = ephemeralStorageRequestQuantity run.EphemeralStorage = aws.Int64(ephemeralStorageRequestQuantity.ScaledValue(resource.Mega)) } resourceRequirements := corev1.ResourceRequirements{ Limits: limits, Requests: requests, } return resourceRequirements, run } func (a *eksAdapter) constructVolumeMounts(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager, araEnabled bool) ([]corev1.VolumeMount, []corev1.Volume) { var mounts []corev1.VolumeMount = nil var volumes []corev1.Volume = nil if run.Gpu != nil && *run.Gpu > 0 { mounts = make([]corev1.VolumeMount, 1) mounts[0] = corev1.VolumeMount{Name: "shared-memory", MountPath: "/dev/shm"} volumes = make([]corev1.Volume, 1) sharedLimit := resource.MustParse(fmt.Sprintf("%dGi", *run.Gpu*int64(8))) emptyDir := corev1.EmptyDirVolumeSource{Medium: "Memory", SizeLimit: &sharedLimit} volumes[0] = corev1.Volume{Name: "shared-memory", VolumeSource: corev1.VolumeSource{EmptyDir: &emptyDir}} } if run.RequiresDocker { volumes = append(volumes, corev1.Volume{ Name: "dockersock", VolumeSource: corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/var/run/docker.sock", Type: nil, }, }, }) mounts = append(mounts, corev1.VolumeMount{ Name: "dockersock", MountPath: "/var/run/docker.sock", }) } return mounts, volumes } func (a *eksAdapter) adaptiveResources(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager, araEnabled bool) (int64, int64, int64, int64) { executableResources := executable.GetExecutableResources() // Check both run.Gpu (from execution request) and executableResources.Gpu (from definition) // This matches the GPU allocation logic in constructResourceRequirements (lines 300-308) isGPUJob := (run.Gpu != nil && *run.Gpu > 0) || 
(executableResources.Gpu != nil && *executableResources.Gpu > 0) cpuLimit, memLimit := a.getResourceDefaults(run, executable) cpuRequest, memRequest := a.getResourceDefaults(run, executable) // Track default resources before ARA defaultCPU := cpuRequest defaultMem := memRequest // Create tags for metrics (engine + cluster to avoid high cardinality) metricTags := []string{"engine:eks"} if run.ClusterName != "" { metricTags = append(metricTags, fmt.Sprintf("cluster:%s", run.ClusterName)) } if !isGPUJob && araEnabled { // Check if command_hash is NULL (malformed job with no command) if run.CommandHash == nil { // Command hash is NULL - skip ARA for malformed jobs _ = metrics.Increment(metrics.EngineEKSARANullCommandHash, metricTags, 1) if a.logger != nil { _ = a.logger.Log( "level", "warn", "message", "Skipping ARA - NULL command_hash", "reason", "Job has no command (malformed definition)", "run_id", run.RunID, "definition_id", *executable.GetExecutableID(), ) } } else { // Track ARA estimation attempt _ = metrics.Increment(metrics.EngineEKSARAEstimationAttempted, metricTags, 1) // Pass command_hash directly instead of run_id (optimization) estimatedResources, err := manager.EstimateRunResources(ctx, *executable.GetExecutableID(), *run.CommandHash) if err == nil { // Track successful estimation _ = metrics.Increment(metrics.EngineEKSARAEstimationSucceeded, metricTags, 1) // Extract int64 values from NullInt64 (we know they're valid because err == nil) estimatedCPU := estimatedResources.Cpu.Int64 estimatedMemory := estimatedResources.Memory.Int64 // Detect if ARA actually triggered resource changes araTriggered := (estimatedCPU != cpuRequest || estimatedMemory != memRequest) if araTriggered { // Track that ARA triggered resource adjustment _ = metrics.Increment(metrics.EngineEKSARAResourceAdjustment, metricTags, 1) // Track the magnitude of adjustment as ratios (better for understanding relative growth) if defaultMem > 0 { memoryRatio := float64(estimatedMemory) / float64(defaultMem) _ = metrics.Histogram(metrics.EngineEKSARAMemoryIncreaseRatio, memoryRatio, metricTags, 1) } if defaultCPU > 0 { cpuRatio := float64(estimatedCPU) / float64(defaultCPU) _ = metrics.Histogram(metrics.EngineEKSARACPUIncreaseRatio, cpuRatio, metricTags, 1) } // Log detailed information when ARA triggers (INFO level) if a.logger != nil { _ = a.logger.Log( "level", "info", "message", "ARA adjusted resources", "definition_id", *executable.GetExecutableID(), "run_id", run.RunID, "cluster", run.ClusterName, "default_cpu_millicores", defaultCPU, "adjusted_cpu_millicores", estimatedCPU, "cpu_ratio", float64(estimatedCPU)/float64(defaultCPU), "default_memory_mb", defaultMem, "adjusted_memory_mb", estimatedMemory, "memory_ratio", float64(estimatedMemory)/float64(defaultMem), ) } } cpuRequest = estimatedCPU memRequest = estimatedMemory // Calculate resource increases for absolute tracking cpuIncrease := cpuRequest - defaultCPU memIncrease := memRequest - defaultMem // Emit default and ARA resource distributions _ = metrics.Distribution(metrics.EngineEKSARADefaultCPU, float64(defaultCPU), metricTags, 1) _ = metrics.Distribution(metrics.EngineEKSARAARACPU, float64(cpuRequest), metricTags, 1) _ = metrics.Distribution(metrics.EngineEKSARADefaultMemory, float64(defaultMem), metricTags, 1) _ = metrics.Distribution(metrics.EngineEKSARAARAMemory, float64(memRequest), metricTags, 1) // Emit increase amounts if cpuIncrease > 0 { _ = metrics.Distribution(metrics.EngineEKSARACPUIncrease, float64(cpuIncrease), metricTags, 1) } if 
memIncrease > 0 { _ = metrics.Distribution(metrics.EngineEKSARAMemoryIncrease, float64(memIncrease), metricTags, 1) } } else { // Check if this is a missing resource error (expected for new jobs) vs a real error var missingResource exceptions.MissingResource if errors.As(err, &missingResource) { // No historical data available - this is expected for new jobs or jobs that haven't OOM'd _ = metrics.Increment(metrics.EngineEKSARANoHistoricalData, metricTags, 1) } else { // Track failed estimation (actual error) _ = metrics.Increment(metrics.EngineEKSARAEstimationFailed, metricTags, 1) } } if cpuRequest > cpuLimit { cpuLimit = cpuRequest } if memRequest > memLimit { memLimit = memRequest } } } // Check bounds - this will also emit metrics/logs for max hits cpuRequestBeforeBounds := cpuRequest memRequestBeforeBounds := memRequest cpuRequest, memRequest, maxCPUHit, maxMemHit := a.checkResourceBounds(cpuRequest, memRequest, isGPUJob, run, executable, defaultCPU, defaultMem) cpuLimit, memLimit, _, _ = a.checkResourceBounds(cpuLimit, memLimit, isGPUJob, run, executable, defaultCPU, defaultMem) // Emit final resource distributions _ = metrics.Histogram(metrics.EngineEKSARAFinalMemoryMB, float64(memRequest), metricTags, 1) _ = metrics.Histogram(metrics.EngineEKSARAFinalCPUMillicores, float64(cpuRequest), metricTags, 1) // Emit structured log when max resources hit if maxMemHit || maxCPUHit { a.emitARAMetrics(run, defaultCPU, defaultMem, cpuRequest, memRequest, cpuRequestBeforeBounds, memRequestBeforeBounds, maxCPUHit, maxMemHit) } return cpuLimit, memLimit, cpuRequest, memRequest } // emitARAMetrics logs structured information when ARA hits max resource bounds func (a *eksAdapter) emitARAMetrics(run state.Run, defaultCPU int64, defaultMem int64, finalCPU int64, finalMem int64, requestedCPU int64, requestedMem int64, maxCPUHit bool, maxMemHit bool) { if a.logger == nil { return } logFields := []interface{}{ "level", "warn", "message", "ARA resource allocation hit maximum limit", "run_id", run.RunID, "cluster", run.ClusterName, "default_cpu_millicores", defaultCPU, "default_memory_mb", defaultMem, "requested_cpu_millicores", requestedCPU, "requested_memory_mb", requestedMem, "final_cpu_millicores", finalCPU, "final_memory_mb", finalMem, "max_cpu_hit", maxCPUHit, "max_memory_hit", maxMemHit, } if run.DefinitionID != "" { logFields = append(logFields, "definition_id", run.DefinitionID) } if run.ExecutableID != nil { logFields = append(logFields, "executable_id", *run.ExecutableID) } if run.Command != nil { logFields = append(logFields, "command", *run.Command) } // Add overage information for memory (critical for 300GB issue) if maxMemHit { overage := requestedMem - finalMem logFields = append(logFields, "memory_overage_mb", overage) // Critical message for memory over-provisioning logFields[3] = "ARA memory allocation hit maximum limit - potential over-provisioning" } if maxCPUHit { overage := requestedCPU - finalCPU logFields = append(logFields, "cpu_overage_millicores", overage) } _ = a.logger.Log(logFields...) 
} // checkResourceBounds enforces resource limits and emits metrics/logs when limits are hit // Returns: adjusted CPU, adjusted memory, whether max CPU was hit, whether max memory was hit func (a *eksAdapter) checkResourceBounds(cpu int64, mem int64, isGPUJob bool, run state.Run, executable state.Executable, defaultCPU int64, defaultMem int64) (int64, int64, bool, bool) { maxMem := state.MaxMem maxCPU := state.MaxCPU if isGPUJob { maxMem = state.MaxGPUMem maxCPU = state.MaxGPUCPU } // Create tags for metrics (engine + cluster to avoid high cardinality) metricTags := []string{"engine:eks"} if run.ClusterName != "" { metricTags = append(metricTags, fmt.Sprintf("cluster:%s", run.ClusterName)) } maxCPUHit := false maxMemHit := false if cpu < state.MinCPU { cpu = state.MinCPU } if cpu > maxCPU { maxCPUHit = true // Track hitting max CPU limit _ = metrics.Increment(metrics.EngineEKSARAHitMaxCPU, metricTags, 1) cpu = maxCPU } if mem < state.MinMem { mem = state.MinMem } if mem > maxMem { maxMemHit = true // Track hitting max memory limit - THIS IS THE KEY METRIC _ = metrics.Increment(metrics.EngineEKSARAHitMaxMemory, metricTags, 1) mem = maxMem } return cpu, mem, maxCPUHit, maxMemHit } func (a *eksAdapter) getResourceDefaults(run state.Run, executable state.Executable) (int64, int64) { // 1. Init with the global defaults cpu := state.MinCPU mem := state.MinMem executableResources := executable.GetExecutableResources() // 2. Look up Run level // 3. If not at Run level check Definitions if run.Cpu != nil && *run.Cpu != 0 { cpu = *run.Cpu } else { if executableResources.Cpu != nil && *executableResources.Cpu != 0 { cpu = *executableResources.Cpu } } if run.Memory != nil && *run.Memory != 0 { mem = *run.Memory } else { if executableResources.Memory != nil && *executableResources.Memory != 0 { mem = *executableResources.Memory } } // 4. Override for very large memory requests. // Remove after migration. 
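// For non-GPU jobs requesting roughly 36-128 GB of memory, raise CPU to mem/8 millicores (the ~8:1 memory-to-CPU ratio of r5-class instances) when that exceeds the requested CPU, e.g. 65536 MB / 8 = 8192 millicores.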
if mem >= 36864 && mem < 131072 && (executableResources.Gpu == nil || *executableResources.Gpu == 0) { // using the 8x ratios between cpu and memory ~ r5 class of instances cpuOverride := mem / 8 if cpuOverride > cpu { cpu = cpuOverride } } return cpu, mem } func (a *eksAdapter) getLastRun(ctx context.Context, manager state.Manager, run state.Run) state.Run { var lastRun state.Run runList, err := manager.ListRuns(ctx, 1, 0, "started_at", "desc", map[string][]string{ "queued_at_since": { time.Now().AddDate(0, 0, -7).Format(time.RFC3339), }, "status": {state.StatusStopped}, "command": {strings.Replace(*run.Command, "'", "''", -1)}, "executable_id": {*run.ExecutableID}, }, nil, []string{state.EKSEngine}) if err == nil && len(runList.Runs) > 0 { lastRun = runList.Runs[0] } return lastRun } func (a *eksAdapter) constructCmdSlice(cmdString string) []string { bashCmd := "bash" optLogin := "-l" optStr := "-cex" return []string{bashCmd, optLogin, optStr, cmdString} } func (a *eksAdapter) envOverrides(executable state.Executable, run state.Run) []corev1.EnvVar { pairs := make(map[string]string) resources := executable.GetExecutableResources() if resources.Env != nil && len(*resources.Env) > 0 { for _, ev := range *resources.Env { name := a.sanitizeEnvVar(ev.Name) value := ev.Value pairs[name] = value } } if run.Env != nil && len(*run.Env) > 0 { for _, ev := range *run.Env { name := a.sanitizeEnvVar(ev.Name) value := ev.Value pairs[name] = value } } var res []corev1.EnvVar for key := range pairs { if len(key) > 0 { res = append(res, corev1.EnvVar{ Name: key, Value: pairs[key], }) } } return res } func (a *eksAdapter) sanitizeEnvVar(key string) string { // Environment variable can't start with a $ if strings.HasPrefix(key, "$") { key = strings.Replace(key, "$", "", 1) } // Environment variable names can't contain spaces. key = strings.Replace(key, " ", "", -1) return key } func (a *eksAdapter) sanitizeLabel(key string) string { key = strings.TrimSpace(key) key = regexp.MustCompile(`[^-a-z0-9A-Z_.]+`).ReplaceAllString(key, "_") key = strings.TrimPrefix(key, "_") key = strings.ToLower(key) if len(key) > 63 { key = key[:63] } return key } // roundCPUMillicores rounds CPU millicores to the nearest 250m (quarter core) to avoid systemd cgroup rounding issues. 
When CPU limits produce non-integer percentages, systemd rounds them up. func (a *eksAdapter) roundCPUMillicores(millicores int64) int64 { return ((millicores + 125) / 250) * 250 } ================================================ FILE: execution/adapter/eks_adapter_test.go ================================================ package adapter import ( "context" "database/sql" "errors" "testing" "github.com/stitchfix/flotilla-os/config" "github.com/stitchfix/flotilla-os/state" ) func TestRoundCPUMillicores(t *testing.T) { adapter := &eksAdapter{} tests := []struct { name string input int64 expected int64 }{ // The problematic case that triggered this fix {"1024m rounds to 1000m", 1024, 1000}, // Edge cases around quarters {"1000m stays 1000m", 1000, 1000}, {"1125m rounds to 1250m", 1125, 1250}, {"1150m rounds to 1250m", 1150, 1250}, {"1250m stays 1250m", 1250, 1250}, // Test rounding up and down {"100m rounds to 0m", 100, 0}, {"125m rounds to 250m", 125, 250}, {"137m rounds to 250m", 137, 250}, {"250m stays 250m", 250, 250}, {"374m rounds to 250m", 374, 250}, {"375m rounds to 500m", 375, 500}, {"500m stays 500m", 500, 500}, {"624m rounds to 500m", 624, 500}, {"625m rounds to 750m", 625, 750}, {"750m stays 750m", 750, 750}, // Higher values - test both rounding up and down {"2048m rounds to 2000m", 2048, 2000}, {"2100m rounds to 2000m", 2100, 2000}, {"2126m rounds UP to 2250m", 2126, 2250}, {"3000m stays 3000m", 3000, 3000}, {"3001m rounds to 3000m", 3001, 3000}, {"3126m rounds UP to 3250m", 3126, 3250}, {"3200m rounds UP to 3250m", 3200, 3250}, // Large values {"60000m stays 60000m", 60000, 60000}, {"60024m rounds to 60000m", 60024, 60000}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := adapter.roundCPUMillicores(tt.input) if result != tt.expected { t.Errorf("roundCPUMillicores(%d) = %d, want %d", tt.input, result, tt.expected) } }) } } // TestRoundCPUAvoidsCgroupIssue verifies that rounded values avoid the systemd // cgroup rounding issue where non-integer percentages get rounded up by systemd func TestRoundCPUAvoidsCgroupIssue(t *testing.T) { adapter := &eksAdapter{} // Test values that would cause systemd rounding issues problematicValues := []int64{ 1024, // 102.4% -> systemd rounds to 103% 1025, // 102.5% -> systemd rounds to 103% 1026, // 102.6% -> systemd rounds to 103% 2048, // 204.8% -> systemd rounds to 205% 3072, // 307.2% -> systemd rounds to 308% } for _, input := range problematicValues { result := adapter.roundCPUMillicores(input) // Verify result is a multiple of 250 (quarter core) if result%250 != 0 { t.Errorf("roundCPUMillicores(%d) = %d, which is not a multiple of 250m", input, result) } // Verify result produces an integer percentage (whole or quarter) // Valid: 0%, 25%, 50%, 75%, 100%, 125%, etc.
// 1000m = 100%, 250m = 25% percentage := (result * 100) / 1000 // percentage with 1 decimal place if percentage%25 != 0 { t.Errorf("roundCPUMillicores(%d) = %d, which produces non-quarter percentage (%d)", input, result, percentage) } } } // mockLogger implements flotillaLog.Logger for testing type mockLogger struct { logCalls [][]interface{} eventCalls [][]interface{} } func (m *mockLogger) Log(keyvals ...interface{}) error { m.logCalls = append(m.logCalls, keyvals) return nil } func (m *mockLogger) Event(keyvals ...interface{}) error { m.eventCalls = append(m.eventCalls, keyvals) return nil } func (m *mockLogger) reset() { m.logCalls = nil m.eventCalls = nil } // mockStateManager implements state.Manager for testing type mockStateManager struct { estimateResourcesResult state.TaskResources estimateResourcesError error } func (m *mockStateManager) EstimateRunResources(ctx context.Context, executableID string, commandHash string) (state.TaskResources, error) { return m.estimateResourcesResult, m.estimateResourcesError } // Stub implementations for required interface methods func (m *mockStateManager) Name() string { return "mock" } func (m *mockStateManager) Initialize(conf config.Config) error { return nil } func (m *mockStateManager) Cleanup() error { return nil } func (m *mockStateManager) ListDefinitions(ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string) (state.DefinitionList, error) { return state.DefinitionList{}, nil } func (m *mockStateManager) GetDefinition(ctx context.Context, definitionID string) (state.Definition, error) { return state.Definition{}, nil } func (m *mockStateManager) GetDefinitionByAlias(ctx context.Context, alias string) (state.Definition, error) { return state.Definition{}, nil } func (m *mockStateManager) UpdateDefinition(ctx context.Context, definitionID string, updates state.Definition) (state.Definition, error) { return state.Definition{}, nil } func (m *mockStateManager) CreateDefinition(ctx context.Context, d state.Definition) error { return nil } func (m *mockStateManager) DeleteDefinition(ctx context.Context, definitionID string) error { return nil } func (m *mockStateManager) ListRuns(ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string, engines []string) (state.RunList, error) { return state.RunList{}, nil } func (m *mockStateManager) EstimateExecutorCount(ctx context.Context, executableID string, commandHash string) (int64, error) { return 0, nil } func (m *mockStateManager) ExecutorOOM(ctx context.Context, executableID string, commandHash string) (bool, error) { return false, nil } func (m *mockStateManager) DriverOOM(ctx context.Context, executableID string, commandHash string) (bool, error) { return false, nil } func (m *mockStateManager) GetRun(ctx context.Context, runID string) (state.Run, error) { return state.Run{}, nil } func (m *mockStateManager) CreateRun(ctx context.Context, r state.Run) error { return nil } func (m *mockStateManager) UpdateRun(ctx context.Context, runID string, updates state.Run) (state.Run, error) { return state.Run{}, nil } func (m *mockStateManager) ListGroups(ctx context.Context, limit int, offset int, name *string) (state.GroupsList, error) { return state.GroupsList{}, nil } func (m *mockStateManager) ListTags(ctx context.Context, limit int, offset int, name *string) (state.TagsList, error) { return state.TagsList{}, nil } func (m *mockStateManager) 
ListWorkers(ctx context.Context, engine string) (state.WorkersList, error) { return state.WorkersList{}, nil } func (m *mockStateManager) BatchUpdateWorkers(ctx context.Context, updates []state.Worker) (state.WorkersList, error) { return state.WorkersList{}, nil } func (m *mockStateManager) GetWorker(ctx context.Context, workerType string, engine string) (state.Worker, error) { return state.Worker{}, nil } func (m *mockStateManager) UpdateWorker(ctx context.Context, workerType string, updates state.Worker) (state.Worker, error) { return state.Worker{}, nil } func (m *mockStateManager) GetExecutableByTypeAndID(ctx context.Context, executableType state.ExecutableType, executableID string) (state.Executable, error) { return state.Definition{}, nil } func (m *mockStateManager) GetTemplateByID(ctx context.Context, templateID string) (state.Template, error) { return state.Template{}, nil } func (m *mockStateManager) GetLatestTemplateByTemplateName(ctx context.Context, templateName string) (bool, state.Template, error) { return false, state.Template{}, nil } func (m *mockStateManager) GetTemplateByVersion(ctx context.Context, templateName string, templateVersion int64) (bool, state.Template, error) { return false, state.Template{}, nil } func (m *mockStateManager) ListTemplates(ctx context.Context, limit int, offset int, sortBy string, order string) (state.TemplateList, error) { return state.TemplateList{}, nil } func (m *mockStateManager) ListTemplatesLatestOnly(ctx context.Context, limit int, offset int, sortBy string, order string) (state.TemplateList, error) { return state.TemplateList{}, nil } func (m *mockStateManager) CreateTemplate(ctx context.Context, t state.Template) error { return nil } func (m *mockStateManager) ListFailingNodes(ctx context.Context) (state.NodeList, error) { return state.NodeList{}, nil } func (m *mockStateManager) GetPodReAttemptRate(ctx context.Context) (float32, error) { return 0, nil } func (m *mockStateManager) GetNodeLifecycle(ctx context.Context, executableID string, commandHash string) (string, error) { return "", nil } func (m *mockStateManager) GetTaskHistoricalRuntime(ctx context.Context, executableID string, runId string) (float32, error) { return 0, nil } func (m *mockStateManager) CheckIdempotenceKey(ctx context.Context, idempotenceKey string) (string, error) { return "", nil } func (m *mockStateManager) GetRunByEMRJobId(ctx context.Context, emrJobId string) (state.Run, error) { return state.Run{}, nil } func (m *mockStateManager) GetResources(ctx context.Context, runID string) (state.Run, error) { return state.Run{}, nil } func (m *mockStateManager) ListClusterStates(ctx context.Context) ([]state.ClusterMetadata, error) { return nil, nil } func (m *mockStateManager) UpdateClusterMetadata(ctx context.Context, cluster state.ClusterMetadata) error { return nil } func (m *mockStateManager) DeleteClusterMetadata(ctx context.Context, clusterID string) error { return nil } func (m *mockStateManager) GetClusterByID(ctx context.Context, clusterID string) (state.ClusterMetadata, error) { return state.ClusterMetadata{}, nil } func (m *mockStateManager) GetRunStatus(ctx context.Context, runID string) (state.RunStatus, error) { return state.RunStatus{}, nil } // mockExecutable implements state.Executable for testing type mockExecutable struct { executableID string resources *state.ExecutableResources } func (m *mockExecutable) GetExecutableID() *string { return &m.executableID } func (m *mockExecutable) GetExecutableType() *state.ExecutableType { t := 
state.ExecutableTypeDefinition return &t } func (m *mockExecutable) GetExecutableResources() *state.ExecutableResources { return m.resources } func (m *mockExecutable) GetExecutableCommand(req state.ExecutionRequest) (string, error) { return "", nil } func (m *mockExecutable) GetExecutableResourceName() string { return m.executableID } func TestAdaptiveResources_NonGPUJob_ARAEnabled_Success(t *testing.T) { logger := &mockLogger{} adapter, err := NewEKSAdapter(logger) if err != nil { t.Fatalf("Failed to create adapter: %v", err) } executableID := "test-executable" executable := &mockExecutable{ executableID: executableID, resources: &state.ExecutableResources{ Memory: int64Ptr(1000), Cpu: int64Ptr(500), }, } commandHash := "test-command-hash" run := state.Run{ RunID: "test-run", ExecutableID: &executableID, CommandHash: &commandHash, } manager := &mockStateManager{ estimateResourcesResult: state.TaskResources{ Cpu: sql.NullInt64{Int64: 2000, Valid: true}, Memory: sql.NullInt64{Int64: 3000, Valid: true}, }, estimateResourcesError: nil, } // Note: We can't easily test metrics emission since they're package-level functions, // but we can verify the logic works correctly cpuLimit, memLimit, cpuRequest, memRequest := adapter.(*eksAdapter).adaptiveResources( context.Background(), executable, run, manager, true, // araEnabled ) // Verify ARA increased resources if cpuRequest != 2000 { t.Errorf("Expected CPU request 2000, got %d", cpuRequest) } if memRequest != 3000 { t.Errorf("Expected memory request 3000, got %d", memRequest) } if cpuLimit != 2000 { t.Errorf("Expected CPU limit 2000, got %d", cpuLimit) } if memLimit != 3000 { t.Errorf("Expected memory limit 3000, got %d", memLimit) } } func TestAdaptiveResources_GPUJob_SkipsARA(t *testing.T) { logger := &mockLogger{} adapter, err := NewEKSAdapter(logger) if err != nil { t.Fatalf("Failed to create adapter: %v", err) } executableID := "test-executable" gpu := int64(1) executable := &mockExecutable{ executableID: executableID, resources: &state.ExecutableResources{ Memory: int64Ptr(1000), Cpu: int64Ptr(500), }, } run := state.Run{ RunID: "test-run", ExecutableID: &executableID, Gpu: &gpu, } manager := &mockStateManager{} _, _, cpuRequest, memRequest := adapter.(*eksAdapter).adaptiveResources( context.Background(), executable, run, manager, true, // araEnabled ) // Verify GPU jobs use defaults (no ARA) defaultCPU := int64(500) defaultMem := int64(1000) if cpuRequest != defaultCPU { t.Errorf("Expected CPU request %d (default), got %d", defaultCPU, cpuRequest) } if memRequest != defaultMem { t.Errorf("Expected memory request %d (default), got %d", defaultMem, memRequest) } } func TestAdaptiveResources_EstimationFailed(t *testing.T) { logger := &mockLogger{} adapter, err := NewEKSAdapter(logger) if err != nil { t.Fatalf("Failed to create adapter: %v", err) } executableID := "test-executable" executable := &mockExecutable{ executableID: executableID, resources: &state.ExecutableResources{ Memory: int64Ptr(1000), Cpu: int64Ptr(500), }, } run := state.Run{ RunID: "test-run", ExecutableID: &executableID, } manager := &mockStateManager{ estimateResourcesError: errors.New("estimation failed"), } _, _, cpuRequest, memRequest := adapter.(*eksAdapter).adaptiveResources( context.Background(), executable, run, manager, true, // araEnabled ) // Verify defaults are used when estimation fails defaultCPU := int64(500) defaultMem := int64(1000) if cpuRequest != defaultCPU { t.Errorf("Expected CPU request %d (default), got %d", defaultCPU, cpuRequest) } if memRequest != 
defaultMem { t.Errorf("Expected memory request %d (default), got %d", defaultMem, memRequest) } } func TestAdaptiveResources_MaxResourceBoundsHit(t *testing.T) { logger := &mockLogger{} adapter, err := NewEKSAdapter(logger) if err != nil { t.Fatalf("Failed to create adapter: %v", err) } executableID := "test-executable" definitionID := "test-definition" command := "test-command" commandHash := "test-command-hash" executable := &mockExecutable{ executableID: executableID, resources: &state.ExecutableResources{ Memory: int64Ptr(1000), Cpu: int64Ptr(500), }, } run := state.Run{ RunID: "test-run", ExecutableID: &executableID, DefinitionID: definitionID, Command: &command, CommandHash: &commandHash, ClusterName: "test-cluster", } // Return resources that exceed max bounds manager := &mockStateManager{ estimateResourcesResult: state.TaskResources{ Cpu: sql.NullInt64{Int64: state.MaxCPU + 10000, Valid: true}, // Exceeds max Memory: sql.NullInt64{Int64: state.MaxMem + 50000, Valid: true}, // Exceeds max }, estimateResourcesError: nil, } cpuLimit, memLimit, cpuRequest, memRequest := adapter.(*eksAdapter).adaptiveResources( context.Background(), executable, run, manager, true, // araEnabled ) // Verify resources are capped at max bounds if cpuRequest != state.MaxCPU { t.Errorf("Expected CPU request capped at %d, got %d", state.MaxCPU, cpuRequest) } if memRequest != state.MaxMem { t.Errorf("Expected memory request capped at %d, got %d", state.MaxMem, memRequest) } if cpuLimit != state.MaxCPU { t.Errorf("Expected CPU limit capped at %d, got %d", state.MaxCPU, cpuLimit) } if memLimit != state.MaxMem { t.Errorf("Expected memory limit capped at %d, got %d", state.MaxMem, memLimit) } // Verify logger was called for max resource hit // There should be two logs: one for ARA adjustment, one for max bounds hit if len(logger.logCalls) < 2 { t.Errorf("Expected at least 2 logger.Log calls (ARA adjustment + max bounds hit), got %d", len(logger.logCalls)) return } // Find the max bounds hit log (should have level:warn) var maxBoundsLog []interface{} for _, logCall := range logger.logCalls { for i := 0; i < len(logCall); i += 2 { if i+1 < len(logCall) && logCall[i] == "level" && logCall[i+1] == "warn" { maxBoundsLog = logCall break } } if maxBoundsLog != nil { break } } if maxBoundsLog == nil { t.Errorf("Expected log with level:warn for max bounds hit, got logCalls: %v", logger.logCalls) return } // Verify log contains expected fields foundMessage := false foundRunID := false for i := 0; i < len(maxBoundsLog); i += 2 { if i+1 < len(maxBoundsLog) { key := maxBoundsLog[i] value := maxBoundsLog[i+1] if key == "message" { msg := value.(string) if msg == "ARA resource allocation hit maximum limit" || msg == "ARA memory allocation hit maximum limit - potential over-provisioning" { foundMessage = true } } if key == "run_id" && value == "test-run" { foundRunID = true } } } if !foundMessage { t.Errorf("Expected log to contain message about max resource hit") } if !foundRunID { t.Error("Expected log to contain 'run_id: test-run'") } } func TestAdaptiveResources_ARADisabled(t *testing.T) { logger := &mockLogger{} adapter, err := NewEKSAdapter(logger) if err != nil { t.Fatalf("Failed to create adapter: %v", err) } executableID := "test-executable" executable := &mockExecutable{ executableID: executableID, resources: &state.ExecutableResources{ Memory: int64Ptr(1000), Cpu: int64Ptr(500), }, } run := state.Run{ RunID: "test-run", ExecutableID: &executableID, } manager := &mockStateManager{} _, _, cpuRequest, memRequest := 
adapter.(*eksAdapter).adaptiveResources( context.Background(), executable, run, manager, false, // araEnabled = false ) // Verify defaults are used when ARA is disabled defaultCPU := int64(500) defaultMem := int64(1000) if cpuRequest != defaultCPU { t.Errorf("Expected CPU request %d (default), got %d", defaultCPU, cpuRequest) } if memRequest != defaultMem { t.Errorf("Expected memory request %d (default), got %d", defaultMem, memRequest) } } func TestEmitARAMetrics_StructuredLog(t *testing.T) { logger := &mockLogger{} adapter, err := NewEKSAdapter(logger) if err != nil { t.Fatalf("Failed to create adapter: %v", err) } executableID := "test-executable" definitionID := "test-definition" command := "test-command" run := state.Run{ RunID: "test-run", ExecutableID: &executableID, DefinitionID: definitionID, Command: &command, ClusterName: "test-cluster", } adapter.(*eksAdapter).emitARAMetrics(run, 1000, 2000, 3000, 4000, 5000, 6000, true, true) // Verify logger was called if len(logger.logCalls) == 0 { t.Error("Expected logger.Log to be called") return } logCall := logger.logCalls[0] expectedFields := map[string]interface{}{ "level": "warn", "message": "ARA memory allocation hit maximum limit - potential over-provisioning", "run_id": "test-run", "cluster": "test-cluster", "default_cpu_millicores": int64(1000), "default_memory_mb": int64(2000), "requested_cpu_millicores": int64(5000), "requested_memory_mb": int64(6000), "final_cpu_millicores": int64(3000), "final_memory_mb": int64(4000), "max_cpu_hit": true, "max_memory_hit": true, "definition_id": "test-definition", "executable_id": "test-executable", "command": "test-command", "memory_overage_mb": int64(2000), // 6000 - 4000 "cpu_overage_millicores": int64(2000), // 5000 - 3000 } // Verify all expected fields are present logMap := make(map[interface{}]interface{}) for i := 0; i < len(logCall); i += 2 { if i+1 < len(logCall) { logMap[logCall[i]] = logCall[i+1] } } for key, expectedValue := range expectedFields { if actualValue, ok := logMap[key]; !ok { t.Errorf("Expected log to contain field '%s'", key) } else if actualValue != expectedValue { t.Errorf("Expected log field '%s' to be %v, got %v", key, expectedValue, actualValue) } } } func TestEmitARAMetrics_NilLogger(t *testing.T) { // Create adapter with nil logger (shouldn't panic) adapter := &eksAdapter{logger: nil} run := state.Run{ RunID: "test-run", } // Should not panic adapter.emitARAMetrics(run, 1000, 2000, 3000, 4000, 5000, 6000, true, true) } // Helper function func int64Ptr(i int64) *int64 { return &i } ================================================ FILE: execution/engine/dcm.go ================================================ package engine import ( "context" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/eks" "github.com/pkg/errors" flotillaLog "github.com/stitchfix/flotilla-os/log" "github.com/stitchfix/flotilla-os/state" kubernetestrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" metricsv "k8s.io/metrics/pkg/client/clientset/versioned" "os" "os/exec" "path/filepath" "sync" ) // DynamicClusterManager handles dynamic loading of K8s clients type DynamicClusterManager struct { mutex sync.RWMutex log flotillaLog.Logger eksClient *eks.EKS awsRegion string manager state.Manager awsSession *session.Session } // getKubeconfigBaseDir returns the base directory for kubeconfig files func getKubeconfigBaseDir() string 
{ dir := os.Getenv("EKS_KUBECONFIG_BASEPATH") if dir == "" { dir, _ = os.Getwd() } return dir } // NewDynamicClusterManager creates a cluster manager that loads clusters from the state manager func NewDynamicClusterManager(awsRegion string, log flotillaLog.Logger, manager state.Manager) (*DynamicClusterManager, error) { sess := session.Must(session.NewSession(&aws.Config{ Region: aws.String(awsRegion), })) eksClient := eks.New(sess) return &DynamicClusterManager{ log: log, eksClient: eksClient, awsRegion: awsRegion, manager: manager, awsSession: sess, }, nil } // getOrCreateKubeconfig ensures a valid kubeconfig exists for the given cluster func (dcm *DynamicClusterManager) getOrCreateKubeconfig(clusterName string) (string, error) { kubeconfigBaseDir := getKubeconfigBaseDir() kubeconfigPath := filepath.Join(kubeconfigBaseDir, clusterName) if _, err := os.Stat(kubeconfigBaseDir); os.IsNotExist(err) { if err := os.MkdirAll(kubeconfigBaseDir, 0755); err != nil { return "", errors.Wrap(err, "failed to create directory for kubeconfigs") } } needsGeneration := false if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { needsGeneration = true } else { _, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) if err != nil { needsGeneration = true } } if needsGeneration { if err := dcm.generateKubeconfig(clusterName, kubeconfigPath); err != nil { return "", err } } return kubeconfigPath, nil } // generateKubeconfig creates a kubeconfig file for the specified cluster func (dcm *DynamicClusterManager) generateKubeconfig(clusterName, kubeconfigPath string) error { cmd := exec.Command("aws", "eks", "update-kubeconfig", "--name", clusterName, "--region", dcm.awsRegion, "--kubeconfig", kubeconfigPath) if output, err := cmd.CombinedOutput(); err != nil { dcm.log.Log("level", "error", "message", "Failed to generate kubeconfig", "cluster", clusterName, "error", err.Error(), "output", string(output)) return errors.Wrapf(err, "failed to generate kubeconfig: %s", string(output)) } dcm.log.Log("level", "info", "message", "Successfully generated kubeconfig", "cluster", clusterName, "path", kubeconfigPath) return nil } // createRestConfig builds a rest.Config from a kubeconfig path func (dcm *DynamicClusterManager) createRestConfig(kubeconfigPath string) (*rest.Config, error) { config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) if err != nil { return nil, errors.Wrap(err, "failed to load kubeconfig") } config.WrapTransport = kubernetestrace.WrapRoundTripper return config, nil } // GetKubernetesClient returns a k8s client for the requested cluster func (dcm *DynamicClusterManager) GetKubernetesClient(clusterName string) (kubernetes.Clientset, error) { kubeconfigPath, err := dcm.getOrCreateKubeconfig(clusterName) if err != nil { return kubernetes.Clientset{}, err } config, err := dcm.createRestConfig(kubeconfigPath) if err != nil { return kubernetes.Clientset{}, err } kClient, err := kubernetes.NewForConfig(config) if err != nil { return kubernetes.Clientset{}, errors.Wrap(err, "failed to create kubernetes client") } return *kClient, nil } // GetMetricsClient returns a metrics client for the requested cluster func (dcm *DynamicClusterManager) GetMetricsClient(clusterName string) (metricsv.Clientset, error) { kubeconfigPath, err := dcm.getOrCreateKubeconfig(clusterName) if err != nil { return metricsv.Clientset{}, err } config, err := dcm.createRestConfig(kubeconfigPath) if err != nil { return metricsv.Clientset{}, err } metricsClient, err := metricsv.NewForConfig(config) if err != nil {
return metricsv.Clientset{}, errors.Wrap(err, "failed to create metrics client") } return *metricsClient, nil } // InitializeClusters handles both static and dynamic cluster configurations func (dcm *DynamicClusterManager) InitializeClusters(ctx context.Context, staticClusters []string) error { kubeconfigBaseDir := getKubeconfigBaseDir() if err := os.MkdirAll(kubeconfigBaseDir, 0755); err != nil { return errors.Wrap(err, "failed to create directory for kubeconfigs") } // Initialize static clusters for _, clusterName := range staticClusters { kubeconfigPath := filepath.Join(kubeconfigBaseDir, clusterName) if err := dcm.generateKubeconfig(clusterName, kubeconfigPath); err != nil { dcm.log.Log("level", "error", "message", "Failed to initialize static cluster", "cluster", clusterName, "error", err.Error()) } } // Initialize dynamic clusters from state manager clusters, err := dcm.manager.ListClusterStates(ctx) if err != nil { return errors.Wrap(err, "failed to list clusters") } for _, cluster := range clusters { if cluster.Status == state.StatusActive { kubeconfigPath := filepath.Join(kubeconfigBaseDir, cluster.Name) if err := dcm.generateKubeconfig(cluster.Name, kubeconfigPath); err != nil { dcm.log.Log("level", "error", "message", "Failed to initialize dynamic cluster", "cluster", cluster.Name, "error", err.Error()) } } } return nil } ================================================ FILE: execution/engine/eks_engine.go ================================================ package engine import ( "bytes" "context" "fmt" "github.com/go-redis/redis" "github.com/stitchfix/flotilla-os/utils" "strings" "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/s3" "github.com/pkg/errors" "github.com/stitchfix/flotilla-os/clients/metrics" "github.com/stitchfix/flotilla-os/config" "github.com/stitchfix/flotilla-os/execution/adapter" flotillaLog "github.com/stitchfix/flotilla-os/log" "github.com/stitchfix/flotilla-os/queue" "github.com/stitchfix/flotilla-os/state" awstrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/aws/aws-sdk-go/aws" "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8sJson "k8s.io/apimachinery/pkg/runtime/serializer/json" "k8s.io/client-go/kubernetes" metricsv "k8s.io/metrics/pkg/client/clientset/versioned" ) // EKSExecutionEngine submits runs to EKS. 
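// A minimal wiring sketch (illustrative only; the names conf, logger,
// queueManager, sm, ctx, executable, and run are assumptions, not part of this file):
//
//	engine := &EKSExecutionEngine{log: logger, qm: queueManager, stateManager: sm}
//	if err := engine.Initialize(conf); err != nil {
//		// configuration or cluster initialization failed
//	}
//	updatedRun, retryable, err := engine.Execute(ctx, executable, run, sm)
//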
type EKSExecutionEngine struct { kClients map[string]kubernetes.Clientset metricsClients map[string]metricsv.Clientset adapter adapter.EKSAdapter qm queue.Manager log flotillaLog.Logger jobQueue string jobNamespace string jobTtl int jobSA string jobARAEnabled bool schedulerName string serializer *k8sJson.Serializer s3Client *s3.S3 s3Bucket string s3BucketRootDir string statusQueue string clusters []string clusterManager *DynamicClusterManager stateManager state.Manager redisClient *redis.Client } // Initialize configures the EKSExecutionEngine and initializes internal clients func (ee *EKSExecutionEngine) Initialize(conf config.Config) error { ee.jobQueue = conf.GetString("eks_job_queue") ee.schedulerName = "default-scheduler" if conf.IsSet("eks_scheduler_name") { ee.schedulerName = conf.GetString("eks_scheduler_name") } if conf.IsSet("eks_status_queue") { ee.statusQueue = conf.GetString("eks_status_queue") } ee.jobNamespace = conf.GetString("eks_job_namespace") ee.jobTtl = conf.GetInt("eks_job_ttl") ee.jobSA = conf.GetString("eks_default_service_account") ee.jobARAEnabled = true clusterManager, err := NewDynamicClusterManager( conf.GetString("aws_default_region"), ee.log, ee.stateManager, ) if err != nil { return errors.Wrap(err, "failed to create dynamic cluster manager") } ee.clusterManager = clusterManager // Get static clusters if configured var staticClusters []string if conf.IsSet("eks_clusters") { clusters := strings.Split(conf.GetString("eks_clusters"), ",") for i := range clusters { staticClusters = append(staticClusters, strings.TrimSpace(clusters[i])) } } // Initialize all clusters (both static and dynamic) if err := clusterManager.InitializeClusters(context.Background(), staticClusters); err != nil { ee.log.Log("level", "error", "message", "failed to initialize clusters", "error", err.Error()) } adapt, err := adapter.NewEKSAdapter(ee.log) if err != nil { return err } ee.serializer = k8sJson.NewSerializerWithOptions( k8sJson.DefaultMetaFactory, nil, nil, k8sJson.SerializerOptions{ Yaml: true, Pretty: true, Strict: true, }, ) awsRegion := conf.GetString("eks_manifest_storage_options_region") awsConfig := &aws.Config{Region: aws.String(awsRegion)} sess := awstrace.WrapSession(session.Must(session.NewSessionWithOptions(session.Options{Config: *awsConfig}))) ee.s3Client = s3.New(sess, aws.NewConfig().WithRegion(awsRegion)) ee.s3Bucket = conf.GetString("eks_manifest_storage_options_s3_bucket_name") ee.s3BucketRootDir = conf.GetString("eks_manifest_storage_options_s3_bucket_root_dir") ee.adapter = adapt return nil } func (ee *EKSExecutionEngine) Execute(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager) (state.Run, bool, error) { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, "flotilla.job.execute", "") span.SetTag("job.run_id", run.RunID) span.SetTag("job.tier", run.Tier) defer span.Finish() utils.TagJobRun(span, run) if run.Namespace == nil || *run.Namespace == "" { clusters, err := manager.ListClusterStates(ctx) if err == nil { for _, cluster := range clusters { if cluster.Name == run.ClusterName && cluster.Namespace != "" { run.Namespace = &cluster.Namespace break } } } } if run.ServiceAccount == nil { run.ServiceAccount = aws.String(ee.jobSA) } tierTag := fmt.Sprintf("tier:%s", run.Tier) job, err := ee.adapter.AdaptFlotillaDefinitionAndRunToJob(ctx, executable, run, ee.schedulerName, manager, ee.jobARAEnabled) if err != nil { exitReason := fmt.Sprintf("Error 
creating k8s manifest - %s", err.Error()) run.ExitReason = &exitReason return run, false, err } kClient, err := ee.getKClient(run) if err != nil { exitReason := fmt.Sprintf("Invalid cluster name - %s", run.ClusterName) run.ExitReason = &exitReason return run, false, err } result, err := kClient.BatchV1().Jobs(ee.jobNamespace).Create(ctx, &job, metav1.CreateOptions{}) if err != nil { // Job is already submitted, don't retry if strings.Contains(strings.ToLower(err.Error()), "already exists") { return run, false, nil } // Job spec is invalid, don't retry. if strings.Contains(strings.ToLower(err.Error()), "is invalid") { exitReason := err.Error() run.ExitReason = &exitReason return run, false, err } // Legitimate submit error, retryable. _ = metrics.Increment(metrics.EngineEKSExecute, []string{string(metrics.StatusFailure), tierTag}, 1) return run, true, err } var b0 bytes.Buffer err = ee.serializer.Encode(result, &b0) if err == nil { putObject := s3.PutObjectInput{ Bucket: aws.String(ee.s3Bucket), Body: bytes.NewReader(b0.Bytes()), Key: aws.String(fmt.Sprintf("%s/%s/%s.yaml", ee.s3BucketRootDir, run.RunID, run.RunID)), ContentType: aws.String("text/yaml"), } _, err = ee.s3Client.PutObject(&putObject) if err != nil { _ = ee.log.Log("level", "error", "message", "s3_upload_error", "error", err.Error()) } } _ = metrics.Increment(metrics.EngineEKSExecute, []string{string(metrics.StatusSuccess), tierTag}, 1) run, _ = ee.getPodName(run) adaptedRun, err := ee.adapter.AdaptJobToFlotillaRun(result, run, nil) if err != nil { return adaptedRun, false, err } // Set status to running. adaptedRun.Status = state.StatusRunning span.SetTag("job.submitted", true) utils.TagJobRun(span, adaptedRun) return adaptedRun, false, nil } func (ee *EKSExecutionEngine) getPodName(run state.Run) (state.Run, error) { podList, err := ee.getPodList(run) if err != nil { return run, err } if podList != nil && podList.Items != nil && len(podList.Items) > 0 { pod := podList.Items[len(podList.Items)-1] run.PodName = &pod.Name run.Namespace = &pod.Namespace if pod.Spec.Containers != nil && len(pod.Spec.Containers) > 0 { container := pod.Spec.Containers[len(pod.Spec.Containers)-1] cpu := container.Resources.Requests.Cpu().ScaledValue(resource.Milli) cpuLimit := container.Resources.Limits.Cpu().ScaledValue(resource.Milli) run.Cpu = &cpu run.CpuLimit = &cpuLimit run = ee.getInstanceDetails(pod, run) mem := container.Resources.Requests.Memory().ScaledValue(resource.Mega) run.Memory = &mem memLimit := container.Resources.Limits.Memory().ScaledValue(resource.Mega) run.MemoryLimit = &memLimit } } return run, nil } func (ee *EKSExecutionEngine) getInstanceDetails(pod v1.Pod, run state.Run) state.Run { if len(pod.Spec.NodeName) > 0 { run.InstanceDNSName = pod.Spec.NodeName } return run } func (ee *EKSExecutionEngine) getPodList(run state.Run) (*v1.PodList, error) { ctx := context.Background() kClient, err := ee.getKClient(run) if err != nil { return &v1.PodList{}, err } if run.PodName != nil { pod, err := kClient.CoreV1().Pods(ee.jobNamespace).Get(ctx, *run.PodName, metav1.GetOptions{}) if pod != nil { return &v1.PodList{Items: []v1.Pod{*pod}}, err } } else { if run.QueuedAt == nil { return &v1.PodList{}, err } queuedAt := *run.QueuedAt if time.Now().After(queuedAt.Add(time.Minute * time.Duration(5))) { podList, err := kClient.CoreV1().Pods(ee.jobNamespace).List(ctx, metav1.ListOptions{ LabelSelector: fmt.Sprintf("job-name=%s", run.RunID), }) return 
podList, err } } return &v1.PodList{}, err } func (ee *EKSExecutionEngine) getKClient(run state.Run) (kubernetes.Clientset, error) { ctx := context.Background() ctx, span := utils.TraceJob(ctx, "flotilla.job.get_k8s_client", run.RunID) defer span.Finish() startTime := time.Now() kClient, err := ee.clusterManager.GetKubernetesClient(run.ClusterName) span.SetTag("k8s.client_init_ms", time.Since(startTime).Milliseconds()) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) span.SetTag("error.type", "k8s_client_init") return kubernetes.Clientset{}, errors.Wrapf(err, "failed to get Kubernetes client for cluster %s", run.ClusterName) } return kClient, nil } func (ee *EKSExecutionEngine) Terminate(ctx context.Context, run state.Run) error { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, "flotilla.job.eks_terminate", run.RunID) defer span.Finish() utils.TagJobRun(span, run) gracePeriod := int64(300) deletionPropagation := metav1.DeletePropagationBackground _ = ee.log.Log("level", "info", "message", "terminating run", "run_id", run.RunID) deleteOptions := &metav1.DeleteOptions{ GracePeriodSeconds: &gracePeriod, PropagationPolicy: &deletionPropagation, } kClient, err := ee.getKClient(run) if err != nil { exitReason := err.Error() run.ExitReason = &exitReason return err } _ = kClient.BatchV1().Jobs(ee.jobNamespace).Delete(ctx, run.RunID, *deleteOptions) if run.PodName != nil { _ = kClient.CoreV1().Pods(ee.jobNamespace).Delete(ctx, *run.PodName, *deleteOptions) } tierTag := fmt.Sprintf("tier:%s", run.Tier) _ = metrics.Increment(metrics.EngineEKSTerminate, []string{string(metrics.StatusSuccess), tierTag}, 1) return nil } func (ee *EKSExecutionEngine) Enqueue(ctx context.Context, run state.Run) error { var span tracer.Span ctx, span = utils.TraceJob(ctx, "flotilla.job.eks_enqueue", "") defer span.Finish() span.SetTag("job.run_id", run.RunID) utils.TagJobRun(span, run) tierTag := fmt.Sprintf("tier:%s", run.Tier) // Get qurl qurl, err := ee.qm.QurlFor(ee.jobQueue, false) if err != nil { _ = metrics.Increment(metrics.EngineEKSEnqueue, []string{string(metrics.StatusFailure), tierTag}, 1) return errors.Wrapf(err, "problem getting queue url for [%s]", run.ClusterName) } // Queue run if err = ee.qm.Enqueue(ctx, qurl, run); err != nil { _ = metrics.Increment(metrics.EngineEKSEnqueue, []string{string(metrics.StatusFailure), tierTag}, 1) return errors.Wrapf(err, "problem enqueueing run [%s] to queue [%s]", run.RunID, qurl) } _ = metrics.Increment(metrics.EngineEKSEnqueue, []string{string(metrics.StatusSuccess), tierTag}, 1) return nil } func (ee *EKSExecutionEngine) PollRuns(ctx context.Context) ([]RunReceipt, error) { qurl, err := ee.qm.QurlFor(ee.jobQueue, false) if err != nil { return nil, errors.Wrap(err, "problem listing queues to poll") } queues := []string{qurl} var runs []RunReceipt for _, qurl := range queues { // // Get new queued Run // runReceipt, err := ee.qm.ReceiveRun(ctx, qurl) if err != nil { return runs, errors.Wrapf(err, "problem receiving run from queue url [%s]", qurl) } if runReceipt.Run == nil { continue } if runReceipt.TraceID != 0 && runReceipt.ParentID != 0 { ee.log.Log("level", "info", "message", "Received run with trace context", "run_id", runReceipt.Run.RunID, "trace_id", runReceipt.TraceID, "parent_id", runReceipt.ParentID) } runs = append(runs, RunReceipt{ RunReceipt: runReceipt, TraceID: runReceipt.TraceID, ParentID: runReceipt.ParentID, SamplingPriority: runReceipt.SamplingPriority, }) } 
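// Each receipt carries the trace context from the SQS message (TraceID, ParentID,
// SamplingPriority) so downstream workers can continue the submitter's trace
// rather than starting a new one.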
return runs, nil } // PollStatus is a dummy function as EKS does not emit task status // change events. func (ee *EKSExecutionEngine) PollStatus(ctx context.Context) (RunReceipt, error) { return RunReceipt{}, nil } // PollRunStatus is a no-op for EKS; run status is refreshed via FetchUpdateStatus instead. func (ee *EKSExecutionEngine) PollRunStatus(ctx context.Context) (state.Run, error) { return state.Run{}, nil } // Define returns a blank task definition and an error for the EKS engine. func (ee *EKSExecutionEngine) Define(ctx context.Context, td state.Definition) (state.Definition, error) { return td, errors.New("Definition of tasks is only supported for ECS.") } // Deregister returns an error for the EKS engine. func (ee *EKSExecutionEngine) Deregister(ctx context.Context, definition state.Definition) error { return errors.Errorf("EKSExecutionEngine does not allow for deregistering of task definitions.") } func (ee *EKSExecutionEngine) Get(ctx context.Context, run state.Run) (state.Run, error) { if ctx == nil { ctx = context.Background() } kClient, err := ee.getKClient(run) if err != nil { return state.Run{}, err } job, err := kClient.BatchV1().Jobs(ee.jobNamespace).Get(ctx, run.RunID, metav1.GetOptions{}) if err != nil { return state.Run{}, errors.Errorf("error getting kubernetes job %s", err) } updates, err := ee.adapter.AdaptJobToFlotillaRun(job, run, nil) if err != nil { return state.Run{}, errors.Errorf("error adapting kubernetes job to flotilla run %s", err) } return updates, nil } func (ee *EKSExecutionEngine) GetEvents(ctx context.Context, run state.Run) (state.PodEventList, error) { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, "flotilla.job.get_events", run.RunID) defer span.Finish() utils.TagJobRun(span, run) if run.PodName == nil { return state.PodEventList{}, nil } kClient, err := ee.getKClient(run) if err != nil { return state.PodEventList{}, err } eventList, err := kClient.CoreV1().Events(ee.jobNamespace).List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name==%s", *run.PodName)}) if err != nil { return state.PodEventList{}, errors.Errorf("error getting kubernetes event for flotilla run %s", err) } var podEvents []state.PodEvent for _, e := range eventList.Items { eTime := e.FirstTimestamp.Time runEvent := state.PodEvent{ Message: e.Message, Timestamp: &eTime, EventType: e.Type, Reason: e.Reason, SourceObject: e.ObjectMeta.Name, } if strings.Contains(e.Reason, "TriggeredScaleUp") { source := fmt.Sprintf("source:%s", e.ObjectMeta.Name) _ = metrics.Increment(metrics.EngineEKSNodeTriggeredScaledUp, []string{source}, 1) } podEvents = append(podEvents, runEvent) } podEventList := state.PodEventList{ Total: len(podEvents), PodEvents: podEvents, } return podEventList, nil } func (ee *EKSExecutionEngine) FetchPodMetrics(ctx context.Context, run state.Run) (state.Run, error) { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, "flotilla.job.eks_fetch_metrics", run.RunID) defer span.Finish() utils.TagJobRun(span, run) if run.PodName != nil { metricsClient, err := ee.clusterManager.GetMetricsClient(run.ClusterName) if err != nil { return run, errors.Wrapf(err, "failed to get metrics client for cluster %s", run.ClusterName) } start := time.Now() podMetrics, err := metricsClient.MetricsV1beta1().PodMetricses(ee.jobNamespace).Get(ctx, *run.PodName, metav1.GetOptions{}) _ = metrics.Timing(metrics.StatusWorkerFetchMetrics, time.Since(start), []string{run.ClusterName}, 1) if err != nil { return run, 
err } if len(podMetrics.Containers) > 0 { containerMetrics := podMetrics.Containers[0] mem := containerMetrics.Usage.Memory().ScaledValue(resource.Mega) if run.MaxMemoryUsed == nil || *run.MaxMemoryUsed == 0 || *run.MaxMemoryUsed < mem { run.MaxMemoryUsed = &mem } cpu := containerMetrics.Usage.Cpu().MilliValue() if run.MaxCpuUsed == nil || *run.MaxCpuUsed == 0 || *run.MaxCpuUsed < cpu { run.MaxCpuUsed = &cpu } } if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) } else if run.MaxMemoryUsed != nil { span.SetTag("job.metrics.memory_mb", *run.MaxMemoryUsed) } if run.MaxCpuUsed != nil { span.SetTag("job.metrics.cpu_millicores", *run.MaxCpuUsed) } return run, nil } return run, errors.New("no pod associated with the run.") } func (ee *EKSExecutionEngine) FetchUpdateStatus(ctx context.Context, run state.Run) (state.Run, error) { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, "flotilla.job.eks_fetch_status", run.RunID) defer span.Finish() utils.TagJobRun(span, run) kClient, err := ee.getKClient(run) if err != nil { return state.Run{}, err } start := time.Now() job, err := kClient.BatchV1().Jobs(ee.jobNamespace).Get(ctx, run.RunID, metav1.GetOptions{}) span.SetTag("k8s.job_get_ms", time.Since(start).Milliseconds()) _ = metrics.Timing(metrics.StatusWorkerGetJob, time.Since(start), []string{run.ClusterName}, 1) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) span.SetTag("error.type", "k8s_get_job") return run, err } if job.Status.Active > 0 { span.SetTag("job.k8s.active", job.Status.Active) } if job.Status.Succeeded > 0 { span.SetTag("job.k8s.succeeded", job.Status.Succeeded) } if job.Status.Failed > 0 { span.SetTag("job.k8s.failed", job.Status.Failed) } var mostRecentPod *v1.Pod var mostRecentPodCreationTimestamp metav1.Time start = time.Now() podList, err := ee.getPodList(run) _ = metrics.Timing(metrics.StatusWorkerGetPodList, time.Since(start), []string{run.ClusterName}, 1) if err == nil && podList != nil && podList.Items != nil && len(podList.Items) > 0 { // Iterate over associated pods to find the most recent. for _, p := range podList.Items { if mostRecentPodCreationTimestamp.Before(&p.CreationTimestamp) || len(podList.Items) == 1 { mostRecentPod = &p mostRecentPodCreationTimestamp = p.CreationTimestamp } } // If the run doesn't have an associated pod name yet OR // there is a newer pod (i.e. the old pod was killed), // update it. if mostRecentPod != nil && (run.PodName == nil || mostRecentPod.Name != *run.PodName) { if run.PodName != nil && mostRecentPod.Name != *run.PodName { _ = metrics.Increment(metrics.EngineEKSRunPodnameChange, []string{}, 1) } run.PodName = &mostRecentPod.Name run = ee.getInstanceDetails(*mostRecentPod, run) } // Pod didn't change, but Instance information is not populated. 
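// (This can happen when the pod was scheduled after the previous poll; backfill the node name from the pod spec.)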
if mostRecentPod != nil && len(run.InstanceDNSName) == 0 { run = ee.getInstanceDetails(*mostRecentPod, run) } if mostRecentPod != nil && mostRecentPod.Spec.Containers != nil && len(mostRecentPod.Spec.Containers) > 0 { container := mostRecentPod.Spec.Containers[len(mostRecentPod.Spec.Containers)-1] cpu := container.Resources.Requests.Cpu().ScaledValue(resource.Milli) run.Cpu = &cpu mem := container.Resources.Requests.Memory().ScaledValue(resource.Mega) run.Memory = &mem cpuLimit := container.Resources.Limits.Cpu().ScaledValue(resource.Milli) run.CpuLimit = &cpuLimit memLimit := container.Resources.Limits.Memory().ScaledValue(resource.Mega) run.MemoryLimit = &memLimit } } //run, _ = ee.FetchPodMetrics(ctx, run) hoursBack := time.Now().Add(-24 * time.Hour) start = time.Now() var events state.PodEventList //events, err = ee.GetEvents(ctx, run) _ = metrics.Timing(metrics.StatusWorkerGetEvents, time.Since(start), []string{run.ClusterName}, 1) if err == nil && len(events.PodEvents) > 0 { newEvents := events.PodEvents if run.PodEvents != nil && len(*run.PodEvents) > 0 { priorEvents := *run.PodEvents for _, newEvent := range newEvents { unseen := true for _, priorEvent := range priorEvents { if priorEvent.Equal(newEvent) { unseen = false break } } if unseen { priorEvents = append(priorEvents, newEvent) } } run.PodEvents = &priorEvents } else { run.PodEvents = &newEvents } } if run.PodEvents != nil { attemptCount := int64(0) for _, podEvent := range *run.PodEvents { if strings.Contains(podEvent.Reason, "Scheduled") { attemptCount = attemptCount + 1 } } run.AttemptCount = &attemptCount } // Handle edge case for dangling jobs. // Run used to have a pod and now it is not there, job is older than 24 hours. Terminate it. if err == nil && podList != nil && podList.Items != nil && len(podList.Items) == 0 && run.PodName != nil && run.QueuedAt.Before(hoursBack) { err = ee.Terminate(ctx, run) if err == nil { job.Status.Failed = 1 mostRecentPod = nil } } return ee.adapter.AdaptJobToFlotillaRun(job, run, mostRecentPod) } ================================================ FILE: execution/engine/emr_engine.go ================================================ package engine import ( "bytes" "context" "encoding/json" "fmt" "os" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/emrcontainers" "github.com/aws/aws-sdk-go/service/s3" "github.com/go-redis/redis" "github.com/pkg/errors" "github.com/stitchfix/flotilla-os/clients/metrics" "github.com/stitchfix/flotilla-os/exceptions" "github.com/stitchfix/flotilla-os/utils" "github.com/stitchfix/flotilla-os/config" flotillaLog "github.com/stitchfix/flotilla-os/log" "github.com/stitchfix/flotilla-os/queue" "github.com/stitchfix/flotilla-os/state" awstrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/aws/aws-sdk-go/aws" "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" _ "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" k8sJson "k8s.io/apimachinery/pkg/runtime/serializer/json" "k8s.io/client-go/kubernetes" _ "k8s.io/client-go/kubernetes/scheme" "regexp" "strings" ) // EMRExecutionEngine submits runs to EMR-EKS. 
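// The submit path: Execute builds an emrcontainers.StartJobRunInput (driver and
// executor pod templates are serialized to YAML and uploaded to S3 first), calls
// StartJobRun on the EMR-on-EKS containers API, and records the returned job ID
// and virtual cluster ID on the run's SparkExtension.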
type EMRExecutionEngine struct { sqsQueueManager queue.Manager log flotillaLog.Logger emrJobQueue string emrJobNamespace string emrJobRoleArn map[string]string emrJobSA string emrVirtualClusters map[string]string emrContainersClient *emrcontainers.EMRContainers schedulerName string s3Client *s3.S3 awsRegion string s3LogsBucket string s3EventLogPath string s3LogsBasePath string s3ManifestBucket string s3ManifestBasePath string serializer *k8sJson.Serializer clusters []string driverInstanceType string kClients map[string]kubernetes.Clientset clusterManager *DynamicClusterManager stateManager state.Manager redisClient *redis.Client lakekeeperSecretName string } // Initialize configures the EMRExecutionEngine and initializes internal clients func (emr *EMRExecutionEngine) Initialize(conf config.Config) error { emr.emrVirtualClusters = make(map[string]string) emr.emrVirtualClusters = conf.GetStringMapString("emr_virtual_clusters") emr.emrJobQueue = conf.GetString("emr_job_queue") emr.emrJobNamespace = conf.GetString("emr_job_namespace") emr.emrJobRoleArn = conf.GetStringMapString("emr_job_role_arn") emr.awsRegion = conf.GetString("emr_aws_region") emr.s3LogsBucket = conf.GetString("emr_log_bucket") emr.s3LogsBasePath = conf.GetString("emr_log_base_path") emr.s3EventLogPath = conf.GetString("emr_log_event_log_path") emr.s3ManifestBucket = conf.GetString("emr_manifest_bucket") emr.s3ManifestBasePath = conf.GetString("emr_manifest_base_path") emr.emrJobSA = conf.GetString("emr_default_service_account") emr.schedulerName = conf.GetString("eks_scheduler_name") emr.driverInstanceType = conf.GetString("emr_driver_instance_type") emr.lakekeeperSecretName = conf.GetString("emr_lakekeeper_secret_name") awsConfig := &aws.Config{Region: aws.String(emr.awsRegion)} sess := session.Must(session.NewSessionWithOptions(session.Options{Config: *awsConfig})) sess = awstrace.WrapSession(sess) emr.s3Client = s3.New(sess, aws.NewConfig().WithRegion(emr.awsRegion)) emr.emrContainersClient = emrcontainers.New(sess, aws.NewConfig().WithRegion(emr.awsRegion)) emr.serializer = k8sJson.NewSerializerWithOptions( k8sJson.SimpleMetaFactory{}, nil, nil, k8sJson.SerializerOptions{ Yaml: true, Pretty: true, Strict: true, }, ) clusterManager, err := NewDynamicClusterManager( emr.awsRegion, emr.log, emr.stateManager, ) if err != nil { return errors.Wrap(err, "failed to create dynamic cluster manager") } emr.clusterManager = clusterManager // Get static clusters if configured var staticClusters []string if conf.IsSet("eks_clusters") { clusters := strings.Split(conf.GetString("eks_clusters"), ",") for i := range clusters { staticClusters = append(staticClusters, strings.TrimSpace(clusters[i])) } } // Initialize all clusters (both static and dynamic) if err := clusterManager.InitializeClusters(context.Background(), staticClusters); err != nil { emr.log.Log("level", "error", "message", "failed to initialize clusters", "error", err.Error()) } return nil } func (emr *EMRExecutionEngine) getKClient(run state.Run) (kubernetes.Clientset, error) { kClient, err := emr.clusterManager.GetKubernetesClient(run.ClusterName) if err != nil { return kubernetes.Clientset{}, errors.Wrapf(err, "failed to get Kubernetes client for cluster %s", run.ClusterName) } return kClient, nil } func (emr *EMRExecutionEngine) Execute(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager) (state.Run, bool, error) { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, 
"flotilla.job.emr_execute", run.RunID) defer span.Finish() utils.TagJobRun(span, run) run = emr.estimateExecutorCount(run, manager) run = emr.estimateMemoryResources(ctx, run, manager) if run.ServiceAccount == nil || *run.ServiceAccount == "" { run.ServiceAccount = aws.String(emr.emrJobSA) } if run.CommandHash != nil && run.NodeLifecycle != nil && *run.NodeLifecycle == state.SpotLifecycle { nodeType, err := manager.GetNodeLifecycle(ctx, run.DefinitionID, *run.CommandHash) if err == nil && nodeType == state.OndemandLifecycle { run.NodeLifecycle = &state.OndemandLifecycle } } startJobRunInput, err := emr.generateEMRStartJobRunInput(ctx, executable, run, manager) emrJobManifest := aws.String(fmt.Sprintf("%s/%s/%s.json", emr.s3ManifestBasePath, run.RunID, "start-job-run-input")) obj, err := json.MarshalIndent(startJobRunInput, "", "\t") if err == nil { emrJobManifest = emr.writeStringToS3(emrJobManifest, obj) } emr.log.Log("level", "info", "message", "Start EMR JobRun", "ExecutionRoleArn", startJobRunInput.ExecutionRoleArn) tierTag := fmt.Sprintf("tier:%s", run.Tier) startJobRunOutput, err := emr.emrContainersClient.StartJobRun(&startJobRunInput) if err == nil { run.SparkExtension.VirtualClusterId = startJobRunOutput.VirtualClusterId run.SparkExtension.EMRJobId = startJobRunOutput.Id run.SparkExtension.EMRJobManifest = emrJobManifest run.Status = state.StatusQueued _ = metrics.Increment(metrics.EngineEMRExecute, []string{string(metrics.StatusSuccess), tierTag}, 1) } else { run.ExitReason = aws.String(fmt.Sprintf("%v", err)) run.ExitCode = aws.Int64(-1) run.StartedAt = run.QueuedAt run.FinishedAt = run.QueuedAt run.Status = state.StatusStopped _ = emr.log.Log("level", "error", "message", "EMR job submission error", "error", err.Error()) _ = metrics.Increment(metrics.EngineEKSExecute, []string{string(metrics.StatusFailure), tierTag}, 1) return run, false, err } if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) } else { span.SetTag("emr.job_id", *run.SparkExtension.EMRJobId) span.SetTag("emr.virtual_cluster_id", *run.SparkExtension.VirtualClusterId) utils.TagJobRun(span, run) } return run, false, nil } func (emr *EMRExecutionEngine) generateApplicationConf(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager) []*emrcontainers.Configuration { if ctx == nil { ctx = context.Background() } sparkDefaults := map[string]*string{ "spark.kubernetes.driver.podTemplateFile": emr.driverPodTemplate(ctx, executable, run, manager), "spark.kubernetes.executor.podTemplateFile": emr.executorPodTemplate(ctx, executable, run, manager), "spark.kubernetes.container.image": &run.Image, "spark.eventLog.dir": aws.String(fmt.Sprintf("s3://%s/%s", emr.s3LogsBucket, emr.s3EventLogPath)), "spark.history.fs.logDirectory": aws.String(fmt.Sprintf("s3://%s/%s", emr.s3LogsBucket, emr.s3EventLogPath)), "spark.eventLog.enabled": aws.String("true"), "spark.default.parallelism": aws.String("256"), "spark.sql.shuffle.partitions": aws.String("256"), // PrometheusServlet metrics config "spark.metrics.conf.*.sink.prometheusServlet.class": aws.String("org.apache.spark.metrics.sink.PrometheusServlet"), "spark.metrics.conf.*.sink.prometheusServlet.path": aws.String("/metrics/driver/prometheus"), "master.sink.prometheusServlet.path": aws.String("/metrics/master/prometheus"), "applications.sink.prometheusServlet.path": aws.String("/metrics/applications/prometheus"), // Metrics grouped per component instance and source namespace e.g., Component instance = Driver or Component 
instance = shuffleService "spark.kubernetes.driver.service.annotation.prometheus.io/port": aws.String("4040"), "spark.kubernetes.driver.service.annotation.prometheus.io/path": aws.String("/metrics/driver/prometheus/"), "spark.kubernetes.driver.service.annotation.prometheus.io/scrape": aws.String("true"), // Executor-level metrics are sent from each executor to the driver. Prometheus endpoint at: /metrics/executors/prometheus "spark.kubernetes.driver.annotation.prometheus.io/scrape": aws.String("true"), "spark.kubernetes.driver.annotation.prometheus.io/path": aws.String("/metrics/executors/prometheus/"), "spark.kubernetes.driver.annotation.prometheus.io/port": aws.String("4040"), "spark.ui.prometheus.enabled": aws.String("true"), } hiveDefaults := map[string]*string{} for _, k := range run.SparkExtension.ApplicationConf { sparkDefaults[*k.Name] = k.Value } if run.SparkExtension.HiveConf != nil { for _, k := range run.SparkExtension.HiveConf { if k.Name != nil && k.Value != nil { hiveDefaults[*k.Name] = k.Value } } } return []*emrcontainers.Configuration{ { Classification: aws.String("spark-defaults"), Properties: sparkDefaults, }, { Classification: aws.String("spark-hive-site"), Properties: hiveDefaults, }, } } func (emr *EMRExecutionEngine) generateEMRStartJobRunInput(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager) (emrcontainers.StartJobRunInput, error) { roleArn := emr.emrJobRoleArn[*run.ServiceAccount] if ctx == nil { ctx = context.Background() } dbClusters, err := emr.stateManager.ListClusterStates(ctx) if err != nil { emr.log.Log("level", "error", "message", "failed to get clusters from database", "error", err.Error()) return emrcontainers.StartJobRunInput{}, err } var clusterID string clusterFound := false for _, cluster := range dbClusters { if cluster.Namespace == emr.emrJobNamespace && cluster.Name == run.ClusterName { clusterID = cluster.EMRVirtualCluster if cluster.SparkServerURI != "" { run.SparkExtension.SparkServerURI = aws.String(cluster.SparkServerURI) } clusterFound = true break } } if !clusterFound { clusterID = emr.emrVirtualClusters[run.ClusterName] } if clusterID == "" { return emrcontainers.StartJobRunInput{}, fmt.Errorf("EMR virtual cluster ID not found for EKS cluster: %s", run.ClusterName) } startJobRunInput := emrcontainers.StartJobRunInput{ ClientToken: &run.RunID, ConfigurationOverrides: &emrcontainers.ConfigurationOverrides{ MonitoringConfiguration: &emrcontainers.MonitoringConfiguration{ PersistentAppUI: aws.String(emrcontainers.PersistentAppUIEnabled), S3MonitoringConfiguration: &emrcontainers.S3MonitoringConfiguration{ LogUri: aws.String(fmt.Sprintf("s3://%s/%s", emr.s3LogsBucket, emr.s3LogsBasePath)), }, }, ApplicationConfiguration: emr.generateApplicationConf(ctx, executable, run, manager), }, ExecutionRoleArn: &roleArn, JobDriver: &emrcontainers.JobDriver{ SparkSubmitJobDriver: &emrcontainers.SparkSubmitJobDriver{ EntryPoint: run.SparkExtension.SparkSubmitJobDriver.EntryPoint, EntryPointArguments: run.SparkExtension.SparkSubmitJobDriver.EntryPointArguments, SparkSubmitParameters: emr.sparkSubmitParams(run), }}, Name: &run.RunID, ReleaseLabel: run.SparkExtension.EMRReleaseLabel, VirtualClusterId: &clusterID, } return startJobRunInput, nil } func (emr *EMRExecutionEngine) generateTags(run state.Run) map[string]*string { tags := make(map[string]*string) if run.Env != nil && len(*run.Env) > 0 { for _, ev := range *run.Env { name := emr.sanitizeEnvVar(ev.Name) space := regexp.MustCompile(`\s+`) if len(ev.Value) < 256 && 
len(name) < 128 { tags[name] = aws.String(space.ReplaceAllString(ev.Value, "")) } } } return tags } // generates volumes and volumemounts depending on cluster name. // TODO cleanup after migration func generateVolumesForCluster(clusterName string, isEmptyDir bool) ([]v1.Volume, []v1.VolumeMount) { var volumes []v1.Volume var volumeMounts []v1.VolumeMount if isEmptyDir { // Use a emptyDir volume specificVolume := v1.Volume{ Name: "shared-lib-volume", VolumeSource: v1.VolumeSource{ EmptyDir: &(v1.EmptyDirVolumeSource{}), }, } volumes = append(volumes, specificVolume) } else { // Use the persistent volume claim sharedLibVolume := v1.Volume{ Name: "shared-lib-volume", VolumeSource: v1.VolumeSource{ PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ ClaimName: "s3-claim", }, }, } volumes = append(volumes, sharedLibVolume) } volumeMount := v1.VolumeMount{ Name: "shared-lib-volume", MountPath: "/var/lib/app", } volumeMounts = append(volumeMounts, volumeMount) return volumes, volumeMounts } func (emr *EMRExecutionEngine) driverPodTemplate(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager) *string { if ctx == nil { ctx = context.Background() } // Override driver pods to always be on ondemand nodetypes. run.NodeLifecycle = &state.OndemandLifecycle workingDir := "/var/lib/app" if run.SparkExtension != nil && run.SparkExtension.SparkSubmitJobDriver != nil && run.SparkExtension.SparkSubmitJobDriver.WorkingDir != nil { workingDir = *run.SparkExtension.SparkSubmitJobDriver.WorkingDir } volumes, volumeMounts := generateVolumesForCluster(run.ClusterName, true) podSpec := v1.PodSpec{ TerminationGracePeriodSeconds: aws.Int64(90), Volumes: volumes, SchedulerName: emr.schedulerName, Containers: []v1.Container{ { Name: "spark-kubernetes-driver", Env: append(emr.envOverrides(executable, run), emr.lakekeeperSecretEnvVars()...), VolumeMounts: volumeMounts, WorkingDir: workingDir, }, }, InitContainers: []v1.Container{{ Name: fmt.Sprintf("init-driver-%s", run.RunID), Image: run.Image, Env: emr.envOverrides(executable, run), VolumeMounts: volumeMounts, Command: emr.constructCmdSlice(run.SparkExtension.DriverInitCommand), }}, RestartPolicy: v1.RestartPolicyNever, Affinity: emr.constructAffinity(ctx, executable, run, manager, true), Tolerations: emr.constructTolerations(executable, run), } if emr.driverInstanceType != "" { podSpec.NodeSelector = map[string]string{ "node.kubernetes.io/instance-type": emr.driverInstanceType, } } labels := state.GetLabels(run) pod := v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ "karpenter.sh/do-not-evict": "true", "flotilla-run-id": run.RunID, }, Labels: labels, }, Spec: podSpec, } key := aws.String(fmt.Sprintf("%s/%s/%s.yaml", emr.s3ManifestBasePath, run.RunID, "driver-template")) return emr.writeK8ObjToS3(&pod, key) } func (emr *EMRExecutionEngine) executorPodTemplate(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager) *string { if ctx == nil { ctx = context.Background() } workingDir := "/var/lib/app" if run.SparkExtension != nil && run.SparkExtension.SparkSubmitJobDriver != nil && run.SparkExtension.SparkSubmitJobDriver.WorkingDir != nil { workingDir = *run.SparkExtension.SparkSubmitJobDriver.WorkingDir } labels := state.GetLabels(run) // TODO Remove after migration volumes, volumeMounts := generateVolumesForCluster(run.ClusterName, true) pod := v1.Pod{ Status: v1.PodStatus{}, ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ "karpenter.sh/do-not-evict": "true", 
"flotilla-run-id": run.RunID}, Labels: labels, }, Spec: v1.PodSpec{ TerminationGracePeriodSeconds: aws.Int64(90), Volumes: volumes, SchedulerName: emr.schedulerName, Containers: []v1.Container{ { Name: "spark-kubernetes-executor", Env: emr.envOverrides(executable, run), VolumeMounts: volumeMounts, WorkingDir: workingDir, }, }, InitContainers: []v1.Container{{ Name: fmt.Sprintf("init-executor-%s", run.RunID), Image: run.Image, Env: emr.envOverrides(executable, run), VolumeMounts: volumeMounts, Command: emr.constructCmdSlice(run.SparkExtension.ExecutorInitCommand), }}, RestartPolicy: v1.RestartPolicyNever, Affinity: emr.constructAffinity(ctx, executable, run, manager, false), Tolerations: emr.constructTolerations(executable, run), }, } key := aws.String(fmt.Sprintf("%s/%s/%s.yaml", emr.s3ManifestBasePath, run.RunID, "executor-template")) return emr.writeK8ObjToS3(&pod, key) } func (emr *EMRExecutionEngine) writeK8ObjToS3(obj runtime.Object, key *string) *string { var b0 bytes.Buffer err := emr.serializer.Encode(obj, &b0) payload := bytes.ReplaceAll(b0.Bytes(), []byte("status: {}"), []byte("")) payload = bytes.ReplaceAll(payload, []byte("creationTimestamp: null"), []byte("")) payload = bytes.ReplaceAll(payload, []byte("resources: {}"), []byte("")) if err == nil { putObject := s3.PutObjectInput{ Bucket: aws.String(emr.s3ManifestBucket), Body: bytes.NewReader(payload), Key: key, ContentType: aws.String("text/yaml"), } _, err = emr.s3Client.PutObject(&putObject) if err != nil { _ = emr.log.Log("level", "error", "message", "s3_upload_error", "error", err.Error()) } } return aws.String(fmt.Sprintf("s3://%s/%s", emr.s3ManifestBucket, *key)) } func (emr *EMRExecutionEngine) writeStringToS3(key *string, body []byte) *string { if body != nil && key != nil { putObject := s3.PutObjectInput{ Bucket: aws.String(emr.s3ManifestBucket), Body: bytes.NewReader(body), Key: key, ContentType: aws.String("text/yaml"), } _, err := emr.s3Client.PutObject(&putObject) if err != nil { _ = emr.log.Log("level", "error", "message", "s3_upload_error", "error", err.Error()) } } return aws.String(fmt.Sprintf("s3://%s/%s", emr.s3ManifestBucket, *key)) } func (emr *EMRExecutionEngine) constructEviction(ctx context.Context, run state.Run, manager state.Manager) string { if ctx == nil { ctx = context.Background() } if run.NodeLifecycle != nil && *run.NodeLifecycle == state.OndemandLifecycle { return "false" } if run.CommandHash != nil { nodeType, err := manager.GetNodeLifecycle(ctx, run.DefinitionID, *run.CommandHash) if err == nil && nodeType == state.OndemandLifecycle { return "false" } } return "true" } func (emr *EMRExecutionEngine) constructTolerations(executable state.Executable, run state.Run) []v1.Toleration { tolerations := []v1.Toleration{} tolerations = append(tolerations, v1.Toleration{ Key: "emr", Operator: "Equal", Value: "true", Effect: "NoSchedule", }) if team, ok := run.Labels["team"]; ok && team != "" { tolerations = append(tolerations, v1.Toleration{ Key: team, Operator: "Equal", Value: "true", Effect: "NoSchedule", }) } return tolerations } func (emr *EMRExecutionEngine) constructAffinity(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager, driver bool) *v1.Affinity { affinity := &v1.Affinity{} if ctx == nil { ctx = context.Background() } var requiredMatch []v1.NodeSelectorRequirement //todo move to config nodeLifecycleKey := "karpenter.sh/capacity-type" nodeArchKey := "kubernetes.io/arch" newCluster := true arch := []string{"amd64"} if run.Arch != nil && *run.Arch == 
"arm64" { arch = []string{"arm64"} } var nodeLifecycle []string nodePreference := "spot" if (run.NodeLifecycle != nil && *run.NodeLifecycle == state.OndemandLifecycle) || driver { nodeLifecycle = append(nodeLifecycle, "on-demand") nodePreference = "on-demand" } else { nodeLifecycle = append(nodeLifecycle, "spot", "on-demand") } if run.CommandHash != nil { nodeType, err := manager.GetNodeLifecycle(ctx, run.DefinitionID, *run.CommandHash) if err == nil && nodeType == state.OndemandLifecycle { nodeLifecycle = []string{"on-demand"} } } requiredMatch = append(requiredMatch, v1.NodeSelectorRequirement{ Key: nodeLifecycleKey, Operator: v1.NodeSelectorOpIn, Values: nodeLifecycle, }) requiredMatch = append(requiredMatch, v1.NodeSelectorRequirement{ Key: nodeArchKey, Operator: v1.NodeSelectorOpIn, Values: arch, }) if team, ok := run.Labels["team"]; ok && team != "" { requiredMatch = append(requiredMatch, v1.NodeSelectorRequirement{ Key: "team", Operator: v1.NodeSelectorOpIn, Values: []string{team}, }) if env := os.Getenv("FLOTILLA_MODE"); env != "" { requiredMatch = append(requiredMatch, v1.NodeSelectorRequirement{ Key: "environment", Operator: v1.NodeSelectorOpIn, Values: []string{env}, }) } } //todo remove conditional after migration _, hasTeam := run.Labels["team"] if newCluster && !hasTeam { requiredMatch = append(requiredMatch, v1.NodeSelectorRequirement{ Key: "emr", Operator: v1.NodeSelectorOpIn, Values: []string{"true"}, }) } affinity = &v1.Affinity{ NodeAffinity: &v1.NodeAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ NodeSelectorTerms: []v1.NodeSelectorTerm{ { MatchExpressions: requiredMatch, }, }, }, PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{{ Weight: 50, Preference: v1.NodeSelectorTerm{ MatchExpressions: []v1.NodeSelectorRequirement{{ Key: nodeLifecycleKey, Operator: v1.NodeSelectorOpIn, Values: []string{nodePreference}, }}, }, }}, }, PodAffinity: &v1.PodAffinity{ PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ { Weight: 40, PodAffinityTerm: v1.PodAffinityTerm{ LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ "flotilla-run-id": run.RunID}, }, TopologyKey: "topology.kubernetes.io/zone", }, }, }, }, } return affinity } func (emr *EMRExecutionEngine) estimateExecutorCount(run state.Run, manager state.Manager) state.Run { return run } // buildMetricTags creates a standard set of tags for Spark ARA metrics func (emr *EMRExecutionEngine) buildMetricTags(run state.Run) []string { tags := []string{"engine:eks-spark"} if run.ClusterName != "" { tags = append(tags, fmt.Sprintf("cluster:%s", run.ClusterName)) } return tags } func setResourceSuffix(value string) string { if strings.Contains(value, "g") || strings.Contains(value, "m") { return strings.ToUpper(value) } if strings.Contains(value, "K") { return strings.ToLower(value) } return value } func (emr *EMRExecutionEngine) estimateMemoryResources(ctx context.Context, run state.Run, manager state.Manager) state.Run { // Early return for NULL command_hash if run.CommandHash == nil { metricTags := emr.buildMetricTags(run) _ = metrics.Increment(metrics.EngineEKSARANullCommandHash, metricTags, 1) if emr.log != nil { _ = emr.log.Log( "level", "warn", "message", "Skipping Spark ARA - NULL command_hash", "reason", "Spark job has no command_hash (malformed)", "run_id", run.RunID, "definition_id", run.DefinitionID, ) } return run } if ctx == nil { ctx = context.Background() } metricTags := emr.buildMetricTags(run) // Track adjustment attempt 
_ = metrics.Increment(metrics.EngineEKSARAEstimationAttempted, metricTags, 1) // Query for OOMs executorOOM, executorErr := manager.ExecutorOOM(ctx, run.DefinitionID, *run.CommandHash) driverOOM, driverErr := manager.DriverOOM(ctx, run.DefinitionID, *run.CommandHash) // Track query success/failure if executorErr != nil || driverErr != nil { var missingResource exceptions.MissingResource if errors.As(executorErr, &missingResource) || errors.As(driverErr, &missingResource) { // No historical data - expected for new jobs _ = metrics.Increment(metrics.EngineEKSARANoHistoricalData, metricTags, 1) } else { // Query failed with real error _ = metrics.Increment(metrics.EngineEKSARAEstimationFailed, metricTags, 1) } } else { // Query succeeded _ = metrics.Increment(metrics.EngineEKSARAEstimationSucceeded, metricTags, 1) } var sparkSubmitConf []state.Conf for _, k := range run.SparkExtension.SparkSubmitJobDriver.SparkSubmitConf { if *k.Name == "spark.executor.memory" && k.Value != nil { // 1.25x executor memory - OOM in the last 30 days if executorOOM { originalValue := *k.Value quantity := resource.MustParse(setResourceSuffix(originalValue)) originalMB := quantity.Value() / (1024 * 1024) // Convert to MB quantity.Set(int64(float64(quantity.Value()) * 1.25)) adjustedMB := quantity.Value() / (1024 * 1024) k.Value = aws.String(strings.ToLower(quantity.String())) // Emit metrics with component:executor tag executorTags := append(metricTags, "component:executor") _ = metrics.Increment(metrics.EngineEKSARAResourceAdjustment, executorTags, 1) _ = metrics.Histogram(metrics.EngineEKSARAMemoryIncreaseRatio, 1.25, executorTags, 1) _ = metrics.Distribution(metrics.EngineEKSARADefaultMemory, float64(originalMB), executorTags, 1) _ = metrics.Distribution(metrics.EngineEKSARAARAMemory, float64(adjustedMB), executorTags, 1) increaseMB := adjustedMB - originalMB _ = metrics.Distribution(metrics.EngineEKSARAMemoryIncrease, float64(increaseMB), executorTags, 1) // Log executor adjustment if emr.log != nil { _ = emr.log.Log( "level", "info", "message", "Spark ARA adjusted executor memory", "definition_id", run.DefinitionID, "run_id", run.RunID, "cluster", run.ClusterName, "component", "executor", "default_memory_mb", originalMB, "adjusted_memory_mb", adjustedMB, "increase_ratio", 1.25, "oom_detected", true, ) } } else { quantity := resource.MustParse(setResourceSuffix(*k.Value)) minVal := resource.MustParse("1G") if quantity.MilliValue() > minVal.MilliValue() { quantity.Set(int64(float64(quantity.Value()) * 1.0)) k.Value = aws.String(strings.ToLower(quantity.String())) } } } if driverOOM { // Bump up driver by 3x, jvm memory strings if *k.Name == "spark.driver.memory" && k.Value != nil { originalValue := *k.Value quantity := resource.MustParse(setResourceSuffix(originalValue)) originalMB := quantity.Value() / (1024 * 1024) quantity.Set(quantity.Value() * 3) adjustedMB := quantity.Value() / (1024 * 1024) k.Value = aws.String(strings.ToLower(quantity.String())) // Emit metrics with component:driver tag driverTags := append(metricTags, "component:driver") _ = metrics.Increment(metrics.EngineEKSARAResourceAdjustment, driverTags, 1) _ = metrics.Histogram(metrics.EngineEKSARAMemoryIncreaseRatio, 3.0, driverTags, 1) _ = metrics.Distribution(metrics.EngineEKSARADefaultMemory, float64(originalMB), driverTags, 1) _ = metrics.Distribution(metrics.EngineEKSARAARAMemory, float64(adjustedMB), driverTags, 1) increaseMB := adjustedMB - originalMB _ = metrics.Distribution(metrics.EngineEKSARAMemoryIncrease, float64(increaseMB), 
driverTags, 1) // Log driver adjustment if emr.log != nil { _ = emr.log.Log( "level", "info", "message", "Spark ARA adjusted driver memory", "definition_id", run.DefinitionID, "run_id", run.RunID, "cluster", run.ClusterName, "component", "driver", "default_memory_mb", originalMB, "adjusted_memory_mb", adjustedMB, "increase_ratio", 3.0, "oom_detected", true, ) } } } sparkSubmitConf = append(sparkSubmitConf, state.Conf{Name: k.Name, Value: k.Value}) } run.SparkExtension.SparkSubmitJobDriver.SparkSubmitConf = sparkSubmitConf return run } func (emr *EMRExecutionEngine) sparkSubmitParams(run state.Run) *string { var buffer bytes.Buffer buffer.WriteString(fmt.Sprintf(" --name %s", run.RunID)) for _, k := range run.SparkExtension.SparkSubmitJobDriver.SparkSubmitConf { buffer.WriteString(fmt.Sprintf(" --conf %s=%s", *k.Name, *k.Value)) } buffer.WriteString(fmt.Sprintf(" --conf %s=%s", "spark.kubernetes.executor.podNamePrefix", run.RunID)) buffer.WriteString(" --conf spark.log4j.rootLogger=DEBUG") buffer.WriteString(" --conf spark.log4j.rootCategory=DEBUG") if run.SparkExtension.SparkSubmitJobDriver.Class != nil { buffer.WriteString(fmt.Sprintf(" --class %s", *run.SparkExtension.SparkSubmitJobDriver.Class)) } if len(run.SparkExtension.SparkSubmitJobDriver.Files) > 0 { files := strings.Join(run.SparkExtension.SparkSubmitJobDriver.Files, ",") buffer.WriteString(fmt.Sprintf(" --files %s", files)) } if len(run.SparkExtension.SparkSubmitJobDriver.PyFiles) > 0 { files := strings.Join(run.SparkExtension.SparkSubmitJobDriver.PyFiles, ",") buffer.WriteString(fmt.Sprintf(" --py-files %s", files)) } if len(run.SparkExtension.SparkSubmitJobDriver.Jars) > 0 { jars := strings.Join(run.SparkExtension.SparkSubmitJobDriver.Jars, ",") buffer.WriteString(fmt.Sprintf(" --jars %s", jars)) } return aws.String(buffer.String()) } func (emr *EMRExecutionEngine) Terminate(ctx context.Context, run state.Run) error { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, "flotilla.job.emr_terminate", run.RunID) defer span.Finish() utils.TagJobRun(span, run) if run.Status == state.StatusStopped { return errors.New("Run is already in a stopped state.") } cancelJobRunInput := emrcontainers.CancelJobRunInput{ Id: run.SparkExtension.EMRJobId, VirtualClusterId: run.SparkExtension.VirtualClusterId, } tierTag := fmt.Sprintf("tier:%s", run.Tier) key := aws.String(fmt.Sprintf("%s/%s/%s.json", emr.s3ManifestBasePath, run.RunID, "cancel-job-run-input")) obj, err := json.Marshal(cancelJobRunInput) if err == nil { emr.writeStringToS3(key, obj) } _, err = emr.emrContainersClient.CancelJobRun(&cancelJobRunInput) if err != nil { _ = metrics.Increment(metrics.EngineEMRTerminate, []string{string(metrics.StatusFailure), tierTag}, 1) _ = emr.log.Log("level", "error", "message", "EMR job termination error", "error", err.Error()) return err } _ = metrics.Increment(metrics.EngineEMRTerminate, []string{string(metrics.StatusSuccess), tierTag}, 1) return nil } func (emr *EMRExecutionEngine) Enqueue(ctx context.Context, run state.Run) error { var span tracer.Span ctx, span = utils.TraceJob(ctx, "flotilla.job.emr_enqueue", "") defer span.Finish() span.SetTag("job.run_id", run.RunID) span.SetTag("job.tier", run.Tier) utils.TagJobRun(span, run) tierTag := fmt.Sprintf("tier:%s", run.Tier) qurl, err := emr.sqsQueueManager.QurlFor(emr.emrJobQueue, false) if err != nil { _ = metrics.Increment(metrics.EngineEMREnqueue, []string{string(metrics.StatusFailure), tierTag}, 1) _ = emr.log.Log("level", "error",
"message", "EMR job enqueue error", "error", err.Error()) return errors.Wrapf(err, "problem getting queue url for [%s]", run.ClusterName) } // Queue run if err = emr.sqsQueueManager.Enqueue(ctx, qurl, run); err != nil { _ = metrics.Increment(metrics.EngineEMREnqueue, []string{string(metrics.StatusFailure), tierTag}, 1) _ = emr.log.Log("level", "error", "message", "EMR job enqueue error", "error", err.Error()) return errors.Wrapf(err, "problem enqueing run [%s] to queue [%s]", run.RunID, qurl) } _ = metrics.Increment(metrics.EngineEMREnqueue, []string{string(metrics.StatusSuccess), tierTag}, 1) return nil } func (emr *EMRExecutionEngine) PollRuns(ctx context.Context) ([]RunReceipt, error) { qurl, err := emr.sqsQueueManager.QurlFor(emr.emrJobQueue, false) if err != nil { return nil, errors.Wrap(err, "problem listing queues to poll") } queues := []string{qurl} var runs []RunReceipt for _, qurl := range queues { // // Get new queued Run // runReceipt, err := emr.sqsQueueManager.ReceiveRun(ctx, qurl) if err != nil { return runs, errors.Wrapf(err, "problem receiving run from queue url [%s]", qurl) } if runReceipt.Run == nil { continue } runs = append(runs, RunReceipt{ RunReceipt: runReceipt, TraceID: runReceipt.TraceID, ParentID: runReceipt.ParentID, SamplingPriority: runReceipt.SamplingPriority, }) } return runs, nil } func (emr *EMRExecutionEngine) PollStatus(ctx context.Context) (RunReceipt, error) { return RunReceipt{}, nil } func (emr *EMRExecutionEngine) PollRunStatus(ctx context.Context) (state.Run, error) { return state.Run{}, nil } func (emr *EMRExecutionEngine) Define(ctx context.Context, td state.Definition) (state.Definition, error) { return td, nil } func (emr *EMRExecutionEngine) Deregister(ctx context.Context, definition state.Definition) error { return errors.Errorf("EMRExecutionEngine does not allow for deregistering of task definitions.") } func (emr *EMRExecutionEngine) Get(ctx context.Context, run state.Run) (state.Run, error) { if ctx == nil { ctx = context.Background() } return run, nil } func (emr *EMRExecutionEngine) GetEvents(ctx context.Context, run state.Run) (state.PodEventList, error) { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, "flotilla.job.emr_get_events", run.RunID) defer span.Finish() utils.TagJobRun(span, run) return state.PodEventList{}, nil } func (emr *EMRExecutionEngine) FetchPodMetrics(ctx context.Context, run state.Run) (state.Run, error) { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, "flotilla.job.emr_fetch_metrics", run.RunID) defer span.Finish() utils.TagJobRun(span, run) return run, nil } func (emr *EMRExecutionEngine) FetchUpdateStatus(ctx context.Context, run state.Run) (state.Run, error) { var span tracer.Span if ctx == nil { ctx = context.Background() } ctx, span = utils.TraceJob(ctx, "flotilla.job.emr_fetch_status", run.RunID) defer span.Finish() utils.TagJobRun(span, run) return run, nil } func (emr *EMRExecutionEngine) lakekeeperSecretEnvVars() []v1.EnvVar { if emr.lakekeeperSecretName == "" { return nil } return []v1.EnvVar{ { Name: "OAUTH2_CLIENT_ID", ValueFrom: &v1.EnvVarSource{ SecretKeyRef: &v1.SecretKeySelector{ LocalObjectReference: v1.LocalObjectReference{Name: emr.lakekeeperSecretName}, Key: "client_id", Optional: aws.Bool(true), }, }, }, { Name: "OAUTH2_CLIENT_SECRET", ValueFrom: &v1.EnvVarSource{ SecretKeyRef: &v1.SecretKeySelector{ LocalObjectReference: v1.LocalObjectReference{Name: emr.lakekeeperSecretName}, Key: 
"client_secret", Optional: aws.Bool(true), }, }, }, { Name: "OAUTH2_SERVER_URI", ValueFrom: &v1.EnvVarSource{ SecretKeyRef: &v1.SecretKeySelector{ LocalObjectReference: v1.LocalObjectReference{Name: emr.lakekeeperSecretName}, Key: "token_url", Optional: aws.Bool(true), }, }, }, { Name: "OAUTH2_SCOPE", ValueFrom: &v1.EnvVarSource{ SecretKeyRef: &v1.SecretKeySelector{ LocalObjectReference: v1.LocalObjectReference{Name: emr.lakekeeperSecretName}, Key: "scope", Optional: aws.Bool(true), }, }, }, { Name: "CATALOG_URI", ValueFrom: &v1.EnvVarSource{ SecretKeyRef: &v1.SecretKeySelector{ LocalObjectReference: v1.LocalObjectReference{Name: emr.lakekeeperSecretName}, Key: "uri", Optional: aws.Bool(true), }, }, }, { Name: "WAREHOUSE", ValueFrom: &v1.EnvVarSource{ SecretKeyRef: &v1.SecretKeySelector{ LocalObjectReference: v1.LocalObjectReference{Name: emr.lakekeeperSecretName}, Key: "warehouse", Optional: aws.Bool(true), }, }, }, } } func (emr *EMRExecutionEngine) envOverrides(executable state.Executable, run state.Run) []v1.EnvVar { pairs := make(map[string]string) resources := executable.GetExecutableResources() if resources.Env != nil && len(*resources.Env) > 0 { for _, ev := range *resources.Env { name := emr.sanitizeEnvVar(ev.Name) value := ev.Value pairs[name] = value } } if run.Env != nil && len(*run.Env) > 0 { for _, ev := range *run.Env { name := emr.sanitizeEnvVar(ev.Name) value := ev.Value pairs[name] = value } } var res []v1.EnvVar for key := range pairs { if len(key) > 0 { res = append(res, v1.EnvVar{ Name: key, Value: pairs[key], }) } } return res } func (emr *EMRExecutionEngine) sanitizeEnvVar(key string) string { // Environment variable can't start with emr $ if strings.HasPrefix(key, "$") { key = strings.Replace(key, "$", "", 1) } // Environment variable names can't contain spaces. key = strings.Replace(key, " ", "", -1) return key } func (emr *EMRExecutionEngine) constructCmdSlice(command *string) []string { cmdString := "" if command != nil { cmdString = *command } bashCmd := "bash" optLogin := "-l" optStr := "-ce" return []string{bashCmd, optLogin, optStr, cmdString} } ================================================ FILE: execution/engine/engine.go ================================================ package engine import ( "context" "fmt" "github.com/pkg/errors" "github.com/stitchfix/flotilla-os/config" "github.com/stitchfix/flotilla-os/log" "github.com/stitchfix/flotilla-os/queue" "github.com/stitchfix/flotilla-os/state" ) // Engine defines the execution engine interface. type Engine interface { Initialize(conf config.Config) error Execute(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager) (state.Run, bool, error) Terminate(ctx context.Context, run state.Run) error Enqueue(ctx context.Context, run state.Run) error PollRuns(ctx context.Context) ([]RunReceipt, error) PollRunStatus(ctx context.Context) (state.Run, error) PollStatus(ctx context.Context) (RunReceipt, error) GetEvents(ctx context.Context, run state.Run) (state.PodEventList, error) FetchUpdateStatus(ctx context.Context, run state.Run) (state.Run, error) FetchPodMetrics(ctx context.Context, run state.Run) (state.Run, error) // Legacy methods from the ECS era. Here for backwards compatibility. 
Define(ctx context.Context, definition state.Definition) (state.Definition, error) Deregister(ctx context.Context, definition state.Definition) error } type RunReceipt struct { queue.RunReceipt TraceID uint64 ParentID uint64 SamplingPriority int } // NewExecutionEngine initializes and returns a new Engine func NewExecutionEngine(conf config.Config, qm queue.Manager, name string, logger log.Logger, clusterManager *DynamicClusterManager, stateManager state.Manager) (Engine, error) { switch name { case state.EKSEngine: eksEng := &EKSExecutionEngine{qm: qm, log: logger, clusterManager: clusterManager, stateManager: stateManager} if err := eksEng.Initialize(conf); err != nil { return nil, errors.Wrap(err, "problem initializing EKSExecutionEngine") } return eksEng, nil case state.EKSSparkEngine: emrEng := &EMRExecutionEngine{sqsQueueManager: qm, log: logger, clusterManager: clusterManager, stateManager: stateManager} if err := emrEng.Initialize(conf); err != nil { return nil, errors.Wrap(err, "problem initializing EMRExecutionEngine") } return emrEng, nil default: return nil, fmt.Errorf("no Engine named [%s] was found", name) } } ================================================ FILE: flotilla/app.go ================================================ package flotilla import ( "context" "github.com/stitchfix/flotilla-os/clients/middleware" "github.com/stitchfix/flotilla-os/queue" "github.com/stitchfix/flotilla-os/utils" "net/http" "strings" "time" "github.com/pkg/errors" "github.com/rs/cors" "github.com/stitchfix/flotilla-os/clients/cluster" "github.com/stitchfix/flotilla-os/clients/logs" "github.com/stitchfix/flotilla-os/config" "github.com/stitchfix/flotilla-os/execution/engine" flotillaLog "github.com/stitchfix/flotilla-os/log" "github.com/stitchfix/flotilla-os/services" "github.com/stitchfix/flotilla-os/state" "github.com/stitchfix/flotilla-os/worker" ) type App struct { address string mode string corsAllowedOrigins []string logger flotillaLog.Logger readTimeout time.Duration writeTimeout time.Duration handler http.Handler workerManager worker.Worker } // Start the Application. func (app *App) Run() error { srv := &http.Server{ Addr: app.address, Handler: app.handler, ReadTimeout: app.readTimeout, WriteTimeout: app.writeTimeout, } // Start worker manager's run goroutine. app.workerManager.GetTomb().Go(func() error { ctx, span := utils.TraceJob(context.Background(), "worker_manager.run", "startup") defer span.Finish() return app.workerManager.Run(ctx) }) return srv.ListenAndServe() } // Function to initialize a new Flotilla app. 
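//
// Example (illustrative sketch) of how main-style wiring might call the constructors above:
// NewExecutionEngine once per engine, then NewApp, then Run. Construction of the config,
// logger, queue manager, state manager, cluster manager, and the logs/cluster/middleware
// clients is elided; those variable names are hypothetical placeholders, not APIs defined here.
//
//	eksEngine, err := engine.NewExecutionEngine(conf, qm, state.EKSEngine, logger, clusterManager, sm)
//	if err != nil { ... }
//	emrEngine, err := engine.NewExecutionEngine(conf, qm, state.EKSSparkEngine, logger, clusterManager, sm)
//	if err != nil { ... }
//	app, err := NewApp(conf, logger, logsClient, eksEngine, sm, clusterClient,
//		qm, emrEngine, qm, middlewareClient, clusterManager)
//	if err != nil { ... }
//	err = app.Run() // serves HTTP and starts the worker manager goroutine
//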
func NewApp(conf config.Config, log flotillaLog.Logger, eksLogsClient logs.Client, eksExecutionEngine engine.Engine, stateManager state.Manager, eksClusterClient cluster.Client, eksQueueManager queue.Manager, emrExecutionEngine engine.Engine, emrQueueManager queue.Manager, middlewareClient middleware.Client, clusterManager *engine.DynamicClusterManager, ) (App, error) { var app App app.logger = log app.configure(conf) executionService, err := services.NewExecutionService(conf, eksExecutionEngine, stateManager, eksClusterClient, emrExecutionEngine) if err != nil { return app, errors.Wrap(err, "problem initializing execution service") } templateService, err := services.NewTemplateService(conf, stateManager) if err != nil { return app, errors.Wrap(err, "problem initializing template service") } eksLogService, err := services.NewLogService(stateManager, eksLogsClient) if err != nil { return app, errors.Wrap(err, "problem initializing eks log service") } workerService, err := services.NewWorkerService(conf, stateManager) if err != nil { return app, errors.Wrap(err, "problem initializing worker service") } definitionService, err := services.NewDefinitionService(stateManager) if err != nil { return app, errors.Wrap(err, "problem initializing definition service") } ep := endpoints{ executionService: executionService, eksLogService: eksLogService, workerService: workerService, templateService: templateService, logger: log, middlewareClient: middlewareClient, definitionService: definitionService, } app.configureRoutes(ep) if err = app.initializeEKSWorkers(conf, log, eksExecutionEngine, emrExecutionEngine, stateManager, eksQueueManager, clusterManager); err != nil { return app, errors.Wrap(err, "problem eks initializing workers") } return app, nil } func (app *App) configure(conf config.Config) { app.address = conf.GetString("http_server_listen_address") if len(app.address) == 0 { app.address = ":5000" } readTimeout := conf.GetInt("http_server_read_timeout_seconds") if readTimeout == 0 { readTimeout = 5 } writeTimeout := conf.GetInt("http_server_write_timeout_seconds") if writeTimeout == 0 { writeTimeout = 10 } app.readTimeout = time.Duration(readTimeout) * time.Second app.writeTimeout = time.Duration(writeTimeout) * time.Second app.mode = conf.GetString("flotilla_mode") app.corsAllowedOrigins = strings.Split(conf.GetString("http_server_cors_allowed_origins"), ",") } func (app *App) configureRoutes(ep endpoints) { router := NewRouter(ep) c := cors.New(cors.Options{ AllowedOrigins: app.corsAllowedOrigins, AllowedMethods: []string{"GET", "DELETE", "POST", "PUT"}, }) app.handler = c.Handler(router) } func (app *App) initializeEKSWorkers( conf config.Config, log flotillaLog.Logger, ee engine.Engine, emr engine.Engine, sm state.Manager, qm queue.Manager, clusterManager *engine.DynamicClusterManager) error { workerManager, err := worker.NewWorker("worker_manager", log, conf, ee, emr, sm, qm, clusterManager) _ = app.logger.Log("level", "info", "message", "Starting worker", "name", "worker_manager") if err != nil { return errors.Wrapf(err, "problem initializing worker with name [%s]", "worker_manager") } app.workerManager = workerManager return nil } func (app *App) initializeEMRWorkers( conf config.Config, log flotillaLog.Logger, ee engine.Engine, emr engine.Engine, sm state.Manager, qm queue.Manager, clusterManager *engine.DynamicClusterManager) error { workerManager, err := worker.NewWorker("worker_manager", log, conf, ee, emr, sm, qm, clusterManager) _ = app.logger.Log("level", "info", "message", 
"Starting worker", "name", "worker_manager") if err != nil { return errors.Wrapf(err, "problem initializing worker with name [%s]", "worker_manager") } app.workerManager = workerManager return nil } ================================================ FILE: flotilla/endpoints.go ================================================ package flotilla import ( "encoding/json" "fmt" "net/http" "net/url" "strconv" "strings" "github.com/gorilla/mux" "github.com/stitchfix/flotilla-os/clients/middleware" "github.com/stitchfix/flotilla-os/exceptions" flotillaLog "github.com/stitchfix/flotilla-os/log" "github.com/stitchfix/flotilla-os/services" "github.com/stitchfix/flotilla-os/state" "github.com/stitchfix/flotilla-os/utils" ) type endpoints struct { executionService services.ExecutionService definitionService services.DefinitionService templateService services.TemplateService eksLogService services.LogService workerService services.WorkerService middlewareClient middleware.Client logger flotillaLog.Logger } type listRequest struct { limit int offset int sortBy string order string filters map[string][]string envFilters map[string]string } func (ep *endpoints) getURLParam(v url.Values, key string, defaultValue string) string { val, ok := v[key] if ok && len(val) > 0 { return val[0] } return defaultValue } func (ep *endpoints) getFilters(params url.Values, nonFilters map[string]bool) (map[string][]string, map[string]string) { filters := make(map[string][]string) envFilters := make(map[string]string) for k, v := range params { if !nonFilters[k] && len(v) > 0 { // Env filters have the "env" key and are "|" separated key-value pairs // // eg. env=FOO|BAR&env=CUPCAKE|SPRINKLES // if k == "env" { for _, kv := range v { split := strings.Split(kv, "|") if len(split) == 2 { envFilters[split[0]] = split[1] } } } else { filters[k] = v } } } return filters, envFilters } func (ep *endpoints) decodeListRequest(r *http.Request) listRequest { var lr listRequest params := r.URL.Query() lr.limit, _ = strconv.Atoi(ep.getURLParam(params, "limit", "1024")) lr.offset, _ = strconv.Atoi(ep.getURLParam(params, "offset", "0")) lr.sortBy = ep.getURLParam(params, "sort_by", "group_name") lr.order = ep.getURLParam(params, "order", "asc") lr.filters, lr.envFilters = ep.getFilters(params, map[string]bool{ "limit": true, "offset": true, "sort_by": true, "order": true, }) return lr } // Note: the difference between this method and `decodeListRequest` is that // this method does not assume that all entities can be sorted by `group_name`. // Instead, it relies on the IOrderable interface's DefaultOrderField method. 
func (ep *endpoints) decodeOrderableListRequest(r *http.Request, orderable state.IOrderable) listRequest { var lr listRequest params := r.URL.Query() lr.limit, _ = strconv.Atoi(ep.getURLParam(params, "limit", "1024")) lr.offset, _ = strconv.Atoi(ep.getURLParam(params, "offset", "0")) lr.sortBy = ep.getURLParam(params, "sort_by", orderable.DefaultOrderField()) lr.order = ep.getURLParam(params, "order", "asc") lr.filters, lr.envFilters = ep.getFilters(params, map[string]bool{ "limit": true, "offset": true, "sort_by": true, "order": true, }) return lr } func (ep *endpoints) decodeRequest(r *http.Request, entity interface{}) error { return json.NewDecoder(r.Body).Decode(entity) } func (ep endpoints) encodeError(w http.ResponseWriter, err error) { w.Header().Set("Content-Type", "application/json; charset=utf-8") switch err.(type) { case exceptions.MalformedInput: w.WriteHeader(http.StatusBadRequest) case exceptions.ConflictingResource: w.WriteHeader(http.StatusConflict) case exceptions.MissingResource: w.WriteHeader(http.StatusNotFound) default: w.WriteHeader(http.StatusInternalServerError) } _ = json.NewEncoder(w).Encode(map[string]interface{}{ "error": err.Error(), }) } func (ep *endpoints) encodeResponse(w http.ResponseWriter, response interface{}) { w.Header().Set("Content-Type", "application/json; charset=utf-8") _ = json.NewEncoder(w).Encode(response) } func (ep *endpoints) ListDefinitions(w http.ResponseWriter, r *http.Request) { lr := ep.decodeListRequest(r) definitionList, err := ep.definitionService.List( r.Context(), lr.limit, lr.offset, lr.sortBy, lr.order, lr.filters, lr.envFilters) if definitionList.Definitions == nil { definitionList.Definitions = []state.Definition{} } if err != nil { ep.logger.Log( "level", "error", "message", "problem listing definitions", "operation", "ListDefinitions", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { response := make(map[string]interface{}) response["total"] = definitionList.Total response["definitions"] = definitionList.Definitions response["limit"] = lr.limit response["offset"] = lr.offset response["sort_by"] = lr.sortBy response["order"] = lr.order response["env_filters"] = lr.envFilters for k, v := range lr.filters { response[k] = v } ep.encodeResponse(w, response) } } // Fetches definition from DB using definition id. func (ep *endpoints) GetDefinition(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) definition, err := ep.definitionService.Get(r.Context(), vars["definition_id"]) if err != nil { ep.logger.Log( "level", "error", "message", "problem getting definitions", "operation", "GetDefinition", "error", fmt.Sprintf("%+v", err), "definition_id", vars["definition_id"]) ep.encodeError(w, err) } else { ep.encodeResponse(w, definition) } } // Fetches definition from DB using definition alias. func (ep *endpoints) GetDefinitionByAlias(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) definition, err := ep.definitionService.GetByAlias(r.Context(), vars["alias"]) if err != nil { ep.logger.Log( "level", "error", "message", "problem getting definition by alias", "operation", "GetDefinitionByAlias", "error", fmt.Sprintf("%+v", err), "alias", vars["alias"]) ep.encodeError(w, err) } else { ep.encodeResponse(w, definition) } } // Creates new definition. 
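//
// Example (illustrative): a minimal create-definition payload, mirroring the one used in
// endpoints_test.go later in this document; the values are placeholders.
//
//	POST /api/v1/task
//	{"alias": "cupcake", "memory": 100, "group_name": "cupcake",
//	 "image": "someimage", "command": "echo 'hi'"}
//
// A body that fails to decode is rejected as exceptions.MalformedInput, which encodeError
// above maps to 400 (409 for ConflictingResource, 404 for MissingResource, 500 otherwise).
//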
func (ep *endpoints) CreateDefinition(w http.ResponseWriter, r *http.Request) { var definition state.Definition err := ep.decodeRequest(r, &definition) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } created, err := ep.definitionService.Create(r.Context(), &definition) if err != nil { ep.logger.Log( "level", "error", "message", "problem creating definition", "operation", "CreateDefinition", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { ep.encodeResponse(w, created) } } // Updates existing definition. func (ep *endpoints) UpdateDefinition(w http.ResponseWriter, r *http.Request) { var definition state.Definition err := ep.decodeRequest(r, &definition) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } vars := mux.Vars(r) updated, err := ep.definitionService.Update(r.Context(), vars["definition_id"], definition) if err != nil { ep.logger.Log( "level", "error", "message", "problem updating definition", "operation", "UpdateDefinition", "error", fmt.Sprintf("%+v", err), "definition_id", vars["definition_id"]) ep.encodeError(w, err) } else { ep.encodeResponse(w, updated) } } // Deletes a defiition. func (ep *endpoints) DeleteDefinition(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) err := ep.definitionService.Delete(r.Context(), vars["definition_id"]) if err != nil { ep.logger.Log( "level", "error", "message", "problem deleting definition", "operation", "DeleteDefinition", "error", fmt.Sprintf("%+v", err), "definition_id", vars["definition_id"]) ep.encodeError(w, err) } else { ep.encodeResponse(w, map[string]bool{"deleted": true}) } } // List all runs, supports filtering based on environment variables. // ListRequest is object used here to construct the query. func (ep *endpoints) ListRuns(w http.ResponseWriter, r *http.Request) { lr := ep.decodeListRequest(r) runList, err := ep.executionService.List(r.Context(), lr.limit, lr.offset, lr.order, lr.sortBy, lr.filters, lr.envFilters) if err != nil { ep.logger.Log( "level", "error", "message", "problem listing runs", "operation", "ListRuns", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { response := make(map[string]interface{}) response["total"] = runList.Total response["history"] = runList.Runs response["limit"] = lr.limit response["offset"] = lr.offset response["sort_by"] = lr.sortBy response["order"] = lr.order response["env_filters"] = lr.envFilters for k, v := range lr.filters { response[k] = v } ep.encodeResponse(w, response) } } // List runs for a definition ID. func (ep *endpoints) ListDefinitionRuns(w http.ResponseWriter, r *http.Request) { lr := ep.decodeListRequest(r) vars := mux.Vars(r) definitionID, ok := vars["definition_id"] if ok { lr.filters["definition_id"] = []string{definitionID} } runList, err := ep.executionService.List(r.Context(), lr.limit, lr.offset, lr.order, lr.sortBy, lr.filters, lr.envFilters) if err != nil { ep.logger.Log( "level", "error", "message", "problem listing definition runs", "operation", "ListDefinitionRuns", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { response := ep.createListRunsResponse(runList, lr) ep.encodeResponse(w, response) } } // List runs based on a template id. 
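//
// Example (illustrative): query construction for the run-listing endpoints above and
// below. Env filters use the "env" key with "|"-separated key-value pairs (see getFilters);
// any other key becomes a plain filter. The history route shown matches the one exercised
// by the tests; list routes are registered in router.go.
//
//	q := url.Values{}
//	q.Add("env", "FOO|BAR")           // env var FOO must equal BAR
//	q.Add("env", "CUPCAKE|SPRINKLES") // a second env filter
//	q.Set("status", "RUNNING")        // plain filter
//	q.Set("limit", "25")
//	// GET /api/v1/history?env=FOO%7CBAR&env=CUPCAKE%7CSPRINKLES&status=RUNNING&limit=25
//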
func (ep *endpoints) ListTemplateRuns(w http.ResponseWriter, r *http.Request) { lr := ep.decodeListRequest(r) vars := mux.Vars(r) tplID, ok := vars["template_id"] if ok { lr.filters["executable_id"] = []string{tplID} } runList, err := ep.executionService.List(r.Context(), lr.limit, lr.offset, lr.order, lr.sortBy, lr.filters, lr.envFilters) if err != nil { ep.logger.Log( "level", "error", "message", "problem listing runs for template", "operation", "ListTemplateRuns", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { response := ep.createListRunsResponse(runList, lr) ep.encodeResponse(w, response) } } func (ep *endpoints) createListRunsResponse(runList state.RunList, req listRequest) map[string]interface{} { response := make(map[string]interface{}) response["total"] = runList.Total response["history"] = runList.Runs response["limit"] = req.limit response["offset"] = req.offset response["sort_by"] = req.sortBy response["order"] = req.order response["env_filters"] = req.envFilters for k, v := range req.filters { response[k] = v } return response } // Fetches a run based on Run ID. func (ep *endpoints) GetRun(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) run, err := ep.executionService.Get(r.Context(), vars["run_id"]) if err != nil { ep.logger.Log( "level", "error", "message", "problem getting run", "operation", "GetRun", "error", fmt.Sprintf("%+v", err), "run_id", vars["run_id"]) ep.encodeError(w, err) } else { ep.encodeResponse(w, run) } } // Fetches a run based on Run ID. func (ep *endpoints) GetPayload(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) run, err := ep.executionService.Get(r.Context(), vars["run_id"]) if err != nil { ep.logger.Log( "level", "error", "message", "problem getting run", "operation", "GetRun", "error", fmt.Sprintf("%+v", err), "run_id", vars["run_id"]) ep.encodeError(w, err) } else { if run.ExecutionRequestCustom != nil { ep.encodeResponse(w, run.ExecutionRequestCustom) } else { ep.encodeResponse(w, map[string]string{}) } } } // Creates a new Run (deprecated). Only present for legacy support. func (ep *endpoints) CreateRun(w http.ResponseWriter, r *http.Request) { var lr state.LaunchRequest err := ep.decodeRequest(r, &lr) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } vars := mux.Vars(r) req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Env: lr.Env, OwnerID: "v1-unknown", Command: nil, Memory: nil, Cpu: nil, Gpu: nil, Engine: &state.DefaultEngine, EphemeralStorage: nil, NodeLifecycle: nil, CommandHash: nil, Tier: lr.Tier, }, } run, err := ep.executionService.CreateDefinitionRunByDefinitionID(r.Context(), vars["definition_id"], &req) if err != nil { ep.logger.Log( "level", "error", "message", "problem creating run", "operation", "CreateRun", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { ep.encodeResponse(w, run) } } // Creates a new Run (deprecated). Only present for legacy support. 
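//
// Example (illustrative): the envelope produced by createListRunsResponse above; field
// values are placeholders, and any filters supplied on the request are echoed back as
// additional top-level keys.
//
//	{
//	  "total": 42,
//	  "history": [ ... runs ... ],
//	  "limit": 25,
//	  "offset": 0,
//	  "sort_by": "group_name",
//	  "order": "asc",
//	  "env_filters": {"FOO": "BAR"},
//	  "status": ["RUNNING"]
//	}
//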
func (ep *endpoints) CreateRunV2(w http.ResponseWriter, r *http.Request) { var lr state.LaunchRequestV2 err := ep.decodeRequest(r, &lr) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } err = ep.middlewareClient.AnnotateLaunchRequest(&r.Header, &lr) if err != nil { ep.encodeError(w, err) return } // check if OwnerEmail is present in lr.EventLabels if len(lr.RunTags.OwnerEmail) == 0 || len(lr.RunTags.TeamName) == 0 { ep.encodeError(w, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("run_tags must exist in body and contain [owner_email] and [team_name]")}) return } vars := mux.Vars(r) if lr.Engine == nil { if lr.SparkExtension != nil { lr.Engine = &state.EKSSparkEngine } else { lr.Engine = &state.EKSEngine } } req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Env: lr.Env, OwnerID: lr.RunTags.OwnerEmail, Command: nil, Memory: nil, Cpu: nil, Gpu: nil, Engine: lr.Engine, EphemeralStorage: nil, NodeLifecycle: nil, SparkExtension: lr.SparkExtension, Description: lr.Description, CommandHash: lr.CommandHash, IdempotenceKey: lr.IdempotenceKey, Arch: lr.Arch, Labels: lr.Labels, ServiceAccount: lr.ServiceAccount, Tier: lr.Tier, }, } run, err := ep.executionService.CreateDefinitionRunByDefinitionID(r.Context(), vars["definition_id"], &req) if err != nil { ep.logger.Log( "level", "error", "message", "problem creating V2 run", "operation", "CreateRunV2", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { ep.encodeResponse(w, run) } } // Creates a new Run. func (ep *endpoints) CreateRunV4(w http.ResponseWriter, r *http.Request) { var lr state.LaunchRequestV2 err := ep.decodeRequest(r, &lr) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } err = ep.middlewareClient.AnnotateLaunchRequest(&r.Header, &lr) if err != nil { ep.encodeError(w, err) return } if len(lr.RunTags.OwnerID) == 0 { ep.encodeError(w, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("run_tags must exist in body and contain [owner_id]")}) return } if lr.Engine == nil { if lr.SparkExtension != nil { lr.Engine = &state.EKSSparkEngine } else { lr.Engine = &state.EKSEngine } } if lr.NodeLifecycle != nil { if !utils.StringSliceContains(state.NodeLifeCycles, *lr.NodeLifecycle) { ep.encodeError(w, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("Nodelifecyle must be [normal, spot]")}) return } } else { lr.NodeLifecycle = &state.DefaultLifecycle } vars := mux.Vars(r) req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Env: lr.Env, OwnerID: lr.RunTags.OwnerID, Command: lr.Command, Memory: lr.Memory, Cpu: lr.Cpu, Gpu: lr.Gpu, EphemeralStorage: lr.EphemeralStorage, Engine: lr.Engine, NodeLifecycle: lr.NodeLifecycle, ActiveDeadlineSeconds: lr.ActiveDeadlineSeconds, SparkExtension: lr.SparkExtension, Description: lr.Description, CommandHash: lr.CommandHash, IdempotenceKey: lr.IdempotenceKey, Arch: lr.Arch, Labels: lr.Labels, ServiceAccount: lr.ServiceAccount, Tier: lr.Tier, }, } run, err := ep.executionService.CreateDefinitionRunByDefinitionID(r.Context(), vars["definition_id"], &req) if err != nil { ep.logger.Log( "level", "error", "message", "problem creating V4 run", "operation", "CreateRunV4", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { ep.encodeResponse(w, run) } } // Creates a new Run based on definition alias. 
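//
// Example (illustrative): a launch request in the shape read by CreateRunV4 above and
// CreateRunByAlias below. The route and the run_tags/labels field names mirror the
// endpoint tests; the node_lifecycle JSON key is an assumption based on the Go field
// name. run_tags.owner_id is required, and node_lifecycle, when set, must pass the
// state.NodeLifeCycles check (the error message lists [normal, spot]).
//
//	PUT /api/v4/task/<definition_id>/execute
//	{
//	  "cluster": "cluster1",
//	  "env": [{"name": "E1", "value": "V1"}],
//	  "run_tags": {"owner_id": "flotilla"},
//	  "node_lifecycle": "spot",
//	  "labels": {"foo": "bar"}
//	}
//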
func (ep *endpoints) CreateRunByAlias(w http.ResponseWriter, r *http.Request) { var lr state.LaunchRequestV2 err := ep.decodeRequest(r, &lr) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } err = ep.middlewareClient.AnnotateLaunchRequest(&r.Header, &lr) if err != nil { ep.encodeError(w, err) return } if len(lr.RunTags.OwnerID) == 0 { ep.encodeError(w, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("run_tags must exist in body and contain [owner_id]")}) return } if lr.Engine == nil || *lr.Engine == "ecs" { if lr.SparkExtension != nil { lr.Engine = &state.EKSSparkEngine } else { lr.Engine = &state.EKSEngine } } if lr.NodeLifecycle != nil { if !utils.StringSliceContains(state.NodeLifeCycles, *lr.NodeLifecycle) { ep.encodeError(w, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("Nodelifecyle must be [normal, spot]")}) return } } else { lr.NodeLifecycle = &state.DefaultLifecycle } vars := mux.Vars(r) req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Env: lr.Env, OwnerID: lr.RunTags.OwnerID, Command: lr.Command, Memory: lr.Memory, Cpu: lr.Cpu, Gpu: lr.Gpu, EphemeralStorage: lr.EphemeralStorage, Engine: lr.Engine, NodeLifecycle: lr.NodeLifecycle, ActiveDeadlineSeconds: lr.ActiveDeadlineSeconds, SparkExtension: lr.SparkExtension, Description: lr.Description, CommandHash: lr.CommandHash, IdempotenceKey: lr.IdempotenceKey, Arch: lr.Arch, Labels: lr.Labels, ServiceAccount: lr.ServiceAccount, Tier: lr.Tier, }, } run, err := ep.executionService.CreateDefinitionRunByAlias(r.Context(), vars["alias"], &req) if err != nil { ep.logger.Log( "level", "error", "message", "problem creating run alias", "operation", "CreateRunByAlias", "error", fmt.Sprintf("%+v", err), "alias", vars["alias"]) ep.encodeError(w, err) } else { ep.encodeResponse(w, run) } } // Stops a run based on run ID. func (ep *endpoints) StopRun(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) userInfo := ep.ExtractUserInfo(r) err := ep.executionService.Terminate(r.Context(), vars["run_id"], userInfo) if err != nil { ep.logger.Log( "level", "error", "message", "problem stopping run", "operation", "StopRun", "error", fmt.Sprintf("%+v", err), "run_id", vars["run_id"]) } ep.encodeResponse(w, map[string]bool{"terminated": true}) } // Extracts user info if present in the headers.s func (ep *endpoints) ExtractUserInfo(r *http.Request) state.UserInfo { var userInfo state.UserInfo for name, headers := range r.Header { name = strings.ToLower(name) for _, h := range headers { if strings.Contains(name, "-name") { userInfo.Name = h } if strings.Contains(name, "-email") { userInfo.Email = h } } } return userInfo } // Update an existing run. func (ep *endpoints) UpdateRun(w http.ResponseWriter, r *http.Request) { var run state.Run err := ep.decodeRequest(r, &run) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } vars := mux.Vars(r) err = ep.executionService.UpdateStatus(r.Context(), vars["run_id"], run.Status, run.ExitCode, run.RunExceptions, run.ExitReason) if err != nil { ep.logger.Log( "level", "error", "message", "problem updating run", "operation", "UpdateRun", "error", fmt.Sprintf("%+v", err), "run_id", vars["run_id"]) ep.encodeError(w, err) } else { ep.encodeResponse(w, map[string]bool{"updated": true}) } } // Get Pod Events (EKS only) for a run ID. 
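//
// Aside (illustrative): ExtractUserInfo above matches on substrings of the lowercased
// header names, so any inbound header containing "-name" or "-email" populates the
// corresponding field; the exact header names below are hypothetical.
//
//	X-Forwarded-User-Name: Jane Doe        -> userInfo.Name  = "Jane Doe"
//	X-Forwarded-User-Email: jane@corp.com  -> userInfo.Email = "jane@corp.com"
//
// StopRun passes the extracted user info to the execution service when terminating a run.
//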
func (ep *endpoints) GetEvents(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) run, err := ep.executionService.Get(r.Context(), vars["run_id"]) if err != nil { ep.logger.Log( "level", "error", "message", "problem getting run", "operation", "GetRun", "error", fmt.Sprintf("%+v", err), "run_id", vars["run_id"]) ep.encodeError(w, err) return } var podEventList state.PodEventList if run.PodEvents != nil { podEventList.Total = len(*run.PodEvents) podEventList.PodEvents = *run.PodEvents } else { // If run doesn't have PodEvents in the cached record, fetch them podEventList, _ = ep.executionService.GetEvents(r.Context(), run) } ep.encodeResponse(w, podEventList) } // Get logs for a run. func (ep *endpoints) GetLogs(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) params := r.URL.Query() lastSeen := ep.getURLParam(params, "last_seen", "") rawText := ep.getStringBoolVal(ep.getURLParam(params, "raw_text", "")) run, err := ep.executionService.Get(r.Context(), vars["run_id"]) role := ep.getURLParam(params, "role", "driver") facility := ep.getURLParam(params, "facility", "stderr") if err != nil { _ = ep.logger.Log( "level", "error", "message", "problem getting run", "operation", "GetRun", "error", fmt.Sprintf("%+v", err), "run_id", vars["run_id"]) ep.encodeError(w, err) return } if run.Engine == nil { run.Engine = &state.DefaultEngine } if rawText { _ = ep.eksLogService.LogsText(vars["run_id"], w) } else { log, newLastSeen, err := ep.eksLogService.Logs(vars["run_id"], &lastSeen, &role, &facility) res := map[string]string{ "log": "", "last_seen": lastSeen, } if err == nil { res = map[string]string{ "log": log, "last_seen": *newLastSeen, } } ep.encodeResponse(w, res) } } // Get list of groups. func (ep *endpoints) GetGroups(w http.ResponseWriter, r *http.Request) { response := make(map[string]interface{}) response["total"] = 0 response["groups"] = []string{} ep.encodeResponse(w, response) } // Get listing of tags. func (ep *endpoints) GetTags(w http.ResponseWriter, r *http.Request) { response := make(map[string]interface{}) response["total"] = 0 response["tags"] = []string{} ep.encodeResponse(w, response) } func (ep *endpoints) ListClusters(w http.ResponseWriter, r *http.Request) { clusters, err := ep.executionService.ListClusters(r.Context()) if err != nil { ep.encodeError(w, err) return } ep.encodeResponse(w, map[string]interface{}{ "clusters": clusters, }) } // List active workers. func (ep *endpoints) ListWorkers(w http.ResponseWriter, r *http.Request) { wl, err := ep.workerService.List(r.Context(), state.EKSEngine) if wl.Workers == nil { wl.Workers = []state.Worker{} } if err != nil { ep.encodeError(w, err) } else { response := make(map[string]interface{}) response["total"] = wl.Total response["workers"] = wl.Workers ep.encodeResponse(w, response) } } // Get information about an active worker. func (ep *endpoints) GetWorker(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) worker, err := ep.workerService.Get(r.Context(), vars["worker_type"], state.DefaultEngine) if err != nil { ep.encodeError(w, err) } else { ep.encodeResponse(w, worker) } } // Update worker counts.
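//
// Aside (illustrative): paging logs through GetLogs above. Each call returns the next
// chunk plus an opaque last_seen cursor to echo back; raw_text=true writes plain text
// instead of JSON. The route matches the endpoint tests; role and facility default to
// "driver" and "stderr".
//
//	GET /api/v1/<run_id>/logs                -> {"log": "...", "last_seen": "t1"}
//	GET /api/v1/<run_id>/logs?last_seen=t1   -> {"log": "...", "last_seen": "t2"}
//	GET /api/v1/<run_id>/logs?raw_text=true  -> plain-text log body
//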
func (ep *endpoints) UpdateWorker(w http.ResponseWriter, r *http.Request) { var worker state.Worker err := ep.decodeRequest(r, &worker) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } vars := mux.Vars(r) updated, err := ep.workerService.Update(r.Context(), vars["worker_type"], worker) if err != nil { ep.encodeError(w, err) } else { ep.encodeResponse(w, updated) } } // Update batches of workers - used to turn on/off in bulk. func (ep *endpoints) BatchUpdateWorkers(w http.ResponseWriter, r *http.Request) { var wks []state.Worker err := ep.decodeRequest(r, &wks) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } updated, err := ep.workerService.BatchUpdate(r.Context(), wks) if err != nil { ep.encodeError(w, err) } else { ep.encodeResponse(w, updated) } } func (ep *endpoints) getStringBoolVal(s string) bool { l := strings.ToLower(s) if l == "true" { return true } return false } // Create a new template run based on template name/alias. func (ep *endpoints) CreateTemplateRunByName(w http.ResponseWriter, r *http.Request) { var req state.TemplateExecutionRequest err := ep.decodeRequest(r, &req) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } if len(req.OwnerID) == 0 { ep.encodeError(w, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("request payload must contain [owner_id]; the run_tags field is deprecated for the v7 endpoint.")}) return } req.Engine = &state.DefaultEngine if req.NodeLifecycle != nil { if !utils.StringSliceContains(state.NodeLifeCycles, *req.NodeLifecycle) { ep.encodeError(w, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("Nodelifecyle must be [normal, spot]")}) return } } else { req.NodeLifecycle = &state.DefaultLifecycle } vars := mux.Vars(r) run, err := ep.executionService.CreateTemplateRunByTemplateName(r.Context(), vars["template_name"], vars["template_version"], &req) if err != nil { ep.logger.Log( "level", "error", "message", "problem creating template run", "operation", "CreateTemplateRun", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { ep.encodeResponse(w, run) } } // Create a new template run based on template id. func (ep *endpoints) CreateTemplateRun(w http.ResponseWriter, r *http.Request) { var req state.TemplateExecutionRequest err := ep.decodeRequest(r, &req) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } if len(req.OwnerID) == 0 { ep.encodeError(w, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("request payload must contain [owner_id]; the run_tags field is deprecated for the v7 endpoint.")}) return } req.Engine = &state.DefaultEngine if req.NodeLifecycle != nil { if !utils.StringSliceContains(state.NodeLifeCycles, *req.NodeLifecycle) { ep.encodeError(w, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("Nodelifecyle must be [normal, spot]")}) return } } else { req.NodeLifecycle = &state.DefaultLifecycle } vars := mux.Vars(r) run, err := ep.executionService.CreateTemplateRunByTemplateID(r.Context(), vars["template_id"], &req) if err != nil { ep.logger.Log( "level", "error", "message", "problem creating template run", "operation", "CreateTemplateRun", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { ep.encodeResponse(w, run) } } // List all templates. 
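//
// Example (illustrative): a template execution request for the endpoints above. owner_id
// is required at the top level (the error text notes run_tags is deprecated for the v7
// endpoint); node_lifecycle gets the same [normal, spot] validation as the task
// endpoints, and the engine is always forced to the default. Field names other than
// owner_id are assumptions.
//
//	{
//	  "owner_id": "flotilla",
//	  "node_lifecycle": "normal",
//	  "env": [{"name": "E1", "value": "V1"}]
//	}
//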
func (ep *endpoints) ListTemplates(w http.ResponseWriter, r *http.Request) { var ( tl state.TemplateList err error ) lr := ep.decodeOrderableListRequest(r, &state.Template{}) params := r.URL.Query() latestOnly := ep.getStringBoolVal(ep.getURLParam(params, "latest_only", "true")) if latestOnly == true { tl, err = ep.templateService.ListLatestOnly(r.Context(), lr.limit, lr.offset, lr.sortBy, lr.order) } else { tl, err = ep.templateService.List(r.Context(), lr.limit, lr.offset, lr.sortBy, lr.order) } if tl.Templates == nil { tl.Templates = []state.Template{} } if err != nil { ep.logger.Log( "level", "error", "message", "problem listing templates", "operation", "ListTemplates", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { response := make(map[string]interface{}) response["total"] = tl.Total response["templates"] = tl.Templates response["limit"] = lr.limit response["offset"] = lr.offset response["sort_by"] = lr.sortBy response["order"] = lr.order ep.encodeResponse(w, response) } } // Get a template. func (ep *endpoints) GetTemplate(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) tpl, err := ep.templateService.GetByID(r.Context(), vars["template_id"]) if err != nil { ep.logger.Log( "level", "error", "message", "problem getting templates", "operation", "GetTemplate", "error", fmt.Sprintf("%+v", err), "template_id", vars["template_id"]) ep.encodeError(w, err) } else { ep.encodeResponse(w, tpl) } } // Create a template. func (ep *endpoints) CreateTemplate(w http.ResponseWriter, r *http.Request) { var req state.CreateTemplateRequest err := ep.decodeRequest(r, &req) if err != nil { ep.encodeError(w, exceptions.MalformedInput{ErrorString: err.Error()}) return } created, err := ep.templateService.Create(r.Context(), &req) if err != nil { ep.logger.Log( "level", "error", "message", "problem creating template", "operation", "CreateTemplate", "error", fmt.Sprintf("%+v", err)) ep.encodeError(w, err) } else { ep.encodeResponse(w, created) } } // Get a cluster. func (ep *endpoints) GetCluster(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) cluster, err := ep.executionService.GetClusterByID(r.Context(), vars["cluster_id"]) if err != nil { ep.encodeError(w, err) return } ep.encodeResponse(w, cluster) } // Update a cluster. func (ep *endpoints) UpdateCluster(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) var clusterMetadata state.ClusterMetadata if err := json.NewDecoder(r.Body).Decode(&clusterMetadata); err != nil { ep.encodeError(w, err) return } if vars["cluster_id"] != "" { clusterMetadata.ID = vars["cluster_id"] } err := ep.executionService.UpdateClusterMetadata(r.Context(), clusterMetadata) if err != nil { ep.encodeError(w, err) return } ep.encodeResponse(w, map[string]bool{"updated": true}) } func (ep *endpoints) DeleteCluster(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) err := ep.executionService.DeleteClusterMetadata(r.Context(), vars["cluster_id"]) if err != nil { ep.encodeError(w, err) return } ep.encodeResponse(w, map[string]bool{"deleted": true}) } // Health check endpoint. func (ep *endpoints) HealthCheck(w http.ResponseWriter, r *http.Request) { ep.encodeResponse(w, map[string]string{ "status": "healthy", "message": "Service is up and running", }) } // Create a new cluster. 
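//
// Aside (illustrative): GetRunStatus below supports cheap polling. It sets
// Cache-Control: max-age=5 and an ETag of the quoted "<status>-<exit_code>" pair
// ("unknown" while the exit code is nil), so a poller that echoes the ETag via
// If-None-Match receives 304 Not Modified until the status or exit code changes.
// The status strings here are placeholders; the route is registered in router.go.
//
//	GET <status route>/<run_id>              -> 200, ETag: "RUNNING-unknown"
//	GET ... If-None-Match: "RUNNING-unknown" -> 304 Not Modified
//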
func (ep *endpoints) CreateCluster(w http.ResponseWriter, r *http.Request) { var cluster state.ClusterMetadata if err := json.NewDecoder(r.Body).Decode(&cluster); err != nil { ep.encodeError(w, err) return } cluster.ID = "" err := ep.executionService.UpdateClusterMetadata(r.Context(), cluster) if err != nil { ep.encodeError(w, err) return } ep.encodeResponse(w, map[string]bool{"created": true}) } func (ep *endpoints) GetRunStatus(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) runID := vars["run_id"] status, err := ep.executionService.GetRunStatus(r.Context(), runID) if err != nil { ep.logger.Log( "level", "error", "message", "problem getting run status", "operation", "GetRunStatus", "error", fmt.Sprintf("%+v", err), "run_id", runID) ep.encodeError(w, err) return } w.Header().Set("Cache-Control", "max-age=5") // Cache for 5 seconds exitCode := "unknown" if status.ExitCode != nil { exitCode = fmt.Sprintf("%v", *status.ExitCode) } statusHash := fmt.Sprintf("%s-%s", status.Status, exitCode) etag := fmt.Sprintf(`"%s"`, statusHash) w.Header().Set("ETag", etag) if match := r.Header.Get("If-None-Match"); match != "" && match == etag { w.WriteHeader(http.StatusNotModified) return } ep.encodeResponse(w, status) } ================================================ FILE: flotilla/endpoints_test.go ================================================ package flotilla import ( "bytes" "encoding/json" "net/http/httptest" "testing" "github.com/stitchfix/flotilla-os/clients/middleware" "github.com/stitchfix/flotilla-os/config" "github.com/stitchfix/flotilla-os/services" "github.com/stitchfix/flotilla-os/state" "github.com/stitchfix/flotilla-os/testutils" muxtrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/gorilla/mux" ) func setUp(t *testing.T) *muxtrace.Router { confDir := "../conf" c, _ := config.NewConfig(&confDir) imp := testutils.ImplementsAllTheThings{ T: t, Definitions: map[string]state.Definition{ "A": {DefinitionID: "A", Alias: "aliasA"}, "B": {DefinitionID: "B", Alias: "aliasB"}, "C": {DefinitionID: "C", Alias: "aliasC", ExecutableResources: state.ExecutableResources{Image: "invalidimage"}}, }, Runs: map[string]state.Run{ "runA": {DefinitionID: "A", ClusterName: "cluster1", GroupName: "A", RunID: "runA", Status: state.StatusRunning}, "runB": {DefinitionID: "B", ClusterName: "cluster2", GroupName: "B", RunID: "runB", InstanceDNSName: "cupcakedns", InstanceID: "cupcakeid"}, }, Qurls: map[string]string{ "A": "a/", "B": "b/", }, ClusterStates: []state.ClusterMetadata{ {Name: "cluster1", Status: state.StatusActive, StatusReason: "Active and healthy"}, {Name: "cluster2", Status: state.StatusActive, StatusReason: "Active and healthy"}, }, Groups: []string{"g1", "g2", "g3"}, Tags: []string{"t1", "t2", "t3"}, } ds, _ := services.NewDefinitionService(&imp) es, _ := services.NewExecutionService(c, &imp, &imp, &imp, &imp) ls, _ := services.NewLogService(&imp, &imp) mwc, _ := middleware.NewClient() ep := endpoints{definitionService: ds, executionService: es, eksLogService: ls, middlewareClient: mwc} return NewRouter(ep) } func TestEndpoints_CreateDefinition(t *testing.T) { router := setUp(t) newDef := `{"alias":"cupcake", "memory":100, "group_name":"cupcake", "image":"someimage", "command":"echo 'hi'"}` req := httptest.NewRequest("POST", "/api/v1/task", bytes.NewBufferString(newDef)) w := httptest.NewRecorder() router.ServeHTTP(w, req) resp := w.Result() if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" { t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was 
[%s]", resp.Header.Get("Content-Type")) } if resp.StatusCode != 200 { t.Errorf("Expected status 200, was %v", resp.StatusCode) } r := state.Definition{} err := json.NewDecoder(resp.Body).Decode(&r) if err != nil { t.Error(err.Error()) } if len(r.DefinitionID) == 0 { t.Errorf("Expected non-empty definition id") } } func TestEndpoints_UpdateDefinition(t *testing.T) { router := setUp(t) updatedDef := `{"image":"updatedImage"}` req := httptest.NewRequest("PUT", "/api/v1/task/A", bytes.NewBufferString(updatedDef)) w := httptest.NewRecorder() router.ServeHTTP(w, req) resp := w.Result() if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" { t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type")) } if resp.StatusCode != 200 { t.Errorf("Expected status 200, was %v", resp.StatusCode) } r := state.Definition{} err := json.NewDecoder(resp.Body).Decode(&r) if err != nil { t.Error(err.Error()) } if r.Image != "updatedImage" { t.Errorf("Expected image [updatedImage] but was [%s]", r.Image) } } func TestEndpoints_CreateRun(t *testing.T) { router := setUp(t) newRun := `{"cluster":"cupcake", "env":[{"name":"E1","value":"V1"}]}` req := httptest.NewRequest("PUT", "/api/v1/task/A/execute", bytes.NewBufferString(newRun)) w := httptest.NewRecorder() router.ServeHTTP(w, req) resp := w.Result() if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" { t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type")) } if resp.StatusCode != 200 { t.Errorf("Expected status 200, was %v", resp.StatusCode) } r := state.Run{} err := json.NewDecoder(resp.Body).Decode(&r) if err != nil { t.Error(err.Error()) } if len(r.RunID) == 0 { t.Errorf("Expected non-empty run id") } if r.Status != state.StatusQueued { t.Errorf("Expected new run to have status [%s] but was [%s]", state.StatusQueued, r.Status) } } func TestEndpoints_CreateRun2(t *testing.T) { router := setUp(t) newRun := `{"cluster":"cupcake", "env":[{"name":"E1","value":"V1"}], "run_tags":{"owner_email":"flotilla@github.com", "team_name":"thebest"}}` req := httptest.NewRequest("PUT", "/api/v2/task/A/execute", bytes.NewBufferString(newRun)) w := httptest.NewRecorder() router.ServeHTTP(w, req) resp := w.Result() if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" { t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type")) } if resp.StatusCode != 200 { t.Errorf("Expected status 200, was %v", resp.StatusCode) } r := state.Run{} err := json.NewDecoder(resp.Body).Decode(&r) if err != nil { t.Error(err.Error()) } if len(r.RunID) == 0 { t.Errorf("Expected non-empty run id") } if r.Status != state.StatusQueued { t.Errorf("Expected new run to have status [%s] but was [%s]", state.StatusQueued, r.Status) } if r.User != "flotilla@github.com" { t.Errorf("Expected new run to have user set to run_tags.owner_email but was [%s]", r.User) } } func TestEndpoints_CreateRun4(t *testing.T) { router := setUp(t) newRun := `{"cluster":"cluster1", "env":[{"name":"E1","value":"V1"}], "run_tags":{"owner_id":"flotilla"}, "labels": {"foo": "bar"}}` req := httptest.NewRequest("PUT", "/api/v4/task/A/execute", bytes.NewBufferString(newRun)) w := httptest.NewRecorder() router.ServeHTTP(w, req) resp := w.Result() if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" { t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", 
func TestEndpoints_CreateDefinition(t *testing.T) {
    router := setUp(t)

    newDef := `{"alias":"cupcake", "memory":100, "group_name":"cupcake", "image":"someimage", "command":"echo 'hi'"}`
    req := httptest.NewRequest("POST", "/api/v1/task", bytes.NewBufferString(newDef))
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    r := state.Definition{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if len(r.DefinitionID) == 0 {
        t.Errorf("Expected non-empty definition id")
    }
}

func TestEndpoints_UpdateDefinition(t *testing.T) {
    router := setUp(t)

    updatedDef := `{"image":"updatedImage"}`
    req := httptest.NewRequest("PUT", "/api/v1/task/A", bytes.NewBufferString(updatedDef))
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    r := state.Definition{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if r.Image != "updatedImage" {
        t.Errorf("Expected image [updatedImage] but was [%s]", r.Image)
    }
}

func TestEndpoints_CreateRun(t *testing.T) {
    router := setUp(t)

    newRun := `{"cluster":"cupcake", "env":[{"name":"E1","value":"V1"}]}`
    req := httptest.NewRequest("PUT", "/api/v1/task/A/execute", bytes.NewBufferString(newRun))
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    r := state.Run{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if len(r.RunID) == 0 {
        t.Errorf("Expected non-empty run id")
    }
    if r.Status != state.StatusQueued {
        t.Errorf("Expected new run to have status [%s] but was [%s]", state.StatusQueued, r.Status)
    }
}

func TestEndpoints_CreateRun2(t *testing.T) {
    router := setUp(t)

    newRun := `{"cluster":"cupcake", "env":[{"name":"E1","value":"V1"}], "run_tags":{"owner_email":"flotilla@github.com", "team_name":"thebest"}}`
    req := httptest.NewRequest("PUT", "/api/v2/task/A/execute", bytes.NewBufferString(newRun))
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    r := state.Run{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if len(r.RunID) == 0 {
        t.Errorf("Expected non-empty run id")
    }
    if r.Status != state.StatusQueued {
        t.Errorf("Expected new run to have status [%s] but was [%s]", state.StatusQueued, r.Status)
    }
    if r.User != "flotilla@github.com" {
        t.Errorf("Expected new run to have user set to run_tags.owner_email but was [%s]", r.User)
    }
}
func TestEndpoints_CreateRun4(t *testing.T) {
    router := setUp(t)

    newRun := `{"cluster":"cluster1", "env":[{"name":"E1","value":"V1"}], "run_tags":{"owner_id":"flotilla"}, "labels": {"foo": "bar"}}`
    req := httptest.NewRequest("PUT", "/api/v4/task/A/execute", bytes.NewBufferString(newRun))
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v\n%s", resp.StatusCode, resp.Status)
    }

    r := state.Run{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if len(r.RunID) == 0 {
        t.Errorf("Expected non-empty run id")
    }
    if r.Status != state.StatusQueued {
        t.Errorf("Expected new run to have status [%s] but was [%s]", state.StatusQueued, r.Status)
    }
    if len(r.Labels) != 1 || r.Labels["foo"] != "bar" {
        labelRes, _ := json.Marshal(r.Labels)
        t.Error(string(labelRes))
    }
    if r.User != "flotilla" {
        t.Errorf("Expected new run to have user set to run_tags.owner_id but was [%s]", r.User)
    }
}

func TestEndpoints_CreateRunByAlias(t *testing.T) {
    router := setUp(t)

    newRun := `{"cluster":"cupcake", "env":[{"name":"E1","value":"V1"}], "run_tags":{"owner_id":"flotilla"}}`
    req := httptest.NewRequest("PUT", "/api/v1/task/alias/aliasA/execute", bytes.NewBufferString(newRun))
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    r := state.Run{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if len(r.RunID) == 0 {
        t.Errorf("Expected non-empty run id")
    }
    if r.Status != state.StatusQueued {
        t.Errorf("Expected new run to have status [%s] but was [%s]", state.StatusQueued, r.Status)
    }
    if r.User != "flotilla" {
        t.Errorf("Expected new run to have user set to run_tags.owner_id but was [%s]", r.User)
    }
}

func TestEndpoints_DeleteDefinition(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("DELETE", "/api/v1/task/A", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var ack map[string]bool
    err := json.NewDecoder(resp.Body).Decode(&ack)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := ack["deleted"]; !ok {
        t.Errorf("Expected [deleted] acknowledgement")
    }
}

func TestEndpoints_GetDefinition(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v1/task/A", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var r state.Definition
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if r.DefinitionID != "A" {
        t.Errorf("Expected definition_id [A] but was [%s]", r.DefinitionID)
    }
    if r.Env == nil {
        t.Errorf("Expected non-nil environment")
    }
}
func TestEndpoints_GetDefinitionByAlias(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v1/task/alias/aliasA", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var r state.Definition
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if r.DefinitionID != "A" {
        t.Errorf("Expected definition_id [A] but was [%s]", r.DefinitionID)
    }
    if r.Env == nil {
        t.Errorf("Expected non-nil environment")
    }
}

func TestEndpoints_GetGroups(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v1/groups", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var r map[string]interface{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := r["total"]; !ok {
        t.Errorf("Expected total in response")
    }
    if _, ok := r["groups"]; !ok {
        t.Errorf("Expected groups in response")
    }
    groups, _ := r["groups"]
    if _, ok := groups.([]interface{}); !ok {
        t.Errorf("Cannot cast groups to list, expected list")
    }
}

func TestEndpoints_GetLogs(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v1/runA/logs", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var r map[string]interface{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := r["log"]; !ok {
        t.Errorf("Expected log in response")
    }
}

func TestEndpoints_GetRun(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v1/history/runA", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var r state.Run
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if r.RunID != "runA" {
        t.Errorf("Expected run with runID [runA] but was [%s]", r.RunID)
    }
}

func TestEndpoints_GetRun2(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v1/history/runB", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var other map[string]interface{}
    err := json.NewDecoder(resp.Body).Decode(&other)
    if err != nil {
        t.Error(err.Error())
    }
    instance, ok := other["instance"]
    if !ok {
        t.Errorf("Expected [instance] in response")
    }
    if _, ok = instance.(map[string]interface{}); !ok {
        t.Errorf("Expected [instance] in response to be a map")
    }
}

func TestEndpoints_GetTags(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v1/tags", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var r map[string]interface{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := r["total"]; !ok {
        t.Errorf("Expected total in response")
    }
    if _, ok := r["tags"]; !ok {
        t.Errorf("Expected tags in response")
    }
    tags, _ := r["tags"]
    if _, ok := tags.([]interface{}); !ok {
        t.Errorf("Cannot cast tags to list, expected list")
    }
}
func TestEndpoints_ListDefinitions(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v1/task?limit=100&offset=2&sort_by=alias&order=desc&group_name=cupcake&env=E1%7CV1", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var r map[string]interface{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := r["total"]; !ok {
        t.Errorf("Expected total in response")
    }
    if _, ok := r["definitions"]; !ok {
        t.Errorf("Expected definitions in response")
    }
    if _, ok := r["limit"]; !ok {
        t.Errorf("Expected limit in response")
    }
    if _, ok := r["offset"]; !ok {
        t.Errorf("Expected offset in response")
    }
    if _, ok := r["sort_by"]; !ok {
        t.Errorf("Expected sort_by in response")
    }
    if _, ok := r["order"]; !ok {
        t.Errorf("Expected order in response")
    }
    if _, ok := r["group_name"]; !ok {
        t.Errorf("Expected [group_name] filter in response")
    }
    if _, ok := r["env_filters"]; !ok {
        t.Errorf("Expected env_filters in response")
    }
    definitions, _ := r["definitions"]
    if _, ok := definitions.([]interface{}); !ok {
        t.Errorf("Cannot cast definitions to list, expected list")
    }
    envFilters, _ := r["env_filters"]
    if _, ok := envFilters.(map[string]interface{}); !ok {
        t.Errorf("Cannot cast env_filters to map, expected map")
    }
    envFiltersMap := envFilters.(map[string]interface{})
    e1Filter, ok := envFiltersMap["E1"]
    if !ok {
        t.Errorf("Expected env_filters to contain key [E1]")
    }
    if e1Filter.(string) != "V1" {
        t.Errorf("Expected env_filter [E1:V1]")
    }
}

func TestEndpoints_ListRuns(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest(
        "GET",
        "/api/v1/history?status=RUNNING&status=QUEUED&limit=100&offset=2&sort_by=started_at&order=desc&cluster=cupcake&env=E1%7CV1",
        nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var r map[string]interface{}
    err := json.NewDecoder(resp.Body).Decode(&r)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := r["total"]; !ok {
        t.Errorf("Expected total in response")
    }
    if _, ok := r["history"]; !ok {
        t.Errorf("Expected runs in response")
    }
    if _, ok := r["limit"]; !ok {
        t.Errorf("Expected limit in response")
    }
    if _, ok := r["offset"]; !ok {
        t.Errorf("Expected offset in response")
    }
    if _, ok := r["sort_by"]; !ok {
        t.Errorf("Expected sort_by in response")
    }
    if _, ok := r["order"]; !ok {
        t.Errorf("Expected order in response")
    }
    if _, ok := r["cluster"]; !ok {
        t.Errorf("Expected [cluster] filter in response")
    }
    if _, ok := r["env_filters"]; !ok {
        t.Errorf("Expected env_filters in response")
    }
    if _, ok := r["status"]; !ok {
        t.Errorf("Expected [status] filter in response")
    }
    runs, _ := r["history"]
    if _, ok := runs.([]interface{}); !ok {
        t.Errorf("Cannot cast runs to list, expected list")
    }
    statusFilters, _ := r["status"]
    if _, ok := statusFilters.([]interface{}); !ok {
        t.Errorf("Cannot cast status filters to list, expected list")
    }
    expectedStatusFilters := map[string]bool{"RUNNING": true, "QUEUED": true}
    statusFiltersList := statusFilters.([]interface{})
    if len(statusFiltersList) != 2 {
        t.Errorf("Expected 2 status filters, was %v", len(statusFiltersList))
    }
    for _, statusFilter := range statusFiltersList {
        if _, ok := expectedStatusFilters[statusFilter.(string)]; !ok {
            t.Errorf("Unexpected status filter: %s", statusFilter.(string))
        }
    }
    envFilters, _ := r["env_filters"]
    if _, ok := envFilters.(map[string]interface{}); !ok {
        t.Errorf("Cannot cast env_filters to map, expected map")
    }
    envFiltersMap := envFilters.(map[string]interface{})
    e1Filter, ok := envFiltersMap["E1"]
    if !ok {
        t.Errorf("Expected env_filters to contain key [E1]")
    }
    if e1Filter.(string) != "V1" {
        t.Errorf("Expected env_filter [E1:V1]")
    }
}
func TestEndpoints_StopRun(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("DELETE", "/api/v1/task/A/history/runA", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var ack map[string]bool
    err := json.NewDecoder(resp.Body).Decode(&ack)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := ack["terminated"]; !ok {
        t.Errorf("Expected [terminated] acknowledgement")
    }
}

func TestEndpoints_ListClusters(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v6/clusters", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var response map[string]interface{}
    err := json.NewDecoder(resp.Body).Decode(&response)
    if err != nil {
        t.Error(err.Error())
    }
    clusters, ok := response["clusters"]
    if !ok {
        t.Errorf("Expected clusters in response")
    }
    clustersList, ok := clusters.([]interface{})
    if !ok {
        t.Errorf("Cannot cast clusters to list, expected list")
    }
    if len(clustersList) != 2 {
        t.Errorf("Expected 2 clusters, got %d", len(clustersList))
    }
    cluster, ok := clustersList[0].(map[string]interface{})
    if !ok {
        t.Errorf("Cannot cast cluster to map, expected map")
    }
    if _, ok := cluster["name"]; !ok {
        t.Errorf("Expected cluster to have name field")
    }
    if _, ok := cluster["status"]; !ok {
        t.Errorf("Expected cluster to have status field")
    }
}

func TestEndpoints_GetCluster(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("GET", "/api/v6/clusters/cluster1", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var cluster map[string]interface{}
    err := json.NewDecoder(resp.Body).Decode(&cluster)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := cluster["name"]; !ok {
        t.Errorf("Expected cluster to have name field")
    }
    if _, ok := cluster["status"]; !ok {
        t.Errorf("Expected cluster to have status field")
    }
}

func TestEndpoints_UpdateCluster(t *testing.T) {
    router := setUp(t)
    updateReq := `{"status":"ACTIVE", "reason":"Testing update"}`
    req := httptest.NewRequest("PUT", "/api/v6/clusters/cluster1", bytes.NewBufferString(updateReq))
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var ack map[string]bool
    err := json.NewDecoder(resp.Body).Decode(&ack)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := ack["updated"]; !ok {
        t.Errorf("Expected [updated] acknowledgement")
    }
}

func TestEndpoints_DeleteCluster(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("DELETE", "/api/v6/clusters/cluster1", nil)
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var ack map[string]bool
    err := json.NewDecoder(resp.Body).Decode(&ack)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := ack["deleted"]; !ok {
        t.Errorf("Expected [deleted] acknowledgement")
    }
}

func TestEndpoints_CreateCluster(t *testing.T) {
    router := setUp(t)
    req := httptest.NewRequest("POST", "/api/v6/clusters", bytes.NewBufferString(`{"name":"cluster1", "status":"ACTIVE", "reason":"Testing create"}`))
    w := httptest.NewRecorder()
    router.ServeHTTP(w, req)

    resp := w.Result()
    if resp.Header.Get("Content-Type") != "application/json; charset=utf-8" {
        t.Errorf("Expected Content-Type [application/json; charset=utf-8], but was [%s]", resp.Header.Get("Content-Type"))
    }
    if resp.StatusCode != 200 {
        t.Errorf("Expected status 200, was %v", resp.StatusCode)
    }

    var ack map[string]bool
    err := json.NewDecoder(resp.Body).Decode(&ack)
    if err != nil {
        t.Error(err.Error())
    }
    if _, ok := ack["created"]; !ok {
        t.Errorf("Expected [created] acknowledgement")
    }
}
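The list tests above pass env filters as NAME|VALUE pairs in the env query parameter and hand-encode the pipe as %7C. A small sketch of building the same query programmatically with net/url, which produces the identical encoding; the filter values here are illustrative only, not part of the repository:

package main

import (
    "fmt"
    "net/url"
)

// Builds the kind of query string TestEndpoints_ListRuns hand-writes.
// url.Values.Encode percent-escapes the pipe in "E1|V1" as %7C.
func main() {
    q := url.Values{}
    q.Add("status", "RUNNING")
    q.Add("status", "QUEUED") // repeated keys become status=...&status=...
    q.Set("limit", "100")
    q.Set("offset", "2")
    q.Set("sort_by", "started_at")
    q.Set("order", "desc")
    q.Set("cluster", "cupcake")
    q.Set("env", "E1|V1") // encoded as env=E1%7CV1

    fmt.Println("/api/v1/history?" + q.Encode())
}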

================================================
FILE: flotilla/router.go
================================================
package flotilla

import (
    muxtrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/gorilla/mux"
)

// NewRouter creates and returns a Mux Router
func NewRouter(ep endpoints) *muxtrace.Router {
    r := muxtrace.NewRouter()

    v1 := r.PathPrefix("/api/v1").Subrouter()
    v1.HandleFunc("/task", ep.ListDefinitions).Methods("GET")
    v1.HandleFunc("/task", ep.CreateDefinition).Methods("POST")
    v1.HandleFunc("/task/{definition_id}", ep.GetDefinition).Methods("GET")
    v1.HandleFunc("/task/{definition_id}", ep.UpdateDefinition).Methods("PUT")
    v1.HandleFunc("/task/{definition_id}", ep.DeleteDefinition).Methods("DELETE")
    v1.HandleFunc("/task/{definition_id}/execute", ep.CreateRun).Methods("PUT")
    v1.HandleFunc("/task/alias/{alias}", ep.GetDefinitionByAlias).Methods("GET")
    v1.HandleFunc("/task/alias/{alias}/execute", ep.CreateRunByAlias).Methods("PUT")
    v1.HandleFunc("/history", ep.ListRuns).Methods("GET")
    v1.HandleFunc("/history/{run_id}", ep.GetRun).Methods("GET")
    v1.HandleFunc("/task/history/{run_id}", ep.GetRun).Methods("GET")
    v1.HandleFunc("/task/{definition_id}/history", ep.ListDefinitionRuns).Methods("GET")
    v1.HandleFunc("/task/{definition_id}/history/{run_id}", ep.GetRun).Methods("GET")
    v1.HandleFunc("/task/{definition_id}/history/{run_id}", ep.StopRun).Methods("DELETE")
    v1.HandleFunc("/{run_id}/status", ep.UpdateRun).Methods("PUT")
    v1.HandleFunc("/{run_id}/logs", ep.GetLogs).Methods("GET")
    v1.HandleFunc("/{run_id}/events", ep.GetEvents).Methods("GET")
    v1.HandleFunc("/groups", ep.GetGroups).Methods("GET")
    v1.HandleFunc("/tags", ep.GetTags).Methods("GET")
    v1.HandleFunc("/clusters", ep.ListClusters).Methods("GET")

    v2 := r.PathPrefix("/api/v2").Subrouter()
    v2.HandleFunc("/task/{definition_id}/execute", ep.CreateRunV2).Methods("PUT")

    v4 := r.PathPrefix("/api/v4").Subrouter()
    v4.HandleFunc("/task/{definition_id}/execute", ep.CreateRunV4).Methods("PUT")

    v5 := r.PathPrefix("/api/v5").Subrouter()
    v5.HandleFunc("/worker", ep.ListWorkers).Methods("GET")
    v5.HandleFunc("/worker", ep.BatchUpdateWorkers).Methods("PUT")
    v5.HandleFunc("/worker/{worker_type}", ep.GetWorker).Methods("GET")
    v5.HandleFunc("/worker/{worker_type}", ep.UpdateWorker).Methods("PUT")

    v6 := r.PathPrefix("/api/v6").Subrouter()
    v6.HandleFunc("/clusters", ep.ListClusters).Methods("GET")
    v6.HandleFunc("/clusters", ep.CreateCluster).Methods("POST")
    v6.HandleFunc("/clusters/{cluster_id}", ep.GetCluster).Methods("GET")
    v6.HandleFunc("/clusters/{cluster_id}", ep.UpdateCluster).Methods("PUT")
    v6.HandleFunc("/clusters/{cluster_id}", ep.DeleteCluster).Methods("DELETE")
    v6.HandleFunc("/{run_id}/events", ep.GetEvents).Methods("GET")
    v6.HandleFunc("/groups", ep.GetGroups).Methods("GET")
    v6.HandleFunc("/health", ep.HealthCheck).Methods("GET")
    v6.HandleFunc("/history", ep.ListRuns).Methods("GET")
    v6.HandleFunc("/history/{run_id}", ep.GetRun).Methods("GET")
    v6.HandleFunc("/tags", ep.GetTags).Methods("GET")
    v6.HandleFunc("/task", ep.ListDefinitions).Methods("GET")
    v6.HandleFunc("/task", ep.CreateDefinition).Methods("POST")
    v6.HandleFunc("/task/alias/{alias}", ep.GetDefinitionByAlias).Methods("GET")
    v6.HandleFunc("/task/alias/{alias}/execute", ep.CreateRunByAlias).Methods("PUT")
    v6.HandleFunc("/task/{definition_id}", ep.GetDefinition).Methods("GET")
    v6.HandleFunc("/task/{definition_id}", ep.UpdateDefinition).Methods("PUT")
    v6.HandleFunc("/task/{definition_id}", ep.DeleteDefinition).Methods("DELETE")
    v6.HandleFunc("/task/{definition_id}/execute", ep.CreateRunV4).Methods("PUT")
    v6.HandleFunc("/task/{definition_id}/history", ep.ListDefinitionRuns).Methods("GET")
    v6.HandleFunc("/task/{definition_id}/history/{run_id}", ep.GetRun).Methods("GET")
    v6.HandleFunc("/task/{definition_id}/history/{run_id}", ep.StopRun).Methods("DELETE")
    v6.HandleFunc("/task/history/{run_id}", ep.GetRun).Methods("GET")
    v6.HandleFunc("/{run_id}/status", ep.UpdateRun).Methods("PUT")
    v6.HandleFunc("/{run_id}/status", ep.GetRunStatus).Methods("GET")
    v6.HandleFunc("/{run_id}/logs", ep.GetLogs).Methods("GET")

    v7 := r.PathPrefix("/api/v7").Subrouter()
    v7.HandleFunc("/template/{template_id}/execute", ep.CreateTemplateRun).Methods("PUT")
    v7.HandleFunc("/template/name/{template_name}/version/{template_version}/execute", ep.CreateTemplateRunByName).Methods("PUT")
    v7.HandleFunc("/template", ep.ListTemplates).Methods("GET")
    v7.HandleFunc("/template", ep.CreateTemplate).Methods("POST")
    v7.HandleFunc("/template/{template_id}", ep.GetTemplate).Methods("GET")
    v7.HandleFunc("/template/history/{run_id}", ep.GetRun).Methods("GET")
    v7.HandleFunc("/template/{template_id}/history", ep.ListTemplateRuns).Methods("GET")
    v7.HandleFunc("/template/{template_id}/history/{run_id}", ep.GetRun).Methods("GET")
    v7.HandleFunc("/template/{template_id}/history/{run_id}", ep.StopRun).Methods("DELETE")

    return r
}
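Because muxtrace.Router wraps gorilla/mux and satisfies http.Handler, the router mounts directly into a standard HTTP server. A minimal sketch of that mounting follows; the serveSketch name, listen address, and the assumption that a fully wired endpoints value is available are all placeholders (in this repository the real server wiring lives in flotilla/app.go):

package flotilla

import "net/http"

// serveSketch is illustrative only: it shows that the traced router from
// NewRouter can be passed straight to net/http. The address is a
// placeholder assumption.
func serveSketch(ep endpoints) error {
    router := NewRouter(ep)
    // muxtrace.Router implements http.Handler, so it mounts directly.
    return http.ListenAndServe(":3000", router)
}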

================================================
FILE: go.mod
================================================
module github.com/stitchfix/flotilla-os

go 1.26.1

require (
    github.com/DataDog/datadog-go/v5 v5.1.0
    github.com/Masterminds/sprig v2.22.0+incompatible
    github.com/aws/aws-sdk-go v1.40.18
    github.com/go-kit/kit v0.9.0
    github.com/go-redis/redis v6.15.9+incompatible
    github.com/gorilla/mux v1.7.4-0.20190701202633-d83b6ffe499a
    github.com/jmoiron/sqlx v1.2.1-0.20190426154859-38398a30ed85
    github.com/lib/pq v1.10.2
    github.com/nu7hatch/gouuid v0.0.0-20131221200532-179d4d0c4d8d
    github.com/pkg/errors v0.9.1
    github.com/rs/cors v1.6.1-0.20190613161432-33ffc0734c60
    github.com/spf13/viper v1.4.1-0.20190614151712-3349bd9cc288
    github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f
    go.uber.org/multierr v1.5.0
    gopkg.in/DataDog/dd-trace-go.v1 v1.38.0
    gopkg.in/tomb.v2 v2.0.0-20161208151619-d5d1b5820637
    k8s.io/api v0.35.0
    k8s.io/apimachinery v0.35.0
    k8s.io/client-go v0.35.0
    k8s.io/metrics v0.35.0
)

require (
    github.com/DataDog/datadog-agent/pkg/obfuscate v0.0.0-20211129110424-6491aa3bf583 // indirect
    github.com/DataDog/datadog-go v4.8.3+incompatible // indirect
    github.com/DataDog/sketches-go v1.0.0 // indirect
    github.com/Masterminds/goutils v1.1.1 // indirect
    github.com/Masterminds/semver v1.5.0 // indirect
    github.com/Microsoft/go-winio v0.5.1 // indirect
    github.com/cespare/xxhash/v2 v2.1.2 // indirect
    github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
    github.com/dgraph-io/ristretto v0.1.0 // indirect
    github.com/dustin/go-humanize v1.0.0 // indirect
    github.com/emicklei/go-restful/v3 v3.12.2 // indirect
    github.com/fsnotify/fsnotify v1.4.9 // indirect
    github.com/fxamacker/cbor/v2 v2.9.0 // indirect
    github.com/go-logfmt/logfmt v0.5.0 // indirect
    github.com/go-logr/logr v1.4.3 // indirect
    github.com/go-openapi/jsonpointer v0.21.0 // indirect
    github.com/go-openapi/jsonreference v0.20.2 // indirect
    github.com/go-openapi/swag v0.23.0 // indirect
    github.com/golang/glog v1.2.4 // indirect
    github.com/golang/protobuf v1.5.4 // indirect
    github.com/google/gnostic-models v0.7.0 // indirect
    github.com/google/uuid v1.6.0 // indirect
    github.com/hashicorp/hcl v1.0.0 // indirect
    github.com/huandu/xstrings v1.3.0 // indirect
    github.com/imdario/mergo v0.3.6 // indirect
    github.com/jmespath/go-jmespath v0.4.0 // indirect
    github.com/josharian/intern v1.0.0 // indirect
    github.com/json-iterator/go v1.1.12 // indirect
    github.com/magiconair/properties v1.8.1 // indirect
    github.com/mailru/easyjson v0.7.7 // indirect
    github.com/mitchellh/copystructure v1.0.0 // indirect
    github.com/mitchellh/mapstructure v1.4.2 // indirect
    github.com/mitchellh/reflectwalk v1.0.0 // indirect
    github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
    github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
    github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
    github.com/pelletier/go-toml v1.7.0 // indirect
    github.com/philhofer/fwd v1.1.1 // indirect
    github.com/spf13/afero v1.2.2 // indirect
    github.com/spf13/cast v1.3.0 // indirect
    github.com/spf13/jwalterweatherman v1.0.0 // indirect
github.com/spf13/pflag v1.0.9 // indirect github.com/subosito/gotenv v1.2.0 // indirect github.com/tinylib/msgp v1.1.2 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect go.uber.org/atomic v1.6.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/crypto v0.45.0 // indirect golang.org/x/net v0.47.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sys v0.38.0 // indirect golang.org/x/term v0.37.0 // indirect golang.org/x/text v0.31.0 // indirect golang.org/x/time v0.9.0 // indirect golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect google.golang.org/protobuf v1.36.8 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect sigs.k8s.io/yaml v1.6.0 // indirect ) ================================================ FILE: go.sum ================================================ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= 
cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= cloud.google.com/go/pubsub v1.4.0/go.mod h1:LFrqilwgdw4X2cJS9ALgzYmMu+ULyrUN6IHV3CPK4TM= cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/Azure/go-autorest/autorest v0.9.0/go.mod h1:xyHB1BMZT0cuDHU7I0+g046+BFDTQ8rEZB0s4Yfa6bI= github.com/Azure/go-autorest/autorest/adal v0.5.0/go.mod h1:8Z9fGy2MpX0PvDjB1pEgQTmVqjGhiHBW7RJJEciWzS0= github.com/Azure/go-autorest/autorest/date v0.1.0/go.mod h1:plvfp3oPSKwf2DNjlBjWF/7vwR+cUD/ELuzDCXwHUVA= github.com/Azure/go-autorest/autorest/mocks v0.1.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= github.com/Azure/go-autorest/autorest/mocks v0.2.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= github.com/Azure/go-autorest/logger v0.1.0/go.mod h1:oExouG+K6PryycPJfVSxi/koC6LSNgds39diKLz7Vrc= github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbtp2fGCgRFtBroKn4Dk= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/DataDog/datadog-agent/pkg/obfuscate v0.0.0-20211129110424-6491aa3bf583 h1:3nVO1nQyh64IUY6BPZUpMYMZ738Pu+LsMt3E0eqqIYw= github.com/DataDog/datadog-agent/pkg/obfuscate v0.0.0-20211129110424-6491aa3bf583/go.mod h1:EP9f4GqaDJyP1F5jTNMtzdIpw3JpNs3rMSJOnYywCiw= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/datadog-go v4.8.2+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/datadog-go v4.8.3+incompatible h1:fNGaYSuObuQb5nzeTQqowRAd9bpDIRRV4/gUtIBjh8Q= github.com/DataDog/datadog-go v4.8.3+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/datadog-go/v5 v5.0.2/go.mod h1:ZI9JFB4ewXbw1sBnF4sxsR2k1H3xjV+PUAOUsHvKpcU= github.com/DataDog/datadog-go/v5 v5.1.0 h1:Zmq3tCk9+Tdq8Du73M71Zo6Dyx+cEo9QkCSCqQlHFaQ= github.com/DataDog/datadog-go/v5 v5.1.0/go.mod h1:KhiYb2Badlv9/rofz+OznKoEF5XKTonWyhx5K83AP8E= github.com/DataDog/gostackparse v0.5.0/go.mod h1:lTfqcJKqS9KnXQGnyQMCugq3u1FP6UZMfWR0aitKFMM= github.com/DataDog/sketches-go v1.0.0 h1:chm5KSXO7kO+ywGWJ0Zs6tdmWU8PBXSbywFVciL6BG4= github.com/DataDog/sketches-go v1.0.0/go.mod h1:O+XkJHWk9w4hDwY2ZUDU31ZC9sNYlYo8DiFsxjYeo1k= github.com/DataDog/zstd v1.3.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 
v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Masterminds/sprig v2.22.0+incompatible h1:z4yfnGrZ7netVz+0EDJ0Wi+5VZCSYp4Z0m2dk6cEM60= github.com/Masterminds/sprig v2.22.0+incompatible/go.mod h1:y6hNFY5UBTIWBxnzTeuNhlNS5hqE0NB0E6fgfo2Br3o= github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/Microsoft/go-winio v0.5.1 h1:aPJp2QD7OOrhO5tQXqQoGSJc+DjDtWTGLOmNyAm6FgY= github.com/Microsoft/go-winio v0.5.1/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/Shopify/sarama v1.22.0/go.mod h1:lm3THZ8reqBDBQKQyb5HB3sY1lKp3grEbQ81aWSgPp4= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-metrics v0.3.0/go.mod h1:zXjbSimjXTd7vOpY8B0/2LpvNvDoXBuplAD+gJD3GYs= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/aws/aws-sdk-go v1.25.37/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= github.com/aws/aws-sdk-go v1.40.18 h1:ifWmCucvV20Kyx2t/l9+8gGqNzZ4CW+HO5uz8bCOK/o= github.com/aws/aws-sdk-go v1.40.18/go.mod h1:585smgzpB/KqRA+K3y/NL/oYRqQvpNJYvLm+LY1U59Q= github.com/aws/aws-sdk-go-v2 v1.0.0/go.mod h1:smfAbmpW+tcRVuNUjo3MOArSZmW72t62rkCzc2i0TWM= github.com/aws/aws-sdk-go-v2/config v1.0.0/go.mod h1:WysE/OpUgE37tjtmtJd8GXgT8s1euilE5XtUkRNUQ1w= github.com/aws/aws-sdk-go-v2/credentials v1.0.0/go.mod h1:/SvsiqBf509hG4Bddigr3NB12MIpfHhZapyBurJe8aY= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.0.0/go.mod h1:wpMHDCXvOXZxGCRSidyepa8uJHY4vaBGfY2/+oKU/Bc= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.0.0/go.mod h1:3jExOmpbjgPnz2FJaMOfbSk1heTkZ66aD3yNtVhnjvI= github.com/aws/aws-sdk-go-v2/service/sqs v1.0.0/go.mod h1:w5BclCU8ptTbagzXS/fHBr+vAyXUjggg/72qDIURKMk= github.com/aws/aws-sdk-go-v2/service/sts v1.0.0/go.mod h1:5f+cELGATgill5Pu3/vK3Ebuigstc+qYEHW5MvGWZO4= github.com/aws/smithy-go v1.0.0/go.mod h1:EzMw8dbp/YJL4A5/sbhGddag+NPT7q084agLbB9LgIw= github.com/aws/smithy-go v1.11.0/go.mod h1:3xHYmszWVx2c0kIwQeEVf9uSm4fYZt67FBJnwub1bgM= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/bgentry/speakeasy v0.1.0/go.mod 
h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/bradfitz/gomemcache v0.0.0-20220106215444-fb4bf637b56d/go.mod h1:H0wQNHz2YrLsuXOZozoeDmnHXkNCRmMW0gwFWDfEZDA= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= github.com/confluentinc/confluent-kafka-go v1.4.0/go.mod h1:u2zNLny2xq+5rWeTQjFHbDzzNuba4P1vo31r9r4uAdg= github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/denisenkom/go-mssqldb v0.0.0-20200428022330-06a60b6afbbc/go.mod h1:xbL0rPBG9cCiLr28tMa8zpbdarY27NDyej4t/EjAShU= github.com/denisenkom/go-mssqldb v0.11.0 h1:9rHa233rhdOyrz2GcP9NM+gi2psgJZ4GWDpL/7ND8HI= github.com/denisenkom/go-mssqldb v0.11.0/go.mod h1:xbL0rPBG9cCiLr28tMa8zpbdarY27NDyej4t/EjAShU= github.com/dgraph-io/ristretto v0.1.0 h1:Jv3CGQHp9OjuMBSne1485aDpUkTKEcUqF+jm/LuerPI= github.com/dgraph-io/ristretto 
v0.1.0/go.mod h1:fux0lOrBhrVCJd3lcTHsIJhq1T2rokOu6v9Vcb3Q9ug= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= github.com/elastic/go-elasticsearch/v6 v6.8.5/go.mod h1:UwaDJsD3rWLM5rKNFzv9hgox93HoX8utj1kxD9aFUcI= github.com/elastic/go-elasticsearch/v7 v7.17.1/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4= github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/erikstmartin/go-testdb v0.0.0-20160219214506-8d10e4a1bae5/go.mod h1:a2zkGnVExMxdzMo3M0Hi/3sEU+cWnZpSni0O6/Yb/P0= github.com/evanphx/json-patch v4.2.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/garyburd/redigo v1.6.3/go.mod h1:rTb6epsqigu3kYKBnaF028A7Tf/Aw5s0cqA47doKKqw= github.com/ghodss/yaml 
v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.7.0/go.mod h1:jD2toBW3GZUr5UMcdrwQA10I7RuaFOl/SGeDjXkfUtY= github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= github.com/go-asn1-ber/asn1-ber v1.3.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= github.com/go-chi/chi v1.5.0/go.mod h1:REp24E+25iKvxgeTfHmdUoL5x15kBiDBlnIl5bCwe2k= github.com/go-chi/chi/v5 v5.0.0/go.mod h1:BBug9lr0cqtdAhsu6R4AAdvufI0/XBzAQSsUqJpoZOs= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0 h1:wDJmvq38kDhkVxi50ni9ykkdUr1PKgqKOoi01fa0Mdk= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-ldap/ldap/v3 v3.1.3/go.mod h1:3rbOH3jRS2u6jg2rJnKAMLE/xQyCKIveG2Sa/Cohzb8= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0 h1:TrB8swr/68K7m9CcGut2g3UOihhbcbiMAYiuTXdEih4= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg= github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc= github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-pg/pg/v10 v10.0.0/go.mod h1:XHU1AkQW534GFuUdSiQ46+Xw6Ah+9+b8DlT4YwhiXL8= github.com/go-pg/zerochecker v0.2.0/go.mod h1:NJZ4wKL0NmTtz0GKCoJ8kym6Xn/EQzXRl2OnAe7MmDo= github.com/go-playground/assert/v2 
v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg= github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA= github.com/go-redis/redis/v7 v7.1.0/go.mod h1:JDNMw23GTyLNC4GZu9njt15ctBQVn7xjRfnwdHj/Dcg= github.com/go-redis/redis/v8 v8.0.0/go.mod h1:isLoQT/NFSP7V67lyvM9GmdvLdyZ7pEhsXvvyQtnQTo= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0 h1:p104kn46Q8WdvHunIJ9dAyjPVtrBPhSr3KT2yUst43I= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/go-test/deep v1.0.2-0.20181118220953-042da051cf31/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= github.com/go-test/deep v1.0.2/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= github.com/gobuffalo/attrs v0.0.0-20190224210810-a9411de4debd/go.mod h1:4duuawTqi2wkkpB4ePgWMaai6/Kc6WEz83bhFwpHzj0= github.com/gobuffalo/depgen v0.0.0-20190329151759-d478694a28d3/go.mod h1:3STtPUQYuzV0gBVOY3vy6CfMm/ljR4pABfrTeHNLHUY= github.com/gobuffalo/depgen v0.1.0/go.mod h1:+ifsuy7fhi15RWncXQQKjWS9JPkdah5sZvtHc2RXGlg= github.com/gobuffalo/envy v1.6.15/go.mod h1:n7DRkBerg/aorDM8kbduw5dN3oXGswK5liaSCx4T5NI= github.com/gobuffalo/envy v1.7.0/go.mod h1:n7DRkBerg/aorDM8kbduw5dN3oXGswK5liaSCx4T5NI= github.com/gobuffalo/flect v0.1.0/go.mod h1:d2ehjJqGOH/Kjqcoz+F7jHTBbmDb38yXA598Hb50EGs= github.com/gobuffalo/flect v0.1.1/go.mod h1:8JCgGVbRjJhVgD6399mQr4fx5rRfGKVzFjbj6RE/9UI= github.com/gobuffalo/flect v0.1.3/go.mod h1:8JCgGVbRjJhVgD6399mQr4fx5rRfGKVzFjbj6RE/9UI= github.com/gobuffalo/genny v0.0.0-20190329151137-27723ad26ef9/go.mod h1:rWs4Z12d1Zbf19rlsn0nurr75KqhYp52EAGGxTbBhNk= github.com/gobuffalo/genny v0.0.0-20190403191548-3ca520ef0d9e/go.mod h1:80lIj3kVJWwOrXWWMRzzdhW3DsrdjILVil/SFKBzF28= github.com/gobuffalo/genny v0.1.0/go.mod h1:XidbUqzak3lHdS//TPu2OgiFB+51Ur5f7CSnXZ/JDvo= github.com/gobuffalo/genny v0.1.1/go.mod h1:5TExbEyY48pfunL4QSXxlDOmdsD44RRq4mVZ0Ex28Xk= github.com/gobuffalo/gitgen v0.0.0-20190315122116-cc086187d211/go.mod h1:vEHJk/E9DmhejeLeNt7UVvlSGv3ziL+djtTr3yyzcOw= github.com/gobuffalo/gogen v0.0.0-20190315121717-8f38393713f5/go.mod h1:V9QVDIxsgKNZs6L2IYiGR8datgMhB577vzTDqypH360= github.com/gobuffalo/gogen v0.1.0/go.mod h1:8NTelM5qd8RZ15VjQTFkAW6qOMx5wBbW4dSCS3BY8gg= github.com/gobuffalo/gogen v0.1.1/go.mod h1:y8iBtmHmGc4qa3urIyo1shvOD8JftTtfcKi+71xfDNE= github.com/gobuffalo/logger 
v0.0.0-20190315122211-86e12af44bc2/go.mod h1:QdxcLw541hSGtBnhUc4gaNIXRjiDppFGaDqzbrBd3v8= github.com/gobuffalo/mapi v1.0.1/go.mod h1:4VAGh89y6rVOvm5A8fKFxYG+wIW6LO1FMTG9hnKStFc= github.com/gobuffalo/mapi v1.0.2/go.mod h1:4VAGh89y6rVOvm5A8fKFxYG+wIW6LO1FMTG9hnKStFc= github.com/gobuffalo/packd v0.0.0-20190315124812-a385830c7fc0/go.mod h1:M2Juc+hhDXf/PnmBANFCqx4DM3wRbgDvnVWeG2RIxq4= github.com/gobuffalo/packd v0.1.0/go.mod h1:M2Juc+hhDXf/PnmBANFCqx4DM3wRbgDvnVWeG2RIxq4= github.com/gobuffalo/packr/v2 v2.0.9/go.mod h1:emmyGweYTm6Kdper+iywB6YK5YzuKchGtJQZ0Odn4pQ= github.com/gobuffalo/packr/v2 v2.2.0/go.mod h1:CaAwI0GPIAv+5wKLtv8Afwl+Cm78K/I/VCm/3ptBN+0= github.com/gobuffalo/syncx v0.0.0-20190224160051-33c29581e754/go.mod h1:HhnNqWY95UYwwW3uSASeV7vtgYkT2t16hJgV3AEPUpw= github.com/gocql/gocql v0.0.0-20220224095938-0eacd3183625/go.mod h1:3gM2c4D3AnkISwBxGnMMsS8Oy4y2lhbPRsH4xnJrHG8= github.com/gofiber/fiber/v2 v2.11.0/go.mod h1:oZTLWqYnqpMMuF922SjGbsYZsdpE1MCfh416HNdweIM= github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= github.com/gofrs/uuid v4.0.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe h1:lXe2qZdvpiX5WZkZR4hgp4KJVfY3nMkvmwbVkpv1rVY= github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.2.4 h1:CNNw5U8lSiiBk7druxtSHHTsRWcxKoac6kZKm2peBBc= github.com/golang/glog v1.2.4/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v0.0.0-20161109072736-4bd1920723d7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/gomodule/redigo v1.7.0/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI= github.com/google/gofuzz v1.0.0/go.mod 
h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20210423192551-a2663126120b/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEoIEcSTewFxm1c5g8= github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/mux v1.5.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.4-0.20190701202633-d83b6ffe499a h1:Rhv8JUcDkZJkUmzzjpysRtn5joJ/3T8Lt9QpdJZUz1c= github.com/gorilla/mux v1.7.4-0.20190701202633-d83b6ffe499a/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/graph-gophers/graphql-go v1.3.0/go.mod h1:9CQHMSxwO4MprSdzoIEobiHpoLtHm77vfxsvsIN5Vuc= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= github.com/hashicorp/consul/api v1.0.0/go.mod h1:mbFwfRxOTDHZpT3iUsMAFcLNoVm6Xbe1xZ6KiSm8FY0= github.com/hashicorp/consul/internal v0.1.0/go.mod h1:zi9bMZYbiPHyAjgBWo7kCUcy5l2NrTdrkVupCc7Oo6c= github.com/hashicorp/errwrap v1.0.0/go.mod 
h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v0.0.0-20180709165350-ff2cf002a8dd/go.mod h1:9bjs9uLqI8l75knNv3lV1kA55veR+WUPSiKIWcQHudI= github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-hclog v0.16.2/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-kms-wrapping/entropy v0.1.0/go.mod h1:d1g9WGtAunDNpek8jUIEJnBlbgKS1N2Q61QkHiZyR1g= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/go-plugin v1.0.1/go.mod h1:++UyYGoz3o5w9ZzAdZxtQKrWWP+iqPBn3cQptSMzBuY= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-retryablehttp v0.6.6/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY= github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-version v1.1.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/memberlist v0.1.6/go.mod 
h1:5VDNHjqFMgEcclnwmkCnC99IPwxBmIsxwY8qn+Nl0H4= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hashicorp/serf v0.8.6/go.mod h1:P/AVgr4UHsUYqVHG1y9eFhz8S35pqhGhLZaDpfGKIMo= github.com/hashicorp/vault/api v1.1.0/go.mod h1:R3Umvhlxi2TN7Ex2hzOowyeNb+SfbVWI973N+ctaFMk= github.com/hashicorp/vault/sdk v0.1.14-0.20200519221838-e0cfd64bc267/go.mod h1:WX57W2PwkrOPQ6rVQk+dy5/htHIaB4aBM70EwKThu10= github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huandu/xstrings v1.3.0 h1:gvV6jG9dTgFEncxo+AF7PH6MZXi/vZl25owA/8Dg8Wo= github.com/huandu/xstrings v1.3.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo= github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= github.com/jackc/chunkreader/v2 v2.0.1/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= github.com/jackc/pgconn v0.0.0-20190420214824-7e0022ef6ba3/go.mod h1:jkELnwuX+w9qN5YIfX0fl88Ehu4XC3keFuOJJk9pcnA= github.com/jackc/pgconn v0.0.0-20190824142844-760dd75542eb/go.mod h1:lLjNuW/+OfW9/pnVKPazfWOgNfH2aPem8YQ7ilXGvJE= github.com/jackc/pgconn v0.0.0-20190831204454-2fabfa3c18b7/go.mod h1:ZJKsE/KZfsUgOEh9hBm+xYTstcNHg7UPMVJqRfQxq4s= github.com/jackc/pgconn v1.4.0/go.mod h1:Y2O3ZDF0q4mMacyWV3AstPJpeHXWGEetiFttmq5lahk= github.com/jackc/pgconn v1.5.0/go.mod h1:QeD3lBfpTFe8WUnPZWN5KY/mB8FGMIYRdd8P8Jr0fAI= github.com/jackc/pgconn v1.5.1-0.20200601181101-fa742c524853/go.mod h1:QeD3lBfpTFe8WUnPZWN5KY/mB8FGMIYRdd8P8Jr0fAI= github.com/jackc/pgconn v1.6.4/go.mod h1:w2pne1C2tZgP+TvjqLpOigGzNqjBgQW9dUw/4Chex78= github.com/jackc/pgconn v1.8.0/go.mod h1:1C2Pb36bGIP9QHGBYCjnyhqu7Rv3sGshaQUvmfGIB/o= github.com/jackc/pgconn v1.9.0/go.mod h1:YctiPyvzfU11JFxoXokUOOKQXQmDMoJL9vJzHH8/2JY= github.com/jackc/pgconn v1.9.1-0.20210724152538-d89c8390a530/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= github.com/jackc/pgconn v1.10.1/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8= github.com/jackc/pgmock v0.0.0-20190831213851-13a1b77aafa2/go.mod h1:fGZlG77KXmcq05nJLRkk0+p82V8B8Dw8KN2/V9c/OAE= github.com/jackc/pgmock v0.0.0-20201204152224-4fe30f7445fd/go.mod h1:hrBW0Enj2AZTNpt/7Y5rr2xe/9Mn757Wtb2xeBzPv2c= github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65/go.mod h1:5R2h2EEX+qri8jOWMbJCtaPWkrrNc7OHwsp2TCqp7ak= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgproto3 v1.1.0/go.mod h1:eR5FA3leWg7p9aeAqi37XOTgTIbkABlvcPB3E5rlc78= github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190420180111-c116219b62db/go.mod h1:bhq50y+xrl9n5mRYyCBFKkpRVTLYJVWeCc+mEAI3yXA= github.com/jackc/pgproto3/v2 
v2.0.0-alpha1.0.20190609003834-432c2951c711/go.mod h1:uH0AWtUmuShn0bcesswc4aBTWGvw0cAxIJp+6OB//Wg= github.com/jackc/pgproto3/v2 v2.0.0-rc3/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM= github.com/jackc/pgproto3/v2 v2.0.0-rc3.0.20190831210041-4c03ce451f29/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM= github.com/jackc/pgproto3/v2 v2.0.1/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= github.com/jackc/pgproto3/v2 v2.0.2/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= github.com/jackc/pgproto3/v2 v2.0.6/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= github.com/jackc/pgproto3/v2 v2.1.1/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= github.com/jackc/pgproto3/v2 v2.2.0/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= github.com/jackc/pgservicefile v0.0.0-20200307190119-3430c5407db8/go.mod h1:vsD4gTJCa9TptPL8sPkXrLZ+hDuNrZCnj29CQpr4X1E= github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b/go.mod h1:vsD4gTJCa9TptPL8sPkXrLZ+hDuNrZCnj29CQpr4X1E= github.com/jackc/pgtype v0.0.0-20190421001408-4ed0de4755e0/go.mod h1:hdSHsc1V01CGwFsrv11mJRHWJ6aifDLfdV3aVjFF0zg= github.com/jackc/pgtype v0.0.0-20190824184912-ab885b375b90/go.mod h1:KcahbBH1nCMSo2DXpzsoWOAfFkdEtEJpPbVLq8eE+mc= github.com/jackc/pgtype v0.0.0-20190828014616-a8802b16cc59/go.mod h1:MWlu30kVJrUS8lot6TQqcg7mtthZ9T0EoIBFiJcmcyw= github.com/jackc/pgtype v1.2.0/go.mod h1:5m2OfMh1wTK7x+Fk952IDmI4nw3nPrvtQdM0ZT4WpC0= github.com/jackc/pgtype v1.3.1-0.20200510190516-8cd94a14c75a/go.mod h1:vaogEUkALtxZMCH411K+tKzNpwzCKU+AnPzBKZ+I+Po= github.com/jackc/pgtype v1.3.1-0.20200606141011-f6355165a91c/go.mod h1:cvk9Bgu/VzJ9/lxTO5R5sf80p0DiucVtN7ZxvaC4GmQ= github.com/jackc/pgtype v1.4.2/go.mod h1:JCULISAZBFGrHaOXIIFiyfzW5VY0GRitRr8NeJsrdig= github.com/jackc/pgtype v1.8.1-0.20210724151600-32e20a603178/go.mod h1:C516IlIV9NKqfsMCXTdChteoXmwgUceqaLfjg2e3NlM= github.com/jackc/pgtype v1.9.0/go.mod h1:LUMuVrfsFfdKGLw+AFFVv6KtHOFMwRgDDzBt76IqCA4= github.com/jackc/pgx/v4 v4.0.0-20190420224344-cc3461e65d96/go.mod h1:mdxmSJJuR08CZQyj1PVQBHy9XOp5p8/SHH6a0psbY9Y= github.com/jackc/pgx/v4 v4.0.0-20190421002000-1b8f0016e912/go.mod h1:no/Y67Jkk/9WuGR0JG/JseM9irFbnEPbuWV2EELPNuM= github.com/jackc/pgx/v4 v4.0.0-pre1.0.20190824185557-6972a5742186/go.mod h1:X+GQnOEnf1dqHGpw7JmHqHc1NxDoalibchSk9/RWuDc= github.com/jackc/pgx/v4 v4.5.0/go.mod h1:EpAKPLdnTorwmPUUsqrPxy5fphV18j9q3wrfRXgo+kA= github.com/jackc/pgx/v4 v4.6.1-0.20200510190926-94ba730bb1e9/go.mod h1:t3/cdRQl6fOLDxqtlyhe9UWgfIi9R8+8v8GKV5TRA/o= github.com/jackc/pgx/v4 v4.6.1-0.20200606145419-4e5062306904/go.mod h1:ZDaNWkt9sW1JMiNn0kdYBaLelIhw7Pg4qd+Vk6tw7Hg= github.com/jackc/pgx/v4 v4.8.1/go.mod h1:4HOLxrl8wToZJReD04/yB20GDwf4KBYETvlHciCnwW0= github.com/jackc/pgx/v4 v4.12.1-0.20210724153913-640aa07df17c/go.mod h1:1QD0+tgSXP7iUjYm9C1NxKhny7lq6ee99u/z+IHFcgs= github.com/jackc/pgx/v4 v4.14.0/go.mod h1:jT3ibf/A0ZVCp89rtCIN0zCJxcE74ypROmHEZYsG/j8= github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.1/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.2.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jinzhu/gorm v1.9.1/go.mod 
h1:Vla75njaFJ8clLU1W44h34PjIkijhjHIYnZxMqCdxqo= github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= github.com/jinzhu/now v1.1.1/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jinzhu/now v1.1.3/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= github.com/jmoiron/sqlx v1.2.1-0.20190426154859-38398a30ed85 h1:M3C5MxZHP36CMRk0c0XWgtnixXDIEh8RE1cnnjCbjzw= github.com/jmoiron/sqlx v1.2.1-0.20190426154859-38398a30ed85/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/julienschmidt/httprouter v1.1.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4= github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.12.2/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/compress v1.14.2/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= 
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/labstack/echo v3.3.10+incompatible/go.mod h1:0INS7j/VjnFxD4E2wkz67b8cVwCLbBmJyDaka6Cmk1s= github.com/labstack/echo/v4 v4.2.0/go.mod h1:AA49e0DZ8kk5jTOOCKNuPR6oTnBS0dYiM4FW1e6jwpg= github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= github.com/labstack/gommon v0.3.1/go.mod h1:uW6kP17uPlLJsD3ijUYn3/M5bAxtlZhMI6m3MFxTMTM= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.1.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.3.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.10.2 h1:AqzbZs4ZoCBp+GtejcpCpcxM3zlSMx29dXbUSeVtJb8= github.com/lib/pq v1.10.2/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20180730094502-03f2033d19d5/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/markbates/oncer v0.0.0-20181203154359-bf2de49a0be2/go.mod h1:Ld9puTsIW75CHf65OeIOkyKbteujpZVXDpWK6YGZbxE= github.com/markbates/safe v1.0.1/go.mod h1:nAqgmRi7cY2nqMc92/bSEeQA+R4OheNU2T1kNSCBdG0= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ= github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= 
github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/mattn/go-sqlite3 v1.14.12 h1:TJ1bhYJPV44phC+IMu1u2K/i5RriLTPe+yc68XDJ1Z0= github.com/mattn/go-sqlite3 v1.14.12/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/miekg/dns v1.1.25/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v0.0.0-20171004221916-a61a99592b77/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.3.2/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/mapstructure v1.4.2 h1:6h7AQ0yhTcIsmFmnAwQls75jp2Gzs4iB8W7pjMO+rqo= github.com/mitchellh/mapstructure v1.4.2/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod 
h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nu7hatch/gouuid v0.0.0-20131221200532-179d4d0c4d8d h1:VhgPp6v9qf9Agr/56bj7Y/xa04UccTW04VP0Qed4vnQ= github.com/nu7hatch/gouuid v0.0.0-20131221200532-179d4d0c4d8d/go.mod h1:YUTz3bUH2ZwIWBy3CJBeOBEugqcmXREj14T+iG/4k4U= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= github.com/onsi/ginkgo v1.14.1/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc= github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns= github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.10.2/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod 
h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml v1.7.0 h1:7utD74fnzVc/cpcyy8sjrlFr5vYpypUixARcHIMIGuI= github.com/pelletier/go-toml v1.7.0/go.mod h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAvS1LBMMhTE= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ= github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= github.com/pierrec/lz4 v0.0.0-20190327172049-315a67e90e41/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod 
h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rs/cors v1.6.1-0.20190613161432-33ffc0734c60 h1:zjQeTJDXNmRPVGSsU1G3VErobzE1BwlmHuBqdyR4JgE= github.com/rs/cors v1.6.1-0.20190613161432-33ffc0734c60/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU= github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/segmentio/kafka-go v0.4.29/go.mod h1:m1lXeqJtIFYZayv0shM/tjrAFljvWLTprxBHd+3PnaU= github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4= github.com/shopspring/decimal v0.0.0-20200227202807-02e2044944cc/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag 
v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.4.1-0.20190614151712-3349bd9cc288 h1:qWb7etNPDy3ShqmQ+e8YM+30P6D3/n+QUwrAwxWIfnk= github.com/spf13/viper v1.4.1-0.20190614151712-3349bd9cc288/go.mod h1:LLu5zwCkRPEBY0VPcRMqh58VtcO8Lp1DgqwstU7rYlk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v0.0.0-20151208002404-e3a8ff8ce365/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/tidwall/btree v0.3.0/go.mod h1:huei1BkDWJ3/sLXmO+bsCNELL+Bp2Kks9OLyQFkzvA8= github.com/tidwall/btree v1.1.0/go.mod h1:TzIRzen6yHbibdSfK6t8QimqbUnoxUSrZfeW7Uob0q4= github.com/tidwall/buntdb v1.2.0/go.mod h1:XLza/dhlwzO6dc5o/KWor4kfZSt3BP8QV+77ZMKfI58= github.com/tidwall/gjson v1.6.7/go.mod h1:zeFuBCIqD4sN/gmqBzZ4j7Jd6UcA2Fc56x7QFsv+8fI= github.com/tidwall/gjson v1.6.8/go.mod h1:zeFuBCIqD4sN/gmqBzZ4j7Jd6UcA2Fc56x7QFsv+8fI= github.com/tidwall/gjson v1.12.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/grect v0.1.0/go.mod h1:sa5O42oP6jWfTShL9ka6Sgmg3TgIK649veZe05B7+J8= github.com/tidwall/grect v0.1.4/go.mod h1:9FBsaYRaR0Tcy4UwefBX/UDcDcDy9V5jUcxHzv2jd5Q= github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/pretty v1.0.0/go.mod 
h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tidwall/pretty v1.0.2/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/rtred v0.1.2/go.mod h1:hd69WNXQ5RP9vHd7dqekAz+RIdtfBogmglkZSRxCHFQ= github.com/tidwall/tinyqueue v0.1.1/go.mod h1:O/QNHwrnjqr6IHItYrzoHAKYhBkLI67Q096fQP5zMYw= github.com/tinylib/msgp v1.1.2 h1:gWmO7n0Ys2RBEb7GPYB9Ujq8Mk5p2U08lRnmMcGy6BQ= github.com/tinylib/msgp v1.1.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc/go.mod h1:bciPuU6GHm1iF1pBvUfxfsH0Wmnc2VbpgvbI9ZWuIRs= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/twitchtv/twirp v8.1.1+incompatible/go.mod h1:RRJoFSAmTEh2weEqWtpPE3vFK5YBhA6bqp2l1kfCC5A= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.26.0/go.mod h1:cmWIqlu99AO/RKcp1HWaViTqc57FswJOfYYdPJBl8BA= github.com/valyala/fasthttp v1.32.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus= github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= github.com/vmihailenco/bufpool v0.1.11/go.mod h1:AFf/MOy3l2CFTKbxwt0mp2MwnqjNEs5H/UxrkA5jxTQ= github.com/vmihailenco/msgpack/v4 v4.3.11/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4= github.com/vmihailenco/msgpack/v5 v5.0.0-beta.1/go.mod h1:xlngVLeyQ/Qi05oQxhQ+oTuqa03RjMwMfk/7/TCs+QI= github.com/vmihailenco/msgpack/v5 v5.3.4/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= github.com/vmihailenco/tagparser v0.1.2/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.0.2/go.mod h1:1WAq6h33pAW+iRreB34OORO2Nf7qel3VV3fjBj+hCSs= github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM= github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 
h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f h1:mvXjJIHRZyhNuGassLTcXTwjiWq7NmjdavZsUnmFybQ= github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f/go.mod h1:5yf86TLmAcydyeJq5YvxkGPE2fm/u4myDekKRoLuqhs= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= github.com/zenazn/goji v1.0.1/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.mongodb.org/mongo-driver v1.5.1/go.mod h1:gRXCHX4Jo7J0IJ1oDQyUxF7jfy19UfxniMS4xxMmUqw= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opentelemetry.io/otel v0.11.0/go.mod h1:G8UCk+KooF2HLkgo8RHX9epABH/aRGYET7gQOqBVdB0= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A= go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee h1:0mgffUl7nfd+FpvXMVz4IDEaUSmT1ysygQC7qYo7sG4= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto 
v0.0.0-20180910181607-0e37d006457b/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= golang.org/x/crypto v0.0.0-20190418165655-df01cb2cc480/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= golang.org/x/crypto v0.0.0-20190422162423-af44ce270edf/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= golang.org/x/crypto v0.0.0-20190506204251-e1dfcc566284/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190911031432-227b76d455e7/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp 
v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20200901203048-c4f52b2c50aa/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20200908183739-ae8ad444f925/go.mod h1:1phAWC201xIgDyaFpmDeZkgf70Q4Pd/CNqfRtVPtxNw= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20200302205851-738671d3881b h1:Wh+f8QHJXR411sJR8/vRBTZ7YapZaRvUcLFFJhusH0k= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.1-0.20200828183125-ce943fd02449/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211020060615-d418f374d309/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190412183630-56d357773e84/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys 
v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190129075346-302c3dd5f1cc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190209173611-3b5209105503/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190419153524-e8e3143a4f4a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190531175056-4c3a928424d2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220227234510-4e6760a101f9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text 
v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190329151228-23e29df326fe/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190416151739-9c9e1878f421/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190420181800-aa740d480789/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190531172133-b3315ee88b7d/go.mod 
h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190823170909-c4a336ef6a2f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools 
v0.0.0-20200527183253-8e7acdbce89d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.25.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod 
h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200528110217-3d3490e7e671/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200726014623-da3ae01ef02d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.22.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.1/go.mod 
h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.32.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/DataDog/dd-trace-go.v1 v1.38.0 h1:vm/mYIZCEp5j2MoKPmwM3t6EGthxpvVbCOm2hRl5uDc= gopkg.in/DataDog/dd-trace-go.v1 v1.38.0/go.mod h1:GBhK4yaMJ1h329ivtKAqRNe1EZ944UnZwtz5lh7CnJc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inconshreveable/log15.v2 v2.0.0-20180818164646-67afb5ed74ec/go.mod h1:aPpfJ7XW+gOuirDoZ8gHhLh3kZ1B08FtV2bbmy7Jv3s= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/jinzhu/gorm.v1 v1.9.1/go.mod h1:56JJPUzbikvTVnoyP1nppSkbJ2L8sunqTBDY2fDrmFg= gopkg.in/olivere/elastic.v3 v3.0.75/go.mod h1:yDEuSnrM51Pc8dM5ov7U8aI/ToR3PG0llA8aRv2qmw0= gopkg.in/olivere/elastic.v5 v5.0.84/go.mod 
h1:LXF6q9XNBxpMqrcgax95C6xyARXWbbCXUrtTxrNrxJI= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/square/go-jose.v2 v2.5.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/tomb.v2 v2.0.0-20161208151619-d5d1b5820637 h1:yiW+nvdHb9LVqSHQBXfZCieqV4fzYhNBql77zY0ykqs= gopkg.in/tomb.v2 v2.0.0-20161208151619-d5d1b5820637/go.mod h1:BHsqpu/nsuzkT5BpiH1EMZPLyqSMM8JbIavyFACoFNk= gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gorm.io/driver/mysql v1.0.1/go.mod h1:KtqSthtg55lFp3S5kUXqlGaelnWpKitn4k1xZTnoiPw= gorm.io/driver/postgres v1.0.0/go.mod h1:wtMFcOzmuA5QigNsgEIb7O5lhvH1tHAF1RbWmLWV4to= gorm.io/driver/sqlserver v1.0.4/go.mod h1:ciEo5btfITTBCj9BkoUVDvgQbUdLWQNqdFY5OGuGnRg= gorm.io/gorm v1.9.19/go.mod h1:0HFTzE/SqkGTzK6TlDPPQbAYCluiVvhzoA1+aVyzenw= gorm.io/gorm v1.20.0/go.mod h1:0HFTzE/SqkGTzK6TlDPPQbAYCluiVvhzoA1+aVyzenw= gorm.io/gorm v1.20.6/go.mod h1:0HFTzE/SqkGTzK6TlDPPQbAYCluiVvhzoA1+aVyzenw= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4 h1:UoveltGrhghAA7ePc+e+QYDHXrBps2PqFZiHkGR/xK8= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= k8s.io/api v0.17.0/go.mod h1:npsyOePkeP0CPwyGfXDHxvypiYMJxBWAMpQxCaJ4ZxI= k8s.io/api v0.35.0 h1:iBAU5LTyBI9vw3L5glmat1njFK34srdLmktWwLTprlY= k8s.io/api v0.35.0/go.mod h1:AQ0SNTzm4ZAczM03QH42c7l3bih1TbAXYo0DkF8ktnA= k8s.io/apimachinery v0.17.0/go.mod h1:b9qmWdKlLuU9EBh+06BtLcSf/Mu89rWL33naRxs1uZg= k8s.io/apimachinery v0.35.0 h1:Z2L3IHvPVv/MJ7xRxHEtk6GoJElaAqDCCU0S6ncYok8= k8s.io/apimachinery v0.35.0/go.mod h1:jQCgFZFR1F4Ik7hvr2g84RTJSZegBc8yHgFWKn//hns= k8s.io/client-go v0.17.0/go.mod h1:TYgR6EUHs6k45hb6KWjVD6jFZvJV4gHDikv/It0xz+k= k8s.io/client-go v0.35.0 h1:IAW0ifFbfQQwQmga0UdoH0yvdqrbwMdq9vIFEhRpxBE= k8s.io/client-go 
v0.35.0/go.mod h1:q2E5AAyqcbeLGPdoRB+Nxe3KYTfPce1Dnu1myQdqz9o= k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= k8s.io/klog v0.0.0-20181102134211-b9b56d5dfc92/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= k8s.io/klog v0.3.0/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20191107075043-30be4d16710a/go.mod h1:1TqjTSzOxsLGIKfj0lK8EeCP7K1iUG65v09OM0/WG5E= k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE= k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= k8s.io/metrics v0.35.0 h1:xVFoqtAGm2dMNJAcB5TFZJPCen0uEqqNt52wW7ABbX8= k8s.io/metrics v0.35.0/go.mod h1:g2Up4dcBygZi2kQSEQVDByFs+VUwepJMzzQLJJLpq4M= k8s.io/utils v0.0.0-20191114184206-e782cd3c129f/go.mod h1:sZAwmy6armz5eXlNoLmJcl4F1QuKu7sr+mFQ0byX7Ew= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= mellium.im/sasl v0.2.1/go.mod h1:ROaEDLQNuf9vjKqE1SrAfnsobm2YKXT1gnN1uDp1PjQ= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/structured-merge-diff v0.0.0-20190525122527-15d366b2352e/go.mod h1:wWxsB5ozmmv/SG7nM11ayaAW51xMvak/t1r0CSlcokI= sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= ================================================ FILE: log/event.go ================================================ package log import ( "errors" "github.com/stitchfix/flotilla-os/clients/httpclient" "log" "os" "time" ) // // EventSink interface // type EventSink interface { Receive(keyvals ...interface{}) error } // // LocalEventSink - an implementation of EventSink that // simply logs events to os.Stderr. // type LocalEventSink struct { logger *log.Logger } // New Logs local sink. func NewLocalEventSink() *LocalEventSink { logger := log.New(os.Stderr, "[LocalEventSink] ", log.Ldate|log.Ltime|log.Lshortfile) return &LocalEventSink{logger} } // Receive Log events. 
func (localSink *LocalEventSink) Receive(keyvals ...interface{}) error {
    localSink.logger.Printf("%v", keyvals)
    return nil
}

//
// HTTPEventSink pushes arbitrary key-value
// events to an external location
//
type HTTPEventSink struct {
    path   string
    method string
    client httpclient.Client
}

//
// HTTPEvent represents an arbitrary key-value
// event
//
type HTTPEvent struct {
    Timestamp time.Time              `json:"timestamp"`
    Message   map[string]interface{} `json:"message"`
}

//
// NewHTTPSink initializes and returns an HTTPEventSink
//
func NewHTTPSink(host string, path string, method string) HTTPEventSink {
    return HTTPEventSink{
        path, method, httpclient.Client{Host: host},
    }
}

func (httpsink *HTTPEventSink) headers() map[string]string {
    return map[string]string{
        "Content-Type": "application/json",
    }
}

func (httpsink *HTTPEventSink) constructMessage(keyvals ...interface{}) (map[string]interface{}, error) {
    n := (len(keyvals) + 1) / 2
    m := make(map[string]interface{}, n)
    for i := 0; i < len(keyvals); i += 2 {
        k := keyvals[i]
        key, ok := k.(string)
        if !ok {
            return m, errors.New("not all keys are strings")
        }
        var v interface{}
        if i+1 < len(keyvals) {
            v = keyvals[i+1]
        }
        m[key] = v
    }
    return m, nil
}

//
// Receive consumes an arbitrary set of keys and values (k1,v1,k2,v2,...),
// constructs an HTTPEvent from them, and sends it to the configured
// http endpoint using the configured method
//
func (httpsink *HTTPEventSink) Receive(keyvals ...interface{}) error {
    var event HTTPEvent
    m, err := httpsink.constructMessage(keyvals...)
    if err != nil {
        return err
    }
    event.Message = m
    event.Timestamp = time.Now().UTC()

    var response interface{}
    return httpsink.client.Post(
        httpsink.method, httpsink.headers(), &event, &response)
}

================================================
FILE: log/event_test.go
================================================
package log

import (
    "encoding/json"
    "net/http"
    "net/http/httptest"
    "testing"
    "time"
)

type TestDomainSpecificEvent struct {
    Timestamp time.Time
    Message   struct {
        A int `json:"a"`
        B int `json:"b"`
    }
}

func TestHTTPEventSink_Receive(t *testing.T) {
    testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        content := r.Header.Get("Content-Type")
        if content != "application/json" {
            t.Errorf("Expected Content-Type to eq %s got %s", "application/json", content)
        }

        e := TestDomainSpecificEvent{}
        err := json.NewDecoder(r.Body).Decode(&e)
        if err != nil {
            t.Errorf("Expected body to deserialize properly but got error %s", err.Error())
        }
    }))

    httpSink := NewHTTPSink(testServer.URL, "/", "POST")
    if err := httpSink.Receive("a", 1, "b", 2); err != nil {
        t.Errorf("Expected valid event to send without error, got %s", err.Error())
    }

    err := httpSink.Receive(1, "noway")
    if err == nil {
        t.Errorf("Expected message construction to fail with non-string keys")
    }
}
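EventSink is deliberately narrow, so alternative destinations can be added without touching the Logger or its callers. A minimal sketch of a custom sink wired through NewLogger from log/logger.go below (the memorySink type is illustrative, not part of this repository):

```go
package main

import (
    "fmt"

    gklog "github.com/go-kit/kit/log"
    flotillaLog "github.com/stitchfix/flotilla-os/log"
)

// memorySink is an illustrative EventSink that buffers events in memory,
// e.g. for tests or for batching before shipping them elsewhere.
type memorySink struct {
    events [][]interface{}
}

func (m *memorySink) Receive(keyvals ...interface{}) error {
    m.events = append(m.events, keyvals)
    return nil
}

func main() {
    sink := &memorySink{}
    logger := flotillaLog.NewLogger(gklog.NewNopLogger(), []flotillaLog.EventSink{sink})
    _ = logger.Event("run_id", "abc123", "status", "QUEUED")
    fmt.Printf("captured %d event(s)\n", len(sink.events)) // 1
}
```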
================================================
FILE: log/logger.go
================================================
package log

import "github.com/go-kit/kit/log"

//
// Logger interface, supports log messages and "events"
// where an event is an object that should get received
// by the configured EventSinks
//
type Logger interface {
    Log(keyvals ...interface{}) error
    Event(keyvals ...interface{}) error
}

type logger struct {
    wrapped log.Logger
    sinks   []EventSink
}

//
// NewLogger sets up and returns a Logger
//
func NewLogger(wrapped log.Logger, sinks []EventSink) Logger {
    return &logger{wrapped, sinks}
}

func (l *logger) Log(keyvals ...interface{}) error {
    return l.wrapped.Log(keyvals...)
}

//
// Event iterates through the configured EventSinks and
// sends the event to each one; every sink is tried, and the
// first sink error (if any) is returned
//
func (l *logger) Event(keyvals ...interface{}) error {
    var firstErr error
    for _, sink := range l.sinks {
        if err := sink.Receive(keyvals...); err != nil {
            _ = l.Log("level", "error", "message", "error sending event", "sink", sink, "error", err)
            if firstErr == nil {
                firstErr = err
            }
        }
    }
    return firstErr
}

================================================
FILE: log/logger_test.go
================================================
package log

import (
    "testing"
)

type testLogger struct {
    keyvals []interface{}
}

func (tl *testLogger) Log(keyvals ...interface{}) error {
    tl.keyvals = keyvals
    return nil
}

type testSink struct {
    keyvals []interface{}
}

func (ts *testSink) Receive(keyvals ...interface{}) error {
    ts.keyvals = keyvals
    return nil
}

func TestLogger_Log(t *testing.T) {
    tl := &testLogger{}
    l := NewLogger(tl, nil)

    // Verify that the wrapped logger's Log method gets called
    l.Log("message", "value")
    if len(tl.keyvals) != 2 {
        t.Errorf("Expected log message with 2 values, got %v", len(tl.keyvals))
    }

    m1 := tl.keyvals[0]
    m2 := tl.keyvals[1]
    if m1.(string) != "message" || m2.(string) != "value" {
        t.Errorf("Expected [message, value] but got %s", tl.keyvals)
    }
}

func TestLogger_Event(t *testing.T) {
    ts := &testSink{}
    tl := &testLogger{}
    l := NewLogger(tl, []EventSink{ts})

    // Verify that the configured sink receives the event
    l.Event("important_event", "act_on_me")
    if len(ts.keyvals) != 2 {
        t.Errorf("Expected to receive event with 2 values, got %v", len(ts.keyvals))
    }

    m1 := ts.keyvals[0]
    m2 := ts.keyvals[1]
    if m1.(string) != "important_event" || m2.(string) != "act_on_me" {
        t.Errorf("Expected [important_event, act_on_me] but got %s", ts.keyvals)
    }
}
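Wiring the HTTPEventSink from log/event.go follows the same pattern; note that Receive is defined on the pointer receiver, so the value returned by NewHTTPSink must be taken by address to satisfy EventSink. A sketch (the host and path are placeholder values, not anything the repository configures):

```go
package main

import (
    "os"

    gklog "github.com/go-kit/kit/log"
    flotillaLog "github.com/stitchfix/flotilla-os/log"
)

func main() {
    // Base logger matching the JSON setup used by the server's main.go.
    base := gklog.NewJSONLogger(gklog.NewSyncWriter(os.Stderr))

    // NewHTTPSink returns a value; Receive has a pointer receiver, so the
    // address is what implements the EventSink interface.
    httpSink := flotillaLog.NewHTTPSink("http://events.example.com", "/events", "POST")

    logger := flotillaLog.NewLogger(base, []flotillaLog.EventSink{&httpSink})
    _ = logger.Event("definition_id", "def-123", "action", "created")
}
```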
================================================
FILE: main.go
================================================
package main

import (
    "fmt"
    gklog "github.com/go-kit/kit/log"
    "github.com/pkg/errors"
    "github.com/stitchfix/flotilla-os/clients/cluster"
    "github.com/stitchfix/flotilla-os/clients/logs"
    "github.com/stitchfix/flotilla-os/clients/metrics"
    "github.com/stitchfix/flotilla-os/clients/middleware"
    "github.com/stitchfix/flotilla-os/config"
    "github.com/stitchfix/flotilla-os/execution/engine"
    "github.com/stitchfix/flotilla-os/flotilla"
    flotillaLog "github.com/stitchfix/flotilla-os/log"
    "github.com/stitchfix/flotilla-os/queue"
    "github.com/stitchfix/flotilla-os/state"
    "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer"
    "log"
    "os"
)

func main() {
    tracer.Start()
    defer tracer.Stop()
    args := os.Args
    if len(args) < 2 {
        fmt.Println("Usage: flotilla-os <conf_dir>")
        os.Exit(1)
    }

    //
    // Use go-kit for structured logging (JSON format for DataDog compatibility)
    //
    l := gklog.NewJSONLogger(gklog.NewSyncWriter(os.Stderr))
    l = gklog.With(l, "ts", gklog.DefaultTimestampUTC)
    eventSinks := []flotillaLog.EventSink{flotillaLog.NewLocalEventSink()}
    logger := flotillaLog.NewLogger(l, eventSinks)

    //
    // Wrap viper for configuration
    //
    confDir := args[1]
    c, err := config.NewConfig(&confDir)
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize config"))
        os.Exit(1)
    }

    //
    // Instantiate metrics client.
    //
    if err = metrics.InstantiateClient(c); err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize metrics client"))
        os.Exit(1)
    }

    //
    // Get state manager for reading and writing
    // state about definitions and runs
    //
    stateManager, err := state.NewStateManager(c, logger)
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize state manager"))
        os.Exit(1)
    }

    //
    // Get cluster client for validating definitions
    // against execution clusters
    //
    eksClusterClient, err := cluster.NewClusterClient(c, state.EKSEngine)
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize EKS cluster client"))
        //TODO
        //os.Exit(1)
    }

    eksLogsClient, err := logs.NewLogsClient(c, logger, state.EKSEngine)
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize EKS logs client"))
        //TODO
        //os.Exit(1)
    }

    //
    // Get queue manager for queuing runs
    //
    eksQueueManager, err := queue.NewQueueManager(c, state.EKSEngine)
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize eks queue manager"))
        os.Exit(1)
    }
    emrQueueManager, err := queue.NewQueueManager(c, state.EKSSparkEngine)
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize emr queue manager"))
        os.Exit(1)
    }

    clusterManager, err := engine.NewDynamicClusterManager(
        c.GetString("aws_default_region"), logger, stateManager,
    )
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize dynamic cluster manager"))
        os.Exit(1)
    }

    //
    // Get execution engine for interacting with backend
    // execution management framework (eg. EKS)
    //
    eksExecutionEngine, err := engine.NewExecutionEngine(c, eksQueueManager, state.EKSEngine, logger, clusterManager, stateManager)
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize EKS execution engine"))
        os.Exit(1)
    }
    emrExecutionEngine, err := engine.NewExecutionEngine(c, eksQueueManager, state.EKSSparkEngine, logger, clusterManager, stateManager)
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize EMR execution engine"))
        os.Exit(1)
    }

    middlewareClient, err := middleware.NewClient()
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize middleware client"))
        os.Exit(1)
    }

    app, err := flotilla.NewApp(c, logger, eksLogsClient, eksExecutionEngine, stateManager, eksClusterClient,
        eksQueueManager, emrExecutionEngine, emrQueueManager, middlewareClient, clusterManager)
    if err != nil {
        fmt.Printf("%+v\n", errors.Wrap(err, "unable to initialize app"))
        os.Exit(1)
    }

    log.Fatal(app.Run())
}
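The bootstrap above can be pared down when only one subsystem is needed. A hedged sketch that stands up just the config and an SQS-backed queue manager (it assumes AWS credentials and a non-test flotilla_mode, since test mode leaves the SQS client unset; the queue name is illustrative):

```go
package main

import (
    "context"
    "fmt"

    "github.com/stitchfix/flotilla-os/config"
    "github.com/stitchfix/flotilla-os/queue"
    "github.com/stitchfix/flotilla-os/state"
)

func main() {
    confDir := "./conf"
    c, err := config.NewConfig(&confDir)
    if err != nil {
        panic(err)
    }

    // NewQueueManager initializes an SQSManager for the EKS engine using the
    // aws_default_region and queue_namespace values from config.
    qm, err := queue.NewQueueManager(c, state.EKSEngine)
    if err != nil {
        panic(err)
    }

    // QurlFor resolves (and lazily creates) the namespaced queue; the name
    // "example-runs" is illustrative.
    qURL, err := qm.QurlFor("example-runs", true)
    if err != nil {
        panic(err)
    }
    if err := qm.Enqueue(context.Background(), qURL, state.Run{RunID: "run-123"}); err != nil {
        panic(err)
    }
    fmt.Println("enqueued to", qURL)
}
```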
================================================
FILE: queue/manager.go
================================================
package queue

import (
    "context"
    "fmt"
    "github.com/pkg/errors"
    "github.com/stitchfix/flotilla-os/config"
    "github.com/stitchfix/flotilla-os/state"
)

// Manager wraps operations on a queue
type Manager interface {
    Name() string
    QurlFor(name string, prefixed bool) (string, error)
    Initialize(config.Config, string) error
    Enqueue(ctx context.Context, qURL string, run state.Run) error
    ReceiveRun(ctx context.Context, qURL string) (RunReceipt, error)
    ReceiveStatus(qURL string) (StatusReceipt, error)
    ReceiveCloudTrail(qURL string) (state.CloudTrailS3File, error)
    ReceiveKubernetesEvent(qURL string) (state.KubernetesEvent, error)
    ReceiveEMREvent(qURL string) (state.EmrEvent, error)
    ReceiveKubernetesRun(queue string) (string, error)
    List() ([]string, error)
}

// RunReceipt wraps a Run and a callback to use
// when Run is finished processing
type RunReceipt struct {
    Run              *state.Run
    Done             func() error
    TraceID          uint64
    ParentID         uint64
    SamplingPriority int
}

// StatusReceipt wraps a StatusUpdate and a callback to use
// when StatusUpdate is finished applying
type StatusReceipt struct {
    StatusUpdate *string
    Done         func() error
}

// NewQueueManager returns the Manager for the named execution engine
func NewQueueManager(conf config.Config, name string) (Manager, error) {
    switch name {
    case state.EKSEngine:
        sqsEKS := &SQSManager{}
        if err := sqsEKS.Initialize(conf, state.EKSEngine); err != nil {
            return nil, errors.Wrap(err, "problem initializing SQSManager")
        }
        return sqsEKS, nil
    case state.EKSSparkEngine:
        sqsEKSSpark := &SQSManager{}
        if err := sqsEKSSpark.Initialize(conf, state.EKSSparkEngine); err != nil {
            return nil, errors.Wrap(err, "problem initializing SQSManager")
        }
        return sqsEKSSpark, nil
    default:
        return nil, fmt.Errorf("no QueueManager named [%s] was found", name)
    }
}
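The Done callback is the acknowledgement half of the receipt contract: a consumer deletes the message only after the run is fully handled, so a crash mid-processing results in redelivery once the SQS visibility timeout lapses. A sketch of a consumer loop honoring that contract (process and the backoff-free loop are illustrative, not code from this repository):

```go
package worker

import (
    "context"
    "log"

    "github.com/stitchfix/flotilla-os/queue"
    "github.com/stitchfix/flotilla-os/state"
)

// process stands in for real run handling (submission, status updates, ...).
func process(r state.Run) {
    log.Println("handling run", r.RunID)
}

// drain polls a run queue and calls Done only after a run is fully handled;
// if the consumer crashes before Done, SQS redelivers the message once the
// visibility timeout lapses. A production loop would back off on empty polls.
func drain(ctx context.Context, qm queue.Manager, qURL string) error {
    for {
        receipt, err := qm.ReceiveRun(ctx, qURL)
        if err != nil {
            return err
        }
        if receipt.Run == nil {
            continue // empty poll
        }
        process(*receipt.Run)
        if err := receipt.Done(); err != nil {
            return err
        }
    }
}
```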
================================================
FILE: queue/sqs_manager.go
================================================
package queue

import (
    "context"
    "encoding/json"
    "fmt"
    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/sqs"
    "github.com/pkg/errors"
    "github.com/stitchfix/flotilla-os/config"
    "github.com/stitchfix/flotilla-os/state"
    "github.com/stitchfix/flotilla-os/utils"
    awstrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/aws/aws-sdk-go/aws"
    "strconv"
)

// SQSManager - queue manager implementation for sqs
type SQSManager struct {
    namespace         string
    retentionSeconds  string
    visibilityTimeout string
    qc                sqsClient
    qurlCache         map[string]string
}

type sqsClient interface {
    GetQueueUrl(input *sqs.GetQueueUrlInput) (*sqs.GetQueueUrlOutput, error)
    CreateQueue(input *sqs.CreateQueueInput) (*sqs.CreateQueueOutput, error)
    ListQueues(input *sqs.ListQueuesInput) (*sqs.ListQueuesOutput, error)
    SendMessage(input *sqs.SendMessageInput) (*sqs.SendMessageOutput, error)
    ReceiveMessage(input *sqs.ReceiveMessageInput) (*sqs.ReceiveMessageOutput, error)
    DeleteMessage(input *sqs.DeleteMessageInput) (*sqs.DeleteMessageOutput, error)
}

// Name of queue manager - matches value in configuration
func (qm *SQSManager) Name() string {
    return "sqs"
}

// Initialize new sqs queue manager
func (qm *SQSManager) Initialize(conf config.Config, engine string) error {
    if !conf.IsSet("aws_default_region") {
        return errors.Errorf("SQSManager needs [aws_default_region] set in config")
    }

    qm.retentionSeconds = "604800"
    if conf.IsSet("queue_retention_seconds") {
        qm.retentionSeconds = conf.GetString("queue_retention_seconds")
    }

    qm.visibilityTimeout = "45"
    if conf.IsSet("queue_process_time") {
        qm.visibilityTimeout = conf.GetString("queue_process_time")
    }

    if !conf.IsSet("queue_namespace") {
        return errors.Errorf("SQSManager needs [queue_namespace] set in config")
    }
    qm.namespace = conf.GetString("queue_namespace")

    flotillaMode := conf.GetString("flotilla_mode")
    if flotillaMode != "test" {
        sess := awstrace.WrapSession(session.Must(session.NewSession(&aws.Config{
            Region: aws.String(conf.GetString("aws_default_region"))})))
        qm.qc = sqs.New(sess)
    }
    qm.qurlCache = make(map[string]string)
    return nil
}

// QurlFor returns the queue url that corresponds to the given name
// * if the queue does not exist it is created
func (qm *SQSManager) QurlFor(name string, prefixed bool) (string, error) {
    key := fmt.Sprintf("%s-%t", name, prefixed)
    val, ok := qm.qurlCache[key]
    if ok {
        return val, nil
    }
    val, err := qm.getOrCreateQueue(name, prefixed)
    if err == nil {
        qm.qurlCache[key] = val
    }
    return val, err
}

func (qm *SQSManager) getOrCreateQueue(name string, prefixed bool) (string, error) {
    qname := name
    if prefixed {
        qname = fmt.Sprintf("%s-%s", qm.namespace, name)
    }
    res, err := qm.qc.GetQueueUrl(&sqs.GetQueueUrlInput{
        QueueName: &qname,
    })
    if err != nil || res.QueueUrl == nil {
        cqi := sqs.CreateQueueInput{
            Attributes: map[string]*string{
                "MessageRetentionPeriod": &qm.retentionSeconds,
                "VisibilityTimeout":      &qm.visibilityTimeout,
            },
            QueueName: &qname,
        }
        createQueueResponse, err := qm.qc.CreateQueue(&cqi)
        if err != nil {
            return "", errors.Wrapf(err, "problem trying to create sqs queue with name [%s]", qname)
        }
        return *createQueueResponse.QueueUrl, nil
    }
    return *res.QueueUrl, nil
}

func (qm *SQSManager) messageFromRun(run state.Run) (*string, error) {
    jsonized, err := json.Marshal(run)
    if err != nil {
        return nil, errors.Wrapf(err, "problem trying to serialize run with id [%s] as json", run.RunID)
    }
    asString := string(jsonized)
    return &asString, nil
}

func (qm *SQSManager) runFromMessage(message *sqs.Message) (state.Run, error) {
    var run state.Run
    if message == nil {
        return run, errors.Errorf("can't generate Run from nil message")
    }
    body := message.Body
    if body == nil {
        return run, errors.Errorf("can't generate Run from empty message")
    }
    if err := json.Unmarshal([]byte(*body), &run); err != nil {
        return run, errors.Wrapf(err, "problem trying to deserialize run from json [%s]", *body)
    }
    return run, nil
}

func (qm *SQSManager) statusFromMessage(message *sqs.Message) (string, error) {
    var statusUpdate string
    if message == nil {
        return statusUpdate, errors.Errorf("can't generate StatusUpdate from nil message")
    }
    body := message.Body
    if body == nil {
        return statusUpdate, errors.Errorf("can't generate StatusUpdate from empty message")
    }
    return *body, nil
}

// Enqueue queues run
func (qm *SQSManager) Enqueue(ctx context.Context, qURL string, run state.Run) error {
    if len(qURL) == 0 {
        return errors.Errorf("no queue url specified, can't enqueue")
    }

    ctx, span := utils.TraceJob(ctx, "flotilla.queue.sqs_enqueue", "")
    defer span.Finish()
    span.SetTag("job.run_id", run.RunID)
    span.SetTag("queue.url", qURL)

    message, err := qm.messageFromRun(run)
    if err != nil {
        span.SetTag("error", true)
        span.SetTag("error.msg", err.Error())
        return errors.WithStack(err)
    }

    sme := sqs.SendMessageInput{
        QueueUrl:    &qURL,
        MessageBody: message,
        MessageAttributes: map[string]*sqs.MessageAttributeValue{
            "dd-trace-id": {
                DataType:    aws.String("String"),
                StringValue: aws.String(fmt.Sprintf("%d", span.Context().TraceID())),
            },
            "dd-parent-id": {
                DataType:    aws.String("String"),
                StringValue: aws.String(fmt.Sprintf("%d", span.Context().SpanID())),
            },
            "dd-sampling-priority": {
                DataType:    aws.String("String"),
                StringValue: aws.String("1"),
            },
        },
    }
    _, err = qm.qc.SendMessage(&sme)
    if err != nil {
        span.SetTag("error", true)
        span.SetTag("error.msg", err.Error())
        return errors.Wrap(err, "problem sending sqs message")
    }
    return nil
}

// ReceiveRun receives a new run to operate on
func (qm *SQSManager) ReceiveRun(ctx context.Context, qURL string) (RunReceipt, error) {
    var receipt RunReceipt
    ctx, span := utils.TraceJob(ctx, "flotilla.queue.sqs_receive", "")
    defer span.Finish()
    span.SetTag("queue.url", qURL)
    if len(qURL) == 0 {
        return receipt, errors.Errorf("no queue url specified, can't dequeue")
    }

    maxMessages := int64(1)
    visibilityTimeout := int64(45)
    rmi := sqs.ReceiveMessageInput{
        QueueUrl:            &qURL,
        MaxNumberOfMessages: &maxMessages,
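        // This per-receive visibility timeout mirrors the queue-level default set
        // in Initialize: the message stays hidden while it is processed and
        // reappears for redelivery if the receipt's Done() is never called.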
VisibilityTimeout: &visibilityTimeout, MessageAttributeNames: []*string{ aws.String("dd-trace-id"), aws.String("dd-parent-id"), aws.String("dd-sampling-priority"), aws.String("All"), }, } var err error response, err := qm.qc.ReceiveMessage(&rmi) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return receipt, errors.Wrapf(err, "problem receiving sqs message from queue url [%s]", qURL) } if len(response.Messages) == 0 { return receipt, nil } run, err := qm.runFromMessage(response.Messages[0]) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return receipt, errors.WithStack(err) } var traceID, parentID uint64 var samplingPriority int if attr, exists := response.Messages[0].MessageAttributes["dd-trace-id"]; exists && attr.StringValue != nil { traceID, _ = strconv.ParseUint(*attr.StringValue, 10, 64) } if attr, exists := response.Messages[0].MessageAttributes["dd-parent-id"]; exists && attr.StringValue != nil { parentID, _ = strconv.ParseUint(*attr.StringValue, 10, 64) } if attr, exists := response.Messages[0].MessageAttributes["dd-sampling-priority"]; exists && attr.StringValue != nil { sp, _ := strconv.Atoi(*attr.StringValue) samplingPriority = sp } receipt.Run = &run receipt.Done = func() error { return qm.ack(qURL, response.Messages[0].ReceiptHandle) } receipt.TraceID = traceID receipt.ParentID = parentID receipt.SamplingPriority = samplingPriority return receipt, nil } func (qm *SQSManager) ReceiveStatus(qURL string) (StatusReceipt, error) { var receipt StatusReceipt if len(qURL) == 0 { return receipt, errors.Errorf("no queue url specified, can't dequeue") } maxMessages := int64(1) visibilityTimeout := int64(45) rmi := sqs.ReceiveMessageInput{ QueueUrl: &qURL, MaxNumberOfMessages: &maxMessages, VisibilityTimeout: &visibilityTimeout, } var err error response, err := qm.qc.ReceiveMessage(&rmi) if err != nil { return receipt, errors.Wrapf(err, "problem receiving sqs message from queue url [%s]", qURL) } if len(response.Messages) == 0 { return receipt, nil } statusUpdate, err := qm.statusFromMessage(response.Messages[0]) if err != nil { return receipt, errors.WithStack(err) } receipt.StatusUpdate = &statusUpdate receipt.Done = func() error { return qm.ack(qURL, response.Messages[0].ReceiptHandle) } return receipt, nil } func (qm *SQSManager) ReceiveCloudTrail(qURL string) (state.CloudTrailS3File, error) { var receipt state.CloudTrailS3File if len(qURL) == 0 { return receipt, errors.Errorf("no queue url specified, can't dequeue") } maxMessages := int64(1) visibilityTimeout := int64(45) rmi := sqs.ReceiveMessageInput{ QueueUrl: &qURL, MaxNumberOfMessages: &maxMessages, VisibilityTimeout: &visibilityTimeout, } var err error response, err := qm.qc.ReceiveMessage(&rmi) if err != nil { return receipt, errors.Wrapf(err, "problem receiving sqs message from queue url [%s]", qURL) } if response != nil && response.Messages != nil && len(response.Messages) > 0 && response.Messages[0].Body != nil { body := response.Messages[0].Body err = json.Unmarshal([]byte(*body), &receipt) _ = qm.ack(qURL, response.Messages[0].ReceiptHandle) } return receipt, nil } func (qm *SQSManager) ReceiveEMREvent(qURL string) (state.EmrEvent, error) { var emrEvent state.EmrEvent if len(qURL) == 0 { return emrEvent, errors.Errorf("no queue url specified, can't dequeue") } maxMessages := int64(1) visibilityTimeout := int64(45) rmi := sqs.ReceiveMessageInput{ QueueUrl: &qURL, MaxNumberOfMessages: &maxMessages, VisibilityTimeout: &visibilityTimeout, } var err error 
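	// Unlike ReceiveCloudTrail above, which deletes the message immediately,
	// EMR event messages are acknowledged via the Done callback attached below,
	// so the caller controls when the message is removed from the queue.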
    response, err := qm.qc.ReceiveMessage(&rmi)
    if err != nil {
        return emrEvent, errors.Wrapf(err, "problem receiving sqs message from queue url [%s]", qURL)
    }
    if response != nil && response.Messages != nil && len(response.Messages) > 0 && response.Messages[0].Body != nil {
        body := response.Messages[0].Body
        err = json.Unmarshal([]byte(*body), &emrEvent)
        emrEvent.Done = func() error {
            return qm.ack(qURL, response.Messages[0].ReceiptHandle)
        }
    }
    return emrEvent, nil
}

func (qm *SQSManager) ReceiveKubernetesEvent(qURL string) (state.KubernetesEvent, error) {
    var kubernetesEvent state.KubernetesEvent
    if len(qURL) == 0 {
        return kubernetesEvent, errors.Errorf("no queue url specified, can't dequeue")
    }
    maxMessages := int64(1)
    visibilityTimeout := int64(45)
    rmi := sqs.ReceiveMessageInput{
        QueueUrl:            &qURL,
        MaxNumberOfMessages: &maxMessages,
        VisibilityTimeout:   &visibilityTimeout,
    }
    var err error
    response, err := qm.qc.ReceiveMessage(&rmi)
    if err != nil {
        return kubernetesEvent, errors.Wrapf(err, "problem receiving sqs message from queue url [%s]", qURL)
    }
    if response != nil && response.Messages != nil && len(response.Messages) > 0 && response.Messages[0].Body != nil {
        body := response.Messages[0].Body
        err = json.Unmarshal([]byte(*body), &kubernetesEvent)
        kubernetesEvent.Done = func() error {
            return qm.ack(qURL, response.Messages[0].ReceiptHandle)
        }
    }
    return kubernetesEvent, nil
}

func (qm *SQSManager) ReceiveKubernetesRun(queue string) (string, error) {
    var runId string
    qURL, err := qm.QurlFor(queue, false)
    if len(qURL) == 0 || err != nil {
        return runId, errors.Errorf("no queue url specified, can't dequeue")
    }
    maxMessages := int64(1)
    visibilityTimeout := int64(45)
    rmi := sqs.ReceiveMessageInput{
        QueueUrl:            &qURL,
        MaxNumberOfMessages: &maxMessages,
        VisibilityTimeout:   &visibilityTimeout,
    }
    response, err := qm.qc.ReceiveMessage(&rmi)
    if err != nil {
        return runId, errors.Wrapf(err, "problem receiving sqs message from queue url [%s]", qURL)
    }
    if response != nil && response.Messages != nil && len(response.Messages) > 0 && response.Messages[0].Body != nil {
        _ = qm.ack(qURL, response.Messages[0].ReceiptHandle)
        return *response.Messages[0].Body, nil
    }
    // errors.Wrapf(nil, ...) returns nil, which silently reported success here;
    // construct the error directly so an empty poll is visible to the caller.
    return runId, errors.Errorf("no message")
}

// ack acknowledges the receipt -AND- processing of the
// message referred to by handle
func (qm *SQSManager) ack(qURL string, handle *string) error {
    if handle == nil {
        return errors.Errorf("cannot acknowledge message with nil receipt")
    }
    if len(*handle) == 0 {
        return errors.Errorf("cannot acknowledge message with empty receipt")
    }
    dmi := sqs.DeleteMessageInput{
        QueueUrl:      &qURL,
        ReceiptHandle: handle,
    }
    if _, err := qm.qc.DeleteMessage(&dmi); err != nil {
        return errors.Wrapf(
            err, "problem deleting sqs message with handle [%s] from queue url [%s]", *handle, qURL)
    }
    return nil
}

// List lists all the queue URLs available
func (qm *SQSManager) List() ([]string, error) {
    response, err := qm.qc.ListQueues(
        &sqs.ListQueuesInput{QueueNamePrefix: &qm.namespace})
    if err != nil {
        return nil, errors.Wrap(err, "problem listing sqs queues")
    }
    listed := make([]string, len(response.QueueUrls))
    for i, qurl := range response.QueueUrls {
        listed[i] = *qurl
    }
    return listed, nil
}

================================================
FILE: queue/sqs_manager_test.go
================================================
package queue

import (
    "context"
    "encoding/json"
    "errors"
    "github.com/aws/aws-sdk-go/service/sqs"
    "github.com/stitchfix/flotilla-os/config"
    "github.com/stitchfix/flotilla-os/state"
    "testing"
)

type testSQSClient struct { t
*testing.T queues []*string calls []string } func (qc *testSQSClient) GetQueueUrl(input *sqs.GetQueueUrlInput) (*sqs.GetQueueUrlOutput, error) { qc.calls = append(qc.calls, "GetQueueUrl") if input.QueueName == nil || len(*input.QueueName) == 0 { qc.t.Errorf("Expected non-nil and non empty QueueName") } if *input.QueueName == "qtest-nope" { return nil, errors.New("No queue here") } qurl := "cupcake" return &sqs.GetQueueUrlOutput{QueueUrl: &qurl}, nil } func (qc *testSQSClient) CreateQueue(input *sqs.CreateQueueInput) (*sqs.CreateQueueOutput, error) { qc.calls = append(qc.calls, "CreateQueue") if input.QueueName == nil || len(*input.QueueName) == 0 { qc.t.Errorf("Expected non-nil and non empty QueueName") } if _, ok := input.Attributes["MessageRetentionPeriod"]; !ok { qc.t.Errorf("Expected MessageRetentionPeriod in attributes") } if _, ok := input.Attributes["VisibilityTimeout"]; !ok { qc.t.Errorf("Expected VisibilityTimeout in attributes") } qurl := "nope" return &sqs.CreateQueueOutput{QueueUrl: &qurl}, nil } func (qc *testSQSClient) ListQueues(input *sqs.ListQueuesInput) (*sqs.ListQueuesOutput, error) { qc.calls = append(qc.calls, "ListQueues") if input.QueueNamePrefix == nil { qc.t.Errorf("Expected non-nil QueueNamePrefix") } if len(*input.QueueNamePrefix) == 0 { qc.t.Errorf("Expected non-empty QueueNamePrefix") } response := sqs.ListQueuesOutput{QueueUrls: qc.queues} return &response, nil } func (qc *testSQSClient) SendMessage(input *sqs.SendMessageInput) (*sqs.SendMessageOutput, error) { qc.calls = append(qc.calls, "SendMessage") if input.QueueUrl == nil { qc.t.Errorf("Expected non-nil QueueUrl") } if len(*input.QueueUrl) == 0 { qc.t.Errorf("Expected non-empty QueueUrl") } body := input.MessageBody if body == nil { qc.t.Errorf("Expected non-nil MessageBody") } var run state.Run var smo sqs.SendMessageOutput err := json.Unmarshal([]byte(*body), &run) if err != nil { qc.t.Errorf("Error deserializing MessageBody to Run, [%v]", err) } if len(run.RunID) == 0 { qc.t.Errorf("RunID of deserialized Run should not be empty") } return &smo, nil } func (qc *testSQSClient) ReceiveMessage(input *sqs.ReceiveMessageInput) (*sqs.ReceiveMessageOutput, error) { qc.calls = append(qc.calls, "ReceiveMessage") if input.VisibilityTimeout == nil { qc.t.Errorf("Expected non-nil VisibilityTimeout") } if input.MaxNumberOfMessages == nil { qc.t.Errorf("Expected non-nil MaxNumberOfMessages") } if *input.MaxNumberOfMessages != 1 { qc.t.Errorf("Expected MaxNumberOfMessages to be 1, was %v", *input.MaxNumberOfMessages) } if input.QueueUrl == nil { qc.t.Errorf("Expected non-nil QueueUrl") } if len(*input.QueueUrl) == 0 { qc.t.Errorf("Expected non-empty QueueUrl") } handle := "handle" asString := "" if *input.QueueUrl == "statusQ" { asString = `{"detail":{"taskArn":"sometaskarn","lastStatus":"STOPPED","version":17, "overrides":{"containerOverrides":[{"environment":[{"name":"FLOTILLA_SERVER_MODE","value":"prod"}]}]}}}` } else { jsonRun, _ := json.Marshal(state.Run{RunID: "cupcake"}) asString = string(jsonRun) } msg := sqs.Message{ ReceiptHandle: &handle, Body: &asString, } rmo := sqs.ReceiveMessageOutput{ Messages: []*sqs.Message{&msg}, } return &rmo, nil } func (qc *testSQSClient) DeleteMessage(input *sqs.DeleteMessageInput) (*sqs.DeleteMessageOutput, error) { qc.calls = append(qc.calls, "DeleteMessage") if input.QueueUrl == nil { qc.t.Errorf("Expected non-nil QueueUrl") } if len(*input.QueueUrl) == 0 { qc.t.Errorf("Expected non-empty QueueUrl") } if input.ReceiptHandle == nil { qc.t.Errorf("Expected non-nil 
ReceiptHandle") } if len(*input.ReceiptHandle) == 0 { qc.t.Errorf("Expected non-empty ReceiptHandle") } return &sqs.DeleteMessageOutput{}, nil } func setUp(t *testing.T) SQSManager { confDir := "../conf" c, _ := config.NewConfig(&confDir) qm := SQSManager{} qm.Initialize(c, state.EKSEngine) qm.namespace = "qtest" qA := "A" qB := "B" qC := "C" qStatus := "statusQ" testClient := testSQSClient{ t: t, queues: []*string{&qA, &qB, &qC, &qStatus}, } qm.qc = &testClient return qm } func TestSQSManager_List(t *testing.T) { qm := setUp(t) listed, _ := qm.List() if len(listed) != 4 { t.Errorf("Expected listed queues to be [4] but was %v", len(listed)) } } func TestSQSManager_Enqueue(t *testing.T) { qm := setUp(t) var err error toQ := state.Run{ RunID: "cupcake", } qm.Enqueue(context.Background(), "A", toQ) err = qm.Enqueue(context.Background(), "", toQ) if err == nil { t.Errorf("Expected empty queue url to result in error") } } func TestSQSManager_QurlFor(t *testing.T) { qm := setUp(t) testClient := testSQSClient{t: t} qm.qc = &testClient expectedCalls := map[string]bool{ "GetQueueUrl": true, } qm.QurlFor("cupcake", true) if len(testClient.calls) != len(expectedCalls) { t.Errorf( "Expected exactly %v calls for existing queue, but was %v", len(expectedCalls), len(testClient.calls)) } for _, call := range testClient.calls { _, ok := expectedCalls[call] if !ok { t.Errorf("Unexpected call for existing queue [%v]", call) } } testClient = testSQSClient{t: t} qm.qc = &testClient expectedCalls = map[string]bool{ "GetQueueUrl": true, "CreateQueue": true, } qm.QurlFor("nope", true) if len(testClient.calls) != len(expectedCalls) { t.Errorf( "Expected exactly %v calls for non-existing queue, but was %v", len(expectedCalls), len(testClient.calls)) } for _, call := range testClient.calls { _, ok := expectedCalls[call] if !ok { t.Errorf("Unexpected call for non-existing queue [%v]", call) } } } func TestSQSManager_ReceiveRun(t *testing.T) { qm := setUp(t) receipt, _ := qm.ReceiveRun(context.Background(), "A") receipt.Done() } func TestSQSManager_ReceiveStatus(t *testing.T) { qm := setUp(t) receipt, _ := qm.ReceiveStatus("statusQ") receipt.Done() } ================================================ FILE: services/definition.go ================================================ package services import ( "context" "fmt" "github.com/stitchfix/flotilla-os/exceptions" "github.com/stitchfix/flotilla-os/state" "strings" ) // // DefinitionService defines an interface for operations involving // definitions // * Like the ExecutionService, is an intermediary layer between state and the execution engine // type DefinitionService interface { Create(ctx context.Context, definition *state.Definition) (state.Definition, error) Get(ctx context.Context, definitionID string) (state.Definition, error) GetByAlias(ctx context.Context, alias string) (state.Definition, error) List(ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string) (state.DefinitionList, error) Update(ctx context.Context, definitionID string, updates state.Definition) (state.Definition, error) Delete(ctx context.Context, definitionID string) error // Metadata oriented ListGroups(ctx context.Context, limit int, offset int, name *string) (state.GroupsList, error) ListTags(ctx context.Context, limit int, offset int, name *string) (state.TagsList, error) } type definitionService struct { sm state.Manager } // // NewDefinitionService configures and returns a DefinitionService // func 
NewDefinitionService(stateManager state.Manager) (DefinitionService, error) {
	ds := definitionService{sm: stateManager}
	return &ds, nil
}

//
// Create fully initializes and saves the new definition
// * Allocates new definition id
// * Defines definition with execution engine
// * Stores definition using state manager
//
func (ds *definitionService) Create(ctx context.Context, definition *state.Definition) (state.Definition, error) {
	if valid, reasons := definition.IsValid(); !valid {
		return state.Definition{}, exceptions.MalformedInput{strings.Join(reasons, "\n")}
	}
	exists, err := ds.aliasExists(ctx, definition.Alias)
	if err != nil {
		return state.Definition{}, err
	}
	if exists {
		return state.Definition{}, exceptions.ConflictingResource{
			fmt.Sprintf("definition with alias [%s] already exists", definition.Alias)}
	}

	// Attach definition id here
	definitionID, err := state.NewDefinitionID(*definition)
	if err != nil {
		return state.Definition{}, err
	}
	definition.DefinitionID = definitionID
	return *definition, ds.sm.CreateDefinition(ctx, *definition)
}

func (ds *definitionService) aliasExists(ctx context.Context, alias string) (bool, error) {
	// Short circuit to check if the alias already exists
	dl, err := ds.sm.ListDefinitions(
		ctx, 1024, 0, "alias", "asc", map[string][]string{"alias": {alias}}, nil)
	if err != nil {
		return false, err
	}
	for _, def := range dl.Definitions {
		if def.Alias == alias {
			return true, nil
		}
	}
	return false, nil
}

//
// Get returns the definition specified by definitionID
//
func (ds *definitionService) Get(ctx context.Context, definitionID string) (state.Definition, error) {
	return ds.sm.GetDefinition(ctx, definitionID)
}

func (ds *definitionService) GetByAlias(ctx context.Context, alias string) (state.Definition, error) {
	return ds.sm.GetDefinitionByAlias(ctx, alias)
}

// List lists definitions
func (ds *definitionService) List(ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string) (state.DefinitionList, error) {
	return ds.sm.ListDefinitions(ctx, limit, offset, sortBy, order, filters, envFilters)
}

// Update updates the definition specified by definitionID with the given updates
func (ds *definitionService) Update(ctx context.Context, definitionID string, updates state.Definition) (state.Definition, error) {
	definition, err := ds.sm.GetDefinition(ctx, definitionID)
	if err != nil {
		return definition, err
	}
	definition.UpdateWith(updates)
	return ds.sm.UpdateDefinition(ctx, definitionID, definition)
}

// Delete deletes and deregisters the definition specified by definitionID
func (ds *definitionService) Delete(ctx context.Context, definitionID string) error {
	return ds.sm.DeleteDefinition(ctx, definitionID)
}

func (ds *definitionService) ListGroups(ctx context.Context, limit int, offset int, name *string) (state.GroupsList, error) {
	return ds.sm.ListGroups(ctx, limit, offset, name)
}

func (ds *definitionService) ListTags(ctx context.Context, limit int, offset int, name *string) (state.TagsList, error) {
	return ds.sm.ListTags(ctx, limit, offset, name)
}

================================================
FILE: services/definition_test.go
================================================
package services

import (
	"context"
	"github.com/stitchfix/flotilla-os/state"
	"github.com/stitchfix/flotilla-os/testutils"
	"testing"
)

func setUpDefinitionServiceTest(t *testing.T) (DefinitionService, *testutils.ImplementsAllTheThings) {
	imp := testutils.ImplementsAllTheThings{
		T: t,
		Definitions: map[string]state.Definition{
			"A": {DefinitionID: "A"},
			"B": {DefinitionID: "B"},
			"C": {DefinitionID: "C", ExecutableResources: state.ExecutableResources{Image: "invalidimage"}},
		},
		Runs: map[string]state.Run{
			"runA": {DefinitionID: "A", ClusterName: "A", GroupName: "A", RunID: "runA"},
			"runB": {DefinitionID: "B", ClusterName: "B", GroupName: "B", RunID: "runB"},
		},
		Qurls: map[string]string{
			"A": "a/",
			"B": "b/",
		},
	}
	ds, _ := NewDefinitionService(&imp)
	return ds, &imp
}

func TestDefinitionService_Create(t *testing.T) {
	ds, imp := setUpDefinitionServiceTest(t)

	// Check that a new definition id is allocated
	// Check that define is called
	// Check that save is called and has the new definition id
	memory := int64(512)
	newValidDef := state.Definition{
		Alias:     "cupcake",
		GroupName: "group-cupcake",
		Command:   "echo 'hi'",
		ExecutableResources: state.ExecutableResources{
			Image:  "image:cupcake",
			Memory: &memory,
		},
	}
	created, _ := ds.Create(context.Background(), &newValidDef)
	if len(created.DefinitionID) == 0 {
		t.Errorf("Expected non-empty definition id")
	}

	// order matters
	expected := []string{"ListDefinitions", "CreateDefinition"}
	if len(imp.Calls) != len(expected) {
		t.Errorf("Unexpected number of create calls, expected %v but was %v", len(expected), len(imp.Calls))
	}
	for i, call := range imp.Calls {
		if expected[i] != call {
			t.Errorf("Expected call %v to be %s but was %s", i, expected[i], call)
		}
	}

	// Check that the saved definition is the one with the id
	_, ok := imp.Definitions[created.DefinitionID]
	if !ok {
		t.Errorf("Expected that definition with id %s would be saved in state manager", created.DefinitionID)
	}
}

func TestDefinitionService_Create2(t *testing.T) {
	// Check that invalid definitions return errors
	ds, _ := setUpDefinitionServiceTest(t)
	var err error

	memory := int64(512)
	invalid4 := state.Definition{
		Alias:               "cupcake",
		GroupName:           "group-cupcake",
		ExecutableResources: state.ExecutableResources{Memory: &memory},
	}
	_, err = ds.Create(context.Background(), &invalid4)
	if err == nil {
		t.Errorf("Expected invalid definition with no image to result in error")
	}
}

func TestDefinitionService_Update(t *testing.T) {
	ds, imp := setUpDefinitionServiceTest(t)

	memory := int64(512)
	d := state.Definition{
		ExecutableResources: state.ExecutableResources{Memory: &memory},
	}
	ds.Update(context.Background(), "A", d)

	// order matters
	expected := []string{"GetDefinition", "UpdateDefinition"}
	if len(imp.Calls) != len(expected) {
		t.Errorf("Unexpected number of update calls, expected %v but was %v", len(expected), len(imp.Calls))
	}
	for i, call := range imp.Calls {
		if expected[i] != call {
			t.Errorf("Expected call %v to be %s but was %s", i, expected[i], call)
		}
	}
}

func TestDefinitionService_Delete(t *testing.T) {
	ds, imp := setUpDefinitionServiceTest(t)

	ds.Delete(context.Background(), "A")

	// order matters
	expected := []string{"DeleteDefinition"}
	if len(imp.Calls) != len(expected) {
		t.Errorf("Unexpected number of delete calls, expected %v but was %v", len(expected), len(imp.Calls))
	}
	for i, call := range imp.Calls {
		if expected[i] != call {
			t.Errorf("Expected call %v to be %s but was %s", i, expected[i], call)
		}
	}
}

================================================
FILE: services/execution.go
================================================
package services

import (
	"context"
	"crypto/md5"
	"encoding/json"
	"errors"
	"fmt"
	"math/rand"
	"regexp"
	"slices"
	"strconv"
	"strings"
	"time"

	"github.com/stitchfix/flotilla-os/utils"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/stitchfix/flotilla-os/clients/cluster"
	"github.com/stitchfix/flotilla-os/config"
"github.com/stitchfix/flotilla-os/exceptions" "github.com/stitchfix/flotilla-os/execution/engine" "github.com/stitchfix/flotilla-os/state" ) // ExecutionService interacts with the state manager and queue manager to queue runs, and perform // CRUD operations on them // * Acts as an intermediary layer between state and the execution engine type ExecutionService interface { CreateDefinitionRunByDefinitionID(ctx context.Context, definitionID string, req *state.DefinitionExecutionRequest) (state.Run, error) CreateDefinitionRunByAlias(ctx context.Context, alias string, req *state.DefinitionExecutionRequest) (state.Run, error) List( ctx context.Context, limit int, offset int, sortOrder string, sortField string, filters map[string][]string, envFilters map[string]string) (state.RunList, error) Get(ctx context.Context, runID string) (state.Run, error) UpdateStatus(ctx context.Context, runID string, status string, exitCode *int64, runExceptions *state.RunExceptions, exitReason *string) error Terminate(ctx context.Context, runID string, userInfo state.UserInfo) error ReservedVariables() []string ListClusters(ctx context.Context) ([]state.ClusterMetadata, error) GetDefaultCluster() string GetEvents(ctx context.Context, run state.Run) (state.PodEventList, error) CreateTemplateRunByTemplateID(ctx context.Context, templateID string, req *state.TemplateExecutionRequest) (state.Run, error) CreateTemplateRunByTemplateName(ctx context.Context, templateName string, templateVersion string, req *state.TemplateExecutionRequest) (state.Run, error) UpdateClusterMetadata(ctx context.Context, cluster state.ClusterMetadata) error DeleteClusterMetadata(ctx context.Context, clusterID string) error GetClusterByID(ctx context.Context, clusterID string) (state.ClusterMetadata, error) GetRunStatus(ctx context.Context, runID string) (state.RunStatus, error) } type executionService struct { stateManager state.Manager eksClusterClient cluster.Client eksExecutionEngine engine.Engine emrExecutionEngine engine.Engine reservedEnv map[string]func(run state.Run) string eksClusterOverride string eksClusterDefault string eksTierDefault string eksGPUClusterOverride string eksGPUClusterDefault string checkImageValidity bool baseUri string spotReAttemptOverride float32 eksSpotOverride bool spotThresholdMinutes float64 terminateJobChannel chan state.TerminateJob validEksClusters []string //validEksClusterTiers string } func (es *executionService) GetEvents(ctx context.Context, run state.Run) (state.PodEventList, error) { ctx, span := utils.TraceJob(ctx, "flotilla.get_events", run.RunID) defer span.Finish() utils.TagJobRun(span, run) return es.eksExecutionEngine.GetEvents(ctx, run) } // NewExecutionService configures and returns an ExecutionService func NewExecutionService(conf config.Config, eksExecutionEngine engine.Engine, sm state.Manager, eksClusterClient cluster.Client, emrExecutionEngine engine.Engine) (ExecutionService, error) { es := executionService{ stateManager: sm, eksClusterClient: eksClusterClient, eksExecutionEngine: eksExecutionEngine, emrExecutionEngine: emrExecutionEngine, } // // Reserved environment variables dynamically generated // per run ownerKey := conf.GetString("owner_id_var") if len(ownerKey) == 0 { ownerKey = "FLOTILLA_RUN_OWNER_ID" } es.validEksClusters = strings.Split(conf.GetString("eks_clusters"), ",") for k, _ := range es.validEksClusters { es.validEksClusters[k] = strings.TrimSpace(es.validEksClusters[k]) } es.eksClusterOverride = conf.GetString("eks_cluster_override") es.eksGPUClusterOverride = 
conf.GetString("eks_gpu_cluster_override") es.eksClusterDefault = conf.GetString("eks_cluster_default") es.eksGPUClusterDefault = conf.GetString("eks_gpu_cluster_default") es.eksTierDefault = conf.GetString("eks_tier_default") //es.validEksClusterTiers = conf.GetString("eks_cluster_tiers") if !slices.Contains(es.validEksClusters, es.eksClusterDefault) || !slices.Contains(es.validEksClusters, es.eksGPUClusterDefault) { return nil, fmt.Errorf("an invalid cluster has been set as a default\nvalid_clusters:%s\neks_cluster_default:%s\neks_gpu_cluster_default:%s", es.validEksClusters, es.eksClusterDefault, es.eksGPUClusterDefault) } if conf.IsSet("check_image_validity") { es.checkImageValidity = conf.GetBool("check_image_validity") } else { es.checkImageValidity = true } if conf.IsSet("base_uri") { es.baseUri = conf.GetString("base_uri") } if conf.IsSet("eks_spot_reattempt_override") { es.spotReAttemptOverride = float32(conf.GetFloat64("eks_spot_reattempt_override")) } else { // defaults to 5% override. es.spotReAttemptOverride = float32(0.05) } if conf.IsSet("eks_spot_override") { es.eksSpotOverride = conf.GetBool("eks_spot_override") } else { es.eksSpotOverride = false } if conf.IsSet("eks_spot_threshold_minutes") { es.spotThresholdMinutes = conf.GetFloat64("eks_spot_threshold_minutes") } else { es.spotThresholdMinutes = 30.0 } es.reservedEnv = map[string]func(run state.Run) string{ "FLOTILLA_SERVER_MODE": func(run state.Run) string { return conf.GetString("flotilla_mode") }, "FLOTILLA_RUN_ID": func(run state.Run) string { return run.RunID }, "AWS_ROLE_SESSION_NAME": func(run state.Run) string { return run.RunID }, ownerKey: func(run state.Run) string { return run.User }, } es.terminateJobChannel = make(chan state.TerminateJob, 100) return &es, nil } // ReservedVariables returns the list of reserved run environment variable // names func (es *executionService) ReservedVariables() []string { var keys []string for k := range es.reservedEnv { keys = append(keys, k) } return keys } // Create constructs and queues a new Run on the cluster specified. 
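// For orientation, a minimal sketch of calling this API. The definition ID "B",
// the command, and the owner are illustrative values borrowed from the tests in
// execution_test.go below, and `svc` and `ctx` are assumed to be a configured
// ExecutionService and context:
//
//	engine := state.DefaultEngine
//	cmd := "echo hello"
//	req := state.DefinitionExecutionRequest{
//		ExecutionRequestCommon: &state.ExecutionRequestCommon{
//			Command: &cmd,
//			OwnerID: "somebody",
//			Engine:  &engine,
//		},
//	}
//	run, err := svc.CreateDefinitionRunByDefinitionID(ctx, "B", &req)
//	// On success, run.Status is state.StatusQueued and run.RunID is populated.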
func (es *executionService) CreateDefinitionRunByDefinitionID(ctx context.Context, definitionID string, req *state.DefinitionExecutionRequest) (state.Run, error) {
	ctx, span := utils.TraceJob(ctx, "flotilla.definition.create_run", "")
	defer span.Finish()
	span.SetTag("definition_id", definitionID)
	// Ensure definition exists
	definition, err := es.stateManager.GetDefinition(ctx, definitionID)
	if err != nil {
		span.SetTag("error", true)
		span.SetTag("error.msg", err.Error())
		return state.Run{}, err
	}
	return es.createFromDefinition(ctx, definition, req)
}

// CreateDefinitionRunByAlias constructs and queues a new Run on the cluster specified, based on an alias
func (es *executionService) CreateDefinitionRunByAlias(ctx context.Context, alias string, req *state.DefinitionExecutionRequest) (state.Run, error) {
	ctx, span := utils.TraceJob(ctx, "flotilla.alias.create_run", "")
	defer span.Finish()
	span.SetTag("alias", alias)
	// Ensure definition exists
	definition, err := es.stateManager.GetDefinitionByAlias(ctx, alias)
	if err != nil {
		span.SetTag("error", true)
		span.SetTag("error.msg", err.Error())
		return state.Run{}, err
	}
	return es.createFromDefinition(ctx, definition, req)
}

func (es *executionService) createFromDefinition(ctx context.Context, definition state.Definition, req *state.DefinitionExecutionRequest) (state.Run, error) {
	var (
		run state.Run
		err error
	)
	ctx, span := utils.TraceJob(ctx, "flotilla.definition.create_run", run.RunID)
	defer span.Finish()
	fields := req.GetExecutionRequestCommon()
	rand.Seed(time.Now().Unix())
	/*
		The cluster is set based on the following precedence (low to high):
		1. Cluster is passed in from the request
		2. Cluster from cluster metadata and active
		3. Cluster from the task definition
		4. Default cluster from config
		The cluster is then checked for validity. If required, cluster overrides
		should be introduced and set here.
	*/
	clusterMetadata, err := es.ListClusters(ctx)
	var activeClusters []string
	if len(clusterMetadata) > 0 {
		for _, cluster := range clusterMetadata {
			if cluster.Status == state.StatusActive {
				if es.clusterSupportsTier(cluster, req.Tier) {
					activeClusters = append(activeClusters, cluster.Name)
				}
			}
		}
	}
	if req.ClusterName != "" {
		fields.ClusterName = req.ClusterName
	} else if len(activeClusters) > 0 {
		fields.ClusterName = activeClusters[rand.Intn(len(activeClusters))]
	} else if definition.TargetCluster != "" {
		fields.ClusterName = definition.TargetCluster
	} else if fields.Gpu != nil && *fields.Gpu > 0 {
		fields.ClusterName = es.eksGPUClusterDefault
	} else {
		fields.ClusterName = es.eksClusterDefault
	}
	for _, c := range clusterMetadata {
		es.validEksClusters = append(es.validEksClusters, c.Name)
	}
	if !es.isClusterValid(fields.ClusterName) {
		return run, fmt.Errorf("%s was not found in the list of valid clusters: %s", fields.ClusterName, es.validEksClusters)
	}
	span.SetTag("clusterName", fields.ClusterName)
	run.User = req.OwnerID
	es.sanitizeExecutionRequestCommonFields(fields)
	// Construct run object with StatusQueued and new UUID4 run id
	run, err = es.constructRunFromDefinition(ctx, definition, req)
	if err != nil {
		return run, err
	}
	return es.createAndEnqueueRun(ctx, run)
}

func (es *executionService) constructRunFromDefinition(ctx context.Context, definition state.Definition, req *state.DefinitionExecutionRequest) (state.Run, error) {
	run, err := es.constructBaseRunFromExecutable(ctx, definition, req)
	if err != nil {
		return run, err
	}
	run.DefinitionID = definition.DefinitionID
	run.Alias = definition.Alias
	queuedAt := time.Now()
	run.QueuedAt = &queuedAt
	run.GroupName = definition.GroupName
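	// The run inherits RequiresDocker from the definition below; optional
	// request-scoped metadata (description, idempotence key, arch, labels)
	// is copied onto the run only when the caller supplied it.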
run.RequiresDocker = definition.RequiresDocker if req.Description != nil { run.Description = req.Description } if req.IdempotenceKey != nil { run.IdempotenceKey = req.IdempotenceKey } if req.Arch != nil { run.Arch = req.Arch } if req.Labels != nil { run.Labels = *req.Labels } return run, nil } func (es *executionService) constructBaseRunFromExecutable(ctx context.Context, executable state.Executable, req state.ExecutionRequest) (state.Run, error) { resources := executable.GetExecutableResources() fields := req.GetExecutionRequestCommon() var ( run state.Run err error ) fields.Engine = req.GetExecutionRequestCommon().Engine fields.Tier = es.resolveRequestTier(req.GetExecutionRequestCommon().Tier) // Compute the executable command based on the execution request. If the // execution request did not specify an overriding command, use the computed // `executableCmd` as the Run's Command. runID, err := state.NewRunID(fields.Engine) if err != nil { return run, err } if *fields.Engine == state.EKSEngine { executableCmd, err := executable.GetExecutableCommand(req) if err != nil { return run, err } if (fields.Command == nil || len(*fields.Command) == 0) && (len(executableCmd) > 0) { fields.Command = aws.String(executableCmd) } executableID := executable.GetExecutableID() taskExecutionMinutes, _ := es.stateManager.GetTaskHistoricalRuntime(ctx, *executableID, runID) reAttemptRate, _ := es.stateManager.GetPodReAttemptRate(ctx) if reAttemptRate >= es.spotReAttemptOverride && fields.Engine != nil && fields.NodeLifecycle != nil && *fields.Engine == state.EKSEngine && *fields.NodeLifecycle == state.SpotLifecycle { fields.NodeLifecycle = &state.OndemandLifecycle } if taskExecutionMinutes > float32(es.spotThresholdMinutes) { fields.NodeLifecycle = &state.OndemandLifecycle } } if *fields.Engine == state.EKSSparkEngine { if req.GetExecutionRequestCommon().SparkExtension == nil { return run, errors.New("spark_extension can't be nil, when using eks-spark engine type") } fields.SparkExtension = req.GetExecutionRequestCommon().SparkExtension reAttemptRate, _ := es.stateManager.GetPodReAttemptRate(ctx) if reAttemptRate >= es.spotReAttemptOverride { fields.NodeLifecycle = &state.OndemandLifecycle } } if fields.NodeLifecycle == nil { fields.NodeLifecycle = &state.SpotLifecycle } // Calculate command_hash from actual command (FIX for ARA bug) // This ensures jobs with different commands have different hashes, // even if they share the same description. 
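	// A worked example (values taken from the tests in execution_test.go): with a
	// shared description "Daily processing job",
	//
	//	fmt.Sprintf("%x", md5.Sum([]byte("python process.py --date 2025-01-01")))
	//	fmt.Sprintf("%x", md5.Sum([]byte("python process.py --date 2025-01-02")))
	//
	// now yield two different hashes, whereas hashing the shared description
	// would have produced the same value for both runs.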
	if fields.Command != nil && len(*fields.Command) > 0 {
		// Regular EKS jobs: Hash the command
		fields.CommandHash = aws.String(fmt.Sprintf("%x", md5.Sum([]byte(*fields.Command))))
	} else if *fields.Engine == state.EKSSparkEngine && fields.Description != nil && len(*fields.Description) > 0 {
		// Spark jobs: Fall back to description (Spark jobs don't have commands)
		// The Spark "command" is in spark_extension, not the command field
		// Description uniquely identifies the Spark job type for ARA tracking
		fields.CommandHash = aws.String(fmt.Sprintf("%x", md5.Sum([]byte(*fields.Description))))
	}
	// If both command and description are NULL, command_hash remains NULL (malformed job)

	run = state.Run{
		RunID:                 runID,
		ClusterName:           fields.ClusterName,
		Image:                 resources.Image,
		Status:                state.StatusQueued,
		User:                  fields.OwnerID,
		Command:               fields.Command,
		Memory:                fields.Memory,
		Cpu:                   fields.Cpu,
		Gpu:                   fields.Gpu,
		Engine:                fields.Engine,
		NodeLifecycle:         fields.NodeLifecycle,
		EphemeralStorage:      fields.EphemeralStorage,
		ExecutableID:          executable.GetExecutableID(),
		ExecutableType:        executable.GetExecutableType(),
		ActiveDeadlineSeconds: fields.ActiveDeadlineSeconds,
		TaskType:              state.DefaultTaskType,
		SparkExtension:        fields.SparkExtension,
		CommandHash:           fields.CommandHash,
		ServiceAccount:        fields.ServiceAccount,
		Tier:                  fields.Tier,
	}
	if fields.Labels != nil {
		run.Labels = *fields.Labels
	}
	runEnv := es.constructEnviron(run, fields.Env)
	run.Env = &runEnv
	return run, nil
}

func (es *executionService) constructEnviron(run state.Run, env *state.EnvList) state.EnvList {
	size := len(es.reservedEnv)
	if env != nil {
		size += len(*env)
	}
	runEnv := make([]state.EnvVar, size)
	i := 0
	for k, f := range es.reservedEnv {
		runEnv[i] = state.EnvVar{
			Name:  k,
			Value: f(run),
		}
		i++
	}
	if env != nil {
		for j, e := range *env {
			runEnv[i+j] = e
		}
	}
	return state.EnvList(runEnv)
}

// List returns a list of Runs
// * validates definition_id and status filters
func (es *executionService) List(
	ctx context.Context,
	limit int,
	offset int,
	sortOrder string,
	sortField string,
	filters map[string][]string,
	envFilters map[string]string) (state.RunList, error) {
	ctx, span := utils.TraceJob(ctx, "flotilla.list_runs", "")
	defer span.Finish()
	span.SetTag("limit", limit)
	span.SetTag("offset", offset)

	// If definition_id is present in filters, validate its
	// existence first
	definitionID, ok := filters["definition_id"]
	if ok {
		_, err := es.stateManager.GetDefinition(ctx, definitionID[0])
		if err != nil {
			return state.RunList{}, err
		}
	}

	if statusFilters, ok := filters["status"]; ok {
		for _, status := range statusFilters {
			if !state.IsValidStatus(status) {
				// Status filter is invalid
				err := exceptions.MalformedInput{
					ErrorString: fmt.Sprintf("invalid status [%s]", status)}
				return state.RunList{}, err
			}
		}
	}
	return es.stateManager.ListRuns(ctx, limit, offset, sortField, sortOrder, filters, envFilters, []string{state.EKSEngine, state.EKSSparkEngine})
}

// Get returns the run with the given runID
func (es *executionService) Get(ctx context.Context, runID string) (state.Run, error) {
	ctx, span := utils.TraceJob(ctx, "flotilla.get_run", runID)
	defer span.Finish()
	span.SetTag("run_id", runID)
	run, err := es.stateManager.GetRun(ctx, runID)
	if err != nil {
		span.SetTag("error", true)
		span.SetTag("error.msg", err.Error())
	}
	return run, err
}

// UpdateStatus is for supporting some legacy runs that still manually update their status
func (es *executionService) UpdateStatus(ctx context.Context, runID string, status string, exitCode *int64, runExceptions *state.RunExceptions, exitReason *string) error {
	ctx, span := utils.TraceJob(ctx, "flotilla.update_status", runID)
	defer span.Finish()
	span.SetTag("run_id", runID)
	span.SetTag("status", status)
	if !state.IsValidStatus(status) {
		return exceptions.MalformedInput{ErrorString: fmt.Sprintf("status %s is invalid", status)}
	}
	run, err := es.stateManager.GetRun(ctx, runID)
	if err != nil {
		return err
	}
	var startedAt *time.Time
	if run.StartedAt == nil {
		startedAt = run.QueuedAt
	} else {
		startedAt = run.StartedAt
	}
	finishedAt := time.Now()
	if exitReason == nil {
		extractedExitReason := es.extractExitReason(runExceptions)
		exitReason = &extractedExitReason
	}
	_, err = es.stateManager.UpdateRun(ctx, runID, state.Run{Status: status, ExitCode: exitCode, ExitReason: exitReason, RunExceptions: runExceptions, FinishedAt: &finishedAt, StartedAt: startedAt})
	return err
}

func (es *executionService) extractExitReason(runExceptions *state.RunExceptions) string {
	connectionError := regexp.MustCompile(`(?i).*(timeout|gatewayerror|socketerror|\s503\s|\s502\s|\s500\s|\s504\s|connectionerror).*`)
	pipError := regexp.MustCompile(`(?i).*(could\snot\sfind\sa\sversion|package\snot\sfound|ModuleNotFoundError|No\smatching\sdistribution\sfound).*`)
	yumError := regexp.MustCompile(`(?i).*(Nothing\sto\sdo).*`)
	gitError := regexp.MustCompile(`(?i).*(Could\snot\sread\sfrom\sremote\srepository|correct\saccess\srights|Repository\snot\sfound).*`)
	argumentError := regexp.MustCompile(`(?i).*(404|400|keyerror|column\smissing|RuntimeError).*`)
	syntaxError := regexp.MustCompile(`(?i).*(syntaxerror|typeerror).*`)
	value, _ := json.Marshal(runExceptions)
	if value != nil {
		errorMsg := string(value)
		switch {
		case connectionError.MatchString(errorMsg):
			return "Connection error to downstream uri"
		case pipError.MatchString(errorMsg):
			return "Python pip package installation error"
		case yumError.MatchString(errorMsg):
			return "Yum installation error"
		case gitError.MatchString(errorMsg):
			return "Git clone error"
		case argumentError.MatchString(errorMsg):
			return "Data or argument error"
		case syntaxError.MatchString(errorMsg):
			return "Code or syntax error"
		default:
			return "Runtime exception encountered"
		}
	}
	return "Runtime exception encountered"
}

func (es *executionService) terminateWorker(jobChan <-chan state.TerminateJob) {
	ctx := context.Background()
	for job := range jobChan {
		runID := job.RunID
		userInfo := job.UserInfo
		ctx, span := utils.TraceJob(ctx, "flotilla.job.terminate_worker", runID)
		defer span.Finish()
		run, err := es.stateManager.GetRun(ctx, runID)
		if err != nil {
			span.SetTag("error", true)
			span.SetTag("error.msg", err.Error())
			break
		}
		utils.TagJobRun(span, run)
		subRuns, err := es.stateManager.ListRuns(ctx, 1000, 0, "status", "desc", nil, map[string]string{"PARENT_FLOTILLA_RUN_ID": run.RunID}, state.Engines)
		if err == nil && subRuns.Total > 0 {
			for _, subRun := range subRuns.Runs {
				es.terminateJobChannel <- state.TerminateJob{
					RunID:    subRun.RunID,
					UserInfo: job.UserInfo,
				}
			}
		}
		if run.Engine == nil {
			run.Engine = &state.EKSEngine
		}
		if run.Status != state.StatusStopped {
			if *run.Engine == state.EKSSparkEngine {
				err = es.emrExecutionEngine.Terminate(ctx, run)
			} else {
				err = es.eksExecutionEngine.Terminate(ctx, run)
			}
			exitReason := "Task terminated by user"
			if len(userInfo.Email) > 0 {
				exitReason = fmt.Sprintf("Task terminated by - %s", userInfo.Email)
			}
			exitCode := int64(1)
			finishedAt := time.Now()
			_, err = es.stateManager.UpdateRun(ctx, run.RunID, state.Run{
				Status:     state.StatusStopped,
				ExitReason: &exitReason,
				ExitCode:   &exitCode,
				FinishedAt: &finishedAt,
			})
			break
		}
		break
	}
}
// Terminate stops the run with the given runID
func (es *executionService) Terminate(ctx context.Context, runID string, userInfo state.UserInfo) error {
	ctx, span := utils.TraceJob(ctx, "flotilla.terminate_run", runID)
	defer span.Finish()
	span.SetTag("run_id", runID)
	if userInfo.Email != "" {
		span.SetTag("user.email", userInfo.Email)
	}
	es.terminateJobChannel <- state.TerminateJob{RunID: runID, UserInfo: userInfo}
	go es.terminateWorker(es.terminateJobChannel)
	return nil
}

// ListClusters returns a list of all execution clusters available with their metadata
func (es *executionService) ListClusters(ctx context.Context) ([]state.ClusterMetadata, error) {
	ctx, span := utils.TraceJob(ctx, "flotilla.list_clusters", "")
	defer span.Finish()
	clusters, err := es.stateManager.ListClusterStates(ctx)
	if err != nil {
		return nil, err
	}
	return clusters, nil
}

func (es *executionService) GetDefaultCluster() string {
	return es.eksClusterDefault
}

// sanitizeExecutionRequestCommonFields does what its name implies - sanitizes
func (es *executionService) sanitizeExecutionRequestCommonFields(fields *state.ExecutionRequestCommon) {
	if fields.Engine == nil {
		fields.Engine = &state.EKSEngine
	}
	if es.eksSpotOverride {
		fields.NodeLifecycle = &state.OndemandLifecycle
	}
	if fields.ActiveDeadlineSeconds == nil {
		if fields.NodeLifecycle != nil && *fields.NodeLifecycle == state.OndemandLifecycle {
			fields.ActiveDeadlineSeconds = &state.OndemandActiveDeadlineSeconds
		} else {
			fields.ActiveDeadlineSeconds = &state.SpotActiveDeadlineSeconds
		}
	}
}

// createAndEnqueueRun creates a run object in the DB, enqueues it, then
// updates the db's run object with a new `queued_at` field.
func (es *executionService) createAndEnqueueRun(ctx context.Context, run state.Run) (state.Run, error) {
	var err error
	ctx, span := utils.TraceJob(ctx, "flotilla.job.create_and_enqueue", "")
	defer span.Finish()
	span.SetTag("job.run_id", run.RunID)
	utils.TagJobRun(span, run)
	if run.IdempotenceKey != nil {
		priorRunId, err := es.stateManager.CheckIdempotenceKey(ctx, *run.IdempotenceKey)
		if err == nil && len(priorRunId) > 0 {
			priorRun, err := es.Get(ctx, priorRunId)
			if err == nil {
				return priorRun, nil
			}
		}
	}
	// Save run to source of state - it is *CRITICAL* to do this
	// -before- queuing to avoid processing unsaved runs
	if err = es.stateManager.CreateRun(ctx, run); err != nil {
		return run, err
	}
	if *run.Engine == state.EKSEngine {
		err = es.eksExecutionEngine.Enqueue(ctx, run)
	} else {
		err = es.emrExecutionEngine.Enqueue(ctx, run)
	}
	queuedAt := time.Now()
	if err != nil {
		return run, err
	}
	// Update the run's QueuedAt field
	if run, err = es.stateManager.UpdateRun(ctx, run.RunID, state.Run{QueuedAt: &queuedAt}); err != nil {
		return run, err
	}
	return run, nil
}

func (es *executionService) CreateTemplateRunByTemplateName(ctx context.Context, templateName string, templateVersion string, req *state.TemplateExecutionRequest) (state.Run, error) {
	ctx, span := utils.TraceJob(ctx, "flotilla.template.create_run_by_name", "")
	defer span.Finish()
	span.SetTag("template_name", templateName)
	span.SetTag("template_version", templateVersion)
	version, err := strconv.Atoi(templateVersion)
	if err != nil {
		// version is not an integer - fetch the "latest" template
		fetch, template, err := es.stateManager.GetLatestTemplateByTemplateName(ctx, templateName)
		if fetch && err == nil {
			return es.CreateTemplateRunByTemplateID(ctx, template.TemplateID, req)
		}
	} else {
		fetch, template, err := es.stateManager.GetTemplateByVersion(ctx, templateName, int64(version))
		if fetch && err == nil {
			return es.CreateTemplateRunByTemplateID(ctx, template.TemplateID, req)
		}
	}
	return state.Run{}, fmt.Errorf("invalid template name or version, template_name: %s, template_version: %s", templateName, templateVersion)
}

// CreateTemplateRunByTemplateID constructs and queues a new Run on the cluster specified.
func (es *executionService) CreateTemplateRunByTemplateID(ctx context.Context, templateID string, req *state.TemplateExecutionRequest) (state.Run, error) {
	ctx, span := utils.TraceJob(ctx, "flotilla.template.create_run_by_id", "")
	defer span.Finish()
	span.SetTag("template_id", templateID)
	// Ensure template exists
	template, err := es.stateManager.GetTemplateByID(ctx, templateID)
	if err != nil {
		return state.Run{}, err
	}
	return es.createFromTemplate(ctx, template, req)
}

func (es *executionService) createFromTemplate(ctx context.Context, template state.Template, req *state.TemplateExecutionRequest) (state.Run, error) {
	var (
		run state.Run
		err error
	)
	fields := req.GetExecutionRequestCommon()
	es.sanitizeExecutionRequestCommonFields(fields)
	// Construct run object with StatusQueued and new UUID4 run id
	run, err = es.constructRunFromTemplate(ctx, template, req)
	if err != nil {
		return run, err
	}
	if !req.DryRun {
		return es.createAndEnqueueRun(ctx, run)
	}
	return run, nil
}

func (es *executionService) constructRunFromTemplate(ctx context.Context, template state.Template, req *state.TemplateExecutionRequest) (state.Run, error) {
	run, err := es.constructBaseRunFromExecutable(ctx, template, req)
	if err != nil {
		return run, err
	}
	run.DefinitionID = template.TemplateID
	run.Alias = template.TemplateID
	run.GroupName = "template_group_name"
	run.ExecutionRequestCustom = req.GetExecutionRequestCustom()
	return run, nil
}

// resolveRequestTier returns the requested tier or default tier if empty
func (es *executionService) resolveRequestTier(requestedTier state.Tier) state.Tier {
	if requestedTier == "" {
		return state.Tier(es.eksTierDefault)
	}
	return requestedTier
}

// clusterSupportsTier checks if a cluster supports the specified tier
func (es *executionService) clusterSupportsTier(cluster state.ClusterMetadata, requestedTier state.Tier) bool {
	resolvedTier := es.resolveRequestTier(requestedTier)
	for _, allowedTier := range cluster.AllowedTiers {
		if allowedTier == string(resolvedTier) {
			return true
		}
	}
	return false
}

func (es *executionService) isClusterValid(clusterName string) bool {
	return slices.Contains(es.validEksClusters, clusterName)
}

func (es *executionService) UpdateClusterMetadata(ctx context.Context, cluster state.ClusterMetadata) error {
	ctx, span := utils.TraceJob(ctx, "flotilla.update_cluster_metadata", cluster.Name)
	defer span.Finish()
	span.SetTag("cluster_name", cluster.Name)
	return es.stateManager.UpdateClusterMetadata(ctx, cluster)
}

func (es *executionService) DeleteClusterMetadata(ctx context.Context, clusterID string) error {
	ctx, span := utils.TraceJob(ctx, "flotilla.delete_cluster_metadata", clusterID)
	defer span.Finish()
	span.SetTag("cluster_id", clusterID)
	return es.stateManager.DeleteClusterMetadata(ctx, clusterID)
}

func (es *executionService) GetClusterByID(ctx context.Context, clusterID string) (state.ClusterMetadata, error) {
	ctx, span := utils.TraceJob(ctx, "flotilla.get_cluster_by_id", clusterID)
	defer span.Finish()
	span.SetTag("cluster_id", clusterID)
	return es.stateManager.GetClusterByID(ctx, clusterID)
}

// GetRunStatus fetches only the essential status information for a run
func (es *executionService) GetRunStatus(ctx context.Context, runID string) (state.RunStatus, error) {
	ctx, span := utils.TraceJob(ctx, "flotilla.get_run_status", runID)
	defer span.Finish()
	span.SetTag("run_id", runID)
	return es.stateManager.GetRunStatus(ctx, runID)
}

================================================
FILE: services/execution_test.go
================================================
package services

import (
	"context"
	"crypto/md5"
	"fmt"
	"log"
	"testing"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/stitchfix/flotilla-os/config"
	"github.com/stitchfix/flotilla-os/state"
	"github.com/stitchfix/flotilla-os/testutils"
)

func setUp(t *testing.T) (ExecutionService, *testutils.ImplementsAllTheThings) {
	confDir := "../conf"
	c, _ := config.NewConfig(&confDir)
	imp := testutils.ImplementsAllTheThings{
		T: t,
		Definitions: map[string]state.Definition{
			"A": {DefinitionID: "A", Alias: "aliasA"},
			"B": {DefinitionID: "B", Alias: "aliasB"},
			"C": {DefinitionID: "C", Alias: "aliasC", ExecutableResources: state.ExecutableResources{Image: "invalidimage"}},
		},
		Runs: map[string]state.Run{
			"runA": {DefinitionID: "A", ClusterName: "A", GroupName: "A", RunID: "runA"},
			"runB": {DefinitionID: "B", ClusterName: "B", GroupName: "B", RunID: "runB"},
		},
		Qurls: map[string]string{
			"A": "a/",
			"B": "b/",
		},
		ClusterStates: []state.ClusterMetadata{
			{Name: "cluster1", Status: state.StatusActive, StatusReason: "Active and healthy"},
			{Name: "cluster2", Status: state.StatusActive, StatusReason: "Active and healthy"},
		},
	}
	es, err := NewExecutionService(c, &imp, &imp, &imp, &imp)
	if err != nil {
		log.Fatalf("error setting up execution service: %s", err.Error())
	}
	return es, &imp
}

func TestExecutionService_CreateDefinitionRunByDefinitionID(t *testing.T) {
	ctx := context.Background()
	// Tests valid create
	es, imp := setUp(t)
	env := &state.EnvList{
		{Name: "K1", Value: "V1"},
	}
	expectedCalls := map[string]bool{
		"GetDefinition":            true,
		"CreateRun":                true,
		"UpdateRun":                true,
		"GetTaskHistoricalRuntime": true,
		"GetPodReAttemptRate":      true,
		"Enqueue":                  true,
		"ListClusterStates":        true,
	}
	cmd := "_test_cmd_"
	sa := "fooAccount"
	cpu := int64(512)
	engine := state.DefaultEngine
	req := state.DefinitionExecutionRequest{
		ExecutionRequestCommon: &state.ExecutionRequestCommon{
			ClusterName:      "clusta",
			Env:              env,
			OwnerID:          "somebody",
			Command:          &cmd,
			Memory:           nil,
			Cpu:              &cpu,
			Engine:           &engine,
			EphemeralStorage: nil,
			NodeLifecycle:    nil,
			IdempotenceKey:   nil,
			Arch:             nil,
			ServiceAccount:   &sa,
		},
	}
	run, err := es.CreateDefinitionRunByDefinitionID(ctx, "B", &req)
	if err != nil {
		t.Error(err.Error())
	}
	if len(imp.Calls) != len(expectedCalls) {
		t.Errorf("Expected exactly %v calls during run creation but was: %v", len(expectedCalls), len(imp.Calls))
	}
	for _, call := range imp.Calls {
		_, ok := expectedCalls[call]
		if !ok {
			t.Errorf("Unexpected call during run creation: %s", call)
		}
	}
	if len(run.RunID) == 0 {
		t.Errorf("Expected Create to populate run with non-empty RunID")
	}
	if run.DefinitionID != "B" {
		t.Errorf("Expected definitionID 'B' but was '%s'", run.DefinitionID)
	}
	if run.Status != state.StatusQueued {
		t.Errorf("Expected new run to have status '%s' but was '%s'", state.StatusQueued, run.Status)
	}
	if run.User != "somebody" {
		t.Errorf("Expected new run to have user 'somebody' but was '%s'", run.User)
	}
	if run.QueuedAt == nil {
		t.Errorf("Expected new run to have a 'queued_at' field but was nil.")
	}
	if run.Env == nil {
		t.Errorf("Expected non-nil environment")
	}
	if len(*run.Env) != (len(es.ReservedVariables()) + len(*env)) {
		t.Errorf("Unexpected number of environment variables; expected %v but was %v", len(es.ReservedVariables())+len(*env), len(*run.Env))
	}
	if run.Command == nil {
		t.Errorf("Expected non-nil command")
	} else {
		if *run.Command != cmd {
			t.Errorf("Unexpected command, found [%s], expecting [%s]", *run.Command, cmd)
		}
	}
	if run.Cpu == nil {
		t.Errorf("Expected non-nil cpu")
	} else {
		if *run.Cpu != cpu {
			t.Errorf("Unexpected cpu, found [%d], expecting [%d]", *run.Cpu, cpu)
		}
	}
	if run.ServiceAccount == nil {
		t.Errorf("Expected non-nil service account")
	} else {
		if *run.ServiceAccount != sa {
			t.Errorf("Unexpected service account, found [%s], expecting [%s]", *run.ServiceAccount, sa)
		}
	}
	includesExpected := false
	for _, e := range *run.Env {
		if e.Name == "K1" && e.Value == "V1" {
			includesExpected = true
		}
	}
	if !includesExpected {
		t.Errorf("Expected K1:V1 in run environment")
	}
}

func TestExecutionService_CreateDefinitionRunByAlias(t *testing.T) {
	ctx := context.Background()
	// Tests valid create
	es, imp := setUp(t)
	env := &state.EnvList{
		{Name: "K1", Value: "V1"},
	}
	expectedCalls := map[string]bool{
		"GetDefinitionByAlias":     true,
		"CreateRun":                true,
		"UpdateRun":                true,
		"GetTaskHistoricalRuntime": true,
		"GetPodReAttemptRate":      true,
		"Enqueue":                  true,
		"ListClusterStates":        true,
	}
	mem := int64(1024)
	engine := state.DefaultEngine
	req := state.DefinitionExecutionRequest{
		ExecutionRequestCommon: &state.ExecutionRequestCommon{
			ClusterName:      "",
			Env:              env,
			OwnerID:          "somebody",
			Command:          nil,
			Memory:           &mem,
			Cpu:              nil,
			Engine:           &engine,
			EphemeralStorage: nil,
			NodeLifecycle:    nil,
			IdempotenceKey:   nil,
			Arch:             nil,
		},
	}
	run, err := es.CreateDefinitionRunByAlias(ctx, "aliasB", &req)
	if err != nil {
		t.Error(err.Error())
	}
	if len(imp.Calls) != len(expectedCalls) {
		t.Errorf("Expected exactly %v calls during run creation but was: %v", len(expectedCalls), len(imp.Calls))
	}
	for _, call := range imp.Calls {
		_, ok := expectedCalls[call]
		if !ok {
			t.Errorf("Unexpected call during run creation: %s", call)
		}
	}
	if len(run.RunID) == 0 {
		t.Errorf("Expected Create to populate run with non-empty RunID")
	}
	if run.DefinitionID != "B" {
		t.Errorf("Expected definitionID 'B' but was '%s'", run.DefinitionID)
	}
	if run.Status != state.StatusQueued {
		t.Errorf("Expected new run to have status '%s' but was '%s'", state.StatusQueued, run.Status)
	}
	if run.User != "somebody" {
		t.Errorf("Expected new run to have user 'somebody' but was '%s'", run.User)
	}
	if run.QueuedAt == nil {
		t.Errorf("Expected new run to have a 'queued_at' field but was nil.")
	}
	if run.Env == nil {
		t.Errorf("Expected non-nil environment")
	}
	if len(*run.Env) != (len(es.ReservedVariables()) + len(*env)) {
		t.Errorf("Unexpected number of environment variables; expected %v but was %v", len(es.ReservedVariables())+len(*env), len(*run.Env))
	}
	if run.Memory == nil {
		t.Errorf("Expected non-nil memory")
	} else {
		if *run.Memory != mem {
			t.Errorf("Unexpected memory, found [%d], expecting [%d]", *run.Memory, mem)
		}
	}
	includesExpected := false
	for _, e := range *run.Env {
		if e.Name == "K1" && e.Value == "V1" {
			includesExpected = true
		}
	}
	if !includesExpected {
		t.Errorf("Expected K1:V1 in run environment")
	}
}

func TestExecutionService_List(t *testing.T) {
	ctx := context.Background()
	es, imp := setUp(t)
	es.List(ctx, 1, 0, "asc", "cluster_name", nil, nil)
	expectedCalls := map[string]bool{
		"ListRuns": true,
	}
	if len(imp.Calls) != len(expectedCalls) {
		t.Errorf("Expected exactly %v calls during run list with no filters but was: %v", len(expectedCalls), len(imp.Calls))
	}
	for _, call := range imp.Calls {
		_, ok := expectedCalls[call]
		if !ok {
t.Errorf("Unexpected call during run list with no filters: %s", call) } } } func TestExecutionService_List2(t *testing.T) { ctx := context.Background() es, imp := setUp(t) es.List( ctx, 1, 0, "asc", "cluster_name", map[string][]string{"definition_id": {"A"}}, nil) expectedCalls := map[string]bool{ "GetDefinition": true, "ListRuns": true, } if len(imp.Calls) != len(expectedCalls) { t.Errorf("Expected exactly %v calls during run list with no filters but was: %v", len(expectedCalls), len(imp.Calls)) } for _, call := range imp.Calls { _, ok := expectedCalls[call] if !ok { t.Errorf("Unexpected call during run list with no filters: %s", call) } } } func TestExecutionService_ListClusters(t *testing.T) { ctx := context.Background() es, imp := setUp(t) clusters, err := es.ListClusters(ctx) if err != nil { t.Errorf("Expected no error listing clusters, got: %v", err) } expectedCalls := map[string]bool{ "ListClusterStates": true, } for _, call := range imp.Calls { _, ok := expectedCalls[call] if !ok { t.Errorf("Unexpected call during cluster listing: %s", call) } } if len(clusters) != 2 { t.Errorf("Expected 2 clusters, got %d", len(clusters)) } } func TestExecutionService_CreateDefinitionRunWithTier(t *testing.T) { ctx := context.Background() // Set up test environment confDir := "../conf" c, _ := config.NewConfig(&confDir) // Create mock implementation with clusters supporting different tiers imp := testutils.ImplementsAllTheThings{ T: t, Definitions: map[string]state.Definition{ "A": {DefinitionID: "A", Alias: "aliasA"}, }, Runs: map[string]state.Run{}, Qurls: map[string]string{ "A": "a/", }, ClusterStates: []state.ClusterMetadata{ { Name: "prod-cluster", Status: state.StatusActive, StatusReason: "Active and healthy", AllowedTiers: []string{"1", "2"}, }, { Name: "staging-cluster", Status: state.StatusActive, StatusReason: "Active and healthy", AllowedTiers: []string{"3", "4"}, }, { Name: "string-cluster", Status: state.StatusActive, StatusReason: "Active and healthy", AllowedTiers: []string{"tier3", "tier4"}, }, { Name: "unrestricted-cluster", Status: state.StatusActive, StatusReason: "Active and healthy", // No tiers specified - should use default tier }, { Name: "maintenance-cluster", Status: state.StatusMaintenance, StatusReason: "In maintenance", AllowedTiers: []string{"1", "2", "3", "4"}, }, }, } imp.GetRandomClusterName = func(clusters []string) string { if len(clusters) > 0 { return clusters[0] } return "" } es, err := NewExecutionService(c, &imp, &imp, &imp, &imp) if err != nil { t.Fatalf("Error setting up execution service: %s", err.Error()) } // Test cases with different tiers testCases := []struct { name string tier string expectedCluster string }{ { name: "Production tier request", tier: "1", expectedCluster: "prod-cluster", }, { name: "Staging tier request", tier: "3", expectedCluster: "staging-cluster", }, { name: "No tier specified", tier: "", expectedCluster: "staging-cluster", }, { name: "String Tier", tier: "tier3", expectedCluster: "string-cluster", }, { name: "Invalid tier", tier: "nonexistent", expectedCluster: es.GetDefaultCluster(), }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { imp.Calls = make([]string, 0) cmd := "echo test" engine := state.DefaultEngine req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Tier: state.Tier(tc.tier), Command: &cmd, OwnerID: "testuser", Engine: &engine, }, } run, err := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) if err != nil { t.Errorf("Error creating run: %s", 
err.Error()) return } // Verify the selected cluster matches expectations if run.ClusterName != tc.expectedCluster { t.Errorf("Expected cluster %s for tier %s, but got %s", tc.expectedCluster, tc.tier, run.ClusterName) } // Verify tier was set correctly if string(run.Tier) != tc.tier && tc.tier != "" { t.Errorf("Expected tier %s, but got %s", tc.tier, string(run.Tier)) } }) } } func TestExecutionService_GetRunStatus(t *testing.T) { ctx := context.Background() es, imp := setUp(t) expectedCalls := map[string]bool{ "GetRunStatus": true, } status, err := es.GetRunStatus(ctx, "runA") if err != nil { t.Errorf("Expected no error when getting status of existing run, got: %s", err.Error()) } if len(imp.Calls) != len(expectedCalls) { t.Errorf("Expected exactly %v calls during status retrieval but was: %v", len(expectedCalls), len(imp.Calls)) } for _, call := range imp.Calls { _, ok := expectedCalls[call] if !ok { t.Errorf("Unexpected call during status retrieval: %s", call) } } if status.RunID != "runA" { t.Errorf("Expected run ID 'runA' but got '%s'", status.RunID) } if status.DefinitionID != "A" { t.Errorf("Expected definition ID 'A' but got '%s'", status.DefinitionID) } if status.ClusterName != "A" { t.Errorf("Expected cluster name 'A' but got '%s'", status.ClusterName) } imp.Calls = []string{} _, err = es.GetRunStatus(ctx, "nonexistent") if err == nil { t.Errorf("Expected error when getting status of non-existent run, got nil") } expectedErrorString := "No run with ID: nonexistent" if err != nil && err.Error() != expectedErrorString { t.Errorf("Expected error message '%s', got '%s'", expectedErrorString, err.Error()) } } func TestExecutionService_CommandHashCalculatedFromCommand(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Test that command_hash is MD5 of command, not description cmd := "python script.py --arg value" desc := "Different description" engine := state.DefaultEngine req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: &cmd, Description: &desc, OwnerID: "testuser", Engine: &engine, }, } run, err := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) if err != nil { t.Fatalf("Error creating run: %s", err.Error()) } // Verify command_hash is MD5 of command expectedHash := fmt.Sprintf("%x", md5.Sum([]byte(cmd))) if run.CommandHash == nil { t.Errorf("Expected non-nil command_hash") } else if *run.CommandHash != expectedHash { t.Errorf("Expected command_hash to be MD5 of command '%s', got '%s'", expectedHash, *run.CommandHash) } // Verify it's NOT MD5 of description descHash := fmt.Sprintf("%x", md5.Sum([]byte(desc))) if run.CommandHash != nil && *run.CommandHash == descHash { t.Errorf("command_hash should NOT be MD5 of description (that was the bug!)") } } func TestExecutionService_CommandHashWithSameDescriptionDifferentCommands(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Test that different commands get different hashes even with same description description := "Daily processing job" cmd1 := "python process.py --date 2025-01-01" cmd2 := "python process.py --date 2025-01-02" engine := state.DefaultEngine req1 := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: &cmd1, Description: &description, OwnerID: "testuser", Engine: &engine, }, } req2 := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: &cmd2, Description: &description, OwnerID: "testuser", Engine: &engine, }, } run1, err := 
es.CreateDefinitionRunByDefinitionID(ctx, "A", &req1) if err != nil { t.Fatalf("Error creating run1: %s", err.Error()) } run2, err := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req2) if err != nil { t.Fatalf("Error creating run2: %s", err.Error()) } // Verify both have non-nil command_hash if run1.CommandHash == nil { t.Errorf("Expected run1 to have non-nil command_hash") } if run2.CommandHash == nil { t.Errorf("Expected run2 to have non-nil command_hash") } // Verify hashes are different (critical for ARA fix) if run1.CommandHash != nil && run2.CommandHash != nil { if *run1.CommandHash == *run2.CommandHash { t.Errorf("Different commands should have different hashes even with same description. "+ "Both got hash '%s'. This was the ARA bug!", *run1.CommandHash) } } // Verify they match expected hashes expectedHash1 := fmt.Sprintf("%x", md5.Sum([]byte(cmd1))) expectedHash2 := fmt.Sprintf("%x", md5.Sum([]byte(cmd2))) if run1.CommandHash != nil && *run1.CommandHash != expectedHash1 { t.Errorf("run1 command_hash mismatch: expected '%s', got '%s'", expectedHash1, *run1.CommandHash) } if run2.CommandHash != nil && *run2.CommandHash != expectedHash2 { t.Errorf("run2 command_hash mismatch: expected '%s', got '%s'", expectedHash2, *run2.CommandHash) } } func TestExecutionService_CommandHashNullWhenCommandNull(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Test that NULL command results in NULL command_hash // (This is a malformed job, but should not crash) engine := state.DefaultEngine desc := "A description without a command" req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: nil, // NULL command Description: &desc, OwnerID: "testuser", Engine: &engine, }, } run, err := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) if err != nil { t.Fatalf("Error creating run: %s", err.Error()) } // Command should be set from definition's command (if any) // But if definition also has no command, command_hash should be NULL if run.Command == nil || len(*run.Command) == 0 { // Command is NULL/empty, so command_hash should also be NULL if run.CommandHash != nil { t.Errorf("Expected NULL command_hash when command is NULL, got '%s'", *run.CommandHash) } } // Even if command gets set from definition, command_hash should NOT be from description if run.CommandHash != nil { descHash := fmt.Sprintf("%x", md5.Sum([]byte(desc))) if *run.CommandHash == descHash { t.Errorf("command_hash should NOT be MD5 of description (that was the bug!)") } } } func TestExecutionService_CommandHashMatchesCommand(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Test with various command strings to ensure consistent hashing testCases := []struct { name string command string }{ {"Simple command", "echo hello"}, {"Command with args", "python train.py --epochs 10 --lr 0.001"}, {"Multi-line command", "set -e\necho 'Starting'\npython script.py\necho 'Done'"}, {"Command with special chars", "grep -r 'pattern' /path/to/files | sort | uniq"}, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { engine := state.DefaultEngine cmd := tc.command req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: &cmd, OwnerID: "testuser", Engine: &engine, }, } run, err := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) if err != nil { t.Fatalf("Error creating run: %s", err.Error()) } expectedHash := fmt.Sprintf("%x", md5.Sum([]byte(tc.command))) if run.CommandHash == nil { t.Errorf("Expected non-nil 
command_hash for command: %s", tc.command) } else if *run.CommandHash != expectedHash { t.Errorf("command_hash mismatch for '%s': expected '%s', got '%s'", tc.command, expectedHash, *run.CommandHash) } }) } } func TestExecutionService_CommandHashStableAcrossRuns(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Verify same command always produces same hash (consistency check) cmd := "python train.py --model resnet50" engine := state.DefaultEngine req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: &cmd, OwnerID: "testuser", Engine: &engine, }, } // Create multiple runs with same command run1, err1 := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) run2, err2 := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) run3, err3 := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) if err1 != nil || err2 != nil || err3 != nil { t.Fatalf("Error creating runs") } // All should have same command_hash if run1.CommandHash == nil || run2.CommandHash == nil || run3.CommandHash == nil { t.Errorf("All runs should have non-nil command_hash") } if *run1.CommandHash != *run2.CommandHash || *run1.CommandHash != *run3.CommandHash { t.Errorf("Same command should always produce same hash. Got: '%s', '%s', '%s'", *run1.CommandHash, *run2.CommandHash, *run3.CommandHash) } // Verify it matches expected expectedHash := fmt.Sprintf("%x", md5.Sum([]byte(cmd))) if *run1.CommandHash != expectedHash { t.Errorf("Expected hash '%s', got '%s'", expectedHash, *run1.CommandHash) } } func TestExecutionService_CommandHashNotSetInEndpoints(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Test that even if description is provided, command_hash comes from command // This verifies the endpoints.go fix (removal of description-based hashing) cmd := "python app.py" desc := "This is a description" engine := state.DefaultEngine req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: &cmd, Description: &desc, CommandHash: nil, // Explicitly NULL to verify it gets calculated OwnerID: "testuser", Engine: &engine, }, } run, err := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) if err != nil { t.Fatalf("Error creating run: %s", err.Error()) } // Should be MD5 of command, not description cmdHash := fmt.Sprintf("%x", md5.Sum([]byte(cmd))) descHash := fmt.Sprintf("%x", md5.Sum([]byte(desc))) if run.CommandHash == nil { t.Errorf("Expected command_hash to be calculated") } else { if *run.CommandHash == descHash { t.Errorf("BUG: command_hash is MD5 of description! 
This should have been fixed.") } if *run.CommandHash != cmdHash { t.Errorf("Expected command_hash to be MD5 of command '%s', got '%s'", cmdHash, *run.CommandHash) } } } func TestExecutionService_CommandHashWithOverride(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Test that if API client explicitly provides a command_hash, it gets overwritten // by the correct hash calculated from the command cmd := "python script.py" wrongHash := "this_is_wrong_hash" engine := state.DefaultEngine req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: &cmd, CommandHash: aws.String(wrongHash), // Wrong hash provided by client OwnerID: "testuser", Engine: &engine, }, } run, err := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) if err != nil { t.Fatalf("Error creating run: %s", err.Error()) } // Should be overwritten with correct hash expectedHash := fmt.Sprintf("%x", md5.Sum([]byte(cmd))) if run.CommandHash == nil { t.Errorf("Expected non-nil command_hash") } else if *run.CommandHash == wrongHash { t.Errorf("BUG: Wrong hash was not overwritten! Still has '%s'", wrongHash) } else if *run.CommandHash != expectedHash { t.Errorf("Expected command_hash '%s', got '%s'", expectedHash, *run.CommandHash) } } func TestExecutionService_SparkCommandHashFromDescription(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Test that Spark jobs with NULL command get command_hash from description // Spark jobs don't have a command field - they store config in spark_extension desc := "Vmi Po Recon Data Extract / Run Snapshots" engine := state.EKSSparkEngine entryPoint := "s3://bucket/script.py" req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: nil, // Spark jobs have NULL command Description: &desc, OwnerID: "testuser", Engine: &engine, SparkExtension: &state.SparkExtension{ SparkSubmitJobDriver: &state.SparkSubmitJobDriver{ EntryPoint: &entryPoint, }, }, }, } run, err := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) if err != nil { t.Fatalf("Error creating run: %s", err.Error()) } // Should have command_hash from description (for Spark jobs) expectedHash := fmt.Sprintf("%x", md5.Sum([]byte(desc))) if run.CommandHash == nil { t.Errorf("Expected non-nil command_hash for Spark job with description") } else if *run.CommandHash != expectedHash { t.Errorf("Expected Spark command_hash to be MD5 of description '%s', got '%s'", expectedHash, *run.CommandHash) } } func TestExecutionService_SparkCommandHashConsistent(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Test that Spark jobs with same description get same hash (critical for ARA) desc := "Vmi Po Recon Data Extract / Run Snapshots" engine := state.EKSSparkEngine entryPoint := "s3://bucket/script.py" req := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: nil, Description: &desc, OwnerID: "testuser", Engine: &engine, SparkExtension: &state.SparkExtension{ SparkSubmitJobDriver: &state.SparkSubmitJobDriver{ EntryPoint: &entryPoint, }, }, }, } // Create multiple Spark runs with same description run1, err1 := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) run2, err2 := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) run3, err3 := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req) if err1 != nil || err2 != nil || err3 != nil { t.Fatalf("Error creating Spark runs") } // All should have same command_hash for ARA tracking if run1.CommandHash == nil || 
run2.CommandHash == nil || run3.CommandHash == nil { t.Errorf("All Spark runs should have non-nil command_hash") } if *run1.CommandHash != *run2.CommandHash || *run1.CommandHash != *run3.CommandHash { t.Errorf("Spark jobs with same description should always produce same hash. Got: '%s', '%s', '%s'", *run1.CommandHash, *run2.CommandHash, *run3.CommandHash) } // Verify it matches expected expectedHash := fmt.Sprintf("%x", md5.Sum([]byte(desc))) if *run1.CommandHash != expectedHash { t.Errorf("Expected Spark hash '%s', got '%s'", expectedHash, *run1.CommandHash) } } func TestExecutionService_SparkVsRegularEKSHashing(t *testing.T) { ctx := context.Background() es, _ := setUp(t) // Test that Spark and regular EKS jobs use different hashing strategies // This ensures no cross-contamination between Spark and regular jobs description := "Process data files" cmd := "python process.py" entryPoint := "s3://bucket/script.py" // Regular EKS job regularEngine := state.DefaultEngine regularReq := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: &cmd, Description: &description, OwnerID: "testuser", Engine: ®ularEngine, }, } // Spark job sparkEngine := state.EKSSparkEngine sparkReq := state.DefinitionExecutionRequest{ ExecutionRequestCommon: &state.ExecutionRequestCommon{ Command: nil, // Spark has no command Description: &description, OwnerID: "testuser", Engine: &sparkEngine, SparkExtension: &state.SparkExtension{ SparkSubmitJobDriver: &state.SparkSubmitJobDriver{ EntryPoint: &entryPoint, }, }, }, } regularRun, err1 := es.CreateDefinitionRunByDefinitionID(ctx, "A", ®ularReq) sparkRun, err2 := es.CreateDefinitionRunByDefinitionID(ctx, "A", &sparkReq) if err1 != nil || err2 != nil { t.Fatalf("Error creating runs") } // Verify both have command_hash if regularRun.CommandHash == nil { t.Errorf("Regular EKS job should have command_hash") } if sparkRun.CommandHash == nil { t.Errorf("Spark job should have command_hash") } // Verify they use different hash sources cmdHash := fmt.Sprintf("%x", md5.Sum([]byte(cmd))) descHash := fmt.Sprintf("%x", md5.Sum([]byte(description))) if regularRun.CommandHash != nil && *regularRun.CommandHash != cmdHash { t.Errorf("Regular EKS job should hash from command, expected '%s', got '%s'", cmdHash, *regularRun.CommandHash) } if sparkRun.CommandHash != nil && *sparkRun.CommandHash != descHash { t.Errorf("Spark job should hash from description, expected '%s', got '%s'", descHash, *sparkRun.CommandHash) } // Most importantly: they should have DIFFERENT hashes (no cross-contamination) if regularRun.CommandHash != nil && sparkRun.CommandHash != nil { if *regularRun.CommandHash == *sparkRun.CommandHash { t.Errorf("Regular EKS and Spark jobs should have different hashes to prevent ARA cross-contamination. 
Both got '%s'", *regularRun.CommandHash)
		}
	}
}

func TestExecutionService_SparkNullDescriptionNullHash(t *testing.T) {
	ctx := context.Background()
	es, _ := setUp(t)

	// Test that Spark jobs with NULL command AND NULL description get a NULL
	// hash. (This is a malformed job, but it should not crash.)
	engine := state.EKSSparkEngine
	entryPoint := "s3://bucket/script.py"
	req := state.DefinitionExecutionRequest{
		ExecutionRequestCommon: &state.ExecutionRequestCommon{
			Command:     nil, // Spark has no command
			Description: nil, // Also no description (malformed)
			OwnerID:     "testuser",
			Engine:      &engine,
			SparkExtension: &state.SparkExtension{
				SparkSubmitJobDriver: &state.SparkSubmitJobDriver{
					EntryPoint: &entryPoint,
				},
			},
		},
	}

	run, err := es.CreateDefinitionRunByDefinitionID(ctx, "A", &req)
	if err != nil {
		t.Fatalf("Error creating run: %s", err.Error())
	}

	// Should have NULL command_hash (malformed job)
	if run.CommandHash != nil {
		t.Errorf("Expected NULL command_hash for Spark job with NULL description, got '%s'", *run.CommandHash)
	}
}
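The tests above pin down the command-hash contract: regular EKS runs hash the literal command with MD5, Spark runs (which carry no command) fall back to hashing the description so repeated runs of the same job share a stable hash for ARA lookups, and a Spark run with neither keeps a NULL hash. A minimal restatement of that rule (illustrative only; the helper name and shape are invented, the real logic lives in the execution service, and crypto/md5 and fmt are assumed to be imported):

// commandHashFor is a hypothetical helper restating the rule the tests assert.
func commandHashFor(engine string, command, description *string) *string {
	// Regular runs: hash the literal command.
	if command != nil {
		h := fmt.Sprintf("%x", md5.Sum([]byte(*command)))
		return &h
	}
	// Spark runs have no command; hash the description instead so repeated
	// runs of the same job share a stable hash for ARA.
	if engine == "eks-spark" && description != nil { // "eks-spark" == state.EKSSparkEngine
		h := fmt.Sprintf("%x", md5.Sum([]byte(*description)))
		return &h
	}
	// No command and no description (malformed job): leave the hash NULL.
	return nil
}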
================================================ FILE: services/logs.go ================================================

package services

import (
	"context"
	"net/http"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/stitchfix/flotilla-os/clients/logs"
	"github.com/stitchfix/flotilla-os/state"
)

type LogService interface {
	Logs(runID string, lastSeen *string, role *string, facility *string) (string, *string, error)
	LogsText(runID string, w http.ResponseWriter) error
}

type logService struct {
	sm state.Manager
	lc logs.Client
}

// NewLogService initializes a LogService.
func NewLogService(sm state.Manager, lc logs.Client) (LogService, error) {
	return &logService{sm: sm, lc: lc}, nil
}

// Logs returns logs associated with a runID.
func (ls *logService) Logs(runID string, lastSeen *string, role *string, facility *string) (string, *string, error) {
	run, err := ls.sm.GetRun(context.Background(), runID)
	if err != nil {
		return "", nil, err
	}
	if run.Status != state.StatusRunning && run.Status != state.StatusStopped {
		// Won't have logs yet
		return "", aws.String(""), nil
	}
	if run.ExecutableType == nil {
		defaultExecutableType := state.ExecutableTypeDefinition
		run.ExecutableType = &defaultExecutableType
	}
	if run.ExecutableID == nil {
		run.ExecutableID = &run.DefinitionID
	}
	executable, err := ls.sm.GetExecutableByTypeAndID(context.Background(), *run.ExecutableType, *run.ExecutableID)
	if err != nil {
		return "", nil, err
	}
	return ls.lc.Logs(executable, run, lastSeen, role, facility)
}

// LogsText writes all logs associated with a runID as text (supported only for S3 logs).
func (ls *logService) LogsText(runID string, w http.ResponseWriter) error {
	run, err := ls.sm.GetRun(context.Background(), runID)
	if err != nil {
		return err
	}
	if run.Status != state.StatusRunning && run.Status != state.StatusStopped {
		// Won't have logs yet
		return nil
	}
	if run.ExecutableType == nil {
		defaultExecutableType := state.ExecutableTypeDefinition
		run.ExecutableType = &defaultExecutableType
	}
	if run.ExecutableID == nil {
		run.ExecutableID = &run.DefinitionID
	}
	executable, err := ls.sm.GetExecutableByTypeAndID(context.Background(), *run.ExecutableType, *run.ExecutableID)
	if err != nil {
		return err
	}
	return ls.lc.LogsText(executable, run, w)
}

================================================ FILE: services/logs_test.go ================================================

package services

import (
	"testing"

	"github.com/stitchfix/flotilla-os/state"
	"github.com/stitchfix/flotilla-os/testutils"
)

func setUpLogServiceTest(t *testing.T) (LogService, *testutils.ImplementsAllTheThings) {
	imp := testutils.ImplementsAllTheThings{
		T: t,
		Definitions: map[string]state.Definition{
			"B": {DefinitionID: "{}"},
		},
		Runs: map[string]state.Run{
			"isQueued": {DefinitionID: "q", RunID: "isQueued", Status: state.StatusQueued},
			"running":  {DefinitionID: "B", RunID: "running", Status: state.StatusRunning},
		},
	}
	ls, _ := NewLogService(&imp, &imp)
	return ls, &imp
}

func TestLogService_Logs(t *testing.T) {
	ls, imp := setUpLogServiceTest(t)

	//
	// Check that we don't try to get logs for runs that won't have them yet
	//
	expectedCalls := map[string]bool{
		"GetRun": true,
	}
	_, _, err := ls.Logs("isQueued", nil, nil, nil)
	if err != nil {
		t.Error(err.Error())
	}
	if len(imp.Calls) != len(expectedCalls) {
		t.Errorf("Expected exactly %v calls for log query for queued run but was: %v", len(expectedCalls), len(imp.Calls))
	}
	for _, call := range imp.Calls {
		if _, ok := expectedCalls[call]; !ok {
			t.Errorf("Unexpected call during log query for queued run: %s", call)
		}
	}

	//
	// Check that we do get logs for runs that should have them
	//
	ls, imp = setUpLogServiceTest(t)
	expectedCalls = map[string]bool{
		"GetRun":                   true,
		"GetDefinition":            true,
		"Logs":                     true,
		"GetExecutableByTypeAndID": true,
	}
	_, _, err = ls.Logs("running", nil, nil, nil)
	if err != nil {
		t.Error(err.Error())
	}
	if len(imp.Calls) != len(expectedCalls) {
		t.Errorf("Expected exactly %v calls for log query for running run but was: %v", len(expectedCalls), len(imp.Calls))
	}
	for _, call := range imp.Calls {
		if _, ok := expectedCalls[call]; !ok {
			t.Errorf("Unexpected call during log query for running run: %s", call)
		}
	}
}
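The lastSeen value returned by Logs acts as a cursor: feeding the previous call's token back in returns only output produced since then. A hypothetical polling loop (wiring invented for illustration; assumes fmt and time are imported, and a real caller would also stop once the run reaches a terminal status):

// tailRunLogs is an illustrative sketch, not part of the repository.
func tailRunLogs(ls LogService, runID string) error {
	var lastSeen *string
	for i := 0; i < 30; i++ { // bounded for the sketch; a real caller polls until the run stops
		chunk, next, err := ls.Logs(runID, lastSeen, nil, nil)
		if err != nil {
			return err
		}
		if chunk != "" {
			fmt.Print(chunk)
		}
		lastSeen = next // thread the cursor through so each call returns only new output
		time.Sleep(2 * time.Second)
	}
	return nil
}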
================================================ FILE: services/template.go ================================================

package services

import (
	"context"
	"reflect"
	"strings"

	"github.com/stitchfix/flotilla-os/config"
	"github.com/stitchfix/flotilla-os/exceptions"
	"github.com/stitchfix/flotilla-os/state"
)

// TemplateService defines an interface for operations involving templates.
type TemplateService interface {
	GetByID(ctx context.Context, id string) (state.Template, error)
	GetLatestByName(ctx context.Context, templateName string) (bool, state.Template, error)
	List(ctx context.Context, limit int, offset int, sortBy string, order string) (state.TemplateList, error)
	ListLatestOnly(ctx context.Context, limit int, offset int, sortBy string, order string) (state.TemplateList, error)
	Create(ctx context.Context, tpl *state.CreateTemplateRequest) (state.CreateTemplateResponse, error)
}

type templateService struct {
	sm state.Manager
}

// NewTemplateService configures and returns a TemplateService.
func NewTemplateService(conf config.Config, sm state.Manager) (TemplateService, error) {
	ts := templateService{sm: sm}
	return &ts, nil
}

// Create fully initializes and saves a new template.
func (ts *templateService) Create(ctx context.Context, req *state.CreateTemplateRequest) (state.CreateTemplateResponse, error) {
	res := state.CreateTemplateResponse{
		DidCreate: false,
		Template:  state.Template{},
	}

	curr, err := ts.constructTemplateFromCreateTemplateRequest(req)
	if err != nil {
		return res, err
	}

	// 1. Check validity.
	if valid, reasons := curr.IsValid(); !valid {
		return res, exceptions.MalformedInput{ErrorString: strings.Join(reasons, "\n")}
	}

	// 2. Attach template id.
	templateID, err := state.NewTemplateID(curr)
	if err != nil {
		return res, err
	}
	curr.TemplateID = templateID

	// 3. Check if the template name exists. If it does NOT, insert it into
	// the DB with a version number of 1. If it does, and any fields have
	// changed, create a new row in the DB with the version incremented by 1.
	// If NO fields have changed, just return the latest version.
	doesExist, prev, err := ts.sm.GetLatestTemplateByTemplateName(ctx, curr.TemplateName)
	if err != nil {
		return res, err
	}

	// No previous template with the same name; write it.
	if !doesExist {
		curr.Version = 1
		res.Template = curr
		res.DidCreate = true
		return res, ts.sm.CreateTemplate(ctx, curr)
	}

	// If prev and curr differ, write curr to the DB with the version number
	// incremented by 1. Otherwise, return prev.
	if ts.diff(prev, curr) {
		curr.Version = prev.Version + 1
		res.Template = curr
		res.DidCreate = true
		return res, ts.sm.CreateTemplate(ctx, curr)
	}

	res.Template = prev
	return res, nil
}

// GetByID returns the template specified by id.
func (ts *templateService) GetByID(ctx context.Context, id string) (state.Template, error) {
	return ts.sm.GetTemplateByID(ctx, id)
}

// GetLatestByName returns the latest version of the template with the given name.
func (ts *templateService) GetLatestByName(ctx context.Context, templateName string) (bool, state.Template, error) {
	return ts.sm.GetLatestTemplateByTemplateName(ctx, templateName)
}

// List lists all template versions.
func (ts *templateService) List(ctx context.Context, limit int, offset int, sortBy string, order string) (state.TemplateList, error) {
	return ts.sm.ListTemplates(ctx, limit, offset, sortBy, order)
}

// ListLatestOnly lists only the latest version of each template.
func (ts *templateService) ListLatestOnly(ctx context.Context, limit int, offset int, sortBy string, order string) (state.TemplateList, error) {
	return ts.sm.ListTemplatesLatestOnly(ctx, limit, offset, sortBy, order)
}
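// exampleTemplateVersioning is an illustrative sketch, not part of the
// original file: Create only writes a new row when something actually
// changed, so re-submitting an identical request is a no-op and any field
// change bumps the version. The template name and values are invented.
func exampleTemplateVersioning(ctx context.Context, ts TemplateService) {
	req := &state.CreateTemplateRequest{
		TemplateName:        "etl-job",
		CommandTemplate:     "python run.py --date {{.date}}",
		Schema:              state.TemplateJSONSchema{"type": "object"},
		ExecutableResources: state.ExecutableResources{Image: "etl:latest"},
	}
	first, _ := ts.Create(ctx, req)  // DidCreate == true, Version == 1
	second, _ := ts.Create(ctx, req) // DidCreate == false, Version still 1
	req.Image = "etl:v2"             // any changed field triggers a new version
	third, _ := ts.Create(ctx, req)  // DidCreate == true, Version == 2
	_, _, _ = first, second, third
}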
// diff performs a field-by-field comparison of two templates; Version is
// excluded, since it is exactly what gets bumped when the fields differ.
func (ts *templateService) diff(prev state.Template, curr state.Template) bool {
	if prev.TemplateName != curr.TemplateName {
		return true
	}
	if prev.CommandTemplate != curr.CommandTemplate {
		return true
	}
	if prev.Image != curr.Image {
		return true
	}
	// Pointer-valued resource fields are compared nil-safely, treating nil as
	// "unset".
	if int64PtrDiff(prev.Memory, curr.Memory) {
		return true
	}
	if int64PtrDiff(prev.Gpu, curr.Gpu) {
		return true
	}
	if int64PtrDiff(prev.Cpu, curr.Cpu) {
		return true
	}
	if (prev.Env == nil) != (curr.Env == nil) {
		return true
	}
	if prev.Env != nil && curr.Env != nil {
		prevEnv := *prev.Env
		currEnv := *curr.Env
		if len(prevEnv) != len(currEnv) {
			return true
		}
		for i, e := range prevEnv {
			if e != currEnv[i] {
				return true
			}
		}
	}
	if boolPtrDiff(prev.AdaptiveResourceAllocation, curr.AdaptiveResourceAllocation) {
		return true
	}
	if !reflect.DeepEqual(prev.Defaults, curr.Defaults) {
		return true
	}
	if prev.AvatarURI != curr.AvatarURI {
		return true
	}
	if (prev.Ports == nil) != (curr.Ports == nil) {
		return true
	}
	if prev.Ports != nil && curr.Ports != nil {
		prevPorts := *prev.Ports
		currPorts := *curr.Ports
		if len(prevPorts) != len(currPorts) {
			return true
		}
		for i, e := range prevPorts {
			if e != currPorts[i] {
				return true
			}
		}
	}
	if (prev.Tags == nil) != (curr.Tags == nil) {
		return true
	}
	if prev.Tags != nil && curr.Tags != nil {
		prevTags := *prev.Tags
		currTags := *curr.Tags
		if len(prevTags) != len(currTags) {
			return true
		}
		for i, e := range prevTags {
			if e != currTags[i] {
				return true
			}
		}
	}
	if !reflect.DeepEqual(prev.Schema, curr.Schema) {
		return true
	}
	return false
}

// int64PtrDiff and boolPtrDiff report whether two optional fields differ,
// treating nil as "unset" rather than dereferencing it.
func int64PtrDiff(a, b *int64) bool {
	if a == nil || b == nil {
		return a != b
	}
	return *a != *b
}

func boolPtrDiff(a, b *bool) bool {
	if a == nil || b == nil {
		return a != b
	}
	return *a != *b
}

// constructTemplateFromCreateTemplateRequest takes a CreateTemplateRequest and
// dumps the requisite fields into a Template.
func (ts *templateService) constructTemplateFromCreateTemplateRequest(req *state.CreateTemplateRequest) (state.Template, error) {
	tpl := state.Template{}
	if len(req.TemplateName) > 0 {
		tpl.TemplateName = req.TemplateName
	}
	if req.Schema != nil {
		tpl.Schema = req.Schema
	}
	if len(req.CommandTemplate) > 0 {
		tpl.CommandTemplate = req.CommandTemplate
	}
	if len(req.Image) > 0 {
		tpl.Image = req.Image
	}
	if req.Memory != nil {
		tpl.Memory = req.Memory
	} else {
		tpl.Memory = &state.MinMem
	}
	if req.Gpu != nil {
		tpl.Gpu = req.Gpu
	}
	if req.Cpu != nil {
		tpl.Cpu = req.Cpu
	} else {
		tpl.Cpu = &state.MinCPU
	}
	if req.Env != nil {
		tpl.Env = req.Env
	}
	if req.AdaptiveResourceAllocation != nil {
		tpl.AdaptiveResourceAllocation = req.AdaptiveResourceAllocation
	} else {
		// Default to true; assigning through the nil pointer would panic.
		defaultARA := true
		tpl.AdaptiveResourceAllocation = &defaultARA
	}
	if req.Ports != nil {
		tpl.Ports = req.Ports
	}
	if req.Tags != nil {
		tpl.Tags = req.Tags
	}
	if req.Defaults != nil {
		tpl.Defaults = req.Defaults
	} else {
		tpl.Defaults = state.TemplatePayload{}
	}
	if len(req.AvatarURI) > 0 {
		tpl.AvatarURI = req.AvatarURI
	} else {
		tpl.AvatarURI = ""
	}
	return tpl, nil
}

================================================ FILE: services/worker.go ================================================

package services

import (
	"context"
	"fmt"

	"github.com/stitchfix/flotilla-os/config"
	"github.com/stitchfix/flotilla-os/exceptions"
	"github.com/stitchfix/flotilla-os/state"
)

//
// WorkerService defines an interface for operations involving workers
//
type WorkerService interface {
	List(ctx context.Context, engine string) (state.WorkersList, error)
	Get(ctx context.Context, workerType string, engine string) (state.Worker, error)
	Update(ctx context.Context, workerType string, updates state.Worker) (state.Worker, error)
	BatchUpdate(ctx context.Context, updates []state.Worker) (state.WorkersList, error)
}

type workerService struct {
	sm state.Manager
}

//
// NewWorkerService configures and returns a WorkerService
//
func NewWorkerService(conf config.Config, sm state.Manager) (WorkerService, error) {
	ws := workerService{sm: sm}
	return &ws, nil
}

func (ws *workerService) List(ctx context.Context, engine
string) (state.WorkersList, error) { return ws.sm.ListWorkers(ctx, engine) } func (ws *workerService) Get(ctx context.Context, workerType string, engine string) (state.Worker, error) { var w state.Worker if err := ws.validate(workerType); err != nil { return w, err } return ws.sm.GetWorker(ctx, workerType, engine) } func (ws *workerService) Update(ctx context.Context, workerType string, updates state.Worker) (state.Worker, error) { var w state.Worker if err := ws.validate(workerType); err != nil { return w, err } return ws.sm.UpdateWorker(ctx, workerType, updates) } func (ws *workerService) BatchUpdate(ctx context.Context, updates []state.Worker) (state.WorkersList, error) { var wl state.WorkersList for _, update := range updates { if err := ws.validate(update.WorkerType); err != nil { return wl, err } } return ws.sm.BatchUpdateWorkers(ctx, updates) } func (ws *workerService) validate(workerType string) error { if !state.IsValidWorkerType(workerType) { var validTypesList []string for validType := range state.WorkerTypes { validTypesList = append(validTypesList, validType) } return exceptions.MalformedInput{ ErrorString: fmt.Sprintf( "Worker type: [%s] is not a valid worker type; valid types: %s", workerType, validTypesList)} } return nil } ================================================ FILE: state/manager.go ================================================ package state import ( "context" "github.com/pkg/errors" "github.com/stitchfix/flotilla-os/config" "github.com/stitchfix/flotilla-os/log" ) // Manager interface for CRUD operations // on definitions and runs type Manager interface { Name() string Initialize(conf config.Config) error Cleanup() error ListDefinitions( ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string) (DefinitionList, error) GetDefinition(ctx context.Context, definitionID string) (Definition, error) GetDefinitionByAlias(ctx context.Context, alias string) (Definition, error) UpdateDefinition(ctx context.Context, definitionID string, updates Definition) (Definition, error) CreateDefinition(ctx context.Context, d Definition) error DeleteDefinition(ctx context.Context, definitionID string) error ListRuns(ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string, engines []string) (RunList, error) EstimateRunResources(ctx context.Context, executableID string, commandHash string) (TaskResources, error) EstimateExecutorCount(ctx context.Context, executableID string, commandHash string) (int64, error) ExecutorOOM(ctx context.Context, executableID string, commandHash string) (bool, error) DriverOOM(ctx context.Context, executableID string, commandHash string) (bool, error) GetRun(ctx context.Context, runID string) (Run, error) CreateRun(ctx context.Context, r Run) error UpdateRun(ctx context.Context, runID string, updates Run) (Run, error) ListGroups(ctx context.Context, limit int, offset int, name *string) (GroupsList, error) ListTags(ctx context.Context, limit int, offset int, name *string) (TagsList, error) ListWorkers(ctx context.Context, engine string) (WorkersList, error) BatchUpdateWorkers(ctx context.Context, updates []Worker) (WorkersList, error) GetWorker(ctx context.Context, workerType string, engine string) (Worker, error) UpdateWorker(ctx context.Context, workerType string, updates Worker) (Worker, error) GetExecutableByTypeAndID(ctx context.Context, executableType ExecutableType, executableID string) (Executable, 
error) GetTemplateByID(ctx context.Context, templateID string) (Template, error) GetLatestTemplateByTemplateName(ctx context.Context, templateName string) (bool, Template, error) GetTemplateByVersion(ctx context.Context, templateName string, templateVersion int64) (bool, Template, error) ListTemplates(ctx context.Context, limit int, offset int, sortBy string, order string) (TemplateList, error) ListTemplatesLatestOnly(ctx context.Context, limit int, offset int, sortBy string, order string) (TemplateList, error) CreateTemplate(ctx context.Context, t Template) error ListFailingNodes(ctx context.Context) (NodeList, error) GetPodReAttemptRate(ctx context.Context) (float32, error) GetNodeLifecycle(ctx context.Context, executableID string, commandHash string) (string, error) GetTaskHistoricalRuntime(ctx context.Context, executableID string, runId string) (float32, error) CheckIdempotenceKey(ctx context.Context, idempotenceKey string) (string, error) GetRunByEMRJobId(ctx context.Context, emrJobId string) (Run, error) GetResources(ctx context.Context, runID string) (Run, error) ListClusterStates(ctx context.Context) ([]ClusterMetadata, error) UpdateClusterMetadata(ctx context.Context, cluster ClusterMetadata) error DeleteClusterMetadata(ctx context.Context, clusterID string) error GetClusterByID(ctx context.Context, clusterID string) (ClusterMetadata, error) GetRunStatus(ctx context.Context, runID string) (RunStatus, error) } // NewStateManager sets up and configures a new statemanager // - if no `state_manager` is configured, will use postgres func NewStateManager(conf config.Config, logger log.Logger) (Manager, error) { name := "postgres" if conf.IsSet("state_manager") { name = conf.GetString("state_manager") } switch name { case "postgres": pgm := &SQLStateManager{log: logger} err := pgm.Initialize(conf) if err != nil { return nil, errors.Wrap(err, "problem initializing SQLStateManager") } return pgm, nil default: return nil, errors.Errorf("state.Manager named [%s] not found", name) } } ================================================ FILE: state/models.go ================================================ package state import ( "bytes" "database/sql" "encoding/json" "fmt" "os" "reflect" "regexp" "sort" "strconv" "strings" "text/template" "time" "github.com/Masterminds/sprig" "github.com/aws/aws-sdk-go/aws" uuid "github.com/nu7hatch/gouuid" "github.com/pkg/errors" "github.com/xeipuuv/gojsonschema" ) var EKSEngine = "eks" var EKSSparkEngine = "eks-spark" var DefaultEngine = EKSEngine var DefaultTaskType = "task" var MinCPU = int64(256) var MaxCPU = int64(60000) var MaxGPUCPU = int64(94000) var MinMem = int64(512) // var MaxMem = int64(248000) var MaxMem = int64(350000) // increasing to 350 GB for #incident-616 var MaxGPUMem = int64(376000) var MaxEphemeralStorage = int64(5000) var TTLSecondsAfterFinished = int32(3600) var SpotActiveDeadlineSeconds = int64(172800) var OndemandActiveDeadlineSeconds = int64(604800) var SpotLifecycle = "spot" var OndemandLifecycle = "ondemand" var DefaultLifecycle = SpotLifecycle var NodeLifeCycles = []string{OndemandLifecycle, SpotLifecycle} var Engines = []string{EKSEngine, EKSSparkEngine} // StatusRunning indicates the run is running var StatusRunning = "RUNNING" // StatusQueued indicates the run is queued var StatusQueued = "QUEUED" // StatusNeedsRetry indicates the run failed for infra reasons and needs retried var StatusNeedsRetry = "NEEDS_RETRY" // StatusPending indicates the run has been allocated to a host and is in the process of launching var StatusPending 
= "PENDING" // StatusStopped means the run is finished var StatusStopped = "STOPPED" var MaxLogLines = int64(256) var EKSBackoffLimit = int32(0) var GPUNodeTypes = []string{"p3.2xlarge", "p3.8xlarge", "p3.16xlarge", "g5.xlarge", "g5.2xlarge", "g5.4xlarge", "g5.8xlarge", "g5.12xlarge", "g5.16xlarge", "g5.24xlarge", "g5.48xlarge"} var WorkerTypes = map[string]bool{ "retry": true, "submit": true, "status": true, } func IsValidWorkerType(workerType string) bool { return WorkerTypes[workerType] } // IsValidStatus checks that the given status // string is one of the valid statuses func IsValidStatus(status string) bool { return status == StatusRunning || status == StatusQueued || status == StatusNeedsRetry || status == StatusPending || status == StatusStopped } // NewRunID returns a new uuid for a Run func NewRunID(engine *string) (string, error) { s, err := newUUIDv4() return fmt.Sprintf("%s-%s", *engine, s[len(*engine)+1:]), err } // NewDefinitionID returns a new uuid for a Definition func NewDefinitionID(definition Definition) (string, error) { uuid4, err := newUUIDv4() if err != nil { return "", err } return fmt.Sprintf("%s-%s", definition.GroupName, uuid4), nil } func newUUIDv4() (string, error) { u, err := uuid.NewV4() if err != nil { return "", err } return u.String(), nil } // EnvList wraps a list of EnvVar // - abstraction to make it easier to read // and write to db type EnvList []EnvVar // PortsList wraps a list of int // - abstraction to make it easier to read // and write to db type PortsList []int // EnvVar represents a single environment variable // for either a definition or a run type EnvVar struct { Name string `json:"name"` Value string `json:"value"` } type NodeList []string // Tags wraps a list of strings // - abstraction to make it easier to read // and write to db type Tags []string // ExecutableResources define the resources and flags required to run an // executable. type ExecutableResources struct { Image string `json:"image"` Memory *int64 `json:"memory,omitempty"` Gpu *int64 `json:"gpu,omitempty"` Cpu *int64 `json:"cpu,omitempty"` EphemeralStorage *int64 `json:"ephemeral_storage,omitempty" db:"ephemeral_storage"` Env *EnvList `json:"env"` AdaptiveResourceAllocation *bool `json:"adaptive_resource_allocation,omitempty"` Ports *PortsList `json:"ports,omitempty"` Tags *Tags `json:"tags,omitempty"` } type ExecutableType string const ( ExecutableTypeDefinition ExecutableType = "task_definition" ExecutableTypeTemplate ExecutableType = "template" ) type Executable interface { GetExecutableID() *string GetExecutableType() *ExecutableType GetExecutableResources() *ExecutableResources GetExecutableCommand(req ExecutionRequest) (string, error) GetExecutableResourceName() string // This will typically be an ARN. 
} func UnmarshalSparkExtension(data []byte) (SparkExtension, error) { var r SparkExtension err := json.Unmarshal(data, &r) return r, err } func (r *SparkExtension) Marshal() ([]byte, error) { return json.Marshal(r) } type SparkExtension struct { SparkSubmitJobDriver *SparkSubmitJobDriver `json:"spark_submit_job_driver,omitempty"` ApplicationConf []Conf `json:"application_conf,omitempty"` HiveConf []Conf `json:"hive_conf,omitempty"` EMRJobId *string `json:"emr_job_id,omitempty"` SparkAppId *string `json:"spark_app_id,omitempty"` EMRJobManifest *string `json:"emr_job_manifest,omitempty"` HistoryUri *string `json:"history_uri,omitempty"` MetricsUri *string `json:"metrics_uri,omitempty"` VirtualClusterId *string `json:"virtual_cluster_id,omitempty"` EMRReleaseLabel *string `json:"emr_release_label,omitempty"` ExecutorInitCommand *string `json:"executor_init_command,omitempty"` DriverInitCommand *string `json:"driver_init_command,omitempty"` SparkServerURI *string `json:"spark_server_uri,omitempty"` AppUri *string `json:"app_uri,omitempty"` Executors []string `json:"executors,omitempty"` ExecutorOOM *bool `json:"executor_oom,omitempty"` DriverOOM *bool `json:"driver_oom,omitempty"` } type Conf struct { Name *string `json:"name,omitempty"` Value *string `json:"value,omitempty"` } type SparkSubmitJobDriver struct { EntryPoint *string `json:"entry_point,omitempty"` EntryPointArguments []*string `json:"entry_point_arguments,omitempty"` SparkSubmitConf []Conf `json:"spark_submit_conf,omitempty"` Files []string `json:"files,omitempty"` PyFiles []string `json:"py_files,omitempty"` Jars []string `json:"jars,omitempty"` Class *string `json:"class,omitempty"` WorkingDir *string `json:"working_dir,omitempty"` NumExecutors *int64 `json:"num_executors,omitempty"` ExecutorMemory *int64 `json:"executor_memory,omitempty"` } type Labels map[string]string // Common fields required to execute any Executable. type ExecutionRequestCommon struct { ClusterName string `json:"cluster_name"` Tier Tier `json:"tier"` Env *EnvList `json:"env"` OwnerID string `json:"owner_id"` Command *string `json:"command"` Memory *int64 `json:"memory"` Cpu *int64 `json:"cpu"` Gpu *int64 `json:"gpu"` Engine *string `json:"engine"` EphemeralStorage *int64 `json:"ephemeral_storage"` NodeLifecycle *string `json:"node_lifecycle"` ActiveDeadlineSeconds *int64 `json:"active_deadline_seconds,omitempty"` SparkExtension *SparkExtension `json:"spark_extension,omitempty"` Description *string `json:"description,omitempty"` CommandHash *string `json:"command_hash,omitempty"` IdempotenceKey *string `json:"idempotence_key,omitempty"` Arch *string `json:"arch,omitempty"` Labels *Labels `json:"labels,omitempty"` ServiceAccount *string `json:"service_account,omitempty"` } type ExecutionRequestCustom map[string]interface{} type ExecutionRequest interface { GetExecutionRequestCommon() *ExecutionRequestCommon GetExecutionRequestCustom() *ExecutionRequestCustom } type DefinitionExecutionRequest struct { *ExecutionRequestCommon } // Returns ExecutionRequestCommon, common between Template and Definition types func (d *DefinitionExecutionRequest) GetExecutionRequestCommon() *ExecutionRequestCommon { return d.ExecutionRequestCommon } // Only relevant to the template type func (d *DefinitionExecutionRequest) GetExecutionRequestCustom() *ExecutionRequestCustom { return nil } type TerminateJob struct { RunID string UserInfo UserInfo } // task definition. It implements the `Executable` interface. 
type Definition struct { DefinitionID string `json:"definition_id"` GroupName string `json:"group_name,omitempty"` Alias string `json:"alias"` Command string `json:"command,omitempty"` TaskType string `json:"task_type,omitempty"` RequiresDocker bool `json:"requires_docker,omitempty" db:"requires_docker"` TargetCluster string `json:"target_cluster,omitempty" db:"target_cluster"` ExecutableResources } // Return definition or template id func (d Definition) GetExecutableID() *string { return &d.DefinitionID } // Returns definition or template func (d Definition) GetExecutableType() *ExecutableType { t := ExecutableTypeDefinition return &t } func (d Definition) GetExecutableResources() *ExecutableResources { return &d.ExecutableResources } func (d Definition) GetExecutableCommand(req ExecutionRequest) (string, error) { return d.Command, nil } func (d Definition) GetExecutableResourceName() string { return d.DefinitionID } var commandWrapper = ` set -e set -x {{.Command}} ` var CommandTemplate, _ = template.New("command").Parse(commandWrapper) // WrappedCommand returns the wrapped command for the definition // * wrapping ensures lines are logged and exit code is set func (d *Definition) WrappedCommand() (string, error) { var result bytes.Buffer if err := CommandTemplate.Execute(&result, d); err != nil { return "", err } return result.String(), nil } type validationCondition struct { condition bool reason string } // IsValid returns true only if this is a valid definition with all // required information func (d *Definition) IsValid() (bool, []string) { conditions := []validationCondition{ {len(d.Image) == 0, "string [image] must be specified"}, {len(d.Alias) == 0, "string [alias] must be specified"}, } valid := true var reasons []string for _, cond := range conditions { if cond.condition { valid = false reasons = append(reasons, cond.reason) } } return valid, reasons } // UpdateWith updates this definition with information from another func (d *Definition) UpdateWith(other Definition) { if len(other.DefinitionID) > 0 { d.DefinitionID = other.DefinitionID } if len(other.Image) > 0 { d.Image = other.Image } if len(other.GroupName) > 0 { d.GroupName = other.GroupName } if len(other.Alias) > 0 { d.Alias = other.Alias } if other.Memory != nil { d.Memory = other.Memory } if other.Gpu != nil { d.Gpu = other.Gpu } if other.Cpu != nil { d.Cpu = other.Cpu } if other.EphemeralStorage != nil { d.EphemeralStorage = other.EphemeralStorage } if other.AdaptiveResourceAllocation != nil { d.AdaptiveResourceAllocation = other.AdaptiveResourceAllocation } if len(other.Command) > 0 { d.Command = other.Command } if len(other.TaskType) > 0 { d.TaskType = other.TaskType } if other.Env != nil { d.Env = other.Env } if other.Ports != nil { d.Ports = other.Ports } if other.Tags != nil { d.Tags = other.Tags } } func (d Definition) MarshalJSON() ([]byte, error) { type Alias Definition env := d.Env if env == nil { env = &EnvList{} } return json.Marshal(&struct { Env *EnvList `json:"env"` Alias }{ Env: env, Alias: (Alias)(d), }) } // DefinitionList wraps a list of Definitions type DefinitionList struct { Total int `json:"total"` Definitions []Definition `json:"definitions"` } func (dl *DefinitionList) MarshalJSON() ([]byte, error) { type Alias DefinitionList l := dl.Definitions if l == nil { l = []Definition{} } return json.Marshal(&struct { Definitions []Definition `json:"definitions"` *Alias }{ Definitions: l, Alias: (*Alias)(dl), }) } // Run represents a single run of a Definition // // TODO: // // Runs need to -copy- the 
run relevant information // from their associated definition when they are // created so they always have correct info. Currently // the definition can change during or after the run // is created and launched meaning the run is acting // on information that is no longer accessible. type Run struct { RunID string `json:"run_id"` DefinitionID string `json:"definition_id"` Alias string `json:"alias"` Image string `json:"image"` ClusterName string `json:"cluster"` ExitCode *int64 `json:"exit_code,omitempty"` Status string `json:"status"` QueuedAt *time.Time `json:"queued_at,omitempty"` StartedAt *time.Time `json:"started_at,omitempty"` FinishedAt *time.Time `json:"finished_at,omitempty"` InstanceID string `json:"-"` InstanceDNSName string `json:"-"` GroupName string `json:"group_name"` User string `json:"user,omitempty"` TaskType string `json:"task_type,omitempty"` Env *EnvList `json:"env,omitempty"` Command *string `json:"command,omitempty"` CommandHash *string `json:"command_hash,omitempty"` Memory *int64 `json:"memory,omitempty"` MemoryLimit *int64 `json:"memory_limit,omitempty"` Cpu *int64 `json:"cpu,omitempty"` CpuLimit *int64 `json:"cpu_limit,omitempty"` Gpu *int64 `json:"gpu,omitempty"` ExitReason *string `json:"exit_reason,omitempty"` Engine *string `json:"engine,omitempty"` NodeLifecycle *string `json:"node_lifecycle,omitempty"` EphemeralStorage *int64 `json:"ephemeral_storage,omitempty" db:"ephemeral_storage"` PodName *string `json:"pod_name,omitempty"` Namespace *string `json:"namespace,omitempty"` MaxMemoryUsed *int64 `json:"max_memory_used,omitempty"` MaxCpuUsed *int64 `json:"max_cpu_used,omitempty"` PodEvents *PodEvents `json:"pod_events,omitempty"` CloudTrailNotifications *CloudTrailNotifications `json:"cloudtrail_notifications,omitempty"` ExecutableID *string `json:"executable_id,omitempty"` ExecutableType *ExecutableType `json:"executable_type,omitempty"` ExecutionRequestCustom *ExecutionRequestCustom `json:"execution_request_custom,omitempty"` AttemptCount *int64 `json:"attempt_count,omitempty"` SpawnedRuns *SpawnedRuns `json:"spawned_runs,omitempty"` RunExceptions *RunExceptions `json:"run_exceptions,omitempty"` ActiveDeadlineSeconds *int64 `json:"active_deadline_seconds,omitempty"` SparkExtension *SparkExtension `json:"spark_extension,omitempty"` MetricsUri *string `json:"metrics_uri,omitempty"` Description *string `json:"description,omitempty"` IdempotenceKey *string `json:"idempotence_key,omitempty"` Arch *string `json:"arch,omitempty"` Labels Labels `json:"labels,omitempty"` RequiresDocker bool `json:"requires_docker,omitempty" db:"requires_docker"` ServiceAccount *string `json:"service_account,omitempty" db:"service_account"` Tier Tier `json:"tier,omitempty"` } // UpdateWith updates this run with information from another func (d *Run) UpdateWith(other Run) { if len(other.RunID) > 0 { d.RunID = other.RunID } if len(other.DefinitionID) > 0 { d.DefinitionID = other.DefinitionID } if other.Tier != "" { d.Tier = other.Tier } if len(other.Alias) > 0 { d.Alias = other.Alias } if len(other.Image) > 0 { d.Image = other.Image } if len(other.ClusterName) > 0 { d.ClusterName = other.ClusterName } if other.ExitCode != nil { d.ExitCode = other.ExitCode } if other.QueuedAt != nil { d.QueuedAt = other.QueuedAt } if other.StartedAt != nil { d.StartedAt = other.StartedAt } if other.FinishedAt != nil { d.FinishedAt = other.FinishedAt } if len(other.InstanceID) > 0 { d.InstanceID = other.InstanceID } if len(other.InstanceDNSName) > 0 { d.InstanceDNSName = other.InstanceDNSName } if 
len(other.GroupName) > 0 { d.GroupName = other.GroupName } if len(other.User) > 0 { d.User = other.User } if len(other.TaskType) > 0 { d.TaskType = other.TaskType } if other.Env != nil { d.Env = other.Env } if other.ExitReason != nil { d.ExitReason = other.ExitReason } if other.Command != nil && len(*other.Command) > 0 { d.Command = other.Command } if other.CommandHash != nil && len(*other.CommandHash) > 0 { d.CommandHash = other.CommandHash } if other.Memory != nil { d.Memory = other.Memory } if other.Cpu != nil { d.Cpu = other.Cpu } if other.Gpu != nil { d.Gpu = other.Gpu } if other.MaxMemoryUsed != nil { d.MaxMemoryUsed = other.MaxMemoryUsed } if other.MaxCpuUsed != nil { d.MaxCpuUsed = other.MaxCpuUsed } if other.Engine != nil { d.Engine = other.Engine } if other.EphemeralStorage != nil { d.EphemeralStorage = other.EphemeralStorage } if other.NodeLifecycle != nil { d.NodeLifecycle = other.NodeLifecycle } if other.PodName != nil { d.PodName = other.PodName } if other.Namespace != nil { d.Namespace = other.Namespace } if other.PodEvents != nil { d.PodEvents = other.PodEvents } if other.SpawnedRuns != nil { d.SpawnedRuns = other.SpawnedRuns } if other.RunExceptions != nil { d.RunExceptions = other.RunExceptions } if other.ExecutableID != nil { d.ExecutableID = other.ExecutableID } if other.ExecutableType != nil { d.ExecutableType = other.ExecutableType } if other.SparkExtension != nil { d.SparkExtension = other.SparkExtension } if other.CloudTrailNotifications != nil && len((*other.CloudTrailNotifications).Records) > 0 { d.CloudTrailNotifications = other.CloudTrailNotifications } if other.ExecutionRequestCustom != nil { d.ExecutionRequestCustom = other.ExecutionRequestCustom } if other.CpuLimit != nil { d.CpuLimit = other.CpuLimit } if other.MetricsUri != nil { d.MetricsUri = other.MetricsUri } if other.Description != nil { d.Description = other.Description } if other.IdempotenceKey != nil { d.IdempotenceKey = other.IdempotenceKey } if other.Arch != nil { d.Arch = other.Arch } if other.MemoryLimit != nil { d.MemoryLimit = other.MemoryLimit } if other.AttemptCount != nil { d.AttemptCount = other.AttemptCount } if other.Labels != nil { d.Labels = other.Labels } // // Runs have a deterministic lifecycle // // QUEUED --> PENDING --> RUNNING --> STOPPED // QUEUED --> PENDING --> NEEDS_RETRY --> QUEUED ... // QUEUED --> PENDING --> STOPPED ... 
	//
	statusPrecedence := map[string]int{
		StatusNeedsRetry: -1,
		StatusQueued:     0,
		StatusPending:    1,
		StatusRunning:    2,
		StatusStopped:    3,
	}

	if other.Status == StatusNeedsRetry {
		d.Status = StatusNeedsRetry
	} else {
		if runStatus, ok := statusPrecedence[d.Status]; ok {
			if newStatus, ok := statusPrecedence[other.Status]; ok {
				if newStatus > runStatus {
					d.Status = other.Status
				}
			}
		}
	}
}
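// exampleStatusPrecedence is an illustrative sketch, not part of the original
// file: it shows how the precedence map above behaves. A run's status never
// moves backwards through the lifecycle, but NEEDS_RETRY always wins so a
// failed run can be re-queued.
func exampleStatusPrecedence() {
	run := Run{Status: StatusRunning}
	run.UpdateWith(Run{Status: StatusPending})    // ignored: PENDING ranks below RUNNING
	run.UpdateWith(Run{Status: StatusStopped})    // applied: STOPPED outranks RUNNING
	run.UpdateWith(Run{Status: StatusNeedsRetry}) // always applied, regardless of rank
	fmt.Println(run.Status)                       // NEEDS_RETRY
}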
func removeDuplicateStr(strSlice []string) []string {
	allKeys := make(map[string]bool)
	var list []string
	for _, item := range strSlice {
		if _, value := allKeys[item]; !value {
			allKeys[item] = true
			list = append(list, item)
		}
	}
	return list
}

type byExecutorName []string

type RunStatus struct {
	RunID        string     `json:"run_id"`
	Status       string     `json:"status"`
	QueuedAt     *time.Time `json:"queued_at,omitempty"`
	StartedAt    *time.Time `json:"started_at,omitempty"`
	FinishedAt   *time.Time `json:"finished_at,omitempty"`
	ExitCode     *int64     `json:"exit_code,omitempty"`
	ExitReason   *string    `json:"exit_reason,omitempty"`
	Engine       *string    `json:"engine,omitempty"`
	DefinitionID string     `json:"definition_id"`
	Alias        string     `json:"alias"`
	ClusterName  string     `json:"cluster_name"`
}

func (s byExecutorName) Len() int {
	return len(s)
}

func (s byExecutorName) Key(i int) int {
	r, _ := regexp.Compile("-exec-(\\d+)")
	matches := r.FindStringSubmatch(s[i])
	if matches == nil || len(matches) < 2 {
		return 0
	}
	key, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}
	return key
}

func (s byExecutorName) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

func (s byExecutorName) Less(i, j int) bool {
	return s.Key(i) < s.Key(j)
}

func (r Run) MarshalJSON() ([]byte, error) {
	type Alias Run
	instance := map[string]string{
		"instance_id": r.InstanceID,
		"dns_name":    r.InstanceDNSName,
	}
	podEvents := r.PodEvents
	if podEvents == nil {
		podEvents = &PodEvents{}
	}
	var executors []string
	for _, podEvent := range *podEvents {
		if strings.Contains(podEvent.SourceObject, "-exec-") {
			executors = append(executors, podEvent.SourceObject)
		}
	}
	// Guard the Engine and SparkExtension pointers before dereferencing them.
	if len(executors) > 0 && r.Engine != nil && *r.Engine != EKSEngine && r.SparkExtension != nil {
		executors = removeDuplicateStr(executors)
		sort.Sort(byExecutorName(executors))
		r.SparkExtension.Executors = executors
	}
	cloudTrailNotifications := r.CloudTrailNotifications
	if cloudTrailNotifications == nil {
		cloudTrailNotifications = &CloudTrailNotifications{}
	}
	executionRequestCustom := r.ExecutionRequestCustom
	if executionRequestCustom == nil {
		executionRequestCustom = &ExecutionRequestCustom{}
	}
	if r.Description == nil {
		r.Description = aws.String(r.Alias)
	}
	sparkExtension := r.SparkExtension
	if sparkExtension == nil {
		sparkExtension = &SparkExtension{}
	} else {
		// Mask Hive connection passwords in the marshaled output; take each
		// element by index so the overwrite sticks on the slice itself.
		for i := range sparkExtension.HiveConf {
			c := &sparkExtension.HiveConf[i]
			if c.Name != nil && strings.Contains(*c.Name, "ConnectionPassword") {
				c.Value = aws.String("****")
			}
		}
		if r.Status != StatusStopped && sparkExtension.AppUri != nil {
			sparkExtension.HistoryUri = sparkExtension.AppUri
		}
	}
	return json.Marshal(&struct {
		Instance                map[string]string        `json:"instance"`
		PodEvents               *PodEvents               `json:"pod_events"`
		CloudTrailNotifications *CloudTrailNotifications `json:"cloudtrail_notifications"`
		SparkExtension          *SparkExtension          `json:"spark_extension"`
		Alias
	}{
		Instance:                instance,
		PodEvents:               podEvents,
		CloudTrailNotifications: cloudTrailNotifications,
		SparkExtension:          sparkExtension,
		Alias:                   (Alias)(r),
	})
}

// RunList wraps a list of Runs
type RunList struct {
	Total int   `json:"total"`
	Runs  []Run `json:"history"`
}

type PodEvents []PodEvent

type PodEventList struct {
	Total     int       `json:"total"`
	PodEvents PodEvents `json:"pod_events"`
}

type SpawnedRun struct {
	RunID string `json:"run_id"`
}

type SpawnedRuns []SpawnedRun

type RunExceptions []string

// Equal treats two nil timestamps as equal; otherwise both timestamps must be
// present and matching.
func (w *PodEvent) Equal(other PodEvent) bool {
	sameTimestamp := (w.Timestamp == nil && other.Timestamp == nil) ||
		(w.Timestamp != nil && other.Timestamp != nil && w.Timestamp.Equal(*other.Timestamp))
	return w.Reason == other.Reason &&
		sameTimestamp &&
		w.SourceObject == other.SourceObject &&
		w.Message == other.Message &&
		w.EventType == other.EventType
}

type PodEvent struct {
	Timestamp    *time.Time `json:"timestamp,omitempty"`
	EventType    string     `json:"event_type"`
	Reason       string     `json:"reason"`
	SourceObject string     `json:"source_object"`
	Message      string     `json:"message"`
}

// GroupsList wraps a list of group names
type GroupsList struct {
	Groups []string
	Total  int
}

// TagsList wraps a list of tag names
type TagsList struct {
	Tags  []string
	Total int
}

// Worker represents a Flotilla Worker
type Worker struct {
	WorkerType       string `json:"worker_type"`
	CountPerInstance int    `json:"count_per_instance"`
	Engine           string `json:"engine"`
}

// UpdateWith updates this worker with information from another
func (w *Worker) UpdateWith(other Worker) {
	if other.CountPerInstance >= 0 {
		w.CountPerInstance = other.CountPerInstance
	}
}

// WorkersList wraps a list of Workers
type WorkersList struct {
	Total   int      `json:"total"`
	Workers []Worker `json:"workers"`
}

// UserInfo identifies the user making the API calls.
type UserInfo struct {
	Name  string `json:"name"`
	Email string `json:"email"`
}

// TaskResources is an internal object for tracking cpu / memory resources.
type TaskResources struct {
	Cpu    sql.NullInt64 `json:"cpu" db:"cpu"`
	Memory sql.NullInt64 `json:"memory" db:"memory"`
}

// CloudTrailS3File is the SQS notification object for CloudTrail S3 files.
type CloudTrailS3File struct {
	S3Bucket    string   `json:"s3Bucket"`
	S3ObjectKey []string `json:"s3ObjectKey"`
	Done        func() error
}

// Marshal method for CloudTrail SQS notifications.
func (e *CloudTrailNotifications) Marshal() ([]byte, error) {
	return json.Marshal(e)
}

// CloudTrailNotifications is the CloudTrail notification object that is
// persisted into the DB.
type CloudTrailNotifications struct {
	Records []Record `json:"Records"`
}

// Record is a single CloudTrail notification record.
type Record struct {
	UserIdentity UserIdentity `json:"userIdentity"`
	EventSource  string       `json:"eventSource"`
	EventName    string       `json:"eventName"`
}

// UserIdentity is the ARN of the user who performed the AWS api action.
type UserIdentity struct {
	Arn string `json:"arn"`
}

// Equal helper method for Record.
func (w *Record) Equal(other Record) bool {
	return w.EventName == other.EventName && w.EventSource == other.EventSource
}

// String helper method for Record.
func (w *Record) String() string {
	return fmt.Sprintf("%s-%s", w.EventSource, w.EventName)
}

const TemplatePayloadKey = "template_payload"

type TemplatePayload map[string]interface{}

type TemplateExecutionRequest struct {
	*ExecutionRequestCommon
	TemplatePayload TemplatePayload `json:"template_payload"`
	DryRun          bool            `json:"dry_run,omitempty"`
}

// GetExecutionRequestCommon returns the ExecutionRequestCommon associated with a Template type.
func (t TemplateExecutionRequest) GetExecutionRequestCommon() *ExecutionRequestCommon {
	return t.ExecutionRequestCommon
}

// GetExecutionRequestCustom returns the ExecutionRequestCustom associated with a Template type.
func (t TemplateExecutionRequest) GetExecutionRequestCustom() *ExecutionRequestCustom {
	return &ExecutionRequestCustom{
		TemplatePayloadKey: t.TemplatePayload,
	}
}

// TemplateJSONSchema holds the JSON Schema types used by templates.
type TemplateJSONSchema map[string]interface{}

// Template Object Type. The CommandTemplate is a Go Template type.
type Template struct {
	TemplateID      string             `json:"template_id"`
	TemplateName    string             `json:"template_name"`
	Version         int64              `json:"version"`
	Schema          TemplateJSONSchema `json:"schema"`
	CommandTemplate string             `json:"command_template"`
	Defaults        TemplatePayload    `json:"defaults"`
	AvatarURI       string             `json:"avatar_uri"`
	ExecutableResources
}

type CreateTemplateRequest struct {
	TemplateName    string             `json:"template_name"`
	Schema          TemplateJSONSchema `json:"schema"`
	CommandTemplate string             `json:"command_template"`
	Defaults        TemplatePayload    `json:"defaults"`
	AvatarURI       string             `json:"avatar_uri"`
	ExecutableResources
}

type CreateTemplateResponse struct {
	DidCreate bool     `json:"did_create"`
	Template  Template `json:"template,omitempty"`
}

// GetExecutableID returns the Template ID.
func (t Template) GetExecutableID() *string {
	return &t.TemplateID
}

// GetExecutableType returns the Template type.
func (t Template) GetExecutableType() *ExecutableType {
	et := ExecutableTypeTemplate
	return &et
}

// GetExecutableResources returns the default resources associated with the Template.
func (t Template) GetExecutableResources() *ExecutableResources {
	return &t.ExecutableResources
}

// GetExecutableCommand renders the command for the Template.
func (t Template) GetExecutableCommand(req ExecutionRequest) (string, error) {
	var result bytes.Buffer

	// Get the request's custom fields.
	customFields := *req.GetExecutionRequestCustom()
	executionPayload, ok := customFields[TemplatePayloadKey]
	if !ok || executionPayload == nil {
		return "", nil
	}

	executionPayload, err := t.compositeUserAndDefaults(executionPayload)
	if err != nil {
		return "", err
	}

	// Perform JSON schema validation to ensure that the request's template
	// payload conforms to the template's JSON schema.
	schemaLoader := gojsonschema.NewGoLoader(t.Schema)
	documentLoader := gojsonschema.NewGoLoader(executionPayload)
	validationResult, err := gojsonschema.Validate(schemaLoader, documentLoader)
	if err != nil {
		return "", err
	}
	if validationResult != nil && !validationResult.Valid() {
		var res []string
		for _, resultError := range validationResult.Errors() {
			res = append(res, resultError.String())
		}
		return "", errors.New(strings.Join(res, "\n"))
	}

	// Create a new template string based on the template.Template.
	textTemplate, err := template.New("command").Funcs(sprig.TxtFuncMap()).Parse(t.CommandTemplate)
	if err != nil {
		return "", err
	}

	// Dump payload into the template string.
	if err = textTemplate.Execute(&result, executionPayload); err != nil {
		return "", err
	}
	return result.String(), nil
}

// GetExecutableResourceName returns the Template ID.
func (t Template) GetExecutableResourceName() string {
	return t.TemplateID
}

func (t Template) compositeUserAndDefaults(userPayload interface{}) (TemplatePayload, error) {
	var (
		final map[string]interface{}
		ok    bool
	)
	final, ok = userPayload.(TemplatePayload)
	if !ok {
		return final, errors.New("unable to cast request payload to TemplatePayload struct")
	}
	if err := MergeMaps(&final, t.Defaults); err != nil {
		return final, err
	}
	return final, nil
}

// NewTemplateID returns a new uuid for a Template.
func NewTemplateID(t Template) (string, error) {
	uuid4, err := newUUIDv4()
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("tpl-%s", uuid4[4:]), nil
}
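// exampleRenderTemplate is an illustrative sketch, not part of the original
// file: the user payload and the template defaults are merged (user values
// win), validated against the JSON schema, and then rendered through the
// text/template CommandTemplate. The template values here are invented.
func exampleRenderTemplate() (string, error) {
	tpl := Template{
		TemplateName:    "greeter",
		CommandTemplate: "echo {{.greeting}} {{.name}}",
		Defaults:        TemplatePayload{"greeting": "hello"},
		Schema: TemplateJSONSchema{
			"type":     "object",
			"required": []interface{}{"name"},
		},
	}
	req := TemplateExecutionRequest{
		TemplatePayload: TemplatePayload{"name": "flotilla"},
	}
	// Renders "echo hello flotilla": "name" comes from the request and
	// "greeting" is filled in from the template defaults.
	return tpl.GetExecutableCommand(req)
}

// Checks validity of a template.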
func (t *Template) IsValid() (bool, []string) { conditions := []validationCondition{ {len(t.TemplateName) == 0, "string [template_name] must be specified"}, {len(t.Schema) == 0, "schema must be specified"}, {len(t.CommandTemplate) == 0, "string [command_template] must be specified"}, {len(t.Image) == 0, "string [image] must be specified"}, {t.Memory == nil, "int [memory] must be specified"}, } valid := true var reasons []string for _, cond := range conditions { if cond.condition { valid = false reasons = append(reasons, cond.reason) } } return valid, reasons } // TemplateList wraps a list of Templates type TemplateList struct { Total int `json:"total"` Templates []Template `json:"templates"` } // Template Marshal method. func (tl *TemplateList) MarshalJSON() ([]byte, error) { type Alias TemplateList l := tl.Templates if l == nil { l = []Template{} } return json.Marshal(&struct { Templates []Template `json:"templates"` *Alias }{ Templates: l, Alias: (*Alias)(tl), }) } func (r *KubernetesEvent) Marshal() ([]byte, error) { return json.Marshal(r) } type KubernetesEvent struct { Metadata Metadata `json:"metadata,omitempty"` Reason string `json:"reason,omitempty"` Message string `json:"message,omitempty"` Source Source `json:"source,omitempty"` FirstTimestamp string `json:"firstTimestamp,omitempty"` LastTimestamp string `json:"lastTimestamp,omitempty"` Count int64 `json:"count,omitempty"` Type string `json:"type,omitempty"` EventTime interface{} `json:"eventTime,omitempty"` ReportingComponent string `json:"reportingComponent,omitempty"` ReportingInstance string `json:"reportingInstance,omitempty"` InvolvedObject InvolvedObject `json:"involvedObject,omitempty"` Done func() error } type InvolvedObject struct { Kind string `json:"kind,omitempty"` Namespace string `json:"namespace,omitempty"` Name string `json:"name,omitempty"` Uid string `json:"uid,omitempty"` APIVersion string `json:"apiVersion,omitempty"` ResourceVersion string `json:"resourceVersion,omitempty"` FieldPath string `json:"fieldPath,omitempty"` Labels EventLabels `json:"labels,omitempty"` } type EventLabels struct { ControllerUid string `json:"controller-uid,omitempty"` JobName string `json:"job-name,omitempty"` ClusterName string `json:"cluster-name,omitempty"` } type Metadata struct { Name string `json:"name,omitempty"` Namespace string `json:"namespace,omitempty"` SelfLink string `json:"selfLink,omitempty"` Uid string `json:"uid,omitempty"` ResourceVersion string `json:"resourceVersion,omitempty"` CreationTimestamp string `json:"creationTimestamp,omitempty"` } type Source struct { Component string `json:"component,omitempty"` Host string `json:"host,omitempty"` } func UnmarshalEmrEvents(data []byte) (EmrEvent, error) { var r EmrEvent err := json.Unmarshal(data, &r) return r, err } func (r *EmrEvent) Marshal() ([]byte, error) { return json.Marshal(r) } type EmrEvent struct { Version *string `json:"version,omitempty"` ID *string `json:"id,omitempty"` DetailType *string `json:"detail-type,omitempty"` Source *string `json:"source,omitempty"` Account *string `json:"account,omitempty"` Time *string `json:"time,omitempty"` Region *string `json:"region,omitempty"` Resources []interface{} `json:"resources,omitempty"` Detail *Detail `json:"detail,omitempty"` Done func() error } type Detail struct { Severity *string `json:"severity,omitempty"` Name *string `json:"name,omitempty"` ID *string `json:"id,omitempty"` Arn *string `json:"arn,omitempty"` VirtualClusterID *string `json:"virtualClusterId,omitempty"` State *string 
`json:"state,omitempty"`
	CreatedBy        *string `json:"createdBy,omitempty"`
	ReleaseLabel     *string `json:"releaseLabel,omitempty"`
	ExecutionRoleArn *string `json:"executionRoleArn,omitempty"`
	FailureReason    *string `json:"failureReason,omitempty"`
	StateDetails     *string `json:"stateDetails,omitempty"`
	Message          *string `json:"message,omitempty"`
}

type LaunchRequest struct {
	ClusterName *string  `json:"cluster,omitempty"`
	Env         *EnvList `json:"env,omitempty"`
	Tier        Tier     `json:"tier"`
}

type LaunchRequestV2 struct {
	Tier                  Tier            `json:"tier"`
	RunTags               RunTags         `json:"run_tags"`
	Command               *string         `json:"command,omitempty"`
	Memory                *int64          `json:"memory,omitempty"`
	Cpu                   *int64          `json:"cpu,omitempty"`
	Gpu                   *int64          `json:"gpu,omitempty"`
	EphemeralStorage      *int64          `json:"ephemeral_storage,omitempty"`
	Engine                *string         `json:"engine,omitempty"`
	NodeLifecycle         *string         `json:"node_lifecycle,omitempty"`
	ActiveDeadlineSeconds *int64          `json:"active_deadline_seconds,omitempty"`
	SparkExtension        *SparkExtension `json:"spark_extension,omitempty"`
	ClusterName           *string         `json:"cluster,omitempty"`
	Env                   *EnvList        `json:"env,omitempty"`
	Description           *string         `json:"description,omitempty"`
	CommandHash           *string         `json:"command_hash,omitempty"`
	IdempotenceKey        *string         `json:"idempotence_key,omitempty"`
	Arch                  *string         `json:"arch,omitempty"`
	Labels                *Labels         `json:"labels,omitempty"`
	ServiceAccount        *string         `json:"service_account,omitempty"`
}

// RunTags represents which user is responsible for a task run
type RunTags struct {
	OwnerEmail string `json:"owner_email"`
	TeamName   string `json:"team_name"`
	OwnerID    string `json:"owner_id"`
}

type ClusterStatus string
type Tier string
type Tiers []string
type Capability string
type Capabilities []string

const (
	StatusActive      ClusterStatus = "active"
	StatusMaintenance ClusterStatus = "maintenance"
	StatusOffline     ClusterStatus = "offline"
)

type ClusterMetadata struct {
	ID                string        `json:"id" db:"id"`
	Name              string        `json:"name" db:"name"`
	ClusterVersion    string        `json:"cluster_version" db:"cluster_version"`
	Status            ClusterStatus `json:"status" db:"status"`
	StatusReason      string        `json:"status_reason" db:"status_reason"`
	StatusSince       time.Time     `json:"status_since" db:"status_since"`
	AllowedTiers      Tiers         `json:"allowed_tiers" db:"allowed_tiers"`
	Capabilities      Capabilities  `json:"capabilities" db:"capabilities"`
	UpdatedAt         time.Time     `json:"updated_at" db:"updated_at"`
	Namespace         string        `json:"namespace" db:"namespace"`
	Region            string        `json:"region" db:"region"`
	EMRVirtualCluster string        `json:"emr_virtual_cluster" db:"emr_virtual_cluster"`
	SparkServerURI    string        `json:"spark_server_uri" db:"spark_server_uri"`
}

// MergeMaps takes a pointer to a map (first arg) and a map containing default
// values (second arg) and recursively sets values that exist in `b` but are
// not set in `a`. For existing values, it does not override those of `a` with
// those of `b`.
func MergeMaps(a *map[string]interface{}, b map[string]interface{}) error {
	return mergeMapsRecursive(a, b)
}
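// exampleMergeMaps is an illustrative sketch, not part of the original file:
// defaults only fill gaps, they never overwrite values the caller already
// set, and the same rule applies recursively to nested maps.
func exampleMergeMaps() {
	payload := map[string]interface{}{"date": "2024-01-01"}
	defaults := map[string]interface{}{"date": "1970-01-01", "retries": 3}
	_ = MergeMaps(&payload, defaults)
	fmt.Println(payload) // map[date:2024-01-01 retries:3]
}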
func mergeMapsRecursive(a *map[string]interface{}, b map[string]interface{}) error {
	for k, v := range b {
		// Skip explicit nulls: there is nothing to merge, and
		// reflect.TypeOf(nil) has no Kind to inspect.
		if v == nil {
			continue
		}
		// If the value is a map, check recursively.
		if reflect.TypeOf(v).Kind() == reflect.Map {
			if _, ok := (*a)[k]; !ok {
				(*a)[k] = v
			} else {
				aVal, aOK := (*a)[k].(map[string]interface{})
				bVal, bOK := v.(map[string]interface{})
				if !aOK || !bOK {
					return errors.New("unable to cast interface{} to map[string]interface{}")
				}
				if err := mergeMapsRecursive(&aVal, bVal); err != nil {
					return err
				}
			}
		} else {
			if _, ok := (*a)[k]; !ok {
				(*a)[k] = v
			}
		}
	}
	return nil
}

func GetLabels(run Run) map[string]string {
	var labels = make(map[string]string)
	if run.ClusterName != "" {
		labels["cluster-name"] = run.ClusterName
	}
	if run.RunID != "" {
		labels["flotilla-run-id"] = SanitizeLabel(run.RunID)
		labels["flotilla-run-mode"] = SanitizeLabel(os.Getenv("FLOTILLA_MODE"))
	}
	if run.User != "" {
		labels["owner"] = SanitizeLabel(run.User)
	}
	if run.Tier != "" {
		labels["tier"] = SanitizeLabel(string(run.Tier))
	}
	if _, workflowExists := run.Labels["kube_workflow"]; !workflowExists {
		if _, taskNameExists := run.Labels["kube_task_name"]; taskNameExists {
			labels["kube_workflow"] = SanitizeLabel(run.Labels["kube_task_name"])
		}
	}
	for k, v := range run.Labels {
		labels[k] = SanitizeLabel(v)
	}
	return labels
}

func SanitizeLabel(key string) string {
	key = strings.TrimSpace(key)
	key = regexp.MustCompile(`[^-a-z0-9A-Z_.]+`).ReplaceAllString(key, "_")
	key = strings.TrimPrefix(key, "_")
	key = strings.ToLower(key)
	if len(key) > 63 {
		key = key[:63]
	}
	for {
		tempKey := strings.TrimSuffix(key, "_")
		if tempKey == key {
			break
		}
		key = tempKey
	}
	return key
}

================================================ FILE: state/models_test.go ================================================

package state

import (
	"os"
	"reflect"
	"strings"
	"testing"
)

func TestMergeMaps_Simple(t *testing.T) {
	mapA := map[string]interface{}{
		"A": "aaa",
		"B": "bbb",
		"C": "ccc",
	}
	mapB := map[string]interface{}{
		"B": "xxx",
		"D": "ddd",
	}
	expectedMapA := map[string]interface{}{
		"A": "aaa",
		"B": "bbb",
		"C": "ccc",
		"D": "ddd",
	}
	err := MergeMaps(&mapA, mapB)
	if err != nil {
		t.Error("unable to merge maps")
	}
	if !reflect.DeepEqual(mapA, expectedMapA) {
		t.Error("map merge unsuccessful")
	}
}

func TestMergeMaps_Nested(t *testing.T) {
	nestedAValue := "aaa"
	nestedCValue := "ccc"
	overrideNestedBVal := "zzzzzz"
	nestedD1Value := "d1"
	overrideNestedD1Value := "override_d1"
	overrideNestedD2Value := "override_d2"
	mapA := map[string]interface{}{
		"Nested": map[string]interface{}{
			"A": nestedAValue,
			"C": nestedCValue,
			"D": map[string]interface{}{
				"D1": nestedD1Value,
			},
		},
	}
	mapB := map[string]interface{}{
		"Nested": map[string]interface{}{
			"B": overrideNestedBVal,
			"D": map[string]interface{}{
				"D1": overrideNestedD1Value,
				"D2": overrideNestedD2Value,
			},
		},
	}
	// After merging, mapA should have its `B` value set. Additionally, mapA[D]
	// should have its D2 value set BUT its D1 value should not be overridden.
expectedMapA := map[string]interface{}{ "Nested": map[string]interface{}{ "A": nestedAValue, "B": overrideNestedBVal, "C": nestedCValue, "D": map[string]interface{}{ "D1": nestedD1Value, "D2": overrideNestedD2Value, }, }, } err := MergeMaps(&mapA, mapB) if err != nil { t.Error("unable to merge maps") } if reflect.DeepEqual(mapA, expectedMapA) == false { t.Error("map merge unsuccessful") } } func TestSanitizeLabel(t *testing.T) { tests := []struct { name string input string expected string }{ { name: "should truncate", input: strings.Repeat("a", 64), expected: strings.Repeat("a", 63), }, { name: "leaves lowercase alone", input: "lowercasealphanumeric11", expected: "lowercasealphanumeric11", }, { name: "lowercases stuff", input: "UPPERCASEALPHANUMERIC11", expected: "uppercasealphanumeric11", }, { name: "replaces special chars", input: "a*s", expected: "a_s", }, { name: "trims spaces", input: " foo ", expected: "foo", }, { name: "removes leading _'s", input: "_a", expected: "a", }, { name: "removes trailing _'s", input: "a_", expected: "a", }, { name: "removes repeated trailing _'s", input: "a_____", expected: "a", }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { result := SanitizeLabel(test.input) if result != test.expected { t.Errorf("expected %s, got %s", test.expected, result) } }) } } func TestGetLabels(t *testing.T) { type args struct { run Run } var tests []struct { name string args args want map[string]string } os.Setenv("FLOTILLA_MODE", "test") tests = []struct { name string args args want map[string]string }{ { name: "should return labels for run with definition", args: args{ run: Run{ DefinitionID: "A", ClusterName: "A", GroupName: "groupA", RunID: "runA", User: "userA", Tier: "tierA", Labels: map[string]string{ "kube_foo": "bar", "team": "awesomeness", "kube_task_name": "foo", }, }, }, want: map[string]string{ "cluster-name": "A", "flotilla-run-id": "runa", "kube_workflow": "foo", "kube_foo": "bar", "kube_task_name": "foo", "team": "awesomeness", "tier": "tiera", "owner": "usera", "flotilla-run-mode": "test", }, }, { name: "should return empty labels for run with no definition", args: args{ run: Run{}, }, want: map[string]string{}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if got := GetLabels(tt.args.run); !reflect.DeepEqual(got, tt.want) { t.Errorf("GetLabels() = %v, want %v", got, tt.want) } }) } } ================================================ FILE: state/pg_queries.go ================================================ package state // DefinitionSelect postgres specific query for definitions const DefinitionSelect = ` select td.definition_id as definitionid, td.adaptive_resource_allocation as adaptiveresourceallocation, td.image as image, td.group_name as groupname, td.alias as alias, td.memory as memory, coalesce(td.command, '') as command, coalesce(td.task_type, '') as tasktype, env::TEXT as env, td.cpu as cpu, td.gpu as gpu, td.ephemeral_storage as ephemeral_storage, coalesce(td.requires_docker, false) as requires_docker, coalesce(td.target_cluster, '') as target_cluster, array_to_json('{""}'::TEXT[])::TEXT as tags, array_to_json('{}'::INT[])::TEXT as ports from (select * from task_def) td ` // ListDefinitionsSQL postgres specific query for listing definitions const ListDefinitionsSQL = DefinitionSelect + "\n%s %s limit $1 offset $2" // ListClusterStatesSQL postgres query for listing cluster status const ( ListClusterStatesSQL = ` SELECT id, name, cluster_version, status, status_reason, status_since, capabilities, 
allowed_tiers, region, updated_at, namespace, emr_virtual_cluster, spark_server_uri FROM cluster_state ORDER BY name ASC` ) // GetDefinitionSQL postgres specific query for getting a single definition const GetDefinitionSQL = DefinitionSelect + "\nwhere definition_id = $1" // GetDefinitionByAliasSQL get definition by alias const GetDefinitionByAliasSQL = DefinitionSelect + "\nwhere alias = $1" const TaskResourcesSelectCommandSQL = ` SELECT cast((percentile_disc(0.99) within GROUP (ORDER BY A.max_memory_used)) * 1.75 as int) as memory, cast((percentile_disc(0.99) within GROUP (ORDER BY A.max_cpu_used)) * 1.25 as int) as cpu FROM (SELECT memory as max_memory_used, cpu as max_cpu_used FROM TASK WHERE queued_at >= CURRENT_TIMESTAMP - INTERVAL '3 days' AND (exit_code = 137 or exit_reason = 'OOMKilled') AND engine = 'eks' AND definition_id = $1 AND command_hash = $2 LIMIT 30) A ` const TaskResourcesExecutorCountSQL = ` SELECT least(coalesce(cast((percentile_disc(0.99) within GROUP (ORDER BY A.executor_count)) as int), 25), 100) as executor_count FROM (SELECT CASE WHEN (exit_reason like '%Exception%') THEN (spark_extension -> 'spark_submit_job_driver' -> 'num_executors')::int * 1.75 ELSE (spark_extension -> 'spark_submit_job_driver' -> 'num_executors')::int * 1 END as executor_count FROM TASK WHERE queued_at >= CURRENT_TIMESTAMP - INTERVAL '24 hours' AND engine = 'eks-spark' AND definition_id = $1 AND command_hash = $2 LIMIT 30) A ` const TaskResourcesDriverOOMSQL = ` SELECT (spark_extension -> 'driver_oom')::boolean AS driver_oom FROM TASK WHERE queued_at >= CURRENT_TIMESTAMP - INTERVAL '7 days' AND engine = 'eks-spark' AND definition_id = $1 AND command_hash = $2 AND exit_code = 137 AND spark_extension ? 'driver_oom' GROUP BY 1 ` const TaskIdempotenceKeyCheckSQL = ` WITH runs as ( SELECT run_id FROM task WHERE idempotence_key = $1 and (exit_code = 0 or exit_code is null) and queued_at >= CURRENT_TIMESTAMP - INTERVAL '7 days') SELECT run_id FROM runs LIMIT 1; ` const TaskResourcesExecutorOOMSQL = ` SELECT CASE WHEN A.c >= 1 THEN true::boolean ELSE false::boolean END FROM (SELECT count(*) as c FROM TASK WHERE queued_at >= CURRENT_TIMESTAMP - INTERVAL '7 days' AND definition_id = $1 AND command_hash = $2 AND engine = 'eks-spark' AND exit_code !=0 LIMIT 30) A ` const TaskResourcesExecutorNodeLifecycleSQL = ` SELECT CASE WHEN A.c >= 1 THEN 'ondemand' ELSE 'spot' END FROM (SELECT count(*) as c FROM TASK WHERE queued_at >= CURRENT_TIMESTAMP - INTERVAL '12 hour' AND definition_id = $1 AND command_hash = $2 AND exit_code !=0 LIMIT 30) A ` const TaskExecutionRuntimeCommandSQL = ` SELECT percentile_disc(0.95) within GROUP (ORDER BY A.minutes) as minutes FROM (SELECT EXTRACT(epoch from finished_at - started_at) / 60 as minutes FROM TASK WHERE definition_id = $1 AND exit_code = 0 AND engine = 'eks' AND queued_at >= CURRENT_TIMESTAMP - INTERVAL '7 days' AND command_hash = (SELECT command_hash FROM task WHERE run_id = $2) LIMIT 30) A ` const ListFailingNodesSQL = ` SELECT instance_dns_name FROM ( SELECT instance_dns_name, count(*) as c FROM TASK WHERE (exit_code = 128 OR pod_events @> '[{"reason": "Failed"}]' OR pod_events @> '[{"reason": "FailedSync"}]' OR pod_events @> '[{"reason": "FailedCreatePodSandBox"}]' OR pod_events @> '[{"reason": "OutOfmemory"}]') AND engine = 'eks' AND queued_at >= NOW() - INTERVAL '1 HOURS' AND instance_dns_name like 'ip-%' GROUP BY 1 order by 2 desc) AS all_nodes WHERE c >= 5 ` const PodReAttemptRate = ` SELECT (multiple_attempts / (CASE WHEN single_attempts = 0 THEN 1 ELSE 
single_attempts END)) AS attempts FROM ( SELECT COUNT(CASE WHEN attempt_count <= 1 THEN 1 END) * 1.0 AS single_attempts, COUNT(CASE WHEN attempt_count > 1 THEN 1 END) * 1.0 AS multiple_attempts FROM task WHERE engine = 'eks' AND queued_at >= NOW() - INTERVAL '18 MINUTES' AND node_lifecycle = 'spot') A ` // RunSelect postgres specific query for runs const RunSelect = ` select t.run_id as runid, coalesce(t.definition_id, '') as definitionid, coalesce(t.alias, '') as alias, coalesce(t.image, '') as image, coalesce(t.cluster_name, '') as clustername, t.exit_code as exitcode, t.exit_reason as exitreason, coalesce(t.status, '') as status, queued_at as queuedat, started_at as startedat, finished_at as finishedat, coalesce(t.instance_id, '') as instanceid, coalesce(t.instance_dns_name, '') as instancednsname, coalesce(t.group_name, '') as groupname, coalesce(t.task_type, '') as tasktype, env::TEXT as env, command, memory, cpu, gpu, engine, ephemeral_storage as ephemeral_storage, node_lifecycle as nodelifecycle, pod_name as podname, namespace, max_cpu_used as maxcpuused, max_memory_used as maxmemoryused, pod_events::TEXT as podevents, command_hash as commandhash, cloudtrail_notifications::TEXT as cloudtrailnotifications, coalesce(executable_id, '') as executableid, coalesce(executable_type, '') as executabletype, execution_request_custom::TEXT as executionrequestcustom, cpu_limit as cpulimit, memory_limit as memorylimit, attempt_count as attemptcount, spawned_runs::TEXT as spawnedruns, run_exceptions::TEXT as runexceptions, active_deadline_seconds as activedeadlineseconds, spark_extension::TEXT as sparkextension, metrics_uri as metricsuri, description as description, idempotence_key as idempotencekey, coalesce("user", '') as user, coalesce(arch, '') as arch, labels::TEXT as labels, coalesce(requires_docker,false) as requires_docker, service_account as service_account, coalesce(tier::text, 'Tier4') as tier from task t ` const GetRunStatusSQL = ` SELECT run_id, definition_id, alias, cluster_name, status, queued_at, started_at, finished_at, exit_code, exit_reason, engine FROM task WHERE run_id = $1 ` // ListRunsSQL postgres specific query for listing runs const ListRunsSQL = RunSelect + "\n%s %s limit $1 offset $2" // GetRunSQL postgres specific query for getting a single run const GetRunSQL = RunSelect + "\nwhere run_id = $1" const GetRunSQLByEMRJobId = RunSelect + "\nwhere spark_extension->>'emr_job_id' = $1" // GetRunSQLForUpdate postgres specific query for getting a single run // for update const GetRunSQLForUpdate = GetRunSQL + " for update" // GroupsSelect postgres specific query for getting existing definition // group_names const GroupsSelect = ` select distinct group_name from task_def ` // TagsSelect postgres specific query for getting existing definition tags const TagsSelect = ` select distinct text from tags ` // ListGroupsSQL postgres specific query for listing definition group_names const ListGroupsSQL = GroupsSelect + "\n%s order by group_name asc limit $1 offset $2" // ListTagsSQL postgres specific query for listing definition tags const ListTagsSQL = TagsSelect + "\n%s order by text asc limit $1 offset $2" // WorkerSelect postgres specific query for workers const WorkerSelect = ` select worker_type as workertype, count_per_instance as countperinstance, engine from worker ` // ListWorkersSQL postgres specific query for listing workers const ListWorkersSQL = WorkerSelect const GetWorkerEngine = WorkerSelect + "\nwhere engine = $1" // GetWorkerSQL postgres specific query for retrieving 
data for a specific // worker type. const GetWorkerSQL = WorkerSelect + "\nwhere worker_type = $1 and engine = $2" // GetWorkerSQLForUpdate postgres specific query for retrieving data for a specific // worker type; locks the row. const GetWorkerSQLForUpdate = GetWorkerSQL + " for update" // TemplateSelect selects a template const TemplateSelect = ` SELECT template_id as templateid, template_name as templatename, version, schema, command_template as commandtemplate, adaptive_resource_allocation as adaptiveresourceallocation, image, memory, env::TEXT as env, privileged, cpu, gpu, defaults, coalesce(avatar_uri, '') as avataruri FROM template ` // ListTemplatesSQL postgres specific query for listing templates const ListTemplatesSQL = TemplateSelect + "\n%s limit $1 offset $2" // GetTemplateByIDSQL postgres specific query for getting a single template const GetTemplateByIDSQL = TemplateSelect + "\nwhere template_id = $1" // ListTemplatesLatestOnlySQL lists the latest version of each distinct // template name. const ListTemplatesLatestOnlySQL = ` SELECT DISTINCT ON (template_name) template_id as templateid, template_name as templatename, version, schema, command_template as commandtemplate, adaptive_resource_allocation as adaptiveresourceallocation, image, memory, env::TEXT as env, privileged, cpu, gpu, defaults, coalesce(avatar_uri, '') as avataruri FROM template ORDER BY template_name, version DESC, template_id LIMIT $1 OFFSET $2 ` // GetTemplateLatestOnlySQL get the latest version of a specific template name. const GetTemplateLatestOnlySQL = TemplateSelect + "\nWHERE template_name = $1 ORDER BY version DESC LIMIT 1;" const GetTemplateByVersionSQL = TemplateSelect + "\nWHERE template_name = $1 AND version = $2 ORDER BY version DESC LIMIT 1;" ================================================ FILE: state/pg_state_manager.go ================================================ package state import ( "context" "database/sql/driver" "encoding/json" "fmt" "time" "github.com/stitchfix/flotilla-os/clients/metrics" "github.com/stitchfix/flotilla-os/log" "github.com/stitchfix/flotilla-os/tracing" "github.com/jmoiron/sqlx" // Pull in postgres specific drivers "database/sql" "math" "strings" "github.com/lib/pq" _ "github.com/lib/pq" "github.com/pkg/errors" "github.com/stitchfix/flotilla-os/config" "github.com/stitchfix/flotilla-os/exceptions" "go.uber.org/multierr" sqltrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/database/sql" sqlxtrace "gopkg.in/DataDog/dd-trace-go.v1/contrib/jmoiron/sqlx" ) // SQLStateManager uses postgresql to manage state type SQLStateManager struct { db *sqlx.DB readonlyDB *sqlx.DB log log.Logger } func (sm *SQLStateManager) ListFailingNodes(ctx context.Context) (NodeList, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.list_failing_nodes", "") defer span.Finish() var err error var nodeList NodeList err = sm.readonlyDB.SelectContext(ctx, &nodeList, ListFailingNodesSQL) if err != nil { if err == sql.ErrNoRows { return nodeList, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Error fetching node list")} } else { return nodeList, errors.Wrapf(err, "Error fetching node list") } } return nodeList, err } func (sm *SQLStateManager) GetPodReAttemptRate(ctx context.Context) (float32, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_pod_reattempt_rate", "") defer span.Finish() var err error attemptRate := float32(1.0) err = sm.readonlyDB.GetContext(ctx, &attemptRate, PodReAttemptRate) if err != nil { if err == sql.ErrNoRows { return attemptRate, 
exceptions.MissingResource{ ErrorString: fmt.Sprintf("Error fetching attempt rate")} } else { return attemptRate, errors.Wrapf(err, "Error fetching attempt rate") } } return attemptRate, err } func (sm *SQLStateManager) GetNodeLifecycle(ctx context.Context, executableID string, commandHash string) (string, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_node_lifecycle", "") defer span.Finish() //span.SetTag("command_hash", commandHash) var err error nodeType := "spot" err = sm.readonlyDB.GetContext(ctx, &nodeType, TaskResourcesExecutorNodeLifecycleSQL, executableID, commandHash) if err != nil { if err == sql.ErrNoRows { return nodeType, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Error fetching node type")} } else { return nodeType, errors.Wrapf(err, "Error fetching node type") } } return nodeType, err } func (sm *SQLStateManager) GetTaskHistoricalRuntime(ctx context.Context, executableID string, runID string) (float32, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_task_historical_runtime", "") defer span.Finish() span.SetTag("job.run_id", runID) var err error minutes := float32(1.0) err = sm.readonlyDB.GetContext(ctx, &minutes, TaskExecutionRuntimeCommandSQL, executableID, runID) if err != nil { if err == sql.ErrNoRows { return minutes, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Error fetching TaskRuntime rate")} } else { return minutes, errors.Wrapf(err, "Error fetching attempt rate") } } return minutes, err } func (sm *SQLStateManager) EstimateRunResources(ctx context.Context, executableID string, commandHash string) (TaskResources, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.estimate_run_resources", "") defer span.Finish() //span.SetTag("command_hash", commandHash) var err error var taskResources TaskResources err = sm.readonlyDB.GetContext(ctx, &taskResources, TaskResourcesSelectCommandSQL, executableID, commandHash) if err != nil { if err == sql.ErrNoRows { // No historical data found - this is expected for new jobs or jobs that haven't OOM'd if sm.log != nil { _ = sm.log.Log( "level", "info", "message", "ARA: No historical resource data found", "definition_id", executableID, "command_hash", commandHash, ) } return taskResources, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Resource usage with executable %s not found", executableID)} } else { // Check if this is a PostgreSQL recovery conflict (expected on read replicas) errMsg := err.Error() isRecoveryConflict := strings.Contains(errMsg, "conflict with recovery") || strings.Contains(errMsg, "canceling statement due to conflict") if isRecoveryConflict { // Recovery conflicts are expected on read replicas - treat as missing data // Log at info level since this is expected behavior, not an error if sm.log != nil { _ = sm.log.Log( "level", "info", "message", "ARA: Query canceled due to recovery conflict on read replica (using defaults)", "definition_id", executableID, "command_hash", commandHash, ) } return taskResources, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Resource usage with executable %s not found (recovery conflict)", executableID)} } // Unexpected error querying historical data if sm.log != nil { _ = sm.log.Log( "level", "error", "message", "ARA: Error querying historical resource data", "definition_id", executableID, "command_hash", commandHash, "error", err.Error(), ) } return taskResources, errors.Wrapf(err, "issue getting resources with executable [%s]", executableID) } } // Check if the query returned NULL values (can happen when 
percentile_disc has no valid data) if !taskResources.Memory.Valid || !taskResources.Cpu.Valid { // NULL values mean no valid historical data - treat as missing resource if sm.log != nil { _ = sm.log.Log( "level", "info", "message", "ARA: No historical resource data found (NULL values returned)", "definition_id", executableID, "command_hash", commandHash, ) } return taskResources, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Resource usage with executable %s not found (NULL values)", executableID)} } // Successfully found historical data - log the values being returned if sm.log != nil { _ = sm.log.Log( "level", "info", "message", "ARA: Historical resource data found", "definition_id", executableID, "command_hash", commandHash, "estimated_memory_mb", taskResources.Memory.Int64, "estimated_cpu_millicores", taskResources.Cpu.Int64, ) } return taskResources, err } func (sm *SQLStateManager) EstimateExecutorCount(ctx context.Context, executableID string, commandHash string) (int64, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.estimate_executor_count", "") defer span.Finish() //span.SetTag("command_hash", commandHash) var err error executorCount := int64(25) err = sm.readonlyDB.GetContext(ctx, &executorCount, TaskResourcesExecutorCountSQL, executableID, commandHash) if err != nil { if err == sql.ErrNoRows { return executorCount, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Resource usage with executable %s not found", executableID)} } else { return executorCount, errors.Wrapf(err, "issue getting resources with executable [%s]", executableID) } } return executorCount, err } func (sm *SQLStateManager) CheckIdempotenceKey(ctx context.Context, idempotenceKey string) (string, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.check_idempotence_key", "") defer span.Finish() var err error runId := "" err = sm.readonlyDB.GetContext(ctx, &runId, TaskIdempotenceKeyCheckSQL, idempotenceKey) if err != nil || len(runId) == 0 { err = errors.New("no run_id found for idempotence key") } return runId, err } func (sm *SQLStateManager) ExecutorOOM(ctx context.Context, executableID string, commandHash string) (bool, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.executor_oom", "") defer span.Finish() //span.SetTag("command_hash", commandHash) var err error executorOOM := false err = sm.readonlyDB.GetContext(ctx, &executorOOM, TaskResourcesExecutorOOMSQL, executableID, commandHash) if err != nil { if err == sql.ErrNoRows { return executorOOM, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Resource oom for executable %s not found", executableID)} } else { return executorOOM, errors.Wrapf(err, "issue getting resources with executable [%s]", executableID) } } return executorOOM, err } func (sm *SQLStateManager) DriverOOM(ctx context.Context, executableID string, commandHash string) (bool, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.driver_oom", "") defer span.Finish() //span.SetTag("command_hash", commandHash) var err error driverOOM := false err = sm.readonlyDB.GetContext(ctx, &driverOOM, TaskResourcesDriverOOMSQL, executableID, commandHash) if err != nil { if err == sql.ErrNoRows { return driverOOM, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Resource oom for driver %s not found", executableID)} } else { return driverOOM, errors.Wrapf(err, "issue getting resources with executable [%s]", executableID) } } return driverOOM, err } // Name is the name of the state manager - matches value in configuration func (sm *SQLStateManager) 
Name() string { return "postgres" } // likeFields are the set of fields // that are filtered using a `like` clause var likeFields = map[string]bool{ "image": true, "alias": true, "group_name": true, "command": true, "text": true, "exit_reason": true, } // Initialize creates tables if they do not exist func (sm *SQLStateManager) Initialize(conf config.Config) error { dburl := conf.GetString("database_url") readonlyDbUrl := conf.GetString("readonly_database_url") createSchema := conf.GetBool("create_database_schema") fmt.Printf("create_database_schema: %t\ncreating schema...\n", createSchema) sqltrace.Register("postgres", &pq.Driver{}, sqltrace.WithServiceName("flotilla")) var err error if sm.db, err = sqlxtrace.Open("postgres", dburl); err != nil { return errors.Wrap(err, "unable to open postgres db") } sqltrace.Register("postgres", &pq.Driver{}, sqltrace.WithServiceName("flotilla")) if sm.readonlyDB, err = sqlxtrace.Open("postgres", readonlyDbUrl); err != nil { return errors.Wrap(err, "unable to open readonly postgres db") } if conf.IsSet("database_max_idle_connections") { sm.db.SetMaxIdleConns(conf.GetInt("database_max_idle_connections")) sm.readonlyDB.SetMaxIdleConns(conf.GetInt("database_max_idle_connections")) } if createSchema { // Since this happens at initialization we // could encounter racy conditions waiting for pg // to become available. Wait for it a bit if err = sm.db.Ping(); err != nil { // Try 3 more times // 5, 10, 20 for i := 0; i < 3 && err != nil; i++ { time.Sleep(time.Duration(5*math.Pow(2, float64(i))) * time.Second) err = sm.db.Ping() } if err != nil { return errors.Wrap(err, "error trying to connect to postgres db, retries exhausted") } } // Populate worker table if err = sm.initWorkerTable(conf); err != nil { return errors.Wrap(err, "problem populating worker table sql") } } return nil } func (sm *SQLStateManager) makeWhereClause(filters map[string][]string) []string { // These will be joined with "AND" wc := []string{} for k, v := range filters { if len(v) > 1 { // No like queries for multiple filters with same key quoted := make([]string, len(v)) for i, filterVal := range v { quoted[i] = fmt.Sprintf("'%s'", filterVal) } wc = append(wc, fmt.Sprintf("%s in (%s)", k, strings.Join(quoted, ","))) } else if len(v) == 1 { fmtString := "%s='%s'" fieldName := k if likeFields[k] { fmtString = "%s like '%%%s%%'" } else if strings.HasSuffix(k, "_since") { fieldName = strings.Replace(k, "_since", "", -1) fmtString = "%s > '%s'" } else if strings.HasSuffix(k, "_until") { fieldName = strings.Replace(k, "_until", "", -1) fmtString = "%s < '%s'" } wc = append(wc, fmt.Sprintf(fmtString, fieldName, v[0])) } } return wc } func (sm *SQLStateManager) makeEnvWhereClause(filters map[string]string) []string { wc := make([]string, len(filters)) i := 0 for k, v := range filters { fmtString := `env @> '[{"name":"%s","value":"%s"}]'` wc[i] = fmt.Sprintf(fmtString, k, v) i++ } return wc } func (sm *SQLStateManager) orderBy(obj IOrderable, field string, order string) (string, error) { if order == "asc" || order == "desc" { if obj.ValidOrderField(field) { return fmt.Sprintf("order by %s %s NULLS LAST", field, order), nil } return "", errors.Errorf("Invalid field to order by [%s], must be one of [%s]", field, strings.Join(obj.ValidOrderFields(), ", ")) } return "", errors.Errorf("Invalid order string, must be one of ('asc', 'desc'), was %s", order) } // ListDefinitions returns a DefinitionList // limit: limit the result to this many definitions // offset: start the results at this offset // 
sortBy: sort by this field // order: 'asc' or 'desc' // filters: map of field filters on Definition - joined with AND // envFilters: map of environment variable filters - joined with AND func (sm *SQLStateManager) ListDefinitions( ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string) (DefinitionList, error) { // Use "list" as an identifier since there's no specific runID for a list operation ctx, span := tracing.TraceJob(ctx, "flotilla.state.list_definitions", "") defer span.Finish() var err error var result DefinitionList var whereClause, orderQuery string where := append(sm.makeWhereClause(filters), sm.makeEnvWhereClause(envFilters)...) if len(where) > 0 { whereClause = fmt.Sprintf("where %s", strings.Join(where, " and ")) } orderQuery, err = sm.orderBy(&Definition{}, sortBy, order) if err != nil { return result, errors.WithStack(err) } sql := fmt.Sprintf(ListDefinitionsSQL, whereClause, orderQuery) countSQL := fmt.Sprintf("select COUNT(*) from (%s) as sq", sql) err = sm.db.Select(&result.Definitions, sql, limit, offset) if err != nil { return result, errors.Wrap(err, "issue running list definitions sql") } err = sm.db.Get(&result.Total, countSQL, nil, 0) if err != nil { return result, errors.Wrap(err, "issue running list definitions count sql") } return result, nil } // GetDefinition returns a single definition by id func (sm *SQLStateManager) GetDefinition(ctx context.Context, definitionID string) (Definition, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_definition", "") defer span.Finish() var err error var definition Definition err = sm.db.GetContext(ctx, &definition, GetDefinitionSQL, definitionID) if err != nil { if err == sql.ErrNoRows { return definition, exceptions.MissingResource{ fmt.Sprintf("Definition with ID %s not found", definitionID)} } else { return definition, errors.Wrapf(err, "issue getting definition with id [%s]", definitionID) } } return definition, nil } // GetDefinitionByAlias returns a single definition by id func (sm *SQLStateManager) GetDefinitionByAlias(ctx context.Context, alias string) (Definition, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_definition_by_alias", "") defer span.Finish() //span.SetTag("alias", alias) var err error var definition Definition err = sm.db.GetContext(ctx, &definition, GetDefinitionByAliasSQL, alias) if err != nil { if err == sql.ErrNoRows { return definition, exceptions.MissingResource{ fmt.Sprintf("Definition with alias %s not found", alias)} } else { return definition, errors.Wrapf(err, "issue getting definition with alias [%s]", alias) } } return definition, err } // UpdateDefinition updates a definition // - updates can be partial func (sm *SQLStateManager) UpdateDefinition(ctx context.Context, definitionID string, updates Definition) (Definition, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.update_definition", "") defer span.Finish() var ( err error existing Definition ) existing, err = sm.GetDefinition(ctx, definitionID) if err != nil { return existing, errors.WithStack(err) } existing.UpdateWith(updates) selectForUpdate := `SELECT * FROM task_def WHERE definition_id = $1 FOR UPDATE;` deletePorts := `DELETE FROM task_def_ports WHERE task_def_id = $1;` deleteTags := `DELETE FROM task_def_tags WHERE task_def_id = $1` insertPorts := ` INSERT INTO task_def_ports( task_def_id, port ) VALUES ($1, $2); ` insertDefTags := ` INSERT INTO task_def_tags( task_def_id, tag_id ) VALUES ($1, $2); ` insertTags 
:= ` INSERT INTO tags(text) SELECT $1 WHERE NOT EXISTS (SELECT text from tags where text = $2) ` tx, err := sm.db.Begin() if err != nil { return existing, errors.WithStack(err) } if _, err = tx.Exec(selectForUpdate, definitionID); err != nil { tx.Rollback() return existing, errors.WithStack(err) } if _, err = tx.Exec(deletePorts, definitionID); err != nil { tx.Rollback() return existing, errors.WithStack(err) } if _, err = tx.Exec(deleteTags, definitionID); err != nil { tx.Rollback() return existing, errors.WithStack(err) } update := ` UPDATE task_def SET image = $2, alias = $3, memory = $4, command = $5, env = $6, cpu = $7, gpu = $8, adaptive_resource_allocation = $9, ephemeral_storage = $10, requires_docker = $11, target_cluster = $12 WHERE definition_id = $1; ` if _, err = tx.Exec( update, definitionID, existing.Image, existing.Alias, existing.Memory, existing.Command, existing.Env, existing.Cpu, existing.Gpu, existing.AdaptiveResourceAllocation, existing.EphemeralStorage, existing.RequiresDocker, existing.TargetCluster); err != nil { tx.Rollback() return existing, errors.Wrapf(err, "issue updating definition [%s]", definitionID) } if existing.Ports != nil { for _, p := range *existing.Ports { if _, err = tx.Exec(insertPorts, definitionID, p); err != nil { tx.Rollback() return existing, errors.WithStack(err) } } } if existing.Tags != nil { for _, t := range *existing.Tags { if _, err = tx.Exec(insertTags, t, t); err != nil { tx.Rollback() return existing, errors.WithStack(err) } if _, err = tx.Exec(insertDefTags, definitionID, t); err != nil { tx.Rollback() return existing, errors.WithStack(err) } } } err = tx.Commit() if err != nil { return existing, errors.WithStack(err) } return existing, nil } // CreateDefinition creates the passed in definition object // - error if definition already exists func (sm *SQLStateManager) CreateDefinition(ctx context.Context, d Definition) error { ctx, span := tracing.TraceJob(ctx, "flotilla.state.create_definition", "") defer span.Finish() var err error insertPorts := ` INSERT INTO task_def_ports( task_def_id, port ) VALUES ($1, $2); ` insertDefTags := ` INSERT INTO task_def_tags( task_def_id, tag_id ) VALUES ($1, $2); ` insertTags := ` INSERT INTO tags(text) SELECT $1 WHERE NOT EXISTS (SELECT text from tags where text = $2) ` tx, err := sm.db.Begin() if err != nil { return errors.WithStack(err) } insert := ` INSERT INTO task_def( definition_id, image, group_name, alias, memory, command, env, cpu, gpu, adaptive_resource_allocation, ephemeral_storage, requires_docker, target_cluster ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13); ` if _, err = tx.Exec(insert, d.DefinitionID, d.Image, d.GroupName, d.Alias, d.Memory, d.Command, d.Env, d.Cpu, d.Gpu, d.AdaptiveResourceAllocation, d.EphemeralStorage, d.RequiresDocker, d.TargetCluster); err != nil { tx.Rollback() return errors.Wrapf( err, "issue creating new task definition with id [%s] and alias [%s]", d.DefinitionID, d.Alias) } if d.Ports != nil { for _, p := range *d.Ports { if _, err = tx.Exec(insertPorts, d.DefinitionID, p); err != nil { tx.Rollback() return errors.WithStack(err) } } } if d.Tags != nil { for _, t := range *d.Tags { if _, err = tx.Exec(insertTags, t, t); err != nil { tx.Rollback() return errors.WithStack(err) } if _, err = tx.Exec(insertDefTags, d.DefinitionID, t); err != nil { tx.Rollback() return errors.WithStack(err) } } } err = tx.Commit() if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return errors.WithStack(err) } return nil } // DeleteDefinition deletes definition and associated runs
and environment variables func (sm *SQLStateManager) DeleteDefinition(ctx context.Context, definitionID string) error { ctx, span := tracing.TraceJob(ctx, "flotilla.state.delete_definition", "") defer span.Finish() var err error statements := []string{ "DELETE FROM task_def_ports WHERE task_def_id = $1", "DELETE FROM task_def_tags WHERE task_def_id = $1", "DELETE FROM task WHERE definition_id = $1", "DELETE FROM task_def WHERE definition_id = $1", } tx, err := sm.db.Begin() if err != nil { return errors.WithStack(err) } for _, stmt := range statements { if _, err = tx.Exec(stmt, definitionID); err != nil { tx.Rollback() return errors.Wrapf(err, "issue deleting definition with id [%s]", definitionID) } } err = tx.Commit() if err != nil { return errors.WithStack(err) } return nil } // ListRuns returns a RunList // limit: limit the result to this many runs // offset: start the results at this offset // sortBy: sort by this field // order: 'asc' or 'desc' // filters: map of field filters on Run - joined with AND // envFilters: map of environment variable filters - joined with AND func (sm *SQLStateManager) ListRuns(ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string, engines []string) (RunList, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.list_runs", "") defer span.Finish() var err error var result RunList var whereClause, orderQuery string if filters == nil { filters = make(map[string][]string) } if engines != nil { filters["engine"] = engines } else { filters["engine"] = []string{DefaultEngine} } where := append(sm.makeWhereClause(filters), sm.makeEnvWhereClause(envFilters)...) if len(where) > 0 { whereClause = fmt.Sprintf("where %s", strings.Join(where, " and ")) } orderQuery, err = sm.orderBy(&Run{}, sortBy, order) if err != nil { return result, errors.WithStack(err) } sql := fmt.Sprintf(ListRunsSQL, whereClause, orderQuery) countSQL := fmt.Sprintf("select COUNT(*) from (%s) as sq", sql) err = sm.db.Select(&result.Runs, sql, limit, offset) if err != nil { return result, errors.Wrap(err, "issue running list runs sql") } err = sm.db.Get(&result.Total, countSQL, nil, 0) if err != nil { return result, errors.Wrap(err, "issue running list runs count sql") } return result, nil } // GetRun gets run by id func (sm *SQLStateManager) GetRun(ctx context.Context, runID string) (Run, error) { // Create a span for this database operation using the utils.TraceJob function ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_run", "") defer span.Finish() span.SetTag("job.run_id", runID) var r Run err := sm.db.GetContext(ctx, &r, GetRunSQL, runID) if err != nil { // Tag error for easier debugging span.SetTag("error", true) span.SetTag("error.msg", err.Error()) if err == sql.ErrNoRows { return r, exceptions.MissingResource{ fmt.Sprintf("Run with id %s not found", runID)} } else { return r, errors.Wrapf(err, "issue getting run with id [%s]", runID) } } // Tag the span with run metadata tracing.TagRunInfo(span, r.RunID, r.DefinitionID, r.Alias, r.Status, r.ClusterName, r.QueuedAt, r.StartedAt, r.FinishedAt, r.PodName, r.Namespace, r.ExitReason, r.ExitCode, string(r.Tier)) return r, nil } func (sm *SQLStateManager) GetRunByEMRJobId(ctx context.Context, emrJobId string) (Run, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_run_by_emr_job_id", "") defer span.Finish() span.SetTag("job.emr_job_id", emrJobId) var err error var r Run err = sm.db.GetContext(ctx, &r, GetRunSQLByEMRJobId, emrJobId) if err != 
nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) if err == sql.ErrNoRows { return r, exceptions.MissingResource{ fmt.Sprintf("Run with emrjobid %s not found", emrJobId)} } else { return r, errors.Wrapf(err, "issue getting run with emrjobid [%s]", emrJobId) } } // Tag the span with run metadata tracing.TagRunInfo(span, r.RunID, r.DefinitionID, r.Alias, r.Status, r.ClusterName, r.QueuedAt, r.StartedAt, r.FinishedAt, r.PodName, r.Namespace, r.ExitReason, r.ExitCode, string(r.Tier)) return r, nil } func (sm *SQLStateManager) GetResources(ctx context.Context, runID string) (Run, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_resources", "") defer span.Finish() span.SetTag("job.run_id", runID) var err error var r Run err = sm.db.GetContext(ctx, &r, GetRunSQL, runID) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) if err == sql.ErrNoRows { return r, exceptions.MissingResource{ fmt.Sprintf("Run with id %s not found", runID)} } else { return r, errors.Wrapf(err, "issue getting run with id [%s]", runID) } } // Tag the span with run metadata tracing.TagRunInfo(span, r.RunID, r.DefinitionID, r.Alias, r.Status, r.ClusterName, r.QueuedAt, r.StartedAt, r.FinishedAt, r.PodName, r.Namespace, r.ExitReason, r.ExitCode, string(r.Tier)) return r, nil } // UpdateRun updates run with updates - can be partial func (sm *SQLStateManager) UpdateRun(ctx context.Context, runID string, updates Run) (Run, error) { start := time.Now() ctx, span := tracing.TraceJob(ctx, "flotilla.state.update_run", "") defer span.Finish() span.SetTag("job.run_id", runID) span.SetTag("status", updates.Status) var ( err error existing Run ) tx, err := sm.db.BeginTx(ctx, nil) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) span.SetTag("error.type", "begin_transaction") return existing, errors.WithStack(err) } rows, err := tx.QueryContext(ctx, GetRunSQLForUpdate, runID) if err != nil { tx.Rollback() span.SetTag("error", true) span.SetTag("error.msg", err.Error()) span.SetTag("error.type", "query") return existing, errors.WithStack(err) } for rows.Next() { err = rows.Scan( &existing.RunID, &existing.DefinitionID, &existing.Alias, &existing.Image, &existing.ClusterName, &existing.ExitCode, &existing.ExitReason, &existing.Status, &existing.QueuedAt, &existing.StartedAt, &existing.FinishedAt, &existing.InstanceID, &existing.InstanceDNSName, &existing.GroupName, &existing.TaskType, &existing.Env, &existing.Command, &existing.Memory, &existing.Cpu, &existing.Gpu, &existing.Engine, &existing.EphemeralStorage, &existing.NodeLifecycle, &existing.PodName, &existing.Namespace, &existing.MaxCpuUsed, &existing.MaxMemoryUsed, &existing.PodEvents, &existing.CommandHash, &existing.CloudTrailNotifications, &existing.ExecutableID, &existing.ExecutableType, &existing.ExecutionRequestCustom, &existing.CpuLimit, &existing.MemoryLimit, &existing.AttemptCount, &existing.SpawnedRuns, &existing.RunExceptions, &existing.ActiveDeadlineSeconds, &existing.SparkExtension, &existing.MetricsUri, &existing.Description, &existing.IdempotenceKey, &existing.User, &existing.Arch, &existing.Labels, &existing.RequiresDocker, &existing.ServiceAccount, &existing.Tier, ) } if err != nil { return existing, errors.WithStack(err) } existing.UpdateWith(updates) update := ` UPDATE task SET definition_id = $2, alias = $3, image = $4, cluster_name = $5, exit_code = $6, exit_reason = $7, status = $8, queued_at = $9, started_at = $10, finished_at = $11, instance_id = $12, instance_dns_name 
= $13, group_name = $14, env = $15, command = $16, memory = $17, cpu = $18, gpu = $19, engine = $20, ephemeral_storage = $21, node_lifecycle = $22, pod_name = $23, namespace = $24, max_cpu_used = $25, max_memory_used = $26, pod_events = $27, cloudtrail_notifications = $28, executable_id = $29, executable_type = $30, execution_request_custom = $31, cpu_limit = $32, memory_limit = $33, attempt_count = $34, spawned_runs = $35, run_exceptions = $36, active_deadline_seconds = $37, spark_extension = $38, metrics_uri = $39, description = $40, idempotence_key = $41, "user" = $42, arch = $43, labels = $44, requires_docker = $45, service_account = $46, tier = $47 WHERE run_id = $1; ` if _, err = tx.Exec( update, runID, existing.DefinitionID, existing.Alias, existing.Image, existing.ClusterName, existing.ExitCode, existing.ExitReason, existing.Status, existing.QueuedAt, existing.StartedAt, existing.FinishedAt, existing.InstanceID, existing.InstanceDNSName, existing.GroupName, existing.Env, existing.Command, existing.Memory, existing.Cpu, existing.Gpu, existing.Engine, existing.EphemeralStorage, existing.NodeLifecycle, existing.PodName, existing.Namespace, existing.MaxCpuUsed, existing.MaxMemoryUsed, existing.PodEvents, existing.CloudTrailNotifications, existing.ExecutableID, existing.ExecutableType, existing.ExecutionRequestCustom, existing.CpuLimit, existing.MemoryLimit, existing.AttemptCount, existing.SpawnedRuns, existing.RunExceptions, existing.ActiveDeadlineSeconds, existing.SparkExtension, existing.MetricsUri, existing.Description, existing.IdempotenceKey, existing.User, existing.Arch, existing.Labels, existing.RequiresDocker, existing.ServiceAccount, existing.Tier); err != nil { tx.Rollback() return existing, errors.WithStack(err) } if err = tx.Commit(); err != nil { return existing, errors.WithStack(err) } _ = metrics.Timing(metrics.EngineUpdateRun, time.Since(start), []string{existing.ClusterName}, 1) go sm.logStatusUpdate(existing) return existing, nil } // CreateRun creates the passed in run func (sm *SQLStateManager) CreateRun(ctx context.Context, r Run) error { ctx, span := tracing.TraceJob(ctx, "flotilla.state.create_run", "") defer span.Finish() span.SetTag("job.run_id", r.RunID) // Now utils.TraceJob already sets the run_id tag var err error insert := ` INSERT INTO task ( run_id, definition_id, alias, image, cluster_name, exit_code, exit_reason, status, queued_at, started_at, finished_at, instance_id, instance_dns_name, group_name, env, command, memory, cpu, gpu, engine, node_lifecycle, ephemeral_storage, pod_name, namespace, max_cpu_used, max_memory_used, pod_events, executable_id, executable_type, execution_request_custom, cpu_limit, memory_limit, attempt_count, spawned_runs, run_exceptions, active_deadline_seconds, task_type, command_hash, spark_extension, metrics_uri, description, idempotence_key, "user", arch, labels, requires_docker, service_account, tier ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $40, $41, $42, $43, $44, $45, $46, $47, $48 ); ` tx, err := sm.db.BeginTx(ctx, nil) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return errors.WithStack(err) } if _, err = tx.ExecContext(ctx, insert, r.RunID, r.DefinitionID, r.Alias, r.Image, r.ClusterName, r.ExitCode, r.ExitReason, r.Status, r.QueuedAt, r.StartedAt, r.FinishedAt, r.InstanceID, r.InstanceDNSName, r.GroupName, r.Env, r.Command, 
r.Memory, r.Cpu, r.Gpu, r.Engine, r.NodeLifecycle, r.EphemeralStorage, r.PodName, r.Namespace, r.MaxCpuUsed, r.MaxMemoryUsed, r.PodEvents, r.ExecutableID, r.ExecutableType, r.ExecutionRequestCustom, r.CpuLimit, r.MemoryLimit, r.AttemptCount, r.SpawnedRuns, r.RunExceptions, r.ActiveDeadlineSeconds, r.TaskType, r.CommandHash, r.SparkExtension, r.MetricsUri, r.Description, r.IdempotenceKey, r.User, r.Arch, r.Labels, r.RequiresDocker, r.ServiceAccount, r.Tier); err != nil { tx.Rollback() return errors.Wrapf(err, "issue creating new task run with id [%s]", r.RunID) } if err = tx.Commit(); err != nil { return errors.WithStack(err) } go sm.logStatusUpdate(r) return nil } // ListGroups returns a list of the existing group names. func (sm *SQLStateManager) ListGroups(ctx context.Context, limit int, offset int, name *string) (GroupsList, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.list_groups", "") defer span.Finish() var ( err error result GroupsList whereClause string ) if name != nil && len(*name) > 0 { whereClause = fmt.Sprintf("where %s", strings.Join( sm.makeWhereClause(map[string][]string{"group_name": {*name}}), " and ")) } sql := fmt.Sprintf(ListGroupsSQL, whereClause) countSQL := fmt.Sprintf("select COUNT(*) from (%s) as sq", sql) err = sm.db.Select(&result.Groups, sql, limit, offset) if err != nil { return result, errors.Wrap(err, "issue running list groups sql") } err = sm.db.Get(&result.Total, countSQL, nil, 0) if err != nil { return result, errors.Wrap(err, "issue running list groups count sql") } return result, nil } // ListTags returns a list of the existing tags. func (sm *SQLStateManager) ListTags(ctx context.Context, limit int, offset int, name *string) (TagsList, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.list_tags", "") defer span.Finish() var ( err error result TagsList whereClause string ) if name != nil && len(*name) > 0 { whereClause = fmt.Sprintf("where %s", strings.Join( sm.makeWhereClause(map[string][]string{"text": {*name}}), " and ")) } sql := fmt.Sprintf(ListTagsSQL, whereClause) countSQL := fmt.Sprintf("select COUNT(*) from (%s) as sq", sql) err = sm.db.SelectContext(ctx, &result.Tags, sql, limit, offset) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return result, errors.Wrap(err, "issue running list tags sql") } err = sm.db.GetContext(ctx, &result.Total, countSQL, nil, 0) if err != nil { return result, errors.Wrap(err, "issue running list tags count sql") } return result, nil } // initWorkerTable initializes the `worker` table with values from the config func (sm *SQLStateManager) initWorkerTable(c config.Config) error { // Get worker count from configuration (set to 1 as default); each count is read // from the engine-specific key that was just checked with IsSet for _, engine := range Engines { fmt.Printf("init worker table for %s engine\n", engine) retryCount := int64(1) if c.IsSet(fmt.Sprintf("worker.%s.retry_worker_count_per_instance", engine)) { retryCount = int64(c.GetInt(fmt.Sprintf("worker.%s.retry_worker_count_per_instance", engine))) } submitCount := int64(1) if c.IsSet(fmt.Sprintf("worker.%s.submit_worker_count_per_instance", engine)) { submitCount = int64(c.GetInt(fmt.Sprintf("worker.%s.submit_worker_count_per_instance", engine))) } statusCount := int64(1) if c.IsSet(fmt.Sprintf("worker.%s.status_worker_count_per_instance", engine)) { statusCount = int64(c.GetInt(fmt.Sprintf("worker.%s.status_worker_count_per_instance", engine))) } var err error insert := ` INSERT INTO worker (worker_type, count_per_instance, engine) VALUES ('retry', $1, $4), ('submit', $2, $4), ('status', $3, $4); ` tx, err := sm.db.Begin() if err != nil { return
errors.WithStack(err) } if _, err = tx.Exec(insert, retryCount, submitCount, statusCount, engine); err != nil { tx.Rollback() return errors.Wrapf(err, "issue populating worker table") } err = tx.Commit() if err != nil { return errors.WithStack(err) } } return nil } // ListWorkers returns list of workers func (sm *SQLStateManager) ListWorkers(ctx context.Context, engine string) (WorkersList, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.list_workers", "") defer span.Finish() var err error var result WorkersList countSQL := fmt.Sprintf("select COUNT(*) from (%s) as sq", GetWorkerEngine) err = sm.readonlyDB.SelectContext(ctx, &result.Workers, GetWorkerEngine, engine) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return result, errors.Wrap(err, "issue running list workers sql") } err = sm.readonlyDB.GetContext(ctx, &result.Total, countSQL, engine) if err != nil { return result, errors.Wrap(err, "issue running list workers count sql") } return result, nil } // GetWorker returns data for a single worker. func (sm *SQLStateManager) GetWorker(ctx context.Context, workerType string, engine string) (w Worker, err error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_worker", "") defer span.Finish() //span.SetTag("engine", engine) if err = sm.readonlyDB.GetContext(ctx, &w, GetWorkerSQL, workerType, engine); err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) if err == sql.ErrNoRows { err = exceptions.MissingResource{ ErrorString: fmt.Sprintf("Worker of type %s not found", workerType)} } else { err = errors.Wrapf(err, "issue getting worker of type [%s]", workerType) } } return } // UpdateWorker updates a single worker. func (sm *SQLStateManager) UpdateWorker(ctx context.Context, workerType string, updates Worker) (Worker, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.update_worker", "") defer span.Finish() var ( err error existing Worker ) engine := DefaultEngine tx, err := sm.db.BeginTx(ctx, nil) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return existing, errors.WithStack(err) } rows, err := tx.QueryContext(ctx, GetWorkerSQLForUpdate, workerType, engine) if err != nil { tx.Rollback() span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return existing, errors.WithStack(err) } for rows.Next() { err = rows.Scan(&existing.WorkerType, &existing.CountPerInstance, &existing.Engine) } rows.Close() if err != nil { tx.Rollback() return existing, errors.WithStack(err) } existing.UpdateWith(updates) update := ` UPDATE worker SET count_per_instance = $2 WHERE worker_type = $1; ` if _, err = tx.ExecContext(ctx, update, workerType, existing.CountPerInstance); err != nil { tx.Rollback() span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return existing, errors.WithStack(err) } if err = tx.Commit(); err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return existing, errors.WithStack(err) } return existing, nil } // BatchUpdateWorkers updates multiple workers.
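// A minimal usage sketch (hypothetical caller, for illustration only):
//
//	workers, err := sm.BatchUpdateWorkers(ctx, []Worker{{WorkerType: "submit", CountPerInstance: 4}})
//
// Each element is applied through UpdateWorker in its own transaction, so a
// failure partway through leaves earlier updates committed.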
func (sm *SQLStateManager) BatchUpdateWorkers(ctx context.Context, updates []Worker) (WorkersList, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.batch_update_workers", "") defer span.Finish() var existing WorkersList for _, w := range updates { _, err := sm.UpdateWorker(ctx, w.WorkerType, w) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return existing, err } } return sm.ListWorkers(ctx, DefaultEngine) } // Cleanup close any open resources func (sm *SQLStateManager) Cleanup() error { return multierr.Combine(sm.db.Close(), sm.readonlyDB.Close()) } type IOrderable interface { ValidOrderField(field string) bool ValidOrderFields() []string DefaultOrderField() string } func (d *Definition) ValidOrderField(field string) bool { for _, f := range d.ValidOrderFields() { if field == f { return true } } return false } func (d *Definition) ValidOrderFields() []string { return []string{"alias", "image", "group_name", "memory"} } func (d *Definition) DefaultOrderField() string { return "group_name" } func (r *Run) ValidOrderField(field string) bool { for _, f := range r.ValidOrderFields() { if field == f { return true } } return false } func (r *Run) ValidOrderFields() []string { return []string{"run_id", "cluster_name", "status", "started_at", "finished_at", "group_name"} } func (r *Run) DefaultOrderField() string { return "group_name" } func (t *Template) ValidOrderField(field string) bool { for _, f := range t.ValidOrderFields() { if field == f { return true } } return false } func (t *Template) ValidOrderFields() []string { // @TODO: figure what fields should be orderable. return []string{"template_name", "version"} } func (t *Template) DefaultOrderField() string { return "template_name" } // Scan from db func (e *EnvList) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // Value to db func (e *EnvList) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } // Scan from db func (e *PodEvents) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // Value to db func (e SpawnedRuns) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } func (e *SpawnedRuns) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // Value to db func (e SparkExtension) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } func (e *SparkExtension) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // Value to db func (e RunExceptions) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } func (e *RunExceptions) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // Value to db func (e PodEvents) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } // Scan from db func (e *PortsList) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // Value to db func (e PortsList) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } // Scan from db func (e *Tags) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // Value to db func (e Tags) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } // Scan 
from db func (e *CloudTrailNotifications) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // Value to db func (e CloudTrailNotifications) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } // Scan from db func (e *ExecutionRequestCustom) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // Value to db func (e ExecutionRequestCustom) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } // Scan from db func (tjs *TemplateJSONSchema) Scan(value interface{}) error { if value != nil { s := []byte(value.([]uint8)) json.Unmarshal(s, &tjs) } return nil } // Value to db func (tjs TemplateJSONSchema) Value() (driver.Value, error) { res, _ := json.Marshal(tjs) return res, nil } // Scan from db func (tjs *TemplatePayload) Scan(value interface{}) error { if value != nil { s := []byte(value.([]uint8)) json.Unmarshal(s, &tjs) } return nil } // Value to db func (tjs TemplatePayload) Value() (driver.Value, error) { res, _ := json.Marshal(tjs) return res, nil } // Value to db func (e Labels) Value() (driver.Value, error) { res, _ := json.Marshal(e) return res, nil } func (e *Labels) Scan(value interface{}) error { if value != nil { s := []byte(value.(string)) json.Unmarshal(s, &e) } return nil } // GetTemplateByID returns a single template by id. func (sm *SQLStateManager) GetTemplateByID(ctx context.Context, templateID string) (Template, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_template_by_id", "") defer span.Finish() var err error var tpl Template err = sm.db.GetContext(ctx, &tpl, GetTemplateByIDSQL, templateID) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) if err == sql.ErrNoRows { return tpl, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Template with ID %s not found", templateID)} } return tpl, errors.Wrapf(err, "issue getting tpl with id [%s]", templateID) } return tpl, nil } func (sm *SQLStateManager) GetTemplateByVersion(ctx context.Context, templateName string, templateVersion int64) (bool, Template, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_template_by_version", "") defer span.Finish() span.SetTag("template.version", templateVersion) var err error var tpl Template err = sm.db.GetContext(ctx, &tpl, GetTemplateByVersionSQL, templateName, templateVersion) if err != nil { if err == sql.ErrNoRows { return false, tpl, nil } span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return false, tpl, errors.Wrapf(err, "issue getting tpl with id [%s]", templateName) } return true, tpl, nil } // GetLatestTemplateByTemplateName returns the latest version of a template // of a specific template name. func (sm *SQLStateManager) GetLatestTemplateByTemplateName(ctx context.Context, templateName string) (bool, Template, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_latest_template_by_name", "") defer span.Finish() var err error var tpl Template err = sm.db.GetContext(ctx, &tpl, GetTemplateLatestOnlySQL, templateName) if err != nil { if err == sql.ErrNoRows { return false, tpl, nil } span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return false, tpl, errors.Wrapf(err, "issue getting tpl with id [%s]", templateName) } return true, tpl, nil } // ListTemplates returns list of templates from the database. 
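// A minimal usage sketch (hypothetical caller, for illustration only): fetch the
// first ten templates, newest version first:
//
//	page, err := sm.ListTemplates(ctx, 10, 0, "version", "desc")
//
// page.Templates holds the rows; page.Total carries the unpaginated count.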
func (sm *SQLStateManager) ListTemplates(ctx context.Context, limit int, offset int, sortBy string, order string) (TemplateList, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.list_templates", "") defer span.Finish() var err error var result TemplateList var orderQuery string orderQuery, err = sm.orderBy(&Template{}, sortBy, order) if err != nil { return result, errors.WithStack(err) } sql := fmt.Sprintf(ListTemplatesSQL, orderQuery) countSQL := fmt.Sprintf("select COUNT(*) from (%s) as sq", sql) err = sm.db.SelectContext(ctx, &result.Templates, sql, limit, offset) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return result, errors.Wrap(err, "issue running list templates sql") } err = sm.db.GetContext(ctx, &result.Total, countSQL, nil, 0) if err != nil { return result, errors.Wrap(err, "issue running list templates count sql") } return result, nil } // ListTemplatesLatestOnly returns list of templates from the database. func (sm *SQLStateManager) ListTemplatesLatestOnly(ctx context.Context, limit int, offset int, sortBy string, order string) (TemplateList, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.list_templates_latest_only", "") defer span.Finish() var err error var result TemplateList countSQL := fmt.Sprintf("select COUNT(*) from (%s) as sq", ListTemplatesLatestOnlySQL) err = sm.db.SelectContext(ctx, &result.Templates, ListTemplatesLatestOnlySQL, limit, offset) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return result, errors.Wrap(err, "issue running list templates sql") } err = sm.db.GetContext(ctx, &result.Total, countSQL, nil, 0) if err != nil { return result, errors.Wrap(err, "issue running list templates count sql") } return result, nil } // CreateTemplate creates a new template. func (sm *SQLStateManager) CreateTemplate(ctx context.Context, t Template) error { ctx, span := tracing.TraceJob(ctx, "flotilla.state.create_template", "") defer span.Finish() var err error insert := ` INSERT INTO template( template_id, template_name, version, schema, command_template, adaptive_resource_allocation, image, memory, env, cpu, gpu, defaults, avatar_uri ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13); ` tx, err := sm.db.BeginTx(ctx, nil) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return errors.WithStack(err) } if _, err = tx.ExecContext(ctx, insert, t.TemplateID, t.TemplateName, t.Version, t.Schema, t.CommandTemplate, t.AdaptiveResourceAllocation, t.Image, t.Memory, t.Env, t.Cpu, t.Gpu, t.Defaults, t.AvatarURI); err != nil { tx.Rollback() span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return errors.Wrapf( err, "issue creating new template with template_name [%s] and version [%d]", t.TemplateName, t.Version) } err = tx.Commit() if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return errors.WithStack(err) } return nil } // GetExecutableByTypeAndID returns a single executable by type and id.
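// A minimal usage sketch (hypothetical template ID, for illustration only):
//
//	exec, err := sm.GetExecutableByTypeAndID(ctx, ExecutableTypeTemplate, "tpl-123")
//
// The result is a Definition or a Template depending on the type tag; an
// unrecognized type returns exceptions.MalformedInput.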
func (sm *SQLStateManager) GetExecutableByTypeAndID(ctx context.Context, t ExecutableType, id string) (Executable, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_executable_by_type_and_id", "") defer span.Finish() span.SetTag("executable.type", string(t)) switch t { case ExecutableTypeDefinition: return sm.GetDefinition(ctx, id) case ExecutableTypeTemplate: return sm.GetTemplateByID(ctx, id) default: span.SetTag("error", true) span.SetTag("error.msg", fmt.Sprintf("executable type of [%s] not valid", t)) return nil, exceptions.MalformedInput{ ErrorString: fmt.Sprintf("executable type of [%s] not valid.", t), } } } func (sm *SQLStateManager) logStatusUpdate(update Run) { var err error var startedAt, finishedAt time.Time var duration float64 var env EnvList var command string if update.StartedAt != nil { startedAt = *update.StartedAt duration = time.Now().Sub(startedAt).Seconds() } if update.FinishedAt != nil { finishedAt = *update.FinishedAt duration = finishedAt.Sub(startedAt).Seconds() } if update.Env != nil { env = *update.Env } if update.Command != nil { command = *update.Command } if update.ExitCode != nil { err = sm.log.Event("eventClassName", "FlotillaTaskStatus", "run_id", update.RunID, "definition_id", update.DefinitionID, "alias", update.Alias, "image", update.Image, "cluster_name", update.ClusterName, "command", command, "exit_code", *update.ExitCode, "status", update.Status, "started_at", startedAt, "finished_at", finishedAt, "duration", duration, "instance_id", update.InstanceID, "instance_dns_name", update.InstanceDNSName, "group_name", update.GroupName, "user", update.User, "task_type", update.TaskType, "env", env, "executable_id", update.ExecutableID, "executable_type", update.ExecutableType, "Tier", update.Tier) } else { err = sm.log.Event("eventClassName", "FlotillaTaskStatus", "run_id", update.RunID, "definition_id", update.DefinitionID, "alias", update.Alias, "image", update.Image, "cluster_name", update.ClusterName, "command", command, "status", update.Status, "started_at", startedAt, "finished_at", finishedAt, "duration", duration, "instance_id", update.InstanceID, "instance_dns_name", update.InstanceDNSName, "group_name", update.GroupName, "user", update.User, "task_type", update.TaskType, "env", env, "executable_id", update.ExecutableID, "executable_type", update.ExecutableType, "Tier", update.Tier) } if err != nil { sm.log.Log("level", "error", "message", "Failed to emit status event", "run_id", update.RunID, "error", err.Error()) } } func (sm *SQLStateManager) ListClusterStates(ctx context.Context) ([]ClusterMetadata, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.list_cluster_states", "") defer span.Finish() var clusters []ClusterMetadata err := sm.db.SelectContext(ctx, &clusters, ListClusterStatesSQL) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) } return clusters, err } func (sm *SQLStateManager) UpdateClusterMetadata(ctx context.Context, cluster ClusterMetadata) error { operationName := "flotilla.state.create_cluster_metadata" identifier := cluster.Name if cluster.ID != "" { operationName = "flotilla.state.update_cluster_metadata" identifier = cluster.ID } ctx, span := tracing.TraceJob(ctx, operationName, "") defer span.Finish() span.SetTag("cluster.id", identifier) // Add relevant tags span.SetTag("cluster.name", cluster.Name) span.SetTag("cluster.status", cluster.Status) if cluster.ClusterVersion != "" { span.SetTag("cluster.version", cluster.ClusterVersion) } if cluster.ID == "" { sql := ` 
        INSERT INTO cluster_state (name, cluster_version, status, status_reason, allowed_tiers,
            capabilities, namespace, region, emr_virtual_cluster, spark_server_uri)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
        RETURNING id;
        `
		var id string
		err := sm.db.QueryRowContext(ctx, sql,
			cluster.Name, cluster.ClusterVersion, cluster.Status, cluster.StatusReason,
			pq.Array(cluster.AllowedTiers), pq.Array(cluster.Capabilities),
			cluster.Namespace, cluster.Region, cluster.EMRVirtualCluster, cluster.SparkServerURI).Scan(&id)
		if err != nil {
			span.SetTag("error", true)
			span.SetTag("error.msg", err.Error())
			return err
		}
		return nil
	} else {
		sql := `
        UPDATE cluster_state
        SET name = $2, cluster_version = $3, status = $4, status_reason = $5, allowed_tiers = $6,
            capabilities = $7, namespace = $8, region = $9, emr_virtual_cluster = $10,
            spark_server_uri = $11, updated_at = NOW()
        WHERE id = $1;
        `
		result, err := sm.db.ExecContext(ctx, sql,
			cluster.ID, cluster.Name, cluster.ClusterVersion, cluster.Status, cluster.StatusReason,
			pq.Array(cluster.AllowedTiers), pq.Array(cluster.Capabilities),
			cluster.Namespace, cluster.Region, cluster.EMRVirtualCluster, cluster.SparkServerURI)
		if err != nil {
			span.SetTag("error", true)
			span.SetTag("error.msg", err.Error())
			return err
		}
		rows, err := result.RowsAffected()
		if err != nil {
			span.SetTag("error", true)
			span.SetTag("error.msg", err.Error())
			return err
		}
		if rows == 0 {
			span.SetTag("error", true)
			span.SetTag("error.msg", "Cluster not found")
			return exceptions.MissingResource{
				ErrorString: fmt.Sprintf("Cluster with ID %s not found", cluster.ID),
			}
		}
		return nil
	}
}

func (sm *SQLStateManager) DeleteClusterMetadata(ctx context.Context, clusterID string) error {
	ctx, span := tracing.TraceJob(ctx, "flotilla.state.delete_cluster_metadata", "")
	defer span.Finish()
	span.SetTag("cluster.id", clusterID)
	sql := `DELETE FROM cluster_state WHERE id = $1`
	result, err := sm.db.ExecContext(ctx, sql, clusterID)
	if err != nil {
		span.SetTag("error", true)
		span.SetTag("error.msg", err.Error())
		return err
	}
	count, err := result.RowsAffected()
	if err != nil {
		span.SetTag("error", true)
		span.SetTag("error.msg", err.Error())
		return err
	}
	if count == 0 {
		span.SetTag("error", true)
		span.SetTag("error.msg", "Cluster not found")
		return exceptions.MissingResource{
			ErrorString: fmt.Sprintf("Cluster with ID %s not found", clusterID),
		}
	}
	return nil
}

func (sm *SQLStateManager) GetClusterByID(ctx context.Context, clusterID string) (ClusterMetadata, error) {
	ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_cluster_by_id", "")
	defer span.Finish()
	span.SetTag("cluster.id", clusterID)
	var cluster ClusterMetadata
	// cluster_version is selected so the cluster.version span tag below can be populated.
	query := `
    SELECT id, name, cluster_version, status, status_reason, status_since, allowed_tiers,
           capabilities, region, updated_at, namespace, emr_virtual_cluster, spark_server_uri
    FROM cluster_state
    WHERE id = $1
    `
	err := sm.db.GetContext(ctx, &cluster, query, clusterID)
	if err != nil {
		span.SetTag("error", true)
		span.SetTag("error.msg", err.Error())
		if err == sql.ErrNoRows {
			return cluster, exceptions.MissingResource{
				ErrorString: fmt.Sprintf("Cluster with ID %s not found", clusterID),
			}
		}
		return cluster, err
	}
	// Add tags for the cluster data
	span.SetTag("cluster.name", cluster.Name)
	span.SetTag("cluster.status", cluster.Status)
	if cluster.ClusterVersion != "" {
		span.SetTag("cluster.version", cluster.ClusterVersion)
	}
	return cluster, nil
}
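// Illustrative (hypothetical) upsert flow for the cluster-metadata methods
// above -- an empty ID inserts a new row, a non-empty ID updates in place;
// the name value here is made up:
//
//	meta := ClusterMetadata{Name: "k8s-cluster-a", Status: StatusActive}
//	if err := sm.UpdateClusterMetadata(ctx, meta); err != nil {
//		// handle insert/update failure
//	}

func ScanStringArray(arr *[]string, value interface{}) error {
	if value == nil {
		*arr = []string{}
		return nil
	}
	switch v := value.(type) {
	case []byte:
		// Try JSON first; fall back to parsing the Postgres array literal.
		var result []string
		if err :=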
json.Unmarshal(v, &result); err == nil { *arr = result return nil } str := string(v) if len(str) < 2 { *arr = []string{} return nil } elements := strings.Split(str[1:len(str)-1], ",") result = make([]string, 0, len(elements)) for _, e := range elements { if e != "" { // Remove quotes if they exist e = strings.Trim(e, "\"") result = append(result, e) } } *arr = result return nil default: return fmt.Errorf("unexpected type for string array: %T", value) } } func (arr *Tiers) Scan(value interface{}) error { if value == nil { *arr = Tiers{} return nil } switch v := value.(type) { case []byte: var result []string if err := json.Unmarshal(v, &result); err == nil { *arr = Tiers(result) return nil } str := string(v) if len(str) < 2 || str[0] != '{' || str[len(str)-1] != '}' { *arr = Tiers{} return nil } str = str[1 : len(str)-1] if len(str) == 0 { *arr = Tiers{} return nil } elements := strings.Split(str, ",") result = make([]string, 0, len(elements)) for _, e := range elements { if e == "" { continue } e = strings.Trim(e, "\"") result = append(result, e) } *arr = Tiers(result) return nil default: return fmt.Errorf("unsupported Scan, storing driver.Value type %T into type *Tiers", value) } } func (arr Tiers) Value() (driver.Value, error) { if len(arr) == 0 { return "{}", nil } quoted := make([]string, len(arr)) for i, v := range arr { quoted[i] = fmt.Sprintf("\"%s\"", v) } return fmt.Sprintf("{%s}", strings.Join(quoted, ",")), nil } // Scan from db for Capabilities func (arr *Capabilities) Scan(value interface{}) error { if value == nil { *arr = Capabilities{} return nil } switch v := value.(type) { case []byte: var result []string if err := json.Unmarshal(v, &result); err == nil { *arr = Capabilities(result) return nil } str := string(v) if len(str) < 2 { *arr = Capabilities{} return nil } elements := strings.Split(str[1:len(str)-1], ",") result = make([]string, 0, len(elements)) for _, e := range elements { if e != "" { result = append(result, e) } } *arr = Capabilities(result) return nil default: return fmt.Errorf("unexpected type for string array: %T", value) } } // Value to db for Capabilities func (arr Capabilities) Value() (driver.Value, error) { if len(arr) == 0 { return "{}", nil } return fmt.Sprintf("{%s}", strings.Join(arr, ",")), nil } func (sm *SQLStateManager) GetRunStatus(ctx context.Context, runID string) (RunStatus, error) { ctx, span := tracing.TraceJob(ctx, "flotilla.state.get_run_status", "") defer span.Finish() span.SetTag("job.run.id", runID) var status RunStatus tx, err := sm.db.BeginTx(ctx, nil) if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return status, errors.Wrap(err, "failed to begin transaction") } _, err = tx.ExecContext(ctx, "SET LOCAL lock_timeout = '500ms'") if err != nil { tx.Rollback() span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return status, errors.Wrap(err, "failed to set lock timeout") } err = tx.QueryRowContext(ctx, GetRunStatusSQL, runID).Scan( &status.RunID, &status.DefinitionID, &status.Alias, &status.ClusterName, &status.Status, &status.QueuedAt, &status.StartedAt, &status.FinishedAt, &status.ExitCode, &status.ExitReason, &status.Engine, ) if err != nil { tx.Rollback() span.SetTag("error", true) span.SetTag("error.msg", err.Error()) if err == sql.ErrNoRows { return status, exceptions.MissingResource{ ErrorString: fmt.Sprintf("Run with id %s not found", runID)} } if pqErr, ok := err.(*pq.Error); ok && pqErr.Code == "55P03" { return status, exceptions.ConflictingResource{ ErrorString: fmt.Sprintf("Run 
with id %s is currently locked, please retry", runID)} } return status, errors.Wrapf(err, "issue getting run status with id [%s]", runID) } err = tx.Commit() if err != nil { span.SetTag("error", true) span.SetTag("error.msg", err.Error()) return status, errors.Wrap(err, "failed to commit transaction") } //if status.Status != "" { // span.SetTag("job.status", status.Status) //} return status, nil } ================================================ FILE: state/pg_state_manager_test.go ================================================ package state import ( "context" "fmt" "log" "os" "testing" "time" gklog "github.com/go-kit/kit/log" flotillaLog "github.com/stitchfix/flotilla-os/log" "database/sql/driver" "reflect" "github.com/jmoiron/sqlx" _ "github.com/lib/pq" "github.com/stitchfix/flotilla-os/config" ) func getDB(conf config.Config) *sqlx.DB { dbURL := conf.GetString("database_url") if dbURL == "" { dbURL = "postgresql://postgres:docker@localhost/postgres?sslmode=disable" } db, err := sqlx.Connect("postgres", dbURL) if err != nil { log.Fatal(err) } return db } func setUp() Manager { conf, _ := config.NewConfig(nil) db := getDB(conf) err := os.Setenv("STATE_MANAGER", "postgres") if err != nil { log.Fatal("error setting env, STATE_MANAGER") } err = os.Setenv("CREATE_DATABASE_SCHEMA", "true") if err != nil { log.Fatal("error setting env, CREATE_DATABASE_SCHEMA") } l := gklog.NewLogfmtLogger(gklog.NewSyncWriter(os.Stderr)) l = gklog.With(l, "ts", gklog.DefaultTimestampUTC) eventSinks := []flotillaLog.EventSink{flotillaLog.NewLocalEventSink()} logger := flotillaLog.NewLogger(l, eventSinks) sm, err := NewStateManager(conf, logger) fmt.Println(err) insertDefinitions(db) return sm } func insertDefinitions(db *sqlx.DB) { defsql := ` INSERT INTO task_def (definition_id, image, group_name, alias, memory, command, env) VALUES ($1, $2, $3, $4, $5, $6, $7) ` portsql := ` INSERT INTO task_def_ports(task_def_id, port) VALUES ($1, $2) ` taskDefTagsSQL := ` INSERT INTO task_def_tags(task_def_id, tag_id) VALUES($1, $2) ` tagSQL := ` INSERT INTO tags(text) VALUES($1) ` taskSQL := ` INSERT INTO task ( run_id, definition_id, cluster_name, alias, image, exit_code, status, started_at, finished_at, instance_id, instance_dns_name, group_name, env, engine, "user", service_account, tier ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, 'eks', 'foo', 'flotilla', $14 ) ` db.MustExec(defsql, "A", "imageA", "groupZ", "aliasA", 1024, "echo 'hi'", `[{"name":"E_A1","value":"V_A1"}]`) db.MustExec(defsql, "B", "imageB", "groupY", "aliasB", 1024, "echo 'hi'", `[{"name":"E_B1","value":"V_B1"},{"name":"E_B2","value":"V_B2"},{"name":"E_B3","value":"V_B3"}]`) db.MustExec(defsql, "C", "imageC", "groupX", "aliasC", 1024, "echo 'hi'", nil) db.MustExec(defsql, "D", "imageD", "groupW", "aliasD", 1024, "echo 'hi'", nil) db.MustExec(defsql, "E", "imageE", "groupV", "aliasE", 1024, "echo 'hi'", nil) db.MustExec(portsql, "A", 10000) db.MustExec(portsql, "C", 10001) db.MustExec(portsql, "D", 10002) db.MustExec(portsql, "E", 10003) db.MustExec(portsql, "E", 10004) db.MustExec(tagSQL, "tagA") db.MustExec(tagSQL, "tagB") db.MustExec(tagSQL, "tagC") db.MustExec(taskDefTagsSQL, "A", "tagA") db.MustExec(taskDefTagsSQL, "A", "tagC") db.MustExec(taskDefTagsSQL, "B", "tagB") t1, _ := time.Parse(time.RFC3339, "2017-07-04T00:01:00+00:00") t2, _ := time.Parse(time.RFC3339, "2017-07-04T00:02:00+00:00") t3, _ := time.Parse(time.RFC3339, "2017-07-04T00:03:00+00:00") t4, _ := time.Parse(time.RFC3339, "2017-07-04T00:04:00+00:00") 
db.MustExec(taskSQL, "run0", "A", "clusta", "aliasA", "imgA", nil, StatusRunning, t1, nil, "id1", "dns1", "groupZ", `[{"name":"E0","value":"V0"}]`, 4) db.MustExec( taskSQL, "run1", "B", "clusta", "aliasB", "imgB", nil, StatusRunning, t2, nil, "id1", "dns1", "groupY", `[{"name":"E1","value":"V1"}]`, 4) db.MustExec( taskSQL, "run2", "B", "clusta", "aliasB", "imgB", 1, StatusStopped, t2, t3, "id1", "dns1", "groupY", `[{"name":"E2","value":"V2"}]`, 4) db.MustExec(taskSQL, "run3", "C", "clusta", "aliasC", "imgC", nil, StatusQueued, nil, nil, "", "", "groupX", `[{"name":"E3_1","value":"V3_1"},{"name":"E3_2","value":"v3_2"},{"name":"E3_3","value":"V3_3"}]`, 4) db.MustExec(taskSQL, "run4", "C", "clusta", "aliasC", "imgC", 0, StatusStopped, t3, t4, "id1", "dns1", "groupX", nil, 4) db.MustExec(taskSQL, "run5", "D", "clustb", "aliasD", "imgD", nil, StatusPending, nil, nil, "", "", "groupW", nil, 4) } func tearDown() { conf, _ := config.NewConfig(nil) db := getDB(conf) db.MustExec(` DELETE FROM task_def_ports; DELETE FROM task_def_tags; DELETE FROM task_status; DELETE FROM task; DELETE FROM task_def; DELETE FROM tags; `) } func TestSQLStateManager_ListDefinitions(t *testing.T) { defer tearDown() sm := setUp() var err error var dl DefinitionList // Test limiting expectedTotal := 5 dl, err = sm.ListDefinitions(ctx, 1, 0, "alias", "asc", nil, nil) if err != nil { t.Error(err.Error()) } if dl.Total != expectedTotal { t.Errorf("Expected %v total definitions, got %v", expectedTotal, dl.Total) } if len(dl.Definitions) != 1 { t.Errorf("Expected 1 definition returned, got %v", len(dl.Definitions)) } dA := dl.Definitions[0] if dA.DefinitionID != "A" { t.Errorf("Listing returned incorrect definition, expected A but got %s", dA.DefinitionID) } if len(*dA.Env) != 1 { t.Errorf("Expected returned definitions to have correctly attached env vars, was %v", dA.Env) } // Test ordering and offset dl, _ = sm.ListDefinitions(ctx, 1, 1, "group_name", "asc", nil, nil) if dl.Definitions[0].GroupName != "groupW" { t.Errorf("Error ordering with offset - expected groupW but got %s", dl.Definitions[0].GroupName) } // Test order validation dl, err = sm.ListDefinitions(ctx, 1, 0, "nonexistent_field", "asc", nil, nil) if err == nil { t.Errorf("Sorting by [nonexistent_field] did not produce an error") } dl, err = sm.ListDefinitions(ctx, 1, 0, "alias", "nooop", nil, nil) if err == nil { t.Errorf("Sort order [nooop] is not valid but did not produce an error") } // Test filtering on fields dl, _ = sm.ListDefinitions(ctx, 1, 0, "alias", "asc", map[string][]string{"image": {"imageC"}}, nil) if dl.Definitions[0].Image != "imageC" { t.Errorf("Error filtering by field - expected imageC but got %s", dl.Definitions[0].Image) } // Test filtering on environment variables dl, _ = sm.ListDefinitions(ctx, 1, 0, "alias", "desc", nil, map[string]string{"E_B1": "V_B1", "E_B2": "V_B2"}) if dl.Definitions[0].DefinitionID != "B" { t.Errorf( `Expected environment variable filters (E_B1:V_B1 AND E_B2:V_B2) to yield definition B, but was %s`, dl.Definitions[0].DefinitionID) } } func TestSQLStateManager_GetDefinition(t *testing.T) { defer tearDown() sm := setUp() dE, _ := sm.GetDefinition(ctx, "E") if dE.DefinitionID != "E" { t.Errorf("Expected definition E to be fetched, got %s", dE.DefinitionID) } if dE.Env != nil { t.Errorf("Expected empty environment but got %s", *dE.Env) } _, err := sm.GetDefinition(ctx, "Z") if err == nil { t.Errorf("Expected get for non-existent definition Z to return error, was nil") } } func TestSQLStateManager_GetDefinitionByAlias(t 
*testing.T) { defer tearDown() sm := setUp() dE, _ := sm.GetDefinitionByAlias(ctx, "aliasE") if dE.DefinitionID != "E" { t.Errorf("Expected definition E to be fetched, got %s", dE.DefinitionID) } if dE.Env != nil { t.Errorf("Expected empty environment but got %s", *dE.Env) } _, err := sm.GetDefinitionByAlias(ctx, "aliasZ") if err == nil { t.Errorf("Expected get for non-existent definition Z to return error, was nil") } } func TestSQLStateManager_CreateDefinition(t *testing.T) { defer tearDown() sm := setUp() var err error memory := int64(512) d := Definition{ DefinitionID: "id:cupcake", GroupName: "group:cupcake", Alias: "cupcake", Command: "echo 'hi'", ExecutableResources: ExecutableResources{ Memory: &memory, Image: "image:cupcake", Env: &EnvList{ {Name: "E1", Value: "V1"}, }, Ports: &PortsList{12345, 6789}, Tags: &Tags{"apple", "orange", "tiger"}, }, } err = sm.CreateDefinition(ctx, d) if err != nil { t.Error(err.Error()) } f, err := sm.GetDefinition(ctx, "id:cupcake") if err != nil { t.Errorf("Expected create definition to create definition with id [id:cupcake]") t.Error(err) } if f.Alias != d.Alias || len(*f.Env) != len(*d.Env) || *f.Memory != *d.Memory { t.Errorf("Expected created definition to match the one passed in for creation") } } func TestSQLStateManager_UpdateDefinition(t *testing.T) { defer tearDown() sm := setUp() env := EnvList{ {Name: "NEW1", Value: "NEWVAL1"}, {Name: "NEW2", Value: "NEWVAL2"}, } tags := Tags{ "cupcake", } updates := Definition{ ExecutableResources: ExecutableResources{ Tags: &tags, Image: "updated", Env: &env, Ports: &PortsList{}, // <---- empty, set ports to empty list }, } _, err := sm.UpdateDefinition(ctx, "A", updates) if err != nil { t.Error(err.Error()) } d, _ := sm.GetDefinition(ctx, "A") if d.Image != "updated" { t.Errorf("Expected image to be updated to [updated] but is %s", d.Image) } if len(*d.Env) != 2 { t.Errorf("Expected new env to have length 2, was %v", len(*d.Env)) } updatedEnv := *d.Env matches := 0 for i := range updatedEnv { updatedVar := updatedEnv[i] for j := range env { expectedVar := env[j] if updatedVar.Name == expectedVar.Name && updatedVar.Value == expectedVar.Value { matches++ } } } if matches != len(env) { t.Errorf("Not all updated env vars match") } } func TestSQLStateManager_DeleteDefinition(t *testing.T) { defer tearDown() sm := setUp() var err error err = sm.DeleteDefinition(ctx, "A") if err != nil { t.Error(err.Error()) } _, err = sm.GetDefinition(ctx, "A") if err == nil { t.Errorf("Expected querying definition after delete would return error") } } func TestSQLStateManager_ListRuns(t *testing.T) { defer tearDown() sm := setUp() var err error expectedTotal := 6 rl, err := sm.ListRuns(ctx, 1, 0, "started_at", "asc", nil, nil, nil) if err != nil { t.Error(err.Error()) } if rl.Total != expectedTotal { t.Errorf("Expected total to be %v but was %v", expectedTotal, rl.Total) } if len(rl.Runs) != 1 { t.Errorf("Expected limit query to limit to 1 but was %v", len(rl.Runs)) } r0 := rl.Runs[0] if r0.RunID != "run0" { t.Errorf("Listing with order returned incorrect run, expected run0 but got %s", r0.RunID) } if r0.Env == nil { t.Errorf("Expected non-nil env for run") } if len(*r0.Env) != 1 { t.Errorf("Expected returned runs to have correctly attached env vars, was %v", r0.Env) } // Test ordering and offset // - there's only two, so offset 1 should return second one rl, err = sm.ListRuns(ctx, 1, 1, "cluster_name", "desc", nil, nil, nil) if rl.Runs[0].ClusterName != "clusta" { t.Errorf("Error ordering with offset - expected clusta but 
got %s", rl.Runs[0].ClusterName) } // Test order validation rl, err = sm.ListRuns(ctx, 1, 0, "nonexistent_field", "asc", nil, nil, nil) if err == nil { t.Errorf("Sorting by [nonexistent_field] did not produce an error") } rl, err = sm.ListRuns(ctx, 1, 0, "started_at", "nooop", nil, nil, nil) if err == nil { t.Errorf("Sort order [nooop] is not valid but did not produce an error") } // Test filtering on fields rl, err = sm.ListRuns(ctx, 1, 0, "started_at", "asc", map[string][]string{"cluster_name": {"clustb"}}, nil, nil) if rl.Runs[0].ClusterName != "clustb" { t.Errorf("Error filtering by field - expected clustb but got %s", rl.Runs[0].ClusterName) } // Test filtering on environment variables rl, err = sm.ListRuns(ctx, 1, 0, "started_at", "desc", nil, map[string]string{"E2": "V2"}, nil) if err != nil { t.Error(err.Error()) } if rl.Runs[0].RunID != "run2" { t.Errorf( `Expected environment variable filters (E2:V2) to yield run run2, but was %s`, rl.Runs[0].RunID) } } func TestSQLStateManager_ListRuns2(t *testing.T) { defer tearDown() sm := setUp() var err error expectedTotal := 1 expectedRun := "run4" rl, err := sm.ListRuns(ctx, 100, 0, "started_at", "asc", map[string][]string{ "started_at_since": { "2017-07-04T00:02:59+00:00", }, "started_at_until": { "2017-07-04T00:03:01+00:00", }, }, nil, nil) if err != nil { t.Error(err.Error()) } if rl.Total != expectedTotal { t.Errorf("Expected total to be %v but was %v", expectedTotal, rl.Total) } r := rl.Runs[0] if r.RunID != expectedRun { t.Errorf("Got unexpected run: %s", r.RunID) } } func TestSQLStateManager_ListRuns3(t *testing.T) { defer tearDown() sm := setUp() var err error expectedTotal := 2 expectedRuns := map[string]bool{"run3": true, "run5": true} rl, err := sm.ListRuns(ctx, 100, 0, "started_at", "asc", map[string][]string{ "status": { StatusPending, StatusQueued, }, }, nil, nil) if err != nil { t.Error(err.Error()) } if rl.Total != expectedTotal { t.Errorf("Expected total to be %v but was %v", expectedTotal, rl.Total) } for _, r := range rl.Runs { if _, ok := expectedRuns[r.RunID]; !ok { t.Errorf("Got unexpected run: %s", r.RunID) } } } func TestSQLStateManager_GetRun(t *testing.T) { defer tearDown() sm := setUp() r2, _ := sm.GetRun(ctx, "run2") if r2.RunID != "run2" { t.Errorf("Expected run 2 to be fetched, got %s", r2.RunID) } if len(*r2.Env) != 1 { t.Errorf("Expected environment to have exactly one entry, but was %v", len(*r2.Env)) } _, err := sm.GetRun(ctx, "run100") if err == nil { t.Errorf("Expected get for non-existent run100 to return error, was nil") } } func TestSQLStateManager_CreateRun(t *testing.T) { defer tearDown() sm := setUp() r1 := Run{ RunID: "run:17", GroupName: "group:cupcake", Alias: "cute", Image: "someImage", DefinitionID: "A", ClusterName: "clusta", Status: StatusQueued, Env: &EnvList{ {Name: "RUN_PARAM", Value: "VAL"}, }, Engine: &DefaultEngine, Tier: Tier("4"), } ec := int64(137) reason := "instance is ded." 
cmd := "_test cmd__" mem := int64(10) t1, _ := time.Parse(time.RFC3339, "2017-07-04T00:01:00+00:00") t2, _ := time.Parse(time.RFC3339, "2017-07-04T00:02:00+00:00") t1 = t1.UTC() t2 = t2.UTC() r2 := Run{ RunID: "run:18", GroupName: "group:cupcake", DefinitionID: "A", Alias: "AliasA", Image: "ImageA", ExitCode: &ec, ExitReason: &reason, StartedAt: &t1, FinishedAt: &t2, ClusterName: "clusta", Status: StatusStopped, Env: &EnvList{ {Name: "RUN_PARAM", Value: "VAL"}, }, Command: &cmd, Memory: &mem, Engine: &DefaultEngine, Tier: Tier("4"), } sm.CreateRun(ctx, r1) sm.CreateRun(ctx, r2) f1, _ := sm.GetRun(ctx, "run:17") f2, _ := sm.GetRun(ctx, "run:18") if f1.RunID != "run:17" { t.Errorf("Expected to fetch inserted run:17, but got %s", f1.RunID) } // Check null handling if f1.ExitCode != nil || f1.StartedAt != nil || f1.FinishedAt != nil { t.Errorf("Expected run:17 to have null exit code, started_at, and finished_at") } if f2.ExitCode == nil || f2.StartedAt == nil || f2.FinishedAt == nil { t.Errorf("Expected run:18 to have non null exit code, started_at, and finished_at") } if *f2.ExitCode != *r2.ExitCode { t.Errorf("Expected exit code %v but was %v", *r2.ExitCode, *f2.ExitCode) } if *f2.ExitReason != *r2.ExitReason { t.Errorf("Expected exit reason %s but was %s", *r2.ExitReason, *f2.ExitReason) } if (*f2.StartedAt).UTC().String() != (*r2.StartedAt).String() { t.Errorf("Expected started_at %s but was %s", *r2.StartedAt, *f2.StartedAt) } if (*f2.FinishedAt).UTC().String() != (*r2.FinishedAt).String() { t.Errorf("Expected finished_at %s but was %s", *r2.FinishedAt, *f2.FinishedAt) } if f2.Alias != r2.Alias { t.Errorf("Expected alias: [%s] but was [%s]", r2.Alias, f2.Alias) } if f2.Image != r2.Image { t.Errorf("Expected image: [%s] but was [%s]", r2.Image, f2.Image) } if f1.Command != nil { t.Errorf("Expected null command, but was [%s]", *f1.Command) } if f1.Memory != nil { t.Errorf("Expected null mem, but was [%d]", *f1.Memory) } if f2.Command == nil { t.Errorf("Expected non-null command, but was null") } if f2.Memory == nil { t.Errorf("Expected non-null memory, but was null") } if f2.Command != nil && *f2.Command != cmd { t.Errorf("Expected command [%s], but got [%s]", cmd, *f2.Command) } if f2.Memory != nil && *f2.Memory != mem { t.Errorf("Expected mem [%d], but got [%d]", mem, *f2.Memory) } } func TestSQLStateManager_UpdateRun(t *testing.T) { defer tearDown() sm := setUp() ec := int64(1) env := EnvList{ {Name: "NEW1", Value: "NEWVAL1"}, {Name: "NEW2", Value: "NEWVAL2"}, } t1, _ := time.Parse(time.RFC3339, "2017-07-04T00:01:00+00:00") t2, _ := time.Parse(time.RFC3339, "2017-07-04T00:02:00+00:00") t1 = t1.UTC() t2 = t2.UTC() u := Run{ Alias: "alien", Image: "imagine", ExitCode: &ec, Status: StatusStopped, StartedAt: &t1, FinishedAt: &t2, Env: &env, Tier: Tier("4"), } u2 := Run{ Status: StatusNeedsRetry, } _, e := sm.UpdateRun(ctx, "run3", u) if e != nil { t.Errorf("Error while updating %v", e) } r, e := sm.GetRun(ctx, "run3") if e != nil { t.Errorf("Error in GetRun %v", e) } if *r.ExitCode != ec { t.Errorf("Expected update to set exit code to %v but was %v", ec, *r.ExitCode) } if (*r.StartedAt).UTC().String() != t1.String() { t.Errorf("Expected update to started_at to %s but was %s", t1, *r.StartedAt) } if (*r.FinishedAt).UTC().String() != t2.String() { t.Errorf("Expected update to set finished_at to %s but was %s", t1, *r.FinishedAt) } if r.Status != u.Status { t.Errorf("Expected update to set status to %s but was %s", u.Status, r.Status) } if r.Alias != u.Alias { t.Errorf("Expected update to set 
alias: [%s] but was [%s]", u.Alias, r.Alias) } if r.Image != u.Image { t.Errorf("Expected update to set image: [%s] but was [%s]", u.Image, r.Image) } updatedEnv := *r.Env matches := 0 for i := range updatedEnv { updatedVar := updatedEnv[i] for j := range env { expectedVar := env[j] if updatedVar.Name == expectedVar.Name && updatedVar.Value == expectedVar.Value { matches++ } } } if matches != len(env) { t.Errorf("Not all updated env vars match") } sm.UpdateRun(ctx, "run3", u2) r, _ = sm.GetRun(ctx, "run3") if r.Status != u2.Status { t.Errorf("Expected to update status to %s but was %s", u2.Status, r.Status) } } func TestSQLStateManager_UpdateWorker(t *testing.T) { defer tearDown() sm := setUp() // First, list workers to find an existing worker type created during init workers, err := sm.ListWorkers(ctx, DefaultEngine) if err != nil { t.Fatalf("Error listing workers: %v", err) } if len(workers.Workers) == 0 { t.Fatal("Expected at least one worker to exist after setUp") } originalWorker := workers.Workers[0] // Update the worker's count to call row.Scan in UpdateWorker, // which previously only scanned 2 of 3 columns (missing Engine), causing: // "sql: expected 2 destination arguments in Scan, not 3" newCount := originalWorker.CountPerInstance + 5 updates := Worker{ CountPerInstance: newCount, } updated, err := sm.UpdateWorker(ctx, originalWorker.WorkerType, updates) if err != nil { t.Fatalf("UpdateWorker failed: %v", err) } if updated.CountPerInstance != newCount { t.Errorf("Expected CountPerInstance to be %d, got %d", newCount, updated.CountPerInstance) } if updated.Engine != DefaultEngine { t.Errorf("Expected Engine to be %s, got %s", DefaultEngine, updated.Engine) } // Verify via GetWorker that the update persisted fetched, err := sm.GetWorker(ctx, originalWorker.WorkerType, DefaultEngine) if err != nil { t.Fatalf("GetWorker failed: %v", err) } if fetched.CountPerInstance != newCount { t.Errorf("Expected persisted CountPerInstance to be %d, got %d", newCount, fetched.CountPerInstance) } } func TestSQLStateManager_ListClusterStates(t *testing.T) { defer tearDown() sm := setUp() // Simple test to ensure the method exists and returns without error _, err := sm.ListClusterStates(ctx) if err != nil { t.Errorf("Error listing cluster states: %v", err) } } func TestStringArray_Scan(t *testing.T) { tests := []struct { name string input interface{} expected Tiers wantErr bool }{ { name: "nil input", input: nil, expected: Tiers{}, wantErr: false, }, { name: "empty array", input: []byte("{}"), expected: Tiers{}, wantErr: false, }, { name: "single value", input: []byte("{\"tier1\"}"), expected: Tiers{"tier1"}, wantErr: false, }, { name: "multiple values", input: []byte("{\"tier1\",\"tier2\",\"tier3\"}"), expected: Tiers{"tier1", "tier2", "tier3"}, wantErr: false, }, { name: "values with empty elements", input: []byte("{\"tier1\",,\"tier3\"}"), expected: Tiers{"tier1", "tier3"}, wantErr: false, }, { name: "unquoted values", input: []byte("{tier1,tier2,tier3}"), expected: Tiers{"tier1", "tier2", "tier3"}, wantErr: false, }, { name: "unsupported type", input: 123, expected: nil, wantErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var result Tiers err := result.Scan(tt.input) if (err != nil) != tt.wantErr { t.Errorf("StringArray.Scan() error = %v, wantErr %v", err, tt.wantErr) return } if !reflect.DeepEqual(result, tt.expected) { t.Errorf("StringArray.Scan() = %v, want %v", result, tt.expected) } }) } } func TestStringArray_Value(t *testing.T) { tests := []struct { name 
string array Tiers expected driver.Value wantErr bool }{ { name: "empty slice", array: Tiers{}, expected: "{}", wantErr: false, }, { name: "single value", array: Tiers{"tier1"}, expected: "{\"tier1\"}", wantErr: false, }, { name: "multiple values", array: Tiers{"tier1", "tier2", "tier3"}, expected: "{\"tier1\",\"tier2\",\"tier3\"}", wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got, err := tt.array.Value() if (err != nil) != tt.wantErr { t.Errorf("StringArray.Value() error = %v, wantErr %v", err, tt.wantErr) return } if !reflect.DeepEqual(got, tt.expected) { t.Errorf("StringArray.Value() = %v, want %v", got, tt.expected) } }) } } // This test verifies that a value that's converted to a database format // can be correctly scanned back into the original structure func TestStringArray_RoundTrip(t *testing.T) { tests := []struct { name string array Tiers }{ { name: "empty array", array: Tiers{}, }, { name: "single value", array: Tiers{"tier1"}, }, { name: "multiple values", array: Tiers{"tier1", "tier2", "tier3"}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { dbValue, err := tt.array.Value() if err != nil { t.Fatalf("Failed to convert to DB value: %v", err) } stringValue, ok := dbValue.(string) if !ok { t.Fatalf("Expected dbValue to be a string, got %T", dbValue) } byteValue := []byte(stringValue) var result Tiers err = result.Scan(byteValue) if err != nil { t.Fatalf("Failed to scan from DB value: %v", err) } if !reflect.DeepEqual(result, tt.array) { t.Errorf("Round trip failed: got %v, want %v", result, tt.array) } }) } } func TestCapabilities_Scan(t *testing.T) { tests := []struct { name string input interface{} expected Capabilities wantErr bool }{ { name: "nil input", input: nil, expected: Capabilities{}, wantErr: false, }, { name: "empty array", input: []byte("{}"), expected: Capabilities{}, wantErr: false, }, { name: "single value", input: []byte("{spark}"), expected: Capabilities{"spark"}, wantErr: false, }, { name: "multiple values", input: []byte("{spark,ray,gpu}"), expected: Capabilities{"spark", "ray", "gpu"}, wantErr: false, }, { name: "values with empty elements", input: []byte("{spark,gpu}"), expected: Capabilities{"spark", "gpu"}, wantErr: false, }, { name: "unsupported type", input: 123, expected: nil, wantErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var result Capabilities err := result.Scan(tt.input) if (err != nil) != tt.wantErr { t.Errorf("Capabilities.Scan() error = %v, wantErr %v", err, tt.wantErr) return } if !reflect.DeepEqual(result, tt.expected) { t.Errorf("Capabilities.Scan() = %v, want %v", result, tt.expected) } }) } } func TestCapabilities_Value(t *testing.T) { tests := []struct { name string capabilities Capabilities expected driver.Value wantErr bool }{ { name: "empty slice", capabilities: Capabilities{}, expected: "{}", wantErr: false, }, { name: "single value", capabilities: Capabilities{"gpu"}, expected: "{gpu}", wantErr: false, }, { name: "multiple values", capabilities: Capabilities{"gpu", "cpu", "memory"}, expected: "{gpu,cpu,memory}", wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got, err := tt.capabilities.Value() if (err != nil) != tt.wantErr { t.Errorf("Capabilities.Value() error = %v, wantErr %v", err, tt.wantErr) return } if !reflect.DeepEqual(got, tt.expected) { t.Errorf("Capabilities.Value() = %v, want %v", got, tt.expected) } }) } } func TestCapabilities_RoundTrip(t *testing.T) { tests := []struct { name string 
capabilities Capabilities }{ { name: "empty capabilities", capabilities: Capabilities{}, }, { name: "single capability", capabilities: Capabilities{"gpu"}, }, { name: "multiple capabilities", capabilities: Capabilities{"gpu", "spark", "ray"}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Convert to database value dbValue, err := tt.capabilities.Value() if err != nil { t.Fatalf("Failed to convert to DB value: %v", err) } // Convert the string to []byte since that's what // would happen in a real database call stringValue, ok := dbValue.(string) if !ok { t.Fatalf("Expected dbValue to be a string, got %T", dbValue) } byteValue := []byte(stringValue) // Convert database value back to Capabilities var result Capabilities err = result.Scan(byteValue) if err != nil { t.Fatalf("Failed to scan from DB value: %v", err) } // Check that we got back what we started with if !reflect.DeepEqual(result, tt.capabilities) { t.Errorf("Round trip failed: got %v, want %v", result, tt.capabilities) } }) } } func tearDownClusters() { conf, _ := config.NewConfig(nil) db := getDB(conf) db.MustExec(`DELETE FROM cluster_state;`) } var ctx = context.Background() func TestSQLStateManager_UpdateClusterMetadata(t *testing.T) { defer tearDownClusters() sm := setUp() initialCluster := ClusterMetadata{ Name: "test-cluster", Status: StatusActive, StatusReason: "Initial setup", AllowedTiers: Tiers{"1", "2"}, Capabilities: Capabilities{"gpu", "spark"}, Namespace: "flotilla", Region: "us-east-1", EMRVirtualCluster: "11111111", SparkServerURI: "spark://spark-server:7077", } err := sm.UpdateClusterMetadata(ctx, initialCluster) if err != nil { t.Fatalf("Error creating initial cluster: %v", err) } clusters, err := sm.ListClusterStates(ctx) if err != nil { t.Fatalf("Error listing clusters: %v", err) } var clusterID string for _, c := range clusters { if c.Name == "test-cluster" { clusterID = c.ID break } } if clusterID == "" { t.Fatalf("Test cluster not found after insertion") } updatedCluster := ClusterMetadata{ ID: clusterID, Name: "test-cluster", Status: StatusMaintenance, StatusReason: "Under maintenance", AllowedTiers: Tiers{"1", "2"}, Capabilities: Capabilities{"gpu", "spark", "ray"}, Namespace: "flotilla-test", Region: "us-east-1", EMRVirtualCluster: "test-emr-cluster", SparkServerURI: "spark://spark-server:7077", } err = sm.UpdateClusterMetadata(ctx, updatedCluster) if err != nil { t.Fatalf("Error updating cluster: %v", err) } updatedFromDB, err := sm.GetClusterByID(ctx, clusterID) if err != nil { t.Fatalf("Error getting updated cluster: %v", err) } if updatedFromDB.Status != StatusMaintenance { t.Errorf("Expected status %s, got %s", StatusMaintenance, updatedFromDB.Status) } if updatedFromDB.StatusReason != "Under maintenance" { t.Errorf("Expected reason 'Under maintenance', got '%s'", updatedFromDB.StatusReason) } } func TestSQLStateManager_DeleteClusterMetadata(t *testing.T) { tearDown() sm := setUp() initialCluster := ClusterMetadata{ Name: "test-delete-cluster", Status: StatusActive, StatusReason: "For deletion test", AllowedTiers: Tiers{"1", "2"}, Capabilities: Capabilities{"gpu", "spark"}, Namespace: "flotilla", Region: "us-east-1", EMRVirtualCluster: "11111111", SparkServerURI: "spark://spark-server:7077", } err := sm.UpdateClusterMetadata(ctx, initialCluster) if err != nil { t.Fatalf("Error creating initial cluster: %v", err) } clusters, err := sm.ListClusterStates(ctx) if err != nil { t.Fatalf("Error listing clusters: %v", err) } var clusterID string for _, c := range clusters { if c.Name 
== "test-delete-cluster" { clusterID = c.ID break } } if clusterID == "" { t.Fatalf("Test cluster not found after insertion") } err = sm.DeleteClusterMetadata(ctx, clusterID) if err != nil { t.Fatalf("Error deleting cluster: %v", err) } _, err = sm.GetClusterByID(ctx, clusterID) if err == nil { t.Errorf("Expected error when getting deleted cluster") } tearDown() } ================================================ FILE: testutils/mocks.go ================================================ package testutils import ( "context" "fmt" "math" "net/http" "testing" "github.com/aws/aws-sdk-go/aws" "github.com/stitchfix/flotilla-os/config" "github.com/stitchfix/flotilla-os/execution/engine" "github.com/stitchfix/flotilla-os/queue" "github.com/stitchfix/flotilla-os/state" ) // ImplementsAllTheThings defines a struct which implements many of the interfaces // to facilitate easier testing type ImplementsAllTheThings struct { T *testing.T Calls []string // Collects calls Definitions map[string]state.Definition // Definitions stored in "state" Runs map[string]state.Run // Runs stored in "state" Workers []state.Worker // Workers stored in "state" Qurls map[string]string // Urls returned by Queue Manager Defined []string // List of defined definitions (Execution Engine) Queued []string // List of queued runs (Queue Manager) StatusUpdates []string // List of queued status updates (Queue Manager) StatusUpdatesAsRuns []state.Run // List of queued status updates (Execution Engine) ExecuteError error // Execution Engine - error to return ExecuteErrorIsRetryable bool // Execution Engine - is the run retryable? Groups []string Tags []string Templates map[string]state.Template ClusterStates []state.ClusterMetadata GetRandomClusterName func(clusters []string) string } func (iatt *ImplementsAllTheThings) GetResources(ctx context.Context, runID string) (state.Run, error) { iatt.Calls = append(iatt.Calls, "GetResources") run, exists := iatt.Runs[runID] if !exists { return state.Run{}, fmt.Errorf("Run with id %s not found", runID) } return run, nil } func (iatt *ImplementsAllTheThings) ListClusters() ([]state.ClusterMetadata, error) { iatt.Calls = append(iatt.Calls, "ListClusters") return iatt.ClusterStates, nil } func (i *ImplementsAllTheThings) ListClusterStates(ctx context.Context) ([]state.ClusterMetadata, error) { i.Calls = append(i.Calls, "ListClusterStates") fmt.Printf("ListClusterStates called, returning %d clusters\n", len(i.ClusterStates)) return i.ClusterStates, nil } func (i *ImplementsAllTheThings) GetClusterByID(ctx context.Context, clusterID string) (state.ClusterMetadata, error) { i.Calls = append(i.Calls, "GetClusterByID") return i.ClusterStates[0], nil } func (i *ImplementsAllTheThings) DeleteClusterMetadata(ctx context.Context, clusterName string) error { i.Calls = append(i.Calls, "DeleteClusterMetadata") return nil } func (i *ImplementsAllTheThings) UpdateClusterMetadata(ctx context.Context, cluster state.ClusterMetadata) error { i.Calls = append(i.Calls, "UpdateClusterMetadata") return nil } func (iatt *ImplementsAllTheThings) LogsText(executable state.Executable, run state.Run, w http.ResponseWriter) error { iatt.Calls = append(iatt.Calls, "LogsText") return nil } func (iatt *ImplementsAllTheThings) Log(keyvals ...interface{}) error { iatt.Calls = append(iatt.Calls, "Name") return nil } func (iatt *ImplementsAllTheThings) Event(keyvals ...interface{}) error { iatt.Calls = append(iatt.Calls, "Name") return nil } // Name - general func (iatt *ImplementsAllTheThings) Name() string { iatt.Calls = 
append(iatt.Calls, "Name") return "implementer" } // Initialize - general func (iatt *ImplementsAllTheThings) Initialize(conf config.Config) error { iatt.Calls = append(iatt.Calls, "Initialize") return nil } // Cleanup - general func (iatt *ImplementsAllTheThings) Cleanup() error { iatt.Calls = append(iatt.Calls, "Cleanup") return nil } func (iatt *ImplementsAllTheThings) ListFailingNodes(ctx context.Context) (state.NodeList, error) { var nodeList state.NodeList iatt.Calls = append(iatt.Calls, "ListFailingNodes") return nodeList, nil } func (iatt *ImplementsAllTheThings) GetPodReAttemptRate(ctx context.Context) (float32, error) { iatt.Calls = append(iatt.Calls, "GetPodReAttemptRate") return 1.0, nil } func (iatt *ImplementsAllTheThings) GetNodeLifecycle(ctx context.Context, executableID string, commandHash string) (string, error) { iatt.Calls = append(iatt.Calls, "GetNodeLifecycle") return "spot", nil } func (iatt *ImplementsAllTheThings) GetTaskHistoricalRuntime(ctx context.Context, executableID string, runId string) (float32, error) { iatt.Calls = append(iatt.Calls, "GetTaskHistoricalRuntime") return 1.0, nil } // ListDefinitions - StateManager func (iatt *ImplementsAllTheThings) ListDefinitions( ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string) (state.DefinitionList, error) { iatt.Calls = append(iatt.Calls, "ListDefinitions") dl := state.DefinitionList{Total: len(iatt.Definitions)} for _, d := range iatt.Definitions { dl.Definitions = append(dl.Definitions, d) } return dl, nil } // GetDefinition - StateManager func (iatt *ImplementsAllTheThings) GetDefinition(ctx context.Context, definitionID string) (state.Definition, error) { iatt.Calls = append(iatt.Calls, "GetDefinition") var err error d, ok := iatt.Definitions[definitionID] if !ok { err = fmt.Errorf("No definition %s", definitionID) } return d, err } // GetDefinitionByAlias - StateManager func (iatt *ImplementsAllTheThings) GetDefinitionByAlias(ctx context.Context, alias string) (state.Definition, error) { iatt.Calls = append(iatt.Calls, "GetDefinitionByAlias") for _, d := range iatt.Definitions { if d.Alias == alias { return d, nil } } return state.Definition{}, fmt.Errorf("No definition with alias %s", alias) } // UpdateDefinition - StateManager func (iatt *ImplementsAllTheThings) UpdateDefinition(ctx context.Context, definitionID string, updates state.Definition) (state.Definition, error) { iatt.Calls = append(iatt.Calls, "UpdateDefinition") defn := iatt.Definitions[definitionID] defn.UpdateWith(updates) iatt.Definitions[definitionID] = defn return defn, nil } // CreateDefinition - StateManager func (iatt *ImplementsAllTheThings) CreateDefinition(ctx context.Context, d state.Definition) error { iatt.Calls = append(iatt.Calls, "CreateDefinition") iatt.Definitions[d.DefinitionID] = d return nil } // DeleteDefinition - StateManager func (iatt *ImplementsAllTheThings) DeleteDefinition(ctx context.Context, definitionID string) error { iatt.Calls = append(iatt.Calls, "DeleteDefinition") delete(iatt.Definitions, definitionID) return nil } // ListRuns - StateManager func (iatt *ImplementsAllTheThings) ListRuns(ctx context.Context, limit int, offset int, sortBy string, order string, filters map[string][]string, envFilters map[string]string, engines []string) (state.RunList, error) { iatt.Calls = append(iatt.Calls, "ListRuns") rl := state.RunList{Total: len(iatt.Runs)} for _, r := range iatt.Runs { rl.Runs = append(rl.Runs, r) } return rl, nil } // GetRun - 
StateManager func (iatt *ImplementsAllTheThings) GetRun(ctx context.Context, runID string) (state.Run, error) { iatt.Calls = append(iatt.Calls, "GetRun") var err error r, ok := iatt.Runs[runID] if !ok { err = fmt.Errorf("No run %s", runID) } return r, err } func (iatt *ImplementsAllTheThings) GetRunByEMRJobId(ctx context.Context, emrJobId string) (state.Run, error) { iatt.Calls = append(iatt.Calls, "GetRunByEMRJobId") var err error r, ok := iatt.Runs[emrJobId] if !ok { err = fmt.Errorf("No run %s", emrJobId) } return r, err } // CreateRun - StateManager func (iatt *ImplementsAllTheThings) CreateRun(ctx context.Context, r state.Run) error { iatt.Calls = append(iatt.Calls, "CreateRun") iatt.Runs[r.RunID] = r return nil } func (iatt *ImplementsAllTheThings) EstimateRunResources(ctx context.Context, executableID string, command string) (state.TaskResources, error) { iatt.Calls = append(iatt.Calls, "EstimateRunResources") return state.TaskResources{}, nil } func (iatt *ImplementsAllTheThings) EstimateExecutorCount(ctx context.Context, executableID string, commandHash string) (int64, error) { iatt.Calls = append(iatt.Calls, "EstimateExecutorCount") return 0, nil } func (iatt *ImplementsAllTheThings) ExecutorOOM(ctx context.Context, executableID string, commandHash string) (bool, error) { iatt.Calls = append(iatt.Calls, "ExecutorOOM") return false, nil } func (iatt *ImplementsAllTheThings) DriverOOM(ctx context.Context, executableID string, commandHash string) (bool, error) { iatt.Calls = append(iatt.Calls, "DriverOOM") return false, nil } // UpdateRun - StateManager func (iatt *ImplementsAllTheThings) UpdateRun(ctx context.Context, runID string, updates state.Run) (state.Run, error) { iatt.Calls = append(iatt.Calls, "UpdateRun") run := iatt.Runs[runID] run.UpdateWith(updates) iatt.Runs[runID] = run return run, nil } // ListGroups - StateManager func (iatt *ImplementsAllTheThings) ListGroups(ctx context.Context, limit int, offset int, name *string) (state.GroupsList, error) { iatt.Calls = append(iatt.Calls, "ListGroups") return state.GroupsList{Total: len(iatt.Groups), Groups: iatt.Groups}, nil } // ListTags - StateManager func (iatt *ImplementsAllTheThings) ListTags(ctx context.Context, limit int, offset int, name *string) (state.TagsList, error) { iatt.Calls = append(iatt.Calls, "ListTags") return state.TagsList{Total: len(iatt.Tags), Tags: iatt.Tags}, nil } // initWorkerTable - StateManager func (iatt *ImplementsAllTheThings) initWorkerTable(c config.Config) error { iatt.Calls = append(iatt.Calls, "initWorkerTable") return nil } // ListWorkers - StateManager func (iatt *ImplementsAllTheThings) ListWorkers(ctx context.Context, engine string) (state.WorkersList, error) { iatt.Calls = append(iatt.Calls, "ListWorkers") return state.WorkersList{Total: len(iatt.Workers), Workers: iatt.Workers}, nil } func (iatt *ImplementsAllTheThings) CheckIdempotenceKey(ctx context.Context, idempotenceKey string) (string, error) { iatt.Calls = append(iatt.Calls, "CheckIdempotenceKey") return "42", nil } // GetWorker - StateManager func (iatt *ImplementsAllTheThings) GetWorker(ctx context.Context, workerType string, engine string) (state.Worker, error) { iatt.Calls = append(iatt.Calls, "GetWorker") return state.Worker{WorkerType: workerType, CountPerInstance: 2}, nil } // UpdateWorker - StateManager func (iatt *ImplementsAllTheThings) UpdateWorker(ctx context.Context, workerType string, updates state.Worker) (state.Worker, error) { iatt.Calls = append(iatt.Calls, "UpdateWorker") return state.Worker{WorkerType: 
workerType, CountPerInstance: updates.CountPerInstance}, nil } // BatchUpdateWorkers- StateManager func (iatt *ImplementsAllTheThings) BatchUpdateWorkers(ctx context.Context, updates []state.Worker) (state.WorkersList, error) { iatt.Calls = append(iatt.Calls, "BatchUpdateWorkers") return state.WorkersList{Total: len(iatt.Workers), Workers: iatt.Workers}, nil } // QurlFor - QueueManager func (iatt *ImplementsAllTheThings) QurlFor(name string, prefixed bool) (string, error) { iatt.Calls = append(iatt.Calls, "QurlFor") qurl, _ := iatt.Qurls[name] return qurl, nil } func (iatt *ImplementsAllTheThings) Enqueue(ctx context.Context, run state.Run) error { iatt.Calls = append(iatt.Calls, "Enqueue") iatt.Queued = append(iatt.Queued, run.RunID) return nil } // ReceiveRun - QueueManager func (iatt *ImplementsAllTheThings) ReceiveRun(qURL string) (queue.RunReceipt, error) { iatt.Calls = append(iatt.Calls, "ReceiveRun") if len(iatt.Queued) == 0 { return queue.RunReceipt{}, nil } popped := iatt.Queued[0] iatt.Queued = iatt.Queued[1:] receipt := queue.RunReceipt{ Run: &state.Run{RunID: popped}, } receipt.Done = func() error { iatt.Calls = append(iatt.Calls, "RunReceipt.Done") return nil } return receipt, nil } // ReceiveStatus - QueueManager func (iatt *ImplementsAllTheThings) ReceiveStatus(qURL string) (queue.StatusReceipt, error) { iatt.Calls = append(iatt.Calls, "ReceiveStatus") if len(iatt.StatusUpdates) == 0 { return queue.StatusReceipt{}, nil } popped := iatt.StatusUpdates[0] iatt.StatusUpdates = iatt.StatusUpdates[1:] receipt := queue.StatusReceipt{ StatusUpdate: &popped, } receipt.Done = func() error { iatt.Calls = append(iatt.Calls, "RunReceipt.Done") return nil } return receipt, nil } // List - QueueManager func (iatt *ImplementsAllTheThings) List() ([]string, error) { iatt.Calls = append(iatt.Calls, "List") res := make([]string, len(iatt.Qurls)) i := 0 for _, qurl := range iatt.Qurls { res[i] = qurl i++ } return res, nil } func (iatt *ImplementsAllTheThings) GetEvents(ctx context.Context, run state.Run) (state.PodEventList, error) { iatt.Calls = append(iatt.Calls, "GetEvents") return state.PodEventList{ Total: 0, PodEvents: nil, }, nil } func (iatt *ImplementsAllTheThings) FetchUpdateStatus(ctx context.Context, run state.Run) (state.Run, error) { iatt.Calls = append(iatt.Calls, "FetchUpdateStatus") return run, nil } func (iatt *ImplementsAllTheThings) FetchPodMetrics(ctx context.Context, run state.Run) (state.Run, error) { iatt.Calls = append(iatt.Calls, "FetchPodMetrics") return run, nil } // CanBeRun - Cluster Client func (iatt *ImplementsAllTheThings) CanBeRun(clusterName string, executableResources state.ExecutableResources) (bool, error) { iatt.Calls = append(iatt.Calls, "CanBeRun") if clusterName == "invalidcluster" { return false, nil } return true, nil } // IsImageValid - Registry Client func (iatt *ImplementsAllTheThings) IsImageValid(imageRef string) (bool, error) { iatt.Calls = append(iatt.Calls, "IsImageValid") if imageRef == "invalidimage" { return false, nil } return true, nil } func (iatt *ImplementsAllTheThings) PollRunStatus(ctx context.Context) (state.Run, error) { iatt.Calls = append(iatt.Calls, "PollRunStatus") return state.Run{}, nil } // PollRuns - Execution Engine func (iatt *ImplementsAllTheThings) PollRuns(ctx context.Context) ([]engine.RunReceipt, error) { iatt.Calls = append(iatt.Calls, "PollRuns") var r []engine.RunReceipt if len(iatt.Queued) == 0 { return r, nil } popped := iatt.Queued[0] iatt.Queued = iatt.Queued[1:] receipt := queue.RunReceipt{ Run: 
&state.Run{RunID: popped}, } receipt.Done = func() error { iatt.Calls = append(iatt.Calls, "RunReceipt.Done") return nil } r = append(r, engine.RunReceipt{receipt, 1111, 1111111, 1}) return r, nil } // PollStatus - Execution Engine func (iatt *ImplementsAllTheThings) PollStatus(ctx context.Context) (engine.RunReceipt, error) { iatt.Calls = append(iatt.Calls, "PollStatus") if len(iatt.StatusUpdatesAsRuns) == 0 { return engine.RunReceipt{}, nil } popped := iatt.StatusUpdatesAsRuns[0] iatt.StatusUpdatesAsRuns = iatt.StatusUpdatesAsRuns[1:] receipt := queue.RunReceipt{ Run: &popped, } receipt.Done = func() error { iatt.Calls = append(iatt.Calls, "StatusReceipt.Done") return nil } return engine.RunReceipt{receipt, 1111, 1111111, 1}, nil } // Execute - Execution Engine func (iatt *ImplementsAllTheThings) Execute(ctx context.Context, executable state.Executable, run state.Run, manager state.Manager) (state.Run, bool, error) { iatt.Calls = append(iatt.Calls, "Execute") return state.Run{}, iatt.ExecuteErrorIsRetryable, iatt.ExecuteError } // Terminate - Execution Engine func (iatt *ImplementsAllTheThings) Terminate(ctx context.Context, run state.Run) error { iatt.Calls = append(iatt.Calls, "Terminate") return nil } // Define - Execution Engine func (iatt *ImplementsAllTheThings) Define(ctx context.Context, definition state.Definition) (state.Definition, error) { iatt.Calls = append(iatt.Calls, "Define") iatt.Defined = append(iatt.Defined, definition.DefinitionID) return definition, nil } // Deregister - Execution Engine func (iatt *ImplementsAllTheThings) Deregister(ctx context.Context, definition state.Definition) error { iatt.Calls = append(iatt.Calls, "Deregister") return nil } // Logs - Logs Client func (iatt *ImplementsAllTheThings) Logs(executable state.Executable, run state.Run, lastSeen *string, role *string, facility *string) (string, *string, error) { iatt.Calls = append(iatt.Calls, "Logs") return "", aws.String(""), nil } // GetExecutableByTypeAndID - StateManager func (iatt *ImplementsAllTheThings) GetExecutableByTypeAndID(ctx context.Context, t state.ExecutableType, id string) (state.Executable, error) { iatt.Calls = append(iatt.Calls, "GetExecutableByTypeAndID") switch t { case state.ExecutableTypeDefinition: return iatt.GetDefinition(ctx, id) case state.ExecutableTypeTemplate: return iatt.GetTemplateByID(ctx, id) default: return nil, fmt.Errorf("Invalid executable type %s", t) } } // ListTemplates - StateManager func (iatt *ImplementsAllTheThings) ListTemplates(ctx context.Context, limit int, offset int, sortBy string, order string) (state.TemplateList, error) { iatt.Calls = append(iatt.Calls, "ListTemplates") tl := state.TemplateList{Total: len(iatt.Templates)} for _, t := range iatt.Templates { tl.Templates = append(tl.Templates, t) } return tl, nil } // ListTemplatesLatestOnly - StateManager func (iatt *ImplementsAllTheThings) ListTemplatesLatestOnly(ctx context.Context, limit int, offset int, sortBy string, order string) (state.TemplateList, error) { // TODO: this is not actually implemented correctly - but also we're never // using it. 
	iatt.Calls = append(iatt.Calls, "ListTemplatesLatestOnly")
	tl := state.TemplateList{Total: len(iatt.Templates)}
	for _, t := range iatt.Templates {
		tl.Templates = append(tl.Templates, t)
	}
	return tl, nil
}

func (iatt *ImplementsAllTheThings) GetTemplateByVersion(ctx context.Context, templateName string, templateVersion int64) (bool, state.Template, error) {
	iatt.Calls = append(iatt.Calls, "GetTemplateByVersion")
	// Iterate over templates to find the matching version; the match is copied
	// by value (not a pointer to the loop variable) and a zero Template is
	// returned when nothing matches.
	var tpl state.Template
	var found bool
	for _, t := range iatt.Templates {
		if t.TemplateName == templateName && t.Version == templateVersion {
			tpl = t
			found = true
			break
		}
	}
	if !found {
		return false, state.Template{}, fmt.Errorf("No template with name: %s", templateName)
	}
	return true, tpl, nil
}

// GetTemplateByID - StateManager
func (iatt *ImplementsAllTheThings) GetTemplateByID(ctx context.Context, id string) (state.Template, error) {
	iatt.Calls = append(iatt.Calls, "GetTemplateByID")
	var err error
	t, ok := iatt.Templates[id]
	if !ok {
		err = fmt.Errorf("No template %s", id)
	}
	return t, err
}

// GetLatestTemplateByTemplateName - StateManager
func (iatt *ImplementsAllTheThings) GetLatestTemplateByTemplateName(ctx context.Context, templateName string) (bool, state.Template, error) {
	iatt.Calls = append(iatt.Calls, "GetLatestTemplateByTemplateName")
	var tpl state.Template
	var found bool
	var maxVersion int64 = math.MinInt64
	// Iterate over templates to find the max version.
	for _, t := range iatt.Templates {
		if t.TemplateName == templateName && t.Version > maxVersion {
			tpl = t
			maxVersion = t.Version
			found = true
		}
	}
	if !found {
		return false, state.Template{}, fmt.Errorf("No template with name: %s", templateName)
	}
	return true, tpl, nil
}

// CreateTemplate - StateManager
func (iatt *ImplementsAllTheThings) CreateTemplate(ctx context.Context, t state.Template) error {
	iatt.Calls = append(iatt.Calls, "CreateTemplate")
	iatt.Templates[t.TemplateID] = t
	return nil
}

func (iatt *ImplementsAllTheThings) GetRunStatus(ctx context.Context, runID string) (state.RunStatus, error) {
	iatt.Calls = append(iatt.Calls, "GetRunStatus")
	var err error
	r, ok := iatt.Runs[runID]
	if !ok {
		err = fmt.Errorf("No run with ID: %s", runID)
		return state.RunStatus{}, err
	}
	status := state.RunStatus{
		RunID:        r.RunID,
		Status:       r.Status,
		DefinitionID: r.DefinitionID,
		ClusterName:  r.ClusterName,
		QueuedAt:     r.QueuedAt,
		StartedAt:    r.StartedAt,
		FinishedAt:   r.FinishedAt,
		ExitCode:     r.ExitCode,
		ExitReason:   r.ExitReason,
		Engine:       r.Engine,
		Alias:        r.Alias,
	}
	return status, err
}


================================================
FILE: tracing/tracing.go
================================================
package tracing

import (
	"context"
	"time"

	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer"
)

// TraceJob starts or continues a trace for a job operation
func TraceJob(ctx context.Context, operationName string, runID string) (context.Context, tracer.Span) {
	span, ctx := tracer.StartSpanFromContext(
		ctx,
		operationName,
		tracer.ResourceName(runID),
		tracer.Tag("job.run_id", runID),
	)
	return ctx, span
}

// TagRunInfo adds standardized job metadata to a span.
// Only a subset of the accepted fields is currently tagged.
func TagRunInfo(span tracer.Span, runID, definitionID, alias, status, clusterName string,
	queuedAt, startedAt, finishedAt *time.Time, podName, namespace, exitReason *string,
	exitCode *int64, tier string) {
	if span == nil {
		return
	}
	span.SetTag("job.run_id", runID)
	if exitReason != nil {
		span.SetTag("job.exit_reason", *exitReason)
	}
}

type TextMapCarrier map[string]string
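// Illustrative usage of TraceJob, mirroring the call pattern used throughout
// the state and worker packages (a sketch, not an exported contract; the
// operation name and error variable are hypothetical):
//
//	ctx, span := tracing.TraceJob(ctx, "flotilla.state.some_operation", runID)
//	defer span.Finish()
//	if err != nil {
//		span.SetTag("error", true)
//		span.SetTag("error.msg", err.Error())
//	}

// ForeachKey implements the TextMapReader interface for Extract
func (c TextMapCarrier) ForeachKey(handler func(key,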
val string) error) error { for k, v := range c { if err := handler(k, v); err != nil { return err } } return nil } // Set implements the TextMapWriter interface for Inject func (c TextMapCarrier) Set(key, val string) { c[key] = val } ================================================ FILE: ui/.gitignore ================================================ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. # dependencies /node_modules /.pnp .pnp.js # testing /coverage # production /build # misc .DS_Store .env.local .env.development.local .env.test.local .env.production.local npm-debug.log* yarn-debug.log* yarn-error.log* package-lock.json ================================================ FILE: ui/.prettierrc ================================================ { "trailingComma": "es5", "semi": false } ================================================ FILE: ui/Dockerfile ================================================ FROM node:carbon WORKDIR /usr/src/app ADD . /usr/src/app RUN npm install -g serve RUN npm install ARG FLOTILLA_API ARG DEFAULT_CLUSTER RUN npm run build ENTRYPOINT serve -s build ================================================ FILE: ui/README.md ================================================ # Flotilla UI The Flotilla UI is a React application bundled along with the rest of Flotilla. If you are running the entire Flotilla stack locally, it is recommended to use docker-compose as documented in the main [README](https://github.com/stitchfix/flotilla-os#starting-the-service-locally). If you are interested in developing the UI itself, you can follow these steps: ## Development ### Running Locally ``` git clone git@github.com:stitchfix/flotilla-os.git cd flotilla-os/ui npm install REACT_APP_BASE_URL=http://my-flotilla.com REACT_APP_BASE_URL_DEV=http://flotilla.staging.vertigo.stitchfix.com/api npm start ``` ### Testing UI testing is done with Jest and Enzyme. 
You can run the tests via: ``` npm run test ``` ================================================ FILE: ui/package.json ================================================ { "name": "flotilla", "version": "5.1.1", "dependencies": { "@blueprintjs/core": "3.15.1", "@blueprintjs/datetime": "3.15.1", "@reduxjs/toolkit": "^1.1.0", "ansi-to-react": "5.1.0", "axios": "1.15.2", "cookie": "0.7.0", "formik": "1.5.7", "localforage": "^1.7.3", "lodash": "4.18.1", "moment": "2.29.4", "pretty-ms": "5.0.0", "qs": "6.14.1", "react": "^16.8.6", "react-copy-to-clipboard": "5.0.2", "react-debounce-input": "3.2.0", "react-dom": "16.8.6", "react-helmet": "^5.2.1", "react-json-editor-ajrm": "^2.5.9", "react-json-view": "^1.19.1", "react-jsonschema-form": "^1.8.1", "react-redux": "^7.1.3", "react-resize-detector": "^4.2.1", "react-router-dom": "^5.1.2", "react-scripts": "^5.0.1", "react-select": "2.4.4", "react-window": "^1.8.5", "redux-logger": "^3.0.6", "url-join": "^4.0.1", "yup": "0.27.0" }, "scripts": { "start": "react-scripts start", "build": "react-scripts build", "test": "react-scripts test", "eject": "react-scripts eject" }, "eslintConfig": { "extends": "react-app" }, "browserslist": { "production": [ ">0.2%", "not dead", "not op_mini all" ], "development": [ "last 1 chrome version", "last 1 firefox version", "last 1 safari version" ] }, "devDependencies": { "@babel/plugin-proposal-private-property-in-object": "^7.21.11", "@types/cookie": "0.3.3", "@types/enzyme": "3.9.3", "@types/history": "4.7.2", "@types/jest": "24.0.13", "@types/lodash": "4.17.16", "@types/node": "12.0.2", "@types/qs": "6.5.3", "@types/react": "16.8.18", "@types/react-copy-to-clipboard": "4.3.0", "@types/react-dom": "16.8.4", "@types/react-helmet": "^5.0.14", "@types/react-jsonschema-form": "^1.7.0", "@types/react-redux": "^7.1.5", "@types/react-resize-detector": "^4.2.0", "@types/react-router-dom": "^5.1.3", "@types/react-select": "2.0.9", "@types/react-window": "^1.8.1", "@types/redux-logger": "^3.0.7", "@types/url-join": "^4.0.0", "@types/yup": "0.26.14", "axios-mock-adapter": "1.16.0", "babel-core": "6.26.3", "babel-jest": "24.8.0", "enzyme": "3.9.0", "enzyme-adapter-react-16": "1.13.2", "enzyme-to-json": "3.3.5", "flush-promises": "1.0.2", "regenerator-runtime": "0.13.2", "typescript": "3.4.5" } } ================================================ FILE: ui/public/index.html ================================================ Flotilla | Stitch Fix
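The UI README above notes that testing is done with Jest and Enzyme. For orientation, here is a minimal sketch of what a spec in the style of `ui/src/components/__tests__/` might look like. The component under test is invented for illustration and is not part of the repository; the Enzyme adapter setup lives in `ui/src/setupTests.js`.

```
import * as React from "react"
import { mount } from "enzyme"

// Hypothetical component, for illustration only.
const Hello: React.FC<{ name: string }> = ({ name }) => <span>Hello, {name}</span>

describe("Hello", () => {
  it("renders its name prop", () => {
    const wrapper = mount(<Hello name="Flotilla" />)
    expect(wrapper.text()).toEqual("Hello, Flotilla")
  })
})
```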
================================================ FILE: ui/src/api.ts ================================================ import FlotillaClient from "./helpers/FlotillaClient" const err = "Base URL undefined. If you are running this in development, please set the `REACT_APP_BASE_URL_DEV` environment variable. If you are running this in production, please set the `REACT_APP_BASE_URL` environment variable." let baseURL: string | undefined = undefined switch (process.env.NODE_ENV) { case "production": baseURL = process.env.REACT_APP_BASE_URL break case "development": case "test": default: baseURL = process.env.REACT_APP_BASE_URL_DEV break } if (baseURL === undefined) { throw new Error(err) } const client = new FlotillaClient({ baseURL }) export default client
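`api.ts` builds a single shared `FlotillaClient` from build-time environment variables. For tests, the repository ships a manual mock at `ui/src/helpers/__mocks__/FlotillaClient.ts`; a spec can opt into it with Jest's standard manual-mock mechanism. A sketch, assuming the default create-react-app Jest configuration:

```
// jest.mock calls are hoisted above imports, so modules that import
// ../helpers/FlotillaClient (including ./api) receive the mock class.
jest.mock("../helpers/FlotillaClient")
```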
================================================ FILE: ui/src/components/ARASwitch.tsx ================================================ import * as React from "react" import { get } from "lodash" import { Tag, Colors, Checkbox, Intent } from "@blueprintjs/core" import { Task, UpdateTaskPayload } from "../types" import api from "../api" import Toaster from "./Toaster" import Request, { ChildProps } from "./Request" type Props = { task: Task } & ChildProps class ARASwitch extends React.Component { constructor(props: Props) { super(props) this.handleChange = this.handleChange.bind(this) } handleChange() { const { task, request } = this.props // Toggle the current setting. const enabled = !this.isEnabled() request({ definitionID: task.definition_id, data: { env: task.env, image: task.image, group_name: task.group_name, memory: task.memory, cpu: task.cpu, command: task.command, tags: task.tags, adaptive_resource_allocation: enabled, }, }) } isEnabled() { return get(this.props.task, "adaptive_resource_allocation", false) === true } render() { const enabled = this.isEnabled() return ( {enabled ? "Enabled" : "Disabled"}
) } } type ConnectedProps = { task: Task request: (opts: { definitionID: string }) => void } const Connected: React.FC = ({ task, request }) => ( requestFn={api.updateTask} shouldRequestOnMount={false} onSuccess={(data: Task) => { Toaster.show({ message: `${data.alias} updated successfully!`, intent: Intent.SUCCESS, }) // Re-request data. request({ definitionID: data.definition_id }) }} onFailure={() => { Toaster.show({ message: "An error occurred.", intent: Intent.DANGER, }) }} > {requestProps => } ) export default Connected ================================================ FILE: ui/src/components/App.tsx ================================================ import * as React from "react" import { BrowserRouter, Route, Switch, Redirect } from "react-router-dom" import Tasks from "./Tasks" import Task from "./Task" import CreateTaskForm from "./CreateTaskForm" import Run from "./Run" import Runs from "./Runs" import Templates from "./Templates" import Template from "./Template" import Navigation from "./Navigation" import ls from "../localstorage" import { LOCAL_STORAGE_IS_ONBOARDED_KEY } from "../constants" import Toaster from "./Toaster" import { Intent } from "@blueprintjs/core" import { connect, ConnectedProps } from "react-redux" import { toggleDialogVisibilityChange } from "../state/settings" const connector = connect() class App extends React.Component> { componentDidMount() { this.checkOnboardingStatus() } checkOnboardingStatus() { ls.getItem(LOCAL_STORAGE_IS_ONBOARDED_KEY).then(res => { if (res !== true) { Toaster.show({ icon: "clean", message: "You can now configure global settings via the Settings menu.", timeout: 0, intent: Intent.PRIMARY, action: { onClick: () => { ls.setItem(LOCAL_STORAGE_IS_ONBOARDED_KEY, true).then( () => { this.props.dispatch(toggleDialogVisibilityChange(true)) } ) }, text: "Open settings menu", }, onDismiss: () => { ls.setItem(LOCAL_STORAGE_IS_ONBOARDED_KEY, true) }, }) } }) } render() { return (
) } } export default connector(App) ================================================ FILE: ui/src/components/Attribute.tsx ================================================ import * as React from "react" import { Tag, Tooltip, Icon, Intent } from "@blueprintjs/core" import CopyToClipboard from "react-copy-to-clipboard" type Props = { rawValue: string } type State = { isCopied: boolean } class CopyableAttributeValue extends React.Component { constructor(props: Props) { super(props) this.handleCopy = this.handleCopy.bind(this) } state = { isCopied: false, } handleCopy() { this.setState({ isCopied: true }) } render() { return ( Click to copy to clipboard {this.state.isCopied && ( )} } >
{this.props.children}
) } } const Attribute: React.FunctionComponent<{ name: React.ReactNode value: React.ReactNode containerStyle?: object isCopyable?: boolean rawValue?: string description?: React.ReactElement isNew?: boolean }> = ({ name, value, containerStyle, isCopyable, rawValue, description, isNew, }) => (
{name}
{description && ( )} {isNew && New!}
{isCopyable && rawValue ? (
{value}
) : (
{value}
)}
) export default Attribute ================================================ FILE: ui/src/components/AutoscrollSwitch.tsx ================================================ import * as React from "react" import { useDispatch, useSelector } from "react-redux" import { Switch } from "@blueprintjs/core" import { RootState } from "../state/store" import { toggleAutoscroll } from "../state/runView" const AutoscrollSwitch: React.FC = () => { const dispatch = useDispatch() const shouldAutoscroll = useSelector( (state: RootState) => state.runView.shouldAutoscroll ) return ( { dispatch(toggleAutoscroll()) }} /> ) } export default AutoscrollSwitch ================================================ FILE: ui/src/components/BaseTaskForm.tsx ================================================ import * as React from "react" import { FormGroup, Classes } from "@blueprintjs/core" import { FastField, FormikProps } from "formik" import * as Yup from "yup" import GroupNameSelect from "./GroupNameSelect" import TagsSelect from "./TagsSelect" import EnvFieldArray from "./EnvFieldArray" import FieldError from "./FieldError" import { groupNameFieldSpec, imageFieldSpec, commandFieldSpec, memoryFieldSpec, tagsFieldSpec, cpuFieldSpec, } from "../helpers/taskFormHelpers" export const validationSchema = { env: Yup.array().of( Yup.object().shape({ name: Yup.string().required(), value: Yup.string().required(), }) ), image: Yup.string() .min(1) .required("Required"), group_name: Yup.string() .min(1) .required("Required"), memory: Yup.number() .required("Required") .min(0), cpu: Yup.number() .required("Required") .min(512), command: Yup.string() .min(1) .required("Required"), tags: Yup.array().of(Yup.string()), } export type Props = Pick< FormikProps, "values" | "setFieldValue" | "errors" > const BaseTaskForm: React.FunctionComponent = ({ values, setFieldValue, errors, }) => ( <> { setFieldValue(groupNameFieldSpec.name, value) }} /> {errors.group_name && {errors.group_name}} {errors.image && {errors.image}} {errors.command && {errors.command}} {errors.cpu && {errors.cpu}} {errors.memory && {errors.memory}} { setFieldValue(tagsFieldSpec.name, value) }} /> {errors.tags && {errors.tags}} ) export default BaseTaskForm ================================================ FILE: ui/src/components/CloudtrailRecords.tsx ================================================ import * as React from "react" import { CloudtrailRecord } from "../types" import { HTMLTable } from "@blueprintjs/core" type Props = { data: CloudtrailRecord[] } const CloudtrailRecords: React.FC = ({ data }) => ( Event Name Event Source {data.map((r, i) => ( {r.eventName} {r.eventSource} ))} ) export default CloudtrailRecords ================================================ FILE: ui/src/components/ClusterSelect.tsx ================================================ import * as React from "react" import { get, isArray } from "lodash" import Creatable from "react-select/lib/Creatable" import Request from "./Request" import { ListClustersResponse, SelectOption, SelectProps } from "../types" import api from "../api" import * as helpers from "../helpers/selectHelpers" /** * ClusterSelect allows users to select an ECS cluster on which to run a * particular task. This component hits the `/clusters` endpoint and renders * the results into a React Select component. 
*/ export const ClusterSelect: React.FunctionComponent = props => { return ( value={helpers.stringToSelectOpt(props.value)} options={props.options} isClearable onChange={option => { props.onChange(helpers.preprocessSelectOption(option)) }} styles={helpers.selectStyles} theme={helpers.selectTheme} isDisabled={props.isDisabled} /> ) } const Connected: React.FunctionComponent = props => ( requestFn={api.listClusters}> {res => { let options = get(res, ["data", "clusters"], []) // If there's an error fetching available clusters, set the options to // an empty array. if (!isArray(options)) options = [] return ( ) }} ) export default Connected ================================================ FILE: ui/src/components/CreateTaskForm.tsx ================================================ import * as React from "react" import { RouteComponentProps } from "react-router-dom" import { Button, Intent, FormGroup, Classes } from "@blueprintjs/core" import { Formik, Form, FastField, FormikProps } from "formik" import * as Yup from "yup" import api from "../api" import { CreateTaskPayload, Task } from "../types" import Request, { RequestStatus, ChildProps as RequestChildProps, } from "./Request" import BaseTaskForm, { validationSchema as baseTaskFormValidationSchema, } from "./BaseTaskForm" import Toaster from "./Toaster" import ErrorCallout from "./ErrorCallout" import FieldError from "./FieldError" export const validationSchema = Yup.object().shape({ ...baseTaskFormValidationSchema, alias: Yup.string() .min(1) .required("Required"), }) export type Props = Pick< FormikProps, "values" | "setFieldValue" | "isValid" | "errors" > & Pick< RequestChildProps, "requestStatus" | "error" | "isLoading" > export const CreateTaskForm: React.FunctionComponent = ({ values, isValid, setFieldValue, requestStatus, error, isLoading, errors, }) => { return ( <> {requestStatus === RequestStatus.ERROR && error && ( )}
{errors.alias && {errors.alias}} ) } export type ConnectedProps = RouteComponentProps & { initialValues: CreateTaskPayload onSuccess?: (data: Task) => void } const Connected: React.FunctionComponent = props => ( requestFn={api.createTask} shouldRequestOnMount={false} onSuccess={(data: Task) => { Toaster.show({ message: `Task ${data.alias} created successfully!`, intent: Intent.SUCCESS, }) props.history.push(`/tasks/${data.definition_id}`) if (props.onSuccess) { props.onSuccess(data) } }} onFailure={() => { Toaster.show({ message: "An error occurred.", intent: Intent.DANGER, }) }} > {requestProps => ( { requestProps.request({ data }) }} > {({ values, setFieldValue, isValid, errors }) => ( )} )} ) Connected.defaultProps = { initialValues: { env: [], image: "", group_name: "", alias: "", memory: 1024, cpu: 512, command: "", tags: [], }, } export default Connected ================================================ FILE: ui/src/components/DeleteTaskButton.tsx ================================================ import * as React from "react" import { Button, Dialog, Intent, Classes } from "@blueprintjs/core" import { withRouter, RouteComponentProps } from "react-router-dom" import Request, { ChildProps } from "./Request" import api from "../api" import Toaster from "./Toaster" import ErrorCallout from "./ErrorCallout" type Args = { definitionID: string } export type Props = ChildProps & ConnectedProps type State = { isOpen: boolean } export class DeleteTaskButton extends React.Component { constructor(props: Props) { super(props) this.handleSubmitClick = this.handleSubmitClick.bind(this) this.openDialog = this.openDialog.bind(this) this.closeDialog = this.closeDialog.bind(this) } state = { isOpen: false, } handleSubmitClick() { this.props.request({ definitionID: this.props.definitionID }) } openDialog() { this.setState({ isOpen: true }) } closeDialog() { this.setState({ isOpen: false }) } render() { const { isLoading, error } = this.props return ( <>
{error && } Are you sure you want to delete this task?
) } } type ConnectedProps = { definitionID: string } const Connected: React.FunctionComponent< RouteComponentProps & ConnectedProps > = ({ definitionID, history }) => ( requestFn={api.deleteTask} initialRequestArgs={{ definitionID }} shouldRequestOnMount={false} onSuccess={() => { Toaster.show({ message: "Task deleted!", intent: Intent.SUCCESS, }) history.push(`/tasks`) }} onFailure={() => { Toaster.show({ message: "An error occurred.", intent: Intent.DANGER, }) }} > {requestProps => ( )} ) export default withRouter(Connected)
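`ARASwitch`, `DeleteTaskButton`, and several later components all follow the same shape: a presentational component receives `request`, `isLoading`, and `error` from a generic `Request` render-prop wrapper. `Request.tsx` itself is not reproduced in this extract; the following is an illustrative reimplementation of the pattern, not the repository's code:

```
import * as React from "react"

type ChildProps<D, A> = {
  data: D | null
  isLoading: boolean
  error: string | null
  request: (args: A) => void
}

// Illustrative only: owns request state and hands it to its children
// via a render prop, as the components above expect.
function RequestSketch<D, A>(props: {
  requestFn: (args: A) => Promise<D>
  children: (child: ChildProps<D, A>) => React.ReactNode
}) {
  const [data, setData] = React.useState<D | null>(null)
  const [isLoading, setLoading] = React.useState(false)
  const [error, setError] = React.useState<string | null>(null)
  const request = (args: A) => {
    setLoading(true)
    props.requestFn(args)
      .then(setData)
      .catch(e => setError(String(e)))
      .finally(() => setLoading(false))
  }
  return <>{props.children({ data, isLoading, error, request })}</>
}
```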
================================================ FILE: ui/src/components/Duration.tsx ================================================ import * as React from "react" import prettyMS from "pretty-ms" import calculateDuration from "../helpers/calculateDuration" type Props = { start: string end: string | undefined | null isActive: boolean } type State = { duration: number } class Duration extends React.Component { private intervalID: number | undefined constructor(props: Props) { super(props) this.process = this.process.bind(this) } state = { duration: 0, } componentDidMount() { // Immediately process duration on mount. this.process() // If the end date is missing (undefined or null), begin interval to process duration. if (this.props.end == null && this.props.isActive === true) { this.intervalID = window.setInterval(this.process, 1000) } } componentWillUnmount() { window.clearInterval(this.intervalID) } process() { const { start, end } = this.props this.setState({ duration: calculateDuration(start, end) }) } render() { return ( {prettyMS(this.state.duration, { secondsDecimalDigits: 0 })}
) } } export default Duration ================================================ FILE: ui/src/components/EngineTag.tsx ================================================ import * as React from "react" import { Tag } from "@blueprintjs/core" import { ExecutionEngine } from "../types" const EngineTag: React.FC<{ engine: ExecutionEngine }> = ({ engine }) => ( {engine} ) export default EngineTag ================================================ FILE: ui/src/components/EnvFieldArray.tsx ================================================ import * as React from "react" import { FieldArray, FastField, FormikErrors } from "formik" import { get } from "lodash" import { Button, FormGroup, Classes, Intent } from "@blueprintjs/core" import { Env } from "../types" import { IconNames } from "@blueprintjs/icons" import { envFieldSpec } from "../helpers/taskFormHelpers" import FieldError from "./FieldError" export type Props = { values: Env[] push: (env: Env) => void remove: (index: number) => void errors: string | FormikErrors | undefined } export const EnvFieldArray: React.FunctionComponent = ({ values, push, remove, errors, }) => (
{envFieldSpec.label}
{values.map((env: Env, i: number) => (
{get(errors, [i, "name"], null)} {get(errors, [i, "value"], null)}
))}
) const ConnectedEnvFieldArray: React.FunctionComponent<{}> = () => ( {({ form, push, remove }) => ( )} ) export default ConnectedEnvFieldArray
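The original JSX of `ConnectedEnvFieldArray` did not survive extraction intact. Based on its render-prop signature `{({ form, push, remove }) => ...}`, the wiring is most likely Formik's `FieldArray`, roughly as follows (a reconstruction under that assumption, not the verbatim source):

```
import { FieldArray } from "formik"

// Likely shape: FieldArray exposes array helpers (push, remove) plus the
// enclosing form, which EnvFieldArray consumes as plain props.
<FieldArray name={envFieldSpec.name}>
  {({ form, push, remove }) => (
    <EnvFieldArray
      values={form.values.env}
      push={push}
      remove={remove}
      errors={form.errors.env}
    />
  )}
</FieldArray>
```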
================================================ FILE: ui/src/components/EnvList.tsx ================================================ import * as React from "react" import { isEmpty, isArray } from "lodash" import { Env } from "../types" import Attribute from "./Attribute" const EnvList: React.FunctionComponent<{ env: Env[] }> = ({ env }) => ( {isArray(env) && !isEmpty(env) && env.map(e => ( ))}
) export default EnvList ================================================ FILE: ui/src/components/EnvQueryFilter.tsx ================================================ import * as React from "react" import { Button, FormGroup, Classes, Intent } from "@blueprintjs/core" import { Env } from "../types" import { IconNames } from "@blueprintjs/icons" import { DebounceInput } from "react-debounce-input" import { envFieldSpec } from "../helpers/taskFormHelpers" type Props = { value: string[] onChange: (value: string[]) => void } type State = { newEnvName: string newEnvValue: string } class EnvQueryFilter extends React.Component { private delimiter: string = "|" constructor(props: Props) { super(props) this.handleNameChange = this.handleNameChange.bind(this) this.handleValueChange = this.handleValueChange.bind(this) this.handleRemove = this.handleRemove.bind(this) this.handleNewNameChange = this.handleNewNameChange.bind(this) this.handleNewValueChange = this.handleNewValueChange.bind(this) this.handleAddNewEnv = this.handleAddNewEnv.bind(this) } state = { newEnvName: "", newEnvValue: "", } serialize(env: Env): string { return `${env.name}${this.delimiter}${env.value}` } deserialize(str: string): Env { const split = str.split(this.delimiter) return { name: split[0], value: split[1], } } handleNameChange(i: number, evt: React.ChangeEvent) { const { value, onChange } = this.props const prevEnvValue = this.deserialize(value[i]).value // Copy the array instead of mutating the `value` prop in place. const nextArr = [...value] nextArr[i] = this.serialize({ name: evt.target.value, value: prevEnvValue }) onChange(nextArr) } handleValueChange(i: number, evt: React.ChangeEvent) { const { value, onChange } = this.props const prevEnvName = this.deserialize(value[i]).name const nextArr = [...value] nextArr[i] = this.serialize({ name: prevEnvName, value: evt.target.value }) onChange(nextArr) } handleRemove(i: number) { const { value, onChange } = this.props const nextArr = [...value] nextArr.splice(i, 1) onChange(nextArr) } handleNewNameChange(evt: React.ChangeEvent) { this.setState({ newEnvName: evt.target.value }) } handleNewValueChange(evt: React.ChangeEvent) { this.setState({ newEnvValue: evt.target.value }) } handleAddNewEnv() { const { value, onChange } = this.props const { newEnvName, newEnvValue } = this.state const prev = value const e = this.serialize({ name: newEnvName, value: newEnvValue }) const next = prev.concat(e) this.setState({ newEnvName: "", newEnvValue: "" }, () => { onChange(next) }) } shouldDisableAddNewEnvButton(): boolean { const { newEnvName, newEnvValue } = this.state return newEnvName.length === 0 || newEnvValue.length === 0 } render() { const { value } = this.props const { newEnvName, newEnvValue } = this.state return (
{envFieldSpec.label}
{value.map((s: string, i: number) => { const e: Env = this.deserialize(s) return (
) })}
) } } export default EnvQueryFilter
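`EnvQueryFilter` encodes each name/value pair into a single pipe-delimited string so the filter can live in a flat string array (for example, in a query string). A standalone restatement of the encoding with a worked example; note that a value containing the `|` delimiter would be truncated on the way back out, a limitation inherent to this scheme:

```
const delimiter = "|"
const serialize = (env: { name: string; value: string }): string =>
  `${env.name}${delimiter}${env.value}`
const deserialize = (str: string) => {
  const [name, value] = str.split(delimiter)
  return { name, value }
}

serialize({ name: "LOG_LEVEL", value: "debug" }) // "LOG_LEVEL|debug"
deserialize("LOG_LEVEL|debug") // { name: "LOG_LEVEL", value: "debug" }
```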
================================================ FILE: ui/src/components/ErrorCallout.tsx ================================================ import * as React from "react" import { Callout, Intent } from "@blueprintjs/core" import { get } from "lodash" import { AxiosError } from "axios" import Attribute from "./Attribute" const ErrorCallout: React.FunctionComponent<{ error: AxiosError | null }> = ({ error, }) => { return ( ) } export default ErrorCallout ================================================ FILE: ui/src/components/FieldError.tsx ================================================ import * as React from "react" import { Colors } from "@blueprintjs/core" const FieldError: React.FunctionComponent = ({ children }) => (
{children}
) export default FieldError ================================================ FILE: ui/src/components/GenericMultiSelect.tsx ================================================ import * as React from "react" import { isArray } from "lodash" import Creatable from "react-select/lib/Creatable" import { SelectOption, MultiSelectProps } from "../types" import * as helpers from "../helpers/selectHelpers" const GenericMultiSelect: React.FunctionComponent = props => { let value = props.value if (!isArray(props.value)) { value = [props.value] } return ( value={value.map(helpers.stringToSelectOpt)} options={[]} onChange={option => { props.onChange(helpers.preprocessMultiSelectOption(option)) }} isMulti isClearable styles={helpers.selectStyles} theme={helpers.selectTheme} isDisabled={props.isDisabled} /> ) } export default GenericMultiSelect ================================================ FILE: ui/src/components/GroupNameSelect.tsx ================================================ import * as React from "react" import { get } from "lodash" import Creatable from "react-select/lib/Creatable" import Request, { RequestStatus } from "./Request" import { ListGroupsResponse, SelectOption, SelectProps } from "../types" import api from "../api" import * as helpers from "../helpers/selectHelpers" import { Classes, Spinner } from "@blueprintjs/core" /** * GroupNameSelect lets users choose a group name for their task definition. It * hits the `/groups` endpoint and renders the results into a React Select * component. If there are no existing groups, it will render an `` * element as a fallback. */ export const GroupNameSelect: React.FunctionComponent = props => { return ( value={helpers.stringToSelectOpt(props.value)} options={props.options} onChange={option => { props.onChange(helpers.preprocessSelectOption(option)) }} isClearable id="groupNameSelect" styles={helpers.selectStyles} theme={helpers.selectTheme} isDisabled={props.isDisabled} /> ) } const ConnectedGroupNameSelect: React.FunctionComponent = props => ( requestFn={api.listGroups}> {({ data, requestStatus }) => { switch (requestStatus) { case RequestStatus.ERROR: return ( { props.onChange(evt.target.value) }} /> ) case RequestStatus.READY: let options = get(data, "groups", []) === null ? [] : get(data, "groups", []) if (options === null) options = [] return ( ) case RequestStatus.NOT_READY: default: return } }} ) export default ConnectedGroupNameSelect ================================================ FILE: ui/src/components/ISO8601AttributeValue.tsx ================================================ import * as React from "react" import moment from "moment" import { Classes } from "@blueprintjs/core" const ISO8601AttributeValue: React.FunctionComponent<{ time: string | null | undefined inline?: boolean verbose?: boolean }> = ({ time, inline, verbose }) => { return (
{time !== null && time !== undefined ? moment(time).fromNow() : "-"}
{verbose && time !== null && time !== undefined && (
{time.substr(0, 19)}
)}
) } ISO8601AttributeValue.defaultProps = { verbose: true, } export default ISO8601AttributeValue ================================================ FILE: ui/src/components/ListFiltersDropdown.tsx ================================================ import * as React from "react" import { Button, Tooltip, Popover, Position, Card } from "@blueprintjs/core" const ListFiltersDropdown: React.FunctionComponent<{}> = ({ children }) => ( {children}} > } breadcrumbs={[ { text: this.getExecutableLinkName(), href: this.getExecutableLinkURL(), }, { text: data.run_id, href: `/runs/${data.run_id}`, }, ]} buttons={btn} />
{metadataVisibility.isVisible && }
} /> ) } /> } />
{ this.setActiveTabId(id as RunTabId) }} > ) : ( ) } /> EKS Pod Events ) : ( "EKS Pod Events" ) } panel={ } disabled={data.engine !== ExecutionEngine.EKS} /> Cloudtrail Records ) : ( `EKS Cloudtrail Records (${ hasCloudtrailRecords ? get( data, ["cloudtrail_notifications", "Records"], [] ).length : 0 })` ) } panel={ } disabled={ data.engine !== ExecutionEngine.EKS || hasCloudtrailRecords === false } />
)} ) } return case RequestStatus.NOT_READY: default: return } } } const ReduxConnectedRun = connected(Run) const Connected: React.FunctionComponent> = ({ match }) => ( {({ query, setQuery }) => ( requestFn={api.getRun} initialRequestArgs={{ runID: match.params.runID }} > {props => ( <> {`${ props.data ? EnhancedRunStatusEmojiMap.get( getEnhancedRunStatus(props.data) as EnhancedRunStatus ) : "" } ${match.params.runID}`} )} )} ) export default Connected ================================================ FILE: ui/src/components/RunAttributes.tsx ================================================ import * as React from "react" import { Card, Pre, Tag } from "@blueprintjs/core" import { Run, ExecutionEngine } from "../types" import Attribute from "./Attribute" import ISO8601AttributeValue from "./ISO8601AttributeValue" const RunAttributes: React.FC<{ data: Run }> = ({ data }) => (
{data.engine}} /> {data.engine !== ExecutionEngine.EKS && ( )} {data.node_lifecycle || "-"}} />
{data.max_cpu_used && }
{data.max_memory_used && }
{data.gpu && (
)}
} /> } /> } />
{data.command.replace(/\n(\s)+/g, "\n")} ) : ( "Existing task definition command was used." ) } />
) export default RunAttributes ================================================ FILE: ui/src/components/RunDebugAttributes.tsx ================================================ import * as React from "react" import { Card, Icon } from "@blueprintjs/core" import urljoin from "url-join" import { Run, ExecutionEngine } from "../types" import Attribute from "./Attribute" const createS3LogsUrl = (runID: string): string => { const prefix = process.env.REACT_APP_S3_BUCKET_PREFIX || "" return urljoin(prefix, "logs", runID, "/") } const createEC2Url = (dns: string): string => { const prefix = process.env.REACT_APP_EC2_INSTANCE_URL_PREFIX || "" return urljoin(prefix, dns) } const createS3ManifestUrl = (runID: string): string => { const prefix = process.env.REACT_APP_S3_OBJECT_PREFIX || "" return urljoin(prefix, "manifests", runID, `${runID}.yaml`) } const RunDebugAttributes: React.FC<{ data: Run }> = ({ data }) => (
EKS Debug
{data.cluster && } {data.pod_name && } {data.attempt_count && } {data.engine === ExecutionEngine.EKS && ( Link } /> )} {data.instance.dns_name && ( {data.instance.dns_name} } /> )} {data.engine === ExecutionEngine.EKS && ( Link } /> )}
) export default RunDebugAttributes
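The three URL helpers in `RunDebugAttributes` combine a build-time prefix with run-scoped path segments via `url-join`. A worked example with a hypothetical prefix (the real values come from `REACT_APP_S3_BUCKET_PREFIX`, `REACT_APP_EC2_INSTANCE_URL_PREFIX`, and `REACT_APP_S3_OBJECT_PREFIX` at build time):

```
import urljoin from "url-join"

const prefix = "https://console.example.com/my-bucket" // hypothetical
urljoin(prefix, "logs", "run-123", "/")
// => "https://console.example.com/my-bucket/logs/run-123/"
urljoin(prefix, "manifests", "run-123", "run-123.yaml")
// => "https://console.example.com/my-bucket/manifests/run-123/run-123.yaml"
```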
================================================ FILE: ui/src/components/RunEvents.tsx ================================================ import * as React from "react" import { RunStatus, RunTabId, ListRunEventsResponse } from "../types" import Request, { RequestStatus } from "./Request" import api from "../api" import ErrorCallout from "./ErrorCallout" import { Spinner, Callout, Card, Tag, Button, Intent } from "@blueprintjs/core" import QueryParams from "./QueryParams" import { RUN_TAB_ID_QUERY_KEY } from "../constants" type Props = { runID: string status: RunStatus hasLogs: boolean } const RunEvents: React.FC = ({ runID, status, hasLogs }) => ( {({ setQuery }) => ( requestFn={api.listRunEvents} initialRequestArgs={runID} > {({ data, requestStatus, isLoading, error }) => { switch (requestStatus) { case RequestStatus.ERROR: return case RequestStatus.READY: let viewLogsCallout = ( ) if (data && data.pod_events !== null) { return ( <> {data.pod_events.map((evt, i) => (
{evt.timestamp} {evt.reason}
{evt.message}
))}
{hasLogs && viewLogsCallout} ) } return ( <> No events found. {hasLogs && viewLogsCallout} ) case RequestStatus.NOT_READY: default: return } }} )}
) export default RunEvents ================================================ FILE: ui/src/components/RunSidebar.tsx ================================================ import * as React from "react" import { get } from "lodash" import { Card } from "@blueprintjs/core" import JsonView from "react-json-view" import { ExecutionEngine, Run, ExecutableType } from "../types" import EnvList from "./EnvList" import RunAttributes from "./RunAttributes" import RunDebugAttributes from "./RunDebugAttributes" import { JSON_VIEW_PROPS } from "../constants" const RunSidebar: React.FC<{ data: Run }> = ({ data }) => { const templatePayload = get( data, ["execution_request_custom", "template_payload"], {} ) return (
{data && data.executable_type === ExecutableType.ExecutableTypeTemplate && (
Template Payload
)}
Environment Variables
{data && data.engine === ExecutionEngine.EKS && ( )}
) } export default RunSidebar ================================================ FILE: ui/src/components/RunStatusSelect.tsx ================================================ import * as React from "react" import { isArray } from "lodash" import Select from "react-select" import { SelectOption, MultiSelectProps, RunStatus } from "../types" import * as helpers from "../helpers/selectHelpers" const RunStatusSelect: React.FunctionComponent = props => { let v: SelectOption[] if (!isArray(props.value)) { v = [helpers.stringToSelectOpt(props.value)] } else { v = props.value.map(helpers.stringToSelectOpt) } return ( value={v} options={[ { label: RunStatus.PENDING, value: RunStatus.PENDING }, { label: RunStatus.QUEUED, value: RunStatus.QUEUED }, { label: RunStatus.RUNNING, value: RunStatus.RUNNING }, ]} onChange={option => { props.onChange(helpers.preprocessMultiSelectOption(option)) }} isMulti styles={helpers.selectStyles} theme={helpers.selectTheme} isDisabled={props.isDisabled} /> ) } export default RunStatusSelect ================================================ FILE: ui/src/components/RunTag.tsx ================================================ import * as React from "react" import { Run } from "../types" import { Tag, Colors } from "@blueprintjs/core" import { RUN_STATUS_COLOR_MAP } from "../constants" import getEnhancedRunStatus from "../helpers/getEnhancedRunStatus" const RunTag: React.FunctionComponent = run => { const enhancedStatus = getEnhancedRunStatus(run) return ( {enhancedStatus} ) } export default RunTag ================================================ FILE: ui/src/components/Runs.tsx ================================================ import * as React from "react" import { Link } from "react-router-dom" import { get, omit, isArray, isString } from "lodash" import { DebounceInput } from "react-debounce-input" import ListRequest, { ChildProps as ListRequestChildProps } from "./ListRequest" import api from "../api" import { ListRunParams, ListRunResponse, SortOrder, Run, RunStatus, } from "../types" import pageToOffsetLimit from "../helpers/pageToOffsetLimit" import Table from "./Table" import ViewHeader from "./ViewHeader" import ListFiltersDropdown from "./ListFiltersDropdown" import Pagination from "./Pagination" import GenericMultiSelect from "./GenericMultiSelect" import RunStatusSelect from "./RunStatusSelect" import { FormGroup, Classes, Spinner, Tag } from "@blueprintjs/core" import { PAGE_SIZE } from "../constants" import { RequestStatus } from "./Request" import ErrorCallout from "./ErrorCallout" import ISO8601AttributeValue from "./ISO8601AttributeValue" import RunTag from "./RunTag" import EnvQueryFilter from "./EnvQueryFilter" export const initialQuery = { page: 1, sort_by: "started_at", order: SortOrder.DESC, status: [RunStatus.PENDING, RunStatus.QUEUED, RunStatus.RUNNING], } export type Props = ListRequestChildProps< ListRunResponse, { params: ListRunParams } > export const Runs: React.FunctionComponent = ({ data, updateSort, currentSortKey, currentSortOrder, updatePage, currentPage, query, updateFilter, isLoading, requestStatus, error, }) => { let content: React.ReactNode switch (requestStatus) { case RequestStatus.ERROR: content = break case RequestStatus.READY: content = ( items={get(data, "history", [])} getItemKey={(r: Run) => r.run_id} updateSort={updateSort} currentSortKey={currentSortKey} currentSortOrder={currentSortOrder} columns={{ status: { displayName: "Status", render: (r: Run) => , isSortable: true, }, started_at: { displayName: "Started At", render: (r: Run) 
=> , isSortable: true, }, run_id: { displayName: "Run ID", render: (r: Run) => ( {r.run_id} ), isSortable: true, }, alias: { displayName: "Alias", render: (r: Run) => ( {r.alias} ), isSortable: false, }, engine: { displayName: "Engine", render: (r: Run) => {r.engine}, isSortable: false, }, }} /> ) break case RequestStatus.NOT_READY: default: content = break } // Preprocess `env` query to ensure that it's an array. let env: string | string[] = get(query, "env", []) if (!isArray(env) && isString(env)) env = [env] return ( <>
{ updateFilter("alias", value) }} isDisabled={false} /> { updateFilter("status", value) }} isDisabled={false} /> { updateFilter("env", value) }} /> { updateFilter("cluster_name", value) }} isDisabled={false} /> ) => { updateFilter("started_at_since", evt.target.value) }} /> ) => { updateFilter("started_at_until", evt.target.value) }} /> ) => { updateFilter("finished_at_since", evt.target.value) }} /> ) => { updateFilter("finished_at_until", evt.target.value) }} />
{content} ) } const ConnectedRuns: React.FunctionComponent<{}> = () => ( requestFn={api.listRun} initialQuery={initialQuery} getRequestArgs={params => ({ params: { ...omit(params, "page"), ...pageToOffsetLimit({ page: get(params, "page", 1), limit: PAGE_SIZE, }), }, })} > {props => } ) export default ConnectedRuns ================================================ FILE: ui/src/components/SettingsButton.tsx ================================================ import * as React from "react" import { useSelector, useDispatch } from "react-redux" import { Formik, Form, FastField, Field } from "formik" import { Classes, Button, Dialog, Switch, FormGroup, Intent, } from "@blueprintjs/core" import { RootState } from "../state/store" import { Settings, update, toggleDialogVisibilityChange, } from "../state/settings" const SettingsButton: React.FC = () => { const dispatch = useDispatch() const { settings, isSettingsDialogOpen, isLoading } = useSelector( (s: RootState) => s.settings ) return ( <> { dispatch(toggleDialogVisibilityChange(false)) }} className="bp3-dark" title={`Settings (v${process.env.REACT_APP_VERSION})`} > initialValues={settings} onSubmit={values => { dispatch(update(values)) }} > {({ values, setFieldValue }) => { return (
{ setFieldValue( "USE_OPTIMIZED_LOG_RENDERER", !values.USE_OPTIMIZED_LOG_RENDERER ) }} label="Use optimized log renderer." /> { setFieldValue( "SHOULD_OVERRIDE_CMD_F_IN_RUN_VIEW", !values.SHOULD_OVERRIDE_CMD_F_IN_RUN_VIEW ) }} label="Override ⌘-F in run view." disabled={values.USE_OPTIMIZED_LOG_RENDERER === false} />
) }}
) } export default SettingsButton ================================================ FILE: ui/src/components/SortableTh.tsx ================================================ import * as React from "react" import { SortOrder } from "../types" export type Props = { isSortable: boolean isActive: boolean order: SortOrder onClick: () => void } const Th: React.FunctionComponent = ({ isSortable, isActive, order, children, onClick, }) => { let className = "" if (isSortable) { className += "flotilla-th-sortable" if (isActive) { className += " active" if (order === SortOrder.ASC) { className += " active-asc" } else { className += " active-desc" } } } return ( {children} ) } export default Th ================================================ FILE: ui/src/components/StopRunButton.tsx ================================================ import * as React from "react" import { Button, Dialog, Intent, Classes } from "@blueprintjs/core" import Request, { ChildProps } from "./Request" import api from "../api" import Toaster from "./Toaster" import { withRouter, RouteComponentProps } from "react-router-dom" import ErrorCallout from "./ErrorCallout" type Args = { definitionID: string; runID: string } export type Props = ChildProps & ConnectedProps type State = { isOpen: boolean } export class StopRunButton extends React.Component { constructor(props: Props) { super(props) this.handleSubmitClick = this.handleSubmitClick.bind(this) this.openDialog = this.openDialog.bind(this) this.closeDialog = this.closeDialog.bind(this) } state = { isOpen: false, } openDialog() { this.setState({ isOpen: true }) } closeDialog() { this.setState({ isOpen: false }) } handleSubmitClick() { this.props.request({ definitionID: this.props.definitionID, runID: this.props.runID, }) this.closeDialog() } render() { const { error, isLoading } = this.props return ( <>
{error && } Are you sure you want to stop this run?
) } } type ConnectedProps = { definitionID: string runID: string } const Connected: React.FunctionComponent< RouteComponentProps & ConnectedProps > = ({ runID, definitionID, history }) => ( requestFn={api.stopRun} initialRequestArgs={{ runID, definitionID }} shouldRequestOnMount={false} onSuccess={() => { Toaster.show({ message: "Run stopped!", intent: Intent.SUCCESS, }) }} onFailure={() => { Toaster.show({ message: "An error occurred.", intent: Intent.DANGER, }) }} > {requestProps => ( )} ) export default withRouter(Connected) ================================================ FILE: ui/src/components/Table.tsx ================================================ import * as React from "react" import { HTMLTable, Callout } from "@blueprintjs/core" import { isArray } from "lodash" import SortableTh from "./SortableTh" import { SortOrder } from "../types" type Column = { displayName: string render: (item: ItemType) => React.ReactNode isSortable: boolean } type Props = { items: ItemType[] columns: { [key: string]: Column } getItemKey: (item: ItemType, index: number) => any updateSort: (sortKey: string) => void currentSortKey: string currentSortOrder: SortOrder } class Table extends React.Component> { render() { const { columns, items, getItemKey, updateSort, currentSortKey, currentSortOrder, } = this.props if (isArray(items) && items.length > 0) { return ( {Object.entries(columns).map(([k, v]) => ( { if (v.isSortable === true) { updateSort(k) } }} key={k} > {v.displayName} ))} {items.map((item, i) => ( {Object.entries(columns).map(([k, v]) => ( {v.render(item)} ))} ))} ) } return No items were found. } } export default Table ================================================ FILE: ui/src/components/TagsSelect.tsx ================================================ import * as React from "react" import { get, isArray } from "lodash" import Creatable from "react-select/lib/Creatable" import Request from "./Request" import { ListTagsResponse, SelectOption, MultiSelectProps } from "../types" import api from "../api" import * as helpers from "../helpers/selectHelpers" export const TagsSelect: React.FunctionComponent = props => ( isMulti value={props.value.map(helpers.stringToSelectOpt)} options={props.options} onChange={options => { props.onChange(helpers.preprocessMultiSelectOption(options)) }} styles={helpers.selectStyles} theme={helpers.selectTheme} closeMenuOnSelect={false} isDisabled={props.isDisabled} /> ) const ConnectedTagsSelect: React.FunctionComponent = props => ( requestFn={api.listTags}> {res => { let options = get(res, ["data", "tags"], []) if (!isArray(options)) options = [] return ( ) }} ) export default ConnectedTagsSelect ================================================ FILE: ui/src/components/Task.tsx ================================================ import * as React from "react" import { Switch, Route, RouteComponentProps } from "react-router-dom" import { get } from "lodash" import Request, { ChildProps, RequestStatus } from "./Request" import api from "../api" import { Task as TaskShape, Task as TaskTypeDef } from "../types" import TaskDetails from "./TaskDetails" import UpdateTaskForm from "./UpdateTaskForm" import TaskExecutionForm from "./TaskExecutionForm" import CreateTaskForm from "./CreateTaskForm" import ErrorCallout from "./ErrorCallout" import { Spinner } from "@blueprintjs/core" export type TaskCtx = ChildProps & { basePath: string definitionID: string } export const TaskContext = React.createContext({ data: null, requestStatus: RequestStatus.NOT_READY, isLoading: false, 
error: null, request: () => {}, basePath: "", // TODO: maybe this is not required. definitionID: "", receivedAt: null, }) export const Task: React.FunctionComponent = props => { return ( ( {ctx => { switch (ctx.requestStatus) { case RequestStatus.ERROR: return case RequestStatus.READY: return ( { ctx.request({ definitionID: data.definition_id }) }} initialValues={{ env: get(props, ["data", "env"], []), image: get(props, ["data", "image"], ""), group_name: get(props, ["data", "group_name"], ""), cpu: get(props, ["data", "cpu"], ""), memory: get(props, ["data", "memory"], ""), command: get(props, ["data", "command"], ""), tags: get(props, ["data", "tags"], []), alias: "", }} /> ) case RequestStatus.NOT_READY: return default: return null } }} )} /> ) } type ConnectedProps = RouteComponentProps<{ definitionID: string }> const Connected: React.FunctionComponent = ({ match }) => ( requestFn={api.getTask} initialRequestArgs={{ definitionID: match.params.definitionID }} > {props => ( )} ) export default Connected ================================================ FILE: ui/src/components/TaskDetails.tsx ================================================ import * as React from "react" import { Link } from "react-router-dom" import { Collapse, Card, ButtonGroup, Pre, Classes, Button, Spinner, Icon, } from "@blueprintjs/core" import { TaskContext } from "./Task" import Attribute from "./Attribute" import TaskRuns from "./TaskRuns" import ViewHeader from "./ViewHeader" import EnvList from "./EnvList" import DeleteTaskButton from "./DeleteTaskButton" import Toggler from "./Toggler" import { RequestStatus } from "./Request" import ErrorCallout from "./ErrorCallout" import ARASwitch from "./ARASwitch" const TaskDetails: React.FC<{}> = () => ( {({ requestStatus, data, error, definitionID, request }) => { switch (requestStatus) { case RequestStatus.ERROR: return case RequestStatus.READY: if (data) { return ( <>
Copy
Update
Run } />
{({ isVisible, toggleVisibility }) => (
Attributes
} description={ Adaptive CPU and memory resource allocation based on prior run history. } /> {data.command} } />
)}
{data.env && ( {({ isVisible, toggleVisibility }) => (
Environment Variables
)}
)}
) } return null case RequestStatus.NOT_READY: default: return } }}
) export default TaskDetails ================================================ FILE: ui/src/components/TaskExecutionForm.tsx ================================================ import * as React from "react" import { Formik, Form, FastField, Field } from "formik" import * as Yup from "yup" import { RouteComponentProps } from "react-router-dom" import { FormGroup, Button, Intent, Spinner, Classes, RadioGroup, Radio, } from "@blueprintjs/core" import api from "../api" import { LaunchRequestV2, Run, ExecutionEngine } from "../types" import { getInitialValuesForTaskExecutionForm } from "../helpers/getInitialValuesForExecutionForm" import Request, { ChildProps as RequestChildProps, RequestStatus, } from "./Request" import EnvFieldArray from "./EnvFieldArray" import ClusterSelect from "./ClusterSelect" import { TaskContext, TaskCtx } from "./Task" import Toaster from "./Toaster" import ErrorCallout from "./ErrorCallout" import FieldError from "./FieldError" import NodeLifecycleSelect from "./NodeLifecycleSelect" import * as helpers from "../helpers/runFormHelpers" import { commandFieldSpec } from "../helpers/taskFormHelpers" const validationSchema = Yup.object().shape({ owner_id: Yup.string(), cluster: Yup.string().required("Required"), memory: Yup.number() .required("Required") .min(0), cpu: Yup.number() .required("Required") .min(512), env: Yup.array().of( Yup.object().shape({ name: Yup.string().required(), value: Yup.string().required(), }) ), engine: Yup.string() .matches(/(eks|ecs)/) .required("A valid engine type of ecs or eks must be set."), node_lifecycle: Yup.string().matches(/(spot|ondemand)/), command: Yup.string() .min(1) .nullable(), }) type Props = RequestChildProps< Run, { definitionID: string; data: LaunchRequestV2 } > & { definitionID: string initialValues: LaunchRequestV2 } const TaskExecutionForm: React.FC = ({ initialValues, request, requestStatus, isLoading, error, definitionID, }) => ( validationSchema.isValidSync(values.initialValues) } initialValues={initialValues} validationSchema={validationSchema} onSubmit={data => { request({ definitionID, data }) }} > {({ errors, values, setFieldValue, isValid, ...rest }) => { const getEngine = (): ExecutionEngine => values.engine return (
{requestStatus === RequestStatus.ERROR && error && ( )} {/* Owner ID Field */} {errors.owner_id && {errors.owner_id}} {/* Engine Type Field */} ) => { setFieldValue("engine", evt.currentTarget.value) if (evt.currentTarget.value === ExecutionEngine.EKS) { setFieldValue( "cluster", process.env.REACT_APP_EKS_CLUSTER_NAME || "" ) } else if (getEngine() === ExecutionEngine.EKS) { setFieldValue("cluster", "") } }} selectedValue={values.engine} > {/* Cluster Field. Note: this is a "Field" rather than a "FastField" as it needs to re-render when value.engine is updated. */} {getEngine() !== ExecutionEngine.EKS && ( { setFieldValue("cluster", value) }} /> {errors.cluster && {errors.cluster}} )} {/* CPU Field */} {errors.cpu && {errors.cpu}} {/* Memory Field */} {errors.memory && {errors.memory}} { setFieldValue(helpers.nodeLifecycleFieldSpec.name, value) }} isDisabled={getEngine() !== ExecutionEngine.EKS} /> {errors.node_lifecycle && ( {errors.node_lifecycle} )} {errors.command && {errors.command}} ) }}
) const Connected: React.FunctionComponent> = ({ location, history }) => ( requestFn={api.runTask} shouldRequestOnMount={false} onSuccess={(data: Run) => { Toaster.show({ message: `Run ${data.run_id} submitted successfully!`, intent: Intent.SUCCESS, }) history.push(`/runs/${data.run_id}`) }} onFailure={() => { Toaster.show({ message: "An error occurred.", intent: Intent.DANGER, }) }} > {requestProps => ( {(ctx: TaskCtx) => { switch (ctx.requestStatus) { case RequestStatus.ERROR: return case RequestStatus.READY: if (ctx.data) { const initialValues: LaunchRequestV2 = getInitialValuesForTaskExecutionForm( ctx.data, location.state ) return ( ) } break case RequestStatus.NOT_READY: default: return } }} )} ) export default Connected ================================================ FILE: ui/src/components/TaskRuns.tsx ================================================ import * as React from "react" import { Link } from "react-router-dom" import { get, omit, isArray, isString } from "lodash" import ListRequest, { ChildProps as ListRequestChildProps } from "./ListRequest" import api from "../api" import { ListTaskRunsParams, ListTaskRunsResponse, SortOrder, Run, RunStatus, ExecutionEngine, } from "../types" import pageToOffsetLimit from "../helpers/pageToOffsetLimit" import Table from "./Table" import { FormGroup, Classes, Spinner, Tag } from "@blueprintjs/core" import GenericMultiSelect from "./GenericMultiSelect" import RunStatusSelect from "./RunStatusSelect" import ListFiltersDropdown from "./ListFiltersDropdown" import { DebounceInput } from "react-debounce-input" import Pagination from "./Pagination" import { PAGE_SIZE } from "../constants" import { RequestStatus } from "./Request" import ErrorCallout from "./ErrorCallout" import RunTag from "./RunTag" import ISO8601AttributeValue from "./ISO8601AttributeValue" import EnvQueryFilter from "./EnvQueryFilter" import Duration from "./Duration" export const initialQuery = { page: 1, sort_by: "started_at", order: SortOrder.DESC, } export type Props = ListRequestChildProps< ListTaskRunsResponse, { params: ListTaskRunsParams } > export const TaskRuns: React.FunctionComponent = ({ data, updateSort, currentSortKey, currentSortOrder, query, updateFilter, updatePage, currentPage, isLoading, requestStatus, error, }) => { let content: React.ReactNode // Preprocess `env` query to ensure that it's an array. let env: string | string[] = get(query, "env", []) if (!isArray(env) && isString(env)) env = [env] switch (requestStatus) { case RequestStatus.ERROR: content = break case RequestStatus.READY: content = ( items={get(data, "history", [])} getItemKey={(r: Run) => r.run_id} updateSort={updateSort} currentSortKey={currentSortKey} currentSortOrder={currentSortOrder} columns={{ run_id: { displayName: "Run ID", render: (r: Run) => ( {r.run_id} ), isSortable: true, }, status: { displayName: "Status", render: (r: Run) => , isSortable: true, }, engine: { displayName: "Engine", render: (r: Run) => {r.engine}, isSortable: false, }, duration: { displayName: "Duration", render: (r: Run) => r.started_at ? ( ) : ( "-" ), isSortable: false, }, started_at: { displayName: "Started At", render: (r: Run) => ( ), isSortable: true, }, finished_at: { displayName: "Finished At", render: (r: Run) => ( ), isSortable: true, }, cluster: { displayName: "Cluster", render: (r: Run) => r.engine === ExecutionEngine.EKS ? "-" : r.cluster, isSortable: false, }, }} /> ) break case RequestStatus.NOT_READY: default: content = break } return ( <>
{ updateFilter("status", value) }} isDisabled={false} /> { updateFilter("env", value) }} /> { updateFilter("cluster_name", value) }} isDisabled={false} /> ) => { updateFilter("started_at_since", evt.target.value) }} /> ) => { updateFilter("started_at_until", evt.target.value) }} /> ) => { updateFilter("finished_at_since", evt.target.value) }} /> ) => { updateFilter("finished_at_until", evt.target.value) }} />
{content} ) } const ConnectedTaskRuns: React.FunctionComponent<{ definitionID: string }> = ({ definitionID, }) => ( requestFn={api.listTaskRuns} initialQuery={initialQuery} // @TODO: this function should be extracted and tested. getRequestArgs={params => ({ definitionID, params: { ...omit(params, "page"), ...pageToOffsetLimit({ page: get(params, "page", 1), limit: PAGE_SIZE, }), }, })} > {props => } ) export default ConnectedTaskRuns ================================================ FILE: ui/src/components/Tasks.tsx ================================================ import * as React from "react" import { Link } from "react-router-dom" import { get, omit } from "lodash" import { DebounceInput } from "react-debounce-input" import { FormGroup, Classes, Spinner } from "@blueprintjs/core" import ListRequest, { ChildProps as ListRequestChildProps } from "./ListRequest" import api from "../api" import { ListTaskParams, ListTaskResponse, SortOrder, Task } from "../types" import pageToOffsetLimit from "../helpers/pageToOffsetLimit" import Table from "./Table" import Pagination from "./Pagination" import GroupNameSelect from "./GroupNameSelect" import ViewHeader from "./ViewHeader" import ListFiltersDropdown from "./ListFiltersDropdown" import { PAGE_SIZE } from "../constants" import { RequestStatus } from "./Request" import ErrorCallout from "./ErrorCallout" export const initialQuery = { page: 1, sort_by: "alias", order: SortOrder.ASC, } export type Props = ListRequestChildProps< ListTaskResponse, { params: ListTaskParams } > export const Tasks: React.FunctionComponent = props => { const { query, data, updateFilter, updatePage, updateSort, currentPage, currentSortKey, currentSortOrder, isLoading, requestStatus, error, } = props let content: React.ReactNode switch (requestStatus) { case RequestStatus.ERROR: content = break case RequestStatus.READY: content = ( items={get(data, "definitions", [])} getItemKey={(task: Task) => task.definition_id} updateSort={updateSort} currentSortKey={currentSortKey} currentSortOrder={currentSortOrder} columns={{ alias: { displayName: "Alias", render: (item: Task) => ( {item.alias} ), isSortable: true, }, group_name: { displayName: "Group Name", render: (item: Task) => item.group_name, isSortable: true, }, image: { displayName: "Image", render: (item: Task) => item.image, isSortable: true, }, memory: { displayName: "Memory (MB)", render: (item: Task) => item.memory, isSortable: true, }, }} /> ) break case RequestStatus.NOT_READY: default: content = break } return ( <> Create Task } />
) => { updateFilter("alias", evt.target.value) }} placeholder="Search by task alias..." /> { updateFilter("group_name", value) }} isDisabled={false} /> ) => { updateFilter("image", evt.target.value) }} />
{content} ) } const ConnectedTasks: React.FunctionComponent = () => ( requestFn={api.listTasks} initialQuery={initialQuery} getRequestArgs={params => ({ params: { ...omit(params, "page"), ...pageToOffsetLimit({ page: get(params, "page", 1), limit: PAGE_SIZE, }), }, })} > {props => } ) export default ConnectedTasks ================================================ FILE: ui/src/components/Template.tsx ================================================ import * as React from "react" import { Switch, Route, RouteComponentProps } from "react-router-dom" import Request, { ChildProps, RequestStatus } from "./Request" import api from "../api" import { Template as TemplateShape } from "../types" import TemplateDetails from "./TemplateDetails" import TemplateExecutionForm from "./TemplateExecutionForm" export type TemplateCtx = ChildProps & { basePath: string templateID: string } export const TemplateContext = React.createContext({ data: null, requestStatus: RequestStatus.NOT_READY, isLoading: false, error: null, request: () => {}, basePath: "", // TODO: maybe this is not required. templateID: "", receivedAt: null, }) export const Template: React.FunctionComponent = props => { return ( ) } type ConnectedProps = RouteComponentProps<{ templateID: string }> const Connected: React.FunctionComponent = ({ match }) => ( requestFn={api.getTemplate} initialRequestArgs={{ templateID: match.params.templateID }} > {props => (