Repository: obsidiandynamics/goharvest Branch: master Commit: 1239a594e9dc Files: 40 Total size: 169.9 KB Directory structure: gitextract_j1n1kb08/ ├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── battery.go ├── battery_test.go ├── cmd/ │ ├── goharvest_example/ │ │ └── example_main.go │ ├── pump/ │ │ └── pump_main.go │ └── reaper/ │ └── reaper_main.go ├── config.go ├── config_test.go ├── db.go ├── db_mock_test.go ├── event.go ├── event_test.go ├── examples/ │ ├── reaper.yaml │ └── reaper_secure.yaml ├── go.mod ├── go.sum ├── goharvest_doc_test.go ├── harvest.go ├── harvest_test.go ├── int/ │ ├── faulty_kafka_test.go │ └── harvest_int_test.go ├── kafka.go ├── kafka_mock_test.go ├── metric/ │ ├── meter.go │ ├── meter_test.go │ └── metric.go ├── neli.go ├── postgres.go ├── postgres_test.go ├── sh/ │ ├── .gitignore │ ├── build-librdkafka.sh │ ├── init-outbox.sh │ └── soak.sh └── stasher/ ├── stasher.go ├── stasher_doc_test.go └── statsher_test.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .vscode .DS_Store *.cer *.pem /bin /log* ================================================ FILE: .travis.yml ================================================ language: go go: - 1.13.x - 1.14.x services: - docker before_install: - | docker run --name kafka --rm -d -p 2181:2181 -p 9092:9092 \ -e KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092 \ obsidiandynamics/kafka - | docker run --name postgres --rm -d -p 5432:5432 \ -e POSTGRES_HOST_AUTH_METHOD=trust \ postgres:12 - go get -u -v all script: - make - make int after_success: - bash <(curl -s https://codecov.io/bash) ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2020, Obsidian Dynamics All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
================================================ FILE: Makefile ================================================ default: build test all: test lint build: dirs go build -race -o bin ./... test: dirs go test ./... -race -count=1 -coverprofile=bin/coverage.out soaktest: dirs SOAK_CMD="make test" sh/soak.sh int: FORCE GOLABELS=int go test -timeout 180s -v -race -count=1 ./int soakint: FORCE SOAK_CMD="make int" sh/soak.sh dirs: mkdir -p bin lint: golint ./... clean: rm -rf bin list: FORCE @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' FORCE: ================================================ FILE: README.md ================================================ logo  === ![Go version](https://img.shields.io/github/go-mod/go-version/obsidiandynamics/goharvest) [![Build](https://travis-ci.org/obsidiandynamics/goharvest.svg?branch=master) ](https://travis-ci.org/obsidiandynamics/goharvest#) ![Release](https://img.shields.io/github/v/release/obsidiandynamics/goharvest?color=ff69b4) [![Codecov](https://codecov.io/gh/obsidiandynamics/goharvest/branch/master/graph/badge.svg)](https://codecov.io/gh/obsidiandynamics/goharvest) [![Go Report Card](https://goreportcard.com/badge/github.com/obsidiandynamics/goharvest)](https://goreportcard.com/report/github.com/obsidiandynamics/goharvest) [![Total alerts](https://img.shields.io/lgtm/alerts/g/obsidiandynamics/goharvest.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/obsidiandynamics/goharvest/alerts/) [![GoDoc Reference](https://img.shields.io/badge/docs-GoDoc-blue.svg)](https://pkg.go.dev/github.com/obsidiandynamics/goharvest?tab=doc) `goharvest` is a Go implementation of the [Transactional Outbox](https://microservices.io/patterns/data/transactional-outbox.html) pattern for Postgres and Kafka. Transactional Outbox While `goharvest` is a complex beast, the end result is dead simple: to publish Kafka messages reliably and atomically, simply write a record to a dedicated **outbox table** in a transaction, alongside any other database changes. (Outbox schema provided below.) `goharvest` scrapes the outbox table in the background and publishes records to a Kafka topic of the application's choosing, using the key, value and headers specified in the outbox record. `goharvest` currently works with Postgres. It maintains causal order of messages and does not require CDC to be enabled on the database, making for a zero-hassle setup. It handles thousands of records/second on commodity hardware. # Getting started ## 1. Create an outbox table for your application ```sql CREATE TABLE IF NOT EXISTS outbox ( id BIGSERIAL PRIMARY KEY, create_time TIMESTAMP WITH TIME ZONE NOT NULL, kafka_topic VARCHAR(249) NOT NULL, kafka_key VARCHAR(100) NOT NULL, -- pick your own maximum key size kafka_value VARCHAR(10000), -- pick your own maximum value size kafka_header_keys TEXT[] NOT NULL, kafka_header_values TEXT[] NOT NULL, leader_id UUID ) ``` ## 2. Run `goharvest` ### Standalone mode This runs `goharvest` within a separate process called `reaper`, which will work alongside **any** application that writes to a standard outbox. (Not just applications written in Go.) 
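For illustration, here is a minimal sketch of what such an application might look like in Go, using `database/sql` with the `lib/pq` driver that this repo already depends on. The `payments` table, the topic name and the literal values are hypothetical; the outbox columns follow the schema above, and their semantics are described under "Write outbox records" below. All the reaper requires is that the outbox row is written in the same transaction as the application's own updates.

```go
import (
    "database/sql"
    "time"

    "github.com/lib/pq" // Postgres driver; also provides pq.Array for TEXT[] columns.
)

// saveAndStash persists an application row and an outbox row in one transaction.
// The reaper later picks up the outbox row and publishes it to Kafka.
func saveAndStash(db *sql.DB) error {
    tx, err := db.Begin()
    if err != nil {
        return err
    }
    defer tx.Rollback() // No-op if the transaction was committed.

    // The application's own update (hypothetical table).
    if _, err := tx.Exec("INSERT INTO payments (amount) VALUES ($1)", 42); err != nil {
        return err
    }

    // The outbox record, captured atomically with the update above.
    _, err = tx.Exec(`INSERT INTO outbox (create_time, kafka_topic, kafka_key, kafka_value, kafka_header_keys, kafka_header_values)
                      VALUES ($1, $2, $3, $4, $5, $6)`,
        time.Now(), "payments", "payment-1", `{"amount": 42}`, pq.Array([]string{}), pq.Array([]string{}))
    if err != nil {
        return err
    }
    return tx.Commit()
}
```

The same effect can be achieved with the `stasher` helper package, shown later in this README.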
#### Install `reaper`

```sh
go get -u github.com/obsidiandynamics/goharvest/cmd/reaper
```

#### Create `reaper.yaml` configuration

```yaml
harvest:
  baseKafkaConfig:
    bootstrap.servers: localhost:9092
  producerKafkaConfig:
    compression.type: lz4
    delivery.timeout.ms: 10000
  leaderTopic: my-app-name
  leaderGroupID: my-app-name
  dataSource: host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable
  outboxTable: outbox
  limits:
    minPollInterval: 1s
    heartbeatTimeout: 5s
    maxInFlightRecords: 1000
    minMetricsInterval: 5s
    sendConcurrency: 4
    sendBuffer: 10
logging:
  level: Debug
```

#### Start `reaper`

```sh
reaper -f reaper.yaml
```

### Embedded mode

`goharvest` can be run in the same process as your application.

#### Add the dependency

```sh
go get -u github.com/obsidiandynamics/goharvest
```

#### Create and start a `Harvest` instance

```go
import "github.com/obsidiandynamics/goharvest"
```

```go
// Configure the harvester. It will use its own database and Kafka connections under the hood.
config := Config{
    BaseKafkaConfig: KafkaConfigMap{
        "bootstrap.servers": "localhost:9092",
    },
    DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
}

// Create a new harvester.
harvest, err := New(config)
if err != nil {
    panic(err)
}

// Start harvesting in the background.
err = harvest.Start()
if err != nil {
    panic(err)
}

// Wait indefinitely for the harvester to end.
log.Fatal(harvest.Await())
```

### Using a custom logger

`goharvest` uses `log.Printf` for output by default. Logger configuration is courtesy of the Scribe façade, from [libstdgo](https://github.com/obsidiandynamics/libstdgo). The example below uses a Logrus binding for Scribe.

```go
import (
    "github.com/obsidiandynamics/goharvest"
    "github.com/obsidiandynamics/libstdgo/scribe"
    scribelogrus "github.com/obsidiandynamics/libstdgo/scribe/logrus"
    "github.com/sirupsen/logrus"
)
```

```go
log := logrus.StandardLogger()
log.SetLevel(logrus.DebugLevel)

// Configure the custom logger using a binding.
config := Config{
    BaseKafkaConfig: KafkaConfigMap{
        "bootstrap.servers": "localhost:9092",
    },
    Scribe:     scribe.New(scribelogrus.Bind()),
    DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
}
```

### Listening for leader status updates

Just like `goharvest` uses [NELI](https://github.com/obsidiandynamics/goneli) to piggy-back on Kafka's leader election, you can piggy-back on `goharvest` to get leader status updates:

```go
log := logrus.StandardLogger()
log.SetLevel(logrus.TraceLevel)

config := Config{
    BaseKafkaConfig: KafkaConfigMap{
        "bootstrap.servers": "localhost:9092",
    },
    DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
    Scribe:     scribe.New(scribelogrus.Bind()),
}

// Create a new harvester and register an event handler.
harvest, err := New(config)

// Register a handler callback, invoked when an event occurs within goharvest.
// The callback is completely optional; it lets the application piggy-back on leader
// status updates, in case it needs to schedule some additional work (other than
// harvesting outbox records) that should only be run on one process at any given time.
harvest.SetEventHandler(func(e Event) {
    switch event := e.(type) {
    case LeaderAcquired:
        // The application may initialise any state necessary to perform work as a leader.
        log.Infof("Got event: leader acquired: %v", event.LeaderID())
    case LeaderRefreshed:
        // Indicates that a new leader ID was generated, as a result of having to remark
        // a record (typically due to an earlier delivery error). This is purely
        // informational; there is nothing an application should do about this, other
        // than taking note of the new leader ID if it has come to rely on it.
        log.Infof("Got event: leader refreshed: %v", event.LeaderID())
    case LeaderRevoked:
        // The application may block the callback until it wraps up any in-flight
        // activity. Only upon returning from the callback will a new leader be elected.
        log.Infof("Got event: leader revoked")
    case LeaderFenced:
        // The application must immediately terminate any ongoing activity, on the assumption
        // that another leader may be imminently elected. Unlike the handling of LeaderRevoked,
        // blocking in the callback will not prevent a new leader from being elected.
        log.Infof("Got event: leader fenced")
    case MeterRead:
        // Periodic statistics regarding the harvester's throughput.
        log.Infof("Got event: meter read: %v", event.Stats())
    }
})

// Start harvesting in the background.
err = harvest.Start()
```

### Which mode should I use?

Running `goharvest` in standalone mode using `reaper` is the recommended approach for most use cases, as it fully insulates the harvester from the rest of the application. Ideally, you should deploy `reaper` as a sidecar daemon, to run alongside your application. All the reaper needs is access to the outbox table and the Kafka cluster.

Embedded `goharvest` is useful if you require additional insights into its operation, which is accomplished by registering an `EventHandler` callback, as shown in the example above. This callback is invoked whenever the underlying leader status changes, which may be useful if you need to schedule additional workloads that should only be run on one process at any given time.

## 3. Write outbox records

### Directly, using SQL

You can write outbox records from any application by issuing the following `INSERT` statement:

```sql
INSERT INTO ${outbox_table} (
  create_time,
  kafka_topic,
  kafka_key,
  kafka_value,
  kafka_header_keys,
  kafka_header_values
)
VALUES (NOW(), $1, $2, $3, $4, $5)
```

Replace `${outbox_table}` and bind the query variables as appropriate:

* `kafka_topic` specifies an arbitrary topic name, which may differ among records.
* `kafka_key` is a mandatory `string` key. Each record must be published with a specified key, which will affect its placement among the topic's partitions.
* `kafka_value` is an optional `string` value. If unspecified, the record will be published with a `nil` value, allowing it to be used as a compaction tombstone.
* `kafka_header_keys` and `kafka_header_values` are arrays that specify the keys and values of record headers. When used, each element in `kafka_header_keys` corresponds to an element in `kafka_header_values` at the same index. If not using headers, set both arrays to empty.

> **Note**: **Writing outbox records should be performed in the same transaction as other related database updates.** Otherwise, messaging will not be atomic — the updates may be stably persisted while the message might be lost, and *vice versa*.

### Using `stasher`

The `goharvest` library comes with a `stasher` helper package for writing records to an outbox.

#### One-off messages

When one database update corresponds to one message, the easiest approach is to call `Stasher.Stash()`:

```go
import (
    "github.com/obsidiandynamics/goharvest"
    "github.com/obsidiandynamics/goharvest/stasher"
)
```

```go
db, err := sql.Open("postgres", "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable")
if err != nil {
    panic(err)
}
defer db.Close()

st := stasher.New("outbox")

// Begin a transaction.
tx, _ := db.Begin()
defer tx.Rollback()

// Update other database entities in transaction scope.

// Stash an outbox record for subsequent harvesting.
err = st.Stash(tx, goharvest.OutboxRecord{
    KafkaTopic: "my-app.topic",
    KafkaKey:   "hello",
    KafkaValue: goharvest.String("world"),
    KafkaHeaders: goharvest.KafkaHeaders{
        {Key: "applicationId", Value: "my-app"},
    },
})
if err != nil {
    panic(err)
}

// Commit the transaction.
tx.Commit()
```

#### Multiple messages

Sending multiple messages within a single transaction may be done more efficiently using prepared statements:

```go
// Begin a transaction.
tx, _ := db.Begin()
defer tx.Rollback()

// Update other database entities in transaction scope.
// ...

// Formulates a prepared statement that may be reused within the scope of the transaction.
prestash, _ := st.Prepare(tx)

// Publish a bunch of messages using the same prepared statement.
for i := 0; i < 10; i++ {
    // Stash an outbox record for subsequent harvesting.
    err = prestash.Stash(goharvest.OutboxRecord{
        KafkaTopic: "my-app.topic",
        KafkaKey:   "hello",
        KafkaValue: goharvest.String("world"),
        KafkaHeaders: goharvest.KafkaHeaders{
            {Key: "applicationId", Value: "my-app"},
        },
    })
    if err != nil {
        panic(err)
    }
}

// Commit the transaction.
tx.Commit()
```

# Configuration

There are a handful of parameters for configuring `goharvest`, assigned via the `Config` struct:
|Parameter|Default value|Description|
|:---|:---|:---|
|`BaseKafkaConfig`|Map containing `bootstrap.servers=localhost:9092`.|Configuration shared by the underlying Kafka producer and consumer clients, including those used for leader election.|
|`ProducerKafkaConfig`|Empty map.|Additional configuration on top of `BaseKafkaConfig` that is specific to the producer clients created by `goharvest` for publishing harvested messages. This configuration does not apply to the underlying NELI leader election protocol.|
|`LeaderGroupID`|Assumes the filename of the application binary.|Used by the underlying leader election protocol as a unique identifier shared by all instances in a group of competing processes. The `LeaderGroupID` is used as the Kafka `group.id` property under the hood, when subscribing to the leader election topic.|
|`LeaderTopic`|Assumes the value of `LeaderGroupID`, suffixed with the string `.neli`.|Used by NELI as the name of the Kafka topic for orchestrating leader election. Competing processes subscribe to the same topic under an identical consumer group ID, using Kafka's exclusive partition assignment as a mechanism for arbitrating leader status.|
|`DataSource`|Local Postgres data source `host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable`.|The database driver-specific data source string.|
|`OutboxTable`|`outbox`|The name of the outbox table, optionally including the schema name.|
|`Scribe`|Scribe configured with bindings for `log.Printf()`; effectively the result of running `scribe.New(scribe.StandardBinding())`.|The logging façade used by the library, preconfigured with your logger of choice. See Scribe GoDocs.|
|`Name`|A string in the form `{hostname}_{pid}_{time}`, where `{hostname}` is the result of invoking `os.Hostname()`, `{pid}` is the process ID, and `{time}` is the UNIX epoch time, in seconds.|The symbolic name of this instance. This field is informational only, accompanying all log messages.|
|`Limits.MinPollInterval`|100 ms|The lower bound on the poll interval, preventing the over-polling of Kafka on successive `Pulse()` invocations. Assuming `Pulse()` is called repeatedly by the application, NELI may poll Kafka at a longer interval than `MinPollInterval`. (Regular polling is necessary to prove the client's liveness and maintain internal partition assignment, but polling excessively is counterproductive.)|
|`Limits.HeartbeatTimeout`|5 s|The period that a leader will maintain its status without having received a heartbeat message on the leader topic. After the timeout elapses, the leader will assume a network partition and will voluntarily yield its status, signalling a `LeaderFenced` event to the application.|
|`Limits.QueueTimeout`|30 s|The maximum period of time a record may be queued after having been marked, before timing out and triggering a remark.|
|`Limits.MarkBackoff`|10 ms|The backoff delay introduced by the mark thread when a query returns no results, indicating the absence of backlogged records. A mark backoff prevents aggressive querying of the database in the absence of a steady flow of outbox records.|
|`Limits.IOErrorBackoff`|500 ms|The backoff delay introduced when any of the mark, purge or reset queries encounter a database error.|
|`Limits.MaxInFlightRecords`|1000|An upper bound on the number of marked records that may be in flight at any given time, i.e. the number of records that have been enqueued with a producer client but for which acknowledgements have yet to be received.|
|`Limits.SendConcurrency`|8|The number of concurrent shards used for queuing causally unrelated records. Each shard is equipped with a dedicated producer client, allowing its records to be sent independently of other shards.|
|`Limits.SendBuffer`|10|The maximum number of marked records that may be buffered for subsequent sending, for any given shard. When the buffer is full, the marker will halt, waiting for records to be sent and for their acknowledgements to flow through.|
|`Limits.MarkQueryRecords`|100|An upper bound on the number of records that may be marked in any given query. Limiting this number avoids long-running database queries.|
|`Limits.MinMetricsInterval`|5 s|The minimum interval at which throughput metrics are emitted. Metrics are emitted conservatively and may be observed less frequently; in fact, throughput metrics are only emitted upon a successful message acknowledgement, which will not occur during periods of inactivity.|
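Most of these parameters are optional. As a rough sketch of how they map onto the `Config` and `Limits` structs (the values below are illustrative, not recommendations), note that the optional `Limits` fields are pointers, built with the `Duration` and `Int` helpers from `config.go`; anything left unset is filled in by `SetDefaults()`:

```go
import (
    "time"

    "github.com/obsidiandynamics/goharvest"
)
```

```go
cfg := goharvest.Config{
    BaseKafkaConfig: goharvest.KafkaConfigMap{
        "bootstrap.servers": "localhost:9092",
    },
    LeaderGroupID: "my-app-name",
    DataSource:    "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
    OutboxTable:   "outbox",
    Limits: goharvest.Limits{
        MinPollInterval:    goharvest.Duration(100 * time.Millisecond),
        HeartbeatTimeout:   goharvest.Duration(5 * time.Second),
        MaxInFlightRecords: goharvest.Int(1000),
        SendConcurrency:    goharvest.Int(8),
    },
}

// Fill in the remaining optional fields and check the result.
cfg.SetDefaults()
if err := cfg.Validate(); err != nil {
    panic(err)
}
```

Calling `Validate()` after `SetDefaults()` catches misconfiguration before the harvester is started.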
# Docs [Design](https://github.com/obsidiandynamics/goharvest/wiki/Design) [Comparison of messaging patterns](https://github.com/obsidiandynamics/goharvest/wiki/Comparison-of-messaging-patterns) [Comparison of harvesting methods](https://github.com/obsidiandynamics/goharvest/wiki/Comparison-of-harvesting-methods) [FAQ](https://github.com/obsidiandynamics/goharvest/wiki/FAQ) ================================================ FILE: battery.go ================================================ package goharvest import ( "hash/fnv" ) type cell struct { records chan OutboxRecord done chan int } func (c cell) stop() { close(c.records) } func (c cell) await() { <-c.done } func (c cell) enqueue(rec OutboxRecord) bool { select { case <-c.done: return false case c.records <- rec: return true } } type cellHandler func(records chan OutboxRecord) func newCell(buffer int, handler cellHandler) cell { c := cell{ records: make(chan OutboxRecord), done: make(chan int), } go func() { defer close(c.done) handler(c.records) }() return c } type battery interface { stop() await() shutdown() enqueue(rec OutboxRecord) bool } type concurrentBattery []cell func (b *concurrentBattery) stop() { for _, c := range *b { c.stop() } } func (b *concurrentBattery) await() { for _, c := range *b { c.await() } } func (b *concurrentBattery) shutdown() { b.stop() b.await() } func (b *concurrentBattery) enqueue(rec OutboxRecord) bool { if length := len(*b); length > 1 { return (*b)[hash(rec.KafkaKey)%uint32(length)].enqueue(rec) } return (*b)[0].enqueue(rec) } func newConcurrentBattery(concurrency int, buffer int, handler cellHandler) *concurrentBattery { b := make(concurrentBattery, concurrency) for i := 0; i < concurrency; i++ { b[i] = newCell(buffer, handler) } return &b } func hash(str string) uint32 { algorithm := fnv.New32a() algorithm.Write([]byte(str)) return algorithm.Sum32() } ================================================ FILE: battery_test.go ================================================ package goharvest import ( "testing" "github.com/stretchr/testify/assert" ) func TestEnqueue_concurrencyOf1(t *testing.T) { enqueued := make(chan OutboxRecord) b := newConcurrentBattery(1, 0, func(records chan OutboxRecord) { for rec := range records { enqueued <- rec } }) defer b.shutdown() rec := OutboxRecord{} assert.True(t, b.enqueue(rec)) assert.Equal(t, rec, <-enqueued) } func TestEnqueue_concurrencyOf2(t *testing.T) { enqueued := make(chan OutboxRecord) b := newConcurrentBattery(2, 0, func(records chan OutboxRecord) { for rec := range records { enqueued <- rec } }) defer b.shutdown() rec := OutboxRecord{} assert.True(t, b.enqueue(rec)) assert.Equal(t, rec, <-enqueued) } func TestEnqueue_afterDone(t *testing.T) { b := newConcurrentBattery(2, 0, func(records chan OutboxRecord) {}) b.await() assert.False(t, b.enqueue(OutboxRecord{})) b.stop() } ================================================ FILE: cmd/goharvest_example/example_main.go ================================================ package main import ( "database/sql" "github.com/obsidiandynamics/goharvest" "github.com/obsidiandynamics/libstdgo/scribe" scribelogrus "github.com/obsidiandynamics/libstdgo/scribe/logrus" "github.com/sirupsen/logrus" ) func main() { const dataSource = "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable" // Optional: Ensure the database table exists before we start harvesting. 
func() { db, err := sql.Open("postgres", dataSource) if err != nil { panic(err) } defer db.Close() _, err = db.Exec(` CREATE TABLE IF NOT EXISTS outbox ( id BIGSERIAL PRIMARY KEY, create_time TIMESTAMP WITH TIME ZONE NOT NULL, kafka_topic VARCHAR(249) NOT NULL, kafka_key VARCHAR(100) NOT NULL, -- pick your own key size kafka_value VARCHAR(10000), -- pick your own value size kafka_header_keys TEXT[] NOT NULL, kafka_header_values TEXT[] NOT NULL, leader_id UUID ) `) if err != nil { panic(err) } }() // Configure the harvester. It will use its own database connections under the hood. log := logrus.StandardLogger() log.SetLevel(logrus.DebugLevel) config := goharvest.Config{ BaseKafkaConfig: goharvest.KafkaConfigMap{ "bootstrap.servers": "localhost:9092", }, DataSource: dataSource, Scribe: scribe.New(scribelogrus.Bind()), } // Create a new harvester. harvest, err := goharvest.New(config) if err != nil { panic(err) } // Start it. err = harvest.Start() if err != nil { panic(err) } // Wait indefinitely for it to end. log.Fatal(harvest.Await()) } ================================================ FILE: cmd/pump/pump_main.go ================================================ package main import ( "database/sql" "flag" "fmt" "log" "math/rand" "strconv" "time" "github.com/obsidiandynamics/goharvest" "github.com/obsidiandynamics/goharvest/metric" "github.com/obsidiandynamics/goharvest/stasher" ) const recordsPerTxn = 20 func main() { var keys, records, interval int var dataSource, outboxTable, kafkaTopic string var blank bool flag.IntVar(&keys, "keys", -1, "Number of unique keys") flag.IntVar(&records, "records", -1, "Number of records to generate") flag.IntVar(&interval, "interval", 0, "Write interval (in milliseconds") flag.StringVar(&dataSource, "ds", "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable", "Data source") flag.StringVar(&outboxTable, "outbox", "outbox", "Outbox table name") flag.StringVar(&kafkaTopic, "topic", "pump", "Kafka output topic name") flag.BoolVar(&blank, "blank", false, "Generate blank records (nil value)") flag.Parse() errorFunc := func(field string) { flag.PrintDefaults() panic(fmt.Errorf("required '-%s' has not been set", field)) } if keys == -1 { errorFunc("keys") } if records == -1 { errorFunc("records") } fmt.Printf("Starting stasher; keys: %d, records: %d, interval: %d ms\n", keys, records, interval) fmt.Printf(" Data source: %s\n", dataSource) fmt.Printf(" Outbox table name: %s\n", outboxTable) db, err := sql.Open("postgres", dataSource) if err != nil { panic(err) } defer db.Close() st := stasher.New(outboxTable) meter := metric.NewMeter("pump", 5*time.Second) var tx *sql.Tx var pre stasher.PreStash for i := 0; i < records; i++ { if i%recordsPerTxn == 0 { finaliseTx(tx) tx, err = db.Begin() if err != nil { panic(err) } pre, err = st.Prepare(tx) if err != nil { panic(err) } } rand := rand.Uint64() var value *string if !blank { value = goharvest.String(fmt.Sprintf("value-%x", rand)) } rec := goharvest.OutboxRecord{ KafkaTopic: kafkaTopic, KafkaKey: fmt.Sprintf("key-%x", rand%uint64(keys)), KafkaValue: value, KafkaHeaders: goharvest.KafkaHeaders{ goharvest.KafkaHeader{Key: "Seq", Value: strconv.Itoa(i)}, }, } err := pre.Stash(rec) if err != nil { panic(err) } time.Sleep(time.Duration(interval * int(time.Millisecond))) meter.Add(1) meter.MaybeStatsLog(log.Printf) } finaliseTx(tx) } func finaliseTx(tx *sql.Tx) { if tx != nil { err := tx.Commit() if err != nil { panic(err) } } } ================================================ FILE: 
cmd/reaper/reaper_main.go ================================================ package main import ( "flag" "fmt" "io/ioutil" "os" "github.com/obsidiandynamics/goharvest" "github.com/obsidiandynamics/libstdgo/scribe" "gopkg.in/yaml.v2" scribelogrus "github.com/obsidiandynamics/libstdgo/scribe/logrus" logrus "github.com/sirupsen/logrus" ) func panicOnError(scr scribe.Scribe, err error) { if err != nil { scr.E()("Error: %v", err.Error()) panic(err) } } func main() { var configFile string flag.StringVar(&configFile, "f", "", "Configuration file (shorthand)") flag.StringVar(&configFile, "file", "", "Configuration file") flag.Parse() errorFunc := func(field string) { flag.PrintDefaults() panic(fmt.Errorf("required '-%s' has not been set", field)) } if configFile == "" { errorFunc("f") } lr := logrus.StandardLogger() lr.SetLevel(logrus.TraceLevel) scr := scribe.New(scribelogrus.Bind()) workDir, err := os.Getwd() panicOnError(scr, err) scr.I()("Starting GoHarvest Reaper") executable, err := os.Executable() panicOnError(scr, err) scr.I()("Executable: %s; working directory: %s", executable, workDir) cfgData, err := ioutil.ReadFile(configFile) panicOnError(scr, err) cfg, err := unmarshal(cfgData) panicOnError(scr, err) cfg.Harvest.Scribe = scr level, err := scribe.ParseLevelName(cfg.Logging.Level) panicOnError(scr, err) scr.SetEnabled(level.Level) h, err := goharvest.New(cfg.Harvest) panicOnError(scr, err) panicOnError(scr, h.Start()) panicOnError(scr, h.Await()) } type LoggingConfig struct { Level string `yaml:"level"` } func (l *LoggingConfig) setDefaults() { if l.Level == "" { l.Level = scribe.Levels[scribe.Debug].Name } } type ReaperConfig struct { Harvest goharvest.Config `yaml:"harvest"` Logging LoggingConfig `yaml:"logging"` } func (r *ReaperConfig) setDefaults() { r.Harvest.SetDefaults() r.Logging.setDefaults() } func unmarshal(in []byte) (ReaperConfig, error) { cfg := ReaperConfig{} err := yaml.UnmarshalStrict(in, &cfg) if err == nil { cfg.setDefaults() } return cfg, err } ================================================ FILE: config.go ================================================ package goharvest import ( "fmt" "os" "time" validation "github.com/go-ozzo/ozzo-validation" "github.com/obsidiandynamics/goneli" "github.com/obsidiandynamics/libstdgo/scribe" "gopkg.in/yaml.v2" ) // Duration is a convenience for deriving a pointer from a given Duration argument. func Duration(d time.Duration) *time.Duration { return &d } // Int is a convenience for deriving a pointer from a given int argument. func Int(i int) *int { return &i } // Limits configuration. type Limits struct { IOErrorBackoff *time.Duration `yaml:"ioErrorBackoff"` PollDuration *time.Duration `yaml:"pollDuration"` MinPollInterval *time.Duration `yaml:"minPollInterval"` MaxPollInterval *time.Duration `yaml:"maxPollInterval"` HeartbeatTimeout *time.Duration `yaml:"heartbeatTimeout"` DrainInterval *time.Duration `yaml:"drainInterval"` QueueTimeout *time.Duration `yaml:"queueTimeout"` MarkBackoff *time.Duration `yaml:"markBackoff"` MaxInFlightRecords *int `yaml:"maxInFlightRecords"` SendConcurrency *int `yaml:"sendConcurrency"` SendBuffer *int `yaml:"sendBuffer"` MarkQueryRecords *int `yaml:"markQueryRecords"` MinMetricsInterval *time.Duration `yaml:"minMetricsInterval"` } func defaultInt(i **int, def int) { if *i == nil { *i = &def } } func defaultDuration(d **time.Duration, def time.Duration) { if *d == nil { *d = &def } } // SetDefaults assigns the defaults for optional values. 
func (l *Limits) SetDefaults() { defaultDuration(&l.IOErrorBackoff, 500*time.Millisecond) defaultDuration(&l.HeartbeatTimeout, goneli.DefaultHeartbeatTimeout) defaultDuration(&l.MaxPollInterval, *l.HeartbeatTimeout/2) defaultDuration(&l.QueueTimeout, 30*time.Second) defaultDuration(&l.DrainInterval, minDuration(*l.MaxPollInterval, *l.QueueTimeout)) defaultDuration(&l.MarkBackoff, 10*time.Millisecond) defaultInt(&l.MaxInFlightRecords, 1000) defaultInt(&l.SendConcurrency, 8) defaultInt(&l.SendBuffer, 10) defaultInt(&l.MarkQueryRecords, 100) defaultDuration(&l.MinMetricsInterval, 5*time.Second) } func minDuration(d0, d1 time.Duration) time.Duration { if d0 < d1 { return d0 } return d1 } // Validate the Limits configuration, returning an error if invalid func (l Limits) Validate() error { minimumMaxPollInterval := 1 * time.Millisecond if l.MinPollInterval != nil { minimumMaxPollInterval = *l.MinPollInterval } return validation.ValidateStruct(&l, validation.Field(&l.IOErrorBackoff, validation.Min(0)), validation.Field(&l.DrainInterval, validation.Required, validation.Min(1*time.Millisecond)), validation.Field(&l.MaxPollInterval, validation.Required, validation.Min(minimumMaxPollInterval)), validation.Field(&l.QueueTimeout, validation.Required, validation.Min(1*time.Millisecond)), validation.Field(&l.MarkBackoff, validation.Min(0)), validation.Field(&l.MaxInFlightRecords, validation.Required, validation.Min(1)), validation.Field(&l.SendConcurrency, validation.Required, validation.Min(1)), validation.Field(&l.SendBuffer, validation.Min(0)), validation.Field(&l.MarkQueryRecords, validation.Required, validation.Min(1)), validation.Field(&l.MinMetricsInterval, validation.Min(0)), ) } // String obtains a textural representation of Limits. func (l Limits) String() string { return fmt.Sprint( "Limits[IOErrorBackoff=", l.IOErrorBackoff, ", PollDuration=", l.PollDuration, ", MinPollInterval=", l.MinPollInterval, ", MaxPollInterval=", l.MaxPollInterval, ", HeartbeatTimeout=", l.HeartbeatTimeout, ", DrainInterval=", l.DrainInterval, ", QueueTimeout=", l.QueueTimeout, ", MarkBackoff=", l.MarkBackoff, ", MaxInFlightRecords=", l.MaxInFlightRecords, ", SendConcurrency=", l.SendConcurrency, ", SendBuffer=", l.SendBuffer, ", MarkQueryRecords=", l.MarkQueryRecords, ", MinMetricsInterval=", l.MinMetricsInterval, "]", ) } // KafkaConfigMap represents the Kafka key-value configuration. type KafkaConfigMap map[string]interface{} // Config encapsulates configuration for Harvest. type Config struct { BaseKafkaConfig KafkaConfigMap `yaml:"baseKafkaConfig"` ProducerKafkaConfig KafkaConfigMap `yaml:"producerKafkaConfig"` LeaderTopic string `yaml:"leaderTopic"` LeaderGroupID string `yaml:"leaderGroupID"` DataSource string `yaml:"dataSource"` OutboxTable string `yaml:"outboxTable"` Limits Limits `yaml:"limits"` KafkaConsumerProvider KafkaConsumerProvider KafkaProducerProvider KafkaProducerProvider DatabaseBindingProvider DatabaseBindingProvider NeliProvider NeliProvider Scribe scribe.Scribe Name string `yaml:"name"` } // Validate the Config, returning an error if invalid. 
func (c Config) Validate() error { return validation.ValidateStruct(&c, validation.Field(&c.BaseKafkaConfig, validation.NotNil), validation.Field(&c.ProducerKafkaConfig, validation.NotNil), validation.Field(&c.DataSource, validation.Required), validation.Field(&c.OutboxTable, validation.Required), validation.Field(&c.Limits), validation.Field(&c.KafkaConsumerProvider, validation.NotNil), validation.Field(&c.KafkaProducerProvider, validation.NotNil), validation.Field(&c.DatabaseBindingProvider, validation.NotNil), validation.Field(&c.NeliProvider, validation.NotNil), validation.Field(&c.Scribe, validation.NotNil), validation.Field(&c.Name, validation.Required), ) } // Obtains a textual representation of the configuration. func (c Config) String() string { return fmt.Sprint( "Config[BaseKafkaConfig=", c.BaseKafkaConfig, ", ProducerKafkaConfig=", c.ProducerKafkaConfig, ", LeaderTopic=", c.LeaderTopic, ", LeaderGroupID=", c.LeaderGroupID, ", DataSource=", c.DataSource, ", OutboxTable=", c.OutboxTable, ", Limits=", c.Limits, ", KafkaConsumerProvider=", c.KafkaConsumerProvider, ", KafkaProducerProvider=", c.KafkaProducerProvider, ", DatabaseBindingProvider=", c.DatabaseBindingProvider, ", NeliProvider=", c.NeliProvider, ", Scribe=", c.Scribe, ", Name=", c.Name, "]") } // SetDefaults assigns the default values to optional fields. func (c *Config) SetDefaults() { if c.BaseKafkaConfig == nil { c.BaseKafkaConfig = KafkaConfigMap{} } if _, ok := c.BaseKafkaConfig["bootstrap.servers"]; !ok { c.BaseKafkaConfig["bootstrap.servers"] = "localhost:9092" } if c.ProducerKafkaConfig == nil { c.ProducerKafkaConfig = KafkaConfigMap{} } if c.DataSource == "" { c.DataSource = "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable" } if c.OutboxTable == "" { c.OutboxTable = "outbox" } c.Limits.SetDefaults() if c.KafkaConsumerProvider == nil { c.KafkaConsumerProvider = StandardKafkaConsumerProvider() } if c.KafkaProducerProvider == nil { c.KafkaProducerProvider = StandardKafkaProducerProvider() } if c.DatabaseBindingProvider == nil { c.DatabaseBindingProvider = StandardPostgresBindingProvider() } if c.NeliProvider == nil { c.NeliProvider = StandardNeliProvider() } if c.Scribe == nil { c.Scribe = scribe.New(scribe.StandardBinding()) } if c.Name == "" { c.Name = fmt.Sprintf("%s_%d_%d", goneli.Sanitise(getString("localhost", os.Hostname)), os.Getpid(), time.Now().Unix()) } } // Unmarshal a configuration from a byte slice, returning the configuration struct with pre-initialised defaults, // or an error if unmarshalling failed. The configuration is not validated prior to returning, in case further // amendments are required by the caller. The caller should call Validate() independently. 
func Unmarshal(in []byte) (Config, error) { cfg := Config{} err := yaml.UnmarshalStrict(in, &cfg) if err == nil { cfg.SetDefaults() } return cfg, err } type stringGetter func() (string, error) func getString(def string, stringGetter stringGetter) string { str, err := stringGetter() if err != nil { return def } return str } ================================================ FILE: config_test.go ================================================ package goharvest import ( "testing" "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/obsidiandynamics/goneli" "github.com/obsidiandynamics/libstdgo/check" "github.com/obsidiandynamics/libstdgo/scribe" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) func TestDefaultKafkaConsumerProvider(t *testing.T) { c := Config{} c.SetDefaults() cons, err := c.KafkaConsumerProvider(&KafkaConfigMap{}) assert.Nil(t, cons) if assert.NotNil(t, err) { assert.Contains(t, err.Error(), "Required property") } } func TestDefaultKafkaProducerProvider(t *testing.T) { c := Config{} c.SetDefaults() prod, err := c.KafkaProducerProvider(&KafkaConfigMap{"foo": "bar"}) assert.Nil(t, prod) if assert.NotNil(t, err) { assert.Contains(t, err.Error(), "No such configuration property") } } func TestDefaultNeliProvider(t *testing.T) { c := Config{} c.SetDefaults() consMock := &consMock{} consMock.fillDefaults() prodMock := &prodMock{} prodMock.fillDefaults() neli, err := c.NeliProvider(goneli.Config{ KafkaConsumerProvider: convertKafkaConsumerProvider(mockKafkaConsumerProvider(consMock)), KafkaProducerProvider: convertKafkaProducerProvider(mockKafkaProducerProvider(prodMock)), }, goneli.NopBarrier()) assert.NotNil(t, neli) assert.Nil(t, err) assert.Nil(t, neli.Close()) } func TestLimitsString(t *testing.T) { lim := Limits{} lim.SetDefaults() assert.Contains(t, lim.String(), "Limits[") } func TestLimitsFromYaml(t *testing.T) { const y = ` ioErrorBackoff: 10ms pollDuration: 20ms minPollInterval: 30ms ` lim := Limits{} err := yaml.UnmarshalStrict([]byte(y), &lim) assert.Nil(t, err) assert.Equal(t, 10*time.Millisecond, *lim.IOErrorBackoff) assert.Equal(t, 20*time.Millisecond, *lim.PollDuration) assert.Equal(t, 30*time.Millisecond, *lim.MinPollInterval) lim.SetDefaults() // Check that the defaults weren't overridden. 
def := Limits{} def.SetDefaults() assert.Equal(t, *def.MarkBackoff, *lim.MarkBackoff) } func TestGetString(t *testing.T) { assert.Equal(t, "some-default", getString("some-default", func() (string, error) { return "", check.ErrSimulated })) assert.Equal(t, "some-string", getString("some-default", func() (string, error) { return "some-string", nil })) } func TestValidateLimits(t *testing.T) { lim := Limits{} lim.SetDefaults() assert.Nil(t, lim.Validate()) lim = Limits{ IOErrorBackoff: Duration(-1), PollDuration: Duration(time.Millisecond), } lim.SetDefaults() if err := lim.Validate(); assert.NotNil(t, err) { assert.Equal(t, "IOErrorBackoff: must be no less than 0.", lim.Validate().Error()) } lim = Limits{ DrainInterval: Duration(0), } lim.SetDefaults() if err := lim.Validate(); assert.NotNil(t, err) { assert.Equal(t, "DrainInterval: cannot be blank.", lim.Validate().Error()) } lim = Limits{ DrainInterval: Duration(1 * time.Nanosecond), } lim.SetDefaults() if err := lim.Validate(); assert.NotNil(t, err) { assert.Equal(t, "DrainInterval: must be no less than 1ms.", lim.Validate().Error()) } } func TestConfigString(t *testing.T) { cfg := Config{} cfg.SetDefaults() assert.Contains(t, cfg.String(), "Config[") } func TestValidateConfig_valid(t *testing.T) { cfg := Config{ BaseKafkaConfig: KafkaConfigMap{}, ProducerKafkaConfig: KafkaConfigMap{}, LeaderTopic: "leader-topic", LeaderGroupID: "leader-group-d", DataSource: "data-source", OutboxTable: "outbox-table", KafkaConsumerProvider: StandardKafkaConsumerProvider(), KafkaProducerProvider: StandardKafkaProducerProvider(), DatabaseBindingProvider: StandardPostgresBindingProvider(), Scribe: scribe.New(scribe.StandardBinding()), Name: "name", } cfg.SetDefaults() assert.Nil(t, cfg.Validate()) } func TestValidateConfig_invalidLimits(t *testing.T) { cfg := Config{ BaseKafkaConfig: KafkaConfigMap{}, ProducerKafkaConfig: KafkaConfigMap{}, LeaderTopic: "leader-topic", LeaderGroupID: "leader-group-id", DataSource: "data-source", OutboxTable: "outbox-table", Limits: Limits{ SendConcurrency: Int(-1), }, KafkaConsumerProvider: StandardKafkaConsumerProvider(), KafkaProducerProvider: StandardKafkaProducerProvider(), DatabaseBindingProvider: StandardPostgresBindingProvider(), Scribe: scribe.New(scribe.StandardBinding()), Name: "name", } cfg.SetDefaults() assert.NotNil(t, cfg.Validate()) } func TestValidateConfig_default(t *testing.T) { cfg := Config{} cfg.SetDefaults() assert.Nil(t, cfg.Validate()) } func TestDefaultDrainTimeout(t *testing.T) { cfg := Config{ Limits: Limits{ HeartbeatTimeout: Duration(40 * time.Second), }, } cfg.SetDefaults() assert.Equal(t, 20*time.Second, *cfg.Limits.MaxPollInterval) assert.Equal(t, 20*time.Second, *cfg.Limits.DrainInterval) cfg = Config{ Limits: Limits{ HeartbeatTimeout: Duration(40 * time.Second), QueueTimeout: Duration(15 * time.Second), }, } cfg.SetDefaults() assert.Equal(t, 20*time.Second, *cfg.Limits.MaxPollInterval) assert.Equal(t, 15*time.Second, *cfg.Limits.DrainInterval) } func TestUnmarshal_fullyPopulated(t *testing.T) { const y = ` baseKafkaConfig: bootstrap.servers: localhost:9093 producerKafkaConfig: compression.type: lz4 leaderTopic: leader-topic leaderGroupID: leader-group-id dataSource: data-source outboxTable: outbox-table limits: ioErrorBackoff: 10ms pollDuration: 20ms minPollInterval: 30ms maxPollInterval: 35ms heartbeatTimeout: 15ms drainInterval: 32ms queueTimeout: 40ms markBackoff: 50ms maxInFlightRecords: 60 sendConcurrency: 70 sendBuffer: 80 minMetricsInterval: 90ms name: test-name ` cfg, err := 
Unmarshal([]byte(y)) require.Nil(t, err) if !assert.Nil(t, cfg.Validate()) { t.Errorf("Validation error: %s", cfg.Validate().Error()) } exp := Config{ BaseKafkaConfig: KafkaConfigMap{ "bootstrap.servers": "localhost:9093", }, ProducerKafkaConfig: KafkaConfigMap{ "compression.type": "lz4", }, LeaderTopic: "leader-topic", LeaderGroupID: "leader-group-id", DataSource: "data-source", OutboxTable: "outbox-table", Limits: Limits{ IOErrorBackoff: Duration(10 * time.Millisecond), PollDuration: Duration(20 * time.Millisecond), MinPollInterval: Duration(30 * time.Millisecond), MaxPollInterval: Duration(35 * time.Millisecond), HeartbeatTimeout: Duration(15 * time.Millisecond), DrainInterval: Duration(32 * time.Millisecond), QueueTimeout: Duration(40 * time.Millisecond), MarkBackoff: Duration(50 * time.Millisecond), MaxInFlightRecords: Int(60), SendConcurrency: Int(70), SendBuffer: Int(80), MinMetricsInterval: Duration(90 * time.Millisecond), }, Name: "test-name", } exp.SetDefaults() ignoreFields := cmpopts.IgnoreFields( Config{}, "KafkaConsumerProvider", "KafkaProducerProvider", "DatabaseBindingProvider", "NeliProvider", "Scribe", ) assert.True(t, cmp.Equal(exp, cfg, ignoreFields), "Diff: %v", cmp.Diff(exp, cfg, ignoreFields)) } func TestUnmarshal_empty(t *testing.T) { const y = `` cfg, err := Unmarshal([]byte(y)) assert.Nil(t, err) if !assert.Nil(t, cfg.Validate()) { t.Errorf("Validation error: %s", cfg.Validate().Error()) } exp := Config{} exp.SetDefaults() ignoreFields := cmpopts.IgnoreFields( Config{}, "KafkaConsumerProvider", "KafkaProducerProvider", "DatabaseBindingProvider", "NeliProvider", "Scribe", "Name", ) assert.True(t, cmp.Equal(exp, cfg, ignoreFields), "Diff: %v", cmp.Diff(exp, cfg, ignoreFields)) } ================================================ FILE: db.go ================================================ package goharvest import ( "fmt" "time" "github.com/google/uuid" ) // KafkaHeader is a key-value tuple representing a single header entry. type KafkaHeader struct { Key string Value string } // String obtains a textual representation of a KafkaHeader. func (h KafkaHeader) String() string { return h.Key + ":" + h.Value } // KafkaHeaders is a slice of KafkaHeader tuples. type KafkaHeaders []KafkaHeader // OutboxRecord depicts a single entry in the outbox table. It can be used for both reading and writing operations. type OutboxRecord struct { ID int64 CreateTime time.Time KafkaTopic string KafkaKey string KafkaValue *string KafkaHeaders KafkaHeaders LeaderID *uuid.UUID } // String is a convenience function that returns a pointer to the given str argument, for use with setting OutboxRecord.Value. func String(str string) *string { return &str } // String provides a textual representation of an OutboxRecord. func (rec OutboxRecord) String() string { return fmt.Sprint("OutboxRecord[ID=", rec.ID, ", CreateTime=", rec.CreateTime, ", KafkaTopic=", rec.KafkaTopic, ", KafkaKey=", rec.KafkaKey, ", KafkaValue=", rec.KafkaValue, ", KafkaHeaders=", rec.KafkaHeaders, ", LeaderID=", rec.LeaderID, "]") } // DatabaseBinding is an abstraction over the data access layer, allowing goharvest to use arbitrary database implementations. type DatabaseBinding interface { Mark(leaderID uuid.UUID, limit int) ([]OutboxRecord, error) Purge(id int64) (bool, error) Reset(id int64) (bool, error) Dispose() } // DatabaseBindingProvider is a factory for creating instances of a DatabaseBinding. 
type DatabaseBindingProvider func(dataSource string, outboxTable string) (DatabaseBinding, error) ================================================ FILE: db_mock_test.go ================================================ package goharvest import ( "github.com/google/uuid" "github.com/obsidiandynamics/libstdgo/concurrent" ) type dbMockFuncs struct { Mark func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error) Purge func(m *dbMock, id int64) (bool, error) Reset func(m *dbMock, id int64) (bool, error) Dispose func(m *dbMock) } type dbMockCounts struct { Mark, Purge, Reset, Dispose concurrent.AtomicCounter } type dbMock struct { markedRecords chan []OutboxRecord f dbMockFuncs c dbMockCounts } func (m *dbMock) Mark(leaderID uuid.UUID, limit int) ([]OutboxRecord, error) { defer m.c.Mark.Inc() return m.f.Mark(m, leaderID, limit) } func (m *dbMock) Purge(id int64) (bool, error) { defer m.c.Purge.Inc() return m.f.Purge(m, id) } func (m *dbMock) Reset(id int64) (bool, error) { defer m.c.Reset.Inc() return m.f.Reset(m, id) } func (m *dbMock) Dispose() { defer m.c.Dispose.Inc() m.f.Dispose(m) } func (m *dbMock) fillDefaults() { if m.markedRecords == nil { m.markedRecords = make(chan []OutboxRecord) } if m.f.Mark == nil { m.f.Mark = func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error) { select { case records := <-m.markedRecords: return records, nil default: return []OutboxRecord{}, nil } } } if m.f.Purge == nil { m.f.Purge = func(m *dbMock, id int64) (bool, error) { return true, nil } } if m.f.Reset == nil { m.f.Reset = func(m *dbMock, id int64) (bool, error) { return true, nil } } if m.f.Dispose == nil { m.f.Dispose = func(m *dbMock) {} } m.c.Mark = concurrent.NewAtomicCounter() m.c.Purge = concurrent.NewAtomicCounter() m.c.Reset = concurrent.NewAtomicCounter() m.c.Dispose = concurrent.NewAtomicCounter() } func mockDatabaseBindingProvider(m *dbMock) func(string, string) (DatabaseBinding, error) { return func(dataSource string, table string) (DatabaseBinding, error) { return m, nil } } ================================================ FILE: event.go ================================================ package goharvest import ( "fmt" "github.com/google/uuid" "github.com/obsidiandynamics/goharvest/metric" ) // EventHandler is a callback function for handling GoHarvest events. type EventHandler func(e Event) // Event encapsulates a GoHarvest event. type Event interface { fmt.Stringer } // LeaderAcquired is emitted upon successful acquisition of leader status. type LeaderAcquired struct { leaderID uuid.UUID } // String obtains a textual representation of the LeaderAcquired event. func (e LeaderAcquired) String() string { return fmt.Sprint("LeaderAcquired[leaderID=", e.leaderID, "]") } // LeaderID returns the local UUID of the elected leader. func (e LeaderAcquired) LeaderID() uuid.UUID { return e.leaderID } // LeaderRefreshed is emitted when a new leader ID is generated as a result of a remarking request. type LeaderRefreshed struct { leaderID uuid.UUID } // String obtains a textual representation of the LeaderRefreshed event. func (e LeaderRefreshed) String() string { return fmt.Sprint("LeaderRefreshed[leaderID=", e.leaderID, "]") } // LeaderID returns the local UUID of the elected leader. func (e LeaderRefreshed) LeaderID() uuid.UUID { return e.leaderID } // LeaderRevoked is emitted when the leader status has been revoked. type LeaderRevoked struct{} // String obtains a textual representation of the LeaderRevoked event. 
func (e LeaderRevoked) String() string { return fmt.Sprint("LeaderRevoked[]") } // LeaderFenced is emitted when the leader status has been revoked. type LeaderFenced struct{} // String obtains a textual representation of the LeaderFenced event. func (e LeaderFenced) String() string { return fmt.Sprint("LeaderFenced[]") } // MeterRead is emitted when the internal throughput Meter has been read. type MeterRead struct { stats metric.MeterStats } // String obtains a textual representation of the MeterRead event. func (e MeterRead) String() string { return fmt.Sprint("MeterRead[stats=", e.stats, "]") } // Stats embedded in the MeterRead event. func (e MeterRead) Stats() metric.MeterStats { return e.stats } ================================================ FILE: event_test.go ================================================ package goharvest import ( "testing" "github.com/google/uuid" "github.com/obsidiandynamics/goharvest/metric" "github.com/stretchr/testify/assert" ) func TestLeaderAcquired_string(t *testing.T) { leaderID, _ := uuid.NewRandom() assert.Contains(t, LeaderAcquired{leaderID}.String(), "LeaderAcquired[") assert.Contains(t, LeaderAcquired{leaderID}.String(), leaderID.String()) } func TestLeaderAcquired_getter(t *testing.T) { leaderID, _ := uuid.NewRandom() e := LeaderAcquired{leaderID} assert.Equal(t, leaderID, e.LeaderID()) } func TestLeaderRefreshed_string(t *testing.T) { leaderID, _ := uuid.NewRandom() assert.Contains(t, LeaderRefreshed{leaderID}.String(), "LeaderRefreshed[") assert.Contains(t, LeaderRefreshed{leaderID}.String(), leaderID.String()) } func TestLeaderRefreshed_getter(t *testing.T) { leaderID, _ := uuid.NewRandom() e := LeaderRefreshed{leaderID} assert.Equal(t, leaderID, e.LeaderID()) } func TestLeaderRevoked_string(t *testing.T) { assert.Equal(t, "LeaderRevoked[]", LeaderRevoked{}.String()) } func TestLeaderFenced_string(t *testing.T) { assert.Equal(t, "LeaderFenced[]", LeaderFenced{}.String()) } func TestMeterStats_string(t *testing.T) { stats := metric.MeterStats{} assert.Contains(t, MeterRead{stats}.String(), "MeterRead[") assert.Contains(t, MeterRead{stats}.String(), stats.String()) } ================================================ FILE: examples/reaper.yaml ================================================ harvest: baseKafkaConfig: bootstrap.servers: localhost:9092 producerKafkaConfig: compression.type: lz4 delivery.timeout.ms: 10000 leaderTopic: my-app-name leaderGroupID: my-app-name dataSource: host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable outboxTable: outbox limits: minPollInterval: 1s heartbeatTimeout: 5s maxInFlightRecords: 1000 minMetricsInterval: 5s sendConcurrency: 4 sendBuffer: 10 logging: level: Debug ================================================ FILE: examples/reaper_secure.yaml ================================================ harvest: baseKafkaConfig: bootstrap.servers: localhost:9094 security.protocol: sasl_ssl ssl.ca.location: ca-cert.pem sasl.mechanism: SCRAM-SHA-512 sasl.username: alice sasl.password: alice-secret leaderTopic: __consumer_offsets leaderGroupID: my-app-name dataSource: host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable outboxTable: outbox logging: level: Debug ================================================ FILE: go.mod ================================================ module github.com/obsidiandynamics/goharvest go 1.14 require ( github.com/DATA-DOG/go-sqlmock v1.4.1 github.com/confluentinc/confluent-kafka-go v1.5.2 // indirect github.com/go-ozzo/ozzo-validation 
v3.6.0+incompatible github.com/google/go-cmp v0.4.0 github.com/google/uuid v1.1.1 github.com/lib/pq v1.5.1 github.com/obsidiandynamics/goneli v0.4.3 github.com/obsidiandynamics/libstdgo v0.4.1 github.com/sirupsen/logrus v1.5.0 github.com/stretchr/testify v1.5.1 golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f // indirect gopkg.in/confluentinc/confluent-kafka-go.v1 v1.5.2 gopkg.in/yaml.v2 v2.2.8 ) ================================================ FILE: go.sum ================================================ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DATA-DOG/go-sqlmock v1.4.1 h1:ThlnYciV1iM/V0OSF/dtkqWb6xo5qITT1TJBG1MRDJM= github.com/DATA-DOG/go-sqlmock v1.4.1/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496 h1:zV3ejI06GQ59hwDQAvmK1qxOQGB3WuVTRoY0okPTAv0= github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg= github.com/cihub/seelog v0.0.0-20170130134532-f561c5e57575 h1:kHaBemcxl8o/pQ5VM1c8PVE1PubbNx3mjUr09OqWGCs= github.com/cihub/seelog v0.0.0-20170130134532-f561c5e57575/go.mod h1:9d6lWj8KzO/fd/NrVaLscBKmPigpZpn5YawRPw+e3Yo= github.com/confluentinc/confluent-kafka-go v1.5.2 h1:l+qt+a0Okmq0Bdr1P55IX4fiwFJyg0lZQmfHkAFkv7E= github.com/confluentinc/confluent-kafka-go v1.5.2/go.mod h1:u2zNLny2xq+5rWeTQjFHbDzzNuba4P1vo31r9r4uAdg= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-ozzo/ozzo-validation v3.6.0+incompatible h1:msy24VGS42fKO9K1vLz82/GeYW1cILu7Nuuj1N3BBkE= github.com/go-ozzo/ozzo-validation v3.6.0+incompatible/go.mod h1:gsEKFIVnabGBt6mXmxK0MoFy+cZoTJY6mu5Ll3LVLBU= github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/inconshreveable/log15 v0.0.0-20200109203555-b30bc20e4fd1 h1:KUDFlmBg2buRWNzIcwLlKvfcnujcHQRQ1As1LoaCLAM= github.com/inconshreveable/log15 v0.0.0-20200109203555-b30bc20e4fd1/go.mod h1:cOaXtrgN4ScfRrD9Bre7U1thNq5RtJ8ZoP4iXVGRj6o= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 
github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lib/pq v1.5.1 h1:Jn6HYxiYrtQ92CopqJLvfPCJUrrruw1+1cn0jM9dKrI= github.com/lib/pq v1.5.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/mattn/go-colorable v0.1.6 h1:6Su7aK7lXmJ/U79bYtBjLNaha4Fs1Rg9plHpcH+vvnE= github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/obsidiandynamics/goneli v0.4.3 h1:lf3x/qSgEX9S6+Ak5GPcc3TBUQBhPJeiWvGrCykZcbM= github.com/obsidiandynamics/goneli v0.4.3/go.mod h1:1i3mTL/PaaDKu6f+hlndeRUCbV8uiDxu+203vBpn6oE= github.com/obsidiandynamics/libstdgo v0.4.1 h1:ZUnz+72xQSMgAjEqxp7i7NOBZlu6AcAE6ppmvVKxK3M= github.com/obsidiandynamics/libstdgo v0.4.1/go.mod h1:0gKiFsJhfrlCqbWFNhDDUJgj6XbXWZyrl0JS/C+jU5g= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/sirupsen/logrus v1.5.0 h1:1N5EYkVAPEywqZRJd7cwnRtCb6xJx7NH3T3WUTF980Q= github.com/sirupsen/logrus v1.5.0/go.mod h1:+F7Ogzej0PZc/94MaYx/nvG9jOFMD2osvC3s+Squfpo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A= go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee 
h1:0mgffUl7nfd+FpvXMVz4IDEaUSmT1ysygQC7qYo7sG4= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.14.1 h1:nYDKopTbvAPq/NrUVZwT15y2lpROBiLLyoRTbXOYWOo= go.uber.org/zap v1.14.1/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae h1:/WDfKMnPU+m5M4xB+6x4kaepxRw6jWvR5iDRdvjHgy8= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200413165638-669c56c373c4 h1:opSr2sbRXk5X5/givKrrKj9HXxFpW2sdCiP8MJSKLQY= golang.org/x/sys v0.0.0-20200413165638-669c56c373c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8= golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools 
v0.0.0-20191029190741-b9c20aec41a5 h1:hKsoRgsbwY1NafxrwTs+k64bikrLBkAgPir1TNCj3Zs= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200417140056-c07e33ef3290/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/confluentinc/confluent-kafka-go.v1 v1.4.0 h1:70Hht0HKadDe6GpSgstEtYrDMtHo3ZqK+3KeHepusaw= gopkg.in/confluentinc/confluent-kafka-go.v1 v1.4.0/go.mod h1:ZdI3yfYmdNSLQPNCpO1y00EHyWaHG5EnQEyL/ntAegY= gopkg.in/confluentinc/confluent-kafka-go.v1 v1.5.2 h1:g0WBLy6fobNUU8W/e9zx6I0Yl79Ya+BDW1NwzAlTiiQ= gopkg.in/confluentinc/confluent-kafka-go.v1 v1.5.2/go.mod h1:ZdI3yfYmdNSLQPNCpO1y00EHyWaHG5EnQEyL/ntAegY= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= ================================================ FILE: goharvest_doc_test.go ================================================ package goharvest import ( "database/sql" "log" "testing" "github.com/obsidiandynamics/libstdgo/check" "github.com/obsidiandynamics/libstdgo/scribe" scribelogrus "github.com/obsidiandynamics/libstdgo/scribe/logrus" "github.com/sirupsen/logrus" ) func Example() { const dataSource = "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable" // Optional: Ensure the database table exists before we start harvesting. 
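// (The DDL is wrapped in an immediately-invoked function so that the example remains self-contained.)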
func() { db, err := sql.Open("postgres", dataSource) if err != nil { panic(err) } defer db.Close() _, err = db.Exec(` CREATE TABLE IF NOT EXISTS outbox ( id BIGSERIAL PRIMARY KEY, create_time TIMESTAMP WITH TIME ZONE NOT NULL, kafka_topic VARCHAR(249) NOT NULL, kafka_key VARCHAR(100) NOT NULL, -- pick your own key size kafka_value VARCHAR(10000), -- pick your own value size kafka_header_keys TEXT[] NOT NULL, kafka_header_values TEXT[] NOT NULL, leader_id UUID ) `) if err != nil { panic(err) } }() // Configure the harvester. It will use its own database and Kafka connections under the hood. config := Config{ BaseKafkaConfig: KafkaConfigMap{ "bootstrap.servers": "localhost:9092", }, DataSource: dataSource, } // Create a new harvester. harvest, err := New(config) if err != nil { panic(err) } // Start it. err = harvest.Start() if err != nil { panic(err) } // Wait indefinitely for it to end. log.Fatal(harvest.Await()) } func TestExample(t *testing.T) { check.RunTargetted(t, Example) } func Example_withCustomLogger() { // Example: Configure GoHarvest with a Logrus binding for Scribe. log := logrus.StandardLogger() log.SetLevel(logrus.DebugLevel) // Configure the custom logger using a binding. config := Config{ BaseKafkaConfig: KafkaConfigMap{ "bootstrap.servers": "localhost:9092", }, Scribe: scribe.New(scribelogrus.Bind()), DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable", } // Create a new harvester. harvest, err := New(config) if err != nil { panic(err) } // Start it. err = harvest.Start() if err != nil { panic(err) } // Wait indefinitely for it to end. log.Fatal(harvest.Await()) } func TestExample_withCustomLogger(t *testing.T) { check.RunTargetted(t, Example_withCustomLogger) } func Example_withSaslSslAndCustomProducerConfig() { // Example: Using Kafka with sasl_ssl for authentication and encryption. config := Config{ BaseKafkaConfig: KafkaConfigMap{ "bootstrap.servers": "localhost:9094", "security.protocol": "sasl_ssl", "ssl.ca.location": "ca-cert.pem", "sasl.mechanism": "SCRAM-SHA-512", "sasl.username": "alice", "sasl.password": "alice-secret", }, ProducerKafkaConfig: KafkaConfigMap{ "compression.type": "lz4", }, DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable", } // Create a new harvester. harvest, err := New(config) if err != nil { panic(err) } // Start harvesting in the background. err = harvest.Start() if err != nil { panic(err) } // Wait indefinitely for the harvester to end. log.Fatal(harvest.Await()) } func TestExample_withSaslSslAndCustomProducerConfig(t *testing.T) { check.RunTargetted(t, Example_withSaslSslAndCustomProducerConfig) } func Example_withEventHandler() { // Example: Registering a custom event handler to get notified of leadership changes and metrics. log := logrus.StandardLogger() log.SetLevel(logrus.TraceLevel) config := Config{ BaseKafkaConfig: KafkaConfigMap{ "bootstrap.servers": "localhost:9092", }, DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable", Scribe: scribe.New(scribelogrus.Bind()), } // Create a new harvester and register an event hander. harvest, err := New(config) if err != nil { panic(err) } // Register a handler callback, invoked when an event occurs within goharvest. // The callback is completely optional; it lets the application piggy-back on leader // status updates, in case it needs to schedule some additional work (other than // harvesting outbox records) that should only be run on one process at any given time. 
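// The switch below covers the event types raised by the harvester: LeaderAcquired, LeaderRefreshed, // LeaderRevoked, LeaderFenced and MeterRead.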
harvest.SetEventHandler(func(e Event) { switch event := e.(type) { case LeaderAcquired: // The application may initialise any state necessary to perform work as a leader. log.Infof("Got event: leader acquired: %v", event.LeaderID()) case LeaderRefreshed: // Indicates that a new leader ID was generated, as a result of having to remark // a record (typically as due to an earlier delivery error). This is purely // informational; there is nothing an application should do about this, other // than taking note of the new leader ID if it has come to rely on it. log.Infof("Got event: leader refreshed: %v", event.LeaderID()) case LeaderRevoked: // The application may block the callback until it wraps up any in-flight // activity. Only upon returning from the callback, will a new leader be elected. log.Infof("Got event: leader revoked") case LeaderFenced: // The application must immediately terminate any ongoing activity, on the assumption // that another leader may be imminently elected. Unlike the handling of LeaderRevoked, // blocking in the callback will not prevent a new leader from being elected. log.Infof("Got event: leader fenced") case MeterRead: // Periodic statistics regarding the harvester's throughput. log.Infof("Got event: meter read: %v", event.Stats()) } }) // Start harvesting in the background. err = harvest.Start() if err != nil { panic(err) } // Wait indefinitely for it to end. log.Fatal(harvest.Await()) } func TestExample_withEventHandler(t *testing.T) { check.RunTargetted(t, Example_withEventHandler) } ================================================ FILE: harvest.go ================================================ package goharvest import ( "fmt" "runtime/debug" "sync" "sync/atomic" "time" "github.com/google/uuid" "github.com/obsidiandynamics/goharvest/metric" "github.com/obsidiandynamics/goneli" "github.com/obsidiandynamics/libstdgo/concurrent" "github.com/obsidiandynamics/libstdgo/diags" "github.com/obsidiandynamics/libstdgo/scribe" "gopkg.in/confluentinc/confluent-kafka-go.v1/kafka" _ "gopkg.in/confluentinc/confluent-kafka-go.v1/kafka/librdkafka" ) var noLeader uuid.UUID // State of the Harvest instance. type State int const ( // Created — initialised (configured) but not started. Created State = iota // Running — currently running. Running // Stopping — in the process of being stopped. I.e. Stop() has been invoked, but workers are still running. Stopping // Stopped — has been completely disposed of. Stopped ) type tracedPanic struct { cause interface{} stack string } func (e tracedPanic) Error() string { return fmt.Sprintf("%v\n%s", e.cause, e.stack) } // Harvest performs background harvesting of a transactional outbox table. type Harvest interface { Start() error Stop() Await() error State() State IsLeader() bool LeaderID() *uuid.UUID InFlightRecords() int InFlightRecordKeys() []string SetEventHandler(eventHandler EventHandler) } const watcherTimeout = 60 * time.Second type harvest struct { config Config producerConfigs KafkaConfigMap scribe scribe.Scribe state concurrent.AtomicReference shouldBeRunningFlag concurrent.AtomicCounter neli goneli.Neli leaderID atomic.Value db DatabaseBinding queuedRecords concurrent.AtomicCounter inFlightRecords concurrent.AtomicCounter inFlightKeys concurrent.Scoreboard throughput *metric.Meter throughputLock sync.Mutex panicCause atomic.Value eventHandler EventHandler forceRemarkFlag concurrent.AtomicCounter sendBattery battery } // New creates a new Harvest instance from the supplied config. 
func New(config Config) (Harvest, error) { config.SetDefaults() if err := config.Validate(); err != nil { return nil, err } h := &harvest{ config: config, scribe: config.Scribe, state: concurrent.NewAtomicReference(Created), shouldBeRunningFlag: concurrent.NewAtomicCounter(1), queuedRecords: concurrent.NewAtomicCounter(), inFlightRecords: concurrent.NewAtomicCounter(), inFlightKeys: concurrent.NewScoreboard(*config.Limits.SendConcurrency), forceRemarkFlag: concurrent.NewAtomicCounter(), eventHandler: func(e Event) {}, } h.leaderID.Store(noLeader) h.producerConfigs = copyKafkaConfig(h.config.BaseKafkaConfig) putAllKafkaConfig(h.config.ProducerKafkaConfig, h.producerConfigs) err := setKafkaConfigs(h.producerConfigs, KafkaConfigMap{ "enable.idempotence": true, }) if err != nil { return nil, err } return h, nil } // State obtains the present state of this Harvest instance. func (h *harvest) State() State { return h.state.Get().(State) } func (h *harvest) logger() scribe.StdLogAPI { return h.scribe.Capture(h.scene()) } func (h *harvest) scene() scribe.Scene { return scribe.Scene{Fields: scribe.Fields{ "name": h.config.Name, "lib": "goharvest", }} } func (h *harvest) cleanupFailedStart() { if h.State() != Created { return } if h.db != nil { h.db.Dispose() } } // Start the harvester. func (h *harvest) Start() error { ensureState(h.State() == Created, "Cannot start at this time") defer h.cleanupFailedStart() db, err := h.config.DatabaseBindingProvider(h.config.DataSource, h.config.OutboxTable) if err != nil { return err } h.db = db neliConfig := goneli.Config{ KafkaConfig: configToNeli(h.config.BaseKafkaConfig), LeaderTopic: h.config.LeaderTopic, LeaderGroupID: h.config.LeaderGroupID, KafkaConsumerProvider: convertKafkaConsumerProvider(h.config.KafkaConsumerProvider), KafkaProducerProvider: convertKafkaProducerProvider(h.config.KafkaProducerProvider), Scribe: h.config.Scribe, Name: h.config.Name, PollDuration: h.config.Limits.PollDuration, MinPollInterval: h.config.Limits.MinPollInterval, HeartbeatTimeout: h.config.Limits.HeartbeatTimeout, } h.logger().T()("Creating NELI with config %v", neliConfig) n, err := h.config.NeliProvider(neliConfig, func(e goneli.Event) { switch e.(type) { case goneli.LeaderAcquired: h.onAcquired() case goneli.LeaderRevoked: h.onRevoked() case goneli.LeaderFenced: h.onFenced() } }) if err != nil { return err } h.neli = n h.throughput = metric.NewMeter("throughput", *h.config.Limits.MinMetricsInterval) h.state.Set(Running) go backgroundPoller(h) return nil } // IsLeader returns true if the current Harvest is the leader among competing instances. func (h *harvest) IsLeader() bool { return h.LeaderID() != nil } // LeaderID returns the leader UUID of the current instance, if it is a leader at the time of this call. // Otherwise, a nil is returned. func (h *harvest) LeaderID() *uuid.UUID { if stored := h.leaderID.Load().(uuid.UUID); stored != noLeader { return &stored } return nil } // InFlightRecords returns the number of in-flight records; i.e. records that have been published on Kafka for which an // acknowledgement is still pending. func (h *harvest) InFlightRecords() int { return h.inFlightRecords.GetInt() } // InFlightRecordKeys returns the keys of records that are still in-flight. For any given key, there will be at most one // record pending acknowledgement. 
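// The returned slice is a point-in-time snapshot taken from the in-flight keys scoreboard.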
func (h *harvest) InFlightRecordKeys() []string { view := h.inFlightKeys.View() keys := make([]string, len(view)) i := 0 for k := range view { keys[i] = k i++ } return keys } // SetEventHandler assigns an optional event handler callback to be notified of changes in leader state as well as other // events of interest. // // This method must be invoked prior to Start(). func (h *harvest) SetEventHandler(eventHandler EventHandler) { ensureState(h.State() == Created, "Cannot set event handler at this time") h.eventHandler = eventHandler } func (h *harvest) shouldBeRunning() bool { return h.shouldBeRunningFlag.Get() == 1 } func (h *harvest) reportPanic(goroutineName string) { if r := recover(); r != nil { h.logger().E()("Caught panic in %s: %v", goroutineName, r) h.panicCause.Store(tracedPanic{r, string(debug.Stack())}) h.logger().E()(string(debug.Stack())) h.Stop() } } func ensureState(expected bool, format string, args ...interface{}) { if !expected { panic(fmt.Errorf("state assertion failed: "+format, args...)) } } func backgroundPoller(h *harvest) { h.logger().I()("Starting background poller") defer h.logger().I()("Stopped") defer h.state.Set(Stopped) defer h.reportPanic("background poller") defer h.db.Dispose() defer h.neli.Close() defer h.shutdownSendBattery() defer h.state.Set(Stopping) defer h.logger().I()("Stopping") for h.shouldBeRunning() { isLeader, err := h.neli.Pulse(1 * time.Millisecond) if err != nil { panic(err) } if isLeader { if h.forceRemarkFlag.Get() == 1 { h.logger().D()("Remark requested") h.shutdownSendBattery() h.refreshLeader() } if h.sendBattery == nil { inFlightRecordsValue := h.inFlightRecords.Get() ensureState(inFlightRecordsValue == 0, "inFlightRecords=%d", inFlightRecordsValue) inFlightKeysView := h.inFlightKeys.View() ensureState(len(inFlightKeysView) == 0, "inFlightKeys=%d", inFlightKeysView) h.spawnSendBattery() } onLeaderPoll(h) } } } func (h *harvest) spawnSendBattery() { ensureState(h.sendBattery == nil, "send battery not nil before spawn") h.logger().D()("Spawning send battery") h.sendBattery = newConcurrentBattery(*h.config.Limits.SendConcurrency, *h.config.Limits.SendBuffer, func(records chan OutboxRecord) { defer h.reportPanic("send cell") h.logger().T()("Creating Kafka producer with config %v", h.producerConfigs) prod, err := h.config.KafkaProducerProvider(&h.producerConfigs) if err != nil { panic(err) } deliveryHandlerDone := make(chan int) go backgroundDeliveryHandler(h, prod, deliveryHandlerDone) defer func() { <-deliveryHandlerDone }() defer func() { go func() { // A bug in confluent-kafka-go (#463) occasionally causes an indefinite syscall hang in Close(), after it closes // the Events channel. So we delegate this to a separate goroutine — better an orphaned goroutine than a // frozen harvester. (The rest of the battery will still unwind normally.) 
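// The watcher flags the close operation if it has not completed within watcherTimeout.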
closeWatcher := h.watch("close producer") prod.Close() closeWatcher.End() }() }() var lastID *int64 for rec := range records { ensureState(lastID == nil || rec.ID >= *lastID, "discontinuity for key %s: ID %s, lastID: %v", rec.KafkaKey, rec.ID, lastID) lastID = &rec.ID m := &kafka.Message{ TopicPartition: kafka.TopicPartition{Topic: &rec.KafkaTopic, Partition: kafka.PartitionAny}, Key: []byte(rec.KafkaKey), Value: stringPointerToByteArray(rec.KafkaValue), Opaque: rec, Headers: toNativeKafkaHeaders(rec.KafkaHeaders), } h.inFlightRecords.Drain(int64(*h.config.Limits.MaxInFlightRecords-1), concurrent.Indefinitely) startTime := time.Now() for { if h.deadlineExceeded("poll", h.neli.Deadline().Elapsed(), *h.config.Limits.MaxPollInterval) { break } if h.deadlineExceeded("message queueing", time.Now().Sub(startTime), *h.config.Limits.QueueTimeout) { break } if remaining := h.inFlightKeys.Drain(rec.KafkaKey, 0, *h.config.Limits.DrainInterval); remaining <= 0 { ensureState(remaining == 0, "drain failed: %d remaining in-flight records for key %s", remaining, rec.KafkaKey) break } h.logger().D()("Drain stalled for record %d (key %s)", rec.ID, rec.KafkaKey) } if h.forceRemarkFlag.Get() == 1 { h.queuedRecords.Dec() continue } h.inFlightRecords.Inc() h.queuedRecords.Dec() h.inFlightKeys.Inc(rec.KafkaKey) err := prod.Produce(m, nil) if err != nil { h.logger().W()("Error publishing record %v: %v", rec, err) h.inFlightKeys.Dec(rec.KafkaKey) h.inFlightRecords.Dec() h.forceRemarkFlag.Set(1) } } }) } func stringPointerToByteArray(str *string) []byte { if str != nil { return []byte(*str) } return nil } func (h *harvest) shutdownSendBattery() { if h.sendBattery != nil { shutdownWatcher := h.watch("shutdown send battery") h.logger().D()("Shutting down send battery") // Expedite shutdown by raising the remark flag, forcing any queued records to be skipped. h.forceRemarkFlag.Set(1) // Take the battery down, waiting for all goroutines to complete. h.sendBattery.shutdown() h.sendBattery = nil // Reset flags and counters for next time. h.forceRemarkFlag.Set(0) h.inFlightRecords.Set(0) h.inFlightKeys.Clear() h.logger().D()("Send battery terminated") shutdownWatcher.End() } } func onLeaderPoll(h *harvest) { markBegin := time.Now() records, err := h.db.Mark(*h.LeaderID(), *h.config.Limits.MarkQueryRecords) if err != nil { h.logger().W()("Error executing mark query: %v", err) // When an error occurs during marking, we cannot just backoff and retry, as the error could have // occurred on the return leg (i.e. DB operation succeeded on the server, but timed out on the client). 
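// Raising the remark flag causes the poll loop to tear down the send battery and refresh the leader ID // before marking resumes.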
h.forceRemarkFlag.Set(1) time.Sleep(*h.config.Limits.IOErrorBackoff) return } if len(records) > 0 { sendBegin := time.Now() h.logger().T()("Leader poll: marked %d in the range %d-%d, took %v", len(records), records[0].ID, records[len(records)-1].ID, sendBegin.Sub(markBegin)) enqueueWatcher := h.watch("enqueue marked records") for _, rec := range records { h.queuedRecords.Inc() h.sendBattery.enqueue(rec) } enqueueWatcher.End() h.logger().T()("Send took %v", time.Now().Sub(sendBegin)) } else { time.Sleep(*h.config.Limits.MarkBackoff) } } func (h *harvest) watch(operation string) *diags.Watcher { return diags.Watch(operation, watcherTimeout, diags.Print(h.logger().W())) } func (h *harvest) refreshLeader() { newLeaderID, _ := uuid.NewRandom() h.leaderID.Store(newLeaderID) h.logger().W()("Refreshed leader ID: %v", newLeaderID) h.eventHandler(LeaderRefreshed{newLeaderID}) } func (h *harvest) deadlineExceeded(deadline string, elapsed time.Duration, threshold time.Duration) bool { if excess := elapsed - threshold; excess > 0 { if h.forceRemarkFlag.CompareAndSwap(0, 1) { h.logger().W()("Exceeded %s deadline by %v", deadline, excess) } return true } return false } func backgroundDeliveryHandler(h *harvest, prod KafkaProducer, done chan int) { h.logger().I()("Starting background delivery handler") defer h.reportPanic("background delivery handler") defer close(done) for e := range prod.Events() { switch ev := e.(type) { case *kafka.Message: rec := ev.Opaque.(OutboxRecord) if ev.TopicPartition.Error != nil { onFailedDelivery(h, rec, ev.TopicPartition.Error) } else { onSuccessfulDelivery(h, rec) h.updateStats() } default: h.logger().I()("Observed event: %v (%T)", e, e) } } } func (h *harvest) updateStats() { h.throughputLock.Lock() defer h.throughputLock.Unlock() h.throughput.MaybeStatsCall(func(stats metric.MeterStats) { h.logger().D()("%v", stats) h.eventHandler(MeterRead{stats}) }) h.throughput.Add(1) } func onSuccessfulDelivery(h *harvest, rec OutboxRecord) { for { done, err := h.db.Purge(rec.ID) if err == nil { if !done { h.logger().W()("Did not purge record %v", rec) } break } h.logger().W()("Error executing purge query for record %v: %v", rec, err) time.Sleep(*h.config.Limits.IOErrorBackoff) } h.inFlightKeys.Dec(rec.KafkaKey) h.inFlightRecords.Dec() } func onFailedDelivery(h *harvest, rec OutboxRecord, err error) { h.logger().W()("Delivery failed for %v, err: %v", rec, err) for { done, err := h.db.Reset(rec.ID) if err == nil { if !done { h.logger().W()("Did not reset record %v", rec) } else { h.forceRemarkFlag.Set(1) } break } h.logger().W()("Error executing reset query for record %v: %v", rec, err) time.Sleep(*h.config.Limits.IOErrorBackoff) } h.inFlightKeys.Dec(rec.KafkaKey) h.inFlightRecords.Dec() } func (h *harvest) onAcquired() { newLeaderID, _ := uuid.NewRandom() h.leaderID.Store(newLeaderID) h.logger().I()("Elected as leader, ID: %v", newLeaderID) h.eventHandler(LeaderAcquired{newLeaderID}) } func (h *harvest) onRevoked() { h.logger().I()("Lost leader status") h.cleanupLeaderState() h.eventHandler(LeaderRevoked{}) } func (h *harvest) onFenced() { h.logger().W()("Leader fenced") h.cleanupLeaderState() h.eventHandler(LeaderFenced{}) } func (h *harvest) cleanupLeaderState() { h.shutdownSendBattery() h.leaderID.Store(noLeader) } // Stop the harvester, returning immediately. // // This method does not wait until the underlying Goroutines have been terminated // and all resources have been disposed off properly. 
This is accomplished by calling Await() func (h *harvest) Stop() { h.shouldBeRunningFlag.Set(0) } // Await the termination of this Harvest instance. // // This method blocks indefinitely, returning only when this instance has completed an orderly shutdown. I.e. // when all Goroutines have returned and all resources have been disposed of. func (h *harvest) Await() error { h.state.Await(concurrent.RefEqual(Stopped), concurrent.Indefinitely) panicCause := h.panicCause.Load() if panicCause != nil { return panicCause.(tracedPanic) } return nil } ================================================ FILE: harvest_test.go ================================================ package goharvest import ( "fmt" "math" "strconv" "sync" "testing" "time" "github.com/google/uuid" "github.com/obsidiandynamics/goneli" "github.com/obsidiandynamics/libstdgo/check" "github.com/obsidiandynamics/libstdgo/concurrent" "github.com/obsidiandynamics/libstdgo/scribe" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/confluentinc/confluent-kafka-go.v1/kafka" ) func wait(t check.Tester) check.Timesert { return check.Wait(t, 10*time.Second) } // Aggressive limits used for (fast) testing and without send concurrency to simplify assertions. func testLimits() Limits { return Limits{ IOErrorBackoff: Duration(1 * time.Millisecond), PollDuration: Duration(1 * time.Millisecond), MinPollInterval: Duration(1 * time.Millisecond), MaxPollInterval: Duration(60 * time.Second), HeartbeatTimeout: Duration(60 * time.Second), DrainInterval: Duration(60 * time.Second), QueueTimeout: Duration(60 * time.Second), MarkBackoff: Duration(1 * time.Millisecond), MaxInFlightRecords: Int(math.MaxInt64), SendConcurrency: Int(1), SendBuffer: Int(0), } } type fixtures struct { producerMockSetup producerMockSetup } func (f *fixtures) setDefaults() { if f.producerMockSetup == nil { f.producerMockSetup = func(prodMock *prodMock) {} } } type producerMockSetup func(prodMock *prodMock) func (f fixtures) create() (scribe.MockScribe, *dbMock, *goneli.MockNeli, Config) { f.setDefaults() m := scribe.NewMock() db := &dbMock{} db.fillDefaults() var neli goneli.MockNeli config := Config{ Limits: testLimits(), Scribe: scribe.New(m.Factories()), DatabaseBindingProvider: mockDatabaseBindingProvider(db), NeliProvider: func(config goneli.Config, barrier goneli.Barrier) (goneli.Neli, error) { n, err := goneli.NewMock(goneli.MockConfig{ MinPollInterval: config.MinPollInterval, }, barrier) if err != nil { panic(err) } neli = n return n, nil }, KafkaProducerProvider: func(conf *KafkaConfigMap) (KafkaProducer, error) { prod := &prodMock{} prod.fillDefaults() f.producerMockSetup(prod) return prod, nil }, } config.Scribe.SetEnabled(scribe.All) return m, db, &neli, config } type testEventHandler struct { mutex sync.Mutex events []Event } func (c *testEventHandler) handler() EventHandler { return func(e Event) { c.mutex.Lock() defer c.mutex.Unlock() c.events = append(c.events, e) } } func (c *testEventHandler) list() []Event { c.mutex.Lock() defer c.mutex.Unlock() eventsCopy := make([]Event, len(c.events)) copy(eventsCopy, c.events) return eventsCopy } func (c *testEventHandler) length() int { c.mutex.Lock() defer c.mutex.Unlock() return len(c.events) } func TestCorrectInitialisation(t *testing.T) { _, db, neli, config := fixtures{}.create() var givenDataSource string var givenOutboxTable string config.DatabaseBindingProvider = func(dataSource string, outboxTable string) (DatabaseBinding, error) { givenDataSource = dataSource givenOutboxTable = 
outboxTable return db, nil } config.DataSource = "test data source" config.OutboxTable = "test table name" config.LeaderGroupID = "test leader group ID" config.BaseKafkaConfig = KafkaConfigMap{ "bootstrap.servers": "localhost:9092", } h, err := New(config) require.Nil(t, err) assert.Equal(t, Created, h.State()) assertNoError(t, h.Start) assert.Equal(t, Running, h.State()) assert.Equal(t, config.DataSource, givenDataSource) assert.Equal(t, config.OutboxTable, givenOutboxTable) h.Stop() assert.Nil(t, h.Await()) assert.Equal(t, Stopped, h.State()) assert.Equal(t, 1, db.c.Dispose.GetInt()) assert.Equal(t, goneli.Closed, (*neli).State()) } func TestConfigError(t *testing.T) { h, err := New(Config{ Limits: Limits{ IOErrorBackoff: Duration(-1), }, }) assert.Nil(t, h) assert.NotNil(t, err) } func TestErrorDuringDBInitialisation(t *testing.T) { _, _, _, config := fixtures{}.create() config.DatabaseBindingProvider = func(dataSource string, outboxTable string) (DatabaseBinding, error) { return nil, check.ErrSimulated } h, err := New(config) require.Nil(t, err) assertErrorContaining(t, h.Start, "simulated") assert.Equal(t, Created, h.State()) } func TestErrorDuringNeliInitialisation(t *testing.T) { _, db, _, config := fixtures{}.create() config.NeliProvider = func(config goneli.Config, barrier goneli.Barrier) (goneli.Neli, error) { return nil, check.ErrSimulated } h, err := New(config) require.Nil(t, err) assertErrorContaining(t, h.Start, "simulated") assert.Equal(t, Created, h.State()) assert.Equal(t, 1, db.c.Dispose.GetInt()) } func TestErrorDuringProducerConfiguration(t *testing.T) { _, _, _, config := fixtures{}.create() config.ProducerKafkaConfig = KafkaConfigMap{ "enable.idempotence": false, } h, err := New(config) require.NotNil(t, err) assert.Contains(t, err.Error(), "cannot override configuration 'enable.idempotence'") assert.Nil(t, h) } func TestErrorDuringProducerInitialisation(t *testing.T) { m, db, neli, config := fixtures{}.create() config.KafkaProducerProvider = func(conf *KafkaConfigMap) (KafkaProducer, error) { return nil, check.ErrSimulated } h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) // Induce leadership and wait until leader. (*neli).AcquireLeader() wait(t).Until(h.IsLeader) wait(t).UntilAsserted(func(t check.Tester) { assert.Equal(t, 1, eh.length()) }) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Error)). Having(scribe.MessageEqual("Caught panic in send cell: simulated")). Passes(scribe.Count(1))) // Having detected a panic, it should self-destruct assertErrorContaining(t, h.Await, "simulated") assert.Equal(t, 1, db.c.Dispose.GetInt()) assert.Equal(t, (*neli).State(), goneli.Closed) } func TestUncaughtPanic_backgroundPoller(t *testing.T) { m, _, neli, config := fixtures{}.create() h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) (*neli).PulseError(check.ErrSimulated) // Having detected a panic, it should self-destruct assertErrorContaining(t, h.Await, "simulated") assert.Equal(t, 0, eh.length()) t.Log(m.Entries().List()) m.Entries(). Having(scribe.LogLevel(scribe.Info)). Having(scribe.MessageEqual("Starting background poller")). Assert(t, scribe.Count(1)) m.Entries(). Having(scribe.LogLevel(scribe.Error)). Having(scribe.MessageEqual("Caught panic in background poller: simulated")). 
Assert(t, scribe.Count(1)) } func TestUncaughtPanic_backgroundDeliveryHandler(t *testing.T) { prodRef := concurrent.NewAtomicReference() m, db, neli, config := fixtures{producerMockSetup: func(prodMock *prodMock) { prodRef.Set(prodMock) }}.create() db.f.Reset = func(m *dbMock, id int64) (bool, error) { panic(check.ErrSimulated) } h, err := New(config) require.Nil(t, err) assertNoError(t, h.Start) // Induce leadership and await (*neli).AcquireLeader() wait(t).Until(h.IsLeader) // Feed a delivery event to cause a DB reset query wait(t).UntilAsserted(isNotNil(prodRef.Get)) prodRef.Get().(*prodMock).events <- message(OutboxRecord{ID: 777}, check.ErrSimulated) // Having detected a panic, it should self-destruct assertErrorContaining(t, h.Await, "simulated") t.Log(m.Entries().List()) m.Entries(). Having(scribe.LogLevel(scribe.Info)). Having(scribe.MessageEqual("Starting background delivery handler")). Assert(t, scribe.Count(1)) m.Entries(). Having(scribe.LogLevel(scribe.Error)). Having(scribe.MessageEqual("Caught panic in background delivery handler: simulated")). Assert(t, scribe.Count(1)) } func TestBasicLeaderElectionAndRevocation(t *testing.T) { m, _, neli, config := fixtures{}.create() h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) // Starts off in a non-leader state assert.Equal(t, false, h.IsLeader()) assert.Nil(t, h.LeaderID()) // Assign leadership via the rebalance listener and wait for the assignment to take effect (*neli).AcquireLeader() wait(t).UntilAsserted(isTrue(h.IsLeader)) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Info)). Having(scribe.MessageEqual(fmt.Sprintf("Elected as leader, ID: %s", h.LeaderID()))). Passes(scribe.Count(1))) m.Reset() wait(t).UntilAsserted(func(t check.Tester) { if assert.Equal(t, 1, eh.length()) { e := eh.list()[0].(LeaderAcquired) assert.Equal(t, e.LeaderID(), *(h.LeaderID())) } }) // Revoke leadership via the rebalance listener and await its effect (*neli).RevokeLeader() wait(t).UntilAsserted(isFalse(h.IsLeader)) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Info)). Having(scribe.MessageEqual("Lost leader status")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Shutting down send battery")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Send battery terminated")). Passes(scribe.Count(1))) m.Reset() wait(t).UntilAsserted(func(t check.Tester) { if assert.Equal(t, 2, eh.length()) { _ = eh.list()[1].(LeaderRevoked) } }) // Reassign leadership via the rebalance listener and wait for the assignment to take effect (*neli).AcquireLeader() wait(t).UntilAsserted(isTrue(h.IsLeader)) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Info)). Having(scribe.MessageEqual(fmt.Sprintf("Elected as leader, ID: %s", h.LeaderID()))). Passes(scribe.Count(1))) m.Reset() wait(t).UntilAsserted(func(t check.Tester) { if assert.Equal(t, 3, eh.length()) { e := eh.list()[2].(LeaderAcquired) assert.Equal(t, e.LeaderID(), *(h.LeaderID())) } }) // Fence the leader (*neli).FenceLeader() wait(t).UntilAsserted(isFalse(h.IsLeader)) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageEqual("Leader fenced")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). 
Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Shutting down send battery")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Send battery terminated")). Passes(scribe.Count(1))) m.Reset() wait(t).UntilAsserted(func(t check.Tester) { if assert.Equal(t, 4, eh.length()) { _ = eh.list()[3].(LeaderFenced) } }) h.Stop() assert.Nil(t, h.Await()) } func TestMetrics(t *testing.T) { prodRef := concurrent.NewAtomicReference() m, _, neli, config := fixtures{producerMockSetup: func(prodMock *prodMock) { prodRef.Set(prodMock) }}.create() config.Limits.MinMetricsInterval = Duration(1 * time.Millisecond) h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) // Induce leadership and wait for the leadership event (*neli).AcquireLeader() wait(t).UntilAsserted(isNotNil(prodRef.Get)) wait(t).UntilAsserted(func(t check.Tester) { assert.Equal(t, 1, eh.length()) }) wait(t).UntilAsserted(func(t check.Tester) { backlogRecords := generateRecords(1, 0) deliverAll(backlogRecords, nil, prodRef.Get().(*prodMock).events) if assert.GreaterOrEqual(t, eh.length(), 2) { e := eh.list()[1].(MeterRead) if stats := e.Stats(); assert.NotNil(t, stats) { assert.Equal(t, stats.Name, "throughput") } } }) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageContaining("throughput")). Passes(scribe.CountAtLeast(1))) h.Stop() assert.Nil(t, h.Await()) } func TestHandleNonMessageEvent(t *testing.T) { prodRef := concurrent.NewAtomicReference() m, _, neli, config := fixtures{producerMockSetup: func(prodMock *prodMock) { prodRef.Set(prodMock) }}.create() config.Limits.MinMetricsInterval = Duration(1 * time.Millisecond) h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) // Induce leadership and wait for the leadership event (*neli).AcquireLeader() wait(t).UntilAsserted(isNotNil(prodRef.Get)) prod := prodRef.Get().(*prodMock) wait(t).UntilAsserted(func(t check.Tester) { assert.Equal(t, 1, eh.length()) }) prod.events <- kafka.NewError(kafka.ErrAllBrokersDown, "brokers down", false) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Info)). Having(scribe.MessageContaining("Observed event: brokers down")). Passes(scribe.CountAtLeast(1))) h.Stop() assert.Nil(t, h.Await()) } func TestThrottleKeys(t *testing.T) { prod := concurrent.NewAtomicReference() lastPublished := concurrent.NewAtomicReference() m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) { pm.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error { lastPublished.Set(msg) return nil } prod.Set(pm) }}.create() h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) // Starts off with no backlog. assert.Equal(t, 0, h.InFlightRecords()) // Induce leadership and wait until a producer has been spawned. (*neli).AcquireLeader() wait(t).UntilAsserted(isNotNil(prod.Get)) const backlog = 10 backlogRecords := generateCyclicKeyedRecords(1, backlog, 0) db.markedRecords <- backlogRecords // Even though we pushed several records through, they all had a common key, so only one should // should be published. 
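// (At most one record per key may be in flight at any time; the rest stay queued until the first is acknowledged.)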
wait(t).UntilAsserted(intEqual(1, h.InFlightRecords)) assert.True(t, h.IsLeader()) // should definitely be leader by now wait(t).UntilAsserted(intEqual(1, prod.Get().(*prodMock).c.Produce.GetInt)) msg := lastPublished.Get().(*kafka.Message) assert.Equal(t, msg.Value, []byte(*backlogRecords[0].KafkaValue)) assert.ElementsMatch(t, h.InFlightRecordKeys(), []string{backlogRecords[0].KafkaKey}) // Drain the in-flight record... another one should then be published. deliverAll(backlogRecords[0:1], nil, prod.Get().(*prodMock).events) wait(t).UntilAsserted(func(t check.Tester) { msg := lastPublished.Get() if assert.NotNil(t, msg) { assert.Equal(t, msg.(*kafka.Message).Value, []byte(*backlogRecords[1].KafkaValue)) } }) // Drain the backlog by feeding in delivery confirmations one at a time. for i := 1; i < backlog; i++ { wait(t).UntilAsserted(intEqual(1, h.InFlightRecords)) wait(t).UntilAsserted(func(t check.Tester) { msg := lastPublished.Get() if assert.NotNil(t, msg) { assert.Equal(t, []byte(*backlogRecords[i].KafkaValue), msg.(*kafka.Message).Value) } }) deliverAll(backlogRecords[i:i+1], nil, prod.Get().(*prodMock).events) } // Revoke leadership... (*neli).RevokeLeader() // Wait for the backlog to drain... leadership status will be cleared when done. wait(t).Until(check.Not(h.IsLeader)) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Shutting down send battery")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Send battery terminated")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Info)). Having(scribe.MessageContaining("Lost leader status")). Passes(scribe.Count(1))) assert.Equal(t, backlog, db.c.Purge.GetInt()) assert.Equal(t, backlog, prod.Get().(*prodMock).c.Produce.GetInt()) assert.Equal(t, 0, h.InFlightRecords()) h.Stop() assert.Nil(t, h.Await()) } func TestPollDeadlineExceeded(t *testing.T) { m, db, neli, config := fixtures{}.create() config.Limits.DrainInterval = Duration(time.Millisecond) config.Limits.MaxPollInterval = Duration(time.Millisecond) h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) // Starts off with no backlog. assert.Equal(t, 0, h.InFlightRecords()) // Induce leadership and wait until a producer has been spawned. (*neli).AcquireLeader() db.markedRecords <- generateCyclicKeyedRecords(1, 2, 0) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Exceeded poll deadline")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Shutting down send battery")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Send battery terminated")). Passes(scribe.Count(1))) h.Stop() assert.Nil(t, h.Await()) } func TestQueueLimitExceeded(t *testing.T) { m, db, neli, config := fixtures{}.create() config.Limits.DrainInterval = Duration(time.Millisecond) config.Limits.QueueTimeout = Duration(time.Millisecond) h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) // Starts off with no backlog. assert.Equal(t, 0, h.InFlightRecords()) // Induce leadership and wait until a producer has been spawned. 
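// Two records sharing a key are marked; with QueueTimeout at one millisecond the second record cannot // drain in time, tripping the message queueing deadline.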
(*neli).AcquireLeader() db.markedRecords <- generateCyclicKeyedRecords(1, 2, 0) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Exceeded message queueing deadline")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Shutting down send battery")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Send battery terminated")). Passes(scribe.Count(1))) h.Stop() assert.Nil(t, h.Await()) } func TestDrainInFlightRecords_failedDelivery(t *testing.T) { prod := concurrent.NewAtomicReference() lastPublished := concurrent.NewAtomicReference() m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) { pm.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error { lastPublished.Set(msg) return nil } prod.Set(pm) }}.create() h, err := New(config) require.Nil(t, err) assertNoError(t, h.Start) // Starts off with no backlog assert.Equal(t, 0, h.InFlightRecords()) // Induce leadership (*neli).AcquireLeader() wait(t).UntilAsserted(isNotNil(prod.Get)) // Generate a backlog const backlog = 10 backlogRecords := generateRecords(backlog, 0) db.markedRecords <- backlogRecords // Wait for the backlog to register. wait(t).UntilAsserted(intEqual(backlog, h.InFlightRecords)) wait(t).UntilAsserted(intEqual(backlog, prod.Get().(*prodMock).c.Produce.GetInt)) assert.True(t, h.IsLeader()) // should be leader by now // Revoke leadership... this will start the backlog drain. (*neli).RevokeLeader() wait(t).Until(check.Not(h.IsLeader)) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Shutting down send battery")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageEqual("Send battery terminated")). Passes(scribe.Count(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Info)). Having(scribe.MessageContaining("Lost leader status")). Passes(scribe.Count(1))) assert.Equal(t, h.InFlightRecords(), 0) h.Stop() assert.Nil(t, h.Await()) } func TestErrorInMarkQuery(t *testing.T) { m, db, neli, config := fixtures{}.create() db.f.Mark = func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error) { return nil, check.ErrSimulated } h, err := New(config) require.Nil(t, err) assertNoError(t, h.Start) // Induce leadership (*neli).AcquireLeader() // Wait for the error to be logged wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Error executing mark query")). Passes(scribe.CountAtLeast(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Debug)). Having(scribe.MessageContaining("Remark requested")). 
Passes(scribe.CountAtLeast(1))) assert.Equal(t, Running, h.State()) h.Stop() assert.Nil(t, h.Await()) } func TestErrorInProduce(t *testing.T) { prodRef := concurrent.NewAtomicReference() produceError := concurrent.NewAtomicCounter(1) // 1=true, 0=false m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) { pm.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error { if produceError.Get() == 1 { return kafka.NewError(kafka.ErrFail, "simulated", false) } return nil } prodRef.Set(pm) }}.create() h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) // Induce leadership (*neli).AcquireLeader() wait(t).UntilAsserted(isNotNil(prodRef.Get)) prod := prodRef.Get().(*prodMock) prodRef.Set(nil) // Mark one record records := generateRecords(1, 0) db.markedRecords <- records // Wait for the error to be logged wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Error publishing record")). Passes(scribe.CountAtLeast(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Refreshed leader ID")). Passes(scribe.CountAtLeast(1))) m.Reset() assert.Equal(t, Running, h.State()) wait(t).UntilAsserted(isNotNil(prodRef.Get)) prod = prodRef.Get().(*prodMock) // Resume normal production... error should clear but the record count should not go up, as // there can only be one in-flight record for a given key produceError.Set(0) db.markedRecords <- records wait(t).UntilAsserted(intEqual(1, h.InFlightRecords)) wait(t).UntilAsserted(func(t check.Tester) { assert.ElementsMatch(t, h.InFlightRecordKeys(), []string{records[0].KafkaKey}) }) if assert.GreaterOrEqual(t, eh.length(), 2) { _ = eh.list()[0].(LeaderAcquired) _ = eh.list()[1].(LeaderRefreshed) } // Feed successful delivery report for the first record prod.events <- message(records[0], nil) h.Stop() assert.Nil(t, h.Await()) } // Tests remarking by feeding through two records for the same key, forcing them to come through in sequence. // The first is published, but fails upon delivery, which raises the forceRemark flag. // As the second on is processed, the forceRemark flag raised by the first should be spotted, and a leader // refresh should occur. func TestReset(t *testing.T) { prodRef := concurrent.NewAtomicReference() lastPublished := concurrent.NewAtomicReference() m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) { pm.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error { lastPublished.Set(msg) return nil } prodRef.Set(pm) }}.create() h, err := New(config) require.Nil(t, err) eh := &testEventHandler{} h.SetEventHandler(eh.handler()) assertNoError(t, h.Start) // Induce leadership (*neli).AcquireLeader() wait(t).UntilAsserted(isNotNil(prodRef.Get)) prod := prodRef.Get().(*prodMock) // Mark two records for the same key records := generateCyclicKeyedRecords(1, 2, 0) db.markedRecords <- records // Wait for the backlog to register wait(t).UntilAsserted(intEqual(1, h.InFlightRecords)) wait(t).UntilAsserted(func(t check.Tester) { if msg := lastPublished.Get(); assert.NotNil(t, msg) { assert.Equal(t, *records[0].KafkaValue, string(msg.(*kafka.Message).Value)) } }) // Feed an error prod.events <- message(records[0], check.ErrSimulated) // Wait for the error to be logged wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). 
Having(scribe.MessageContaining("Delivery failed")). Passes(scribe.CountAtLeast(1))) wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Refreshed leader ID")). Passes(scribe.CountAtLeast(1))) m.Reset() assert.Equal(t, Running, h.State()) wait(t).UntilAsserted(isNotNil(prodRef.Get)) h.Stop() assert.Nil(t, h.Await()) } func TestErrorInPurgeAndResetQueries(t *testing.T) { prodRef := concurrent.NewAtomicReference() m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) { prodRef.Set(pm) }}.create() records := generateRecords(2, 0) purgeError := concurrent.NewAtomicCounter(1) // 1=true, 0=false resetError := concurrent.NewAtomicCounter(1) // 1=true, 0=false db.f.Mark = func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error) { if db.c.Mark.Get() == 0 { return records, nil } return []OutboxRecord{}, nil } db.f.Purge = func(m *dbMock, id int64) (bool, error) { if purgeError.Get() == 1 { return false, check.ErrSimulated } return true, nil } db.f.Reset = func(m *dbMock, id int64) (bool, error) { if resetError.Get() == 1 { return false, check.ErrSimulated } return true, nil } h, err := New(config) require.Nil(t, err) assertNoError(t, h.Start) // Induce leadership and await its registration (*neli).AcquireLeader() wait(t).UntilAsserted(isNotNil(prodRef.Get)) prod := prodRef.Get().(*prodMock) wait(t).UntilAsserted(isTrue(h.IsLeader)) wait(t).UntilAsserted(intEqual(2, h.InFlightRecords)) // Feed successful delivery report for the first record prod.events <- message(records[0], nil) // Wait for the error to be logged wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Error executing purge query for record")). Passes(scribe.CountAtLeast(1))) m.Reset() assert.Equal(t, Running, h.State()) assert.Equal(t, 2, h.InFlightRecords()) // Resume normal production... error should clear purgeError.Set(0) wait(t).UntilAsserted(intEqual(1, h.InFlightRecords)) // Feed failed delivery report for the first record prodRef.Get().(*prodMock).events <- message(records[1], kafka.NewError(kafka.ErrFail, "simulated", false)) // Wait for the error to be logged wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Error executing reset query for record")). Passes(scribe.CountAtLeast(1))) m.Reset() assert.Equal(t, Running, h.State()) assert.Equal(t, 1, h.InFlightRecords()) // Resume normal production... 
error should clear resetError.Set(0) wait(t).UntilAsserted(intEqual(0, h.InFlightRecords)) h.Stop() assert.Nil(t, h.Await()) } func TestIncompletePurgeAndResetQueries(t *testing.T) { prodRef := concurrent.NewAtomicReference() m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) { prodRef.Set(pm) }}.create() records := generateRecords(2, 0) db.f.Mark = func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error) { if db.c.Mark.Get() == 0 { return records, nil } return []OutboxRecord{}, nil } db.f.Purge = func(m *dbMock, id int64) (bool, error) { return false, nil } db.f.Reset = func(m *dbMock, id int64) (bool, error) { return false, nil } h, err := New(config) require.Nil(t, err) assertNoError(t, h.Start) // Induce leadership and await its registration (*neli).AcquireLeader() wait(t).UntilAsserted(isTrue(h.IsLeader)) wait(t).UntilAsserted(intEqual(2, h.InFlightRecords)) wait(t).UntilAsserted(isNotNil(prodRef.Get)) prod := prodRef.Get().(*prodMock) // Feed successful delivery report for the first record prod.events <- message(records[0], nil) // Wait for the warning to be logged wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Did not purge record")). Passes(scribe.CountAtLeast(1))) m.Reset() assert.Equal(t, Running, h.State()) wait(t).UntilAsserted(intEqual(1, h.InFlightRecords)) // Feed failed delivery report for the first record prod.events <- message(records[1], kafka.NewError(kafka.ErrFail, "simulated", false)) // Wait for the warning to be logged wait(t).UntilAsserted(m.ContainsEntries(). Having(scribe.LogLevel(scribe.Warn)). Having(scribe.MessageContaining("Did not reset record")). Passes(scribe.CountAtLeast(1))) m.Reset() assert.Equal(t, Running, h.State()) wait(t).UntilAsserted(intEqual(0, h.InFlightRecords)) h.Stop() assert.Nil(t, h.Await()) } func TestEnsureState(t *testing.T) { check.ThatPanicsAsExpected(t, check.ErrorContaining("must not be false"), func() { ensureState(false, "must not be false") }) ensureState(true, "must not be false") } func intEqual(expected int, intSupplier func() int) func(t check.Tester) { return func(t check.Tester) { assert.Equal(t, expected, intSupplier()) } } func lengthEqual(expected int, sliceSupplier func() []string) func(t check.Tester) { return func(t check.Tester) { assert.Len(t, sliceSupplier(), expected) } } func atLeast(min int, f func() int) check.Assertion { return func(t check.Tester) { assert.GreaterOrEqual(t, f(), min) } } func isTrue(f func() bool) check.Assertion { return func(t check.Tester) { assert.True(t, f()) } } func isFalse(f func() bool) check.Assertion { return func(t check.Tester) { assert.False(t, f()) } } func isNotNil(f func() interface{}) check.Assertion { return func(t check.Tester) { assert.NotNil(t, f()) } } func assertErrorContaining(t *testing.T, f func() error, substr string) { err := f() if assert.NotNil(t, err) { assert.Contains(t, err.Error(), substr) } } func assertNoError(t *testing.T, f func() error) { err := f() require.Nil(t, err) } func newTimedOutError() kafka.Error { return kafka.NewError(kafka.ErrTimedOut, "Timed out", false) } func generatePartitions(indexes ...int32) []kafka.TopicPartition { parts := make([]kafka.TopicPartition, len(indexes)) for i, index := range indexes { parts[i] = kafka.TopicPartition{Partition: index} } return parts } func generateRecords(numRecords int, startID int) []OutboxRecord { records := make([]OutboxRecord, numRecords) now := time.Now() for i := 0; i < numRecords; i++ { records[i] 
= OutboxRecord{ ID: int64(startID + i), CreateTime: now, KafkaTopic: "test_topic", KafkaKey: fmt.Sprintf("key-%x", i), KafkaValue: String(fmt.Sprintf("value-%x", i)), KafkaHeaders: KafkaHeaders{ KafkaHeader{Key: "ID", Value: strconv.FormatInt(int64(startID+i), 10)}, }, } } return records } func generateCyclicKeyedRecords(numKeys int, numRecords int, startID int) []OutboxRecord { records := make([]OutboxRecord, numRecords) now := time.Now() for i := 0; i < numRecords; i++ { records[i] = OutboxRecord{ ID: int64(startID + i), CreateTime: now, KafkaTopic: "test_topic", KafkaKey: fmt.Sprintf("key-%x", i%numKeys), KafkaValue: String(fmt.Sprintf("value-%x", i)), KafkaHeaders: KafkaHeaders{ KafkaHeader{Key: "ID", Value: strconv.FormatInt(int64(startID+i), 10)}, }, } } return records } func message(record OutboxRecord, err error) *kafka.Message { return &kafka.Message{ TopicPartition: kafka.TopicPartition{Topic: &record.KafkaTopic, Error: err}, Key: []byte(record.KafkaKey), Value: stringPointerToByteArray(record.KafkaValue), Timestamp: record.CreateTime, TimestampType: kafka.TimestampCreateTime, Opaque: record, } } func deliverAll(records []OutboxRecord, err error, events chan kafka.Event) { for _, record := range records { events <- message(record, err) } } ================================================ FILE: int/faulty_kafka_test.go ================================================ package int import ( "github.com/obsidiandynamics/goharvest" "github.com/obsidiandynamics/libstdgo/fault" "gopkg.in/confluentinc/confluent-kafka-go.v1/kafka" ) type ProducerFaultSpecs struct { OnProduce fault.Spec OnDelivery fault.Spec } func (specs ProducerFaultSpecs) build() producerFaults { return producerFaults{ onProduce: specs.OnProduce.Build(), onDelivery: specs.OnDelivery.Build(), } } func FaultyKafkaProducerProvider(realProvider goharvest.KafkaProducerProvider, specs ProducerFaultSpecs) goharvest.KafkaProducerProvider { return func(conf *goharvest.KafkaConfigMap) (goharvest.KafkaProducer, error) { real, err := realProvider(conf) if err != nil { return nil, err } return newFaultyProducer(real, specs.build()), nil } } type producerFaults struct { onProduce fault.Fault onDelivery fault.Fault } type faultyProducer struct { real goharvest.KafkaProducer faults producerFaults events chan kafka.Event } func newFaultyProducer(real goharvest.KafkaProducer, faults producerFaults) *faultyProducer { f := &faultyProducer{ real: real, faults: faults, events: make(chan kafka.Event), } go func() { defer close(f.events) for e := range real.Events() { switch ev := e.(type) { case *kafka.Message: if ev.TopicPartition.Error != nil { f.events <- e } else if err := f.faults.onDelivery.Try(); err != nil { rewrittenMessage := *ev rewrittenMessage.TopicPartition = kafka.TopicPartition{ Topic: ev.TopicPartition.Topic, Partition: ev.TopicPartition.Partition, Offset: ev.TopicPartition.Offset, Metadata: ev.TopicPartition.Metadata, Error: err, } f.events <- &rewrittenMessage } else { f.events <- e } default: f.events <- e } } }() return f } func (f *faultyProducer) Events() chan kafka.Event { return f.events } func (f *faultyProducer) Produce(msg *kafka.Message, deliveryChan chan kafka.Event) error { if err := f.faults.onProduce.Try(); err != nil { return err } return f.real.Produce(msg, deliveryChan) } func (f *faultyProducer) Close() { f.real.Close() } ================================================ FILE: int/harvest_int_test.go ================================================ package int import ( "context" "database/sql" "fmt" "os" 
"os/signal" "strconv" "strings" "syscall" "testing" "time" "github.com/google/uuid" . "github.com/obsidiandynamics/goharvest" "github.com/obsidiandynamics/goharvest/stasher" "github.com/obsidiandynamics/libstdgo/check" "github.com/obsidiandynamics/libstdgo/concurrent" "github.com/obsidiandynamics/libstdgo/diags" "github.com/obsidiandynamics/libstdgo/fault" "github.com/obsidiandynamics/libstdgo/scribe" "github.com/obsidiandynamics/libstdgo/scribe/overlog" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/confluentinc/confluent-kafka-go.v1/kafka" ) type externals struct { cons *kafka.Consumer admin *kafka.AdminClient db *sql.DB } const ( kafkaNamespace = "goharvest_test" topic = kafkaNamespace + ".topic" partitions = 10 dbSchema = "goharvest_test" outboxTable = dbSchema + ".outbox" leaderTopic = kafkaNamespace + ".neli" leaderGroupID = kafkaNamespace + ".group" receiverGroupID = kafkaNamespace + ".receiver_group" bootstrapServers = "localhost:9092" dataSource = "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable" generateInterval = 5 * time.Millisecond generateRecordsPerTxn = 20 generateMinRecords = 100 generateUniqueKeys = 10 receiverPollDuration = 500 * time.Millisecond receiverNoMessagesWarningTime = 10 * time.Second waitTimeout = 90 * time.Second ) var logger = overlog.New(overlog.StandardFormat()) var scr = scribe.New(overlog.Bind(logger)) func openExternals() externals { cons, err := kafka.NewConsumer(&kafka.ConfigMap{ "bootstrap.servers": bootstrapServers, "group.id": receiverGroupID, "enable.auto.commit": true, "auto.offset.reset": "earliest", "socket.timeout.ms": 10000, // "debug": "all", }) if err != nil { panic(err) } admin, err := kafka.NewAdminClientFromConsumer(cons) if err != nil { panic(err) } for { result, err := admin.CreateTopics(context.Background(), []kafka.TopicSpecification{ { Topic: topic, NumPartitions: partitions, ReplicationFactor: 1, }, }) if err != nil { if isFatalError(err) { panic(err) } else { // Allow for timeouts and other non-fatal errors. 
scr.W()("Non-fatal error creating topic: %v", err) } } else { if result[0].Error.Code() == kafka.ErrTopicAlreadyExists { scr.I()("Topic %s already exists", topic) } else if result[0].Error.Code() != kafka.ErrNoError { panic(result[0].Error) } break } } db, err := sql.Open("postgres", dataSource) if err != nil { panic(err) } const ddlTemplate = ` CREATE SCHEMA IF NOT EXISTS %s; DROP TABLE IF EXISTS %s; CREATE TABLE %s ( id BIGSERIAL PRIMARY KEY, create_time TIMESTAMP WITH TIME ZONE NOT NULL, kafka_topic VARCHAR(249) NOT NULL, kafka_key VARCHAR(5) NOT NULL, kafka_value VARCHAR(50), kafka_header_keys TEXT[] NOT NULL, kafka_header_values TEXT[] NOT NULL, leader_id UUID ) ` _, err = db.Exec(fmt.Sprintf(ddlTemplate, dbSchema, outboxTable, outboxTable)) if err != nil { panic(err) } return externals{cons, admin, db} } func (x *externals) close() { x.cons.Close() x.db.Close() x.admin.Close() } func wait(t check.Tester) check.Timesert { return check.Wait(t, waitTimeout) } func TestOneNode_withFailures(t *testing.T) { test(t, 1, 5*time.Second, ProducerFaultSpecs{ OnProduce: fault.Spec{Cnt: fault.Random(0.02), Err: check.ErrSimulated}, OnDelivery: fault.Spec{Cnt: fault.Random(0.02), Err: check.ErrSimulated}, }) } func TestFourNodes_withFailures(t *testing.T) { test(t, 4, 5*time.Second, ProducerFaultSpecs{ OnProduce: fault.Spec{Cnt: fault.Random(0.02), Err: check.ErrSimulated}, OnDelivery: fault.Spec{Cnt: fault.Random(0.02), Err: check.ErrSimulated}, }) } func TestEightNodes_withoutFailures(t *testing.T) { test(t, 8, 2*time.Second, ProducerFaultSpecs{}) } func test(t *testing.T, numHarvests int, spawnInterval time.Duration, producerFaultSpecs ProducerFaultSpecs) { check.RequireLabel(t, "int") installSigQuitHandler() testID, _ := uuid.NewRandom() x := openExternals() defer x.close() scr.I()("Starting generator") generator := startGenerator(t, testID, x.db, generateInterval, generateUniqueKeys) defer func() { <-generator.stop() }() scr.I()("Starting receiver") receiver := startReceiver(t, testID, x.cons) defer func() { <-receiver.stop() }() harvests := make([]Harvest, numHarvests) defer func() { for _, h := range harvests { if h != nil { h.Stop() } } }() // Start harvests at a set interval. for i := 0; i < numHarvests; i++ { config := Config{ KafkaProducerProvider: FaultyKafkaProducerProvider(StandardKafkaProducerProvider(), producerFaultSpecs), Name: fmt.Sprintf("harvest-#%d", i+1), Scribe: scribe.New(overlog.Bind(logger)), BaseKafkaConfig: KafkaConfigMap{ "bootstrap.servers": bootstrapServers, "socket.timeout.ms": 10000, }, ProducerKafkaConfig: KafkaConfigMap{ "delivery.timeout.ms": 10000, // "debug": "broker,topic,metadata", }, LeaderTopic: leaderTopic, OutboxTable: outboxTable, LeaderGroupID: leaderGroupID, DataSource: dataSource, Limits: Limits{ MinPollInterval: Duration(100 * time.Millisecond), MarkBackoff: Duration(1 * time.Millisecond), IOErrorBackoff: Duration(1 * time.Millisecond), }, } config.Scribe.SetEnabled(scribe.Trace) scr.I()("Starting harvest %d/%d", i+1, numHarvests) h, err := New(config) require.Nil(t, err) harvests[i] = h require.Nil(t, h.Start()) scr.I()("Sleeping") sleepWithDeadline(spawnInterval) } // Stop harvests in the order they were started, except for the last one. The last harvest will be stopped // only after we've asserted the receipt of all messages. 
for i := 0; i < numHarvests-1; i++ { scr.I()("Stopping harvest %d/%d", i+1, numHarvests) harvests[i].Stop() scr.I()("In-flight records: %d", harvests[i].InFlightRecords()) sleepWithDeadline(spawnInterval) } // Wait until the generator produces some records. Once we've produced enough records, stop the // generator so that we can assert receipt. generator.recs.Fill(generateMinRecords, concurrent.Indefinitely) scr.I()("Stopping generator") <-generator.stop() generated := generator.recs.GetInt() scr.I()("Generated %d records", generated) // Wait until we received all records. Keep sliding in bite-sized chunks through successive assertions so that, as // long as we keep on receiving records, the assertion does not fail. This deals with slow harvesters (when we are // simulating lots of faults). const waitBatchSize = 100 for r := waitBatchSize; r < generated; r += waitBatchSize { advanced := wait(t).UntilAsserted(func(t check.Tester) { assert.GreaterOrEqual(t, receiver.recs.GetInt(), r) }) if !advanced { scr.E()("Stack traces:\n%s", diags.DumpAllStacks()) } require.True(t, advanced) scr.I()("Received %d messages", r) } wait(t).UntilAsserted(func(t check.Tester) { assert.GreaterOrEqual(t, receiver.recs.GetInt(), generated) }) assert.Equal(t, generated, receiver.recs.GetInt()) scr.I()("Stopping receiver") <-receiver.stop() // Stop the last harvest as we've already received all messages and there's nothing more to publish. scr.I()("Stopping harvest %d/%d", numHarvests, numHarvests) harvests[numHarvests-1].Stop() // Await harvests. for i, h := range harvests { scr.I()("Awaiting harvest %d/%d", i+1, numHarvests) assert.Nil(t, h.Await()) } scr.I()("Done") } func sleepWithDeadline(duration time.Duration) { beforeSleep := time.Now() time.Sleep(duration) if elapsed := time.Now().Sub(beforeSleep); elapsed > 2*duration { scr.W()("Sleep deadline exceeded; expected %v but slept for %v", duration, elapsed) } } type generator struct { cancel context.CancelFunc recs concurrent.AtomicCounter stopped chan int } func (g generator) stop() chan int { g.cancel() return g.stopped } func startGenerator(t *testing.T, testID uuid.UUID, db *sql.DB, interval time.Duration, keys int) generator { st := stasher.New(outboxTable) ctx, cancel := concurrent.Forever(context.Background()) recs := concurrent.NewAtomicCounter() stopped := make(chan int, 1) go func() { defer scr.T()("Generator exiting") defer close(stopped) ticker := time.NewTicker(interval) defer ticker.Stop() var tx *sql.Tx defer func() { err := finaliseTx(t, tx) if err != nil { scr.E()("Could not finalise transaction: %v", err) t.Errorf("Could not finalise transaction: %v", err) } }() var pre stasher.PreStash seq := 0 for { if seq%generateRecordsPerTxn == 0 { err := finaliseTx(t, tx) if err != nil { scr.E()("Could not finalise transaction: %v", err) t.Errorf("Could not finalise transaction: %v", err) return } newTx, err := db.Begin() tx = newTx if err != nil { scr.E()("Could not begin transaction: %v", err) t.Errorf("Could not begin transaction: %v", err) return } pre, err = st.Prepare(tx) if err != nil { scr.E()("Could not prepare: %v", err) t.Errorf("Could not prepare: %v", err) return } } testIDStr := testID.String() rec := OutboxRecord{ KafkaTopic: topic, KafkaKey: strconv.Itoa(seq % keys), KafkaValue: String(testIDStr + "_" + strconv.Itoa(seq)), KafkaHeaders: KafkaHeaders{ KafkaHeader{Key: "testId", Value: testIDStr}, }, } err := pre.Stash(rec) if err != nil { scr.E()("Could not stash: %v", err) t.Errorf("Could not stash: %v", err) return } seq = 
int(recs.Inc()) select { case <-ctx.Done(): return case <-ticker.C: } } }() return generator{cancel, recs, stopped} } func finaliseTx(t *testing.T, tx *sql.Tx) error { if tx != nil { return tx.Commit() } return nil } type receiver struct { cancel context.CancelFunc received map[string]int recs concurrent.AtomicCounter stopped chan int } func (r receiver) stop() chan int { r.cancel() return r.stopped } func startReceiver(t *testing.T, testID uuid.UUID, cons *kafka.Consumer) receiver { received := make(map[string]int) ctx, cancel := concurrent.Forever(context.Background()) recs := concurrent.NewAtomicCounter() stopped := make(chan int, 1) go func() { defer scr.T()("Receiver exiting") defer close(stopped) successiveTimeouts := 0 resetTimeouts := func() { if successiveTimeouts > 0 { successiveTimeouts = 0 } } err := cons.Subscribe(topic, func(_ *kafka.Consumer, event kafka.Event) error { switch e := event.(type) { case kafka.AssignedPartitions: resetTimeouts() scr.I()("Receiver: assigned partitions %v", e.Partitions) case kafka.RevokedPartitions: resetTimeouts() scr.I()("Receiver: revoked partitions %v", e.Partitions) } return nil }) if err != nil { scr.E()("Could not subscribe: %v", err) t.Errorf("Could not subscribe: %v", err) return } lastMessageReceivedTime := time.Now() messageAbsencePrinted := false expectedTestID := testID.String() const partitions = 64 lastReceivedOffsets := make([]kafka.Offset, partitions) for i := 0; i < partitions; i++ { lastReceivedOffsets[i] = kafka.Offset(-1) } for { msg, err := cons.ReadMessage(receiverPollDuration) if err != nil { if isFatalError(err) { scr.E()("Fatal error during poll: %v", err) t.Errorf("Fatal error during poll: %v", err) return } else if !isTimedOutError(err) { scr.W()("Error during poll: %v", err) } else { successiveTimeouts++ logger.Raw(".") } } if msg != nil { if msg.TopicPartition.Offset <= lastReceivedOffsets[msg.TopicPartition.Partition] { scr.D()("Skipping duplicate delivery at offset %d", msg.TopicPartition.Offset) continue } lastReceivedOffsets[msg.TopicPartition.Partition] = msg.TopicPartition.Offset lastMessageReceivedTime = time.Now() messageAbsencePrinted = false resetTimeouts() valueFrags := strings.Split(string(msg.Value), "_") if len(valueFrags) != 2 { scr.E()("invalid value '%s'", string(msg.Value)) t.Errorf("invalid value '%s'", string(msg.Value)) return } receivedTestID, value := valueFrags[0], valueFrags[1] if receivedTestID != expectedTestID { scr.I()("Skipping %s (test ID %s)", string(msg.Value), expectedTestID) continue } key := string(msg.Key) receivedSeq, err := strconv.Atoi(value) if err != nil { scr.E()("Could not convert message value to sequence: '%s'", value) t.Errorf("Could not convert message value to sequence: '%s'", value) return } if assert.Equal(t, 1, len(msg.Headers)) { assert.Equal(t, expectedTestID, string(msg.Headers[0].Value)) } if existingSeq, ok := received[key]; ok { if assert.GreaterOrEqual(t, receivedSeq, existingSeq) { if receivedSeq > existingSeq { received[key] = receivedSeq recs.Inc() } else { scr.I()("Received duplicate %d for key %s (this is okay)", existingSeq, key) } } else { scr.E()("Received records out of order, %d is behind %d", receivedSeq, existingSeq) t.Errorf("Received records out of order, %d is behind %d", receivedSeq, existingSeq) } } else { keyInt, err := strconv.Atoi(key) if err != nil { scr.E()("Could not convert message key '%s'", key) t.Errorf("Could not convert message key '%s'", key) return } if assert.Equal(t, keyInt, receivedSeq) { recs.Inc() received[key] = 
receivedSeq } } } else { elapsed := time.Now().Sub(lastMessageReceivedTime) if elapsed > receiverNoMessagesWarningTime && !messageAbsencePrinted { scr.W()("No messages received since %v", lastMessageReceivedTime) messageAbsencePrinted = true } } select { case <-ctx.Done(): return default: } } }() return receiver{cancel, received, recs, stopped} } func isTimedOutError(err error) bool { kafkaError, ok := err.(kafka.Error) return ok && kafkaError.Code() == kafka.ErrTimedOut } func isFatalError(err error) bool { kafkaError, ok := err.(kafka.Error) return ok && kafkaError.IsFatal() } var sigQuitHandlerInstalled = concurrent.NewAtomicCounter() func installSigQuitHandler() { if sigQuitHandlerInstalled.CompareAndSwap(0, 1) { sig := make(chan os.Signal, 1) go func() { signal.Notify(sig, syscall.SIGQUIT) select { case <-sig: scr.I()("Stack\n%s", diags.DumpAllStacks()) } }() } } ================================================ FILE: kafka.go ================================================ package goharvest import ( "fmt" "time" "gopkg.in/confluentinc/confluent-kafka-go.v1/kafka" ) /* Interfaces. */ // KafkaConsumer specifies the methods of a minimal consumer. type KafkaConsumer interface { Subscribe(topic string, rebalanceCb kafka.RebalanceCb) error ReadMessage(timeout time.Duration) (*kafka.Message, error) Close() error } // KafkaConsumerProvider is a factory for creating KafkaConsumer instances. type KafkaConsumerProvider func(conf *KafkaConfigMap) (KafkaConsumer, error) // KafkaProducer specifies the methods of a minimal producer. type KafkaProducer interface { Events() chan kafka.Event Produce(msg *kafka.Message, deliveryChan chan kafka.Event) error Close() } // KafkaProducerProvider is a factory for creating KafkaProducer instances. type KafkaProducerProvider func(conf *KafkaConfigMap) (KafkaProducer, error) /* Standard provider implementations. */ // StandardKafkaConsumerProvider returns a factory for creating a conventional KafkaConsumer, backed by the real client API. func StandardKafkaConsumerProvider() KafkaConsumerProvider { return func(conf *KafkaConfigMap) (KafkaConsumer, error) { return kafka.NewConsumer(toKafkaNativeConfig(conf)) } } // StandardKafkaProducerProvider returns a factory for creating a conventional KafkaProducer, backed by the real client API. func StandardKafkaProducerProvider() KafkaProducerProvider { return func(conf *KafkaConfigMap) (KafkaProducer, error) { return kafka.NewProducer(toKafkaNativeConfig(conf)) } } /* Various helpers. 
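These convert between goharvest configuration maps and headers and their native librdkafka
counterparts, and refuse to overwrite configuration entries that the caller has already supplied.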
*/ func toKafkaNativeConfig(conf *KafkaConfigMap) *kafka.ConfigMap { result := kafka.ConfigMap{} for k, v := range *conf { result[k] = v } return &result } func copyKafkaConfig(configMap KafkaConfigMap) KafkaConfigMap { copy := KafkaConfigMap{} putAllKafkaConfig(configMap, copy) return copy } func putAllKafkaConfig(source, target KafkaConfigMap) { for k, v := range source { target[k] = v } } func setKafkaConfig(configMap KafkaConfigMap, key string, value interface{}) error { _, containsKey := configMap[key] if containsKey { return fmt.Errorf("cannot override configuration '%s'", key) } configMap[key] = value return nil } func setKafkaConfigs(configMap, toSet KafkaConfigMap) error { for k, v := range toSet { err := setKafkaConfig(configMap, k, v) if err != nil { return err } } return nil } func toNativeKafkaHeaders(headers KafkaHeaders) (nativeHeaders []kafka.Header) { if numHeaders := len(headers); numHeaders > 0 { nativeHeaders = make([]kafka.Header, numHeaders) for i, header := range headers { nativeHeaders[i] = kafka.Header{Key: header.Key, Value: []byte(header.Value)} } } return } ================================================ FILE: kafka_mock_test.go ================================================ package goharvest import ( "time" "github.com/obsidiandynamics/libstdgo/concurrent" "gopkg.in/confluentinc/confluent-kafka-go.v1/kafka" ) type consMockFuncs struct { Subscribe func(m *consMock, topic string, rebalanceCb kafka.RebalanceCb) error ReadMessage func(m *consMock, timeout time.Duration) (*kafka.Message, error) Close func(m *consMock) error } type consMockCounts struct { Subscribe, ReadMessage, Close concurrent.AtomicCounter } type consMock struct { rebalanceCallback kafka.RebalanceCb rebalanceEvents chan kafka.Event f consMockFuncs c consMockCounts } func (m *consMock) Subscribe(topic string, rebalanceCb kafka.RebalanceCb) error { defer m.c.Subscribe.Inc() m.rebalanceCallback = rebalanceCb return m.f.Subscribe(m, topic, rebalanceCb) } func (m *consMock) ReadMessage(timeout time.Duration) (*kafka.Message, error) { defer m.c.ReadMessage.Inc() if m.rebalanceCallback != nil { // The rebalance events should only be delivered in the polling thread, which is why we wait for // ReadMessage before forwarding the events to the rebalance callback select { case e := <-m.rebalanceEvents: m.rebalanceCallback(nil, e) default: } } return m.f.ReadMessage(m, timeout) } func (m *consMock) Close() error { defer m.c.Close.Inc() return m.f.Close(m) } func (m *consMock) fillDefaults() { if m.rebalanceEvents == nil { m.rebalanceEvents = make(chan kafka.Event) } if m.f.Subscribe == nil { m.f.Subscribe = func(m *consMock, topic string, rebalanceCb kafka.RebalanceCb) error { return nil } } if m.f.ReadMessage == nil { m.f.ReadMessage = func(m *consMock, timeout time.Duration) (*kafka.Message, error) { return nil, newTimedOutError() } } if m.f.Close == nil { m.f.Close = func(m *consMock) error { return nil } } m.c.Subscribe = concurrent.NewAtomicCounter() m.c.ReadMessage = concurrent.NewAtomicCounter() m.c.Close = concurrent.NewAtomicCounter() } func mockKafkaConsumerProvider(m *consMock) func(conf *KafkaConfigMap) (KafkaConsumer, error) { return func(conf *KafkaConfigMap) (KafkaConsumer, error) { return m, nil } } type prodMockFuncs struct { Events func(m *prodMock) chan kafka.Event Produce func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error Close func(m *prodMock) } type prodMockCounts struct { Events, Produce, Close concurrent.AtomicCounter } type prodMock struct { events chan 
kafka.Event f prodMockFuncs c prodMockCounts } func (m *prodMock) Events() chan kafka.Event { defer m.c.Events.Inc() return m.f.Events(m) } func (m *prodMock) Produce(msg *kafka.Message, deliveryChan chan kafka.Event) error { defer m.c.Produce.Inc() return m.f.Produce(m, msg, deliveryChan) } func (m *prodMock) Close() { defer m.c.Close.Inc() m.f.Close(m) } func (m *prodMock) fillDefaults() { if m.events == nil { m.events = make(chan kafka.Event) } if m.f.Events == nil { m.f.Events = func(m *prodMock) chan kafka.Event { return m.events } } if m.f.Produce == nil { m.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error { return nil } } if m.f.Close == nil { m.f.Close = func(m *prodMock) { close(m.events) } } m.c.Events = concurrent.NewAtomicCounter() m.c.Produce = concurrent.NewAtomicCounter() m.c.Close = concurrent.NewAtomicCounter() } func mockKafkaProducerProvider(m *prodMock) func(conf *KafkaConfigMap) (KafkaProducer, error) { return func(conf *KafkaConfigMap) (KafkaProducer, error) { return m, nil } } ================================================ FILE: metric/meter.go ================================================ package metric import ( "fmt" "time" "github.com/obsidiandynamics/libstdgo/scribe" ) // MeterStats is an immutable snapshot of meter statistics. type MeterStats struct { Name string Start time.Time TotalCount int64 TotalRatePerS float64 IntervalCount int64 IntervalRatePerS float64 } // String produces a textual representation of a MeterStats object. func (s MeterStats) String() string { return fmt.Sprintf("Meter <%s>: %d since %v, rate: %.3f current, %.3f average\n", s.Name, s.TotalCount, s.Start.Format(timeFormat), s.IntervalRatePerS, s.TotalRatePerS) } // Meter is a simple structure for tracking the volume of events observed from two points in time: // 1. When the Meter object was created (or when it was last reset) // 2. From the last snapshot point. // // A meter can be updated by adding more observations. Statistics can be periodically extracted from the // meter, reflecting the total observed volume as well as the volume in the most recent period. // // A meter is not thread-safe. In the absence of locking, it should only be accessed from a single // goroutine. type Meter struct { name string printInterval time.Duration start time.Time totalCount int64 lastIntervalStart time.Time lastCount int64 lastStats MeterStats } const timeFormat = "2006-01-02T15:04:05" // String produces a textual representation of a Meter object. func (m Meter) String() string { return fmt.Sprint("Meter[name=", m.name, ", snapshotInterval=", m.printInterval, ", start=", m.start.Format(timeFormat), ", totalCount=", m.totalCount, ", lastIntervalStart=", m.lastIntervalStart.Format(timeFormat), ", lastCount=", m.lastCount, ", lastStats=", m.lastStats, "]") } // NewMeter constructs a new meter object, with a given name and snapshot interval. The actual snapshotting // of meter statistics is the responsibility of the goroutine that owns the meter. func NewMeter(name string, snapshotInterval time.Duration) *Meter { m := Meter{} m.name = name m.printInterval = snapshotInterval m.Reset() return &m } // Reset the meter to its initial state — clearing all counters and resetting the clocks. func (m *Meter) Reset() { m.start = time.Now() m.totalCount = 0 m.lastIntervalStart = m.start m.lastCount = 0 } // Add a value to the meter, contributing to the overall count and to the current interval. 
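//
// A minimal usage sketch (illustrative only; records, process and scr are placeholders, with scr
// assumed to be a configured scribe.Scribe as in the accompanying tests):
//
//	m := NewMeter("throughput", 5*time.Second)
//	for rec := range records {
//		process(rec)
//		m.Add(1)
//		m.MaybeStatsLog(scr.I())
//	}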
func (m *Meter) Add(amount int64) { m.totalCount += amount } // MaybeStats conditionally returns a stats snapshot if the current sampling interval has lapsed. Otherwise, if the // sampling interval is still valid, a nil is returned. func (m *Meter) MaybeStats() *MeterStats { now := time.Now() elapsedInIntervalMs := now.Sub(m.lastIntervalStart).Milliseconds() if elapsedInIntervalMs > m.printInterval.Milliseconds() { intervalCount := m.totalCount - m.lastCount intervalRatePerS := float64(intervalCount) / float64(elapsedInIntervalMs) * 1000.0 m.lastCount = m.totalCount m.lastIntervalStart = now elapsedTotalMs := now.Sub(m.start).Milliseconds() totalRatePerS := float64(m.totalCount) / float64(elapsedTotalMs) * 1000.0 m.lastStats = MeterStats{ Name: m.name, Start: m.start, TotalCount: m.totalCount, TotalRatePerS: totalRatePerS, IntervalCount: intervalCount, IntervalRatePerS: intervalRatePerS, } return &m.lastStats } return nil } // MeterStatsCallback is invoked by MaybeStatsCall(). type MeterStatsCallback func(stats MeterStats) // MaybeStatsCall conditionally invokes the given MeterStatsCallback if the current sampling interval has lapsed, returning true // if the callback was invoked. func (m *Meter) MaybeStatsCall(cb MeterStatsCallback) bool { s := m.MaybeStats() if s != nil { cb(*s) return true } return false } // MaybeStatsLog conditionally logs the snapshot of the recent sampling interval if the latter has lapsed, returning true if an // entry was logged. func (m *Meter) MaybeStatsLog(logger scribe.Logger) bool { return m.MaybeStatsCall(func(stats MeterStats) { logger("%v", stats) }) } ================================================ FILE: metric/meter_test.go ================================================ package metric import ( "testing" "time" "github.com/obsidiandynamics/libstdgo/check" "github.com/obsidiandynamics/libstdgo/scribe" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func wait(t *testing.T) check.Timesert { return check.Wait(t, 10*time.Second) } func TestMeterString(t *testing.T) { m := NewMeter("test-name", time.Second) str := m.String() require.Contains(t, str, "Meter[") require.Contains(t, str, m.name) } func TestMeterMaybeStats(t *testing.T) { m := NewMeter("test-name", time.Millisecond) m.Add(1) wait(t).UntilAsserted(func(t check.Tester) { s := m.MaybeStats() if assert.NotNil(t, s) { assert.Equal(t, "test-name", s.Name) assert.Equal(t, int64(1), s.TotalCount) assert.Equal(t, int64(1), s.IntervalCount) } }) m.Add(2) wait(t).UntilAsserted(func(t check.Tester) { s := m.MaybeStats() if assert.NotNil(t, s) { assert.Equal(t, "test-name", s.Name) assert.Equal(t, int64(3), s.TotalCount) assert.Equal(t, int64(2), s.IntervalCount) } }) m.Add(1) m.Reset() wait(t).UntilAsserted(func(t check.Tester) { s := m.MaybeStats() if assert.NotNil(t, s) { assert.Equal(t, "test-name", s.Name) assert.Equal(t, int64(0), s.TotalCount) assert.Equal(t, int64(0), s.IntervalCount) } }) } func TestMeterMaybeStatsCall(t *testing.T) { m := NewMeter("test-name", time.Millisecond) m.Add(1) wait(t).UntilAsserted(func(t check.Tester) { var statsPtr *MeterStats called := m.MaybeStatsCall(func(stats MeterStats) { statsPtr = &stats }) if assert.True(t, called) { assert.NotNil(t, statsPtr) assert.Equal(t, "test-name", statsPtr.Name) assert.Equal(t, int64(1), statsPtr.TotalCount) assert.Equal(t, int64(1), statsPtr.IntervalCount) } else { assert.Nil(t, statsPtr) } }) } func TestMeterMaybeStatsLog(t *testing.T) { m := NewMeter("test-name", time.Millisecond) m.Add(1) mockscribe := 
scribe.NewMock() scr := scribe.New(mockscribe.Factories()) wait(t).UntilAsserted(func(t check.Tester) { called := m.MaybeStatsLog(scr.I()) if assert.True(t, called) { mockscribe.Entries(). Having(scribe.LogLevel(scribe.Info)). Having(scribe.MessageContaining("test-name")). Assert(t, scribe.Count(1)) } else { mockscribe.Entries(). Assert(t, scribe.Count(0)) } }) } ================================================ FILE: metric/metric.go ================================================ // Package metric contains data structures for working with metrics. package metric ================================================ FILE: neli.go ================================================ package goharvest import "github.com/obsidiandynamics/goneli" // NeliProvider is a factory for creating Neli instances. type NeliProvider func(config goneli.Config, barrier goneli.Barrier) (goneli.Neli, error) // StandardNeliProvider returns a factory for creating a conventional Neli instance, backed by the real client API. func StandardNeliProvider() NeliProvider { return func(config goneli.Config, barrier goneli.Barrier) (goneli.Neli, error) { return goneli.New(config, barrier) } } func configToNeli(hConfigMap KafkaConfigMap) goneli.KafkaConfigMap { return map[string]interface{}(hConfigMap) } func configToHarvest(nConfigMap goneli.KafkaConfigMap) KafkaConfigMap { return map[string]interface{}(nConfigMap) } func convertKafkaConsumerProvider(hProvider KafkaConsumerProvider) goneli.KafkaConsumerProvider { return func(conf *goneli.KafkaConfigMap) (goneli.KafkaConsumer, error) { hCfg := configToHarvest(*conf) return hProvider(&hCfg) } } func convertKafkaProducerProvider(hProvider KafkaProducerProvider) goneli.KafkaProducerProvider { return func(conf *goneli.KafkaConfigMap) (goneli.KafkaProducer, error) { hCfg := configToHarvest(*conf) return hProvider(&hCfg) } } ================================================ FILE: postgres.go ================================================ package goharvest import ( "database/sql" "fmt" "sort" "github.com/google/uuid" // init postgres driver "github.com/lib/pq" ) type database struct { db *sql.DB markStmt *sql.Stmt purgeStmt *sql.Stmt resetStmt *sql.Stmt } const markQueryTemplate = ` -- mark query UPDATE %s SET leader_id = $1 WHERE id IN ( SELECT id FROM %s WHERE leader_id IS NULL OR leader_id != $1 ORDER BY id LIMIT $2 ) RETURNING id, create_time, kafka_topic, kafka_key, kafka_value, kafka_header_keys, kafka_header_values, leader_id ` const purgeQueryTemplate = ` -- purge query DELETE FROM %s WHERE id = $1 ` const resetQueryTemplate = ` -- reset query UPDATE %s SET leader_id = NULL WHERE id = $1 ` func closeResource(stmt *sql.Stmt) { if stmt != nil { stmt.Close() } } func closeResources(stmts ...*sql.Stmt) { for _, resource := range stmts { closeResource(resource) } } type databaseProvider func() (*sql.DB, error) // StandardPostgresBindingProvider returns a DatabaseBindingProvider that connects to a real Postgres database. func StandardPostgresBindingProvider() DatabaseBindingProvider { return NewPostgresBinding } // NewPostgresBinding creates a Postgres binding for the given dataSource and outboxTable args. 
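//
// An illustrative call (the connection string and table name below are placeholders):
//
//	binding, err := NewPostgresBinding(
//		"host=localhost port=5432 user=postgres dbname=postgres sslmode=disable",
//		"myapp.outbox")
//	if err != nil {
//		// handle the error
//	}
//	defer binding.Dispose()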
func NewPostgresBinding(dataSource string, outboxTable string) (DatabaseBinding, error) { return newPostgresBinding(func() (*sql.DB, error) { return sql.Open("postgres", dataSource) }, outboxTable) } func newPostgresBinding(dbProvider databaseProvider, outboxTable string) (DatabaseBinding, error) { success := false var db *sql.DB var markStmt, purgeStmt, resetStmt *sql.Stmt defer func() { if !success { if db != nil { db.Close() } closeResources(markStmt, purgeStmt, resetStmt) } }() db, err := dbProvider() if err != nil { return nil, err } db.SetMaxOpenConns(2) db.SetMaxIdleConns(2) markStmt, err = db.Prepare(fmt.Sprintf(markQueryTemplate, outboxTable, outboxTable)) if err != nil { return nil, err } purgeStmt, err = db.Prepare(fmt.Sprintf(purgeQueryTemplate, outboxTable)) if err != nil { return nil, err } resetStmt, err = db.Prepare(fmt.Sprintf(resetQueryTemplate, outboxTable)) if err != nil { return nil, err } success = true return &database{ db: db, markStmt: markStmt, purgeStmt: purgeStmt, resetStmt: resetStmt, }, nil } func (db *database) Mark(leaderID uuid.UUID, limit int) ([]OutboxRecord, error) { rows, err := db.markStmt.Query(leaderID, limit) if err != nil { return nil, err } defer rows.Close() records := make([]OutboxRecord, 0, limit) for rows.Next() { record := OutboxRecord{} var keys []string var values []string err := rows.Scan( &record.ID, &record.CreateTime, &record.KafkaTopic, &record.KafkaKey, &record.KafkaValue, pq.Array(&keys), pq.Array(&values), &record.LeaderID, ) if err != nil { return nil, err } numKeys := len(keys) if len(keys) != len(values) { return nil, fmt.Errorf("unequal number of header keys (%d) and values (%d)", numKeys, len(values)) } record.KafkaHeaders = make(KafkaHeaders, numKeys) for i := 0; i < numKeys; i++ { record.KafkaHeaders[i] = KafkaHeader{keys[i], values[i]} } records = append(records, record) } sort.Slice(records, func(i, j int) bool { return records[i].ID < records[j].ID }) return records, nil } func (db *database) Purge(id int64) (bool, error) { res, err := db.purgeStmt.Exec(id) if err != nil { return false, err } affected, _ := res.RowsAffected() if affected != 1 { return false, nil } return true, err } func (db *database) Reset(id int64) (bool, error) { res, err := db.resetStmt.Exec(id) if err != nil { return false, err } affected, _ := res.RowsAffected() if affected != 1 { return false, nil } return true, err } func (db *database) Dispose() { db.db.Close() closeResources(db.markStmt, db.purgeStmt, db.resetStmt) } ================================================ FILE: postgres_test.go ================================================ package goharvest import ( "database/sql" "database/sql/driver" "testing" "time" "github.com/DATA-DOG/go-sqlmock" "github.com/google/uuid" "github.com/lib/pq" "github.com/obsidiandynamics/libstdgo/check" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) const outboxTable = "outbox" const markPrepare = "-- mark query" const purgePrepare = "-- purge query" const resetPrepare = "-- reset query" func pgFixtures() (databaseProvider, sqlmock.Sqlmock) { db, mock, err := sqlmock.New() if err != nil { panic(err) } dbProvider := func() (*sql.DB, error) { return db, nil } return dbProvider, mock } func TestErrorInDBProvider(t *testing.T) { dbProvider := func() (*sql.DB, error) { return nil, check.ErrSimulated } b, err := newPostgresBinding(dbProvider, outboxTable) assert.Nil(t, b) assert.Equal(t, check.ErrSimulated, err) } func TestErrorInPrepareMarkQuery(t *testing.T) { dbProvider, mock := 
pgFixtures() mock.ExpectPrepare(markPrepare).WillReturnError(check.ErrSimulated) mock.ExpectClose() b, err := newPostgresBinding(dbProvider, outboxTable) assert.Nil(t, b) assert.Equal(t, check.ErrSimulated, err) assert.Nil(t, mock.ExpectationsWereMet()) } func TestErrorInPreparePurgeQuery(t *testing.T) { dbProvider, mock := pgFixtures() mark := mock.ExpectPrepare(markPrepare) mock.ExpectPrepare(purgePrepare).WillReturnError(check.ErrSimulated) mark.WillBeClosed() mock.ExpectClose() b, err := newPostgresBinding(dbProvider, outboxTable) assert.Nil(t, b) assert.Equal(t, check.ErrSimulated, err) assert.Nil(t, mock.ExpectationsWereMet()) } func TestErrorInPrepareResetQuery(t *testing.T) { dbProvider, mock := pgFixtures() mark := mock.ExpectPrepare(markPrepare) purge := mock.ExpectPrepare(purgePrepare) mock.ExpectPrepare(resetPrepare).WillReturnError(check.ErrSimulated) mark.WillBeClosed() purge.WillBeClosed() mock.ExpectClose() b, err := newPostgresBinding(dbProvider, outboxTable) assert.Nil(t, b) assert.Equal(t, check.ErrSimulated, err) assert.Nil(t, mock.ExpectationsWereMet()) } const testMarkQueryLimit = 100 func TestExecuteMark_queryError(t *testing.T) { dbProvider, mock := pgFixtures() mark := mock.ExpectPrepare(markPrepare) purge := mock.ExpectPrepare(purgePrepare) reset := mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) leaderID, _ := uuid.NewRandom() mark.ExpectQuery().WithArgs(leaderID, testMarkQueryLimit).WillReturnError(check.ErrSimulated) records, err := b.Mark(leaderID, testMarkQueryLimit) assert.Nil(t, records) assert.Equal(t, check.ErrSimulated, err) mock.ExpectClose() mark.WillBeClosed() purge.WillBeClosed() reset.WillBeClosed() b.Dispose() assert.Nil(t, mock.ExpectationsWereMet()) } // Tests error when one of the columns is of the wrong data type. 
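// The mocked row below supplies the string "non-int" for the BIGSERIAL id column, so rows.Scan
// fails and Mark returns the scan error.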
func TestExecuteMarkQuery_scanError(t *testing.T) { dbProvider, mock := pgFixtures() mark := mock.ExpectPrepare(markPrepare) mock.ExpectPrepare(purgePrepare) mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) leaderID, _ := uuid.NewRandom() rows := sqlmock.NewRows([]string{ "id", "create_time", "kafka_topic", "kafka_key", "kafka_value", "kafka_header_keys", "kafka_header_values", "leader_id", }) rows.AddRow("non-int", "", "", "", "", pq.Array([]string{"some-key"}), pq.Array([]string{"some-value"}), leaderID) mark.ExpectQuery().WithArgs(leaderID, testMarkQueryLimit).WillReturnRows(rows) records, err := b.Mark(leaderID, testMarkQueryLimit) assert.Nil(t, records) if assert.NotNil(t, err) { assert.Contains(t, err.Error(), "Scan error on column") } } func TestExecuteMark_success(t *testing.T) { dbProvider, mock := pgFixtures() mark := mock.ExpectPrepare(markPrepare) mock.ExpectPrepare(purgePrepare) mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) leaderID, _ := uuid.NewRandom() exp := []OutboxRecord{ { ID: 77, CreateTime: time.Now(), KafkaTopic: "kafka_topic", KafkaKey: "kafka_key", KafkaValue: String("kafka_value"), KafkaHeaders: KafkaHeaders{ KafkaHeader{Key: "some-key", Value: "some-value"}, }, LeaderID: nil, }, { ID: 78, CreateTime: time.Now(), KafkaTopic: "kafka_topic", KafkaKey: "kafka_key", KafkaValue: String("kafka_value"), KafkaHeaders: KafkaHeaders{}, LeaderID: nil, }, } reverse := func(recs []OutboxRecord) []OutboxRecord { reversed := make([]OutboxRecord, len(recs)) for i, j := len(recs)-1, 0; i >= 0; i, j = i-1, j+1 { reversed[i] = recs[j] } return reversed } rows := sqlmock.NewRows([]string{ "id", "create_time", "kafka_topic", "kafka_key", "kafka_value", "kafka_header_keys", "kafka_header_values", "leader_id", }) // Reverse the order before returning to test the sorter inside the marker implementation. 
for _, expRec := range reverse(exp) { headerKeys, headerValues := flattenHeaders(expRec.KafkaHeaders) rows.AddRow( expRec.ID, expRec.CreateTime, expRec.KafkaTopic, expRec.KafkaKey, expRec.KafkaValue, pq.Array(headerKeys), pq.Array(headerValues), expRec.LeaderID, ) } mark.ExpectQuery().WithArgs(leaderID, testMarkQueryLimit).WillReturnRows(rows) records, err := b.Mark(leaderID, testMarkQueryLimit) assert.Nil(t, err) assert.ElementsMatch(t, []interface{}{exp[0], exp[1]}, records) assert.Nil(t, mock.ExpectationsWereMet()) } func TestExecuteMark_headerLengthMismatch(t *testing.T) { dbProvider, mock := pgFixtures() mark := mock.ExpectPrepare(markPrepare) mock.ExpectPrepare(purgePrepare) mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) leaderID, _ := uuid.NewRandom() rows := sqlmock.NewRows([]string{ "id", "create_time", "kafka_topic", "kafka_key", "kafka_value", "kafka_header_keys", "kafka_header_values", "leader_id", }) rows.AddRow( 1, time.Now(), "some-topic", "some-key", "some-value", pq.Array([]string{"k0"}), pq.Array([]string{"v0", "v1"}), leaderID, ) mark.ExpectQuery().WithArgs(leaderID, testMarkQueryLimit).WillReturnRows(rows) records, err := b.Mark(leaderID, testMarkQueryLimit) assert.Nil(t, records) require.NotNil(t, err) assert.Equal(t, "unequal number of header keys (1) and values (2)", err.Error()) } func flattenHeaders(headers KafkaHeaders) (headerKeys, headerValues []string) { if numHeaders := len(headers); numHeaders > 0 { headerKeys = make([]string, numHeaders) headerValues = make([]string, numHeaders) for i, header := range headers { headerKeys[i], headerValues[i] = header.Key, header.Value } } else { headerKeys, headerValues = []string{}, []string{} } return } func TestExecutePurge_error(t *testing.T) { dbProvider, mock := pgFixtures() mock.ExpectPrepare(markPrepare) purge := mock.ExpectPrepare(purgePrepare) mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) const id = 77 purge.ExpectExec().WithArgs(id).WillReturnError(check.ErrSimulated) done, err := b.Purge(id) assert.False(t, done) assert.Equal(t, check.ErrSimulated, err) assert.Nil(t, mock.ExpectationsWereMet()) } func TestExecutePurge_success(t *testing.T) { dbProvider, mock := pgFixtures() mock.ExpectPrepare(markPrepare) purge := mock.ExpectPrepare(purgePrepare) mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) const id = 77 purge.ExpectExec().WithArgs(id).WillReturnResult(sqlmock.NewResult(-1, 1)) done, err := b.Purge(id) assert.True(t, done) assert.Nil(t, err) assert.Nil(t, mock.ExpectationsWereMet()) } func TestExecutePurge_notDone(t *testing.T) { dbProvider, mock := pgFixtures() mock.ExpectPrepare(markPrepare) purge := mock.ExpectPrepare(purgePrepare) mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) const id = 77 purge.ExpectExec().WithArgs(id).WillReturnResult(driver.ResultNoRows) done, err := b.Purge(id) assert.False(t, done) assert.Nil(t, err) assert.Nil(t, mock.ExpectationsWereMet()) } func TestExecuteReset_error(t *testing.T) { dbProvider, mock := pgFixtures() mock.ExpectPrepare(markPrepare) mock.ExpectPrepare(purgePrepare) reset := mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) const id = 77 
reset.ExpectExec().WithArgs(id).WillReturnError(check.ErrSimulated) done, err := b.Reset(id) assert.False(t, done) assert.Equal(t, check.ErrSimulated, err) assert.Nil(t, mock.ExpectationsWereMet()) } func TestExecuteReset_success(t *testing.T) { dbProvider, mock := pgFixtures() mock.ExpectPrepare(markPrepare) mock.ExpectPrepare(purgePrepare) reset := mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) const id = 77 reset.ExpectExec().WithArgs(id).WillReturnResult(sqlmock.NewResult(-1, 1)) done, err := b.Reset(id) assert.True(t, done) assert.Nil(t, err) assert.Nil(t, mock.ExpectationsWereMet()) } func TestExecuteReset_notDone(t *testing.T) { dbProvider, mock := pgFixtures() mock.ExpectPrepare(markPrepare) mock.ExpectPrepare(purgePrepare) reset := mock.ExpectPrepare(resetPrepare) b, err := newPostgresBinding(dbProvider, outboxTable) assert.NotNil(t, b) assert.Nil(t, err) const id = 77 reset.ExpectExec().WithArgs(id).WillReturnResult(driver.ResultNoRows) done, err := b.Reset(id) assert.False(t, done) assert.Nil(t, err) assert.Nil(t, mock.ExpectationsWereMet()) } func TestRealPostgresBinding(t *testing.T) { b, err := NewPostgresBinding("***corrupt connection info string***", outboxTable) assert.Nil(t, b) assert.NotNil(t, err) } ================================================ FILE: sh/.gitignore ================================================ librdkafka ================================================ FILE: sh/build-librdkafka.sh ================================================ #!/bin/sh cd $(dirname $0) set -e if [ -d librdkafka ]; then cd librdkafka git pull cd .. else git clone https://github.com/edenhill/librdkafka.git fi cd librdkafka ./configure --prefix /usr make sudo make install rm -rf librdkafka ================================================ FILE: sh/init-outbox.sh ================================================ #!/bin/sh cat < 0 { headerKeys = make([]string, numHeaders) headerValues = make([]string, numHeaders) for i, header := range rec.KafkaHeaders { headerKeys[i], headerValues[i] = header.Key, header.Value } } else { headerKeys, headerValues = []string{}, []string{} } return headerKeys, headerValues } // Stash one record within the given transaction scope. func (s *stasher) Stash(tx *sql.Tx, rec goharvest.OutboxRecord) error { headerKeys, headerValues := makeHeaders(rec) _, err := tx.Exec(s.query, rec.KafkaTopic, rec.KafkaKey, rec.KafkaValue, pq.Array(headerKeys), pq.Array(headerValues)) return err } ================================================ FILE: stasher/stasher_doc_test.go ================================================ package stasher import ( "database/sql" "testing" "github.com/obsidiandynamics/goharvest" "github.com/obsidiandynamics/libstdgo/check" ) func Example() { db, err := sql.Open("postgres", "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable") if err != nil { panic(err) } defer db.Close() st := New("outbox") // Begin a transaction. tx, _ := db.Begin() defer tx.Rollback() // Update other database entities in transaction scope. // ... // Stash an outbox record for subsequent harvesting. err = st.Stash(tx, goharvest.OutboxRecord{ KafkaTopic: "my-app.topic", KafkaKey: "hello", KafkaValue: goharvest.String("world"), KafkaHeaders: goharvest.KafkaHeaders{ {Key: "applicationId", Value: "my-app"}, }, }) if err != nil { panic(err) } // Commit the transaction. 
tx.Commit() } func TestExample(t *testing.T) { check.RunTargetted(t, Example) } func Example_prepare() { db, err := sql.Open("postgres", "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable") if err != nil { panic(err) } defer db.Close() st := New("outbox") // Begin a transaction. tx, _ := db.Begin() defer tx.Rollback() // Update other database entities in transaction scope. // ... // Formulates a prepared statement that may be reused within the scope of the transaction. prestash, _ := st.Prepare(tx) // Publish a bunch of messages using the same prepared statement. for i := 0; i < 10; i++ { // Stash an outbox record for subsequent harvesting. err = prestash.Stash(goharvest.OutboxRecord{ KafkaTopic: "my-app.topic", KafkaKey: "hello", KafkaValue: goharvest.String("world"), KafkaHeaders: goharvest.KafkaHeaders{ {Key: "applicationId", Value: "my-app"}, }, }) if err != nil { panic(err) } } // Commit the transaction. tx.Commit() } func TestExample_prepare(t *testing.T) { check.RunTargetted(t, Example_prepare) } ================================================ FILE: stasher/statsher_test.go ================================================ package stasher import ( "testing" "github.com/DATA-DOG/go-sqlmock" "github.com/lib/pq" "github.com/obsidiandynamics/goharvest" "github.com/stretchr/testify/require" ) const ( testTopic = "topic" testKey = "key" testValue = "value" testHeaderKey = "header-key" testHeaderValue = "header-value" testInsertQuery = "-- insert query" ) func TestStash_withHeaders(t *testing.T) { s := New("outbox") db, mock, err := sqlmock.New() require.Nil(t, err) mock.ExpectBegin() tx, err := db.Begin() require.Nil(t, err) mock.ExpectExec(testInsertQuery). WithArgs(testTopic, testKey, testValue, pq.Array([]string{testHeaderKey}), pq.Array([]string{testHeaderValue})). WillReturnResult(sqlmock.NewResult(-1, 1)) err = s.Stash(tx, goharvest.OutboxRecord{ KafkaTopic: testTopic, KafkaKey: testKey, KafkaValue: goharvest.String(testValue), KafkaHeaders: goharvest.KafkaHeaders{ {Key: testHeaderKey, Value: testHeaderValue}, }, }) require.Nil(t, err) require.Nil(t, mock.ExpectationsWereMet()) } func TestStash_withoutHeaders(t *testing.T) { s := New("outbox") db, mock, err := sqlmock.New() require.Nil(t, err) mock.ExpectBegin() tx, err := db.Begin() require.Nil(t, err) mock.ExpectExec(testInsertQuery). WithArgs(testTopic, testKey, testValue, pq.Array([]string{}), pq.Array([]string{})). WillReturnResult(sqlmock.NewResult(-1, 1)) err = s.Stash(tx, goharvest.OutboxRecord{ KafkaTopic: testTopic, KafkaKey: testKey, KafkaValue: goharvest.String(testValue), }) require.Nil(t, err) require.Nil(t, mock.ExpectationsWereMet()) } func TestStash_prepare(t *testing.T) { s := New("outbox") db, mock, err := sqlmock.New() require.Nil(t, err) mock.ExpectBegin() tx, err := db.Begin() require.Nil(t, err) mock.ExpectPrepare(testInsertQuery) prestash, err := s.Prepare(tx) require.Nil(t, err) require.NotNil(t, prestash) mock.ExpectExec(testInsertQuery). WithArgs(testTopic, testKey, testValue, pq.Array([]string{}), pq.Array([]string{})). WillReturnResult(sqlmock.NewResult(-1, 1)) err = prestash.Stash(goharvest.OutboxRecord{ KafkaTopic: testTopic, KafkaKey: testKey, KafkaValue: goharvest.String(testValue), }) require.Nil(t, err) require.Nil(t, mock.ExpectationsWereMet()) }