Repository: obsidiandynamics/goharvest
Branch: master
Commit: 1239a594e9dc
Files: 40
Total size: 169.9 KB
Directory structure:
gitextract_j1n1kb08/
├── .gitignore
├── .travis.yml
├── LICENSE
├── Makefile
├── README.md
├── battery.go
├── battery_test.go
├── cmd/
│ ├── goharvest_example/
│ │ └── example_main.go
│ ├── pump/
│ │ └── pump_main.go
│ └── reaper/
│ └── reaper_main.go
├── config.go
├── config_test.go
├── db.go
├── db_mock_test.go
├── event.go
├── event_test.go
├── examples/
│ ├── reaper.yaml
│ └── reaper_secure.yaml
├── go.mod
├── go.sum
├── goharvest_doc_test.go
├── harvest.go
├── harvest_test.go
├── int/
│ ├── faulty_kafka_test.go
│ └── harvest_int_test.go
├── kafka.go
├── kafka_mock_test.go
├── metric/
│ ├── meter.go
│ ├── meter_test.go
│ └── metric.go
├── neli.go
├── postgres.go
├── postgres_test.go
├── sh/
│ ├── .gitignore
│ ├── build-librdkafka.sh
│ ├── init-outbox.sh
│ └── soak.sh
└── stasher/
├── stasher.go
├── stasher_doc_test.go
└── statsher_test.go
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
.vscode
.DS_Store
*.cer
*.pem
/bin
/log*
================================================
FILE: .travis.yml
================================================
language: go
go:
- 1.13.x
- 1.14.x
services:
- docker
before_install:
- |
docker run --name kafka --rm -d -p 2181:2181 -p 9092:9092 \
-e KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092 \
obsidiandynamics/kafka
- |
docker run --name postgres --rm -d -p 5432:5432 \
-e POSTGRES_HOST_AUTH_METHOD=trust \
postgres:12
- go get -u -v all
script:
- make
- make int
after_success:
- bash <(curl -s https://codecov.io/bash)
================================================
FILE: LICENSE
================================================
BSD 3-Clause License
Copyright (c) 2020, Obsidian Dynamics
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: Makefile
================================================
default: build test
all: test lint
build: dirs
go build -race -o bin ./...
test: dirs
go test ./... -race -count=1 -coverprofile=bin/coverage.out
soaktest: dirs
SOAK_CMD="make test" sh/soak.sh
int: FORCE
GOLABELS=int go test -timeout 180s -v -race -count=1 ./int
soakint: FORCE
SOAK_CMD="make int" sh/soak.sh
dirs:
mkdir -p bin
lint:
golint ./...
clean:
rm -rf bin
list: FORCE
@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$'
FORCE:
================================================
FILE: README.md
================================================
===

[ ](https://travis-ci.org/obsidiandynamics/goharvest#)

[](https://codecov.io/gh/obsidiandynamics/goharvest)
[](https://goreportcard.com/report/github.com/obsidiandynamics/goharvest)
[](https://lgtm.com/projects/g/obsidiandynamics/goharvest/alerts/)
[](https://pkg.go.dev/github.com/obsidiandynamics/goharvest?tab=doc)
`goharvest` is a Go implementation of the [Transactional Outbox](https://microservices.io/patterns/data/transactional-outbox.html) pattern for Postgres and Kafka.
While `goharvest` is a complex beast, the end result is dead simple: to publish Kafka messages reliably and atomically, simply write a record to a dedicated **outbox table** in a transaction, alongside any other database changes. (Outbox schema provided below.) `goharvest` scrapes the outbox table in the background and publishes records to a Kafka topic of the application's choosing, using the key, value and headers specified in the outbox record. `goharvest` currently works with Postgres. It maintains causal order of messages and does not require CDC to be enabled on the database, making for a zero-hassle setup. It handles thousands of records/second on commodity hardware.
# Getting started
## 1. Create an outbox table for your application
```sql
CREATE TABLE IF NOT EXISTS outbox (
id BIGSERIAL PRIMARY KEY,
create_time TIMESTAMP WITH TIME ZONE NOT NULL,
kafka_topic VARCHAR(249) NOT NULL,
kafka_key VARCHAR(100) NOT NULL, -- pick your own maximum key size
kafka_value VARCHAR(10000), -- pick your own maximum value size
kafka_header_keys TEXT[] NOT NULL,
kafka_header_values TEXT[] NOT NULL,
leader_id UUID
)
```
## 2. Run `goharvest`
### Standalone mode
This runs `goharvest` within a separate process called `reaper`, which will work alongside **any** application that writes to a standard outbox. (Not just applications written in Go.)
#### Install `reaper`
```sh
go get -u github.com/obsidiandynamics/goharvest/cmd/reaper
```
#### Create `reaper.yaml` configuration
```yaml
harvest:
baseKafkaConfig:
bootstrap.servers: localhost:9092
producerKafkaConfig:
compression.type: lz4
delivery.timeout.ms: 10000
leaderTopic: my-app-name
leaderGroupID: my-app-name
dataSource: host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable
outboxTable: outbox
limits:
minPollInterval: 1s
heartbeatTimeout: 5s
maxInFlightRecords: 1000
minMetricsInterval: 5s
sendConcurrency: 4
sendBuffer: 10
logging:
level: Debug
```
#### Start `reaper`
```sh
reaper -f reaper.yaml
```
### Embedded mode
`goharvest` can be run in the same process as your application.
#### Add the dependency
```sh
go get -u github.com/obsidiandynamics/goharvest
```
#### Create and start a `Harvest` instance
```go
import "github.com/obsidiandynamics/goharvest"
```
```go
// Configure the harvester. It will use its own database and Kafka connections under the hood.
config := Config{
BaseKafkaConfig: KafkaConfigMap{
"bootstrap.servers": "localhost:9092",
},
DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
}
// Create a new harvester.
harvest, err := New(config)
if err != nil {
panic(err)
}
// Start harvesting in the background.
err = harvest.Start()
if err != nil {
panic(err)
}
// Wait indefinitely for the harvester to end.
log.Fatal(harvest.Await())
```
### Using a custom logger
`goharvest` uses `log.Printf` for output by default. Logger configuration is courtesy of the Scribe façade, from [libstdgo](https://github.com/obsidiandynamics/libstdgo). The example below uses a Logrus binding for Scribe.
```go
import (
"github.com/obsidiandynamics/goharvest"
scribelogrus "github.com/obsidiandynamics/libstdgo/scribe/logrus"
"github.com/sirupsen/logrus"
)
```
```go
log := logrus.StandardLogger()
log.SetLevel(logrus.DebugLevel)
// Configure the custom logger using a binding.
config := Config{
BaseKafkaConfig: KafkaConfigMap{
"bootstrap.servers": "localhost:9092",
},
Scribe: scribe.New(scribelogrus.Bind()),
DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
}
```
### Listening for leader status updates
Just like `goharvest` uses [NELI](https://github.com/obsidiandynamics/goneli) to piggy-back on Kafka's leader election, you can piggy-back on `goharvest` to get leader status updates:
```go
log := logrus.StandardLogger()
log.SetLevel(logrus.TraceLevel)
config := Config{
BaseKafkaConfig: KafkaConfigMap{
"bootstrap.servers": "localhost:9092",
},
DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
Scribe: scribe.New(scribelogrus.Bind()),
}
// Create a new harvester and register an event handler.
harvest, err := New(config)
// Register a handler callback, invoked when an event occurs within goharvest.
// The callback is completely optional; it lets the application piggy-back on leader
// status updates, in case it needs to schedule some additional work (other than
// harvesting outbox records) that should only be run on one process at any given time.
harvest.SetEventHandler(func(e Event) {
switch event := e.(type) {
case LeaderAcquired:
// The application may initialise any state necessary to perform work as a leader.
log.Infof("Got event: leader acquired: %v", event.LeaderID())
case LeaderRefreshed:
// Indicates that a new leader ID was generated, as a result of having to remark
// a record (typically as due to an earlier delivery error). This is purely
// informational; there is nothing an application should do about this, other
// than taking note of the new leader ID if it has come to rely on it.
log.Infof("Got event: leader refreshed: %v", event.LeaderID())
case LeaderRevoked:
// The application may block the callback until it wraps up any in-flight
// activity. Only upon returning from the callback, will a new leader be elected.
log.Infof("Got event: leader revoked")
case LeaderFenced:
// The application must immediately terminate any ongoing activity, on the assumption
// that another leader may be imminently elected. Unlike the handling of LeaderRevoked,
// blocking in the callback will not prevent a new leader from being elected.
log.Infof("Got event: leader fenced")
case MeterRead:
// Periodic statistics regarding the harvester's throughput.
log.Infof("Got event: meter read: %v", event.Stats())
}
})
// Start harvesting in the background.
err = harvest.Start()
```
### Which mode should I use?
Running `goharvest` in standalone mode using `reaper` is the recommended approach for most use cases, as it fully insulates the harvester from the rest of the application. Ideally, you should deploy `reaper` as a sidecar daemon, to run alongside your application. All the reaper needs is access to the outbox table and the Kafka cluster.
Embedded `goharvest` is useful if you require additional insights into its operation, which is accomplished by registering an `EventHandler` callback, as shown in the example above. This callback is invoked whenever the underlying leader status changes, which may be useful if you need to schedule additional workloads that should only be run on one process at any given time.
## 3. Write outbox records
### Directly, using SQL
You can write database records from any app, by simply issuing the following `INSERT` statement:
```sql
INSERT INTO ${outbox_table} (
create_time,
kafka_topic,
kafka_key,
kafka_value,
kafka_header_keys,
kafka_header_values
)
VALUES (NOW(), $1, $2, $3, $4, $5)
```
Replace `${outbox_table}` and bind the query variables as appropriate:
* `kafka_topic` column specifies an arbitrary topic name, which may differ among records.
* `kafka_key` is a mandatory `string` key. Each record must be published with a specified key, which will affect its placement among the topic's partitions.
* `kafka_value` is an optional `string` value. If unspecified, the record will be published with a `nil` value, allowing it to be used as a compaction tombstone.
* `kafka_header_keys` and `kafka_header_values` are arrays that specify the keys and values of record headers. When used, each element in `kafka_header_keys` corresponds to an element in `kafka_header_values` at the same index. If not using headers, set both arrays to empty.
> **Note**: **Writing outbox records should be performed in the same transaction as other related database updates.** Otherwise, messaging will not be atomic — the updates may be stably persisted while the message might be lost, and *vice versa*.
### Using `stasher`
The `goharvest` library comes with a `stasher` helper package for writing records to an outbox.
#### One-off messages
When one database update corresponds to one message, the easiest approach is to call `Stasher.Stash()`:
```go
import (
    "github.com/obsidiandynamics/goharvest"
    "github.com/obsidiandynamics/goharvest/stasher"
)
```
```go
db, err := sql.Open("postgres", "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable")
if err != nil {
panic(err)
}
defer db.Close()
st := stasher.New("outbox")
// Begin a transaction.
tx, _ := db.Begin()
defer tx.Rollback()
// Update other database entities in transaction scope.
// Stash an outbox record for subsequent harvesting.
err = st.Stash(tx, goharvest.OutboxRecord{
KafkaTopic: "my-app.topic",
KafkaKey: "hello",
KafkaValue: goharvest.String("world"),
KafkaHeaders: goharvest.KafkaHeaders{
{Key: "applicationId", Value: "my-app"},
},
})
if err != nil {
panic(err)
}
// Commit the transaction.
tx.Commit()
```
#### Multiple messages
Sending multiple messages within a single transaction may be done more efficiently using prepared statements:
```go
// Begin a transaction.
tx, _ := db.Begin()
defer tx.Rollback()
// Update other database entities in transaction scope.
// ...
// Formulates a prepared statement that may be reused within the scope of the transaction.
prestash, _ := st.Prepare(tx)
// Publish a bunch of messages using the same prepared statement.
for i := 0; i < 10; i++ {
// Stash an outbox record for subsequent harvesting.
err = prestash.Stash(goharvest.OutboxRecord{
KafkaTopic: "my-app.topic",
KafkaKey: "hello",
KafkaValue: goharvest.String("world"),
KafkaHeaders: goharvest.KafkaHeaders{
{Key: "applicationId", Value: "my-app"},
},
})
if err != nil {
panic(err)
}
}
// Commit the transaction.
tx.Commit()
```
# Configuration
There are a handful of parameters for configuring `goharvest`, assigned via the `Config` struct:
| Parameter |
Default value |
Description |
BaseKafkaConfig |
Map containing bootstrap.servers=localhost:9092. |
Configuration shared by the underlying Kafka producer and consumer clients, including those used for leader election. |
ProducerKafkaConfig |
Empty map. |
Additional configuration on top of BaseKafkaConfig that is specific to the producer clients created by goharvest for publishing harvested messages. This configuration does not apply to the underlying NELI leader election protocol. |
LeaderGroupID |
Assumes the filename of the application binary. |
Used by the underlying leader election protocol as a unique identifier shared by all instances in a group of competing processes. The LeaderGroupID is used as Kafka group.id property under the hood, when subscribing to the leader election topic. |
LeaderTopic |
Assumes the value of LeaderGroupID, suffixed with the string .neli. |
Used by NELI as the name of the Kafka topic for orchestrating leader election. Competing processes subscribe to the same topic under an identical consumer group ID, using Kafka's exclusive partition assignment as a mechanism for arbitrating leader status. |
DataSource |
Local Postgres data source host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable. |
The database driver-specific data source string. |
OutboxTable |
outbox |
The name of the outbox table, optionally including the schema name. |
Scribe |
Scribe configured with bindings for log.Printf(); effectively the result of running scribe.New(scribe.StandardBinding()). |
The logging façade used by the library, preconfigured with your logger of choice. See Scribe GoDocs. |
Name |
A string in the form {hostname}_{pid}_{time}, where {hostname} is the result of invoking os.Hostname(), {pid} is the process ID, and {time} is the UNIX epoch time, in seconds. |
The symbolic name of this instance. This field is informational only, accompanying all log messages. |
Limits.MinPollInterval |
100 ms |
The lower bound on the poll interval, preventing the over-polling of Kafka on successive Pulse() invocations. Assuming Pulse() is called repeatedly by the application, NELI may poll Kafka at a longer interval than MinPollInterval. (Regular polling is necessary to prove client's liveness and maintain internal partition assignment, but polling excessively is counterproductive.) |
Limits.HeartbeatTimeout |
5 s |
The period that a leader will maintain its status, not having received a heartbeat message on the leader topic. After the timeout elapses, the leader will assume a network partition and will voluntarily yield its status, signalling a LeaderFenced event to the application. |
Limits.QueueTimeout |
30 s |
The maximum period of time a record may be queued after having been marked, before timing out and triggering a remark. |
Limits.MarkBackoff |
10 ms |
The backoff delay introduced by the mark thread when a query returns no results, indicating the absence of backlogged records. A mark backoff prevents aggressive querying of the database in the absence of a steady flow of outbox records. |
Limits.IOErrorBackoff |
500 ms |
The backoff delay introduced when any of the mark, purge or reset queries encounter a database error. |
Limits.MaxInFlightRecords |
1000 |
An upper bound on the number of marked records that may be in flight at any given time. I.e. the number of records that have been enqueued with a producer client, for which acknowledgements have yet to be received. |
Limits.SendConcurrency |
8 |
The number of concurrent shards used for queuing causally unrelated records. Each shard is equipped with a dedicated producer client, allowing for its records to be sent independently of other shards. |
Limits.SendBuffer |
10 |
The maximum number of marked records that may be buffered for subsequent sending, for any given shard. When the buffer is full, the marker will halt — waiting for records to be sent and for their acknowledgements to flow through. |
Limits.MarkQueryRecords |
100 |
An upper bound on the number of records that may be marked in any given query. Limiting this number avoids long-running database queries. |
Limits.MinMetricsInterval |
5 s |
The minimum interval at which throughput metrics are emitted. Metrics are emitted conservatively and may be observed less frequently; in fact, throughput metrics are only emitted upon a successful message acknowledgement, which will not occur during periods of inactivity. |
# Docs
[Design](https://github.com/obsidiandynamics/goharvest/wiki/Design)
[Comparison of messaging patterns](https://github.com/obsidiandynamics/goharvest/wiki/Comparison-of-messaging-patterns)
[Comparison of harvesting methods](https://github.com/obsidiandynamics/goharvest/wiki/Comparison-of-harvesting-methods)
[FAQ](https://github.com/obsidiandynamics/goharvest/wiki/FAQ)
================================================
FILE: battery.go
================================================
package goharvest
import (
"hash/fnv"
)
// cell couples a records channel, over which outbox records are fed to a
// handler goroutine, with a done channel that is closed when the handler
// returns.
type cell struct {
	records chan OutboxRecord
	done    chan int
}
// stop closes the records channel, signalling the handler goroutine to
// terminate once it has drained any outstanding records.
func (c cell) stop() {
	close(c.records)
}
// await blocks until the handler goroutine has returned, as signalled by
// the closing of the done channel (see newCell).
func (c cell) await() {
	<-c.done
}
// enqueue offers rec to the cell's handler, blocking until either the handler
// accepts the record (returning true) or the cell has terminated (returning
// false). Termination is detected via the closed done channel, which avoids
// blocking forever on a cell whose handler has already exited.
func (c cell) enqueue(rec OutboxRecord) bool {
	select {
	case c.records <- rec:
		return true
	case <-c.done:
		return false
	}
}
// cellHandler consumes records from a cell's channel until the channel is
// closed.
type cellHandler func(records chan OutboxRecord)

// newCell creates a cell whose records channel is buffered to the given size,
// and spawns a goroutine to run the handler over that channel. The done
// channel is closed when the handler returns, signalling both await and any
// blocked enqueue calls.
func newCell(buffer int, handler cellHandler) cell {
	c := cell{
		// Fix: the buffer argument was previously ignored, creating an
		// unbuffered channel and defeating the configured send buffer
		// (Limits.SendBuffer).
		records: make(chan OutboxRecord, buffer),
		done:    make(chan int),
	}
	go func() {
		defer close(c.done)
		handler(c.records)
	}()
	return c
}
// battery abstracts over one or more cells, providing lifecycle control
// (stop, await, and the composite shutdown) plus record submission.
type battery interface {
	stop()
	await()
	shutdown()
	enqueue(rec OutboxRecord) bool
}
// concurrentBattery is a battery backed by a slice of cells, each with its
// own handler goroutine.
type concurrentBattery []cell

// stop closes the records channel of every cell, letting the handlers wind
// up once their backlogs are drained.
func (b *concurrentBattery) stop() {
	for i := range *b {
		(*b)[i].stop()
	}
}
// await blocks until every cell's handler goroutine has terminated.
func (b *concurrentBattery) await() {
	for i := range *b {
		(*b)[i].await()
	}
}
// shutdown stops all cells and then waits for their handlers to terminate —
// a convenience composing stop and await.
func (b *concurrentBattery) shutdown() {
	b.stop()
	b.await()
}
// enqueue routes the record to a cell selected by hashing its Kafka key, so
// that records sharing a key always land on the same cell (preserving their
// relative order). With a single cell the hash is skipped entirely.
func (b *concurrentBattery) enqueue(rec OutboxRecord) bool {
	cells := *b
	index := 0
	if len(cells) > 1 {
		index = int(hash(rec.KafkaKey) % uint32(len(cells)))
	}
	return cells[index].enqueue(rec)
}
// newConcurrentBattery builds a battery of 'concurrency' cells, each with the
// given channel buffer size, all running the same handler.
func newConcurrentBattery(concurrency int, buffer int, handler cellHandler) *concurrentBattery {
	cells := make(concurrentBattery, concurrency)
	for i := range cells {
		cells[i] = newCell(buffer, handler)
	}
	return &cells
}
// hash computes the 32-bit FNV-1a digest of the given string; used to assign
// records to battery cells by Kafka key.
func hash(str string) uint32 {
	h := fnv.New32a()
	_, _ = h.Write([]byte(str)) // Hash.Write never returns an error
	return h.Sum32()
}
================================================
FILE: battery_test.go
================================================
package goharvest
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestEnqueue_concurrencyOf1 verifies that a single-cell battery delivers an
// enqueued record to its handler.
func TestEnqueue_concurrencyOf1(t *testing.T) {
	enqueued := make(chan OutboxRecord)
	b := newConcurrentBattery(1, 0, func(records chan OutboxRecord) {
		// Relay every received record so the test can observe delivery.
		for rec := range records {
			enqueued <- rec
		}
	})
	defer b.shutdown()
	rec := OutboxRecord{}
	assert.True(t, b.enqueue(rec))
	assert.Equal(t, rec, <-enqueued)
}
// TestEnqueue_concurrencyOf2 verifies delivery through a two-cell battery,
// exercising the hash-based cell selection path in enqueue.
func TestEnqueue_concurrencyOf2(t *testing.T) {
	enqueued := make(chan OutboxRecord)
	b := newConcurrentBattery(2, 0, func(records chan OutboxRecord) {
		// Both cells relay to the same channel, so delivery is observed
		// regardless of which cell the record hashes to.
		for rec := range records {
			enqueued <- rec
		}
	})
	defer b.shutdown()
	rec := OutboxRecord{}
	assert.True(t, b.enqueue(rec))
	assert.Equal(t, rec, <-enqueued)
}
// TestEnqueue_afterDone verifies that enqueue reports failure once the
// handlers have returned (the no-op handler exits immediately, closing the
// done channels), and that a subsequent stop is harmless.
func TestEnqueue_afterDone(t *testing.T) {
	b := newConcurrentBattery(2, 0, func(records chan OutboxRecord) {})
	b.await()
	assert.False(t, b.enqueue(OutboxRecord{}))
	b.stop()
}
================================================
FILE: cmd/goharvest_example/example_main.go
================================================
package main
import (
"database/sql"
"github.com/obsidiandynamics/goharvest"
"github.com/obsidiandynamics/libstdgo/scribe"
scribelogrus "github.com/obsidiandynamics/libstdgo/scribe/logrus"
"github.com/sirupsen/logrus"
)
// main demonstrates running goharvest in embedded mode: it ensures the outbox
// table exists, then starts a harvester against a local Postgres and Kafka,
// blocking until the harvester terminates.
func main() {
	const dataSource = "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable"
	// Optional: Ensure the database table exists before we start harvesting.
	// Runs in its own closure so the setup connection is closed promptly.
	func() {
		db, err := sql.Open("postgres", dataSource)
		if err != nil {
			panic(err)
		}
		defer db.Close()
		_, err = db.Exec(`
			CREATE TABLE IF NOT EXISTS outbox (
				id                  BIGSERIAL PRIMARY KEY,
				create_time         TIMESTAMP WITH TIME ZONE NOT NULL,
				kafka_topic         VARCHAR(249) NOT NULL,
				kafka_key           VARCHAR(100) NOT NULL,  -- pick your own key size
				kafka_value         VARCHAR(10000),         -- pick your own value size
				kafka_header_keys   TEXT[] NOT NULL,
				kafka_header_values TEXT[] NOT NULL,
				leader_id           UUID
			)
		`)
		if err != nil {
			panic(err)
		}
	}()
	// Configure the harvester. It will use its own database connections under
	// the hood. Logging is routed through Logrus via the Scribe facade.
	log := logrus.StandardLogger()
	log.SetLevel(logrus.DebugLevel)
	config := goharvest.Config{
		BaseKafkaConfig: goharvest.KafkaConfigMap{
			"bootstrap.servers": "localhost:9092",
		},
		DataSource: dataSource,
		Scribe:     scribe.New(scribelogrus.Bind()),
	}
	// Create a new harvester.
	harvest, err := goharvest.New(config)
	if err != nil {
		panic(err)
	}
	// Start it (harvesting proceeds in background goroutines).
	err = harvest.Start()
	if err != nil {
		panic(err)
	}
	// Wait indefinitely for it to end.
	log.Fatal(harvest.Await())
}
================================================
FILE: cmd/pump/pump_main.go
================================================
package main
import (
"database/sql"
"flag"
"fmt"
"log"
"math/rand"
"strconv"
"time"
"github.com/obsidiandynamics/goharvest"
"github.com/obsidiandynamics/goharvest/metric"
"github.com/obsidiandynamics/goharvest/stasher"
)
const recordsPerTxn = 20
// main generates synthetic outbox records for load testing. It writes
// -records records spread over -keys distinct Kafka keys into the given
// outbox table, batching recordsPerTxn records per transaction and pausing
// -interval milliseconds between writes.
func main() {
	var keys, records, interval int
	var dataSource, outboxTable, kafkaTopic string
	var blank bool
	flag.IntVar(&keys, "keys", -1, "Number of unique keys")
	flag.IntVar(&records, "records", -1, "Number of records to generate")
	// Fix: the help string was missing its closing parenthesis.
	flag.IntVar(&interval, "interval", 0, "Write interval (in milliseconds)")
	flag.StringVar(&dataSource, "ds", "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable", "Data source")
	flag.StringVar(&outboxTable, "outbox", "outbox", "Outbox table name")
	flag.StringVar(&kafkaTopic, "topic", "pump", "Kafka output topic name")
	flag.BoolVar(&blank, "blank", false, "Generate blank records (nil value)")
	flag.Parse()

	// Reports a missing mandatory flag and aborts.
	errorFunc := func(field string) {
		flag.PrintDefaults()
		panic(fmt.Errorf("required '-%s' has not been set", field))
	}
	if keys == -1 {
		errorFunc("keys")
	}
	if records == -1 {
		errorFunc("records")
	}

	fmt.Printf("Starting stasher; keys: %d, records: %d, interval: %d ms\n", keys, records, interval)
	fmt.Printf(" Data source: %s\n", dataSource)
	fmt.Printf(" Outbox table name: %s\n", outboxTable)
	db, err := sql.Open("postgres", dataSource)
	if err != nil {
		panic(err)
	}
	defer db.Close()

	st := stasher.New(outboxTable)
	meter := metric.NewMeter("pump", 5*time.Second)
	var tx *sql.Tx
	var pre stasher.PreStash
	for i := 0; i < records; i++ {
		// Start a fresh transaction (committing the previous one, if any)
		// every recordsPerTxn records.
		if i%recordsPerTxn == 0 {
			finaliseTx(tx)
			tx, err = db.Begin()
			if err != nil {
				panic(err)
			}
			pre, err = st.Prepare(tx)
			if err != nil {
				panic(err)
			}
		}
		// rnd drives both the value payload and the key assignment.
		// (Renamed from 'rand', which shadowed the math/rand package.)
		rnd := rand.Uint64()
		var value *string
		if !blank {
			value = goharvest.String(fmt.Sprintf("value-%x", rnd))
		}
		rec := goharvest.OutboxRecord{
			KafkaTopic: kafkaTopic,
			KafkaKey:   fmt.Sprintf("key-%x", rnd%uint64(keys)),
			KafkaValue: value,
			KafkaHeaders: goharvest.KafkaHeaders{
				goharvest.KafkaHeader{Key: "Seq", Value: strconv.Itoa(i)},
			},
		}
		err := pre.Stash(rec)
		if err != nil {
			panic(err)
		}
		time.Sleep(time.Duration(interval) * time.Millisecond)
		meter.Add(1)
		meter.MaybeStatsLog(log.Printf)
	}
	finaliseTx(tx)
}
// finaliseTx commits the given transaction, if one is open. A nil transaction
// is a no-op; a commit failure terminates the process.
func finaliseTx(tx *sql.Tx) {
	if tx == nil {
		return
	}
	if err := tx.Commit(); err != nil {
		panic(err)
	}
}
================================================
FILE: cmd/reaper/reaper_main.go
================================================
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"github.com/obsidiandynamics/goharvest"
"github.com/obsidiandynamics/libstdgo/scribe"
"gopkg.in/yaml.v2"
scribelogrus "github.com/obsidiandynamics/libstdgo/scribe/logrus"
logrus "github.com/sirupsen/logrus"
)
// panicOnError logs the error through the supplied Scribe and panics; a nil
// error is a no-op.
func panicOnError(scr scribe.Scribe, err error) {
	if err == nil {
		return
	}
	scr.E()("Error: %v", err.Error())
	panic(err)
}
// main is the entry point of the standalone reaper daemon. It parses the
// -f/-file flag, loads and defaults the YAML configuration, then runs a
// harvester until it terminates.
func main() {
	var configFile string
	flag.StringVar(&configFile, "f", "", "Configuration file (shorthand)")
	flag.StringVar(&configFile, "file", "", "Configuration file")
	flag.Parse()
	// Reports a missing mandatory flag and aborts.
	errorFunc := func(field string) {
		flag.PrintDefaults()
		panic(fmt.Errorf("required '-%s' has not been set", field))
	}
	if configFile == "" {
		errorFunc("f")
	}
	// Route all logging through a trace-level Logrus logger via Scribe.
	lr := logrus.StandardLogger()
	lr.SetLevel(logrus.TraceLevel)
	scr := scribe.New(scribelogrus.Bind())
	workDir, err := os.Getwd()
	panicOnError(scr, err)
	scr.I()("Starting GoHarvest Reaper")
	executable, err := os.Executable()
	panicOnError(scr, err)
	scr.I()("Executable: %s; working directory: %s", executable, workDir)
	// Load the YAML configuration and apply defaults.
	cfgData, err := ioutil.ReadFile(configFile)
	panicOnError(scr, err)
	cfg, err := unmarshal(cfgData)
	panicOnError(scr, err)
	// The harvester logs through the same Scribe; the effective log level
	// comes from the logging.level configuration key.
	cfg.Harvest.Scribe = scr
	level, err := scribe.ParseLevelName(cfg.Logging.Level)
	panicOnError(scr, err)
	scr.SetEnabled(level.Level)
	// Create, start and block on the harvester until it terminates.
	h, err := goharvest.New(cfg.Harvest)
	panicOnError(scr, err)
	panicOnError(scr, h.Start())
	panicOnError(scr, h.Await())
}
// LoggingConfig controls the reaper's log output.
type LoggingConfig struct {
	// Minimum level name (e.g. Debug), parsed by scribe.ParseLevelName in main.
	Level string `yaml:"level"`
}

// setDefaults applies Debug as the logging level if none was configured.
func (l *LoggingConfig) setDefaults() {
	if l.Level == "" {
		l.Level = scribe.Levels[scribe.Debug].Name
	}
}
// ReaperConfig is the root of the reaper's YAML configuration document,
// comprising the embedded harvester configuration and logging options.
type ReaperConfig struct {
	Harvest goharvest.Config `yaml:"harvest"`
	Logging LoggingConfig    `yaml:"logging"`
}

// setDefaults fills in defaults for both the harvester and logging sections.
func (r *ReaperConfig) setDefaults() {
	r.Harvest.SetDefaults()
	r.Logging.setDefaults()
}
// unmarshal parses a ReaperConfig from YAML, rejecting unknown keys, and
// applies defaults to any optional fields left unset. On a parse error the
// (possibly partially populated) config is returned alongside the error,
// without defaulting.
func unmarshal(in []byte) (ReaperConfig, error) {
	var cfg ReaperConfig
	if err := yaml.UnmarshalStrict(in, &cfg); err != nil {
		return cfg, err
	}
	cfg.setDefaults()
	return cfg, nil
}
================================================
FILE: config.go
================================================
package goharvest
import (
"fmt"
"os"
"time"
validation "github.com/go-ozzo/ozzo-validation"
"github.com/obsidiandynamics/goneli"
"github.com/obsidiandynamics/libstdgo/scribe"
"gopkg.in/yaml.v2"
)
// Duration is a convenience for deriving a pointer from a given Duration argument.
func Duration(d time.Duration) *time.Duration {
	dp := d
	return &dp
}
// Int is a convenience for deriving a pointer from a given int argument.
func Int(i int) *int {
	v := i
	return &v
}
// Limits configuration. All fields are pointers: nil means "not set", in
// which case SetDefaults substitutes a default value.
type Limits struct {
	// Backoff delay when the mark, purge or reset queries hit a database error.
	IOErrorBackoff *time.Duration `yaml:"ioErrorBackoff"`
	// NOTE(review): neither defaulted nor validated in this file; presumably
	// consumed by the underlying poll loop — confirm.
	PollDuration *time.Duration `yaml:"pollDuration"`
	// Lower bound on the Kafka poll interval, preventing over-polling.
	MinPollInterval *time.Duration `yaml:"minPollInterval"`
	// Upper bound on the poll interval; defaults to half of HeartbeatTimeout.
	MaxPollInterval *time.Duration `yaml:"maxPollInterval"`
	// Period a leader maintains its status without receiving a heartbeat,
	// before voluntarily yielding (LeaderFenced).
	HeartbeatTimeout *time.Duration `yaml:"heartbeatTimeout"`
	// Defaults to min(MaxPollInterval, QueueTimeout).
	DrainInterval *time.Duration `yaml:"drainInterval"`
	// Maximum time a marked record may remain queued before a remark is triggered.
	QueueTimeout *time.Duration `yaml:"queueTimeout"`
	// Backoff delay when a mark query returns no backlogged records.
	MarkBackoff *time.Duration `yaml:"markBackoff"`
	// Upper bound on marked records awaiting producer acknowledgement.
	MaxInFlightRecords *int `yaml:"maxInFlightRecords"`
	// Number of concurrent shards used for queuing causally unrelated records.
	SendConcurrency *int `yaml:"sendConcurrency"`
	// Maximum marked records buffered for sending, per shard.
	SendBuffer *int `yaml:"sendBuffer"`
	// Upper bound on records marked in any single query.
	MarkQueryRecords *int `yaml:"markQueryRecords"`
	// Minimum interval between emissions of throughput metrics.
	MinMetricsInterval *time.Duration `yaml:"minMetricsInterval"`
}
// defaultInt assigns def to *i, but only when no value has been supplied
// (i.e. *i is nil). A copy of def is taken, so the stored pointer never
// aliases the caller's argument.
func defaultInt(i **int, def int) {
	if *i != nil {
		return
	}
	v := def
	*i = &v
}
func defaultDuration(d **time.Duration, def time.Duration) {
if *d == nil {
*d = &def
}
}
// SetDefaults assigns the defaults for optional values.
func (l *Limits) SetDefaults() {
	defaultDuration(&l.IOErrorBackoff, 500*time.Millisecond)
	defaultDuration(&l.HeartbeatTimeout, goneli.DefaultHeartbeatTimeout)
	// MaxPollInterval defaults to half the heartbeat timeout, so it must be
	// resolved after HeartbeatTimeout.
	defaultDuration(&l.MaxPollInterval, *l.HeartbeatTimeout/2)
	defaultDuration(&l.QueueTimeout, 30*time.Second)
	// DrainInterval depends on MaxPollInterval and QueueTimeout, resolved above.
	defaultDuration(&l.DrainInterval, minDuration(*l.MaxPollInterval, *l.QueueTimeout))
	defaultDuration(&l.MarkBackoff, 10*time.Millisecond)
	defaultInt(&l.MaxInFlightRecords, 1000)
	defaultInt(&l.SendConcurrency, 8)
	defaultInt(&l.SendBuffer, 10)
	defaultInt(&l.MarkQueryRecords, 100)
	defaultDuration(&l.MinMetricsInterval, 5*time.Second)
	// NOTE(review): PollDuration and MinPollInterval are deliberately left
	// unset here — presumably defaulted downstream (goneli); confirm.
}
// minDuration returns the lesser of the two given durations.
func minDuration(d0, d1 time.Duration) time.Duration {
	if d1 < d0 {
		return d1
	}
	return d0
}
// Validate the Limits configuration, returning an error if invalid.
func (l Limits) Validate() error {
	// MaxPollInterval's floor is 1 ms, raised to MinPollInterval when the
	// latter has been supplied.
	minimumMaxPollInterval := 1 * time.Millisecond
	if l.MinPollInterval != nil {
		minimumMaxPollInterval = *l.MinPollInterval
	}
	return validation.ValidateStruct(&l,
		validation.Field(&l.IOErrorBackoff, validation.Min(0)),
		validation.Field(&l.DrainInterval, validation.Required, validation.Min(1*time.Millisecond)),
		validation.Field(&l.MaxPollInterval, validation.Required, validation.Min(minimumMaxPollInterval)),
		validation.Field(&l.QueueTimeout, validation.Required, validation.Min(1*time.Millisecond)),
		validation.Field(&l.MarkBackoff, validation.Min(0)),
		validation.Field(&l.MaxInFlightRecords, validation.Required, validation.Min(1)),
		validation.Field(&l.SendConcurrency, validation.Required, validation.Min(1)),
		validation.Field(&l.SendBuffer, validation.Min(0)),
		validation.Field(&l.MarkQueryRecords, validation.Required, validation.Min(1)),
		validation.Field(&l.MinMetricsInterval, validation.Min(0)),
	)
}
// String obtains a textual representation of Limits.
func (l Limits) String() string {
	return fmt.Sprint(
		"Limits[IOErrorBackoff=", l.IOErrorBackoff,
		", PollDuration=", l.PollDuration,
		", MinPollInterval=", l.MinPollInterval,
		", MaxPollInterval=", l.MaxPollInterval,
		", HeartbeatTimeout=", l.HeartbeatTimeout,
		", DrainInterval=", l.DrainInterval,
		", QueueTimeout=", l.QueueTimeout,
		", MarkBackoff=", l.MarkBackoff,
		", MaxInFlightRecords=", l.MaxInFlightRecords,
		", SendConcurrency=", l.SendConcurrency,
		", SendBuffer=", l.SendBuffer,
		", MarkQueryRecords=", l.MarkQueryRecords,
		", MinMetricsInterval=", l.MinMetricsInterval, "]",
	)
}
// KafkaConfigMap represents the Kafka key-value configuration, keyed by the
// librdkafka-style property name (e.g. "bootstrap.servers").
type KafkaConfigMap map[string]interface{}
// Config encapsulates configuration for Harvest.
type Config struct {
	// Kafka configuration shared by the underlying producer and consumer
	// clients, including those used for leader election.
	BaseKafkaConfig KafkaConfigMap `yaml:"baseKafkaConfig"`
	// Additional producer-specific configuration, overlaid on BaseKafkaConfig
	// for the clients that publish harvested messages.
	ProducerKafkaConfig KafkaConfigMap `yaml:"producerKafkaConfig"`
	// Name of the Kafka topic used for orchestrating leader election.
	LeaderTopic string `yaml:"leaderTopic"`
	// Group ID shared by competing processes; used as the Kafka group.id.
	LeaderGroupID string `yaml:"leaderGroupID"`
	// Database driver-specific data source string.
	DataSource string `yaml:"dataSource"`
	// Name of the outbox table, optionally schema-qualified.
	OutboxTable string `yaml:"outboxTable"`
	// Tuning parameters bounding the harvester's behaviour.
	Limits Limits `yaml:"limits"`
	// Pluggable providers — presumably test seams for substituting the Kafka
	// clients, database binding and NELI implementation; not YAML-mapped.
	KafkaConsumerProvider KafkaConsumerProvider
	KafkaProducerProvider KafkaProducerProvider
	DatabaseBindingProvider DatabaseBindingProvider
	NeliProvider NeliProvider
	// Logging façade, preconfigured with the application's logger of choice.
	Scribe scribe.Scribe
	// Symbolic name of this instance; informational only.
	Name string `yaml:"name"`
}
// Validate the Config, returning an error if invalid.
func (c Config) Validate() error {
	// Assemble the per-field rules up front, then validate in a single pass.
	rules := []*validation.FieldRules{
		validation.Field(&c.BaseKafkaConfig, validation.NotNil),
		validation.Field(&c.ProducerKafkaConfig, validation.NotNil),
		validation.Field(&c.DataSource, validation.Required),
		validation.Field(&c.OutboxTable, validation.Required),
		validation.Field(&c.Limits),
		validation.Field(&c.KafkaConsumerProvider, validation.NotNil),
		validation.Field(&c.KafkaProducerProvider, validation.NotNil),
		validation.Field(&c.DatabaseBindingProvider, validation.NotNil),
		validation.Field(&c.NeliProvider, validation.NotNil),
		validation.Field(&c.Scribe, validation.NotNil),
		validation.Field(&c.Name, validation.Required),
	}
	return validation.ValidateStruct(&c, rules...)
}
// String obtains a textual representation of the configuration.
func (c Config) String() string {
	// %v matches fmt.Sprint's rendering of each field (maps, funcs, strings).
	return fmt.Sprintf(
		"Config[BaseKafkaConfig=%v, ProducerKafkaConfig=%v, LeaderTopic=%v, LeaderGroupID=%v"+
			", DataSource=%v, OutboxTable=%v, Limits=%v, KafkaConsumerProvider=%v"+
			", KafkaProducerProvider=%v, DatabaseBindingProvider=%v, NeliProvider=%v"+
			", Scribe=%v, Name=%v]",
		c.BaseKafkaConfig, c.ProducerKafkaConfig, c.LeaderTopic, c.LeaderGroupID,
		c.DataSource, c.OutboxTable, c.Limits, c.KafkaConsumerProvider,
		c.KafkaProducerProvider, c.DatabaseBindingProvider, c.NeliProvider,
		c.Scribe, c.Name)
}
// SetDefaults assigns the default values to optional fields.
func (c *Config) SetDefaults() {
	// Helper for defaulting empty string fields in place.
	defaultStr := func(field *string, def string) {
		if *field == "" {
			*field = def
		}
	}

	if c.BaseKafkaConfig == nil {
		c.BaseKafkaConfig = KafkaConfigMap{}
	}
	if _, exists := c.BaseKafkaConfig["bootstrap.servers"]; !exists {
		c.BaseKafkaConfig["bootstrap.servers"] = "localhost:9092"
	}
	if c.ProducerKafkaConfig == nil {
		c.ProducerKafkaConfig = KafkaConfigMap{}
	}
	defaultStr(&c.DataSource, "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable")
	defaultStr(&c.OutboxTable, "outbox")
	c.Limits.SetDefaults()
	if c.KafkaConsumerProvider == nil {
		c.KafkaConsumerProvider = StandardKafkaConsumerProvider()
	}
	if c.KafkaProducerProvider == nil {
		c.KafkaProducerProvider = StandardKafkaProducerProvider()
	}
	if c.DatabaseBindingProvider == nil {
		c.DatabaseBindingProvider = StandardPostgresBindingProvider()
	}
	if c.NeliProvider == nil {
		c.NeliProvider = StandardNeliProvider()
	}
	if c.Scribe == nil {
		c.Scribe = scribe.New(scribe.StandardBinding())
	}
	// Derive a unique instance name from the host, process ID and wall clock.
	defaultStr(&c.Name, fmt.Sprintf("%s_%d_%d", goneli.Sanitise(getString("localhost", os.Hostname)), os.Getpid(), time.Now().Unix()))
}
// Unmarshal a configuration from a byte slice, returning the configuration struct with pre-initialised defaults,
// or an error if unmarshalling failed. The configuration is not validated prior to returning, in case further
// amendments are required by the caller. The caller should call Validate() independently.
func Unmarshal(in []byte) (Config, error) {
	var cfg Config
	if err := yaml.UnmarshalStrict(in, &cfg); err != nil {
		return cfg, err
	}
	cfg.SetDefaults()
	return cfg, nil
}
type stringGetter func() (string, error)
func getString(def string, stringGetter stringGetter) string {
str, err := stringGetter()
if err != nil {
return def
}
return str
}
================================================
FILE: config_test.go
================================================
package goharvest
import (
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/obsidiandynamics/goneli"
"github.com/obsidiandynamics/libstdgo/check"
"github.com/obsidiandynamics/libstdgo/scribe"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
)
// TestDefaultKafkaConsumerProvider checks that SetDefaults installs a working consumer
// provider, and that constructing a consumer from an empty config fails with the
// client's "Required property" error.
func TestDefaultKafkaConsumerProvider(t *testing.T) {
	c := Config{}
	c.SetDefaults()
	cons, err := c.KafkaConsumerProvider(&KafkaConfigMap{})
	assert.Nil(t, cons)
	if assert.NotNil(t, err) {
		assert.Contains(t, err.Error(), "Required property")
	}
}

// TestDefaultKafkaProducerProvider checks that SetDefaults installs a producer provider,
// and that an unknown property ("foo") is rejected by the underlying client.
func TestDefaultKafkaProducerProvider(t *testing.T) {
	c := Config{}
	c.SetDefaults()
	prod, err := c.KafkaProducerProvider(&KafkaConfigMap{"foo": "bar"})
	assert.Nil(t, prod)
	if assert.NotNil(t, err) {
		assert.Contains(t, err.Error(), "No such configuration property")
	}
}

// TestDefaultNeliProvider checks that the default NELI provider can construct and
// close an instance when backed by mock Kafka consumers/producers.
func TestDefaultNeliProvider(t *testing.T) {
	c := Config{}
	c.SetDefaults()
	consMock := &consMock{}
	consMock.fillDefaults()
	prodMock := &prodMock{}
	prodMock.fillDefaults()
	neli, err := c.NeliProvider(goneli.Config{
		KafkaConsumerProvider: convertKafkaConsumerProvider(mockKafkaConsumerProvider(consMock)),
		KafkaProducerProvider: convertKafkaProducerProvider(mockKafkaProducerProvider(prodMock)),
	}, goneli.NopBarrier())
	assert.NotNil(t, neli)
	assert.Nil(t, err)
	assert.Nil(t, neli.Close())
}

// TestLimitsString smoke-tests the Limits String() representation.
func TestLimitsString(t *testing.T) {
	lim := Limits{}
	lim.SetDefaults()
	assert.Contains(t, lim.String(), "Limits[")
}
// TestLimitsFromYaml verifies that duration fields unmarshal from YAML, and that a
// subsequent SetDefaults leaves explicitly-set values intact while filling the rest.
func TestLimitsFromYaml(t *testing.T) {
	const y = `
ioErrorBackoff: 10ms
pollDuration: 20ms
minPollInterval: 30ms
`
	lim := Limits{}
	err := yaml.UnmarshalStrict([]byte(y), &lim)
	assert.Nil(t, err)
	assert.Equal(t, 10*time.Millisecond, *lim.IOErrorBackoff)
	assert.Equal(t, 20*time.Millisecond, *lim.PollDuration)
	assert.Equal(t, 30*time.Millisecond, *lim.MinPollInterval)

	lim.SetDefaults()
	// Check that the defaults weren't overridden.
	def := Limits{}
	def.SetDefaults()
	assert.Equal(t, *def.MarkBackoff, *lim.MarkBackoff)
}

// TestGetString covers both branches of getString: error → default, success → value.
func TestGetString(t *testing.T) {
	assert.Equal(t, "some-default", getString("some-default", func() (string, error) { return "", check.ErrSimulated }))
	assert.Equal(t, "some-string", getString("some-default", func() (string, error) { return "some-string", nil }))
}
// TestValidateLimits verifies Limits.Validate over the default configuration and a
// selection of invalid field values. The result of each Validate call is captured
// once and asserted directly, rather than redundantly re-running validation to
// obtain the error message (as the original did).
func TestValidateLimits(t *testing.T) {
	lim := Limits{}
	lim.SetDefaults()
	assert.Nil(t, lim.Validate())

	// A negative backoff violates the Min(0) rule.
	lim = Limits{
		IOErrorBackoff: Duration(-1),
		PollDuration:   Duration(time.Millisecond),
	}
	lim.SetDefaults()
	if err := lim.Validate(); assert.NotNil(t, err) {
		assert.Equal(t, "IOErrorBackoff: must be no less than 0.", err.Error())
	}

	// A zero drain interval fails the Required rule.
	lim = Limits{
		DrainInterval: Duration(0),
	}
	lim.SetDefaults()
	if err := lim.Validate(); assert.NotNil(t, err) {
		assert.Equal(t, "DrainInterval: cannot be blank.", err.Error())
	}

	// A sub-millisecond drain interval fails the minimum-duration rule.
	lim = Limits{
		DrainInterval: Duration(1 * time.Nanosecond),
	}
	lim.SetDefaults()
	if err := lim.Validate(); assert.NotNil(t, err) {
		assert.Equal(t, "DrainInterval: must be no less than 1ms.", err.Error())
	}
}
// TestConfigString smoke-tests the Config String() representation.
func TestConfigString(t *testing.T) {
	cfg := Config{}
	cfg.SetDefaults()
	assert.Contains(t, cfg.String(), "Config[")
}

// TestValidateConfig_valid checks that a fully-populated configuration passes validation.
func TestValidateConfig_valid(t *testing.T) {
	cfg := Config{
		BaseKafkaConfig:         KafkaConfigMap{},
		ProducerKafkaConfig:     KafkaConfigMap{},
		LeaderTopic:             "leader-topic",
		LeaderGroupID:           "leader-group-d",
		DataSource:              "data-source",
		OutboxTable:             "outbox-table",
		KafkaConsumerProvider:   StandardKafkaConsumerProvider(),
		KafkaProducerProvider:   StandardKafkaProducerProvider(),
		DatabaseBindingProvider: StandardPostgresBindingProvider(),
		Scribe:                  scribe.New(scribe.StandardBinding()),
		Name:                    "name",
	}
	cfg.SetDefaults()
	assert.Nil(t, cfg.Validate())
}

// TestValidateConfig_invalidLimits checks that an invalid nested Limits value
// (negative SendConcurrency) causes Config.Validate to fail.
func TestValidateConfig_invalidLimits(t *testing.T) {
	cfg := Config{
		BaseKafkaConfig:     KafkaConfigMap{},
		ProducerKafkaConfig: KafkaConfigMap{},
		LeaderTopic:         "leader-topic",
		LeaderGroupID:       "leader-group-id",
		DataSource:          "data-source",
		OutboxTable:         "outbox-table",
		Limits: Limits{
			SendConcurrency: Int(-1),
		},
		KafkaConsumerProvider:   StandardKafkaConsumerProvider(),
		KafkaProducerProvider:   StandardKafkaProducerProvider(),
		DatabaseBindingProvider: StandardPostgresBindingProvider(),
		Scribe:                  scribe.New(scribe.StandardBinding()),
		Name:                    "name",
	}
	cfg.SetDefaults()
	assert.NotNil(t, cfg.Validate())
}

// TestValidateConfig_default checks that a zero Config becomes valid after SetDefaults.
func TestValidateConfig_default(t *testing.T) {
	cfg := Config{}
	cfg.SetDefaults()
	assert.Nil(t, cfg.Validate())
}

// TestDefaultDrainTimeout checks the derived defaults: MaxPollInterval is half the
// HeartbeatTimeout, and DrainInterval is the lesser of MaxPollInterval and QueueTimeout.
func TestDefaultDrainTimeout(t *testing.T) {
	cfg := Config{
		Limits: Limits{
			HeartbeatTimeout: Duration(40 * time.Second),
		},
	}
	cfg.SetDefaults()
	assert.Equal(t, 20*time.Second, *cfg.Limits.MaxPollInterval)
	assert.Equal(t, 20*time.Second, *cfg.Limits.DrainInterval)

	cfg = Config{
		Limits: Limits{
			HeartbeatTimeout: Duration(40 * time.Second),
			QueueTimeout:     Duration(15 * time.Second),
		},
	}
	cfg.SetDefaults()
	assert.Equal(t, 20*time.Second, *cfg.Limits.MaxPollInterval)
	assert.Equal(t, 15*time.Second, *cfg.Limits.DrainInterval)
}
// TestUnmarshal_fullyPopulated round-trips a fully-populated YAML document through
// Unmarshal and compares the result (provider/scribe fields excluded) against an
// expected Config. Validate is invoked once and its captured result reused, rather
// than re-validating just to render the error message (as the original did).
func TestUnmarshal_fullyPopulated(t *testing.T) {
	const y = `
baseKafkaConfig:
  bootstrap.servers: localhost:9093
producerKafkaConfig:
  compression.type: lz4
leaderTopic: leader-topic
leaderGroupID: leader-group-id
dataSource: data-source
outboxTable: outbox-table
limits:
  ioErrorBackoff: 10ms
  pollDuration: 20ms
  minPollInterval: 30ms
  maxPollInterval: 35ms
  heartbeatTimeout: 15ms
  drainInterval: 32ms
  queueTimeout: 40ms
  markBackoff: 50ms
  maxInFlightRecords: 60
  sendConcurrency: 70
  sendBuffer: 80
  minMetricsInterval: 90ms
name: test-name
`
	cfg, err := Unmarshal([]byte(y))
	require.Nil(t, err)
	if vErr := cfg.Validate(); !assert.Nil(t, vErr) {
		t.Errorf("Validation error: %s", vErr.Error())
	}

	exp := Config{
		BaseKafkaConfig: KafkaConfigMap{
			"bootstrap.servers": "localhost:9093",
		},
		ProducerKafkaConfig: KafkaConfigMap{
			"compression.type": "lz4",
		},
		LeaderTopic:   "leader-topic",
		LeaderGroupID: "leader-group-id",
		DataSource:    "data-source",
		OutboxTable:   "outbox-table",
		Limits: Limits{
			IOErrorBackoff:     Duration(10 * time.Millisecond),
			PollDuration:       Duration(20 * time.Millisecond),
			MinPollInterval:    Duration(30 * time.Millisecond),
			MaxPollInterval:    Duration(35 * time.Millisecond),
			HeartbeatTimeout:   Duration(15 * time.Millisecond),
			DrainInterval:      Duration(32 * time.Millisecond),
			QueueTimeout:       Duration(40 * time.Millisecond),
			MarkBackoff:        Duration(50 * time.Millisecond),
			MaxInFlightRecords: Int(60),
			SendConcurrency:    Int(70),
			SendBuffer:         Int(80),
			MinMetricsInterval: Duration(90 * time.Millisecond),
		},
		Name: "test-name",
	}
	exp.SetDefaults()

	// Provider funcs and the scribe aren't comparable; exclude them from the diff.
	ignoreFields := cmpopts.IgnoreFields(
		Config{},
		"KafkaConsumerProvider", "KafkaProducerProvider", "DatabaseBindingProvider", "NeliProvider", "Scribe",
	)
	assert.True(t, cmp.Equal(exp, cfg, ignoreFields), "Diff: %v", cmp.Diff(exp, cfg, ignoreFields))
}
// TestUnmarshal_empty checks that an empty YAML document yields a configuration
// equal to a defaulted zero Config (Name and providers excluded, since Name embeds
// the PID and timestamp). Validate is invoked once and its captured result reused,
// rather than re-validating just to render the error message (as the original did).
func TestUnmarshal_empty(t *testing.T) {
	const y = ``
	cfg, err := Unmarshal([]byte(y))
	assert.Nil(t, err)
	if vErr := cfg.Validate(); !assert.Nil(t, vErr) {
		t.Errorf("Validation error: %s", vErr.Error())
	}

	exp := Config{}
	exp.SetDefaults()
	ignoreFields := cmpopts.IgnoreFields(
		Config{},
		"KafkaConsumerProvider", "KafkaProducerProvider", "DatabaseBindingProvider", "NeliProvider", "Scribe", "Name",
	)
	assert.True(t, cmp.Equal(exp, cfg, ignoreFields), "Diff: %v", cmp.Diff(exp, cfg, ignoreFields))
}
================================================
FILE: db.go
================================================
package goharvest
import (
"fmt"
"time"
"github.com/google/uuid"
)
// KafkaHeader is a key-value tuple representing a single header entry.
type KafkaHeader struct {
	Key   string
	Value string
}

// String obtains a textual representation of a KafkaHeader, in the form "key:value".
func (h KafkaHeader) String() string {
	return fmt.Sprintf("%s:%s", h.Key, h.Value)
}
// KafkaHeaders is a slice of KafkaHeader tuples.
type KafkaHeaders []KafkaHeader

// OutboxRecord depicts a single entry in the outbox table. It can be used for both reading and writing operations.
type OutboxRecord struct {
	// ID is the database-assigned primary key of the record.
	ID int64
	// CreateTime is the record's creation timestamp.
	CreateTime time.Time
	// KafkaTopic, KafkaKey and KafkaValue describe the message to publish.
	// KafkaValue is a pointer so that a nil value can be represented (see String helper).
	KafkaTopic string
	KafkaKey   string
	KafkaValue *string
	// KafkaHeaders are the headers to attach to the published message.
	KafkaHeaders KafkaHeaders
	// LeaderID is the ID of the leader that marked this record; nil when unmarked
	// (see DatabaseBinding.Mark).
	LeaderID *uuid.UUID
}
// String is a convenience function that returns a pointer to the given str argument, for use with setting OutboxRecord.Value.
func String(str string) *string {
	s := str
	return &s
}
// String provides a textual representation of an OutboxRecord.
func (rec OutboxRecord) String() string {
	// %v renders each field the same way fmt.Sprint does (pointers to
	// non-Stringer types print as addresses; *uuid.UUID prints the UUID).
	return fmt.Sprintf(
		"OutboxRecord[ID=%v, CreateTime=%v, KafkaTopic=%v, KafkaKey=%v, KafkaValue=%v, KafkaHeaders=%v, LeaderID=%v]",
		rec.ID, rec.CreateTime, rec.KafkaTopic, rec.KafkaKey, rec.KafkaValue, rec.KafkaHeaders, rec.LeaderID)
}
// DatabaseBinding is an abstraction over the data access layer, allowing goharvest to use arbitrary database implementations.
type DatabaseBinding interface {
	// Mark claims up to limit outstanding outbox records for the given leader,
	// returning the records that were marked. (Exact marking semantics live in the
	// implementation — see postgres.go.)
	Mark(leaderID uuid.UUID, limit int) ([]OutboxRecord, error)
	// Purge removes the record with the given ID, reporting whether a row was affected.
	// NOTE(review): the bool is presumed to mean "a row was affected" — confirm against implementation.
	Purge(id int64) (bool, error)
	// Reset returns the record with the given ID to an unmarked state, reporting
	// whether a row was affected (same caveat as Purge).
	Reset(id int64) (bool, error)
	// Dispose releases the binding's underlying resources.
	Dispose()
}

// DatabaseBindingProvider is a factory for creating instances of a DatabaseBinding.
type DatabaseBindingProvider func(dataSource string, outboxTable string) (DatabaseBinding, error)
================================================
FILE: db_mock_test.go
================================================
package goharvest
import (
"github.com/google/uuid"
"github.com/obsidiandynamics/libstdgo/concurrent"
)
// dbMockFuncs holds the pluggable behaviours of a dbMock, one per
// DatabaseBinding method. Each receives the mock itself as its first argument.
type dbMockFuncs struct {
	Mark    func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error)
	Purge   func(m *dbMock, id int64) (bool, error)
	Reset   func(m *dbMock, id int64) (bool, error)
	Dispose func(m *dbMock)
}

// dbMockCounts tracks how many times each DatabaseBinding method was invoked.
type dbMockCounts struct {
	Mark,
	Purge,
	Reset,
	Dispose concurrent.AtomicCounter
}

// dbMock is a test double for DatabaseBinding. Behaviour is supplied via f,
// invocations are counted in c, and the default Mark implementation drains
// record batches from the markedRecords channel.
type dbMock struct {
	markedRecords chan []OutboxRecord
	f             dbMockFuncs
	c             dbMockCounts
}
// Mark delegates to the configured mock function. The counter is incremented in a
// defer so the call registers even if the mock function panics.
func (m *dbMock) Mark(leaderID uuid.UUID, limit int) ([]OutboxRecord, error) {
	defer m.c.Mark.Inc()
	return m.f.Mark(m, leaderID, limit)
}

// Purge delegates to the configured mock function, counting the invocation.
func (m *dbMock) Purge(id int64) (bool, error) {
	defer m.c.Purge.Inc()
	return m.f.Purge(m, id)
}

// Reset delegates to the configured mock function, counting the invocation.
func (m *dbMock) Reset(id int64) (bool, error) {
	defer m.c.Reset.Inc()
	return m.f.Reset(m, id)
}

// Dispose delegates to the configured mock function, counting the invocation.
func (m *dbMock) Dispose() {
	defer m.c.Dispose.Inc()
	m.f.Dispose(m)
}
// fillDefaults populates any unset mock behaviours with benign defaults and
// initialises fresh invocation counters. Pre-assigned behaviours and channels
// are left untouched, so tests may override selectively before calling this.
func (m *dbMock) fillDefaults() {
	if m.markedRecords == nil {
		m.markedRecords = make(chan []OutboxRecord)
	}
	if m.f.Mark == nil {
		// Default Mark: non-blocking drain of the markedRecords channel; yields an
		// empty batch when no records have been staged.
		m.f.Mark = func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error) {
			select {
			case records := <-m.markedRecords:
				return records, nil
			default:
				return []OutboxRecord{}, nil
			}
		}
	}
	if m.f.Purge == nil {
		// Default Purge: always reports success.
		m.f.Purge = func(m *dbMock, id int64) (bool, error) {
			return true, nil
		}
	}
	if m.f.Reset == nil {
		// Default Reset: always reports success.
		m.f.Reset = func(m *dbMock, id int64) (bool, error) {
			return true, nil
		}
	}
	if m.f.Dispose == nil {
		// Default Dispose: no-op.
		m.f.Dispose = func(m *dbMock) {}
	}
	// Counters are always re-created, even when behaviours were pre-assigned.
	m.c.Mark = concurrent.NewAtomicCounter()
	m.c.Purge = concurrent.NewAtomicCounter()
	m.c.Reset = concurrent.NewAtomicCounter()
	m.c.Dispose = concurrent.NewAtomicCounter()
}
// mockDatabaseBindingProvider yields a provider that ignores its arguments and
// always binds to the given mock, never failing.
func mockDatabaseBindingProvider(m *dbMock) func(string, string) (DatabaseBinding, error) {
	return func(string, string) (DatabaseBinding, error) {
		return m, nil
	}
}
================================================
FILE: event.go
================================================
package goharvest
import (
"fmt"
"github.com/google/uuid"
"github.com/obsidiandynamics/goharvest/metric"
)
// EventHandler is a callback function for handling GoHarvest events.
type EventHandler func(e Event)

// Event encapsulates a GoHarvest event. Every event is a fmt.Stringer; concrete
// types in this file (LeaderAcquired, LeaderRefreshed, LeaderRevoked, LeaderFenced,
// MeterRead) carry any event-specific payload.
type Event interface {
	fmt.Stringer
}
// LeaderAcquired is emitted upon successful acquisition of leader status.
type LeaderAcquired struct {
	leaderID uuid.UUID
}

// String obtains a textual representation of the LeaderAcquired event.
func (e LeaderAcquired) String() string {
	return fmt.Sprintf("LeaderAcquired[leaderID=%v]", e.leaderID)
}

// LeaderID returns the local UUID of the elected leader.
func (e LeaderAcquired) LeaderID() uuid.UUID {
	return e.leaderID
}
// LeaderRefreshed is emitted when a new leader ID is generated as a result of a remarking request.
type LeaderRefreshed struct {
	leaderID uuid.UUID
}

// String obtains a textual representation of the LeaderRefreshed event.
func (e LeaderRefreshed) String() string {
	return fmt.Sprintf("LeaderRefreshed[leaderID=%v]", e.leaderID)
}

// LeaderID returns the local UUID of the elected leader.
func (e LeaderRefreshed) LeaderID() uuid.UUID {
	return e.leaderID
}
// LeaderRevoked is emitted when the leader status has been revoked.
type LeaderRevoked struct{}

// String obtains a textual representation of the LeaderRevoked event.
func (e LeaderRevoked) String() string {
	// The event carries no state, so its representation is a constant; no need
	// to route it through fmt.Sprint.
	return "LeaderRevoked[]"
}
// LeaderFenced is emitted when the leader has been fenced. (The original comment was
// a copy-paste of LeaderRevoked's; this event is distinct from a plain revocation.)
type LeaderFenced struct{}

// String obtains a textual representation of the LeaderFenced event.
func (e LeaderFenced) String() string {
	// Stateless event — a constant representation suffices; no fmt.Sprint needed.
	return "LeaderFenced[]"
}
// MeterRead is emitted when the internal throughput Meter has been read.
type MeterRead struct {
	stats metric.MeterStats
}

// String obtains a textual representation of the MeterRead event.
func (e MeterRead) String() string {
	return fmt.Sprintf("MeterRead[stats=%v]", e.stats)
}

// Stats embedded in the MeterRead event.
func (e MeterRead) Stats() metric.MeterStats {
	return e.stats
}
================================================
FILE: event_test.go
================================================
package goharvest
import (
"testing"
"github.com/google/uuid"
"github.com/obsidiandynamics/goharvest/metric"
"github.com/stretchr/testify/assert"
)
// TestLeaderAcquired_string checks the event's String() includes the type name and the UUID.
func TestLeaderAcquired_string(t *testing.T) {
	leaderID, _ := uuid.NewRandom()
	assert.Contains(t, LeaderAcquired{leaderID}.String(), "LeaderAcquired[")
	assert.Contains(t, LeaderAcquired{leaderID}.String(), leaderID.String())
}

// TestLeaderAcquired_getter checks LeaderID() returns the wrapped UUID.
func TestLeaderAcquired_getter(t *testing.T) {
	leaderID, _ := uuid.NewRandom()
	e := LeaderAcquired{leaderID}
	assert.Equal(t, leaderID, e.LeaderID())
}

// TestLeaderRefreshed_string checks the event's String() includes the type name and the UUID.
func TestLeaderRefreshed_string(t *testing.T) {
	leaderID, _ := uuid.NewRandom()
	assert.Contains(t, LeaderRefreshed{leaderID}.String(), "LeaderRefreshed[")
	assert.Contains(t, LeaderRefreshed{leaderID}.String(), leaderID.String())
}

// TestLeaderRefreshed_getter checks LeaderID() returns the wrapped UUID.
func TestLeaderRefreshed_getter(t *testing.T) {
	leaderID, _ := uuid.NewRandom()
	e := LeaderRefreshed{leaderID}
	assert.Equal(t, leaderID, e.LeaderID())
}

// TestLeaderRevoked_string pins the exact representation of the stateless event.
func TestLeaderRevoked_string(t *testing.T) {
	assert.Equal(t, "LeaderRevoked[]", LeaderRevoked{}.String())
}

// TestLeaderFenced_string pins the exact representation of the stateless event.
func TestLeaderFenced_string(t *testing.T) {
	assert.Equal(t, "LeaderFenced[]", LeaderFenced{}.String())
}

// TestMeterStats_string checks the MeterRead event embeds the stats representation.
// NOTE(review): despite the name, this exercises MeterRead, not MeterStats.
func TestMeterStats_string(t *testing.T) {
	stats := metric.MeterStats{}
	assert.Contains(t, MeterRead{stats}.String(), "MeterRead[")
	assert.Contains(t, MeterRead{stats}.String(), stats.String())
}
================================================
FILE: examples/reaper.yaml
================================================
harvest:
baseKafkaConfig:
bootstrap.servers: localhost:9092
producerKafkaConfig:
compression.type: lz4
delivery.timeout.ms: 10000
leaderTopic: my-app-name
leaderGroupID: my-app-name
dataSource: host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable
outboxTable: outbox
limits:
minPollInterval: 1s
heartbeatTimeout: 5s
maxInFlightRecords: 1000
minMetricsInterval: 5s
sendConcurrency: 4
sendBuffer: 10
logging:
level: Debug
================================================
FILE: examples/reaper_secure.yaml
================================================
harvest:
baseKafkaConfig:
bootstrap.servers: localhost:9094
security.protocol: sasl_ssl
ssl.ca.location: ca-cert.pem
sasl.mechanism: SCRAM-SHA-512
sasl.username: alice
sasl.password: alice-secret
leaderTopic: __consumer_offsets
leaderGroupID: my-app-name
dataSource: host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable
outboxTable: outbox
logging:
level: Debug
================================================
FILE: go.mod
================================================
module github.com/obsidiandynamics/goharvest
go 1.14
require (
github.com/DATA-DOG/go-sqlmock v1.4.1
github.com/confluentinc/confluent-kafka-go v1.5.2 // indirect
github.com/go-ozzo/ozzo-validation v3.6.0+incompatible
github.com/google/go-cmp v0.4.0
github.com/google/uuid v1.1.1
github.com/lib/pq v1.5.1
github.com/obsidiandynamics/goneli v0.4.3
github.com/obsidiandynamics/libstdgo v0.4.1
github.com/sirupsen/logrus v1.5.0
github.com/stretchr/testify v1.5.1
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f // indirect
gopkg.in/confluentinc/confluent-kafka-go.v1 v1.5.2
gopkg.in/yaml.v2 v2.2.8
)
================================================
FILE: go.sum
================================================
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/DATA-DOG/go-sqlmock v1.4.1 h1:ThlnYciV1iM/V0OSF/dtkqWb6xo5qITT1TJBG1MRDJM=
github.com/DATA-DOG/go-sqlmock v1.4.1/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496 h1:zV3ejI06GQ59hwDQAvmK1qxOQGB3WuVTRoY0okPTAv0=
github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg=
github.com/cihub/seelog v0.0.0-20170130134532-f561c5e57575 h1:kHaBemcxl8o/pQ5VM1c8PVE1PubbNx3mjUr09OqWGCs=
github.com/cihub/seelog v0.0.0-20170130134532-f561c5e57575/go.mod h1:9d6lWj8KzO/fd/NrVaLscBKmPigpZpn5YawRPw+e3Yo=
github.com/confluentinc/confluent-kafka-go v1.5.2 h1:l+qt+a0Okmq0Bdr1P55IX4fiwFJyg0lZQmfHkAFkv7E=
github.com/confluentinc/confluent-kafka-go v1.5.2/go.mod h1:u2zNLny2xq+5rWeTQjFHbDzzNuba4P1vo31r9r4uAdg=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-ozzo/ozzo-validation v3.6.0+incompatible h1:msy24VGS42fKO9K1vLz82/GeYW1cILu7Nuuj1N3BBkE=
github.com/go-ozzo/ozzo-validation v3.6.0+incompatible/go.mod h1:gsEKFIVnabGBt6mXmxK0MoFy+cZoTJY6mu5Ll3LVLBU=
github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/inconshreveable/log15 v0.0.0-20200109203555-b30bc20e4fd1 h1:KUDFlmBg2buRWNzIcwLlKvfcnujcHQRQ1As1LoaCLAM=
github.com/inconshreveable/log15 v0.0.0-20200109203555-b30bc20e4fd1/go.mod h1:cOaXtrgN4ScfRrD9Bre7U1thNq5RtJ8ZoP4iXVGRj6o=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lib/pq v1.5.1 h1:Jn6HYxiYrtQ92CopqJLvfPCJUrrruw1+1cn0jM9dKrI=
github.com/lib/pq v1.5.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/mattn/go-colorable v0.1.6 h1:6Su7aK7lXmJ/U79bYtBjLNaha4Fs1Rg9plHpcH+vvnE=
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/obsidiandynamics/goneli v0.4.3 h1:lf3x/qSgEX9S6+Ak5GPcc3TBUQBhPJeiWvGrCykZcbM=
github.com/obsidiandynamics/goneli v0.4.3/go.mod h1:1i3mTL/PaaDKu6f+hlndeRUCbV8uiDxu+203vBpn6oE=
github.com/obsidiandynamics/libstdgo v0.4.1 h1:ZUnz+72xQSMgAjEqxp7i7NOBZlu6AcAE6ppmvVKxK3M=
github.com/obsidiandynamics/libstdgo v0.4.1/go.mod h1:0gKiFsJhfrlCqbWFNhDDUJgj6XbXWZyrl0JS/C+jU5g=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/sirupsen/logrus v1.5.0 h1:1N5EYkVAPEywqZRJd7cwnRtCb6xJx7NH3T3WUTF980Q=
github.com/sirupsen/logrus v1.5.0/go.mod h1:+F7Ogzej0PZc/94MaYx/nvG9jOFMD2osvC3s+Squfpo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A=
go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU=
go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee h1:0mgffUl7nfd+FpvXMVz4IDEaUSmT1ysygQC7qYo7sG4=
go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA=
go.uber.org/zap v1.14.1 h1:nYDKopTbvAPq/NrUVZwT15y2lpROBiLLyoRTbXOYWOo=
go.uber.org/zap v1.14.1/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae h1:/WDfKMnPU+m5M4xB+6x4kaepxRw6jWvR5iDRdvjHgy8=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200413165638-669c56c373c4 h1:opSr2sbRXk5X5/givKrrKj9HXxFpW2sdCiP8MJSKLQY=
golang.org/x/sys v0.0.0-20200413165638-669c56c373c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5 h1:hKsoRgsbwY1NafxrwTs+k64bikrLBkAgPir1TNCj3Zs=
golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200417140056-c07e33ef3290/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/confluentinc/confluent-kafka-go.v1 v1.4.0 h1:70Hht0HKadDe6GpSgstEtYrDMtHo3ZqK+3KeHepusaw=
gopkg.in/confluentinc/confluent-kafka-go.v1 v1.4.0/go.mod h1:ZdI3yfYmdNSLQPNCpO1y00EHyWaHG5EnQEyL/ntAegY=
gopkg.in/confluentinc/confluent-kafka-go.v1 v1.5.2 h1:g0WBLy6fobNUU8W/e9zx6I0Yl79Ya+BDW1NwzAlTiiQ=
gopkg.in/confluentinc/confluent-kafka-go.v1 v1.5.2/go.mod h1:ZdI3yfYmdNSLQPNCpO1y00EHyWaHG5EnQEyL/ntAegY=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
================================================
FILE: goharvest_doc_test.go
================================================
package goharvest
import (
"database/sql"
"log"
"testing"
"github.com/obsidiandynamics/libstdgo/check"
"github.com/obsidiandynamics/libstdgo/scribe"
scribelogrus "github.com/obsidiandynamics/libstdgo/scribe/logrus"
"github.com/sirupsen/logrus"
)
// Example demonstrates the minimal setup for running a harvester: provisioning the outbox
// table, configuring Kafka bootstrap servers and a Postgres data source, then starting the
// harvester and blocking on Await.
func Example() {
	const dataSource = "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable"

	// Optional: Ensure the database table exists before we start harvesting.
	func() {
		db, err := sql.Open("postgres", dataSource)
		if err != nil {
			panic(err)
		}
		defer db.Close()

		_, err = db.Exec(`
		CREATE TABLE IF NOT EXISTS outbox (
			id                  BIGSERIAL PRIMARY KEY,
			create_time         TIMESTAMP WITH TIME ZONE NOT NULL,
			kafka_topic         VARCHAR(249) NOT NULL,
			kafka_key           VARCHAR(100) NOT NULL,  -- pick your own key size
			kafka_value         VARCHAR(10000),         -- pick your own value size
			kafka_header_keys   TEXT[] NOT NULL,
			kafka_header_values TEXT[] NOT NULL,
			leader_id           UUID
		)
	`)
		if err != nil {
			panic(err)
		}
	}()

	// Configure the harvester. It will use its own database and Kafka connections under the hood.
	config := Config{
		BaseKafkaConfig: KafkaConfigMap{
			"bootstrap.servers": "localhost:9092",
		},
		DataSource: dataSource,
	}

	// Create a new harvester.
	harvest, err := New(config)
	if err != nil {
		panic(err)
	}

	// Start it.
	err = harvest.Start()
	if err != nil {
		panic(err)
	}

	// Wait indefinitely for it to end.
	log.Fatal(harvest.Await())
}

// TestExample runs Example as a targetted test, so it can be invoked individually from the IDE/CLI.
func TestExample(t *testing.T) {
	check.RunTargetted(t, Example)
}
// Example_withCustomLogger shows how to route goharvest's internal logging through Logrus
// by supplying a Scribe binding in the configuration.
func Example_withCustomLogger() {
	// Example: Configure GoHarvest with a Logrus binding for Scribe.
	log := logrus.StandardLogger()
	log.SetLevel(logrus.DebugLevel)

	// Configure the custom logger using a binding.
	config := Config{
		BaseKafkaConfig: KafkaConfigMap{
			"bootstrap.servers": "localhost:9092",
		},
		Scribe:     scribe.New(scribelogrus.Bind()),
		DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
	}

	// Create a new harvester.
	harvest, err := New(config)
	if err != nil {
		panic(err)
	}

	// Start it.
	err = harvest.Start()
	if err != nil {
		panic(err)
	}

	// Wait indefinitely for it to end.
	log.Fatal(harvest.Await())
}

// TestExample_withCustomLogger runs the example as a targetted test.
func TestExample_withCustomLogger(t *testing.T) {
	check.RunTargetted(t, Example_withCustomLogger)
}
// Example_withSaslSslAndCustomProducerConfig shows how to connect to a secured Kafka cluster
// (SASL_SSL with SCRAM credentials) and how to layer additional producer-only settings
// (here, LZ4 compression) on top of the base Kafka configuration.
func Example_withSaslSslAndCustomProducerConfig() {
	// Example: Using Kafka with sasl_ssl for authentication and encryption.
	config := Config{
		BaseKafkaConfig: KafkaConfigMap{
			"bootstrap.servers": "localhost:9094",
			"security.protocol": "sasl_ssl",
			"ssl.ca.location":   "ca-cert.pem",
			"sasl.mechanism":    "SCRAM-SHA-512",
			"sasl.username":     "alice",
			"sasl.password":     "alice-secret",
		},
		ProducerKafkaConfig: KafkaConfigMap{
			"compression.type": "lz4",
		},
		DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
	}

	// Create a new harvester.
	harvest, err := New(config)
	if err != nil {
		panic(err)
	}

	// Start harvesting in the background.
	err = harvest.Start()
	if err != nil {
		panic(err)
	}

	// Wait indefinitely for the harvester to end.
	log.Fatal(harvest.Await())
}

// TestExample_withSaslSslAndCustomProducerConfig runs the example as a targetted test.
func TestExample_withSaslSslAndCustomProducerConfig(t *testing.T) {
	check.RunTargetted(t, Example_withSaslSslAndCustomProducerConfig)
}
// Example_withEventHandler shows how an application can subscribe to goharvest's event
// stream (leadership changes and throughput metrics) by registering an event handler
// callback before starting the harvester.
func Example_withEventHandler() {
	// Example: Registering a custom event handler to get notified of leadership changes and metrics.
	log := logrus.StandardLogger()
	log.SetLevel(logrus.TraceLevel)

	config := Config{
		BaseKafkaConfig: KafkaConfigMap{
			"bootstrap.servers": "localhost:9092",
		},
		DataSource: "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable",
		Scribe:     scribe.New(scribelogrus.Bind()),
	}

	// Create a new harvester and register an event handler.
	harvest, err := New(config)
	if err != nil {
		panic(err)
	}

	// Register a handler callback, invoked when an event occurs within goharvest.
	// The callback is completely optional; it lets the application piggy-back on leader
	// status updates, in case it needs to schedule some additional work (other than
	// harvesting outbox records) that should only be run on one process at any given time.
	harvest.SetEventHandler(func(e Event) {
		switch event := e.(type) {
		case LeaderAcquired:
			// The application may initialise any state necessary to perform work as a leader.
			log.Infof("Got event: leader acquired: %v", event.LeaderID())
		case LeaderRefreshed:
			// Indicates that a new leader ID was generated, as a result of having to remark
			// a record (typically as due to an earlier delivery error). This is purely
			// informational; there is nothing an application should do about this, other
			// than taking note of the new leader ID if it has come to rely on it.
			log.Infof("Got event: leader refreshed: %v", event.LeaderID())
		case LeaderRevoked:
			// The application may block the callback until it wraps up any in-flight
			// activity. Only upon returning from the callback, will a new leader be elected.
			log.Infof("Got event: leader revoked")
		case LeaderFenced:
			// The application must immediately terminate any ongoing activity, on the assumption
			// that another leader may be imminently elected. Unlike the handling of LeaderRevoked,
			// blocking in the callback will not prevent a new leader from being elected.
			log.Infof("Got event: leader fenced")
		case MeterRead:
			// Periodic statistics regarding the harvester's throughput.
			log.Infof("Got event: meter read: %v", event.Stats())
		}
	})

	// Start harvesting in the background.
	err = harvest.Start()
	if err != nil {
		panic(err)
	}

	// Wait indefinitely for it to end.
	log.Fatal(harvest.Await())
}

// TestExample_withEventHandler runs the example as a targetted test.
func TestExample_withEventHandler(t *testing.T) {
	check.RunTargetted(t, Example_withEventHandler)
}
================================================
FILE: harvest.go
================================================
package goharvest
import (
"fmt"
"runtime/debug"
"sync"
"sync/atomic"
"time"
"github.com/google/uuid"
"github.com/obsidiandynamics/goharvest/metric"
"github.com/obsidiandynamics/goneli"
"github.com/obsidiandynamics/libstdgo/concurrent"
"github.com/obsidiandynamics/libstdgo/diags"
"github.com/obsidiandynamics/libstdgo/scribe"
"gopkg.in/confluentinc/confluent-kafka-go.v1/kafka"
_ "gopkg.in/confluentinc/confluent-kafka-go.v1/kafka/librdkafka"
)
// noLeader is the zero UUID; it is stored in harvest.leaderID to signify that this
// instance does not currently hold leader status.
var noLeader uuid.UUID

// State of the Harvest instance.
type State int

const (
	// Created — initialised (configured) but not started.
	Created State = iota

	// Running — currently running.
	Running

	// Stopping — in the process of being stopped. I.e. Stop() has been invoked, but workers are still running.
	Stopping

	// Stopped — has been completely disposed of.
	Stopped
)
// tracedPanic captures a recovered panic value along with the stack trace taken at the
// point of recovery, so the failure can later be surfaced from Await() as an error.
type tracedPanic struct {
	cause interface{}
	stack string
}

// Error renders the panic cause followed by the captured stack trace on the next line.
func (e tracedPanic) Error() string {
	return fmt.Sprint(e.cause) + "\n" + e.stack
}
// Harvest performs background harvesting of a transactional outbox table.
type Harvest interface {
	// Start begins harvesting in the background, returning an error if initialisation fails.
	Start() error

	// Stop requests an orderly shutdown, returning immediately.
	Stop()

	// Await blocks until the instance reaches the Stopped state, returning the cause of an
	// abnormal termination (if any).
	Await() error

	// State returns the current lifecycle state.
	State() State

	// IsLeader indicates whether this instance currently holds leader status.
	IsLeader() bool

	// LeaderID returns the current leader UUID, or nil if this instance is not the leader.
	LeaderID() *uuid.UUID

	// InFlightRecords returns the number of records pending delivery acknowledgement.
	InFlightRecords() int

	// InFlightRecordKeys returns the keys of the records pending acknowledgement.
	InFlightRecordKeys() []string

	// SetEventHandler registers an optional callback for leadership/metric events; it must be
	// invoked before Start().
	SetEventHandler(eventHandler EventHandler)
}

// watcherTimeout bounds the diagnostic watchers guarding potentially-hanging operations
// (e.g. producer close, battery shutdown); exceeding it produces a warning log.
const watcherTimeout = 60 * time.Second
// harvest is the default Harvest implementation. It uses NELI for leader election and a
// battery of producer goroutines for publishing marked outbox records.
type harvest struct {
	config              Config
	producerConfigs     KafkaConfigMap // merged base + producer config, with enable.idempotence forced on
	scribe              scribe.Scribe
	state               concurrent.AtomicReference // holds the current State
	shouldBeRunningFlag concurrent.AtomicCounter   // 1 while running; Stop() sets it to 0
	neli                goneli.Neli
	leaderID            atomic.Value // holds a uuid.UUID; noLeader when not the leader
	db                  DatabaseBinding
	queuedRecords       concurrent.AtomicCounter
	inFlightRecords     concurrent.AtomicCounter
	inFlightKeys        concurrent.Scoreboard // per-key tally of records awaiting acknowledgement
	throughput          *metric.Meter
	throughputLock      sync.Mutex
	panicCause          atomic.Value // holds the tracedPanic captured by reportPanic, if any
	eventHandler        EventHandler
	forceRemarkFlag     concurrent.AtomicCounter // 1 requests a send-battery restart and leader refresh
	sendBattery         battery
}
// New creates a new Harvest instance from the supplied config.
//
// The config is first defaulted and validated. The producer configuration is assembled by
// overlaying ProducerKafkaConfig onto a copy of BaseKafkaConfig, with enable.idempotence
// forcibly set (an attempt to override it yields an error). The returned instance is in
// the Created state; call Start() to begin harvesting.
func New(config Config) (Harvest, error) {
	config.SetDefaults()
	if err := config.Validate(); err != nil {
		return nil, err
	}

	h := &harvest{
		config:              config,
		scribe:              config.Scribe,
		state:               concurrent.NewAtomicReference(Created),
		shouldBeRunningFlag: concurrent.NewAtomicCounter(1),
		queuedRecords:       concurrent.NewAtomicCounter(),
		inFlightRecords:     concurrent.NewAtomicCounter(),
		inFlightKeys:        concurrent.NewScoreboard(*config.Limits.SendConcurrency),
		forceRemarkFlag:     concurrent.NewAtomicCounter(),
		eventHandler:        func(e Event) {}, // no-op default; replaceable via SetEventHandler
	}
	h.leaderID.Store(noLeader)

	h.producerConfigs = copyKafkaConfig(h.config.BaseKafkaConfig)
	putAllKafkaConfig(h.config.ProducerKafkaConfig, h.producerConfigs)

	// Idempotence is mandatory for exactly-once-style delivery; reject user overrides.
	err := setKafkaConfigs(h.producerConfigs, KafkaConfigMap{
		"enable.idempotence": true,
	})
	if err != nil {
		return nil, err
	}

	return h, nil
}
// State obtains the present state of this Harvest instance.
func (h *harvest) State() State {
	return h.state.Get().(State)
}

// logger returns a standard log API bound to this instance's scene (name + lib fields).
func (h *harvest) logger() scribe.StdLogAPI {
	return h.scribe.Capture(h.scene())
}

// scene builds the common Scribe fields attached to every log entry from this instance.
func (h *harvest) scene() scribe.Scene {
	return scribe.Scene{Fields: scribe.Fields{
		"name": h.config.Name,
		"lib":  "goharvest",
	}}
}

// cleanupFailedStart disposes of the database binding if Start() failed part-way through.
// It is a no-op once the instance has successfully transitioned out of the Created state.
func (h *harvest) cleanupFailedStart() {
	if h.State() != Created {
		return
	}

	if h.db != nil {
		h.db.Dispose()
	}
}
// Start the harvester.
//
// Initialisation proceeds in order: create the database binding, create the NELI instance
// (wiring leadership callbacks), create the throughput meter, then flip the state to
// Running and launch the background poller goroutine. If any step fails, the deferred
// cleanupFailedStart disposes of whatever was created (the state is still Created).
func (h *harvest) Start() error {
	ensureState(h.State() == Created, "Cannot start at this time")
	defer h.cleanupFailedStart()

	db, err := h.config.DatabaseBindingProvider(h.config.DataSource, h.config.OutboxTable)
	if err != nil {
		return err
	}
	h.db = db

	neliConfig := goneli.Config{
		KafkaConfig:           configToNeli(h.config.BaseKafkaConfig),
		LeaderTopic:           h.config.LeaderTopic,
		LeaderGroupID:         h.config.LeaderGroupID,
		KafkaConsumerProvider: convertKafkaConsumerProvider(h.config.KafkaConsumerProvider),
		KafkaProducerProvider: convertKafkaProducerProvider(h.config.KafkaProducerProvider),
		Scribe:                h.config.Scribe,
		Name:                  h.config.Name,
		PollDuration:          h.config.Limits.PollDuration,
		MinPollInterval:       h.config.Limits.MinPollInterval,
		HeartbeatTimeout:      h.config.Limits.HeartbeatTimeout,
	}
	h.logger().T()("Creating NELI with config %v", neliConfig)
	// The barrier translates NELI leadership transitions into harvest's own callbacks.
	n, err := h.config.NeliProvider(neliConfig, func(e goneli.Event) {
		switch e.(type) {
		case goneli.LeaderAcquired:
			h.onAcquired()
		case goneli.LeaderRevoked:
			h.onRevoked()
		case goneli.LeaderFenced:
			h.onFenced()
		}
	})
	if err != nil {
		return err
	}
	h.neli = n

	h.throughput = metric.NewMeter("throughput", *h.config.Limits.MinMetricsInterval)
	h.state.Set(Running)
	go backgroundPoller(h)
	return nil
}
// IsLeader returns true if the current Harvest is the leader among competing instances.
func (h *harvest) IsLeader() bool {
	return h.LeaderID() != nil
}

// LeaderID returns the leader UUID of the current instance, if it is a leader at the time of this call.
// Otherwise, a nil is returned.
func (h *harvest) LeaderID() *uuid.UUID {
	// noLeader (the zero UUID) is the sentinel for "not a leader".
	if stored := h.leaderID.Load().(uuid.UUID); stored != noLeader {
		return &stored
	}
	return nil
}

// InFlightRecords returns the number of in-flight records; i.e. records that have been published on Kafka for which an
// acknowledgement is still pending.
func (h *harvest) InFlightRecords() int {
	return h.inFlightRecords.GetInt()
}

// InFlightRecordKeys returns the keys of records that are still in-flight. For any given key, there will be at most one
// record pending acknowledgement.
func (h *harvest) InFlightRecordKeys() []string {
	view := h.inFlightKeys.View()
	keys := make([]string, len(view))
	i := 0
	for k := range view {
		keys[i] = k
		i++
	}
	return keys
}
// SetEventHandler assigns an optional event handler callback to be notified of changes in leader state as well as other
// events of interest.
//
// This method must be invoked prior to Start().
func (h *harvest) SetEventHandler(eventHandler EventHandler) {
	ensureState(h.State() == Created, "Cannot set event handler at this time")
	h.eventHandler = eventHandler
}

// shouldBeRunning reports whether an orderly shutdown has been requested via Stop().
func (h *harvest) shouldBeRunning() bool {
	return h.shouldBeRunningFlag.Get() == 1
}

// reportPanic is installed via defer in each long-lived goroutine. If the goroutine
// panicked, it records the cause and stack in panicCause (later surfaced by Await) and
// triggers a shutdown of the whole harvester. Must be called directly from a defer for
// recover() to take effect.
func (h *harvest) reportPanic(goroutineName string) {
	if r := recover(); r != nil {
		h.logger().E()("Caught panic in %s: %v", goroutineName, r)
		h.panicCause.Store(tracedPanic{r, string(debug.Stack())})
		h.logger().E()(string(debug.Stack()))
		h.Stop()
	}
}
// ensureState panics with a formatted "state assertion failed" error when the expected
// condition does not hold. It is used to guard internal invariants; violations indicate
// programming errors rather than recoverable runtime conditions.
func ensureState(expected bool, format string, args ...interface{}) {
	if expected {
		return
	}
	panic(fmt.Errorf("state assertion failed: "+format, args...))
}
// backgroundPoller is the main harvester goroutine. It pulses NELI to maintain (or acquire)
// leader status and, while leader, spawns the send battery and polls the outbox for marked
// records. The deferred statements unwind the instance in strict reverse order: log, flip
// state to Stopping, shut down the battery, close NELI, dispose of the DB binding, capture
// any panic, flip state to Stopped, log.
func backgroundPoller(h *harvest) {
	h.logger().I()("Starting background poller")
	defer h.logger().I()("Stopped")
	defer h.state.Set(Stopped)
	defer h.reportPanic("background poller")
	defer h.db.Dispose()
	defer h.neli.Close()
	defer h.shutdownSendBattery()
	defer h.state.Set(Stopping)
	defer h.logger().I()("Stopping")

	for h.shouldBeRunning() {
		isLeader, err := h.neli.Pulse(1 * time.Millisecond)
		if err != nil {
			panic(err)
		}

		if isLeader {
			// A forced remark (raised on delivery/marking errors) tears down the battery and
			// generates a fresh leader ID before resuming.
			if h.forceRemarkFlag.Get() == 1 {
				h.logger().D()("Remark requested")
				h.shutdownSendBattery()
				h.refreshLeader()
			}

			if h.sendBattery == nil {
				// Invariant: a previous battery shutdown must have drained all in-flight state.
				inFlightRecordsValue := h.inFlightRecords.Get()
				ensureState(inFlightRecordsValue == 0, "inFlightRecords=%d", inFlightRecordsValue)
				inFlightKeysView := h.inFlightKeys.View()
				// Bug fix: the view is a map, so format it with %v rather than %d.
				ensureState(len(inFlightKeysView) == 0, "inFlightKeys=%v", inFlightKeysView)
				h.spawnSendBattery()
			}
			onLeaderPoll(h)
		}
	}
}
// spawnSendBattery creates the concurrent battery of send cells. Each cell owns a Kafka
// producer and drains its records channel, enforcing per-key ordering (at most one
// in-flight record per key) and back-pressure limits before producing. A cell unwinds by
// closing its producer in a detached goroutine (working around confluent-kafka-go #463)
// and waiting for its delivery handler to finish.
func (h *harvest) spawnSendBattery() {
	ensureState(h.sendBattery == nil, "send battery not nil before spawn")
	h.logger().D()("Spawning send battery")
	h.sendBattery = newConcurrentBattery(*h.config.Limits.SendConcurrency, *h.config.Limits.SendBuffer, func(records chan OutboxRecord) {
		defer h.reportPanic("send cell")

		h.logger().T()("Creating Kafka producer with config %v", h.producerConfigs)
		prod, err := h.config.KafkaProducerProvider(&h.producerConfigs)
		if err != nil {
			panic(err)
		}

		deliveryHandlerDone := make(chan int)
		go backgroundDeliveryHandler(h, prod, deliveryHandlerDone)

		defer func() {
			<-deliveryHandlerDone
		}()
		defer func() {
			go func() {
				// A bug in confluent-kafka-go (#463) occasionally causes an indefinite syscall hang in Close(), after it closes
				// the Events channel. So we delegate this to a separate goroutine — better an orphaned goroutine than a
				// frozen harvester. (The rest of the battery will still unwind normally.)
				closeWatcher := h.watch("close producer")
				prod.Close()
				closeWatcher.End()
			}()
		}()

		var lastID *int64
		for rec := range records {
			// Records must arrive in non-decreasing ID order within a cell.
			// Bug fix: rec.ID is an int64 — format with %d, not %s.
			ensureState(lastID == nil || rec.ID >= *lastID, "discontinuity for key %s: ID %d, lastID: %v", rec.KafkaKey, rec.ID, lastID)
			lastID = &rec.ID

			m := &kafka.Message{
				TopicPartition: kafka.TopicPartition{Topic: &rec.KafkaTopic, Partition: kafka.PartitionAny},
				Key:            []byte(rec.KafkaKey),
				Value:          stringPointerToByteArray(rec.KafkaValue),
				Opaque:         rec,
				Headers:        toNativeKafkaHeaders(rec.KafkaHeaders),
			}

			// Apply global back-pressure before admitting another in-flight record.
			h.inFlightRecords.Drain(int64(*h.config.Limits.MaxInFlightRecords-1), concurrent.Indefinitely)

			startTime := time.Now()
			for {
				if h.deadlineExceeded("poll", h.neli.Deadline().Elapsed(), *h.config.Limits.MaxPollInterval) {
					break
				}
				if h.deadlineExceeded("message queueing", time.Since(startTime), *h.config.Limits.QueueTimeout) {
					break
				}
				// Per-key ordering: wait until no record for this key is in flight.
				if remaining := h.inFlightKeys.Drain(rec.KafkaKey, 0, *h.config.Limits.DrainInterval); remaining <= 0 {
					ensureState(remaining == 0, "drain failed: %d remaining in-flight records for key %s", remaining, rec.KafkaKey)
					break
				}
				h.logger().D()("Drain stalled for record %d (key %s)", rec.ID, rec.KafkaKey)
			}

			// A pending remark invalidates queued records; skip without producing.
			if h.forceRemarkFlag.Get() == 1 {
				h.queuedRecords.Dec()
				continue
			}

			h.inFlightRecords.Inc()
			h.queuedRecords.Dec()
			h.inFlightKeys.Inc(rec.KafkaKey)
			err := prod.Produce(m, nil)
			if err != nil {
				// Roll back the in-flight accounting and force a remark so the record is retried.
				h.logger().W()("Error publishing record %v: %v", rec, err)
				h.inFlightKeys.Dec(rec.KafkaKey)
				h.inFlightRecords.Dec()
				h.forceRemarkFlag.Set(1)
			}
		}
	})
}
// stringPointerToByteArray converts an optional string to a byte slice, mapping a nil
// pointer to a nil slice (used for nullable Kafka record values).
func stringPointerToByteArray(str *string) []byte {
	if str == nil {
		return nil
	}
	return []byte(*str)
}
// shutdownSendBattery tears down the send battery (if one is running), blocking until all
// of its goroutines have completed, then resets the in-flight accounting so a fresh
// battery can be spawned. The remark flag is raised first to expedite shutdown by making
// the cells skip any still-queued records.
func (h *harvest) shutdownSendBattery() {
	if h.sendBattery != nil {
		shutdownWatcher := h.watch("shutdown send battery")
		h.logger().D()("Shutting down send battery")

		// Expedite shutdown by raising the remark flag, forcing any queued records to be skipped.
		h.forceRemarkFlag.Set(1)

		// Take the battery down, waiting for all goroutines to complete.
		h.sendBattery.shutdown()
		h.sendBattery = nil

		// Reset flags and counters for next time.
		h.forceRemarkFlag.Set(0)
		h.inFlightRecords.Set(0)
		h.inFlightKeys.Clear()
		h.logger().D()("Send battery terminated")
		shutdownWatcher.End()
	}
}
// onLeaderPoll performs one leader-side poll cycle: mark a batch of outbox records with the
// current leader ID, then enqueue them onto the send battery. A marking error forces a
// remark (the error may have occurred after the server applied the mark), followed by an
// I/O-error backoff; an empty batch sleeps for the mark backoff.
func onLeaderPoll(h *harvest) {
	markBegin := time.Now()
	records, err := h.db.Mark(*h.LeaderID(), *h.config.Limits.MarkQueryRecords)
	if err != nil {
		h.logger().W()("Error executing mark query: %v", err)
		// When an error occurs during marking, we cannot just backoff and retry, as the error could have
		// occurred on the return leg (i.e. DB operation succeeded on the server, but timed out on the client).
		h.forceRemarkFlag.Set(1)
		time.Sleep(*h.config.Limits.IOErrorBackoff)
		return
	}

	if len(records) > 0 {
		sendBegin := time.Now()
		h.logger().T()("Leader poll: marked %d in the range %d-%d, took %v",
			len(records), records[0].ID, records[len(records)-1].ID, sendBegin.Sub(markBegin))
		enqueueWatcher := h.watch("enqueue marked records")
		for _, rec := range records {
			h.queuedRecords.Inc()
			h.sendBattery.enqueue(rec)
		}
		enqueueWatcher.End()
		// Idiom: time.Since(t) in place of time.Now().Sub(t).
		h.logger().T()("Send took %v", time.Since(sendBegin))
	} else {
		time.Sleep(*h.config.Limits.MarkBackoff)
	}
}
// watch starts a diagnostic watcher for a potentially long-running operation; if the
// operation has not ended within watcherTimeout, a warning is logged.
func (h *harvest) watch(operation string) *diags.Watcher {
	return diags.Watch(operation, watcherTimeout, diags.Print(h.logger().W()))
}

// refreshLeader generates and installs a new leader ID (without relinquishing leadership),
// notifying the event handler via LeaderRefreshed. Invoked after a forced remark so that
// previously marked records are no longer associated with this instance.
func (h *harvest) refreshLeader() {
	newLeaderID, _ := uuid.NewRandom()
	h.leaderID.Store(newLeaderID)
	h.logger().W()("Refreshed leader ID: %v", newLeaderID)
	h.eventHandler(LeaderRefreshed{newLeaderID})
}

// deadlineExceeded reports whether elapsed has exceeded threshold; on the first detection
// (guarded by a CAS on the remark flag) it raises the flag and logs the overshoot.
func (h *harvest) deadlineExceeded(deadline string, elapsed time.Duration, threshold time.Duration) bool {
	if excess := elapsed - threshold; excess > 0 {
		// CAS ensures the warning is logged only once per remark cycle.
		if h.forceRemarkFlag.CompareAndSwap(0, 1) {
			h.logger().W()("Exceeded %s deadline by %v", deadline, excess)
		}
		return true
	}
	return false
}
// backgroundDeliveryHandler consumes the producer's Events channel, dispatching delivery
// reports to the success/failure handlers. It terminates (closing done) when the producer
// closes its Events channel during battery shutdown.
func backgroundDeliveryHandler(h *harvest, prod KafkaProducer, done chan int) {
	h.logger().I()("Starting background delivery handler")
	defer h.reportPanic("background delivery handler")
	defer close(done)

	for e := range prod.Events() {
		switch ev := e.(type) {
		case *kafka.Message:
			// The OutboxRecord was stashed in Opaque by the send cell at produce time.
			rec := ev.Opaque.(OutboxRecord)
			if ev.TopicPartition.Error != nil {
				onFailedDelivery(h, rec, ev.TopicPartition.Error)
			} else {
				onSuccessfulDelivery(h, rec)
				h.updateStats()
			}
		default:
			// Non-message events (e.g. broker errors) are logged for information only.
			h.logger().I()("Observed event: %v (%T)", e, e)
		}
	}
}

// updateStats records one delivered message against the throughput meter, periodically
// emitting a MeterRead event and a debug log with the accumulated statistics. The lock
// serialises meter access across concurrent delivery handlers.
func (h *harvest) updateStats() {
	h.throughputLock.Lock()
	defer h.throughputLock.Unlock()
	h.throughput.MaybeStatsCall(func(stats metric.MeterStats) {
		h.logger().D()("%v", stats)
		h.eventHandler(MeterRead{stats})
	})
	h.throughput.Add(1)
}
// onSuccessfulDelivery purges the delivered record from the outbox, retrying indefinitely
// (with an I/O-error backoff) until the purge query itself succeeds, then releases the
// record's in-flight accounting so the next record for the same key may proceed.
func onSuccessfulDelivery(h *harvest, rec OutboxRecord) {
	for {
		done, err := h.db.Purge(rec.ID)
		if err == nil {
			if !done {
				// Query succeeded but affected no row — logged, not retried.
				h.logger().W()("Did not purge record %v", rec)
			}
			break
		}
		h.logger().W()("Error executing purge query for record %v: %v", rec, err)
		time.Sleep(*h.config.Limits.IOErrorBackoff)
	}
	h.inFlightKeys.Dec(rec.KafkaKey)
	h.inFlightRecords.Dec()
}

// onFailedDelivery resets the failed record (clearing its mark so it can be re-harvested),
// retrying the reset on I/O errors. A successful reset forces a remark so the send battery
// is restarted under a fresh leader ID. In-flight accounting is released either way.
func onFailedDelivery(h *harvest, rec OutboxRecord, err error) {
	h.logger().W()("Delivery failed for %v, err: %v", rec, err)
	for {
		done, err := h.db.Reset(rec.ID)
		if err == nil {
			if !done {
				h.logger().W()("Did not reset record %v", rec)
			} else {
				h.forceRemarkFlag.Set(1)
			}
			break
		}
		h.logger().W()("Error executing reset query for record %v: %v", rec, err)
		time.Sleep(*h.config.Limits.IOErrorBackoff)
	}
	h.inFlightKeys.Dec(rec.KafkaKey)
	h.inFlightRecords.Dec()
}
// onAcquired is the NELI callback for leadership acquisition: it generates a fresh leader
// ID, installs it, and notifies the event handler.
func (h *harvest) onAcquired() {
	newLeaderID, _ := uuid.NewRandom()
	h.leaderID.Store(newLeaderID)
	h.logger().I()("Elected as leader, ID: %v", newLeaderID)
	h.eventHandler(LeaderAcquired{newLeaderID})
}

// onRevoked is the NELI callback for an orderly leadership revocation; leader state is
// cleaned up before the event handler is notified (blocking here delays re-election).
func (h *harvest) onRevoked() {
	h.logger().I()("Lost leader status")
	h.cleanupLeaderState()
	h.eventHandler(LeaderRevoked{})
}

// onFenced is the NELI callback for being fenced (another leader may already be elected);
// leader state is torn down and the event handler notified.
func (h *harvest) onFenced() {
	h.logger().W()("Leader fenced")
	h.cleanupLeaderState()
	h.eventHandler(LeaderFenced{})
}

// cleanupLeaderState shuts down the send battery and clears the leader ID sentinel.
func (h *harvest) cleanupLeaderState() {
	h.shutdownSendBattery()
	h.leaderID.Store(noLeader)
}
// Stop the harvester, returning immediately.
//
// This method does not wait until the underlying Goroutines have been terminated
// and all resources have been disposed of properly. This is accomplished by calling Await().
func (h *harvest) Stop() {
	h.shouldBeRunningFlag.Set(0)
}

// Await the termination of this Harvest instance.
//
// This method blocks indefinitely, returning only when this instance has completed an orderly shutdown. I.e.
// when all Goroutines have returned and all resources have been disposed of.
func (h *harvest) Await() error {
	h.state.Await(concurrent.RefEqual(Stopped), concurrent.Indefinitely)
	// Surface any panic captured by reportPanic as the termination cause.
	panicCause := h.panicCause.Load()
	if panicCause != nil {
		return panicCause.(tracedPanic)
	}
	return nil
}
================================================
FILE: harvest_test.go
================================================
package goharvest
import (
"fmt"
"math"
"strconv"
"sync"
"testing"
"time"
"github.com/google/uuid"
"github.com/obsidiandynamics/goneli"
"github.com/obsidiandynamics/libstdgo/check"
"github.com/obsidiandynamics/libstdgo/concurrent"
"github.com/obsidiandynamics/libstdgo/scribe"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/confluentinc/confluent-kafka-go.v1/kafka"
)
// wait returns a 10-second bounded asserter for polling asynchronous test conditions.
func wait(t check.Tester) check.Timesert {
	return check.Wait(t, 10*time.Second)
}

// Aggressive limits used for (fast) testing and without send concurrency to simplify assertions.
func testLimits() Limits {
	return Limits{
		IOErrorBackoff:     Duration(1 * time.Millisecond),
		PollDuration:       Duration(1 * time.Millisecond),
		MinPollInterval:    Duration(1 * time.Millisecond),
		MaxPollInterval:    Duration(60 * time.Second),
		HeartbeatTimeout:   Duration(60 * time.Second),
		DrainInterval:      Duration(60 * time.Second),
		QueueTimeout:       Duration(60 * time.Second),
		MarkBackoff:        Duration(1 * time.Millisecond),
		MaxInFlightRecords: Int(math.MaxInt64),
		SendConcurrency:    Int(1),
		SendBuffer:         Int(0),
	}
}

// fixtures bundles per-test customisations applied when building the mock environment.
type fixtures struct {
	// producerMockSetup is invoked on each producer mock as it is created.
	producerMockSetup producerMockSetup
}

// setDefaults installs a no-op producer setup when none was supplied.
func (f *fixtures) setDefaults() {
	if f.producerMockSetup == nil {
		f.producerMockSetup = func(prodMock *prodMock) {}
	}
}

// producerMockSetup customises a freshly created producer mock.
type producerMockSetup func(prodMock *prodMock)
// create assembles a complete mock test environment: a mock scribe (for log assertions), a
// mock database binding, a mock NELI (captured via the returned pointer once the provider
// runs), and a Config wired to all of them with aggressive test limits.
func (f fixtures) create() (scribe.MockScribe, *dbMock, *goneli.MockNeli, Config) {
	f.setDefaults()

	m := scribe.NewMock()

	db := &dbMock{}
	db.fillDefaults()

	// Populated lazily by the NeliProvider below; callers dereference after Start().
	var neli goneli.MockNeli
	config := Config{
		Limits:                  testLimits(),
		Scribe:                  scribe.New(m.Factories()),
		DatabaseBindingProvider: mockDatabaseBindingProvider(db),
		NeliProvider: func(config goneli.Config, barrier goneli.Barrier) (goneli.Neli, error) {
			n, err := goneli.NewMock(goneli.MockConfig{
				MinPollInterval: config.MinPollInterval,
			}, barrier)
			if err != nil {
				panic(err)
			}
			neli = n
			return n, nil
		},
		KafkaProducerProvider: func(conf *KafkaConfigMap) (KafkaProducer, error) {
			prod := &prodMock{}
			prod.fillDefaults()
			f.producerMockSetup(prod)
			return prod, nil
		},
	}
	config.Scribe.SetEnabled(scribe.All)
	return m, db, &neli, config
}
// testEventHandler is a thread-safe recorder of Event callbacks, used by tests to assert
// on the sequence of events emitted by the harvester.
type testEventHandler struct {
	mutex  sync.Mutex
	events []Event
}

// handler returns an EventHandler that appends each received event under the mutex.
func (c *testEventHandler) handler() EventHandler {
	return func(e Event) {
		c.mutex.Lock()
		defer c.mutex.Unlock()
		c.events = append(c.events, e)
	}
}

// list returns a defensive copy of the recorded events.
func (c *testEventHandler) list() []Event {
	c.mutex.Lock()
	defer c.mutex.Unlock()
	eventsCopy := make([]Event, len(c.events))
	copy(eventsCopy, c.events)
	return eventsCopy
}

// length returns the number of recorded events.
func (c *testEventHandler) length() int {
	c.mutex.Lock()
	defer c.mutex.Unlock()
	return len(c.events)
}
// TestCorrectInitialisation verifies the full lifecycle (Created → Running → Stopped) and
// that the configured data source and outbox table are handed to the database binding
// provider, with all resources disposed of on shutdown.
func TestCorrectInitialisation(t *testing.T) {
	_, db, neli, config := fixtures{}.create()
	var givenDataSource string
	var givenOutboxTable string
	config.DatabaseBindingProvider = func(dataSource string, outboxTable string) (DatabaseBinding, error) {
		givenDataSource = dataSource
		givenOutboxTable = outboxTable
		return db, nil
	}
	config.DataSource = "test data source"
	config.OutboxTable = "test table name"
	config.LeaderGroupID = "test leader group ID"
	config.BaseKafkaConfig = KafkaConfigMap{
		"bootstrap.servers": "localhost:9092",
	}

	h, err := New(config)
	require.Nil(t, err)
	assert.Equal(t, Created, h.State())

	assertNoError(t, h.Start)
	assert.Equal(t, Running, h.State())
	assert.Equal(t, config.DataSource, givenDataSource)
	assert.Equal(t, config.OutboxTable, givenOutboxTable)

	h.Stop()
	assert.Nil(t, h.Await())
	assert.Equal(t, Stopped, h.State())
	assert.Equal(t, 1, db.c.Dispose.GetInt())
	assert.Equal(t, goneli.Closed, (*neli).State())
}

// TestConfigError asserts that New rejects an invalid configuration (negative backoff).
func TestConfigError(t *testing.T) {
	h, err := New(Config{
		Limits: Limits{
			IOErrorBackoff: Duration(-1),
		},
	})
	assert.Nil(t, h)
	assert.NotNil(t, err)
}
// TestErrorDuringDBInitialisation asserts that a failing database binding provider causes
// Start() to fail, leaving the instance in the Created state.
func TestErrorDuringDBInitialisation(t *testing.T) {
	_, _, _, config := fixtures{}.create()
	config.DatabaseBindingProvider = func(dataSource string, outboxTable string) (DatabaseBinding, error) {
		return nil, check.ErrSimulated
	}

	h, err := New(config)
	require.Nil(t, err)

	assertErrorContaining(t, h.Start, "simulated")
	assert.Equal(t, Created, h.State())
}

// TestErrorDuringNeliInitialisation asserts that a failing NELI provider causes Start() to
// fail and that the already-created database binding is disposed of.
func TestErrorDuringNeliInitialisation(t *testing.T) {
	_, db, _, config := fixtures{}.create()
	config.NeliProvider = func(config goneli.Config, barrier goneli.Barrier) (goneli.Neli, error) {
		return nil, check.ErrSimulated
	}

	h, err := New(config)
	require.Nil(t, err)

	assertErrorContaining(t, h.Start, "simulated")
	assert.Equal(t, Created, h.State())
	assert.Equal(t, 1, db.c.Dispose.GetInt())
}

// TestErrorDuringProducerConfiguration asserts that attempting to override the mandatory
// enable.idempotence setting is rejected at construction time.
func TestErrorDuringProducerConfiguration(t *testing.T) {
	_, _, _, config := fixtures{}.create()
	config.ProducerKafkaConfig = KafkaConfigMap{
		"enable.idempotence": false,
	}

	h, err := New(config)
	require.NotNil(t, err)
	assert.Contains(t, err.Error(), "cannot override configuration 'enable.idempotence'")
	assert.Nil(t, h)
}
// TestErrorDuringProducerInitialisation asserts that a failing producer provider panics the
// send cell once leadership is acquired, causing the harvester to self-destruct with the
// simulated error and dispose of its resources.
func TestErrorDuringProducerInitialisation(t *testing.T) {
	m, db, neli, config := fixtures{}.create()
	config.KafkaProducerProvider = func(conf *KafkaConfigMap) (KafkaProducer, error) {
		return nil, check.ErrSimulated
	}

	h, err := New(config)
	require.Nil(t, err)
	eh := &testEventHandler{}
	h.SetEventHandler(eh.handler())
	assertNoError(t, h.Start)

	// Induce leadership and wait until leader.
	(*neli).AcquireLeader()
	wait(t).Until(h.IsLeader)
	wait(t).UntilAsserted(func(t check.Tester) {
		assert.Equal(t, 1, eh.length())
	})

	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Error)).
		Having(scribe.MessageEqual("Caught panic in send cell: simulated")).
		Passes(scribe.Count(1)))

	// Having detected a panic, it should self-destruct
	assertErrorContaining(t, h.Await, "simulated")
	assert.Equal(t, 1, db.c.Dispose.GetInt())
	assert.Equal(t, (*neli).State(), goneli.Closed)
}

// TestUncaughtPanic_backgroundPoller asserts that a pulse error panics the background
// poller, terminating the harvester with the simulated error before any events are emitted.
func TestUncaughtPanic_backgroundPoller(t *testing.T) {
	m, _, neli, config := fixtures{}.create()

	h, err := New(config)
	require.Nil(t, err)
	eh := &testEventHandler{}
	h.SetEventHandler(eh.handler())
	assertNoError(t, h.Start)

	(*neli).PulseError(check.ErrSimulated)

	// Having detected a panic, it should self-destruct
	assertErrorContaining(t, h.Await, "simulated")
	assert.Equal(t, 0, eh.length())

	t.Log(m.Entries().List())
	m.Entries().
		Having(scribe.LogLevel(scribe.Info)).
		Having(scribe.MessageEqual("Starting background poller")).
		Assert(t, scribe.Count(1))
	m.Entries().
		Having(scribe.LogLevel(scribe.Error)).
		Having(scribe.MessageEqual("Caught panic in background poller: simulated")).
		Assert(t, scribe.Count(1))
}
// TestUncaughtPanic_backgroundDeliveryHandler asserts that a panic raised in the delivery
// path (here from the DB Reset stub) is caught by the delivery handler's panic reporter and
// terminates the harvester with the simulated error.
func TestUncaughtPanic_backgroundDeliveryHandler(t *testing.T) {
	prodRef := concurrent.NewAtomicReference()
	m, db, neli, config := fixtures{producerMockSetup: func(prodMock *prodMock) {
		prodRef.Set(prodMock)
	}}.create()
	db.f.Reset = func(m *dbMock, id int64) (bool, error) {
		panic(check.ErrSimulated)
	}

	h, err := New(config)
	require.Nil(t, err)
	assertNoError(t, h.Start)

	// Induce leadership and await
	(*neli).AcquireLeader()
	wait(t).Until(h.IsLeader)

	// Feed a delivery event to cause a DB reset query
	wait(t).UntilAsserted(isNotNil(prodRef.Get))
	prodRef.Get().(*prodMock).events <- message(OutboxRecord{ID: 777}, check.ErrSimulated)

	// Having detected a panic, it should self-destruct
	assertErrorContaining(t, h.Await, "simulated")

	t.Log(m.Entries().List())
	m.Entries().
		Having(scribe.LogLevel(scribe.Info)).
		Having(scribe.MessageEqual("Starting background delivery handler")).
		Assert(t, scribe.Count(1))
	m.Entries().
		Having(scribe.LogLevel(scribe.Error)).
		Having(scribe.MessageEqual("Caught panic in background delivery handler: simulated")).
		Assert(t, scribe.Count(1))
}
// TestBasicLeaderElectionAndRevocation walks the harvester through the full leadership
// lifecycle — acquire, revoke, re-acquire, fence — asserting the emitted log entries,
// event-handler callbacks, send-battery spin-up/tear-down, and LeaderID transitions at
// each step.
func TestBasicLeaderElectionAndRevocation(t *testing.T) {
	m, _, neli, config := fixtures{}.create()

	h, err := New(config)
	require.Nil(t, err)
	eh := &testEventHandler{}
	h.SetEventHandler(eh.handler())
	assertNoError(t, h.Start)

	// Starts off in a non-leader state
	assert.Equal(t, false, h.IsLeader())
	assert.Nil(t, h.LeaderID())

	// Assign leadership via the rebalance listener and wait for the assignment to take effect
	(*neli).AcquireLeader()
	wait(t).UntilAsserted(isTrue(h.IsLeader))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Info)).
		Having(scribe.MessageEqual(fmt.Sprintf("Elected as leader, ID: %s", h.LeaderID()))).
		Passes(scribe.Count(1)))
	m.Reset()
	wait(t).UntilAsserted(func(t check.Tester) {
		if assert.Equal(t, 1, eh.length()) {
			e := eh.list()[0].(LeaderAcquired)
			assert.Equal(t, e.LeaderID(), *(h.LeaderID()))
		}
	})

	// Revoke leadership via the rebalance listener and await its effect
	(*neli).RevokeLeader()
	wait(t).UntilAsserted(isFalse(h.IsLeader))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Info)).
		Having(scribe.MessageEqual("Lost leader status")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Shutting down send battery")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Send battery terminated")).
		Passes(scribe.Count(1)))
	m.Reset()
	wait(t).UntilAsserted(func(t check.Tester) {
		if assert.Equal(t, 2, eh.length()) {
			_ = eh.list()[1].(LeaderRevoked)
		}
	})

	// Reassign leadership via the rebalance listener and wait for the assignment to take effect
	(*neli).AcquireLeader()
	wait(t).UntilAsserted(isTrue(h.IsLeader))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Info)).
		Having(scribe.MessageEqual(fmt.Sprintf("Elected as leader, ID: %s", h.LeaderID()))).
		Passes(scribe.Count(1)))
	m.Reset()
	wait(t).UntilAsserted(func(t check.Tester) {
		if assert.Equal(t, 3, eh.length()) {
			e := eh.list()[2].(LeaderAcquired)
			assert.Equal(t, e.LeaderID(), *(h.LeaderID()))
		}
	})

	// Fence the leader
	(*neli).FenceLeader()
	wait(t).UntilAsserted(isFalse(h.IsLeader))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageEqual("Leader fenced")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Shutting down send battery")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Send battery terminated")).
		Passes(scribe.Count(1)))
	m.Reset()
	wait(t).UntilAsserted(func(t check.Tester) {
		if assert.Equal(t, 4, eh.length()) {
			_ = eh.list()[3].(LeaderFenced)
		}
	})

	h.Stop()
	assert.Nil(t, h.Await())
}
func TestMetrics(t *testing.T) {
prodRef := concurrent.NewAtomicReference()
m, _, neli, config := fixtures{producerMockSetup: func(prodMock *prodMock) {
prodRef.Set(prodMock)
}}.create()
config.Limits.MinMetricsInterval = Duration(1 * time.Millisecond)
h, err := New(config)
require.Nil(t, err)
eh := &testEventHandler{}
h.SetEventHandler(eh.handler())
assertNoError(t, h.Start)
// Induce leadership and wait for the leadership event
(*neli).AcquireLeader()
wait(t).UntilAsserted(isNotNil(prodRef.Get))
wait(t).UntilAsserted(func(t check.Tester) {
assert.Equal(t, 1, eh.length())
})
wait(t).UntilAsserted(func(t check.Tester) {
backlogRecords := generateRecords(1, 0)
deliverAll(backlogRecords, nil, prodRef.Get().(*prodMock).events)
if assert.GreaterOrEqual(t, eh.length(), 2) {
e := eh.list()[1].(MeterRead)
if stats := e.Stats(); assert.NotNil(t, stats) {
assert.Equal(t, stats.Name, "throughput")
}
}
})
wait(t).UntilAsserted(m.ContainsEntries().
Having(scribe.LogLevel(scribe.Debug)).
Having(scribe.MessageContaining("throughput")).
Passes(scribe.CountAtLeast(1)))
h.Stop()
assert.Nil(t, h.Await())
}
// TestHandleNonMessageEvent verifies that a non-message Kafka event (here, an
// all-brokers-down error) arriving on the producer's event channel is logged
// at info level rather than being treated as a delivery report.
func TestHandleNonMessageEvent(t *testing.T) {
	prodRef := concurrent.NewAtomicReference()
	m, _, neli, config := fixtures{producerMockSetup: func(prodMock *prodMock) {
		prodRef.Set(prodMock)
	}}.create()
	config.Limits.MinMetricsInterval = Duration(1 * time.Millisecond)
	h, err := New(config)
	require.Nil(t, err)
	eh := &testEventHandler{}
	h.SetEventHandler(eh.handler())
	assertNoError(t, h.Start)
	// Induce leadership and wait for the leadership event
	(*neli).AcquireLeader()
	wait(t).UntilAsserted(isNotNil(prodRef.Get))
	prod := prodRef.Get().(*prodMock)
	wait(t).UntilAsserted(func(t check.Tester) {
		assert.Equal(t, 1, eh.length())
	})
	// Feed a generic (non-*kafka.Message) event and expect it to be logged.
	prod.events <- kafka.NewError(kafka.ErrAllBrokersDown, "brokers down", false)
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Info)).
		Having(scribe.MessageContaining("Observed event: brokers down")).
		Passes(scribe.CountAtLeast(1)))
	h.Stop()
	assert.Nil(t, h.Await())
}
// TestThrottleKeys verifies per-key throttling: records sharing one Kafka key
// are published strictly one at a time, each waiting for the previous record's
// delivery confirmation, and the backlog drains fully upon leadership loss.
func TestThrottleKeys(t *testing.T) {
	prod := concurrent.NewAtomicReference()
	lastPublished := concurrent.NewAtomicReference()
	m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) {
		pm.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error {
			lastPublished.Set(msg)
			return nil
		}
		prod.Set(pm)
	}}.create()
	h, err := New(config)
	require.Nil(t, err)
	eh := &testEventHandler{}
	h.SetEventHandler(eh.handler())
	assertNoError(t, h.Start)
	// Starts off with no backlog.
	assert.Equal(t, 0, h.InFlightRecords())
	// Induce leadership and wait until a producer has been spawned.
	(*neli).AcquireLeader()
	wait(t).UntilAsserted(isNotNil(prod.Get))
	const backlog = 10
	// numKeys=1 gives every record the same key, forcing serialisation.
	backlogRecords := generateCyclicKeyedRecords(1, backlog, 0)
	db.markedRecords <- backlogRecords
	// Even though we pushed several records through, they all had a common key,
	// so only one should be published.
	wait(t).UntilAsserted(intEqual(1, h.InFlightRecords))
	assert.True(t, h.IsLeader()) // should definitely be leader by now
	wait(t).UntilAsserted(intEqual(1, prod.Get().(*prodMock).c.Produce.GetInt))
	msg := lastPublished.Get().(*kafka.Message)
	assert.Equal(t, msg.Value, []byte(*backlogRecords[0].KafkaValue))
	assert.ElementsMatch(t, h.InFlightRecordKeys(), []string{backlogRecords[0].KafkaKey})
	// Drain the in-flight record... another one should then be published.
	deliverAll(backlogRecords[0:1], nil, prod.Get().(*prodMock).events)
	wait(t).UntilAsserted(func(t check.Tester) {
		msg := lastPublished.Get()
		if assert.NotNil(t, msg) {
			assert.Equal(t, msg.(*kafka.Message).Value, []byte(*backlogRecords[1].KafkaValue))
		}
	})
	// Drain the backlog by feeding in delivery confirmations one at a time.
	for i := 1; i < backlog; i++ {
		wait(t).UntilAsserted(intEqual(1, h.InFlightRecords))
		wait(t).UntilAsserted(func(t check.Tester) {
			msg := lastPublished.Get()
			if assert.NotNil(t, msg) {
				assert.Equal(t, []byte(*backlogRecords[i].KafkaValue), msg.(*kafka.Message).Value)
			}
		})
		deliverAll(backlogRecords[i:i+1], nil, prod.Get().(*prodMock).events)
	}
	// Revoke leadership...
	(*neli).RevokeLeader()
	// Wait for the backlog to drain... leadership status will be cleared when done.
	wait(t).Until(check.Not(h.IsLeader))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Shutting down send battery")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Send battery terminated")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Info)).
		Having(scribe.MessageContaining("Lost leader status")).
		Passes(scribe.Count(1)))
	// Every record should have been produced and purged exactly once.
	assert.Equal(t, backlog, db.c.Purge.GetInt())
	assert.Equal(t, backlog, prod.Get().(*prodMock).c.Produce.GetInt())
	assert.Equal(t, 0, h.InFlightRecords())
	h.Stop()
	assert.Nil(t, h.Await())
}
// TestPollDeadlineExceeded forces the poll loop to overrun its deadline by
// configuring a one-millisecond MaxPollInterval, and expects the harvester to
// log a warning and recycle (shut down and restart) the send battery.
func TestPollDeadlineExceeded(t *testing.T) {
	m, db, neli, config := fixtures{}.create()
	config.Limits.DrainInterval = Duration(time.Millisecond)
	config.Limits.MaxPollInterval = Duration(time.Millisecond)
	h, err := New(config)
	require.Nil(t, err)
	eh := &testEventHandler{}
	h.SetEventHandler(eh.handler())
	assertNoError(t, h.Start)
	// Starts off with no backlog.
	assert.Equal(t, 0, h.InFlightRecords())
	// Induce leadership and wait until a producer has been spawned.
	(*neli).AcquireLeader()
	db.markedRecords <- generateCyclicKeyedRecords(1, 2, 0)
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Exceeded poll deadline")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Shutting down send battery")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Send battery terminated")).
		Passes(scribe.Count(1)))
	h.Stop()
	assert.Nil(t, h.Await())
}
// TestQueueLimitExceeded forces the message queueing deadline to be exceeded
// by configuring a one-millisecond QueueTimeout, and expects the harvester to
// log a warning and recycle the send battery.
func TestQueueLimitExceeded(t *testing.T) {
	m, db, neli, config := fixtures{}.create()
	config.Limits.DrainInterval = Duration(time.Millisecond)
	config.Limits.QueueTimeout = Duration(time.Millisecond)
	h, err := New(config)
	require.Nil(t, err)
	eh := &testEventHandler{}
	h.SetEventHandler(eh.handler())
	assertNoError(t, h.Start)
	// Starts off with no backlog.
	assert.Equal(t, 0, h.InFlightRecords())
	// Induce leadership and wait until a producer has been spawned.
	(*neli).AcquireLeader()
	db.markedRecords <- generateCyclicKeyedRecords(1, 2, 0)
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Exceeded message queueing deadline")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Shutting down send battery")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Send battery terminated")).
		Passes(scribe.Count(1)))
	h.Stop()
	assert.Nil(t, h.Await())
}
// TestDrainInFlightRecords_failedDelivery builds up an in-flight backlog and
// revokes leadership without ever confirming delivery; the harvester must
// drain the backlog, terminate the send battery and end with zero in-flight
// records. NOTE(review): no failed delivery report is actually fed here — the
// "failedDelivery" in the name presumably refers to deliveries that never
// complete; confirm against the drain logic in harvest.go.
func TestDrainInFlightRecords_failedDelivery(t *testing.T) {
	prod := concurrent.NewAtomicReference()
	lastPublished := concurrent.NewAtomicReference()
	m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) {
		pm.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error {
			lastPublished.Set(msg)
			return nil
		}
		prod.Set(pm)
	}}.create()
	h, err := New(config)
	require.Nil(t, err)
	assertNoError(t, h.Start)
	// Starts off with no backlog
	assert.Equal(t, 0, h.InFlightRecords())
	// Induce leadership
	(*neli).AcquireLeader()
	wait(t).UntilAsserted(isNotNil(prod.Get))
	// Generate a backlog
	const backlog = 10
	backlogRecords := generateRecords(backlog, 0)
	db.markedRecords <- backlogRecords
	// Wait for the backlog to register.
	wait(t).UntilAsserted(intEqual(backlog, h.InFlightRecords))
	wait(t).UntilAsserted(intEqual(backlog, prod.Get().(*prodMock).c.Produce.GetInt))
	assert.True(t, h.IsLeader()) // should be leader by now
	// Revoke leadership... this will start the backlog drain.
	(*neli).RevokeLeader()
	wait(t).Until(check.Not(h.IsLeader))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Shutting down send battery")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageEqual("Send battery terminated")).
		Passes(scribe.Count(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Info)).
		Having(scribe.MessageContaining("Lost leader status")).
		Passes(scribe.Count(1)))
	assert.Equal(t, h.InFlightRecords(), 0)
	h.Stop()
	assert.Nil(t, h.Await())
}
// TestErrorInMarkQuery simulates a database error in the mark query and
// verifies that the harvester logs the error, requests a remark, and keeps
// running (the error is retried, not fatal).
func TestErrorInMarkQuery(t *testing.T) {
	m, db, neli, config := fixtures{}.create()
	db.f.Mark = func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error) {
		return nil, check.ErrSimulated
	}
	h, err := New(config)
	require.Nil(t, err)
	assertNoError(t, h.Start)
	// Induce leadership
	(*neli).AcquireLeader()
	// Wait for the error to be logged
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Error executing mark query")).
		Passes(scribe.CountAtLeast(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Debug)).
		Having(scribe.MessageContaining("Remark requested")).
		Passes(scribe.CountAtLeast(1)))
	assert.Equal(t, Running, h.State())
	h.Stop()
	assert.Nil(t, h.Await())
}
// TestErrorInProduce simulates a synchronous Produce failure and verifies the
// recovery path: the error is logged, the leader ID is refreshed (spawning a
// fresh producer), and the record can subsequently be published — while the
// per-key in-flight constraint still admits only one record for its key.
func TestErrorInProduce(t *testing.T) {
	prodRef := concurrent.NewAtomicReference()
	produceError := concurrent.NewAtomicCounter(1) // 1=true, 0=false
	m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) {
		pm.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error {
			if produceError.Get() == 1 {
				return kafka.NewError(kafka.ErrFail, "simulated", false)
			}
			return nil
		}
		prodRef.Set(pm)
	}}.create()
	h, err := New(config)
	require.Nil(t, err)
	eh := &testEventHandler{}
	h.SetEventHandler(eh.handler())
	assertNoError(t, h.Start)
	// Induce leadership
	(*neli).AcquireLeader()
	wait(t).UntilAsserted(isNotNil(prodRef.Get))
	prod := prodRef.Get().(*prodMock)
	// Clear the reference so we can detect the replacement producer spawned
	// after the leader refresh.
	prodRef.Set(nil)
	// Mark one record
	records := generateRecords(1, 0)
	db.markedRecords <- records
	// Wait for the error to be logged
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Error publishing record")).
		Passes(scribe.CountAtLeast(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Refreshed leader ID")).
		Passes(scribe.CountAtLeast(1)))
	m.Reset()
	assert.Equal(t, Running, h.State())
	wait(t).UntilAsserted(isNotNil(prodRef.Get))
	prod = prodRef.Get().(*prodMock)
	// Resume normal production... error should clear but the record count should not go up, as
	// there can only be one in-flight record for a given key
	produceError.Set(0)
	db.markedRecords <- records
	wait(t).UntilAsserted(intEqual(1, h.InFlightRecords))
	wait(t).UntilAsserted(func(t check.Tester) {
		assert.ElementsMatch(t, h.InFlightRecordKeys(), []string{records[0].KafkaKey})
	})
	if assert.GreaterOrEqual(t, eh.length(), 2) {
		_ = eh.list()[0].(LeaderAcquired)
		_ = eh.list()[1].(LeaderRefreshed)
	}
	// Feed successful delivery report for the first record
	prod.events <- message(records[0], nil)
	h.Stop()
	assert.Nil(t, h.Await())
}
// Tests remarking by feeding through two records for the same key, forcing them to come through in sequence.
// The first is published, but fails upon delivery, which raises the forceRemark flag.
// As the second one is processed, the forceRemark flag raised by the first should be spotted, and a leader
// refresh should occur.
func TestReset(t *testing.T) {
	prodRef := concurrent.NewAtomicReference()
	lastPublished := concurrent.NewAtomicReference()
	m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) {
		pm.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error {
			lastPublished.Set(msg)
			return nil
		}
		prodRef.Set(pm)
	}}.create()
	h, err := New(config)
	require.Nil(t, err)
	eh := &testEventHandler{}
	h.SetEventHandler(eh.handler())
	assertNoError(t, h.Start)
	// Induce leadership
	(*neli).AcquireLeader()
	wait(t).UntilAsserted(isNotNil(prodRef.Get))
	prod := prodRef.Get().(*prodMock)
	// Mark two records for the same key
	records := generateCyclicKeyedRecords(1, 2, 0)
	db.markedRecords <- records
	// Wait for the backlog to register — only one record may be in flight for the shared key.
	wait(t).UntilAsserted(intEqual(1, h.InFlightRecords))
	wait(t).UntilAsserted(func(t check.Tester) {
		if msg := lastPublished.Get(); assert.NotNil(t, msg) {
			assert.Equal(t, *records[0].KafkaValue, string(msg.(*kafka.Message).Value))
		}
	})
	// Feed an error (a failed delivery report for the first record)
	prod.events <- message(records[0], check.ErrSimulated)
	// Wait for the error to be logged
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Delivery failed")).
		Passes(scribe.CountAtLeast(1)))
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Refreshed leader ID")).
		Passes(scribe.CountAtLeast(1)))
	m.Reset()
	assert.Equal(t, Running, h.State())
	wait(t).UntilAsserted(isNotNil(prodRef.Get))
	h.Stop()
	assert.Nil(t, h.Await())
}
// TestErrorInPurgeAndResetQueries simulates database errors in the purge query
// (after a successful delivery) and the reset query (after a failed delivery),
// verifying that each error is logged and retried until the fault clears, with
// the harvester remaining in the Running state throughout.
func TestErrorInPurgeAndResetQueries(t *testing.T) {
	prodRef := concurrent.NewAtomicReference()
	m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) {
		prodRef.Set(pm)
	}}.create()
	records := generateRecords(2, 0)
	purgeError := concurrent.NewAtomicCounter(1) // 1=true, 0=false
	resetError := concurrent.NewAtomicCounter(1) // 1=true, 0=false
	// Serve the two records on the first mark only; subsequent marks are empty.
	db.f.Mark = func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error) {
		if db.c.Mark.Get() == 0 {
			return records, nil
		}
		return []OutboxRecord{}, nil
	}
	db.f.Purge = func(m *dbMock, id int64) (bool, error) {
		if purgeError.Get() == 1 {
			return false, check.ErrSimulated
		}
		return true, nil
	}
	db.f.Reset = func(m *dbMock, id int64) (bool, error) {
		if resetError.Get() == 1 {
			return false, check.ErrSimulated
		}
		return true, nil
	}
	h, err := New(config)
	require.Nil(t, err)
	assertNoError(t, h.Start)
	// Induce leadership and await its registration
	(*neli).AcquireLeader()
	wait(t).UntilAsserted(isNotNil(prodRef.Get))
	prod := prodRef.Get().(*prodMock)
	wait(t).UntilAsserted(isTrue(h.IsLeader))
	wait(t).UntilAsserted(intEqual(2, h.InFlightRecords))
	// Feed successful delivery report for the first record
	prod.events <- message(records[0], nil)
	// Wait for the error to be logged
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Error executing purge query for record")).
		Passes(scribe.CountAtLeast(1)))
	m.Reset()
	assert.Equal(t, Running, h.State())
	// The failed purge leaves both records in flight.
	assert.Equal(t, 2, h.InFlightRecords())
	// Resume normal production... error should clear
	purgeError.Set(0)
	wait(t).UntilAsserted(intEqual(1, h.InFlightRecords))
	// Feed failed delivery report for the second record
	prodRef.Get().(*prodMock).events <- message(records[1], kafka.NewError(kafka.ErrFail, "simulated", false))
	// Wait for the error to be logged
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Error executing reset query for record")).
		Passes(scribe.CountAtLeast(1)))
	m.Reset()
	assert.Equal(t, Running, h.State())
	assert.Equal(t, 1, h.InFlightRecords())
	// Resume normal production... error should clear
	resetError.Set(0)
	wait(t).UntilAsserted(intEqual(0, h.InFlightRecords))
	h.Stop()
	assert.Nil(t, h.Await())
}
// TestIncompletePurgeAndResetQueries covers purge/reset queries that succeed
// but affect no rows (return false with a nil error): the harvester logs a
// warning, still removes the record from the in-flight set, and keeps running.
func TestIncompletePurgeAndResetQueries(t *testing.T) {
	prodRef := concurrent.NewAtomicReference()
	m, db, neli, config := fixtures{producerMockSetup: func(pm *prodMock) {
		prodRef.Set(pm)
	}}.create()
	records := generateRecords(2, 0)
	// Serve the two records on the first mark only; subsequent marks are empty.
	db.f.Mark = func(m *dbMock, leaderID uuid.UUID, limit int) ([]OutboxRecord, error) {
		if db.c.Mark.Get() == 0 {
			return records, nil
		}
		return []OutboxRecord{}, nil
	}
	db.f.Purge = func(m *dbMock, id int64) (bool, error) {
		return false, nil
	}
	db.f.Reset = func(m *dbMock, id int64) (bool, error) {
		return false, nil
	}
	h, err := New(config)
	require.Nil(t, err)
	assertNoError(t, h.Start)
	// Induce leadership and await its registration
	(*neli).AcquireLeader()
	wait(t).UntilAsserted(isTrue(h.IsLeader))
	wait(t).UntilAsserted(intEqual(2, h.InFlightRecords))
	wait(t).UntilAsserted(isNotNil(prodRef.Get))
	prod := prodRef.Get().(*prodMock)
	// Feed successful delivery report for the first record
	prod.events <- message(records[0], nil)
	// Wait for the warning to be logged
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Did not purge record")).
		Passes(scribe.CountAtLeast(1)))
	m.Reset()
	assert.Equal(t, Running, h.State())
	wait(t).UntilAsserted(intEqual(1, h.InFlightRecords))
	// Feed failed delivery report for the second record
	prod.events <- message(records[1], kafka.NewError(kafka.ErrFail, "simulated", false))
	// Wait for the warning to be logged
	wait(t).UntilAsserted(m.ContainsEntries().
		Having(scribe.LogLevel(scribe.Warn)).
		Having(scribe.MessageContaining("Did not reset record")).
		Passes(scribe.CountAtLeast(1)))
	m.Reset()
	assert.Equal(t, Running, h.State())
	wait(t).UntilAsserted(intEqual(0, h.InFlightRecords))
	h.Stop()
	assert.Nil(t, h.Await())
}
// TestEnsureState checks that ensureState panics with the supplied message
// when the invariant is false, and is a no-op when the invariant holds.
func TestEnsureState(t *testing.T) {
	check.ThatPanicsAsExpected(t, check.ErrorContaining("must not be false"), func() {
		ensureState(false, "must not be false")
	})
	ensureState(true, "must not be false")
}
// intEqual returns an assertion that passes once intSupplier yields expected.
func intEqual(expected int, intSupplier func() int) func(t check.Tester) {
	return func(tester check.Tester) {
		actual := intSupplier()
		assert.Equal(tester, expected, actual)
	}
}
// lengthEqual returns an assertion that passes once the supplied slice has
// exactly expected elements.
func lengthEqual(expected int, sliceSupplier func() []string) func(t check.Tester) {
	return func(tester check.Tester) {
		slice := sliceSupplier()
		assert.Len(tester, slice, expected)
	}
}
// atLeast returns an assertion that passes once f yields a value of at
// least min.
func atLeast(min int, f func() int) check.Assertion {
	return func(tester check.Tester) {
		value := f()
		assert.GreaterOrEqual(tester, value, min)
	}
}
// isTrue returns an assertion that passes once f reports true.
func isTrue(f func() bool) check.Assertion {
	return func(tester check.Tester) {
		result := f()
		assert.True(tester, result)
	}
}
// isFalse returns an assertion that passes once f reports false.
func isFalse(f func() bool) check.Assertion {
	return func(tester check.Tester) {
		result := f()
		assert.False(tester, result)
	}
}
// isNotNil returns an assertion that passes once f yields a non-nil value.
func isNotNil(f func() interface{}) check.Assertion {
	return func(tester check.Tester) {
		value := f()
		assert.NotNil(tester, value)
	}
}
// assertErrorContaining invokes f and asserts that it returns a non-nil
// error whose message contains substr.
func assertErrorContaining(t *testing.T, f func() error, substr string) {
	if err := f(); assert.NotNil(t, err) {
		assert.Contains(t, err.Error(), substr)
	}
}
// assertNoError invokes f and fails the test immediately if it errs.
func assertNoError(t *testing.T, f func() error) {
	require.Nil(t, f())
}
// newTimedOutError fabricates a non-fatal Kafka timeout error.
func newTimedOutError() kafka.Error {
	const description = "Timed out"
	return kafka.NewError(kafka.ErrTimedOut, description, false)
}
// generatePartitions maps the given partition indexes onto a slice of
// topic-partition descriptors.
func generatePartitions(indexes ...int32) []kafka.TopicPartition {
	parts := make([]kafka.TopicPartition, 0, len(indexes))
	for _, index := range indexes {
		parts = append(parts, kafka.TopicPartition{Partition: index})
	}
	return parts
}
// generateRecords produces numRecords outbox records with sequential IDs
// starting at startID. Each record gets a distinct key ("key-<hex index>"),
// a matching value ("value-<hex index>") and a single "ID" header carrying
// the record's ID.
//
// A distinct key per record is the degenerate case of cycling through as many
// keys as there are records, so this delegates to generateCyclicKeyedRecords
// rather than duplicating the record-construction logic: with
// numKeys == numRecords, i%numKeys == i for every index, yielding byte-for-byte
// identical records to the previous standalone implementation.
func generateRecords(numRecords int, startID int) []OutboxRecord {
	return generateCyclicKeyedRecords(numRecords, numRecords, startID)
}
// generateCyclicKeyedRecords produces numRecords outbox records whose keys
// cycle through numKeys distinct values ("key-0" … "key-<numKeys-1>" in hex),
// with sequential IDs starting at startID. Every record shares one creation
// timestamp and carries its ID in an "ID" header.
func generateCyclicKeyedRecords(numKeys int, numRecords int, startID int) []OutboxRecord {
	now := time.Now()
	records := make([]OutboxRecord, numRecords)
	for i := range records {
		id := int64(startID + i)
		records[i] = OutboxRecord{
			ID:         id,
			CreateTime: now,
			KafkaTopic: "test_topic",
			KafkaKey:   fmt.Sprintf("key-%x", i%numKeys),
			KafkaValue: String(fmt.Sprintf("value-%x", i)),
			KafkaHeaders: KafkaHeaders{
				KafkaHeader{Key: "ID", Value: strconv.FormatInt(id, 10)},
			},
		}
	}
	return records
}
// message fabricates a Kafka delivery report for the given outbox record,
// attaching err (nil for a successful delivery) to the topic-partition and
// stashing the record itself in the Opaque field.
func message(record OutboxRecord, err error) *kafka.Message {
	partition := kafka.TopicPartition{Topic: &record.KafkaTopic, Error: err}
	return &kafka.Message{
		TopicPartition: partition,
		Key:            []byte(record.KafkaKey),
		Value:          stringPointerToByteArray(record.KafkaValue),
		Timestamp:      record.CreateTime,
		TimestampType:  kafka.TimestampCreateTime,
		Opaque:         record,
	}
}
// deliverAll pushes a delivery report for every record into the events
// channel, each report carrying the supplied error (nil means success).
func deliverAll(records []OutboxRecord, err error, events chan kafka.Event) {
	for i := range records {
		events <- message(records[i], err)
	}
}
================================================
FILE: int/faulty_kafka_test.go
================================================
package int
import (
"github.com/obsidiandynamics/goharvest"
"github.com/obsidiandynamics/libstdgo/fault"
"gopkg.in/confluentinc/confluent-kafka-go.v1/kafka"
)
// ProducerFaultSpecs configures fault injection for a Kafka producer: one
// spec governing the synchronous Produce call and one governing the
// asynchronous delivery-report path.
type ProducerFaultSpecs struct {
	OnProduce  fault.Spec // fault applied when Produce is invoked
	OnDelivery fault.Spec // fault applied to otherwise-successful delivery reports
}
// build materialises the declarative specs into concrete fault instances.
func (specs ProducerFaultSpecs) build() producerFaults {
	produceFault := specs.OnProduce.Build()
	deliveryFault := specs.OnDelivery.Build()
	return producerFaults{
		onProduce:  produceFault,
		onDelivery: deliveryFault,
	}
}
// FaultyKafkaProducerProvider decorates realProvider so that every producer
// it yields is wrapped in a fault-injecting proxy driven by specs. Provider
// errors are passed through unchanged.
func FaultyKafkaProducerProvider(realProvider goharvest.KafkaProducerProvider, specs ProducerFaultSpecs) goharvest.KafkaProducerProvider {
	return func(conf *goharvest.KafkaConfigMap) (goharvest.KafkaProducer, error) {
		underlying, err := realProvider(conf)
		if err != nil {
			return nil, err
		}
		faulty := newFaultyProducer(underlying, specs.build())
		return faulty, nil
	}
}
// producerFaults is the built (executable) counterpart of ProducerFaultSpecs.
type producerFaults struct {
	onProduce  fault.Fault
	onDelivery fault.Fault
}
// faultyProducer proxies a real Kafka producer, injecting faults into the
// Produce call and rewriting delivery reports on its own events channel.
type faultyProducer struct {
	real   goharvest.KafkaProducer
	faults producerFaults
	events chan kafka.Event // proxied (possibly rewritten) event stream
}
// newFaultyProducer wraps real in a proxy that forwards all of its events,
// but may — according to the onDelivery fault — rewrite a successful delivery
// report into a failed one. The pump goroutine exits (closing the proxy's
// event channel) once the real producer's event channel is closed.
func newFaultyProducer(real goharvest.KafkaProducer, faults producerFaults) *faultyProducer {
	f := &faultyProducer{
		real:   real,
		faults: faults,
		events: make(chan kafka.Event),
	}
	go func() {
		defer close(f.events)
		for e := range real.Events() {
			switch ev := e.(type) {
			case *kafka.Message:
				if ev.TopicPartition.Error != nil {
					// Already a failed delivery — forward untouched.
					f.events <- e
				} else if err := f.faults.onDelivery.Try(); err != nil {
					// Fault fired: clone the message and stamp the injected
					// error onto the clone's topic-partition, leaving the
					// original event unmodified.
					rewrittenMessage := *ev
					rewrittenMessage.TopicPartition = kafka.TopicPartition{
						Topic:     ev.TopicPartition.Topic,
						Partition: ev.TopicPartition.Partition,
						Offset:    ev.TopicPartition.Offset,
						Metadata:  ev.TopicPartition.Metadata,
						Error:     err,
					}
					f.events <- &rewrittenMessage
				} else {
					f.events <- e
				}
			default:
				// Non-message events (errors, stats, etc.) pass through as-is.
				f.events <- e
			}
		}
	}()
	return f
}
// Events returns the proxied event channel, on which delivery reports may
// have been rewritten by the onDelivery fault.
func (f *faultyProducer) Events() chan kafka.Event {
	return f.events
}
// Produce first consults the onProduce fault; if it fires, the injected error
// is returned without touching the real producer. Otherwise the call is
// delegated verbatim.
func (f *faultyProducer) Produce(msg *kafka.Message, deliveryChan chan kafka.Event) error {
	err := f.faults.onProduce.Try()
	if err != nil {
		return err
	}
	return f.real.Produce(msg, deliveryChan)
}
// Close shuts down the real producer; the pump goroutine then terminates when
// the real producer's event channel closes.
func (f *faultyProducer) Close() {
	f.real.Close()
}
================================================
FILE: int/harvest_int_test.go
================================================
package int
import (
"context"
"database/sql"
"fmt"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"testing"
"time"
"github.com/google/uuid"
. "github.com/obsidiandynamics/goharvest"
"github.com/obsidiandynamics/goharvest/stasher"
"github.com/obsidiandynamics/libstdgo/check"
"github.com/obsidiandynamics/libstdgo/concurrent"
"github.com/obsidiandynamics/libstdgo/diags"
"github.com/obsidiandynamics/libstdgo/fault"
"github.com/obsidiandynamics/libstdgo/scribe"
"github.com/obsidiandynamics/libstdgo/scribe/overlog"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/confluentinc/confluent-kafka-go.v1/kafka"
)
// externals bundles the out-of-process dependencies used by the integration
// tests: a Kafka consumer, the admin client derived from it, and the Postgres
// connection hosting the outbox table.
type externals struct {
	cons  *kafka.Consumer
	admin *kafka.AdminClient
	db    *sql.DB
}
// Integration-test wiring: Kafka namespace/topic layout, Postgres schema, and
// pacing parameters for the generator and receiver.
const (
	// Kafka topic layout.
	kafkaNamespace = "goharvest_test"
	topic          = kafkaNamespace + ".topic"
	partitions     = 10
	// Postgres schema and the outbox table within it.
	dbSchema    = "goharvest_test"
	outboxTable = dbSchema + ".outbox"
	// NELI leader-election topic and the consumer groups in play.
	leaderTopic     = kafkaNamespace + ".neli"
	leaderGroupID   = kafkaNamespace + ".group"
	receiverGroupID = kafkaNamespace + ".receiver_group"
	// External endpoints.
	bootstrapServers = "localhost:9092"
	dataSource       = "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable"
	// Generator pacing: a record every generateInterval, batched into one
	// transaction per generateRecordsPerTxn, cycling over generateUniqueKeys keys.
	generateInterval      = 5 * time.Millisecond
	generateRecordsPerTxn = 20
	generateMinRecords    = 100
	generateUniqueKeys    = 10
	// Receiver polling cadence and the overall assertion deadline.
	receiverPollDuration          = 500 * time.Millisecond
	receiverNoMessagesWarningTime = 10 * time.Second
	waitTimeout                   = 90 * time.Second
)
// Shared overlog logger and scribe facade used for all integration-test output.
var logger = overlog.New(overlog.StandardFormat())
var scr = scribe.New(overlog.Bind(logger))
// openExternals provisions the external dependencies for an integration run:
// a Kafka consumer plus an admin client derived from it (creating the test
// topic if necessary), and a Postgres connection with a freshly recreated
// outbox table. Panics on any unrecoverable setup error.
func openExternals() externals {
	cons, err := kafka.NewConsumer(&kafka.ConfigMap{
		"bootstrap.servers":  bootstrapServers,
		"group.id":           receiverGroupID,
		"enable.auto.commit": true,
		"auto.offset.reset":  "earliest",
		"socket.timeout.ms":  10000,
		// "debug": "all",
	})
	if err != nil {
		panic(err)
	}
	admin, err := kafka.NewAdminClientFromConsumer(cons)
	if err != nil {
		panic(err)
	}
	// Retry topic creation until the topic exists (or already existed), dying
	// only on fatal errors.
	for {
		result, err := admin.CreateTopics(context.Background(), []kafka.TopicSpecification{
			{
				Topic:             topic,
				NumPartitions:     partitions,
				ReplicationFactor: 1,
			},
		})
		if err != nil {
			if isFatalError(err) {
				panic(err)
			} else {
				// Allow for timeouts and other non-fatal errors.
				scr.W()("Non-fatal error creating topic: %v", err)
			}
		} else {
			if result[0].Error.Code() == kafka.ErrTopicAlreadyExists {
				scr.I()("Topic %s already exists", topic)
			} else if result[0].Error.Code() != kafka.ErrNoError {
				panic(result[0].Error)
			}
			break
		}
	}
	db, err := sql.Open("postgres", dataSource)
	if err != nil {
		panic(err)
	}
	// Drop and recreate the outbox table so each run starts from a clean slate.
	const ddlTemplate = `
	CREATE SCHEMA IF NOT EXISTS %s;
	DROP TABLE IF EXISTS %s;
	CREATE TABLE %s (
		id                  BIGSERIAL PRIMARY KEY,
		create_time         TIMESTAMP WITH TIME ZONE NOT NULL,
		kafka_topic         VARCHAR(249) NOT NULL,
		kafka_key           VARCHAR(5) NOT NULL,
		kafka_value         VARCHAR(50),
		kafka_header_keys   TEXT[] NOT NULL,
		kafka_header_values TEXT[] NOT NULL,
		leader_id           UUID
	)
	`
	_, err = db.Exec(fmt.Sprintf(ddlTemplate, dbSchema, outboxTable, outboxTable))
	if err != nil {
		panic(err)
	}
	return externals{cons, admin, db}
}
// close releases all external handles at the end of a test run.
// NOTE(review): the consumer is closed before the admin client that was
// derived from it, and the error returned by cons.Close() is discarded —
// confirm both are acceptable for best-effort teardown.
func (x *externals) close() {
	x.cons.Close()
	x.db.Close()
	x.admin.Close()
}
// wait returns a timed asserter bound to the global integration-test timeout.
func wait(t check.Tester) check.Timesert {
	return check.Wait(t, waitTimeout)
}
func TestOneNode_withFailures(t *testing.T) {
test(t, 1, 5*time.Second, ProducerFaultSpecs{
OnProduce: fault.Spec{Cnt: fault.Random(0.02), Err: check.ErrSimulated},
OnDelivery: fault.Spec{Cnt: fault.Random(0.02), Err: check.ErrSimulated},
})
}
func TestFourNodes_withFailures(t *testing.T) {
test(t, 4, 5*time.Second, ProducerFaultSpecs{
OnProduce: fault.Spec{Cnt: fault.Random(0.02), Err: check.ErrSimulated},
OnDelivery: fault.Spec{Cnt: fault.Random(0.02), Err: check.ErrSimulated},
})
}
func TestEightNodes_withoutFailures(t *testing.T) {
test(t, 8, 2*time.Second, ProducerFaultSpecs{})
}
// test drives the end-to-end scenario: spin up numHarvests harvesters
// spawnInterval apart (each wrapped with the given fault specs), generate
// outbox records continuously, then assert the receiver observes every
// generated record despite rolling harvester shutdowns and injected faults.
func test(t *testing.T, numHarvests int, spawnInterval time.Duration, producerFaultSpecs ProducerFaultSpecs) {
	check.RequireLabel(t, "int")
	installSigQuitHandler()
	testID, _ := uuid.NewRandom()
	x := openExternals()
	defer x.close()
	scr.I()("Starting generator")
	generator := startGenerator(t, testID, x.db, generateInterval, generateUniqueKeys)
	defer func() { <-generator.stop() }()
	scr.I()("Starting receiver")
	receiver := startReceiver(t, testID, x.cons)
	defer func() { <-receiver.stop() }()
	harvests := make([]Harvest, numHarvests)
	// Safety net: stop any harvesters still running if the test bails early.
	defer func() {
		for _, h := range harvests {
			if h != nil {
				h.Stop()
			}
		}
	}()
	// Start harvests at a set interval.
	for i := 0; i < numHarvests; i++ {
		config := Config{
			KafkaProducerProvider: FaultyKafkaProducerProvider(StandardKafkaProducerProvider(), producerFaultSpecs),
			Name:                  fmt.Sprintf("harvest-#%d", i+1),
			Scribe:                scribe.New(overlog.Bind(logger)),
			BaseKafkaConfig: KafkaConfigMap{
				"bootstrap.servers": bootstrapServers,
				"socket.timeout.ms": 10000,
			},
			ProducerKafkaConfig: KafkaConfigMap{
				"delivery.timeout.ms": 10000,
				// "debug": "broker,topic,metadata",
			},
			LeaderTopic:   leaderTopic,
			OutboxTable:   outboxTable,
			LeaderGroupID: leaderGroupID,
			DataSource:    dataSource,
			Limits: Limits{
				MinPollInterval: Duration(100 * time.Millisecond),
				MarkBackoff:     Duration(1 * time.Millisecond),
				IOErrorBackoff:  Duration(1 * time.Millisecond),
			},
		}
		config.Scribe.SetEnabled(scribe.Trace)
		scr.I()("Starting harvest %d/%d", i+1, numHarvests)
		h, err := New(config)
		require.Nil(t, err)
		harvests[i] = h
		require.Nil(t, h.Start())
		scr.I()("Sleeping")
		sleepWithDeadline(spawnInterval)
	}
	// Stop harvests in the order they were started, except for the last one. The last harvest will be stopped
	// only after we've asserted the receipt of all messages.
	for i := 0; i < numHarvests-1; i++ {
		scr.I()("Stopping harvest %d/%d", i+1, numHarvests)
		harvests[i].Stop()
		scr.I()("In-flight records: %d", harvests[i].InFlightRecords())
		sleepWithDeadline(spawnInterval)
	}
	// Wait until the generator produces some records. Once we've produced enough records, stop the
	// generator so that we can assert receipt.
	generator.recs.Fill(generateMinRecords, concurrent.Indefinitely)
	scr.I()("Stopping generator")
	<-generator.stop()
	generated := generator.recs.GetInt()
	scr.I()("Generated %d records", generated)
	// Wait until we received all records. Keep sliding in bite-sized chunks through successive assertions so that, as
	// long as we keep on receiving records, the assertion does not fail. This deals with slow harvesters (when we are
	// simulating lots of faults).
	const waitBatchSize = 100
	for r := waitBatchSize; r < generated; r += waitBatchSize {
		advanced := wait(t).UntilAsserted(func(t check.Tester) {
			assert.GreaterOrEqual(t, receiver.recs.GetInt(), r)
		})
		if !advanced {
			// Dump goroutine stacks to aid diagnosis of the stall.
			scr.E()("Stack traces:\n%s", diags.DumpAllStacks())
		}
		require.True(t, advanced)
		scr.I()("Received %d messages", r)
	}
	wait(t).UntilAsserted(func(t check.Tester) {
		assert.GreaterOrEqual(t, receiver.recs.GetInt(), generated)
	})
	assert.Equal(t, generated, receiver.recs.GetInt())
	scr.I()("Stopping receiver")
	<-receiver.stop()
	// Stop the last harvest as we've already received all messages and there's nothing more to publish.
	scr.I()("Stopping harvest %d/%d", numHarvests, numHarvests)
	harvests[numHarvests-1].Stop()
	// Await harvests.
	for i, h := range harvests {
		scr.I()("Awaiting harvest %d/%d", i+1, numHarvests)
		assert.Nil(t, h.Await())
	}
	scr.I()("Done")
}
// sleepWithDeadline sleeps for the given duration and logs a warning if the
// actual elapsed time exceeds twice the requested duration — a sign of severe
// scheduler starvation on the test host.
func sleepWithDeadline(duration time.Duration) {
	beforeSleep := time.Now()
	time.Sleep(duration)
	// time.Since is the idiomatic, monotonic-clock-safe replacement for
	// time.Now().Sub(beforeSleep) (staticcheck S1012).
	if elapsed := time.Since(beforeSleep); elapsed > 2*duration {
		scr.W()("Sleep deadline exceeded; expected %v but slept for %v", duration, elapsed)
	}
}
// generator is the lifecycle handle for the record-generating goroutine:
// cancel requests termination, recs counts records stashed so far, and
// stopped is closed when the goroutine exits.
type generator struct {
	cancel  context.CancelFunc
	recs    concurrent.AtomicCounter
	stopped chan int
}
// stop cancels the generator and returns the channel that is closed once the
// generator goroutine has exited; receive from it to wait for shutdown.
func (g generator) stop() chan int {
	g.cancel()
	return g.stopped
}
// startGenerator launches a goroutine that stashes outbox records into the
// database at the given interval, cycling over the supplied number of keys
// and batching generateRecordsPerTxn records per transaction. Record values
// embed testID so the receiver can discard residue from earlier runs. The
// returned generator exposes a cancel-and-wait handle and a stashed-record
// counter.
func startGenerator(t *testing.T, testID uuid.UUID, db *sql.DB, interval time.Duration, keys int) generator {
	st := stasher.New(outboxTable)
	ctx, cancel := concurrent.Forever(context.Background())
	recs := concurrent.NewAtomicCounter()
	stopped := make(chan int, 1)
	go func() {
		defer scr.T()("Generator exiting")
		defer close(stopped)
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		var tx *sql.Tx
		// Commit whatever transaction is still open when the goroutine exits.
		defer func() {
			err := finaliseTx(t, tx)
			if err != nil {
				scr.E()("Could not finalise transaction: %v", err)
				t.Errorf("Could not finalise transaction: %v", err)
			}
		}()
		var pre stasher.PreStash
		seq := 0
		for {
			// Roll over to a fresh transaction every generateRecordsPerTxn records.
			if seq%generateRecordsPerTxn == 0 {
				err := finaliseTx(t, tx)
				if err != nil {
					scr.E()("Could not finalise transaction: %v", err)
					t.Errorf("Could not finalise transaction: %v", err)
					return
				}
				// tx is assigned before the error check so the deferred
				// finaliseTx never sees a stale handle; on error newTx is nil,
				// which finaliseTx tolerates.
				newTx, err := db.Begin()
				tx = newTx
				if err != nil {
					scr.E()("Could not begin transaction: %v", err)
					t.Errorf("Could not begin transaction: %v", err)
					return
				}
				pre, err = st.Prepare(tx)
				if err != nil {
					scr.E()("Could not prepare: %v", err)
					t.Errorf("Could not prepare: %v", err)
					return
				}
			}
			testIDStr := testID.String()
			rec := OutboxRecord{
				KafkaTopic: topic,
				KafkaKey:   strconv.Itoa(seq % keys),
				KafkaValue: String(testIDStr + "_" + strconv.Itoa(seq)),
				KafkaHeaders: KafkaHeaders{
					KafkaHeader{Key: "testId", Value: testIDStr},
				},
			}
			err := pre.Stash(rec)
			if err != nil {
				scr.E()("Could not stash: %v", err)
				t.Errorf("Could not stash: %v", err)
				return
			}
			seq = int(recs.Inc())
			// Pace generation, bailing out promptly on cancellation.
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
			}
		}
	}()
	return generator{cancel, recs, stopped}
}
// finaliseTx commits tx if one is open; a nil tx is a no-op. The *testing.T
// parameter is currently unused — presumably retained for signature symmetry
// with the other helpers.
func finaliseTx(t *testing.T, tx *sql.Tx) error {
	if tx == nil {
		return nil
	}
	return tx.Commit()
}
// receiver is the lifecycle handle for the consuming goroutine: cancel
// requests termination, received maps each key to the highest sequence seen,
// recs counts distinct records received, and stopped is closed on exit.
type receiver struct {
	cancel   context.CancelFunc
	received map[string]int
	recs     concurrent.AtomicCounter
	stopped  chan int
}
// stop cancels the receiver and returns the channel that is closed once the
// receiver goroutine has exited; receive from it to wait for shutdown.
func (r receiver) stop() chan int {
	r.cancel()
	return r.stopped
}
// startReceiver spawns a goroutine that consumes messages from the test topic,
// verifying per-key ordering and header integrity for records bearing the given
// test ID. Records from other test runs (different test IDs) are skipped.
// Termination is requested via the returned receiver's stop() method.
func startReceiver(t *testing.T, testID uuid.UUID, cons *kafka.Consumer) receiver {
	received := make(map[string]int)
	ctx, cancel := concurrent.Forever(context.Background())
	recs := concurrent.NewAtomicCounter()
	stopped := make(chan int, 1)
	go func() {
		defer scr.T()("Receiver exiting")
		defer close(stopped)
		successiveTimeouts := 0
		resetTimeouts := func() {
			if successiveTimeouts > 0 {
				successiveTimeouts = 0
			}
		}
		err := cons.Subscribe(topic, func(_ *kafka.Consumer, event kafka.Event) error {
			switch e := event.(type) {
			case kafka.AssignedPartitions:
				resetTimeouts()
				scr.I()("Receiver: assigned partitions %v", e.Partitions)
			case kafka.RevokedPartitions:
				resetTimeouts()
				scr.I()("Receiver: revoked partitions %v", e.Partitions)
			}
			return nil
		})
		if err != nil {
			scr.E()("Could not subscribe: %v", err)
			t.Errorf("Could not subscribe: %v", err)
			return
		}
		lastMessageReceivedTime := time.Now()
		messageAbsencePrinted := false
		expectedTestID := testID.String()
		// Tracks the last seen offset per partition so duplicate deliveries
		// (possible with at-least-once consumers) can be skipped.
		// NOTE(review): assumes the topic has at most 64 partitions — confirm
		// against the topic configuration.
		const partitions = 64
		lastReceivedOffsets := make([]kafka.Offset, partitions)
		for i := 0; i < partitions; i++ {
			lastReceivedOffsets[i] = kafka.Offset(-1)
		}
		for {
			msg, err := cons.ReadMessage(receiverPollDuration)
			if err != nil {
				if isFatalError(err) {
					scr.E()("Fatal error during poll: %v", err)
					t.Errorf("Fatal error during poll: %v", err)
					return
				} else if !isTimedOutError(err) {
					scr.W()("Error during poll: %v", err)
				} else {
					successiveTimeouts++
					logger.Raw(".")
				}
			}
			if msg != nil {
				// Drop redeliveries of offsets already processed for this partition.
				if msg.TopicPartition.Offset <= lastReceivedOffsets[msg.TopicPartition.Partition] {
					scr.D()("Skipping duplicate delivery at offset %d", msg.TopicPartition.Offset)
					continue
				}
				lastReceivedOffsets[msg.TopicPartition.Partition] = msg.TopicPartition.Offset
				lastMessageReceivedTime = time.Now()
				messageAbsencePrinted = false
				resetTimeouts()
				// Values are formatted as '<testID>_<sequence>' by the generator.
				valueFrags := strings.Split(string(msg.Value), "_")
				if len(valueFrags) != 2 {
					scr.E()("invalid value '%s'", string(msg.Value))
					t.Errorf("invalid value '%s'", string(msg.Value))
					return
				}
				receivedTestID, value := valueFrags[0], valueFrags[1]
				if receivedTestID != expectedTestID {
					scr.I()("Skipping %s (test ID %s)", string(msg.Value), expectedTestID)
					continue
				}
				key := string(msg.Key)
				receivedSeq, err := strconv.Atoi(value)
				if err != nil {
					scr.E()("Could not convert message value to sequence: '%s'", value)
					t.Errorf("Could not convert message value to sequence: '%s'", value)
					return
				}
				// Exactly one header — the testId — is stashed per record.
				if assert.Equal(t, 1, len(msg.Headers)) {
					assert.Equal(t, expectedTestID, string(msg.Headers[0].Value))
				}
				if existingSeq, ok := received[key]; ok {
					// Sequences per key must be monotonically non-decreasing;
					// equal sequences are acceptable duplicates.
					if assert.GreaterOrEqual(t, receivedSeq, existingSeq) {
						if receivedSeq > existingSeq {
							received[key] = receivedSeq
							recs.Inc()
						} else {
							scr.I()("Received duplicate %d for key %s (this is okay)", existingSeq, key)
						}
					} else {
						scr.E()("Received records out of order, %d is behind %d", receivedSeq, existingSeq)
						t.Errorf("Received records out of order, %d is behind %d", receivedSeq, existingSeq)
					}
				} else {
					// First record for this key: generator keys records by seq % keys,
					// so the first observed sequence must equal the key itself.
					keyInt, err := strconv.Atoi(key)
					if err != nil {
						scr.E()("Could not convert message key '%s'", key)
						t.Errorf("Could not convert message key '%s'", key)
						return
					}
					if assert.Equal(t, keyInt, receivedSeq) {
						recs.Inc()
						received[key] = receivedSeq
					}
				}
			} else {
				// time.Since is the idiomatic replacement for time.Now().Sub(...).
				elapsed := time.Since(lastMessageReceivedTime)
				if elapsed > receiverNoMessagesWarningTime && !messageAbsencePrinted {
					scr.W()("No messages received since %v", lastMessageReceivedTime)
					messageAbsencePrinted = true
				}
			}
			select {
			case <-ctx.Done():
				return
			default:
			}
		}
	}()
	return receiver{cancel, received, recs, stopped}
}
// isTimedOutError returns true if the given error is a Kafka timeout error.
func isTimedOutError(err error) bool {
	if kErr, ok := err.(kafka.Error); ok {
		return kErr.Code() == kafka.ErrTimedOut
	}
	return false
}
// isFatalError returns true if the given error is a fatal Kafka error.
func isFatalError(err error) bool {
	if kErr, ok := err.(kafka.Error); ok {
		return kErr.IsFatal()
	}
	return false
}
var sigQuitHandlerInstalled = concurrent.NewAtomicCounter()

// installSigQuitHandler installs a process-wide, one-time SIGQUIT handler that
// dumps all goroutine stacks to the log. The CompareAndSwap guards against
// repeated installation across tests.
func installSigQuitHandler() {
	if sigQuitHandlerInstalled.CompareAndSwap(0, 1) {
		sig := make(chan os.Signal, 1)
		// Register the channel before spawning the goroutine so that a signal
		// delivered immediately after installation is not missed.
		signal.Notify(sig, syscall.SIGQUIT)
		go func() {
			// A single-case select over one channel is just a receive.
			<-sig
			scr.I()("Stack\n%s", diags.DumpAllStacks())
		}()
	}
}
================================================
FILE: kafka.go
================================================
package goharvest
import (
"fmt"
"time"
"gopkg.in/confluentinc/confluent-kafka-go.v1/kafka"
)
/*
Interfaces.
*/
// KafkaConsumer specifies the methods of a minimal consumer.
type KafkaConsumer interface {
	// Subscribe registers interest in a topic; rebalanceCb receives partition assignment/revocation events.
	Subscribe(topic string, rebalanceCb kafka.RebalanceCb) error
	// ReadMessage polls for a single message, blocking for up to the given timeout.
	ReadMessage(timeout time.Duration) (*kafka.Message, error)
	// Close terminates the consumer.
	Close() error
}

// KafkaConsumerProvider is a factory for creating KafkaConsumer instances.
type KafkaConsumerProvider func(conf *KafkaConfigMap) (KafkaConsumer, error)

// KafkaProducer specifies the methods of a minimal producer.
type KafkaProducer interface {
	// Events returns the channel on which the producer emits events.
	Events() chan kafka.Event
	// Produce enqueues a message for asynchronous publishing.
	Produce(msg *kafka.Message, deliveryChan chan kafka.Event) error
	// Close terminates the producer.
	Close()
}

// KafkaProducerProvider is a factory for creating KafkaProducer instances.
type KafkaProducerProvider func(conf *KafkaConfigMap) (KafkaProducer, error)
/*
Standard provider implementations.
*/
// StandardKafkaConsumerProvider returns a factory for creating a conventional KafkaConsumer, backed by the real client API.
func StandardKafkaConsumerProvider() KafkaConsumerProvider {
	return func(conf *KafkaConfigMap) (KafkaConsumer, error) {
		native := toKafkaNativeConfig(conf)
		return kafka.NewConsumer(native)
	}
}
// StandardKafkaProducerProvider returns a factory for creating a conventional KafkaProducer, backed by the real client API.
func StandardKafkaProducerProvider() KafkaProducerProvider {
	return func(conf *KafkaConfigMap) (KafkaProducer, error) {
		native := toKafkaNativeConfig(conf)
		return kafka.NewProducer(native)
	}
}
/*
Various helpers.
*/
// toKafkaNativeConfig translates a KafkaConfigMap into the client library's native ConfigMap form.
func toKafkaNativeConfig(conf *KafkaConfigMap) *kafka.ConfigMap {
	native := make(kafka.ConfigMap, len(*conf))
	for key, value := range *conf {
		native[key] = value
	}
	return &native
}
// copyKafkaConfig returns a shallow copy of the given configuration map.
func copyKafkaConfig(configMap KafkaConfigMap) KafkaConfigMap {
	// Named 'clone' rather than 'copy' to avoid shadowing the built-in copy function.
	clone := KafkaConfigMap{}
	putAllKafkaConfig(configMap, clone)
	return clone
}
// putAllKafkaConfig copies every entry from source into target, overwriting existing keys.
func putAllKafkaConfig(source, target KafkaConfigMap) {
	for key, value := range source {
		target[key] = value
	}
}
// setKafkaConfig assigns a value to the given key, failing if the key is already present —
// callers may not override pre-existing configuration.
func setKafkaConfig(configMap KafkaConfigMap, key string, value interface{}) error {
	if _, exists := configMap[key]; exists {
		return fmt.Errorf("cannot override configuration '%s'", key)
	}
	configMap[key] = value
	return nil
}
// setKafkaConfigs applies every entry of toSet via setKafkaConfig, stopping at the
// first key that is already present.
func setKafkaConfigs(configMap, toSet KafkaConfigMap) error {
	for key, value := range toSet {
		if err := setKafkaConfig(configMap, key, value); err != nil {
			return err
		}
	}
	return nil
}
// toNativeKafkaHeaders translates KafkaHeaders into the client library's native header
// slice, returning nil for an empty input.
func toNativeKafkaHeaders(headers KafkaHeaders) (nativeHeaders []kafka.Header) {
	if len(headers) == 0 {
		return nil
	}
	nativeHeaders = make([]kafka.Header, len(headers))
	for i, h := range headers {
		nativeHeaders[i] = kafka.Header{Key: h.Key, Value: []byte(h.Value)}
	}
	return nativeHeaders
}
================================================
FILE: kafka_mock_test.go
================================================
package goharvest
import (
"time"
"github.com/obsidiandynamics/libstdgo/concurrent"
"gopkg.in/confluentinc/confluent-kafka-go.v1/kafka"
)
// consMockFuncs bundles the overridable behaviours of a consMock; each function
// receives the mock itself as its first argument.
type consMockFuncs struct {
	Subscribe   func(m *consMock, topic string, rebalanceCb kafka.RebalanceCb) error
	ReadMessage func(m *consMock, timeout time.Duration) (*kafka.Message, error)
	Close       func(m *consMock) error
}

// consMockCounts tracks invocation counts for each mocked consumer method.
type consMockCounts struct {
	Subscribe,
	ReadMessage,
	Close concurrent.AtomicCounter
}

// consMock is a scriptable stand-in for the KafkaConsumer interface.
type consMock struct {
	rebalanceCallback kafka.RebalanceCb // captured from Subscribe
	rebalanceEvents   chan kafka.Event  // rebalance events forwarded to the callback during ReadMessage
	f                 consMockFuncs
	c                 consMockCounts
}
// Subscribe captures the rebalance callback for later replay and delegates to the
// scripted behaviour, counting the invocation.
func (m *consMock) Subscribe(topic string, rebalanceCb kafka.RebalanceCb) error {
	defer m.c.Subscribe.Inc()
	m.rebalanceCallback = rebalanceCb
	return m.f.Subscribe(m, topic, rebalanceCb)
}
// ReadMessage drains at most one pending rebalance event into the captured callback
// before delegating to the scripted behaviour, counting the invocation.
func (m *consMock) ReadMessage(timeout time.Duration) (*kafka.Message, error) {
	defer m.c.ReadMessage.Inc()
	if m.rebalanceCallback != nil {
		// The rebalance events should only be delivered in the polling thread, which is why we wait for
		// ReadMessage before forwarding the events to the rebalance callback
		select {
		case e := <-m.rebalanceEvents:
			m.rebalanceCallback(nil, e)
		default:
		}
	}
	return m.f.ReadMessage(m, timeout)
}
// Close delegates to the scripted behaviour, counting the invocation.
func (m *consMock) Close() error {
	defer m.c.Close.Inc()
	return m.f.Close(m)
}
// fillDefaults installs benign default behaviours for any unscripted function
// (Subscribe/Close succeed; ReadMessage times out) and initialises the counters.
func (m *consMock) fillDefaults() {
	if m.rebalanceEvents == nil {
		m.rebalanceEvents = make(chan kafka.Event)
	}
	if m.f.Subscribe == nil {
		m.f.Subscribe = func(m *consMock, topic string, rebalanceCb kafka.RebalanceCb) error {
			return nil
		}
	}
	if m.f.ReadMessage == nil {
		// Default poll behaviour mimics an idle consumer: no message, timeout error.
		m.f.ReadMessage = func(m *consMock, timeout time.Duration) (*kafka.Message, error) {
			return nil, newTimedOutError()
		}
	}
	if m.f.Close == nil {
		m.f.Close = func(m *consMock) error {
			return nil
		}
	}
	m.c.Subscribe = concurrent.NewAtomicCounter()
	m.c.ReadMessage = concurrent.NewAtomicCounter()
	m.c.Close = concurrent.NewAtomicCounter()
}
// mockKafkaConsumerProvider yields a provider that always hands out the given mock,
// ignoring the supplied configuration.
func mockKafkaConsumerProvider(m *consMock) func(conf *KafkaConfigMap) (KafkaConsumer, error) {
	return func(_ *KafkaConfigMap) (KafkaConsumer, error) {
		return m, nil
	}
}
// prodMockFuncs bundles the overridable behaviours of a prodMock; each function
// receives the mock itself as its first argument.
type prodMockFuncs struct {
	Events  func(m *prodMock) chan kafka.Event
	Produce func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error
	Close   func(m *prodMock)
}

// prodMockCounts tracks invocation counts for each mocked producer method.
type prodMockCounts struct {
	Events,
	Produce,
	Close concurrent.AtomicCounter
}

// prodMock is a scriptable stand-in for the KafkaProducer interface.
type prodMock struct {
	events chan kafka.Event // backing channel returned by the default Events behaviour
	f      prodMockFuncs
	c      prodMockCounts
}
// Events delegates to the scripted behaviour, counting the invocation.
func (m *prodMock) Events() chan kafka.Event {
	defer m.c.Events.Inc()
	return m.f.Events(m)
}
// Produce delegates to the scripted behaviour, counting the invocation.
func (m *prodMock) Produce(msg *kafka.Message, deliveryChan chan kafka.Event) error {
	defer m.c.Produce.Inc()
	return m.f.Produce(m, msg, deliveryChan)
}
// Close delegates to the scripted behaviour, counting the invocation.
func (m *prodMock) Close() {
	defer m.c.Close.Inc()
	m.f.Close(m)
}
// fillDefaults installs benign default behaviours for any unscripted function
// (Events returns the backing channel; Produce succeeds; Close closes the backing
// channel) and initialises the counters.
func (m *prodMock) fillDefaults() {
	if m.events == nil {
		m.events = make(chan kafka.Event)
	}
	if m.f.Events == nil {
		m.f.Events = func(m *prodMock) chan kafka.Event {
			return m.events
		}
	}
	if m.f.Produce == nil {
		m.f.Produce = func(m *prodMock, msg *kafka.Message, deliveryChan chan kafka.Event) error {
			return nil
		}
	}
	if m.f.Close == nil {
		// Closing the events channel signals consumers of Events() that the producer is done.
		m.f.Close = func(m *prodMock) {
			close(m.events)
		}
	}
	m.c.Events = concurrent.NewAtomicCounter()
	m.c.Produce = concurrent.NewAtomicCounter()
	m.c.Close = concurrent.NewAtomicCounter()
}
// mockKafkaProducerProvider yields a provider that always hands out the given mock,
// ignoring the supplied configuration.
func mockKafkaProducerProvider(m *prodMock) func(conf *KafkaConfigMap) (KafkaProducer, error) {
	return func(_ *KafkaConfigMap) (KafkaProducer, error) {
		return m, nil
	}
}
================================================
FILE: metric/meter.go
================================================
package metric
import (
"fmt"
"time"
"github.com/obsidiandynamics/libstdgo/scribe"
)
// MeterStats is an immutable snapshot of meter statistics.
type MeterStats struct {
	Name             string    // name of the originating meter
	Start            time.Time // when the meter was created or last reset
	TotalCount       int64     // observations since Start
	TotalRatePerS    float64   // average rate over the meter's whole lifetime
	IntervalCount    int64     // observations during the most recent interval
	IntervalRatePerS float64   // rate over the most recent interval
}
// String produces a textual representation of a MeterStats object.
func (s MeterStats) String() string {
	const layout = "Meter <%s>: %d since %v, rate: %.3f current, %.3f average\n"
	return fmt.Sprintf(layout, s.Name, s.TotalCount, s.Start.Format(timeFormat), s.IntervalRatePerS, s.TotalRatePerS)
}
// Meter is a simple structure for tracking the volume of events observed from two points in time:
// 1. When the Meter object was created (or when it was last reset)
// 2. From the last snapshot point.
//
// A meter can be updated by adding more observations. Statistics can be periodically extracted from the
// meter, reflecting the total observed volume as well as the volume in the most recent period.
//
// A meter is not thread-safe. In the absence of locking, it should only be accessed from a single
// goroutine.
type Meter struct {
	name              string        // identifies this meter in snapshots and logs
	printInterval     time.Duration // minimum elapsed time before MaybeStats yields a snapshot
	start             time.Time     // creation or last-reset time
	totalCount        int64         // observations since start
	lastIntervalStart time.Time     // beginning of the current sampling interval
	lastCount         int64         // totalCount captured at the last snapshot
	lastStats         MeterStats    // most recently produced snapshot
}

// timeFormat is the layout used when rendering meter timestamps.
const timeFormat = "2006-01-02T15:04:05"
// String produces a textual representation of a Meter object.
func (m Meter) String() string {
	return fmt.Sprint("Meter[name=", m.name,
		", snapshotInterval=", m.printInterval,
		", start=", m.start.Format(timeFormat),
		", totalCount=", m.totalCount,
		", lastIntervalStart=", m.lastIntervalStart.Format(timeFormat),
		", lastCount=", m.lastCount,
		", lastStats=", m.lastStats, "]")
}
// NewMeter constructs a new meter object, with a given name and snapshot interval. The actual snapshotting
// of meter statistics is the responsibility of the goroutine that owns the meter.
func NewMeter(name string, snapshotInterval time.Duration) *Meter {
	m := &Meter{
		name:          name,
		printInterval: snapshotInterval,
	}
	m.Reset()
	return m
}
// Reset the meter to its initial state — clearing all counters and resetting the clocks.
func (m *Meter) Reset() {
	m.start = time.Now()
	m.totalCount = 0
	// The current interval restarts alongside the overall clock.
	m.lastIntervalStart = m.start
	m.lastCount = 0
}
// Add a value to the meter, contributing to the overall count and to the current interval.
func (m *Meter) Add(amount int64) {
	m.totalCount += amount
}
// MaybeStats conditionally returns a stats snapshot if the current sampling interval has lapsed. Otherwise, if the
// sampling interval is still valid, a nil is returned.
func (m *Meter) MaybeStats() *MeterStats {
	now := time.Now()
	elapsedInIntervalMs := now.Sub(m.lastIntervalStart).Milliseconds()
	if elapsedInIntervalMs > m.printInterval.Milliseconds() {
		// Interval has lapsed: derive the per-interval delta and rate, then roll
		// the interval forward.
		intervalCount := m.totalCount - m.lastCount
		intervalRatePerS := float64(intervalCount) / float64(elapsedInIntervalMs) * 1000.0
		m.lastCount = m.totalCount
		m.lastIntervalStart = now
		// Lifetime average, computed over the full span since start/reset.
		elapsedTotalMs := now.Sub(m.start).Milliseconds()
		totalRatePerS := float64(m.totalCount) / float64(elapsedTotalMs) * 1000.0
		m.lastStats = MeterStats{
			Name:             m.name,
			Start:            m.start,
			TotalCount:       m.totalCount,
			TotalRatePerS:    totalRatePerS,
			IntervalCount:    intervalCount,
			IntervalRatePerS: intervalRatePerS,
		}
		return &m.lastStats
	}
	return nil
}
// MeterStatsCallback is invoked by MaybeStatsCall().
type MeterStatsCallback func(stats MeterStats)

// MaybeStatsCall conditionally invokes the given MeterStatsCallback if the current sampling interval has lapsed, returning true
// if the callback was invoked.
func (m *Meter) MaybeStatsCall(cb MeterStatsCallback) bool {
	if stats := m.MaybeStats(); stats != nil {
		cb(*stats)
		return true
	}
	return false
}
// MaybeStatsLog conditionally logs the snapshot of the recent sampling interval if the latter has lapsed, returning true if an
// entry was logged.
func (m *Meter) MaybeStatsLog(logger scribe.Logger) bool {
	return m.MaybeStatsCall(func(stats MeterStats) {
		// Relies on MeterStats.String() for the rendered form.
		logger("%v", stats)
	})
}
================================================
FILE: metric/meter_test.go
================================================
package metric
import (
"testing"
"time"
"github.com/obsidiandynamics/libstdgo/check"
"github.com/obsidiandynamics/libstdgo/scribe"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// wait returns a timed assertion helper, capping asynchronous checks at ten seconds.
func wait(t *testing.T) check.Timesert {
	return check.Wait(t, 10*time.Second)
}
// TestMeterString verifies that the meter's string form carries the type marker and its name.
func TestMeterString(t *testing.T) {
	m := NewMeter("test-name", time.Second)
	str := m.String()
	require.Contains(t, str, "Meter[")
	require.Contains(t, str, m.name)
}
// TestMeterMaybeStats exercises snapshot production across successive intervals and after a reset.
func TestMeterMaybeStats(t *testing.T) {
	m := NewMeter("test-name", time.Millisecond)
	m.Add(1)
	// First snapshot: one observation in total and in the interval.
	wait(t).UntilAsserted(func(t check.Tester) {
		s := m.MaybeStats()
		if assert.NotNil(t, s) {
			assert.Equal(t, "test-name", s.Name)
			assert.Equal(t, int64(1), s.TotalCount)
			assert.Equal(t, int64(1), s.IntervalCount)
		}
	})
	m.Add(2)
	// Second snapshot: total accumulates, interval reflects only the new delta.
	wait(t).UntilAsserted(func(t check.Tester) {
		s := m.MaybeStats()
		if assert.NotNil(t, s) {
			assert.Equal(t, "test-name", s.Name)
			assert.Equal(t, int64(3), s.TotalCount)
			assert.Equal(t, int64(2), s.IntervalCount)
		}
	})
	m.Add(1)
	m.Reset()
	// After reset all counters return to zero, discarding the un-snapshotted Add.
	wait(t).UntilAsserted(func(t check.Tester) {
		s := m.MaybeStats()
		if assert.NotNil(t, s) {
			assert.Equal(t, "test-name", s.Name)
			assert.Equal(t, int64(0), s.TotalCount)
			assert.Equal(t, int64(0), s.IntervalCount)
		}
	})
}
// TestMeterMaybeStatsCall verifies that the callback fires once the interval lapses
// and receives the expected snapshot.
func TestMeterMaybeStatsCall(t *testing.T) {
	m := NewMeter("test-name", time.Millisecond)
	m.Add(1)
	wait(t).UntilAsserted(func(t check.Tester) {
		var statsPtr *MeterStats
		called := m.MaybeStatsCall(func(stats MeterStats) {
			statsPtr = &stats
		})
		if assert.True(t, called) {
			assert.NotNil(t, statsPtr)
			assert.Equal(t, "test-name", statsPtr.Name)
			assert.Equal(t, int64(1), statsPtr.TotalCount)
			assert.Equal(t, int64(1), statsPtr.IntervalCount)
		} else {
			assert.Nil(t, statsPtr)
		}
	})
}
// TestMeterMaybeStatsLog verifies that a lapsed interval produces exactly one
// info-level log entry naming the meter.
func TestMeterMaybeStatsLog(t *testing.T) {
	m := NewMeter("test-name", time.Millisecond)
	m.Add(1)
	mockscribe := scribe.NewMock()
	scr := scribe.New(mockscribe.Factories())
	wait(t).UntilAsserted(func(t check.Tester) {
		called := m.MaybeStatsLog(scr.I())
		if assert.True(t, called) {
			mockscribe.Entries().
				Having(scribe.LogLevel(scribe.Info)).
				Having(scribe.MessageContaining("test-name")).
				Assert(t, scribe.Count(1))
		} else {
			mockscribe.Entries().
				Assert(t, scribe.Count(0))
		}
	})
}
================================================
FILE: metric/metric.go
================================================
// Package metric contains data structures for working with metrics.
package metric
================================================
FILE: neli.go
================================================
package goharvest
import "github.com/obsidiandynamics/goneli"
// NeliProvider is a factory for creating Neli instances.
type NeliProvider func(config goneli.Config, barrier goneli.Barrier) (goneli.Neli, error)

// StandardNeliProvider returns a factory for creating a conventional Neli instance, backed by the real client API.
func StandardNeliProvider() NeliProvider {
	return func(config goneli.Config, barrier goneli.Barrier) (goneli.Neli, error) {
		return goneli.New(config, barrier)
	}
}
// configToNeli converts a goharvest Kafka configuration map into goneli's equivalent
// type; both are map[string]interface{} under the hood, so this is a type conversion only.
func configToNeli(hConfigMap KafkaConfigMap) goneli.KafkaConfigMap {
	return map[string]interface{}(hConfigMap)
}
// configToHarvest converts a goneli Kafka configuration map into goharvest's equivalent
// type; both are map[string]interface{} under the hood, so this is a type conversion only.
func configToHarvest(nConfigMap goneli.KafkaConfigMap) KafkaConfigMap {
	return map[string]interface{}(nConfigMap)
}
// convertKafkaConsumerProvider adapts a goharvest consumer provider to goneli's
// provider signature by translating the configuration map on each invocation.
func convertKafkaConsumerProvider(hProvider KafkaConsumerProvider) goneli.KafkaConsumerProvider {
	return func(conf *goneli.KafkaConfigMap) (goneli.KafkaConsumer, error) {
		harvestConf := configToHarvest(*conf)
		return hProvider(&harvestConf)
	}
}
// convertKafkaProducerProvider adapts a goharvest producer provider to goneli's
// provider signature by translating the configuration map on each invocation.
func convertKafkaProducerProvider(hProvider KafkaProducerProvider) goneli.KafkaProducerProvider {
	return func(conf *goneli.KafkaConfigMap) (goneli.KafkaProducer, error) {
		harvestConf := configToHarvest(*conf)
		return hProvider(&harvestConf)
	}
}
================================================
FILE: postgres.go
================================================
package goharvest
import (
"database/sql"
"fmt"
"sort"
"github.com/google/uuid"
// init postgres driver
"github.com/lib/pq"
)
// database is the Postgres-backed implementation of DatabaseBinding, holding
// the connection pool and the three prepared outbox statements.
type database struct {
	db        *sql.DB
	markStmt  *sql.Stmt // marks a batch of records with a leader ID
	purgeStmt *sql.Stmt // deletes a record by ID
	resetStmt *sql.Stmt // clears a record's leader ID
}
// markQueryTemplate claims up to $2 unowned (or differently-owned) records for
// leader $1 and returns the claimed rows. The outbox table name is substituted
// twice via fmt.Sprintf.
const markQueryTemplate = `
-- mark query
UPDATE %s
SET leader_id = $1
WHERE id IN (
SELECT id FROM %s
WHERE leader_id IS NULL OR leader_id != $1
ORDER BY id
LIMIT $2
)
RETURNING id, create_time, kafka_topic, kafka_key, kafka_value, kafka_header_keys, kafka_header_values, leader_id
`

// purgeQueryTemplate deletes a single record by ID.
const purgeQueryTemplate = `
-- purge query
DELETE FROM %s
WHERE id = $1
`

// resetQueryTemplate relinquishes ownership of a single record by ID.
const resetQueryTemplate = `
-- reset query
UPDATE %s
SET leader_id = NULL
WHERE id = $1
`
// closeResource closes the given prepared statement, tolerating a nil argument.
// The close error is deliberately discarded — this is best-effort cleanup.
func closeResource(stmt *sql.Stmt) {
	if stmt == nil {
		return
	}
	stmt.Close()
}
// closeResources closes each of the given prepared statements in turn,
// tolerating nil entries.
func closeResources(stmts ...*sql.Stmt) {
	for _, stmt := range stmts {
		closeResource(stmt)
	}
}
// databaseProvider yields a database handle for the binding, allowing the
// connection mechanism to be substituted in tests.
type databaseProvider func() (*sql.DB, error)

// StandardPostgresBindingProvider returns a DatabaseBindingProvider that connects to a real Postgres database.
func StandardPostgresBindingProvider() DatabaseBindingProvider {
	return NewPostgresBinding
}

// NewPostgresBinding creates a Postgres binding for the given dataSource and outboxTable args.
func NewPostgresBinding(dataSource string, outboxTable string) (DatabaseBinding, error) {
	return newPostgresBinding(func() (*sql.DB, error) {
		return sql.Open("postgres", dataSource)
	}, outboxTable)
}
// newPostgresBinding builds a DatabaseBinding from the given provider, preparing
// the mark/purge/reset statements against the named outbox table. On any failure,
// everything acquired so far is released before returning the error.
func newPostgresBinding(dbProvider databaseProvider, outboxTable string) (DatabaseBinding, error) {
	success := false
	var db *sql.DB
	var markStmt, purgeStmt, resetStmt *sql.Stmt
	// Deferred cleanup fires only on the error paths (success flips to true at the end).
	defer func() {
		if !success {
			if db != nil {
				db.Close()
			}
			closeResources(markStmt, purgeStmt, resetStmt)
		}
	}()
	db, err := dbProvider()
	if err != nil {
		return nil, err
	}
	// NOTE(review): pool capped at two connections — presumably sufficient for the
	// harvester's access pattern; confirm if usage changes.
	db.SetMaxOpenConns(2)
	db.SetMaxIdleConns(2)
	markStmt, err = db.Prepare(fmt.Sprintf(markQueryTemplate, outboxTable, outboxTable))
	if err != nil {
		return nil, err
	}
	purgeStmt, err = db.Prepare(fmt.Sprintf(purgeQueryTemplate, outboxTable))
	if err != nil {
		return nil, err
	}
	resetStmt, err = db.Prepare(fmt.Sprintf(resetQueryTemplate, outboxTable))
	if err != nil {
		return nil, err
	}
	success = true
	return &database{
		db:        db,
		markStmt:  markStmt,
		purgeStmt: purgeStmt,
		resetStmt: resetStmt,
	}, nil
}
// Mark claims up to limit outbox records for the given leader ID, returning the
// claimed records sorted by ascending ID.
func (db *database) Mark(leaderID uuid.UUID, limit int) ([]OutboxRecord, error) {
	rows, err := db.markStmt.Query(leaderID, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	records := make([]OutboxRecord, 0, limit)
	for rows.Next() {
		record := OutboxRecord{}
		var keys []string
		var values []string
		err := rows.Scan(
			&record.ID,
			&record.CreateTime,
			&record.KafkaTopic,
			&record.KafkaKey,
			&record.KafkaValue,
			pq.Array(&keys),
			pq.Array(&values),
			&record.LeaderID,
		)
		if err != nil {
			return nil, err
		}
		numKeys := len(keys)
		// Header keys and values are stored as parallel arrays; a length mismatch
		// indicates a corrupt record.
		if numKeys != len(values) {
			return nil, fmt.Errorf("unequal number of header keys (%d) and values (%d)", numKeys, len(values))
		}
		record.KafkaHeaders = make(KafkaHeaders, numKeys)
		for i := 0; i < numKeys; i++ {
			record.KafkaHeaders[i] = KafkaHeader{keys[i], values[i]}
		}
		records = append(records, record)
	}
	// Surface any error that terminated iteration early (previously dropped).
	if err := rows.Err(); err != nil {
		return nil, err
	}
	// The row order of UPDATE ... RETURNING is not guaranteed, hence the explicit sort.
	sort.Slice(records, func(i, j int) bool {
		return records[i].ID < records[j].ID
	})
	return records, nil
}
// Purge deletes the outbox record matching the given ID, returning true if a
// record was deleted and false if no matching record existed.
func (db *database) Purge(id int64) (bool, error) {
	res, err := db.purgeStmt.Exec(id)
	if err != nil {
		return false, err
	}
	// The RowsAffected error is deliberately ignored: a failure to report the
	// count is treated the same as zero rows affected.
	affected, _ := res.RowsAffected()
	// Return an explicit nil rather than the stale (provably nil) err variable.
	return affected == 1, nil
}
// Reset clears the leader ID of the outbox record matching the given ID,
// returning true if a record was updated and false if no matching record existed.
func (db *database) Reset(id int64) (bool, error) {
	res, err := db.resetStmt.Exec(id)
	if err != nil {
		return false, err
	}
	// The RowsAffected error is deliberately ignored: a failure to report the
	// count is treated the same as zero rows affected.
	affected, _ := res.RowsAffected()
	// Return an explicit nil rather than the stale (provably nil) err variable.
	return affected == 1, nil
}
// Dispose closes the underlying database handle and the prepared statements.
func (db *database) Dispose() {
	db.db.Close()
	closeResources(db.markStmt, db.purgeStmt, db.resetStmt)
}
================================================
FILE: postgres_test.go
================================================
package goharvest
import (
"database/sql"
"database/sql/driver"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/google/uuid"
"github.com/lib/pq"
"github.com/obsidiandynamics/libstdgo/check"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Table name and prepare-matcher prefixes shared by the tests below. The prefixes
// match the leading SQL comments embedded in the query templates.
const outboxTable = "outbox"
const markPrepare = "-- mark query"
const purgePrepare = "-- purge query"
const resetPrepare = "-- reset query"
// pgFixtures builds a databaseProvider backed by a sqlmock instance, alongside the
// mock handle for scripting expectations. Panics if the mock cannot be created.
func pgFixtures() (databaseProvider, sqlmock.Sqlmock) {
	db, mock, err := sqlmock.New()
	if err != nil {
		panic(err)
	}
	provider := func() (*sql.DB, error) { return db, nil }
	return provider, mock
}
// TestErrorInDBProvider verifies that a failing provider aborts binding creation.
func TestErrorInDBProvider(t *testing.T) {
	dbProvider := func() (*sql.DB, error) {
		return nil, check.ErrSimulated
	}
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.Nil(t, b)
	assert.Equal(t, check.ErrSimulated, err)
}
// TestErrorInPrepareMarkQuery verifies that a failed mark-statement prepare closes the DB.
func TestErrorInPrepareMarkQuery(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mock.ExpectPrepare(markPrepare).WillReturnError(check.ErrSimulated)
	mock.ExpectClose()
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.Nil(t, b)
	assert.Equal(t, check.ErrSimulated, err)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// TestErrorInPreparePurgeQuery verifies that a failed purge-statement prepare closes
// the already-prepared mark statement and the DB.
func TestErrorInPreparePurgeQuery(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mark := mock.ExpectPrepare(markPrepare)
	mock.ExpectPrepare(purgePrepare).WillReturnError(check.ErrSimulated)
	mark.WillBeClosed()
	mock.ExpectClose()
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.Nil(t, b)
	assert.Equal(t, check.ErrSimulated, err)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// TestErrorInPrepareResetQuery verifies that a failed reset-statement prepare closes
// the already-prepared mark and purge statements and the DB.
func TestErrorInPrepareResetQuery(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mark := mock.ExpectPrepare(markPrepare)
	purge := mock.ExpectPrepare(purgePrepare)
	mock.ExpectPrepare(resetPrepare).WillReturnError(check.ErrSimulated)
	mark.WillBeClosed()
	purge.WillBeClosed()
	mock.ExpectClose()
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.Nil(t, b)
	assert.Equal(t, check.ErrSimulated, err)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// testMarkQueryLimit is the batch limit passed to Mark in the tests below.
const testMarkQueryLimit = 100

// TestExecuteMark_queryError verifies that a query failure surfaces from Mark and
// that Dispose closes all prepared statements and the DB.
func TestExecuteMark_queryError(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mark := mock.ExpectPrepare(markPrepare)
	purge := mock.ExpectPrepare(purgePrepare)
	reset := mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	leaderID, _ := uuid.NewRandom()
	mark.ExpectQuery().WithArgs(leaderID, testMarkQueryLimit).WillReturnError(check.ErrSimulated)
	records, err := b.Mark(leaderID, testMarkQueryLimit)
	assert.Nil(t, records)
	assert.Equal(t, check.ErrSimulated, err)
	mock.ExpectClose()
	mark.WillBeClosed()
	purge.WillBeClosed()
	reset.WillBeClosed()
	b.Dispose()
	assert.Nil(t, mock.ExpectationsWereMet())
}
// Tests error when one of the columns is of the wrong data type.
func TestExecuteMarkQuery_scanError(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mark := mock.ExpectPrepare(markPrepare)
	mock.ExpectPrepare(purgePrepare)
	mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	leaderID, _ := uuid.NewRandom()
	rows := sqlmock.NewRows([]string{
		"id",
		"create_time",
		"kafka_topic",
		"kafka_key",
		"kafka_value",
		"kafka_header_keys",
		"kafka_header_values",
		"leader_id",
	})
	// The 'id' column is deliberately non-numeric to trigger a Scan failure.
	rows.AddRow("non-int", "", "", "", "", pq.Array([]string{"some-key"}), pq.Array([]string{"some-value"}), leaderID)
	mark.ExpectQuery().WithArgs(leaderID, testMarkQueryLimit).WillReturnRows(rows)
	records, err := b.Mark(leaderID, testMarkQueryLimit)
	assert.Nil(t, records)
	if assert.NotNil(t, err) {
		assert.Contains(t, err.Error(), "Scan error on column")
	}
}
// TestExecuteMark_success verifies the happy path of Mark, including reassembly of
// header arrays and the re-sorting of rows returned out of order.
func TestExecuteMark_success(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mark := mock.ExpectPrepare(markPrepare)
	mock.ExpectPrepare(purgePrepare)
	mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	leaderID, _ := uuid.NewRandom()
	exp := []OutboxRecord{
		{
			ID:         77,
			CreateTime: time.Now(),
			KafkaTopic: "kafka_topic",
			KafkaKey:   "kafka_key",
			KafkaValue: String("kafka_value"),
			KafkaHeaders: KafkaHeaders{
				KafkaHeader{Key: "some-key", Value: "some-value"},
			},
			LeaderID: nil,
		},
		{
			ID:           78,
			CreateTime:   time.Now(),
			KafkaTopic:   "kafka_topic",
			KafkaKey:     "kafka_key",
			KafkaValue:   String("kafka_value"),
			KafkaHeaders: KafkaHeaders{},
			LeaderID:     nil,
		},
	}
	// reverse returns a copy of recs in reverse order.
	reverse := func(recs []OutboxRecord) []OutboxRecord {
		reversed := make([]OutboxRecord, len(recs))
		for i, j := len(recs)-1, 0; i >= 0; i, j = i-1, j+1 {
			reversed[i] = recs[j]
		}
		return reversed
	}
	rows := sqlmock.NewRows([]string{
		"id",
		"create_time",
		"kafka_topic",
		"kafka_key",
		"kafka_value",
		"kafka_header_keys",
		"kafka_header_values",
		"leader_id",
	})
	// Reverse the order before returning to test the sorter inside the marker implementation.
	for _, expRec := range reverse(exp) {
		headerKeys, headerValues := flattenHeaders(expRec.KafkaHeaders)
		rows.AddRow(
			expRec.ID,
			expRec.CreateTime,
			expRec.KafkaTopic,
			expRec.KafkaKey,
			expRec.KafkaValue,
			pq.Array(headerKeys),
			pq.Array(headerValues),
			expRec.LeaderID,
		)
	}
	mark.ExpectQuery().WithArgs(leaderID, testMarkQueryLimit).WillReturnRows(rows)
	records, err := b.Mark(leaderID, testMarkQueryLimit)
	assert.Nil(t, err)
	assert.ElementsMatch(t, []interface{}{exp[0], exp[1]}, records)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// TestExecuteMark_headerLengthMismatch verifies that Mark rejects a row whose
// header key and value arrays have differing lengths.
func TestExecuteMark_headerLengthMismatch(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mark := mock.ExpectPrepare(markPrepare)
	mock.ExpectPrepare(purgePrepare)
	mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	leaderID, _ := uuid.NewRandom()
	rows := sqlmock.NewRows([]string{
		"id",
		"create_time",
		"kafka_topic",
		"kafka_key",
		"kafka_value",
		"kafka_header_keys",
		"kafka_header_values",
		"leader_id",
	})
	// One key but two values — deliberately inconsistent.
	rows.AddRow(
		1,
		time.Now(),
		"some-topic",
		"some-key",
		"some-value",
		pq.Array([]string{"k0"}),
		pq.Array([]string{"v0", "v1"}),
		leaderID,
	)
	mark.ExpectQuery().WithArgs(leaderID, testMarkQueryLimit).WillReturnRows(rows)
	records, err := b.Mark(leaderID, testMarkQueryLimit)
	assert.Nil(t, records)
	require.NotNil(t, err)
	assert.Equal(t, "unequal number of header keys (1) and values (2)", err.Error())
}
// flattenHeaders splits headers into parallel key and value slices, yielding empty
// (non-nil) slices for an empty input.
func flattenHeaders(headers KafkaHeaders) (headerKeys, headerValues []string) {
	headerKeys, headerValues = make([]string, 0, len(headers)), make([]string, 0, len(headers))
	for _, header := range headers {
		headerKeys = append(headerKeys, header.Key)
		headerValues = append(headerValues, header.Value)
	}
	return headerKeys, headerValues
}
// TestExecutePurge_error verifies that an Exec failure surfaces from Purge.
func TestExecutePurge_error(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mock.ExpectPrepare(markPrepare)
	purge := mock.ExpectPrepare(purgePrepare)
	mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	const id = 77
	purge.ExpectExec().WithArgs(id).WillReturnError(check.ErrSimulated)
	done, err := b.Purge(id)
	assert.False(t, done)
	assert.Equal(t, check.ErrSimulated, err)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// TestExecutePurge_success verifies that a one-row deletion reports done=true.
func TestExecutePurge_success(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mock.ExpectPrepare(markPrepare)
	purge := mock.ExpectPrepare(purgePrepare)
	mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	const id = 77
	purge.ExpectExec().WithArgs(id).WillReturnResult(sqlmock.NewResult(-1, 1))
	done, err := b.Purge(id)
	assert.True(t, done)
	assert.Nil(t, err)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// TestExecutePurge_notDone verifies that a zero-row deletion reports done=false without error.
func TestExecutePurge_notDone(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mock.ExpectPrepare(markPrepare)
	purge := mock.ExpectPrepare(purgePrepare)
	mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	const id = 77
	purge.ExpectExec().WithArgs(id).WillReturnResult(driver.ResultNoRows)
	done, err := b.Purge(id)
	assert.False(t, done)
	assert.Nil(t, err)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// TestExecuteReset_error verifies that an Exec failure surfaces from Reset.
func TestExecuteReset_error(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mock.ExpectPrepare(markPrepare)
	mock.ExpectPrepare(purgePrepare)
	reset := mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	const id = 77
	reset.ExpectExec().WithArgs(id).WillReturnError(check.ErrSimulated)
	done, err := b.Reset(id)
	assert.False(t, done)
	assert.Equal(t, check.ErrSimulated, err)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// TestExecuteReset_success verifies that a one-row update reports done=true.
func TestExecuteReset_success(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mock.ExpectPrepare(markPrepare)
	mock.ExpectPrepare(purgePrepare)
	reset := mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	const id = 77
	reset.ExpectExec().WithArgs(id).WillReturnResult(sqlmock.NewResult(-1, 1))
	done, err := b.Reset(id)
	assert.True(t, done)
	assert.Nil(t, err)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// TestExecuteReset_notDone verifies that a zero-row update reports done=false without error.
func TestExecuteReset_notDone(t *testing.T) {
	dbProvider, mock := pgFixtures()
	mock.ExpectPrepare(markPrepare)
	mock.ExpectPrepare(purgePrepare)
	reset := mock.ExpectPrepare(resetPrepare)
	b, err := newPostgresBinding(dbProvider, outboxTable)
	assert.NotNil(t, b)
	assert.Nil(t, err)
	const id = 77
	reset.ExpectExec().WithArgs(id).WillReturnResult(driver.ResultNoRows)
	done, err := b.Reset(id)
	assert.False(t, done)
	assert.Nil(t, err)
	assert.Nil(t, mock.ExpectationsWereMet())
}
// TestRealPostgresBinding verifies that an invalid connection string yields an error
// from the real Postgres binding constructor.
func TestRealPostgresBinding(t *testing.T) {
	b, err := NewPostgresBinding("***corrupt connection info string***", outboxTable)
	assert.Nil(t, b)
	assert.NotNil(t, err)
}
================================================
FILE: sh/.gitignore
================================================
librdkafka
================================================
FILE: sh/build-librdkafka.sh
================================================
#!/bin/sh
# Clones (or updates) librdkafka and builds/installs it under /usr.
#
# Fail fast on any error — enabled before the initial cd so a failed directory
# change also aborts the script; path expansions are quoted to survive spaces.
set -e
cd "$(dirname "$0")"
if [ -d librdkafka ]; then
  cd librdkafka
  git pull
  cd ..
else
  git clone https://github.com/edenhill/librdkafka.git
fi
cd librdkafka
./configure --prefix /usr
make
sudo make install
# NOTE(review): executed from inside the librdkafka directory, this removes a
# (normally nonexistent) nested 'librdkafka' path rather than the clone itself.
# If cleanup of the clone is intended, a 'cd ..' is needed first — confirm intent.
rm -rf librdkafka
================================================
FILE: sh/init-outbox.sh
================================================
#!/bin/sh
cat < 0 {
headerKeys = make([]string, numHeaders)
headerValues = make([]string, numHeaders)
for i, header := range rec.KafkaHeaders {
headerKeys[i], headerValues[i] = header.Key, header.Value
}
} else {
headerKeys, headerValues = []string{}, []string{}
}
return headerKeys, headerValues
}
// Stash one record within the given transaction scope. Headers are flattened into
// parallel key/value arrays for storage in Postgres array columns.
func (s *stasher) Stash(tx *sql.Tx, rec goharvest.OutboxRecord) error {
	headerKeys, headerValues := makeHeaders(rec)
	_, err := tx.Exec(s.query, rec.KafkaTopic, rec.KafkaKey, rec.KafkaValue, pq.Array(headerKeys), pq.Array(headerValues))
	return err
}
================================================
FILE: stasher/stasher_doc_test.go
================================================
package stasher
import (
"database/sql"
"testing"
"github.com/obsidiandynamics/goharvest"
"github.com/obsidiandynamics/libstdgo/check"
)
func Example() {
	// Connect to the local Postgres instance.
	conn, err := sql.Open("postgres", "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable")
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	st := New("outbox")

	// Begin a transaction; it is rolled back unless explicitly committed below.
	txn, _ := conn.Begin()
	defer txn.Rollback()

	// Update other database entities in transaction scope.
	// ...

	// Stash an outbox record for subsequent harvesting.
	record := goharvest.OutboxRecord{
		KafkaTopic: "my-app.topic",
		KafkaKey:   "hello",
		KafkaValue: goharvest.String("world"),
		KafkaHeaders: goharvest.KafkaHeaders{
			{Key: "applicationId", Value: "my-app"},
		},
	}
	if err = st.Stash(txn, record); err != nil {
		panic(err)
	}

	// Commit the transaction.
	txn.Commit()
}
// TestExample runs the Example function via the targetted-test helper.
func TestExample(t *testing.T) {
	check.RunTargetted(t, Example)
}
func Example_prepare() {
	// Connect to the local Postgres instance.
	conn, err := sql.Open("postgres", "host=localhost port=5432 user=postgres password= dbname=postgres sslmode=disable")
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	st := New("outbox")

	// Begin a transaction; it is rolled back unless explicitly committed below.
	txn, _ := conn.Begin()
	defer txn.Rollback()

	// Update other database entities in transaction scope.
	// ...

	// Formulates a prepared statement that may be reused within the scope of the transaction.
	prestash, _ := st.Prepare(txn)

	// Publish a bunch of messages using the same prepared statement.
	for count := 0; count < 10; count++ {
		// Stash an outbox record for subsequent harvesting.
		err = prestash.Stash(goharvest.OutboxRecord{
			KafkaTopic: "my-app.topic",
			KafkaKey:   "hello",
			KafkaValue: goharvest.String("world"),
			KafkaHeaders: goharvest.KafkaHeaders{
				{Key: "applicationId", Value: "my-app"},
			},
		})
		if err != nil {
			panic(err)
		}
	}

	// Commit the transaction.
	txn.Commit()
}
// TestExample_prepare runs the Example_prepare function via the targetted-test helper.
func TestExample_prepare(t *testing.T) {
	check.RunTargetted(t, Example_prepare)
}
================================================
FILE: stasher/statsher_test.go
================================================
package stasher
import (
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/lib/pq"
"github.com/obsidiandynamics/goharvest"
"github.com/stretchr/testify/require"
)
// Fixed values shared across the stasher tests below.
const (
	testTopic       = "topic"
	testKey         = "key"
	testValue       = "value"
	testHeaderKey   = "header-key"
	testHeaderValue = "header-value"
	testInsertQuery = "-- insert query"
)
// Verifies stashing a record that carries Kafka headers: the header keys and
// values must be passed to the insert as two parallel string arrays.
func TestStash_withHeaders(t *testing.T) {
	st := New("outbox")
	db, mock, err := sqlmock.New()
	require.Nil(t, err)

	mock.ExpectBegin()
	tx, err := db.Begin()
	require.Nil(t, err)

	expectedKeys := pq.Array([]string{testHeaderKey})
	expectedValues := pq.Array([]string{testHeaderValue})
	mock.ExpectExec(testInsertQuery).
		WithArgs(testTopic, testKey, testValue, expectedKeys, expectedValues).
		WillReturnResult(sqlmock.NewResult(-1, 1))

	rec := goharvest.OutboxRecord{
		KafkaTopic: testTopic,
		KafkaKey:   testKey,
		KafkaValue: goharvest.String(testValue),
		KafkaHeaders: goharvest.KafkaHeaders{
			{Key: testHeaderKey, Value: testHeaderValue},
		},
	}
	require.Nil(t, st.Stash(tx, rec))
	require.Nil(t, mock.ExpectationsWereMet())
}
// Verifies stashing a record with no Kafka headers: the insert must receive
// two empty (but non-nil) string arrays for the header columns.
func TestStash_withoutHeaders(t *testing.T) {
	st := New("outbox")
	db, mock, err := sqlmock.New()
	require.Nil(t, err)

	mock.ExpectBegin()
	tx, err := db.Begin()
	require.Nil(t, err)

	mock.ExpectExec(testInsertQuery).
		WithArgs(testTopic, testKey, testValue, pq.Array([]string{}), pq.Array([]string{})).
		WillReturnResult(sqlmock.NewResult(-1, 1))

	rec := goharvest.OutboxRecord{
		KafkaTopic: testTopic,
		KafkaKey:   testKey,
		KafkaValue: goharvest.String(testValue),
	}
	require.Nil(t, st.Stash(tx, rec))
	require.Nil(t, mock.ExpectationsWereMet())
}
// Verifies stashing through a prepared statement obtained from Prepare: the
// statement is prepared once and then executed with the record's fields.
func TestStash_prepare(t *testing.T) {
	st := New("outbox")
	db, mock, err := sqlmock.New()
	require.Nil(t, err)

	mock.ExpectBegin()
	tx, err := db.Begin()
	require.Nil(t, err)

	mock.ExpectPrepare(testInsertQuery)
	prestash, err := st.Prepare(tx)
	require.Nil(t, err)
	require.NotNil(t, prestash)

	mock.ExpectExec(testInsertQuery).
		WithArgs(testTopic, testKey, testValue, pq.Array([]string{}), pq.Array([]string{})).
		WillReturnResult(sqlmock.NewResult(-1, 1))

	rec := goharvest.OutboxRecord{
		KafkaTopic: testTopic,
		KafkaKey:   testKey,
		KafkaValue: goharvest.String(testValue),
	}
	require.Nil(t, prestash.Stash(rec))
	require.Nil(t, mock.ExpectationsWereMet())
}