Full Code of jike-engineering/mongo-es for AI

master 019e37686a76 cached
22 files
59.5 KB
15.5k tokens
72 symbols
1 requests
Download .txt
Repository: jike-engineering/mongo-es
Branch: master
Commit: 019e37686a76
Files: 22
Total size: 59.5 KB

Directory structure:
gitextract_qgmg3l8r/

├── .gitignore
├── .npmignore
├── .npmrc
├── .prettierignore
├── .prettierrc
├── README.md
├── examples/
│   ├── config.json
│   └── example.js
├── license
├── package.json
├── src/
│   ├── config.ts
│   ├── elasticsearch.ts
│   ├── index.ts
│   ├── indices.ts
│   ├── main.ts
│   ├── mongodb.ts
│   ├── processor.ts
│   └── types.ts
├── test/
│   ├── _init.ts
│   ├── elasticsearch.test.ts
│   └── processor.test.ts
└── tsconfig.json

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.idea/
node_modules/
dist/
*temp*
npm-debug.log*


================================================
FILE: .npmignore
================================================
node_modules/
*temp*


================================================
FILE: .npmrc
================================================
registry=https://registry.npmjs.org


================================================
FILE: .prettierignore
================================================
package-lock.json
dist/
.*
license


================================================
FILE: .prettierrc
================================================
{
  "printWidth": 100,
  "semi": false,
  "singleQuote": true,
  "trailingComma": "all"
}


================================================
FILE: README.md
================================================
# Mongo-ES

A MongoDB to Elasticsearch connector

[![npm version](https://badge.fury.io/js/mongo-es.svg)](https://badge.fury.io/js/mongo-es)

## Installation

```bash
npm i -g mongo-es
```

## Usage

### Command Line

```bash
# normal mode
mongo-es ./config.json

# debug mode, with debug info printed
NODE_ENV=dev mongo-es ./config.json
```

### Programmatically

```javascript
const fs = require('fs')
const Redis = require('ioredis')
const { Config, Task, run } = require('mongo-es')

const redis = new Redis('localhost')

Task.onSaveCheckpoint((name, checkpoint) => {
  return redis.set(`mongo-es:${name}`, JSON.stringify(checkpoint))
})

// this will overwrite task.from in config file
Task.onLoadCheckpoint((name) => {
  return redis.get(`mongo-es:${name}`).then(JSON.parse)
})

run(new Config(fs.readFileSync('config.json', 'utf8')))
```

## Concepts

### Scan phase

Scan the entire database for existing documents.

### Tail phase

Tail the oplog for document creates, updates, and deletes.

## Configuration

Structure:

```json
{
  "controls": {},
  "mongodb": {},
  "elasticsearch": {},
  "tasks": [
    {
      "extract": {},
      "transform": {},
      "load": {}
    }
  ]
}
```

[Detail example](https://github.com/jike-engineering/mongo-es/blob/master/examples/config.json)

### controls

- `mongodbReadCapacity` - Max docs read per second (default: `Infinity`, i.e. no limit). (optional)
- `elasticsearchBulkInterval` - Max bulk interval per request (default: `5000`). (optional)
- `elasticsearchBulkSize` - Max bulk size per request (default: `5000`). (optional)
- `indexNameSuffix` - Index name suffix, for index version control. (optional)

### mongodb

- `url` - The connection URI string, eg: `mongodb://user:password@localhost:27017/db?replicaSet=rs0`.
  **notice**: must use an `admin` user to access oplog.
- `options` - Connection settings, see: [MongoClient](http://mongodb.github.io/node-mongodb-native/2.1/api/MongoClient.html#.connect). (optional)

### elasticsearch

- `options` - Elasticsearch Config Options, see: [Configuration](https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/configuration.html).
- `indices` - If set, auto create indices when the program starts, see: [Indices Create](https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/api-reference-5-0.html#api-indices-create-5-0). (optional)

### task.from

- `phase` - `scan` or `tail`
- `time` - tail oplog with query: `{ ts: { $gte: new Timestamp(0, new Date(time).getTime() / 1000) } }`
- `id` - scan collection with query `{ _id: { $gte: id }}`

### task.extract

- `db` - Database name.
- `collection` - Collection name in database.
- `projection` - Projection selector, see [Projection](https://docs.mongodb.com/manual/reference/operator/projection/).

### task.transform

- `mapping` - The field mapping from mongodb's collection to elasticsearch's index.
- `parent` - The field in mongodb's collection to use as the `_parent` in elasticsearch's index. (optional)

### task.load

- `index` - The name of the index.
- `type` - The name of the document type.
- `body` - The request body, see [Put Mapping](https://www.elastic.co/guide/en/elasticsearch/reference/5.x/indices-put-mapping.html).

## License

[Mozilla Public License Version 2.0](https://www.mozilla.org/en-US/MPL/2.0/)


================================================
FILE: examples/config.json
================================================
{
  "controls": {
    "mongodbReadCapacity": 10000,
    "elasticsearchBulkSize": 5000,
    "elasticsearchBulkInterval": 5000,
    "indexNameSuffix": "_v1"
  },
  "mongodb": {
    "url": "mongodb://user:password@localhost:27017/db?replicaSet=rs0",
    "options": {
      "authSource": "admin",
      "readPreference": "secondaryPreferred"
    }
  },
  "elasticsearch": {
    "options": {
      "host": "http://host:9200",
      "apiVersion": "5.2"
    },
    "indices": [
      {
        "index": "banner",
        "body": {
          "settings": {
            "index": {
              "number_of_shards": 3,
              "number_of_replicas": 1,
              "mapper.dynamic": false
            }
          }
        }
      }
    ]
  },
  "tasks": [
    {
      "from": {
        "phase": "tail",
        "time": "2017-08-16T10:55:24.474Z"
      },
      "extract": {
        "db": "jike",
        "collection": "banners",
        "projection": {
          "name": 1,
          "shareCount.all": 1,
          "deleted": 1
        }
      },
      "transform": {
        "mapping": {
          "name": "name",
          "shareCount.all": "shareCount.all",
          "deleted": "isDeleted"
        }
      },
      "load": {
        "index": "banner",
        "type": "banner",
        "body": {
          "dynamic": false,
          "properties": {
            "name": {
              "type": "text",
              "analyzer": "ik_max_word",
              "search_analyzer": "ik_smart",
              "fields": {
                "exact": {
                  "type": "keyword"
                }
              }
            },
            "shareCount.all": {
              "type": "long"
            },
            "isDeleted": {
              "type": "boolean"
            }
          }
        }
      }
    }
  ]
}


================================================
FILE: examples/example.js
================================================
const fs = require('fs')

const Redis = require('ioredis')

const { Config, Task, run } = require('../dist/src/index')

const redis = new Redis('localhost')

// Redis key under which the checkpoint of the task called `name` is kept.
const checkpointKey = (name) => `mongo-es:${name}`

// Persist each checkpoint as a JSON string in Redis.
Task.onSaveCheckpoint((name, checkpoint) =>
  redis.set(checkpointKey(name), JSON.stringify(checkpoint)),
)

// Read the stored JSON string back, log what was found, and hand the parsed value to mongo-es.
Task.onLoadCheckpoint(async (name) => {
  const key = checkpointKey(name)
  const str = await redis.get(key)
  console.log('loaded', key, str)
  return JSON.parse(str)
})

// Start syncing with the example configuration.
run(new Config(fs.readFileSync('examples/config.json', 'utf8')))


================================================
FILE: license
================================================
Mozilla Public License Version 2.0
==================================

1. Definitions
--------------

1.1. "Contributor"
    means each individual or legal entity that creates, contributes to
    the creation of, or owns Covered Software.

1.2. "Contributor Version"
    means the combination of the Contributions of others (if any) used
    by a Contributor and that particular Contributor's Contribution.

1.3. "Contribution"
    means Covered Software of a particular Contributor.

1.4. "Covered Software"
    means Source Code Form to which the initial Contributor has attached
    the notice in Exhibit A, the Executable Form of such Source Code
    Form, and Modifications of such Source Code Form, in each case
    including portions thereof.

1.5. "Incompatible With Secondary Licenses"
    means

    (a) that the initial Contributor has attached the notice described
        in Exhibit B to the Covered Software; or

    (b) that the Covered Software was made available under the terms of
        version 1.1 or earlier of the License, but not also under the
        terms of a Secondary License.

1.6. "Executable Form"
    means any form of the work other than Source Code Form.

1.7. "Larger Work"
    means a work that combines Covered Software with other material, in 
    a separate file or files, that is not Covered Software.

1.8. "License"
    means this document.

1.9. "Licensable"
    means having the right to grant, to the maximum extent possible,
    whether at the time of the initial grant or subsequently, any and
    all of the rights conveyed by this License.

1.10. "Modifications"
    means any of the following:

    (a) any file in Source Code Form that results from an addition to,
        deletion from, or modification of the contents of Covered
        Software; or

    (b) any new file in Source Code Form that contains any Covered
        Software.

1.11. "Patent Claims" of a Contributor
    means any patent claim(s), including without limitation, method,
    process, and apparatus claims, in any patent Licensable by such
    Contributor that would be infringed, but for the grant of the
    License, by the making, using, selling, offering for sale, having
    made, import, or transfer of either its Contributions or its
    Contributor Version.

1.12. "Secondary License"
    means either the GNU General Public License, Version 2.0, the GNU
    Lesser General Public License, Version 2.1, the GNU Affero General
    Public License, Version 3.0, or any later versions of those
    licenses.

1.13. "Source Code Form"
    means the form of the work preferred for making modifications.

1.14. "You" (or "Your")
    means an individual or a legal entity exercising rights under this
    License. For legal entities, "You" includes any entity that
    controls, is controlled by, or is under common control with You. For
    purposes of this definition, "control" means (a) the power, direct
    or indirect, to cause the direction or management of such entity,
    whether by contract or otherwise, or (b) ownership of more than
    fifty percent (50%) of the outstanding shares or beneficial
    ownership of such entity.

2. License Grants and Conditions
--------------------------------

2.1. Grants

Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:

(a) under intellectual property rights (other than patent or trademark)
    Licensable by such Contributor to use, reproduce, make available,
    modify, display, perform, distribute, and otherwise exploit its
    Contributions, either on an unmodified basis, with Modifications, or
    as part of a Larger Work; and

(b) under Patent Claims of such Contributor to make, use, sell, offer
    for sale, have made, import, and otherwise transfer either its
    Contributions or its Contributor Version.

2.2. Effective Date

The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.

2.3. Limitations on Grant Scope

The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:

(a) for any code that a Contributor has removed from Covered Software;
    or

(b) for infringements caused by: (i) Your and any other third party's
    modifications of Covered Software, or (ii) the combination of its
    Contributions with other software (except as part of its Contributor
    Version); or

(c) under Patent Claims infringed by Covered Software in the absence of
    its Contributions.

This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).

2.4. Subsequent Licenses

No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).

2.5. Representation

Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.

2.6. Fair Use

This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.

2.7. Conditions

Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.

3. Responsibilities
-------------------

3.1. Distribution of Source Form

All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.

3.2. Distribution of Executable Form

If You distribute Covered Software in Executable Form then:

(a) such Covered Software must also be made available in Source Code
    Form, as described in Section 3.1, and You must inform recipients of
    the Executable Form how they can obtain a copy of such Source Code
    Form by reasonable means in a timely manner, at a charge no more
    than the cost of distribution to the recipient; and

(b) You may distribute such Executable Form under the terms of this
    License, or sublicense it under different terms, provided that the
    license for the Executable Form does not attempt to limit or alter
    the recipients' rights in the Source Code Form under this License.

3.3. Distribution of a Larger Work

You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).

3.4. Notices

You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.

3.5. Application of Additional Terms

You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.

4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------

If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.

5. Termination
--------------

5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.

5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.

5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.

************************************************************************
*                                                                      *
*  6. Disclaimer of Warranty                                           *
*  -------------------------                                           *
*                                                                      *
*  Covered Software is provided under this License on an "as is"       *
*  basis, without warranty of any kind, either expressed, implied, or  *
*  statutory, including, without limitation, warranties that the       *
*  Covered Software is free of defects, merchantable, fit for a        *
*  particular purpose or non-infringing. The entire risk as to the     *
*  quality and performance of the Covered Software is with You.        *
*  Should any Covered Software prove defective in any respect, You     *
*  (not any Contributor) assume the cost of any necessary servicing,   *
*  repair, or correction. This disclaimer of warranty constitutes an   *
*  essential part of this License. No use of any Covered Software is   *
*  authorized under this License except under this disclaimer.         *
*                                                                      *
************************************************************************

************************************************************************
*                                                                      *
*  7. Limitation of Liability                                          *
*  --------------------------                                          *
*                                                                      *
*  Under no circumstances and under no legal theory, whether tort      *
*  (including negligence), contract, or otherwise, shall any           *
*  Contributor, or anyone who distributes Covered Software as          *
*  permitted above, be liable to You for any direct, indirect,         *
*  special, incidental, or consequential damages of any character      *
*  including, without limitation, damages for lost profits, loss of    *
*  goodwill, work stoppage, computer failure or malfunction, or any    *
*  and all other commercial damages or losses, even if such party      *
*  shall have been informed of the possibility of such damages. This   *
*  limitation of liability shall not apply to liability for death or   *
*  personal injury resulting from such party's negligence to the       *
*  extent applicable law prohibits such limitation. Some               *
*  jurisdictions do not allow the exclusion or limitation of           *
*  incidental or consequential damages, so this exclusion and          *
*  limitation may not apply to You.                                    *
*                                                                      *
************************************************************************

8. Litigation
-------------

Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.

9. Miscellaneous
----------------

This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.

10. Versions of the License
---------------------------

10.1. New Versions

Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.

10.2. Effect of New Versions

You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.

10.3. Modified Versions

If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).

10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses

If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.

Exhibit A - Source Code Form License Notice
-------------------------------------------

  This Source Code Form is subject to the terms of the Mozilla Public
  License, v. 2.0. If a copy of the MPL was not distributed with this
  file, You can obtain one at http://mozilla.org/MPL/2.0/.

If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.

You may add additional accurate notices of copyright ownership.

Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------

  This Source Code Form is "Incompatible With Secondary Licenses", as
  defined by the Mozilla Public License, v. 2.0.


================================================
FILE: package.json
================================================
{
  "name": "mongo-es",
  "version": "3.4.4",
  "description": "A MongoDB to Elasticsearch connector",
  "keywords": [
    "mongodb",
    "elasticsearch",
    "connector",
    "etl"
  ],
  "main": "dist/src/index.js",
  "types": "dist/src/index.d.ts",
  "typings": "dist/src/index.d.ts",
  "scripts": {
    "start": "node dist/src/main.js",
    "compile": "tsc",
    "build": "npm run clean && npm run compile",
    "dev": "npm run clean && tsc -w & NODE_ENV=dev nodemon dist/src/main.js",
    "test": "npm run compile && ava dist/test/*.test.js",
    "clean": "rm -rf dist",
    "purge": "rm -rf package-lock.json node_modules && npm run clean",
    "format": "prettier --config .prettierrc --write '**/*'",
    "prepublishOnly": "npm run build"
  },
  "bin": {
    "mongo-es": "dist/src/main.js"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/jike-engineering/mongo-es.git"
  },
  "author": "",
  "license": "MPL-2.0",
  "bugs": {
    "url": "https://github.com/jike-engineering/mongo-es/issues"
  },
  "homepage": "https://github.com/jike-engineering/mongo-es#readme",
  "dependencies": {
    "elasticsearch": "16.7.1",
    "lodash": "4.17.21",
    "mongodb": "3.6.0",
    "rx": "4.1.0"
  },
  "devDependencies": {
    "@types/elasticsearch": "5.0.36",
    "@types/lodash": "4.14.159",
    "@types/mongodb": "3.5.26",
    "@types/rx": "4.1.2",
    "ava": "3.11.1",
    "husky": "4.2.5",
    "nodemon": "2.0.4",
    "prettier": "2.0.5",
    "pretty-quick": "2.0.1",
    "source-map-support": "0.5.19",
    "typescript": "3.9.7"
  },
  "husky": {
    "hooks": {
      "pre-commit": "pretty-quick --staged"
    }
  }
}


================================================
FILE: src/config.ts
================================================
import { ObjectID, MongoClientOptions } from 'mongodb'
import { ConfigOptions, IndicesCreateParams, IndicesPutMappingParams } from 'elasticsearch'

/**
 * The `mongodb` section of the configuration.
 */
export class MongoConfig {
  /** Connection URI string. */
  url: string
  /** Connection settings handed to the MongoDB client; `{}` when the section omits them. */
  options?: MongoClientOptions

  /**
   * @param section parsed `mongodb` config object; `options` falls back to an empty object.
   */
  constructor({ url, options = {} }) {
    this.options = options
    this.url = url
  }
}

/**
 * The `elasticsearch` section of the configuration.
 */
export class ElasticsearchConfig {
  /** Elasticsearch client config options. */
  options: ConfigOptions
  /** Index-creation parameter sets; an empty list when the section omits them. */
  indices: IndicesCreateParams[]

  /**
   * @param section parsed `elasticsearch` config object; `indices` falls back to `[]`.
   */
  constructor({ options, indices = [] }) {
    this.indices = indices
    this.options = options
  }
}

/**
 * A resumable position of a task: which phase it is in and where to continue from.
 */
export class CheckPoint {
  /** Current phase of the task. */
  phase: 'scan' | 'tail'
  /** Scan position; only assigned while `phase` is `'scan'`. */
  id: ObjectID
  /** Point in time associated with the checkpoint. */
  time: Date

  /**
   * @param from raw checkpoint; `id` defaults to the all-zero ObjectID string and
   *             `time` defaults to the moment of construction.
   */
  constructor({ phase, id = '000000000000000000000000', time = Date.now() }) {
    const scanning = phase === 'scan'
    this.phase = phase
    // The id is only meaningful for scanning, so it is left unset in the tail phase.
    if (scanning) {
      this.id = new ObjectID(id)
    }
    this.time = new Date(time)
  }
}

// The `extract` part of a task: which MongoDB collection to read and which fields to fetch.
export type ExtractTask = {
  // Database name.
  db: string
  // Collection name in the database.
  collection: string
  // Projection selector; every listed field is flagged with 1 or 0.
  projection: {
    [key: string]: 1 | 0
  }
}

// The `transform` part of a task: how MongoDB fields map onto the Elasticsearch document.
export type TransformTask = {
  // Field in the MongoDB collection to use as `_parent` in the Elasticsearch index (optional).
  parent?: string
  // Field mapping from the MongoDB collection to the Elasticsearch index.
  mapping: {
    [key: string]: string
  }
  // NOTE(review): not described in the README; presumably constant key/value pairs
  // attached to every document — confirm against the processor before relying on it.
  static?: {
    [key: string]: string
  }
}

// The `load` part of a task is expressed directly as the Elasticsearch client's put-mapping params.
export type LoadTask = IndicesPutMappingParams

/**
 * A single extract/transform/load job, together with the class-level hooks
 * used to persist and restore checkpoints for every task.
 */
export class Task {
  /** Where the task starts (or resumes) from. */
  from: CheckPoint
  extract: ExtractTask
  transform: TransformTask
  load: LoadTask
  /** User-supplied hook that stores a checkpoint; unset until registered. */
  static onSaveCallback: (name: string, checkPoint: CheckPoint) => Promise<void>
  /** User-supplied hook that fetches a stored checkpoint; unset until registered. */
  static onLoadCallback: (name: string) => Promise<any | null>

  /**
   * @param task parsed task config; `from` is wrapped in a `CheckPoint`, the
   *             other sections are kept as given.
   */
  constructor({ from, extract, transform, load }) {
    this.from = new CheckPoint(from)
    this.extract = extract
    this.transform = transform
    this.load = load
  }

  /**
   * Identifier of the task, built as `<db>.<collection>___<index>.<type>`.
   */
  name(): string {
    const { db, collection } = this.extract
    const { index, type } = this.load
    return `${db}.${collection}___${index}.${type}`
  }

  /**
   * Switch the task from scanning to tailing and persist the new checkpoint.
   */
  async endScan(): Promise<void> {
    const checkpoint = this.from
    checkpoint.phase = 'tail'
    // The scan position is dropped once the task is tailing.
    delete checkpoint.id
    await Task.saveCheckpoint(this.name(), checkpoint)
  }

  /** Register the hook invoked by `saveCheckpoint`. */
  static onSaveCheckpoint(onSaveCallback: (name: string, checkPoint: CheckPoint) => Promise<void>) {
    Task.onSaveCallback = onSaveCallback
  }

  /** Register the hook invoked by `loadCheckpoint`. */
  static onLoadCheckpoint(onLoadCallback: (name: string) => Promise<any | null>) {
    Task.onLoadCallback = onLoadCallback
  }

  /**
   * Hand the checkpoint to the registered save hook, if there is one.
   * Best effort: a failing hook is logged and never propagated to the caller.
   */
  static async saveCheckpoint(name: string, checkPoint: CheckPoint): Promise<void> {
    if (!Task.onSaveCallback || !(Task.onSaveCallback instanceof Function)) {
      return
    }
    try {
      await Task.onSaveCallback(name, checkPoint)
    } catch (err) {
      console.error('on save checkpoint', name, checkPoint, err)
    }
  }

  /**
   * Ask the registered load hook for a stored checkpoint.
   *
   * @returns a `CheckPoint` built from the stored value when it carries a
   *          `phase`; `null` when no hook is registered, nothing usable was
   *          stored, or loading failed (the failure is logged).
   */
  static async loadCheckpoint(name: string): Promise<CheckPoint | null> {
    if (!Task.onLoadCallback || !(Task.onLoadCallback instanceof Function)) {
      return null
    }
    try {
      const stored = await Task.onLoadCallback(name)
      return stored && stored.phase ? new CheckPoint(stored) : null
    } catch (err) {
      console.error('on load checkpoint', name, err)
      return null
    }
  }
}

/**
 * The `controls` section of the configuration: throughput and naming knobs.
 *
 * Every field is optional, and so is the section itself: constructing with
 * `undefined` (a config file without a `controls` key) yields all defaults
 * instead of failing on destructuring.
 */
export class Controls {
  /** Max docs read per second; defaults to `Infinity` (no limit). */
  mongodbReadCapacity: number
  /** Max bulk interval per request; defaults to `5000`. */
  elasticsearchBulkInterval: number
  /** Max bulk size per request; defaults to `5000`. */
  elasticsearchBulkSize: number
  /** Index name suffix, for index version control; defaults to `''`. */
  indexNameSuffix: string

  /**
   * @param controls parsed `controls` config object; may be omitted entirely.
   */
  constructor({
    mongodbReadCapacity = Infinity,
    elasticsearchBulkInterval = 5000,
    elasticsearchBulkSize = 5000,
    indexNameSuffix = '',
  } = {}) {
    this.mongodbReadCapacity = mongodbReadCapacity
    this.elasticsearchBulkInterval = elasticsearchBulkInterval
    this.elasticsearchBulkSize = elasticsearchBulkSize
    this.indexNameSuffix = indexNameSuffix
  }
}

/**
 * The whole configuration, parsed from the JSON text of a config file.
 */
export class Config {
  mongodb: MongoConfig
  elasticsearch: ElasticsearchConfig
  tasks: Task[]
  controls: Controls

  /**
   * @param str JSON text with `mongodb`, `elasticsearch`, `tasks` and `controls` sections.
   */
  constructor(str: string) {
    const {
      mongodb: mongoSection,
      elasticsearch: esSection,
      tasks: taskSections,
      controls: controlSection,
    } = JSON.parse(str)

    this.mongodb = new MongoConfig(mongoSection)
    this.elasticsearch = new ElasticsearchConfig(esSection)

    // Wrap every raw task description, keeping the order of the config file.
    const built = []
    for (const section of taskSections) {
      built.push(new Task(section))
    }
    this.tasks = built

    this.controls = new Controls(controlSection)
  }
}


================================================
FILE: src/elasticsearch.ts
================================================
import { Client, BulkIndexDocumentsParams } from 'elasticsearch'
import * as _ from 'lodash'

import { ESDoc } from './types'
import { ElasticsearchConfig, Task } from './config'

/**
 * Elasticsearch access for one task: bulk writes plus batched document lookups.
 *
 * `search` and `retrieve` do not issue a request per call. Each call parks its resolver in a
 * buffer keyed by document id; a timer drains up to 1024 ids at a time with one request and
 * resolves every waiting caller. Lookup failures are logged and reported to callers as `null`,
 * never as a rejection.
 *
 * The client is static: it is created from the options of the first instance constructed and
 * reused by every later instance.
 */
export default class Elasticsearch {
  static client: Client
  task: Task
  // resolvers waiting for the next batched search, keyed by document id
  searchBuffer: { [id: string]: ((doc: ESDoc | null) => void)[] } = {}
  // true while a search drain is scheduled or in progress
  searchRunning: boolean = false
  // resolvers waiting for the next batched mget, keyed by document id
  retrieveBuffer: { [id: string]: ((doc: ESDoc | null) => void)[] } = {}
  // true while a retrieve drain is scheduled or in progress
  retrieveRunning: boolean = false

  /**
   * @param elasticsearch - connection settings; only used when no shared client exists yet
   * @param task - task whose `load.index` / `load.type` the lookups are addressed to
   */
  constructor(elasticsearch: ElasticsearchConfig, task: Task) {
    if (!Elasticsearch.client) {
      Elasticsearch.client = new Client({ ...elasticsearch.options })
    }
    this.task = task
  }

  /**
   * Sends one bulk request.
   *
   * Rejects with the client error when the request fails. The bulk response body is passed to
   * `resolve` without being inspected.
   */
  async bulk(params: BulkIndexDocumentsParams): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      Elasticsearch.client.bulk(params, (err, response) => {
        err ? reject(err) : resolve(response)
      })
    })
  }

  /**
   * Looks a document up with a search query (batched with other pending searches).
   *
   * @param id - document id
   * @returns the mapped document, or `null` when it was not found or the search failed
   */
  async search(id: string): Promise<ESDoc | null> {
    return new Promise<ESDoc | null>((resolve) => {
      this.searchBuffer[id] = this.searchBuffer[id] || []
      this.searchBuffer[id].push(resolve)
      if (!this.searchRunning) {
        this.searchRunning = true
        setTimeout(this._search.bind(this), 1000)
      }
    })
  }

  /**
   * Drains up to 1024 buffered search ids, resolves their waiters and re-arms the timer. The
   * drain loop stops once the buffer is found empty.
   */
  async _search(): Promise<void> {
    const ids = _.take(_.keys(this.searchBuffer), 1024)
    if (ids.length === 0) {
      this.searchRunning = false
      return
    }
    const docs = await this._searchBatchSafe(ids)
    ids.forEach((id) => {
      const cbs = this.searchBuffer[id]
      delete this.searchBuffer[id]
      cbs.forEach((cb) => {
        cb(docs[id] || null)
      })
    })
    setTimeout(this._search.bind(this), 1000)
  }

  /**
   * Runs one terms query on `_id` for the given ids.
   *
   * @returns found documents keyed by id; an empty object on any error (which is logged)
   */
  async _searchBatchSafe(ids: string[]): Promise<{ [id: string]: ESDoc }> {
    return new Promise<{ [id: string]: ESDoc }>((resolve) => {
      Elasticsearch.client.search<ESDoc>(
        {
          index: this.task.load.index,
          type: this.task.load.type,
          body: {
            // Elasticsearch returns only 10 hits unless `size` is given; ask for one hit per
            // id so batches larger than 10 are not silently truncated
            size: ids.length,
            query: {
              terms: {
                _id: ids,
              },
            },
          },
        },
        (err, response) => {
          try {
            if (err) {
              console.warn('search from elasticsearch', this.task.name(), ids, err.message)
              resolve({})
              return
            }
            console.debug('search from elasticsearch', response)
            const docs: ESDoc[] = response.hits.hits.map(this._mapResponse.bind(this))
            resolve(_.keyBy(docs, (doc) => doc._id))
          } catch (err2) {
            console.error('search from elasticsearch', this.task.name(), ids, err2)
            resolve({})
          }
        },
      )
    })
  }

  /**
   * Looks a document up by id with mget (batched with other pending retrieves).
   *
   * @param id - document id
   * @returns the mapped document, or `null` when it was not found or the request failed
   */
  async retrieve(id: string): Promise<ESDoc | null> {
    return new Promise<ESDoc | null>((resolve) => {
      this.retrieveBuffer[id] = this.retrieveBuffer[id] || []
      this.retrieveBuffer[id].push(resolve)
      if (!this.retrieveRunning) {
        this.retrieveRunning = true
        setTimeout(this._retrieve.bind(this), 1000)
      }
    })
  }

  /**
   * Drains up to 1024 buffered retrieve ids, resolves their waiters and re-arms the timer. The
   * drain loop stops once the buffer is found empty.
   */
  async _retrieve(): Promise<void> {
    const ids = _.take(_.keys(this.retrieveBuffer), 1024)
    if (ids.length === 0) {
      this.retrieveRunning = false
      return
    }
    const docs = await this._retrieveBatchSafe(ids)
    ids.forEach((id) => {
      const cbs = this.retrieveBuffer[id]
      delete this.retrieveBuffer[id]
      cbs.forEach((cb) => {
        cb(docs[id] || null)
      })
    })
    setTimeout(this._retrieve.bind(this), 1000)
  }

  /**
   * Runs one mget request for the given ids.
   *
   * @returns found documents keyed by id; an empty object on any error (which is logged)
   */
  async _retrieveBatchSafe(ids: string[]): Promise<{ [id: string]: ESDoc }> {
    return new Promise<{ [id: string]: ESDoc }>((resolve) => {
      Elasticsearch.client.mget<ESDoc>(
        {
          index: this.task.load.index as string,
          type: this.task.load.type,
          body: {
            ids,
          },
        },
        (err, response) => {
          try {
            if (err || !response.docs) {
              // `err` is absent when the request succeeded but the response carries no docs
              console.warn(
                'retrieve from elasticsearch',
                this.task.name(),
                ids,
                err ? err.message : 'response has no docs',
              )
              resolve({})
              return
            }
            console.debug('retrieve from elasticsearch', response)
            const docs: ESDoc[] = response.docs
              .filter((doc) => doc.found)
              .map(this._mapResponse.bind(this))
            resolve(_.keyBy(docs, (doc) => doc._id))
          } catch (err2) {
            console.error('retrieve from elasticsearch', this.task.name(), ids, err2)
            resolve({})
          }
        },
      )
    })
  }

  /**
   * Turns a raw hit into an `ESDoc`: the source fields plus `_id`, and, when the task defines
   * a parent path and the hit has a `_parent`, the parent id stored at that path.
   *
   * The hit's `_source` object is modified in place and returned.
   */
  _mapResponse(hit: { _id: string; _parent?: string; _source: ESDoc }): ESDoc {
    const doc = hit._source || {}
    doc._id = hit._id
    if (this.task.transform.parent && hit._parent) {
      _.set(doc, this.task.transform.parent, hit._parent)
    }
    return doc
  }
}


================================================
FILE: src/index.ts
================================================
import MongoDB from './mongodb'
import Elasticsearch from './elasticsearch'
import Indices from './indices'
import Processor from './processor'
import { Config, Task } from './config'

/**
 * Starts the connector for every task in `config`.
 *
 * Order of work: make sure indices and mappings exist, let each task resume from the checkpoint
 * returned by `Task.loadCheckpoint` (if any), then handle the tasks one after another. A task
 * whose starting phase is `scan` is scanned to completion before its oplog tail is started.
 * Tails are not awaited, so the returned promise settles once every tail has been started.
 */
export async function run(config: Config): Promise<void> {
  console.log('run', new Date())

  // indices and mappings must exist before anything is written
  await Indices.init(config)

  // resume: a stored checkpoint replaces the configured starting point
  for (const task of config.tasks) {
    const restored = await Task.loadCheckpoint(task.name())
    if (restored) {
      task.from = restored
    }
    console.log('from checkpoint', task.name(), task.from)
  }

  // start the tasks sequentially
  for (const task of config.tasks) {
    const source = await MongoDB.init(config.mongodb, task)
    const sink = new Elasticsearch(config.elasticsearch, task)
    const processor = new Processor(task, config.controls, source, sink)

    const startsWithScan = task.from.phase === 'scan'
    if (startsWithScan) {
      console.log('scan', task.name(), 'from', task.from.id)
      await processor.scanDocument()
      await task.endScan()
      console.log('scan', task.name(), 'end')
    }

    console.log('tail', task.name(), 'from', task.from.time)
    // a failing tail terminates the whole process
    // NOTE(review): the exit status is 0 although this is a failure path — confirm intended
    processor.tailOpLog().catch((err) => {
      console.error('tailOpLog', err)
      process.exit(0)
    })
  }
}

// Debug output is only emitted when NODE_ENV is 'dev'; otherwise console.debug is replaced by
// a no-op. This runs as a side effect of importing this module.
console.debug = process.env.NODE_ENV === 'dev' ? console.log : () => null

export { Config, Task }


================================================
FILE: src/indices.ts
================================================
import {
  Client,
  IndicesCreateParams,
  IndicesPutMappingParams,
  IndicesExistsParams,
} from 'elasticsearch'

import { Config, ElasticsearchConfig } from './config'

/**
 * Index bootstrap helper: creates missing indices and puts each task's mapping.
 *
 * Only `init` is meant to be called from outside; the constructor is private and merely makes
 * sure the shared static client exists.
 */
export default class Indices {
  static client: Client

  private constructor(elasticsearch: ElasticsearchConfig) {
    // the client is created once and shared by later instances
    if (!Indices.client) {
      Indices.client = new Client({ ...elasticsearch.options })
    }
  }

  /**
   * Appends the configured suffix to every index name (modifying `config` in place), creates
   * the indices that do not exist yet and puts the mapping of every task.
   */
  static async init(config: Config): Promise<void> {
    const indices = new Indices(config.elasticsearch)

    for (const index of config.elasticsearch.indices) {
      index.index += config.controls.indexNameSuffix
      const alreadyThere = await indices.exists(index)
      if (alreadyThere) {
        continue
      }
      await indices.create(index)
      console.log('create index', index.index)
    }

    for (const task of config.tasks) {
      task.load.index += config.controls.indexNameSuffix
      await indices.putMapping(task.load)
      console.log('put mapping', task.load.index, task.load.type)
    }
  }

  /** Promise wrapper around `client.indices.create`. */
  async create(params: IndicesCreateParams): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      Indices.client.indices.create(params, (err, response) => {
        if (err) {
          reject(err)
          return
        }
        resolve(response)
      })
    })
  }

  /** Promise wrapper around `client.indices.putMapping`. */
  async putMapping(params: IndicesPutMappingParams): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      Indices.client.indices.putMapping(params, (err, response) => {
        if (err) {
          reject(err)
          return
        }
        resolve(response)
      })
    })
  }

  /** Promise wrapper around `client.indices.exists`; resolves with the client's answer. */
  async exists(params: IndicesExistsParams): Promise<boolean> {
    return new Promise<boolean>((resolve, reject) => {
      Indices.client.indices.exists(params, (err, response) => {
        if (err) {
          reject(err)
          return
        }
        resolve(response)
      })
    })
  }
}


================================================
FILE: src/main.ts
================================================
#!/usr/bin/env node

import { readFile } from 'fs'
import { resolve as resolvePath } from 'path'

import { Config } from './config'
import { run } from './index'

/**
 * Reads and parses the config file at `path`.
 *
 * @param path - config file location, resolved against the current working directory
 * @returns the parsed configuration
 * Rejects when the file cannot be read or its content cannot be turned into a `Config`.
 */
async function readConfig(path: string): Promise<Config> {
  return new Promise<Config>((resolve, reject) => {
    readFile(resolvePath(path), 'utf8', (err, str) => {
      if (err) {
        reject(err)
        return
      }
      // this callback runs outside the promise executor, so an exception from the Config
      // constructor (e.g. malformed JSON) has to be turned into a rejection by hand;
      // otherwise it would surface as an uncaught exception
      try {
        resolve(new Config(str))
      } catch (parseErr) {
        reject(parseErr)
      }
    })
  })
}

// CLI entry: the first command-line argument is the config file path. Any failure while
// reading the config or inside run() is logged; the process is not terminated explicitly here.
readConfig(process.argv[2])
  .then(run)
  .catch((err) => {
    console.error('run', err)
  })


================================================
FILE: src/mongodb.ts
================================================
import { Readable } from 'stream'
import * as _ from 'lodash'

import { Timestamp, Cursor, MongoClient, ObjectID, Collection } from 'mongodb'

import { MongoDoc } from './types'
import { Task, MongoConfig } from './config'

/**
 * MongoDB access for one task: a scan stream over the source collection, a tailing cursor on
 * the oplog, and batched lookups of single documents by id.
 *
 * The oplog collection handle is static and shared by all instances.
 */
export default class MongoDB {
  static oplog: Collection
  collection: Collection
  task: Task
  // resolvers waiting for the next batched lookup, keyed by hex id
  retrieveBuffer: { [id: string]: ((doc: MongoDoc | null) => void)[] } = {}
  // true while a lookup drain is scheduled or in progress
  retrieveRunning: boolean = false

  private constructor(collection: Collection, task: Task) {
    this.collection = collection
    this.task = task
  }

  /**
   * Connects to MongoDB and builds the instance for `task`. The shared oplog handle is opened
   * on the first call only.
   */
  static async init(mongodb: MongoConfig, task: Task): Promise<MongoDB> {
    const client = await MongoClient.connect(mongodb.url, mongodb.options)
    const collection = client.db(task.extract.db).collection(task.extract.collection)
    if (!MongoDB.oplog) {
      const oplogClient = await MongoClient.connect(mongodb.url, mongodb.options)
      MongoDB.oplog = oplogClient.db('local').collection('oplog.rs')
    }
    return new MongoDB(collection, task)
  }

  /**
   * Streams the documents whose `_id` is at or after the task's starting id, restricted to
   * the task's projection.
   */
  getCollection(): Readable {
    const fromStartId = { _id: { $gte: this.task.from.id } }
    const cursor = this.collection.find(fromStartId).project(this.task.extract.projection)
    return cursor.stream()
  }

  /**
   * Opens a tailable cursor on the oplog entries of the task's namespace, starting at the
   * task's start time and skipping entries flagged `fromMigrate`.
   */
  getOplog(): Cursor {
    const namespace = `${this.task.extract.db}.${this.task.extract.collection}`
    const since = new Timestamp(0, this.task.from.time.getTime() / 1000)
    const cursor = MongoDB.oplog.find({
      ns: namespace,
      ts: { $gte: since },
      fromMigrate: { $ne: true },
    })
    return cursor
      .addCursorFlag('tailable', true)
      .addCursorFlag('oplogReplay', true)
      .addCursorFlag('noCursorTimeout', true)
      .addCursorFlag('awaitData', true)
  }

  /**
   * Fetches one document by id. Requests are queued and answered in batches by `_retrieve`.
   *
   * @returns the document, or `null` when it does not exist or the batch query failed
   */
  async retrieve(id: ObjectID): Promise<MongoDoc | null> {
    return new Promise<MongoDoc | null>((resolve) => {
      const key = id.toHexString()
      const waiters = this.retrieveBuffer[key] || []
      waiters.push(resolve)
      this.retrieveBuffer[key] = waiters
      if (this.retrieveRunning) {
        return
      }
      this.retrieveRunning = true
      setTimeout(() => this._retrieve(), 1000)
    })
  }

  /**
   * Drains up to 1024 queued ids, resolves their waiters and re-arms the timer. The drain loop
   * stops once the queue is found empty.
   */
  async _retrieve(): Promise<void> {
    const ids = _.take(_.keys(this.retrieveBuffer), 1024)
    if (ids.length === 0) {
      this.retrieveRunning = false
      return
    }
    const docs = await this._retrieveBatchSafe(ids)
    for (const id of ids) {
      const waiters = this.retrieveBuffer[id]
      delete this.retrieveBuffer[id]
      for (const notify of waiters) {
        notify(docs[id] || null)
      }
    }
    setTimeout(() => this._retrieve(), 1000)
  }

  /**
   * Loads the documents for the given hex ids with a single `$in` query.
   *
   * @returns documents keyed by hex id; an empty object when the query fails (logged)
   */
  async _retrieveBatchSafe(ids: string[]): Promise<{ [id: string]: MongoDoc }> {
    try {
      const wanted = { _id: { $in: ids.map(ObjectID.createFromHexString) } }
      const docs = await this.collection.find<MongoDoc>(wanted).toArray()
      console.debug('retrieve from mongodb', docs)
      return _.keyBy(docs, (doc) => doc._id.toHexString())
    } catch (err) {
      console.warn('retrieve from mongodb', this.task.name(), ids, err)
      return {}
    }
  }
}


================================================
FILE: src/processor.ts
================================================
import { Readable } from 'stream'

import { Observable } from 'rx'
import * as _ from 'lodash'
import { Timestamp } from 'mongodb'

import { Task, Controls, CheckPoint } from './config'
import { IR, MongoDoc, ESDoc, OpLog } from './types'
import Elasticsearch from './elasticsearch'
import MongoDB from './mongodb'

/**
 * Moves the data of one task from MongoDB to Elasticsearch.
 *
 * `scanDocument` copies the documents returned by the collection scan, `tailOpLog` follows the
 * oplog. Both convert their input into IR records (`transformer` / `oplog`) and write them
 * with `load`, saving a checkpoint after each successful batch.
 */
export default class Processor {
  // read throttle state; static, so it is shared by (and reset by) every Processor created
  static provisionedReadCapacity: number
  static consumedReadCapacity: number
  task: Task
  controls: Controls
  mongodb: MongoDB
  elasticsearch: Elasticsearch
  // oplog batches waiting to be processed by _processOplog
  queue: OpLog[][] = []
  // true while _processOplog is scheduled or working
  running: boolean = false

  constructor(task: Task, controls: Controls, mongodb: MongoDB, elasticsearch: Elasticsearch) {
    this.task = task
    this.controls = controls
    this.mongodb = mongodb
    this.elasticsearch = elasticsearch
    Processor.provisionedReadCapacity = controls.mongodbReadCapacity
    Processor.consumedReadCapacity = 0
  }

  /**
   * Throttles a stream: once the number of `data` events reaches the provisioned capacity the
   * stream is paused; a one-second timer resets the counter and resumes it. The timer is
   * stopped when the stream emits `end`.
   *
   * @param stream - stream to throttle; returned unchanged when the capacity is falsy
   * @returns the same stream instance
   */
  static controlReadCapacity(stream: Readable): Readable {
    if (!Processor.provisionedReadCapacity) {
      return stream
    }
    const interval = setInterval(() => {
      Processor.consumedReadCapacity = 0
      stream.resume()
    }, 1000)
    stream.addListener('data', () => {
      Processor.consumedReadCapacity++
      if (Processor.consumedReadCapacity >= Processor.provisionedReadCapacity) {
        stream.pause()
      }
    })
    stream.addListener('end', () => {
      clearInterval(interval)
    })
    return stream
  }

  /**
   * Converts a document into an IR record.
   *
   * @param action - `delete` yields a delete record; `upsert` yields the mapped fields
   * @param doc - source document (MongoDB shape, or Elasticsearch shape when `isESDoc`)
   * @param timestamp - oplog timestamp; its high bits become the record's `timestamp`
   * @param isESDoc - when true the fields are read from their target paths, because `doc`
   *   already has the Elasticsearch layout
   * @returns the record, or `null` for an upsert that would carry no data
   */
  transformer(
    action: 'upsert' | 'delete',
    doc: MongoDoc | ESDoc,
    timestamp?: Timestamp,
    isESDoc: boolean = false,
  ): IR | null {
    if (action === 'delete') {
      return {
        action: 'delete',
        id: doc._id.toString(),
        parent: this.task.transform.parent && _.get(doc, this.task.transform.parent),
        timestamp: timestamp ? timestamp.getHighBits() : 0,
      }
    }

    // start from a copy of the static fields, then add every mapped field present in doc
    const data = _.reduce(
      this.task.transform.mapping,
      (obj, value, key) => {
        if (isESDoc) {
          key = value
        }
        if (_.has(doc, key)) {
          _.set(obj, value, _.get(doc, key))
        }
        return obj
      },
      { ...this.task.transform.static },
    )
    if (_.isEmpty(data)) {
      return null
    }
    return {
      action: 'upsert',
      id: doc._id.toString(),
      data,
      parent: this.task.transform.parent && _.get(doc, this.task.transform.parent),
      timestamp: timestamp ? timestamp.getHighBits() : 0,
    }
  }

  /**
   * Applies `$set` / `$unset` to a MongoDB-shaped document, for mapped fields only.
   * The document is modified in place and returned.
   */
  applyUpdateMongoDoc(
    doc: MongoDoc,
    set: { [key: string]: any } = {},
    unset: { [key: string]: any } = {},
  ): MongoDoc {
    _.forEach(this.task.transform.mapping, (ignored, key) => {
      if (_.get(unset, key)) {
        _.unset(doc, key)
      }
      if (_.has(set, key)) {
        _.set(doc, key, _.get(set, key))
      }
    })
    return doc
  }

  /**
   * Applies `$set` / `$unset` to an Elasticsearch-shaped document: changes are looked up by
   * source path and written to the mapped target path. The document is modified in place and
   * returned.
   */
  applyUpdateESDoc(
    doc: ESDoc,
    set: { [key: string]: any } = {},
    unset: { [key: string]: any } = {},
  ): ESDoc {
    _.forEach(this.task.transform.mapping, (value, key) => {
      if (_.get(unset, key)) {
        _.unset(doc, value)
      }
      if (_.has(set, key)) {
        _.set(doc, value, _.get(set, key))
      }
    })
    return doc
  }

  /**
   * Tells whether an update entry touches none of the mapped fields (directly, via `$set`, or
   * via `$unset`). Entries that are not updates always yield `true`.
   */
  ignoreUpdate(oplog: OpLog): boolean {
    let ignore = true
    if (oplog.op === 'u') {
      _.forEach(this.task.transform.mapping, (value, key) => {
        ignore =
          ignore && !(_.has(oplog.o, key) || _.has(oplog.o.$set, key) || _.get(oplog.o.$unset, key))
      })
    }
    return ignore
  }

  /**
   * Wraps the throttled collection stream in an Observable.
   */
  scan(): Observable<MongoDoc> {
    return Observable.create<MongoDoc>((observer) => {
      try {
        const stream = Processor.controlReadCapacity(this.mongodb.getCollection())
        stream.addListener('data', (doc: MongoDoc) => {
          observer.onNext(doc)
        })
        stream.addListener('error', (err: Error) => {
          observer.onError(err)
        })
        stream.addListener('end', () => {
          observer.onCompleted()
        })
      } catch (err) {
        observer.onError(err)
      }
    })
  }

  /**
   * Wraps the oplog cursor in an Observable. A cursor failure is forwarded as an error; the
   * Observable only completes when the cursor ends without one.
   */
  tail(): Observable<OpLog> {
    return Observable.create<OpLog>((observer) => {
      try {
        const cursor = this.mongodb.getOplog()
        cursor.forEach(
          (log: OpLog) => {
            observer.onNext(log)
          },
          (err) => {
            // the end callback also receives cursor errors; a failed cursor must not be
            // reported as a normal completion
            if (err) {
              observer.onError(err)
            } else {
              observer.onCompleted()
            }
          },
        )
      } catch (err) {
        observer.onError(err)
      }
    })
  }

  /**
   * Converts one oplog entry into an IR record.
   *
   * Inserts and replacement-style updates are transformed directly. `$set` / `$unset` updates
   * are applied to the current Elasticsearch document when one is found, otherwise the
   * document is re-read from MongoDB. Deletes of tasks with a parent path look the document
   * up in Elasticsearch first.
   *
   * @returns the record, or `null` when the entry is irrelevant, cannot be transformed, or an
   *   error occurs (errors are logged, never thrown)
   */
  async oplog(oplog: OpLog): Promise<IR | null> {
    try {
      switch (oplog.op) {
        case 'i': {
          return this.transformer('upsert', oplog.o, oplog.ts)
        }
        case 'u': {
          if (!oplog.o2._id) {
            console.warn('oplog', 'cannot transform', oplog)
            return null
          }
          if (this.ignoreUpdate(oplog)) {
            console.debug('ignoreUpdate', oplog)
            return null
          }
          // a key without the `$` prefix means `o` is treated as the document itself
          if (_.keys(oplog.o).find((key) => !key.startsWith('$'))) {
            return this.transformer(
              'upsert',
              {
                _id: oplog.o2._id,
                ...oplog.o,
              },
              oplog.ts,
            )
          }
          const old = this.task.transform.parent
            ? await this.elasticsearch.search(oplog.o2._id.toHexString())
            : await this.elasticsearch.retrieve(oplog.o2._id.toHexString())
          const doc = old
            ? this.applyUpdateESDoc(old, oplog.o.$set, oplog.o.$unset)
            : await this.mongodb.retrieve(oplog.o2._id)
          return doc ? this.transformer('upsert', doc, oplog.ts, !!old) : null
        }
        case 'd': {
          if (_.size(oplog.o) !== 1 || !oplog.o._id) {
            console.warn('oplog', 'cannot transform', oplog)
            return null
          }
          const doc = this.task.transform.parent
            ? await this.elasticsearch.search(oplog.o._id.toHexString())
            : oplog.o
          console.debug(doc)
          return doc ? this.transformer('delete', doc, oplog.ts) : null
        }
        default: {
          return null
        }
      }
    } catch (err) {
      console.error('oplog', err)
      return null
    }
  }

  /**
   * Writes IR records to the task's index and type with one bulk request. Does nothing for an
   * empty list; rejects when the bulk request fails.
   */
  async load(irs: IR[]): Promise<void> {
    if (irs.length === 0) {
      return
    }
    const body: any[] = []
    irs.forEach((ir) => {
      switch (ir.action) {
        case 'upsert': {
          body.push({
            index: {
              _index: this.task.load.index,
              _type: this.task.load.type,
              _id: ir.id,
              _parent: ir.parent,
            },
          })
          body.push(ir.data)
          break
        }
        case 'delete': {
          body.push({
            delete: {
              _index: this.task.load.index,
              _type: this.task.load.type,
              _id: ir.id,
              _parent: ir.parent,
            },
          })
          break
        }
      }
    })
    return await this.elasticsearch.bulk({ body })
  }

  /**
   * Collapses a batch of oplog entries to at most one entry per namespace and document id.
   *
   * Entries are handled in `ts` order: an update is folded into an earlier insert or update,
   * a delete cancels an earlier insert and otherwise replaces the stored entry. Stored entries
   * are modified in place. The result is sorted by `ts`.
   */
  mergeOplogs(oplogs: OpLog[]): OpLog[] {
    const store: { [key: string]: OpLog } = {}
    for (let oplog of _.sortBy(oplogs, 'ts')) {
      switch (oplog.op) {
        case 'i': {
          store[oplog.ns + oplog.o._id.toString()] = oplog
          break
        }
        case 'u': {
          const key = oplog.ns + oplog.o2._id.toString()
          const log = store[key]
          if (log && log.op === 'i') {
            log.o = this.applyUpdateMongoDoc(log.o, oplog.o.$set, oplog.o.$unset)
            log.ts = oplog.ts
          } else if (log && log.op === 'u') {
            log.o = _.merge(log.o, oplog.o)
            log.ts = oplog.ts
          } else {
            store[key] = oplog
          }
          break
        }
        case 'd': {
          const key = oplog.ns + oplog.o._id.toString()
          if (store[key] && store[key].op === 'i') {
            delete store[key]
          } else {
            store[key] = oplog
          }
          break
        }
      }
    }
    return _.sortBy(
      _.map(store, (oplog) => oplog),
      'ts',
    )
  }

  /**
   * Copies the scanned documents to Elasticsearch in batches.
   *
   * Resolves once the scan stream has ended AND every batch that was started has finished, so
   * no bulk load or `scan` checkpoint save is still running when the caller moves on. Rejects
   * when the scan stream fails. Failures of individual batches are logged and skipped.
   */
  async scanDocument(): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      // batches whose load / checkpoint save has not settled yet
      const inflight = new Set<Promise<void>>()
      this.scan()
        .bufferWithTimeOrCount(
          this.controls.elasticsearchBulkInterval,
          this.controls.elasticsearchBulkSize,
        )
        .map((docs) => _.compact<IR>(_.map(docs, (doc) => this.transformer('upsert', doc))))
        .subscribe(
          (irs) => {
            const job = this._loadScanBatch(irs)
            inflight.add(job)
            const forget = () => {
              inflight.delete(job)
            }
            job.then(forget, forget)
          },
          reject,
          () => {
            // the stream is done, but batches may still be loading
            Promise.all(Array.from(inflight)).then(() => resolve(), reject)
          },
        )
    })
  }

  /**
   * Loads one scan batch and saves a `scan` checkpoint at the id of its first record. Errors
   * are logged as warnings; empty batches are ignored.
   */
  async _loadScanBatch(irs: IR[]): Promise<void> {
    if (irs.length === 0) {
      return
    }
    try {
      await this.load(irs)
      await Task.saveCheckpoint(
        this.task.name(),
        new CheckPoint({
          phase: 'scan',
          id: irs[0].id,
        }),
      )
      console.log('scan', this.task.name(), irs.length, irs[0].id)
    } catch (err) {
      console.warn('scan', this.task.name(), err.message)
    }
  }

  /**
   * Follows the oplog: buffers entries into batches and queues them for `_processOplog`.
   *
   * The returned promise never resolves. It rejects when the tail fails, and also when the
   * tail completes, which is treated as an error.
   */
  async tailOpLog(): Promise<never> {
    return new Promise<never>((resolve, reject) => {
      this.tail()
        .bufferWithTimeOrCount(
          this.controls.elasticsearchBulkInterval,
          this.controls.elasticsearchBulkSize,
        )
        .subscribe(
          (oplogs) => {
            this.queue.push(oplogs)
            if (!this.running) {
              this.running = true
              setImmediate(this._processOplog.bind(this))
            }
          },
          (err) => {
            console.error('tail', this.task.name(), err)
            reject(err)
          },
          () => {
            const err = new Error('should not complete')
            console.error('tail', this.task.name(), err)
            reject(err)
          },
        )
    })
  }

  /**
   * Worker loop for queued oplog batches: everything queued so far is flattened into one list
   * and processed, then the loop re-schedules itself until it finds the queue empty.
   */
  async _processOplog() {
    if (this.queue.length === 0) {
      this.running = false
      return
    }
    while (this.queue.length > 0) {
      const oplogs = _.flatten(this.queue)
      this.queue = []
      await this._processOplogSafe(oplogs)
    }
    setImmediate(this._processOplog.bind(this))
  }

  /**
   * Merges, transforms and loads one list of oplog entries, then saves a `tail` checkpoint
   * dated ten seconds before now. Errors are logged as warnings and never thrown.
   */
  async _processOplogSafe(oplogs) {
    try {
      const irs = _.compact(
        await Promise.all(
          this.mergeOplogs(oplogs).map(async (oplog) => {
            return await this.oplog(oplog)
          }),
        ),
      )
      if (irs.length > 0) {
        await this.load(irs)
        await Task.saveCheckpoint(
          this.task.name(),
          new CheckPoint({
            phase: 'tail',
            time: Date.now() - 1000 * 10,
          }),
        )
        console.log('tail', this.task.name(), irs.length, new Date(irs[0].timestamp * 1000))
      }
    } catch (err) {
      console.warn('tail', this.task.name(), err.message)
    }
  }
}


================================================
FILE: src/types.ts
================================================
import { Timestamp, ObjectID } from 'mongodb'

// A document in MongoDB shape: an ObjectID `_id` plus arbitrary fields.
export type MongoDoc = {
  _id: ObjectID
  [key: string]: any
}

// A document in Elasticsearch shape: a string `_id` plus arbitrary fields.
export type ESDoc = {
  _id: string
  [key: string]: any
}

// Oplog entry body for op 'i': `o` holds the document including its `_id`.
export type OplogInsert = {
  op: 'i'
  o: {
    _id: ObjectID
    [key: string]: any
  }
}

// Oplog entry body for op 'u': `o` holds the change (optionally `$set` / `$unset`, or plain
// fields), `o2` identifies the affected document.
export type OplogUpdate = {
  op: 'u'
  o: {
    $set?: { [key: string]: any }
    $unset?: { [key: string]: any }
    [key: string]: any
  }
  o2: {
    _id: ObjectID
  }
}

// Oplog entry body for op 'd': `o` carries only the `_id` of the affected document.
export type OplogDelete = {
  op: 'd'
  o: {
    _id: ObjectID
  }
}

// One oplog entry: timestamp and namespace plus one of the op-specific bodies above.
export type OpLog = {
  ts: Timestamp
  ns: string
  fromMigrate?: boolean
} & (OplogInsert | OplogUpdate | OplogDelete)

// Record telling Processor.load to index `data` under `id` (with an optional parent).
// `timestamp` is filled from the high bits of the oplog timestamp, or 0 when there is none.
export type IRUpsert = {
  action: 'upsert'
  id: string
  parent?: string
  data: {
    [key: string]: any
  }
  timestamp: number
}

// Record telling Processor.load to delete the document `id` (with an optional parent).
export type IRDelete = {
  action: 'delete'
  id: string
  parent?: string
  timestamp: number
}

// What Processor.transformer produces and Processor.load consumes.
// NOTE(review): "IR" presumably stands for "intermediate representation" — confirm.
export type IR = IRUpsert | IRDelete


================================================
FILE: test/_init.ts
================================================
import 'source-map-support/register'


================================================
FILE: test/elasticsearch.test.ts
================================================
import test from 'ava'
import { Client } from 'elasticsearch'

import Processor from '../src/processor'
import Elasticsearch from '../src/elasticsearch'
import { Controls, Task } from '../src/config'

// Integration test: talks to an Elasticsearch node at localhost:9200.

// independent client used to verify what the code under test wrote
const client = new Client({
  host: 'localhost:9200',
})

// task writing to index "test", type "test"
const task: Task = new Task({
  from: {
    phase: 'scan',
  },
  extract: {},
  transform: {
    mapping: {
      'field0.field1': 'field1',
      'field0.field2': 'field2',
    },
  },
  load: {
    index: 'test',
    type: 'test',
  },
})

// load() one upsert record, then read the document back and compare it field by field
test('load', async (t) => {
  const elasticsearch = new Elasticsearch(
    {
      options: {
        host: 'localhost:9200',
      },
      indices: [],
    },
    task,
  )
  const processor = new Processor(task, new Controls({}), null as any, elasticsearch)
  await processor.load([
    {
      action: 'upsert',
      id: 'aaaaaaaaaaaaaaaaaaaaaaaa',
      data: {
        field1: 1,
        field2: 2,
      },
      parent: undefined,
      timestamp: 0,
    },
  ])
  const data = await client.get<any>({
    index: 'test',
    type: 'test',
    id: 'aaaaaaaaaaaaaaaaaaaaaaaa',
  })
  t.deepEqual(data, {
    _index: 'test',
    _type: 'test',
    _id: 'aaaaaaaaaaaaaaaaaaaaaaaa',
    _version: 1,
    found: true,
    _source: {
      field1: 1,
      field2: 2,
    },
  })
})

// the "test" index is created before the tests
test.before('create index', (t) => {
  return client.indices.create({
    index: 'test',
  })
})

// and removed afterwards, even when a test failed
test.after.always('delete index', (t) => {
  return client.indices.delete({
    index: 'test',
  })
})


================================================
FILE: test/processor.test.ts
================================================
import test from 'ava'
import { ObjectID, Timestamp } from 'mongodb'

import { OpLog, MongoDoc, IR, ESDoc } from '../src/types'
import { Controls, Task } from '../src/config'
import Processor from '../src/processor'

// Shared fixtures. applyUpdateMongoDoc / applyUpdateESDoc modify the document they are given,
// so the tests using them alter `mongoDoc` and `esDoc` below.

// update entry: sets field0.field1 and unsets field0.field2
const oplog: OpLog = {
  ts: new Timestamp(14, 1495012567),
  op: 'u',
  ns: 'db0.collection0',
  o2: {
    _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
  },
  o: {
    $set: {
      'field0.field1': 'set nested field',
    },
    $unset: {
      'field0.field2': 1,
    },
  },
}

// task whose mapping covers both fields touched by `oplog`
const task: Task = new Task({
  from: {
    phase: 'scan',
  },
  extract: {},
  transform: {
    mapping: {
      'field0.field1': 'field1',
      'field0.field2': 'field2',
    },
  },
  load: {},
})

// task whose mapping covers none of the fields touched by `oplog`
const task2: Task = new Task({
  from: {
    phase: 'scan',
  },
  extract: {},
  transform: {
    mapping: {
      'field0.field3': 'field3',
    },
  },
  load: {},
})

// source document in MongoDB shape
const mongoDoc: MongoDoc = {
  _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
  field0: {
    field1: 1,
    field2: 2,
  },
}

// the same document in Elasticsearch shape, i.e. after applying the mapping of `task`
const esDoc: ESDoc = {
  _id: 'aaaaaaaaaaaaaaaaaaaaaaaa',
  field1: 1,
  field2: 2,
}

// upsert: mapped fields are copied to their target names; without a timestamp it is 0
test('transformer create', (t) => {
  const processor = new Processor(task, new Controls({}), null as any, null as any)
  t.deepEqual(processor.transformer('upsert', mongoDoc), <IR>{
    action: 'upsert',
    id: 'aaaaaaaaaaaaaaaaaaaaaaaa',
    data: {
      field1: 1,
      field2: 2,
    },
    parent: undefined,
    timestamp: 0,
  })
})

// NOTE(review): same call and expectation as 'transformer create' above
test('transformer update', (t) => {
  const processor = new Processor(task, new Controls({}), null as any, null as any)
  t.deepEqual(processor.transformer('upsert', mongoDoc), <IR>{
    action: 'upsert',
    id: 'aaaaaaaaaaaaaaaaaaaaaaaa',
    data: {
      field1: 1,
      field2: 2,
    },
    parent: undefined,
    timestamp: 0,
  })
})

// delete: the record carries no data
test('transformer delete', (t) => {
  const processor = new Processor(task, new Controls({}), null as any, null as any)
  t.deepEqual(processor.transformer('delete', mongoDoc), <IR>{
    action: 'delete',
    id: 'aaaaaaaaaaaaaaaaaaaaaaaa',
    parent: undefined,
    timestamp: 0,
  })
})

// $set / $unset applied at the source paths of a MongoDB-shaped document
test('applyUpdateMongoDoc', (t) => {
  const transform = new Processor(task, new Controls({}), null as any, null as any)
  t.deepEqual(transform.applyUpdateMongoDoc(mongoDoc, oplog.o.$set, oplog.o.$unset), {
    _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
    field0: {
      field1: 'set nested field',
    },
  })
})

// the same change applied at the mapped target paths of an Elasticsearch-shaped document
test('applyUpdateESDoc', (t) => {
  const transform = new Processor(task, new Controls({}), null as any, null as any)
  t.deepEqual(transform.applyUpdateESDoc(esDoc, oplog.o.$set, oplog.o.$unset), {
    _id: 'aaaaaaaaaaaaaaaaaaaaaaaa',
    field1: 'set nested field',
  })
})

// task2 maps none of the touched fields, so the update is ignored
test('ignoreUpdate true', (t) => {
  const processor = new Processor(task2, new Controls({}), null as any, null as any)
  t.is(processor.ignoreUpdate(oplog), true)
})

// task maps the touched fields, so the update is kept
test('ignoreUpdate false', (t) => {
  const processor = new Processor(task, new Controls({}), null as any, null as any)
  t.is(processor.ignoreUpdate(oplog), false)
})

// an update following an insert is folded into the insert, which takes the update's timestamp
test('mergeOplogs insert then update', (t) => {
  const processor = new Processor(
    {
      transform: {
        mapping: {
          'field0.field1': 'field1',
          'field0.field2': 'field2',
        },
      },
    } as any,
    new Controls({}),
    null as any,
    null as any,
  )
  const oplogs = processor.mergeOplogs([
    {
      ts: new Timestamp(0, 0),
      op: 'i',
      ns: 'example1',
      o: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
        'field0.field1': 0,
      },
    },
    {
      ts: new Timestamp(0, 1),
      op: 'u',
      ns: 'example1',
      o: {
        $set: {
          'field0.field1': 1,
        },
        $unset: {
          'field0.field2': 1,
        },
      },
      o2: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
      },
    },
  ])
  t.deepEqual(oplogs, [
    {
      ts: new Timestamp(0, 1),
      op: 'i',
      ns: 'example1',
      o: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
        'field0.field1': 1,
      },
    },
  ])
})

// two updates of one document are merged into a single update. The input is given out of
// order on purpose: the entry with the later timestamp comes first.
test('mergeOplogs update then update', (t) => {
  const processor = new Processor(
    {
      transform: {
        mapping: {
          'field0.field1': 'field1',
          'field0.field2': 'field2',
        },
      },
    } as any,
    new Controls({}),
    null as any,
    null as any,
  )
  const oplogs = processor.mergeOplogs([
    {
      ts: new Timestamp(0, 1),
      op: 'u',
      ns: 'example1',
      o: {
        'field0.field1': 1,
        $set: {
          'field0.field2': 1,
        },
      },
      o2: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
      },
    },
    {
      ts: new Timestamp(0, 0),
      op: 'u',
      ns: 'example1',
      o: {
        $set: {
          'field0.field1': 3,
          'field0.field2': 2,
        },
      },
      o2: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
      },
    },
  ])
  t.deepEqual(oplogs, [
    {
      ts: new Timestamp(0, 1),
      op: 'u',
      ns: 'example1',
      o: {
        'field0.field1': 1,
        $set: {
          'field0.field1': 3,
          'field0.field2': 1,
        },
      },
      o2: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
      },
    },
  ])
})

// a delete following an update replaces it: only the delete remains
test('mergeOplogs update then delete', (t) => {
  const processor = new Processor(
    {
      transform: {
        mapping: {
          'field0.field1': 'field1',
          'field0.field2': 'field2',
        },
      },
    } as any,
    new Controls({}),
    null as any,
    null as any,
  )
  const oplogs = processor.mergeOplogs([
    {
      ts: new Timestamp(0, 0),
      op: 'u',
      ns: 'example1',
      o: {
        'field0.field1': 1,
        $set: {
          'field0.field2': 1,
        },
      },
      o2: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
      },
    },
    {
      ts: new Timestamp(0, 1),
      op: 'd',
      ns: 'example1',
      o: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
      },
    },
  ])
  t.deepEqual(oplogs, [
    {
      ts: new Timestamp(0, 1),
      op: 'd',
      ns: 'example1',
      o: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
      },
    },
  ])
})

// a delete following an insert cancels it: nothing remains
test('mergeOplogs insert then delete', (t) => {
  const processor = new Processor(
    {
      transform: {
        mapping: {
          'field0.field1': 'field1',
          'field0.field2': 'field2',
        },
      },
    } as any,
    new Controls({}),
    null as any,
    null as any,
  )
  const oplogs = processor.mergeOplogs([
    {
      ts: new Timestamp(0, 0),
      op: 'i',
      ns: 'example1',
      o: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
        field0: 1,
      },
    },
    {
      ts: new Timestamp(0, 1),
      op: 'd',
      ns: 'example1',
      o: {
        _id: new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa'),
      },
    },
  ])
  t.deepEqual(oplogs, [])
})

// Expects an insert plus two later updates of the same document to merge into a single insert.
test('mergeOplogs insert then update then update', (t) => {
  // Build a fresh ObjectID per entry so no instance is shared between oplogs.
  const docId = () => new ObjectID('aaaaaaaaaaaaaaaaaaaaaaaa')
  const task = {
    transform: {
      mapping: {
        'field0.field1': 'field1',
      },
    },
  } as any
  const processor = new Processor(task, new Controls({}), null as any, null as any)

  // The two updates are passed out of timestamp order: ts (0, 2) comes before ts (0, 1).
  const input: OpLog[] = [
    {
      ts: new Timestamp(0, 0),
      op: 'i',
      ns: 'example1',
      o: {
        _id: docId(),
        'field0.field1': 0,
      },
    },
    {
      ts: new Timestamp(0, 2),
      op: 'u',
      ns: 'example1',
      o: {
        $set: {
          'field0.field1': 2,
        },
      },
      o2: { _id: docId() },
    },
    {
      ts: new Timestamp(0, 1),
      op: 'u',
      ns: 'example1',
      o: {
        $set: {
          'field0.field1': 1,
        },
      },
      o2: { _id: docId() },
    },
  ]
  // The expected insert carries the ts and field value of the update stamped (0, 2).
  const expected: OpLog[] = [
    {
      ts: new Timestamp(0, 2),
      op: 'i',
      ns: 'example1',
      o: {
        _id: docId(),
        'field0.field1': 2,
      },
    },
  ]

  t.deepEqual(processor.mergeOplogs(input), expected)
})


================================================
FILE: tsconfig.json
================================================
{
  "compileOnSave": true,
  "compilerOptions": {
    "outDir": "dist",
    "rootDir": "./",
    "module": "commonjs",
    "sourceMap": true,
    "declaration": true,
    "target": "es6",
    "strictNullChecks": true,
    "typeRoots": ["node_modules/@types"]
  },
  "exclude": ["dist", "node_modules"]
}
Download .txt
gitextract_qgmg3l8r/

├── .gitignore
├── .npmignore
├── .npmrc
├── .prettierignore
├── .prettierrc
├── README.md
├── examples/
│   ├── config.json
│   └── example.js
├── license
├── package.json
├── src/
│   ├── config.ts
│   ├── elasticsearch.ts
│   ├── index.ts
│   ├── indices.ts
│   ├── main.ts
│   ├── mongodb.ts
│   ├── processor.ts
│   └── types.ts
├── test/
│   ├── _init.ts
│   ├── elasticsearch.test.ts
│   └── processor.test.ts
└── tsconfig.json
Download .txt
SYMBOL INDEX (72 symbols across 8 files)

FILE: src/config.ts
  class MongoConfig (line 4) | class MongoConfig {
    method constructor (line 8) | constructor({ url, options = {} }) {
  class ElasticsearchConfig (line 14) | class ElasticsearchConfig {
    method constructor (line 18) | constructor({ options, indices = [] }) {
  class CheckPoint (line 24) | class CheckPoint {
    method constructor (line 29) | constructor({ phase, id = '000000000000000000000000', time = Date.now(...
  type ExtractTask (line 38) | type ExtractTask = {
  type TransformTask (line 46) | type TransformTask = {
  type LoadTask (line 56) | type LoadTask = IndicesPutMappingParams
  class Task (line 58) | class Task {
    method constructor (line 66) | constructor({ from, extract, transform, load }) {
    method name (line 73) | name(): string {
    method endScan (line 77) | async endScan(): Promise<void> {
    method onSaveCheckpoint (line 83) | static onSaveCheckpoint(onSaveCallback: (name: string, checkPoint: Che...
    method onLoadCheckpoint (line 87) | static onLoadCheckpoint(onLoadCallback: (name: string) => Promise<any ...
    method saveCheckpoint (line 91) | static async saveCheckpoint(name: string, checkPoint: CheckPoint): Pro...
    method loadCheckpoint (line 101) | static async loadCheckpoint(name: string): Promise<CheckPoint | null> {
  class Controls (line 117) | class Controls {
    method constructor (line 123) | constructor({
  class Config (line 136) | class Config {
    method constructor (line 142) | constructor(str: string) {

FILE: src/elasticsearch.ts
  class Elasticsearch (line 7) | class Elasticsearch {
    method constructor (line 15) | constructor(elasticsearch: ElasticsearchConfig, task: Task) {
    method bulk (line 22) | async bulk(params: BulkIndexDocumentsParams): Promise<void> {
    method search (line 30) | async search(id: string): Promise<ESDoc | null> {
    method _search (line 41) | async _search(): Promise<void> {
    method _searchBatchSafe (line 58) | async _searchBatchSafe(ids: string[]): Promise<{ [id: string]: ESDoc }> {
    method retrieve (line 91) | async retrieve(id: string): Promise<ESDoc | null> {
    method _retrieve (line 102) | async _retrieve(): Promise<void> {
    method _retrieveBatchSafe (line 119) | async _retrieveBatchSafe(ids: string[]): Promise<{ [id: string]: ESDoc...
    method _mapResponse (line 150) | _mapResponse(hit: { _id: string; _parent?: string; _source: ESDoc }): ...

FILE: src/index.ts
  function run (line 7) | async function run(config: Config): Promise<void> {

FILE: src/indices.ts
  class Indices (line 10) | class Indices {
    method constructor (line 13) | private constructor(elasticsearch: ElasticsearchConfig) {
    method init (line 19) | static async init(config: Config): Promise<void> {
    method create (line 35) | async create(params: IndicesCreateParams): Promise<void> {
    method putMapping (line 43) | async putMapping(params: IndicesPutMappingParams): Promise<void> {
    method exists (line 51) | async exists(params: IndicesExistsParams): Promise<boolean> {

FILE: src/main.ts
  function readConfig (line 9) | async function readConfig(path: string): Promise<Config> {

FILE: src/mongodb.ts
  class MongoDB (line 9) | class MongoDB {
    method constructor (line 16) | private constructor(collection: Collection, task: Task) {
    method init (line 21) | static async init(mongodb: MongoConfig, task: Task): Promise<MongoDB> {
    method getCollection (line 33) | getCollection(): Readable {
    method getOplog (line 44) | getOplog(): Cursor {
    method retrieve (line 61) | async retrieve(id: ObjectID): Promise<MongoDoc | null> {
    method _retrieve (line 72) | async _retrieve(): Promise<void> {
    method _retrieveBatchSafe (line 89) | async _retrieveBatchSafe(ids: string[]): Promise<{ [id: string]: Mongo...

FILE: src/processor.ts
  class Processor (line 12) | class Processor {
    method constructor (line 22) | constructor(task: Task, controls: Controls, mongodb: MongoDB, elastics...
    method controlReadCapacity (line 31) | static controlReadCapacity(stream: Readable): Readable {
    method transformer (line 51) | transformer(
    method applyUpdateMongoDoc (line 91) | applyUpdateMongoDoc(
    method applyUpdateESDoc (line 107) | applyUpdateESDoc(
    method ignoreUpdate (line 123) | ignoreUpdate(oplog: OpLog): boolean {
    method scan (line 134) | scan(): Observable<MongoDoc> {
    method tail (line 153) | tail(): Observable<OpLog> {
    method oplog (line 171) | async oplog(oplog: OpLog): Promise<IR | null> {
    method load (line 225) | async load(irs: IR[]): Promise<void> {
    method mergeOplogs (line 260) | mergeOplogs(oplogs: OpLog[]): OpLog[] {
    method scanDocument (line 299) | async scanDocument(): Promise<void> {
    method tailOpLog (line 332) | async tailOpLog(): Promise<never> {
    method _processOplog (line 360) | async _processOplog() {
    method _processOplogSafe (line 373) | async _processOplogSafe(oplogs) {

FILE: src/types.ts
  type MongoDoc (line 3) | type MongoDoc = {
  type ESDoc (line 8) | type ESDoc = {
  type OplogInsert (line 13) | type OplogInsert = {
  type OplogUpdate (line 21) | type OplogUpdate = {
  type OplogDelete (line 33) | type OplogDelete = {
  type OpLog (line 40) | type OpLog = {
  type IRUpsert (line 46) | type IRUpsert = {
  type IRDelete (line 56) | type IRDelete = {
  type IR (line 63) | type IR = IRUpsert | IRDelete
Condensed preview — 22 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (65K chars).
[
  {
    "path": ".gitignore",
    "chars": 49,
    "preview": ".idea/\nnode_modules/\ndist/\n*temp*\nnpm-debug.log*\n"
  },
  {
    "path": ".npmignore",
    "chars": 21,
    "preview": "node_modules/\n*temp*\n"
  },
  {
    "path": ".npmrc",
    "chars": 36,
    "preview": "registry=https://registry.npmjs.org\n"
  },
  {
    "path": ".prettierignore",
    "chars": 35,
    "preview": "package-lock.json\ndist/\n.*\nlicense\n"
  },
  {
    "path": ".prettierrc",
    "chars": 90,
    "preview": "{\n  \"printWidth\": 100,\n  \"semi\": false,\n  \"singleQuote\": true,\n  \"trailingComma\": \"all\"\n}\n"
  },
  {
    "path": "README.md",
    "chars": 3299,
    "preview": "# Mongo-ES\n\nA MongoDB to Elasticsearch connector\n\n[![npm version](https://badge.fury.io/js/mongo-es.svg)](https://badge."
  },
  {
    "path": "examples/config.json",
    "chars": 1818,
    "preview": "{\n  \"controls\": {\n    \"mongodbReadCapacity\": 10000,\n    \"elasticsearchBulkSize\": 5000,\n    \"elasticsearchBulkInterval\": "
  },
  {
    "path": "examples/example.js",
    "chars": 516,
    "preview": "const fs = require('fs')\n\nconst Redis = require('ioredis')\n\nconst { Config, Task, run } = require('../dist/src/index')\n\n"
  },
  {
    "path": "license",
    "chars": 16726,
    "preview": "Mozilla Public License Version 2.0\n==================================\n\n1. Definitions\n--------------\n\n1.1. \"Contributor\""
  },
  {
    "path": "package.json",
    "chars": 1649,
    "preview": "{\n  \"name\": \"mongo-es\",\n  \"version\": \"3.4.4\",\n  \"description\": \"A MongoDB to Elasticsearch connector\",\n  \"keywords\": [\n "
  },
  {
    "path": "src/config.ts",
    "chars": 3770,
    "preview": "import { ObjectID, MongoClientOptions } from 'mongodb'\nimport { ConfigOptions, IndicesCreateParams, IndicesPutMappingPar"
  },
  {
    "path": "src/elasticsearch.ts",
    "chars": 4817,
    "preview": "import { Client, BulkIndexDocumentsParams } from 'elasticsearch'\nimport * as _ from 'lodash'\n\nimport { ESDoc } from './t"
  },
  {
    "path": "src/index.ts",
    "chars": 1345,
    "preview": "import MongoDB from './mongodb'\nimport Elasticsearch from './elasticsearch'\nimport Indices from './indices'\nimport Proce"
  },
  {
    "path": "src/indices.ts",
    "chars": 1714,
    "preview": "import {\n  Client,\n  IndicesCreateParams,\n  IndicesPutMappingParams,\n  IndicesExistsParams,\n} from 'elasticsearch'\n\nimpo"
  },
  {
    "path": "src/main.ts",
    "chars": 492,
    "preview": "#!/usr/bin/env node\n\nimport { readFile } from 'fs'\nimport { resolve as resolvePath } from 'path'\n\nimport { Config } from"
  },
  {
    "path": "src/mongodb.ts",
    "chars": 3029,
    "preview": "import { Readable } from 'stream'\nimport * as _ from 'lodash'\n\nimport { Timestamp, Cursor, MongoClient, ObjectID, Collec"
  },
  {
    "path": "src/processor.ts",
    "chars": 10773,
    "preview": "import { Readable } from 'stream'\n\nimport { Observable } from 'rx'\nimport * as _ from 'lodash'\nimport { Timestamp } from"
  },
  {
    "path": "src/types.ts",
    "chars": 902,
    "preview": "import { Timestamp, ObjectID } from 'mongodb'\n\nexport type MongoDoc = {\n  _id: ObjectID\n  [key: string]: any\n}\n\nexport t"
  },
  {
    "path": "test/_init.ts",
    "chars": 37,
    "preview": "import 'source-map-support/register'\n"
  },
  {
    "path": "test/elasticsearch.test.ts",
    "chars": 1489,
    "preview": "import test from 'ava'\nimport { Client } from 'elasticsearch'\n\nimport Processor from '../src/processor'\nimport Elasticse"
  },
  {
    "path": "test/processor.test.ts",
    "chars": 8067,
    "preview": "import test from 'ava'\nimport { ObjectID, Timestamp } from 'mongodb'\n\nimport { OpLog, MongoDoc, IR, ESDoc } from '../src"
  },
  {
    "path": "tsconfig.json",
    "chars": 304,
    "preview": "{\n  \"compileOnSave\": true,\n  \"compilerOptions\": {\n    \"outDir\": \"dist\",\n    \"rootDir\": \"./\",\n    \"module\": \"commonjs\",\n "
  }
]

About this extraction

This page contains the full source code of the jike-engineering/mongo-es GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 22 files (59.5 KB), approximately 15.5k tokens, and a symbol index with 72 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!