Full Code of heroku/awsdetailedbilling for AI

Repository: heroku/awsdetailedbilling
Branch: master
Commit: b03e0ceda741
Files: 18
Total size: 45.6 KB

Directory structure:
gitextract_hev7e8q3/

├── .gitignore
├── .jshintrc
├── CODEOWNERS
├── LICENSE.txt
├── Procfile
├── import_finalized.js
├── import_month_to_date.js
├── lib/
│   ├── baseparser.js
│   ├── cliutils.js
│   ├── dbr.js
│   └── redshift.js
├── package.json
├── readme.md
├── sql/
│   ├── copy_ri_leases.sql
│   ├── create_line_items.sql
│   ├── create_month_to_date.sql
│   └── create_ri_leases.sql
└── unzip.js

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
node_modules
*.csv
*.zip


================================================
FILE: .jshintrc
================================================
{
  "esnext": true
}


================================================
FILE: CODEOWNERS
================================================
# Comment line immediately above ownership line is reserved for related gus information. Please be careful while editing.
#ECCN:Open Source


================================================
FILE: LICENSE.txt
================================================
The MIT License (MIT)

Copyright (c) 2015 Heroku

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


================================================
FILE: Procfile
================================================
fake: this app is invoked by schedule!


================================================
FILE: import_finalized.js
================================================
'use strict'

/*******************************************************************************
Import finalized monthly DBRs.
*******************************************************************************/

var util = require('util')

var log = require('loglevel')
var rollbar = require('rollbar')
var moment = require('moment')

var BaseParser = require('./lib/baseparser.js')
var DBR = require('./lib/dbr.js')
var Redshift = require('./lib/redshift.js')
var cliUtils = require('./lib/cliutils.js')

rollbar.init(process.env.ROLLBAR_TOKEN, {environment: process.env.ROLLBAR_ENVIRONMENT})
rollbar.handleUncaughtExceptions(process.env.ROLLBAR_TOKEN,
                                 {exitOnUncaughtException: true})

var parser = new BaseParser({
  version: '0.0.1',
  addHelp: true,
  description: 'Imports finalized (whole-month) detailed billing reports'
})

parser.addArgument(
  ['--force'], {
    action: 'storeConst',
    dest: 'force',
    help: 'Ignore existing DBRs in staging or redshift, and reimport them.',
    constant: true
  }
)

parser.addArgument(
  ['--specific'], {
    help: "Import a specific month's DBR. Specified in YYYY-MM format."
  }
)

parser.addArgument(
  ['--prune-months'], {
    help: 'The amount of history (in number of months) to retain in Redshift',
    type: 'int'
  }
)

var args = parser.parseArgs()

if (args.debug) {
  log.setLevel('debug')
  log.debug('Debugging output enabled.')
} else {
  log.setLevel('info')
}
log.debug(`Resolved invocation arguments were:\n${util.inspect(args)}`)

if (args.specific !== null && args.prune_months !== null) {
  log.error('The "--specific" and "--prune-months" options are mutually exclusive.')
  log.error('--prune-months can only be invoked when importing the latest DBR.')
  log.error('Aborting.')
  process.exit(1)
}

// Instantiate a DBR object to work with.
var dbrClientOptions = {
  accessKeyId: args.source_key,
  secretAccessKey: args.source_secret
}

var stagingClientOptions = {
  accessKeyId: args.staging_key,
  secretAccessKey: args.staging_secret
}

var dbr = new DBR(dbrClientOptions, stagingClientOptions,
                  args.source_bucket, args.staging_bucket)

// Instantiate a Redshift object to work with.
var redshift = new Redshift(args.redshift_uri, {
      key: args.staging_key,
      secret: args.staging_secret
})

let startTime = moment.utc()

chooseDBR()
  .then(importDBRCheck)
  .then(stageDBRCheck)
  .then(importDBR)
  .then(vacuum)
  .then(function () {
    cliUtils.runCompleteHandler(startTime, 0)
  })
  .catch(cliUtils.rejectHandler)

function chooseDBR () {
  return new Promise(function (resolve, reject) {
    if (args.specific) {
      log.debug(`Invoked with --specific ${args.specific}.`)
      try {
        let match = /^(\d{4})-(\d{2})$/.exec(args.specific)
        if (match === null) {
          return reject(new Error('--specific requires a year and month parameter in the form of YYYY-MM'))
        }
        // moment.utc month argument is zero-indexed
        let month = moment.utc([match[1], match[2] - 1])
        log.debug(`Attempting to import ${month.toISOString()}`)
        return resolve(dbr.findDBR(month))
      } catch (err) {
        return reject(err)
      }
    } else {
      log.debug(`Invoked without --specific. Targeting latest finalized DBR...`)
      return resolve(dbr.getLatestFinalizedDBR())
    }
  })
}

// Given a latest finalized DBR object, decide whether to import it
function importDBRCheck (finalizedDBR) {
  let dbrMonth = finalizedDBR.Month.format('MMMM YYYY')
  return redshift.hasMonth(finalizedDBR.Month).then(function (hasMonth) {
    if (hasMonth) {
      log.info(`No new DBRs to import.`)
      if (args.force) {
        log.warn(`--force specified, importing DBR for ${dbrMonth} anyway`)
        return finalizedDBR
      }
      cliUtils.runCompleteHandler(startTime, 0)
    } else {
      return finalizedDBR
    }
  })
}

// Given a DBR, (optionally) stage it
function stageDBRCheck (finalizedDBR) {
  return dbr.findStagedDBR(finalizedDBR.Month).then(
    function (stagedDBR) {
      let dbrMonth = stagedDBR.Month.format('MMMM YYYY')
      // DBR is staged!
      if (!args.force) {
        // No need to re-stage
        log.warn(`Using existing staged DBR for ${dbrMonth}.`)
        let s3uri = `s3://${args.staging_bucket}/${stagedDBR.Key}`
        log.debug(`Staged s3uri: ${s3uri}`)
        return ({s3uri: s3uri, month: stagedDBR.Month})
      } else {
        // Force re-stage
        log.warn(`--force specified, overwriting staged DBR for ${dbrMonth}`)
        return dbr.stageDBR(stagedDBR.Month).then(function (s3uri) {
          return ({s3uri: s3uri, month: stagedDBR.Month})
        })
      }
    },
    function (err) {
      // DBR not staged. Stage then import.
      log.debug(`DBR not staged: ${err}`)
      log.info(`Staging DBR for ${finalizedDBR.Month.format('MMMM YYYY')}.`)
      return dbr.stageDBR(finalizedDBR.Month).then(function (s3uri) {
        return ({s3uri: s3uri, month: finalizedDBR.Month})
      })
    }
  )
}

// Given an object like {s3uri: <uri>, month: <moment>}
// Execute the import.
function importDBR (params) {
  log.info(`Importing DBR for ${params.month.format('MMMM YYYY')}`)
  if (args.prune_months !== null) {
    let pruneThreshold = moment(params.month)
      .subtract(args.prune_months, 'months')
      .format('MMMM YYYY')
    log.info(`... and pruning months prior to ${pruneThreshold}`)
    return redshift.importFullMonth(params.s3uri, params.month, args.prune_months)
  } else {
    return redshift.importFullMonth(params.s3uri, params.month)
  }
}

// Run VACUUM on the line_items table
function vacuum () {
  if (!args.no_vacuum) {
    log.info('Running VACUUM on line_items...')
    return redshift.vacuum(process.env.LINE_ITEMS_TABLE_NAME || 'line_items')
  } else {
    log.info('--no-vacuum specified, skipping vacuum.')
    return
  }
}


================================================
FILE: import_month_to_date.js
================================================
'use strict'

/*******************************************************************************
Import month-to-date DBRs, overwriting the existing month-to-date.
*******************************************************************************/

var util = require('util')

var log = require('loglevel')
var rollbar = require('rollbar')
var moment = require('moment')

var BaseParser = require('./lib/baseparser.js')
var DBR = require('./lib/dbr.js')
var Redshift = require('./lib/redshift.js')
var cliUtils = require('./lib/cliutils.js')

rollbar.init(process.env.ROLLBAR_TOKEN, {environment: process.env.ROLLBAR_ENVIRONMENT})
rollbar.handleUncaughtExceptions(process.env.ROLLBAR_TOKEN,
                                 {exitOnUncaughtException: true})

var parser = new BaseParser({
  version: '0.0.1',
  addHelp: true,
  description: 'Imports month-to-date detailed billing reports'
})

parser.addArgument(
  ['--no-stage'], {
    action: 'storeConst',
    dest: 'no_stage',
    help: 'Use an existing staged month-to-date DBR.',
    constant: true
  }
)

var args = parser.parseArgs()

if (args.debug) {
  log.setLevel('debug')
  log.debug('Debugging output enabled.')
} else {
  log.setLevel('info')
}
log.debug(`Resolved invocation arguments were:\n${util.inspect(args)}`)

// Instantiate a DBR object to work with.
var dbrClientOptions = {
  accessKeyId: args.source_key,
  secretAccessKey: args.source_secret
}

var stagingClientOptions = {
  accessKeyId: args.staging_key,
  secretAccessKey: args.staging_secret
}

var dbr = new DBR(dbrClientOptions, stagingClientOptions,
                  args.source_bucket, args.staging_bucket)

// Instantiate a Redshift object to work with.
var redshift = new Redshift(args.redshift_uri, {
      key: args.staging_key,
      secret: args.staging_secret
})

let startTime = moment.utc()

dbr.getMonthToDateDBR()
  .then(stageDBRCheck)
  .then(importDBR)
  .then(vacuum)
  .then(function () {
    cliUtils.runCompleteHandler(startTime, 0)
  })
  .catch(cliUtils.rejectHandler)

// Determine whether to stage the latest month-to-date DBR or reuse existing
function stageDBRCheck (monthToDateDBR) {
  log.info(`Found month-to-date for ${monthToDateDBR.Month.format('MMMM YYYY')}...`)
  if (args.no_stage) {
    let s3uri = dbr.composeStagedURI(monthToDateDBR)
    log.info(`--no-stage specified, attempting to use existing staged month-to-date DBR`)
    return s3uri
  } else {
    log.info(`Staging DBR file for ${monthToDateDBR.Month.format('MMMM YYYY')}.`)
    return dbr.stageDBR(monthToDateDBR.Month)
  }
}

// Import the staged month-to-date DBR
// TODO if we just chain like .then(redshift.importMonthToDate), it fails
// because 'this' inside importMonthToDate will be undefined. Why?
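// (Likely answer: passing the bare method reference detaches it from the
// redshift instance, and under 'use strict' an unbound call leaves 'this'
// undefined; chaining .then(redshift.importMonthToDate.bind(redshift))
// should also work. The wrapper below is kept for readability.)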
function importDBR (s3uri) {
  log.info(`Importing ${s3uri} into month_to_date...`)
  return redshift.importMonthToDate(s3uri)
}

// Run VACUUM on the month_to_date table
function vacuum () {
  if (!args.no_vacuum) {
    log.info('Running VACUUM on month_to_date...')
    return redshift.vacuum('month_to_date')
  } else {
    log.info('--no-vacuum specified, skipping vacuum.')
    return
  }
}


================================================
FILE: lib/baseparser.js
================================================
'use strict'

var ArgumentParser = require('argparse').ArgumentParser

module.exports = Parser

function Parser (opts) {
  let parser = new ArgumentParser(opts)

  parser.addArgument(
    ['--source-bucket'], {
      help: 'The S3 bucket which contains the detailed billing reports. Defaults to the environment variable "SOURCE_BUCKET".',
      defaultValue: process.env.SOURCE_BUCKET
    }
  )

  parser.addArgument(
    ['--source-key'], {
      help: 'An AWS access key ID with permissions to access the source DBR bucket. Defaults to the environment variable "SOURCE_AWS_KEY", then to "AWS_KEY".',
      defaultValue: process.env.SOURCE_AWS_KEY || process.env.AWS_KEY
    }
  )

  parser.addArgument(
    ['--source-secret'], {
      help: 'An AWS access key secret with permissions to access the source DBR bucket. Defaults to the environment variable "SOURCE_AWS_SECRET", then to "AWS_SECRET".',
      defaultValue: process.env.SOURCE_AWS_SECRET || process.env.AWS_SECRET
    }
  )

  parser.addArgument(
    ['--staging-bucket'], {
      help: 'The S3 bucket which serves as a staging area for loading detailed billing reports. Defaults to the environment variable "STAGING_BUCKET".',
      defaultValue: process.env.STAGING_BUCKET
    }
  )

  parser.addArgument(
    ['--staging-key'], {
      help: 'An AWS access key ID with permissions to access the staging DBR bucket. Defaults to the environment variable "STAGING_AWS_KEY", then to "AWS_KEY".',
      defaultValue: process.env.STAGING_AWS_KEY || process.env.AWS_KEY
    }
  )

  parser.addArgument(
    ['--staging-secret'], {
      help: 'An AWS access key secret with permissions to access the staging DBR bucket. Defaults to the environment variable "STAGING_AWS_SECRET", then to "AWS_SECRET".',
      defaultValue: process.env.STAGING_AWS_SECRET || process.env.AWS_SECRET
    }
  )

  parser.addArgument(
    ['--redshift-uri'], {
      help: 'The redshift connection string, in URI form',
      defaultValue: process.env.REDSHIFT_URI
    }
  )

  parser.addArgument(
    ['--no-vacuum'], {
      action: 'storeConst',
      dest: 'no_vacuum',
      help: 'Do not automatically run VACUUM following the import.',
      constant: true
    }
  )

  parser.addArgument(
    ['-d', '--debug'], {
      action: 'storeConst',
      dest: 'debug',
      help: 'Turn on debugging output.',
      constant: true
    }
  )

  return parser
}


================================================
FILE: lib/cliutils.js
================================================
'use strict'

var rollbar = require('rollbar')
var log = require('loglevel')
var moment = require('moment')

rollbar.init(process.env.ROLLBAR_TOKEN, {environment: process.env.ROLLBAR_ENVIRONMENT})

exports.rejectHandler = function (err) {
  rollbar.handleError(err)
  log.error(err)
  log.error(err.message)
  log.error(err.stack)
  log.error('Aborting run.')
  process.exit(1)
}

exports.runCompleteHandler = function (startTime, exitCode) {
  let durationString = moment.utc(moment.utc() - startTime).format('HH:mm:ss.SSS')
  log.info(`Run complete. Took ${durationString}`)
  process.exit(exitCode || 0)
}


================================================
FILE: lib/dbr.js
================================================
'use strict'

var fs = require('fs')
var path = require('path')

var log = require('loglevel')
var _ = require('lodash')
var moment = require('moment')
var AWS = require('aws-sdk')
var progress = require('progress-stream')
var prettyBytes = require('pretty-bytes')
var numeral = require('numeral')
var child_process = require('child_process')
var zlib = require('zlib')
var debounce = require('debounce')

module.exports = DBR

function DBR (credentials, stagingCredentials, bucket, stagingBucket) {
  this.credentials = credentials
  this.stagingCredentials = stagingCredentials
  this.bucket = bucket
  this.stagingBucket = stagingBucket

  this.dbrClient = new AWS.S3(this.credentials)
  this.stagingClient = new AWS.S3(this.stagingCredentials)
}

// Download, unzip, gzip, upload a DBR to the staging bucket
// Returns a promise that resolves when everything is finished.
// Month is a UTC moment object for midnight on the first of the month.
DBR.prototype.stageDBR = function (month) {
  let self = this
  return this.findDBR(month)
    .then(function (result) {
      return downloadDBR(result, self.dbrClient, self.bucket)
    })
    .then(function (result) {
      return processDBR(result, self.stagingClient, self.stagingBucket)
    })
}

// Find a DBR for a given month or raise an error
// Month is a UTC moment object for midnight on the first of the month.
DBR.prototype.findDBR = function (month) {
  let self = this
  return new Promise(function (resolve, reject) {
    self.getDBRs()
        .then(function (dbrs) {
          let match = _.find(dbrs, function (d) { return month.isSame(d.Month) })
          if (match === undefined) {
            return reject(new Error(`Unable to find the DBR for ${month.format('MMMM YYYY')}.`))
          } else {
            return resolve(match)
          }
        })
  })
}

// Find a staged DBR for a given month or raise an error
// Month is a UTC moment object for midnight on the first of the month.
// TODO: is there a way to consolidate the handler in then() with findDBR()'s?
DBR.prototype.findStagedDBR = function (month) {
  let self = this
  return new Promise(function (resolve, reject) {
    self.getStagedDBRs()
        .then(function (dbrs) {
          let match = _.find(dbrs, function (d) { return month.isSame(d.Month) })
          if (match === undefined) {
            return reject(new Error(`Unable to find the staged DBR for ${month.format('MMMM YYYY')}.`))
          } else {
            return resolve(match)
          }
        })
  })
}

// Get the contents of a bucket. Returns a promise which resolves with an array
// of bucket objects.
// Will not work with buckets containing > 1000 objects, but that's okay
// for our purposes here.
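// (listObjects returns at most 1000 keys per response; a fuller
// implementation would check data.IsTruncated and page through the rest
// using the Marker parameter.)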
DBR.prototype.getBucketContents = function (client, bucket) {
  return new Promise(function (resolve, reject) {
    client.listObjects({Bucket: bucket}, function (err, data) {
      if (err) return reject(err)
      if ('Contents' in data) {
        return resolve(data.Contents)
      } else {
        return reject(new Error(`Bucket listObjects response didn't contain "Contents" key.`))
      }
    })
  })
}

// Get a listing of available DBRs
// Returns a promise which resolves with a date-sorted array of objects like:
// {Key: <filename>, Size: <bytes>, Month: <moment>}
DBR.prototype.getDBRs = function () {
  return this.getBucketContents(this.dbrClient, this.bucket)
             .then(processDBRBucketContents)
}

// Get a listing of staged DBRs
// Returns a promise which resolves with a date-sorted array of objects like:
// {Key: <filename>, Size: <bytes>, Month: <moment>}
DBR.prototype.getStagedDBRs = function () {
  return this.getBucketContents(this.stagingClient, this.stagingBucket)
             .then(processDBRBucketContents)
}

// Get the month-to-date DBR.
// Returns a promise which resolves with an object like:
// {Key: <filename>, Size: <bytes>, Month: <moment>}
DBR.prototype.getMonthToDateDBR = function () {
  return this.getDBRs()
             .then(function (dbrs) {
               if (dbrs.length === 0) {
                 throw new Error('There are no existing DBRs.')
               } else {
                 return dbrs[dbrs.length - 1]
               }
             })
}

// Get the latest finalized DBR.
// Returns a promise which resolves with an object like:
// {Key: <filename>, Size: <bytes>, Month: <moment>}
DBR.prototype.getLatestFinalizedDBR = function () {
  return this.getDBRs()
             .then(function (dbrs) {
               if (dbrs.length === 0) {
                 throw new Error('There are no existing DBRs.')
               } else if (dbrs.length === 1) {
                 throw new Error('There are no finalized DBRs.')
               } else {
                 return dbrs[dbrs.length - 2]
               }
             })
}

DBR.prototype.composeStagedURI = function (dbrObject) {
  let gzFileName = path.basename(dbrObject.Key, '.zip') + '.gz'
  return `s3://${this.stagingBucket}/${gzFileName}`
}

// =============================================================================
// Module-private stuff down here

// Note: the original pattern used a character class ([gz|zip]), which matches
// a single character; an alternation group matches the intended extensions.
var dbrPattern = /\d+-aws-billing-detailed-line-items-with-resources-and-tags-(\d{4})-(\d{2})\.csv\.(?:gz|zip)/

function extractMonth (val) {
  let match = dbrPattern.exec(val)
  if (match === null) return null
  let year = parseInt(match[1], 10)
  let month = parseInt(match[2], 10)
  return new moment.utc([year, month - 1]) // eslint-disable-line new-cap
                                           // no control over moment...
}

// Take a bucket listing, filter out non-DBR entries, and return an array
// of objects ordered by the statement date (ascending). Each object has
// three properties: Key, Size, and Month:
//   Key:  the filename
//   Size: the size in bytes
//   Month: a utc moment object of the DBR month (midnight on first of the month)
function processDBRBucketContents (results) {
  let dbrs = []
  // Filter only DBRs
  for (let result of results) {
    let month = extractMonth(result.Key)
    if (month === null) continue
    // grab only the Key and Size properties
    let picked = _.pick(result, ['Key', 'Size'])
    // Add a Month property
    picked.Month = month
    dbrs.push(picked)
  }
  return dbrs.sort(function (a, b) {
    if (a.Month < b.Month) return -1
    else if (a.Month > b.Month) return 1
    else return 0
  })
}

// Downloads the specified DBR zip
// the argument is an object like:
//   {Key: <filename>, Size: <bytes>, Month: <moment>}
function downloadDBR (dbr, s3client, bucket) {
  const monthString = dbr.Month.format('MMM YYYY')
  log.info(`[${monthString}] (download): downloading from S3...`)
  return new Promise(function (resolve, reject) {
    let sourceParams = {
      Bucket: bucket,
      Key: dbr.Key
    }
    let outStream = fs.createWriteStream(dbr.Key)
    let downloadProgress = progress({
      length: dbr.Size,
      time: 1000
    })
    let request = s3client.getObject(sourceParams)

    downloadProgress.on('progress', function (dlprogress) {
      let percentage = numeral(dlprogress.percentage / 100).format('00.0%')
      let eta = moment.duration(dlprogress.eta * 1000).humanize()
      log.info(`[${monthString}] (download): ${percentage} (${eta} at ${prettyBytes(dlprogress.speed)}/sec)`)
    })

    // Kick off the stream
    let zipfileStream = request.createReadStream()
    zipfileStream.pipe(downloadProgress)
                 .pipe(outStream)

    outStream.on('close', function () {
      log.info(`[${monthString}] (download): complete.`)
      return resolve(dbr)
    })
  })
}

// Processes the specified local DBR zip: unzip, gzip, upload to staging.
// the argument is an object like:
//   {Key: <filename>, Size: <bytes>, Month: <moment>}
function processDBR (dbr, s3client, bucket) {
  const monthString = dbr.Month.format('MMM YYYY')

  // Unzip, gzip, and upload to the staging bucket on S3
  log.info(`[${monthString}] (process): processing '${dbr.Key}'...`)

  // In theory, zipfiles can contain multiple files
  // We know that the DBR zip has only one file inside, the DBR CSV
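  // ('unzip -p' below writes each entry's contents to stdout, so a multi-file
  // zip would concatenate its members into a single stream.)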
  return new Promise(function (resolve, reject) {
    var uncompressedLength = parseInt(child_process.execSync(
      `zipinfo -t ${dbr.Key} | cut -d ' ' -f 3`, {encoding: 'utf8'}
    ), 10)

    // Hack off the '.zip'
    var plainFileName = path.basename(dbr.Key, '.zip')

    // For monitoring unzip progress
    var unzipProgress = progress({time: 10000, length: uncompressedLength}, function (uzprogress) {
      let percentage = numeral(uzprogress.percentage / 100).format('00.0%')
      let eta = moment.duration(uzprogress.eta * 1000).humanize()
      log.info(`[${monthString}] (process-unzip): ${percentage} (${eta} at ${prettyBytes(uzprogress.speed)}/sec)`)
    })

    // For monitoring gzip progress.
    // From this point forward in the stream, we don't know the stream length as
    // we don't know how much the stream will compress down to until it's done.
    var gzipProgress = progress({time: 10000}, function (gzprogress) {
      log.info(`[${monthString}] (process-gzip): ${prettyBytes(gzprogress.transferred)} at ${prettyBytes(gzprogress.speed)}/sec`)
    })

    // Hook up every part of the stream prior to the HTTP upload to S3
    // Stream not flowing at this point! Triggered by request.send() below.
    var unzipGzipStream = child_process.spawn('unzip', ['-p', `./${dbr.Key}`])
                                       .stdout
                                       .pipe(unzipProgress)
                                       .pipe(zlib.createGzip())
                                       .pipe(gzipProgress)

    // Prepare the upload to S3 with the stream as the body
    var requestParams = {
      Bucket: bucket,
      Key: `${plainFileName}.gz`,
      Body: unzipGzipStream
    }
    var request = s3client.upload(requestParams)
    request.on('httpUploadProgress', debounce(function (progress) {
      log.info(`[${monthString}] (process-upload): ${prettyBytes(progress.loaded)}`)
    }, 1000, true))

    // Fire the upload request, gets the stream flowing.
    request.send(function (err, data) {
      if (err) return reject(err)
      log.info(`[${monthString}] (process-upload): complete.`)
      return resolve(`s3://${requestParams.Bucket}/${requestParams.Key}`)
    })
  })
}


================================================
FILE: lib/redshift.js
================================================
'use strict'

var log = require('loglevel')
var moment = require('moment')
var pg = require('pg')
var types = require('pg').types

// Redshift doesn't have TIMESTAMP WITH TIME ZONE
// All dates will therefore come back here as localtime
// This forces dates to come back as UTC.
// See https://github.com/brianc/node-pg-types/blob/master/lib/textParsers.js
// Also http://stackoverflow.com/questions/20712291/use-node-postgres-to-get-postgres-timestamp-without-timezone-in-utc
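// (1082 is the postgres catalog OID for the DATE type.)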
types.setTypeParser(1082, function (stringVal) {
  return new Date(stringVal)
})

module.exports = Redshift

function Redshift (connString, s3credentials) {
  this.connString = connString
  this.s3credentials = s3credentials
  this.lineItemsTableName = process.env.LINE_ITEMS_TABLE_NAME || 'line_items'
  this.schema = process.env.SCHEMA || 'heroku'
}

// Execute a query, using the query pool
// Return a promise which resolves with the output of the query.
Redshift.prototype.executeQuery = function (query, transform) {
  var self = this
  log.debug('Executing query:')
  log.debug(query)
  return new Promise(function (resolve, reject) {
    pg.connect(self.connString, function (err, client, done) {
      // Reject (rather than throw) so errors propagate through the promise
      // chain to the caller's .catch() handler.
      if (err) return reject(err)
      client.query(query, function (err, result) {
        if (err) {
          done(client)
          return reject(err)
        }
        if (transform !== undefined) {
          result = transform(result)
        }
        done(client)
        return resolve(result)
      })
    })
  })
}

// Execute a query where the desired output is a single scalar value.
Redshift.prototype.getScalar = function (query, keyName) {
  let transform = function (result) {
    let rowzero = result.rows[0]
    if (keyName !== undefined) {
      return rowzero[keyName]
    } else {
      // return the value of the first key
      return rowzero[Object.keys(rowzero)[0]]
    }
  }
  return this.executeQuery(query, transform)
}

// Execute a query where the desired output is a count of affected rows
// (for example, DELETE FROM queries).
Redshift.prototype.getRowCount = function (query) {
  let transform = function (result) {
    return result.rowCount
  }
  return this.executeQuery(query, transform)
}

// Check that a table exists. Won't match on views or other table-like things.
Redshift.prototype.checkTableExists = function (tableName, schema) {
  let query = `
    SELECT EXISTS (
      SELECT 1
      FROM   pg_catalog.pg_class c
      JOIN   pg_catalog.pg_namespace n ON n.oid = c.relnamespace
      WHERE  n.nspname = '${schema}'
      AND    c.relname = '${tableName}'
      AND    c.relkind = 'r'    -- only tables
    ) as exists;`

  let transform = function (result) {
    return result.rows[0].exists === 't'
  }
  return this.executeQuery(query, transform)
}

// Determine the month of the latest finalized DBR to be imported into Redshift.
Redshift.prototype.latestFullMonth = function () {
  let query = `SELECT MAX(statement_month) FROM ${this.schema}.${this.lineItemsTableName};`
  return this.getScalar(query, 'max').then(function (date) {
    let latest = moment.utc(date)
    if (latest.isValid()) {
      return latest
    } else {
      return null
    }
  })
}

// Determine whether a specific finalized month has already been imported.
Redshift.prototype.hasMonth = function (month) {
  let query = `
    SELECT COUNT(*)
    FROM ${this.schema}.${this.lineItemsTableName}
    WHERE statement_month = '${month.format('YYYY-MM-01')}';`
  return this.getScalar(query, 'count').then(function (count) {
    return (count > 0)
  })
}

// Import a finalized (full-month) DBR into Redshift.
// First, create a staging table and COPY FROM into that.
// Then, add the statement_month column, and copy it all to line_items.
// Then drop the staging table.
Redshift.prototype.importFullMonth = function (s3uri, month, pruneThresholdMonths) {
  const monthString = month.format('YYYY_MM')
  const monthDateString = month.format('YYYY-MM-01')
  const stagingTableName = `staging_${monthString}`
  let pruneQueryFragment = ''
  if (typeof pruneThresholdMonths === 'number') {
    // Delete statement months that are older than X months ago
    let pruneThresholdString = moment(month)
      .subtract(pruneThresholdMonths, 'months')
      .format('YYYY-MM-01')
    pruneQueryFragment = `DELETE FROM ${this.schema}.${this.lineItemsTableName} WHERE statement_month <= '${pruneThresholdString}'::DATE;`
  }
  // Normally, creating the staging table would look like:
  //   CREATE TABLE ${stagingTableName} (LIKE line_items)
  // However you can't alter the staging table to drop statement_month because
  // it is specified as the SORTKEY, and that can't be touched in existing
  // tables. So, we create the staging table from scratch.
  let query = `
    BEGIN;
      CREATE TABLE IF NOT EXISTS ${this.schema}.${stagingTableName} (
        invoice_id TEXT,
        payer_account_id TEXT,
        linked_account_id TEXT,
        record_type TEXT,
        record_id TEXT,
        product_name TEXT,
        rate_id TEXT,
        subscription_id TEXT,
        pricing_plan_id TEXT,
        usage_type TEXT,
        operation TEXT,
        availability_zone TEXT,
        reserved_instance TEXT,
        item_description TEXT,
        usage_start_date TIMESTAMP,
        usage_end_date TIMESTAMP,
        usage_quantity FLOAT8,
        blended_rate NUMERIC(18,11),
        blended_cost NUMERIC(18,11),
        unblended_rate NUMERIC(18,11),
        unblended_cost NUMERIC(18,11),
        resource_id TEXT,
        cloud TEXT,
        slot TEXT,
        PRIMARY KEY(record_id)
      ) DISTSTYLE EVEN;

      COPY ${this.schema}.${stagingTableName}
        FROM '${s3uri}'
        CREDENTIALS 'aws_access_key_id=${this.s3credentials.key};aws_secret_access_key=${this.s3credentials.secret}'
        GZIP CSV IGNOREHEADER 1;
      ALTER TABLE ${this.schema}.${stagingTableName} ADD COLUMN statement_month DATE DEFAULT '${monthDateString}';
      DELETE FROM ${this.schema}.${this.lineItemsTableName} WHERE statement_month = '${monthDateString}';
      INSERT INTO ${this.schema}.${this.lineItemsTableName} SELECT * FROM ${this.schema}.${stagingTableName};
      DROP TABLE ${this.schema}.${stagingTableName};
      ${pruneQueryFragment}
    COMMIT;
  `

  return this.executeQuery(query)
}

// Import the month-to-date DBR into the month_to_date table, clobbering
// whatever was already there.
Redshift.prototype.importMonthToDate = function (s3uri) {
  let self = this
  let truncateQuery = `TRUNCATE ${self.schema}.month_to_date;`
  return this.executeQuery(truncateQuery).then(function () {
    log.debug('Month to date table truncated. Importing...')
    let query = `
      COPY ${self.schema}.month_to_date
        FROM '${s3uri}'
        CREDENTIALS 'aws_access_key_id=${self.s3credentials.key};aws_secret_access_key=${self.s3credentials.secret}'
        GZIP CSV IGNOREHEADER 1;
    `
    return self.executeQuery(query)
  })
}

// Vacuum a single (schema-qualified) table when a name is given, otherwise
// the whole database. (The original expression concatenated before applying
// `|| ''`, so the fallback could never fire.)
Redshift.prototype.vacuum = function (tableName) {
  let target = tableName ? `${this.schema}.${tableName}` : ''
  let query = `VACUUM ${target};`
  return this.executeQuery(query)
}


================================================
FILE: package.json
================================================
{
  "name": "awsdetailedbilling",
  "version": "0.0.1",
  "description": "Copies AWS Detailed Billing Reports to redshift",
  "main": "unzip.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "idan@heroku.com",
  "dependencies": {
    "argparse": "^1.0.1",
    "aws-sdk": "^2.1.17",
    "debounce": "^1.0.0",
    "lodash": "^3.6.0",
    "loglevel": "^1.2.0",
    "moment": "^2.9.0",
    "numeral": "^1.5.3",
    "pg": "^4.3.0",
    "pretty-bytes": "^1.0.3",
    "progress-stream": "^1.0.1",
    "rollbar": "^0.4.5"
  },
  "engines": {
    "iojs": "1.8.x"
  }
}


================================================
FILE: readme.md
================================================
# awsdetailedbilling

Loads AWS detailed billing reports into a redshift cluster.

[![js-standard-style](https://cdn.rawgit.com/feross/standard/master/badge.svg)](https://github.com/feross/standard)


# Setup

Still a manual process for now:

1. Create a Redshift cluster.
2. Once the cluster is alive, connect with your favorite postgres client and create the `line_items` and `month_to_date` tables. The SQL for creating each is in the `sql/` subdirectory.
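
For example, with `psql` installed and `REDSHIFT_URI` set as described below, something along these lines should do it:

`psql "$REDSHIFT_URI" -f sql/create_line_items.sql && psql "$REDSHIFT_URI" -f sql/create_month_to_date.sql`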


## Configuration:

Set these environment variables. Some of them may be overridden at runtime with command-line switches; run the relevant script with `--help` for details.

- `SOURCE_BUCKET`: the S3 bucket where DBRs are deposited by Amazon.
- `STAGING_BUCKET`: the S3 bucket into which pre-processed DBRs are staged before importing to redshift.
- `AWS_KEY` *or* `SOURCE_AWS_KEY` and `STAGING_AWS_KEY`: the AWS access key ID credential for accessing S3. If the same credentials are used for both the source and staging buckets, you can just set `AWS_KEY`. If separate credentials are necessary, you can specify `SOURCE_AWS_KEY` *and* `STAGING_AWS_KEY` instead.
- `AWS_SECRET` *or* `SOURCE_AWS_SECRET` and `STAGING_AWS_SECRET`: Same as `AWS_KEY`, but for your AWS access key secret.
- `REDSHIFT_URI`: a connection URI for redshift. It should include credentials, in the form `postgres://myUser:s0mep4ssword@hostname:port/dbname`.
- `ROLLBAR_TOKEN`: a token for error reporting to Rollbar.
- `ROLLBAR_ENVIRONMENT`: an environment name for error reporting to Rollbar.
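
For example, on Heroku with shared credentials, the setup might look something like this (the bucket names and connection details here are placeholders):

`heroku config:set SOURCE_BUCKET=my-dbr-bucket STAGING_BUCKET=my-staging-bucket AWS_KEY=... AWS_SECRET=... REDSHIFT_URI=postgres://user:pass@host:5439/dbname ROLLBAR_TOKEN=... ROLLBAR_ENVIRONMENT=production`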


## Usage

There are two scripts: `import_finalized.js` and `import_month_to_date.js`. Both are intended to be run on a daily schedule, preferably at night. Run duration is largely dependent on the size of your DBRs; for large DBRs runs of a few hours are common.

Invoke either with `--help` for invocation instructions.


#### `import_finalized.js`

This script imports "finalized" DBRs — specifically, the DBR for the previous month according to UTC.

The script first checks to see if there's a finalized DBR which hasn't been imported yet. If there is no new finalized DBR, the script terminates immediately. Once a month, when a new finalized DBR appears, the script will download, unzip, gzip, stage, and import the DBR into a temporary table named `staging_YYYY_MM`. Once that process is complete, it adds a `statement_month` column with the relevant month, copies the entire staging table into `line_items`, drops the staging table, and `VACUUM`s the line_items table.
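
Beyond the shared switches, this script accepts `--specific YYYY-MM` to target a particular month, `--force` to re-import a month that is already staged or imported, and `--prune-months N` to cap how much history is retained. For example, to forcibly re-import March 2015:

`heroku run:detached -s PX "iojs import_finalized.js --specific 2015-03 --force"`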

#### `import_month_to_date.js`

This script imports "month-to-date" DBRs, which contain "estimated" billing data and are therefore not 100% accurate. Upon every import, the current month's DBR is downloaded, unzipped, gzipped, and staged. The `month_to_date` table is emptied by means of [TRUNCATE](http://docs.aws.amazon.com/redshift/latest/dg/r_TRUNCATE.html) (eliminating the need for an interim VACUUM), and the staged DBR is imported, followed by a VACUUM.
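
A typical scheduled invocation is just `heroku run:detached -s PX "iojs import_month_to_date.js"`; adding `--no-stage` reuses the already-staged month-to-date file instead of re-staging it.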

### Usage tips

You can run these on your local machine, but unless you are very near the AWS datacenters where your source and staging S3 buckets are located, you'll have better performance running them on Heroku.

Use PX dynos for invoking either script; smaller dyno types lack the memory and storage to get the job done.

Here's a sample invocation:

`heroku run -s PX "iojs import_finalized.js"`

If you want to run it without fear of laptop disconnections, you can run the process in detached mode:

`heroku run:detached -s PX "iojs import_finalized.js"`

You can track progress by running `heroku logs -t`.

## Future improvements

- One-off month imports
- Heroku button!

## Meta

License: MIT. See LICENSE.txt.

Questions? Comments? Hit up tools@heroku.com.


================================================
FILE: sql/copy_ri_leases.sql
================================================
COPY ri_leases
FROM 's3://heroku-detailed-billing-staging/a_lease_report.csv'
CREDENTIALS 'aws_access_key_id=FOO;aws_secret_access_key=BAR'
CSV
IGNOREHEADER 1
DATEFORMAT 'MM/DD/YYYY';


================================================
FILE: sql/create_line_items.sql
================================================
CREATE TABLE IF NOT EXISTS line_items (
  invoice_id TEXT,
  payer_account_id TEXT,
  linked_account_id TEXT,
  record_type TEXT,
  record_id TEXT,
  product_name TEXT,
  rate_id TEXT,
  subscription_id TEXT,
  pricing_plan_id TEXT,
  usage_type TEXT,
  operation TEXT,
  availability_zone TEXT,
  reserved_instance TEXT,
  item_description TEXT,
  usage_start_date TIMESTAMP,
  usage_end_date TIMESTAMP,
  usage_quantity FLOAT8,
  blended_rate NUMERIC(18,11),
  blended_cost NUMERIC(18,11),
  unblended_rate NUMERIC(18,11),
  unblended_cost NUMERIC(18,11),
  resource_id TEXT,
  cloud TEXT,
  slot TEXT,
  statement_month DATE,
  PRIMARY KEY(record_id)
) DISTSTYLE EVEN SORTKEY(statement_month);


================================================
FILE: sql/create_month_to_date.sql
================================================
CREATE TABLE IF NOT EXISTS month_to_date (
  invoice_id TEXT,
  payer_account_id TEXT,
  linked_account_id TEXT,
  record_type TEXT,
  record_id TEXT,
  product_name TEXT,
  rate_id TEXT,
  subscription_id TEXT,
  pricing_plan_id TEXT,
  usage_type TEXT,
  operation TEXT,
  availability_zone TEXT,
  reserved_instance TEXT,
  item_description TEXT,
  usage_start_date TIMESTAMP,
  usage_end_date TIMESTAMP,
  usage_quantity FLOAT8,
  blended_rate NUMERIC(18,11),
  blended_cost NUMERIC(18,11),
  unblended_rate NUMERIC(18,11),
  unblended_cost NUMERIC(18,11),
  resource_id TEXT,
  cloud TEXT,
  slot TEXT,
  PRIMARY KEY(record_id)
) DISTSTYLE EVEN SORTKEY(usage_start_date);


================================================
FILE: sql/create_ri_leases.sql
================================================
CREATE TABLE IF NOT EXISTS ri_leases (
  account_id TEXT,
  payer_account_id TEXT,
  start_date DATE,
  end_date DATE,
  lease_term TEXT,
  availability_zone TEXT,
  instance_type TEXT,
  os TEXT,
  utilization TEXT,
  tenancy TEXT,
  fixed_price NUMERIC(11, 6),
  usage_price NUMERIC(8, 6),
  instance_count INT,
  lease_id TEXT,
  subscription_id TEXT,
  state TEXT,
  PRIMARY KEY(subscription_id)
);


================================================
FILE: unzip.js
================================================
/* jshint esnext: true */

var util = require('util');
var fs = require('fs');
var AWS = require('aws-sdk');
var progress = require('progress-stream');
var prettyBytes = require('pretty-bytes');
var moment = require('moment');
var numeral = require('numeral');
var log = require('loglevel');
var pg = require('pg');
var child_process = require('child_process');
var zlib = require('zlib');
var debounce = require('debounce');
var ArgumentParser = require('argparse').ArgumentParser;

var rollbar = require('rollbar');
rollbar.init(process.env.ROLLBAR_TOKEN);

var parser = new ArgumentParser({
  version: '0.0.1',
  addHelp: true,
  description: "Unzips detailed billing reports"
});

parser.addArgument(
  ['-i', '--source-bucket'], {
    help: 'The source S3 bucket name',
    defaultValue: process.env.DBR_BUCKET
  }
);

parser.addArgument(
  ['-o', '--dest-bucket'], {
    help: 'The destination S3 bucket name',
    defaultValue: process.env.STAGING_BUCKET
  }
);

parser.addArgument(
  ['-r', '--redshift-url'], {
    help: 'The Redshift connection URL',
    defaultValue: process.env.REDSHIFT_URL
  }
);

parser.addArgument(
  ['-t', '--target-table'], {
    help: 'The redshift table to copy data into',
    defaultValue: 'line_items'
  }
);

parser.addArgument(
  ['-f', '--file'], {
    help: 'The file to unzip and copy',
    required: true
  }
);

parser.addArgument(
  ['-d', '--debug'], {
    action: 'storeConst',
    dest: 'debug',
    help: 'Turn on debugging output',
    constant: true
  }
);

var args = parser.parseArgs();

if (args.debug) {
  log.setLevel('debug');
  log.debug("Debugging output enabled.");
} else {
  log.setLevel('info');
}
log.debug(args);

var dbrClientOptions = {
  accessKeyId: process.env.DBR_AWS_ACCESS_KEY_ID,
  secretAccessKey: process.env.DBR_AWS_SECRET_ACCESS_KEY
};

var stagingClientOptions = {
  accessKeyId: process.env.IDAN_AWS_ACCESS_KEY_ID,
  secretAccessKey: process.env.IDAN_AWS_SECRET_ACCESS_KEY
};

// We'll need these handy in various places
var monthMatch = /(\d{4})-(\d{2})/.exec(args.file);
var year = monthMatch[1];
var month = monthMatch[2];
var monthString = `${year}_${month}`;
var monthDateString = `${year}-${month}-01`;

// S3 Clients for the DBR and staging buckets
var dbrClient = new AWS.S3(dbrClientOptions);
var stagingClient = new AWS.S3(stagingClientOptions);


// ==============================================================
// Each of the major steps is a function which returns a promise.
// ==============================================================

var downloadFile = function(bucket, key) {
  // Downloads the specified DBR zip
  log.info(`${monthString} (download): downloading '${key}' from S3...`);
  return new Promise(function(resolve, reject) {
    var sourceParams = {
      Bucket: bucket,
      Key: key
    };
    var outStream = fs.createWriteStream(key);

    var downloadProgress = progress({
      length: 0,
      time: 1000
    });
    downloadProgress.on("progress", function(progress) {
      var percentage = numeral(progress.percentage/100).format('00.0%');
      var eta = moment.duration(progress.eta * 1000).humanize();
      log.info(`${monthString} (download): ${percentage} (${eta} at ${prettyBytes(progress.speed)}/sec)`);
    });

    var request = dbrClient.getObject(sourceParams);
    request.on('httpHeaders', function(status, headers, resp) {
      var totalLength = parseInt(headers['content-length'], 10);
      downloadProgress.setLength(totalLength);
    });

    var zipfileStream = request.createReadStream();
    zipfileStream.pipe(downloadProgress)
                 .pipe(outStream);

    outStream.on('close', function() {
      var durationString = moment.utc(moment.utc() - startTime).format("HH:mm:ss.SSS");
      log.info(`${monthString} (download): complete (${durationString})`);
      resolve(key);
    });
  });
};

var processZipFile = function(zipFileName) {
  // Unzip, gzip, and upload to the staging bucket on S3
  log.info(`${monthString} (process): processing '${zipFileName}'...`);

  // In theory, zipfiles can contain multiple files
  // We know that the DBR zip has only one file inside, the DBR CSV
  return new Promise(function(resolve, reject) {
    var uncompressedLength = parseInt(child_process.execSync(
      `zipinfo -t ${zipFileName} | cut -d ' ' -f 3`, {encoding: 'utf8'}
    ), 10);

    // Hack off the '.zip'
    var plainFileName = zipFileName.substr(0, zipFileName.length - 4);

    // For monitoring unzip progress
    var unzipProgress = progress({time: 10000, length: uncompressedLength}, function(progress) {
      var percentage = numeral(progress.percentage/100).format('00.0%');
      var eta = moment.duration(progress.eta * 1000).humanize();
      log.info(`${monthString} (process-unzip): ${percentage} (${eta} at ${prettyBytes(progress.speed)}/sec)`);
    });

    // For monitoring gzip progress.
    // From this point forward in the stream, we don't know the stream length as
    // we don't know how much the stream will compress down to until it's done.
    var gzipProgress = progress({time: 10000}, function(progress) {
      log.info(`${monthString} (process-gzip): ${prettyBytes(progress.transferred)} at ${prettyBytes(progress.speed)}/sec`);
    });

    // Hook up every part of the stream prior to the HTTP upload to S3
    // Stream not flowing at this point! Triggered by request.send() below.
    var unzipGzipStream = child_process.spawn('unzip', ['-p', './' + zipFileName])
                                       .stdout
                                       .pipe(unzipProgress)
                                       .pipe(zlib.createGzip())
                                       .pipe(gzipProgress);

    // Prepare the upload to S3 with the stream as the body
    var requestParams = {
      Bucket: process.env.STAGING_BUCKET,
      Key: `${plainFileName}.gz`,
      Body: unzipGzipStream
    };
    var request = stagingClient.upload(requestParams);
    request.on('httpUploadProgress', debounce(function(progress) {
      log.info(`${monthString} (process-upload): ${prettyBytes(progress.loaded)}`);
    }, 1000, true));

    // Fire the upload request, gets the stream flowing.
    request.send(function(err, data) {
      if (err) return reject(err);
      var durationString = moment.utc(moment.utc() - startTime).format("HH:mm:ss.SSS");
      log.info(`${monthString} (upload): complete (${durationString})`);
      resolve(`s3://${requestParams.Bucket}/${requestParams.Key}`);
    });

  });
};

var importToRedshift = function(s3uri) {
  // Import the gzipped DBR from the staging bucket into a staging table on
  // redshift. Add the statement_month column and then copy from staging into
  // line_items, then drop the staging table.
  log.info(`${monthString} (import): importing to redshift...`);
  return new Promise(function(resolve, reject) {
    var client = new pg.Client(args.redshift_url);
    client.connect(function(err) {
      if (err) return reject(err);
      var stagingTableName = `staging_${monthString}`;
      var query = `
        BEGIN;
          -- can't create and alter because statement_month is the sortkey
          -- must create from scratch
          -- CREATE TABLE ${stagingTableName} (LIKE line_items);
          -- ALTER TABLE ${stagingTableName} DROP COLUMN statement_month;

          CREATE TABLE IF NOT EXISTS ${stagingTableName} (
            invoice_id TEXT,
            payer_account_id TEXT,
            linked_account_id TEXT,
            record_type TEXT,
            record_id TEXT,
            product_name TEXT,
            rate_id TEXT,
            subscription_id TEXT,
            pricing_plan_id TEXT,
            usage_type TEXT,
            operation TEXT,
            availability_zone TEXT,
            reserved_instance TEXT,
            item_description TEXT,
            usage_start_date TIMESTAMP,
            usage_end_date TIMESTAMP,
            usage_quantity FLOAT8,
            blended_rate NUMERIC(18,11),
            blended_cost NUMERIC(18,11),
            unblended_rate NUMERIC(18,11),
            unblended_cost NUMERIC(18,11),
            resource_id TEXT,
            cloud TEXT,
            slot TEXT,
            PRIMARY KEY(record_id)
          ) DISTSTYLE EVEN;

          COPY ${stagingTableName}
            FROM '${s3uri}'
            CREDENTIALS 'aws_access_key_id=${stagingClientOptions.accessKeyId};aws_secret_access_key=${stagingClientOptions.secretAccessKey}'
            GZIP CSV IGNOREHEADER 1;
          ALTER TABLE ${stagingTableName} ADD COLUMN statement_month DATE DEFAULT '${monthDateString}';
          INSERT INTO line_items SELECT * FROM ${stagingTableName};
          DROP TABLE ${stagingTableName};
        COMMIT;
        -- ANALYZE line_items;
        -- VACUUM line_items;
      `;
      log.debug(query);
      client.query(query, function(err, result) {
        if (err) return reject(err);
        var durationString = moment.utc(moment.utc() - startTime).format("HH:mm:ss.SSS");
        log.info(`${monthString} (import): complete (${durationString})`);
        resolve(s3uri);
      });
    });
  });
};


// Kick off the promise chain.
var startTime = moment.utc();
downloadFile(args.source_bucket, args.file)
  .then(processZipFile)
  .then(importToRedshift)
  .then(function(s3uri) {
    var durationString = moment.utc(moment.utc() - startTime).format("HH:mm:ss.SSS");
    log.info(`${monthString}: Import complete! Took ${durationString}`);
    process.exit();
  })
  .catch(function(err) {
    var durationString = moment.utc(moment.utc() - startTime).format("HH:mm:ss.SSS");
    log.error(`${monthString}: Something went terribly wrong after ${durationString}`);
    log.error(err);
    log.error(err.stack);
    rollbar.handleError(err);
    process.exit(1);
  });