[
  {
    "path": ".gitignore",
    "content": "node_modules\n*.csv\n*.zip\n"
  },
  {
    "path": ".jshintrc",
    "content": "{\n  \"esnext\": true\n}\n"
  },
  {
    "path": "CODEOWNERS",
    "content": "# Comment line immediately above ownership line is reserved for related gus information. Please be careful while editing.\n#ECCN:Open Source\n"
  },
  {
    "path": "LICENSE.txt",
    "content": "The MIT License (MIT)\n\nCopyright (c) 2015 Heroku\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n"
  },
  {
    "path": "Procfile",
    "content": "fake: this app is invoked by schedule!\n"
  },
  {
    "path": "import_finalized.js",
    "content": "'use strict'\n\n/*******************************************************************************\nImport finalized monthly DBRs.\n*******************************************************************************/\n\nvar util = require('util')\n\nvar log = require('loglevel')\nvar rollbar = require('rollbar')\nvar moment = require('moment')\n\nvar BaseParser = require('./lib/baseparser.js')\nvar DBR = require('./lib/dbr.js')\nvar Redshift = require('./lib/redshift.js')\nvar cliUtils = require('./lib/cliutils.js')\n\nrollbar.init(process.env.ROLLBAR_TOKEN, {environment: process.env.ROLLBAR_ENVIRONMENT})\nrollbar.handleUncaughtExceptions(process.env.ROLLBAR_TOKEN,\n                                 {exitOnUncaughtException: true})\n\nvar parser = new BaseParser({\n  version: '0.0.1',\n  addHelp: true,\n  description: 'Imports finalized (whole-month) detailed billing reports'\n})\n\nparser.addArgument(\n  ['--force'], {\n    action: 'storeConst',\n    dest: 'force',\n    help: 'Ignore existing DBRs in staging or redshift, and reimport them.',\n    constant: true\n  }\n)\n\nparser.addArgument(\n  ['--specific'], {\n    help: \"Import a specific month's DBR. 
Specified in YYYY-MM format.\"\n  }\n)\n\nparser.addArgument(\n  ['--prune-months'], {\n    help: 'The amount of history (in number of months) to retain in Redshift',\n    type: 'int'\n  }\n)\n\nvar args = parser.parseArgs()\n\nif (args.debug) {\n  log.setLevel('debug')\n  log.debug('Debugging output enabled.')\n} else {\n  log.setLevel('info')\n}\nlog.debug(`Resolved invocation arguments were:\\n${util.inspect(args)}`)\n\nif (args.specific !== null && args.prune_months !== null) {\n  log.error('The \"--specific\" and \"--prune-months\" options are mutually exclusive.')\n  log.error('--prune-months can only be invoked when importing the latest DBR.')\n  log.error('Aborting.')\n  process.exit(1)\n}\n\n// Instantiate a DBR object to work with.\nvar dbrClientOptions = {\n  accessKeyId: args.source_key,\n  secretAccessKey: args.source_secret\n}\n\nvar stagingClientOptions = {\n  accessKeyId: args.staging_key,\n  secretAccessKey: args.staging_secret\n}\n\nvar dbr = new DBR(dbrClientOptions, stagingClientOptions,\n                  args.source_bucket, args.staging_bucket)\n\n// Instantiate a Redshift object to work with.\nvar redshift = new Redshift(args.redshift_uri, {\n      key: args.staging_key,\n      secret: args.staging_secret\n})\n\nlet startTime = moment.utc()\n\nchooseDBR()\n  .then(importDBRCheck)\n  .then(stageDBRCheck)\n  .then(importDBR)\n  .then(vacuum)\n  .then(function () {\n    cliUtils.runCompleteHandler(startTime, 0)\n  })\n  .catch(cliUtils.rejectHandler)\n\nfunction chooseDBR () {\n  return new Promise(function (resolve, reject) {\n    if (args.specific) {\n      log.debug(`Invoked with --specific ${args.specific}.`)\n      try {\n        let match = /^(\\d{4})-(\\d{2})$/.exec(args.specific)\n        if (match === null) {\n          return reject('--specific requires a year and month parameter in the form of YYYY-MM')\n        }\n        // moment.utc month argument is zero-indexed\n        let month = moment.utc([match[1], match[2] - 1])\n        
log.debug(`Attempting to import ${month.toISOString()}`)\n        return resolve(dbr.findDBR(month))\n      } catch (err) {\n        return reject(err)\n      }\n    } else {\n      log.debug(`Invoked without --specific. Targeting latest finalized DBR...`)\n      return resolve(dbr.getLatestFinalizedDBR())\n    }\n  })\n}\n\n// Given a latest finalized DBR object, decide whether to import it\nfunction importDBRCheck (finalizedDBR) {\n  let dbrMonth = finalizedDBR.Month.format('MMMM YYYY')\n  return redshift.hasMonth(finalizedDBR.Month).then(function (hasMonth) {\n    if (hasMonth) {\n      log.info(`No new DBRs to import.`)\n      if (args.force) {\n        log.warn(`--force specified, importing DBR for ${dbrMonth} anyways`)\n        return finalizedDBR\n      }\n      cliUtils.runCompleteHandler(startTime, 0)\n    } else {\n      return finalizedDBR\n    }\n  })\n}\n\n// Given a DBR, (optionally) stage it\nfunction stageDBRCheck (finalizedDBR) {\n  return dbr.findStagedDBR(finalizedDBR.Month).then(\n    function (stagedDBR) {\n      let dbrMonth = stagedDBR.Month.format('MMMM YYYY')\n      // DBR is staged!\n      if (!args.force) {\n        // No need to re-stage\n        log.warn(`Using existing staged DBR for ${dbrMonth}.`)\n        let s3uri = `s3://${args.staging_bucket}/${stagedDBR.Key}`\n        log.debug(`Staged s3uri: ${s3uri}`)\n        return ({s3uri: s3uri, month: stagedDBR.Month})\n      } else {\n        // Force re-stage\n        log.warn(`--force specified, overwriting staged DBR for ${dbrMonth}`)\n        return dbr.stageDBR(stagedDBR.Month).then(function (s3uri) {\n          return ({s3uri: s3uri, month: stagedDBR.Month})\n        })\n      }\n    },\n    function (err) {\n      // DBR not staged. 
Stage then import.\n      log.debug(`DBR not staged: ${err}`)\n      log.info(`Staging DBR for ${finalizedDBR.Month.format('MMMM YYYY')}.`)\n      return dbr.stageDBR(finalizedDBR.Month).then(function (s3uri) {\n        return ({s3uri: s3uri, month: finalizedDBR.Month})\n      })\n    }\n  )\n}\n\n// Given an object like {s3uri: <uri>, month: <moment>}\n// Execute the import.\nfunction importDBR (params) {\n  log.info(`Importing DBR for ${params.month.format('MMMM YYYY')}`)\n  if (args.prune_months !== null) {\n    let pruneThreshold = moment(params.month)\n      .subtract(args.prune_months, 'months')\n      .format('MMMM YYYY')\n    log.info(`... and pruning months prior to ${pruneThreshold}`)\n    return redshift.importFullMonth(params.s3uri, params.month, args.prune_months)\n  } else {\n    return redshift.importFullMonth(params.s3uri, params.month)\n  }\n}\n\n// Run VACUUM on the line_items table\nfunction vacuum () {\n  if (!args.no_vacuum) {\n    log.info('Running VACUUM on line_items...')\n    return redshift.vacuum(process.env.LINE_ITEMS_TABLE_NAME || 'line_items')\n  } else {\n    log.info('--no-vacuum specified, skipping vacuum.')\n    return\n  }\n}\n"
  },
  {
    "path": "import_month_to_date.js",
    "content": "'use strict'\n\n/*******************************************************************************\nImport month-to-date DBRs, overwriting the existing month-to-date.\n*******************************************************************************/\n\nvar util = require('util')\n\nvar log = require('loglevel')\nvar rollbar = require('rollbar')\nvar moment = require('moment')\n\nvar BaseParser = require('./lib/baseparser.js')\nvar DBR = require('./lib/dbr.js')\nvar Redshift = require('./lib/redshift.js')\nvar cliUtils = require('./lib/cliutils.js')\n\nrollbar.init(process.env.ROLLBAR_TOKEN, {environment: process.env.ROLLBAR_ENVIRONMENT})\nrollbar.handleUncaughtExceptions(process.env.ROLLBAR_TOKEN,\n                                 {exitOnUncaughtException: true})\n\nvar parser = new BaseParser({\n  version: '0.0.1',\n  addHelp: true,\n  description: 'Imports month-to-date detailed billing reports'\n})\n\nparser.addArgument(\n  ['--no-stage'], {\n    action: 'storeConst',\n    dest: 'no_stage',\n    help: 'Use an existing staged month-to-date DBR.',\n    constant: true\n  }\n)\n\nvar args = parser.parseArgs()\n\nif (args.debug) {\n  log.setLevel('debug')\n  log.debug('Debugging output enabled.')\n} else {\n  log.setLevel('info')\n}\nlog.debug(`Resolved invocation arguments were:\\n${util.inspect(args)}`)\n\n// Instantiate a DBR object to work with.\nvar dbrClientOptions = {\n  accessKeyId: args.source_key,\n  secretAccessKey: args.source_secret\n}\n\nvar stagingClientOptions = {\n  accessKeyId: args.staging_key,\n  secretAccessKey: args.staging_secret\n}\n\nvar dbr = new DBR(dbrClientOptions, stagingClientOptions,\n                  args.source_bucket, args.staging_bucket)\n\n// Instantiate a Redshift object to work with.\nvar redshift = new Redshift(args.redshift_uri, {\n      key: args.staging_key,\n      secret: args.staging_secret\n})\n\nlet startTime = moment.utc()\n\ndbr.getMonthToDateDBR()\n  .then(stageDBRCheck)\n  .then(importDBR)\n  
.then(vacuum)\n  .then(function () {\n    cliUtils.runCompleteHandler(startTime, 0)\n  })\n  .catch(cliUtils.rejectHandler)\n\n// Determine whether to stage the latest month-to-date DBR or reuse existing\nfunction stageDBRCheck (monthToDateDBR) {\n  log.info(`Found month-to-date for ${monthToDateDBR.Month.format('MMMM YYYY')}...`)\n  if (args.no_stage) {\n    let s3uri = dbr.composeStagedURI(monthToDateDBR)\n    log.info(`--no-stage specified, attempting to use existing staged month-to-date DBR`)\n    return s3uri\n  } else {\n    log.info(`Staging DBR file for ${monthToDateDBR.Month.format('MMMM YYYY')}.`)\n    return dbr.stageDBR(monthToDateDBR.Month)\n  }\n}\n\n// Import the staged month-to-date DBR\n// TODO if we just chain like .then(redshift.importMonthToDate), it fails\n// because 'this' inside importMonthToDate will be undefined. Why?\nfunction importDBR (s3uri) {\n  log.info(`Importing ${s3uri} into month_to_date...`)\n  return redshift.importMonthToDate(s3uri)\n}\n\n// Run VACUUM on the month_to_date table\nfunction vacuum () {\n  if (!args.no_vacuum) {\n    log.info('Running VACUUM on month_to_date...')\n    return redshift.vacuum('month_to_date')\n  } else {\n    log.info('--no-vacuum specified, skipping vacuum.')\n    return\n  }\n}\n"
  },
  {
    "path": "lib/baseparser.js",
    "content": "'use strict'\n\nvar ArgumentParser = require('argparse').ArgumentParser\n\nmodule.exports = Parser\n\nfunction Parser (opts) {\n  let parser = new ArgumentParser(opts)\n\n  parser.addArgument(\n    ['--source-bucket'], {\n      help: 'The S3 bucket which contains the detailed billing reports. Defaults to the environment variable \"SOURCE_BUCKET\".',\n      defaultValue: process.env.SOURCE_BUCKET\n    }\n  )\n\n  parser.addArgument(\n    ['--source-key'], {\n      help: 'An AWS access key ID with permissions to access the source DBR bucket. Defaults to the environment variable \"SOURCE_AWS_KEY\", then to \"AWS_KEY\".',\n      defaultValue: process.env.SOURCE_AWS_KEY || process.env.AWS_KEY\n    }\n  )\n\n  parser.addArgument(\n    ['--source-secret'], {\n      help: 'An AWS access key secret with permissions to access the source DBR bucket. Defaults to the environment variable \"SOURCE_AWS_SECRET\", then to \"AWS_SECRET\".',\n      defaultValue: process.env.SOURCE_AWS_SECRET || process.env.AWS_SECRET\n    }\n  )\n\n  parser.addArgument(\n    ['--staging-bucket'], {\n      help: 'The S3 bucket which serves as a staging area for loading detailed billing reports. Defaults to the environment variable \"STAGING_BUCKET\".',\n      defaultValue: process.env.STAGING_BUCKET\n    }\n  )\n\n  parser.addArgument(\n    ['--staging-key'], {\n      help: 'An AWS access key ID with permissions to access the staging DBR bucket. Defaults to the environment variable \"STAGING_AWS_KEY\", then to \"AWS_KEY\".',\n      defaultValue: process.env.STAGING_AWS_KEY || process.env.AWS_KEY\n    }\n  )\n\n  parser.addArgument(\n    ['--staging-secret'], {\n      help: 'An AWS access key secret with permissions to access the staging DBR bucket. 
Defaults to the environment variable \"STAGING_AWS_SECRET\", then to \"AWS_SECRET\".',\n      defaultValue: process.env.STAGING_AWS_SECRET || process.env.AWS_SECRET\n    }\n  )\n\n  parser.addArgument(\n    ['--redshift-uri'], {\n      help: 'The redshift connection string, in URI form',\n      defaultValue: process.env.REDSHIFT_URI\n    }\n  )\n\n  parser.addArgument(\n    ['--no-vacuum'], {\n      action: 'storeConst',\n      dest: 'no_vacuum',\n      help: 'Do not automatically run VACUUM following the import.',\n      constant: true\n    }\n  )\n\n  parser.addArgument(\n    ['-d', '--debug'], {\n      action: 'storeConst',\n      dest: 'debug',\n      help: 'Turn on debugging output.',\n      constant: true\n    }\n  )\n\n  return parser\n}\n"
  },
  {
    "path": "lib/cliutils.js",
    "content": "'use strict'\n\nvar rollbar = require('rollbar')\nvar log = require('loglevel')\nvar moment = require('moment')\n\nrollbar.init(process.env.ROLLBAR_TOKEN, {environment: process.env.ROLLBAR_ENVIRONMENT})\n\nexports.rejectHandler = function (err) {\n  rollbar.handleError(err)\n  log.error(err)\n  log.error(err.message)\n  log.error(err.stack)\n  log.error('Aborting run.')\n  process.exit(1)\n}\n\nexports.runCompleteHandler = function (startTime, exitCode) {\n  let durationString = moment.utc(moment.utc() - startTime).format('HH:mm:ss.SSS')\n  log.info(`Run complete. Took ${durationString}`)\n  process.exit(exitCode || 0)\n}\n"
  },
  {
    "path": "lib/dbr.js",
    "content": "'use strict'\n\nvar fs = require('fs')\nvar path = require('path')\n\nvar log = require('loglevel')\nvar _ = require('lodash')\nvar moment = require('moment')\nvar AWS = require('aws-sdk')\nvar progress = require('progress-stream')\nvar prettyBytes = require('pretty-bytes')\nvar numeral = require('numeral')\nvar child_process = require('child_process')\nvar zlib = require('zlib')\nvar debounce = require('debounce')\n\nmodule.exports = DBR\n\nfunction DBR (credentials, stagingCredentials, bucket, stagingBucket) {\n  this.credentials = credentials\n  this.stagingCredentials = stagingCredentials\n  this.bucket = bucket\n  this.stagingBucket = stagingBucket\n\n  this.dbrClient = new AWS.S3(this.credentials)\n  this.stagingClient = new AWS.S3(this.stagingCredentials)\n}\n\n// Download, unzip, gzip, upload a DBR to the staging bucket\n// Returns a promise that resolves when everything is finished.\n// Month is a UTC moment object for midnight on the first of the month.\nDBR.prototype.stageDBR = function (month) {\n  let self = this\n  return this.findDBR(month)\n    .then(function (result) {\n      return downloadDBR(result, self.dbrClient, self.bucket)\n    })\n    .then(function (result) {\n      return processDBR(result, self.stagingClient, self.stagingBucket)\n    })\n}\n\n// Find a DBR for a given month or raise an error\n// Month is a UTC moment object for midnight on the first of the month.\nDBR.prototype.findDBR = function (month) {\n  let self = this\n  return new Promise(function (resolve, reject) {\n    self.getDBRs()\n        .then(function (dbrs) {\n          let match = _.find(dbrs, function (d) { return month.isSame(d.Month) })\n          if (match === undefined) {\n            return reject(new Error(`Unable to find the DBR for ${month.format('MMMM YYYY')}.`))\n          } else {\n            return resolve(match)\n          }\n        })\n  })\n}\n\n// Find a staged DBR for a given month or raise an error\n// Month is a UTC moment object 
for midnight on the first of the month.\n// TODO: is there a way to consolidate the handler in then() with findDBR()'s?\nDBR.prototype.findStagedDBR = function (month) {\n  let self = this\n  return new Promise(function (resolve, reject) {\n    self.getStagedDBRs()\n        .then(function (dbrs) {\n          let match = _.find(dbrs, function (d) { return month.isSame(d.Month) })\n          if (match === undefined) {\n            return reject(new Error(`Unable to find the staged DBR for ${month.format('MMMM YYYY')}.`))\n          } else {\n            return resolve(match)\n          }\n        })\n  })\n}\n\n// Get the contents of a bucket. Returns a promise which resolves with an array\n// of bucket objects.\n// Will not work with buckets containing > 1000 objects, but that's okay\n// for our purposes here.\nDBR.prototype.getBucketContents = function (client, bucket) {\n  return new Promise(function (resolve, reject) {\n    client.listObjects({Bucket: bucket}, function (err, data) {\n      if (err) return reject(err)\n      if ('Contents' in data) {\n        return resolve(data.Contents)\n      } else {\n        return reject(`Bucket listObjects response didn't contain \"Contents\" key.`)\n      }\n    })\n  })\n}\n\n// Get a listing of avalable DBRs\n// Returns a promise which resolves with an date-sorted array of objects like:\n// {Key: <filename>, Size: <bytes>, Month: <moment>}\nDBR.prototype.getDBRs = function () {\n  return this.getBucketContents(this.dbrClient, this.bucket)\n             .then(processDBRBucketContents)\n}\n\n// Get a listing of staged DBRs\n// Returns a promise which resolves with an date-sorted array of objects like:\n// {Key: <filename>, Size: <bytes>, Month: <moment>}\nDBR.prototype.getStagedDBRs = function () {\n  return this.getBucketContents(this.stagingClient, this.stagingBucket)\n             .then(processDBRBucketContents)\n}\n\n// Get the month-to-date DBR.\n// Returns a promise which resolves with an object like:\n// {Key: 
<filename>, Size: <bytes>, Month: <moment>}\nDBR.prototype.getMonthToDateDBR = function () {\n  return this.getDBRs()\n             .then(function (dbrs) {\n               if (dbrs.length === 0) {\n                 throw new Error('There are no existing DBRs.')\n               } else {\n                 return dbrs[dbrs.length - 1]\n               }\n             })\n}\n\n// Get the latest finalized DBR.\n// Returns a promise which resolves with an object like:\n// {Key: <filename>, Size: <bytes>, Month: <moment>}\nDBR.prototype.getLatestFinalizedDBR = function () {\n  return this.getDBRs()\n             .then(function (dbrs) {\n               if (dbrs.length === 0) {\n                 throw new Error('There are no existing DBRs.')\n               } else if (dbrs.length === 1) {\n                 throw new Error('There are no finalized DBRs.')\n               } else {\n                 return dbrs[dbrs.length - 2]\n               }\n             })\n}\n\nDBR.prototype.composeStagedURI = function (dbrObject) {\n  let gzFileName = path.basename(dbrObject.Key, '.zip') + '.gz'\n  return `s3://${this.stagingBucket}/${gzFileName}`\n}\n\n// =============================================================================\n// Module-private stuff down here\n\nvar dbrPattern = /\\d+-aws-billing-detailed-line-items-with-resources-and-tags-(\\d{4})-(\\d{2}).csv.[gz|zip]/\n\nfunction extractMonth (val) {\n  let match = dbrPattern.exec(val)\n  if (match === null) return null\n  let year = parseInt(match[1], 10)\n  let month = parseInt(match[2], 10)\n  return new moment.utc([year, month - 1]) // eslint-disable-line new-cap\n                                           // no control over moment...\n}\n\n// Take a bucket listing, filter out non-DBR entries, and return an array\n// of objects ordered by the statement date (ascending). 
Each object has\n// three properties: Key, Size, and Month:\n//   Key:  the filename\n//   Size: the size in bytes\n//   Month: a utc moment object of the DBR month (midnight on first of the month)\nfunction processDBRBucketContents (results) {\n  let dbrs = []\n  // Filter only DBRs\n  for (let result of results) {\n    let month = extractMonth(result.Key)\n    if (month === null) continue\n    // grab only the Key and Size properties\n    let picked = _.pick(result, ['Key', 'Size'])\n    // Add a Month property\n    picked.Month = month\n    dbrs.push(picked)\n  }\n  return dbrs.sort(function (a, b) {\n    if (a.Month < b.Month) return -1\n    else if (a.Month > b.Month) return 1\n    else return 0\n  })\n}\n\n// Downloads the specified DBR zip\n// the argument is an object like:\n//   {Key: <filename>, Size: <bytes>, Month: <moment>}\nfunction downloadDBR (dbr, s3client, bucket) {\n  const monthString = dbr.Month.format('MMM YYYY')\n  log.info(`[${monthString}] (download): downloading from S3...`)\n  return new Promise(function (resolve, reject) {\n    let sourceParams = {\n      Bucket: bucket,\n      Key: dbr.Key\n    }\n    let outStream = fs.createWriteStream(dbr.Key)\n    let downloadProgress = progress({\n      length: dbr.Size,\n      time: 1000\n    })\n    let request = s3client.getObject(sourceParams)\n\n    downloadProgress.on('progress', function (dlprogress) {\n      let percentage = numeral(dlprogress.percentage / 100).format('00.0%')\n      let eta = moment.duration(dlprogress.eta * 1000).humanize()\n      log.info(`[${monthString}] (download): ${percentage} (${eta} at ${prettyBytes(dlprogress.speed)}/sec)`)\n    })\n\n    // Kick off the stream\n    let zipfileStream = request.createReadStream()\n    zipfileStream.pipe(downloadProgress)\n                 .pipe(outStream)\n\n    outStream.on('close', function () {\n      log.info(`[${monthString}] (download): complete.`)\n      return resolve(dbr)\n    })\n  })\n}\n\n// Processes the specified 
local DBR zip: unzip, gzip, upload to staging.\n// the argument is an object like:\n//   {Key: <filename>, Size: <bytes>, Month: <moment>}\nfunction processDBR (dbr, s3client, bucket) {\n  const monthString = dbr.Month.format('MMM YYYY')\n\n  // Unzip, gzip, and upload to the staging bucket on S3\n  log.info(`[${monthString}] (process): processing '${dbr.Key}'...`)\n\n  // In theory, zipfiles can contain multiple files\n  // We know that the DBR zip has only one file inside, the DBR CSV\n  return new Promise(function (resolve, reject) {\n    var uncompressedLength = parseInt(child_process.execSync(\n      `zipinfo -t ${dbr.Key} | cut -d ' ' -f 3`, {encoding: 'utf8'}\n    ), 10)\n\n    // Hack off the '.zip'\n    var plainFileName = path.basename(dbr.Key, '.zip')\n\n    // For monitoring unzip progress\n    var unzipProgress = progress({time: 10000, length: uncompressedLength}, function (uzprogress) {\n      let percentage = numeral(uzprogress.percentage / 100).format('00.0%')\n      let eta = moment.duration(uzprogress.eta * 1000).humanize()\n      log.info(`[${monthString}] (process-unzip): ${percentage} (${eta} at ${prettyBytes(uzprogress.speed)}/sec)`)\n    })\n\n    // For monitoring gzip progress.\n    // From this point forward in the stream, we don't know the stream length as\n    // we don't know how much the stream will compress down to until it's done.\n    var gzipProgress = progress({time: 10000}, function (gzprogress) {\n      log.info(`[${monthString}] (process-gzip): ${prettyBytes(gzprogress.transferred)} at ${prettyBytes(gzprogress.speed)}/sec`)\n    })\n\n    // Hook up every part of the stream prior to the HTTP upload to S3\n    // Stream not flowing at this point! 
Triggered by request.send() below.\n    var unzipGzipStream = child_process.spawn('unzip', ['-p', `./${dbr.Key}`])\n                                       .stdout\n                                       .pipe(unzipProgress)\n                                       .pipe(zlib.createGzip())\n                                       .pipe(gzipProgress)\n\n    // Prepare the upload to S3 with the stream as the body\n    var requestParams = {\n      Bucket: bucket,\n      Key: `${plainFileName}.gz`,\n      Body: unzipGzipStream\n    }\n    var request = s3client.upload(requestParams)\n    request.on('httpUploadProgress', debounce(function (progress) {\n      log.info(`[${monthString}] (process-upload): ${prettyBytes(progress.loaded)}`)\n    }, 1000, true))\n\n    // Fire the upload request, gets the stream flowing.\n    request.send(function (err, data) {\n      if (err) return reject(err)\n      log.info(`[${monthString}] (process-upload): complete.`)\n      return resolve(`s3://${requestParams.Bucket}/${requestParams.Key}`)\n    })\n  })\n}\n"
  },
  {
    "path": "lib/redshift.js",
    "content": "'use strict'\n\nvar log = require('loglevel')\nvar moment = require('moment')\nvar pg = require('pg')\nvar types = require('pg').types\n\n// Redshift doesn't have TIMESTAMP WITH TIME ZONE\n// All dates will therefore come back here as localtime\n// This forces dates to come back as UTC.\n// See https://github.com/brianc/node-pg-types/blob/master/lib/textParsers.js\n// Also http://stackoverflow.com/questions/20712291/use-node-postgres-to-get-postgres-timestamp-without-timezone-in-utc\ntypes.setTypeParser(1082, function (stringVal) {\n  return new Date(stringVal)\n})\n\nmodule.exports = Redshift\n\nfunction Redshift (connString, s3credentials) {\n  this.connString = connString\n  this.s3credentials = s3credentials\n  this.lineItemsTableName = process.env.LINE_ITEMS_TABLE_NAME || 'line_items'\n  this.schema = process.env.SCHEMA || 'heroku'\n}\n\n// Execute a query, using the query pool\n// Return a promise which resolves with the output of the query.\nRedshift.prototype.executeQuery = function (query, transform) {\n  var self = this\n  log.debug('Executing query:')\n  log.debug(query)\n  return new Promise(function (resolve, reject) {\n    pg.connect(self.connString, function (err, client, done) {\n      if (err) throw err\n      client.query(query, function (err, result) {\n        if (err) throw err\n        if (transform !== undefined) {\n          result = transform(result)\n        }\n        done(client)\n        return resolve(result)\n      })\n    })\n  })\n}\n\n// Execute a query where the desired ouput is a single scalar value.\nRedshift.prototype.getScalar = function (query, keyName) {\n  let transform = function (result) {\n    let rowzero = result.rows[0]\n    if (keyName !== undefined) {\n      return rowzero[keyName]\n    } else {\n      // return the value of the first key\n      return rowzero[Object.keys(rowzero)[0]]\n    }\n  }\n  return this.executeQuery(query, transform)\n}\n\n// Execute a query where the desired output is a count 
of affected rows\n// (for example, DELETE FROM queries).\nRedshift.prototype.getRowCount = function (query) {\n  let transform = function (result) {\n    return result.rowCount\n  }\n  return this.executeQuery(query, transform)\n}\n\n// Check that a table exists. Won't match on views or other table-like things.\nRedshift.prototype.checkTableExists = function (tableName, schema) {\n  let query = `\n    SELECT EXISTS (\n      SELECT 1\n      FROM   pg_catalog.pg_class c\n      JOIN   pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n      WHERE  n.nspname = '${schema}'\n      AND    c.relname = '${tableName}'\n      AND    c.relkind = 'r'    -- only tables\n    ) as exists;`\n\n  let transform = function (result) {\n    return result.rows[0].exists === 't'\n  }\n  return this.executeQuery(query, transform)\n}\n\n// Determine the month of the latest finalized DBR to be imported into Redshift.\nRedshift.prototype.latestFullMonth = function () {\n  let query = `SELECT MAX(statement_month) FROM ${this.schema}.${this.lineItemsTableName};`\n  return this.getScalar(query, 'max').then(function (date) {\n    let latest = moment.utc(date)\n    if (latest.isValid()) {\n      return latest\n    } else {\n      return null\n    }\n  })\n}\n\n// Determine whether a specific finalized month has already been imported.\nRedshift.prototype.hasMonth = function (month) {\n  let query = `\n    SELECT COUNT(*)\n    FROM ${this.schema}.${this.lineItemsTableName}\n    WHERE statement_month = '${month.format('YYYY-MM-01')}';`\n  return this.getScalar(query, 'count').then(function (count) {\n    return (count > 0)\n  })\n}\n\n// Import a finalized (full-month) DBR into Redshift.\n// First, create a staging table and COPY FROM into that.\n// Then, add the statement_month column, and copy it all to line_items.\n// Then drop the staging table.\nRedshift.prototype.importFullMonth = function (s3uri, month, pruneThresholdMonths) {\n  const monthString = month.format('YYYY_MM')\n  const 
monthDateString = month.format('YYYY-MM-01')\n  const stagingTableName = `staging_${monthString}`\n  let pruneQueryFragment = ''\n  if (typeof pruneThresholdMonths === 'number') {\n    // Delete statement months that are older than X months ago\n    let pruneThresholdString = moment(month)\n      .subtract(pruneThresholdMonths, 'months')\n      .format('YYYY-MM-01')\n    pruneQueryFragment = `DELETE FROM ${this.schema}.line_items WHERE statement_month <= '${pruneThresholdString}'::DATE;`\n  }\n  // Normally, creating the staging table would look like:\n  //   CREATE TABLE ${stagingTableName} (LIKE line_items)\n  // However you can't alter the staging table to drop statement_month because\n  // it is specified as the SORTKEY, and that can't be touched in existing\n  // tables. So, we create the staging table from scratch.\n  let query = `\n    BEGIN;\n      CREATE TABLE IF NOT EXISTS ${this.schema}.${stagingTableName} (\n        invoice_id TEXT,\n        payer_account_id TEXT,\n        linked_account_id TEXT,\n        record_type TEXT,\n        record_id TEXT,\n        product_name TEXT,\n        rate_id TEXT,\n        subscription_id TEXT,\n        pricing_plan_id TEXT,\n        usage_type TEXT,\n        operation TEXT,\n        availability_zone TEXT,\n        reserved_instance TEXT,\n        item_description TEXT,\n        usage_start_date TIMESTAMP,\n        usage_end_date TIMESTAMP,\n        usage_quantity FLOAT8,\n        blended_rate NUMERIC(18,11),\n        blended_cost NUMERIC(18,11),\n        unblended_rate NUMERIC(18,11),\n        unblended_cost NUMERIC(18,11),\n        resource_id TEXT,\n        cloud TEXT,\n        slot TEXT,\n        PRIMARY KEY(record_id)\n      ) DISTSTYLE EVEN;\n\n      COPY ${this.schema}.${stagingTableName}\n        FROM '${s3uri}'\n        CREDENTIALS 'aws_access_key_id=${this.s3credentials.key};aws_secret_access_key=${this.s3credentials.secret}'\n        GZIP CSV IGNOREHEADER 1;\n      ALTER TABLE 
${this.schema}.${stagingTableName} ADD COLUMN statement_month DATE DEFAULT '${monthDateString}';\n      DELETE FROM ${this.schema}.${this.lineItemsTableName} WHERE statement_month = '${monthDateString}';\n      INSERT INTO ${this.schema}.${this.lineItemsTableName} SELECT * FROM ${this.schema}.${stagingTableName};\n      DROP TABLE ${this.schema}.${stagingTableName};\n      ${pruneQueryFragment}\n    COMMIT;\n  `\n\n  return this.executeQuery(query)\n}\n\n// Import the month-to-date DBR into the month_to_date table, clobbering\n// whatever was already there.\nRedshift.prototype.importMonthToDate = function (s3uri) {\n  let self = this\n  let truncateQuery = `TRUNCATE ${self.schema}.month_to_date;`\n  return this.executeQuery(truncateQuery).then(function () {\n    log.debug('Month to date table truncated. Importing...')\n    let query = `\n      COPY ${self.schema}.month_to_date\n        FROM '${s3uri}'\n        CREDENTIALS 'aws_access_key_id=${self.s3credentials.key};aws_secret_access_key=${self.s3credentials.secret}'\n        GZIP CSV IGNOREHEADER 1;\n    `\n    return self.executeQuery(query)\n  })\n}\n\n// Vacuum the database.\nRedshift.prototype.vacuum = function (tableName) {\n  let query = `VACUUM ${(this.schema) + '.' + tableName || ''};`\n  return this.executeQuery(query)\n}\n"
  },
  {
    "path": "package.json",
    "content": "{\n  \"name\": \"awsdetailedbilling\",\n  \"version\": \"0.0.1\",\n  \"description\": \"Copies AWS Detailed Billing Reports to redshift\",\n  \"main\": \"unzip.js\",\n  \"scripts\": {\n    \"test\": \"echo \\\"Error: no test specified\\\" && exit 1\"\n  },\n  \"author\": \"idan@heroku.com\",\n  \"dependencies\": {\n    \"argparse\": \"^1.0.1\",\n    \"aws-sdk\": \"^2.1.17\",\n    \"debounce\": \"^1.0.0\",\n    \"lodash\": \"^3.6.0\",\n    \"loglevel\": \"^1.2.0\",\n    \"moment\": \"^2.9.0\",\n    \"numeral\": \"^1.5.3\",\n    \"pg\": \"^4.3.0\",\n    \"pretty-bytes\": \"^1.0.3\",\n    \"progress-stream\": \"^1.0.1\",\n    \"rollbar\": \"^0.4.5\"\n  },\n  \"engines\": {\n    \"iojs\": \"1.8.x\"\n  }\n}\n"
  },
  {
    "path": "readme.md",
    "content": "# awsdetailedbilling\n\nLoads AWS detailed billing reports into a redshift cluster.\n\n[![js-standard-style](https://cdn.rawgit.com/feross/standard/master/badge.svg)](https://github.com/feross/standard)\n\n\n# Setup\n\nStill a manual process for now:\n\n1. Create a Redshift cluster.\n2. Once the cluster is alive, connect with your favorite postgres client and create the `line_items` and `month_to_date` tables. The SQL for creating each is in the `sql/` subdirectory.\n\n\n## Configuration:\n\nSet these environment variables. Some of them may be overridden at runtime with command-line switches; run the relevant script with `--help` for more details.\n\n- `SOURCE_BUCKET`: the S3 bucket where DBRs are deposited by Amazon.\n- `STAGING_BUCKET`: the S3 bucket into which pre-processed DBRs are staged before importing to redshift.\n- `AWS_KEY` *or* `SOURCE_AWS_KEY` and `STAGING_AWS_KEY`: the AWS access key ID credential for accessing S3. If the same credentials are used for both the source and staging buckets, you can just set `AWS_KEY`. If separate credentials are necessary, you can specify `SOURCE_AWS_KEY` *and* `STAGING_AWS_KEY` instead.\n- `AWS_SECRET` *or* `SOURCE_AWS_SECRET` and `STAGING_AWS_SECRET`: Same as `AWS_KEY`, but for your AWS access key secret.\n- `REDSHIFT_URI`: a connection URI for redshift. Should include credentials, like the form `postgres://myUser:s0mep4ssword@hostname:port/dbname`\n- `ROLLBAR_TOKEN`: a token for error reporting to Rollbar.\n- `ROLLBAR_ENVIRONMENT`: an environment name for error reporting to Rollbar.\n\n\n## Usage\n\nThere are two scripts: `import_finalized.js` and `import_month_to_date.js`. Both are intended to be run on a daily schedule, preferably at night. 
Run duration is largely dependent on the size of your DBRs; for large DBRs runs of a few hours are common.\n\nInvoke either with `--help` for invocation instructions.\n\n\n#### `import_finalized.js`\n\nThis script imports \"finalized\" DBRs — specifically, the DBR for the previous month according to UTC.\n\nThe script first checks to see if there's a finalized DBR which hasn't been imported yet. If there is no new finalized DBR, the script terminates immediately. Once a month, when a new finalized DBR appears, the script will download, unzip, gzip, stage, and import the DBR into a temporary table named `staging_YYYY_MM`. Once that process is complete, it adds a `statement_month` column with the relevant month, copies the entire staging table into `line_items`, drops the staging table, and `VACUUM`s the line_items table.\n\n#### `import_month_to_date.js`\n\nThis script imports \"month-to-date\" DBRs, which contain \"estimated\" billing data but are not 100% accurate. Upon every import, the current month's DBR is downloaded, unzipped, gzipped, and staged. 
The `month_to_date` table is emptied by means of [TRUNCATE](http://docs.aws.amazon.com/redshift/latest/dg/r_TRUNCATE.html) (eliminating the need for an interim VACUUM), and the staged DBR is imported, followed by a VACUUM.\n\n### Usage tips\n\nYou can run these on your local machine, but unless you live very near the AWS datacenters where your source and staging S3 buckets are located, you'll have better performance running them on Heroku.\n\nUse PX dynos for invoking either script; smaller dyno types lack the memory and storage to get the job done.\n\nHere's a sample invocation:\n\n`heroku run -s PX \"iojs import_finalized.js\"`\n\nIf you want to run it without fear of laptop disconnections, you can run the process in detached mode:\n\n`heroku run:detached -s PX \"iojs import_finalized.js\"`\n\nYou can track progress by running `heroku logs -t`.\n\n## Future improvements\n\n- One-off month imports\n- Heroku button!\n\n## Meta\n\nLicense: MIT. See LICENSE.txt.\n\nQuestions? Comments? Hit up tools@heroku.com.\n"
  },
  {
    "path": "sql/copy_ri_leases.sql",
    "content": "COPY ri_leases\nFROM 's3://heroku-detailed-billing-staging/a_lease_report.csv'\nCREDENTIALS 'aws_access_key_id=FOO;aws_secret_access_key=BAR'\nCSV\nIGNOREHEADER 1\nDATEFORMAT 'MM/DD/YYYY';\n"
  },
  {
    "path": "sql/create_line_items.sql",
    "content": "CREATE TABLE IF NOT EXISTS line_items (\n  invoice_id TEXT,\n  payer_account_id TEXT,\n  linked_account_id TEXT,\n  record_type TEXT,\n  record_id TEXT,\n  product_name TEXT,\n  rate_id TEXT,\n  subscription_id TEXT,\n  pricing_plan_id TEXT,\n  usage_type TEXT,\n  operation TEXT,\n  availability_zone TEXT,\n  reserved_instance TEXT,\n  item_description TEXT,\n  usage_start_date TIMESTAMP,\n  usage_end_date TIMESTAMP,\n  usage_quantity FLOAT8,\n  blended_rate NUMERIC(18,11),\n  blended_cost NUMERIC(18,11),\n  unblended_rate NUMERIC(18,11),\n  unblended_cost NUMERIC(18,11),\n  resource_id TEXT,\n  cloud TEXT,\n  slot TEXT,\n  statement_month DATE,\n  PRIMARY KEY(record_id)\n) DISTSTYLE EVEN SORTKEY(statement_month);\n"
  },
  {
    "path": "sql/create_month_to_date.sql",
    "content": "CREATE TABLE IF NOT EXISTS month_to_date (\n  invoice_id TEXT,\n  payer_account_id TEXT,\n  linked_account_id TEXT,\n  record_type TEXT,\n  record_id TEXT,\n  product_name TEXT,\n  rate_id TEXT,\n  subscription_id TEXT,\n  pricing_plan_id TEXT,\n  usage_type TEXT,\n  operation TEXT,\n  availability_zone TEXT,\n  reserved_instance TEXT,\n  item_description TEXT,\n  usage_start_date TIMESTAMP,\n  usage_end_date TIMESTAMP,\n  usage_quantity FLOAT8,\n  blended_rate NUMERIC(18,11),\n  blended_cost NUMERIC(18,11),\n  unblended_rate NUMERIC(18,11),\n  unblended_cost NUMERIC(18,11),\n  resource_id TEXT,\n  cloud TEXT,\n  slot TEXT,\n  PRIMARY KEY(record_id)\n) DISTSTYLE EVEN SORTKEY(usage_start_date);\n"
  },
  {
    "path": "sql/create_ri_leases.sql",
    "content": "CREATE TABLE IF NOT EXISTS ri_leases (\n  account_id TEXT,\n  payer_account_id TEXT,\n  start_date DATE,\n  end_date DATE,\n  lease_term TEXT,\n  availability_zone TEXT,\n  instance_type TEXT,\n  os TEXT,\n  utilization TEXT,\n  tenancy TEXT,\n  fixed_price NUMERIC(11, 6),\n  usage_price NUMERIC(8, 6),\n  instance_count INT,\n  lease_id TEXT,\n  subscription_id TEXT,\n  state TEXT,\n  PRIMARY KEY(subscription_id)\n);\n"
  },
  {
    "path": "unzip.js",
    "content": "/* jshint esnext: true */\n\nvar util = require('util');\nvar fs = require('fs');\nvar AWS = require('aws-sdk');\nvar progress = require('progress-stream');\nvar prettyBytes = require('pretty-bytes');\nvar moment = require('moment');\nvar numeral = require('numeral');\nvar log = require('loglevel');\nvar pg = require('pg');\nvar child_process = require('child_process');\nvar zlib = require('zlib');\nvar debounce = require('debounce');\nvar ArgumentParser = require('argparse').ArgumentParser;\n\nvar rollbar = require('rollbar');\nrollbar.init(process.env.ROLLBAR_TOKEN);\n\nvar parser = new ArgumentParser({\n  version: '0.0.1',\n  addHelp: true,\n  description: \"Unzips detailed billing reports\"\n});\n\nparser.addArgument(\n  ['-i', '--source-bucket'], {\n    help: 'The source S3 bucket name',\n    defaultValue: process.env.DBR_BUCKET\n  }\n);\n\nparser.addArgument(\n  ['-o', '--dest-bucket'], {\n    help: 'The destination S3 bucket name',\n    defaultValue: process.env.STAGING_BUCKET\n  }\n);\n\nparser.addArgument(\n  ['-r', '--redshift-url'], {\n    help: 'The destination S3 bucket name',\n    defaultValue: process.env.REDSHIFT_URL\n  }\n);\n\nparser.addArgument(\n  ['-t', '--target-table'], {\n    help: 'The redshift table to copy data into',\n    defaultValue: 'line_items'\n  }\n);\n\nparser.addArgument(\n  ['-f', '--file'], {\n    help: 'The file to unzip and copy',\n    required: true\n  }\n);\n\nparser.addArgument(\n  ['-d', '--debug'], {\n    action: 'storeConst',\n    dest: 'debug',\n    help: 'Turn on debugging output',\n    constant: true\n  }\n);\n\nvar args = parser.parseArgs();\n\nif (args.debug) {\n  log.setLevel('debug');\n  log.debug(\"Debugging output enabled.\");\n} else {\n  log.setLevel('info');\n}\nlog.debug(args);\n\nvar dbrClientOptions = {\n  accessKeyId: process.env.DBR_AWS_ACCESS_KEY_ID,\n  secretAccessKey: process.env.DBR_AWS_SECRET_ACCESS_KEY\n};\n\nvar stagingClientOptions = {\n  accessKeyId: 
process.env.IDAN_AWS_ACCESS_KEY_ID,\n  secretAccessKey: process.env.IDAN_AWS_SECRET_ACCESS_KEY\n};\n\n// We'll need these handy in various places\nvar monthMatch = /(\\d{4})-(\\d{2})/.exec(args.file);\nvar year = monthMatch[1];\nvar month = monthMatch[2];\nvar monthString = `${year}_${month}`;\nvar monthDateString = `${year}-${month}-01`;\n\n// S3 Clients for the DBR and staging buckets\nvar dbrClient = new AWS.S3(dbrClientOptions);\nvar stagingClient = new AWS.S3(stagingClientOptions);\n\n\n// ==============================================================\n// Each of the major steps is a function which returns a promise.\n// ==============================================================\n\nvar downloadFile = function(bucket, key) {\n  // Downloads the specified DBR zip\n  log.info(`${monthString} (download): downloading '${key}' from S3...`);\n  return new Promise(function(resolve, reject) {\n    var sourceParams = {\n      Bucket: bucket,\n      Key: key\n    };\n    var outStream = fs.createWriteStream(key);\n\n    var downloadProgress = progress({\n      length: 0,\n      time: 1000\n    });\n    downloadProgress.on(\"progress\", function(progress) {\n      percentage = numeral(progress.percentage/100).format('00.0%');\n      eta = moment.duration(progress.eta * 1000).humanize();\n      log.info(`${monthString} (download): ${percentage} (${eta} at ${prettyBytes(progress.speed)}/sec)`);\n    });\n\n    var request = dbrClient.getObject(sourceParams);\n    request.on('httpHeaders', function(status, headers, resp) {\n      totalLength = parseInt(headers['content-length'], 10);\n      downloadProgress.setLength(totalLength);\n    });\n\n    var zipfileStream = request.createReadStream();\n    zipfileStream.pipe(downloadProgress)\n                 .pipe(outStream);\n\n    outStream.on('close', function() {\n      var durationString = moment.utc(moment.utc() - startTime).format(\"HH:mm:ss.SSS\");\n      log.info(`${monthString} (download): complete 
(${durationString})`);\n      resolve(key);\n    });\n  });\n};\n\nvar processZipFile = function(zipFileName) {\n  // Unzip, gzip, and upload to the staging bucket on S3\n  log.info(`${monthString} (process): processing '${zipFileName}'...`);\n\n  // In theory, zipfiles can contain multiple files\n  // We know that the DBR zip has only one file inside, the DBR CSV\n  return new Promise(function(resolve, reject) {\n    var uncompressedLength = parseInt(child_process.execSync(\n      `zipinfo -t ${zipFileName} | cut -d ' ' -f 3`, {encoding: 'utf8'}\n    ));\n\n    // Hack off the '.zip'\n    var plainFileName = zipFileName.substr(0, zipFileName.length - 4);\n\n    // For monitoring unzip progress\n    var unzipProgress = progress({time: 10000, length: uncompressedLength}, function(progress) {\n      percentage = numeral(progress.percentage/100).format('00.0%');\n      eta = moment.duration(progress.eta * 1000).humanize();\n      log.info(`${monthString} (process-unzip): ${percentage} (${eta} at ${prettyBytes(progress.speed)}/sec)`);\n    });\n\n    // For monitoring gzip progress.\n    // From this point forward in the stream, we don't know the stream length as\n    // we don't know how much the stream will compress down to until it's done.\n    var gzipProgress = progress({time: 10000}, function(progress) {\n      log.info(`${monthString} (process-gzip): ${prettyBytes(progress.transferred)} at ${prettyBytes(progress.speed)}/sec`);\n    });\n\n    // Hook up every part of the stream prior to the HTTP upload to S3\n    // Stream not flowing at this point! 
Triggered by request.send() below.\n    var unzipGzipStream = child_process.spawn('unzip', ['-p', './' + zipFileName])\n                                       .stdout\n                                       .pipe(unzipProgress)\n                                       .pipe(zlib.createGzip())\n                                       .pipe(gzipProgress);\n\n    // Prepare the upload to S3 with the stream as the body\n    var requestParams = {\n      Bucket: process.env.STAGING_BUCKET,\n      Key: `${plainFileName}.gz`,\n      Body: unzipGzipStream\n    };\n    var request = stagingClient.upload(requestParams);\n    request.on('httpUploadProgress', debounce(function(progress) {\n      log.info(`${monthString} (process-upload): ${prettyBytes(progress.loaded)}`);\n    }, 1000, true));\n\n    // Fire the upload request, gets the stream flowing.\n    request.send(function(err, data) {\n      if (err) throw err;\n      var durationString = moment.utc(moment.utc() - startTime).format(\"HH:mm:ss.SSS\");\n      log.info(`${monthString} (upload): complete (${durationString})`);\n      resolve(`s3://${requestParams.Bucket}/${requestParams.Key}`);\n    });\n\n  });\n};\n\nvar importToRedshift = function(s3uri) {\n  // Import the gzipped DBR from the staging bucket into a staging table on\n  // redshift. 
Add the statement_month column and then copy from staging into\n  // line_items, then drop the staging table.\n  log.info(`${monthString} (import): importing to redshift...`);\n  return new Promise(function(resolve, reject) {\n    var client = new pg.Client(args.redshift_url);\n    client.connect(function(err) {\n      if (err) throw err;\n      var stagingTableName = `staging_${monthString}`;\n      var query = `\n        BEGIN;\n          -- can't create and alter because statement_month is the sortkey\n          -- must create from scratch\n          -- CREATE TABLE ${stagingTableName} (LIKE line_items);\n          -- ALTER TABLE ${stagingTableName} DROP COLUMN statement_month;\n\n          CREATE TABLE IF NOT EXISTS ${stagingTableName} (\n            invoice_id TEXT,\n            payer_account_id TEXT,\n            linked_account_id TEXT,\n            record_type TEXT,\n            record_id TEXT,\n            product_name TEXT,\n            rate_id TEXT,\n            subscription_id TEXT,\n            pricing_plan_id TEXT,\n            usage_type TEXT,\n            operation TEXT,\n            availability_zone TEXT,\n            reserved_instance TEXT,\n            item_description TEXT,\n            usage_start_date TIMESTAMP,\n            usage_end_date TIMESTAMP,\n            usage_quantity FLOAT8,\n            blended_rate NUMERIC(18,11),\n            blended_cost NUMERIC(18,11),\n            unblended_rate NUMERIC(18,11),\n            unblended_cost NUMERIC(18,11),\n            resource_id TEXT,\n            cloud TEXT,\n            slot TEXT,\n            PRIMARY KEY(record_id)\n          ) DISTSTYLE EVEN;\n\n          COPY ${stagingTableName}\n            FROM '${s3uri}'\n            CREDENTIALS 'aws_access_key_id=${stagingClientOptions.accessKeyId};aws_secret_access_key=${stagingClientOptions.secretAccessKey}'\n            GZIP CSV IGNOREHEADER 1;\n          ALTER TABLE ${stagingTableName} ADD COLUMN statement_month DATE DEFAULT 
'${monthDateString}';\n          INSERT INTO line_items SELECT * FROM ${stagingTableName};\n          DROP TABLE ${stagingTableName};\n        COMMIT;\n        -- ANALYZE line_items;\n        -- VACUUM line_items;\n      `;\n      log.debug(query);\n      client.query(query, function(err, result) {\n        if (err) throw err;\n        var durationString = moment.utc(moment.utc() - startTime).format(\"HH:mm:ss.SSS\");\n        log.info(`${monthString} (import): complete (${durationString})`);\n        resolve(s3uri);\n      });\n    });\n  });\n};\n\n\n// Kick off the promise chain.\nvar startTime = moment.utc();\ndownloadFile(args.source_bucket, args.file)\n  .then(processZipFile)\n  .then(importToRedshift)\n  .then(function(s3uri) {\n    var durationString = moment.utc(moment.utc() - startTime).format(\"HH:mm:ss.SSS\");\n    log.info(`${monthString}: Import complete! Took ${durationString}`);\n    process.exit();\n  })\n  .catch(function(err) {\n    var durationString = moment.utc(moment.utc() - startTime).format(\"HH:mm:ss.SSS\");\n    log.error(`${monthString}: Something went terribly wrong after ${durationString}`);\n    log.error(err);\n    log.error(err.stack);\n\t\trollbar.handleError(err);\n    process.exit();\n  });\n"
  }
]