[
  {
    "path": "LICENSE",
    "content": "Copyright (c) 2015, Kelly Davis \nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n* Redistributions of source code must retain the above copyright notice, this\n  list of conditions and the following disclaimer.\n\n* Redistributions in binary form must reproduce the above copyright notice, this\n  list of conditions and the following disclaimer in the documentation and/or\n  other materials provided with the distribution.\n\n* Neither the name of the {organization} nor the names of its\n  contributors may be used to endorse or promote products derived from\n  this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR\nANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\nLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\nSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "README.md",
    "content": "# Voice activity detection in Javascript\n\n\n__vad.js__ is a small Javascript library for voice activity detection.\n\n### Quick Start\n\n```html\n<!DOCTYPE html>\n<html>\n<head>\n<meta charset=\"utf-8\" />\n<title>VAD Test</title>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\" />\n</head>\n<body>\n<script type=\"text/javascript\" src=\"lib/vad.js\"></script>\n<script type=\"text/javascript\">\n  // Create AudioContext\n  window.AudioContext = window.AudioContext || window.webkitAudioContext;\n  var audioContext = new AudioContext();\n\n  // Define function called by getUserMedia \n  function startUserMedia(stream) {\n    // Create MediaStreamAudioSourceNode\n    var source = audioContext.createMediaStreamSource(stream);\n\n    // Setup options\n    var options = {\n     source: source,\n     voice_stop: function() {console.log('voice_stop');}, \n     voice_start: function() {console.log('voice_start');}\n    }; \n    \n    // Create VAD\n    var vad = new VAD(options);\n  }\n\n  // Ask for audio device\n  navigator.getUserMedia = navigator.getUserMedia || \n                           navigator.mozGetUserMedia || \n                           navigator.webkitGetUserMedia;\n  navigator.getUserMedia({audio: true}, startUserMedia, function(e) {\n          console.log(\"No live audio input in this browser: \" + e);\n  });\n</script>\n</body>\n</html>\n```\n\n\n### Tested - Browser\n* Firefox 45.0a1+\n\n##Author\n\n* Kelly Davis kdavis@mozilla.com\n* Mark Panaghiston https://github.com/thepag\n\n\n##Thanks\nThe code is based on the following implementations: \n\n+ https://github.com/happyworm/Playful-Demos\n\n##Contribution\n\nAny contribution will be welcome!\n\n"
  },
  {
    "path": "bower.json",
    "content": "{\n  \"name\": \"vad.js\",\n  \"homepage\": \"https://github.com/kdavis-mozilla/vad.js\",\n  \"authors\": [\n    \"Kelly Davis <kdavis@mozilla.com>\"\n  ],\n  \"description\": \"Voice activity detection in Javascript\",\n  \"main\": \"lib/vad.js\",\n  \"moduleType\": [],\n  \"keywords\": [\n    \"vad\",\n    \"voice\",\n    \"activity\",\n    \"detection\"\n  ],\n  \"license\": \"BSD-3-Clause\",\n  \"ignore\": [\n    \"**/.*\",\n    \"node_modules\",\n    \"bower_components\",\n    \"test\",\n    \"tests\"\n  ]\n}\n"
  },
  {
    "path": "lib/vad.js",
    "content": "(function(window) {\n\n  var VAD = function(options) {\n    // Default options\n    this.options = {\n      fftSize: 512,\n      bufferLen: 512, \n      voice_stop: function() {},\n      voice_start: function() {},\n      smoothingTimeConstant: 0.99, \n      energy_offset: 1e-8, // The initial offset.\n      energy_threshold_ratio_pos: 2, // Signal must be twice the offset\n      energy_threshold_ratio_neg: 0.5, // Signal must be half the offset\n      energy_integration: 1, // Size of integration change compared to the signal per second.\n      filter: [\n        {f: 200, v:0}, // 0 -> 200 is 0\n        {f: 2000, v:1} // 200 -> 2k is 1\n      ],\n      source: null,\n      context: null\n    };\n\n    // User options\n    for(var option in options) {\n      if(options.hasOwnProperty(option)) {\n        this.options[option] = options[option];\n      }\n    }\n\n    // Require source\n   if(!this.options.source)\n     throw new Error(\"The options must specify a MediaStreamAudioSourceNode.\");\n\n    // Set this.options.context\n    this.options.context = this.options.source.context;\n\n    // Calculate time relationships\n    this.hertzPerBin = this.options.context.sampleRate / this.options.fftSize;\n    this.iterationFrequency = this.options.context.sampleRate / this.options.bufferLen;\n    this.iterationPeriod = 1 / this.iterationFrequency;\n\n    var DEBUG = true;\n    if(DEBUG) console.log(\n      'Vad' +\n      ' | sampleRate: ' + this.options.context.sampleRate +\n      ' | hertzPerBin: ' + this.hertzPerBin +\n      ' | iterationFrequency: ' + this.iterationFrequency +\n      ' | iterationPeriod: ' + this.iterationPeriod\n    );\n\n    this.setFilter = function(shape) {\n      this.filter = [];\n      for(var i = 0, iLen = this.options.fftSize / 2; i < iLen; i++) {\n        this.filter[i] = 0;\n        for(var j = 0, jLen = shape.length; j < jLen; j++) {\n          if(i * this.hertzPerBin < shape[j].f) {\n            this.filter[i] = shape[j].v;\n            break; // Exit j loop\n          }\n        }\n      }\n    }\n\n    this.setFilter(this.options.filter);\n\n    this.ready = {};\n    this.vadState = false; // True when Voice Activity Detected\n\n    // Energy detector props\n    this.energy_offset = this.options.energy_offset;\n    this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;\n    this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;\n\n    this.voiceTrend = 0;\n    this.voiceTrendMax = 10;\n    this.voiceTrendMin = -10;\n    this.voiceTrendStart = 5;\n    this.voiceTrendEnd = -5;\n\n    // Create analyser \n    this.analyser = this.options.context.createAnalyser();\n    this.analyser.smoothingTimeConstant = this.options.smoothingTimeConstant; // 0.99;\n    this.analyser.fftSize = this.options.fftSize;\n\n    this.floatFrequencyData = new Float32Array(this.analyser.frequencyBinCount);\n\n    // Setup local storage of the Linear FFT data\n    this.floatFrequencyDataLinear = new Float32Array(this.floatFrequencyData.length);\n\n    // Connect this.analyser\n    this.options.source.connect(this.analyser); \n\n    // Create ScriptProcessorNode\n    this.scriptProcessorNode = this.options.context.createScriptProcessor(this.options.bufferLen, 1, 1);\n\n    // Connect scriptProcessorNode (Theretically, not required)\n    this.scriptProcessorNode.connect(this.options.context.destination);\n\n    // Create callback to update/analyze floatFrequencyData\n    var self = this;\n    this.scriptProcessorNode.onaudioprocess = function(event) {\n      self.analyser.getFloatFrequencyData(self.floatFrequencyData);\n      self.update();\n      self.monitor();\n    };\n\n    // Connect scriptProcessorNode\n    this.options.source.connect(this.scriptProcessorNode);\n\n    // log stuff\n    this.logging = false;\n    this.log_i = 0;\n    this.log_limit = 100;\n\n    this.triggerLog = function(limit) {\n      this.logging = true;\n      this.log_i = 0;\n      this.log_limit = typeof limit === 'number' ? limit : this.log_limit;\n    }\n\n    this.log = function(msg) {\n      if(this.logging && this.log_i < this.log_limit) {\n        this.log_i++;\n        console.log(msg);\n      } else {\n        this.logging = false;\n      }\n    }\n\n    this.update = function() {\n      // Update the local version of the Linear FFT\n      var fft = this.floatFrequencyData;\n      for(var i = 0, iLen = fft.length; i < iLen; i++) {\n        this.floatFrequencyDataLinear[i] = Math.pow(10, fft[i] / 10);\n      }\n      this.ready = {};\n    }\n\n    this.getEnergy = function() {\n      if(this.ready.energy) {\n        return this.energy;\n      }\n\n      var energy = 0;\n      var fft = this.floatFrequencyDataLinear;\n\n      for(var i = 0, iLen = fft.length; i < iLen; i++) {\n        energy += this.filter[i] * fft[i] * fft[i];\n      }\n\n      this.energy = energy;\n      this.ready.energy = true;\n\n      return energy;\n    }\n\n    this.monitor = function() {\n      var energy = this.getEnergy();\n      var signal = energy - this.energy_offset;\n\n      if(signal > this.energy_threshold_pos) {\n        this.voiceTrend = (this.voiceTrend + 1 > this.voiceTrendMax) ? this.voiceTrendMax : this.voiceTrend + 1;\n      } else if(signal < -this.energy_threshold_neg) {\n        this.voiceTrend = (this.voiceTrend - 1 < this.voiceTrendMin) ? this.voiceTrendMin : this.voiceTrend - 1;\n      } else {\n        // voiceTrend gets smaller\n        if(this.voiceTrend > 0) {\n          this.voiceTrend--;\n        } else if(this.voiceTrend < 0) {\n          this.voiceTrend++;\n        }\n      }\n\n      var start = false, end = false;\n      if(this.voiceTrend > this.voiceTrendStart) {\n        // Start of speech detected\n        start = true;\n      } else if(this.voiceTrend < this.voiceTrendEnd) {\n        // End of speech detected\n        end = true;\n      }\n\n      // Integration brings in the real-time aspect through the relationship with the frequency this functions is called.\n      var integration = signal * this.iterationPeriod * this.options.energy_integration;\n\n      // Idea?: The integration is affected by the voiceTrend magnitude? - Not sure. Not doing atm.\n\n      // The !end limits the offset delta boost till after the end is detected.\n      if(integration > 0 || !end) {\n        this.energy_offset += integration;\n      } else {\n        this.energy_offset += integration * 10;\n      }\n      this.energy_offset = this.energy_offset < 0 ? 0 : this.energy_offset;\n      this.energy_threshold_pos = this.energy_offset * this.options.energy_threshold_ratio_pos;\n      this.energy_threshold_neg = this.energy_offset * this.options.energy_threshold_ratio_neg;\n\n      // Broadcast the messages\n      if(start && !this.vadState) {\n        this.vadState = true;\n        this.options.voice_start();\n      }\n      if(end && this.vadState) {\n        this.vadState = false;\n        this.options.voice_stop();\n      }\n\n      this.log(\n        'e: ' + energy +\n        ' | e_of: ' + this.energy_offset +\n        ' | e+_th: ' + this.energy_threshold_pos +\n        ' | e-_th: ' + this.energy_threshold_neg +\n        ' | signal: ' + signal +\n        ' | int: ' + integration +\n        ' | voiceTrend: ' + this.voiceTrend +\n        ' | start: ' + start +\n        ' | end: ' + end\n      );\n\n      return signal;\n    }\n  };\n\n  window.VAD = VAD;\n\n})(window);\n"
  }
]